Add files using upload-large-folder tool

Browse files

Files changed (11) hide show

README.md +1 -0
config.json +64 -0
experiment_cfg/metadata.json +187 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
optimizer.pt +3 -0
rng_state_0.pth +3 -0
rng_state_1.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +734 -0

README.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ Finetuned on 333x3 episodes of red, green, & blue cubes being touched. Generalizes well to other colors and shapes. Checkpointed at 1k steps.

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,187 @@

+{
+    "new_embodiment": {
+        "statistics": {
+            "state": {
+                "single_arm": {
+                    "max": [
+                        2.0003252029418945,
+                        2.8696048259735107,
+                        -2.973998198285699e-05,
+                        1.2001643180847168,
+                        1.636069416999817
+                    ],
+                    "min": [
+                        -1.6366058588027954,
+                        -0.0013483419315889478,
+                        -2.867553949356079,
+                        -0.15347807109355927,
+                        -2.408719539642334
+                    ],
+                    "mean": [
+                        0.5613881349563599,
+                        1.2009493112564087,
+                        -1.1912317276000977,
+                        0.7247916460037231,
+                        -0.5904473662376404
+                    ],
+                    "std": [
+                        0.9785857200622559,
+                        0.7331446409225464,
+                        0.6282459497451782,
+                        0.23302637040615082,
+                        0.9657405614852905
+                    ],
+                    "q01": [
+                        -1.5449657726287842,
+                        -1.0683193213480989e-05,
+                        -2.7843262195587157,
+                        0.2979250168800354,
+                        -1.9947426080703736
+                    ],
+                    "q99": [
+                        1.9999996423721313,
+                        2.7311124897003163,
+                        -0.3246664524078371,
+                        1.2000004053115845,
+                        1.5377632951736446
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        0.703035831451416
+                    ],
+                    "min": [
+                        -0.06299631297588348
+                    ],
+                    "mean": [
+                        0.024504341185092926
+                    ],
+                    "std": [
+                        0.08595108985900879
+                    ],
+                    "q01": [
+                        -0.010759906060993672
+                    ],
+                    "q99": [
+                        0.4408635914325714
+                    ]
+                }
+            },
+            "action": {
+                "single_arm": {
+                    "max": [
+                        2.000000476837158,
+                        2.8696048259735107,
+                        -0.029101531952619553,
+                        1.2001643180847168,
+                        1.6363924741744995
+                    ],
+                    "min": [
+                        -1.6367615461349487,
+                        -0.0013483419315889478,
+                        -2.870177745819092,
+                        -0.15347807109355927,
+                        -2.378706455230713
+                    ],
+                    "mean": [
+                        0.5605649352073669,
+                        1.2173237800598145,
+                        -1.1972321271896362,
+                        0.7227405309677124,
+                        -0.5876930356025696
+                    ],
+                    "std": [
+                        0.9839946627616882,
+                        0.7347026467323303,
+                        0.6269937753677368,
+                        0.23072293400764465,
+                        0.9713127017021179
+                    ],
+                    "q01": [
+                        -1.5492996978759765,
+                        -8.676056131662335e-06,
+                        -2.7688085556030275,
+                        0.30216108322143553,
+                        -1.9937553739547729
+                    ],
+                    "q99": [
+                        1.9999995231628418,
+                        2.744625654220581,
+                        -0.33259372472763077,
+                        1.200000286102295,
+                        1.5410190296173096
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        0.703035831451416
+                    ],
+                    "min": [
+                        -0.06299631297588348
+                    ],
+                    "mean": [
+                        0.02159128151834011
+                    ],
+                    "std": [
+                        0.07854700088500977
+                    ],
+                    "q01": [
+                        -0.01098821684718132
+                    ],
+                    "q99": [
+                        0.41087187886238097
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "webcam": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                }
+            },
+            "state": {
+                "single_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "single_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "new_embodiment"
+    }
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a5a2e3982d2f0d475a6bbfd4b7eb7ace8d29cb89aead3eba6a96736e3138ec5
+size 4999367032

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f8f6e3d4c9ae4e9a3ed1d025d893b99a236845708cd12495469812f41b57a3d
+size 2586508600

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f75938496ae2e6484380c63bc241c2aa134598e299c3cde3af74c90448f6a3cc
+size 8550325978

rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6942e7d0fad9ea5ca8d6849b863c5fca113d1802c0c6b4b6cb63f75db30e17a1
+size 14512

rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:675c736ed11ba57de7a85176857b7317f9f79aec1c909ebf5cc00810df70079a
+size 14512

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:057194249d9cdc822a0752df7fedc436fc30dce92062cd380d9a3f5704199672
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,734 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.8090614886731392,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008090614886731391,
+      "grad_norm": 7.823570251464844,
+      "learning_rate": 6e-06,
+      "loss": 0.8903,
+      "step": 10
+    },
+    {
+      "epoch": 0.016181229773462782,
+      "grad_norm": 2.272317886352539,
+      "learning_rate": 1.2666666666666668e-05,
+      "loss": 0.4601,
+      "step": 20
+    },
+    {
+      "epoch": 0.024271844660194174,
+      "grad_norm": 1.5555341243743896,
+      "learning_rate": 1.9333333333333333e-05,
+      "loss": 0.276,
+      "step": 30
+    },
+    {
+      "epoch": 0.032362459546925564,
+      "grad_norm": 1.7244459390640259,
+      "learning_rate": 2.6000000000000002e-05,
+      "loss": 0.1865,
+      "step": 40
+    },
+    {
+      "epoch": 0.040453074433656956,
+      "grad_norm": 1.4668376445770264,
+      "learning_rate": 3.266666666666667e-05,
+      "loss": 0.1422,
+      "step": 50
+    },
+    {
+      "epoch": 0.04854368932038835,
+      "grad_norm": 2.02225661277771,
+      "learning_rate": 3.933333333333333e-05,
+      "loss": 0.1238,
+      "step": 60
+    },
+    {
+      "epoch": 0.05663430420711974,
+      "grad_norm": 1.1487993001937866,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 0.0791,
+      "step": 70
+    },
+    {
+      "epoch": 0.06472491909385113,
+      "grad_norm": 0.5569996237754822,
+      "learning_rate": 5.266666666666666e-05,
+      "loss": 0.0819,
+      "step": 80
+    },
+    {
+      "epoch": 0.07281553398058252,
+      "grad_norm": 1.194751262664795,
+      "learning_rate": 5.9333333333333343e-05,
+      "loss": 0.0698,
+      "step": 90
+    },
+    {
+      "epoch": 0.08090614886731391,
+      "grad_norm": 0.8545956015586853,
+      "learning_rate": 6.6e-05,
+      "loss": 0.0732,
+      "step": 100
+    },
+    {
+      "epoch": 0.0889967637540453,
+      "grad_norm": 0.6914941072463989,
+      "learning_rate": 7.266666666666667e-05,
+      "loss": 0.0641,
+      "step": 110
+    },
+    {
+      "epoch": 0.0970873786407767,
+      "grad_norm": 0.4049356281757355,
+      "learning_rate": 7.933333333333334e-05,
+      "loss": 0.0565,
+      "step": 120
+    },
+    {
+      "epoch": 0.10517799352750809,
+      "grad_norm": 0.7143370509147644,
+      "learning_rate": 8.6e-05,
+      "loss": 0.0555,
+      "step": 130
+    },
+    {
+      "epoch": 0.11326860841423948,
+      "grad_norm": 0.9746547937393188,
+      "learning_rate": 9.266666666666666e-05,
+      "loss": 0.0523,
+      "step": 140
+    },
+    {
+      "epoch": 0.12135922330097088,
+      "grad_norm": 0.6225072145462036,
+      "learning_rate": 9.933333333333334e-05,
+      "loss": 0.0492,
+      "step": 150
+    },
+    {
+      "epoch": 0.12944983818770225,
+      "grad_norm": 0.6568852066993713,
+      "learning_rate": 9.999753945398704e-05,
+      "loss": 0.0478,
+      "step": 160
+    },
+    {
+      "epoch": 0.13754045307443366,
+      "grad_norm": 0.345377653837204,
+      "learning_rate": 9.998903417374228e-05,
+      "loss": 0.0465,
+      "step": 170
+    },
+    {
+      "epoch": 0.14563106796116504,
+      "grad_norm": 0.756610095500946,
+      "learning_rate": 9.997445481536973e-05,
+      "loss": 0.0467,
+      "step": 180
+    },
+    {
+      "epoch": 0.15372168284789645,
+      "grad_norm": 0.494722843170166,
+      "learning_rate": 9.995380315038119e-05,
+      "loss": 0.0407,
+      "step": 190
+    },
+    {
+      "epoch": 0.16181229773462782,
+      "grad_norm": 0.7570058107376099,
+      "learning_rate": 9.99270816881235e-05,
+      "loss": 0.042,
+      "step": 200
+    },
+    {
+      "epoch": 0.16990291262135923,
+      "grad_norm": 0.3776613473892212,
+      "learning_rate": 9.989429367547377e-05,
+      "loss": 0.0429,
+      "step": 210
+    },
+    {
+      "epoch": 0.1779935275080906,
+      "grad_norm": 0.5018635988235474,
+      "learning_rate": 9.985544309644475e-05,
+      "loss": 0.0367,
+      "step": 220
+    },
+    {
+      "epoch": 0.18608414239482202,
+      "grad_norm": 0.3979959487915039,
+      "learning_rate": 9.98105346717008e-05,
+      "loss": 0.0341,
+      "step": 230
+    },
+    {
+      "epoch": 0.1941747572815534,
+      "grad_norm": 0.5319691300392151,
+      "learning_rate": 9.97595738579843e-05,
+      "loss": 0.0347,
+      "step": 240
+    },
+    {
+      "epoch": 0.2022653721682848,
+      "grad_norm": 0.5348716974258423,
+      "learning_rate": 9.970256684745258e-05,
+      "loss": 0.0308,
+      "step": 250
+    },
+    {
+      "epoch": 0.21035598705501618,
+      "grad_norm": 0.6536835432052612,
+      "learning_rate": 9.963952056692549e-05,
+      "loss": 0.0301,
+      "step": 260
+    },
+    {
+      "epoch": 0.21844660194174756,
+      "grad_norm": 0.5518924593925476,
+      "learning_rate": 9.957044267704384e-05,
+      "loss": 0.0364,
+      "step": 270
+    },
+    {
+      "epoch": 0.22653721682847897,
+      "grad_norm": 0.40961554646492004,
+      "learning_rate": 9.949534157133844e-05,
+      "loss": 0.0293,
+      "step": 280
+    },
+    {
+      "epoch": 0.23462783171521034,
+      "grad_norm": 0.7217941880226135,
+      "learning_rate": 9.941422637521035e-05,
+      "loss": 0.0363,
+      "step": 290
+    },
+    {
+      "epoch": 0.24271844660194175,
+      "grad_norm": 0.5057093501091003,
+      "learning_rate": 9.932710694482191e-05,
+      "loss": 0.0295,
+      "step": 300
+    },
+    {
+      "epoch": 0.25080906148867316,
+      "grad_norm": 0.75019770860672,
+      "learning_rate": 9.923399386589933e-05,
+      "loss": 0.033,
+      "step": 310
+    },
+    {
+      "epoch": 0.2588996763754045,
+      "grad_norm": 0.48032259941101074,
+      "learning_rate": 9.913489845244626e-05,
+      "loss": 0.0277,
+      "step": 320
+    },
+    {
+      "epoch": 0.2669902912621359,
+      "grad_norm": 0.5418844223022461,
+      "learning_rate": 9.902983274536912e-05,
+      "loss": 0.0264,
+      "step": 330
+    },
+    {
+      "epoch": 0.2750809061488673,
+      "grad_norm": 0.5400299429893494,
+      "learning_rate": 9.891880951101407e-05,
+      "loss": 0.028,
+      "step": 340
+    },
+    {
+      "epoch": 0.28317152103559873,
+      "grad_norm": 0.7953261733055115,
+      "learning_rate": 9.880184223961573e-05,
+      "loss": 0.0245,
+      "step": 350
+    },
+    {
+      "epoch": 0.2912621359223301,
+      "grad_norm": 0.47858819365501404,
+      "learning_rate": 9.867894514365802e-05,
+      "loss": 0.0272,
+      "step": 360
+    },
+    {
+      "epoch": 0.2993527508090615,
+      "grad_norm": 0.3962852954864502,
+      "learning_rate": 9.855013315614725e-05,
+      "loss": 0.0262,
+      "step": 370
+    },
+    {
+      "epoch": 0.3074433656957929,
+      "grad_norm": 0.840124785900116,
+      "learning_rate": 9.841542192879762e-05,
+      "loss": 0.0249,
+      "step": 380
+    },
+    {
+      "epoch": 0.3155339805825243,
+      "grad_norm": 0.6563257575035095,
+      "learning_rate": 9.82748278301294e-05,
+      "loss": 0.0268,
+      "step": 390
+    },
+    {
+      "epoch": 0.32362459546925565,
+      "grad_norm": 0.4959515929222107,
+      "learning_rate": 9.812836794348004e-05,
+      "loss": 0.0241,
+      "step": 400
+    },
+    {
+      "epoch": 0.33171521035598706,
+      "grad_norm": 0.5108354091644287,
+      "learning_rate": 9.797606006492841e-05,
+      "loss": 0.0329,
+      "step": 410
+    },
+    {
+      "epoch": 0.33980582524271846,
+      "grad_norm": 0.5321593880653381,
+      "learning_rate": 9.781792270113241e-05,
+      "loss": 0.0248,
+      "step": 420
+    },
+    {
+      "epoch": 0.3478964401294498,
+      "grad_norm": 0.7112411856651306,
+      "learning_rate": 9.765397506708023e-05,
+      "loss": 0.0266,
+      "step": 430
+    },
+    {
+      "epoch": 0.3559870550161812,
+      "grad_norm": 0.4580034911632538,
+      "learning_rate": 9.748423708375563e-05,
+      "loss": 0.0228,
+      "step": 440
+    },
+    {
+      "epoch": 0.3640776699029126,
+      "grad_norm": 0.43798476457595825,
+      "learning_rate": 9.730872937571739e-05,
+      "loss": 0.0239,
+      "step": 450
+    },
+    {
+      "epoch": 0.37216828478964403,
+      "grad_norm": 0.5347399711608887,
+      "learning_rate": 9.712747326859315e-05,
+      "loss": 0.0265,
+      "step": 460
+    },
+    {
+      "epoch": 0.3802588996763754,
+      "grad_norm": 0.5635089874267578,
+      "learning_rate": 9.69404907864883e-05,
+      "loss": 0.027,
+      "step": 470
+    },
+    {
+      "epoch": 0.3883495145631068,
+      "grad_norm": 0.3755838871002197,
+      "learning_rate": 9.674780464930979e-05,
+      "loss": 0.0234,
+      "step": 480
+    },
+    {
+      "epoch": 0.3964401294498382,
+      "grad_norm": 0.522113561630249,
+      "learning_rate": 9.654943827000548e-05,
+      "loss": 0.021,
+      "step": 490
+    },
+    {
+      "epoch": 0.4045307443365696,
+      "grad_norm": 0.43958115577697754,
+      "learning_rate": 9.634541575171929e-05,
+      "loss": 0.0214,
+      "step": 500
+    },
+    {
+      "epoch": 0.41262135922330095,
+      "grad_norm": 0.4480895698070526,
+      "learning_rate": 9.613576188486253e-05,
+      "loss": 0.0258,
+      "step": 510
+    },
+    {
+      "epoch": 0.42071197411003236,
+      "grad_norm": 0.410576730966568,
+      "learning_rate": 9.59205021441015e-05,
+      "loss": 0.0226,
+      "step": 520
+    },
+    {
+      "epoch": 0.42880258899676377,
+      "grad_norm": 0.45604780316352844,
+      "learning_rate": 9.569966268526232e-05,
+      "loss": 0.0256,
+      "step": 530
+    },
+    {
+      "epoch": 0.4368932038834951,
+      "grad_norm": 0.30307430028915405,
+      "learning_rate": 9.54732703421526e-05,
+      "loss": 0.0204,
+      "step": 540
+    },
+    {
+      "epoch": 0.4449838187702265,
+      "grad_norm": 0.29722708463668823,
+      "learning_rate": 9.524135262330098e-05,
+      "loss": 0.0198,
+      "step": 550
+    },
+    {
+      "epoch": 0.45307443365695793,
+      "grad_norm": 0.38580235838890076,
+      "learning_rate": 9.50039377086147e-05,
+      "loss": 0.0168,
+      "step": 560
+    },
+    {
+      "epoch": 0.46116504854368934,
+      "grad_norm": 0.39507967233657837,
+      "learning_rate": 9.476105444595534e-05,
+      "loss": 0.0157,
+      "step": 570
+    },
+    {
+      "epoch": 0.4692556634304207,
+      "grad_norm": 0.34573355317115784,
+      "learning_rate": 9.451273234763371e-05,
+      "loss": 0.0176,
+      "step": 580
+    },
+    {
+      "epoch": 0.4773462783171521,
+      "grad_norm": 0.2983342111110687,
+      "learning_rate": 9.425900158682385e-05,
+      "loss": 0.0177,
+      "step": 590
+    },
+    {
+      "epoch": 0.4854368932038835,
+      "grad_norm": 0.38746461272239685,
+      "learning_rate": 9.399989299389661e-05,
+      "loss": 0.0216,
+      "step": 600
+    },
+    {
+      "epoch": 0.4935275080906149,
+      "grad_norm": 0.39340198040008545,
+      "learning_rate": 9.373543805267368e-05,
+      "loss": 0.0221,
+      "step": 610
+    },
+    {
+      "epoch": 0.5016181229773463,
+      "grad_norm": 0.47980770468711853,
+      "learning_rate": 9.346566889660193e-05,
+      "loss": 0.0172,
+      "step": 620
+    },
+    {
+      "epoch": 0.5097087378640777,
+      "grad_norm": 0.421115905046463,
+      "learning_rate": 9.319061830484898e-05,
+      "loss": 0.0156,
+      "step": 630
+    },
+    {
+      "epoch": 0.517799352750809,
+      "grad_norm": 0.3385259509086609,
+      "learning_rate": 9.291031969832026e-05,
+      "loss": 0.0176,
+      "step": 640
+    },
+    {
+      "epoch": 0.5258899676375405,
+      "grad_norm": 0.32277145981788635,
+      "learning_rate": 9.262480713559808e-05,
+      "loss": 0.0169,
+      "step": 650
+    },
+    {
+      "epoch": 0.5339805825242718,
+      "grad_norm": 0.32181084156036377,
+      "learning_rate": 9.233411530880326e-05,
+      "loss": 0.0187,
+      "step": 660
+    },
+    {
+      "epoch": 0.5420711974110033,
+      "grad_norm": 0.5838663578033447,
+      "learning_rate": 9.20382795393797e-05,
+      "loss": 0.0228,
+      "step": 670
+    },
+    {
+      "epoch": 0.5501618122977346,
+      "grad_norm": 0.30314013361930847,
+      "learning_rate": 9.173733577380258e-05,
+      "loss": 0.02,
+      "step": 680
+    },
+    {
+      "epoch": 0.558252427184466,
+      "grad_norm": 0.5964832901954651,
+      "learning_rate": 9.143132057921058e-05,
+      "loss": 0.018,
+      "step": 690
+    },
+    {
+      "epoch": 0.5663430420711975,
+      "grad_norm": 0.4126530587673187,
+      "learning_rate": 9.112027113896262e-05,
+      "loss": 0.0173,
+      "step": 700
+    },
+    {
+      "epoch": 0.5744336569579288,
+      "grad_norm": 0.4253070652484894,
+      "learning_rate": 9.080422524811982e-05,
+      "loss": 0.0257,
+      "step": 710
+    },
+    {
+      "epoch": 0.5825242718446602,
+      "grad_norm": 0.5417248606681824,
+      "learning_rate": 9.048322130885305e-05,
+      "loss": 0.0177,
+      "step": 720
+    },
+    {
+      "epoch": 0.5906148867313916,
+      "grad_norm": 0.3459491431713104,
+      "learning_rate": 9.015729832577681e-05,
+      "loss": 0.019,
+      "step": 730
+    },
+    {
+      "epoch": 0.598705501618123,
+      "grad_norm": 0.3335317373275757,
+      "learning_rate": 8.982649590120982e-05,
+      "loss": 0.0169,
+      "step": 740
+    },
+    {
+      "epoch": 0.6067961165048543,
+      "grad_norm": 0.6572862267494202,
+      "learning_rate": 8.949085423036296e-05,
+      "loss": 0.0198,
+      "step": 750
+    },
+    {
+      "epoch": 0.6148867313915858,
+      "grad_norm": 0.540212869644165,
+      "learning_rate": 8.91504140964553e-05,
+      "loss": 0.016,
+      "step": 760
+    },
+    {
+      "epoch": 0.6229773462783171,
+      "grad_norm": 0.24550016224384308,
+      "learning_rate": 8.880521686575857e-05,
+      "loss": 0.0168,
+      "step": 770
+    },
+    {
+      "epoch": 0.6310679611650486,
+      "grad_norm": 0.5790821313858032,
+      "learning_rate": 8.845530448257085e-05,
+      "loss": 0.0184,
+      "step": 780
+    },
+    {
+      "epoch": 0.63915857605178,
+      "grad_norm": 0.6583966612815857,
+      "learning_rate": 8.810071946411989e-05,
+      "loss": 0.0167,
+      "step": 790
+    },
+    {
+      "epoch": 0.6472491909385113,
+      "grad_norm": 0.3252386450767517,
+      "learning_rate": 8.774150489539707e-05,
+      "loss": 0.0173,
+      "step": 800
+    },
+    {
+      "epoch": 0.6553398058252428,
+      "grad_norm": 0.37483498454093933,
+      "learning_rate": 8.737770442392212e-05,
+      "loss": 0.0169,
+      "step": 810
+    },
+    {
+      "epoch": 0.6634304207119741,
+      "grad_norm": 0.29752904176712036,
+      "learning_rate": 8.700936225443959e-05,
+      "loss": 0.018,
+      "step": 820
+    },
+    {
+      "epoch": 0.6715210355987055,
+      "grad_norm": 0.37370291352272034,
+      "learning_rate": 8.663652314354765e-05,
+      "loss": 0.0178,
+      "step": 830
+    },
+    {
+      "epoch": 0.6796116504854369,
+      "grad_norm": 0.24446973204612732,
+      "learning_rate": 8.625923239425978e-05,
+      "loss": 0.0132,
+      "step": 840
+    },
+    {
+      "epoch": 0.6877022653721683,
+      "grad_norm": 0.45145106315612793,
+      "learning_rate": 8.587753585050004e-05,
+      "loss": 0.0196,
+      "step": 850
+    },
+    {
+      "epoch": 0.6957928802588996,
+      "grad_norm": 0.24976196885108948,
+      "learning_rate": 8.549147989153276e-05,
+      "loss": 0.017,
+      "step": 860
+    },
+    {
+      "epoch": 0.7038834951456311,
+      "grad_norm": 0.41941019892692566,
+      "learning_rate": 8.510111142632698e-05,
+      "loss": 0.0153,
+      "step": 870
+    },
+    {
+      "epoch": 0.7119741100323624,
+      "grad_norm": 0.3423904776573181,
+      "learning_rate": 8.470647788785665e-05,
+      "loss": 0.0143,
+      "step": 880
+    },
+    {
+      "epoch": 0.7200647249190939,
+      "grad_norm": 0.28540822863578796,
+      "learning_rate": 8.430762722733714e-05,
+      "loss": 0.0125,
+      "step": 890
+    },
+    {
+      "epoch": 0.7281553398058253,
+      "grad_norm": 0.38557255268096924,
+      "learning_rate": 8.390460790839882e-05,
+      "loss": 0.0193,
+      "step": 900
+    },
+    {
+      "epoch": 0.7362459546925566,
+      "grad_norm": 0.496142715215683,
+      "learning_rate": 8.349746890119826e-05,
+      "loss": 0.0137,
+      "step": 910
+    },
+    {
+      "epoch": 0.7443365695792881,
+      "grad_norm": 0.42488303780555725,
+      "learning_rate": 8.308625967646795e-05,
+      "loss": 0.0159,
+      "step": 920
+    },
+    {
+      "epoch": 0.7524271844660194,
+      "grad_norm": 0.28551360964775085,
+      "learning_rate": 8.267103019950529e-05,
+      "loss": 0.0155,
+      "step": 930
+    },
+    {
+      "epoch": 0.7605177993527508,
+      "grad_norm": 0.3401723802089691,
+      "learning_rate": 8.225183092410128e-05,
+      "loss": 0.0133,
+      "step": 940
+    },
+    {
+      "epoch": 0.7686084142394822,
+      "grad_norm": 0.34012413024902344,
+      "learning_rate": 8.182871278641009e-05,
+      "loss": 0.0191,
+      "step": 950
+    },
+    {
+      "epoch": 0.7766990291262136,
+      "grad_norm": 0.385560005903244,
+      "learning_rate": 8.140172719875979e-05,
+      "loss": 0.0108,
+      "step": 960
+    },
+    {
+      "epoch": 0.7847896440129449,
+      "grad_norm": 0.3427627980709076,
+      "learning_rate": 8.097092604340542e-05,
+      "loss": 0.0121,
+      "step": 970
+    },
+    {
+      "epoch": 0.7928802588996764,
+      "grad_norm": 0.45653820037841797,
+      "learning_rate": 8.053636166622476e-05,
+      "loss": 0.0154,
+      "step": 980
+    },
+    {
+      "epoch": 0.8009708737864077,
+      "grad_norm": 0.3446105122566223,
+      "learning_rate": 8.009808687035798e-05,
+      "loss": 0.0171,
+      "step": 990
+    },
+    {
+      "epoch": 0.8090614886731392,
+      "grad_norm": 0.25365254282951355,
+      "learning_rate": 7.965615490979163e-05,
+      "loss": 0.014,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}