Add files using upload-large-folder tool
- .gitattributes +12 -0
- checkpoints/0000005000/.metadata +0 -0
- checkpoints/0000005000/__0_0.distcp +3 -0
- checkpoints/0000005000/__1_0.distcp +3 -0
- checkpoints/0000005000/__2_0.distcp +3 -0
- checkpoints/0000005000/__3_0.distcp +3 -0
- checkpoints/0000005000/params.json +1 -0
- checkpoints/0000005000/train_state_00000.json +1 -0
- checkpoints/0000005000/train_state_00001.json +1 -0
- checkpoints/0000005000/train_state_00002.json +1 -0
- checkpoints/0000005000/train_state_00003.json +1 -0
- checkpoints/0000007500/.metadata +0 -0
- checkpoints/0000007500/__0_0.distcp +3 -0
- checkpoints/0000007500/__1_0.distcp +3 -0
- checkpoints/0000007500/__2_0.distcp +3 -0
- checkpoints/0000007500/__3_0.distcp +3 -0
- checkpoints/0000007500/params.json +1 -0
- checkpoints/0000007500/train_state_00000.json +1 -0
- checkpoints/0000007500/train_state_00001.json +1 -0
- checkpoints/0000007500/train_state_00002.json +1 -0
- checkpoints/0000007500/train_state_00003.json +1 -0
- checkpoints/0000010000/.metadata +0 -0
- checkpoints/0000010000/__0_0.distcp +3 -0
- checkpoints/0000010000/__1_0.distcp +3 -0
- checkpoints/0000010000/__2_0.distcp +3 -0
- checkpoints/0000010000/__3_0.distcp +3 -0
- checkpoints/0000010000/params.json +1 -0
- checkpoints/0000010000/train_state_00000.json +1 -0
- checkpoints/0000010000/train_state_00001.json +1 -0
- checkpoints/0000010000/train_state_00002.json +1 -0
- checkpoints/0000010000/train_state_00003.json +1 -0
- config.yaml +127 -0
- metrics.jsonl +0 -0
- profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5045.html +0 -0
- profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5046.html +0 -0
- profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5047.html +0 -0
- profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5048.html +0 -0
- profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156949.html +0 -0
- profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156950.html +0 -0
- profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156951.html +0 -0
- profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156952.html +0 -0
- profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156949.1736859652960604292.pt.trace.json.gz +3 -0
- profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156950.1736859652960425115.pt.trace.json.gz +3 -0
- profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156951.1736859652960391492.pt.trace.json.gz +3 -0
- profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156952.1736859652964530446.pt.trace.json.gz +3 -0
- train.log +0 -0
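The commit was pushed with huggingface_hub's upload-large-folder tool; the natural way to pull the snapshot back down is snapshot_download. A minimal sketch is below — the repo_id is a placeholder (the real repository path is not shown in this diff), and the allow_patterns filter is just an example.

```python
# Sketch: download part of this training-run snapshot locally.
# NOTE: "user/llama1b2-mla-nope" is a hypothetical repo_id, not the real one.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="user/llama1b2-mla-nope",  # placeholder
    allow_patterns=["config.yaml", "metrics.jsonl", "checkpoints/0000010000/*"],
)
print("snapshot at:", local_dir)
```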
.gitattributes
CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000005000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000010000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000005000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000010000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000005000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000007500/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000007500/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000010000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000005000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000010000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000007500/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
+checkpoints/0000007500/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
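Each path added above is tracked by Git LFS, so the repository itself only stores a small pointer file (the version/oid/size triplets shown for the .distcp shards below) rather than the multi-GB payload. As a rough illustration, the sketch below scans a local clone for files that are still unhydrated pointers; it assumes a plain clone made without `git lfs pull`.

```python
# Sketch: list checkpoint shards in a local clone that are still Git LFS
# pointer stubs (payload not fetched yet). Assumes a plain `git clone`.
from pathlib import Path

LFS_HEADER = b"version https://git-lfs.github.com/spec/v1"

def is_lfs_pointer(path: Path) -> bool:
    # Pointer files are tiny text files that start with the LFS spec header.
    if path.stat().st_size > 1024:
        return False
    with path.open("rb") as f:
        return f.read(len(LFS_HEADER)) == LFS_HEADER

for p in sorted(Path("checkpoints").rglob("*.distcp")):
    print(p, "pointer" if is_lfs_pointer(p) else "hydrated")
```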
checkpoints/0000005000/.metadata
ADDED
Binary file (891 kB).
checkpoints/0000005000/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f04e232dab8ad35920d52d9281d96102858fefc41014af8f4781d9c3bf786b9a
size 6269795820
checkpoints/0000005000/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b33d3703bb4fbb653ac6f716667024a1bfd562b7284d0b55864ffd44529ae5e6
size 6269918456
checkpoints/0000005000/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:821751b288c26e511657b0e8c1fdd55b17d7d3658939b380d3cf3a5b1177a1b5
size 6269918456
checkpoints/0000005000/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1a9d1d460937073c8fab5ceca4dbf980a29637b92d3a93fe7c54c0c60d14262e
size 6269929976
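The `.metadata` file plus the four `__N_0.distcp` shards are the layout written by `torch.distributed.checkpoint` (DCP) with its filesystem writer, apparently one shard per data-parallel rank (dp_replicate: 4 in the config). A hedged sketch for collapsing such a directory into a single torch.save file is below; it assumes a recent PyTorch (>= 2.2, where `dcp_to_torch_save` exists) and that the shards have actually been fetched from LFS.

```python
# Sketch: consolidate a torch.distributed.checkpoint (DCP) directory into one
# torch.save file for offline inspection. Assumes PyTorch >= 2.2 and that the
# .distcp shards are fully downloaded (not LFS pointer stubs).
from torch.distributed.checkpoint.format_utils import dcp_to_torch_save

dcp_to_torch_save(
    "checkpoints/0000005000",                 # dir with .metadata and __*_0.distcp
    "checkpoints/0000005000/consolidated.pt", # single-file output
)
```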
checkpoints/0000005000/params.json
ADDED
@@ -0,0 +1 @@
{"name": "large_lm", "dump_dir": "./dump_dir_llama1b2-mla-nope", "seed": 777, "grad_acc_steps": 4, "gc_collect_freq": 1000, "probe_freq": null, "steps": 60000, "data": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}}, "optim": {"lr": 0.003, "weight_decay": 0.033, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 5000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": 128, "n_heads": 48, "n_kv_heads": 48, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "rope_type": "none", "rope_inv_freq_learnable": false, "max_seqlen": 4096, "use_mla": "simple", "q_lora_rank": 1536, "kv_lora_rank": 512, "seed": 42, "vocab_size": 100512, "weight_tying": false, "sliding_window": null}, "distributed": {"dp_shard": 1, "dp_replicate": 4, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": true, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 2500, "keep": 3}, "eval": {"every": 5000000000, "keep": -1}, "path": "dump_dir_llama1b2-mla-nope/checkpoints", "init_ckpt_path": null, "continue_training_from_init": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 1, "eval": {"harness": {"tasks": ["hellaswag", {"task": "boolq", "dataset_kwargs": {"trust_remote_code": true}}, "piqa", {"task": "social_iqa", "dataset_kwargs": {"trust_remote_code": true}}, "winogrande", "openbookqa", "arc_easy", "arc_challenge", "race", "commonsense_qa", "copa"]}, "validation": {"max_steps": 1000}, "generator": {"max_tokens": 16384, "dtype": "bf16"}}}
checkpoints/0000005000/train_state_00000.json
ADDED
@@ -0,0 +1 @@
{"step": 5000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 150, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 6607731770, "block_size": 4, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 116957465294829441358728163251172057088, "inc": 11676600559890430755450356507027720041}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 544, "rng_state": {"bit_generator": "PCG64", "state": {"state": 217372614558858270047326785110483183256, "inc": 77357518920597472829800677777012462921}, "has_uint32": 0, "uinteger": 1261801782}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 5000, "verbose": false, "_step_count": 5001, "_get_lr_called_within_step": false, "_last_lr": [0.003], "lr_lambdas": [{}]}}
checkpoints/0000005000/train_state_00001.json
ADDED
@@ -0,0 +1 @@
{"step": 5000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 3900, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 6640187291, "block_size": 4, "offset": 1, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 10739271888206257284104416304912590255, "inc": 239634081480473411747239400828488620799}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 544, "rng_state": {"bit_generator": "PCG64", "state": {"state": 111971710528476317519385300620568727400, "inc": 270234035871729269002159329014059236425}, "has_uint32": 0, "uinteger": 3738072740}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 5000, "verbose": false, "_step_count": 5001, "_get_lr_called_within_step": false, "_last_lr": [0.003], "lr_lambdas": [{}]}}
checkpoints/0000005000/train_state_00002.json
ADDED
@@ -0,0 +1 @@
{"step": 5000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 999, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 6616873351, "block_size": 4, "offset": 2, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 80307960852030799535988801098747157249, "inc": 6027823433652931085739778990793808165}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 544, "rng_state": {"bit_generator": "PCG64", "state": {"state": 193255143898955324632868240168082594757, "inc": 188564971970541749319992297790591572713}, "has_uint32": 1, "uinteger": 2493352001}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 5000, "verbose": false, "_step_count": 5001, "_get_lr_called_within_step": false, "_last_lr": [0.003], "lr_lambdas": [{}]}}
checkpoints/0000005000/train_state_00003.json
ADDED
@@ -0,0 +1 @@
{"step": 5000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 2972, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 6613650866, "block_size": 4, "offset": 3, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 2275586677364107908403719337356115310, "inc": 92941856108932518968286621281627530405}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 544, "rng_state": {"bit_generator": "PCG64", "state": {"state": 330904797009784483495043111624937677362, "inc": 66050176413739185524746886687120723265}, "has_uint32": 1, "uinteger": 361610763}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 5000, "verbose": false, "_step_count": 5001, "_get_lr_called_within_step": false, "_last_lr": [0.003], "lr_lambdas": [{}]}}
checkpoints/0000007500/.metadata
ADDED
Binary file (891 kB).
checkpoints/0000007500/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc9b1f2a51135638f00b6505fb654c8f3bb8bf31f1f68959a39b91c858e90f39
size 6269795820
checkpoints/0000007500/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e4b35ad9f58b8b95a964b8202800a1cf5b480e7614541910e17683f01d762bab
size 6269918456
checkpoints/0000007500/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e2663a7b82c2e3ec9a11ced46eac2183e2dd0b4d9873f34177bc713a004ab20a
size 6269918456
checkpoints/0000007500/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9be209c798c7b05fecbc653ae6eb309903eb6b696953e9a5db5c0024fdfdf29b
size 6269929976
checkpoints/0000007500/params.json
ADDED
@@ -0,0 +1 @@
{"name": "large_lm", "dump_dir": "./dump_dir_llama1b2-mla-nope", "seed": 777, "grad_acc_steps": 4, "gc_collect_freq": 1000, "probe_freq": null, "steps": 60000, "data": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}}, "optim": {"lr": 0.003, "weight_decay": 0.033, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 5000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": 128, "n_heads": 48, "n_kv_heads": 48, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "rope_type": "none", "rope_inv_freq_learnable": false, "max_seqlen": 4096, "use_mla": "simple", "q_lora_rank": 1536, "kv_lora_rank": 512, "seed": 42, "vocab_size": 100512, "weight_tying": false, "sliding_window": null}, "distributed": {"dp_shard": 1, "dp_replicate": 4, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": true, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 2500, "keep": 3}, "eval": {"every": 5000000000, "keep": -1}, "path": "dump_dir_llama1b2-mla-nope/checkpoints", "init_ckpt_path": null, "continue_training_from_init": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 1, "eval": {"harness": {"tasks": ["hellaswag", {"task": "boolq", "dataset_kwargs": {"trust_remote_code": true}}, "piqa", {"task": "social_iqa", "dataset_kwargs": {"trust_remote_code": true}}, "winogrande", "openbookqa", "arc_easy", "arc_challenge", "race", "commonsense_qa", "copa"]}, "validation": {"max_steps": 1000}, "generator": {"max_tokens": 16384, "dtype": "bf16"}}}
checkpoints/0000007500/train_state_00000.json
ADDED
@@ -0,0 +1 @@
{"step": 7500, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 69, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 10113618055, "block_size": 4, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 250961530461244709219974906902314560064, "inc": 11676600559890430755450356507027720041}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 304, "rng_state": {"bit_generator": "PCG64", "state": {"state": 257249518588449700085006402877116505719, "inc": 77357518920597472829800677777012462921}, "has_uint32": 1, "uinteger": 1510510048}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 7500, "verbose": false, "_step_count": 7501, "_get_lr_called_within_step": false, "_last_lr": [0.0029847321780892364], "lr_lambdas": [{}]}}
checkpoints/0000007500/train_state_00001.json
ADDED
@@ -0,0 +1 @@
{"step": 7500, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 1117, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 10110900930, "block_size": 4, "offset": 1, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 229638605646274825952038691562990166134, "inc": 239634081480473411747239400828488620799}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 304, "rng_state": {"bit_generator": "PCG64", "state": {"state": 63238839380895823977613874428622884291, "inc": 270234035871729269002159329014059236425}, "has_uint32": 0, "uinteger": 1153491855}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 7500, "verbose": false, "_step_count": 7501, "_get_lr_called_within_step": false, "_last_lr": [0.0029847321780892364], "lr_lambdas": [{}]}}
checkpoints/0000007500/train_state_00002.json
ADDED
@@ -0,0 +1 @@
{"step": 7500, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 2154, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 10109878234, "block_size": 4, "offset": 2, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 335974197860280248009091718238776495483, "inc": 6027823433652931085739778990793808165}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 304, "rng_state": {"bit_generator": "PCG64", "state": {"state": 86291758075527494784795753374413108651, "inc": 188564971970541749319992297790591572713}, "has_uint32": 1, "uinteger": 3034547316}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 7500, "verbose": false, "_step_count": 7501, "_get_lr_called_within_step": false, "_last_lr": [0.0029847321780892364], "lr_lambdas": [{}]}}
checkpoints/0000007500/train_state_00003.json
ADDED
@@ -0,0 +1 @@
{"step": 7500, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 9848, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 10080780840, "block_size": 4, "offset": 3, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 67546572180903625206881038198239593294, "inc": 92941856108932518968286621281627530405}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 304, "rng_state": {"bit_generator": "PCG64", "state": {"state": 201293876700324822339716126358743939011, "inc": 66050176413739185524746886687120723265}, "has_uint32": 1, "uinteger": 410357184}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 7500, "verbose": false, "_step_count": 7501, "_get_lr_called_within_step": false, "_last_lr": [0.0029847321780892364], "lr_lambdas": [{}]}}
checkpoints/0000010000/.metadata
ADDED
Binary file (891 kB).
checkpoints/0000010000/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e51b498d78fdc42d234d10e33ede78ba528350546e6371c5d61d2d961e8fa7a0
size 6269795820
checkpoints/0000010000/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:db3e5ca1672564a8fad74fb7fcbd33bf200162a38da28de7820c479a8b3e4fcc
size 6269918456
checkpoints/0000010000/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cece731ea37a974e99b7cc90ae512c9fe6a718cfd546ed0be1e3aecf4ea2d3f7
size 6269918456
checkpoints/0000010000/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c54070cf2638773c427db986f82d950f334a078657dc81c1f2b6fe401533cfbe
size 6269929976
checkpoints/0000010000/params.json
ADDED
@@ -0,0 +1 @@
{"name": "large_lm", "dump_dir": "./dump_dir_llama1b2-mla-nope", "seed": 777, "grad_acc_steps": 4, "gc_collect_freq": 1000, "probe_freq": null, "steps": 60000, "data": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}}, "optim": {"lr": 0.003, "weight_decay": 0.033, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 5000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": 128, "n_heads": 48, "n_kv_heads": 48, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "rope_type": "none", "rope_inv_freq_learnable": false, "max_seqlen": 4096, "use_mla": "simple", "q_lora_rank": 1536, "kv_lora_rank": 512, "seed": 42, "vocab_size": 100512, "weight_tying": false, "sliding_window": null}, "distributed": {"dp_shard": 1, "dp_replicate": 4, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": true, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 2500, "keep": 3}, "eval": {"every": 5000000000, "keep": -1}, "path": "dump_dir_llama1b2-mla-nope/checkpoints", "init_ckpt_path": null, "continue_training_from_init": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 1, "eval": {"harness": {"tasks": ["hellaswag", {"task": "boolq", "dataset_kwargs": {"trust_remote_code": true}}, "piqa", {"task": "social_iqa", "dataset_kwargs": {"trust_remote_code": true}}, "winogrande", "openbookqa", "arc_easy", "arc_challenge", "race", "commonsense_qa", "copa"]}, "validation": {"max_steps": 1000}, "generator": {"max_tokens": 16384, "dtype": "bf16"}}}
checkpoints/0000010000/train_state_00000.json
ADDED
@@ -0,0 +1 @@
{"step": 10000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 1111, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 13581762433, "block_size": 4, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 256691589743441607857337604322035445481, "inc": 11676600559890430755450356507027720041}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 64, "rng_state": {"bit_generator": "PCG64", "state": {"state": 148518481751991550208943711063121591411, "inc": 77357518920597472829800677777012462921}, "has_uint32": 0, "uinteger": 2361302826}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 10000, "verbose": false, "_step_count": 10001, "_get_lr_called_within_step": false, "_last_lr": [0.002939239521182286], "lr_lambdas": [{}]}}
checkpoints/0000010000/train_state_00001.json
ADDED
@@ -0,0 +1 @@
{"step": 10000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 148, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 13619349703, "block_size": 4, "offset": 1, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 84761814812092392884047079743976048654, "inc": 239634081480473411747239400828488620799}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 64, "rng_state": {"bit_generator": "PCG64", "state": {"state": 199702575982534398233040729877985002113, "inc": 270234035871729269002159329014059236425}, "has_uint32": 0, "uinteger": 3945260749}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 10000, "verbose": false, "_step_count": 10001, "_get_lr_called_within_step": false, "_last_lr": [0.002939239521182286], "lr_lambdas": [{}]}}
checkpoints/0000010000/train_state_00002.json
ADDED
@@ -0,0 +1 @@
{"step": 10000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 363, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 13575510997, "block_size": 4, "offset": 2, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 283708652666995279554761018309190231517, "inc": 6027823433652931085739778990793808165}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 64, "rng_state": {"bit_generator": "PCG64", "state": {"state": 67640385783639426033058747703376541531, "inc": 188564971970541749319992297790591572713}, "has_uint32": 1, "uinteger": 73894754}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 10000, "verbose": false, "_step_count": 10001, "_get_lr_called_within_step": false, "_last_lr": [0.002939239521182286], "lr_lambdas": [{}]}}
checkpoints/0000010000/train_state_00003.json
ADDED
@@ -0,0 +1 @@
{"step": 10000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 153, "it_state": {"it_state": {"root_dir": "./data", "sources": {"fineweb_edu_10bt_shuffled": 100.0}, "source_to_state": {"fineweb_edu_10bt_shuffled": {"file_path": "data/fineweb_edu_10bt_shuffled/fineweb_edu_10bt.chunk.00.jsonl", "position": 13581140201, "block_size": 4, "offset": 3, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 40064333776320419906719568341894502871, "inc": 92941856108932518968286621281627530405}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "tiktoken", "path": "tokenizers/cl100k_base.tiktoken"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 64, "rng_state": {"bit_generator": "PCG64", "state": {"state": 263127327191645331890859686374146161995, "inc": 66050176413739185524746886687120723265}, "has_uint32": 0, "uinteger": 4225641473}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.003], "last_epoch": 10000, "verbose": false, "_step_count": 10001, "_get_lr_called_within_step": false, "_last_lr": [0.002939239521182286], "lr_lambdas": [{}]}}
config.yaml
ADDED
@@ -0,0 +1,127 @@
name: large_lm
dump_dir: ./dump_dir_llama1b2-mla-nope
seed: 777
grad_acc_steps: 4
gc_collect_freq: 1000
probe_freq: null
steps: 60000
data:
  root_dir: ./data
  sources:
    fineweb_edu_10bt_shuffled: 100.0
  batch_size: 4
  seq_len: 4096
  n_views: 2
  seed: 42
  add_bos: true
  add_eos: true
  load_async: true
  prefetch_size: 1024
  tokenizer:
    name: tiktoken
    path: tokenizers/cl100k_base.tiktoken
optim:
  lr: 0.003
  weight_decay: 0.033
  epsilon: 1.0e-08
  beta1: 0.9
  beta2: 0.95
  clip: 1.0
  scheduler: cosine
  warmup: 5000
  lr_min_ratio: 1.0e-06
  cycle_length: 1.0
  cosine_theta: 1.0
  annealing_step: 1000
  decay_fraction: 0.1
  exp_factor: 0.5
model:
  dim: 2048
  n_layers: 25
  head_dim: 128
  n_heads: 48
  n_kv_heads: 48
  ffn_dim_multiplier: null
  multiple_of: 256
  norm_eps: 1.0e-05
  rope_theta: 10000.0
  init_base_std: null
  init_std_factor: disabled
  rope_type: none
  rope_inv_freq_learnable: false
  max_seqlen: 4096
  use_mla: simple
  q_lora_rank: 1536
  kv_lora_rank: 512
  seed: 42
  vocab_size: 100512
  weight_tying: false
  sliding_window: null
distributed:
  dp_shard: 1
  dp_replicate: 4
  tp_size: 1
  selective_activation_checkpointing: false
  compile: true
  fsdp_type: full_shard
  model_dtype: bf16
  float8_recipe: null
  float8_filter: layers\.[0-9]+\.
  matmul_allow_tf32: true
  detect_anomaly: false
  compile_cache_size_limit: 8
  spawn_method: forkserver
env:
  MKL_SERVICE_FORCE_INTEL: GNU
  OMP_NUM_THREADS: '1'
  MKL_NUM_THREADS: '1'
  ENABLE_INTRA_NODE_COMM: '1'
  TORCH_NCCL_AVOID_RECORD_STREAMS: '1'
  NCCL_IB_TIMEOUT: '22'
  NCCL_DEBUG: INFO
  TORCH_NCCL_ASYNC_ERROR_HANDLING: '1'
checkpoint:
  dump:
    every: 2500
    keep: 3
  eval:
    every: 5000000000
    keep: -1
  path: dump_dir_llama1b2-mla-nope/checkpoints
  init_ckpt_path: null
  continue_training_from_init: false
profiling:
  run: true
  trace_folder: profiling
  mem_warmup: 0
  mem_steps: 4
  profile_warmup: 100
  profile_steps: 4
logging:
  freq: 1
  acc_freq: null
  wandb: null
async_eval_gpus: 1
eval:
  harness:
    tasks:
    - hellaswag
    - task: boolq
      dataset_kwargs:
        trust_remote_code: true
    - piqa
    - task: social_iqa
      dataset_kwargs:
        trust_remote_code: true
    - winogrande
    - openbookqa
    - arc_easy
    - arc_challenge
    - race
    - commonsense_qa
    - copa
  validation:
    max_steps: 1000
  generator:
    max_tokens: 16384
    dtype: bf16
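config.yaml mirrors params.json and fully specifies the run: a 25-layer, dim-2048 decoder with MLA attention (use_mla: simple) and no RoPE (rope_type: none), trained for 60000 steps on fineweb_edu_10bt_shuffled with a cosine schedule. A minimal sketch for loading it and printing a few headline fields follows; it assumes only that PyYAML is installed, and the tokens-per-step figure is a rough estimate under the assumption that batch_size is a per-rank micro-batch size.

```python
# Sketch: load the run configuration and print a few headline fields.
# Assumes PyYAML is installed (pip install pyyaml).
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

m, o = cfg["model"], cfg["optim"]
print(f"model: dim={m['dim']} layers={m['n_layers']} heads={m['n_heads']} "
      f"mla={m['use_mla']} rope={m['rope_type']}")
print(f"optim: lr={o['lr']} warmup={o['warmup']} schedule={o['scheduler']}")

# Rough tokens per optimizer step, assuming batch_size is per-rank and per
# gradient-accumulation micro-step (an assumption, not stated in the config).
tokens_per_step = (cfg["data"]["batch_size"] * cfg["grad_acc_steps"]
                   * cfg["data"]["seq_len"] * cfg["distributed"]["dp_replicate"])
print(f"steps={cfg['steps']} seq_len={cfg['data']['seq_len']} "
      f"~tokens/step={tokens_per_step}")
```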
metrics.jsonl
ADDED
The diff for this file is too large to render.
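metrics.jsonl holds the training log, presumably one JSON record per logged step given logging.freq: 1 in the config. Its keys are not visible in this diff, so the sketch below assumes only that the file is newline-delimited JSON and reports whichever keys the records actually carry.

```python
# Sketch: peek at the training metrics log. The only assumption is that the
# file is newline-delimited JSON; no specific key names are assumed.
import json

with open("metrics.jsonl") as f:
    records = [json.loads(line) for line in f if line.strip()]

print(f"{len(records)} records")
print("keys in first record:", sorted(records[0].keys()))
print("last record:", records[-1])
```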
profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5045.html
ADDED
The diff for this file is too large to render.
profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5046.html
ADDED
The diff for this file is too large to render.
profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5047.html
ADDED
The diff for this file is too large to render.
profiling/memory_trace_plot/000000_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_5048.html
ADDED
The diff for this file is too large to render.
profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156949.html
ADDED
The diff for this file is too large to render.
profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156950.html
ADDED
The diff for this file is too large to render.
profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156951.html
ADDED
The diff for this file is too large to render.
profiling/memory_trace_plot/000004_stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156952.html
ADDED
The diff for this file is too large to render.
profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156949.1736859652960604292.pt.trace.json.gz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:48636fd5ce1fb4c20a4c33e77b3d771d1cb373015a634539207308672f2327ef
size 2413137
profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156950.1736859652960425115.pt.trace.json.gz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19c1c1a55c1624e83604084ef81783d721d561e16e2cbee4769e74e7042ca61d
size 2422313
profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156951.1736859652960391492.pt.trace.json.gz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cc7a3c9dfdba69b1eb2d5ed081b8eab35cc98fba7c905c9a811a87dfc1d469f2
size 2422892
profiling/profile_CPU_CUDA_000104/stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_156952.1736859652964530446.pt.trace.json.gz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0aea693c2a58039ee01660bb86b6e019f148ac66d8dc15de86739a3e6444fc7
size 2421057
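The profile_CPU_CUDA_000104 files are gzipped Chrome-trace JSON exported by the PyTorch profiler, one per rank, captured after profile_warmup: 100 for profile_steps: 4; they can be opened directly in Perfetto or chrome://tracing once unzipped. A small sketch for inspecting one in Python follows, assuming only the standard Chrome-trace layout with a top-level traceEvents list.

```python
# Sketch: open one gzipped PyTorch profiler trace and count events per category.
# Assumes the standard Chrome-trace layout with a top-level "traceEvents" list.
import gzip
import json
from collections import Counter

path = ("profiling/profile_CPU_CUDA_000104/"
        "stable-diffusion-xl-dev-2-retina-newsroom-gpu-v3-85f5d97fdc6s78_"
        "156949.1736859652960604292.pt.trace.json.gz")

with gzip.open(path, "rt") as f:
    trace = json.load(f)

events = trace.get("traceEvents", [])
print(f"{len(events)} events")
print(Counter(e.get("cat", "unknown") for e in events).most_common(10))
```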
train.log
ADDED
The diff for this file is too large to render.