impossibleexchange committed on
Commit
c634f22
1 Parent(s): 9b1f91d

Upload training_config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.yml +82 -0
training_config.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ _component_: models.lora_mmllama3_8b
3
+ lora_attn_modules:
4
+ - q_proj
5
+ - v_proj
6
+ apply_lora_to_mlp: false
7
+ apply_lora_to_output: false
8
+ lora_rank: 8
9
+ lora_alpha: 16
10
+ perception_tokens: 2
11
+ use_clip: false
12
+ tokenizer:
13
+ _component_: models.a2a_tokenizer
14
+ path: checkpoints/Meta-Llama-3-8B-Instruct/tokenizer.model
15
+ checkpointer:
16
+ _component_: torchtune.utils.FullModelMetaCheckpointer
17
+ checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/
18
+ checkpoint_files:
19
+ - consolidated.00.pth
20
+ adapter_checkpoint: null
21
+ recipe_checkpoint: null
22
+ output_dir: output_checkpoints/experiment_1
23
+ model_type: LLAMA3
24
+ resume_from_checkpoint: false
25
+ interim_checkpoint_steps: 5000
26
+ interim_gen_steps: null
27
+ max_new_tokens: 100
28
+ temperature: 0.8
29
+ top_k: 300
30
+ dataset:
31
+ _component_: ds.EvenBatcher
32
+ dataset:
33
+ _component_: ds.RoundRobinDataset
34
+ datasets:
35
+ - _component_: ds.OmegaVideoCaptionDataset
36
+ length: 500000
37
+ - _component_: ds.LlavaInstructDataset
38
+ dataset_path: ds/coco_llava_instruct/output.parquet
39
+ train_on_input: false
40
+ - _component_: ds.LlavaInstructDataset
41
+ dataset_path: ds/vision_flan/output.parquet
42
+ train_on_input: false
43
+ - _component_: ds.CaptionInstructDataset
44
+ dataset_path: ds/sam_llava/output.parquet
45
+ train_on_input: false
46
+ seed: null
47
+ shuffle: true
48
+ batch_size: 2
49
+ optimizer:
50
+ _component_: torch.optim.AdamW
51
+ weight_decay: 0.01
52
+ lr: 3.0e-06
53
+ lr_scheduler:
54
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
55
+ num_warmup_steps: 100
56
+ loss:
57
+ _component_: torch.nn.CrossEntropyLoss
58
+ epochs: 1
59
+ max_steps_per_epoch: null
60
+ gradient_accumulation_steps: 32
61
+ compile: false
62
+ output_dir: /home/user/omegalabs-anytoany-bittensor/modelss
63
+ metric_logger:
64
+ _component_: torchtune.utils.metric_logging.DiskLogger
65
+ log_dir: ${output_dir}
66
+ log_every_n_steps: null
67
+ device: cuda
68
+ dtype: bf16
69
+ enable_activation_checkpointing: false
70
+ profiler:
71
+ _component_: torchtune.utils.profiler
72
+ enabled: false
73
+ inference:
74
+ prompt_template: 'Video:
75
+
76
+ {video}
77
+
78
+ Caption the previous video.'
79
+ max_new_tokens: 300
80
+ temperature: 0.8
81
+ top_k: 300
82
+ quantizer: null