End of training
README.md CHANGED

@@ -34,8 +34,8 @@ datasets:
 dataset_prepared_path: FinUpTagsNoTestNoExNewCodeLlama
 val_set_size: 0
 output_dir: models/codellama34bTestL4
-
-
+lora_model_dir: models/codellama34bTestL4/checkpoint-40
+auto_resume_from_checkpoints: true
 sequence_len: 4096
 sample_packing: true
 pad_to_sequence_len: true
@@ -54,12 +54,12 @@ wandb_project: 'codellamaFeed'
 wandb_entity:
 wandb_watch:
 wandb_run_id:
-wandb_name: '34bLora4'
+wandb_name: '34bLora4'
 wandb_log_model:

 gradient_accumulation_steps: 4
 micro_batch_size: 1
-num_epochs:
+num_epochs: 8
 optimizer: adamw_torch
 lr_scheduler: cosine
 learning_rate: 0.0002
@@ -98,7 +98,7 @@ special_tokens:

 </details><br>

-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/afrias5/codellamaFeed/runs/
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/afrias5/codellamaFeed/runs/81byeenq)
 # CodeLlamaL4

 This model is a fine-tuned version of [codellama/CodeLlama-34b-Python-hf](https://huggingface.co/codellama/CodeLlama-34b-Python-hf) on the None dataset.
@@ -132,7 +132,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs:
+- num_epochs: 8

 ### Training results

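The config change above points the run at an existing adapter (`lora_model_dir: models/codellama34bTestL4/checkpoint-40`) and enables `auto_resume_from_checkpoints`, so training continues from checkpoint 40 for the full 8 epochs rather than starting over. For trying out the finished adapter, a minimal inference sketch with `transformers` and `peft` follows; the adapter path is an assumption taken from the config's `output_dir` (swap in the local path or Hub repo id where the adapter actually lives), and the prompt is only illustrative.

```python
# Minimal sketch: load the CodeLlama-34B-Python base model and apply the LoRA
# adapter on top. ADAPTER_PATH is an assumption taken from the config's
# output_dir; replace it with the real adapter location (local dir or Hub repo).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "codellama/CodeLlama-34b-Python-hf"
ADAPTER_PATH = "models/codellama34bTestL4"  # assumed adapter location

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(
    BASE_ID, torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(base, ADAPTER_PATH)
model.eval()

prompt = "def fibonacci(n):"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```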
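For readers reproducing the run outside axolotl, the hyperparameters listed in the card (Adam betas/epsilon, cosine schedule, 10 warmup steps, 8 epochs, plus the batch settings from the config) map roughly onto Hugging Face `TrainingArguments` as sketched below. This is only an illustration of the settings; the actual run was driven by the axolotl YAML config, and model/dataset wiring is omitted.

```python
# Rough TrainingArguments equivalent of the hyperparameters in this card.
# Illustrative only; not the original training script.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="models/codellama34bTestL4",
    per_device_train_batch_size=1,   # micro_batch_size: 1
    gradient_accumulation_steps=4,   # gradient_accumulation_steps: 4
    num_train_epochs=8,              # num_epochs: 8
    learning_rate=2e-4,              # learning_rate: 0.0002
    lr_scheduler_type="cosine",      # lr_scheduler: cosine
    warmup_steps=10,                 # lr_scheduler_warmup_steps: 10
    optim="adamw_torch",             # optimizer: adamw_torch
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)
```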