Update README.md
`README.md`:

```diff
@@ -5,7 +5,7 @@ base_model: ibm-granite/granite-3.1-8b-instruct
 tags:
 - generated_from_trainer
 model-index:
-- name:
+- name: granite-code-plans-3.1-8b-lora
   results: []
 ---
 
@@ -27,7 +27,7 @@ load_in_4bit: false
 strict: false
 
 datasets:
-  - path:
+  - path: task_decomposition_training_data_code.jsonl
     type: chat_template
     chat_template: tokenizer_default
     field_messages: conversations
@@ -40,7 +40,7 @@ sequence_len: 8192
 sample_packing: false
 pad_to_sequence_len: true
 eval_sample_packing: false
-output_dir:
+output_dir: granite-code-plans-3.1-8b-lora
 
 wandb_project: null
 wandb_entity: null
@@ -93,9 +93,9 @@ weight_decay: 0.0
 
 </details><br>
 
-# home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.
+# home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.1-8b-lora
 
-This model is a fine-tuned version of [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) on the
+This model is a fine-tuned version of [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) on the task_decomposition_training_data_code.jsonl dataset.
 
 ## Model description
 
@@ -119,7 +119,6 @@ The following hyperparameters were used during training:
 - eval_batch_size: 1
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 8
 - gradient_accumulation_steps: 8
 - total_train_batch_size: 64
 - total_eval_batch_size: 8
```