Update README.md
`README.md`:

```diff
@@ -5,7 +5,7 @@ base_model: ibm-granite/granite-3.1-8b-instruct
 tags:
 - generated_from_trainer
 model-index:
-- name:
+- name: granite-code-plans-3.1-8b-lora
   results: []
 ---
 
@@ -27,7 +27,7 @@ load_in_4bit: false
 strict: false
 
 datasets:
-  - path:
+  - path: task_decomposition_training_data_code.jsonl
     type: chat_template
     chat_template: tokenizer_default
     field_messages: conversations
@@ -40,7 +40,7 @@ sequence_len: 8192
 sample_packing: false
 pad_to_sequence_len: true
 eval_sample_packing: false
-output_dir:
+output_dir: granite-code-plans-3.1-8b-lora
 
 wandb_project: null
 wandb_entity: null
@@ -93,9 +93,9 @@ weight_decay: 0.0
 
 </details><br>
 
-# home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.
+# home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.1-8b-lora
 
-This model is a fine-tuned version of [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) on the
+This model is a fine-tuned version of [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) on the task_decomposition_training_data_code.jsonl dataset.
 
 ## Model description
 
@@ -119,7 +119,6 @@ The following hyperparameters were used during training:
 - eval_batch_size: 1
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 8
 - gradient_accumulation_steps: 8
 - total_train_batch_size: 64
 - total_eval_batch_size: 8
```