MatVet committed
Commit b4b87bf · verified · 1 Parent(s): b2a463a

Update README.md

Files changed (1): README.md (+5, -6)
README.md CHANGED
@@ -5,7 +5,7 @@ base_model: ibm-granite/granite-3.1-8b-instruct
  tags:
  - generated_from_trainer
  model-index:
- - name: home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.1-8b-lora
+ - name: granite-code-plans-3.1-8b-lora
    results: []
  ---

@@ -27,7 +27,7 @@ load_in_4bit: false
  strict: false

  datasets:
- - path: /home/ec2-user/SageMaker/task_decomposition/data/task_decomposition_training_data_code.jsonl
+ - path: task_decomposition_training_data_code.jsonl
    type: chat_template
    chat_template: tokenizer_default
    field_messages: conversations
@@ -40,7 +40,7 @@ sequence_len: 8192
  sample_packing: false
  pad_to_sequence_len: true
  eval_sample_packing: false
- output_dir: /home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.1-8b-lora
+ output_dir: granite-code-plans-3.1-8b-lora

  wandb_project: null
  wandb_entity: null
@@ -93,9 +93,9 @@ weight_decay: 0.0

  </details><br>

- # home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.0-8b-lora
+ # home/ec2-user/SageMaker/task_decomposition/trained_models/granite-code-plans-3.1-8b-lora

- This model is a fine-tuned version of [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) on the /home/ec2-user/SageMaker/task_decomposition/data/task_decomposition_training_data_code.jsonl dataset.
+ This model is a fine-tuned version of [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) on the task_decomposition_training_data_code.jsonl dataset.

  ## Model description

@@ -119,7 +119,6 @@ The following hyperparameters were used during training:
  - eval_batch_size: 1
  - seed: 42
  - distributed_type: multi-GPU
- - num_devices: 8
  - gradient_accumulation_steps: 8
  - total_train_batch_size: 64
  - total_eval_batch_size: 8
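
Note on the `datasets:` hunk: axolotl's `chat_template` dataset type reads one JSON object per line and pulls the message list from the key named by `field_messages` (here `conversations`). A minimal sketch of what one record in `task_decomposition_training_data_code.jsonl` might look like; the role/content message shape and the prompt text are illustrative assumptions, not taken from the actual dataset:

```python
import json

# Hypothetical single record for the JSONL file referenced by the config.
# "conversations" matches `field_messages: conversations`; the message shape
# and the prompt text below are guesses for illustration only.
record = {
    "conversations": [
        {"role": "user", "content": "Break this coding task into ordered subtasks: ..."},
        {"role": "assistant", "content": "1. Parse the input spec\n2. Outline the modules\n3. ..."},
    ]
}

with open("task_decomposition_training_data_code.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record) + "\n")
```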
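
The `num_devices: 8` line deleted in the last hunk is still implied by the hyperparameters that remain, assuming a per-device micro-batch size of 1 (which the listed totals suggest):

```python
# Sanity check: total_train_batch_size = micro_batch * grad_accum * devices.
micro_batch_size = 1                 # assumed per-device train batch size
gradient_accumulation_steps = 8      # from the hyperparameter list
total_train_batch_size = 64          # from the hyperparameter list

num_devices = total_train_batch_size // (micro_batch_size * gradient_accumulation_steps)
assert num_devices == 8              # matches the removed `num_devices: 8` line
```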
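
For reference, a minimal sketch of loading the resulting adapter with `peft`, assuming the renamed `output_dir` (or a Hub repo of the same name) holds the saved LoRA weights:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "ibm-granite/granite-3.1-8b-instruct"

# Load the base model and tokenizer, then attach the LoRA adapter.
# The adapter path is the output_dir from the diff above; substitute a
# Hub repo id if the adapter is published there.
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base_model, "granite-code-plans-3.1-8b-lora")
```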