End of training
Files changed:
- README.md (+5 -5)
- adapter_config.json (+2 -2)
- adapter_model.bin (+1 -1)
- adapter_model.safetensors (+1 -1)
- training_args.bin (+1 -1)
    	
        README.md
    CHANGED
    
```diff
@@ -54,7 +54,7 @@ hub_model_id: nblinh/9d94bc60-34ce-43f9-8b83-e855fe65505c
 hub_repo: null
 hub_strategy: end
 hub_token: null
-learning_rate: 0.
+learning_rate: 0.0003
 load_in_4bit: true
 load_in_8bit: true
 local_rank: null
@@ -66,7 +66,7 @@ lora_model_dir: null
 lora_r: 16
 lora_target_linear: true
 lr_scheduler: cosine
-max_steps:
+max_steps: 100
 micro_batch_size: 1
 mlflow_experiment_name: /tmp/4ee3c1dbc52892e5_train_data.json
 model_type: AutoModelForCausalLM
@@ -122,20 +122,20 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.
+- learning_rate: 0.0003
 - train_batch_size: 1
 - eval_batch_size: 1
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- training_steps:
+- training_steps: 100
 
 ### Training results
 
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 0.0           | 0.
+| 0.0           | 0.0156 | 100  | nan             |
 
 
 ### Framework versions
```
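The filled-in hyperparameters map directly onto a `transformers.TrainingArguments` object. Below is a minimal sketch of the equivalent configuration, assuming the stock `transformers` API; `output_dir` is a placeholder, and everything else is taken from the values in this diff:

```python
from transformers import TrainingArguments

# Sketch of the configuration implied by the README hyperparameters.
# betas=(0.9, 0.999) and epsilon=1e-08 are already the defaults.
args = TrainingArguments(
    output_dir="outputs",            # placeholder path, not from the diff
    learning_rate=3e-4,              # learning_rate: 0.0003
    per_device_train_batch_size=1,   # micro_batch_size / train_batch_size: 1
    per_device_eval_batch_size=1,    # eval_batch_size: 1
    seed=42,
    optim="adamw_bnb_8bit",          # OptimizerNames.ADAMW_BNB
    lr_scheduler_type="cosine",
    warmup_steps=10,                 # lr_scheduler_warmup_steps: 10
    max_steps=100,                   # max_steps / training_steps: 100
)
```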
    	
        adapter_config.json
    CHANGED
    
```diff
@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
     "up_proj",
     "k_proj",
-    "q_proj",
     "o_proj",
     "v_proj",
+    "q_proj",
+    "gate_proj",
     "down_proj"
   ],
   "task_type": "CAUSAL_LM",
```
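This change only reorders the `target_modules` entries; PEFT matches module names as a set, so the adapter targets the same layers either way. A minimal sketch of the corresponding `peft.LoraConfig`, assuming the standard PEFT API (`r=16` comes from the README; `lora_alpha` and `lora_dropout` are not shown in this diff and are left at their defaults):

```python
from peft import LoraConfig

# Sketch of the adapter configuration implied by adapter_config.json.
# Entry order in target_modules is irrelevant to which layers get adapters.
config = LoraConfig(
    r=16,  # lora_r: 16 (from the README)
    target_modules=[
        "up_proj", "k_proj", "o_proj", "v_proj",
        "q_proj", "gate_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
)
```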
    	
        adapter_model.bin
    CHANGED
    
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c16477246674515acbf92a6e3658c6bf9f352e0f97e9fc94bf0ab99df74cd0db
 size 167934026
```
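adapter_model.bin is stored as a Git LFS pointer, so the `oid sha256:` line is the checksum of the actual payload. A short sketch for verifying a downloaded copy against that pointer, using only the standard library (the local filename is an assumption):

```python
import hashlib

# Compare a downloaded file's SHA-256 against the oid in the LFS pointer.
EXPECTED = "c16477246674515acbf92a6e3658c6bf9f352e0f97e9fc94bf0ab99df74cd0db"

h = hashlib.sha256()
with open("adapter_model.bin", "rb") as f:            # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch"
```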
    	
        adapter_model.safetensors
    CHANGED
    
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ef800e06ac4fc0a5fc48f5a1bb57cda565ee2cbd55b9fde6085c34573c21ed54
 size 167832240
```
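The safetensors file holds the adapter weights that `peft` loads at inference time. A minimal sketch of attaching this adapter to its base model via the standard `transformers`/`peft` loading path — the base-model id is a placeholder, since it is not named in this diff, while the adapter id is the `hub_model_id` from the README:

```python
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "base-model-id"  # placeholder: the base model is not named here
ADAPTER = "nblinh/9d94bc60-34ce-43f9-8b83-e855fe65505c"  # hub_model_id

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(base, ADAPTER)  # loads adapter_model.safetensors
model.eval()
```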
    	
        training_args.bin
    CHANGED
    
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:45538e3a232857059f47d606a5020956b7ee2cbd38c96bb4c9f729f7b0bef812
 size 6776
```
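training_args.bin is a small (6776-byte) pickled `TrainingArguments` object rather than a weights file. A quick sketch for inspecting it from a local download; since it is a pickle, deserializing it requires trusting the source:

```python
import torch

# Pickled TrainingArguments, not a tensor file, so recent PyTorch needs
# weights_only=False to deserialize it. Only load from trusted sources.
args = torch.load("training_args.bin", weights_only=False)  # assumed local path
print(args.learning_rate, args.max_steps)  # per the README: 0.0003 and 100
```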
