Model save
Browse files
- README.md +60 -0
- adapter_config.json +4 -7
- adapter_model.safetensors +2 -2
- all_results.json +13 -0
- eval_results.json +8 -0
- tokenizer_config.json +1 -3
- train_results.json +8 -0
- trainer_state.json +54 -0
- training_args.bin +2 -2
    	
        README.md
    ADDED
    
    | @@ -0,0 +1,60 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            license: apache-2.0
         | 
| 3 | 
            +
            base_model: alignment-handbook/zephyr-7b-dpo-full
         | 
| 4 | 
            +
            tags:
         | 
| 5 | 
            +
            - generated_from_trainer
         | 
| 6 | 
            +
            model-index:
         | 
| 7 | 
            +
            - name: zephyr-7b-sft-lora
         | 
| 8 | 
            +
              results: []
         | 
| 9 | 
            +
            ---
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            <!-- This model card has been generated automatically according to the information the Trainer had access to. You
         | 
| 12 | 
            +
            should probably proofread and complete it, then remove this comment. -->
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            # zephyr-7b-sft-lora
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            This model is a fine-tuned version of [alignment-handbook/zephyr-7b-dpo-full](https://huggingface.co/alignment-handbook/zephyr-7b-dpo-full) on an unknown dataset.
         | 
| 17 | 
            +
            It achieves the following results on the evaluation set:
         | 
| 18 | 
            +
            - Loss: 1.7803
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            ## Model description
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            More information needed
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            ## Intended uses & limitations
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            More information needed
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            ## Training and evaluation data
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            More information needed
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            ## Training procedure
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            ### Training hyperparameters
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            The following hyperparameters were used during training:
         | 
| 37 | 
            +
            - learning_rate: 2e-05
         | 
| 38 | 
            +
            - train_batch_size: 4
         | 
| 39 | 
            +
            - eval_batch_size: 8
         | 
| 40 | 
            +
            - seed: 42
         | 
| 41 | 
            +
            - distributed_type: multi-GPU
         | 
| 42 | 
            +
            - gradient_accumulation_steps: 128
         | 
| 43 | 
            +
            - total_train_batch_size: 512
         | 
| 44 | 
            +
            - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
         | 
| 45 | 
            +
            - lr_scheduler_type: cosine
         | 
| 46 | 
            +
            - num_epochs: 1
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            ### Training results
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            | Training Loss | Epoch | Step | Validation Loss |
         | 
| 51 | 
            +
            |:-------------:|:-----:|:----:|:---------------:|
         | 
| 52 | 
            +
            | 2.0318        | 0.32  | 10   | 1.7803          |
         | 
| 53 | 
            +
             | 
| 54 | 
            +
             | 
| 55 | 
            +
            ### Framework versions
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            - Transformers 4.35.0
         | 
| 58 | 
            +
            - PyTorch 2.1.0
         | 
| 59 | 
            +
            - Datasets 2.14.6
         | 
| 60 | 
            +
            - Tokenizers 0.14.1
         | 
    	
        adapter_config.json
    CHANGED
    
    | @@ -8,21 +8,18 @@ | |
| 8 | 
             
              "init_lora_weights": true,
         | 
| 9 | 
             
              "layers_pattern": null,
         | 
| 10 | 
             
              "layers_to_transform": null,
         | 
| 11 | 
            -
              "loftq_config": {},
         | 
| 12 | 
             
              "lora_alpha": 16,
         | 
| 13 | 
            -
              "lora_dropout": 0. | 
| 14 | 
            -
              "megatron_config": null,
         | 
| 15 | 
            -
              "megatron_core": "megatron.core",
         | 
| 16 | 
             
              "modules_to_save": null,
         | 
| 17 | 
             
              "peft_type": "LORA",
         | 
| 18 | 
            -
              "r":  | 
| 19 | 
             
              "rank_pattern": {},
         | 
| 20 | 
             
              "revision": null,
         | 
| 21 | 
             
              "target_modules": [
         | 
| 22 | 
            -
                " | 
| 23 | 
             
                "k_proj",
         | 
| 24 | 
             
                "o_proj",
         | 
| 25 | 
            -
                " | 
| 26 | 
             
              ],
         | 
| 27 | 
             
              "task_type": "CAUSAL_LM"
         | 
| 28 | 
             
            }
         | 
|  | |
| 8 | 
             
              "init_lora_weights": true,
         | 
| 9 | 
             
              "layers_pattern": null,
         | 
| 10 | 
             
              "layers_to_transform": null,
         | 
|  | |
| 11 | 
             
              "lora_alpha": 16,
         | 
| 12 | 
            +
              "lora_dropout": 0.1,
         | 
|  | |
|  | |
| 13 | 
             
              "modules_to_save": null,
         | 
| 14 | 
             
              "peft_type": "LORA",
         | 
| 15 | 
            +
              "r": 64,
         | 
| 16 | 
             
              "rank_pattern": {},
         | 
| 17 | 
             
              "revision": null,
         | 
| 18 | 
             
              "target_modules": [
         | 
| 19 | 
            +
                "q_proj",
         | 
| 20 | 
             
                "k_proj",
         | 
| 21 | 
             
                "o_proj",
         | 
| 22 | 
            +
                "v_proj"
         | 
| 23 | 
             
              ],
         | 
| 24 | 
             
              "task_type": "CAUSAL_LM"
         | 
| 25 | 
             
            }
         | 
    	
        adapter_model.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:7632c5ef709adc03b83f1f32f43d9dd75f3294bca163ead95e2d1731d9130310
         | 
| 3 | 
            +
            size 218138576
         | 
    	
        all_results.json
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "epoch": 0.32,
         | 
| 3 | 
            +
                "eval_loss": 1.7802537679672241,
         | 
| 4 | 
            +
                "eval_runtime": 153.411,
         | 
| 5 | 
            +
                "eval_samples": 1769,
         | 
| 6 | 
            +
                "eval_samples_per_second": 11.531,
         | 
| 7 | 
            +
                "eval_steps_per_second": 1.447,
         | 
| 8 | 
            +
                "train_loss": 2.409137284755707,
         | 
| 9 | 
            +
                "train_runtime": 4600.7142,
         | 
| 10 | 
            +
                "train_samples": 15899,
         | 
| 11 | 
            +
                "train_samples_per_second": 3.456,
         | 
| 12 | 
            +
                "train_steps_per_second": 0.007
         | 
| 13 | 
            +
            }
         | 
    	
        eval_results.json
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "epoch": 0.32,
         | 
| 3 | 
            +
                "eval_loss": 1.7802537679672241,
         | 
| 4 | 
            +
                "eval_runtime": 153.411,
         | 
| 5 | 
            +
                "eval_samples": 1769,
         | 
| 6 | 
            +
                "eval_samples_per_second": 11.531,
         | 
| 7 | 
            +
                "eval_steps_per_second": 1.447
         | 
| 8 | 
            +
            }
         | 
    	
        tokenizer_config.json
    CHANGED
    
    | @@ -1,6 +1,4 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
              "add_bos_token": true,
         | 
| 3 | 
            -
              "add_eos_token": false,
         | 
| 4 | 
             
              "added_tokens_decoder": {
         | 
| 5 | 
             
                "0": {
         | 
| 6 | 
             
                  "content": "<unk>",
         | 
| @@ -29,7 +27,7 @@ | |
| 29 | 
             
              },
         | 
| 30 | 
             
              "additional_special_tokens": [],
         | 
| 31 | 
             
              "bos_token": "<s>",
         | 
| 32 | 
            -
              "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<| | 
| 33 | 
             
              "clean_up_tokenization_spaces": false,
         | 
| 34 | 
             
              "eos_token": "</s>",
         | 
| 35 | 
             
              "legacy": true,
         | 
|  | |
| 1 | 
             
            {
         | 
|  | |
|  | |
| 2 | 
             
              "added_tokens_decoder": {
         | 
| 3 | 
             
                "0": {
         | 
| 4 | 
             
                  "content": "<unk>",
         | 
|  | |
| 27 | 
             
              },
         | 
| 28 | 
             
              "additional_special_tokens": [],
         | 
| 29 | 
             
              "bos_token": "<s>",
         | 
| 30 | 
            +
              "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
         | 
| 31 | 
             
              "clean_up_tokenization_spaces": false,
         | 
| 32 | 
             
              "eos_token": "</s>",
         | 
| 33 | 
             
              "legacy": true,
         | 
    	
        train_results.json
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "epoch": 0.32,
         | 
| 3 | 
            +
                "train_loss": 2.409137284755707,
         | 
| 4 | 
            +
                "train_runtime": 4600.7142,
         | 
| 5 | 
            +
                "train_samples": 15899,
         | 
| 6 | 
            +
                "train_samples_per_second": 3.456,
         | 
| 7 | 
            +
                "train_steps_per_second": 0.007
         | 
| 8 | 
            +
            }
         | 
    	
        trainer_state.json
    ADDED
    
    | @@ -0,0 +1,54 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "best_metric": null,
         | 
| 3 | 
            +
              "best_model_checkpoint": null,
         | 
| 4 | 
            +
              "epoch": 0.3220125786163522,
         | 
| 5 | 
            +
              "eval_steps": 500,
         | 
| 6 | 
            +
              "global_step": 10,
         | 
| 7 | 
            +
              "is_hyper_param_search": false,
         | 
| 8 | 
            +
              "is_local_process_zero": true,
         | 
| 9 | 
            +
              "is_world_process_zero": true,
         | 
| 10 | 
            +
              "log_history": [
         | 
| 11 | 
            +
                {
         | 
| 12 | 
            +
                  "epoch": 0.03,
         | 
| 13 | 
            +
                  "learning_rate": 1.994869323391895e-05,
         | 
| 14 | 
            +
                  "loss": 2.5826,
         | 
| 15 | 
            +
                  "step": 1
         | 
| 16 | 
            +
                },
         | 
| 17 | 
            +
                {
         | 
| 18 | 
            +
                  "epoch": 0.16,
         | 
| 19 | 
            +
                  "learning_rate": 1.8743466161445823e-05,
         | 
| 20 | 
            +
                  "loss": 2.4436,
         | 
| 21 | 
            +
                  "step": 5
         | 
| 22 | 
            +
                },
         | 
| 23 | 
            +
                {
         | 
| 24 | 
            +
                  "epoch": 0.32,
         | 
| 25 | 
            +
                  "learning_rate": 1.5289640103269626e-05,
         | 
| 26 | 
            +
                  "loss": 2.0318,
         | 
| 27 | 
            +
                  "step": 10
         | 
| 28 | 
            +
                },
         | 
| 29 | 
            +
                {
         | 
| 30 | 
            +
                  "epoch": 0.32,
         | 
| 31 | 
            +
                  "eval_loss": 1.780266523361206,
         | 
| 32 | 
            +
                  "eval_runtime": 154.3285,
         | 
| 33 | 
            +
                  "eval_samples_per_second": 11.463,
         | 
| 34 | 
            +
                  "eval_steps_per_second": 1.438,
         | 
| 35 | 
            +
                  "step": 10
         | 
| 36 | 
            +
                },
         | 
| 37 | 
            +
                {
         | 
| 38 | 
            +
                  "epoch": 0.32,
         | 
| 39 | 
            +
                  "step": 10,
         | 
| 40 | 
            +
                  "total_flos": 4.903271030325248e+17,
         | 
| 41 | 
            +
                  "train_loss": 2.409137284755707,
         | 
| 42 | 
            +
                  "train_runtime": 4600.7142,
         | 
| 43 | 
            +
                  "train_samples_per_second": 3.456,
         | 
| 44 | 
            +
                  "train_steps_per_second": 0.007
         | 
| 45 | 
            +
                }
         | 
| 46 | 
            +
              ],
         | 
| 47 | 
            +
              "logging_steps": 5,
         | 
| 48 | 
            +
              "max_steps": 31,
         | 
| 49 | 
            +
              "num_train_epochs": 1,
         | 
| 50 | 
            +
              "save_steps": 500,
         | 
| 51 | 
            +
              "total_flos": 4.903271030325248e+17,
         | 
| 52 | 
            +
              "trial_name": null,
         | 
| 53 | 
            +
              "trial_params": null
         | 
| 54 | 
            +
            }
         | 
    	
        training_args.bin
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:4f87f906fe3f0905c5f58981519e7e15b12915c522bece020f0cea4c22413af6
         | 
| 3 | 
            +
            size 4664
         |