Training in progress, step 1000
Browse files
- adapter_config.json +39 -0
- config.json +1 -1
- model.safetensors +1 -1
- runs/Apr17_15-18-53_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744895946.dws-13.informatik.uni-mannheim.de.346181.0 +3 -0
- runs/Apr17_15-21-20_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896097.dws-13.informatik.uni-mannheim.de.347590.0 +3 -0
- runs/Apr17_15-30-37_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896656.dws-13.informatik.uni-mannheim.de.352082.0 +3 -0
- runs/Apr17_15-34-47_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896907.dws-13.informatik.uni-mannheim.de.354277.0 +3 -0
- runs/Apr17_15-38-37_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897136.dws-13.informatik.uni-mannheim.de.358574.0 +3 -0
- runs/Apr17_15-44-21_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897478.dws-13.informatik.uni-mannheim.de.361911.0 +3 -0
- runs/Apr17_15-47-02_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897641.dws-13.informatik.uni-mannheim.de.367715.0 +3 -0
- runs/Apr17_17-06-05_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744902382.dws-13.informatik.uni-mannheim.de.397931.0 +3 -0
- training_args.bin +2 -2
    	
        adapter_config.json
    ADDED
    
    | @@ -0,0 +1,39 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "alpha_pattern": {},
         | 
| 3 | 
            +
              "auto_mapping": null,
         | 
| 4 | 
            +
              "base_model_name_or_path": null,
         | 
| 5 | 
            +
              "bias": "none",
         | 
| 6 | 
            +
              "corda_config": null,
         | 
| 7 | 
            +
              "eva_config": null,
         | 
| 8 | 
            +
              "exclude_modules": null,
         | 
| 9 | 
            +
              "fan_in_fan_out": null,
         | 
| 10 | 
            +
              "inference_mode": false,
         | 
| 11 | 
            +
              "init_lora_weights": true,
         | 
| 12 | 
            +
              "layer_replication": null,
         | 
| 13 | 
            +
              "layers_pattern": null,
         | 
| 14 | 
            +
              "layers_to_transform": null,
         | 
| 15 | 
            +
              "loftq_config": {},
         | 
| 16 | 
            +
              "lora_alpha": 16,
         | 
| 17 | 
            +
              "lora_bias": false,
         | 
| 18 | 
            +
              "lora_dropout": 0.05,
         | 
| 19 | 
            +
              "megatron_config": null,
         | 
| 20 | 
            +
              "megatron_core": "megatron.core",
         | 
| 21 | 
            +
              "modules_to_save": null,
         | 
| 22 | 
            +
              "peft_type": "LORA",
         | 
| 23 | 
            +
              "r": 8,
         | 
| 24 | 
            +
              "rank_pattern": {},
         | 
| 25 | 
            +
              "revision": null,
         | 
| 26 | 
            +
              "target_modules": [
         | 
| 27 | 
            +
                "up_proj",
         | 
| 28 | 
            +
                "k_proj",
         | 
| 29 | 
            +
                "o_proj",
         | 
| 30 | 
            +
                "down_proj",
         | 
| 31 | 
            +
                "gate_proj",
         | 
| 32 | 
            +
                "v_proj",
         | 
| 33 | 
            +
                "q_proj"
         | 
| 34 | 
            +
              ],
         | 
| 35 | 
            +
              "task_type": "CAUSAL_LM",
         | 
| 36 | 
            +
              "trainable_token_indices": null,
         | 
| 37 | 
            +
              "use_dora": false,
         | 
| 38 | 
            +
              "use_rslora": false
         | 
| 39 | 
            +
            }
         | 
    	
        config.json
    CHANGED
    
    | @@ -20,7 +20,7 @@ | |
| 20 | 
             
              "sliding_window": 32768,
         | 
| 21 | 
             
              "tie_word_embeddings": true,
         | 
| 22 | 
             
              "torch_dtype": "bfloat16",
         | 
| 23 | 
            -
              "transformers_version": "4.51. | 
| 24 | 
             
              "use_cache": false,
         | 
| 25 | 
             
              "use_sliding_window": false,
         | 
| 26 | 
             
              "vocab_size": 151936
         | 
|  | |
| 20 | 
             
              "sliding_window": 32768,
         | 
| 21 | 
             
              "tie_word_embeddings": true,
         | 
| 22 | 
             
              "torch_dtype": "bfloat16",
         | 
| 23 | 
            +
              "transformers_version": "4.51.3",
         | 
| 24 | 
             
              "use_cache": false,
         | 
| 25 | 
             
              "use_sliding_window": false,
         | 
| 26 | 
             
              "vocab_size": 151936
         | 
    	
        model.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 3087467144
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:77484cafe0ac030d4a24440e079daed0a0f20943974fb667686f912f8948f0e5
         | 
| 3 | 
             
            size 3087467144
         | 
    	
        runs/Apr17_15-18-53_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744895946.dws-13.informatik.uni-mannheim.de.346181.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:5dca305dd35e6ad8052f1ca286aa7fc351bc2c91059df391a82a74b0ea90e939
         | 
| 3 | 
            +
            size 7279
         | 
    	
        runs/Apr17_15-21-20_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896097.dws-13.informatik.uni-mannheim.de.347590.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:46e60667aa09af54b351db133bc37245f95900c3cb60cf948f38a7c3924fa6fb
         | 
| 3 | 
            +
            size 16799
         | 
    	
        runs/Apr17_15-30-37_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896656.dws-13.informatik.uni-mannheim.de.352082.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:05c2258c915174c2689550bfad8f1bde5cb07395e14ec388fbe78d9f96afbe7b
         | 
| 3 | 
            +
            size 14080
         | 
    	
        runs/Apr17_15-34-47_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896907.dws-13.informatik.uni-mannheim.de.354277.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:3027cb401193747296d7cecab4181c7ff1b8f4cacc384466da95dbafc6237f88
         | 
| 3 | 
            +
            size 10000
         | 
    	
        runs/Apr17_15-38-37_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897136.dws-13.informatik.uni-mannheim.de.358574.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f3b1a32ef20678274e49c312b06c745d9f88db7ad8df82b237dd4e6b404656e1
         | 
| 3 | 
            +
            size 15440
         | 
    	
        runs/Apr17_15-44-21_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897478.dws-13.informatik.uni-mannheim.de.361911.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:01457b5632ba720ae0fbd37c68146b6df655770b9375811ca122e7247b375e08
         | 
| 3 | 
            +
            size 10680
         | 
    	
        runs/Apr17_15-47-02_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897641.dws-13.informatik.uni-mannheim.de.367715.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:16d7eba0e2149a84b1d8c4354de5c722f374f8261f90dea6a210f67c99553175
         | 
| 3 | 
            +
            size 73239
         | 
    	
        runs/Apr17_17-06-05_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744902382.dws-13.informatik.uni-mannheim.de.397931.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:e5fb65086de7896a9568fa730ba3a18b970385091a6e1f2d29e4936cdef3d9ae
         | 
| 3 | 
            +
            size 697781
         | 
    	
        training_args.bin
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f54aab527e52224d27152bc9919e57e2e695d6220dc31b2172ed3380d9ca2eca
         | 
| 3 | 
            +
            size 7672
         | 
