Rexhaif committed on
Commit
a3b39c6
·
verified ·
1 Parent(s): 5e7fff5

Training in progress, step 1000

Browse files
adapter_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": null,
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": false,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "up_proj",
28
+ "k_proj",
29
+ "o_proj",
30
+ "down_proj",
31
+ "gate_proj",
32
+ "v_proj",
33
+ "q_proj"
34
+ ],
35
+ "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
+ "use_dora": false,
38
+ "use_rslora": false
39
+ }
config.json CHANGED
@@ -20,7 +20,7 @@
20
  "sliding_window": 32768,
21
  "tie_word_embeddings": true,
22
  "torch_dtype": "bfloat16",
23
- "transformers_version": "4.51.0",
24
  "use_cache": false,
25
  "use_sliding_window": false,
26
  "vocab_size": 151936
 
20
  "sliding_window": 32768,
21
  "tie_word_embeddings": true,
22
  "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.51.3",
24
  "use_cache": false,
25
  "use_sliding_window": false,
26
  "vocab_size": 151936
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88ffaf399c01d63fc9f1335a19c1d4d47f650c42e7c59108a136b71ae481194a
3
  size 3087467144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77484cafe0ac030d4a24440e079daed0a0f20943974fb667686f912f8948f0e5
3
  size 3087467144
runs/Apr17_15-18-53_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744895946.dws-13.informatik.uni-mannheim.de.346181.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dca305dd35e6ad8052f1ca286aa7fc351bc2c91059df391a82a74b0ea90e939
3
+ size 7279
runs/Apr17_15-21-20_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896097.dws-13.informatik.uni-mannheim.de.347590.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e60667aa09af54b351db133bc37245f95900c3cb60cf948f38a7c3924fa6fb
3
+ size 16799
runs/Apr17_15-30-37_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896656.dws-13.informatik.uni-mannheim.de.352082.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c2258c915174c2689550bfad8f1bde5cb07395e14ec388fbe78d9f96afbe7b
3
+ size 14080
runs/Apr17_15-34-47_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744896907.dws-13.informatik.uni-mannheim.de.354277.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3027cb401193747296d7cecab4181c7ff1b8f4cacc384466da95dbafc6237f88
3
+ size 10000
runs/Apr17_15-38-37_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897136.dws-13.informatik.uni-mannheim.de.358574.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3b1a32ef20678274e49c312b06c745d9f88db7ad8df82b237dd4e6b404656e1
3
+ size 15440
runs/Apr17_15-44-21_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897478.dws-13.informatik.uni-mannheim.de.361911.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01457b5632ba720ae0fbd37c68146b6df655770b9375811ca122e7247b375e08
3
+ size 10680
runs/Apr17_15-47-02_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744897641.dws-13.informatik.uni-mannheim.de.367715.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d7eba0e2149a84b1d8c4354de5c722f374f8261f90dea6a210f67c99553175
3
+ size 73239
runs/Apr17_17-06-05_dws-13.informatik.uni-mannheim.de/events.out.tfevents.1744902382.dws-13.informatik.uni-mannheim.de.397931.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fb65086de7896a9568fa730ba3a18b970385091a6e1f2d29e4936cdef3d9ae
3
+ size 697781
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:222bc846b2070abf618f8e8b27e62457d02026c1547537c819cda73895175bc8
3
- size 7608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54aab527e52224d27152bc9919e57e2e695d6220dc31b2172ed3380d9ca2eca
3
+ size 7672