Oysiyl committed on
Commit
564e9c2
·
verified ·
1 Parent(s): 3c4a716

Oysiyl/colqwen_ufo

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
- library_name: transformers
3
- license: mit
4
- base_model: vidore/ColSmolVLM-Instruct-256M-base
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,7 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # colqwen_ufo
16
 
17
- This model is a fine-tuned version of [vidore/ColSmolVLM-Instruct-256M-base](https://huggingface.co/vidore/ColSmolVLM-Instruct-256M-base) on an unknown dataset.
 
 
18
 
19
  ## Model description
20
 
@@ -34,21 +36,30 @@ More information needed
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 5e-05
37
- - train_batch_size: 1
38
  - eval_batch_size: 8
39
  - seed: 42
40
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
41
  - lr_scheduler_type: linear
42
- - lr_scheduler_warmup_steps: 10
43
  - num_epochs: 1
44
 
45
  ### Training results
46
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  ### Framework versions
50
 
 
51
  - Transformers 4.51.3
52
- - Pytorch 2.5.1
53
- - Datasets 3.5.0
54
- - Tokenizers 0.21.1
 
1
  ---
2
+ library_name: peft
3
+ license: apache-2.0
4
+ base_model: vidore/colqwen2-base
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # colqwen_ufo
16
 
17
+ This model is a fine-tuned version of [vidore/colqwen2-base](https://huggingface.co/vidore/colqwen2-base) on an unknown dataset.
18
+ It achieves the following results on the evaluation set:
19
+ - Loss: 0.0562
20
 
21
  ## Model description
22
 
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 5e-05
39
+ - train_batch_size: 4
40
  - eval_batch_size: 8
41
  - seed: 42
42
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
+ - lr_scheduler_warmup_steps: 100
45
  - num_epochs: 1
46
 
47
  ### Training results
48
 
49
+ | Training Loss | Epoch | Step | Validation Loss |
50
+ |:-------------:|:------:|:----:|:---------------:|
51
+ | 0.1137 | 0.1636 | 80 | 0.0768 |
52
+ | 0.0307 | 0.3272 | 160 | 0.0621 |
53
+ | 0.0336 | 0.4908 | 240 | 0.0627 |
54
+ | 0.0217 | 0.6544 | 320 | 0.0579 |
55
+ | 0.0278 | 0.8180 | 400 | 0.0563 |
56
+ | 0.0265 | 0.9816 | 480 | 0.0562 |
57
 
58
 
59
  ### Framework versions
60
 
61
+ - PEFT 0.15.2
62
  - Transformers 4.51.3
63
+ - Pytorch 2.6.0+cu124
64
+ - Datasets 3.3.1
65
+ - Tokenizers 0.21.0
adapter_config.json CHANGED
@@ -1,7 +1,10 @@
1
  {
2
  "alpha_pattern": {},
3
- "auto_mapping": null,
4
- "base_model_name_or_path": "vidore/ColSmolVLM-Instruct-256M-base",
 
 
 
5
  "bias": "none",
6
  "corda_config": null,
7
  "eva_config": null,
@@ -13,18 +16,21 @@
13
  "layers_pattern": null,
14
  "layers_to_transform": null,
15
  "loftq_config": {},
16
- "lora_alpha": 32,
17
  "lora_bias": false,
18
- "lora_dropout": 0.1,
19
  "megatron_config": null,
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
23
- "r": 32,
24
  "rank_pattern": {},
25
  "revision": null,
26
- "target_modules": "(.*(model.text_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(custom_text_proj).*$)",
27
- "task_type": "FEATURE_EXTRACTION",
 
 
 
28
  "trainable_token_indices": null,
29
  "use_dora": false,
30
  "use_rslora": false
 
1
  {
2
  "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "ColQwen2",
5
+ "parent_library": "colpali_engine.models.qwen2.colqwen2.modeling_colqwen2"
6
+ },
7
+ "base_model_name_or_path": "vidore/colqwen2-base",
8
  "bias": "none",
9
  "corda_config": null,
10
  "eva_config": null,
 
16
  "layers_pattern": null,
17
  "layers_to_transform": null,
18
  "loftq_config": {},
19
+ "lora_alpha": 8,
20
  "lora_bias": false,
21
+ "lora_dropout": 0.2,
22
  "megatron_config": null,
23
  "megatron_core": "megatron.core",
24
  "modules_to_save": null,
25
  "peft_type": "LORA",
26
+ "r": 4,
27
  "rank_pattern": {},
28
  "revision": null,
29
+ "target_modules": [
30
+ "q_proj",
31
+ "k_proj"
32
+ ],
33
+ "task_type": null,
34
  "trainable_token_indices": null,
35
  "use_dora": false,
36
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41673fb85f448ff15356e4dfcf9e039d7c6e3cffa1936bc3513f55453005e63e
3
- size 39135840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66dca3fd48867b79a469387304647321e0dea39f5438071886421b47bdd58b61
3
+ size 132730032
runs/May07_17-18-40_ip-10-192-10-175/events.out.tfevents.1746638328.ip-10-192-10-175.3983.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db5ff73724d08ad080f059d57bbb9ef413450224878213e3f312b92f6749c29f
3
+ size 5638
runs/May07_17-19-39_ip-10-192-10-175/events.out.tfevents.1746638386.ip-10-192-10-175.3983.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:928da6ab483c928002ff76eb093c4adeb93f66303e433ff9f5d19827e65b5cdc
3
+ size 5638
runs/May07_17-22-24_ip-10-192-10-175/events.out.tfevents.1746638561.ip-10-192-10-175.43874.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c449e99f36c7376b7201e4970e7467e5ec3083e59cd0378b9696a1eb7cba8d
3
+ size 8875
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32ec14544898b9243dc956d50f0fbbf0bc2d36113ef552e8be825d584c7545b5
3
- size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89555fce4fe9ffb7b930a4668f937f799afe169db4bde67006db1bea17e66b49
3
+ size 5304