Aratako/reward-test-sarashina

Browse files

Files changed (8) hide show

README.md +135 -0
config.json +38 -0
model.safetensors +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +171 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,135 @@

+---
+library_name: transformers
+license: other
+base_model: sbintuitions/sarashina2.1-1b
+tags:
+- generated_from_trainer
+metrics:
+- pearsonr
+- spearmanr
+model-index:
+- name: test-clf-sarashina
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# test-clf-sarashina
+This model is a fine-tuned version of [sbintuitions/sarashina2.1-1b](https://huggingface.co/sbintuitions/sarashina2.1-1b) on an unknown dataset.
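+It achieves the following results on the evaluation set (note: these values do not match any logged step in the "Training results" table below, whose lowest validation loss is 1.1625 at step 1440; the evaluation run that produced them is not documented in this card):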
+- Loss: 1.0627
+- Mae: 0.7734
+- R2: 0.4137
+- Pearsonr: 0.6549
+- Spearmanr: 0.6483
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 4
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 16
+- optimizer: paged 8-bit AdamW (`OptimizerNames.PAGED_ADAMW_8BIT`) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine_with_min_lr
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Mae    | R2       | Pearsonr | Spearmanr |
+|:-------------:|:------:|:----:|:---------------:|:------:|:--------:|:--------:|:---------:|
+| 33.4003       | 0.0440 | 30   | 41.9736         | 5.0785 | -19.5514 | 0.0060   | 0.0030    |
+| 68.9791       | 0.0880 | 60   | 26.7601         | 3.9223 | -12.1025 | 0.0141   | 0.0121    |
+| 7.0015        | 0.1320 | 90   | 18.4436         | 3.2333 | -8.0305  | 0.0940   | 0.1009    |
+| 9.5128        | 0.1760 | 120  | 12.7227         | 2.6544 | -5.2294  | 0.1979   | 0.2112    |
+| 8.1175        | 0.2200 | 150  | 8.9334          | 2.3069 | -3.3740  | 0.2409   | 0.2638    |
+| 5.7487        | 0.2640 | 180  | 5.1040          | 1.6855 | -1.4991  | 0.3342   | 0.3609    |
+| 4.2498        | 0.3080 | 210  | 6.3587          | 2.0904 | -2.1134  | 0.3393   | 0.3770    |
+| 3.211         | 0.3520 | 240  | 2.5982          | 1.2158 | -0.2722  | 0.4370   | 0.4679    |
+| 12.1813       | 0.3960 | 270  | 2.2745          | 1.0969 | -0.1137  | 0.4647   | 0.4956    |
+| 2.4104        | 0.4400 | 300  | 2.1392          | 1.1266 | -0.0474  | 0.4725   | 0.5040    |
+| 2.3           | 0.4840 | 330  | 2.0064          | 1.1148 | 0.0176   | 0.5250   | 0.5458    |
+| 4.1499        | 0.5281 | 360  | 2.4043          | 1.1457 | -0.1772  | 0.5252   | 0.5581    |
+| 1.6108        | 0.5721 | 390  | 1.6467          | 0.9633 | 0.1937   | 0.5714   | 0.5802    |
+| 1.81          | 0.6161 | 420  | 1.5801          | 0.9609 | 0.2263   | 0.5978   | 0.6000    |
+| 2.1724        | 0.6601 | 450  | 1.6209          | 0.9791 | 0.2063   | 0.5482   | 0.4986    |
+| 2.0144        | 0.7041 | 480  | 1.7803          | 0.9999 | 0.1283   | 0.6017   | 0.5923    |
+| 4.5503        | 0.7481 | 510  | 1.3499          | 0.8849 | 0.3391   | 0.5942   | 0.5621    |
+| 4.3733        | 0.7921 | 540  | 3.5365          | 1.6830 | -0.7316  | 0.6105   | 0.5996    |
+| 1.4027        | 0.8361 | 570  | 1.3477          | 0.9035 | 0.3401   | 0.6328   | 0.6183    |
+| 2.429         | 0.8801 | 600  | 1.2318          | 0.8174 | 0.3969   | 0.6315   | 0.6103    |
+| 0.9818        | 0.9241 | 630  | 1.8458          | 1.0223 | 0.0962   | 0.6372   | 0.6054    |
+| 1.5284        | 0.9681 | 660  | 1.2547          | 0.8434 | 0.3856   | 0.6402   | 0.6157    |
+| 1.5741        | 1.0117 | 690  | 1.3692          | 0.8539 | 0.3296   | 0.6279   | 0.6049    |
+| 2.251         | 1.0557 | 720  | 1.7575          | 1.1040 | 0.1395   | 0.6439   | 0.6396    |
+| 0.5807        | 1.0997 | 750  | 1.2974          | 0.8208 | 0.3648   | 0.6481   | 0.6275    |
+| 1.8142        | 1.1437 | 780  | 1.5116          | 0.9475 | 0.2599   | 0.6530   | 0.6302    |
+| 0.8249        | 1.1878 | 810  | 1.3097          | 0.8618 | 0.3588   | 0.6447   | 0.6145    |
+| 1.4956        | 1.2318 | 840  | 2.2229          | 1.2553 | -0.0884  | 0.6392   | 0.6362    |
+| 2.0046        | 1.2758 | 870  | 2.1057          | 1.2350 | -0.0310  | 0.6468   | 0.6260    |
+| 1.3633        | 1.3198 | 900  | 1.2954          | 0.8883 | 0.3657   | 0.6533   | 0.6312    |
+| 3.1957        | 1.3638 | 930  | 1.8696          | 1.1447 | 0.0846   | 0.6451   | 0.6154    |
+| 0.793         | 1.4078 | 960  | 1.2303          | 0.8429 | 0.3976   | 0.6413   | 0.6052    |
+| 2.2954        | 1.4518 | 990  | 1.3201          | 0.8624 | 0.3537   | 0.6571   | 0.6240    |
+| 1.7479        | 1.4958 | 1020 | 1.2562          | 0.8144 | 0.3849   | 0.6399   | 0.6125    |
+| 1.6785        | 1.5398 | 1050 | 1.7362          | 1.0790 | 0.1499   | 0.6492   | 0.6269    |
+| 1.4885        | 1.5838 | 1080 | 2.3715          | 1.3151 | -0.1612  | 0.6480   | 0.6215    |
+| 3.2869        | 1.6278 | 1110 | 1.5134          | 0.9746 | 0.2590   | 0.6498   | 0.6329    |
+| 2.7832        | 1.6718 | 1140 | 1.2013          | 0.8259 | 0.4118   | 0.6593   | 0.6298    |
+| 1.4801        | 1.7158 | 1170 | 1.5599          | 1.0123 | 0.2362   | 0.6573   | 0.6294    |
+| 1.3236        | 1.7598 | 1200 | 1.9116          | 1.1420 | 0.0640   | 0.6457   | 0.6253    |
+| 0.627         | 1.8038 | 1230 | 1.3976          | 0.9289 | 0.3157   | 0.6609   | 0.6430    |
+| 1.0075        | 1.8478 | 1260 | 1.2831          | 0.8849 | 0.3718   | 0.6589   | 0.6491    |
+| 1.2107        | 1.8918 | 1290 | 1.1647          | 0.7974 | 0.4298   | 0.6625   | 0.6453    |
+| 0.8244        | 1.9358 | 1320 | 1.5123          | 0.9705 | 0.2595   | 0.6544   | 0.6296    |
+| 1.1668        | 1.9798 | 1350 | 1.2957          | 0.8695 | 0.3656   | 0.6584   | 0.6356    |
+| 1.0709        | 2.0235 | 1380 | 1.1780          | 0.8212 | 0.4232   | 0.6648   | 0.6458    |
+| 0.8817        | 2.0675 | 1410 | 1.2616          | 0.8664 | 0.3823   | 0.6662   | 0.6506    |
+| 1.3163        | 2.1115 | 1440 | 1.1625          | 0.7953 | 0.4308   | 0.6615   | 0.6458    |
+| 1.3449        | 2.1555 | 1470 | 1.4815          | 0.9598 | 0.2746   | 0.6547   | 0.6425    |
+| 1.1915        | 2.1995 | 1500 | 1.2140          | 0.8206 | 0.4056   | 0.6677   | 0.6518    |
+| 0.7156        | 2.2435 | 1530 | 1.2342          | 0.8335 | 0.3957   | 0.6633   | 0.6468    |
+| 0.8369        | 2.2875 | 1560 | 1.2145          | 0.8241 | 0.4053   | 0.6672   | 0.6503    |
+| 1.2756        | 2.3315 | 1590 | 1.4552          | 0.9521 | 0.2875   | 0.6693   | 0.6523    |
+| 1.2847        | 2.3755 | 1620 | 1.3366          | 0.8992 | 0.3456   | 0.6706   | 0.6490    |
+| 0.9743        | 2.4195 | 1650 | 1.3276          | 0.8922 | 0.3500   | 0.6680   | 0.6471    |
+| 1.2146        | 2.4635 | 1680 | 1.2658          | 0.8572 | 0.3802   | 0.6711   | 0.6509    |
+| 1.0538        | 2.5075 | 1710 | 1.2478          | 0.8455 | 0.3890   | 0.6710   | 0.6516    |
+| 0.7843        | 2.5515 | 1740 | 1.3749          | 0.9140 | 0.3268   | 0.6665   | 0.6479    |
+| 1.9038        | 2.5955 | 1770 | 1.2816          | 0.8703 | 0.3725   | 0.6678   | 0.6488    |
+| 2.0116        | 2.6395 | 1800 | 1.2002          | 0.8211 | 0.4124   | 0.6693   | 0.6500    |
+| 1.5984        | 2.6835 | 1830 | 1.3324          | 0.8956 | 0.3476   | 0.6699   | 0.6503    |
+| 0.7943        | 2.7275 | 1860 | 1.2132          | 0.8308 | 0.4060   | 0.6691   | 0.6477    |
+| 0.544         | 2.7715 | 1890 | 1.3286          | 0.8932 | 0.3495   | 0.6699   | 0.6497    |
+| 1.9314        | 2.8155 | 1920 | 1.3196          | 0.8859 | 0.3539   | 0.6688   | 0.6490    |
+| 0.5876        | 2.8596 | 1950 | 1.2934          | 0.8711 | 0.3667   | 0.6688   | 0.6488    |
+| 0.5792        | 2.9036 | 1980 | 1.2532          | 0.8488 | 0.3864   | 0.6697   | 0.6494    |
+| 0.8113        | 2.9476 | 2010 | 1.2244          | 0.8336 | 0.4005   | 0.6705   | 0.6496    |
+| 0.8149        | 2.9916 | 2040 | 1.2827          | 0.8662 | 0.3720   | 0.6698   | 0.6502    |
+### Framework versions
+- Transformers 4.49.0
+- Pytorch 2.6.0+cu124
+- Datasets 3.3.2
+- Tokenizers 0.21.0

config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "_name_or_path": "sbintuitions/sarashina2.1-1b",
+  "architectures": [
+    "LlamaForSequenceClassification"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 112,
+  "hidden_act": "silu",
+  "hidden_size": 1792,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 6272,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 8,
+  "pad_token_id": 3,
+  "pretraining_tp": 1,
+  "problem_type": "regression",
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.49.0",
+  "use_cache": true,
+  "vocab_size": 102400
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44123612045086f9a8a4afa63c8db6b86870b24b37164b00cef02f22c422bae8
+size 2448105072

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<cls>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "<sep>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:008293028e1a9d9a1038d9b63d989a2319797dfeaa03f171093a57b33a3a8277
+size 1831879

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,171 @@

+{
+  "add_bos_token": false,
+  "add_dummy_prefix_space": false,
+  "add_eos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<cls>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "<|available_tools|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "<|tool_calls|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "<|tool_results|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "13": {
+      "content": "<|code|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "14": {
+      "content": "<|file|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102397": {
+      "content": "<|prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102398": {
+      "content": "<|suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102399": {
+      "content": "<|middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<cls>",
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_ids": 0,
+  "extra_special_tokens": {},
+  "keep_accents": true,
+  "legacy": false,
+  "mask_token": "<mask>",
+  "model_max_length": 8192,
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "sep_token": "<sep>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2025e822a93d31255510dc63d53d95c7588c132c1cb5959db0016213b84edaf6
+size 5368