Training in progress, epoch 1
Browse files- config.json +6 -6
- model.safetensors +2 -2
- runs/Jan13_01-01-54_46046415ec16/events.out.tfevents.1736730115.46046415ec16.261.0 +3 -0
- tokenizer.json +41 -2
- tokenizer_config.json +1 -1
- training_args.bin +3 -0
config.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"DeepseekV3ForCausalLM"
|
5 |
],
|
6 |
"attention_bias": false,
|
7 |
"attention_dropout": 0.0,
|
8 |
"auto_map": {
|
9 |
-
"AutoConfig": "configuration_deepseek.DeepseekV3Config",
|
10 |
-
"AutoModel": "modeling_deepseek.DeepseekV3Model",
|
11 |
-
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
|
12 |
},
|
13 |
"aux_loss_alpha": 0.001,
|
14 |
"bos_token_id": 0,
|
@@ -54,8 +54,8 @@
|
|
54 |
"tie_word_embeddings": false,
|
55 |
"topk_group": 4,
|
56 |
"topk_method": "noaux_tc",
|
57 |
-
"torch_dtype": "
|
58 |
-
"transformers_version": "4.
|
59 |
"use_cache": true,
|
60 |
"v_head_dim": 128,
|
61 |
"vocab_size": 129280
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "tonyshark/deepseek-v3-1b",
|
3 |
"architectures": [
|
4 |
"DeepseekV3ForCausalLM"
|
5 |
],
|
6 |
"attention_bias": false,
|
7 |
"attention_dropout": 0.0,
|
8 |
"auto_map": {
|
9 |
+
"AutoConfig": "tonyshark/deepseek-v3-1b--configuration_deepseek.DeepseekV3Config",
|
10 |
+
"AutoModel": "tonyshark/deepseek-v3-1b--modeling_deepseek.DeepseekV3Model",
|
11 |
+
"AutoModelForCausalLM": "tonyshark/deepseek-v3-1b--modeling_deepseek.DeepseekV3ForCausalLM"
|
12 |
},
|
13 |
"aux_loss_alpha": 0.001,
|
14 |
"bos_token_id": 0,
|
|
|
54 |
"tie_word_embeddings": false,
|
55 |
"topk_group": 4,
|
56 |
"topk_method": "noaux_tc",
|
57 |
+
"torch_dtype": "float32",
|
58 |
+
"transformers_version": "4.46.1",
|
59 |
"use_cache": true,
|
60 |
"v_head_dim": 128,
|
61 |
"vocab_size": 129280
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4d0893b4a6f7ad8282fe4255f087a31b6c0b1f6d95b956cb1d85a85e11fa7e3
|
3 |
+
size 4198331024
|
runs/Jan13_01-01-54_46046415ec16/events.out.tfevents.1736730115.46046415ec16.261.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:906a12664fe166fc82b3a97f8ae7993546d54364ee2ab0b60419d4bdc75d9b7b
|
3 |
+
size 6719
|
tokenizer.json
CHANGED
@@ -1,7 +1,19 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation":
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 0,
|
@@ -7419,6 +7431,12 @@
|
|
7419 |
"id": "A",
|
7420 |
"type_id": 0
|
7421 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
7422 |
}
|
7423 |
],
|
7424 |
"pair": [
|
@@ -7434,6 +7452,12 @@
|
|
7434 |
"type_id": 0
|
7435 |
}
|
7436 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
7437 |
{
|
7438 |
"SpecialToken": {
|
7439 |
"id": "<|begin▁of▁sentence|>",
|
@@ -7445,6 +7469,12 @@
|
|
7445 |
"id": "B",
|
7446 |
"type_id": 1
|
7447 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
7448 |
}
|
7449 |
],
|
7450 |
"special_tokens": {
|
@@ -7456,6 +7486,15 @@
|
|
7456 |
"tokens": [
|
7457 |
"<|begin▁of▁sentence|>"
|
7458 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7459 |
}
|
7460 |
}
|
7461 |
},
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": {
|
4 |
+
"direction": "Right",
|
5 |
+
"max_length": 128,
|
6 |
+
"strategy": "LongestFirst",
|
7 |
+
"stride": 0
|
8 |
+
},
|
9 |
+
"padding": {
|
10 |
+
"strategy": "BatchLongest",
|
11 |
+
"direction": "Left",
|
12 |
+
"pad_to_multiple_of": null,
|
13 |
+
"pad_id": 1,
|
14 |
+
"pad_type_id": 0,
|
15 |
+
"pad_token": "<|end▁of▁sentence|>"
|
16 |
+
},
|
17 |
"added_tokens": [
|
18 |
{
|
19 |
"id": 0,
|
|
|
7431 |
"id": "A",
|
7432 |
"type_id": 0
|
7433 |
}
|
7434 |
+
},
|
7435 |
+
{
|
7436 |
+
"SpecialToken": {
|
7437 |
+
"id": "<|end▁of▁sentence|>",
|
7438 |
+
"type_id": 0
|
7439 |
+
}
|
7440 |
}
|
7441 |
],
|
7442 |
"pair": [
|
|
|
7452 |
"type_id": 0
|
7453 |
}
|
7454 |
},
|
7455 |
+
{
|
7456 |
+
"SpecialToken": {
|
7457 |
+
"id": "<|end▁of▁sentence|>",
|
7458 |
+
"type_id": 0
|
7459 |
+
}
|
7460 |
+
},
|
7461 |
{
|
7462 |
"SpecialToken": {
|
7463 |
"id": "<|begin▁of▁sentence|>",
|
|
|
7469 |
"id": "B",
|
7470 |
"type_id": 1
|
7471 |
}
|
7472 |
+
},
|
7473 |
+
{
|
7474 |
+
"SpecialToken": {
|
7475 |
+
"id": "<|end▁of▁sentence|>",
|
7476 |
+
"type_id": 1
|
7477 |
+
}
|
7478 |
}
|
7479 |
],
|
7480 |
"special_tokens": {
|
|
|
7486 |
"tokens": [
|
7487 |
"<|begin▁of▁sentence|>"
|
7488 |
]
|
7489 |
+
},
|
7490 |
+
"<|end▁of▁sentence|>": {
|
7491 |
+
"id": "<|end▁of▁sentence|>",
|
7492 |
+
"ids": [
|
7493 |
+
1
|
7494 |
+
],
|
7495 |
+
"tokens": [
|
7496 |
+
"<|end▁of▁sentence|>"
|
7497 |
+
]
|
7498 |
}
|
7499 |
}
|
7500 |
},
|
tokenizer_config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
-
"add_eos_token":
|
4 |
"add_prefix_space": null,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
+
"add_eos_token": true,
|
4 |
"add_prefix_space": null,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64839e50b7edf20ea18785ae47789266b16e874589534193bc54617cdebab4c4
|
3 |
+
size 5560
|