Kinsleykinsley committed
Commit ea7d115 · verified · 1 Parent(s): 13dcd4d

Upload folder using huggingface_hub

added_tokens.json ADDED
@@ -0,0 +1,8 @@
{
  "</SEQ>": 34,
  "</SMILES>": 32,
  "<SEQ>": 33,
  "<SMILES>": 31,
  "<|endoftext|>": 30,
  "[PAD]": 35
}
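The mapping above can be checked after loading the tokenizer with transformers. A minimal sketch, assuming the uploaded files live in a local directory ./checkpoint (hypothetical path):

```python
from transformers import AutoTokenizer

# "./checkpoint" is a hypothetical local path to this upload; adjust as needed.
tokenizer = AutoTokenizer.from_pretrained("./checkpoint")

# Expected ids per added_tokens.json:
# <SMILES>=31, </SMILES>=32, <SEQ>=33, </SEQ>=34, <|endoftext|>=30, [PAD]=35
for tok in ["<SMILES>", "</SMILES>", "<SEQ>", "</SEQ>", "<|endoftext|>", "[PAD]"]:
    print(tok, tokenizer.convert_tokens_to_ids(tok))
```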
config.json ADDED
@@ -0,0 +1,46 @@
{
  "activation_function": "gelu_new",
  "architectures": [
    "ProGenForCausalLM"
  ],
  "attn_pdrop": 0.0,
  "auto_map": {
    "AutoConfig": "hugohrban/progen2-small--configuration_progen.ProGenConfig",
    "AutoModelForCausalLM": "hugohrban/progen2-small--modeling_progen.ProGenForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "embed_dim": 1024,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "progen",
  "n_embd": 1024,
  "n_head": 16,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "resid_pdrop": 0.0,
  "rotary_dim": 32,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50,
      "temperature": 1.0
    }
  },
  "tokenizer_class": "GPT2Tokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.51.3",
  "use_cache": true,
  "vocab_size": 32,
  "vocab_size_emb": 32,
  "vocab_size_lm_head": 32
}
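The auto_map entries point at custom ProGen code hosted in hugohrban/progen2-small, so loading this checkpoint through the Auto classes generally requires trust_remote_code=True. A minimal loading sketch (the local path ./checkpoint is hypothetical):

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# auto_map resolves to hugohrban/progen2-small's configuration_progen / modeling_progen,
# so trust_remote_code=True lets transformers import those custom ProGen classes.
config = AutoConfig.from_pretrained("./checkpoint", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "./checkpoint",
    torch_dtype=torch.float32,   # matches "torch_dtype": "float32" above
    trust_remote_code=True,
)
model.eval()
print(config.n_layer, config.n_embd, config.n_positions)  # 12, 1024, 1024
```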
generation_config.json ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.51.3"
}
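These defaults (bos 1, eos 2), together with the sampling settings under task_specific_params in config.json, can be exercised with generate(). A sketch reusing the model and tokenizer loaded in the earlier snippets; prompting with the <SEQ> wrapper token is an assumption, not documented in these files:

```python
# The <SEQ> prompt format is an assumed usage pattern for this checkpoint.
inputs = tokenizer("<SEQ>M", return_tensors="pt")
out = model.generate(
    **inputs,
    do_sample=True,       # mirrors task_specific_params.text-generation in config.json
    max_length=50,
    temperature=1.0,
    bos_token_id=1,       # from generation_config.json
    eos_token_id=2,
)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```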
merges.txt ADDED
@@ -0,0 +1 @@
#version: 0.2
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:391873f7bd3f2fd90e1b32839b1ff568cf27ea009815c8ba2d3c4c1c376bcd0d
size 604510743
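This entry is a Git LFS pointer, not the weights themselves; the ~605 MB pytorch_model.bin must be fetched (for example via git lfs pull or huggingface_hub) before use. A quick inspection sketch once the real file is in place:

```python
import torch

# Assumes the actual LFS object has been downloaded in place of the pointer file.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")
print(len(state_dict), "tensors")
print(f"{sum(t.numel() for t in state_dict.values()):,} parameters")
```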
special_tokens_map.json ADDED
@@ -0,0 +1,42 @@
{
  "additional_special_tokens": [
    {
      "content": "<SMILES>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "content": "</SMILES>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "content": "<SEQ>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "content": "</SEQ>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    }
  ],
  "bos_token": "<|endoftext|>",
  "eos_token": "<|endoftext|>",
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": "<|endoftext|>"
}
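Because <SMILES>, </SMILES>, <SEQ>, and </SEQ> are registered as additional special tokens, they are kept as single ids rather than split by the BPE model. A small check reusing the tokenizer loaded earlier; the protein fragment is arbitrary, for illustration only:

```python
# Special tokens stay intact; the residues in between are tokenized character by character.
print(tokenizer.tokenize("<SEQ>MKTAYIAK</SEQ>"))
# e.g. ['<SEQ>', 'M', 'K', 'T', 'A', 'Y', 'I', 'A', 'K', '</SEQ>']
print(tokenizer.convert_tokens_to_ids(["<SEQ>", "</SEQ>", "[PAD]"]))  # expect [33, 34, 35]
```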
tokenizer.json ADDED
@@ -0,0 +1,164 @@
{
  "version": "1.0",
  "truncation": {
    "direction": "Right",
    "max_length": 512,
    "strategy": "LongestFirst",
    "stride": 0
  },
  "padding": {
    "strategy": {
      "Fixed": 512
    },
    "direction": "Right",
    "pad_to_multiple_of": null,
    "pad_id": 35,
    "pad_type_id": 0,
    "pad_token": "[PAD]"
  },
  "added_tokens": [
    {
      "id": 0,
      "content": "<|pad|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<|bos|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<|eos|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 30,
      "content": "<|endoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 31,
      "content": "<SMILES>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 32,
      "content": "</SMILES>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 33,
      "content": "<SEQ>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 34,
      "content": "</SEQ>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 35,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "post_processor": {
    "type": "ByteLevel",
    "add_prefix_space": true,
    "trim_offsets": true,
    "use_regex": true
  },
  "decoder": {
    "type": "ByteLevel",
    "add_prefix_space": true,
    "trim_offsets": true,
    "use_regex": true
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": null,
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "ignore_merges": false,
    "vocab": {
      "<|pad|>": 0,
      "<|bos|>": 1,
      "<|eos|>": 2,
      "1": 3,
      "2": 4,
      "A": 5,
      "B": 6,
      "C": 7,
      "D": 8,
      "E": 9,
      "F": 10,
      "G": 11,
      "H": 12,
      "I": 13,
      "K": 14,
      "L": 15,
      "M": 16,
      "N": 17,
      "O": 18,
      "P": 19,
      "Q": 20,
      "R": 21,
      "S": 22,
      "T": 23,
      "U": 24,
      "V": 25,
      "W": 26,
      "X": 27,
      "Y": 28,
      "Z": 29
    },
    "merges": []
  }
}
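This file bakes in truncation (max_length 512) and fixed right-padding to 512 with pad_id 35, which take effect when it is used directly with the tokenizers library. A minimal sketch; the input sequence is an arbitrary fragment for illustration only:

```python
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")

# The serialized truncation/padding settings are active after from_file, so every
# encoding comes back right-padded with "[PAD]" (id 35) to a fixed length of 512.
enc = tok.encode("MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ")
print(len(enc.ids))      # 512
print(enc.tokens[:5])    # ['M', 'K', 'T', 'A', 'Y']
print(enc.tokens[-1])    # '[PAD]'
```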
tokenizer_config.json ADDED
@@ -0,0 +1,91 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<|pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|bos|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|eos|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "31": {
      "content": "<SMILES>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32": {
      "content": "</SMILES>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "33": {
      "content": "<SEQ>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "34": {
      "content": "</SEQ>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "35": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<SMILES>",
    "</SMILES>",
    "<SEQ>",
    "</SEQ>"
  ],
  "bos_token": "<|endoftext|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "extra_special_tokens": {},
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "[PAD]",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}
vocab.json ADDED
@@ -0,0 +1 @@
{"<|pad|>":0,"<|bos|>":1,"<|eos|>":2,"1":3,"2":4,"A":5,"B":6,"C":7,"D":8,"E":9,"F":10,"G":11,"H":12,"I":13,"K":14,"L":15,"M":16,"N":17,"O":18,"P":19,"Q":20,"R":21,"S":22,"T":23,"U":24,"V":25,"W":26,"X":27,"Y":28,"Z":29}