SinaAhmadi committed on
Commit
108149f
1 Parent(s): 921cd24

Mazandarani-Persian models and vocab files

Browse files
models/Mazandarani-Persian/best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e91f2510b484e2a254762609c062c8a6fdcf736caf99a612e3bf9512938852e
3
+ size 39254858
models/Mazandarani-Persian/config.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "Mazanderani-Persian_1"
2
+
3
+ data:
4
+ train: "datasets/Mazanderani-Persian/1/train"
5
+ dev: "datasets/Mazanderani-Persian/1/dev"
6
+ test: "datasets/Mazanderani-Persian/1/test"
7
+ level: "char"
8
+ lowercase: False
9
+ normalize: False
10
+ max_sent_length: 100
11
+ dataset_type: "plain"
12
+
13
+ src:
14
+ lang: "src"
15
+ voc_limit: 100
16
+ voc_min_freq: 5
17
+ level: "char"
18
+ trg:
19
+ lang: "trg"
20
+ voc_limit: 100
21
+ voc_min_freq: 5
22
+ level: "char"
23
+
24
+ training:
25
+ random_seed: 42
26
+ optimizer: "adam"
27
+ learning_rate: 0.001
28
+ learning_rate_min: 0.0002
29
+ weight_decay: 0.0
30
+ clip_grad_norm: 1.0
31
+ batch_size: 64
32
+ scheduling: "plateau"
33
+ patience: 10
34
+ decrease_factor: 0.5
35
+ early_stopping_metric: "loss"
36
+ epochs: 80
37
+ validation_freq: 1000
38
+ logging_freq: 100
39
+ eval_metric: "bleu"
40
+ model_dir: "models/Mazanderani-Persian"
41
+ overwrite: True
42
+ shuffle: True
43
+ use_cuda: True
44
+ max_output_length: 100
45
+ print_valid_sents: [0, 3, 6, 9]
46
+ keep_best_ckpts: -1
47
+
48
+ testing:
49
+ n_best: 1
50
+ beam_size: 4
51
+ beam_alpha: 1.0
52
+ eval_metrics: ["bleu", "chrf", "sequence_accuracy"]
53
+ max_output_length: 50
54
+ batch_size: 10
55
+ batch_type: "sentence"
56
+ return_prob: "none"
57
+
58
+ model:
59
+ initializer: "xavier_uniform"
60
+ init_gain: 1.0
61
+ bias_initializer: "zeros"
62
+ embed_initializer: "xavier_uniform"
63
+ embed_init_gain: 1.0
64
+ encoder:
65
+ type: "transformer"
66
+ num_layers: 6
67
+ num_heads: 8
68
+ embeddings:
69
+ embedding_dim: 128
70
+ scale: True
71
+ # typically ff_size = 4 x hidden_size
72
+ hidden_size: 128
73
+ ff_size: 512
74
+ dropout: 0.2
75
+ layer_norm: "pre"
76
+ decoder:
77
+ type: "transformer"
78
+ num_layers: 6
79
+ num_heads: 8
80
+ embeddings:
81
+ embedding_dim: 128
82
+ scale: True
83
+ # typically ff_size = 4 x hidden_size
84
+ hidden_size: 128
85
+ ff_size: 512
86
+ dropout: 0.2
87
+ layer_norm: "pre"
models/Mazandarani-Persian/src_vocab.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <unk>
2
+ <pad>
3
+ <s>
4
+ </s>
5
+
6
+ ا
7
+ ه
8
+ ی
9
+ ن
10
+ و
11
+ ر
12
+ ت
13
+ م
14
+ د
15
+ س
16
+ ل
17
+ ب
18
+ ک
19
+ ش
20
+ ع
21
+ ج
22
+ ز
23
+ گ
24
+ ق
25
+ ف
26
+ ِ
27
+
28
+ 1
29
+ .
30
+ پ
31
+ خ
32
+ 0
33
+ ،
34
+ ئ
35
+ 2
36
+ ط
37
+ ح
38
+ آ
39
+ 9
40
+ 3
41
+ 8
42
+ 5
43
+ ّ
44
+ 4
45
+ 6
46
+ 7
47
+ َ
48
+ چ
49
+ ص
50
+ غ
51
+ ٬
52
+ ژ
53
+ ض
54
+ ٫
55
+ ث
56
+ ي
57
+ ذ
58
+ ظ
59
+ :
60
+ ً
61
+ أ
62
+ ء
63
+ ٔ
64
+ ُ
65
+ =
66
+ ك
67
+ ,
68
+
69
+ ؛
70
+
71
+ ؤ
72
+ ە
73
+ а
74
+ и
75
+ ْ
76
+ с
77
+ о
78
+ р
79
+ ة
80
+ е
81
+ ؟
82
+ к
83
+ ۀ
84
+ л
85
+
86
+
87
+ ;
88
+ ա
89
+ н
90
+ إ
91
+ в
92
+ т
93
+
94
+ я
95
+ К
96
+ з
97
+ ы
98
+ !
99
+ ́
100
+ у
101
+ ր
102
+
103
+ %
104
+ _
models/Mazandarani-Persian/trg_vocab.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <unk>
2
+ <pad>
3
+ <s>
4
+ </s>
5
+
6
+ ا
7
+ ه
8
+ ی
9
+ ن
10
+ و
11
+ ر
12
+ ت
13
+ م
14
+ د
15
+ س
16
+ ل
17
+ ب
18
+ ک
19
+ ش
20
+ ع
21
+ ج
22
+ ز
23
+ گ
24
+ ق
25
+ ف
26
+ ِ
27
+
28
+ 1
29
+ .
30
+ پ
31
+ خ
32
+ 0
33
+ ،
34
+ ئ
35
+ 2
36
+ ط
37
+ ح
38
+ آ
39
+ 9
40
+ 3
41
+ 8
42
+ 5
43
+ ّ
44
+ 4
45
+ 6
46
+ 7
47
+ َ
48
+ چ
49
+ ص
50
+ غ
51
+ ٬
52
+ ژ
53
+ ض
54
+ ٫
55
+ ث
56
+ ي
57
+ ذ
58
+ ظ
59
+ :
60
+ ً
61
+ أ
62
+ ء
63
+ ٔ
64
+ ُ
65
+ =
66
+ ك
67
+ ,
68
+
69
+ ؛
70
+ ؤ
71
+ ە
72
+ а
73
+ и
74
+ ْ
75
+ с
76
+ о
77
+ р
78
+ ة
79
+ ˇ
80
+ е
81
+ ؟
82
+ к
83
+ ۀ
84
+ л
85
+
86
+ ;
87
+ ա
88
+ н
89
+ إ
90
+ в
91
+ т
92
+
93
+ я
94
+ К
95
+ з
96
+ ы
97
+ !
98
+ ́
99
+ у
100
+ ր
101
+
102
+ %
103
+ _
104
+ б