damilojohn committed
Commit 0079cb1 · verified · 1 Parent(s): e1c60b9

Upload 2 files

Files changed (2)
  1. configuration_afrolid.py +79 -0
  2. modelling_afrolid.py +0 -0
configuration_afrolid.py ADDED
@@ -0,0 +1,79 @@
+ from dataclasses import dataclass, asdict
+ from transformers import PretrainedConfig
+
+
+ @dataclass
+ class QuantNoiseConfig:
+     pq: float = 0.0
+     pq_block_size: int = 8
+
+     def to_dict(self):
+         return asdict(self)
+
+     @classmethod
+     def from_dict(cls, data):
+         return cls(**data)
+
+
+ repo_name = "damilojohn/AfroLid"
+
+
+ class AfroLidConfig(PretrainedConfig):
+     model_type = "afrolid"
+
+     def __init__(self,
+                  encoder_vocab_size=64001,
+                  decoder_vocab_size=528,
+                  embed_dim=768,
+                  ffn_dim=3072,
+                  num_heads=12,
+                  num_layers=12,
+                  max_seq_len=512,
+                  dropout=0.1,
+                  attention_dropout=0.1,
+                  activation_dropout=0.0,
+                  layerdrop=0.0,
+                  normalize_before=False,
+                  learned_pos=False,
+                  max_source_positions=1024,
+                  max_target_positions=1024,
+                  no_token_positional_embeddings=False,
+                  share_decoder_input_output_embed=True,
+                  share_all_embeddings=False,
+                  layernorm_embedding=False,
+                  checkpoint_activations=False,
+                  offload_activations=False,
+                  bias=False,
+                  **kwargs):
+         """
+         AfroLid configuration class for an encoder-decoder transformer model,
+         with support for QuantNoiseConfig.
+         """
+         # Vocabulary and model dimensions
+         self.encoder_vocab_size = encoder_vocab_size
+         self.decoder_vocab_size = decoder_vocab_size
+         self.embed_dim = embed_dim
+         self.ffn_dim = ffn_dim
+         self.num_heads = num_heads
+         self.num_layers = num_layers
+         self.max_seq_len = max_seq_len
+         # Regularization
+         self.dropout = dropout
+         self.attention_dropout = attention_dropout
+         self.activation_dropout = activation_dropout
+         self.layerdrop = layerdrop
+         # Positional-embedding and architecture switches
+         self.normalize_before = normalize_before
+         self.learned_pos = learned_pos
+         self.max_source_positions = max_source_positions
+         self.max_target_positions = max_target_positions
+         self.no_token_positional_embeddings = no_token_positional_embeddings
+         self.share_decoder_input_output_embed = share_decoder_input_output_embed
+         self.share_all_embeddings = share_all_embeddings
+         self.layernorm_embedding = layernorm_embedding
+         # Memory-saving options
+         self.checkpoint_activations = checkpoint_activations
+         self.offload_activations = offload_activations
+         self.bias = bias
+
+         super().__init__(**kwargs)
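
For context, a minimal usage sketch (not part of the commit): it instantiates AfroLidConfig, round-trips QuantNoiseConfig through to_dict/from_dict, and uses the JSON serialization inherited from PretrainedConfig. The import assumes the module is named after configuration_afrolid.py above; the local save path is illustrative.

from configuration_afrolid import AfroLidConfig, QuantNoiseConfig

# Defaults come from the signature above; extra kwargs
# (e.g. pad_token_id) are forwarded to PretrainedConfig.
config = AfroLidConfig(dropout=0.2, pad_token_id=1)
assert config.model_type == "afrolid"
assert config.embed_dim == 768

# QuantNoiseConfig round-trips through plain dicts.
qn = QuantNoiseConfig(pq=0.1, pq_block_size=8)
assert QuantNoiseConfig.from_dict(qn.to_dict()) == qn

# JSON (de)serialization is provided by PretrainedConfig.
config.save_pretrained("./afrolid-config")
reloaded = AfroLidConfig.from_pretrained("./afrolid-config")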
modelling_afrolid.py ADDED
The diff for this file is too large to render. See raw diff
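
Since the commit ships a custom configuration/modelling pair, the usual way to load the repo from the Hub is through the Auto classes with trust_remote_code=True. This is a sketch under the assumption that the repo's config.json carries an auto_map entry pointing at these two files; that mapping is not visible in this diff.

from transformers import AutoConfig, AutoModel

# trust_remote_code=True lets transformers execute
# configuration_afrolid.py / modelling_afrolid.py from the Hub repo.
config = AutoConfig.from_pretrained("damilojohn/AfroLid", trust_remote_code=True)
model = AutoModel.from_pretrained("damilojohn/AfroLid", trust_remote_code=True)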