Upload 2 files
Browse files- configuration_afrolid.py +79 -0
- modelling_afrolid.py +0 -0
configuration_afrolid.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import PretrainedConfig
|
2 |
+
from dataclasses import dataclass, asdict
|
3 |
+
|
4 |
+
@dataclass
class QuantNoiseConfig:
    """Settings for quantization-noise regularization.

    Field names follow the fairseq-style quant-noise convention:
    ``pq`` is presumably the probability of applying product-quantization
    noise (0.0 disables it) and ``pq_block_size`` the PQ block size —
    TODO(review): confirm against the model code that consumes this config.
    """

    pq: float = 0.0
    pq_block_size: int = 8

    def to_dict(self) -> dict:
        """Return the configuration as a plain, JSON-serializable dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "QuantNoiseConfig":
        """Reconstruct a config from a dict produced by :meth:`to_dict`.

        Raises:
            TypeError: if ``data`` contains keys that are not fields of
                this dataclass.
        """
        return cls(**data)
|
20 |
+
|
21 |
+
|
22 |
+
# Hugging Face Hub repository identifier for this model.
# NOTE(review): appears unused within this file — presumably referenced by
# the companion modelling module or upload scripts; verify before removing.
repo_name = "damilojohn/AfroLid"
|
23 |
+
|
24 |
+
|
25 |
+
class AfroLidConfig(PretrainedConfig):
    """Configuration for the AfroLid encoder-decoder transformer.

    Holds the architecture hyperparameters needed to build an AfroLid
    model; any extra keyword arguments (``pad_token_id`` etc.) are passed
    through to :class:`~transformers.PretrainedConfig`.

    NOTE(review): the original docstring claimed "support for
    QuantNoiseConfig", but no quant-noise parameter is accepted here —
    confirm whether that integration was intended.

    Args:
        encoder_vocab_size: Size of the encoder (source) vocabulary.
        decoder_vocab_size: Size of the decoder (target) vocabulary.
        embed_dim: Dimensionality of the token embeddings / hidden states.
        ffn_dim: Inner dimensionality of the feed-forward sublayers.
        num_heads: Number of attention heads.
        num_layers: Number of transformer layers.
        max_seq_len: Maximum sequence length.
        dropout: General dropout probability.
        attention_dropout: Dropout probability on attention weights.
        activation_dropout: Dropout probability after FFN activations.
        layerdrop: Probability of dropping an entire layer (LayerDrop).
        normalize_before: Use pre-normalization if ``True``.
        learned_pos: Use learned positional embeddings if ``True``.
        max_source_positions: Maximum number of source positions.
        max_target_positions: Maximum number of target positions.
        no_token_positional_embeddings: Disable positional embeddings.
        share_decoder_input_output_embed: Tie decoder input and output
            embedding matrices.
        share_all_embeddings: Tie encoder and decoder embeddings.
        layernorm_embedding: Apply LayerNorm to the embedding output.
        checkpoint_activations: Enable activation checkpointing.
        offload_activations: Offload activations (presumably to CPU —
            confirm against the modelling code).
        bias: Whether linear projections use a bias term.
    """

    model_type = "afrolid"

    def __init__(
        self,
        encoder_vocab_size=64001,
        decoder_vocab_size=528,
        embed_dim=768,
        ffn_dim=3072,
        num_heads=12,
        num_layers=12,
        max_seq_len=512,
        dropout=0.1,
        attention_dropout=0.1,
        activation_dropout=0.0,
        layerdrop=0.0,
        normalize_before=False,
        learned_pos=False,
        max_source_positions=1024,
        max_target_positions=1024,
        no_token_positional_embeddings=False,
        share_decoder_input_output_embed=True,
        share_all_embeddings=False,
        layernorm_embedding=False,
        checkpoint_activations=False,
        offload_activations=False,
        bias=False,
        **kwargs,
    ):
        # Record each hyperparameter on the instance so PretrainedConfig
        # can round-trip it through config.json serialization.
        for attr_name, attr_value in {
            "encoder_vocab_size": encoder_vocab_size,
            "decoder_vocab_size": decoder_vocab_size,
            "embed_dim": embed_dim,
            "ffn_dim": ffn_dim,
            "num_heads": num_heads,
            "num_layers": num_layers,
            "max_seq_len": max_seq_len,
            "dropout": dropout,
            "attention_dropout": attention_dropout,
            "activation_dropout": activation_dropout,
            "layerdrop": layerdrop,
            "normalize_before": normalize_before,
            "learned_pos": learned_pos,
            "max_source_positions": max_source_positions,
            "max_target_positions": max_target_positions,
            "no_token_positional_embeddings": no_token_positional_embeddings,
            "share_decoder_input_output_embed": share_decoder_input_output_embed,
            "share_all_embeddings": share_all_embeddings,
            "layernorm_embedding": layernorm_embedding,
            "checkpoint_activations": checkpoint_activations,
            "offload_activations": offload_activations,
            "bias": bias,
        }.items():
            setattr(self, attr_name, attr_value)

        # Base-class init last, consuming the remaining generic kwargs
        # (token ids, name_or_path, etc.), matching the original ordering.
        super().__init__(**kwargs)
|
modelling_afrolid.py
ADDED
The diff for this file is too large to render.
See raw diff
|
|