jefson08 committed on
Commit
55dc389
1 Parent(s): e39d024

Training in progress, step 1500

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "indictrans-en-ne-checkpoint/checkpoint-732",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
@@ -12,18 +12,18 @@
12
  "AutoModelForSeq2SeqLM": "modeling_indictrans.IndicTransForConditionalGeneration"
13
  },
14
  "bos_token_id": 0,
15
- "decoder_attention_heads": 8,
16
- "decoder_embed_dim": 512,
17
- "decoder_ffn_dim": 2048,
18
  "decoder_layerdrop": 0,
19
  "decoder_layers": 18,
20
  "decoder_normalize_before": true,
21
  "decoder_start_token_id": 2,
22
  "decoder_vocab_size": 122672,
23
  "dropout": 0.2,
24
- "encoder_attention_heads": 8,
25
- "encoder_embed_dim": 512,
26
- "encoder_ffn_dim": 2048,
27
  "encoder_layerdrop": 0,
28
  "encoder_layers": 18,
29
  "encoder_normalize_before": true,
@@ -31,14 +31,14 @@
31
  "eos_token_id": 2,
32
  "init_std": 0.02,
33
  "is_encoder_decoder": true,
34
- "layernorm_embedding": true,
35
  "max_source_positions": 256,
36
  "max_target_positions": 256,
37
  "model_type": "IndicTrans",
38
  "num_hidden_layers": 18,
39
  "pad_token_id": 1,
40
  "scale_embedding": true,
41
- "share_decoder_input_output_embed": true,
42
  "tokenizer_class": "IndicTransTokenizer",
43
  "torch_dtype": "bfloat16",
44
  "transformers_version": "4.44.2",
 
1
  {
2
+ "_name_or_path": "indictrans2-en-indic-1B",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
 
12
  "AutoModelForSeq2SeqLM": "modeling_indictrans.IndicTransForConditionalGeneration"
13
  },
14
  "bos_token_id": 0,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_embed_dim": 1024,
17
+ "decoder_ffn_dim": 8192,
18
  "decoder_layerdrop": 0,
19
  "decoder_layers": 18,
20
  "decoder_normalize_before": true,
21
  "decoder_start_token_id": 2,
22
  "decoder_vocab_size": 122672,
23
  "dropout": 0.2,
24
+ "encoder_attention_heads": 16,
25
+ "encoder_embed_dim": 1024,
26
+ "encoder_ffn_dim": 8192,
27
  "encoder_layerdrop": 0,
28
  "encoder_layers": 18,
29
  "encoder_normalize_before": true,
 
31
  "eos_token_id": 2,
32
  "init_std": 0.02,
33
  "is_encoder_decoder": true,
34
+ "layernorm_embedding": false,
35
  "max_source_positions": 256,
36
  "max_target_positions": 256,
37
  "model_type": "IndicTrans",
38
  "num_hidden_layers": 18,
39
  "pad_token_id": 1,
40
  "scale_embedding": true,
41
+ "share_decoder_input_output_embed": false,
42
  "tokenizer_class": "IndicTransTokenizer",
43
  "torch_dtype": "bfloat16",
44
  "transformers_version": "4.44.2",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57ab9a212c34ac081c016977ec6a878b53636ae17a36dfd7aed8f52136c99d01
3
- size 549258952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b119a9e743ae5f25320dd5bc3973c2c8e39c3cb81d397aa2cab123b921254d8
3
+ size 2231178416
runs/Aug25_05-13-29_ip-10-192-11-185/events.out.tfevents.1724562810.ip-10-192-11-185.4259.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06697ddb9996c0d7a0c5d5c411f33e46f9b88cb8803300d20a254e47c079f15e
3
+ size 12024
runs/Aug25_05-20-42_ip-10-192-11-185/events.out.tfevents.1724563243.ip-10-192-11-185.12802.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5afb96f355dea401e97736077432f294404e472bf5d70d5ee2cfaeb54f3202e
3
+ size 5795
runs/Aug25_05-22-20_ip-10-192-11-185/events.out.tfevents.1724563340.ip-10-192-11-185.14365.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5321ce71e4fe574859a181a23a83ef0d96e80b18a58078adbb4bea12bb18cc2c
3
+ size 69268
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89410dd144daa94d81e9649aa908c9a7aa5b26014f77632c4e3dbbbf389351cf
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a46a0d317ad95b2ddbaf58e3b8f7b936e045b021e8cf9d726a1a02c71a8fcbc
3
  size 5368