karrrr123456 committed on
Commit
1952869
·
verified ·
1 Parent(s): f460a06

Upload 9 files

Browse files
README.md CHANGED
@@ -1,16 +1,3 @@
1
- ---
2
- license: mit
3
- datasets:
4
- - EleutherAI/pile
5
- language:
6
- - en
7
- metrics:
8
- - accuracy
9
- base_model:
10
- - allenai/olmOCR-7B-0225-preview
11
- new_version: allenai/olmOCR-7B-0225-preview
12
- pipeline_tag: text-generation
13
- library_name: adapter-transformers
14
- tags:
15
- - text-generation-inference
16
- ---
 
1
+ # Basic AI Model
2
+
3
+ This project provides the basic scaffolding for an AI model (a model configuration, tokenizer files, and placeholder scripts) and does not include any data.
 
 
 
 
 
 
 
 
 
 
 
 
 
model/config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "model_type": "custom_ai",
3
+ "num_layers": 12,
4
+ "hidden_size": 768
5
+ }
model/model.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Placeholder script for defining the model architecture
tokenizer/merges.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ t h
2
+ e _
3
+ a _
4
+ i s
5
+ s a
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "unk_token": "<unk>",
5
+ "pad_token": "<pad>",
6
+ "mask_token": "<mask>"
7
+ }
tokenizer/tokenizer.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Placeholder script for initializing and using a tokenizer
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "tokenizer_type": "BPE",
3
+ "vocab_size": 30000
4
+ }
tokenizer/vocab.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "the": 0,
3
+ "a": 1,
4
+ "is": 2,
5
+ "sample": 3,
6
+ "text": 4
7
+ }
utils/helpers.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Placeholder for utility functions