File size: 420 Bytes
035761e
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer
# from datasets import load_dataset


# dataset = load_dataset("HuggingFaceTB/smollm-corpus", "cosmopedia-v2")

# Use the cosmo2 tokenizer: https://huggingface.co/HuggingFaceTB/cosmo2-tokenizer
# tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")

# Identifier of the pretrained checkpoint to load.
_CHECKPOINT_ID = "HuggingFaceTB/SmolLM2-135M"

# Instantiate the causal language model from the checkpoint above.
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT_ID)

# Write the model's string representation to stdout for inspection.
print(model)