bitsTobyte committed
Commit 2e3098e · verified · 1 Parent(s): 5c35ae1

added missing imports

- the import statements were missing
- the `max_length` parameter of the `generate` call was set lower than the number of input tokens, which produced an error; it is now set slightly above the input length (raised from 10 to 64, for a prompt of roughly 48 tokens); see the sketch below
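
A minimal sketch of the corrected call, for context: it reuses the model and tokenizer names from the README, but the short prompt is an illustration rather than the README's full example. The commented `max_new_tokens` line is a hedged alternative, not what the README uses; it counts only generated tokens, so it can never fall below the prompt length.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the 1.58-bit Llama 3 model and the matching tokenizer (as in the README).
model = AutoModelForCausalLM.from_pretrained(
    "HF1BitLLM/Llama3-8B-1.58-100B-tokens",
    device_map="cuda",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

# Illustrative prompt (shorter than the README's ~48-token example).
input_ids = tokenizer.encode("Where is Mary?\nAnswer:", return_tensors="pt").cuda()

# max_length counts prompt plus generated tokens, so it must exceed the prompt
# length; the old value of 10 was below the ~48-token prompt and raised an error.
output = model.generate(input_ids, max_length=64, do_sample=False)

# Alternative (an assumption, not what the README uses): max_new_tokens counts
# only newly generated tokens, so it is independent of the prompt length.
# output = model.generate(input_ids, max_new_tokens=16, do_sample=False)

print(tokenizer.decode(output[0], skip_special_tokens=True))
```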

Files changed (1)
  1. README.md +3 -1
README.md CHANGED
@@ -33,6 +33,8 @@ pip install git+https://github.com/huggingface/transformers.git@refs/pull/33410/
 ```
 And then load the model :
 ```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
 model = AutoModelForCausalLM.from_pretrained("HF1BitLLM/Llama3-8B-1.58-100B-tokens", device_map="cuda", torch_dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
@@ -40,7 +42,7 @@ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
 input_text = "Daniel went back to the the the garden. Mary travelled to the kitchen. Sandra journeyed to the kitchen. Sandra went to the hallway. John went to the bedroom. Mary went back to the garden. Where is Mary?\nAnswer:"
 
 input_ids = tokenizer.encode(input_text, return_tensors="pt").cuda()
-output = model.generate(input_ids, max_length=10, do_sample=False)
+output = model.generate(input_ids, max_length=64, do_sample=False)
 generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
 print(generated_text)
 ```