Update inference.py
inference.py CHANGED (+19 -8)
@@ -1,10 +1,21 @@
+from transformers import AutoTokenizer
 from optimum.intel import OVModelForCausalLM
-from transformers import AutoTokenizer, pipeline
 
-
-
-
-
-
-
-
+model_path = "helenai/ibm-granite-granite-8b-code-instruct-ov"
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = OVModelForCausalLM.from_pretrained(model_path)
+
+# change input text as desired
+chat = [
+    { "role": "user", "content": "Write a code to find the maximum value in a list of numbers." },
+]
+chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+# tokenize the text
+input_tokens = tokenizer(chat, return_tensors="pt")
+# generate output tokens
+output = model.generate(**input_tokens, max_new_tokens=100)
+# decode output tokens into text
+output = tokenizer.batch_decode(output)
+# loop over the batch to print, in this example the batch size is 1
+for i in output:
+    print(i)
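The old inference.py also imported `pipeline` from transformers, which this commit drops in favor of calling `generate` and `batch_decode` directly on the OpenVINO model. For comparison, a minimal pipeline-based sketch (not the removed code, which is not shown in the diff; the model ID and generation budget are taken from the new script) could look like this:

from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

model_path = "helenai/ibm-granite-granite-8b-code-instruct-ov"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = OVModelForCausalLM.from_pretrained(model_path)

# OVModelForCausalLM can be passed to the transformers text-generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# build the prompt with the model's chat template, as in the updated script
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Write a code to find the maximum value in a list of numbers."}],
    tokenize=False,
    add_generation_prompt=True,
)

# generate up to 100 new tokens, the same budget as the updated script
print(pipe(prompt, max_new_tokens=100)[0]["generated_text"])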