Add vLLM serve
Browse files
README.md
CHANGED
@@ -53,9 +53,12 @@ tokenizer = AutoTokenizer.from_pretrained("ilsp/Llama-Krikri-8B-Instruct")
|
|
53 |
|
54 |
model.to(device)
|
55 |
|
|
|
|
|
|
|
56 |
messages = [
|
57 |
-
{"role": "system", "content":
|
58 |
-
{"role": "user", "content":
|
59 |
]
|
60 |
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
|
61 |
input_prompt = tokenizer(prompt, return_tensors='pt').to(device)
|
@@ -64,6 +67,39 @@ outputs = model.generate(input_prompt['input_ids'], max_new_tokens=256, do_sampl
|
|
64 |
print(tokenizer.batch_decode(outputs)[0])
|
65 |
```
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
# Evaluation
|
69 |
|
|
|
53 |
|
54 |
model.to(device)
|
55 |
|
56 |
+
system_prompt = "Είσαι το Κρικρί, ένα εξαιρετικά ανεπτυγμένο μοντέλο Τεχνητής Νοημοσύνης για τα ελληνικά και εκπαιδεύτηκες από το ΙΕΛ του Ε.Κ. \"Αθηνά\"."
|
57 |
+
user_prompt = "Σε τι διαφέρει ένα κρικρί από ένα λάμα;"
|
58 |
+
|
59 |
messages = [
|
60 |
+
{"role": "system", "content": system_prompt},
|
61 |
+
{"role": "user", "content": user_prompt},
|
62 |
]
|
63 |
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
|
64 |
input_prompt = tokenizer(prompt, return_tensors='pt').to(device)
|
|
|
67 |
print(tokenizer.batch_decode(outputs)[0])
|
68 |
```
|
69 |
|
70 |
+
# How to serve with an OpenAI-compatible server via vLLM
|
71 |
+
|
72 |
+
```bash
|
73 |
+
vllm serve ilsp/Llama-Krikri-8B-Instruct \
|
74 |
+
--enforce-eager \
|
75 |
+
--dtype 'bfloat16' \
|
76 |
+
--api-key token-abc123
|
77 |
+
```
|
78 |
+
|
79 |
+
Then, the model can be queried from Python using:
|
80 |
+
```python
|
81 |
+
from openai import OpenAI
|
82 |
+
|
83 |
+
api_key = "token-abc123"
|
84 |
+
base_url = "http://localhost:8000/v1"
|
85 |
+
|
86 |
+
client = OpenAI(
|
87 |
+
api_key=api_key,
|
88 |
+
base_url=base_url,
|
89 |
+
)
|
90 |
+
|
91 |
+
system_prompt = "Είσαι ένα ανεπτυγμένο μεταφραστικό σύστημα που απαντάει απευθείας με λίστες Python."
|
92 |
+
user_prompt = "Δώσε μου την παρακάτω λίστα με μεταφρασμένο κάθε string της στα ελληνικά: ['Ethics of duty', 'Postmodern ethics', 'Consequentialist ethics', 'Utilitarian ethics', 'Deontological ethics', 'Virtue ethics', 'Relativist ethics']"
|
93 |
+
|
94 |
+
messages = [
|
95 |
+
{"role": "system", "content": system_prompt},
|
96 |
+
{"role": "user", "content": user_prompt},
|
97 |
+
]
|
98 |
+
|
99 |
+
response = client.chat.completions.create(model="ilsp/Llama-Krikri-8B-Instruct",
|
100 |
+
messages=messages)
|
101 |
+
print(response.choices[0].message.content)
|
102 |
+
```
|
103 |
|
104 |
# Evaluation
|
105 |
|