---
license: llama3
---

# Model

- Foundation model: [Bllossom 8B](https://huggingface.co/MLP-KTLim/llama-3-Korean-Bllossom-8B)
- Datasets:
  - [KoAlpaca v1.1a](https://huggingface.co/datasets/beomi/KoAlpaca-v1.1a)
  - [jojo0217/korean_safe_conversation](https://huggingface.co/datasets/jojo0217/korean_safe_conversation)
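
Both datasets are public on the Hub. As a quick way to inspect them, a hedged sketch using the `datasets` library (not part of the original training code):

```python
from datasets import load_dataset

# Peek at the two fine-tuning datasets; printing a DatasetDict
# shows its split names and row counts
koalpaca = load_dataset("beomi/KoAlpaca-v1.1a")
safe_conv = load_dataset("jojo0217/korean_safe_conversation")

print(koalpaca)
print(safe_conv)
```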
# Query
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

BASE_MODEL = "sh2orc/llama-3-korean-8b"

# Load the model and tokenizer from the Hugging Face Hub
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# "How many bridges are there over the Han River?"
instruction = "한강에는 대교가 몇 개 있어?"

pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=1024)

messages = [
    {"role": "user", "content": instruction},
]

# Render the chat messages into a single prompt string,
# appending the assistant header so the model starts its reply
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    # Stop on either the regular EOS token or Llama 3's end-of-turn token
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
)

# Print only the newly generated text, stripping the echoed prompt
print(outputs[0]['generated_text'][len(prompt):])
```
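
If you prefer to skip the `pipeline` wrapper, the same query can be run by tokenizing the chat template directly and calling `model.generate`. This is a minimal sketch reusing the `model`, `tokenizer`, and `messages` objects defined above, with the same sampling parameters:

```python
# Minimal sketch: same generation without the pipeline wrapper,
# reusing `model`, `tokenizer`, and `messages` from the snippet above.
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

output_ids = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    eos_token_id=terminators,
)

# Decode only the tokens generated after the prompt
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```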
# Result
<pre>
한강에는 총 8개의 다리(교)가 있습니다. 그 중 3개는 북쪽으로 향해 있고, 나머지 5개는 남쪽으로 향해 있습니다.
</pre>

*(Translation: The Han River has a total of 8 bridges. Of those, 3 face north and the remaining 5 face south.)*
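
Since the prompt is built with `apply_chat_template`, the conversation can be continued by appending the assistant reply and the next user turn to `messages` and regenerating. A sketch of a second turn (the follow-up question is illustrative, not from the original card):

```python
# Hypothetical second turn, reusing `pipe`, `prompt`, `messages`, and
# `outputs` from the query above; the follow-up question is illustrative.
messages.append({"role": "assistant",
                 "content": outputs[0]['generated_text'][len(prompt):]})
# "Which of those bridges is the oldest?"
messages.append({"role": "user", "content": "그 중에서 가장 오래된 다리는 뭐야?"})

prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
)
print(outputs[0]['generated_text'][len(prompt):])
```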