---
license: apache-2.0
base_model: Qwen/Qwen3-1.7B
tags:
- text-generation
- korean
- news
- event-extraction
- fine-tuned
- qwen
language:
- ko
library_name: transformers
pipeline_tag: text-generation
---
# Qwen3-1.7B News Event Extraction Model
This model is a **LoRA** fine-tune of **Qwen/Qwen3-1.7B** for analyzing Korean news text and extracting events.
## ✨ Model Features
- **News category classification**: assigns each article to one of four categories: 부동산 (real estate), 산업 (industry), 오피니언 (opinion), or 증권 (securities)
- **Key event extraction**: extracts the main events from the news text
- **Structured output**: returns the result as a Python dictionary (see the sketch below)
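
For illustration, a well-formed model response might look like the following. This is a hypothetical example of the schema only, not real model output; the field values depend entirely on the input article:

```python
# Hypothetical response illustrating the output schema (not real model output):
example_output = {
    "category": "산업",  # one of 부동산 / 산업 / 오피니언 / 증권
    "event_count": 2,    # number of extracted events
    "events": ["D램 가격이 전분기 대비 15% 상승",
               "HBM 매출이 전년 동기 대비 300% 급증"],
}
```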
## Usage
### Basic Setup
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
# Load the model and tokenizer
repo_id = "sogm1/qwen3-1.7b-news-event-merged"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
repo_id,
torch_dtype="auto",
device_map="auto"
).eval()
```
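
Optionally, a quick sanity check (a minimal sketch, assuming the setup above ran without errors) confirms the architecture and that a chat template is present, since the inference code below relies on `apply_chat_template`:

```python
# Optional sanity check: verify the model type and the chat template.
print(model.config.model_type)              # expected: "qwen3"
print(tokenizer.chat_template is not None)  # expected: True
```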
### System Prompt
```python
# The prompt is in Korean (the language the model was fine-tuned on). It instructs
# the model to classify the article into one of four categories, extract the key
# events, and answer as a Python dictionary after the marker "답변:" ("Answer:").
system_prompt = """당신은 뉴스 텍스트를 분석하여 카테고리를 분류하고 주요 핵심 이벤트들을 추출하는 전문 분석 시스템입니다.
주어진 텍스트를 분석하여 반드시 파이썬의 dictionary 형식으로 결과를 작성하십시오.
분석 결과는 다음 형식으로 작성하십시오:
답변:
{"category": "['부동산', '산업', '오피니언', '증권'] 중 하나",
"event_count": "핵심 이벤트 개수(정수)",
"events": ["이벤트1", "이벤트2", ...]}"""
```
### Inference
```python
def generate_analysis(model, tokenizer, text, system_prompt):
    """Run the model on a single news article and return the decoded answer."""
    messages = [
        {"role": "system", "content": system_prompt},
        # The user turn is in Korean: "Please analyze the following news text:"
        {"role": "user", "content": f"다음 뉴스 텍스트를 분석해주세요:\n\n{text}"}
    ]
    prompt_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False  # disable Qwen3's thinking mode for direct answers
    )
    inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=1024,  # generation budget for the answer
            do_sample=False       # greedy decoding for reproducible, parseable output
        )
    # Strip the prompt tokens and decode only the newly generated answer.
    output_ids = generated_ids[0][len(inputs.input_ids[0]):]
    return tokenizer.decode(output_ids, skip_special_tokens=True).strip()
```
## Usage Example
```python
# Sample news text (in Korean): a Samsung Electronics Q3 earnings article reporting
# a memory-chip recovery, DRAM prices up 15% QoQ, NAND flash shipments up 20%,
# and HBM revenue for AI servers up 300% YoY.
news_text = """
삼성전자가 3분기 실적 발표에서 메모리 반도체 부문의 회복세를 보고했다.
회사는 D램 가격이 전분기 대비 15% 상승했으며, 낸드플래시 출하량도 20% 증가했다고 밝혔다.
특히 AI 서버용 고대역폭메모리(HBM) 매출이 전년 동기 대비 300% 급증하면서 실적 개선을 견인했다.
"""

# Run the analysis
result = generate_analysis(model, tokenizer, news_text, system_prompt)

# Parse and print the result: everything after the "답변:" ("Answer:") marker
# should be the dictionary requested by the system prompt.
if "답변:" in result:
    json_part = result.split("답변:")[-1].strip()
    parsed_result = json.loads(json_part)
    print(f"Category: {parsed_result['category']}")
    print(f"Event count: {parsed_result['event_count']}")
    print("Events:")
    for i, event in enumerate(parsed_result['events'], 1):
        print(f"  {i}. {event}")
```
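
Since the model is only prompted, not constrained, to emit valid JSON, `json.loads` can fail on near-miss output such as single-quoted keys or a trailing comma. Below is a minimal best-effort fallback sketch; `parse_model_output` is a hypothetical helper, not part of this repo, which also accepts Python-literal dictionaries via `ast.literal_eval`:

```python
import ast
import json

def parse_model_output(result):
    """Best-effort parsing of the model's answer into a dict; returns None on failure."""
    # Keep only the text after the "답변:" ("Answer:") marker, if present.
    payload = result.split("답변:")[-1].strip()
    try:
        return json.loads(payload)            # strict JSON first
    except json.JSONDecodeError:
        try:
            return ast.literal_eval(payload)  # fall back to a Python dict literal
        except (ValueError, SyntaxError):
            return None
```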
## Example Output
```text
Category: 산업
Event count: 3
Events:
  1. D램 가격이 전분기 대비 15% 상승
  2. 낸드플래시 출하량이 20% 증가
  3. HBM 매출이 전년 동기 대비 300% 급증
```