Update README.md
Browse files
README.md
CHANGED
@@ -16,9 +16,9 @@ pipeline_tag: text-generation
|
|
16 |
|
17 |
# Qwen3-1.7B News Event Extraction Model
|
18 |
|
19 |
-
์ด ๋ชจ๋ธ์ Qwen/Qwen3-1.7B
|
20 |
|
21 |
-
## 모델 기능
|
22 |
|
23 |
- **๋ด์ค ์นดํ
๊ณ ๋ฆฌ ๋ถ๋ฅ**: ๋ถ๋์ฐ, ์ฐ์
, ์คํผ๋์ธ, ์ฆ๊ถ ์นดํ
๊ณ ๋ฆฌ๋ก ๋ถ๋ฅ
|
24 |
- **ํต์ฌ ์ด๋ฒคํธ ์ถ์ถ**: ๋ด์ค ํ
์คํธ์์ ์ฃผ์ ์ด๋ฒคํธ๋ค์ ์ถ์ถ
|
@@ -26,6 +26,9 @@ pipeline_tag: text-generation
|
|
26 |
|
27 |
## 사용법
|
28 |
|
|
|
|
|
|
|
29 |
import torch
|
30 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
31 |
import json
|
@@ -38,8 +41,10 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
38 |
torch_dtype="auto",
|
39 |
device_map="auto"
|
40 |
).eval()
|
|
|
41 |
|
42 |
-
|
|
|
43 |
system_prompt = """๋น์ ์ ๋ด์ค ํ
์คํธ๋ฅผ ๋ถ์ํ์ฌ ์นดํ
๊ณ ๋ฆฌ๋ฅผ ๋ถ๋ฅํ๊ณ ์ฃผ์ ํต์ฌ ์ด๋ฒคํธ๋ค์ ์ถ์ถํ๋ ์ ๋ฌธ ๋ถ์ ์์คํ
์
๋๋ค.
|
44 |
์ฃผ์ด์ง ํ
์คํธ๋ฅผ ๋ถ์ํ์ฌ ๋ฐ๋์ ํ์ด์ฌ์ dictionary ํ์์ผ๋ก ๊ฒฐ๊ณผ๋ฅผ ์์ฑํ์ญ์์ค.
|
45 |
|
@@ -49,8 +54,10 @@ system_prompt = """๋น์ ์ ๋ด์ค ํ
์คํธ๋ฅผ ๋ถ์ํ์ฌ ์นดํ
๊ณ ๋ฆฌ๋ฅผ
|
|
49 |
{"category": "['๋ถ๋์ฐ', '์ฐ์
', '์คํผ๋์ธ', '์ฆ๊ถ'] ์ค ํ๋",
|
50 |
"event_count": "ํต์ฌ ์ด๋ฒคํธ ๊ฐ์(์ ์)",
|
51 |
"events": ["์ด๋ฒคํธ1", "์ด๋ฒคํธ2", ...]}"""
|
|
|
52 |
|
53 |
-
|
|
|
54 |
def generate_analysis(model, tokenizer, text, system_prompt):
|
55 |
messages = [
|
56 |
{"role": "system", "content": system_prompt},
|
@@ -79,8 +86,11 @@ def generate_analysis(model, tokenizer, text, system_prompt):
|
|
79 |
decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
|
80 |
|
81 |
return decoded_output
|
|
|
82 |
|
83 |
-
|
|
|
|
|
84 |
news_text = """
|
85 |
์ผ์ฑ์ ์๊ฐ 3๋ถ๊ธฐ ์ค์ ๋ฐํ์์ ๋ฉ๋ชจ๋ฆฌ ๋ฐ๋์ฒด ๋ถ๋ฌธ์ ํ๋ณต์ธ๋ฅผ ๋ณด๊ณ ํ๋ค.
|
86 |
ํ์ฌ๋ D๋จ ๊ฐ๊ฒฉ์ด ์ ๋ถ๊ธฐ ๋๋น 15% ์์นํ์ผ๋ฉฐ, ๋ธ๋ํ๋์ ์ถํ๋๋ 20% ์ฆ๊ฐํ๋ค๊ณ ๋ฐํ๋ค.
|
@@ -90,15 +100,23 @@ news_text = """
|
|
90 |
# ๋ถ์ ์คํ
|
91 |
result = generate_analysis(model, tokenizer, news_text, system_prompt)
|
92 |
|
93 |
-
# ๊ฒฐ๊ณผ ํ์ฑ
|
94 |
if "๋ต๋ณ:" in result:
|
95 |
json_part = result.split("๋ต๋ณ:")[-1].strip()
|
96 |
parsed_result = json.loads(json_part)
|
97 |
|
98 |
-
print(f"์นดํ
๊ณ ๋ฆฌ: {parsed_result['category']}")
|
99 |
-
print(f"์ด๋ฒคํธ ๊ฐ์: {parsed_result['event_count']}")
|
100 |
print("์ด๋ฒคํธ ๋ชฉ๋ก:")
|
101 |
for i, event in enumerate(parsed_result['events'], 1):
|
102 |
-
print(f"
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Qwen3-1.7B News Event Extraction Model
|
18 |
|
19 |
+
์ด ๋ชจ๋ธ์ **Qwen/Qwen3-1.7B**๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ํ๊ตญ์ด ๋ด์ค ํ
์คํธ ๋ถ์ ๋ฐ ์ด๋ฒคํธ ์ถ์ถ์ ์ํด **LoRA** ๋ฐฉ์์ผ๋ก ํ์ธํ๋๋ ๋ชจ๋ธ์
๋๋ค.
|
20 |
|
21 |
+
## ✨ 모델 기능
|
22 |
|
23 |
- **๋ด์ค ์นดํ
๊ณ ๋ฆฌ ๋ถ๋ฅ**: ๋ถ๋์ฐ, ์ฐ์
, ์คํผ๋์ธ, ์ฆ๊ถ ์นดํ
๊ณ ๋ฆฌ๋ก ๋ถ๋ฅ
|
24 |
- **ํต์ฌ ์ด๋ฒคํธ ์ถ์ถ**: ๋ด์ค ํ
์คํธ์์ ์ฃผ์ ์ด๋ฒคํธ๋ค์ ์ถ์ถ
|
|
|
26 |
|
27 |
## 사용법
|
28 |
|
29 |
+
### 기본 설정
|
30 |
+
|
31 |
+
```python
|
32 |
import torch
|
33 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
34 |
import json
|
|
|
41 |
torch_dtype="auto",
|
42 |
device_map="auto"
|
43 |
).eval()
|
44 |
+
```
|
45 |
|
46 |
+
### 시스템 프롬프트
|
47 |
+
```python
|
48 |
system_prompt = """๋น์ ์ ๋ด์ค ํ
์คํธ๋ฅผ ๋ถ์ํ์ฌ ์นดํ
๊ณ ๋ฆฌ๋ฅผ ๋ถ๋ฅํ๊ณ ์ฃผ์ ํต์ฌ ์ด๋ฒคํธ๋ค์ ์ถ์ถํ๋ ์ ๋ฌธ ๋ถ์ ์์คํ
์
๋๋ค.
|
49 |
์ฃผ์ด์ง ํ
์คํธ๋ฅผ ๋ถ์ํ์ฌ ๋ฐ๋์ ํ์ด์ฌ์ dictionary ํ์์ผ๋ก ๊ฒฐ๊ณผ๋ฅผ ์์ฑํ์ญ์์ค.
|
50 |
|
|
|
54 |
{"category": "['๋ถ๋์ฐ', '์ฐ์
', '์คํผ๋์ธ', '์ฆ๊ถ'] ์ค ํ๋",
|
55 |
"event_count": "ํต์ฌ ์ด๋ฒคํธ ๊ฐ์(์ ์)",
|
56 |
"events": ["์ด๋ฒคํธ1", "์ด๋ฒคํธ2", ...]}"""
|
57 |
+
```
|
58 |
|
59 |
+
### Inference
|
60 |
+
```python
|
61 |
def generate_analysis(model, tokenizer, text, system_prompt):
|
62 |
messages = [
|
63 |
{"role": "system", "content": system_prompt},
|
|
|
86 |
decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
|
87 |
|
88 |
return decoded_output
|
89 |
+
```
|
90 |
|
91 |
+
## 사용예제
|
92 |
+
```python
|
93 |
+
# ์ํ ๋ด์ค ํ
์คํธ
|
94 |
news_text = """
|
95 |
์ผ์ฑ์ ์๊ฐ 3๋ถ๊ธฐ ์ค์ ๋ฐํ์์ ๋ฉ๋ชจ๋ฆฌ ๋ฐ๋์ฒด ๋ถ๋ฌธ์ ํ๋ณต์ธ๋ฅผ ๋ณด๊ณ ํ๋ค.
|
96 |
ํ์ฌ๋ D๋จ ๊ฐ๊ฒฉ์ด ์ ๋ถ๊ธฐ ๋๋น 15% ์์นํ์ผ๋ฉฐ, ๋ธ๋ํ๋์ ์ถํ๋๋ 20% ์ฆ๊ฐํ๋ค๊ณ ๋ฐํ๋ค.
|
|
|
100 |
# ๋ถ์ ์คํ
|
101 |
result = generate_analysis(model, tokenizer, news_text, system_prompt)
|
102 |
|
103 |
+
# ๊ฒฐ๊ณผ ํ์ฑ ๋ฐ ์ถ๋ ฅ
|
104 |
if "๋ต๋ณ:" in result:
|
105 |
json_part = result.split("๋ต๋ณ:")[-1].strip()
|
106 |
parsed_result = json.loads(json_part)
|
107 |
|
108 |
+
print(f" ์นดํ
๊ณ ๋ฆฌ: {parsed_result['category']}")
|
109 |
+
print(f" ์ด๋ฒคํธ ๊ฐ์: {parsed_result['event_count']}")
|
110 |
print("์ด๋ฒคํธ ๋ชฉ๋ก:")
|
111 |
for i, event in enumerate(parsed_result['events'], 1):
|
112 |
+
print(f" {i}. {event}")
|
113 |
+
```
|
114 |
+
## 출력예제
|
115 |
+
```python
|
116 |
+
์นดํ
๊ณ ๋ฆฌ: ์ฐ์
|
117 |
+
์ด๋ฒคํธ ๊ฐ์: 3
|
118 |
+
์ด๋ฒคํธ ๋ชฉ๋ก:
|
119 |
+
1. D๋จ ๊ฐ๊ฒฉ์ด ์ ๋ถ๊ธฐ ๋๋น 15% ์์น
|
120 |
+
2. ๋ธ๋ํ๋์ ์ถํ๋์ด 20% ์ฆ๊ฐ
|
121 |
+
3. HBM ๋งค์ถ์ด ์ ๋
๋๊ธฐ ๋๋น 300% ๊ธ์ฆ
|
122 |
+
```
|