sogm1 committed on
Commit
0bf1330
·
verified ·
1 Parent(s): f0d4888

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -11
README.md CHANGED
@@ -16,9 +16,9 @@ pipeline_tag: text-generation
16
 
17
  # Qwen3-1.7B News Event Extraction Model
18
 
19
- ์ด ๋ชจ๋ธ์€ Qwen/Qwen3-1.7B๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•œ๊ตญ์–ด ๋‰ด์Šค ํ…์ŠคํŠธ ๋ถ„์„ ๋ฐ ์ด๋ฒคํŠธ ์ถ”์ถœ์„ ์œ„ํ•ด LoRA ๋ฐฉ์‹์œผ๋กœ ํŒŒ์ธํŠœ๋‹๋œ ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค.
20
 
21
- ## ๋ชจ๋ธ ๊ธฐ๋Šฅ
22
 
23
  - **๋‰ด์Šค ์นดํ…Œ๊ณ ๋ฆฌ ๋ถ„๋ฅ˜**: ๋ถ€๋™์‚ฐ, ์‚ฐ์—…, ์˜คํ”ผ๋‹ˆ์–ธ, ์ฆ๊ถŒ ์นดํ…Œ๊ณ ๋ฆฌ๋กœ ๋ถ„๋ฅ˜
24
  - **ํ•ต์‹ฌ ์ด๋ฒคํŠธ ์ถ”์ถœ**: ๋‰ด์Šค ํ…์ŠคํŠธ์—์„œ ์ฃผ์š” ์ด๋ฒคํŠธ๋“ค์„ ์ถ”์ถœ
@@ -26,6 +26,9 @@ pipeline_tag: text-generation
26
 
27
  ## ์‚ฌ์šฉ๋ฒ•
28
 
 
 
 
29
  import torch
30
  from transformers import AutoTokenizer, AutoModelForCausalLM
31
  import json
@@ -38,8 +41,10 @@ model = AutoModelForCausalLM.from_pretrained(
38
  torch_dtype="auto",
39
  device_map="auto"
40
  ).eval()
 
41
 
42
- # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์ •์˜
 
43
  system_prompt = """๋‹น์‹ ์€ ๋‰ด์Šค ํ…์ŠคํŠธ๋ฅผ ๋ถ„์„ํ•˜์—ฌ ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ ๋ถ„๋ฅ˜ํ•˜๊ณ  ์ฃผ์š” ํ•ต์‹ฌ ์ด๋ฒคํŠธ๋“ค์„ ์ถ”์ถœํ•˜๋Š” ์ „๋ฌธ ๋ถ„์„ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค.
44
  ์ฃผ์–ด์ง„ ํ…์ŠคํŠธ๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๋ฐ˜๋“œ์‹œ ํŒŒ์ด์ฌ์˜ dictionary ํ˜•์‹์œผ๋กœ ๊ฒฐ๊ณผ๋ฅผ ์ž‘์„ฑํ•˜์‹ญ์‹œ์˜ค.
45
 
@@ -49,8 +54,10 @@ system_prompt = """๋‹น์‹ ์€ ๋‰ด์Šค ํ…์ŠคํŠธ๋ฅผ ๋ถ„์„ํ•˜์—ฌ ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ
49
  {"category": "['๋ถ€๋™์‚ฐ', '์‚ฐ์—…', '์˜คํ”ผ๋‹ˆ์–ธ', '์ฆ๊ถŒ'] ์ค‘ ํ•˜๋‚˜",
50
  "event_count": "ํ•ต์‹ฌ ์ด๋ฒคํŠธ ๊ฐœ์ˆ˜(์ •์ˆ˜)",
51
  "events": ["์ด๋ฒคํŠธ1", "์ด๋ฒคํŠธ2", ...]}"""
 
52
 
53
- # ๋ถ„์„ ํ•จ์ˆ˜
 
54
  def generate_analysis(model, tokenizer, text, system_prompt):
55
  messages = [
56
  {"role": "system", "content": system_prompt},
@@ -79,8 +86,11 @@ def generate_analysis(model, tokenizer, text, system_prompt):
79
  decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
80
 
81
  return decoded_output
 
82
 
83
- # ์‚ฌ์šฉ ์˜ˆ์‹œ
 
 
84
  news_text = """
85
  ์‚ผ์„ฑ์ „์ž๊ฐ€ 3๋ถ„๊ธฐ ์‹ค์  ๋ฐœํ‘œ์—์„œ ๋ฉ”๋ชจ๋ฆฌ ๋ฐ˜๋„์ฒด ๋ถ€๋ฌธ์˜ ํšŒ๋ณต์„ธ๋ฅผ ๋ณด๊ณ ํ–ˆ๋‹ค.
86
  ํšŒ์‚ฌ๋Š” D๋žจ ๊ฐ€๊ฒฉ์ด ์ „๋ถ„๊ธฐ ๋Œ€๋น„ 15% ์ƒ์Šนํ–ˆ์œผ๋ฉฐ, ๋‚ธ๋“œํ”Œ๋ž˜์‹œ ์ถœํ•˜๋Ÿ‰๋„ 20% ์ฆ๊ฐ€ํ–ˆ๋‹ค๊ณ  ๋ฐํ˜”๋‹ค.
@@ -90,15 +100,23 @@ news_text = """
90
  # ๋ถ„์„ ์‹คํ–‰
91
  result = generate_analysis(model, tokenizer, news_text, system_prompt)
92
 
93
- # ๊ฒฐ๊ณผ ํŒŒ์‹ฑ
94
  if "๋‹ต๋ณ€:" in result:
95
  json_part = result.split("๋‹ต๋ณ€:")[-1].strip()
96
  parsed_result = json.loads(json_part)
97
 
98
- print(f"์นดํ…Œ๊ณ ๋ฆฌ: {parsed_result['category']}")
99
- print(f"์ด๋ฒคํŠธ ๊ฐœ์ˆ˜: {parsed_result['event_count']}")
100
  print("์ด๋ฒคํŠธ ๋ชฉ๋ก:")
101
  for i, event in enumerate(parsed_result['events'], 1):
102
- print(f" {i}. {event}")
103
-
104
- Apache 2.0 License
 
 
 
 
 
 
 
 
 
16
 
17
  # Qwen3-1.7B News Event Extraction Model
18
 
19
+ ์ด ๋ชจ๋ธ์€ **Qwen/Qwen3-1.7B**๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•œ๊ตญ์–ด ๋‰ด์Šค ํ…์ŠคํŠธ ๋ถ„์„ ๋ฐ ์ด๋ฒคํŠธ ์ถ”์ถœ์„ ์œ„ํ•ด **LoRA** ๋ฐฉ์‹์œผ๋กœ ํŒŒ์ธํŠœ๋‹๋œ ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค.
20
 
21
+ ## โœจ ๋ชจ๋ธ ๊ธฐ๋Šฅ
22
 
23
  - **๋‰ด์Šค ์นดํ…Œ๊ณ ๋ฆฌ ๋ถ„๋ฅ˜**: ๋ถ€๋™์‚ฐ, ์‚ฐ์—…, ์˜คํ”ผ๋‹ˆ์–ธ, ์ฆ๊ถŒ ์นดํ…Œ๊ณ ๋ฆฌ๋กœ ๋ถ„๋ฅ˜
24
  - **ํ•ต์‹ฌ ์ด๋ฒคํŠธ ์ถ”์ถœ**: ๋‰ด์Šค ํ…์ŠคํŠธ์—์„œ ์ฃผ์š” ์ด๋ฒคํŠธ๋“ค์„ ์ถ”์ถœ
 
26
 
27
  ## ์‚ฌ์šฉ๋ฒ•
28
 
29
+ ### 기본 설정
30
+
31
+ ```python
32
  import torch
33
  from transformers import AutoTokenizer, AutoModelForCausalLM
34
  import json
 
41
  torch_dtype="auto",
42
  device_map="auto"
43
  ).eval()
44
+ ```
45
 
46
+ ### 시스템 프롬프트
47
+ ```python
48
  system_prompt = """๋‹น์‹ ์€ ๋‰ด์Šค ํ…์ŠคํŠธ๋ฅผ ๋ถ„์„ํ•˜์—ฌ ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ ๋ถ„๋ฅ˜ํ•˜๊ณ  ์ฃผ์š” ํ•ต์‹ฌ ์ด๋ฒคํŠธ๋“ค์„ ์ถ”์ถœํ•˜๋Š” ์ „๋ฌธ ๋ถ„์„ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค.
49
  ์ฃผ์–ด์ง„ ํ…์ŠคํŠธ๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๋ฐ˜๋“œ์‹œ ํŒŒ์ด์ฌ์˜ dictionary ํ˜•์‹์œผ๋กœ ๊ฒฐ๊ณผ๋ฅผ ์ž‘์„ฑํ•˜์‹ญ์‹œ์˜ค.
50
 
 
54
  {"category": "['๋ถ€๋™์‚ฐ', '์‚ฐ์—…', '์˜คํ”ผ๋‹ˆ์–ธ', '์ฆ๊ถŒ'] ์ค‘ ํ•˜๋‚˜",
55
  "event_count": "ํ•ต์‹ฌ ์ด๋ฒคํŠธ ๊ฐœ์ˆ˜(์ •์ˆ˜)",
56
  "events": ["์ด๋ฒคํŠธ1", "์ด๋ฒคํŠธ2", ...]}"""
57
+ ```
58
 
59
+ ### Inference
60
+ ```python
61
  def generate_analysis(model, tokenizer, text, system_prompt):
62
  messages = [
63
  {"role": "system", "content": system_prompt},
 
86
  decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
87
 
88
  return decoded_output
89
+ ```
90
 
91
+ ## 사용 예시
92
+ ```python
93
+ # ์ƒ˜ํ”Œ ๋‰ด์Šค ํ…์ŠคํŠธ
94
  news_text = """
95
  ์‚ผ์„ฑ์ „์ž๊ฐ€ 3๋ถ„๊ธฐ ์‹ค์  ๋ฐœํ‘œ์—์„œ ๋ฉ”๋ชจ๋ฆฌ ๋ฐ˜๋„์ฒด ๋ถ€๋ฌธ์˜ ํšŒ๋ณต์„ธ๋ฅผ ๋ณด๊ณ ํ–ˆ๋‹ค.
96
  ํšŒ์‚ฌ๋Š” D๋žจ ๊ฐ€๊ฒฉ์ด ์ „๋ถ„๊ธฐ ๋Œ€๋น„ 15% ์ƒ์Šนํ–ˆ์œผ๋ฉฐ, ๋‚ธ๋“œํ”Œ๋ž˜์‹œ ์ถœํ•˜๋Ÿ‰๋„ 20% ์ฆ๊ฐ€ํ–ˆ๋‹ค๊ณ  ๋ฐํ˜”๋‹ค.
 
100
  # ๋ถ„์„ ์‹คํ–‰
101
  result = generate_analysis(model, tokenizer, news_text, system_prompt)
102
 
103
+ # ๊ฒฐ๊ณผ ํŒŒ์‹ฑ ๋ฐ ์ถœ๋ ฅ
104
  if "๋‹ต๋ณ€:" in result:
105
  json_part = result.split("๋‹ต๋ณ€:")[-1].strip()
106
  parsed_result = json.loads(json_part)
107
 
108
+ print(f" ์นดํ…Œ๊ณ ๋ฆฌ: {parsed_result['category']}")
109
+ print(f" ์ด๋ฒคํŠธ ๊ฐœ์ˆ˜: {parsed_result['event_count']}")
110
  print("์ด๋ฒคํŠธ ๋ชฉ๋ก:")
111
  for i, event in enumerate(parsed_result['events'], 1):
112
+ print(f" {i}. {event}")
113
+ ```
114
+ ## 출력 예시
115
+ ```python
116
+ ์นดํ…Œ๊ณ ๋ฆฌ: ์‚ฐ์—…
117
+ ์ด๋ฒคํŠธ ๊ฐœ์ˆ˜: 3
118
+ ์ด๋ฒคํŠธ ๋ชฉ๋ก:
119
+ 1. D๋žจ ๊ฐ€๊ฒฉ์ด ์ „๋ถ„๊ธฐ ๋Œ€๋น„ 15% ์ƒ์Šน
120
+ 2. ๋‚ธ๋“œํ”Œ๋ž˜์‹œ ์ถœํ•˜๋Ÿ‰์ด 20% ์ฆ๊ฐ€
121
+ 3. HBM ๋งค์ถœ์ด ์ „๋…„ ๋™๊ธฐ ๋Œ€๋น„ 300% ๊ธ‰์ฆ
122
+ ```