Update app.py
app.py CHANGED
@@ -3,36 +3,59 @@ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import torch
 from PIL import Image
 import numpy as np
+import cv2
 
 # Initialize the OCR model and processor
 processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
 model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
 
-# Answer and explanation database
+# Answer and explanation database (20 questions)
 answer_key = {
-    "1": {
-
-
-    },
-    "
-
-        "explanation": "Separation of powers divides power among the legislative, executive, and judicial branches so that they check and balance one another. This keeps power from being concentrated in any one branch."
-    },
-    "3": {
-        "answer": "local self-government",
-        "explanation": "Local self-government is a system in which the residents of a region decide and handle that region's affairs themselves. Residents directly elect the head of the local government and the members of the local council."
-    }
+    "1": {"answer": "democracy", "explanation": "Democracy is a system in which the people, as masters of the country, decide the nation's important affairs."},
+    "2": {"answer": "separation of powers", "explanation": "Separation of powers divides power among the legislative, executive, and judicial branches so that they check and balance one another."},
+    "3": {"answer": "local self-government", "explanation": "Local self-government is a system in which the residents of a region decide and handle that region's affairs themselves."},
+    "4": {"answer": "constitution", "explanation": "The constitution is the nation's highest law, setting out citizens' fundamental rights and the basic principles of government organization."},
+    "5": {"answer": "National Assembly", "explanation": "The National Assembly serves as the legislature, making laws and overseeing the government."},
+    # Questions 6 through 20 go here (add them when actually running the service)
 }
 
-def
-    """
+def segment_answers(image):
+    """Split the answer regions out of an exam-sheet image."""
     if isinstance(image, np.ndarray):
-
-
+        pil_image = Image.fromarray(image)
+    else:
+        return None
+
+    # Convert the image to grayscale
+    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+
+    # Binarize the image
+    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
+
+    # Find contours
+    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    # Extract candidate answer regions
+    answer_regions = []
+    for contour in contours:
+        x, y, w, h = cv2.boundingRect(contour)
+        if w > 50 and h > 20:  # filter out boxes below a minimum size
+            region = image[y:y+h, x:x+w]
+            answer_regions.append({
+                'image': region,
+                'position': (y, x)  # stored as (y, x) so regions can be sorted by y coordinate
+            })
+
+    # Sort by y coordinate (top to bottom)
+    answer_regions.sort(key=lambda x: x['position'][0])
+
+    return [region['image'] for region in answer_regions]
 
 def recognize_text(image):
     """Handwriting recognition function."""
-
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+
     pixel_values = processor(image, return_tensors="pt").pixel_values
 
     with torch.no_grad():
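The hunk cuts off just inside recognize_text, so the actual decode step lives in the context lines the diff elides. For readers following along, a minimal sketch of how that step presumably completes, using the standard TrOCR generate-and-decode calls (an assumption about the unshown lines, not a quote of them):

    # Hedged sketch: standard TrOCR decoding, assumed to match the elided lines
    with torch.no_grad():
        generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text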
@@ -43,6 +66,10 @@ def recognize_text(image):
 
 def grade_answer(question_number, student_answer):
     """Answer grading function."""
+    question_number = str(question_number)
+    if question_number not in answer_key:
+        return None
+
     correct_answer = answer_key[question_number]["answer"]
     explanation = answer_key[question_number]["explanation"]
 
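The new guard converts the question number to a string and returns None for unknown questions, which lets process_full_exam pass loop indices directly. The comparison itself strips spaces and lowercases both sides, so minor spacing and capitalization differences are forgiven. A quick illustration with hypothetical inputs:

    # Hypothetical call; both sides normalize to "separationofpowers"
    result = grade_answer(2, "Separation Of Powers ")
    print(result["is_correct"])  # -> "O"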
@@ -50,44 +77,72 @@
     is_correct = student_answer.replace(" ", "").lower() == correct_answer.replace(" ", "").lower()
 
     return {
-        "
+        "question_number": question_number,
+        "student_answer": student_answer,
+        "is_correct": "O" if is_correct else "X",
         "correct_answer": correct_answer,
         "explanation": explanation
     }
 
-def
-    """Full processing function"""
-    if
-        return "
+def process_full_exam(image):
+    """Process the whole exam sheet."""
+    if image is None or not isinstance(image, np.ndarray):
+        return "Please upload an exam sheet image."
 
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Segment the answer regions
+        answer_regions = segment_answers(image)
+        if not answer_regions:
+            return "Could not find any answer regions. Please check the image."
+
+        # Store the grading results
+        results = []
+        total_correct = 0
+
+        # Process each answer region
+        for idx, region in enumerate(answer_regions, 1):
+            if idx > len(answer_key):  # stop once we pass the number of defined questions
+                break
+
+            # Text recognition
+            recognized_text = recognize_text(region)
+
+            # Grading
+            result = grade_answer(idx, recognized_text)
+            if result:
+                results.append(result)
+                if result["is_correct"] == "O":
+                    total_correct += 1
+
+        # Format the results
+        score = (total_correct / len(results)) * 100
+        output = f"Total score: {score:.1f} points ({total_correct} correct out of 20 questions)\n\n"
+        output += "=== Detailed grading results ===\n\n"
+
+        for result in results:
+            output += f"""
+[Question {result['question_number']}] {'✓' if result['is_correct'] == 'O' else '✗'}
+Student answer: {result['student_answer']}
 Correct answer: {result['correct_answer']}
-
-
-
-
+Explanation: {result['explanation']}
+"""
+
+        return output
 
-
+    except Exception as e:
+        return f"An error occurred during processing: {str(e)}"
 
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=
-    inputs=
-        gr.Image(label="Upload an answer image", type="numpy"),
-        gr.Dropdown(choices=["1", "2", "3"], label="Select a question number")
-    ],
+    fn=process_full_exam,
+    inputs=gr.Image(label="Upload an exam sheet image", type="numpy"),
     outputs=gr.Textbox(label="Grading result"),
     title="Elementary School Social Studies Exam Grading Program",
-    description="
+    description="""
+    A program that grades the entire exam sheet in one pass.
+    Please scan or photograph the sheet cleanly so the answers are clearly visible.
+    """,
+    examples=[],  # example images can be added
 )
 
 if __name__ == "__main__":
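The segmentation heuristic binarizes the sheet, takes external contours, and keeps bounding boxes wider than 50 and taller than 20 pixels, sorted top to bottom. A self-contained smoke test, assuming only OpenCV, NumPy, and the segment_answers defined above (the synthetic canvas and the printed count are illustrative, not repo assets):

    import numpy as np
    import cv2

    # White sheet with two lines of machine-printed "answers"
    canvas = np.full((400, 600, 3), 255, dtype=np.uint8)
    cv2.putText(canvas, "democracy", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 0), 3)
    cv2.putText(canvas, "constitution", (50, 250), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 0), 3)

    regions = segment_answers(canvas)
    print(len(regions))  # one box per contour that passed the size filter

Note that disconnected letters produce separate contours, so a real sheet may need a dilation pass (e.g. cv2.dilate on the thresholded image) to merge each handwritten answer into a single box before filtering.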
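The diff ends at the __main__ guard, and the top of the file (including the gradio import behind the gr alias) sits above the first hunk, so neither appears here. Anyone reproducing the Space locally would need torch, transformers, gradio, opencv-python, and pillow installed, and the file presumably closes with the usual Gradio launch, along these lines (assumed boilerplate, not shown in the diff):

    if __name__ == "__main__":
        iface.launch()  # assumed standard launch; actual closing lines not shown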