import logging
import os
import re
from typing import Optional, Tuple

import pandas as pd
import streamlit as st
from pylatexenc.latex2text import LatexNodes2Text

from src.FisrtModule.module1 import MisconceptionModel
from src.SecondModule.module2 import SimilarQuestionGenerator
from src.ThirdModule.module3 import AnswerVerifier

# Logging setup. The file used to call basicConfig a second time with
# level=INFO; basicConfig does nothing once the root logger has a handler,
# so only this first call ever took effect and the duplicate was dropped.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Path setup: data files live in a "Data" directory next to this script.
# Defined before the loader functions that read these names.
base_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(base_path, 'Data')
misconception_csv_path = os.path.join(data_path, 'misconception_mapping.csv')

# Basic Streamlit page configuration.
st.set_page_config(
    page_title="MisconcepTutor",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Session-state initialisation - placed near the top so it runs before
# anything else reads these keys.
if 'initialized' not in st.session_state:
    st.session_state.initialized = True
    st.session_state.wrong_questions = []
    st.session_state.misconceptions = []
    st.session_state.current_question_index = 0
    st.session_state.generated_questions = []
    st.session_state.current_step = 'initial'
    st.session_state.selected_wrong_answer = None
    st.session_state.questions = []
    logger.info("Session state initialized")


# Initialize Misconception Model
@st.cache_resource
def load_misconception_model():
    """Build the misconception model from the files under ``data_path``.

    Returns:
        A ``MisconceptionModel`` constructed with the
        ``minsuas/Misconceptions__1`` model name, the parquet misconception
        mapping and the ``embs_misconception-9-9.npy`` embeddings file.
    """
    return MisconceptionModel(
        model_name="minsuas/Misconceptions__1",
        misconception_mapping_path=os.path.join(
            data_path, 'misconception_mapping.parquet'
        ),
        misconception_embs_paths=[
            os.path.join(data_path, 'embs_misconception-9-9.npy')
        ],
    )


@st.cache_resource
def load_answer_verifier():
    """Load the answer-verification model.

    Returns:
        A new ``AnswerVerifier`` instance.
    """
    return AnswerVerifier()


# Question generator initialisation
@st.cache_resource
def load_question_generator():
    """Load the similar-question generation model.

    Returns:
        A ``SimilarQuestionGenerator`` built from ``misconception_csv_path``.

    Raises:
        FileNotFoundError: If the misconception CSV does not exist. The same
            message is also shown in the UI before raising.
    """
    if not os.path.exists(misconception_csv_path):
        st.error(f"CSV 파일이 존재하지 않습니다: {misconception_csv_path}")
        raise FileNotFoundError(f"CSV 파일이 존재하지 않습니다: {misconception_csv_path}")
    return SimilarQuestionGenerator(misconception_csv_path=misconception_csv_path)


# CSV
데이터 로드 함수 @st.cache_data def load_data(data_file='/train.csv', selected_indexes=None): #def load_data(data_file='/processed_mathqa2.csv', selected_indexes=None): try: file_path = os.path.join(data_path, data_file.lstrip('/')) df = pd.read_csv(file_path) logger.info(f"Data loaded successfully from {file_path}") if selected_indexes is not None: #df = df.loc[selected_indexes] # 신규 문제 df = df.loc[df['QuestionId'].isin(selected_indexes)] # QuestionId 기준 필터링 logger.info(f"Data filtered to selected indexes: {selected_indexes}") # 필터링 후 다시 한번 중복 체크 if df.duplicated(subset=['QuestionText']).any(): df = df.drop_duplicates(subset=['QuestionText'], keep='first') logger.warning("Removed duplicates from selected indexes") return df except FileNotFoundError: st.error(f"파일을 찾을 수 없습니다: {data_file}") logger.error(f"File not found: {data_file}") return None def start_quiz(): """퀴즈 시작 및 초기화""" #selected_indexes = [2519, 3852, 3404, 3896, 7602, 3946, 12977, 1878, 7602, 3589, 9] # 12038 문제 끊김? # 1302 동일 문제인가? # 3473 안나옴? # 3887 답안 틀림 # 9699 수식 이상 # 9752 문제 끊김 # train.csv selected_indexes = [1866, 1864, 1845, 1862, 1861, 1829, 1827, 1802, 1741, 1725] # 1671] # 확정 : 1671, 1725 # 통과 : 1864, 1866, 1845, 1861, 1862, 1802, 1827, 1829, 1741, # 세모 : 1825, # 문제 안이쁨 : 1868, 1847, 1834, 1841, 1809, 1804, 1672, 1731, 1736, 1746, 1692, 1775, 1781 # 문제 이상 : 1792, 1804, 1813, 1679, 1711 df = load_data(selected_indexes=selected_indexes) if df is None or df.empty: st.error("데이터를 불러올 수 없습니다. 
데이터셋을 확인해주세요.") return #st.session_state.questions = df.sample(n=10, random_state=42) st.session_state.questions = df st.session_state.current_step = 'quiz' st.session_state.current_question_index = 0 st.session_state.wrong_questions = [] st.session_state.misconceptions = [] st.session_state.generated_questions = [] logger.info("Quiz started") def generate_similar_question(wrong_q, misconception_id, generator): """유사 문제 생성""" logger.info(f"Generating similar question for misconception_id: {misconception_id}") # 입력 데이터 유효성 검사 if not isinstance(wrong_q, dict): logger.error(f"Invalid wrong_q type: {type(wrong_q)}") st.error("유사 문제 생성에 필요한 데이터 형식이 잘못되었습니다.") return None try: # misconception_id가 없거나 NaN인 경우 다른 misconception 사용 if pd.isna(misconception_id): logger.info("Original misconception_id is NaN, trying to find alternative") # 현재까지 나온 misconception들 중에서 선택 available_misconceptions = [m for m in st.session_state.misconceptions if not pd.isna(m)] if available_misconceptions: # 가장 최근에 나온 misconception 선택 misconception_id = available_misconceptions[-1] logger.info(f"Using alternative misconception_id: {misconception_id}") else: # 기본 misconception ID 사용 (예: 가장 기본적인 misconception) misconception_id = 2001 # 적절한 기본값으로 수정 필요 logger.info(f"Using default misconception_id: {misconception_id}") # 데이터 준비 (튜플 변환 방지) input_data = { 'construct_name': str(wrong_q.get('ConstructName', '')), 'subject_name': str(wrong_q.get('SubjectName', '')), 'question_text': str(wrong_q.get('QuestionText', '')), 'correct_answer_text': str(wrong_q.get(f'Answer{wrong_q["CorrectAnswer"]}Text', '')), 'wrong_answer_text': str(wrong_q.get(f'Answer{st.session_state.selected_wrong_answer}Text', '')), 'misconception_id': int(misconception_id) } logger.info(f"Prepared input data: {input_data}") with st.spinner("📝 유사 문제를 생성하고 있습니다..."): # 유사 문제 생성 호출 generated_q, _ = generator.generate_similar_question_with_text( construct_name=input_data['construct_name'], subject_name=input_data['subject_name'], 
question_text=input_data['question_text'], correct_answer_text=input_data['correct_answer_text'], wrong_answer_text=input_data['wrong_answer_text'], misconception_id=input_data['misconception_id'] ) if generated_q: verifier = load_answer_verifier() with st.status("🤔 AI가 문제를 검토하고 있습니다..."): st.write("답안의 정확성을 검증하고 있습니다...") verified_answer = verifier.verify_answer( question=generated_q.question, choices=generated_q.choices ) if verified_answer: logger.info(f"Answer verified: {verified_answer}") st.write("✅ 검증 완료!") result = { 'question': generated_q.question, 'choices': generated_q.choices, 'correct': verified_answer, 'explanation': generated_q.explanation } st.session_state['current_similar_question_answer'] = verified_answer return result else: logger.warning("Answer verification failed, using original answer") st.write("⚠️ 검증에 실패했습니다. 원본 답안을 사용합니다.") result = { 'question': generated_q.question, 'choices': generated_q.choices, 'correct': generated_q.correct_answer, 'explanation': generated_q.explanation } st.session_state['current_similar_question_answer'] = generated_q.correct_answer return result except Exception as e: logger.error(f"Error in generate_similar_question: {str(e)}") st.error(f"문제 생성 중 오류가 발생했습니다: {str(e)}") return None return None def handle_answer(answer, current_q): """답변 처리""" if answer != current_q['CorrectAnswer']: wrong_q_dict = current_q.to_dict() st.session_state.wrong_questions.append(wrong_q_dict) st.session_state.selected_wrong_answer = answer misconception_key = f'Misconception{answer}Id' misconception_id = current_q.get(misconception_key) st.session_state.misconceptions.append(misconception_id) st.session_state.current_question_index += 1 if st.session_state.current_question_index >= len(st.session_state.questions): st.session_state.current_step = 'review' else: st.session_state.current_step = 'quiz' def display_math_content(content): """ Display mathematical content with proper formatting. 
Args: content (str): The math content to display """ # Convert LaTeX to plain text for display from pylatexenc.latex2text import LatexNodes2Text # Clean and format the content formatted_content = LatexNodes2Text().latex_to_text(content) st.markdown(f'