import pandas as pd
import numpy as np
import json
import os
import logging
import pickle
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import f1_score, mean_absolute_error, accuracy_score, precision_score, recall_score

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]: %(message)s")


class StudentRecommendationSystem:
    def __init__(self, json_dir: str = "./data"):
        self.json_dir = json_dir

        # Load the core-subject requirements for each program.
        try:
            with open(os.path.join(json_dir, "subjects.json"), "r") as f:
                subjects_data = json.load(f)
            self.core_subjects = subjects_data["core_subjects"]
            logging.info("Loaded subjects data successfully.")
        except Exception as e:
            logging.error("Error loading subjects data: " + str(e))
            raise

        # Load the university recommendations per program.
        try:
            with open(os.path.join(json_dir, "universities.json"), "r") as f:
                universities_data = json.load(f)
            self.top_universities = universities_data["top_universities"]
            logging.info("Loaded universities data successfully.")
        except Exception as e:
            logging.error("Error loading universities data: " + str(e))
            raise

        # Load the department recommendations per program.
        try:
            with open(os.path.join(json_dir, "departments.json"), "r") as f:
                departments_data = json.load(f)
            self.program_departments = departments_data["program_departments"]
            logging.info("Loaded departments data successfully.")
        except Exception as e:
            logging.error("Error loading departments data: " + str(e))
            raise

        # Build the program -> core subjects mapping used for match scoring.
        self.university_programs = {}
        for program in self.core_subjects:
            self.university_programs[program] = {"core_subjects": self.core_subjects[program]}
        logging.info("University programs mapping created.")

        self.ai_model = self._train_dummy_model()

    def _train_dummy_model(self) -> Pipeline:
        # Placeholder model: a scaled linear regression fitted to the identity mapping on [0, 100].
        X = np.array([[0], [50], [100]])
        y = np.array([0, 50, 100])
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('regressor', LinearRegression())
        ])
        pipeline.fit(X, y)
        logging.info("Dummy AI model pipeline trained successfully.")
        return pipeline

    def _refine_match_score(self, score: float) -> float:
        refined = self.ai_model.predict(np.array([[score]]))[0]
        logging.debug(f"Refined score for raw score {score} is {refined}.")
        return refined

    def predict_success_probability(self, refined_score: float) -> float:
        probability = refined_score / 100.0
        logging.debug(f"Predicted success probability from refined score {refined_score} is {probability}.")
        return probability

    def load_student_grades(self, grades_data: dict = None, grades_file: str = None) -> pd.DataFrame:
        if grades_file:
            try:
                with open(grades_file, "r") as f:
                    grades_data = json.load(f)
                if "sample_grades" in grades_data:
                    grades_data = grades_data["sample_grades"]
                logging.info(f"Student grades loaded from file: {grades_file}")
            except Exception as e:
                logging.error("Error loading student grades file: " + str(e))
                raise
        if not grades_data:
            raise ValueError("Either grades_data or grades_file must be provided")
        self.student_data = pd.DataFrame(list(grades_data.items()), columns=['Subject', 'Grade'])
        return self.student_data

    def identify_strengths(self, threshold: float = 85) -> pd.DataFrame:
        # A subject counts as a strength when its grade meets or exceeds the threshold.
        strengths = self.student_data[self.student_data['Grade'] >= threshold]
        return strengths.sort_values(by='Grade', ascending=False)
    def calculate_program_match(self, strengths: pd.DataFrame) -> pd.DataFrame:
        program_scores = {}
        for program, details in self.university_programs.items():
            score = 0
            core_subjects = details["core_subjects"]
            total_possible_score = len(core_subjects) * 100
            # Sum the student's grades across the program's core subjects.
            for subject in core_subjects:
                subject_grade = self.student_data[self.student_data['Subject'] == subject]
                if not subject_grade.empty:
                    score += subject_grade.iloc[0]['Grade']
            raw_score = (score / total_possible_score) * 100 if total_possible_score > 0 else 0
            refined_score = self._refine_match_score(raw_score)
            success_probability = self.predict_success_probability(refined_score)
            program_scores[program] = {
                "raw_score": raw_score,
                "refined_score": refined_score,
                "success_probability": success_probability
            }
            logging.debug(f"Program {program}: raw_score {raw_score}, refined_score {refined_score}, "
                          f"success_probability {success_probability}")
        program_df = pd.DataFrame([
            {"Program": program,
             "Raw Score": scores["raw_score"],
             "AI Refined Score": scores["refined_score"],
             "Success Probability": scores["success_probability"]}
            for program, scores in program_scores.items()
        ])
        return program_df.sort_values(by='AI Refined Score', ascending=False)

    def get_top_recommendations(self, program_matches: pd.DataFrame, top_n: int = 3) -> list:
        recommendations = []
        for i in range(min(top_n, len(program_matches))):
            program = program_matches.iloc[i]['Program']
            raw_score = program_matches.iloc[i]['Raw Score']
            refined_score = program_matches.iloc[i]['AI Refined Score']
            success_probability = program_matches.iloc[i]['Success Probability']
            # Only recommend programs whose refined match score reaches 50%.
            if refined_score >= 50:
                universities = self.top_universities.get(program, ["No specific recommendations"])
                departments = self.program_departments.get(program, ["No specific departments"])
                recommendations.append({
                    "program": program,
                    "raw_score": raw_score,
                    "refined_score": refined_score,
                    "success_probability": success_probability,
                    "recommended_universities": universities[:3],
                    "recommended_departments": departments[:3]
                })
        return recommendations

    def evaluate_recommendations(self, program_matches_df: pd.DataFrame, ground_truth: dict,
                                 threshold: float = 60) -> dict:
        # Binarize the refined scores at the threshold and compare against the ground-truth labels.
        predictions = program_matches_df.apply(
            lambda row: 1 if row["AI Refined Score"] >= threshold else 0, axis=1).tolist()
        actuals = [ground_truth.get(program, 0) for program in program_matches_df["Program"].tolist()]
        metrics = {
            "f1_score": f1_score(actuals, predictions),
            "accuracy": accuracy_score(actuals, predictions),
            "precision": precision_score(actuals, predictions, zero_division=0),
            "recall": recall_score(actuals, predictions, zero_division=0),
            "mae": mean_absolute_error(actuals, predictions)
        }
        logging.info("Evaluation metrics computed.")
        return metrics

    def save_ai_model(self, file_path: str) -> None:
        try:
            with open(file_path, "wb") as f:
                pickle.dump(self.ai_model, f)
            logging.info(f"AI model saved to {file_path}")
        except Exception as e:
            logging.error("Error saving AI model: " + str(e))
            raise

    def load_ai_model(self, file_path: str) -> None:
        try:
            with open(file_path, "rb") as f:
                self.ai_model = pickle.load(f)
            logging.info(f"AI model loaded from {file_path}")
        except Exception as e:
            logging.error("Error loading AI model: " + str(e))
            raise

    def process_student_data(self, grades_data: dict = None, grades_file: str = None,
                             strength_threshold: float = 85) -> dict:
        # End-to-end pipeline: load grades, find strengths, score programs, build recommendations.
        self.load_student_grades(grades_data, grades_file)
        strengths = self.identify_strengths(strength_threshold)
        program_matches = self.calculate_program_match(strengths)
        recommendations = self.get_top_recommendations(program_matches)
        report = {
            "strengths": strengths.to_dict('records'),
            "program_matches": program_matches.to_dict('records'),
            "top_recommendations": recommendations
        }
        return report


def setup_json_directory(json_dir: str = "./data") -> str:
    if not os.path.exists(json_dir):
        os.makedirs(json_dir)
    return json_dir
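
# Illustrative sketch only: the values below are assumptions, not real data. The JSON layout
# (keys "core_subjects", "top_universities", "program_departments", "sample_grades") is inferred
# from the lookups in StudentRecommendationSystem.__init__ and load_student_grades; a helper
# like this can seed an empty ./data directory for local testing.
def write_example_data_files(json_dir: str = "./data") -> None:
    examples = {
        "subjects.json": {"core_subjects": {"Computer Science": ["Mathematics", "Physics"]}},
        "universities.json": {"top_universities": {"Computer Science": ["Example University"]}},
        "departments.json": {"program_departments": {"Computer Science": ["Department of Computing"]}},
        "sample-data.json": {"sample_grades": {"Mathematics": 92, "Physics": 88, "History": 70}},
    }
    for name, payload in examples.items():
        # Write each placeholder file into the data directory.
        with open(os.path.join(json_dir, name), "w") as f:
            json.dump(payload, f, indent=2)
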
def main():
    json_dir = setup_json_directory()
    sample_data_file = os.path.join(json_dir, "sample-data.json")
    recommendation_system = StudentRecommendationSystem(json_dir)
    results = recommendation_system.process_student_data(grades_file=sample_data_file)

    with open(sample_data_file, "r") as f:
        sample_data = json.load(f)
    sample_grades = sample_data["sample_grades"]

    print("\n===== STUDENT ACADEMIC PROFILE =====")
    print("\nSubjects and Grades:")
    for subject in sample_grades:
        print(f"- {subject}: {sample_grades[subject]}")

    print("\n===== ACADEMIC STRENGTHS =====")
    for strength in results["strengths"]:
        print(f"- {strength['Subject']}: {strength['Grade']}")

    print("\n===== PROGRAM MATCHES (Including AI Details) =====")
    for match in results["program_matches"]:
        print(f"- {match['Program']}: Raw Score = {match['Raw Score']:.1f}%, "
              f"AI Refined Score = {match['AI Refined Score']:.1f}%, "
              f"Success Probability = {match['Success Probability']:.2f}")

    print("\n===== PROGRAM RECOMMENDATIONS =====")
    for i, rec in enumerate(results["top_recommendations"], 1):
        print(f"\n{i}. {rec['program']} (Raw Score: {rec['raw_score']:.1f}%, "
              f"AI Refined Score: {rec['refined_score']:.1f}%, "
              f"Success Probability: {rec['success_probability']:.2f})")
        print(" Recommended Universities:")
        for uni in rec['recommended_universities']:
            print(f" - {uni}")
        print(" Recommended Departments:")
        for dept in rec['recommended_departments']:
            print(f" - {dept}")

    program_matches_df = pd.DataFrame(results["program_matches"])
    dummy_ground_truth = {row["Program"]: (1 if row["Raw Score"] >= 65 else 0)
                          for idx, row in program_matches_df.iterrows()}
    evaluation_metrics = recommendation_system.evaluate_recommendations(program_matches_df, dummy_ground_truth)

    print("\n===== EVALUATION METRICS =====")
    for metric, value in evaluation_metrics.items():
        print(f"{metric.capitalize()}: {value:.2f}")

    recommendation_system.save_ai_model(os.path.join(json_dir, "ai_model.pkl"))


if __name__ == "__main__":
    main()
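
# Usage sketch (not executed here; names and grades are illustrative): process_student_data also
# accepts an in-memory grades dict, so no sample-data.json is needed in that case:
#
#     system = StudentRecommendationSystem("./data")
#     report = system.process_student_data(
#         grades_data={"Mathematics": 91, "Physics": 84, "Chemistry": 78},
#         strength_threshold=80,
#     )
#     print(report["top_recommendations"])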