Spaces:

kerols77
/

UniRecommend

Sleeping

App Files Files Community

kerols77 committed on Jun 6

Commit

1f8582e

verified ·

1 Parent(s): 4457db5

Upload 8 files

Browse files

Files changed (8) hide show

Dockerfile +33 -0
app.py +91 -0
data/departments.json +69 -0
data/sample-data.json +39 -0
data/subjects.json +68 -0
data/universities.json +67 -0
requirements.txt +6 -0
student_recommendation_system.py +220 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+# Use Python 3.9 slim image as base
+FROM python:3.9-slim
+# Install system dependencies (compilers only; skip recommended extras to keep the image small)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+# Hugging Face Spaces run the container as UID 1000; create that user so the
+# app can write data/ai_model.pkl at request time
+RUN useradd -m -u 1000 user
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+ENV PORT=7860
+# Set working directory
+WORKDIR /app
+# Copy requirements file first (for better Docker layer caching)
+COPY requirements.txt .
+# Install Python dependencies (system-wide, while still root)
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application code, owned by the runtime user
+COPY --chown=user:user . .
+# Make sure the data directory exists and the whole tree is writable by the runtime user
+RUN mkdir -p data && chown -R user:user /app
+# Drop root privileges for the running application
+USER user
+# Expose the port that the app runs on
+EXPOSE 7860
+# Command to run the application
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+from flask import Flask, request, jsonify
+import os
+import requests
+from student_recommendation_system import StudentRecommendationSystem, setup_json_directory
+app = Flask(__name__)
+json_dir = setup_json_directory()
+recommendation_system = StudentRecommendationSystem(json_dir)
+FORWARD_URL = "http://54.242.19.19:3000/api/report/send-report/"
+GRADES_API_BASE_URL = "http://54.242.19.19:3000/api/grades/report/"
+@app.route('/recommend/<int:student_id>', methods=['POST'])
+def recommend(student_id):
+    try:
+        grades_api_url = f"{GRADES_API_BASE_URL}{student_id}"
+        try:
+            grades_response = requests.get(grades_api_url, timeout=10)
+            grades_response.raise_for_status()
+            grades_data_raw = grades_response.json()
+        except requests.exceptions.RequestException as e:
+            return jsonify({"error": f"Failed to fetch grades data: {str(e)}"}), 500
+        except ValueError as e:
+            return jsonify({"error": "Invalid JSON response from grades API"}), 500
+        if "grades_data" not in grades_data_raw:
+            return jsonify({"error": "No 'grades_data' found in external API response"}), 400
+        email = grades_data_raw.get("email")
+        if not email:
+            return jsonify({"error": "No 'email' found in external API response"}), 400
+        try:
+            grades_data = {subject: float(score) for subject, score in grades_data_raw["grades_data"].items()}
+        except (ValueError, TypeError) as e:
+            return jsonify({"error": f"Invalid grades data format: {str(e)}"}), 400
+        results = recommendation_system.process_student_data(grades_data=grades_data)
+        strengths = results["strengths"]
+        top_recs = results["top_recommendations"]
+        clean_recs = [
+            {k: v for k, v in rec.items() if k not in ("raw_score", "refined_score")}
+            for rec in top_recs
+        ]
+        response_payload = {
+            "email": email,
+            "studentName": "Ahmed Hassan",
+            "academic_strengths": strengths,
+            "top_recommendations": clean_recs
+        }
+        model_path = os.path.join(json_dir, "ai_model.pkl")
+        recommendation_system.save_ai_model(model_path)
+        try:
+            requests.post(FORWARD_URL, json=response_payload, timeout=10)
+        except requests.exceptions.RequestException as e:
+            print(f"Warning: Failed to forward data to report API: {str(e)}")
+        return jsonify(response_payload), 200
+    except Exception as e:
+        return jsonify({"error": f"Internal server error: {str(e)}"}), 500
+@app.route('/recommend', methods=['POST'])
+def recommend_post():
+    data = request.get_json(silent=True)
+    if not data or "grades_data" not in data:
+        return jsonify({"error": "No 'grades_data' provided in JSON payload."}), 400
+    try:
+        grades_data = {subject: float(score) for subject, score in data["grades_data"].items()}
+        results = recommendation_system.process_student_data(grades_data=grades_data)
+        strengths = results["strengths"]
+        top_recs = results["top_recommendations"]
+        clean_recs = [
+            {k: v for k, v in rec.items() if k not in ("raw_score", "refined_score")}
+            for rec in top_recs
+        ]
+        response_payload = {
+            "academic_strengths": strengths,
+            "top_recommendations": clean_recs
+        }
+        model_path = os.path.join(json_dir, "ai_model.pkl")
+        recommendation_system.save_ai_model(model_path)
+        try:
+            requests.post(FORWARD_URL, json=response_payload, timeout=10)
+        except requests.exceptions.RequestException as e:
+            print(f"Warning: Failed to forward data to report API: {str(e)}")
+        return jsonify(response_payload), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+if __name__ == '__main__':
+    port = int(os.environ.get("PORT", 7860))
+    app.run(host="0.0.0.0", port=port, debug=False)

data/departments.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "program_departments": {
+    "Engineering": [
+      "Civil Engineering",
+      "Mechanical Engineering",
+      "Electrical Engineering",
+      "Electronics and Communications Engineering",
+      "Industrial Engineering",
+      "Computer & Systems Engineering"
+    ],
+    "Medicine": [
+      "General Medicine",
+      "Surgery",
+      "Pediatrics",
+      "Obstetrics and Gynecology",
+      "Internal Medicine",
+      "Orthopedics"
+    ],
+    "Law": [
+      "Civil Law",
+      "Criminal Law",
+      "International Law",
+      "Constitutional Law",
+      "Commercial Law"
+    ],
+    "Computer Science": [
+      "Software Engineering",
+      "Data Science",
+      "Networks and Communications",
+      "Information Security",
+      "Artificial Intelligence",
+      "Human-Computer Interaction"
+    ],
+    "Business": [
+      "Finance",
+      "Marketing",
+      "Management",
+      "Accounting",
+      "International Business"
+    ],
+    "Pharmacy": [
+      "Pharmaceutical Chemistry",
+      "Clinical Pharmacy",
+      "Pharmacognosy",
+      "Pharmacology",
+      "Biopharmaceutics"
+    ],
+    "Architecture": [
+      "Structural Design",
+      "Urban Planning",
+      "Interior Design",
+      "Landscape Architecture"
+    ],
+    "Literature": [
+      "Modern Arabic Literature",
+      "Classical Arabic Literature",
+      "Comparative Literature",
+      "Linguistics",
+      "Translation Studies"
+    ],
+    "Agriculture": [
+      "Agronomy",
+      "Horticulture",
+      "Agricultural Engineering",
+      "Soil and Water Management",
+      "Plant Protection"
+    ]
+  }
+}

data/sample-data.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "grades_data": {
+    "Mathematics": 92,
+    "Physics": 88,
+    "Chemistry": 85,
+    "Technical Drawing": 75,
+    "Mechanics": 80,
+    "Biology": 78,
+    "Anatomy": 70,
+    "Physiology": 68,
+    "Programming": 90,
+    "Data Structures": 87,
+    "Algorithms": 85,
+    "Discrete Mathematics": 83,
+    "History": 85,
+    "Arabic": 90,
+    "Political Science": 73,
+    "Social Studies": 76,
+    "Logic": 74,
+    "Economics": 76,
+    "Accounting": 80,
+    "Business Studies": 82,
+    "Marketing": 70,
+    "Management": 74,
+    "Pharmacology": 88,
+    "Organic Chemistry": 84,
+    "Biochemistry": 81,
+    "Design": 79,
+    "Engineering Drawing": 77,
+    "Architectural History": 81,
+    "World Literature": 80,
+    "Creative Writing": 86,
+    "English Literature": 88,
+    "Agronomy": 83,
+    "Plant Science": 81,
+    "Soil Science": 79,
+    "Irrigation": 76
+  }
+}

data/subjects.json ADDED Viewed

	@@ -0,0 +1,68 @@

+{
+  "core_subjects": {
+    "Engineering": [
+      "Mathematics",
+      "Physics",
+      "Chemistry",
+      "Technical Drawing",
+      "Mechanics"
+    ],
+    "Medicine": [
+      "Biology",
+      "Chemistry",
+      "Physics",
+      "Anatomy",
+      "Physiology"
+    ],
+    "Law": [
+      "History",
+      "Arabic",
+      "Political Science",
+      "Social Studies",
+      "Logic"
+    ],
+    "Computer Science": [
+      "Mathematics",
+      "Programming",
+      "Data Structures",
+      "Algorithms",
+      "Discrete Mathematics"
+    ],
+    "Business": [
+      "Economics",
+      "Accounting",
+      "Business Studies",
+      "Marketing",
+      "Management"
+    ],
+    "Pharmacy": [
+      "Biology",
+      "Chemistry",
+      "Pharmacology",
+      "Organic Chemistry",
+      "Biochemistry"
+    ],
+    "Architecture": [
+      "Mathematics",
+      "Physics",
+      "Design",
+      "Engineering Drawing",
+      "Architectural History"
+    ],
+    "Literature": [
+      "Arabic",
+      "History",
+      "World Literature",
+      "Creative Writing",
+      "English Literature"
+    ],
+    "Agriculture": [
+      "Biology",
+      "Chemistry",
+      "Agronomy",
+      "Plant Science",
+      "Soil Science",
+      "Irrigation"
+    ]
+  }
+}

data/universities.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+  "top_universities": {
+    "Engineering": [
+      "Cairo University - Faculty of Engineering",
+      "Ain Shams University - Faculty of Engineering",
+      "Alexandria University - Faculty of Engineering",
+      "Helwan University - Faculty of Engineering",
+      "Mansoura University - Faculty of Engineering",
+      "Assiut University - Faculty of Engineering",
+      "Tanta University - Faculty of Engineering"
+    ],
+    "Medicine": [
+      "Cairo University - Faculty of Medicine",
+      "Alexandria University - Faculty of Medicine",
+      "Mansoura University - Faculty of Medicine",
+      "Assiut University - Faculty of Medicine",
+      "Tanta University - Faculty of Medicine"
+    ],
+    "Law": [
+      "Cairo University - Faculty of Law",
+      "Ain Shams University - Faculty of Law",
+      "Alexandria University - Faculty of Law",
+      "Mansoura University - Faculty of Law",
+      "Zagazig University - Faculty of Law"
+    ],
+    "Computer Science": [
+      "The American University in Cairo",
+      "Cairo University - Faculty of Computers and Information",
+      "Ain Shams University - Faculty of Computers and Information",
+      "Helwan University - Faculty of Computers and Information",
+      "Alexandria University - Faculty of Information Technology"
+    ],
+    "Business": [
+      "Cairo University - Faculty of Commerce",
+      "Ain Shams University - Faculty of Commerce",
+      "Alexandria University - Faculty of Commerce",
+      "Helwan University - Faculty of Commerce",
+      "Mansoura University - Faculty of Commerce"
+    ],
+    "Pharmacy": [
+      "Cairo University - Faculty of Pharmacy",
+      "Ain Shams University - Faculty of Pharmacy",
+      "Alexandria University - Faculty of Pharmacy",
+      "Mansoura University - Faculty of Pharmacy",
+      "Tanta University - Faculty of Pharmacy"
+    ],
+    "Architecture": [
+      "Cairo University - Faculty of Engineering / Architecture",
+      "Ain Shams University - Faculty of Architecture",
+      "Alexandria University - Faculty of Engineering / Architecture",
+      "Helwan University - Faculty of Architecture"
+    ],
+    "Literature": [
+      "Cairo University - Faculty of Arts",
+      "Ain Shams University - Faculty of Arts",
+      "Alexandria University - Faculty of Arts",
+      "Helwan University - Faculty of Arts"
+    ],
+    "Agriculture": [
+      "Cairo University - Faculty of Agriculture",
+      "Alexandria University - Faculty of Agriculture",
+      "Mansoura University - Faculty of Agriculture",
+      "Assiut University - Faculty of Agriculture",
+      "Tanta University - Faculty of Agriculture"
+    ]
+  }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+Flask==2.3.3
+pandas==2.0.3
+numpy==1.24.3
+scikit-learn==1.3.0
+requests==2.31.0
+Werkzeug==2.3.7

student_recommendation_system.py ADDED Viewed

	@@ -0,0 +1,220 @@

+import pandas as pd
+import numpy as np
+import json
+import os
+import logging
+import pickle
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import f1_score, mean_absolute_error, accuracy_score, precision_score, recall_score
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]: %(message)s")
+class StudentRecommendationSystem:
+    def __init__(self, json_dir: str = "./data"):
+        self.json_dir = json_dir
+        try:
+            with open(os.path.join(json_dir, "subjects.json"), "r") as f:
+                subjects_data = json.load(f)
+                self.core_subjects = subjects_data["core_subjects"]
+            logging.info("Loaded subjects data successfully.")
+        except Exception as e:
+            logging.error("Error loading subjects data: " + str(e))
+            raise
+        try:
+            with open(os.path.join(json_dir, "universities.json"), "r") as f:
+                universities_data = json.load(f)
+                self.top_universities = universities_data["top_universities"]
+            logging.info("Loaded universities data successfully.")
+        except Exception as e:
+            logging.error("Error loading universities data: " + str(e))
+            raise
+        try:
+            with open(os.path.join(json_dir, "departments.json"), "r") as f:
+                departments_data = json.load(f)
+                self.program_departments = departments_data["program_departments"]
+            logging.info("Loaded departments data successfully.")
+        except Exception as e:
+            logging.error("Error loading departments data: " + str(e))
+            raise
+        self.university_programs = {}
+        for program in self.core_subjects:
+            self.university_programs[program] = {"core_subjects": self.core_subjects[program]}
+        logging.info("University programs mapping created.")
+        self.ai_model = self._train_dummy_model()
+    def _train_dummy_model(self) -> Pipeline:
+        X = np.array([[0], [50], [100]])
+        y = np.array([0, 50, 100])
+        pipeline = Pipeline([
+            ('scaler', StandardScaler()),
+            ('regressor', LinearRegression())
+        ])
+        pipeline.fit(X, y)
+        logging.info("Dummy AI model pipeline trained successfully.")
+        return pipeline
+    def _refine_match_score(self, score: float) -> float:
+        refined = self.ai_model.predict(np.array([[score]]))[0]
+        logging.debug(f"Refined score for raw score {score} is {refined}.")
+        return refined
+    def predict_success_probability(self, refined_score: float) -> float:
+        probability = refined_score / 100.0
+        logging.debug(f"Predicted success probability from refined score {refined_score} is {probability}.")
+        return probability
+    def load_student_grades(self, grades_data: dict = None, grades_file: str = None) -> pd.DataFrame:
+        if grades_file:
+            try:
+                with open(grades_file, "r") as f:
+                    grades_data = json.load(f)
+                    if "sample_grades" in grades_data:
+                        grades_data = grades_data["sample_grades"]
+                logging.info(f"Student grades loaded from file: {grades_file}")
+            except Exception as e:
+                logging.error("Error loading student grades file: " + str(e))
+                raise
+        if not grades_data:
+            raise ValueError("Either grades_data or grades_file must be provided")
+        self.student_data = pd.DataFrame(list(grades_data.items()), columns=['Subject', 'Grade'])
+        return self.student_data
+    def identify_strengths(self, threshold: float = 85) -> pd.DataFrame:
+        strengths = self.student_data[self.student_data['Grade'] >= threshold]
+        return strengths.sort_values(by='Grade', ascending=False)
+    def calculate_program_match(self, strengths: pd.DataFrame) -> pd.DataFrame:
+        program_scores = {}
+        for program, details in self.university_programs.items():
+            score = 0
+            core_subjects = details["core_subjects"]
+            total_possible_score = len(core_subjects) * 100
+            for subject in core_subjects:
+                subject_grade = self.student_data[self.student_data['Subject'] == subject]
+                if not subject_grade.empty:
+                    score += subject_grade.iloc[0]['Grade']
+            raw_score = (score / total_possible_score) * 100 if total_possible_score > 0 else 0
+            refined_score = self._refine_match_score(raw_score)
+            success_probability = self.predict_success_probability(refined_score)
+            program_scores[program] = {
+                "raw_score": raw_score,
+                "refined_score": refined_score,
+                "success_probability": success_probability
+            }
+            logging.debug(f"Program {program}: raw_score {raw_score}, refined_score {refined_score}, success_probability {success_probability}")
+        program_df = pd.DataFrame([
+            {"Program": program,
+             "Raw Score": scores["raw_score"],
+             "AI Refined Score": scores["refined_score"],
+             "Success Probability": scores["success_probability"]}
+            for program, scores in program_scores.items()
+        ])
+        return program_df.sort_values(by='AI Refined Score', ascending=False)
+    def get_top_recommendations(self, program_matches: pd.DataFrame, top_n: int = 3) -> list:
+        recommendations = []
+        for i in range(min(top_n, len(program_matches))):
+            program = program_matches.iloc[i]['Program']
+            raw_score = program_matches.iloc[i]['Raw Score']
+            refined_score = program_matches.iloc[i]['AI Refined Score']
+            success_probability = program_matches.iloc[i]['Success Probability']
+            if refined_score >= 50:
+                universities = self.top_universities.get(program, ["No specific recommendations"])
+                departments = self.program_departments.get(program, ["No specific departments"])
+                recommendations.append({
+                    "program": program,
+                    "raw_score": raw_score,
+                    "refined_score": refined_score,
+                    "success_probability": success_probability,
+                    "recommended_universities": universities[:3],
+                    "recommended_departments": departments[:3]
+                })
+        return recommendations
+    def evaluate_recommendations(self, program_matches_df: pd.DataFrame, ground_truth: dict, threshold: float = 60) -> dict:
+        predictions = program_matches_df.apply(lambda row: 1 if row["AI Refined Score"] >= threshold else 0, axis=1).tolist()
+        actuals = [ground_truth.get(program, 0) for program in program_matches_df["Program"].tolist()]
+        metrics = {
+            "f1_score": f1_score(actuals, predictions),
+            "accuracy": accuracy_score(actuals, predictions),
+            "precision": precision_score(actuals, predictions, zero_division=0),
+            "recall": recall_score(actuals, predictions, zero_division=0),
+            "mae": mean_absolute_error(actuals, predictions)
+        }
+        logging.info("Evaluation metrics computed.")
+        return metrics
+    def save_ai_model(self, file_path: str) -> None:
+        try:
+            with open(file_path, "wb") as f:
+                pickle.dump(self.ai_model, f)
+            logging.info(f"AI model saved to {file_path}")
+        except Exception as e:
+            logging.error("Error saving AI model: " + str(e))
+            raise
+    def load_ai_model(self, file_path: str) -> None:
+        try:
+            with open(file_path, "rb") as f:
+                self.ai_model = pickle.load(f)
+            logging.info(f"AI model loaded from {file_path}")
+        except Exception as e:
+            logging.error("Error loading AI model: " + str(e))
+            raise
+    def process_student_data(self, grades_data: dict = None, grades_file: str = None, strength_threshold: float = 85) -> dict:
+        self.load_student_grades(grades_data, grades_file)
+        strengths = self.identify_strengths(strength_threshold)
+        program_matches = self.calculate_program_match(strengths)
+        recommendations = self.get_top_recommendations(program_matches)
+        report = {
+            "strengths": strengths.to_dict('records'),
+            "program_matches": program_matches.to_dict('records'),
+            "top_recommendations": recommendations
+        }
+        return report
+def setup_json_directory(json_dir: str = "./data") -> str:
+    if not os.path.exists(json_dir):
+        os.makedirs(json_dir)
+    return json_dir
+def main():
+    json_dir = setup_json_directory()
+    sample_data_file = os.path.join(json_dir, "sample-data.json")
+    recommendation_system = StudentRecommendationSystem(json_dir)
+    results = recommendation_system.process_student_data(grades_file=sample_data_file)
+    with open(sample_data_file, "r") as f:
+        sample_data = json.load(f)
+        sample_grades = sample_data["sample_grades"]
+    print("\n===== STUDENT ACADEMIC PROFILE =====")
+    print("\nSubjects and Grades:")
+    for subject in sample_grades:
+        print(f"- {subject}: {sample_grades[subject]}")
+    print("\n===== ACADEMIC STRENGTHS =====")
+    for strength in results["strengths"]:
+        print(f"- {strength['Subject']}: {strength['Grade']}")
+    print("\n===== PROGRAM MATCHES (Including AI Details) =====")
+    for match in results["program_matches"]:
+        print(f"- {match['Program']}: Raw Score = {match['Raw Score']:.1f}%, AI Refined Score = {match['AI Refined Score']:.1f}%, Success Probability = {match['Success Probability']:.2f}")
+    print("\n===== PROGRAM RECOMMENDATIONS =====")
+    for i, rec in enumerate(results["top_recommendations"], 1):
+        print(f"\n{i}. {rec['program']} (Raw Score: {rec['raw_score']:.1f}%, AI Refined Score: {rec['refined_score']:.1f}%, Success Probability: {rec['success_probability']:.2f})")
+        print("   Recommended Universities:")
+        for uni in rec['recommended_universities']:
+            print(f"   - {uni}")
+        print("   Recommended Departments:")
+        for dept in rec['recommended_departments']:
+            print(f"   - {dept}")
+    program_matches_df = pd.DataFrame(results["program_matches"])
+    dummy_ground_truth = {row["Program"]: (1 if row["Raw Score"] >= 65 else 0) for idx, row in program_matches_df.iterrows()}
+    evaluation_metrics = recommendation_system.evaluate_recommendations(program_matches_df, dummy_ground_truth)
+    print("\n===== EVALUATION METRICS =====")
+    for metric, value in evaluation_metrics.items():
+        print(f"{metric.capitalize()}: {value:.2f}")
+    recommendation_system.save_ai_model(os.path.join(json_dir, "ai_model.pkl"))
+if __name__ == "__main__":
+    main()