Spaces:

agents-course
/

Students_leaderboard

Running

App Files Files Community

Jofthomas HF Staff committed 3 days ago

Commit

1e9f877

verified ·

1 Parent(s): 5af1bed

Create data.py

Browse files

Files changed (1) hide show

app/data.py +72 -0

app/data.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import time
+import pandas as pd
+from datasets import load_dataset
+from fastapi import HTTPException
+import logging
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+DATASET_NAME = "agents-course/unit4-students-scores"  # dataset identifier passed to load_dataset()
+CACHE_DURATION_SECONDS = 60  # How long (in seconds) a cached leaderboard is served before it is reloaded
+# Simple in-memory cache, read and updated by get_sorted_leaderboard_data()
+cached_data = None  # last sorted leaderboard (list of dicts); None until the first successful load
+last_cache_time = 0  # time.time() value of the call that last refreshed cached_data; 0 = never
+def get_sorted_leaderboard_data():
+    """
+    Loads data from Hugging Face dataset, sorts it, and caches the result.
+    Returns the sorted data as a list of dictionaries.
+    """
+    global cached_data, last_cache_time
+    current_time = time.time()
+    # Check cache validity
+    if cached_data is not None and (current_time - last_cache_time) < CACHE_DURATION_SECONDS:
+        logger.info("Returning cached leaderboard data.")
+        return cached_data
+    logger.info(f"Cache expired or empty. Fetching fresh data from {DATASET_NAME}...")
+    try:
+        # Load the dataset
+        dataset = load_dataset(DATASET_NAME, split="train")
+        # Convert to pandas DataFrame for easier sorting
+        df = pd.DataFrame(dataset)
+        # Ensure required columns exist
+        required_columns = ['username', 'score', 'timestamp', 'code']
+        if not all(col in df.columns for col in required_columns):
+            missing = [col for col in required_columns if col not in df.columns]
+            raise ValueError(f"Dataset missing required columns: {missing}")
+        # Convert timestamp to datetime objects for proper sorting
+        # Handle potential errors during conversion
+        df['timestamp_dt'] = pd.to_datetime(df['timestamp'], errors='coerce')
+        # Drop rows where timestamp conversion failed
+        df.dropna(subset=['timestamp_dt'], inplace=True)
+        # Sort by score (descending) and then by timestamp (ascending)
+        df_sorted = df.sort_values(by=['score', 'timestamp_dt'], ascending=[False, True])
+        # Select only the columns needed for the frontend + code
+        # Convert DataFrame to list of dictionaries (JSON serializable)
+        # Use original timestamp string for display consistency if needed,
+        # but sorting was done on datetime objects.
+        leaderboard = df_sorted[['username', 'score', 'timestamp', 'code']].to_dict(orient='records')
+        # Update cache
+        cached_data = leaderboard
+        last_cache_time = current_time
+        logger.info(f"Successfully fetched and cached data. {len(leaderboard)} entries.")
+        return cached_data
+    except Exception as e:
+        logger.error(f"Error loading or processing dataset {DATASET_NAME}: {e}", exc_info=True)
+        # Re-raise as HTTPException so FastAPI returns a proper error response
+        raise HTTPException(status_code=500, detail=f"Failed to load or process leaderboard data: {e}")
+# Optional: add an empty app/__init__.py file so that the app directory
+# is treated as a regular Python package.