github-actions[bot] committed
Commit · e9d74dc
1 Parent(s): 5f402a9
🤖 Auto-sync from GitHub 502b1dc4d7694ca52daf67481f49a16413e51144
Changed files:
- .github/workflows/deploy-to-hf.yml +13 -12
- hf-space/app.py +1 -1
- hf-space/hf-space/hf-space/.gitignore +6 -0
- hf-space/hf-space/hf-space/hf-space/.github/workflows/deploy-to-hf.yml +70 -0
- hf-space/hf-space/hf-space/hf-space/app.py +59 -0
- hf-space/hf-space/hf-space/hf-space/hf-space/README.md +22 -0
- hf-space/hf-space/hf-space/hf-space/requirements.txt +9 -0
- hf-space/hf-space/hf-space/hf-space/train_model.py +124 -0
- hf-space/hf-space/train_model.py +1 -1
.github/workflows/deploy-to-hf.yml
CHANGED

@@ -9,6 +9,13 @@ jobs:
   deploy:
     runs-on: ubuntu-latest
 
+    # ✅ Global environment variables
+    env:
+      HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}        # GitHub Actions secret
+      HF_USERNAME: "udaysankarjalli"
+      MODEL_REPO: "healthcare-disease-predictor-model"  # Separate repo for large model files
+      SPACE_NAME: "healthcare-disease-predictor"
+
     steps:
       - name: 🧩 Checkout repository
         uses: actions/checkout@v4

@@ -22,6 +29,7 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install -r requirements.txt
+          pip install huggingface-hub
 
       - name: 🧠 Train the healthcare model
         run: |

@@ -34,10 +42,6 @@ jobs:
           git config --global user.name "github-actions[bot]"
 
       - name: 🌐 Clone Hugging Face Space
-        env:
-          HF_USERNAME: "udaysankarjalli"
-          HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
-          SPACE_NAME: "healthcare-disease-predictor"
         run: |
           if [ -z "$HF_TOKEN" ]; then
             echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."

@@ -46,25 +50,22 @@ jobs:
           echo "🔹 Cloning Hugging Face Space..."
           git clone https://$HF_USERNAME:[email protected]/spaces/$HF_USERNAME/$SPACE_NAME hf-space
 
-
-      - name: 🔄 Sync files to Hugging Face Space
+      - name: 🔄 Sync app code to Hugging Face Space
         run: |
           echo "🔹 Syncing files to Hugging Face Space..."
-          # Exclude .git and model folder
+          # Exclude .git and model folder (avoid large files)
           rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
           cd hf-space
           git add .
           git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
 
-
-      - name: 📤 Push to Hugging Face
+      - name: 📤 Push app code to Hugging Face Space
         run: |
-          echo "🔹 Pushing latest changes to Hugging Face..."
           cd hf-space
           git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
-
 
       - name: ✅ Summary
         run: |
           echo "🎉 Deployment completed successfully!"
-          echo "
+          echo "App is live: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
+          echo "Model uploaded to HF Model Repo: https://huggingface.co/$HF_USERNAME/$MODEL_REPO"
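The diff above promotes HF_TOKEN, HF_USERNAME, MODEL_REPO and SPACE_NAME to job-level env so every step shares them, and splits the sync and push steps for the app code. If you want to confirm locally that the HG_HEALTHCAREDATA token is valid before relying on the workflow, a minimal sketch with huggingface-hub (an assumption: the package is installed and the token is exported as HF_TOKEN) could look like this; it is not part of the commit:

import os
from huggingface_hub import HfApi

# Sketch only: verify the token resolves to the expected account before deploying.
token = os.environ.get("HF_TOKEN")
if token:
    who = HfApi().whoami(token=token)
    print("Token belongs to:", who.get("name"))
else:
    print("HF_TOKEN is not set")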
hf-space/app.py
CHANGED

@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 # -----------------------------
 # Step 1: Download model from Hugging Face Hub
 # ----------------------------
-HF_TOKEN = os.environ.get("
+HF_TOKEN = os.environ.get("github_actions_deploy_healthcare")  # GitHub Actions or Space secret
 REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
 MODEL_FILENAME = "healthcare_model.joblib"
 
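The single changed line swaps the environment variable the app reads for its Hugging Face token. Because the files in this commit use several different names for that secret (HF_TOKEN, HG_HEALTHCAREDATA, github_actions_deploy_healthcare), a fallback lookup is one way to keep the app working regardless of which name the Space or workflow exposes. The helper below is a hypothetical sketch, not part of this commit:

import os

def resolve_hf_token():
    # Hypothetical helper: try each secret name seen in this repo, in order.
    for name in ("github_actions_deploy_healthcare", "HF_TOKEN", "HG_HEALTHCAREDATA"):
        token = os.environ.get(name)
        if token:
            return token
    # hf_hub_download can still fetch public repos without a token.
    return None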
hf-space/hf-space/hf-space/.gitignore
ADDED

@@ -0,0 +1,6 @@
+# Ignore model artifacts
+model/
+*.joblib
+*.pkl
+*.npy
+*.csv
hf-space/hf-space/hf-space/hf-space/.github/workflows/deploy-to-hf.yml
ADDED

@@ -0,0 +1,70 @@
+name: Deploy Healthcare Model to Hugging Face
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: 🧩 Checkout repository
+        uses: actions/checkout@v4
+
+      - name: 🐍 Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: 📦 Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: 🧠 Train the healthcare model
+        run: |
+          echo "Starting model training..."
+          python train_model.py
+
+      - name: ⚙️ Set up Git identity
+        run: |
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git config --global user.name "github-actions[bot]"
+
+      - name: 🌐 Clone Hugging Face Space
+        env:
+          HF_USERNAME: "udaysankarjalli"
+          HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
+          SPACE_NAME: "healthcare-disease-predictor"
+        run: |
+          if [ -z "$HF_TOKEN" ]; then
+            echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
+            exit 1
+          fi
+          echo "🔹 Cloning Hugging Face Space..."
+          git clone https://$HF_USERNAME:[email protected]/spaces/$HF_USERNAME/$SPACE_NAME hf-space
+
+
+      - name: 🔄 Sync files to Hugging Face Space
+        run: |
+          echo "🔹 Syncing files to Hugging Face Space..."
+          # Exclude .git and model folder
+          rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
+          cd hf-space
+          git add .
+          git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
+
+
+      - name: 📤 Push to Hugging Face
+        run: |
+          echo "🔹 Pushing latest changes to Hugging Face..."
+          cd hf-space
+          git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
+
+
+      - name: ✅ Summary
+        run: |
+          echo "🎉 Deployment completed successfully!"
+          echo "Your model and app are now live on Hugging Face: https://huggingface.co/spaces/udaysankarjalli/healthcare-disease-predictor"
hf-space/hf-space/hf-space/hf-space/app.py
ADDED

@@ -0,0 +1,59 @@
+import gradio as gr
+import joblib
+import pandas as pd
+import numpy as np
+import os
+from huggingface_hub import hf_hub_download
+
+# -----------------------------
+# Step 1: Download model from Hugging Face Hub
+# ----------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Actions or Space secret
+REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
+MODEL_FILENAME = "healthcare_model.joblib"
+
+try:
+    model_path = hf_hub_download(
+        repo_id=REPO_ID,
+        filename=MODEL_FILENAME,
+        token=HF_TOKEN
+    )
+    print(f"✅ Model downloaded successfully: {model_path}")
+except Exception as e:
+    raise FileNotFoundError(f"Failed to download model from HF Hub: {e}")
+
+# Load the trained pipeline
+pipe = joblib.load(model_path)
+
+# ----------------------------
+# Step 2: Prediction function
+# ----------------------------
+def predict_top_k(symptoms_text, duration_days, severity):
+    row = {
+        'symptoms_text': symptoms_text,
+        'duration_days_reported': duration_days,
+        'severity_level': severity
+    }
+    X = pd.DataFrame([row])
+    proba = pipe.predict_proba(X)[0]
+    classes = pipe.classes_
+    idx = np.argsort(proba)[::-1][:3]
+    return [{'disease': classes[i], 'probability': float(proba[i])} for i in idx]
+
+# ----------------------------
+# Step 3: Gradio Interface
+# ----------------------------
+iface = gr.Interface(
+    fn=predict_top_k,
+    inputs=[
+        gr.Textbox(label="Symptoms Text"),
+        gr.Number(label="Duration (days)"),
+        gr.Dropdown(label="Severity Level", choices=['mild', 'moderate', 'severe'], value='mild')
+    ],
+    outputs=gr.JSON(label="Top 3 Predicted Diseases"),
+    title="🩺 Healthcare Disease Prediction",
+    description="Enter symptoms and details to get top disease predictions."
+)
+
+if __name__ == "__main__":
+    iface.launch()
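The added app wraps predict_top_k, which returns the three highest-probability classes from the pipeline as a list of dicts. A hypothetical smoke test, run from the Space directory once the model download succeeds, could call the same function without launching the Gradio UI:

# Sketch only: exercise the prediction function directly.
from app import predict_top_k

sample = predict_top_k("persistent cough and mild fever", 3, "mild")
for entry in sample:
    print(f"{entry['disease']}: {entry['probability']:.2%}")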
hf-space/hf-space/hf-space/hf-space/hf-space/README.md
ADDED

@@ -0,0 +1,22 @@
+---
+title: Healthcare Disease Predictor
+emoji: 🩺
+colorFrom: green
+colorTo: blue
+sdk: gradio
+sdk_version: "5.47.0"
+app_file: app.py
+pinned: false
+---
+
+## 🧠 Features
+- Uses real healthcare dataset (`health_chatbot_structured_features.csv`)
+- Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
+- Auto-saves `.joblib` model
+- Interactive Gradio interface for disease prediction
+
+## 🚀 CI/CD with GitHub Actions
+Whenever you push changes, GitHub Actions automatically:
+1. Runs `train_model.py`
+2. Saves trained model (`.joblib`)
+3. Pushes the updated model and app to your Hugging Face Space
hf-space/hf-space/hf-space/hf-space/requirements.txt
ADDED

@@ -0,0 +1,9 @@
+pandas
+numpy
+scikit-learn
+joblib
+gradio==5.47.0
+fastapi
+uvicorn
+pydantic
+starlette
hf-space/hf-space/hf-space/hf-space/train_model.py
ADDED

@@ -0,0 +1,124 @@
+import warnings
+warnings.filterwarnings('ignore')
+
+import pandas as pd, numpy as np
+from pathlib import Path
+from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
+from sklearn.pipeline import Pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, accuracy_score
+import joblib
+import os
+from huggingface_hub import HfApi
+
+# ----------------------------
+# Step 1: Load dataset
+# ----------------------------
+df = pd.read_csv("health_chatbot_structured_features.csv")
+print("Data shape:", df.shape)
+
+# ----------------------------
+# Step 2: Define features & target
+# ----------------------------
+features = ['symptoms_text', 'duration_days_reported', 'severity_level']
+target = 'disease_label'
+
+train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[target])
+print("Train:", train_df.shape, " Test:", test_df.shape)
+
+# ----------------------------
+# Step 3: Preprocessing setup
+# ----------------------------
+
+def flatten_text(x):
+    return x.ravel()
+
+numeric_features = ['duration_days_reported']
+numeric_transformer = SimpleImputer(strategy='median')
+
+categorical_features = ['severity_level']
+categorical_transformer = Pipeline([
+    ('imputer', SimpleImputer(strategy='most_frequent')),
+    ('onehot', OneHotEncoder(handle_unknown='ignore'))
+])
+
+text_feature = 'symptoms_text'
+text_transformer = Pipeline([
+    ('imputer', SimpleImputer(strategy='constant', fill_value='')),
+    ('flatten', FunctionTransformer(flatten_text, validate=False)),
+    ('tfidf', TfidfVectorizer(ngram_range=(1,2), max_df=0.95))
+])
+
+preprocessor = ColumnTransformer([
+    ('num', numeric_transformer, numeric_features),
+    ('cat', categorical_transformer, categorical_features),
+    ('text', text_transformer, [text_feature])
+])
+
+# ----------------------------
+# Step 4: Model Pipeline
+# ----------------------------
+pipe = Pipeline([
+    ('preprocessor', preprocessor),
+    ('clf', RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
+])
+
+# ----------------------------
+# Step 5: Train & Evaluate
+# ----------------------------
+cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+scores = cross_val_score(pipe, train_df[features], train_df[target], cv=cv, scoring='accuracy', n_jobs=-1)
+print(f"\nCV accuracy (mean ± std): {scores.mean():.4f} ± {scores.std():.4f}")
+
+pipe.fit(train_df[features], train_df[target])
+preds = pipe.predict(test_df[features])
+print("\nTest accuracy:", accuracy_score(test_df[target], preds))
+print("\nClassification report:\n", classification_report(test_df[target], preds))
+
+# ----------------------------
+# Step 6: Save model artifacts
+# ----------------------------
+Path("model").mkdir(exist_ok=True)
+
+model_path = "model/healthcare_model.joblib"
+train_path = "model/train_data.csv"
+test_path = "model/test_data.csv"
+
+joblib.dump(pipe, model_path)
+train_df.to_csv(train_path, index=False)
+test_df.to_csv(test_path, index=False)
+
+print("\n✅ Model and data saved successfully:")
+print(f"  Model → {model_path}")
+print(f"  Train → {train_path}")
+print(f"  Test  → {test_path}")
+
+# ----------------------------
+# Step 7: Upload model to Hugging Face Hub
+# ----------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Action secret
+REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
+
+if HF_TOKEN:
+    api = HfApi()
+    # Upload model
+    api.upload_file(path_or_fileobj=model_path,
+                    path_in_repo="healthcare_model.joblib",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    # Upload train/test CSVs (optional)
+    api.upload_file(path_or_fileobj=train_path,
+                    path_in_repo="train_data.csv",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    api.upload_file(path_or_fileobj=test_path,
+                    path_in_repo="test_data.csv",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    print("✅ Model and data uploaded successfully to Hugging Face Hub!")
+else:
+    print("⚠️ HF_TOKEN not found. Skipping upload.")
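Step 7 above uploads the model and CSVs with HfApi.upload_file, which assumes the target model repo already exists on the Hub. If it might not, creating it idempotently first is one option; the snippet below is only a sketch using huggingface_hub's create_repo (exist_ok=True makes it a no-op when the repo already exists), and is not part of this commit:

import os
from huggingface_hub import HfApi

# Sketch: ensure the model repo exists before artifacts are uploaded to it.
token = os.environ.get("HF_TOKEN")
if token:
    HfApi().create_repo(repo_id="udaysankarjalli/healthcare-disease-predictor-model",
                        token=token, repo_type="model", exist_ok=True)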
hf-space/hf-space/train_model.py
CHANGED

@@ -100,7 +100,7 @@ print(f"  Test  → {test_path}")
 # ----------------------------
 # Step 7: Upload model to Hugging Face Hub
 # ----------------------------
-HF_TOKEN = os.environ.get("
+HF_TOKEN = os.environ.get("HG_HEALTHCAREDATA")  # GitHub Action secret
 REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
 
 if HF_TOKEN: