Spaces:

udaysankarjalli
/

healthcare-disease-predictor

Sleeping

App Files Files Community

github-actions[bot] committed on Oct 7

Commit

5f7433b

1 Parent(s): 041410e

🤖 Auto-sync from GitHub 4b29aaec4b6a30806be3e7b6b3dc2c5f234dda3b

Browse files

Files changed (8) hide show

.gitattributes +0 -35
.github/workflows/deploy-to-hf.yml +70 -0
README.md +14 -5
app.py +59 -0
health_chatbot_structured_features.csv +0 -0
hf-space/README.md +22 -0
requirements.txt +9 -0
train_model.py +124 -0

.gitattributes DELETED Viewed

@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

.github/workflows/deploy-to-hf.yml ADDED Viewed

	@@ -0,0 +1,70 @@

+name: Deploy Healthcare Model to Hugging Face
+on:
+  push:
+    branches:
+      - main
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: 🧩 Checkout repository
+        uses: actions/checkout@v4
+      - name: 🐍 Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: 📦 Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: 🧠 Train the healthcare model
+        run: |
+          echo "Starting model training..."
+          python train_model.py
+      - name: ⚙️ Set up Git identity
+        run: |
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git config --global user.name "github-actions[bot]"
+      - name: 🚀 Clone Hugging Face Space
+        env:
+          HF_USERNAME: "udaysankarjalli"
+          HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
+          SPACE_NAME: "healthcare-disease-predictor"
+        run: |
+          if [ -z "$HF_TOKEN" ]; then
+            echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
+            exit 1
+          fi
+          echo "🔹 Cloning Hugging Face Space..."
+          git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME hf-space
+      - name: 🔄 Sync files to Hugging Face Space
+        run: |
+          echo "🔹 Syncing files to Hugging Face Space..."
+          # Exclude .git and model folder
+          rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
+          cd hf-space
+          git add .
+          git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
+      - name: 📤 Push to Hugging Face
+        run: |
+          echo "🔹 Pushing latest changes to Hugging Face..."
+          cd hf-space
+          git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
+      - name: ✅ Summary
+        run: |
+          echo "🎉 Deployment completed successfully!"
+          echo "Your model and app are now live on Hugging Face: https://huggingface.co/spaces/udaysankarjalli/healthcare-disease-predictor"

README.md CHANGED Viewed

@@ -1,13 +1,22 @@
 ---
 title: Healthcare Disease Predictor
-emoji: 🐨
 colorFrom: green
-colorTo: purple
 sdk: gradio
-sdk_version: 5.49.0
 app_file: app.py
 pinned: false
-short_description: healthcare-disease-predictor
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Healthcare Disease Predictor
+emoji: 🩺
 colorFrom: green
+colorTo: blue
 sdk: gradio
+sdk_version: "5.47.0"
 app_file: app.py
 pinned: false
 ---
+## 🧠 Features
+- Uses real healthcare dataset (`health_chatbot_structured_features.csv`)
+- Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
+- Auto-saves `.joblib` model
+- Interactive Gradio interface for disease prediction
+## 🚀 CI/CD with GitHub Actions
+Whenever you push changes, GitHub Actions automatically:
+1. Runs `train_model.py`
+2. Saves trained model (`.joblib`)
+3. Pushes the updated model and app to your Hugging Face Space

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import gradio as gr
+import joblib
+import pandas as pd
+import numpy as np
+import os
+from huggingface_hub import hf_hub_download
+# -----------------------------
+# Step 1: Download model from Hugging Face Hub
+# ----------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Actions or Space secret
+REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
+MODEL_FILENAME = "healthcare_model.joblib"
+try:
+    model_path = hf_hub_download(
+        repo_id=REPO_ID,
+        filename=MODEL_FILENAME,
+        token=HF_TOKEN
+    )
+    print(f"✅ Model downloaded successfully: {model_path}")
+except Exception as e:
+    raise FileNotFoundError(f"Failed to download model from HF Hub: {e}")
+# Load the trained pipeline
+pipe = joblib.load(model_path)
+# ----------------------------
+# Step 2: Prediction function
+# ----------------------------
+def predict_top_k(symptoms_text, duration_days, severity):
+    row = {
+        'symptoms_text': symptoms_text,
+        'duration_days_reported': duration_days,
+        'severity_level': severity
+    }
+    X = pd.DataFrame([row])
+    proba = pipe.predict_proba(X)[0]
+    classes = pipe.classes_
+    idx = np.argsort(proba)[::-1][:3]
+    return [{'disease': classes[i], 'probability': float(proba[i])} for i in idx]
+# ----------------------------
+# Step 3: Gradio Interface
+# ----------------------------
+iface = gr.Interface(
+    fn=predict_top_k,
+    inputs=[
+        gr.Textbox(label="Symptoms Text"),
+        gr.Number(label="Duration (days)"),
+        gr.Dropdown(label="Severity Level", choices=['mild', 'moderate', 'severe'], value='mild')
+    ],
+    outputs=gr.JSON(label="Top 3 Predicted Diseases"),
+    title="🩺 Healthcare Disease Prediction",
+    description="Enter symptoms and details to get top disease predictions."
+)
+if __name__ == "__main__":
+    iface.launch()

health_chatbot_structured_features.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

hf-space/README.md ADDED Viewed

	@@ -0,0 +1,22 @@

+---
+title: Healthcare Disease Predictor
+emoji: 🩺
+colorFrom: green
+colorTo: blue
+sdk: gradio
+sdk_version: "5.47.0"
+app_file: app.py
+pinned: false
+---
+## 🧠 Features
+- Uses real healthcare dataset (`health_chatbot_structured_features.csv`)
+- Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
+- Auto-saves `.joblib` model
+- Interactive Gradio interface for disease prediction
+## 🚀 CI/CD with GitHub Actions
+Whenever you push changes, GitHub Actions automatically:
+1. Runs `train_model.py`
+2. Saves trained model (`.joblib`)
+3. Pushes the updated model and app to your Hugging Face Space

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+pandas
+numpy
+scikit-learn
+joblib
+gradio==5.47.0
+fastapi
+uvicorn
+pydantic
+starlette

train_model.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import warnings
+warnings.filterwarnings('ignore')
+import pandas as pd, numpy as np
+from pathlib import Path
+from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
+from sklearn.pipeline import Pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, accuracy_score
+import joblib
+import os
+from huggingface_hub import HfApi
+# ----------------------------
+# Step 1: Load dataset
+# ----------------------------
+df = pd.read_csv("health_chatbot_structured_features.csv")
+print("Data shape:", df.shape)
+# ----------------------------
+# Step 2: Define features & target
+# ----------------------------
+features = ['symptoms_text', 'duration_days_reported', 'severity_level']
+target = 'disease_label'
+train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[target])
+print("Train:", train_df.shape, " Test:", test_df.shape)
+# ----------------------------
+# Step 3: Preprocessing setup
+# ----------------------------
+def flatten_text(x):
+    return x.ravel()
+numeric_features = ['duration_days_reported']
+numeric_transformer = SimpleImputer(strategy='median')
+categorical_features = ['severity_level']
+categorical_transformer = Pipeline([
+    ('imputer', SimpleImputer(strategy='most_frequent')),
+    ('onehot', OneHotEncoder(handle_unknown='ignore'))
+])
+text_feature = 'symptoms_text'
+text_transformer = Pipeline([
+    ('imputer', SimpleImputer(strategy='constant', fill_value='')),
+    ('flatten', FunctionTransformer(flatten_text, validate=False)),
+    ('tfidf', TfidfVectorizer(ngram_range=(1,2), max_df=0.95))
+])
+preprocessor = ColumnTransformer([
+    ('num', numeric_transformer, numeric_features),
+    ('cat', categorical_transformer, categorical_features),
+    ('text', text_transformer, [text_feature])
+])
+# ----------------------------
+# Step 4: Model Pipeline
+# ----------------------------
+pipe = Pipeline([
+    ('preprocessor', preprocessor),
+    ('clf', RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
+])
+# ----------------------------
+# Step 5: Train & Evaluate
+# ----------------------------
+cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+scores = cross_val_score(pipe, train_df[features], train_df[target], cv=cv, scoring='accuracy', n_jobs=-1)
+print(f"\nCV accuracy (mean ± std): {scores.mean():.4f} ± {scores.std():.4f}")
+pipe.fit(train_df[features], train_df[target])
+preds = pipe.predict(test_df[features])
+print("\nTest accuracy:", accuracy_score(test_df[target], preds))
+print("\nClassification report:\n", classification_report(test_df[target], preds))
+# ----------------------------
+# Step 6: Save model artifacts
+# ----------------------------
+Path("model").mkdir(exist_ok=True)
+model_path = "model/healthcare_model.joblib"
+train_path = "model/train_data.csv"
+test_path = "model/test_data.csv"
+joblib.dump(pipe, model_path)
+train_df.to_csv(train_path, index=False)
+test_df.to_csv(test_path, index=False)
+print("\n✅ Model and data saved successfully:")
+print(f"   Model  → {model_path}")
+print(f"   Train  → {train_path}")
+print(f"   Test   → {test_path}")
+# ----------------------------
+# Step 7: Upload model to Hugging Face Hub
+# ----------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Action secret
+REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
+if HF_TOKEN:
+    api = HfApi()
+    # Upload model
+    api.upload_file(path_or_fileobj=model_path,
+                    path_in_repo="healthcare_model.joblib",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    # Upload train/test CSVs (optional)
+    api.upload_file(path_or_fileobj=train_path,
+                    path_in_repo="train_data.csv",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    api.upload_file(path_or_fileobj=test_path,
+                    path_in_repo="test_data.csv",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    print("✅ Model and data uploaded successfully to Hugging Face Hub!")
+else:
+    print("⚠️ HF_TOKEN not found. Skipping upload.")