github-actions[bot] committed
Commit · e9d74dc
1 Parent(s): 5f402a9
🤖 Auto-sync from GitHub 502b1dc4d7694ca52daf67481f49a16413e51144
Changed files:
- .github/workflows/deploy-to-hf.yml +13 -12
- hf-space/app.py +1 -1
- hf-space/hf-space/hf-space/.gitignore +6 -0
- hf-space/hf-space/hf-space/hf-space/.github/workflows/deploy-to-hf.yml +70 -0
- hf-space/hf-space/hf-space/hf-space/app.py +59 -0
- hf-space/hf-space/hf-space/hf-space/hf-space/README.md +22 -0
- hf-space/hf-space/hf-space/hf-space/requirements.txt +9 -0
- hf-space/hf-space/hf-space/hf-space/train_model.py +124 -0
- hf-space/hf-space/train_model.py +1 -1
.github/workflows/deploy-to-hf.yml
CHANGED

@@ -9,6 +9,13 @@ jobs:
   deploy:
     runs-on: ubuntu-latest
 
+    # ✅ Global environment variables
+    env:
+      HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}        # GitHub Actions secret
+      HF_USERNAME: "udaysankarjalli"
+      MODEL_REPO: "healthcare-disease-predictor-model"  # Separate repo for large model files
+      SPACE_NAME: "healthcare-disease-predictor"
+
     steps:
       - name: 🧩 Checkout repository
         uses: actions/checkout@v4

@@ -22,6 +29,7 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install -r requirements.txt
+          pip install huggingface-hub
 
       - name: 🧠 Train the healthcare model
         run: |

@@ -34,10 +42,6 @@ jobs:
           git config --global user.name "github-actions[bot]"
 
       - name: 🌐 Clone Hugging Face Space
-        env:
-          HF_USERNAME: "udaysankarjalli"
-          HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
-          SPACE_NAME: "healthcare-disease-predictor"
         run: |
           if [ -z "$HF_TOKEN" ]; then
             echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."

@@ -46,25 +50,22 @@ jobs:
           echo "🔹 Cloning Hugging Face Space..."
           git clone https://$HF_USERNAME:[email protected]/spaces/$HF_USERNAME/$SPACE_NAME hf-space
 
-
-      - name: 🔄 Sync files to Hugging Face Space
+      - name: 🔄 Sync app code to Hugging Face Space
         run: |
           echo "🔹 Syncing files to Hugging Face Space..."
-          # Exclude .git and model folder
+          # Exclude .git and model folder (avoid large files)
           rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
           cd hf-space
           git add .
           git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
 
-
-      - name: 📤 Push to Hugging Face
+      - name: 📤 Push app code to Hugging Face Space
         run: |
-          echo "🔹 Pushing latest changes to Hugging Face..."
           cd hf-space
           git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
-
 
       - name: ✅ Summary
         run: |
           echo "🎉 Deployment completed successfully!"
-          echo "
+          echo "App is live: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
+          echo "Model uploaded to HF Model Repo: https://huggingface.co/$HF_USERNAME/$MODEL_REPO"
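The diff above promotes HF_TOKEN, HF_USERNAME, MODEL_REPO and SPACE_NAME to job-level env so every step shares them, and splits the sync and push steps for the app code. If you want to confirm locally that the HG_HEALTHCAREDATA token is valid before relying on the workflow, a minimal sketch with huggingface-hub (an assumption: the package is installed and the token is exported as HF_TOKEN) could look like this; it is not part of the commit:

import os
from huggingface_hub import HfApi

# Sketch only: verify the token resolves to the expected account before deploying.
token = os.environ.get("HF_TOKEN")
if token:
    who = HfApi().whoami(token=token)
    print("Token belongs to:", who.get("name"))
else:
    print("HF_TOKEN is not set")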
hf-space/app.py
CHANGED

@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 # -----------------------------
 # Step 1: Download model from Hugging Face Hub
 # ----------------------------
-HF_TOKEN = os.environ.get("
+HF_TOKEN = os.environ.get("github_actions_deploy_healthcare")  # GitHub Actions or Space secret
 REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
 MODEL_FILENAME = "healthcare_model.joblib"
 
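The single changed line swaps the environment variable the app reads for its Hugging Face token. Because the files in this commit use several different names for that secret (HF_TOKEN, HG_HEALTHCAREDATA, github_actions_deploy_healthcare), a fallback lookup is one way to keep the app working regardless of which name the Space or workflow exposes. The helper below is a hypothetical sketch, not part of this commit:

import os

def resolve_hf_token():
    # Hypothetical helper: try each secret name seen in this repo, in order.
    for name in ("github_actions_deploy_healthcare", "HF_TOKEN", "HG_HEALTHCAREDATA"):
        token = os.environ.get(name)
        if token:
            return token
    # hf_hub_download can still fetch public repos without a token.
    return None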
hf-space/hf-space/hf-space/.gitignore
ADDED

@@ -0,0 +1,6 @@
+# Ignore model artifacts
+model/
+*.joblib
+*.pkl
+*.npy
+*.csv
hf-space/hf-space/hf-space/hf-space/.github/workflows/deploy-to-hf.yml
ADDED

@@ -0,0 +1,70 @@
+name: Deploy Healthcare Model to Hugging Face
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: 🧩 Checkout repository
+        uses: actions/checkout@v4
+
+      - name: 🐍 Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: 📦 Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: 🧠 Train the healthcare model
+        run: |
+          echo "Starting model training..."
+          python train_model.py
+
+      - name: ⚙️ Set up Git identity
+        run: |
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+          git config --global user.name "github-actions[bot]"
+
+      - name: 🌐 Clone Hugging Face Space
+        env:
+          HF_USERNAME: "udaysankarjalli"
+          HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
+          SPACE_NAME: "healthcare-disease-predictor"
+        run: |
+          if [ -z "$HF_TOKEN" ]; then
+            echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
+            exit 1
+          fi
+          echo "🔹 Cloning Hugging Face Space..."
+          git clone https://$HF_USERNAME:[email protected]/spaces/$HF_USERNAME/$SPACE_NAME hf-space
+
+
+      - name: 🔄 Sync files to Hugging Face Space
+        run: |
+          echo "🔹 Syncing files to Hugging Face Space..."
+          # Exclude .git and model folder
+          rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
+          cd hf-space
+          git add .
+          git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
+
+
+      - name: 📤 Push to Hugging Face
+        run: |
+          echo "🔹 Pushing latest changes to Hugging Face..."
+          cd hf-space
+          git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
+
+
+      - name: ✅ Summary
+        run: |
+          echo "🎉 Deployment completed successfully!"
+          echo "Your model and app are now live on Hugging Face: https://huggingface.co/spaces/udaysankarjalli/healthcare-disease-predictor"
hf-space/hf-space/hf-space/hf-space/app.py
ADDED

@@ -0,0 +1,59 @@
+import gradio as gr
+import joblib
+import pandas as pd
+import numpy as np
+import os
+from huggingface_hub import hf_hub_download
+
+# -----------------------------
+# Step 1: Download model from Hugging Face Hub
+# ----------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Actions or Space secret
+REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
+MODEL_FILENAME = "healthcare_model.joblib"
+
+try:
+    model_path = hf_hub_download(
+        repo_id=REPO_ID,
+        filename=MODEL_FILENAME,
+        token=HF_TOKEN
+    )
+    print(f"✅ Model downloaded successfully: {model_path}")
+except Exception as e:
+    raise FileNotFoundError(f"Failed to download model from HF Hub: {e}")
+
+# Load the trained pipeline
+pipe = joblib.load(model_path)
+
+# ----------------------------
+# Step 2: Prediction function
+# ----------------------------
+def predict_top_k(symptoms_text, duration_days, severity):
+    row = {
+        'symptoms_text': symptoms_text,
+        'duration_days_reported': duration_days,
+        'severity_level': severity
+    }
+    X = pd.DataFrame([row])
+    proba = pipe.predict_proba(X)[0]
+    classes = pipe.classes_
+    idx = np.argsort(proba)[::-1][:3]
+    return [{'disease': classes[i], 'probability': float(proba[i])} for i in idx]
+
+# ----------------------------
+# Step 3: Gradio Interface
+# ----------------------------
+iface = gr.Interface(
+    fn=predict_top_k,
+    inputs=[
+        gr.Textbox(label="Symptoms Text"),
+        gr.Number(label="Duration (days)"),
+        gr.Dropdown(label="Severity Level", choices=['mild', 'moderate', 'severe'], value='mild')
+    ],
+    outputs=gr.JSON(label="Top 3 Predicted Diseases"),
+    title="🩺 Healthcare Disease Prediction",
+    description="Enter symptoms and details to get top disease predictions."
+)
+
+if __name__ == "__main__":
+    iface.launch()
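The added app wraps predict_top_k, which returns the three highest-probability classes from the pipeline as a list of dicts. A hypothetical smoke test, run from the Space directory once the model download succeeds, could call the same function without launching the Gradio UI:

# Sketch only: exercise the prediction function directly.
from app import predict_top_k

sample = predict_top_k("persistent cough and mild fever", 3, "mild")
for entry in sample:
    print(f"{entry['disease']}: {entry['probability']:.2%}")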
hf-space/hf-space/hf-space/hf-space/hf-space/README.md
ADDED

@@ -0,0 +1,22 @@
+---
+title: Healthcare Disease Predictor
+emoji: 🩺
+colorFrom: green
+colorTo: blue
+sdk: gradio
+sdk_version: "5.47.0"
+app_file: app.py
+pinned: false
+---
+
+## 🧠 Features
+- Uses real healthcare dataset (`health_chatbot_structured_features.csv`)
+- Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
+- Auto-saves `.joblib` model
+- Interactive Gradio interface for disease prediction
+
+## 🚀 CI/CD with GitHub Actions
+Whenever you push changes, GitHub Actions automatically:
+1. Runs `train_model.py`
+2. Saves trained model (`.joblib`)
+3. Pushes the updated model and app to your Hugging Face Space
hf-space/hf-space/hf-space/hf-space/requirements.txt
ADDED

@@ -0,0 +1,9 @@
+pandas
+numpy
+scikit-learn
+joblib
+gradio==5.47.0
+fastapi
+uvicorn
+pydantic
+starlette
hf-space/hf-space/hf-space/hf-space/train_model.py
ADDED

@@ -0,0 +1,124 @@
+import warnings
+warnings.filterwarnings('ignore')
+
+import pandas as pd, numpy as np
+from pathlib import Path
+from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
+from sklearn.pipeline import Pipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, accuracy_score
+import joblib
+import os
+from huggingface_hub import HfApi
+
+# ----------------------------
+# Step 1: Load dataset
+# ----------------------------
+df = pd.read_csv("health_chatbot_structured_features.csv")
+print("Data shape:", df.shape)
+
+# ----------------------------
+# Step 2: Define features & target
+# ----------------------------
+features = ['symptoms_text', 'duration_days_reported', 'severity_level']
+target = 'disease_label'
+
+train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[target])
+print("Train:", train_df.shape, " Test:", test_df.shape)
+
+# ----------------------------
+# Step 3: Preprocessing setup
+# ----------------------------
+
+def flatten_text(x):
+    return x.ravel()
+
+numeric_features = ['duration_days_reported']
+numeric_transformer = SimpleImputer(strategy='median')
+
+categorical_features = ['severity_level']
+categorical_transformer = Pipeline([
+    ('imputer', SimpleImputer(strategy='most_frequent')),
+    ('onehot', OneHotEncoder(handle_unknown='ignore'))
+])
+
+text_feature = 'symptoms_text'
+text_transformer = Pipeline([
+    ('imputer', SimpleImputer(strategy='constant', fill_value='')),
+    ('flatten', FunctionTransformer(flatten_text, validate=False)),
+    ('tfidf', TfidfVectorizer(ngram_range=(1,2), max_df=0.95))
+])
+
+preprocessor = ColumnTransformer([
+    ('num', numeric_transformer, numeric_features),
+    ('cat', categorical_transformer, categorical_features),
+    ('text', text_transformer, [text_feature])
+])
+
+# ----------------------------
+# Step 4: Model Pipeline
+# ----------------------------
+pipe = Pipeline([
+    ('preprocessor', preprocessor),
+    ('clf', RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
+])
+
+# ----------------------------
+# Step 5: Train & Evaluate
+# ----------------------------
+cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+scores = cross_val_score(pipe, train_df[features], train_df[target], cv=cv, scoring='accuracy', n_jobs=-1)
+print(f"\nCV accuracy (mean ± std): {scores.mean():.4f} ± {scores.std():.4f}")
+
+pipe.fit(train_df[features], train_df[target])
+preds = pipe.predict(test_df[features])
+print("\nTest accuracy:", accuracy_score(test_df[target], preds))
+print("\nClassification report:\n", classification_report(test_df[target], preds))
+
+# ----------------------------
+# Step 6: Save model artifacts
+# ----------------------------
+Path("model").mkdir(exist_ok=True)
+
+model_path = "model/healthcare_model.joblib"
+train_path = "model/train_data.csv"
+test_path = "model/test_data.csv"
+
+joblib.dump(pipe, model_path)
+train_df.to_csv(train_path, index=False)
+test_df.to_csv(test_path, index=False)
+
+print("\n✅ Model and data saved successfully:")
+print(f"  Model → {model_path}")
+print(f"  Train → {train_path}")
+print(f"  Test  → {test_path}")
+
+# ----------------------------
+# Step 7: Upload model to Hugging Face Hub
+# ----------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Action secret
+REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
+
+if HF_TOKEN:
+    api = HfApi()
+    # Upload model
+    api.upload_file(path_or_fileobj=model_path,
+                    path_in_repo="healthcare_model.joblib",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    # Upload train/test CSVs (optional)
+    api.upload_file(path_or_fileobj=train_path,
+                    path_in_repo="train_data.csv",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    api.upload_file(path_or_fileobj=test_path,
+                    path_in_repo="test_data.csv",
+                    repo_id=REPO_ID,
+                    token=HF_TOKEN)
+    print("✅ Model and data uploaded successfully to Hugging Face Hub!")
+else:
+    print("⚠️ HF_TOKEN not found. Skipping upload.")
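Step 7 above uploads the model and CSVs with HfApi.upload_file, which assumes the target model repo already exists on the Hub. If it might not, creating it idempotently first is one option; the snippet below is only a sketch using huggingface_hub's create_repo (exist_ok=True makes it a no-op when the repo already exists), and is not part of this commit:

import os
from huggingface_hub import HfApi

# Sketch: ensure the model repo exists before artifacts are uploaded to it.
token = os.environ.get("HF_TOKEN")
if token:
    HfApi().create_repo(repo_id="udaysankarjalli/healthcare-disease-predictor-model",
                        token=token, repo_type="model", exist_ok=True)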
hf-space/hf-space/train_model.py
CHANGED

@@ -100,7 +100,7 @@ print(f"  Test  → {test_path}")
 # ----------------------------
 # Step 7: Upload model to Hugging Face Hub
 # ----------------------------
-HF_TOKEN = os.environ.get("
+HF_TOKEN = os.environ.get("HG_HEALTHCAREDATA")  # GitHub Action secret
 REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
 
 if HF_TOKEN: