github-actions[bot] committed on
Commit
5f7433b
·
1 Parent(s): 041410e

🤖 Auto-sync from GitHub 4b29aaec4b6a30806be3e7b6b3dc2c5f234dda3b

Browse files
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/deploy-to-hf.yml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Trains the healthcare model and mirrors this repository into a Hugging Face
# Space on every push to `main`.
name: Deploy Healthcare Model to Hugging Face

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - name: 🧩 Checkout repository
        uses: actions/checkout@v4

      - name: 🐍 Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: 📦 Install dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt

      - name: 🧠 Train the healthcare model
        env:
          # train_model.py reads HF_TOKEN from the environment and skips the
          # model upload when it is missing, so the secret must be passed here.
          HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
        run: |
          echo "Starting model training..."
          python train_model.py

      - name: ⚙️ Set up Git identity
        run: |
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"

      - name: 🚀 Clone Hugging Face Space
        env:
          HF_USERNAME: "udaysankarjalli"
          HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
          SPACE_NAME: "healthcare-disease-predictor"
        run: |
          if [ -z "$HF_TOKEN" ]; then
            echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
            exit 1
          fi
          echo "🔹 Cloning Hugging Face Space..."
          # The token is embedded in the remote URL so the later push step can
          # authenticate without its own env block.
          git clone "https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME" hf-space

      - name: 🔄 Sync files to Hugging Face Space
        run: |
          echo "🔹 Syncing files to Hugging Face Space..."
          # Exclude .git, the model folder, and the clone itself. Without the
          # hf-space/ exclusion rsync copies the clone into itself and a nested
          # hf-space/ directory gets committed to the Space.
          # Exit code 24 ("some source files vanished") is tolerated.
          rsync -av --delete --exclude='.git' --exclude='model/' --exclude='hf-space/' ./ hf-space/ || [ $? -eq 24 ]
          cd hf-space
          # Excluded paths are not removed by --delete, so drop any nested copy
          # left behind by earlier runs.
          rm -rf hf-space
          git add .
          git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"

      - name: 📤 Push to Hugging Face
        run: |
          echo "🔹 Pushing latest changes to Hugging Face..."
          cd hf-space
          git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }

      - name: ✅ Summary
        run: |
          echo "🎉 Deployment completed successfully!"
          echo "Your model and app are now live on Hugging Face: https://huggingface.co/spaces/udaysankarjalli/healthcare-disease-predictor"
README.md CHANGED
@@ -1,13 +1,22 @@
1
  ---
2
  title: Healthcare Disease Predictor
3
- emoji: 🐨
4
  colorFrom: green
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.49.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: healthcare-disease-predictor
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Healthcare Disease Predictor
3
+ emoji: 🩺
4
  colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: "5.47.0"
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
+ ## 🧠 Features
13
+ - Uses a real healthcare dataset (`health_chatbot_structured_features.csv`)
14
+ - Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
15
+ - Auto-saves `.joblib` model
16
+ - Interactive Gradio interface for disease prediction
17
+
18
+ ## 🚀 CI/CD with GitHub Actions
19
+ Whenever you push changes to the `main` branch, GitHub Actions automatically:
20
+ 1. Runs `train_model.py`
21
+ 2. Saves trained model (`.joblib`)
22
+ 3. Uploads the trained model to the Hugging Face Hub model repo (when `HF_TOKEN` is set) and syncs the app files to your Hugging Face Space
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import os
6
+ from huggingface_hub import hf_hub_download
7
+
8
# -----------------------------
# Step 1: Download model from Hugging Face Hub
# -----------------------------
HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Actions or Space secret
REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
MODEL_FILENAME = "healthcare_model.joblib"


def flatten_text(x):
    """Flatten a single-column array into a 1-D array.

    Compatibility shim for unpickling. A model saved by a training script
    that embeds its own top-level ``flatten_text`` in the pipeline stores the
    function by reference as ``__main__.flatten_text``. Defining the same
    function here lets such a model load when this file runs as the main
    script.
    """
    return x.ravel()


try:
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=MODEL_FILENAME,
        token=HF_TOKEN,
    )
except Exception as e:
    # Callers still see FileNotFoundError; chaining keeps the underlying
    # hub/network error visible in the traceback.
    raise FileNotFoundError(f"Failed to download model from HF Hub: {e}") from e
else:
    print(f"✅ Model downloaded successfully: {model_path}")

# Load the trained pipeline.
# NOTE(review): joblib.load unpickles arbitrary Python objects; only load
# model files from a repository you control.
pipe = joblib.load(model_path)
27
+
28
+ # ----------------------------
29
+ # Step 2: Prediction function
30
+ # ----------------------------
31
def predict_top_k(symptoms_text, duration_days, severity):
    """Return the three most probable diseases for one set of inputs.

    Args:
        symptoms_text: Free-text symptom description. ``None`` is treated as
            an empty string.
        duration_days: Reported duration in days. ``None`` (an empty number
            field) is passed to the pipeline as NaN.
        severity: Severity level label.

    Returns:
        A list of at most three ``{'disease': label, 'probability': float}``
        dicts, ordered from most to least probable.
    """
    row = {
        'symptoms_text': '' if symptoms_text is None else symptoms_text,
        'duration_days_reported': np.nan if duration_days is None else duration_days,
        'severity_level': severity,
    }
    X = pd.DataFrame([row])
    proba = pipe.predict_proba(X)[0]
    classes = pipe.classes_
    top_idx = np.argsort(proba)[::-1][:3]

    results = []
    for i in top_idx:
        label = classes[i]
        # Convert numpy scalars to plain Python values so the result is
        # JSON-friendly whatever dtype the class labels have.
        if isinstance(label, np.generic):
            label = label.item()
        results.append({'disease': label, 'probability': float(proba[i])})
    return results
42
+
43
# ----------------------------
# Step 3: Gradio Interface
# ----------------------------
# The three input widgets, in the positional order predict_top_k expects.
symptoms_input = gr.Textbox(label="Symptoms Text")
duration_input = gr.Number(label="Duration (days)")
severity_input = gr.Dropdown(
    label="Severity Level",
    choices=['mild', 'moderate', 'severe'],
    value='mild',
)

iface = gr.Interface(
    fn=predict_top_k,
    inputs=[symptoms_input, duration_input, severity_input],
    outputs=gr.JSON(label="Top 3 Predicted Diseases"),
    title="🩺 Healthcare Disease Prediction",
    description="Enter symptoms and details to get top disease predictions.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
health_chatbot_structured_features.csv ADDED
The diff for this file is too large to render. See raw diff
 
hf-space/README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Healthcare Disease Predictor
3
+ emoji: 🩺
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: "5.47.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ ## 🧠 Features
13
+ - Uses a real healthcare dataset (`health_chatbot_structured_features.csv`)
14
+ - Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
15
+ - Auto-saves `.joblib` model
16
+ - Interactive Gradio interface for disease prediction
17
+
18
+ ## 🚀 CI/CD with GitHub Actions
19
+ Whenever you push changes to the `main` branch, GitHub Actions automatically:
20
+ 1. Runs `train_model.py`
21
+ 2. Saves trained model (`.joblib`)
22
+ 3. Uploads the trained model to the Hugging Face Hub model repo (when `HF_TOKEN` is set) and syncs the app files to your Hugging Face Space
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
pandas
numpy
scikit-learn
joblib
gradio==5.47.0
# Imported directly by app.py (hf_hub_download) and train_model.py (HfApi),
# so it is declared explicitly instead of relying on a transitive install.
huggingface_hub
fastapi
uvicorn
pydantic
starlette
train_model.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings('ignore')
3
+
4
+ import pandas as pd, numpy as np
5
+ from pathlib import Path
6
+ from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
7
+ from sklearn.pipeline import Pipeline
8
+ from sklearn.compose import ColumnTransformer
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
11
+ from sklearn.impute import SimpleImputer
12
+ from sklearn.ensemble import RandomForestClassifier
13
+ from sklearn.metrics import classification_report, accuracy_score
14
+ import joblib
15
+ import os
16
+ from huggingface_hub import HfApi
17
+
18
# ----------------------------
# Step 1: Load dataset
# ----------------------------
df = pd.read_csv("health_chatbot_structured_features.csv")
print("Data shape:", df.shape)

# ----------------------------
# Step 2: Define features & target
# ----------------------------
features = [
    'symptoms_text',
    'duration_days_reported',
    'severity_level',
]
target = 'disease_label'

# Hold out 20% of the rows, stratified on the target column, with a fixed seed.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df[target],
)
print("Train:", train_df.shape, " Test:", test_df.shape)
32
+
33
+ # ----------------------------
34
+ # Step 3: Preprocessing setup
35
+ # ----------------------------
36
+
37
def flatten_text(x):
    """Flatten a single-column 2-D input into a 1-D array.

    Accepts any array-like (ndarray, nested list, DataFrame), not only
    objects that expose a ``.ravel()`` method.

    Args:
        x: Array-like to flatten.

    Returns:
        A 1-D numpy array holding the elements of ``x`` in row-major order.
    """
    return np.asarray(x).ravel()
39
+
40
# Numeric column: fill missing values with the median.
numeric_features = ['duration_days_reported']
numeric_transformer = SimpleImputer(strategy='median')

# Categorical column: fill missing values with the most frequent level, then
# one-hot encode; unseen levels at prediction time are ignored.
categorical_features = ['severity_level']
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Text column: fill missing text with '', flatten the (n, 1) imputer output to
# 1-D, then vectorize with unigram + bigram TF-IDF.
text_feature = 'symptoms_text'
text_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='')),
    # np.ravel rather than a function defined in this script: a script-level
    # function is pickled by reference as `__main__.<name>` and cannot be
    # resolved by a program that does not define that name in its own
    # `__main__`, whereas np.ravel is importable wherever the model is loaded.
    ('flatten', FunctionTransformer(np.ravel, validate=False)),
    ('tfidf', TfidfVectorizer(ngram_range=(1, 2), max_df=0.95)),
])

# The text column is selected as a one-element list so the imputer receives a
# 2-D input.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
    ('text', text_transformer, [text_feature]),
])
61
+
62
# ----------------------------
# Step 4: Model Pipeline
# ----------------------------
classifier = RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1)
pipe = Pipeline([
    ('preprocessor', preprocessor),
    ('clf', classifier),
])

# ----------------------------
# Step 5: Train & Evaluate
# ----------------------------
X_train, y_train = train_df[features], train_df[target]
X_test, y_test = test_df[features], test_df[target]

# 5-fold stratified cross-validation on the training split only.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(
    pipe,
    X_train,
    y_train,
    cv=cv,
    scoring='accuracy',
    n_jobs=-1,
)
print(f"\nCV accuracy (mean ± std): {scores.mean():.4f} ± {scores.std():.4f}")

# Fit on the full training split, then score on the held-out test split.
pipe.fit(X_train, y_train)
preds = pipe.predict(X_test)
print("\nTest accuracy:", accuracy_score(y_test, preds))
print("\nClassification report:\n", classification_report(y_test, preds))
81
+
82
# ----------------------------
# Step 6: Save model artifacts
# ----------------------------
output_dir = Path("model")
output_dir.mkdir(exist_ok=True)

model_path = "model/healthcare_model.joblib"
train_path = "model/train_data.csv"
test_path = "model/test_data.csv"

# Persist the fitted pipeline and the exact train/test splits it was built from.
joblib.dump(pipe, model_path)
for split_df, csv_path in ((train_df, train_path), (test_df, test_path)):
    split_df.to_csv(csv_path, index=False)

print("\n✅ Model and data saved successfully:")
print(f" Model → {model_path}")
print(f" Train → {train_path}")
print(f" Test → {test_path}")
99
+
100
# ----------------------------
# Step 7: Upload model to Hugging Face Hub
# ----------------------------
HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Action secret
REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files

if not HF_TOKEN:
    print("⚠️ HF_TOKEN not found. Skipping upload.")
else:
    api = HfApi()
    # (local path, name in the Hub repo): the model first, then the optional
    # train/test CSVs.
    uploads = (
        (model_path, "healthcare_model.joblib"),
        (train_path, "train_data.csv"),
        (test_path, "test_data.csv"),
    )
    for local_path, repo_filename in uploads:
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=repo_filename,
            repo_id=REPO_ID,
            token=HF_TOKEN,
        )
    print("✅ Model and data uploaded successfully to Hugging Face Hub!")