github-actions[bot] committed on
Commit
e9d74dc
·
1 Parent(s): 5f402a9

🤖 Auto-sync from GitHub 502b1dc4d7694ca52daf67481f49a16413e51144

Browse files
.github/workflows/deploy-to-hf.yml CHANGED
@@ -9,6 +9,13 @@ jobs:
9
  deploy:
10
  runs-on: ubuntu-latest
11
 
 
 
 
 
 
 
 
12
  steps:
13
  - name: 🧩 Checkout repository
14
  uses: actions/checkout@v4
@@ -22,6 +29,7 @@ jobs:
22
  run: |
23
  pip install --upgrade pip
24
  pip install -r requirements.txt
 
25
 
26
  - name: 🧠 Train the healthcare model
27
  run: |
@@ -34,10 +42,6 @@ jobs:
34
  git config --global user.name "github-actions[bot]"
35
 
36
  - name: 🚀 Clone Hugging Face Space
37
- env:
38
- HF_USERNAME: "udaysankarjalli"
39
- HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
40
- SPACE_NAME: "healthcare-disease-predictor"
41
  run: |
42
  if [ -z "$HF_TOKEN" ]; then
43
  echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
@@ -46,25 +50,22 @@ jobs:
46
  echo "🔹 Cloning Hugging Face Space..."
47
  git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME hf-space
48
 
49
-
50
- - name: 🔄 Sync files to Hugging Face Space
51
  run: |
52
  echo "🔹 Syncing files to Hugging Face Space..."
53
- # Exclude .git and model folder
54
  rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
55
  cd hf-space
56
  git add .
57
  git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
58
 
59
-
60
- - name: 📤 Push to Hugging Face
61
  run: |
62
- echo "🔹 Pushing latest changes to Hugging Face..."
63
  cd hf-space
64
  git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
65
-
66
 
67
  - name: ✅ Summary
68
  run: |
69
  echo "🎉 Deployment completed successfully!"
70
- echo "Your model and app are now live on Hugging Face: https://huggingface.co/spaces/udaysankarjalli/healthcare-disease-predictor"
 
 
9
  deploy:
10
  runs-on: ubuntu-latest
11
 
12
+ # ✅ Global environment variables
13
+ env:
14
+ HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }} # GitHub Actions secret
15
+ HF_USERNAME: "udaysankarjalli"
16
+ MODEL_REPO: "healthcare-disease-predictor-model" # Separate repo for large model files
17
+ SPACE_NAME: "healthcare-disease-predictor"
18
+
19
  steps:
20
  - name: 🧩 Checkout repository
21
  uses: actions/checkout@v4
 
29
  run: |
30
  pip install --upgrade pip
31
  pip install -r requirements.txt
32
+ pip install huggingface-hub
33
 
34
  - name: 🧠 Train the healthcare model
35
  run: |
 
42
  git config --global user.name "github-actions[bot]"
43
 
44
  - name: 🚀 Clone Hugging Face Space
 
 
 
 
45
  run: |
46
  if [ -z "$HF_TOKEN" ]; then
47
  echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
 
50
  echo "🔹 Cloning Hugging Face Space..."
51
  git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME hf-space
52
 
53
+ - name: 🔄 Sync app code to Hugging Face Space
 
54
  run: |
55
  echo "🔹 Syncing files to Hugging Face Space..."
56
+ # Exclude .git and model folder (avoid large files)
57
  rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
58
  cd hf-space
59
  git add .
60
  git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
61
 
62
+ - name: 📤 Push app code to Hugging Face Space
 
63
  run: |
 
64
  cd hf-space
65
  git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
 
66
 
67
  - name: ✅ Summary
68
  run: |
69
  echo "🎉 Deployment completed successfully!"
70
+ echo "App is live: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
71
+ echo "Model uploaded to HF Model Repo: https://huggingface.co/$HF_USERNAME/$MODEL_REPO"
hf-space/app.py CHANGED
@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
8
  # -----------------------------
9
  # Step 1: Download model from Hugging Face Hub
10
  # ----------------------------
11
- HF_TOKEN = os.environ.get("HF_TOKEN") # GitHub Actions or Space secret
12
  REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
13
  MODEL_FILENAME = "healthcare_model.joblib"
14
 
 
8
  # -----------------------------
9
  # Step 1: Download model from Hugging Face Hub
10
  # ----------------------------
11
+ HF_TOKEN = os.environ.get("github_actions_deploy_healthcare")# GitHub Actions or Space secret
12
  REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
13
  MODEL_FILENAME = "healthcare_model.joblib"
14
 
hf-space/hf-space/hf-space/.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Ignore model artifacts
2
+ model/
3
+ *.joblib
4
+ *.pkl
5
+ *.npy
6
+ *.csv
hf-space/hf-space/hf-space/hf-space/.github/workflows/deploy-to-hf.yml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy Healthcare Model to Hugging Face
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: 🧩 Checkout repository
14
+ uses: actions/checkout@v4
15
+
16
+ - name: 🐍 Set up Python
17
+ uses: actions/setup-python@v4
18
+ with:
19
+ python-version: "3.10"
20
+
21
+ - name: 📦 Install dependencies
22
+ run: |
23
+ pip install --upgrade pip
24
+ pip install -r requirements.txt
25
+
26
+ - name: 🧠 Train the healthcare model
27
+ run: |
28
+ echo "Starting model training..."
29
+ python train_model.py
30
+
31
+ - name: ⚙️ Set up Git identity
32
+ run: |
33
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
34
+ git config --global user.name "github-actions[bot]"
35
+
36
+ - name: 🚀 Clone Hugging Face Space
37
+ env:
38
+ HF_USERNAME: "udaysankarjalli"
39
+ HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
40
+ SPACE_NAME: "healthcare-disease-predictor"
41
+ run: |
42
+ if [ -z "$HF_TOKEN" ]; then
43
+ echo "❌ ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
44
+ exit 1
45
+ fi
46
+ echo "🔹 Cloning Hugging Face Space..."
47
+ git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME hf-space
48
+
49
+
50
+ - name: 🔄 Sync files to Hugging Face Space
51
+ run: |
52
+ echo "🔹 Syncing files to Hugging Face Space..."
53
+ # Exclude .git and model folder
54
+ rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
55
+ cd hf-space
56
+ git add .
57
+ git commit -m "🤖 Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
58
+
59
+
60
+ - name: 📤 Push to Hugging Face
61
+ run: |
62
+ echo "🔹 Pushing latest changes to Hugging Face..."
63
+ cd hf-space
64
+ git push origin main || { echo "❌ Push failed. Check your Hugging Face token permissions."; exit 1; }
65
+
66
+
67
+ - name: ✅ Summary
68
+ run: |
69
+ echo "🎉 Deployment completed successfully!"
70
+ echo "Your model and app are now live on Hugging Face: https://huggingface.co/spaces/udaysankarjalli/healthcare-disease-predictor"
hf-space/hf-space/hf-space/hf-space/app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import os
6
+ from huggingface_hub import hf_hub_download
7
+
8
+ # -----------------------------
9
+ # Step 1: Download model from Hugging Face Hub
10
+ # ----------------------------
11
+ HF_TOKEN = os.environ.get("HF_TOKEN") # GitHub Actions or Space secret
12
+ REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
13
+ MODEL_FILENAME = "healthcare_model.joblib"
14
+
15
+ try:
16
+ model_path = hf_hub_download(
17
+ repo_id=REPO_ID,
18
+ filename=MODEL_FILENAME,
19
+ token=HF_TOKEN
20
+ )
21
+ print(f"✅ Model downloaded successfully: {model_path}")
22
+ except Exception as e:
23
+ raise FileNotFoundError(f"Failed to download model from HF Hub: {e}")
24
+
25
+ # Load the trained pipeline
26
+ pipe = joblib.load(model_path)
27
+
28
+ # ----------------------------
29
+ # Step 2: Prediction function
30
+ # ----------------------------
31
+ def predict_top_k(symptoms_text, duration_days, severity):
32
+ row = {
33
+ 'symptoms_text': symptoms_text,
34
+ 'duration_days_reported': duration_days,
35
+ 'severity_level': severity
36
+ }
37
+ X = pd.DataFrame([row])
38
+ proba = pipe.predict_proba(X)[0]
39
+ classes = pipe.classes_
40
+ idx = np.argsort(proba)[::-1][:3]
41
+ return [{'disease': classes[i], 'probability': float(proba[i])} for i in idx]
42
+
43
+ # ----------------------------
44
+ # Step 3: Gradio Interface
45
+ # ----------------------------
46
+ iface = gr.Interface(
47
+ fn=predict_top_k,
48
+ inputs=[
49
+ gr.Textbox(label="Symptoms Text"),
50
+ gr.Number(label="Duration (days)"),
51
+ gr.Dropdown(label="Severity Level", choices=['mild', 'moderate', 'severe'], value='mild')
52
+ ],
53
+ outputs=gr.JSON(label="Top 3 Predicted Diseases"),
54
+ title="🩺 Healthcare Disease Prediction",
55
+ description="Enter symptoms and details to get top disease predictions."
56
+ )
57
+
58
+ if __name__ == "__main__":
59
+ iface.launch()
hf-space/hf-space/hf-space/hf-space/hf-space/README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Healthcare Disease Predictor
3
+ emoji: 🩺
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: "5.47.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ ## 🧠 Features
13
+ - Uses real healthcare dataset (`health_chatbot_structured_features.csv`)
14
+ - Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
15
+ - Auto-saves `.joblib` model
16
+ - Interactive Gradio interface for disease prediction
17
+
18
+ ## 🚀 CI/CD with GitHub Actions
19
+ Whenever you push changes, GitHub Actions automatically:
20
+ 1. Runs `train_model.py`
21
+ 2. Saves trained model (`.joblib`)
22
+ 3. Pushes the updated model and app to your Hugging Face Space
hf-space/hf-space/hf-space/hf-space/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ joblib
5
+ gradio==5.47.0
6
+ fastapi
7
+ uvicorn
8
+ pydantic
9
+ starlette
hf-space/hf-space/hf-space/hf-space/train_model.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings('ignore')
3
+
4
+ import pandas as pd, numpy as np
5
+ from pathlib import Path
6
+ from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
7
+ from sklearn.pipeline import Pipeline
8
+ from sklearn.compose import ColumnTransformer
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
11
+ from sklearn.impute import SimpleImputer
12
+ from sklearn.ensemble import RandomForestClassifier
13
+ from sklearn.metrics import classification_report, accuracy_score
14
+ import joblib
15
+ import os
16
+ from huggingface_hub import HfApi
17
+
18
+ # ----------------------------
19
+ # Step 1: Load dataset
20
+ # ----------------------------
21
+ df = pd.read_csv("health_chatbot_structured_features.csv")
22
+ print("Data shape:", df.shape)
23
+
24
+ # ----------------------------
25
+ # Step 2: Define features & target
26
+ # ----------------------------
27
+ features = ['symptoms_text', 'duration_days_reported', 'severity_level']
28
+ target = 'disease_label'
29
+
30
+ train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[target])
31
+ print("Train:", train_df.shape, " Test:", test_df.shape)
32
+
33
+ # ----------------------------
34
+ # Step 3: Preprocessing setup
35
+ # ----------------------------
36
+
37
+ def flatten_text(x):
38
+ return x.ravel()
39
+
40
+ numeric_features = ['duration_days_reported']
41
+ numeric_transformer = SimpleImputer(strategy='median')
42
+
43
+ categorical_features = ['severity_level']
44
+ categorical_transformer = Pipeline([
45
+ ('imputer', SimpleImputer(strategy='most_frequent')),
46
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
47
+ ])
48
+
49
+ text_feature = 'symptoms_text'
50
+ text_transformer = Pipeline([
51
+ ('imputer', SimpleImputer(strategy='constant', fill_value='')),
52
+ ('flatten', FunctionTransformer(flatten_text, validate=False)),
53
+ ('tfidf', TfidfVectorizer(ngram_range=(1,2), max_df=0.95))
54
+ ])
55
+
56
+ preprocessor = ColumnTransformer([
57
+ ('num', numeric_transformer, numeric_features),
58
+ ('cat', categorical_transformer, categorical_features),
59
+ ('text', text_transformer, [text_feature])
60
+ ])
61
+
62
+ # ----------------------------
63
+ # Step 4: Model Pipeline
64
+ # ----------------------------
65
+ pipe = Pipeline([
66
+ ('preprocessor', preprocessor),
67
+ ('clf', RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
68
+ ])
69
+
70
+ # ----------------------------
71
+ # Step 5: Train & Evaluate
72
+ # ----------------------------
73
+ cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
74
+ scores = cross_val_score(pipe, train_df[features], train_df[target], cv=cv, scoring='accuracy', n_jobs=-1)
75
+ print(f"\nCV accuracy (mean ± std): {scores.mean():.4f} ± {scores.std():.4f}")
76
+
77
+ pipe.fit(train_df[features], train_df[target])
78
+ preds = pipe.predict(test_df[features])
79
+ print("\nTest accuracy:", accuracy_score(test_df[target], preds))
80
+ print("\nClassification report:\n", classification_report(test_df[target], preds))
81
+
82
+ # ----------------------------
83
+ # Step 6: Save model artifacts
84
+ # ----------------------------
85
+ Path("model").mkdir(exist_ok=True)
86
+
87
+ model_path = "model/healthcare_model.joblib"
88
+ train_path = "model/train_data.csv"
89
+ test_path = "model/test_data.csv"
90
+
91
+ joblib.dump(pipe, model_path)
92
+ train_df.to_csv(train_path, index=False)
93
+ test_df.to_csv(test_path, index=False)
94
+
95
+ print("\n✅ Model and data saved successfully:")
96
+ print(f" Model → {model_path}")
97
+ print(f" Train → {train_path}")
98
+ print(f" Test → {test_path}")
99
+
100
+ # ----------------------------
101
+ # Step 7: Upload model to Hugging Face Hub
102
+ # ----------------------------
103
+ HF_TOKEN = os.environ.get("HF_TOKEN") # GitHub Action secret
104
+ REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model" # separate repo for large model files
105
+
106
+ if HF_TOKEN:
107
+ api = HfApi()
108
+ # Upload model
109
+ api.upload_file(path_or_fileobj=model_path,
110
+ path_in_repo="healthcare_model.joblib",
111
+ repo_id=REPO_ID,
112
+ token=HF_TOKEN)
113
+ # Upload train/test CSVs (optional)
114
+ api.upload_file(path_or_fileobj=train_path,
115
+ path_in_repo="train_data.csv",
116
+ repo_id=REPO_ID,
117
+ token=HF_TOKEN)
118
+ api.upload_file(path_or_fileobj=test_path,
119
+ path_in_repo="test_data.csv",
120
+ repo_id=REPO_ID,
121
+ token=HF_TOKEN)
122
+ print("✅ Model and data uploaded successfully to Hugging Face Hub!")
123
+ else:
124
+ print("⚠️ HF_TOKEN not found. Skipping upload.")
hf-space/hf-space/train_model.py CHANGED
@@ -100,7 +100,7 @@ print(f" Test → {test_path}")
100
  # ----------------------------
101
  # Step 7: Upload model to Hugging Face Hub
102
  # ----------------------------
103
- HF_TOKEN = os.environ.get("HF_TOKEN") # GitHub Action secret
104
  REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model" # separate repo for large model files
105
 
106
  if HF_TOKEN:
 
100
  # ----------------------------
101
  # Step 7: Upload model to Hugging Face Hub
102
  # ----------------------------
103
+ HF_TOKEN = os.environ.get("HG_HEALTHCAREDATA") # GitHub Action secret
104
  REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model" # separate repo for large model files
105
 
106
  if HF_TOKEN: