# DoctorTwin / app.py
# (Hugging Face Space page header removed; last update by isana25, commit 7014ec0.)
import os
import requests
import joblib
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, AutoModelForSequenceClassification
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
app = FastAPI()
# Utility to download files if not present locally
def download_file(url, dest):
    """Download *url* to *dest* unless the file already exists.

    Streams the response to disk in chunks so large artifacts (e.g.
    multi-hundred-MB model weights) are never held fully in memory, and
    applies a timeout so a stalled server cannot hang app startup forever.

    Raises requests.HTTPError on a non-2xx response.
    """
    if os.path.exists(dest):
        print(f"File {dest} already exists.")
        return
    print(f"Downloading {url} to {dest}")
    # stream=True avoids buffering the whole body in RAM (the original used
    # r.content, which does); (connect, read) timeout guards import time.
    r = requests.get(url, stream=True, timeout=(10, 300))
    r.raise_for_status()
    with open(dest, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1 << 20):
            f.write(chunk)
# ----------- Setup for BERT QA model (Virtual Consultation) ------------
# Checkpoint files are fetched from this Space's own repo at import time and
# cached in a local directory so from_pretrained() can load them from disk.
qa_model_dir = "./bert_mini_squadv2_finetuned"
os.makedirs(qa_model_dir, exist_ok=True)
# Artifacts transformers needs to reconstruct the fine-tuned QA checkpoint.
qa_files = {
    "pytorch_model.bin": "https://huggingface.co/spaces/isana25/DoctorTwin/resolve/main/pytorch_model.bin",
    "config.json": "https://huggingface.co/spaces/isana25/DoctorTwin/resolve/main/config.json",
    "tokenizer_config.json": "https://huggingface.co/spaces/isana25/DoctorTwin/resolve/main/tokenizer_config.json",
    "vocab.txt": "https://huggingface.co/spaces/isana25/DoctorTwin/resolve/main/vocab.txt",
}
for fname, furl in qa_files.items():
    download_file(furl, os.path.join(qa_model_dir, fname))
# Extractive-QA tokenizer/model pair used by the /virtual_consultation route.
tokenizer_qa = AutoTokenizer.from_pretrained(qa_model_dir)
model_qa = AutoModelForQuestionAnswering.from_pretrained(qa_model_dir)
# ----------- Setup for Diabetes XGBoost Model (Risk Prediction) ------------
diabetes_pkl_url = "https://huggingface.co/spaces/isana25/DoctorTwin/resolve/main/diabetes_xgboost_model.pkl"
diabetes_pkl_path = "./diabetes_xgboost_model.pkl"
download_file(diabetes_pkl_url, diabetes_pkl_path)
# NOTE(review): joblib.load unpickles arbitrary code from the downloaded file;
# this is only acceptable because the .pkl comes from this Space's own repo —
# never point diabetes_pkl_url at an untrusted source.
diabetes_model = joblib.load(diabetes_pkl_path)
# ----------- Setup for other features: load pretrained models directly ------------
from transformers import pipeline  # NOTE(review): mid-file import; move to the top with the others
# Monitoring & Alerts - summarization pipeline (used by /monitoring_alerts).
# NOTE(review): prajjwal1/bert-mini is an encoder-only BERT, not a seq2seq
# model — pipeline("summarization") is unlikely to work with it; confirm and
# consider a genuine summarization checkpoint (BART/T5 family).
monitoring_model_id = "prajjwal1/bert-mini"
summarizer = pipeline("summarization", model=monitoring_model_id)
# Personalized Simulation - Bio_ClinicalBERT sequence classifier
# (used by /personalized_simulation; label meanings depend on the checkpoint's
# classification head — not documented here, verify before relying on them).
personalized_model_id = "emilyalsentzer/Bio_ClinicalBERT"
personalized_tokenizer = AutoTokenizer.from_pretrained(personalized_model_id)
personalized_model = AutoModelForSequenceClassification.from_pretrained(personalized_model_id)
# --- Pydantic models for request validation ---
class QARequest(BaseModel):
    """Request body for /virtual_consultation."""
    question: str  # natural-language question to answer
    context: str   # passage the answer span is extracted from
class RiskPredictionRequest(BaseModel):
    """Request body for /risk_prediction."""
    # Flat numeric feature vector, e.g. [age, bmi, blood_pressure, ...].
    # Order and length must match what the XGBoost model was trained on —
    # not validated here.
    features: list
# --- API endpoints ---
@app.post("/virtual_consultation")
def virtual_consultation(data: QARequest):
    """Extractive QA: return the span of data.context that answers data.question.

    Runs the fine-tuned BERT QA model and decodes the tokens between the
    argmax of the start logits and the argmax of the end logits (inclusive).
    May return an empty answer when the predicted end precedes the start.
    """
    encoded = tokenizer_qa(data.question, data.context, return_tensors="pt")
    with torch.no_grad():  # inference only — no gradients needed
        result = model_qa(**encoded)
    span_start = torch.argmax(result.start_logits)
    span_end = torch.argmax(result.end_logits) + 1
    span_ids = encoded.input_ids[0][span_start:span_end]
    span_tokens = tokenizer_qa.convert_ids_to_tokens(span_ids)
    return {"answer": tokenizer_qa.convert_tokens_to_string(span_tokens)}
@app.post("/risk_prediction")
def risk_prediction(data: RiskPredictionRequest):
    """Predict diabetes risk from a flat numeric feature vector.

    Returns {"risk_prediction": <int class label>} from the XGBoost model.
    Rejects an empty or non-numeric feature vector with a 400 instead of
    letting the model raise and surface as an opaque 500.
    """
    import numpy as np  # local import kept to match the original module layout

    if not data.features:
        raise HTTPException(status_code=400, detail="features must be a non-empty list")
    try:
        features = np.asarray(data.features, dtype=float).reshape(1, -1)
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="features must be numeric")
    pred = diabetes_model.predict(features)
    return {"risk_prediction": int(pred[0])}
@app.post("/monitoring_alerts")
def monitoring_alerts(text: str):
    """Summarize a free-text monitoring note with the summarization pipeline.

    NOTE(review): a bare ``str`` parameter on a FastAPI POST route is treated
    as a *query* parameter, not the request body — confirm callers expect
    ``POST /monitoring_alerts?text=...``.
    """
    # Deterministic (greedy) summary bounded to 20-50 tokens.
    summary = summarizer(text, max_length=50, min_length=20, do_sample=False)
    return {"summary": summary[0]['summary_text']}
@app.post("/personalized_simulation")
def personalized_simulation(text: str):
    """Classify patient text with the Bio_ClinicalBERT sequence classifier.

    Returns {"predicted_label": <int index of the argmax logit>}; what each
    index means depends on the checkpoint's classification head.

    NOTE(review): a bare ``str`` parameter on a FastAPI POST route is a
    query parameter, not the request body — confirm that is intended.
    """
    inputs = personalized_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: no_grad avoids building the autograd graph at all
    # (the original built it and then .detach()ed), matching the style of
    # /virtual_consultation above.
    with torch.no_grad():
        outputs = personalized_model(**inputs)
    pred_label = int(outputs.logits.argmax())
    return {"predicted_label": pred_label}