"""FastAPI service that answers a query by retrieving context and generating a response."""

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from utils import retrive_context, generate_response

# Value the utils helpers' results are compared against to detect a failure.
_FAILURE_CODE = 500

# Initialize FastAPI
app = FastAPI()


class QueryRequest(BaseModel):
    # Request body of /infer: the asked query, which must be a string.
    query: str


class QueryResponse(BaseModel):
    # Response body of /infer: the produced answer, which must be a string.
    response: str


def _ensure_succeeded(result, detail):
    """Return *result* unchanged, or raise HTTP 500 with *detail* if it equals the failure code."""
    if result == _FAILURE_CODE:
        raise HTTPException(status_code=500, detail=detail)
    return result


# Inference endpoint: look up context for the query, then generate a response
# from the query and that context. Each step is checked before moving on, so
# generation is never attempted when context retrieval reported a failure.
# (Kept as comments: FastAPI publishes route docstrings in the OpenAPI schema.)
@app.post("/infer", response_model=QueryResponse)
def infer(query_request: QueryRequest):
    question = query_request.query

    retrieved = _ensure_succeeded(
        retrive_context(question),
        "Error retrieving context",
    )
    answer = _ensure_succeeded(
        generate_response(question, retrieved),
        "Error generating response",
    )

    return QueryResponse(response=answer)


# Root endpoint for testing
@app.get("/")
def read_root():
    status = {"message": "Inference API is running"}
    return status


if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the module is run as a script.
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")