Upload app.py
app/app.py (+59 -0)
ADDED
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+"""app
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/11wiIj_rvhSCb_ULZJmOMUhIwInf_QYiW
+"""
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+from peft import PeftModel
+import os
+
+app = FastAPI()
+
+# Load the base model first
+base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+adapter_base_path = "./tinyllama-lora-finetuned"
+
+# This assumes checkpoints are named like 'checkpoint-XXX'
+checkpoints = [d for d in os.listdir(adapter_base_path) if os.path.isdir(os.path.join(adapter_base_path, d)) and d.startswith('checkpoint-')]
+if not checkpoints:
+    raise FileNotFoundError(f"No checkpoints found in {adapter_base_path}")
+
+# Sort checkpoints to find the latest one (based on checkpoint number)
+latest_checkpoint = sorted(checkpoints, key=lambda x: int(x.split('-')[1]))[-1]
+adapter_path = os.path.join(adapter_base_path, latest_checkpoint)  # Point to the latest checkpoint directory
+
+print(f"Loading adapter from: {adapter_path}")  # Print the path being loaded
+
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32)
+
+# Load the LoRA adapter weights onto the base model from the specific checkpoint path
+model = PeftModel.from_pretrained(base_model, adapter_path)
+model.eval()
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+class Query(BaseModel):
+    question: str
+
+@app.post("/generate/")
+def generate_answer(query: Query):
+    input_text = f"### Question:\n{query.question}\n\n### Answer:\n"
+    inputs = tokenizer(input_text, return_tensors="pt").to(device)
+    outputs = model.generate(**inputs, max_length=512, num_return_sequences=1)
+    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    answer_start_index = decoded_output.find("### Answer:")
+    if answer_start_index != -1:
+        answer = decoded_output[answer_start_index + len("### Answer:"):].strip()
+    else:
+        answer = decoded_output.strip()
+
+    return {"answer": answer}
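A note on the checkpoint discovery above: the numeric sort key matters, because a plain lexicographic sort would rank "checkpoint-1000" before "checkpoint-900" and pick the wrong "latest" directory. A quick sketch of the difference, using hypothetical checkpoint names rather than anything from this repo:

names = ["checkpoint-900", "checkpoint-1000", "checkpoint-100"]
print(sorted(names)[-1])
# 'checkpoint-900' -- lexicographic order picks the wrong "latest"
print(sorted(names, key=lambda x: int(x.split('-')[1]))[-1])
# 'checkpoint-1000' -- numeric sort on the step count picks correctly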
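Once the app is running (for example with uvicorn app.app:app --host 0.0.0.0 --port 8000; the module path and port here are assumptions, not part of this commit), the /generate/ endpoint can be exercised with a minimal client sketch like this:

import requests

resp = requests.post(
    "http://localhost:8000/generate/",
    json={"question": "What does LoRA fine-tuning change in the base model?"},
    timeout=120,  # CPU generation with a 1.1B-parameter model can take a while
)
resp.raise_for_status()
print(resp.json()["answer"])

The JSON body mirrors the Query model (a single question field), and the response carries the answer string with the prompt scaffolding stripped off.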