# lab/app.py - created by DrishtiSharma (commit 83eb24a, verified)
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer
from unsloth import FastLanguageModel
# Model Setup
# Maximum sequence length handed to the loader.
max_seq_length = 2048
# Ask the loader for 4-bit quantised weights.
load_in_4bit = True
# Hub identifier of the checkpoint served by this app.
name = "large-traversaal/Phi-4-Hindi"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=name,
    max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit,
)

# This app only generates text and never trains, so no LoRA adapters are
# attached: wrapping the model with get_peft_model() would only add untrained
# adapter weights (and training-only settings such as gradient checkpointing)
# on top of the loaded checkpoint.
FastLanguageModel.for_inference(model)
def generate_response(task, input_text, temperature, top_p, max_tokens):
    """Generate a reply for ``input_text`` and return only the new text.

    Args:
        task: Task label selected in the UI. Accepted so the click handler's
            inputs line up; it is not currently folded into the prompt.
        input_text: User text inserted into the prompt template.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded completion, without the prompt, stripped of surrounding
        whitespace.
    """
    prompt = f"### INPUT : {input_text} RESPONSE : "
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)  # follow the model's device instead of hard-coding one

    outputs = model.generate(
        input_ids=input_ids,
        # UI sliders may deliver floats; the token budget must be an integer.
        max_new_tokens=int(max_tokens),
        use_cache=True,
        # Sampling has to be enabled for temperature/top_p to take effect.
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; slice them off so
    # only the completion is decoded, rather than searching the decoded text
    # for a marker string.
    completion_ids = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
# Gradio Interface
def gradio_ui():
    """Assemble the Blocks demo and return it without launching it.

    The layout is a heading, a row with the task selector and the input box,
    a row with the three generation sliders, the output box, and a button
    whose click runs ``generate_response``.
    """
    task_choices = [
        "Long Response", "Short Response", "NLI", "Translation", "MCQ", "Cross-Lingual"
    ]

    with gr.Blocks() as demo:
        gr.Markdown("## Test Space: Chat with Phi-4-Hindi")

        # Row 1: what to do and what to do it on.
        with gr.Row():
            task_box = gr.Dropdown(task_choices, label="Select Task")
            prompt_box = gr.Textbox(label="Input Text")

        # Row 2: generation controls.
        with gr.Row():
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
            top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")
            length_slider = gr.Slider(50, 800, value=200, step=50, label="Max Tokens")

        result_box = gr.Textbox(label="Generated Response")
        run_button = gr.Button("Generate")

        # Order matches the positional parameters of generate_response.
        handler_inputs = [task_box, prompt_box, temp_slider, top_p_slider, length_slider]
        run_button.click(generate_response, inputs=handler_inputs, outputs=result_box)

    return demo
# Launch Gradio App
# Build the interface once when the module is executed, bind it to the
# module-level name `demo`, and start serving it.
demo = gradio_ui()
demo.launch()