import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer
from unsloth import FastLanguageModel


# Model configuration.
max_seq_length = 2048  # maximum context length for tokenization/generation
load_in_4bit = True    # 4-bit quantization to cut GPU memory usage
name = "large-traversaal/Phi-4-Hindi"

# Load the base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=name,
    max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit,
)

# Attach LoRA adapters to the quantized base model.
# NOTE(review): no training happens anywhere in this script, so these
# adapters stay randomly initialized — they add compute without improving
# output. Confirm whether this call is actually needed for inference-only
# use, or whether trained adapter weights should be loaded instead.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Switch the model into Unsloth's optimized inference mode.
FastLanguageModel.for_inference(model)
|
|
|
def generate_response(task, input_text, temperature, top_p, max_tokens):
    """Generate a model response for *input_text* and return the decoded text.

    Args:
        task: Task label selected in the UI. NOTE(review): currently unused —
            the prompt template ignores it; kept for interface compatibility
            with the Gradio callback.
        input_text: User-provided text inserted into the prompt template.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded model response with the prompt portion removed.
    """
    prompt = f"### INPUT : {input_text} RESPONSE : "
    message = [{"role": "user", "content": prompt}]
    inputs = tokenizer.apply_chat_template(
        message,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=max_tokens,
        use_cache=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,  # avoid missing-pad-token warning
    )

    # Decode only the newly generated tokens: decoding outputs[0] in full
    # echoes the prompt/chat-template text, and the "### RESPONSE :assistant"
    # marker does not reliably appear in that decoded text, so the original
    # split-based stripping silently returned the prompt too.
    generated_tokens = outputs[0][inputs.shape[-1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    # Keep the original marker split as a defensive fallback (no-op when the
    # marker is absent: split on a missing separator returns the whole text).
    return response.split("### RESPONSE :assistant")[-1].strip()
|
|
|
|
|
def gradio_ui():
    """Build and return the Gradio Blocks demo for chatting with Phi-4-Hindi."""
    with gr.Blocks() as demo:
        gr.Markdown("## Test Space: Chat with Phi-4-Hindi")

        # Task selection and free-text input.
        with gr.Row():
            task = gr.Dropdown(
                ["Long Response", "Short Response", "NLI", "Translation", "MCQ", "Cross-Lingual"],
                label="Select Task",
            )
            input_text = gr.Textbox(label="Input Text")

        # Sampling controls forwarded to model.generate().
        with gr.Row():
            temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")
            max_tokens = gr.Slider(50, 800, value=200, step=50, label="Max Tokens")

        output_text = gr.Textbox(label="Generated Response")
        btn = gr.Button("Generate")
        btn.click(
            generate_response,
            inputs=[task, input_text, temperature, top_p, max_tokens],
            outputs=output_text,
        )

    return demo
|
|
|
|
|
# Build the UI at module level (kept for backward compatibility with
# deployments that import `demo`), but only launch when run as a script.
demo = gradio_ui()

if __name__ == "__main__":
    demo.launch()