import gradio as gr
import torch
import time
import json
import uuid
import os
import pytz

from langdetect import detect
from datetime import datetime
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
from pathlib import Path
from huggingface_hub import CommitScheduler

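# Load Phi-4-Hindi in 4-bit with Unsloth, attach LoRA adapters, and put the model in inference mode.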
def load_model():
    model_name = "large-traversaal/Phi-4-Hindi"
    max_seq_length = 2048
    load_in_4bit = True

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        load_in_4bit=load_in_4bit,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )

    FastLanguageModel.for_inference(model)
    return model, tokenizer

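# Instantiate the model and tokenizer once at startup.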
model, tokenizer = load_model()

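# Per-session log file; CommitScheduler pushes the logs folder to a Hugging Face dataset repo every 10 minutes.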
log_folder = Path("logs") |
|
log_folder.mkdir(parents=True, exist_ok=True) |
|
log_file = log_folder / f"chat_log_{uuid.uuid4()}.json" |
|
|
|
token = os.getenv("HF_TOKEN", "") |
|
|
|
scheduler = CommitScheduler( |
|
repo_id="DrishtiSharma/phi-4-unsloth-no-task-type-log", |
|
repo_type="dataset", |
|
folder_path=log_folder, |
|
path_in_repo="data", |
|
every=10, |
|
token=token |
|
) |
|
|
|
|
timezone = pytz.timezone("UTC")

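# Generation runs on CUDA (see .to("cuda") below), so this Space needs GPU hardware.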
def generate_model_response(input_text, task_type, temperature, max_new_tokens, top_p):
    """Generates a model response based on the user input and selected task type."""

    # Prompt markers for each task type ("सीधा उत्तर" is Hindi for "direct answer").
    # Translation gets a language-specific marker below instead of this table.
    task_prompts = {
        "Long Response": "### LONG RESPONSE ###",
        "Short Response": "### सीधा उत्तर ###",
        "NLI": "### NLI ###",
        "Translation": "### TRANSLATION ###",
        "MCQ": "### MCQ ###",
    }

    if task_type == "Translation":
        # Detect the input language and translate in the opposite direction.
        detected_lang = detect(input_text)
        if detected_lang == "hi":
            task_suffix = "### TRANSLATION [English] ###"
        else:
            task_suffix = "### TRANSLATION [Hindi] ###"
    else:
        task_suffix = task_prompts.get(task_type, "")

    prompt = f"### INPUT : {input_text} {task_suffix} RESPONSE : "
    message = [{"role": "user", "content": prompt}]

    inputs = tokenizer.apply_chat_template(
        message, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode the full sequence and keep only the text after the assistant turn marker.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    processed_response = response.split("### RESPONSE :assistant")[-1].strip()

    return processed_response

def log_data(input_text, task_type, output_text, response_time, temperature, max_new_tokens, top_p):
    """Logs the response and request metadata as one JSON line per interaction."""
    timestamp = datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z")

    log_entry = {
        "timestamp": timestamp,
        "task_type": task_type,
        "input": input_text,
        "output": output_text,
        "response_time": response_time,
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
    }

    # Hold the scheduler lock so a background commit never runs mid-write.
    with scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")

def process_request(input_text, task_type, temperature, max_new_tokens, top_p):
    """Handles request processing, response generation, and logging."""
    start_time = time.time()
    response = generate_model_response(input_text, task_type, temperature, max_new_tokens, top_p)
    end_time = time.time()
    response_time = round(end_time - start_time, 2)

    log_data(input_text, task_type, response, response_time, temperature, max_new_tokens, top_p)
    return response

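# Example prompts for the UI; each row pre-fills the input box and the task type dropdown.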
examples = [
    ["I want to cook Idli. Could you please provide the recipe in Hindi?", "Long Response"],
    ["Plan a trip to Hyderabad in Hindi.", "Long Response"],
    ["टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?", "Long Response"],
    ["टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?", "Short Response"],
    ["पोईरोट आगे कह रहा थाः उस दिन, मसीहीयों, छाया में तापमान 80 डिग्री था। उस दिन काफी गर्मी थी।", "NLI"],
    ["This model was trained on Hindi and English data over qwen-2.5-14b.", "Translation"],
    ["इस मॉडल को हिंदी और अंग्रेजी डेटा पर प्रशिक्षित किया गया था", "Translation"],
    ["how do you play fetch? A) throw the object for the dog to get and bring back to you. B) get the object and bring it back to the dog.", "MCQ"],
]

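# Gradio UI: a settings column (task type and sampling controls) beside the input/output column.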
with gr.Blocks() as demo:
    gr.Markdown("# **Test Space: Phi-4-Hindi**")
    gr.Markdown("### Test Space")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## **Configure Settings**")

            task_type = gr.Dropdown(
                label="Task Type",
                choices=["Long Response", "Short Response", "NLI", "Translation", "MCQ"],
                value="Long Response",
            )

            temperature = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="Temperature")
            max_new_tokens = gr.Slider(180, 4096, value=1000, step=100, label="Max Tokens")
            top_p = gr.Slider(0.1, 1.0, value=0.1, step=0.1, label="Top_p")

        with gr.Column(scale=2):
            input_box = gr.Textbox(lines=5, placeholder="Enter your query here...", label="User Input")
            submit_button = gr.Button("Generate Response", variant="primary")
            output_box = gr.Textbox(lines=5, placeholder="Response will appear here...", interactive=False, label="Response")

            submit_button.click(
                fn=process_request,
                inputs=[input_box, task_type, temperature, max_new_tokens, top_p],
                outputs=[output_box],
            )

    gr.Markdown("## **Examples**")
    gr.Examples(
        examples=examples,
        inputs=[input_box, task_type],
        label="Select an example to autofill",
    )

demo.launch()