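"""Gradio demo for chatting with large-traversaal/Phi-4-Hindi.

Loads the model in 4-bit with Unsloth and periodically pushes chat logs to a
Hugging Face dataset repo via CommitScheduler.
"""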
# Import unsloth first so its patches to transformers apply.
from unsloth import FastLanguageModel

import json
import os
import time
import uuid
from datetime import datetime
from pathlib import Path

import gradio as gr
import pytz
from huggingface_hub import CommitScheduler
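
# Hugging Face token (e.g., a Space secret); authorizes the log uploads below.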
token = os.environ["HF_TOKEN"]
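
# Load the model and tokenizer; 4-bit quantization keeps the memory footprint
# small enough for a single modest GPU.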
max_seq_length = 2048
load_in_4bit = True
name = "large-traversaal/Phi-4-Hindi"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=name,
    max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit,
)
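
# Attach LoRA adapters (Unsloth's standard PEFT setup), then switch the model
# into Unsloth's optimized inference mode.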
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)
FastLanguageModel.for_inference(model)
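
# Task tags appended to each prompt; Phi-4-Hindi uses these markers to decide
# which task to perform.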
option_mapping = {
    "translation": "### TRANSLATION ###",
    "mcq": "### MCQ ###",
    "nli": "### NLI ###",
    "summarization": "### SUMMARIZATION ###",
    "long response": "### LONG RESPONSE ###",
    "direct response": "### DIRECT RESPONSE ###",
    "paraphrase": "### PARAPHRASE ###",
    "code": "### CODE ###",
}
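
# Each app instance writes to its own UUID-named log file, so concurrent
# replicas never append to the same file.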
log_folder = Path("logs")
log_folder.mkdir(parents=True, exist_ok=True)
log_file = log_folder / f"chat_log_{uuid.uuid4()}.json"  # one JSON object per line
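
# Push the log folder to the dataset repo in the background.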
scheduler = CommitScheduler(
    repo_id="DrishtiSharma/phi-4-unsloth-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=10,  # minutes between commits
    token=token,
)

timezone = pytz.timezone("UTC")
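
# Generate a reply: build the tagged prompt, sample from the model, log the
# exchange, and return the text after the response marker.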
def generate_response(message, temperature, max_new_tokens, top_p, task):
    # Build the prompt in the tag format Phi-4-Hindi expects, e.g. for
    # "translation": "### INPUT : <message> ### TRANSLATION ### RESPONSE : "
    append_text = option_mapping.get(task, "")
    prompt = f"### INPUT : {message} {append_text} RESPONSE : "
    print(f"Prompt: {prompt}")

    start_time = time.time()
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        do_sample=True,  # without sampling, temperature and top_p are ignored
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Drop the echoed prompt: keep only the text after "### RESPONSE :".
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    processed_response = response.split("### RESPONSE :")[-1].strip()
    end_time = time.time()

    response_time = round(end_time - start_time, 2)
    timestamp = datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z")
    log_data = {
        "timestamp": timestamp,
        "input": message,
        "output": processed_response,
        "response_time": response_time,
        "temperature": temperature,
        "max_tokens": max_new_tokens,
        "top_p": top_p,
    }

    # Hold the scheduler's lock so a background commit never uploads the file
    # mid-write; each request appends one JSON record per line.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(log_data) + "\n")

    return processed_response
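
# Gradio UI: task selector, message box, sampling controls, and output box.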
with gr.Blocks() as demo:
    gr.Markdown("## Chat with Phi-4-Hindi")

    task_dropdown = gr.Dropdown(
        choices=list(option_mapping.keys()),
        value="long response",
        label="Select Task",
    )
    message_input = gr.Textbox(label="Enter your message")

    with gr.Row():
        temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
        top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")
        max_tokens_slider = gr.Slider(50, 800, value=200, step=50, label="Max Tokens")

    output_box = gr.Textbox(label="Generated Response")
    generate_btn = gr.Button("Generate")

    # The inputs list must match generate_response's parameter order.
    generate_btn.click(
        generate_response,
        inputs=[message_input, temperature_slider, max_tokens_slider, top_p_slider, task_dropdown],
        outputs=output_box,
    )

demo.launch()
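
# Optional: calling demo.queue() before launch() queues concurrent requests,
# which helps when generations are slow.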