Spaces:

DrishtiSharma
/

phi-4-unsloth-test-space-v2

Sleeping

App Files Files Community

phi-4-unsloth-test-space-v2 / only_long_response.py

DrishtiSharma

Update only_long_response.py

7bd6adf verified 17 days ago

raw

history blame contribute delete

7.05 kB

	import gradio as gr
	import torch
	import time
	import json
	import uuid
	import os
	import pytz
	from langdetect import detect
	from datetime import datetime
	from unsloth import FastLanguageModel
	from transformers import AutoTokenizer
	from pathlib import Path
	from huggingface_hub import CommitScheduler

	def load_model(
	    model_name="large-traversaal/Phi-4-Hindi",
	    max_seq_length=2048,
	    load_in_4bit=True,
	):
	    """Load a model and its tokenizer through Unsloth and prepare them for inference.

	    Args:
	        model_name: Hub id (or path) handed to ``FastLanguageModel.from_pretrained``.
	        max_seq_length: Sequence length handed to ``from_pretrained``.
	        load_in_4bit: Whether to request 4-bit loading from ``from_pretrained``.

	    Returns:
	        A ``(model, tokenizer)`` tuple; ``FastLanguageModel.for_inference`` has
	        already been called on the model.
	    """
	    model, tokenizer = FastLanguageModel.from_pretrained(
	        model_name=model_name,
	        max_seq_length=max_seq_length,
	        load_in_4bit=load_in_4bit,
	    )

	    # NOTE(review): this wraps the loaded model with newly created LoRA
	    # adapters even though the result is only used for inference here --
	    # confirm whether this step is actually required.
	    model = FastLanguageModel.get_peft_model(
	        model,
	        r=16,
	        target_modules=[
	            "q_proj",
	            "k_proj",
	            "v_proj",
	            "o_proj",
	            "gate_proj",
	            "up_proj",
	            "down_proj",
	        ],
	        lora_alpha=16,
	        lora_dropout=0,
	        bias="none",
	        use_gradient_checkpointing="unsloth",
	        random_state=3407,
	        use_rslora=False,
	        loftq_config=None,
	    )
	    FastLanguageModel.for_inference(model)
	    return model, tokenizer

	# Load model and tokenizer
	model, tokenizer = load_model()

	# Set up logging folder and CommitScheduler
	log_folder = Path("logs")
	log_folder.mkdir(parents=True, exist_ok=True)
	# A new UUID is drawn each time this module runs, so every run writes to its
	# own log file inside the shared folder.
	log_file = log_folder / f"chat_log_{uuid.uuid4()}.json"

	# An unset or empty HF_TOKEN becomes None rather than "": an empty string
	# would be sent as an explicit (invalid) token, whereas None lets
	# huggingface_hub fall back to any credentials it already has stored.
	token = os.getenv("HF_TOKEN") or None

	# Background uploader: pushes the contents of `log_folder` to the dataset
	# repo under `data/`; `every` is expressed in minutes.
	scheduler = CommitScheduler(
	    repo_id="DrishtiSharma/phi-4-unsloth-log-v2",
	    repo_type="dataset",
	    folder_path=log_folder,
	    path_in_repo="data",
	    every=10,
	    token=token,
	)

	# UTC Timezone
	timezone = pytz.timezone("UTC")

	import langdetect # For detecting language
	from langdetect import detect

	def generate_model_response(input_text, temperature, max_new_tokens, top_p):
	    """Generate the model's reply to ``input_text``.

	    The text is embedded in the ``### INPUT : ... RESPONSE : `` prompt, rendered
	    through the tokenizer's chat template as a single user message, and passed
	    to ``model.generate``.

	    Args:
	        input_text: Raw user query.
	        temperature: Sampling temperature forwarded to ``generate``.
	        max_new_tokens: Upper bound on generated tokens; coerced to ``int``
	            because it arrives from a UI slider.
	        top_p: Nucleus-sampling threshold forwarded to ``generate``.

	    Returns:
	        The decoded completion only (prompt excluded), with special tokens
	        removed and surrounding whitespace stripped.
	    """
	    # Create prompt for the model
	    prompt = f"### INPUT : {input_text} RESPONSE : "
	    message = [{"role": "user", "content": prompt}]

	    inputs = tokenizer.apply_chat_template(
	        message, tokenize=True, add_generation_prompt=True, return_tensors="pt"
	    ).to("cuda")

	    outputs = model.generate(
	        input_ids=inputs,
	        max_new_tokens=int(max_new_tokens),
	        use_cache=True,
	        # temperature/top_p only take effect in sampling mode; without this
	        # flag generation falls back to greedy decoding (unless the model's
	        # own generation config enables sampling) and the sliders do nothing.
	        do_sample=True,
	        temperature=temperature,
	        top_p=top_p,
	        pad_token_id=tokenizer.eos_token_id,
	    )

	    # The returned sequence starts with the prompt tokens. Slicing them off
	    # is more reliable than searching the decoded text for a marker string,
	    # which leaks the echoed prompt whenever the marker is not found.
	    prompt_length = inputs.shape[-1]
	    completion_ids = outputs[0][prompt_length:]
	    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


	def log_data(input_text, output_text, response_time, temperature, max_new_tokens, top_p):
	    """Append one request/response record to the current log file.

	    The record is serialised as a single JSON object followed by a newline.
	    The write happens while holding ``scheduler.lock``.
	    """
	    record = dict(
	        timestamp=datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z"),
	        input=input_text,
	        output=output_text,
	        response_time=response_time,
	        temperature=temperature,
	        max_new_tokens=max_new_tokens,
	        top_p=top_p,
	    )

	    with scheduler.lock, log_file.open("a", encoding="utf-8") as sink:
	        sink.write(json.dumps(record, ensure_ascii=False) + "\n")

	def process_request(input_text, temperature, max_new_tokens, top_p):
	    """Generate a response for one request, log it, and return it.

	    Args:
	        input_text: Raw user query.
	        temperature: Sampling temperature passed through to generation.
	        max_new_tokens: Generation length limit passed through to generation.
	        top_p: Nucleus-sampling threshold passed through to generation.

	    Returns:
	        The text returned by ``generate_model_response``.
	    """
	    # perf_counter is monotonic, so the measured duration cannot be skewed by
	    # wall-clock adjustments the way time.time() differences can.
	    started = time.perf_counter()
	    response = generate_model_response(input_text, temperature, max_new_tokens, top_p)
	    response_time = round(time.perf_counter() - started, 2)

	    log_data(input_text, response, response_time, temperature, max_new_tokens, top_p)
	    return response


	# Define examples
	# Each row is a two-item list: the prompt text followed by a task label.
	examples = [
	    list(pair)
	    for pair in (
	        ("I want to cook Idli. Could you please provide the recipe in Hindi?", "Long Response"),
	        ("Plan a trip to Hyderabad in Hindi.", "Long Response"),
	        ("टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?", "Long Response"),
	        ("टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?", "Short Response"),
	        ("पोईरोट आगे कह रहा थाः उस दिन, मसीहीयों, छाया में तापमान 80 डिग्री था। उस दिन काफी गर्मी थी।", "NLI"),
	        ("This model was trained on Hindi and English data over qwen-2.5-14b.", "Translation"),
	        ("इस मॉडल को हिंदी और अंग्रेजी डेटा पर प्रशिक्षित किया गया था", "Translation"),
	        ("how do you play fetch? A) throw the object for the dog to get and bring back to you. B) get the object and bring it back to the dog.", "MCQ"),
	    )
	]
	# Gradio UI
	# Page layout: two headings, then a row holding a configuration column
	# (three sampling sliders) and a chat column (query box, submit button,
	# read-only response box), followed by the clickable examples.
	with gr.Blocks() as demo:
	gr.Markdown("# Test Space: Phi-4-Hindi")
	gr.Markdown("### A chatbot that can generate long and short responses, NLI, translations, and MCQs.")

	with gr.Row():
	# LEFT COLUMN: Sliders & Example Selection
	with gr.Column(scale=1):
	gr.Markdown("## Configuration")

	temperature = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="Temperature")
	# NOTE(review): the default of 1000 is not on the 180 + k*100 step grid, and
	# the upper bound of 4096 exceeds the max_seq_length of 2048 used when the
	# model is loaded -- confirm both are intended.
	max_new_tokens = gr.Slider(180, 4096, value=1000, step=100, label="Max Tokens")
	top_p = gr.Slider(0.1, 1.0, value=0.1, step=0.1, label="Top_p")

	# RIGHT COLUMN: Input Box & Chat Output
	with gr.Column(scale=2):
	gr.Markdown("## Chat with Phi-4-Hindi")

	input_box = gr.Textbox(lines=5, placeholder="Enter your query here...", label="User Input")

	submit_button = gr.Button("Generate Response", variant="primary")

	output_box = gr.Textbox(lines=5, placeholder="Response will appear here...", interactive=False, label="Response")

	# Clicking the button calls process_request with the query text and the
	# three slider values; its return value is shown in output_box.
	submit_button.click(
	fn=process_request,
	inputs=[input_box, temperature, max_new_tokens, top_p],
	outputs=[output_box]
	)

	# Place `gr.Examples` AFTER `input_box` is defined
	# NOTE(review): every row of `examples` holds two values, but only
	# `input_box` is listed in `inputs` -- confirm the second value (the task
	# label) is meant to go unused.
	gr.Markdown("## Examples")
	gr.Examples(
	examples=examples,
	inputs=[input_box],
	label="Select an example to autofill"
	)

	# Start serving the app.
	demo.launch()