import gradio as gr
import torch
import time
import json
import uuid
import os
import pytz
from datetime import datetime
from unsloth import FastLanguageModel
from pathlib import Path
from huggingface_hub import CommitScheduler

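# Model loading: Phi-4-Hindi is loaded in 4-bit via Unsloth. Attaching LoRA adapters with
# get_peft_model is not strictly needed for inference (freshly initialised adapters leave the
# outputs unchanged, since lora_B starts at zero); it is presumably kept to mirror the
# fine-tuning setup.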
def load_model():
    model_name = "large-traversaal/Phi-4-Hindi"
    max_seq_length = 2048
    load_in_4bit = True
    
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name, 
        max_seq_length=max_seq_length, 
        load_in_4bit=load_in_4bit,
    )
    
    model = FastLanguageModel.get_peft_model(
        model,
        r=16, 
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16, 
        lora_dropout=0, 
        bias="none", 
        use_gradient_checkpointing="unsloth",
        random_state=3407, 
        use_rslora=False,
        loftq_config=None,
    )
    FastLanguageModel.for_inference(model)
    return model, tokenizer

# Load model and tokenizer
model, tokenizer = load_model()

# Set up logging folder and CommitScheduler
log_folder = Path("logs")
log_folder.mkdir(parents=True, exist_ok=True)
log_file = log_folder / f"chat_log_{uuid.uuid4()}.jsonl"  # one JSON object per line (JSON Lines)

token = os.getenv("HF_TOKEN", "")

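# CommitScheduler pushes the contents of the local `logs/` folder to the dataset repo
# (under `data/`) roughly every 10 minutes; log writes below are wrapped in `scheduler.lock`
# so a background commit never captures a half-written line.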
scheduler = CommitScheduler(
    repo_id="DrishtiSharma/phi-4-unsloth-log-v2",  
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=10,
    token=token
)

# UTC Timezone
timezone = pytz.timezone("UTC")


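# Generation: requests follow the "### INPUT : ... RESPONSE :" convention used throughout this
# Space; the decoded output is split on the response marker to drop the echoed prompt.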
def generate_model_response(input_text, temperature, max_new_tokens, top_p):
    """Generate a response for the given input using the ### INPUT / RESPONSE prompt format."""

    # Wrap the request as a single user message for the chat template
    prompt = f"### INPUT : {input_text} RESPONSE : "
    message = [{"role": "user", "content": prompt}]

    inputs = tokenizer.apply_chat_template(
        message, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")

    outputs = model.generate(
        input_ids=inputs, 
        max_new_tokens=max_new_tokens, 
        use_cache=True, 
        temperature=temperature, 
        top_p=top_p, 
        pad_token_id=tokenizer.eos_token_id
    )

    # Keep only the text after the response marker (falls back to the full decode if the marker is absent)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    processed_response = response.split("### RESPONSE :assistant")[-1].strip()

    return processed_response


def log_data(input_text, output_text, response_time, temperature, max_new_tokens, top_p):
    """Logs each request/response pair and its generation settings as one JSON line."""
    timestamp = datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z")

    log_entry = {
        "timestamp": timestamp,
        "input": input_text,
        "output": output_text,
        "response_time": response_time,
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p
    }

    # Hold the scheduler lock so a background commit never captures a partial write
    with scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")

def process_request(input_text, temperature, max_new_tokens, top_p):
    """Handles request processing, response generation, and logging."""
    start_time = time.time()
    response = generate_model_response(input_text, temperature, max_new_tokens, top_p)
    end_time = time.time()
    response_time = round(end_time - start_time, 2)
    
    log_data(input_text, response, response_time, temperature, max_new_tokens, top_p)
    return response


# Define examples
examples = [
    ["I want to cook Idli. Could you please provide the recipe in Hindi?", "Long Response"],
    ["Plan a trip to Hyderabad in Hindi.", "Long Response"],
    ["टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?","Long Response"],
    ["टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?", "Short Response"],
    ["पोईरोट आगे कह रहा थाः उस दिन, मसीहीयों, छाया में तापमान 80 डिग्री था। उस दिन काफी गर्मी थी।", "NLI"],
    ["This model was trained on Hindi and English data over qwen-2.5-14b.", "Translation"],
    ["इस मॉडल को हिंदी और अंग्रेजी डेटा पर प्रशिक्षित किया गया था", "Translation"],
    ["how do you play fetch? A) throw the object for the dog to get and bring back to you. B) get the object and bring it back to the dog.", "MCQ"],
]
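# Each example pairs a prompt with a task-type label (Long Response, NLI, Translation, MCQ);
# only the prompt column is filled into the input box below.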
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# **Test Space: Phi-4-Hindi**")
    gr.Markdown("### A chatbot that can generate long and short responses, NLI, translations, and MCQs.")

    with gr.Row():
        # LEFT COLUMN: Sliders & Example Selection
        with gr.Column(scale=1):
            gr.Markdown("## **Configuration**")

            temperature = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="Temperature")
            max_new_tokens = gr.Slider(180, 4096, value=1000, step=100, label="Max Tokens")
            top_p = gr.Slider(0.1, 1.0, value=0.1, step=0.1, label="Top_p")

        # RIGHT COLUMN: Input Box & Chat Output
        with gr.Column(scale=2):
            gr.Markdown("## **Chat with Phi-4-Hindi**")

            input_box = gr.Textbox(lines=5, placeholder="Enter your query here...", label="User Input")

            submit_button = gr.Button("Generate Response", variant="primary")

            output_box = gr.Textbox(lines=5, placeholder="Response will appear here...", interactive=False, label="Response")

            submit_button.click(
                fn=process_request,
                inputs=[input_box, temperature, max_new_tokens, top_p],
                outputs=[output_box]
            )

    # `gr.Examples` is placed after `input_box` so the component exists when the examples are
    # registered; only the prompt column is passed, matching the single input component.
    gr.Markdown("## **Examples**")
    gr.Examples(
        examples=[[example[0]] for example in examples],
        inputs=[input_box],
        label="Select an example to autofill"
    )

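# Launch the app. Running this script assumes a CUDA-capable GPU (inputs are moved to "cuda")
# and an HF_TOKEN with write access to the log dataset set in the environment.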
demo.launch()