DrishtiSharma committed
Commit c583e69 · verified · 1 Parent(s): 83eb24a

Update app.py

Files changed (1)
  1. app.py +89 -28
app.py CHANGED
@@ -1,8 +1,18 @@
  import gradio as gr
  import torch
- import torch.nn.functional as F
+ import time
+ import json
+ import uuid
+ import os
+ import pytz
+ from datetime import datetime
  from transformers import AutoTokenizer
  from unsloth import FastLanguageModel
+ from pathlib import Path
+ from huggingface_hub import CommitScheduler
+
+ # Load HF token from the environment
+ token = os.environ["HF_TOKEN"]

  # Model Setup
  max_seq_length = 2048
@@ -28,14 +38,45 @@ model = FastLanguageModel.get_peft_model(
  )
  FastLanguageModel.for_inference(model)

- def generate_response(task, input_text, temperature, top_p, max_tokens):
-     prompt = f"### INPUT : {input_text} RESPONSE : "
-     message = [{"role": "user", "content": prompt}]
-     inputs = tokenizer.apply_chat_template(message, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
+ # Task-Specific Prompt Mapping
+ option_mapping = {
+     "translation": "### TRANSLATION ###",
+     "mcq": "### MCQ ###",
+     "nli": "### NLI ###",
+     "summarization": "### SUMMARIZATION ###",
+     "long response": "### LONG RESPONSE ###",
+     "direct response": "### DIRECT RESPONSE ###",
+     "paraphrase": "### PARAPHRASE ###",
+     "code": "### CODE ###",
+ }
+
+ # Set up logging folder and CommitScheduler
+ log_folder = Path("logs")
+ log_folder.mkdir(parents=True, exist_ok=True)
+ log_file = log_folder / f"chat_log_{uuid.uuid4()}.json"
+
+ scheduler = CommitScheduler(
+     repo_id="DrishtiSharma/phi-4-unsloth-logs",
+     repo_type="dataset",
+     folder_path=log_folder,
+     path_in_repo="data",
+     every=10,
+     token=token
+ )
+
+ # Fixed timezone
+ timezone = pytz.timezone("UTC")
+
+ def generate_response(message, temperature, max_new_tokens, top_p, task):
+     append_text = option_mapping.get(task, "")
+     prompt = f"### INPUT : {message} {append_text} RESPONSE : "
+     print(f"Prompt: {prompt}")
+     start_time = time.time()
+     inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

      outputs = model.generate(
          input_ids=inputs,
-         max_new_tokens=max_tokens,
+         max_new_tokens=max_new_tokens,
          use_cache=True,
          temperature=temperature,
          top_p=top_p,
@@ -43,30 +84,50 @@ def generate_response(task, input_text, temperature, top_p, max_tokens):
      )

      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     processed_response = response.split("### RESPONSE :assistant")[-1].strip()
+     processed_response = response.split("### RESPONSE :")[-1].strip()
+     end_time = time.time()
+
+     response_time = round(end_time - start_time, 2)
+     timestamp = datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z")
+     log_data = {
+         "timestamp": timestamp,
+         "input": message,
+         "output": processed_response,
+         "response_time": response_time,
+         "temperature": temperature,
+         "max_tokens": max_new_tokens,
+         "top_p": top_p
+     }
+
+     with scheduler.lock:
+         with log_file.open("a") as f:
+             f.write(json.dumps(log_data) + "\n")
+
      return processed_response

- # Gradio Interface
- def gradio_ui():
-     with gr.Blocks() as demo:
-         gr.Markdown("## Test Space: Chat with Phi-4-Hindi")
-         with gr.Row():
-             task = gr.Dropdown([
-                 "Long Response", "Short Response", "NLI", "Translation", "MCQ", "Cross-Lingual"
-             ], label="Select Task")
-             input_text = gr.Textbox(label="Input Text")
-
-         with gr.Row():
-             temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
-             top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")
-             max_tokens = gr.Slider(50, 800, value=200, step=50, label="Max Tokens")
-
-         output_text = gr.Textbox(label="Generated Response")
-         btn = gr.Button("Generate")
-         btn.click(generate_response, inputs=[task, input_text, temperature, top_p, max_tokens], outputs=output_text)
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("## Chat with Phi-4-Hindi")
+
+     task_dropdown = gr.Dropdown(
+         choices=list(option_mapping.keys()),
+         value="long response",
+         label="Select Task"
+     )
+     message_input = gr.Textbox(label="Enter your message")

-     return demo
+     with gr.Row():
+         temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
+         top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")
+         max_tokens_slider = gr.Slider(50, 800, value=200, step=50, label="Max Tokens")
+
+     output_box = gr.Textbox(label="Generated Response")
+     generate_btn = gr.Button("Generate")
+
+     generate_btn.click(
+         generate_response,
+         inputs=[message_input, temperature_slider, max_tokens_slider, top_p_slider, task_dropdown],
+         outputs=output_box
+     )

- # Launch Gradio App
- demo = gradio_ui()
  demo.launch()
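For reference, the task tag from option_mapping is spliced between the user input and the RESPONSE marker. A minimal sketch of the prompt assembly in generate_response(), using a hypothetical input and the "translation" task (only an excerpt of option_mapping is shown):

# Sketch of the prompt built by generate_response(); input value is hypothetical.
option_mapping = {"translation": "### TRANSLATION ###"}  # excerpt

message = "Translate to Hindi: How are you?"
append_text = option_mapping.get("translation", "")
prompt = f"### INPUT : {message} {append_text} RESPONSE : "
print(prompt)
# ### INPUT : Translate to Hindi: How are you? ### TRANSLATION ### RESPONSE :

The later split on "### RESPONSE :" matches because the task tag ends in "###"; for a task missing from option_mapping, append_text would be empty and the split marker would not appear in the prompt at all.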
 
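Each call to generate_response() appends one JSON object per line to a per-session file under logs/, and CommitScheduler pushes that folder to the data/ path of the DrishtiSharma/phi-4-unsloth-logs dataset repo (every=10 should mean roughly every 10 minutes, since CommitScheduler's every parameter is expressed in minutes). A minimal sketch of reading one of these logs back, assuming records shaped like the log_data dict above; the file name here is hypothetical:

import json
from pathlib import Path

# Hypothetical name; the app writes logs/chat_log_<uuid>.json
log_path = Path("logs") / "chat_log_example.json"

records = []
with log_path.open() as f:
    for line in f:
        # One JSON object per line, as written by generate_response()
        records.append(json.loads(line))

for r in records:
    print(r["timestamp"], f'{r["response_time"]}s', r["output"][:80])

Despite the .json extension, the file is effectively JSON Lines, which is why it is read line by line rather than with a single json.load().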