DrishtiSharma committed
Commit 83eb24a · verified · Parent: 706bc23

Create lab/app.py

Files changed (1):
  1. lab/app.py +72 -0
lab/app.py ADDED
@@ -0,0 +1,72 @@
+ import gradio as gr
+ from unsloth import FastLanguageModel
+
+ # Model Setup
+ max_seq_length = 2048
+ load_in_4bit = True
+ name = "large-traversaal/Phi-4-Hindi"
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name=name,
+     max_seq_length=max_seq_length,
+     load_in_4bit=load_in_4bit,
+ )
+
+ # Attach LoRA adapters before switching the model to inference mode
+ model = FastLanguageModel.get_peft_model(
+     model,
+     r=16,
+     target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+     lora_alpha=16,
+     lora_dropout=0,
+     bias="none",
+     use_gradient_checkpointing="unsloth",
+     random_state=3407,
+     use_rslora=False,
+     loftq_config=None,
+ )
+ FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference path
+
+ def generate_response(task, input_text, temperature, top_p, max_tokens):
+     # NOTE: `task` is collected from the UI but not yet folded into the prompt.
+     prompt = f"### INPUT : {input_text} RESPONSE : "
+     message = [{"role": "user", "content": prompt}]
+     inputs = tokenizer.apply_chat_template(
+         message, tokenize=True, add_generation_prompt=True, return_tensors="pt"
+     ).to("cuda")
+
+     outputs = model.generate(
+         input_ids=inputs,
+         max_new_tokens=int(max_tokens),  # Gradio sliders return floats; generate expects an int
+         use_cache=True,
+         do_sample=True,  # required so temperature/top_p actually take effect
+         temperature=temperature,
+         top_p=top_p,
+         pad_token_id=tokenizer.eos_token_id,
+     )
+
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Keep only the text generated after the assistant marker
+     processed_response = response.split("### RESPONSE :assistant")[-1].strip()
+     return processed_response
+
+ # Gradio Interface
+ def gradio_ui():
+     with gr.Blocks() as demo:
+         gr.Markdown("## Test Space: Chat with Phi-4-Hindi")
+         with gr.Row():
+             task = gr.Dropdown(
+                 ["Long Response", "Short Response", "NLI", "Translation", "MCQ", "Cross-Lingual"],
+                 label="Select Task",
+             )
+             input_text = gr.Textbox(label="Input Text")
+
+         with gr.Row():
+             temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
+             top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")
+             max_tokens = gr.Slider(50, 800, value=200, step=50, label="Max Tokens")
+
+         output_text = gr.Textbox(label="Generated Response")
+         btn = gr.Button("Generate")
+         btn.click(generate_response, inputs=[task, input_text, temperature, top_p, max_tokens], outputs=output_text)
+
+     return demo
+
+ # Launch Gradio App
+ demo = gradio_ui()
+ demo.launch()