DrishtiSharma committed
Commit c02101c · verified · 1 Parent(s): 4bbae32

Create only_long_response.py

Files changed (1)
  1. only_long_response.py +66 -0
only_long_response.py ADDED
@@ -0,0 +1,66 @@
+ import gradio as gr
+ import torch
+ from unsloth import FastLanguageModel
+ from transformers import AutoTokenizer
+
+ def load_model():
+     # Base model and loading settings (4-bit quantization keeps memory usage low)
+     model_name = "large-traversaal/Phi-4-Hindi"
+     max_seq_length = 2048
+     load_in_4bit = True
+
+     model, tokenizer = FastLanguageModel.from_pretrained(
+         model_name=model_name,
+         max_seq_length=max_seq_length,
+         load_in_4bit=load_in_4bit,
+     )
+
+     # Wrap the base model with a LoRA adapter configuration
+     model = FastLanguageModel.get_peft_model(
+         model,
+         r=16,
+         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+         lora_alpha=16,
+         lora_dropout=0,
+         bias="none",
+         use_gradient_checkpointing="unsloth",
+         random_state=3407,
+         use_rslora=False,
+         loftq_config=None,
+     )
+     # Switch Unsloth into inference mode
+     FastLanguageModel.for_inference(model)
+     return model, tokenizer
+
+ # Load model and tokenizer
+ model, tokenizer = load_model()
+
+ def generate_response(input_text):
+     # Build the chat-formatted prompt and move the token ids to the GPU
+     prompt = f"### INPUT : {input_text} RESPONSE : "
+     message = [{"role": "user", "content": prompt}]
+
+     inputs = tokenizer.apply_chat_template(
+         message, tokenize=True, add_generation_prompt=True, return_tensors="pt"
+     ).to("cuda")
+
+     outputs = model.generate(
+         input_ids=inputs,
+         max_new_tokens=1000,
+         use_cache=True,
+         temperature=0.1,
+         min_p=0.1,
+         pad_token_id=tokenizer.eos_token_id
+     )
+
+     # Decode and keep only the text after the response marker
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     processed_response = response.split("### RESPONSE :assistant")[-1].strip()
+
+     return processed_response
+
+ # Gradio UI
+ iface = gr.Interface(
+     fn=generate_response,
+     inputs=gr.Textbox(lines=5, placeholder="Enter your query here..."),
+     outputs="text",
+     title="AI Hindi Chatbot",
+     description="Enter your prompt and get a response generated by the AI model."
+ )
+
+ iface.launch()
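
A quick way to sanity-check the file outside the Gradio UI is to call generate_response directly once the model has loaded (a minimal sketch; the Hindi prompt below is only an illustrative example and is not part of the commit):

    # Assumes the definitions above have already been run in the same session
    print(generate_response("भारत की राजधानी क्या है?"))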