akhaliq HF Staff commited on
Commit
610cf3e
·
verified ·
1 Parent(s): 0d4f09b

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face model repository to serve in the chat UI.
model_id = "NousResearch/Hermes-4-14B"

# Load tokenizer and model once at import time so every request reuses them.
# NOTE(review): trust_remote_code=True executes code shipped with the model
# repo — acceptable only because the repo is a known/trusted publisher.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# fp16 weights with device_map="auto" lets accelerate place layers on the
# available GPU(s), falling back to CPU for anything that does not fit.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
14
def predict(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Parameters:
        message: The user's latest message (str).
        history: Prior conversation turns as {"role": ..., "content": ...}
            dicts. NOTE(review): assumes Gradio supplies history in
            "messages" (dict) format — requires ChatInterface(type="messages")
            or a Gradio version where that is the default; confirm.

    Returns:
        The decoded assistant response text (str).
    """
    # Build the prompt from a local copy instead of mutating Gradio's
    # history list: ChatInterface tracks the conversation itself, so the
    # original in-place append of the user turn (and the trailing
    # assistant append, whose result was never returned) was dead weight.
    messages = list(history) + [{"role": "user", "content": message}]

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # inference_mode disables autograd bookkeeping for faster generation.
    with torch.inference_mode():
        output = model.generate(
            input_ids,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.95,
            top_k=40,
            repetition_penalty=1.1,
            do_sample=True,
        )

    # Slice off the prompt tokens so only newly generated text is decoded.
    response_ids = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(response_ids, skip_special_tokens=True)
38
+
39
# Launch the chat UI. Explicit type="messages" makes the history passed to
# predict() use the {"role": ..., "content": ...} dicts that
# apply_chat_template expects — older Gradio versions default to
# tuple-format history, which would break prompt construction.
gr.ChatInterface(
    predict,
    type="messages",
    title="Hermes-4-14B Chatbot",
    description="Chat with Hermes-4-14B, a large language model by Nous Research",
    examples=["Hello", "Explain quantum computing in simple terms", "What is the capital of France?"],
).launch()