ujwal55 committed
Commit d659432 · verified · 1 Parent(s): db5dc60

Create app.py

Files changed (1)
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
+ import time
+ import gradio as gr
+ import os
+ import psutil
+ import shutil
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from llama_cpp import Llama
+
+ def run_test(model_type, repo_id, file_name, test_prompt):
+     result = {}
+
+     # Free disk space before the download (bytes on the root filesystem)
+     disk_before = shutil.disk_usage("/")[2]
+
+     start_time = time.time()
+     process = psutil.Process(os.getpid())
+     cpu_start = process.cpu_percent(interval=0.1)
+     mem_start = process.memory_info().rss
+
+     try:
+         if model_type == "transformers":
+             tokenizer = AutoTokenizer.from_pretrained(repo_id)
+             model = AutoModelForCausalLM.from_pretrained(repo_id)
+             inputs = tokenizer(test_prompt, return_tensors="pt")
+             with torch.no_grad():
+                 outputs = model.generate(**inputs, max_new_tokens=50)
+             output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+         else:
+             gguf_path = f"./{file_name}"
+             if not os.path.exists(gguf_path):
+                 # Auto-download from the Hugging Face model repo if the file is not already present
+                 from huggingface_hub import hf_hub_download
+                 hf_hub_download(repo_id=repo_id, filename=file_name, local_dir="./", local_dir_use_symlinks=False)
+             llm = Llama(model_path=gguf_path, n_ctx=2048)
+             output_text = llm(test_prompt, max_tokens=128)["choices"][0]["text"]
+
+     except Exception as e:
+         return f"❌ Error: {str(e)}", "", "", "", ""
+
+     end_time = time.time()
+
+     # Memory and CPU after inference
+     mem_end = process.memory_info().rss
+     cpu_end = process.cpu_percent(interval=0.1)
+
+     # Free disk space after the download
+     disk_after = shutil.disk_usage("/")[2]
+
+     result["output"] = output_text
+     result["inference_time"] = round(end_time - start_time, 2)
+     result["memory_used_MB"] = round((mem_end - mem_start) / (1024 * 1024), 2)
+     result["cpu_percent"] = round(cpu_end - cpu_start, 2)  # crude delta of two short psutil samples; a rough indicator only
+     result["disk_used_MB"] = round((disk_before - disk_after) / (1024 * 1024), 2)  # free-space delta ≈ bytes downloaded
+
+     return (
+         result["output"],
+         f"{result['inference_time']} sec",
+         f"{result['cpu_percent']}%",
+         f"{result['memory_used_MB']} MB",
+         f"{result['disk_used_MB']} MB"
+     )
+
+ gr.Interface(
+     fn=run_test,
+     inputs=[
+         gr.Dropdown(["transformers", "gguf"], label="Model Type"),
+         gr.Textbox(label="Repo ID (e.g., TheBloke/Mistral-7B-Instruct-v0.1-GGUF)"),
+         gr.Textbox(label="Model File Name (only for GGUF)", placeholder="mistral.Q4_0.gguf"),
+         gr.Textbox(label="Test Prompt", value="What is the treatment for lumbar disc herniation?")
+     ],
+     outputs=[
+         gr.Textbox(label="Model Output"),
+         gr.Textbox(label="Inference Time"),
+         gr.Textbox(label="CPU Usage"),
+         gr.Textbox(label="RAM Usage"),
+         gr.Textbox(label="Disk Usage (downloaded size)")
+     ],
+     title="🧪 Model Benchmark Tester - HF CPU Space",
+     description="Input a repo ID and model file name to benchmark GGUF or Transformers models."
+ ).launch()
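
For a quick check outside the Space UI, `run_test` can be exercised directly. A minimal sketch, assuming the dependencies (`gradio`, `psutil`, `torch`, `transformers`, `llama-cpp-python`, `huggingface_hub`) are installed and that the trailing `gr.Interface(...).launch()` call is guarded behind `if __name__ == "__main__":` so importing `app` does not block; the `.gguf` file name below is an illustrative assumption, not a file confirmed to exist in that repo:

```python
# Hypothetical smoke test; assumes app.py's launch() call is guarded so the import doesn't block.
from app import run_test

output, secs, cpu, ram, disk = run_test(
    model_type="gguf",
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",  # example repo from the UI label above
    file_name="mistral-7b-instruct-v0.1.Q4_0.gguf",    # assumed file name; check the repo's file list
    test_prompt="What is the treatment for lumbar disc herniation?",
)
print(output)
print(secs, cpu, ram, disk)
```

On a CPU-only Space a quantized GGUF file is usually the realistic option; loading a full-precision 7B Transformers checkpoint will typically exceed the RAM of a free CPU Space.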