ujwal55 committed
Commit f110e11 · verified · 1 Parent(s): c2390a9

Update app.py

Files changed (1): app.py +27 -10
app.py CHANGED

@@ -7,7 +7,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from llama_cpp import Llama
 
-def run_test(model_type, repo_id, file_name, test_prompt):
+def run_test(model_type, repo_id, file_name, test_prompt, max_new_tokens, n_ctx, max_tokens, temperature, top_p, top_k):
     result = {}
 
     # Disk usage before download
@@ -24,27 +24,38 @@ def run_test(model_type, repo_id, file_name, test_prompt):
             model = AutoModelForCausalLM.from_pretrained(repo_id)
             inputs = tokenizer(test_prompt, return_tensors="pt")
             with torch.no_grad():
-                outputs = model.generate(**inputs, max_new_tokens=50)
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    top_k=top_k
+                )
             output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         else:
             gguf_path = f"./{file_name}"
             if not os.path.exists(gguf_path):
-                # Auto download from Hugging Face model repo if not already
                 from huggingface_hub import hf_hub_download
                 hf_hub_download(repo_id=repo_id, filename=file_name, local_dir="./", local_dir_use_symlinks=False)
-            llm = Llama(model_path=gguf_path, n_ctx=2048)
-            output_text = llm(test_prompt, max_tokens=128)["choices"][0]["text"]
+            llm = Llama(
+                model_path=gguf_path,
+                n_ctx=n_ctx
+            )
+            output_text = llm(
+                test_prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                top_k=top_k
+            )["choices"][0]["text"]
 
     except Exception as e:
         return f"❌ Error: {str(e)}", "", "", "", ""
 
     end_time = time.time()
 
-    # Memory and CPU after
     mem_end = process.memory_info().rss
     cpu_end = process.cpu_percent(interval=0.1)
-
-    # Disk usage after
     disk_after = shutil.disk_usage("/")[2]
 
     result["output"] = output_text
@@ -67,7 +78,13 @@ gr.Interface(
         gr.Dropdown(["transformers", "gguf"], label="Model Type"),
         gr.Textbox(label="Repo ID (e.g., TheBloke/Mistral-7B-Instruct-v0.1-GGUF)"),
         gr.Textbox(label="Model File Name (only for GGUF)", placeholder="mistral.Q4_0.gguf"),
-        gr.Textbox(label="Test Prompt", value="What is the treatment for lumbar disc herniation?")
+        gr.Textbox(label="Test Prompt", value="What is the treatment for lumbar disc herniation?"),
+        gr.Slider(1, 16384, value=50, step=1, label="Max New Tokens"),
+        gr.Slider(256, 32768, value=2048, step=64, label="n_ctx (GGUF only)"),
+        gr.Slider(1, 16384, value=128, step=1, label="Max Tokens (GGUF only)"),
+        gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature"),
+        gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="Top-p"),
+        gr.Slider(0, 100, value=50, step=1, label="Top-k")
     ],
     outputs=[
         gr.Textbox(label="Model Output"),
@@ -77,5 +94,5 @@ gr.Interface(
         gr.Textbox(label="Disk Usage (downloaded size)")
     ],
     title="🧪 Model Benchmark Tester - HF CPU Space",
-    description="Input repo and model file name to benchmark GGUF or Transformers models."
+    description="Input repo and model file name to benchmark GGUF or Transformers models. Adjust generation hyperparameters as needed."
 ).launch()
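
For reference, a minimal sketch of how the updated run_test signature might be called directly (e.g., from a Python session alongside app.py), assuming it returns the five fields wired to the interface outputs. The repo ID and file name below are the placeholder examples from the input labels, not verified values; the keyword defaults mirror the slider defaults above.

# Hypothetical direct call of the new signature; not part of the commit.
output, *metrics = run_test(          # assumes a 5-tuple return, matching the 5 outputs
    model_type="gguf",
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",  # example from the Repo ID label
    file_name="mistral.Q4_0.gguf",                     # example from the placeholder
    test_prompt="What is the treatment for lumbar disc herniation?",
    max_new_tokens=50,   # used only by the transformers branch
    n_ctx=2048,          # GGUF context window
    max_tokens=128,      # used only by the GGUF branch
    temperature=0.7,
    top_p=0.9,
    top_k=50,
)
print(output)

Note that temperature, top_p, and top_k are shared between both branches, while max_new_tokens applies only to transformers generation and n_ctx/max_tokens only to llama_cpp.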