ujwal55 committed
Commit f110e11 · verified · 1 Parent(s): c2390a9

Update app.py

Files changed (1): app.py +27 -10
app.py CHANGED

@@ -7,7 +7,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from llama_cpp import Llama
 
-def run_test(model_type, repo_id, file_name, test_prompt):
+def run_test(model_type, repo_id, file_name, test_prompt, max_new_tokens, n_ctx, max_tokens, temperature, top_p, top_k):
     result = {}
 
     # Disk usage before download
@@ -24,27 +24,38 @@ def run_test(model_type, repo_id, file_name, test_prompt):
             model = AutoModelForCausalLM.from_pretrained(repo_id)
             inputs = tokenizer(test_prompt, return_tensors="pt")
             with torch.no_grad():
-                outputs = model.generate(**inputs, max_new_tokens=50)
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    top_k=top_k
+                )
             output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         else:
             gguf_path = f"./{file_name}"
             if not os.path.exists(gguf_path):
-                # Auto download from Hugging Face model repo if not already
                 from huggingface_hub import hf_hub_download
                 hf_hub_download(repo_id=repo_id, filename=file_name, local_dir="./", local_dir_use_symlinks=False)
-            llm = Llama(model_path=gguf_path, n_ctx=2048)
-            output_text = llm(test_prompt, max_tokens=128)["choices"][0]["text"]
+            llm = Llama(
+                model_path=gguf_path,
+                n_ctx=n_ctx
+            )
+            output_text = llm(
+                test_prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                top_k=top_k
+            )["choices"][0]["text"]
 
     except Exception as e:
         return f"❌ Error: {str(e)}", "", "", "", ""
 
     end_time = time.time()
 
-    # Memory and CPU after
     mem_end = process.memory_info().rss
     cpu_end = process.cpu_percent(interval=0.1)
-
-    # Disk usage after
     disk_after = shutil.disk_usage("/")[2]
 
     result["output"] = output_text
@@ -67,7 +78,13 @@ gr.Interface(
         gr.Dropdown(["transformers", "gguf"], label="Model Type"),
         gr.Textbox(label="Repo ID (e.g., TheBloke/Mistral-7B-Instruct-v0.1-GGUF)"),
         gr.Textbox(label="Model File Name (only for GGUF)", placeholder="mistral.Q4_0.gguf"),
-        gr.Textbox(label="Test Prompt", value="What is the treatment for lumbar disc herniation?")
+        gr.Textbox(label="Test Prompt", value="What is the treatment for lumbar disc herniation?"),
+        gr.Slider(1, 16384, value=50, step=1, label="Max New Tokens"),
+        gr.Slider(256, 32768, value=2048, step=64, label="n_ctx (GGUF only)"),
+        gr.Slider(1, 16384, value=128, step=1, label="Max Tokens (GGUF only)"),
+        gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature"),
+        gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="Top-p"),
+        gr.Slider(0, 100, value=50, step=1, label="Top-k")
     ],
     outputs=[
         gr.Textbox(label="Model Output"),
@@ -77,5 +94,5 @@ gr.Interface(
         gr.Textbox(label="Disk Usage (downloaded size)")
     ],
     title="🧪 Model Benchmark Tester - HF CPU Space",
-    description="Input repo and model file name to benchmark GGUF or Transformers models."
+    description="Input repo and model file name to benchmark GGUF or Transformers models. Adjust generation hyperparameters as needed."
 ).launch()
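
For reference, a minimal sketch of how the updated run_test signature might be called directly (e.g., from a Python session alongside app.py), assuming it returns the five fields wired to the interface outputs. The repo ID and file name below are the placeholder examples from the input labels, not verified values; the keyword defaults mirror the slider defaults above.

# Hypothetical direct call of the new signature; not part of the commit.
output, *metrics = run_test(          # assumes a 5-tuple return, matching the 5 outputs
    model_type="gguf",
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",  # example from the Repo ID label
    file_name="mistral.Q4_0.gguf",                     # example from the placeholder
    test_prompt="What is the treatment for lumbar disc herniation?",
    max_new_tokens=50,   # used only by the transformers branch
    n_ctx=2048,          # GGUF context window
    max_tokens=128,      # used only by the GGUF branch
    temperature=0.7,
    top_p=0.9,
    top_k=50,
)
print(output)

Note that temperature, top_p, and top_k are shared between both branches, while max_new_tokens applies only to transformers generation and n_ctx/max_tokens only to llama_cpp.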