import json

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the original model
original_model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-R1-Distill-Llama-8B")
original_tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-R1-Distill-Llama-8B")

# Load the fine-tuned model
fine_tuned_model = AutoModelForCausalLM.from_pretrained("kas1/DeepSeek-R1-Distill-Llama-8B-John1")
fine_tuned_tokenizer = AutoTokenizer.from_pretrained("kas1/DeepSeek-R1-Distill-Llama-8B-John1")
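# Note: both checkpoints are ~8B-parameter models, so loading them in full
# precision needs substantial RAM/VRAM. If memory is tight, one option is
# from_pretrained(..., torch_dtype="auto", device_map="auto"), which requires
# the `accelerate` package — a suggestion, not part of the original script.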

# Generate a response to the same prompt from both models
def compare_models(prompt):
    # Generate response from the original model. max_new_tokens caps the
    # generation length; without it, generate() falls back to a short default
    # max_length and truncates answers. 256 is an arbitrary cap — adjust it.
    original_inputs = original_tokenizer(prompt, return_tensors="pt")
    original_outputs = original_model.generate(**original_inputs, max_new_tokens=256)
    original_response = original_tokenizer.decode(original_outputs[0], skip_special_tokens=True)

    # Generate response from the fine-tuned model
    fine_tuned_inputs = fine_tuned_tokenizer(prompt, return_tensors="pt")
    fine_tuned_outputs = fine_tuned_model.generate(**fine_tuned_inputs, max_new_tokens=256)
    fine_tuned_response = fine_tuned_tokenizer.decode(fine_tuned_outputs[0], skip_special_tokens=True)

    # Note: decode() includes the prompt tokens, so each response echoes the
    # prompt; slice the output ids past the input length to drop it.
    return original_response, fine_tuned_response
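
# Quick sanity check outside the UI (the prompt is hypothetical, for
# illustration only):
#   base, tuned = compare_models("Explain overfitting in one sentence.")
#   print(base)
#   print(tuned)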

# Run the single-prompt comparison over every item in an uploaded JSON file
def batch_test(json_file):
    # gr.File passes the callback a file path (or, on older Gradio versions,
    # an object with a .name attribute), not an open file handle, so the file
    # must be opened explicitly before json.load() can read it.
    path = json_file if isinstance(json_file, str) else json_file.name
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    results = []
    for item in data:
        question = item.get("question", "")
        expected_answer = item.get("answer", "")
        # Generate responses from both models
        original_response, fine_tuned_response = compare_models(question)
        results.append({
            "question": question,
            "expected_answer": expected_answer,
            "original_model_response": original_response,
            "fine_tuned_model_response": fine_tuned_response,
        })
    return results
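
# Expected shape of the uploaded JSON file — inferred from the keys read
# above; the example questions are made up, not shipped with this app:
#
# [
#     {"question": "What is 2 + 2?", "answer": "4"},
#     {"question": "Who wrote 'Hamlet'?", "answer": "William Shakespeare"}
# ]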

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Compare Two Models Side by Side")
    
    # Single prompt comparison
    with gr.Tab("Single Prompt"):
        prompt_input = gr.Textbox(label="Enter a Prompt/Question")
        compare_button = gr.Button("Compare Responses")
        original_output = gr.Textbox(label="Original Model Response")
        fine_tuned_output = gr.Textbox(label="Fine-Tuned Model Response")
        compare_button.click(
            compare_models,
            inputs=prompt_input,
            outputs=[original_output, fine_tuned_output]
        )

    # Batch testing with a JSON file
    with gr.Tab("Batch Testing"):
        json_file_input = gr.File(label="Upload JSON File with Questions")
        batch_results = gr.JSON(label="Comparison Results")
        batch_button = gr.Button("Run Batch Test")
        batch_button.click(
            batch_test,
            inputs=json_file_input,
            outputs=batch_results
        )

# Launch the app
demo.launch()
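
# To expose the app via a temporary public URL (e.g. when it runs on a remote
# GPU machine), Gradio supports demo.launch(share=True) instead.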