kas1 committed
Commit 23afbfb
1 Parent(s): a5a84c2

Remove quantization_config entirely to avoid bitsandbytes dependency

Files changed (1): app.py (+20 −14)
app.py CHANGED
@@ -1,24 +1,36 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 import accelerate
 
 print("Accelerate version:", accelerate.__version__)
 
-from transformers import BitsAndBytesConfig
+# Load the original model with overridden configuration
+original_config = AutoConfig.from_pretrained(
+    "unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit"
+)
+# Remove quantization-related attributes from the config
+original_config._load_in_4bit = False
+original_config._load_in_8bit = False
+original_config.quant_method = None
 
-# Load the original model
 original_model = AutoModelForCausalLM.from_pretrained(
     "unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit",
-    load_in_4bit=False,  # Disable 4-bit quantization
-    # Remove quantization_config entirely
+    config=original_config  # Use the overridden configuration
 )
 original_tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit")
 
-# Load the fine-tuned model
+# Load the fine-tuned model with overridden configuration
+fine_tuned_config = AutoConfig.from_pretrained(
+    "kas1/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit-John1"
+)
+# Remove quantization-related attributes from the config
+fine_tuned_config._load_in_4bit = False
+fine_tuned_config._load_in_8bit = False
+fine_tuned_config.quant_method = None
+
 fine_tuned_model = AutoModelForCausalLM.from_pretrained(
     "kas1/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit-John1",
-    load_in_4bit=False,  # Disable 4-bit quantization
-    # Remove quantization_config entirely
+    config=fine_tuned_config  # Use the overridden configuration
 )
 fine_tuned_tokenizer = AutoTokenizer.from_pretrained("kas1/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit-John1")
 
@@ -41,21 +53,17 @@ def batch_test(json_file):
     import json
     results = []
     data = json.load(json_file)
-
     for item in data:
         question = item.get("question", "")
         expected_answer = item.get("answer", "")
-
         # Generate responses from both models
         original_response, fine_tuned_response = compare_models(question)
-
         results.append({
             "question": question,
            "expected_answer": expected_answer,
            "original_model_response": original_response,
            "fine_tuned_model_response": fine_tuned_response
         })
-
     return results
 
 # Define the Gradio interface
@@ -68,7 +76,6 @@ with gr.Blocks() as demo:
     compare_button = gr.Button("Compare Responses")
     original_output = gr.Textbox(label="Original Model Response")
     fine_tuned_output = gr.Textbox(label="Fine-Tuned Model Response")
-
     compare_button.click(
         compare_models,
         inputs=prompt_input,
@@ -80,7 +87,6 @@ with gr.Blocks() as demo:
     json_file_input = gr.File(label="Upload JSON File with Questions")
     batch_results = gr.JSON(label="Comparison Results")
     batch_button = gr.Button("Run Batch Test")
-
     batch_button.click(
         batch_test,
         inputs=json_file_input,
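
For reference, a minimal standalone sketch of the same idea, not part of the committed code: quantized repos such as the unsloth bnb-4bit checkpoints serialize their bitsandbytes settings under a `quantization_config` key in config.json, so deleting that attribute from the loaded config before calling `from_pretrained` is a more direct way to keep transformers from taking the quantized code path. `MODEL_ID` is just an illustrative name.

    from transformers import AutoConfig, AutoModelForCausalLM

    # Hypothetical sketch (not this commit's code): strip the serialized
    # quantization settings so transformers never imports bitsandbytes.
    MODEL_ID = "unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit"

    config = AutoConfig.from_pretrained(MODEL_ID)
    # Quantized repos store their bnb settings as `quantization_config`;
    # dropping the attribute disables the quantized loading path.
    if hasattr(config, "quantization_config"):
        delattr(config, "quantization_config")

    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, config=config)

One caveat either way: a `-bnb-4bit` repo stores weights that are already quantized, so loading it without bitsandbytes may still fail or produce a broken model; loading the unquantized base checkpoint would sidestep the dependency entirely.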