Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -56,7 +56,7 @@ model = AutoModelForCausalLM.from_pretrained(
     # quantization_config=quantization_config_4bit, # Comment out if not using quantization
     device_map="auto",
     offload_folder='./',
-).to(torch.device("cuda:0"), torch.bfloat16)
+).to(torch.bfloat16) #.to(torch.device("cuda:0"), torch.bfloat16)
 
 print(f"Loading tokenizer: {model_name}")
 tokenizer = AutoTokenizer.from_pretrained(
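This hunk drops the hard-coded CUDA device from the post-load cast, keeping only a dtype cast. On ZeroGPU Spaces the GPU is attached only inside GPU-decorated functions, which is presumably why moving the model to cuda:0 at import time was removed; the old expression is kept as a trailing comment. A minimal sketch of the surrounding load block for context (the imports and model_name are assumptions; only lines 56-62 appear in the hunk):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "example/causal-lm"  # placeholder; the Space's actual checkpoint is not shown here

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=quantization_config_4bit,  # Comment out if not using quantization
    device_map="auto",    # let accelerate place layers instead of pinning cuda:0
    offload_folder='./',  # spill weights to disk if they don't fit in memory
).to(torch.bfloat16)      # dtype-only cast; no device move at import time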
@@ -131,8 +131,8 @@ def generate_code(prompt: str) -> str:
     with torch.no_grad():
         generated_ids = model.generate(
             **model_inputs, # Pass tokenized inputs
-            max_new_tokens=
-            min_new_tokens=
+            max_new_tokens=192,
+            min_new_tokens=128,
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
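This hunk pins the completion length (the previous values are truncated in the rendered diff): max_new_tokens=192 caps how many tokens are generated, and min_new_tokens=128 suppresses the EOS token until at least 128 new tokens exist, forcing short answers out to a minimum length. A sketch of the updated call as it might sit inside generate_code (the prompt handling and decoding around it are assumptions; only lines 131-138 appear in the hunk):

prompt = "Write a Python function that reverses a string."  # illustrative input
model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    generated_ids = model.generate(
        **model_inputs,       # Pass tokenized inputs
        max_new_tokens=192,   # hard cap on generated tokens
        min_new_tokens=128,   # keep EOS suppressed until 128 new tokens
        do_sample=True,       # sample rather than greedy-decode
        temperature=0.7,
        top_p=0.9,
    )

# Strip the prompt tokens before decoding, keeping only the completion.
completion = generated_ids[0, model_inputs["input_ids"].shape[1]:]
print(tokenizer.decode(completion, skip_special_tokens=True))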