Spaces: Running on Zero

Commit: adds minor changes to the README
Browse files
README.md
CHANGED
@@ -28,7 +28,7 @@ A complete Gradio application for the [Petite Elle L'Aime 3](https://huggingface
|
|
28 |
|
29 |
## π Model Information
|
30 |
|
31 |
-
- **Base Model**: SmolLM3-3B
|
32 |
- **Parameters**: ~3B
|
33 |
- **Context Length**: 128k
|
34 |
- **Precision**: Full fine-tuned model (float16/float32)
|
|
|
28 |
|
29 |
## π Model Information
|
30 |
|
31 |
+
- **Base Model**: HuggingFaceTB/SmolLM3-3B
|
32 |
- **Parameters**: ~3B
|
33 |
- **Context Length**: 128k
|
34 |
- **Precision**: Full fine-tuned model (float16/float32)
|
app.py
CHANGED
@@ -224,35 +224,35 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
224 |
response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
|
225 |
|
226 |
# Debug: Print the full raw response with tokens
|
227 |
-
logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
|
228 |
-
logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
|
229 |
-
logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
|
230 |
|
231 |
# More robust response extraction - look for assistant marker
|
232 |
-
logger.info(f"Looking for assistant marker in response...")
|
233 |
if "<|im_start|>assistant" in response_with_tokens:
|
234 |
-
logger.info(f"Found assistant marker in response")
|
235 |
# Find the start of assistant response
|
236 |
assistant_start = response_with_tokens.find("<|im_start|>assistant")
|
237 |
-
logger.info(f"Assistant marker found at position: {assistant_start}")
|
238 |
if assistant_start != -1:
|
239 |
# Find the end of the assistant marker
|
240 |
marker_end = response_with_tokens.find("\n", assistant_start)
|
241 |
-
logger.info(f"Marker end found at position: {marker_end}")
|
242 |
if marker_end != -1:
|
243 |
assistant_response = response_with_tokens[marker_end + 1:].strip()
|
244 |
-
logger.info(f"Using marker-based extraction")
|
245 |
else:
|
246 |
assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
|
247 |
-
logger.info(f"Using fallback marker extraction")
|
248 |
else:
|
249 |
# Fallback to prompt-based extraction
|
250 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
251 |
assistant_response = response[len(full_prompt):].strip()
|
252 |
-
logger.info(f"Using prompt-based extraction (marker not found)")
|
253 |
else:
|
254 |
# Fallback to original method
|
255 |
-
logger.info(f"No assistant marker found, using prompt-based extraction")
|
256 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
257 |
assistant_response = response[len(full_prompt):].strip()
|
258 |
|
@@ -262,34 +262,34 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
262 |
assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
|
263 |
|
264 |
# Debug: Print the extracted assistant response after cleanup
|
265 |
-
logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
|
266 |
-
logger.info(f"Extracted response length: {len(assistant_response)}")
|
267 |
-
logger.info(f"Extracted response: {repr(assistant_response)}")
|
268 |
|
269 |
# Debug: Print before cleanup
|
270 |
-
logger.info(f"=== BEFORE CLEANUP DEBUG ===")
|
271 |
-
logger.info(f"Before cleanup length: {len(assistant_response)}")
|
272 |
-
logger.info(f"Before cleanup: {repr(assistant_response)}")
|
273 |
|
274 |
assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
|
275 |
|
276 |
# Debug: Print after first cleanup
|
277 |
-
logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
|
278 |
-
logger.info(f"After first cleanup length: {len(assistant_response)}")
|
279 |
-
logger.info(f"After first cleanup: {repr(assistant_response)}")
|
280 |
|
281 |
if not enable_thinking:
|
282 |
assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
|
283 |
|
284 |
# Debug: Print after thinking cleanup
|
285 |
-
logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
|
286 |
-
logger.info(f"After thinking cleanup length: {len(assistant_response)}")
|
287 |
-
logger.info(f"After thinking cleanup: {repr(assistant_response)}")
|
288 |
|
289 |
# Debug: Print before tool call handling
|
290 |
-
logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
|
291 |
-
logger.info(f"Before tool call handling length: {len(assistant_response)}")
|
292 |
-
logger.info(f"Before tool call handling: {repr(assistant_response)}")
|
293 |
|
294 |
# Handle tool calls if present
|
295 |
if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
|
@@ -305,17 +305,17 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
305 |
assistant_response += f"\n\nπ Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
|
306 |
|
307 |
# Debug: Print after tool call handling
|
308 |
-
logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
|
309 |
-
logger.info(f"After tool call handling length: {len(assistant_response)}")
|
310 |
-
logger.info(f"After tool call handling: {repr(assistant_response)}")
|
311 |
|
312 |
assistant_response = assistant_response.strip()
|
313 |
|
314 |
# Debug: Print final response
|
315 |
-
logger.info(f"=== FINAL RESPONSE DEBUG ===")
|
316 |
-
logger.info(f"Final response length: {len(assistant_response)}")
|
317 |
-
logger.info(f"Final response: {repr(assistant_response)}")
|
318 |
-
logger.info(f"=== END DEBUG ===")
|
319 |
|
320 |
return assistant_response
|
321 |
|
|
|
224 |
response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
|
225 |
|
226 |
# Debug: Print the full raw response with tokens
|
227 |
+
# logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
|
228 |
+
# logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
|
229 |
+
# logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
|
230 |
|
231 |
# More robust response extraction - look for assistant marker
|
232 |
+
# logger.info(f"Looking for assistant marker in response...")
|
233 |
if "<|im_start|>assistant" in response_with_tokens:
|
234 |
+
# logger.info(f"Found assistant marker in response")
|
235 |
# Find the start of assistant response
|
236 |
assistant_start = response_with_tokens.find("<|im_start|>assistant")
|
237 |
+
# logger.info(f"Assistant marker found at position: {assistant_start}")
|
238 |
if assistant_start != -1:
|
239 |
# Find the end of the assistant marker
|
240 |
marker_end = response_with_tokens.find("\n", assistant_start)
|
241 |
+
# logger.info(f"Marker end found at position: {marker_end}")
|
242 |
if marker_end != -1:
|
243 |
assistant_response = response_with_tokens[marker_end + 1:].strip()
|
244 |
+
# logger.info(f"Using marker-based extraction")
|
245 |
else:
|
246 |
assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
|
247 |
+
# logger.info(f"Using fallback marker extraction")
|
248 |
else:
|
249 |
# Fallback to prompt-based extraction
|
250 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
251 |
assistant_response = response[len(full_prompt):].strip()
|
252 |
+
# logger.info(f"Using prompt-based extraction (marker not found)")
|
253 |
else:
|
254 |
# Fallback to original method
|
255 |
+
# logger.info(f"No assistant marker found, using prompt-based extraction")
|
256 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
257 |
assistant_response = response[len(full_prompt):].strip()
|
258 |
|
|
|
262 |
assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
|
263 |
|
264 |
# Debug: Print the extracted assistant response after cleanup
|
265 |
+
# logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
|
266 |
+
# logger.info(f"Extracted response length: {len(assistant_response)}")
|
267 |
+
# logger.info(f"Extracted response: {repr(assistant_response)}")
|
268 |
|
269 |
# Debug: Print before cleanup
|
270 |
+
# logger.info(f"=== BEFORE CLEANUP DEBUG ===")
|
271 |
+
# logger.info(f"Before cleanup length: {len(assistant_response)}")
|
272 |
+
# logger.info(f"Before cleanup: {repr(assistant_response)}")
|
273 |
|
274 |
assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
|
275 |
|
276 |
# Debug: Print after first cleanup
|
277 |
+
# logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
|
278 |
+
# logger.info(f"After first cleanup length: {len(assistant_response)}")
|
279 |
+
# logger.info(f"After first cleanup: {repr(assistant_response)}")
|
280 |
|
281 |
if not enable_thinking:
|
282 |
assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
|
283 |
|
284 |
# Debug: Print after thinking cleanup
|
285 |
+
# logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
|
286 |
+
# logger.info(f"After thinking cleanup length: {len(assistant_response)}")
|
287 |
+
# logger.info(f"After thinking cleanup: {repr(assistant_response)}")
|
288 |
|
289 |
# Debug: Print before tool call handling
|
290 |
+
# logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
|
291 |
+
# logger.info(f"Before tool call handling length: {len(assistant_response)}")
|
292 |
+
# logger.info(f"Before tool call handling: {repr(assistant_response)}")
|
293 |
|
294 |
# Handle tool calls if present
|
295 |
if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
|
|
|
305 |
assistant_response += f"\n\nπ Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
|
306 |
|
307 |
# Debug: Print after tool call handling
|
308 |
+
# logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
|
309 |
+
# logger.info(f"After tool call handling length: {len(assistant_response)}")
|
310 |
+
# logger.info(f"After tool call handling: {repr(assistant_response)}")
|
311 |
|
312 |
assistant_response = assistant_response.strip()
|
313 |
|
314 |
# Debug: Print final response
|
315 |
+
# logger.info(f"=== FINAL RESPONSE DEBUG ===")
|
316 |
+
# logger.info(f"Final response length: {len(assistant_response)}")
|
317 |
+
# logger.info(f"Final response: {repr(assistant_response)}")
|
318 |
+
# logger.info(f"=== END DEBUG ===")
|
319 |
|
320 |
return assistant_response
|
321 |
|