Tonic committed
Commit 0004ba8 · 1 Parent(s): 9413e52

adds minor changes to the README

Files changed (2):
  1. README.md +1 -1
  2. app.py +33 -33
README.md CHANGED
@@ -28,7 +28,7 @@ A complete Gradio application for the [Petite Elle L'Aime 3](https://huggingface
 
 ## 📋 Model Information
 
-- **Base Model**: SmolLM3-3B
+- **Base Model**: HuggingFaceTB/SmolLM3-3B
 - **Parameters**: ~3B
 - **Context Length**: 128k
 - **Precision**: Full fine-tuned model (float16/float32)
app.py CHANGED
@@ -224,35 +224,35 @@ def generate_response(message, history, system_message, max_tokens, temperature,
     response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
 
     # Debug: Print the full raw response with tokens
-    logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
-    logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
-    logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
+    # logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
+    # logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
+    # logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
 
     # More robust response extraction - look for assistant marker
-    logger.info(f"Looking for assistant marker in response...")
+    # logger.info(f"Looking for assistant marker in response...")
     if "<|im_start|>assistant" in response_with_tokens:
-        logger.info(f"Found assistant marker in response")
+        # logger.info(f"Found assistant marker in response")
         # Find the start of assistant response
         assistant_start = response_with_tokens.find("<|im_start|>assistant")
-        logger.info(f"Assistant marker found at position: {assistant_start}")
+        # logger.info(f"Assistant marker found at position: {assistant_start}")
         if assistant_start != -1:
             # Find the end of the assistant marker
             marker_end = response_with_tokens.find("\n", assistant_start)
-            logger.info(f"Marker end found at position: {marker_end}")
+            # logger.info(f"Marker end found at position: {marker_end}")
             if marker_end != -1:
                 assistant_response = response_with_tokens[marker_end + 1:].strip()
-                logger.info(f"Using marker-based extraction")
+                # logger.info(f"Using marker-based extraction")
             else:
                 assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
-                logger.info(f"Using fallback marker extraction")
+                # logger.info(f"Using fallback marker extraction")
         else:
             # Fallback to prompt-based extraction
             response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
             assistant_response = response[len(full_prompt):].strip()
-            logger.info(f"Using prompt-based extraction (marker not found)")
+            # logger.info(f"Using prompt-based extraction (marker not found)")
     else:
         # Fallback to original method
-        logger.info(f"No assistant marker found, using prompt-based extraction")
+        # logger.info(f"No assistant marker found, using prompt-based extraction")
         response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
         assistant_response = response[len(full_prompt):].strip()
 
@@ -262,34 +262,34 @@ def generate_response(message, history, system_message, max_tokens, temperature,
     assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
 
     # Debug: Print the extracted assistant response after cleanup
-    logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
-    logger.info(f"Extracted response length: {len(assistant_response)}")
-    logger.info(f"Extracted response: {repr(assistant_response)}")
+    # logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
+    # logger.info(f"Extracted response length: {len(assistant_response)}")
+    # logger.info(f"Extracted response: {repr(assistant_response)}")
 
     # Debug: Print before cleanup
-    logger.info(f"=== BEFORE CLEANUP DEBUG ===")
-    logger.info(f"Before cleanup length: {len(assistant_response)}")
-    logger.info(f"Before cleanup: {repr(assistant_response)}")
+    # logger.info(f"=== BEFORE CLEANUP DEBUG ===")
+    # logger.info(f"Before cleanup length: {len(assistant_response)}")
+    # logger.info(f"Before cleanup: {repr(assistant_response)}")
 
     assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
 
     # Debug: Print after first cleanup
-    logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
-    logger.info(f"After first cleanup length: {len(assistant_response)}")
-    logger.info(f"After first cleanup: {repr(assistant_response)}")
+    # logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
+    # logger.info(f"After first cleanup length: {len(assistant_response)}")
+    # logger.info(f"After first cleanup: {repr(assistant_response)}")
 
     if not enable_thinking:
         assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
 
     # Debug: Print after thinking cleanup
-    logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
-    logger.info(f"After thinking cleanup length: {len(assistant_response)}")
-    logger.info(f"After thinking cleanup: {repr(assistant_response)}")
+    # logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
+    # logger.info(f"After thinking cleanup length: {len(assistant_response)}")
+    # logger.info(f"After thinking cleanup: {repr(assistant_response)}")
 
     # Debug: Print before tool call handling
-    logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
-    logger.info(f"Before tool call handling length: {len(assistant_response)}")
-    logger.info(f"Before tool call handling: {repr(assistant_response)}")
+    # logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
+    # logger.info(f"Before tool call handling length: {len(assistant_response)}")
+    # logger.info(f"Before tool call handling: {repr(assistant_response)}")
 
     # Handle tool calls if present
     if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
@@ -305,17 +305,17 @@ def generate_response(message, history, system_message, max_tokens, temperature,
         assistant_response += f"\n\n🐍 Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
 
     # Debug: Print after tool call handling
-    logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
-    logger.info(f"After tool call handling length: {len(assistant_response)}")
-    logger.info(f"After tool call handling: {repr(assistant_response)}")
+    # logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
+    # logger.info(f"After tool call handling length: {len(assistant_response)}")
+    # logger.info(f"After tool call handling: {repr(assistant_response)}")
 
     assistant_response = assistant_response.strip()
 
     # Debug: Print final response
-    logger.info(f"=== FINAL RESPONSE DEBUG ===")
-    logger.info(f"Final response length: {len(assistant_response)}")
-    logger.info(f"Final response: {repr(assistant_response)}")
-    logger.info(f"=== END DEBUG ===")
+    # logger.info(f"=== FINAL RESPONSE DEBUG ===")
+    # logger.info(f"Final response length: {len(assistant_response)}")
+    # logger.info(f"Final response: {repr(assistant_response)}")
+    # logger.info(f"=== END DEBUG ===")
 
     return assistant_response
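
A side note on the pattern this commit disables: Python's standard logging module already gates messages by level, so the same trace could stay in the code as logger.debug() calls and be switched on per run instead of commented out. Below is a minimal sketch of that approach; the extract_assistant_response helper and ASSISTANT_MARKER constant are hypothetical names introduced here only to illustrate the marker-based extraction from the diff.

import logging

logger = logging.getLogger(__name__)

ASSISTANT_MARKER = "<|im_start|>assistant"  # chat-template marker used in app.py

def extract_assistant_response(decoded: str, full_prompt: str) -> str:
    # Mirrors the marker-based extraction above, but routes the trace
    # through logger.debug() so it is silenced by the log level rather
    # than by commenting lines out.
    logger.debug("raw response length: %d", len(decoded))
    start = decoded.find(ASSISTANT_MARKER)
    if start != -1:
        marker_end = decoded.find("\n", start)
        if marker_end != -1:
            logger.debug("using marker-based extraction")
            return decoded[marker_end + 1:].strip()
        logger.debug("using fallback marker extraction")
        return decoded[start + len(ASSISTANT_MARKER):].strip()
    # Fallback: strip the prompt prefix when no marker is present
    logger.debug("no assistant marker found, using prompt-based extraction")
    return decoded[len(full_prompt):].strip()

# Re-enable the trace for one run without editing the code:
# logging.basicConfig(level=logging.DEBUG)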