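"""Client-side helpers for calling the Modal-hosted vLLM (Gemma 3) model used by the Novel Heroes app."""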
import asyncio
import json

import modal

from novel_heroes.mcp_client import MCPClient


class LLMCall:
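    """Thin client around the Modal-deployed vLLM model ("llm-server" / VLLMModel).

    Provides streamed chat responses for the Gradio UI and a helper that
    extracts hero names from a book's text.
    """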
    # Name of the deployed Modal app that serves the VLLMModel class.
    MODAL_APP_NAME = "llm-server"
    def _convert_history(self, message, gradio_history):
        """Convert Gradio history format to Gemma3 format."""
        gemma3_history = []
        for h in gradio_history:
            # Gradio: {'role': ..., 'content': ..., ...}
            # Gemma3: {'role': ..., 'content': [{'type': 'text', 'text': ...}]}
            if isinstance(h.get("content"), list):
                # Already in Gemma3 format; keep as is.
                gemma3_history.append(
                    {"role": h.get("role"), "content": h.get("content")}
                )
            else:
                gemma3_history.append(
                    {
                        "role": h.get("role"),
                        "content": [{"type": "text", "text": h.get("content", "")}],
                    }
                )

        # Add the current user input.
        gemma3_history.append(
            {"role": "user", "content": [{"type": "text", "text": message}]}
        )
        return gemma3_history
    def respond(self, message, history: list, system_prompt=None):
        """Generate a response to the user message using the LLM.

        Yields the accumulated response text after each streamed chunk.
        """
        # Add the system prompt at the beginning of the history.
        history.insert(
            0,
            {
                "role": "system",
                "content": system_prompt,
            },
        )

        # Debug
        print(f"Chat history length: {len(history)}")
        for h in history:
            print(f"Role: {h['role']}, Content: {h['content'][:100]} ...")

        gemma_formatted_history = self._convert_history(message, history)

        model = modal.Cls.from_name(self.MODAL_APP_NAME, "VLLMModel")()

        response = ""
        for chunk in model.generate_stream.remote_gen(gemma_formatted_history):
            response += chunk
            yield response

        # Add the full assistant response to the Gradio history.
        history.append({"role": "assistant", "content": response})
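    # Assumed wiring (sketch, not part of this module): `respond` matches the
    # signature Gradio's ChatInterface expects for a streaming chat function, e.g.
    #
    #   demo = gr.ChatInterface(
    #       fn=llm_call.respond,
    #       type="messages",
    #       additional_inputs=[gr.Textbox(label="System prompt")],
    #   )
    #
    # Each yield replaces the in-progress assistant message, which is why the
    # generator yields the accumulated text rather than per-chunk deltas.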
    def listup_heroes(self, book_content: str) -> list[str]:
        """
        List the hero names found in the book content.

        Args:
            book_content (str): The content of the book.

        Returns:
            list[str]: A list of hero names found in the book content.
        """
        system_prompt = (
            "Your task is to extract hero names from the provided book content. "
            "Return a list of hero names, each on a new line. "
            "Do not include any additional text or explanations. "
            "The hero names should be in the format: 'Hero Name'. "
            "If no heroes are found, return an empty list. "
            "Example output:\n"
            "Hero One\n"
            "Hero Two\n"
            "Hero Three\n"
        )
        prompts = [
            {
                "role": "system",
                "content": [{"type": "text", "text": system_prompt}],
            },
            {
                "role": "user",
                "content": [{"type": "text", "text": book_content}],
            },
        ]
        model = modal.Cls.from_name(self.MODAL_APP_NAME, "VLLMModel")()
        response = model.generate.remote(prompts)
        print("Response from LLM:", response)

        # Split the response by new lines and strip whitespace.
        hero_names = [name.strip() for name in response.split("\n") if name.strip()]
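        # e.g. a raw response of "Scrooge\nBob Cratchit\nTiny Tim\n" becomes
        # ["Scrooge", "Bob Cratchit", "Tiny Tim"]; any bullets or numbering the
        # model adds despite the prompt instructions are kept verbatim.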
        return hero_names

async def main():
    """
    Example usage.

    The Gradio app and the Modal server need to be running.
    """
    llm_call = LLMCall()
    mcp_client = MCPClient()

    # Select a book
    book_list_json = await mcp_client.get_book_list()
    book_name = json.loads(book_list_json)[0]
    print(
        f"Selected Book: {book_name}"
    )  # "A_Christmas_Carol_in_Prose_Being_a_Ghost_Story_of_Christmas_by_Charles_Dickens_13844"

    # Get the book content
    book_content = await mcp_client.get_book_content(book_name, 50000)
    print("Book content:\n", "======", sep="")
    print(book_content[:500].replace("\n", ""), "...")
    print("======")

    # Select a hero
    heroes = llm_call.listup_heroes(book_content)
    print("Extracted Heroes:", heroes)
    hero = heroes[0] if heroes else "No heroes found"
    print(f"Selected Hero: {hero}")  # "Scrooge"
    # Construct the system prompt
    # fmt: off
    system_prompt = (
        f"You are {hero}, a character from the book '{book_name}'. "
        "Behave and respond according to the personality and attitude of this character. "
        "For example, if the character is unfriendly, respond unfriendly; if the character is kind, respond kindly. "
        "Below is the book content:\n\n"
        + "=" * 20 + "\n"
        f"{book_content}\n\n"
        + "=" * 20 + "\n"
        # This DOES WORK as expected.
        "ζ₯ζ¬θͺγ§ηγγ¦γ"  # "Please respond in Japanese."
        # This also works, but sometimes not as expected.
        # "Please respond in Japanese."
        # This DOES NOT WORK as expected.
        # "IMPORTANT: You must ALWAYS respond in the EXACT same language as the user's message. "
        # "If the user writes in Japanese, you MUST respond in Japanese. "
        # "If the user writes in English, you MUST respond in English. "
        # "Never mix languages or respond in a different language than the user used. "
        # "This is a strict requirement - match the user's language exactly. "
    )
    # fmt: on
print(f"Book Content Length: {len(book_content)}") | |
print(f"System Prompt Length: {len(system_prompt)}") | |
# Generate a response | |
response = llm_call.respond( | |
message="γγγ«γ‘γ―γγζ©ε«γ―γγγγ§γγοΌ", | |
history=[], | |
system_prompt=system_prompt, | |
) | |
print("Response from LLM:") | |
for chunk in response: | |
print(chunk, end="", flush=True) | |
print() | |
if __name__ == "__main__":
    asyncio.run(main())