"""System to chat with novel heroes."""
import asyncio
import json

import modal

from novel_heroes.mcp_client import MCPClient


class LLMCall:
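    """Calls the LLM server deployed on Modal."""

    # Assumed to match the name of the deployed Modal app that serves VLLMModel.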
    MODAL_APP_NAME = "llm-server"

    def _convert_history(self, message, gradio_history):
        """Convert Gradio history format to Gemma3 format."""
        gemma3_history = []
        for h in gradio_history:
            # Gradio: {'role': ..., 'content': ..., ...}
            # Gemma3: {'role': ..., 'content': [{'type': 'text', 'text': ...}]}
            if isinstance(h.get("content"), list):
                # If already in Gemma3 format, keep as is
                gemma3_history.append(
                    {"role": h.get("role"), "content": h.get("content")}
                )
            else:
                gemma3_history.append(
                    {
                        "role": h.get("role"),
                        "content": [{"type": "text", "text": h.get("content", "")}],
                    }
                )

        # Add current user input
        gemma3_history.append(
            {"role": "user", "content": [{"type": "text", "text": message}]}
        )
        return gemma3_history
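
    # Example of the conversion above (hypothetical values):
    #   gradio_history=[{"role": "user", "content": "Hi"}], message="Bye"
    #   -> [{"role": "user", "content": [{"type": "text", "text": "Hi"}]},
    #       {"role": "user", "content": [{"type": "text", "text": "Bye"}]}]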

    def respond(self, message, history: list, system_prompt=None):
        """Generate a response to the user message using the LLM."""
        # Add system prompt at the beginning, if provided
        if system_prompt:
            history.insert(
                0,
                {
                    "role": "system",
                    "content": system_prompt,
                },
            )

        # Debug
        print(f"Chat history length: {len(history)}")
        for h in history:
            print(f"Role: {h['role']}, Content: {str(h['content'])[:100]} ...")

        gemma_formatted_history = self._convert_history(message, history)

        # Look up the deployed VLLMModel class on the Modal app
        model = modal.Cls.from_name(self.MODAL_APP_NAME, "VLLMModel")()

        # Stream the generation, yielding the accumulated text so far
        response = ""
        for chunk in model.generate_stream.remote_gen(gemma_formatted_history):
            response += chunk
            yield response

        # Add the completed response to the Gradio history
        history.append({"role": "assistant", "content": response})
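
    # NOTE: respond() yields the accumulated text (not per-chunk deltas), which
    # is the shape Gradio's ChatInterface expects from a streaming callback.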

    def listup_heroes(self, book_content: str) -> list[str]:
        """
        Extract hero names from the book content.

        Args:
            book_content (str): The content of the book.

        Returns:
            list[str]: A list of hero names found in the book content.
        """
        system_prompt = (
            "Your task is to extract hero names from the provided book content. "
            "Return a list of hero names, each on a new line. "
            "Do not include any additional text or explanations. "
            "The hero names should be in the format: 'Hero Name'. "
            "If no heroes are found, return an empty list. "
            "Example output:\n"
            "Hero One\n"
            "Hero Two\n"
            "Hero Three\n"
        )

        prompts = [
            {
                "role": "system",
                "content": [{"type": "text", "text": system_prompt}],
            },
            {
                "role": "user",
                "content": [{"type": "text", "text": book_content}],
            },
        ]

        # Non-streaming remote call: returns the full completion at once
        model = modal.Cls.from_name(self.MODAL_APP_NAME, "VLLMModel")()
        response = model.generate.remote(prompts)
        print("Response from LLM:", response)

        # Split the response by new lines and strip whitespace
        hero_names = [name.strip() for name in response.split("\n") if name.strip()]
        return hero_names
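
# NOTE: LLMCall.listup_heroes() trusts the model to follow the one-name-per-line
# format; any extra prose in the response would come back as a "hero" as well.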


async def main():
    """
    Example usage.

    The Gradio app and Modal server need to be running.
    """
    llm_call = LLMCall()
    mcp_client = MCPClient()

    # Select a book (get_book_list() returns a JSON-encoded list of book names)
    book_list_json = await mcp_client.get_book_list()
    book_name = json.loads(book_list_json)[0]
    print(
        f"Selected Book: {book_name}"
    )  # "A_Christmas_Carol_in_Prose_Being_a_Ghost_Story_of_Christmas_by_Charles_Dickens_13844"

    # Get book content (the second argument presumably caps the length in characters)
    book_content = await mcp_client.get_book_content(book_name, 50000)
    print("Book content:\n", "======", sep="")
    print(book_content[:500].replace("\n", ""), "...")
    print("======")

    # Select a hero
    heroes = llm_call.listup_heroes(book_content)
    print("Extracted Heroes:", heroes)
    hero = heroes[0] if heroes else "No heroes found"
    print(f"Selected Hero: {hero}")  # "Scrooge"

    # Construct system prompt
    # fmt: off
    system_prompt = (
        f"You are {hero}, a character from the book '{book_name}'. "
        "Behave and respond according to the personality and attitude of this character. "
        "For example, if the character is unfriendly, respond in an unfriendly way; if the character is kind, respond kindly. "
        "Below is the book content:\n\n"
        + "=" * 20 + "\n"
        f"{book_content}\n\n"
        + "=" * 20 + "\n"
        # This DOES WORK as expected.
        "ζ—₯本θͺžγ§η­”γˆγ¦γ€‚"  # "Please respond in Japanese."
        # This also works, but sometimes not as expected.
        # "Please respond in Japanese."
        # This DOES NOT WORK as expected.
        # "IMPORTANT: You must ALWAYS respond in the EXACT same language as the user's message. "
        # "If the user writes in Japanese, you MUST respond in Japanese. "
        # "If the user writes in English, you MUST respond in English. "
        # "Never mix languages or respond in a different language than the user used. "
        # "This is a strict requirement - match the user's language exactly. "
    )
    # fmt: on
print(f"Book Content Length: {len(book_content)}")
print(f"System Prompt Length: {len(system_prompt)}")
# Generate a response
response = llm_call.respond(
message="γ“γ‚“γ«γ‘γ―γ€‚γ”ζ©Ÿε«Œγ―γ„γ‹γŒγ§γ™γ‹οΌŸ",
history=[],
system_prompt=system_prompt,
)
print("Response from LLM:")
for chunk in response:
print(chunk, end="", flush=True)
print()


if __name__ == "__main__":
    asyncio.run(main())
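
# To run this example (assumes the Modal app "llm-server" is deployed and the
# MCP server used by MCPClient is reachable):
#   python llm_call.py  # hypothetical filename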