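"""Client-side helpers for calling the Modal-hosted vLLM (Gemma 3) model used by the Novel Heroes app."""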
import asyncio
import json

import modal

from novel_heroes.mcp_client import MCPClient


class LLMCall:
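    """Thin client around the Modal-deployed vLLM model ("llm-server" / VLLMModel).

    Provides streamed chat responses for the Gradio UI and a helper that
    extracts hero names from a book's text.
    """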
    # Name of the deployed Modal app that serves the VLLMModel class.
    MODAL_APP_NAME = "llm-server"
    def _convert_history(self, message, gradio_history):
        """Convert Gradio history format to Gemma3 format."""
        gemma3_history = []
        for h in gradio_history:
            # Gradio: {'role': ..., 'content': ..., ...}
            # Gemma3: {'role': ..., 'content': [{'type': 'text', 'text': ...}]}
            if isinstance(h.get("content"), list):
                # Already in Gemma3 format; keep as is.
                gemma3_history.append(
                    {"role": h.get("role"), "content": h.get("content")}
                )
            else:
                gemma3_history.append(
                    {
                        "role": h.get("role"),
                        "content": [{"type": "text", "text": h.get("content", "")}],
                    }
                )

        # Add the current user input.
        gemma3_history.append(
            {"role": "user", "content": [{"type": "text", "text": message}]}
        )
        return gemma3_history
    def respond(self, message, history: list, system_prompt=None):
        """Generate a response to the user message using the LLM.

        Yields the accumulated response text after each streamed chunk.
        """
        # Add the system prompt at the beginning of the history.
        history.insert(
            0,
            {
                "role": "system",
                "content": system_prompt,
            },
        )

        # Debug
        print(f"Chat history length: {len(history)}")
        for h in history:
            print(f"Role: {h['role']}, Content: {h['content'][:100]} ...")

        gemma_formatted_history = self._convert_history(message, history)

        model = modal.Cls.from_name(self.MODAL_APP_NAME, "VLLMModel")()

        response = ""
        for chunk in model.generate_stream.remote_gen(gemma_formatted_history):
            response += chunk
            yield response

        # Add the full assistant response to the Gradio history.
        history.append({"role": "assistant", "content": response})
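    # Assumed wiring (sketch, not part of this module): `respond` matches the
    # signature Gradio's ChatInterface expects for a streaming chat function, e.g.
    #
    #   demo = gr.ChatInterface(
    #       fn=llm_call.respond,
    #       type="messages",
    #       additional_inputs=[gr.Textbox(label="System prompt")],
    #   )
    #
    # Each yield replaces the in-progress assistant message, which is why the
    # generator yields the accumulated text rather than per-chunk deltas.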
    def listup_heroes(self, book_content: str) -> list[str]:
        """
        List the hero names found in the book content.

        Args:
            book_content (str): The content of the book.

        Returns:
            list[str]: A list of hero names found in the book content.
        """
        system_prompt = (
            "Your task is to extract hero names from the provided book content. "
            "Return a list of hero names, each on a new line. "
            "Do not include any additional text or explanations. "
            "The hero names should be in the format: 'Hero Name'. "
            "If no heroes are found, return an empty list. "
            "Example output:\n"
            "Hero One\n"
            "Hero Two\n"
            "Hero Three\n"
        )
        prompts = [
            {
                "role": "system",
                "content": [{"type": "text", "text": system_prompt}],
            },
            {
                "role": "user",
                "content": [{"type": "text", "text": book_content}],
            },
        ]
        model = modal.Cls.from_name(self.MODAL_APP_NAME, "VLLMModel")()
        response = model.generate.remote(prompts)
        print("Response from LLM:", response)

        # Split the response by new lines and strip whitespace.
        hero_names = [name.strip() for name in response.split("\n") if name.strip()]
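        # e.g. a raw response of "Scrooge\nBob Cratchit\nTiny Tim\n" becomes
        # ["Scrooge", "Bob Cratchit", "Tiny Tim"]; any bullets or numbering the
        # model adds despite the prompt instructions are kept verbatim.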
        return hero_names

async def main():
    """
    Example usage.

    The Gradio app and the Modal server need to be running.
    """
    llm_call = LLMCall()
    mcp_client = MCPClient()

    # Select a book
    book_list_json = await mcp_client.get_book_list()
    book_name = json.loads(book_list_json)[0]
    print(
        f"Selected Book: {book_name}"
    )  # "A_Christmas_Carol_in_Prose_Being_a_Ghost_Story_of_Christmas_by_Charles_Dickens_13844"

    # Get the book content
    book_content = await mcp_client.get_book_content(book_name, 50000)
    print("Book content:\n", "======", sep="")
    print(book_content[:500].replace("\n", ""), "...")
    print("======")

    # Select a hero
    heroes = llm_call.listup_heroes(book_content)
    print("Extracted Heroes:", heroes)
    hero = heroes[0] if heroes else "No heroes found"
    print(f"Selected Hero: {hero}")  # "Scrooge"
    # Construct the system prompt
    # fmt: off
    system_prompt = (
        f"You are {hero}, a character from the book '{book_name}'. "
        "Behave and respond according to the personality and attitude of this character. "
        "For example, if the character is unfriendly, respond unfriendly; if the character is kind, respond kindly. "
        "Below is the book content:\n\n"
        + "=" * 20 + "\n"
        f"{book_content}\n\n"
        + "=" * 20 + "\n"
        # This DOES WORK as expected.
        "ζ₯ζ¬θͺγ§ηγγ¦γ"  # "Please respond in Japanese."
        # This also works, but sometimes not as expected.
        # "Please respond in Japanese."
        # This DOES NOT WORK as expected.
        # "IMPORTANT: You must ALWAYS respond in the EXACT same language as the user's message. "
        # "If the user writes in Japanese, you MUST respond in Japanese. "
        # "If the user writes in English, you MUST respond in English. "
        # "Never mix languages or respond in a different language than the user used. "
        # "This is a strict requirement - match the user's language exactly. "
    )
    # fmt: on
print(f"Book Content Length: {len(book_content)}") | |
print(f"System Prompt Length: {len(system_prompt)}") | |
# Generate a response | |
response = llm_call.respond( | |
message="γγγ«γ‘γ―γγζ©ε«γ―γγγγ§γγοΌ", | |
history=[], | |
system_prompt=system_prompt, | |
) | |
print("Response from LLM:") | |
for chunk in response: | |
print(chunk, end="", flush=True) | |
print() | |
if __name__ == "__main__":
    asyncio.run(main())