modeloraggemini / app.py
import os
from llama_cpp import Llama
from transformers import AutoTokenizer, AutoModel
import torch
from pypdf import PdfReader
import chromadb
import gradio as gr
from huggingface_hub import hf_hub_download
# --- Configuration ---
MODEL_PATH = "goodemagod/sommy_sanonig4"  # Hugging Face model repository
MODEL_FILENAME = "sommy_sanonig.gguf"  # GGUF file name
KNOWLEDGE_BASE_PATH = "conocimiento"
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
VECTOR_STORE_NAME = "mi_base_de_conocimiento"
MODEL_LOCAL_DIR = "./modelo"
MODEL_LOCAL_PATH = os.path.join(MODEL_LOCAL_DIR, MODEL_FILENAME)
# --- Global Variables ---
llm_instance = None
vector_store = None
embedding_tokenizer = None
embedding_model = None
initialization_error = None
# --- Knowledge Loading and Processing Functions ---
def cargar_documentos(ruta_carpeta):
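    """Read every PDF file in ruta_carpeta and return a list with the extracted text of each page."""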
    textos = []
    for filename in os.listdir(ruta_carpeta):
        if filename.endswith(".pdf"):
            filepath = os.path.join(ruta_carpeta, filename)
            try:
                with open(filepath, 'rb') as pdf_file:
                    pdf_reader = PdfReader(pdf_file)
                    for page in pdf_reader.pages:
                        texto = page.extract_text()
                        if texto:  # extract_text() can return None or an empty string
                            textos.append(texto)
            except Exception as e:
                print(f"Error reading PDF {filename}: {e}")
    return textos
def dividir_en_fragmentos(textos, chunk_size=500, chunk_overlap=50):
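    """Split each text into character chunks of chunk_size with chunk_overlap characters of overlap."""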
    fragmentos = []
    for texto in textos:
        for i in range(0, len(texto), chunk_size - chunk_overlap):
            chunk = texto[i:i + chunk_size]
            if len(chunk.strip()) > 0:
                fragmentos.append(chunk)
    return fragmentos
# --- Embedding Generation Functions ---
def cargar_modelo_embeddings():
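    """Load the tokenizer and model used to embed knowledge-base fragments and questions."""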
    try:
        tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL)
        model = AutoModel.from_pretrained(EMBEDDING_MODEL)
        print("Embedding model loaded.")
        return tokenizer, model
    except Exception as e:
        error_message = f"Error loading the embedding model: {e}"
        print(error_message)
        return None, None
def generar_embedding(texto, tokenizer, model):
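    """Return a NumPy embedding vector for texto, or None if embedding fails."""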
    try:
        inputs = tokenizer(texto, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            outputs = model(**inputs)
        # Mean pooling over the token embeddings, weighted by the attention mask,
        # as recommended for sentence-transformers/all-mpnet-base-v2.
        mask = inputs['attention_mask'].unsqueeze(-1).float()
        embedding = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)
        return embedding.squeeze().numpy()
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return None
# --- Vector Store Functions ---
def crear_vector_store(fragmentos, tokenizer, embedding_model, nombre_coleccion):
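    """Embed the fragments and store them in a ChromaDB collection; returns the collection, or None on failure."""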
    client = chromadb.Client()
    collection = client.get_or_create_collection(nombre_coleccion)
    embeddings = []
    valid_fragmentos = []
    for f in fragmentos:
        embedding = generar_embedding(f, tokenizer, embedding_model)
        if embedding is not None:
            embeddings.append(embedding.tolist())  # Chroma expects plain Python lists
            valid_fragmentos.append(f)
    if not valid_fragmentos:
        print("No fragments could be embedded; the vector store was not created.")
        return None
    try:
        collection.add(
            embeddings=embeddings,
            documents=valid_fragmentos,
            ids=[f"frag_{i}" for i in range(len(valid_fragmentos))]
        )
        print("Knowledge base vectorized.")
        return collection
    except Exception as e:
        error_message = f"Error creating the vector store: {e}"
        print(error_message)
        return None
def buscar_en_conocimiento(pregunta, collection, tokenizer, embedding_model, top_n=3):
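    """Embed the question and return the top_n most similar fragments from the collection."""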
    embedding_pregunta = generar_embedding(pregunta, tokenizer, embedding_model)
    if embedding_pregunta is None or collection is None:
        return []
    resultados = collection.query(
        query_embeddings=[embedding_pregunta.tolist()],
        n_results=top_n
    )
    return resultados['documents'][0] if resultados and resultados['documents'] else []
# --- LLM Model Loading ---
def cargar_modelo_llm():
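    """Download the GGUF model from the Hub if it is not cached locally and load it with llama.cpp."""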
    global initialization_error
    os.makedirs(MODEL_LOCAL_DIR, exist_ok=True)
    if not os.path.exists(MODEL_LOCAL_PATH):
        print(f"Downloading model from the Hugging Face Hub: {MODEL_PATH}/{MODEL_FILENAME} to {MODEL_LOCAL_PATH}")
        try:
            hf_hub_download(repo_id=MODEL_PATH, filename=MODEL_FILENAME, local_dir=MODEL_LOCAL_DIR)
            print(f"Model downloaded successfully to: {MODEL_LOCAL_PATH}")
        except Exception as e:
            error_message = f"Error downloading the model: {e}"
            print(error_message)
            initialization_error = error_message
            return None
    try:
        llm = Llama(model_path=MODEL_LOCAL_PATH)
        print("LLM model loaded.")
        return llm
    except Exception as e:
        error_message = f"Error loading the LLM model from {MODEL_LOCAL_PATH}: {e}"
        print(error_message)
        initialization_error = error_message
        return None
# --- Main Chatbot Function ---
def chatbot(pregunta, historial):
    global llm_instance, vector_store, embedding_tokenizer, embedding_model, initialization_error
    if initialization_error:
        return f"Error during initialization: {initialization_error}"
    if llm_instance is None or vector_store is None or embedding_tokenizer is None or embedding_model is None:
        return "Loading resources. Please wait..."
    contexto = buscar_en_conocimiento(pregunta, vector_store, embedding_tokenizer, embedding_model)
    # gr.ChatInterface passes the history as [user, assistant] pairs.
    historial_str = "\n".join([f"User: {turn[0]}\nAssistant: {turn[1]}" for turn in historial])
    prompt = f"Based on the following information: '{contexto}' and the previous conversation: '{historial_str}', answer the question: '{pregunta}'"
    try:
        output = llm_instance(
            prompt,
            max_tokens=256,
            temperature=0.7,
            stop=["</s>"],
            echo=False
        )
        respuesta = output['choices'][0]['text'].strip()
    except Exception as e:
        respuesta = f"Error generating the answer: {e}"
    # gr.ChatInterface manages the chat history itself, so only the answer string is returned.
    return respuesta
# --- Global Initialization ---
def inicializar():
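    """Load the LLM, the embedding model, and the vectorized knowledge base into the global variables."""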
    global llm_instance, vector_store, embedding_tokenizer, embedding_model, initialization_error
    print("Initializing resources...")
    llm_instance = cargar_modelo_llm()
    if llm_instance:
        embedding_tokenizer, embedding_model = cargar_modelo_embeddings()
        if embedding_tokenizer and embedding_model:
            textos_pdf = cargar_documentos(KNOWLEDGE_BASE_PATH)
            fragmentos = dividir_en_fragmentos(textos_pdf)
            vector_store = crear_vector_store(fragmentos, embedding_tokenizer, embedding_model, VECTOR_STORE_NAME)
            if not vector_store:
                initialization_error = "Error creating the vectorized knowledge base."
        else:
            initialization_error = "Error loading the embedding model."
    else:
        initialization_error = "Error initializing the LLM model."
# --- Gradio Interface ---
if __name__ == "__main__":
    inicializar()
    if initialization_error:
        print(f"Error during initialization: {initialization_error}")
    else:
        interface = gr.ChatInterface(
            fn=chatbot,
            title="Chatbot with Knowledge Base",
            description="Ask me anything based on the loaded PDF documents.",
            examples=["What is the main document about?", "What is the key idea of the second file?"]
        )
        interface.launch()