modeloraggemini / app.py
import os
from llama_cpp import Llama
from transformers import AutoTokenizer, AutoModel
import torch
from pypdf import PdfReader
import chromadb
import gradio as gr
from huggingface_hub import hf_hub_download
# --- Configuration ---
MODEL_PATH = "goodemagod/sommy_sanonig4"  # Hugging Face model repository
MODEL_FILENAME = "sommy_sanonig.gguf"  # GGUF file name
KNOWLEDGE_BASE_PATH = "conocimiento"
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
VECTOR_STORE_NAME = "mi_base_de_conocimiento"
MODEL_LOCAL_DIR = "./modelo"
MODEL_LOCAL_PATH = os.path.join(MODEL_LOCAL_DIR, MODEL_FILENAME)
# --- Global Variables ---
llm_instance = None
vector_store = None
embedding_tokenizer = None
embedding_model = None
initialization_error = None
# --- Knowledge Loading and Processing Functions ---
def cargar_documentos(ruta_carpeta):
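    """Read every PDF file in ruta_carpeta and return a list with the extracted text of each page."""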
    textos = []
    for filename in os.listdir(ruta_carpeta):
        if filename.endswith(".pdf"):
            filepath = os.path.join(ruta_carpeta, filename)
            try:
                with open(filepath, 'rb') as pdf_file:
                    pdf_reader = PdfReader(pdf_file)
                    for page in pdf_reader.pages:
                        texto = page.extract_text()
                        if texto:  # extract_text() can return None or an empty string
                            textos.append(texto)
            except Exception as e:
                print(f"Error reading PDF {filename}: {e}")
    return textos
def dividir_en_fragmentos(textos, chunk_size=500, chunk_overlap=50):
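    """Split each text into character chunks of chunk_size with chunk_overlap characters of overlap."""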
    fragmentos = []
    for texto in textos:
        for i in range(0, len(texto), chunk_size - chunk_overlap):
            chunk = texto[i:i + chunk_size]
            if len(chunk.strip()) > 0:
                fragmentos.append(chunk)
    return fragmentos
# --- Embedding Generation Functions ---
def cargar_modelo_embeddings():
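    """Load the tokenizer and model used to embed knowledge-base fragments and questions."""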
    try:
        tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL)
        model = AutoModel.from_pretrained(EMBEDDING_MODEL)
        print("Embedding model loaded.")
        return tokenizer, model
    except Exception as e:
        error_message = f"Error loading the embedding model: {e}"
        print(error_message)
        return None, None
def generar_embedding(texto, tokenizer, model):
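    """Return a NumPy embedding vector for texto, or None if embedding fails."""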
    try:
        inputs = tokenizer(texto, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            outputs = model(**inputs)
        # Mean pooling over the token embeddings, weighted by the attention mask,
        # as recommended for sentence-transformers/all-mpnet-base-v2.
        mask = inputs['attention_mask'].unsqueeze(-1).float()
        embedding = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)
        return embedding.squeeze().numpy()
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return None
# --- Vector Store Functions ---
def crear_vector_store(fragmentos, tokenizer, embedding_model, nombre_coleccion):
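    """Embed the fragments and store them in a ChromaDB collection; returns the collection, or None on failure."""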
    client = chromadb.Client()
    collection = client.get_or_create_collection(nombre_coleccion)
    embeddings = []
    valid_fragmentos = []
    for f in fragmentos:
        embedding = generar_embedding(f, tokenizer, embedding_model)
        if embedding is not None:
            embeddings.append(embedding.tolist())  # Chroma expects plain Python lists
            valid_fragmentos.append(f)
    if not valid_fragmentos:
        print("No fragments could be embedded; the vector store was not created.")
        return None
    try:
        collection.add(
            embeddings=embeddings,
            documents=valid_fragmentos,
            ids=[f"frag_{i}" for i in range(len(valid_fragmentos))]
        )
        print("Knowledge base vectorized.")
        return collection
    except Exception as e:
        error_message = f"Error creating the vector store: {e}"
        print(error_message)
        return None
def buscar_en_conocimiento(pregunta, collection, tokenizer, embedding_model, top_n=3):
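    """Embed the question and return the top_n most similar fragments from the collection."""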
    embedding_pregunta = generar_embedding(pregunta, tokenizer, embedding_model)
    if embedding_pregunta is None or collection is None:
        return []
    resultados = collection.query(
        query_embeddings=[embedding_pregunta.tolist()],
        n_results=top_n
    )
    return resultados['documents'][0] if resultados and resultados['documents'] else []
# --- LLM Model Loading ---
def cargar_modelo_llm():
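    """Download the GGUF model from the Hub if it is not cached locally and load it with llama.cpp."""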
    global initialization_error
    os.makedirs(MODEL_LOCAL_DIR, exist_ok=True)
    if not os.path.exists(MODEL_LOCAL_PATH):
        print(f"Downloading model from the Hugging Face Hub: {MODEL_PATH}/{MODEL_FILENAME} to {MODEL_LOCAL_PATH}")
        try:
            hf_hub_download(repo_id=MODEL_PATH, filename=MODEL_FILENAME, local_dir=MODEL_LOCAL_DIR)
            print(f"Model downloaded successfully to: {MODEL_LOCAL_PATH}")
        except Exception as e:
            error_message = f"Error downloading the model: {e}"
            print(error_message)
            initialization_error = error_message
            return None
    try:
        llm = Llama(model_path=MODEL_LOCAL_PATH)
        print("LLM model loaded.")
        return llm
    except Exception as e:
        error_message = f"Error loading the LLM model from {MODEL_LOCAL_PATH}: {e}"
        print(error_message)
        initialization_error = error_message
        return None
# --- Main Chatbot Function ---
def chatbot(pregunta, historial):
    global llm_instance, vector_store, embedding_tokenizer, embedding_model, initialization_error
    if initialization_error:
        return f"Error during initialization: {initialization_error}"
    if llm_instance is None or vector_store is None or embedding_tokenizer is None or embedding_model is None:
        return "Loading resources. Please wait..."
    contexto = buscar_en_conocimiento(pregunta, vector_store, embedding_tokenizer, embedding_model)
    # gr.ChatInterface passes the history as [user, assistant] pairs.
    historial_str = "\n".join([f"User: {turn[0]}\nAssistant: {turn[1]}" for turn in historial])
    prompt = f"Based on the following information: '{contexto}' and the previous conversation: '{historial_str}', answer the question: '{pregunta}'"
    try:
        output = llm_instance(
            prompt,
            max_tokens=256,
            temperature=0.7,
            stop=["</s>"],
            echo=False
        )
        respuesta = output['choices'][0]['text'].strip()
    except Exception as e:
        respuesta = f"Error generating the answer: {e}"
    # gr.ChatInterface manages the chat history itself, so only the answer string is returned.
    return respuesta
# --- Global Initialization ---
def inicializar():
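    """Load the LLM, the embedding model, and the vectorized knowledge base into the global variables."""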
    global llm_instance, vector_store, embedding_tokenizer, embedding_model, initialization_error
    print("Initializing resources...")
    llm_instance = cargar_modelo_llm()
    if llm_instance:
        embedding_tokenizer, embedding_model = cargar_modelo_embeddings()
        if embedding_tokenizer and embedding_model:
            textos_pdf = cargar_documentos(KNOWLEDGE_BASE_PATH)
            fragmentos = dividir_en_fragmentos(textos_pdf)
            vector_store = crear_vector_store(fragmentos, embedding_tokenizer, embedding_model, VECTOR_STORE_NAME)
            if not vector_store:
                initialization_error = "Error creating the vectorized knowledge base."
        else:
            initialization_error = "Error loading the embedding model."
    else:
        initialization_error = "Error initializing the LLM model."
# --- Gradio Interface ---
if __name__ == "__main__":
    inicializar()
    if initialization_error:
        print(f"Error during initialization: {initialization_error}")
    else:
        interface = gr.ChatInterface(
            fn=chatbot,
            title="Chatbot with Knowledge Base",
            description="Ask me anything based on the loaded PDF documents.",
            examples=["What is the main document about?", "What is the key idea of the second file?"]
        )
        interface.launch()