Spaces:
Runtime error
Create app.py
app.py
ADDED
@@ -0,0 +1,153 @@
import os
from llama_cpp import Llama
from transformers import AutoTokenizer, AutoModel
import torch
from pypdf import PdfReader
import chromadb
import gradio as gr

# --- Configuration ---
MODEL_PATH = "TheBloke/Llama-2-7B-Chat-GGML"  # Hugging Face repo (downloaded if not present locally)
MODEL_FILENAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"  # Name of the GGML file inside the repo
KNOWLEDGE_BASE_PATH = "conocimiento"
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
VECTOR_STORE_NAME = "mi_base_de_conocimiento"

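# Note: recent llama-cpp-python releases only load GGUF model files; a GGMLv3 .bin file
# like the one configured above may need an older llama-cpp-python version (or a GGUF
# conversion of the model) to load without a runtime error.
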
# --- Knowledge Loading and Processing Functions ---
def cargar_documentos(ruta_carpeta):
    textos = []
    for filename in os.listdir(ruta_carpeta):
        if filename.endswith(".pdf"):
            filepath = os.path.join(ruta_carpeta, filename)
            with open(filepath, 'rb') as pdf_file:
                pdf_reader = PdfReader(pdf_file)
                for page in pdf_reader.pages:
                    textos.append(page.extract_text())
    return textos

def dividir_en_fragmentos(textos, chunk_size=500, chunk_overlap=50):
    fragmentos = []
    for texto in textos:
        for i in range(0, len(texto), chunk_size - chunk_overlap):
            chunk = texto[i:i + chunk_size]
            if len(chunk.strip()) > 0:
                fragmentos.append(chunk)
    return fragmentos

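# Illustrative example: with chunk_size=500 and chunk_overlap=50, a 1200-character page
# produces fragments starting at offsets 0, 450 and 900, so consecutive fragments share
# 50 characters of context.
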
# --- Embedding Generation ---
def cargar_modelo_embeddings():
    tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL)
    model = AutoModel.from_pretrained(EMBEDDING_MODEL)
    return tokenizer, model

def generar_embedding(texto, tokenizer, model):
    inputs = tokenizer(texto, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)
    # Attention-mask-weighted mean pooling over the token embeddings, as recommended
    # for sentence-transformers models (pooler_output is not a trained sentence vector).
    mask = inputs['attention_mask'].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    return (summed / mask.sum(dim=1).clamp(min=1e-9)).squeeze().numpy()

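# Example (illustrative): generar_embedding("hola mundo", tokenizer, model) returns a
# 768-dimensional numpy vector, the embedding size of all-mpnet-base-v2.
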
# --- Vector Store Functions ---
def crear_vector_store(fragmentos, tokenizer, embedding_model, nombre_coleccion):
    client = chromadb.Client()
    collection = client.get_or_create_collection(nombre_coleccion)
    # Chroma expects plain lists of floats, so the numpy vectors are converted with .tolist()
    embeddings = [generar_embedding(f, tokenizer, embedding_model).tolist() for f in fragmentos]
    collection.add(
        embeddings=embeddings,
        documents=fragmentos,
        ids=[f"frag_{i}" for i in range(len(fragmentos))]
    )
    return collection

def buscar_en_conocimiento(pregunta, collection, tokenizer, embedding_model, top_n=3):
    embedding_pregunta = generar_embedding(pregunta, tokenizer, embedding_model).tolist()
    resultados = collection.query(
        query_embeddings=[embedding_pregunta],
        n_results=top_n
    )
    return resultados['documents'][0] if resultados and resultados['documents'] else []

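# Note: chromadb.Client() keeps the collection in memory, so the index is rebuilt on
# every start; in recent chromadb versions, chromadb.PersistentClient(path=...) would be
# an alternative if the index should survive restarts.
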
# --- LLM Loading ---
def cargar_modelo_llm():
    # Download the model file if it does not exist locally
    model_path_local = os.path.join("./modelo", MODEL_FILENAME)
    if not os.path.exists("./modelo"):
        os.makedirs("./modelo", exist_ok=True)
    if not os.path.exists(model_path_local):
        from huggingface_hub import hf_hub_download
        print(f"Descargando modelo desde Hugging Face Hub: {MODEL_PATH}")
        try:
            hf_hub_download(repo_id=MODEL_PATH, filename=MODEL_FILENAME, local_dir="./modelo")
            print(f"Modelo descargado exitosamente en: {model_path_local}")
        except Exception as e:
            print(f"Error al descargar el modelo: {e}")
            return None
    try:
        llm = Llama(model_path=model_path_local)
        return llm
    except Exception as e:
        print(f"Error al cargar el modelo LLM: {e}")
        return None

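# Note: Llama() uses a small default context window (n_ctx=512 in many llama-cpp-python
# versions); a prompt built from three 500-character fragments plus chat history can
# exceed it, so passing something like Llama(model_path=..., n_ctx=2048) may be needed.
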
# --- Main Chatbot Function ---
def chatbot(pregunta, historial):
    global llm_instance, vector_store, embedding_tokenizer, embedding_model

    if llm_instance is None or vector_store is None or embedding_tokenizer is None or embedding_model is None:
        return "Cargando recursos. Por favor, espera..."  # resources are still loading

    contexto = "\n".join(buscar_en_conocimiento(pregunta, vector_store, embedding_tokenizer, embedding_model))
    # gr.ChatInterface passes the history as a list of (user message, bot reply) pairs
    historial_str = "\n".join(f"Usuario: {usuario}\nAgente: {agente}" for usuario, agente in historial)

    prompt = f"Basado en la siguiente información: '{contexto}' y la conversación anterior: '{historial_str}', responde a la pregunta: '{pregunta}'"

    try:
        output = llm_instance(
            prompt,
            max_tokens=256,
            temperature=0.7,
            stop=["</s>"],
            echo=False
        )
        respuesta = output['choices'][0]['text'].strip()
    except Exception as e:
        respuesta = f"Error al generar la respuesta: {e}"

    # gr.ChatInterface only expects the reply text and manages the chat history itself
    return respuesta

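# Note: the Llama-2-chat checkpoints are trained on the [INST] ... [/INST] prompt
# template; the free-form prompt above works, but wrapping it in that template would
# likely improve answer quality.
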
# --- Global Initialization ---
llm_instance = None
vector_store = None
embedding_tokenizer = None
embedding_model = None

def inicializar():
    global llm_instance, vector_store, embedding_tokenizer, embedding_model
    print("Inicializando recursos...")
    llm_instance = cargar_modelo_llm()
    if llm_instance:
        print("Modelo LLM cargado.")
        textos_pdf = cargar_documentos(KNOWLEDGE_BASE_PATH)
        fragmentos = dividir_en_fragmentos(textos_pdf)
        embedding_tokenizer, embedding_model = cargar_modelo_embeddings()
        print("Modelo de embeddings cargado.")
        vector_store = crear_vector_store(fragmentos, embedding_tokenizer, embedding_model, VECTOR_STORE_NAME)
        print("Base de conocimientos vectorizada.")
    else:
        print("Error al inicializar el modelo LLM. La aplicación no funcionará correctamente.")

# --- Gradio Interface ---
if __name__ == "__main__":
    inicializar()

    if llm_instance and vector_store and embedding_tokenizer and embedding_model:
        interface = gr.ChatInterface(
            fn=chatbot,
            title="Chatbot con Base de Conocimiento",
            description="Pregúntame cualquier cosa basada en los documentos PDF cargados.",
            examples=["¿De qué trata el documento principal?", "¿Cuál es la idea clave del segundo archivo?"]
        )
        interface.launch()
    else:
        print("No se pudieron cargar todos los recursos necesarios. La interfaz de Gradio no se iniciará.")
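A requirements.txt along these lines would also be needed for the Space to build (a sketch, assuming the standard PyPI package names for the imports above):

llama-cpp-python
transformers
torch
pypdf
chromadb
gradio
huggingface_hub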