|
import logging
import os
import unicodedata

import bs4
from deep_translator import GoogleTranslator
from dotenv import load_dotenv, find_dotenv
from langchain.agents import initialize_agent, AgentType
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun, DuckDuckGoSearchRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper, DuckDuckGoSearchAPIWrapper
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langdetect import detect
|
|
|
|
|
# Load variables from the nearest .env file into the process environment
# before anything below reads os.environ.
load_dotenv(find_dotenv())

# Wikipedia lookup: one result, content capped at 200 characters.
_wikipedia_api = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki = WikipediaQueryRun(api_wrapper=_wikipedia_api)

# arXiv lookup: same limits as the Wikipedia tool.
_arxiv_api = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv = ArxivQueryRun(api_wrapper=_arxiv_api)

# DuckDuckGo web search configured with region "in-en", time filter "y"
# and at most two results.
_duckduckgo_api = DuckDuckGoSearchAPIWrapper(region="in-en", time="y", max_results=2)
duckduckgo_search = DuckDuckGoSearchRun(api_wrapper=_duckduckgo_api)

# Tool set handed to the agent built in get_conversational_agent().
tools = [wiki, arxiv, duckduckgo_search]
|
|
|
# langdetect reports Chinese variants in lowercase ("zh-cn", "zh-tw"), while
# deep_translator's GoogleTranslator only accepts the mixed-case codes.
_LANGDETECT_TO_GOOGLE = {"zh-cn": "zh-CN", "zh-tw": "zh-TW"}


def translate_to_english(text):
    """Detect the language of *text* and translate it to English.

    Args:
        text: The user-supplied string to translate.

    Returns:
        A ``(translated_text, language_code)`` tuple. English input is
        returned untouched with ``"en"``. The language code is the one passed
        to GoogleTranslator, so it can be given to ``translate_back`` as-is.
        If detection or translation fails for any reason, the original text
        is returned together with the code ``"unknown"``.
    """
    try:
        detected_lang = detect(text)
        if detected_lang == "en":
            return text, "en"

        # Normalise codes that langdetect and GoogleTranslator spell differently;
        # without this, Chinese input always ended up in the fallback below.
        google_lang = _LANGDETECT_TO_GOOGLE.get(detected_lang, detected_lang)
        translated_text = GoogleTranslator(source=google_lang, target="en").translate(text)
        return translated_text, google_lang
    except Exception as exc:
        # Best-effort: translation problems must not break the caller, but they
        # should be visible in the logs rather than silently swallowed.
        logging.getLogger(__name__).warning("translate_to_english failed: %s", exc)
        return text, "unknown"
|
|
|
def translate_back(text, target_lang):
    """Translate English *text* into *target_lang*.

    Args:
        text: English text to translate.
        target_lang: Language code to translate into.

    Returns:
        The translated string, or *text* unchanged when the target is
        already English or the translation attempt raises.
    """
    # Nothing to do when the caller already wants English.
    if target_lang == "en":
        return text

    try:
        translator = GoogleTranslator(source="en", target=target_lang)
        return translator.translate(text)
    except Exception:
        # Best-effort: hand back the untranslated text on any failure.
        return text
|
|
|
def load_llm(model="gemini-1.5-flash", temperature=0.7):
    """Build the Gemini chat model used by the agent.

    Args:
        model: Gemini model name passed to ``ChatGoogleGenerativeAI``.
        temperature: Sampling temperature passed to the model.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance configured with the API key
        read from the ``GEMINI_API_KEY`` environment variable.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` is unset, empty or only whitespace.
    """
    # Strip so a blank or whitespace-padded value is not passed on as a key.
    key = (os.environ.get("GEMINI_API_KEY") or "").strip()
    if not key:
        raise ValueError("❌ GEMINI_API_KEY chưa được thiết lập trong biến môi trường.")

    return ChatGoogleGenerativeAI(
        model=model,
        temperature=temperature,
        google_api_key=key,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_docs(docs):
    """Concatenate the ``page_content`` of each document in *docs*.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        All page contents joined with a blank line between consecutive
        documents; an empty string when *docs* is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)
|
|
|
|
|
|
|
def get_conversational_agent():
    """Create an agent wired to the module's tools and LLM.

    Returns:
        The executor produced by ``initialize_agent`` using the
        zero-shot ReAct description agent type, the module-level ``tools``
        list and the model from ``load_llm()``.
    """
    llm = load_llm()

    # Runtime options for the agent: quiet, no intermediate steps in the
    # result, at most five iterations, and parsing errors handled rather
    # than raised.
    agent_options = {
        "agent": AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        "verbose": False,
        "return_intermediate_steps": False,
        "max_iterations": 5,
        "handle_parsing_errors": True,
    }
    return initialize_agent(tools=tools, llm=llm, **agent_options)
|
|
|
def _normalize_verdict(raw_output):
    """Reduce an evaluator reply to a canonical form for comparison."""
    # NFC so composed and decomposed Vietnamese diacritics compare equal.
    verdict = unicodedata.normalize("NFC", str(raw_output))
    # Drop surrounding whitespace, quotes and trailing punctuation the model
    # may add around the one-word verdict.
    verdict = verdict.strip().strip("\"'“”‘’`.!").strip()
    return verdict.casefold()


def ask_gemini_to_evaluate(question, answer):
    """Ask the agent whether *answer* is an acceptable reply to *question*.

    Args:
        question: The user's question.
        answer: The candidate answer to be judged.

    Returns:
        True when the agent's verdict is "Hợp lý" (ignoring case, surrounding
        quotes/punctuation and Unicode normalisation form), False for any
        other verdict. Also True when the evaluation itself fails, so the
        candidate answer is kept if the evaluator is unavailable.
    """
    try:
        agent = get_conversational_agent()

        prompt = (
            "\n"
            f"Câu hỏi: {question}\n"
            f"Câu trả lời từ AI: {answer}\n"
            'Hãy trả lời duy nhất một từ: "Hợp lý" nếu câu trả lời tốt, '
            'hoặc "Không hợp lý" nếu câu trả lời sai hoặc thiếu thông tin.\n'
        )

        response = agent.invoke(prompt)
        return _normalize_verdict(response["output"]) == _normalize_verdict("Hợp lý")
    except Exception:
        # Fail open on purpose, but record why. ``Exception`` (not a bare
        # except) lets KeyboardInterrupt and SystemExit propagate.
        logging.getLogger(__name__).exception(
            "Answer evaluation failed; accepting the answer as-is"
        )
        return True
|
class AIAgent:
    """Second-opinion agent that re-answers questions whose answer is judged poor.

    ``ai_agent`` keeps a supplied answer when ``ask_gemini_to_evaluate``
    accepts it; otherwise it asks the tool-using agent for a fresh answer,
    translating the question to English and the result back.
    """

    def __init__(self):
        """Build the loader, splitter, embeddings, agent and prompt template.

        NOTE(review): only ``self.agent`` is used by the methods visible in
        this class; the other attributes are presumably consumed elsewhere.
        """
        # Web loader restricted to the post's content, title and header nodes.
        self.loader = WebBaseLoader(
            web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
            bs_kwargs=dict(
                parse_only=bs4.SoupStrainer(
                    class_=("post-content", "post-title", "post-header")
                )
            ),
        )

        # Token-based splitter: 300-token chunks with a 50-token overlap.
        self.text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=300,
            chunk_overlap=50
        )

        self.embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.agent = get_conversational_agent()

        self.prompt = ChatPromptTemplate.from_template(
            "Answer the question based only on the following context:\n{context}\n\nQuestion: {question}"
        )

    def ai_agent(self, question, answer):
        """Return *answer* if it is judged acceptable, else a regenerated one.

        Args:
            question: The user's question, in any language.
            answer: The candidate answer produced upstream.

        Returns:
            *answer* unchanged when the evaluator accepts it; otherwise the
            agent's own answer translated back to the question's language.
            If anything fails along the way, ``"Server: "`` followed by the
            original *answer*.
        """
        try:
            if ask_gemini_to_evaluate(question, answer):
                return answer

            translated_question, original_lang = translate_to_english(question)

            # Keep the agent response in its own variable: rebinding ``answer``
            # here made the fallback below leak the raw response dict.
            response = self.agent.invoke(translated_question)

            return translate_back(response["output"], original_lang)
        except Exception:
            logging.getLogger(__name__).exception(
                "Agent fallback failed; returning the original answer"
            )
            return f"Server: {answer}"
|
|
|
|