import aiohttp
import asyncio
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs, quote_plus
# Asynchronous request for a page
async def fetch(session, url):
    try:
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response:
            return await response.text()
    except Exception:
        # On any network error or timeout, return an empty string so callers can skip the page
        return ""
# Asynchronously extract the visible text of a page
async def get_page_text(session, url):
    html = await fetch(session, url)
    if not html:
        return "No text found"
    soup = BeautifulSoup(html, 'html.parser')
    body = soup.find('body')
    if body:
        return body.get_text(separator='\n', strip=True)
    return "No text found"
# Asynchronously search Google and collect result links
async def search_info(prompt):
    query = quote_plus(prompt)  # URL-encode the query (spaces, '&', non-ASCII, etc.)
    search_url = f"https://www.google.com/search?q={query}"
    # Google often rejects clients without a browser-like User-Agent
    headers = {"User-Agent": "Mozilla/5.0"}
    async with aiohttp.ClientSession(headers=headers) as session:
        html = await fetch(session, search_url)
    if not html:
        return []
    soup = BeautifulSoup(html, 'html.parser')
    links = []
    # On the HTML-only results page each hit is an <a href="/url?q=<target>&..."> wrapping an <h3>
    for item in soup.find_all('h3'):
        parent = item.find_parent('a')
        if parent and 'href' in parent.attrs:
            link = parent['href']
            parsed_url = urlparse(link)
            if parsed_url.path == '/url':
                query_params = parse_qs(parsed_url.query)
                if 'q' in query_params:
                    links.append(query_params['q'][0])
    return links
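# Illustrative sketch (not called anywhere): the extraction above relies on result
# anchors of the form <a href="/url?q=<target>&..."> wrapping an <h3>. The fragment
# below shows the same unwrapping logic on a hand-written placeholder snippet.
def _example_extract_link():
    sample = '<a href="/url?q=https://example.com/page&sa=U"><h3>Example</h3></a>'
    soup = BeautifulSoup(sample, 'html.parser')
    anchor = soup.find('h3').find_parent('a')
    params = parse_qs(urlparse(anchor['href']).query)
    return params['q'][0]  # -> "https://example.com/page"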
# Main asynchronous entry point
async def main():
    prompt = input("Enter a search query: ")
    results = await search_info(prompt)
    if not results:
        print("Nothing found.")
        return
    top_results = results[:5]  # Limit to 5 links for speed
    async with aiohttp.ClientSession() as session:
        tasks = [get_page_text(session, link) for link in top_results]
        texts = await asyncio.gather(*tasks)
    for link, text in zip(top_results, texts):
        print(f"Link: {link}")
        print(f"Text: {text}\n")
# Run the program
if __name__ == "__main__":
    asyncio.run(main())