import aiohttp
import asyncio
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs, quote_plus
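
# Assumption: Google usually rejects requests that carry aiohttp's default
# User-Agent, so a browser-like header is sent with every request.
# The exact string below is illustrative, not part of the original script.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}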
# Asynchronous request for a page; returns "" on any error
async def fetch(session, url):
    try:
        # Browser-like headers (see assumption above); 10-second total timeout
        async with session.get(url, headers=HEADERS, timeout=aiohttp.ClientTimeout(total=10)) as response:
            return await response.text()
    except Exception:
        return ""
# Asynchronously extract the visible text of a page
async def get_page_text(session, url):
    html = await fetch(session, url)
    if not html:
        return "No text found"
    soup = BeautifulSoup(html, 'html.parser')
    body = soup.find('body')
    if body:
        return body.get_text(separator='\n', strip=True)
    return "No text found"
# Asynchronous search: scrape Google's HTML results page.
# Note: this targets the legacy no-JS markup, where each result is an
# <a href="/url?q=..."> wrapping an <h3>; a layout change will break it.
async def search_info(prompt):
    query = quote_plus(prompt)  # proper URL encoding instead of a bare space replace
    search_url = f"https://www.google.com/search?q={query}"
    async with aiohttp.ClientSession() as session:
        html = await fetch(session, search_url)
    if not html:
        return []
    soup = BeautifulSoup(html, 'html.parser')
    links = []
    for item in soup.find_all('h3'):
        parent = item.find_parent('a')
        if parent and 'href' in parent.attrs:
            link = parent['href']
            parsed_url = urlparse(link)
            if parsed_url.path == '/url':
                # The real target URL is carried in the ?q= parameter of the redirect
                query_params = parse_qs(parsed_url.query)
                if 'q' in query_params:
                    links.append(query_params['q'][0])
    return links
# Main asynchronous entry point
async def main():
    prompt = input("Enter a search query: ")
    results = await search_info(prompt)
    if not results:
        print("Nothing found.")
        return
    async with aiohttp.ClientSession() as session:
        tasks = [get_page_text(session, link) for link in results[:5]]  # limit to 5 links for speed
        texts = await asyncio.gather(*tasks)
    for link, text in zip(results[:5], texts):
        print(f"Link: {link}")
        print(f"Text: {text}\n")
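
# Optional sketch, not wired into main(): instead of slicing to 5 links, the
# same result can be achieved for all links by capping concurrency with a
# semaphore. The limit of 5 here is an illustrative assumption.
async def get_page_text_bounded(session, url, sem):
    async with sem:
        return await get_page_text(session, url)
# Usage: sem = asyncio.Semaphore(5)
#        tasks = [get_page_text_bounded(session, u, sem) for u in results]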
# Program entry point
if __name__ == "__main__":
    asyncio.run(main())
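
# To run: pip install aiohttp beautifulsoup4, then execute this file with
# Python 3.7 or newer (asyncio.run requires 3.7+).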