dfa32412 committed on
Commit
c0b8476
·
verified ·
1 Parent(s): 3187d57

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from playwright.sync_api import sync_playwright
2
+
3
# Accumulates one record (url, method, headers) per request seen in the session.
all_request_headers_info = []


def handle_request(request):
    """Record the URL, method and headers of an intercepted request.

    Intended to be registered as a listener for the page's "request" event.
    Each call appends one dict with the keys "url", "method" and "headers"
    to the module-level ``all_request_headers_info`` list.

    Args:
        request: Object exposing ``url``, ``method`` and ``headers``
            attributes (presumably a Playwright ``Request`` -- the caller
            registers this function via ``page.on("request", ...)``).
    """
    all_request_headers_info.append({
        "url": request.url,
        "method": request.method,
        "headers": request.headers,
    })
17
+
18
+
19
_GROK_URL = "https://grok.com/"
_NEW_CONVERSATION_URL = "https://grok.com/rest/app-chat/conversations/new"
# Text fragments that identify a Cloudflare interstitial in the page body.
_CHALLENGE_MARKERS = ("请稍候…", "Just a moment...")
# Headers of the new-conversation request that get reported at the end.
_REPORTED_HEADERS = ("x-xai-request-id", "x-statsig-id", "user-agent")
# How long to wait for the new-conversation request after submitting (ms).
_REQUEST_WAIT_MS = 30000


def _report_cloudflare_state(page):
    """Detect a Cloudflare challenge page, wait for it to clear, take a screenshot."""
    title = page.title()
    print(f"Page title: {title}")

    html = page.content()  # fetched once; the markup is checked against several markers
    challenged = (
        any(marker in html for marker in _CHALLENGE_MARKERS)
        or "Cloudflare" in title
        or "Checking your browser" in title
    )
    if not challenged:
        print("Successfully navigated to the page.")
        page.screenshot(path="cf_success.png")
        return

    print("Still on a Cloudflare challenge page. Waiting longer or trying interaction...")
    # Wait until the body contains none of the challenge markers. Excluding
    # only one language would match immediately on a challenge page shown in
    # the other language and report a false "passed".
    cleared_selector = "body" + "".join(
        f":not(:has-text('{marker}'))" for marker in _CHALLENGE_MARKERS
    )
    try:
        page.wait_for_selector(cleared_selector, timeout=60000)
        print("Cloudflare challenge likely passed.")
        title = page.title()
        print(f"New page title: {title}")
        page.screenshot(path="cf_passed.png")
    except Exception as e:
        print(f"Failed to pass Cloudflare challenge after extended wait: {e}")
        page.screenshot(path="cf_failed.png")


def _submit_prompt(page):
    """Type the prompt and press submit; return False if either control is unusable."""
    try:
        page.get_by_label("向Grok提任何问题").fill("你好")
        print("Successfully entered '你好' into the textarea.")
    except Exception as e:
        print(f"Could not find or fill the textarea with aria-label '向Grok提任何问题'. Error: {e}")
        return False

    # Click inside expect_request so the function only returns once the
    # new-conversation request has actually been issued; otherwise the header
    # report could run before the request exists.
    clicked = False
    try:
        with page.expect_request(
            lambda req: req.url == _NEW_CONVERSATION_URL,
            timeout=_REQUEST_WAIT_MS,
        ):
            page.get_by_role("button", name="提交").click()
            clicked = True
            print("Successfully clicked the '提交' button.")
    except Exception as e:
        if not clicked:
            print(f"Could not find or click the button with aria-label '提交'. Error: {e}")
            return False
        # The click worked but the request never showed up: still report
        # whatever was captured instead of aborting.
        print(f"No request to {_NEW_CONVERSATION_URL} was observed after submitting: {e}")
    return True


def _print_cookies(context):
    """Print every cookie currently stored in the browser context."""
    print("\n--- Cookies ---")
    cookies = context.cookies()
    if not cookies:
        print("No cookies found.")
        return
    for cookie in cookies:
        print(
            f"Name: {cookie['name']}, Value: {cookie['value']}, Domain: {cookie['domain']}, Path: {cookie['path']}")


def _print_captured_headers():
    """Print the selected headers of every captured new-conversation request."""
    print("\n--- Request Headers (collected during the session) ---")
    if not all_request_headers_info:
        print("No requests were intercepted (this is unlikely if the page loaded).")
        return
    # The capture list holds every resource request (HTML, CSS, JS, XHR,
    # images, ...); only the new-conversation call is of interest.
    for req_info in all_request_headers_info:
        if req_info['url'] != _NEW_CONVERSATION_URL:
            continue
        headers = req_info['headers']
        # .get(): a missing header is reported as None rather than raising
        # KeyError and cutting the report short.
        datas = {name: headers.get(name) for name in _REPORTED_HEADERS}
        print(datas)


def main():
    """Open grok.com in Chromium, submit a prompt, and print cookies and headers.

    Flow: launch a headed Chromium, register ``handle_request`` on the page,
    navigate to the site, wait out a Cloudflare challenge if one is detected,
    fill the prompt box and click submit, then print the context's cookies and
    the selected headers of the new-conversation request. Screenshots are
    written to ``cf_success.png``, ``cf_passed.png`` or ``cf_failed.png``.

    Errors raised after the page has been created are printed, not re-raised;
    the browser is always closed on the way out.
    """
    with sync_playwright() as p:
        # chromium, firefox or webkit are available; headless=False shows the
        # browser window, True runs without one.
        browser = p.chromium.launch(
            headless=False,
            args=[
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',  # sometimes needed, though --shm-size is better
            ],
        )

        # user_agent, viewport, etc. can be configured on the context.
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0",
        )
        page = context.new_page()

        # Must be registered before navigating so that no request is missed.
        page.on("request", handle_request)

        print("Navigating to https://grok.com/ ...")
        try:
            page.goto(_GROK_URL, timeout=60000)
            print("Page loaded. Waiting for 10 seconds for dynamic content or further requests...")

            _report_cloudflare_state(page)

            # Give dynamic content time to settle before touching the form.
            page.wait_for_timeout(10000)

            if not _submit_prompt(page):
                return  # the finally clause below closes the browser

            _print_cookies(context)
            _print_captured_headers()
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            # Single close point for every exit path.
            print("\nClosing browser...")
            browser.close()
130
+
131
+
132
# Run the capture flow only when executed as a script, not on import.
if __name__ == "__main__":
    main()