freddyaboulton HF staff committed on
Commit
f4c39f1
·
1 Parent(s): e6306f4

Take screenshots

Browse files
Files changed (4) hide show
  1. app.py +12 -5
  2. processing.py +53 -12
  3. requirements.txt +4 -1
  4. screenshot.py +15 -0
app.py CHANGED
@@ -8,18 +8,25 @@ import datetime
8
 
9
  app = FastAPI()
10
 
 
11
  @app.get("/")
12
  def index():
13
- return HTMLResponse("""
 
14
  <p>Backend for gradio theme gallery.
15
  <a href="https://huggingface.co/spaces/freddyaboulton/theme-gallery-static">https://huggingface.co/spaces/freddyaboulton/theme-gallery-stati</a>
16
- </p>""")
 
 
17
 
18
  scheduler = BackgroundScheduler()
19
- scheduler.add_job(func=process_spaces, trigger="interval", seconds=360,
20
- next_run_time=datetime.datetime.now())
 
 
 
 
21
  scheduler.start()
22
 
23
  if __name__ == "__main__":
24
  uvicorn.run(app, port=7860, host="0.0.0.0")
25
-
 
8
 
9
  app = FastAPI()
10
 
11
+
12
  @app.get("/")
13
  def index():
14
+ return HTMLResponse(
15
+ """
16
  <p>Backend for gradio theme gallery.
17
  <a href="https://huggingface.co/spaces/freddyaboulton/theme-gallery-static">https://huggingface.co/spaces/freddyaboulton/theme-gallery-stati</a>
18
+ </p>"""
19
+ )
20
+
21
 
22
  scheduler = BackgroundScheduler()
23
+ scheduler.add_job(
24
+ func=process_spaces,
25
+ trigger="interval",
26
+ seconds=1200,
27
+ next_run_time=datetime.datetime.now(),
28
+ )
29
  scheduler.start()
30
 
31
  if __name__ == "__main__":
32
  uvicorn.run(app, port=7860, host="0.0.0.0")
 
processing.py CHANGED
@@ -10,6 +10,10 @@ import json
10
  import datetime
11
  import tqdm
12
  import requests
 
 
 
 
13
 
14
  class SpaceData(TypedDict):
15
  id: str
@@ -18,20 +22,32 @@ class SpaceData(TypedDict):
18
  lastModified: str
19
  status: str
20
 
 
21
  repo = huggingface_hub.Repository(
22
  local_dir="data",
23
  repo_type="dataset",
24
  clone_from="freddyaboulton/gradio-theme-subdomains",
25
- token=os.getenv("HF_TOKEN")
26
  )
27
  repo.git_pull()
28
 
 
 
 
 
 
 
 
 
 
29
 
30
  api = huggingface_hub.HfApi(token=os.getenv("HF_TOKEN"))
31
 
 
32
  def get_theme_preview_spaces() -> List[SpaceInfo]:
33
  return list(iter(api.list_spaces(filter="gradio-theme")))
34
 
 
35
  def get_info(space_name: SpaceInfo) -> SpaceData | None:
36
  if not space_name.id:
37
  print(f"no space_name for {space_name}")
@@ -44,17 +60,42 @@ def get_info(space_name: SpaceInfo) -> SpaceData | None:
44
  if subdomain is None:
45
  print(f"no subdomain for {space_info.id}")
46
  return None
47
-
48
- status = space_info.runtime['stage']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  if status not in ["SLEEPING", "RUNNING", "RUNNING_BUILDING", "BUILDING"]:
50
  print(f"Space not running, building, or sleeping {space_info.id}")
51
  elif status == "SLEEPING":
52
  requests.get(f"https://huggingface.co/spaces/{space_info.id}")
53
-
54
- return {"id": space_info.id, "likes": space_info.likes,
55
- "subdomain": f"https://{space_info.subdomain}.hf.space",
56
- "lastModified": space_info.lastModified,
57
- "status": status} # type: ignore
 
 
 
58
 
59
 
60
  def get_all_info(spaces: List[SpaceInfo]) -> List[SpaceData]:
@@ -68,7 +109,7 @@ def process_spaces():
68
 
69
  all_info = get_all_info(theme_spaces)
70
 
71
- json.dump(all_info, open("data/subdomains.json", "w"))
72
- repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")
73
-
74
-
 
10
  import datetime
11
  import tqdm
12
  import requests
13
+ from pathlib import Path
14
+ from screenshot import get_screen_shot
15
+ import boto3
16
+
17
 
18
  class SpaceData(TypedDict):
19
  id: str
 
22
  lastModified: str
23
  status: str
24
 
25
+
26
  repo = huggingface_hub.Repository(
27
  local_dir="data",
28
  repo_type="dataset",
29
  clone_from="freddyaboulton/gradio-theme-subdomains",
30
+ token=os.getenv("HF_TOKEN"),
31
  )
32
  repo.git_pull()
33
 
34
+ screen_shot_dir = Path("data") / "images"
35
+ screen_shot_dir.mkdir(exist_ok=True, parents=True)
36
+
37
+ s3_client = boto3.client(
38
+ "s3",
39
+ aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
40
+ aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
41
+ )
42
+
43
 
44
  api = huggingface_hub.HfApi(token=os.getenv("HF_TOKEN"))
45
 
46
+
47
  def get_theme_preview_spaces() -> List[SpaceInfo]:
48
  return list(iter(api.list_spaces(filter="gradio-theme")))
49
 
50
+
51
  def get_info(space_name: SpaceInfo) -> SpaceData | None:
52
  if not space_name.id:
53
  print(f"no space_name for {space_name}")
 
60
  if subdomain is None:
61
  print(f"no subdomain for {space_info.id}")
62
  return None
63
+
64
+ status = space_info.runtime["stage"]
65
+ img_id = space_info.id.replace("/", "_")
66
+ light_file = str(screen_shot_dir / Path(img_id + "_light.jpg"))
67
+ dark_file = str(screen_shot_dir / Path(img_id + "_dark.jpg"))
68
+ if status == "RUNNING":
69
+ get_screen_shot(
70
+ f"https://{space_info.subdomain}.hf.space?__theme=light", 10, light_file
71
+ )
72
+ get_screen_shot(
73
+ f"https://{space_info.subdomain}.hf.space?__theme=dark", 10, dark_file
74
+ )
75
+ s3_client.upload_file(
76
+ light_file,
77
+ "gradio-theme-screenshots",
78
+ img_id + "_light.jpg",
79
+ ExtraArgs={"ContentType": "image/jpg"},
80
+ )
81
+ s3_client.upload_file(
82
+ dark_file,
83
+ "gradio-theme-screenshots",
84
+ img_id + "_dark.jpg",
85
+ ExtraArgs={"ContentType": "image/jpg"},
86
+ )
87
  if status not in ["SLEEPING", "RUNNING", "RUNNING_BUILDING", "BUILDING"]:
88
  print(f"Space not running, building, or sleeping {space_info.id}")
89
  elif status == "SLEEPING":
90
  requests.get(f"https://huggingface.co/spaces/{space_info.id}")
91
+ return {
92
+ "id": space_info.id,
93
+ "likes": space_info.likes,
94
+ "subdomain": f"https://{space_info.subdomain}.hf.space",
95
+ "lastModified": space_info.lastModified,
96
+ "screenshot_id": img_id,
97
+ "status": status,
98
+ } # type: ignore
99
 
100
 
101
  def get_all_info(spaces: List[SpaceInfo]) -> List[SpaceData]:
 
109
 
110
  all_info = get_all_info(theme_spaces)
111
 
112
+ json.dump(all_info, open("data/val_subdomains.json", "w"))
113
+ repo.push_to_hub(
114
+ blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}"
115
+ )
requirements.txt CHANGED
@@ -1 +1,4 @@
1
- apscheduler
 
 
 
 
1
+ apscheduler
2
+ selenium
3
+ webdriver-manager
4
+ boto3
screenshot.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from selenium import webdriver
2
+ from selenium.webdriver import ChromeOptions
3
+ from webdriver_manager.chrome import ChromeDriverManager
4
+ import time
5
+
6
+ options = ChromeOptions()
7
+ options.add_argument("--headless=new")
8
+ ChromeDriverManager().install()
9
+
10
+
11
+ def get_screen_shot(url: str, load_time: int, path: str):
12
+ driver = webdriver.Chrome(options=options)
13
+ driver.get(url)
14
+ time.sleep(load_time)
15
+ driver.save_screenshot(path)