John6666 committed on
Commit
80aa93e
·
verified ·
1 Parent(s): 6b109d1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -7
app.py CHANGED
@@ -32,6 +32,15 @@ def create_repo(repo_id: str, repo_type: str, private: bool, hf_token: str):
32
  if repo_type == "space": api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True, space_sdk="gradio", token=hf_token)
33
  else: api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
34
 
 
 
 
 
 
 
 
 
 
35
  def extract_src_reponame(source_repo: str):
36
  try:
37
  if is_valid_reponame(source_repo): target = ""
@@ -90,12 +99,17 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, re
90
  create_repo(dst_repo, repo_type, private, hf_token)
91
  for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
92
  if target and target not in path: continue
 
 
 
 
 
93
  file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
94
  if not Path(file).exists(): continue
95
  if Path(file).is_dir(): # unused for now
96
- api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=f"{subfolder}/{path}" if subfolder else path, repo_type=repo_type, token=hf_token)
97
  elif Path(file).is_file():
98
- api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=f"{subfolder}/{path}" if subfolder else path, repo_type=repo_type, token=hf_token)
99
  if Path(file).exists(): Path(file).unlink()
100
  if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
101
  elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
@@ -141,7 +155,7 @@ def is_same_file_hf(src_repo: str, src_path: str, src_type: str, dst_repo: str,
141
  if src_info[0].size == dst_info[0].size and src_info[0].lfs.sha256 == dst_info[0].lfs.sha256: return True
142
  return False
143
 
144
- def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
145
  hf_token = oauth_token.token
146
  api = HfApi(token=hf_token)
147
  try:
@@ -160,8 +174,8 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
160
  source_repos = parse_repos(source_repos_str)
161
  for source_repo in source_repos:
162
  source_repo, target = extract_src_reponame(source_repo)
163
-
164
  subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo
 
165
 
166
  temp_dir = tempfile.mkdtemp()
167
  create_repo(dst_repo, repo_type, private, hf_token)
@@ -170,6 +184,7 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
170
  path_in_repo = f"{subfolder}/{path}" if subfolder else path
171
  if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
172
  print(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
 
173
  continue
174
  file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
175
  if not Path(file).exists(): continue
@@ -270,6 +285,7 @@ with gr.Blocks(css=css) as demo:
270
  with gr.Row():
271
  submit_button = gr.Button("Submit", variant="primary")
272
  clear_button = gr.Button("Clear", variant="secondary")
 
273
  with gr.Column():
274
  output_md = gr.Markdown(label="output")
275
  output_image = gr.Image(show_label=False)
@@ -289,9 +305,11 @@ with gr.Blocks(css=css) as demo:
289
  with gr.Row():
290
  m2o_is_private = gr.Checkbox(label="Make new repo private?", value=True)
291
  m2o_is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=True)
 
292
  with gr.Row():
293
  m2o_submit_button = gr.Button("Submit", variant="primary")
294
  m2o_clear_button = gr.Button("Clear", variant="secondary")
 
295
  with gr.Column():
296
  m2o_output_md = gr.Markdown(label="output")
297
  m2o_output_image = gr.Image(show_label=False)
@@ -318,20 +336,25 @@ with gr.Blocks(css=css) as demo:
318
  with gr.Row():
319
  m2m_submit_button = gr.Button("Submit", variant="primary")
320
  m2m_clear_button = gr.Button("Clear", variant="secondary")
 
321
  with gr.Column():
322
  m2m_output_md = gr.Markdown(label="output")
323
  m2m_output_image = gr.Image(show_label=False)
324
  demo.load(fn=swap_visibilty, outputs=main_ui)
325
- submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
 
326
  clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
327
  m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
328
- m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite], [m2o_output_md, m2o_output_image])
 
329
  m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
330
  [m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
331
  m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
332
- m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
333
  [m2m_output_md, m2m_output_image])
 
334
  m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
335
  [m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)
 
336
 
337
  demo.queue().launch()
 
32
  if repo_type == "space": api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True, space_sdk="gradio", token=hf_token)
33
  else: api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
34
 
35
def remove_dup_pathstr(path: str) -> str:
    """Collapse a duplicated leading directory in a slash-separated path.

    When the first two ``/``-separated segments of *path* are exactly equal
    (e.g. ``"name/name/file"``), one of them is dropped and the rest of the
    path is kept unchanged (``"name/file"``). Any other input is returned
    as-is.

    The comparison is done on whole segments with plain string equality
    rather than a regular expression, so names containing regex
    metacharacters (such as ``.``) are handled literally and a second
    segment that merely *starts with* the first one (``"a/ab"``) is not
    treated as a duplicate.

    Args:
        path: Slash-separated path string. Falsy or non-string values are
            returned unchanged.

    Returns:
        The path with the duplicated leading segment removed, or the
        original value when there is nothing to collapse.
    """
    # Best-effort helper: never raise on unexpected input, just hand it back.
    if not isinstance(path, str) or "/" not in path:
        return path
    # "/" is guaranteed to be present, so there are at least two segments.
    first, second, *rest = path.split("/")
    if first != second:
        return path
    return "/".join([first, *rest])
43
+
44
  def extract_src_reponame(source_repo: str):
45
  try:
46
  if is_valid_reponame(source_repo): target = ""
 
99
  create_repo(dst_repo, repo_type, private, hf_token)
100
  for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
101
  if target and target not in path: continue
102
+ path_in_repo = f"{subfolder}/{path}" if subfolder else path
103
+ if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
104
+ print(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
105
+ progress(0, desc=f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
106
+ continue
107
  file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
108
  if not Path(file).exists(): continue
109
  if Path(file).is_dir(): # unused for now
110
+ api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
111
  elif Path(file).is_file():
112
+ api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
113
  if Path(file).exists(): Path(file).unlink()
114
  if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
115
  elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
 
155
  if src_info[0].size == dst_info[0].size and src_info[0].lfs.sha256 == dst_info[0].lfs.sha256: return True
156
  return False
157
 
158
+ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, avoid_dup_dir, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
159
  hf_token = oauth_token.token
160
  api = HfApi(token=hf_token)
161
  try:
 
174
  source_repos = parse_repos(source_repos_str)
175
  for source_repo in source_repos:
176
  source_repo, target = extract_src_reponame(source_repo)
 
177
  subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo
178
+ if avoid_dup_dir: subfolder = remove_dup_pathstr(subfolder)
179
 
180
  temp_dir = tempfile.mkdtemp()
181
  create_repo(dst_repo, repo_type, private, hf_token)
 
184
  path_in_repo = f"{subfolder}/{path}" if subfolder else path
185
  if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
186
  print(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
187
+ progress(0, desc=f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
188
  continue
189
  file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
190
  if not Path(file).exists(): continue
 
285
  with gr.Row():
286
  submit_button = gr.Button("Submit", variant="primary")
287
  clear_button = gr.Button("Clear", variant="secondary")
288
+ stop_button = gr.Button("Stop", variant="stop")
289
  with gr.Column():
290
  output_md = gr.Markdown(label="output")
291
  output_image = gr.Image(show_label=False)
 
305
  with gr.Row():
306
  m2o_is_private = gr.Checkbox(label="Make new repo private?", value=True)
307
  m2o_is_overwrite = gr.Checkbox(label="Overwrite existing repo?", value=True)
308
+ m2o_is_nodupdir = gr.Checkbox(label="Avoid creating duplicated directory?", value=True)
309
  with gr.Row():
310
  m2o_submit_button = gr.Button("Submit", variant="primary")
311
  m2o_clear_button = gr.Button("Clear", variant="secondary")
312
+ m2o_stop_button = gr.Button("Stop", variant="stop")
313
  with gr.Column():
314
  m2o_output_md = gr.Markdown(label="output")
315
  m2o_output_image = gr.Image(show_label=False)
 
336
  with gr.Row():
337
  m2m_submit_button = gr.Button("Submit", variant="primary")
338
  m2m_clear_button = gr.Button("Clear", variant="secondary")
339
+ m2m_stop_button = gr.Button("Stop", variant="stop")
340
  with gr.Column():
341
  m2m_output_md = gr.Markdown(label="output")
342
  m2m_output_image = gr.Image(show_label=False)
343
  demo.load(fn=swap_visibilty, outputs=main_ui)
344
+ submit_event = submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
345
+ stop_button.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])
346
  clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
347
  m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
348
+ m2o_submit_event = m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_is_nodupdir], [m2o_output_md, m2o_output_image])
349
+ m2o_stop_button.click(fn=None, inputs=None, outputs=None, cancels=[m2o_submit_event])
350
  m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
351
  [m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
352
  m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
353
+ m2m_submit_event = m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
354
  [m2m_output_md, m2m_output_image])
355
+ m2m_stop_button.click(fn=None, inputs=None, outputs=None, cancels=[m2m_submit_event])
356
  m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
357
  [m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)
358
+
359
 
360
  demo.queue().launch()