Spaces:
Running
Running
Upload 2 files
Browse files- app.py +32 -13
- requirements.txt +2 -0
app.py
CHANGED
@@ -21,6 +21,9 @@ HF_USER = os.environ.get("HF_USER") if os.environ.get("HF_USER") else "" # set y
|
|
21 |
REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
|
22 |
REGEX_HF_PATH = r'^[\w_\-\.]+/[\w_\-\.]+(/?:.+)?$'
|
23 |
|
|
|
|
|
|
|
24 |
def is_valid_reponame(repo_id: str):
|
25 |
return re.fullmatch(REGEX_HF_REPO, repo_id)
|
26 |
|
@@ -38,9 +41,10 @@ def extract_src_reponame(source_repo: str):
|
|
38 |
else:
|
39 |
source_repo, target = re.findall(r'^(?:http.+\.co/)?(?:datasets)?(?:spaces)?([\w_\-\.]+/[\w_\-\.]+)/?(?:blob/main/)?(?:resolve/main/)?(.+)?$', source_repo)[0]
|
40 |
target = urllib.parse.unquote(target.removesuffix("/"))
|
|
|
41 |
return source_repo, target
|
42 |
except Exception as e:
|
43 |
-
|
44 |
return source_repo, ""
|
45 |
|
46 |
def extract_dst_reponame(dst_repo: str):
|
@@ -49,9 +53,10 @@ def extract_dst_reponame(dst_repo: str):
|
|
49 |
else:
|
50 |
dst_repo, subfolder = re.findall(r'^([\w_\-\.]+/[\w_\-\.]+)/?(.+)?$', dst_repo)[0]
|
51 |
subfolder = subfolder.removesuffix("/")
|
|
|
52 |
return dst_repo, subfolder
|
53 |
except Exception as e:
|
54 |
-
|
55 |
return dst_repo, ""
|
56 |
|
57 |
def remove_repo_tags(repo_id: str, tags: list[str], repo_type: str, hf_token: str):
|
@@ -76,12 +81,12 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, re
|
|
76 |
except Exception as e:
|
77 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
78 |
|
79 |
-
if not is_valid_path(dst_repo): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
80 |
|
81 |
try:
|
82 |
source_repo, target = extract_src_reponame(source_repo)
|
83 |
dst_repo, subfolder = extract_dst_reponame(dst_repo)
|
84 |
-
if auto_dir: subfolder = source_repo
|
85 |
|
86 |
if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token): raise gr.Error(f"Repo already exists {dst_repo}")
|
87 |
|
@@ -90,12 +95,18 @@ def duplicate(source_repo, dst_repo, repo_type, private, overwrite, auto_dir, re
|
|
90 |
create_repo(dst_repo, repo_type, private, hf_token)
|
91 |
for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
|
92 |
if target and target not in path: continue
|
|
|
|
|
|
|
|
|
|
|
93 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
|
|
94 |
if not Path(file).exists(): continue
|
95 |
if Path(file).is_dir(): # unused for now
|
96 |
-
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=
|
97 |
elif Path(file).is_file():
|
98 |
-
api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=
|
99 |
if Path(file).exists(): Path(file).unlink()
|
100 |
if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
|
101 |
elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
|
@@ -152,7 +163,7 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
|
|
152 |
except Exception as e:
|
153 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
154 |
|
155 |
-
if not is_valid_path(dst_repo): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
156 |
|
157 |
try:
|
158 |
dst_repo, subfolder_prefix = extract_dst_reponame(dst_repo)
|
@@ -160,8 +171,7 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
|
|
160 |
source_repos = parse_repos(source_repos_str)
|
161 |
for source_repo in source_repos:
|
162 |
source_repo, target = extract_src_reponame(source_repo)
|
163 |
-
|
164 |
-
subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo
|
165 |
|
166 |
temp_dir = tempfile.mkdtemp()
|
167 |
create_repo(dst_repo, repo_type, private, hf_token)
|
@@ -169,10 +179,12 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
|
|
169 |
if target and target not in path: continue
|
170 |
path_in_repo = f"{subfolder}/{path}" if subfolder else path
|
171 |
if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
|
172 |
-
|
|
|
173 |
continue
|
174 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
175 |
if not Path(file).exists(): continue
|
|
|
176 |
if Path(file).is_dir(): # unused for now
|
177 |
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
178 |
elif Path(file).is_file():
|
@@ -270,6 +282,7 @@ with gr.Blocks(css=css) as demo:
|
|
270 |
with gr.Row():
|
271 |
submit_button = gr.Button("Submit", variant="primary")
|
272 |
clear_button = gr.Button("Clear", variant="secondary")
|
|
|
273 |
with gr.Column():
|
274 |
output_md = gr.Markdown(label="output")
|
275 |
output_image = gr.Image(show_label=False)
|
@@ -292,6 +305,7 @@ with gr.Blocks(css=css) as demo:
|
|
292 |
with gr.Row():
|
293 |
m2o_submit_button = gr.Button("Submit", variant="primary")
|
294 |
m2o_clear_button = gr.Button("Clear", variant="secondary")
|
|
|
295 |
with gr.Column():
|
296 |
m2o_output_md = gr.Markdown(label="output")
|
297 |
m2o_output_image = gr.Image(show_label=False)
|
@@ -318,20 +332,25 @@ with gr.Blocks(css=css) as demo:
|
|
318 |
with gr.Row():
|
319 |
m2m_submit_button = gr.Button("Submit", variant="primary")
|
320 |
m2m_clear_button = gr.Button("Clear", variant="secondary")
|
|
|
321 |
with gr.Column():
|
322 |
m2m_output_md = gr.Markdown(label="output")
|
323 |
m2m_output_image = gr.Image(show_label=False)
|
324 |
demo.load(fn=swap_visibilty, outputs=main_ui)
|
325 |
-
submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
|
|
|
326 |
clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
|
327 |
m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
|
328 |
-
m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite], [m2o_output_md, m2o_output_image])
|
|
|
329 |
m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
|
330 |
[m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
|
331 |
m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
|
332 |
-
m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
|
333 |
[m2m_output_md, m2m_output_image])
|
|
|
334 |
m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
|
335 |
[m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)
|
|
|
336 |
|
337 |
demo.queue().launch()
|
|
|
21 |
REGEX_HF_REPO = r'^[\w_\-\.]+/[\w_\-\.]+$'
|
22 |
REGEX_HF_PATH = r'^[\w_\-\.]+/[\w_\-\.]+(/?:.+)?$'
|
23 |
|
24 |
+
def debug(s: str):
|
25 |
+
print(s)
|
26 |
+
|
27 |
def is_valid_reponame(repo_id: str):
|
28 |
return re.fullmatch(REGEX_HF_REPO, repo_id)
|
29 |
|
|
|
41 |
else:
|
42 |
source_repo, target = re.findall(r'^(?:http.+\.co/)?(?:datasets)?(?:spaces)?([\w_\-\.]+/[\w_\-\.]+)/?(?:blob/main/)?(?:resolve/main/)?(.+)?$', source_repo)[0]
|
43 |
target = urllib.parse.unquote(target.removesuffix("/"))
|
44 |
+
debug(f"Sourece repo_id:{source_repo} folder:{target}")
|
45 |
return source_repo, target
|
46 |
except Exception as e:
|
47 |
+
debug(e)
|
48 |
return source_repo, ""
|
49 |
|
50 |
def extract_dst_reponame(dst_repo: str):
|
|
|
53 |
else:
|
54 |
dst_repo, subfolder = re.findall(r'^([\w_\-\.]+/[\w_\-\.]+)/?(.+)?$', dst_repo)[0]
|
55 |
subfolder = subfolder.removesuffix("/")
|
56 |
+
debug(f"Destination repo_id:{dst_repo} folder:{subfolder}")
|
57 |
return dst_repo, subfolder
|
58 |
except Exception as e:
|
59 |
+
debug(e)
|
60 |
return dst_repo, ""
|
61 |
|
62 |
def remove_repo_tags(repo_id: str, tags: list[str], repo_type: str, hf_token: str):
|
|
|
81 |
except Exception as e:
|
82 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
83 |
|
84 |
+
if not is_valid_path(extract_dst_reponame(dst_repo)[0]): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
85 |
|
86 |
try:
|
87 |
source_repo, target = extract_src_reponame(source_repo)
|
88 |
dst_repo, subfolder = extract_dst_reponame(dst_repo)
|
89 |
+
if auto_dir: subfolder = f"{subfolder}/{source_repo}" if subfolder else source_repo
|
90 |
|
91 |
if not overwrite and api.repo_exists(repo_id=dst_repo, repo_type=repo_type, token=hf_token): raise gr.Error(f"Repo already exists {dst_repo}")
|
92 |
|
|
|
95 |
create_repo(dst_repo, repo_type, private, hf_token)
|
96 |
for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
|
97 |
if target and target not in path: continue
|
98 |
+
path_in_repo = f"{subfolder}/{path}" if subfolder else path
|
99 |
+
if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
|
100 |
+
debug(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
101 |
+
progress(0, desc=f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
102 |
+
continue
|
103 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
104 |
+
debug(f"Uploading {file} to {path_in_repo}")
|
105 |
if not Path(file).exists(): continue
|
106 |
if Path(file).is_dir(): # unused for now
|
107 |
+
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
108 |
elif Path(file).is_file():
|
109 |
+
api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
110 |
if Path(file).exists(): Path(file).unlink()
|
111 |
if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
|
112 |
elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
|
|
|
163 |
except Exception as e:
|
164 |
raise gr.Error(f"""Oops, you forgot to login. Please use the loggin button on the top left to migrate your repo {e}""")
|
165 |
|
166 |
+
if not is_valid_path(extract_dst_reponame(dst_repo)[0]): raise gr.Error(f"Invalid dst_repo: {dst_repo}")
|
167 |
|
168 |
try:
|
169 |
dst_repo, subfolder_prefix = extract_dst_reponame(dst_repo)
|
|
|
171 |
source_repos = parse_repos(source_repos_str)
|
172 |
for source_repo in source_repos:
|
173 |
source_repo, target = extract_src_reponame(source_repo)
|
174 |
+
subfolder = f"{subfolder_prefix}/{source_repo}" if subfolder_prefix else source_repo
|
|
|
175 |
|
176 |
temp_dir = tempfile.mkdtemp()
|
177 |
create_repo(dst_repo, repo_type, private, hf_token)
|
|
|
179 |
if target and target not in path: continue
|
180 |
path_in_repo = f"{subfolder}/{path}" if subfolder else path
|
181 |
if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
|
182 |
+
debug(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
183 |
+
progress(0, desc=f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
|
184 |
continue
|
185 |
file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
|
186 |
if not Path(file).exists(): continue
|
187 |
+
debug(f"Uploading {file} to {path_in_repo}")
|
188 |
if Path(file).is_dir(): # unused for now
|
189 |
api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
|
190 |
elif Path(file).is_file():
|
|
|
282 |
with gr.Row():
|
283 |
submit_button = gr.Button("Submit", variant="primary")
|
284 |
clear_button = gr.Button("Clear", variant="secondary")
|
285 |
+
stop_button = gr.Button("Stop", variant="stop")
|
286 |
with gr.Column():
|
287 |
output_md = gr.Markdown(label="output")
|
288 |
output_image = gr.Image(show_label=False)
|
|
|
305 |
with gr.Row():
|
306 |
m2o_submit_button = gr.Button("Submit", variant="primary")
|
307 |
m2o_clear_button = gr.Button("Clear", variant="secondary")
|
308 |
+
m2o_stop_button = gr.Button("Stop", variant="stop")
|
309 |
with gr.Column():
|
310 |
m2o_output_md = gr.Markdown(label="output")
|
311 |
m2o_output_image = gr.Image(show_label=False)
|
|
|
332 |
with gr.Row():
|
333 |
m2m_submit_button = gr.Button("Submit", variant="primary")
|
334 |
m2m_clear_button = gr.Button("Clear", variant="secondary")
|
335 |
+
m2m_stop_button = gr.Button("Stop", variant="stop")
|
336 |
with gr.Column():
|
337 |
m2m_output_md = gr.Markdown(label="output")
|
338 |
m2m_output_image = gr.Image(show_label=False)
|
339 |
demo.load(fn=swap_visibilty, outputs=main_ui)
|
340 |
+
submit_event = submit_button.click(duplicate, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], [output_md, output_image])
|
341 |
+
stop_button.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])
|
342 |
clear_button.click(lambda: ("", HF_REPO, "model", True, True, True, True), None, [search, dst_repo, repo_type, is_private, is_overwrite, is_subdir, is_remtag], queue=False)
|
343 |
m2o_search.submit(add_repo_text, [m2o_search, m2o_source_repos], [m2o_source_repos], queue=False)
|
344 |
+
m2o_submit_event = m2o_submit_button.click(duplicate_m2o, [m2o_source_repos, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite], [m2o_output_md, m2o_output_image])
|
345 |
+
m2o_stop_button.click(fn=None, inputs=None, outputs=None, cancels=[m2o_submit_event])
|
346 |
m2o_clear_button.click(lambda: ("", HF_REPO, "model", True, True, ""), None,
|
347 |
[m2o_search, m2o_dst_repo, m2o_repo_type, m2o_is_private, m2o_is_overwrite, m2o_source_repos], queue=False)
|
348 |
m2m_search.submit(add_repo_text, [m2m_search, m2m_source_repos], [m2m_source_repos], queue=False)
|
349 |
+
m2m_submit_event = m2m_submit_button.click(duplicate_m2m, [m2m_source_repos, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_prefix, m2m_suffix],
|
350 |
[m2m_output_md, m2m_output_image])
|
351 |
+
m2m_stop_button.click(fn=None, inputs=None, outputs=None, cancels=[m2m_submit_event])
|
352 |
m2m_clear_button.click(lambda: ("", HF_USER, "model", True, False, True, "", HF_REPO_PREFIX, HF_REPO_SUFFIX), None,
|
353 |
[m2m_search, m2m_user, m2m_repo_type, m2m_is_private, m2m_is_overwrite, m2m_is_remtag, m2m_source_repos, m2m_prefix, m2m_suffix], queue=False)
|
354 |
+
|
355 |
|
356 |
demo.queue().launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
huggingface_hub>=0.22.2
|
|
|
|
|
2 |
gradio_huggingfacehub_search==0.0.7
|
3 |
pydantic==2.10.6
|
|
|
1 |
huggingface_hub>=0.22.2
|
2 |
+
hf_transfer
|
3 |
+
hf_xet
|
4 |
gradio_huggingfacehub_search==0.0.7
|
5 |
pydantic==2.10.6
|