shwj114514
committed on
Commit
·
74ac4a3
0
Parent(s):
Initial commit: code + checkpoints
Browse files- .gitattributes +8 -0
- .gitignore +39 -0
- model_traced_slakh.pt +3 -0
- yt-dlp_download.py +116 -0
.gitattributes
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.idea/
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
#### compile result
|
| 5 |
+
**.pyc
|
| 6 |
+
**.pyx
|
| 7 |
+
|
| 8 |
+
**/build/
|
| 9 |
+
|
| 10 |
+
##### infer output
|
| 11 |
+
eval/**/*.wav
|
| 12 |
+
|
| 13 |
+
**.zip
|
| 14 |
+
|
| 15 |
+
**/bazel-5.3.2-installer-linux-x86_64.sh
|
| 16 |
+
|
| 17 |
+
tmp.py
|
| 18 |
+
|
| 19 |
+
**__pycache__**
|
| 20 |
+
|
| 21 |
+
test_module/**/*.wav
|
| 22 |
+
|
| 23 |
+
**.ckpt
|
| 24 |
+
**.pth
|
| 25 |
+
backup
|
| 26 |
+
*tmp*
|
| 27 |
+
|
| 28 |
+
exp*/
|
| 29 |
+
!exp_recon/
|
| 30 |
+
exp_recon/*
|
| 31 |
+
!exp_recon/DAC_24k_9/
|
| 32 |
+
!exp_recon/test-clean/
|
| 33 |
+
!exp_recon/test-clean_16000/
|
| 34 |
+
!exp_recon/test-clean_flac/
|
| 35 |
+
|
| 36 |
+
/*.txt
|
| 37 |
+
!combined.txt
|
| 38 |
+
|
| 39 |
+
pretrained/
|
model_traced_slakh.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f338c9137cb2378c3b081cef3574093cc9edd88235229983ec59fa97e14094ad
|
| 3 |
+
size 322931966
|
yt-dlp_download.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import concurrent.futures as futures
|
| 3 |
+
import os
|
| 4 |
+
import re
|
| 5 |
+
import sys
|
| 6 |
+
from dataclasses import dataclass
|
| 7 |
+
from typing import List
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
from yt_dlp import YoutubeDL
|
| 11 |
+
except ImportError:
|
| 12 |
+
print("请先安装: pip install yt-dlp", file=sys.stderr)
|
| 13 |
+
sys.exit(1)
|
| 14 |
+
|
# A group header line ends with a colon — ASCII ":" or fullwidth ":" —
# optionally followed by trailing whitespace.
HEADER_PATTERN = re.compile(r"[::]\s*$")
# A download line starts with http:// or https:// (case-insensitive).
URL_PATTERN = re.compile(r"^https?://", re.I)
| 17 |
+
|
@dataclass
class Job:
    """A single download task: one URL plus the group it belongs to."""

    url: str
    group: str  # target folder (directory the download lands in)
| 22 |
+
|
def safe_dirname(name: str) -> str:
    """Turn a group label into a filesystem-safe directory name.

    Surrounding whitespace and colons (fullwidth and ASCII) are stripped,
    then characters that are illegal in file names are replaced by spaces.
    """
    trimmed = name.strip().strip("::").strip()
    return re.sub(r"[\\/:\*\?\"<>\|]", " ", trimmed)
| 26 |
+
|
def parse_links(file_path: str) -> List[Job]:
    """Read a link-list file and return the download jobs it describes.

    A line ending in a colon starts a new group (its sanitized text becomes
    the group name); lines starting with http(s):// become jobs assigned to
    the current group. Blank and unrecognized lines are skipped. URLs seen
    before any header fall into the default "Misc" group.
    """
    current_group = "Misc"
    jobs: List[Job] = []
    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            if HEADER_PATTERN.search(stripped):
                # Group header line
                current_group = safe_dirname(stripped)
            elif URL_PATTERN.match(stripped):
                # URL line
                jobs.append(Job(url=stripped, group=current_group))
    return jobs
| 43 |
+
|
def build_ydl_opts(out_dir: str, audio_format: str):
    """Build the options dict for ``YoutubeDL`` downloading audio into *out_dir*.

    Parameters
    ----------
    out_dir : str
        Destination directory; created here if it does not exist.
    audio_format : str
        Codec handed to FFmpegExtractAudio ("best" keeps the original track).

    Returns
    -------
    dict
        Options suitable for ``YoutubeDL(ydl_opts)``.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Prefer an opus audio-only stream; fall back to the best audio track.
    fmt = "bestaudio[acodec=opus]/bestaudio"
    postprocessors = [
        {
            "key": "FFmpegExtractAudio",
            "preferredcodec": audio_format,
        },
        {"key": "FFmpegMetadata"},
        {"key": "EmbedThumbnail"},
    ]

    ydl_opts = {
        "format": fmt,
        "outtmpl": os.path.join(out_dir, "%(title)s.%(ext)s"),
        "restrictfilenames": False,
        "noplaylist": True,
        "ignoreerrors": True,  # keep going when a single video fails
        "continuedl": True,
        "split_chapters": True,
        "overwrites": False,
        # Fix: EmbedThumbnail needs the thumbnail written to disk first.
        # The CLI's --embed-thumbnail enables this implicitly, but the
        # Python API does not — without it the embed step has no file.
        "writethumbnail": True,
        "postprocessors": postprocessors,
        "consoletitle": True,
        "progress_with_newline": True,
    }
    return ydl_opts
| 71 |
+
|
def download_one(job: Job, audio_format: str) -> str:
    """Download one job's URL into its (sanitized) group directory.

    Never raises: any failure is captured and reported in the returned
    "OK"/"FAIL" status string so batch runs can continue.
    """
    target_dir = safe_dirname(job.group)
    opts = build_ydl_opts(target_dir, audio_format)
    try:
        print(f"[DOWN] {job.url} -> {target_dir}")
        with YoutubeDL(opts) as downloader:
            downloader.download([job.url])
    except Exception as e:
        return f"FAIL: {job.url} ({e})"
    return f"OK : {job.url}"
| 82 |
+
|
# Default link-list file; override at runtime with --links.
# Fix: the original had three successive assignments ("gpt5pro_list.txt",
# "gemini0827.txt", then this one) — the first two were dead code,
# immediately overwritten. Only the effective value is kept.
LINK_TXT = "youtube_12_composer_list.txt"
| 87 |
+
|
| 88 |
+
|
def main():
    """CLI entry point: parse arguments, read the link list, run downloads.

    With ``--workers`` <= 1 downloads run sequentially; otherwise they are
    fanned out over a thread pool and results are printed as they finish.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--links", default=LINK_TXT,
                        help="包含分组与 URL 的文本文件,例如 links.txt")
    parser.add_argument("--audio-format", default="best",
                        choices=["best", "mp3", "m4a", "flac", "wav", "opus"],
                        help="音频格式;'best' 为保留原始音轨(推荐)")
    parser.add_argument("-j", "--workers", type=int, default=1,
                        help="并行下载线程数(谨慎过高,默认 1)")
    args = parser.parse_args()

    jobs = parse_links(args.links)
    if not jobs:
        print("未在文件中找到任何 URL。", file=sys.stderr)
        sys.exit(1)

    print(f"共 {len(jobs)} 个链接,分组数:{len(set(j.group for j in jobs))}")
    if args.workers <= 1:
        # Serial path: one download at a time, report each as it completes.
        for job in jobs:
            print(download_one(job, args.audio_format))
    else:
        # Parallel path: results are printed in completion order.
        with futures.ThreadPoolExecutor(max_workers=args.workers) as pool:
            pending = [pool.submit(download_one, job, args.audio_format)
                       for job in jobs]
            for finished in futures.as_completed(pending):
                print(finished.result())
| 114 |
+
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()