shwj114514 committed on
Commit
74ac4a3
·
0 Parent(s):

Initial commit: code + checkpoints

Browse files
Files changed (4) hide show
  1. .gitattributes +8 -0
  2. .gitignore +39 -0
  3. model_traced_slakh.pt +3 -0
  4. yt-dlp_download.py +116 -0
.gitattributes ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.pt filter=lfs diff=lfs merge=lfs -text
2
+ *.bin filter=lfs diff=lfs merge=lfs -text
3
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
4
+ *.npz filter=lfs diff=lfs merge=lfs -text
5
+ *.onnx filter=lfs diff=lfs merge=lfs -text
6
+ *.tar filter=lfs diff=lfs merge=lfs -text
7
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .idea/
2
+
3
+
4
+ #### compile result
5
+ **.pyc
6
+ **.pyx
7
+
8
+ **/build/
9
+
10
+ ##### infer output
11
+ eval/**/*.wav
12
+
13
+ **.zip
14
+
15
+ **/bazel-5.3.2-installer-linux-x86_64.sh
16
+
17
+ tmp.py
18
+
19
+ **__pycache__**
20
+
21
+ test_module/**/*.wav
22
+
23
+ **.ckpt
24
+ **.pth
25
+ backup
26
+ *tmp*
27
+
28
+ exp*/
29
+ !exp_recon/
30
+ exp_recon/*
31
+ !exp_recon/DAC_24k_9/
32
+ !exp_recon/test-clean/
33
+ !exp_recon/test-clean_16000/
34
+ !exp_recon/test-clean_flac/
35
+
36
+ /*.txt
37
+ !combined.txt
38
+
39
+ pretrained/
model_traced_slakh.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f338c9137cb2378c3b081cef3574093cc9edd88235229983ec59fa97e14094ad
3
+ size 322931966
yt-dlp_download.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import concurrent.futures as futures
3
+ import os
4
+ import re
5
+ import sys
6
+ from dataclasses import dataclass
7
+ from typing import List
8
+
9
+ try:
10
+ from yt_dlp import YoutubeDL
11
+ except ImportError:
12
+ print("请先安装: pip install yt-dlp", file=sys.stderr)
13
+ sys.exit(1)
14
+
15
# A group header is a line whose last non-blank character is a colon.
# Both the ASCII colon and the full-width colon (U+FF1A) are accepted; the
# full-width form is spelled as an escape so it cannot be folded into a
# second ASCII colon by text normalization.
HEADER_PATTERN = re.compile(r"[:\uff1a]\s*$")
# A download line begins with an http:// or https:// scheme, in any letter case.
URL_PATTERN = re.compile(r"^https?://", re.I)
17
+
18
@dataclass
class Job:
    """A single download task: one URL plus the group it belongs to."""

    # Address of the item to download.
    url: str
    # Destination folder for this item.
    group: str
22
+
23
def safe_dirname(name: str) -> str:
    """Convert a group header into a string usable as a directory name.

    Surrounding whitespace and leading/trailing colons (ASCII ``:`` or
    full-width U+FF1A) are removed, then each of the characters
    ``\\ / : * ? " < > |`` is replaced by a space.

    Args:
        name: Raw header text, e.g. ``"Bach:"``.

    Returns:
        The cleaned name with no leading or trailing whitespace, or
        ``"Misc"`` when nothing is left after cleaning.
    """
    trimmed = name.strip().strip(":\uff1a").strip()
    # Strip again: a replaced character at either end would otherwise leave a
    # directory name that starts or ends with a space.
    cleaned = re.sub(r"[\\/:\*\?\"<>\|]", " ", trimmed).strip()
    # An empty result cannot be used as a directory path, so use a fixed name.
    return cleaned or "Misc"
26
+
27
def parse_links(file_path: str) -> List[Job]:
    """Read a grouped link list and return one Job per URL line.

    Blank lines are skipped. A line matching ``HEADER_PATTERN`` starts a new
    group, named by passing the line through ``safe_dirname``. A line matching
    ``URL_PATTERN`` becomes a job in the current group. URLs that appear
    before any header go to the group ``"Misc"``. Any other line is ignored.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        The jobs in the order their URLs appear in the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    jobs: List[Job] = []
    current_group = "Misc"
    # "utf-8-sig" also reads plain UTF-8 but drops a leading byte-order mark.
    # str.strip() does not remove a BOM, so with plain "utf-8" a URL on the
    # first line would fail the "^https?://" match and be silently skipped.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue
            # Header test comes first, as in the original ordering.
            if HEADER_PATTERN.search(line):
                current_group = safe_dirname(line)
            elif URL_PATTERN.match(line):
                jobs.append(Job(url=line, group=current_group))
    return jobs
43
+
44
def build_ydl_opts(out_dir: str, audio_format: str):
    """Create ``out_dir`` if necessary and return the downloader options.

    Args:
        out_dir: Directory for the output files; created when missing.
        audio_format: Value stored as ``preferredcodec`` of the
            ``FFmpegExtractAudio`` post-processor entry.

    Returns:
        A dict of options whose output template places files in ``out_dir``
        named ``%(title)s.%(ext)s``.
    """
    os.makedirs(out_dir, exist_ok=True)

    return {
        "format": "bestaudio[acodec=opus]/bestaudio",
        "outtmpl": os.path.join(out_dir, "%(title)s.%(ext)s"),
        "restrictfilenames": False,
        "noplaylist": True,
        "ignoreerrors": True,
        "continuedl": True,
        "split_chapters": True,
        "overwrites": False,
        # NOTE(review): EmbedThumbnail presumably needs a thumbnail to have
        # been written first; no such option is set here -- confirm.
        "postprocessors": [
            {"key": "FFmpegExtractAudio", "preferredcodec": audio_format},
            {"key": "FFmpegMetadata"},
            {"key": "EmbedThumbnail"},
        ],
        "consoletitle": True,
        "progress_with_newline": True,
    }
71
+
72
def download_one(job: Job, audio_format: str) -> str:
    """Download one job and return a one-line status message.

    Args:
        job: The URL to fetch and the group whose folder receives it.
        audio_format: Audio format forwarded to ``build_ydl_opts``.

    Returns:
        ``"OK : <url>"`` on success, otherwise ``"FAIL: <url> (<reason>)"``.
        Errors are reported through the return value instead of being raised.
    """
    out_dir = safe_dirname(job.group)
    try:
        # Built inside the try block so a failure to create the output
        # directory is reported as FAIL instead of escaping to the caller.
        ydl_opts = build_ydl_opts(out_dir, audio_format)
        print(f"[DOWN] {job.url} -> {out_dir}")
        with YoutubeDL(ydl_opts) as ydl:
            retcode = ydl.download([job.url])
    except Exception as e:
        return f"FAIL: {job.url} ({e})"
    # The options set "ignoreerrors", so yt-dlp signals a failed download
    # through a non-zero return code rather than by raising.
    if retcode:
        return f"FAIL: {job.url} (yt-dlp exit code {retcode})"
    return f"OK : {job.url}"
82
+
83
# Default link list, used as the default value of the --links option.
# Two earlier assignments ("gpt5pro_list.txt", "gemini0827.txt") were
# immediately overwritten and never took effect; pass --links to use them.
LINK_TXT = "youtube_12_composer_list.txt"
87
+
88
+
89
def main():
    """Parse the command line, read the link file and download every URL.

    Exits with status 1 when the link file cannot be read or contains no
    URLs. With ``--workers`` at most 1 the jobs run one after another in file
    order; otherwise they run on a thread pool and each status line is
    printed as its job finishes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--links", default=LINK_TXT, help="包含分组与 URL 的文本文件,例如 links.txt")
    parser.add_argument("--audio-format", default="best",
                        choices=["best", "mp3", "m4a", "flac", "wav", "opus"],
                        help="音频格式;'best' 为保留原始音轨(推荐)")
    parser.add_argument("-j", "--workers", type=int, default=1,
                        help="并行下载线程数(谨慎过高,默认 1)")
    args = parser.parse_args()

    # A missing or undecodable link file is a user error: report it on stderr
    # and exit instead of showing a traceback.
    try:
        jobs = parse_links(args.links)
    except (OSError, UnicodeDecodeError) as e:
        print(f"无法读取链接文件 {args.links}: {e}", file=sys.stderr)
        sys.exit(1)

    if not jobs:
        print("未在文件中找到任何 URL。", file=sys.stderr)
        sys.exit(1)

    print(f"共 {len(jobs)} 个链接,分组数:{len(set(j.group for j in jobs))}")
    if args.workers <= 1:
        for job in jobs:
            msg = download_one(job, args.audio_format)
            print(msg)
    else:
        with futures.ThreadPoolExecutor(max_workers=args.workers) as ex:
            tasks = [ex.submit(download_one, job, args.audio_format) for job in jobs]
            for t in futures.as_completed(tasks):
                print(t.result())


if __name__ == "__main__":
    main()