Upload 18 files
Browse files- Training With Mixed Dataset/results/Bach10Synth_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/Bach10Synth_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/Bach10Synth_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/MIR1K_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/MIR1K_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/MIR1K_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/PTDBNoisy_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/PTDBNoisy_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/PTDBNoisy_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/PTDB_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/PTDB_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/PTDB_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/SpeechSynth_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/SpeechSynth_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/SpeechSynth_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/Vocadito_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/Vocadito_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
- Training With Mixed Dataset/results/Vocadito_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json +43 -0
Training With Mixed Dataset/results/Bach10Synth_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "DJCM",
|
4 |
+
"dataset_name": "Bach10Synth",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:35:52.897167+00:00",
|
7 |
+
"execution_time_seconds": 93.29
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9888224302255139,
|
24 |
+
"recall": 0.9454692305809599,
|
25 |
+
"f1": 0.9666599936606443
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 75.07798232516214,
|
29 |
+
"cents_error": 23.948067652055137,
|
30 |
+
"rpa": 0.9697037682610083,
|
31 |
+
"rca": 0.9712828655006084,
|
32 |
+
"octave_error_rate": 0.00569510479855724,
|
33 |
+
"gross_error_rate": 0.01048417019734401,
|
34 |
+
"valid_frames": 115889
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 10.671855677502464,
|
38 |
+
"continuity_breaks": 0.8157242063492063
|
39 |
+
},
|
40 |
+
"combined_score": 0.9582051059673263,
|
41 |
+
"optimal_threshold": 0.4
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/Bach10Synth_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "RMVPE",
|
4 |
+
"dataset_name": "Bach10Synth",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:33:07.496451+00:00",
|
7 |
+
"execution_time_seconds": 58.24
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9972985023736459,
|
24 |
+
"recall": 0.9306454113059156,
|
25 |
+
"f1": 0.962819787806916
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 7.1072699327635585,
|
29 |
+
"cents_error": 4.009898514637859,
|
30 |
+
"rpa": 0.9936794305351007,
|
31 |
+
"rca": 0.9941002174065503,
|
32 |
+
"octave_error_rate": 0.0005610491619328144,
|
33 |
+
"gross_error_rate": 0.000666245879795217,
|
34 |
+
"valid_frames": 114072
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 2.574204714660861,
|
38 |
+
"continuity_breaks": 0.9614682539682541
|
39 |
+
},
|
40 |
+
"combined_score": 0.9835085640298364,
|
41 |
+
"optimal_threshold": 0.7000000000000001
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/Bach10Synth_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "SwiftF0",
|
4 |
+
"dataset_name": "Bach10Synth",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T09:38:09.679652+00:00",
|
7 |
+
"execution_time_seconds": 70.16
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9969860433256272,
|
24 |
+
"recall": 0.9499481941373712,
|
25 |
+
"f1": 0.9728989025037288
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 11.876706991525216,
|
29 |
+
"cents_error": 12.08998838088326,
|
30 |
+
"rpa": 0.9839055978288874,
|
31 |
+
"rca": 0.9850048953090915,
|
32 |
+
"octave_error_rate": 0.0012452979267936583,
|
33 |
+
"gross_error_rate": 0.002044006252254419,
|
34 |
+
"valid_frames": 116438
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 3.2330790409961194,
|
38 |
+
"continuity_breaks": 0.9716369047619049
|
39 |
+
},
|
40 |
+
"combined_score": 0.9804966663673379,
|
41 |
+
"optimal_threshold": 0.9
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/MIR1K_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "DJCM",
|
4 |
+
"dataset_name": "MIR1K",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T06:53:25.130216+00:00",
|
7 |
+
"execution_time_seconds": 782.61
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9832899141749102,
|
24 |
+
"recall": 0.8611351808145903,
|
25 |
+
"f1": 0.9181674445185256
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 19.926925409210934,
|
29 |
+
"cents_error": 19.72579242879989,
|
30 |
+
"rpa": 0.9682069411940447,
|
31 |
+
"rca": 0.9685554946970074,
|
32 |
+
"octave_error_rate": 0.005051951069727298,
|
33 |
+
"gross_error_rate": 0.009838337565768726,
|
34 |
+
"valid_frames": 481992
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 4.309565087875009,
|
38 |
+
"continuity_breaks": 0.9387467319144565
|
39 |
+
},
|
40 |
+
"combined_score": 0.9443512426108808,
|
41 |
+
"optimal_threshold": 0.6000000000000001
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/MIR1K_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "RMVPE",
|
4 |
+
"dataset_name": "MIR1K",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T06:39:02.799501+00:00",
|
7 |
+
"execution_time_seconds": 512.11
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9918244831059065,
|
24 |
+
"recall": 0.8973284713524871,
|
25 |
+
"f1": 0.9422131174450595
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 8.947234475709527,
|
29 |
+
"cents_error": 16.374922916542904,
|
30 |
+
"rpa": 0.9725913389746143,
|
31 |
+
"rca": 0.9730413140866102,
|
32 |
+
"octave_error_rate": 0.00341662518666003,
|
33 |
+
"gross_error_rate": 0.006381284221005475,
|
34 |
+
"valid_frames": 502250
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 1.1202456264212082,
|
38 |
+
"continuity_breaks": 0.976168745925557
|
39 |
+
},
|
40 |
+
"combined_score": 0.9597984235258288,
|
41 |
+
"optimal_threshold": 0.8
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/MIR1K_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "SwiftF0",
|
4 |
+
"dataset_name": "MIR1K",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T09:03:30.558979+00:00",
|
7 |
+
"execution_time_seconds": 475.15
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9741025541084564,
|
24 |
+
"recall": 0.937663140479921,
|
25 |
+
"f1": 0.9555355686320205
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 10.823747373647318,
|
29 |
+
"cents_error": 23.26574300210297,
|
30 |
+
"rpa": 0.946458445275196,
|
31 |
+
"rca": 0.947578816598263,
|
32 |
+
"octave_error_rate": 0.00600008383730989,
|
33 |
+
"gross_error_rate": 0.011901087217477792,
|
34 |
+
"valid_frames": 524826
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 1.327155603681321,
|
38 |
+
"continuity_breaks": 0.8272468735040329
|
39 |
+
},
|
40 |
+
"combined_score": 0.9493043789341343,
|
41 |
+
"optimal_threshold": 0.9
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/PTDBNoisy_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "DJCM",
|
4 |
+
"dataset_name": "PTDBNoisy",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:30:56.982574+00:00",
|
7 |
+
"execution_time_seconds": 225.97
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.8284498828927872,
|
24 |
+
"recall": 0.7005375692170891,
|
25 |
+
"f1": 0.7591432701151943
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 25.480182019558534,
|
29 |
+
"cents_error": 95.31680455405053,
|
30 |
+
"rpa": 0.7906473574890376,
|
31 |
+
"rca": 0.8095141933994923,
|
32 |
+
"octave_error_rate": 0.05158089083775675,
|
33 |
+
"gross_error_rate": 0.06747634433417955,
|
34 |
+
"valid_frames": 34664
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 1.3060640517221112,
|
38 |
+
"continuity_breaks": 0.7062522601351662
|
39 |
+
},
|
40 |
+
"combined_score": 0.7329929205200064,
|
41 |
+
"optimal_threshold": 0.5
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/PTDBNoisy_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "RMVPE",
|
4 |
+
"dataset_name": "PTDBNoisy",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:25:58.851298+00:00",
|
7 |
+
"execution_time_seconds": 143.6
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.6809796967605265,
|
24 |
+
"recall": 0.5781900489066731,
|
25 |
+
"f1": 0.6253893655391006
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 32.33536169187113,
|
29 |
+
"cents_error": 116.04125555361891,
|
30 |
+
"rpa": 0.7271583362460678,
|
31 |
+
"rca": 0.7448794127927298,
|
32 |
+
"octave_error_rate": 0.06025865082139112,
|
33 |
+
"gross_error_rate": 0.06791331702202028,
|
34 |
+
"valid_frames": 28610
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 0.9860356845620268,
|
38 |
+
"continuity_breaks": 0.9427879588398543
|
39 |
+
},
|
40 |
+
"combined_score": 0.6618407878716646,
|
41 |
+
"optimal_threshold": 0.6000000000000001
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/PTDBNoisy_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "SwiftF0",
|
4 |
+
"dataset_name": "PTDBNoisy",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T09:35:49.330035+00:00",
|
7 |
+
"execution_time_seconds": 114.94
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.7939900220443207,
|
24 |
+
"recall": 0.6915039812457056,
|
25 |
+
"f1": 0.739211683247459
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 24.970811971097046,
|
29 |
+
"cents_error": 80.77345725072703,
|
30 |
+
"rpa": 0.8526463453838735,
|
31 |
+
"rca": 0.8676388929479498,
|
32 |
+
"octave_error_rate": 0.044714615541982056,
|
33 |
+
"gross_error_rate": 0.05377443960604378,
|
34 |
+
"valid_frames": 34217
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 0.9001541717375056,
|
38 |
+
"continuity_breaks": 0.7711701259629766
|
39 |
+
},
|
40 |
+
"combined_score": 0.7569963032698516,
|
41 |
+
"optimal_threshold": 0.9
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/PTDB_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "DJCM",
|
4 |
+
"dataset_name": "PTDB",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:22:15.150712+00:00",
|
7 |
+
"execution_time_seconds": 3150.39
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.8985208597739346,
|
24 |
+
"recall": 0.9004775774537881,
|
25 |
+
"f1": 0.8994981544817311
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 14.973836091147659,
|
29 |
+
"cents_error": 53.05202306422977,
|
30 |
+
"rpa": 0.8593370949453151,
|
31 |
+
"rca": 0.8738098142233506,
|
32 |
+
"octave_error_rate": 0.02429085783726887,
|
33 |
+
"gross_error_rate": 0.03303080817435328,
|
34 |
+
"valid_frames": 664077
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 1.5532397623141796,
|
38 |
+
"continuity_breaks": 0.6409778381620306
|
39 |
+
},
|
40 |
+
"combined_score": 0.8628606450132019,
|
41 |
+
"optimal_threshold": 0.5
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/PTDB_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "RMVPE",
|
4 |
+
"dataset_name": "PTDB",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T07:28:22.119210+00:00",
|
7 |
+
"execution_time_seconds": 2006.56
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.8163694357636753,
|
24 |
+
"recall": 0.7197873275188753,
|
25 |
+
"f1": 0.7650421994442588
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 8.938506229633358,
|
29 |
+
"cents_error": 33.65537796219959,
|
30 |
+
"rpa": 0.8545824125932749,
|
31 |
+
"rca": 0.8585667915670572,
|
32 |
+
"octave_error_rate": 0.006842205405568335,
|
33 |
+
"gross_error_rate": 0.009195155447295992,
|
34 |
+
"valid_frames": 530823
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 0.9070856086063553,
|
38 |
+
"continuity_breaks": 0.9673187531437764
|
39 |
+
},
|
40 |
+
"combined_score": 0.8605986452889337,
|
41 |
+
"optimal_threshold": 0.7000000000000001
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/PTDB_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "SwiftF0",
|
4 |
+
"dataset_name": "PTDB",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T09:32:39.060467+00:00",
|
7 |
+
"execution_time_seconds": 1676.28
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.8595051361271753,
|
24 |
+
"recall": 0.8827074112644276,
|
25 |
+
"f1": 0.8709517731521248
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 8.824085672990389,
|
29 |
+
"cents_error": 27.25684125583582,
|
30 |
+
"rpa": 0.9245973713155097,
|
31 |
+
"rca": 0.9284331737770596,
|
32 |
+
"octave_error_rate": 0.007820612868141794,
|
33 |
+
"gross_error_rate": 0.011805423274733783,
|
34 |
+
"valid_frames": 650972
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 0.9319907131972056,
|
38 |
+
"continuity_breaks": 0.7307847355480911
|
39 |
+
},
|
40 |
+
"combined_score": 0.9123995027247119,
|
41 |
+
"optimal_threshold": 0.9
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/SpeechSynth_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "DJCM",
|
4 |
+
"dataset_name": "SpeechSynth",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:52:00.966539+00:00",
|
7 |
+
"execution_time_seconds": 304.97
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.837945238672159,
|
24 |
+
"recall": 0.8362027275365123,
|
25 |
+
"f1": 0.8370730762713916
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 7.718708919257315,
|
29 |
+
"cents_error": 33.90223406070879,
|
30 |
+
"rpa": 0.8017465733618645,
|
31 |
+
"rca": 0.802151408247065,
|
32 |
+
"octave_error_rate": 0.0016771730958302007,
|
33 |
+
"gross_error_rate": 0.0031808455265745187,
|
34 |
+
"valid_frames": 17291
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 1.7037643989686282,
|
38 |
+
"continuity_breaks": 0.8117525549032398
|
39 |
+
},
|
40 |
+
"combined_score": 0.8902532632727663,
|
41 |
+
"optimal_threshold": 0.6000000000000001
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/SpeechSynth_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "RMVPE",
|
4 |
+
"dataset_name": "SpeechSynth",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:45:40.410835+00:00",
|
7 |
+
"execution_time_seconds": 275.48
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.8699555898822167,
|
24 |
+
"recall": 0.8715543089273624,
|
25 |
+
"f1": 0.8707542155868001
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 5.550796755159622,
|
29 |
+
"cents_error": 32.35756146049905,
|
30 |
+
"rpa": 0.8010209743646655,
|
31 |
+
"rca": 0.8010209743646655,
|
32 |
+
"octave_error_rate": 0.0002219509488403063,
|
33 |
+
"gross_error_rate": 0.0037176783930751305,
|
34 |
+
"valid_frames": 18022
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 0.7998049379840393,
|
38 |
+
"continuity_breaks": 0.7018083641371313
|
39 |
+
},
|
40 |
+
"combined_score": 0.9045536925081291,
|
41 |
+
"optimal_threshold": 0.6000000000000001
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/SpeechSynth_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "SwiftF0",
|
4 |
+
"dataset_name": "SpeechSynth",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T09:45:45.708101+00:00",
|
7 |
+
"execution_time_seconds": 273.75
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.7957286432160804,
|
24 |
+
"recall": 0.9189476738562724,
|
25 |
+
"f1": 0.8529108128731093
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 5.230897331841639,
|
29 |
+
"cents_error": 29.4226793241312,
|
30 |
+
"rpa": 0.838017050836754,
|
31 |
+
"rca": 0.838017050836754,
|
32 |
+
"octave_error_rate": 0.00036838227554994213,
|
33 |
+
"gross_error_rate": 0.001894537417113988,
|
34 |
+
"valid_frames": 19002
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 0.8613561059850603,
|
38 |
+
"continuity_breaks": 0.47523555845473653
|
39 |
+
},
|
40 |
+
"combined_score": 0.907437759674716,
|
41 |
+
"optimal_threshold": 0.9
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/Vocadito_DJCM_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "DJCM",
|
4 |
+
"dataset_name": "Vocadito",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:39:49.611660+00:00",
|
7 |
+
"execution_time_seconds": 58.08
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9296414157094071,
|
24 |
+
"recall": 0.8986551122615497,
|
25 |
+
"f1": 0.9138856832206316
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 8.330877399204885,
|
29 |
+
"cents_error": 16.74128488445694,
|
30 |
+
"rpa": 0.957441035274473,
|
31 |
+
"rca": 0.9577749947818827,
|
32 |
+
"octave_error_rate": 0.0030056355666875393,
|
33 |
+
"gross_error_rate": 0.005322479649342517,
|
34 |
+
"valid_frames": 47910
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 4.31178607164076,
|
38 |
+
"continuity_breaks": 0.7110614185980959
|
39 |
+
},
|
40 |
+
"combined_score": 0.9487006017295365,
|
41 |
+
"optimal_threshold": 0.5
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/Vocadito_RMVPE_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "RMVPE",
|
4 |
+
"dataset_name": "Vocadito",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T08:37:38.723299+00:00",
|
7 |
+
"execution_time_seconds": 34.64
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9617054875641532,
|
24 |
+
"recall": 0.9595408249395082,
|
25 |
+
"f1": 0.9606219367922934
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 6.843768019450916,
|
29 |
+
"cents_error": 12.639775639966883,
|
30 |
+
"rpa": 0.9772069747439206,
|
31 |
+
"rca": 0.9778325123152709,
|
32 |
+
"octave_error_rate": 0.002795371021972007,
|
33 |
+
"gross_error_rate": 0.004359214950347955,
|
34 |
+
"valid_frames": 51156
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 1.129077960650627,
|
38 |
+
"continuity_breaks": 0.6609074166755537
|
39 |
+
},
|
40 |
+
"combined_score": 0.9706704800499829,
|
41 |
+
"optimal_threshold": 0.7000000000000001
|
42 |
+
}
|
43 |
+
}
|
Training With Mixed Dataset/results/Vocadito_SwiftF0_sr16k_hop160_snr10-30_gain-6-6_seed42.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"algorithm_name": "SwiftF0",
|
4 |
+
"dataset_name": "Vocadito",
|
5 |
+
"seed": 42,
|
6 |
+
"timestamp_utc": "2025-09-20T09:39:59.812909+00:00",
|
7 |
+
"execution_time_seconds": 39.39
|
8 |
+
},
|
9 |
+
"parameters": {
|
10 |
+
"sample_rate": 16000,
|
11 |
+
"hop_size": 160,
|
12 |
+
"snr_range": [
|
13 |
+
10.0,
|
14 |
+
30.0
|
15 |
+
],
|
16 |
+
"voice_gain_range": [
|
17 |
+
-6.0,
|
18 |
+
6.0
|
19 |
+
]
|
20 |
+
},
|
21 |
+
"results": {
|
22 |
+
"voicing_detection": {
|
23 |
+
"precision": 0.9235118722955529,
|
24 |
+
"recall": 0.952769493369347,
|
25 |
+
"f1": 0.937912569819508
|
26 |
+
},
|
27 |
+
"pitch_accuracy": {
|
28 |
+
"rmse": 9.873827468340767,
|
29 |
+
"cents_error": 21.766317671126714,
|
30 |
+
"rpa": 0.92713849788365,
|
31 |
+
"rca": 0.9278275420809134,
|
32 |
+
"octave_error_rate": 0.0030514814450241165,
|
33 |
+
"gross_error_rate": 0.005335170784526036,
|
34 |
+
"valid_frames": 50795
|
35 |
+
},
|
36 |
+
"smoothness_metrics": {
|
37 |
+
"relative_smoothness": 1.29472430667963,
|
38 |
+
"continuity_breaks": 0.5422515277945902
|
39 |
+
},
|
40 |
+
"combined_score": 0.9503450251143354,
|
41 |
+
"optimal_threshold": 0.9
|
42 |
+
}
|
43 |
+
}
|