PhoenixStormJr committed on
Commit
b26651d
·
verified ·
1 Parent(s): 11a71c7

Update infer_pack/modules/F0Predictor/PMF0Predictor.py

Browse files
infer_pack/modules/F0Predictor/PMF0Predictor.py CHANGED
@@ -1,97 +1,97 @@
1
- from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
2
- import parselmouth
3
- import numpy as np
4
-
5
-
6
class PMF0Predictor(F0Predictor):
    """F0 predictor that tracks pitch with ``parselmouth.Sound.to_pitch_ac``.

    The constructor only stores configuration:

    * ``hop_length`` -- number of audio samples per output frame.
    * ``f0_min`` / ``f0_max`` -- passed to the tracker as ``pitch_floor`` and
      ``pitch_ceiling``.
    * ``sampling_rate`` -- sampling frequency handed to ``parselmouth.Sound``.
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate

    def interpolate_f0(self, f0):
        """Fill the unvoiced frames of an F0 contour.

        A frame is unvoiced when its value is ``<= 0``. Gaps are filled as
        follows:

        * Gap between two voiced frames: a linear ramp starting from the
          preceding voiced value. The step is ``(next - previous) / gap_length``,
          so the ramp reaches the next voiced value on the last unvoiced
          frame of the gap (this matches the historical behaviour and is
          kept on purpose).
        * Leading gap (no voiced frame before it): filled with the first
          voiced value that follows.
        * Trailing gap (no voiced frame after it): filled with the last
          voiced value, or ``0.0`` when the contour has no voiced frame.

        Args:
            f0: NumPy array of per-frame F0 values; it is flattened and is
                NOT modified.

        Returns:
            A tuple ``(f0_filled, vuv)`` of 1-D arrays with ``f0.size``
            elements. ``f0_filled`` has the dtype of ``f0``; ``vuv`` is
            ``float32`` with ``1.0`` where the input was ``> 0`` and ``0.0``
            elsewhere.
        """
        # Private copy: np.reshape alone may return a view, and filling a
        # view would silently rewrite the caller's array.
        values = np.array(f0, copy=True).reshape(-1)

        # The voiced/unvoiced mask must be taken before any gap is filled.
        vuv = (values > 0.0).astype(np.float32)

        frame_number = values.size
        last_value = 0.0
        i = 0
        while i < frame_number:
            if values[i] <= 0.0:
                # Find the next voiced frame after the gap that starts at i.
                j = i + 1
                while j < frame_number and not values[j] > 0.0:
                    j += 1

                if j >= frame_number:
                    # Trailing gap: nothing voiced follows, hold the last value.
                    values[i:] = last_value
                    break

                if last_value > 0.0:
                    # Interior gap: ramp from the previous voiced frame.
                    step = (values[j] - values[i - 1]) / float(j - i)
                    values[i:j] = values[i - 1] + step * np.arange(1, j - i + 1)
                else:
                    # Leading gap: back-fill with the first voiced value.
                    values[i:j] = values[j]

                # Resume at the voiced frame; the filled frames need no revisit.
                i = j
            else:
                last_value = values[i]
                i += 1

        return values, vuv

    def _compute_raw_f0(self, wav, p_len=None):
        """Run the pitch tracker and zero-pad its output towards ``p_len`` frames.

        Shared implementation behind ``compute_f0`` and ``compute_f0_uv``.

        Args:
            wav: 1-D audio array; ``wav.shape[0]`` is the number of samples.
            p_len: Expected number of frames. Defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            The tracker's ``"frequency"`` array, zero-padded on both sides
            when it is shorter than ``p_len``. It is returned unchanged
            (not truncated) when it is already at least ``p_len`` long.

        Raises:
            AssertionError: if an explicit ``p_len`` differs from
                ``wav.shape[0] // hop_length`` by 4 frames or more.
        """
        if p_len is None:
            p_len = wav.shape[0] // self.hop_length
        else:
            assert abs(p_len - wav.shape[0] // self.hop_length) < 4, "pad length error"

        # Hop duration in milliseconds, converted back to seconds below.
        # The ms -> s round trip is kept so the floating-point value handed
        # to the tracker is bit-identical to what this code always used.
        time_step = self.hop_length / self.sampling_rate * 1000
        sound = parselmouth.Sound(wav, self.sampling_rate)
        pitch = sound.to_pitch_ac(
            time_step=time_step / 1000,
            voicing_threshold=0.6,
            pitch_floor=self.f0_min,
            pitch_ceiling=self.f0_max,
        )
        f0 = pitch.selected_array["frequency"]

        # Split the missing frames between the front (rounded up) and the back.
        missing = p_len - len(f0)
        pad_front = (missing + 1) // 2
        pad_back = missing - pad_front
        if pad_front > 0 or pad_back > 0:
            f0 = np.pad(f0, [[pad_front, pad_back]], mode="constant")
        return f0

    def compute_f0(self, wav, p_len=None):
        """Return the gap-filled F0 contour of ``wav``.

        Args:
            wav: 1-D audio array.
            p_len: Expected number of frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            1-D array of F0 values with unvoiced frames filled by
            ``interpolate_f0``.
        """
        f0, _ = self.interpolate_f0(self._compute_raw_f0(wav, p_len))
        return f0

    def compute_f0_uv(self, wav, p_len=None):
        """Return the gap-filled F0 contour of ``wav`` and its voicing mask.

        Args:
            wav: 1-D audio array.
            p_len: Expected number of frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            Tuple ``(f0, uv)`` as produced by ``interpolate_f0``: the filled
            contour and a ``float32`` mask that is ``1.0`` on frames the
            tracker reported as voiced.
        """
        f0, uv = self.interpolate_f0(self._compute_raw_f0(wav, p_len))
        return f0, uv
 
1
+ from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
2
+ import parselmouth
3
+ import numpy as np
4
+
5
+
6
class PMF0Predictor(F0Predictor):
    """F0 predictor that tracks pitch with ``parselmouth.Sound.to_pitch_ac``.

    The constructor only stores configuration:

    * ``hop_length`` -- number of audio samples per output frame.
    * ``f0_min`` / ``f0_max`` -- passed to the tracker as ``pitch_floor`` and
      ``pitch_ceiling``.
    * ``sampling_rate`` -- sampling frequency handed to ``parselmouth.Sound``.
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate

    def interpolate_f0(self, f0):
        """Fill the unvoiced frames of an F0 contour.

        A frame is unvoiced when its value is ``<= 0``. Gaps are filled as
        follows:

        * Gap between two voiced frames: a linear ramp starting from the
          preceding voiced value. The step is ``(next - previous) / gap_length``,
          so the ramp reaches the next voiced value on the last unvoiced
          frame of the gap (this matches the historical behaviour and is
          kept on purpose).
        * Leading gap (no voiced frame before it): filled with the first
          voiced value that follows.
        * Trailing gap (no voiced frame after it): filled with the last
          voiced value, or ``0.0`` when the contour has no voiced frame.

        Args:
            f0: NumPy array of per-frame F0 values; it is flattened and is
                NOT modified.

        Returns:
            A tuple ``(f0_filled, vuv)`` of 1-D arrays with ``f0.size``
            elements. ``f0_filled`` has the dtype of ``f0``; ``vuv`` is
            ``float32`` with ``1.0`` where the input was ``> 0`` and ``0.0``
            elsewhere.
        """
        # Private copy: np.reshape alone may return a view, and filling a
        # view would silently rewrite the caller's array.
        values = np.array(f0, copy=True).reshape(-1)

        # The voiced/unvoiced mask must be taken before any gap is filled.
        vuv = (values > 0.0).astype(np.float32)

        frame_number = values.size
        last_value = 0.0
        i = 0
        while i < frame_number:
            if values[i] <= 0.0:
                # Find the next voiced frame after the gap that starts at i.
                j = i + 1
                while j < frame_number and not values[j] > 0.0:
                    j += 1

                if j >= frame_number:
                    # Trailing gap: nothing voiced follows, hold the last value.
                    values[i:] = last_value
                    break

                if last_value > 0.0:
                    # Interior gap: ramp from the previous voiced frame.
                    step = (values[j] - values[i - 1]) / float(j - i)
                    values[i:j] = values[i - 1] + step * np.arange(1, j - i + 1)
                else:
                    # Leading gap: back-fill with the first voiced value.
                    values[i:j] = values[j]

                # Resume at the voiced frame; the filled frames need no revisit.
                i = j
            else:
                last_value = values[i]
                i += 1

        return values, vuv

    def _compute_raw_f0(self, wav, p_len=None):
        """Run the pitch tracker and zero-pad its output towards ``p_len`` frames.

        Shared implementation behind ``compute_f0`` and ``compute_f0_uv``.

        Args:
            wav: 1-D audio array; ``wav.shape[0]`` is the number of samples.
            p_len: Expected number of frames. Defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            The tracker's ``"frequency"`` array, zero-padded on both sides
            when it is shorter than ``p_len``. It is returned unchanged
            (not truncated) when it is already at least ``p_len`` long.

        Raises:
            AssertionError: if an explicit ``p_len`` differs from
                ``wav.shape[0] // hop_length`` by 4 frames or more.
        """
        if p_len is None:
            p_len = wav.shape[0] // self.hop_length
        else:
            assert abs(p_len - wav.shape[0] // self.hop_length) < 4, "pad length error"

        # Hop duration in milliseconds, converted back to seconds below.
        # The ms -> s round trip is kept so the floating-point value handed
        # to the tracker is bit-identical to what this code always used.
        time_step = self.hop_length / self.sampling_rate * 1000
        sound = parselmouth.Sound(wav, self.sampling_rate)
        pitch = sound.to_pitch_ac(
            time_step=time_step / 1000,
            voicing_threshold=0.6,
            pitch_floor=self.f0_min,
            pitch_ceiling=self.f0_max,
        )
        f0 = pitch.selected_array["frequency"]

        # Split the missing frames between the front (rounded up) and the back.
        missing = p_len - len(f0)
        pad_front = (missing + 1) // 2
        pad_back = missing - pad_front
        if pad_front > 0 or pad_back > 0:
            f0 = np.pad(f0, [[pad_front, pad_back]], mode="constant")
        return f0

    def compute_f0(self, wav, p_len=None):
        """Return the gap-filled F0 contour of ``wav``.

        Args:
            wav: 1-D audio array.
            p_len: Expected number of frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            1-D array of F0 values with unvoiced frames filled by
            ``interpolate_f0``.
        """
        f0, _ = self.interpolate_f0(self._compute_raw_f0(wav, p_len))
        return f0

    def compute_f0_uv(self, wav, p_len=None):
        """Return the gap-filled F0 contour of ``wav`` and its voicing mask.

        Args:
            wav: 1-D audio array.
            p_len: Expected number of frames; defaults to
                ``wav.shape[0] // hop_length``.

        Returns:
            Tuple ``(f0, uv)`` as produced by ``interpolate_f0``: the filled
            contour and a ``float32`` mask that is ``1.0`` on frames the
            tracker reported as voiced.
        """
        f0, uv = self.interpolate_f0(self._compute_raw_f0(wav, p_len))
        return f0, uv