PhoenixStormJr committed
Commit dc39a2c · verified · 1 Parent(s): dcbb157

Update infer/train-index -v2.py

Files changed (1)
  1. infer/train-index -v2.py +44 -44
infer/train-index -v2.py CHANGED
@@ -1,44 +1,44 @@
- """
- 格式:直接cid为自带的index位;aid放不下了,通过字典来查,反正就5w个
- """
- import faiss, numpy as np, os
-
- # ###########如果是原始特征要先写save
- inp_root = r"./logs/nene/3_feature768"
- npys = []
- listdir_res = list(os.listdir(inp_root))
- for name in sorted(listdir_res):
-     phone = np.load("%s/%s" % (inp_root, name))
-     npys.append(phone)
- big_npy = np.concatenate(npys, 0)
- big_npy_idx = np.arange(big_npy.shape[0])
- np.random.shuffle(big_npy_idx)
- big_npy = big_npy[big_npy_idx]
- print(big_npy.shape) # (6196072, 192)#fp32#4.43G
- np.save("infer/big_src_feature_mi.npy", big_npy)
-
- ##################train+add
- # big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
- n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
- index = faiss.index_factory(768, "IVF%s,Flat" % n_ivf) # mi
- print("training")
- index_ivf = faiss.extract_index_ivf(index) #
- index_ivf.nprobe = 1
- index.train(big_npy)
- faiss.write_index(
-     index, "infer/trained_IVF%s_Flat_baseline_src_feat_v2.index" % (n_ivf)
- )
- print("adding")
- batch_size_add = 8192
- for i in range(0, big_npy.shape[0], batch_size_add):
-     index.add(big_npy[i : i + batch_size_add])
- faiss.write_index(index, "infer/added_IVF%s_Flat_mi_baseline_src_feat.index" % (n_ivf))
- """
- 大小(都是FP32
- big_src_feature 2.95G
- (3098036, 256)
- big_emb 4.43G
- (6196072, 192)
- big_emb双倍是因为求特征要repeat后再加pitch
-
- """
+ """
+ Format: cid is used directly as its index position; aid does not fit, so it is looked up through a dictionary; there are only about 50k of them anyway
+ """
+ import faiss, numpy as np, os
+
+ # ############ If starting from raw features, run the save step below first
+ inp_root = r"./logs/nene/3_feature768"
+ npys = []
+ listdir_res = list(os.listdir(inp_root))
+ for name in sorted(listdir_res):
+     phone = np.load("%s/%s" % (inp_root, name))
+     npys.append(phone)
+ big_npy = np.concatenate(npys, 0)
+ big_npy_idx = np.arange(big_npy.shape[0])
+ np.random.shuffle(big_npy_idx)
+ big_npy = big_npy[big_npy_idx]
+ print(big_npy.shape) # (6196072, 192)#fp32#4.43G
+ np.save("infer/big_src_feature_mi.npy", big_npy)
+
+ ##################train+add
+ # big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+ index = faiss.index_factory(768, "IVF%s,Flat" % n_ivf) # mi
+ print("training")
+ index_ivf = faiss.extract_index_ivf(index) #
+ index_ivf.nprobe = 1
+ index.train(big_npy)
+ faiss.write_index(
+     index, "infer/trained_IVF%s_Flat_baseline_src_feat_v2.index" % (n_ivf)
+ )
+ print("adding")
+ batch_size_add = 8192
+ for i in range(0, big_npy.shape[0], batch_size_add):
+     index.add(big_npy[i : i + batch_size_add])
+ faiss.write_index(index, "infer/added_IVF%s_Flat_mi_baseline_src_feat.index" % (n_ivf))
+ """
+ Size (all FP32)
+ big_src_feature 2.95G
+ (3098036, 256)
+ big_emb 4.43G
+ (6196072, 192)
+ big_emb is twice the size because the features are repeated and then pitch is appended
+
+ """