Spaces: Running on Zero
alibabasglab committed: Upload 161 files
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- __pycache__/clearvoice.cpython-37.pyc +0 -0
- __pycache__/clearvoice.cpython-38.pyc +0 -0
- __pycache__/network_wrapper.cpython-37.pyc +0 -0
- __pycache__/network_wrapper.cpython-38.pyc +0 -0
- __pycache__/networks.cpython-38.pyc +0 -0
- checkpoints/AV_MossFormer2_TSE_16K/config.yaml +55 -0
- checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint +1 -0
- checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint_enhance.pt +3 -0
- checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint_separate.pt +3 -0
- checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint_tmp.pt +3 -0
- checkpoints/FRCRN_SE_16K/config.yaml +33 -0
- checkpoints/FRCRN_SE_16K/last_best_checkpoint +1 -0
- checkpoints/FRCRN_SE_16K/last_checkpoint +1 -0
- checkpoints/FRCRN_SE_16K/model.ckpt-88-8491630.pt +3 -0
- clearvoice.py +62 -0
- config/inference/AV_MossFormer2_TSE_16K.yaml +41 -0
- config/inference/FRCRN_SE_16K.yaml +20 -0
- config/inference/MossFormer2_SE_48K.yaml +22 -0
- config/inference/MossFormer2_SS_16K.yaml +21 -0
- config/inference/MossFormerGAN_SE_16K.yaml +22 -0
- config/inference/SpEx_plus_TSE_8K.yaml +18 -0
- dataloader/__pycache__/dataloader.cpython-38.pyc +0 -0
- dataloader/__pycache__/misc.cpython-38.pyc +0 -0
- dataloader/dataloader.py +496 -0
- dataloader/misc.py +84 -0
- demo.py +70 -0
- demo_with_detailed_comments.py +61 -0
- input.wav +0 -0
- models/.DS_Store +0 -0
- models/__pycache__/__init__.cpython-36.pyc +0 -0
- models/__pycache__/__init__.cpython-37.pyc +0 -0
- models/__pycache__/__init__.cpython-38.pyc +0 -0
- models/__pycache__/complex_nn.cpython-36.pyc +0 -0
- models/__pycache__/complex_nn.cpython-37.pyc +0 -0
- models/__pycache__/complex_nn.cpython-38.pyc +0 -0
- models/__pycache__/constant.cpython-36.pyc +0 -0
- models/__pycache__/constant.cpython-37.pyc +0 -0
- models/__pycache__/constant.cpython-38.pyc +0 -0
- models/__pycache__/conv_stft.cpython-38.pyc +0 -0
- models/__pycache__/criterion.cpython-36.pyc +0 -0
- models/__pycache__/criterion.cpython-37.pyc +0 -0
- models/__pycache__/criterion.cpython-38.pyc +0 -0
- models/__pycache__/frcrn.cpython-38.pyc +0 -0
- models/__pycache__/metric.cpython-36.pyc +0 -0
- models/__pycache__/noisedataset.cpython-36.pyc +0 -0
- models/__pycache__/noisedataset.cpython-37.pyc +0 -0
- models/__pycache__/noisedataset.cpython-38.pyc +0 -0
- models/__pycache__/phasen_dccrn.cpython-36.pyc +0 -0
- models/__pycache__/phasen_dccrn.cpython-37.pyc +0 -0
- models/__pycache__/phasen_dccrn.cpython-38.pyc +0 -0
__pycache__/clearvoice.cpython-37.pyc
ADDED
Binary file (2.13 kB)

__pycache__/clearvoice.cpython-38.pyc
ADDED
Binary file (2.14 kB)

__pycache__/network_wrapper.cpython-37.pyc
ADDED
Binary file (8.18 kB)

__pycache__/network_wrapper.cpython-38.pyc
ADDED
Binary file (7.39 kB)

__pycache__/networks.cpython-38.pyc
ADDED
Binary file (13.2 kB)
checkpoints/AV_MossFormer2_TSE_16K/config.yaml
ADDED
@@ -0,0 +1,55 @@
+## Config file
+
+# Log
+seed: 777
+use_cuda: 1 # 1 for True, 0 for False
+
+# dataset
+speaker_no: 2
+mix_lst_path: ./data/allData/voxceleb2/mixture_data_list_2mix_pretrain.csv
+audio_direc: /mnt/nas_sg/wulanchabu/zexu.pan/datasets/voxceleb2/audio_clean
+reference_direc: /mnt/nas_sg/wulanchabu/zexu.pan/datasets/ # not used
+audio_sr: 16000
+ref_sr: 25
+
+# dataloader
+num_workers: 4
+batch_size: 2 # 4-GPU training with a total effective batch size of 8
+accu_grad: 0
+effec_batch_size: 4 # per GPU, only used if accu_grad is set to 1, must be multiple times of batch size
+max_length: 5 # truncate the utterances in dataloader, in seconds
+
+# network settings
+init_from: checkpoints/log_2024-09-30(09:49:14) # 'None' or a log name 'log_2024-07-22(18:12:13)'
+causal: 0 # 1 for True, 0 for False
+network_reference:
+  cue: lip # lip or speech or gesture or EEG
+  backbone: resnet18 # resnet18 or shufflenetV2 or blazenet64
+  emb_size: 256 # resnet18:256
+network_audio:
+  backbone: mossformer2
+  encoder_kernel_size: 16
+  encoder_out_nchannels: 512
+  encoder_in_nchannels: 1
+
+  masknet_numspks: 1
+  masknet_chunksize: 250
+  masknet_numlayers: 1
+  masknet_norm: "ln"
+  masknet_useextralinearlayer: False
+  masknet_extraskipconnection: True
+
+  intra_numlayers: 24
+  intra_nhead: 8
+  intra_dffn: 1024
+  intra_dropout: 0
+  intra_use_positional: True
+  intra_norm_before: True
+
+
+# optimizer
+loss_type: hybrid # "snr", "sisdr", "hybrid"
+init_learning_rate: 0.00015
+max_epoch: 150
+clip_grad_norm: 5
+
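For reference, the training and inference scripts in this commit consume YAML files like the one above as attribute-style arguments (e.g. args.sampling_rate, args.network_audio). Below is a minimal loading sketch; the AttrDict wrapper is a hypothetical helper for illustration and is not part of this commit, and the repo's own config loader may differ.

    # Minimal sketch (assumption): load a config YAML and expose keys as attributes.
    import yaml

    class AttrDict(dict):
        """Dict with attribute access; nested dicts are wrapped recursively."""
        def __getattr__(self, key):
            value = self[key]
            return AttrDict(value) if isinstance(value, dict) else value

    def load_config(path):
        with open(path) as f:
            return AttrDict(yaml.safe_load(f))

    # Usage with the file above (assuming the nesting shown there):
    # args = load_config('checkpoints/AV_MossFormer2_TSE_16K/config.yaml')
    # args.network_audio.backbone  # -> 'mossformer2'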
checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint
ADDED
@@ -0,0 +1 @@
+last_best_checkpoint_tmp.pt
checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint_enhance.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6b416073f66c7a9faa84ad8088bf4ae69c946f6c2ea3db2e7c6ead1a1fca088
+size 134
checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint_separate.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb45b197224686bbe11c3898b32b9da84533572bb93751f75586087fda43193b
+size 134
checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint_tmp.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:981fe2b4a3e912e10919a41e674606870b61b8cb00e4f15ca97984b0144fc61a
+size 134
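The three .pt entries above are Git LFS pointer stubs (version, oid, size), not the model weights themselves. A minimal parsing sketch, assuming the standard three-field pointer layout shown above:

    # Sketch: parse a Git LFS pointer stub into {'version', 'oid', 'size'}.
    def parse_lfs_pointer(path):
        fields = {}
        with open(path) as f:
            for line in f:
                key, _, value = line.strip().partition(' ')
                if key:
                    fields[key] = value
        return fields

    # Usage (hypothetical): parse_lfs_pointer(
    #     'checkpoints/AV_MossFormer2_TSE_16K/last_best_checkpoint_enhance.pt')['size']
    # returns '134', the byte size LFS records for the tracked object.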
checkpoints/FRCRN_SE_16K/config.yaml
ADDED
@@ -0,0 +1,33 @@
+#!/bin/bash
+mode: 'train'
+use_cuda: 1 # 1 for True, 0 for False
+
+sampling_rate: 16000
+network: "FRCRN_SE_16K" ##network type
+## FFT Parameters
+win_type: hanning
+win_len: 640
+win_inc: 320
+fft_len: 640
+
+# Train
+#tr_list: 'datasets/tr_tts_16k_noise_0to10db_p13_p20.lst_dur'
+tr_list: 'data/cv_webrtc_test_set_20200521_16k.lst'
+cv_list: 'data/cv_webrtc_test_set_20200521_16k.lst'
+init_learning_rate: 0.001 #learning rate for a new training
+finetune_learning_rate: 0.0001 #learning rate for a finetune training
+max_epoch: 100
+
+weight_decay: 0.00001
+clip_grad_norm: 10.
+
+# Log
+seed: 777
+
+# # dataset
+num_workers: 4
+batch_size: 4
+accu_grad: 1 # accumulate multiple batch sizes for one back-propagation updating
+effec_batch_size: 12 # per GPU, only used if accu_grad is set to 1, must be multiple times of batch size
+max_length: 1 # truncate the utterances in dataloader, in seconds
+
checkpoints/FRCRN_SE_16K/last_best_checkpoint
ADDED
@@ -0,0 +1 @@
+model.ckpt-88-8491630.pt
checkpoints/FRCRN_SE_16K/last_checkpoint
ADDED
@@ -0,0 +1 @@
+model.ckpt-88-8491630.pt
checkpoints/FRCRN_SE_16K/model.ckpt-88-8491630.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b22256adbb91b68cf5a3db8f6657a4fb17066eecd5f069803e59c186c1cf3ebb
+size 161053751
clearvoice.py
ADDED
@@ -0,0 +1,62 @@
+from network_wrapper import network_wrapper
+
+class ClearVoice:
+    """ The main class interface to the end users for performing speech processing
+    this class provides the desired model to perform the given task
+    """
+    def __init__(self, task, model_names):
+        """ Load the desired models for the specified task. Perform all the given models and return all results.
+
+        Parameters:
+        ----------
+        task: str
+            the task matching any of the provided tasks:
+            'speech_enhancement'
+            'speech_separation'
+            'target_speaker_extraction'
+        model_names: str or list of str
+            the model names matching any of the provided models:
+            'FRCRN_SE_16K'
+            'MossFormer2_SE_48K'
+            'MossFormerGAN_SE_16K'
+            'MossFormer2_SS_16K'
+            'AV_MossFormer2_TSE_16K'
+
+        Returns:
+        --------
+        A ModelsList object that can be run to get the desired results
+        """
+        self.network_wrapper = network_wrapper()
+        self.models = []
+        for model_name in model_names:
+            model = self.network_wrapper(task, model_name)
+            self.models += [model]
+
+    def __call__(self, input_path, online_write=False, output_path=None):
+        results = {}
+        for model in self.models:
+            result = model.process(input_path, online_write, output_path)
+            if not online_write:
+                results[model.name] = result
+
+        if not online_write:
+            if len(results) == 1:
+                return results[model.name]
+            else:
+                return results
+
+    def write(self, results, output_path):
+        add_subdir = False
+        use_key = False
+        if len(self.models) > 1: add_subdir = True #multi_model is True
+        for model in self.models:
+            if isinstance(results, dict):
+                if model.name in results:
+                    if len(results[model.name]) > 1: use_key = True
+            else:
+                if len(results) > 1: use_key = True #multi_input is True
+            break
+
+        for model in self.models:
+            model.write(output_path, add_subdir, use_key)
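A minimal usage sketch of the ClearVoice interface defined above, following demo.py later in this commit; model downloads and checkpoint setup are assumed to be handled by network_wrapper.

    from clearvoice import ClearVoice

    myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])

    # In-memory processing: returns the enhanced waveform, then write it to disk.
    output_wav = myClearVoice(input_path='input.wav', online_write=False)
    myClearVoice.write(output_wav, output_path='output.wav')

    # Batch processing: read every .wav under a directory and write results directly.
    myClearVoice(input_path='path_to_input_wavs', online_write=True,
                 output_path='path_to_output_wavs')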
config/inference/AV_MossFormer2_TSE_16K.yaml
ADDED
@@ -0,0 +1,41 @@
+#!/bin/bash
+mode: 'inference'
+use_cuda: 1 # 1 for True, 0 for False
+num_gpu: 1
+sampling_rate: 16000
+network: "AV_MossFormer2_TSE_16K" # network type
+checkpoint_dir: "checkpoints/AV_MossFormer2_TSE_16K"
+
+input_path: "scp/video_samples.scp" # an input dir or input scp file
+output_dir: "path_to_output_videos_tse" # output dir to store processed audio
+
+# decode parameters
+one_time_decode_length: 3 # maximum segment length for one-pass decoding (seconds), longer audio (>5s) will use segmented decoding
+decode_window: 3 # one-pass decoding length
+
+
+# Model-specific settings for target speaker extraction
+network_reference:
+  cue: lip
+  backbone: resnet18
+  emb_size: 256
+network_audio:
+  backbone: mossformer2
+  encoder_kernel_size: 16
+  encoder_out_nchannels: 512
+  encoder_in_nchannels: 1
+
+  masknet_numspks: 1
+  masknet_chunksize: 250
+  masknet_numlayers: 1
+  masknet_norm: "ln"
+  masknet_useextralinearlayer: False
+  masknet_extraskipconnection: True
+
+  intra_numlayers: 24
+  intra_nhead: 8
+  intra_dffn: 1024
+  intra_dropout: 0
+  intra_use_positional: True
+  intra_norm_before: True
+
config/inference/FRCRN_SE_16K.yaml
ADDED
@@ -0,0 +1,20 @@
+#!/bin/bash
+mode: 'inference'
+use_cuda: 1 # 1 for True, 0 for False
+num_gpu: 1
+sampling_rate: 16000
+network: "FRCRN_SE_16K" ##network type
+checkpoint_dir: "checkpoints/FRCRN_SE_16K"
+#input_path: "data/cv_webrtc_test_set_20200521_16k.scp" # an input dir or input scp file
+input_path: "/home/shengkui.zhao/DingTalk_NS/data/webrtc_test_set_20200521_16k/noisy"
+output_dir: "outputs/FRCRN_SE_16K" ## output dir to store processed audio
+
+# decode parameters
+one_time_decode_length: 120 #maximum segment length for one-pass decoding (seconds), longer audio will use segmented decoding
+decode_window: 1 #one-pass decoding length
+#
+# FFT parameters
+win_type: 'hanning'
+win_len: 640
+win_inc: 320
+fft_len: 640
config/inference/MossFormer2_SE_48K.yaml
ADDED
@@ -0,0 +1,22 @@
+#!/bin/bash
+mode: 'inference'
+use_cuda: 1 # 1 for True, 0 for False
+num_gpu: 1
+sampling_rate: 48000
+network: "MossFormer2_SE_48K" ##network type
+checkpoint_dir: "checkpoints/MossFormer2_SE_48K"
+
+#input_path: support wav dir or wav scp or a wav file
+input_path: "/mnt/nas/mit_sg/shengkui.zhao/DNS-Challenge/datasets/test_set/synthetic/no_reverb/noisy"
+output_dir: "outputs/MossFormer2_SE_48K_dns_2020_noreverb"
+
+# decode parameters
+one_time_decode_length: 20 #maximum segment length for one-pass decoding (seconds), longer audio will use segmented decoding
+decode_window: 4 #one-pass decoding length
+
+# FFT parameters
+win_type: 'hamming'
+win_len: 1920
+win_inc: 384
+fft_len: 1920
+num_mels: 60
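A short worked example with the values from this config: the Fbank_Processor in dataloader/dataloader.py (added later in this commit) converts the window parameters from samples to milliseconds before calling torchaudio's Kaldi-compatible fbank.

    sampling_rate = 48000
    win_len, win_inc = 1920, 384
    frame_length = int(win_len / sampling_rate * 1000)  # 40 ms analysis window
    frame_shift = int(win_inc / sampling_rate * 1000)   # 8 ms hop
    print(frame_length, frame_shift)  # -> 40 8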
config/inference/MossFormer2_SS_16K.yaml
ADDED
@@ -0,0 +1,21 @@
+#!/bin/bash
+mode: 'inference'
+use_cuda: 1 # 1 for True, 0 for False
+num_gpu: 1
+sampling_rate: 16000
+network: "MossFormer2_SS_16K" ##network type
+checkpoint_dir: "checkpoints/MossFormer2_SS_16K"
+input_path: "data/wsj0_2mix_16k_fullpath.lst" # an input dir or input scp file
+#input_path: "/home/shengkui.zhao/DingTalk_NS/data/webrtc_test_set_20200521_16k/noisy"
+#input_path: "/mnt/nas_sg/mit_sg/shengkui.zhao/ComplexNN/audio/youtube_testset_16k/noisy_long/noisy"
+output_dir: "outputs/MossFormer2_SS_16K_wsj0_2mix" ## output dir to store processed audio
+
+# decode parameters
+one_time_decode_length: 30 #maximum segment length for one-pass decoding (seconds), longer audio (>3s) will use segmented decoding
+decode_window: 10 #one-pass decoding length
+
+num_spks: 2
+encoder_kernel_size: 16
+encoder_embedding_dim: 512
+mossformer_sequence_dim: 512
+num_mossformer_layer: 24
config/inference/MossFormerGAN_SE_16K.yaml
ADDED
@@ -0,0 +1,22 @@
+#!/bin/bash
+mode: 'inference'
+use_cuda: 1 # 1 for True, 0 for False
+num_gpu: 1
+sampling_rate: 16000
+network: "MossFormerGAN_SE_16K" ##network type
+checkpoint_dir: "checkpoints/MossFormerGAN_SE_16K"
+
+#input_path: "data/cv_webrtc_test_set_20200521_16k.scp" # an input dir or input scp file
+#input_path: "/home/shengkui.zhao/DingTalk_NS/data/webrtc_test_set_20200521_16k/noisy"
+input_path: "/mnt/nas_sg/mit_sg/shengkui.zhao/ComplexNN/audio/youtube_testset_16k/noisy_long/noisy"
+output_dir: "outputs/MossFormerGAN_SE_16K" ## output dir to store processed audio
+
+# decode parameters
+one_time_decode_length: 10 #maximum segment length for one-pass decoding (seconds), longer audio will use segmented decoding
+decode_window: 10 #one-pass decoding length
+
+# FFT parameters
+win_type: 'hamming'
+win_len: 400
+win_inc: 100
+fft_len: 400
config/inference/SpEx_plus_TSE_8K.yaml
ADDED
@@ -0,0 +1,18 @@
+#!/bin/bash
+mode: 'inference'
+use_cuda: 1 # 1 for True, 0 for False
+num_gpu: 1
+sampling_rate: 8000
+network: "SpEx_plus_TSE_8K" ##network type
+checkpoint_dir: "checkpoints/SpEx_plus_TSE_8K"
+input_path: "data/wsj0_2mix_16k_fullpath.lst" # an input dir or input scp file
+#input_path: "/home/shengkui.zhao/DingTalk_NS/data/webrtc_test_set_20200521_16k/noisy"
+#input_path: "/mnt/nas_sg/mit_sg/shengkui.zhao/ComplexNN/audio/youtube_testset_16k/noisy_long/noisy"
+output_dir: "outputs/MossFormer2_SS_16K_wsj0_2mix" ## output dir to store processed audio
+
+# decode parameters
+one_time_decode_length: 5 #maximum segment length for one-pass decoding (seconds), longer audio (>3s) will use segmented decoding
+decode_window: 1 #one-pass decoding length
+
+
+
dataloader/__pycache__/dataloader.cpython-38.pyc
ADDED
Binary file (14.2 kB)

dataloader/__pycache__/misc.cpython-38.pyc
ADDED
Binary file (2.15 kB)
dataloader/dataloader.py
ADDED
@@ -0,0 +1,496 @@
+import numpy as np
+import math, os, csv
+import torchaudio
+import torch
+import torch.nn as nn
+import torch.utils.data as data
+import torch.distributed as dist
+import soundfile as sf
+from torch.utils.data import Dataset
+import torch.utils.data as data
+import os
+import sys
+sys.path.append(os.path.dirname(__file__))
+
+from dataloader.misc import read_and_config_file
+import librosa
+import random
+EPS = 1e-6
+MAX_WAV_VALUE = 32768.0
+
+def audioread(path, sampling_rate):
+    """
+    Reads an audio file from the specified path, normalizes the audio,
+    resamples it to the desired sampling rate (if necessary), and ensures it is single-channel.
+
+    Parameters:
+    path (str): The file path of the audio file to be read.
+    sampling_rate (int): The target sampling rate for the audio.
+
+    Returns:
+    numpy.ndarray: The processed audio data, normalized, resampled (if necessary),
+                   and converted to mono (if the input audio has multiple channels).
+    """
+
+    # Read audio data and its sample rate from the file.
+    data, fs = sf.read(path)
+
+    # Normalize the audio data.
+    data = audio_norm(data)
+
+    # Resample the audio if the sample rate is different from the target sampling rate.
+    if fs != sampling_rate:
+        data = librosa.resample(data, orig_sr=fs, target_sr=sampling_rate)
+
+    # Convert to mono by selecting the first channel if the audio has multiple channels.
+    if len(data.shape) > 1:
+        data = data[:, 0]
+
+    # Return the processed audio data.
+    return data
+
+def audio_norm(x):
+    """
+    Normalizes the input audio signal to a target Root Mean Square (RMS) level,
+    applying two stages of scaling. This ensures the audio signal is neither too quiet
+    nor too loud, keeping its amplitude consistent.
+
+    Parameters:
+    x (numpy.ndarray): Input audio signal to be normalized.
+
+    Returns:
+    numpy.ndarray: Normalized audio signal.
+    """
+
+    # Compute the root mean square (RMS) of the input audio signal.
+    rms = (x ** 2).mean() ** 0.5
+
+    # Calculate the scalar to adjust the signal to the target level (-25 dB).
+    scalar = 10 ** (-25 / 20) / (rms + EPS)
+
+    # Scale the input audio by the computed scalar.
+    x = x * scalar
+
+    # Compute the power of the scaled audio signal.
+    pow_x = x ** 2
+
+    # Calculate the average power of the audio signal.
+    avg_pow_x = pow_x.mean()
+
+    # Compute RMS only for audio segments with higher-than-average power.
+    rmsx = pow_x[pow_x > avg_pow_x].mean() ** 0.5
+
+    # Calculate another scalar to further normalize based on higher-power segments.
+    scalarx = 10 ** (-25 / 20) / (rmsx + EPS)
+
+    # Apply the second scalar to the audio.
+    x = x * scalarx
+
+    # Return the doubly normalized audio signal.
+    return x
+
+class DataReader(object):
+    """
+    A class for reading audio data from a list of files, normalizing it,
+    and extracting features for further processing. It supports extracting
+    features from each file, reshaping the data, and returning metadata
+    like utterance ID and data length.
+
+    Parameters:
+    args: Arguments containing the input path and target sampling rate.
+
+    Attributes:
+    file_list (list): A list of audio file paths to process.
+    sampling_rate (int): The target sampling rate for audio files.
+    """
+
+    def __init__(self, args):
+        # Read and configure the file list from the input path provided in the arguments.
+        # The file list is decoded, if necessary.
+        self.file_list = read_and_config_file(args, args.input_path, decode=True)
+
+        # Store the target sampling rate.
+        self.sampling_rate = args.sampling_rate
+
+        # Store the args file
+        self.args = args
+
+    def __len__(self):
+        """
+        Returns the number of audio files in the file list.
+
+        Returns:
+        int: Number of files to process.
+        """
+        return len(self.file_list)
+
+    def __getitem__(self, index):
+        """
+        Retrieves the features of the audio file at the given index.
+
+        Parameters:
+        index (int): Index of the file in the file list.
+
+        Returns:
+        tuple: Features (inputs, utterance ID, data length) for the selected audio file.
+        """
+        if self.args.task == 'target_speaker_extraction':
+            if self.args.network_reference.cue== 'lip':
+                return self.file_list[index]
+        return self.extract_feature(self.file_list[index])
+
+    def extract_feature(self, path):
+        """
+        Extracts features from the given audio file path.
+
+        Parameters:
+        path (str): The file path of the audio file.
+
+        Returns:
+        inputs (numpy.ndarray): Reshaped audio data for further processing.
+        utt_id (str): The unique identifier of the audio file, usually the filename.
+        length (int): The length of the original audio data.
+        """
+        # Extract the utterance ID from the file path (usually the filename).
+        utt_id = path.split('/')[-1]
+
+        # Read and normalize the audio data, converting it to float32 for processing.
+        data = audioread(path, self.sampling_rate).astype(np.float32)
+
+        # Reshape the data to ensure it's in the format [1, data_length].
+        inputs = np.reshape(data, [1, data.shape[0]])
+
+        # Return the reshaped audio data, utterance ID, and the length of the original data.
+        return inputs, utt_id, data.shape[0]
+
+class Wave_Processor(object):
+    """
+    A class for processing audio data, specifically for reading input and label audio files,
+    segmenting them into fixed-length segments, and applying padding or trimming as necessary.
+
+    Methods:
+    process(path, segment_length, sampling_rate):
+        Processes audio data by reading, padding, or segmenting it to match the specified segment length.
+
+    Parameters:
+    path (dict): A dictionary containing file paths for 'inputs' and 'labels' audio files.
+    segment_length (int): The desired length of audio segments to extract.
+    sampling_rate (int): The target sampling rate for reading the audio files.
+    """
+
+    def process(self, path, segment_length, sampling_rate):
+        """
+        Reads input and label audio files, and ensures the audio is segmented into
+        the desired length, padding if necessary or extracting random segments if
+        the audio is longer than the target segment length.
+
+        Parameters:
+        path (dict): Dictionary containing the paths to 'inputs' and 'labels' audio files.
+        segment_length (int): Desired length of the audio segment in samples.
+        sampling_rate (int): Target sample rate for the audio.
+
+        Returns:
+        tuple: A pair of numpy arrays representing the processed input and label audio,
+               either padded to the segment length or trimmed.
+        """
+        # Read the input and label audio files using the target sampling rate.
+        wave_inputs = audioread(path['inputs'], sampling_rate)
+        wave_labels = audioread(path['labels'], sampling_rate)
+
+        # Get the length of the label audio (assumed both inputs and labels have similar lengths).
+        len_wav = wave_labels.shape[0]
+
+        # If the input audio is shorter than the desired segment length, pad it with zeros.
+        if wave_inputs.shape[0] < segment_length:
+            # Create zero-padded arrays for inputs and labels.
+            padded_inputs = np.zeros(segment_length, dtype=np.float32)
+            padded_labels = np.zeros(segment_length, dtype=np.float32)
+
+            # Copy the original audio into the padded arrays.
+            padded_inputs[:wave_inputs.shape[0]] = wave_inputs
+            padded_labels[:wave_labels.shape[0]] = wave_labels
+        else:
+            # Randomly select a start index for segmenting the audio if it's longer than the segment length.
+            st_idx = random.randint(0, len_wav - segment_length)
+
+            # Extract a segment of the desired length from the inputs and labels.
+            padded_inputs = wave_inputs[st_idx:st_idx + segment_length]
+            padded_labels = wave_labels[st_idx:st_idx + segment_length]
+
+        # Return the processed (padded or segmented) input and label audio.
+        return padded_inputs, padded_labels
+
+class Fbank_Processor(object):
+    """
+    A class for processing input audio data into mel-filterbank (Fbank) features,
+    including the computation of delta and delta-delta features.
+
+    Methods:
+    process(inputs, args):
+        Processes the raw audio input and returns the mel-filterbank features
+        along with delta and delta-delta features.
+    """
+
+    def process(self, inputs, args):
+        # Convert frame length and shift from seconds to milliseconds.
+        frame_length = int(args.win_len / args.sampling_rate * 1000)
+        frame_shift = int(args.win_inc / args.sampling_rate * 1000)
+
+        # Set up configuration for the mel-filterbank computation.
+        fbank_config = {
+            "dither": 1.0,
+            "frame_length": frame_length,
+            "frame_shift": frame_shift,
+            "num_mel_bins": args.num_mels,
+            "sample_frequency": args.sampling_rate,
+            "window_type": args.win_type
+        }
+
+        # Convert the input audio to a FloatTensor and scale it to match the expected input range.
+        inputs = torch.FloatTensor(inputs * MAX_WAV_VALUE)
+
+        # Compute the mel-filterbank features using Kaldi's fbank function.
+        fbank = torchaudio.compliance.kaldi.fbank(inputs.unsqueeze(0), **fbank_config)
+
+        # Add delta and delta-delta features.
+        fbank_tr = torch.transpose(fbank, 0, 1)
+        fbank_delta = torchaudio.functional.compute_deltas(fbank_tr)
+        fbank_delta_delta = torchaudio.functional.compute_deltas(fbank_delta)
+        fbank_delta = torch.transpose(fbank_delta, 0, 1)
+        fbank_delta_delta = torch.transpose(fbank_delta_delta, 0, 1)
+
+        # Concatenate the original Fbank, delta, and delta-delta features.
+        fbanks = torch.cat([fbank, fbank_delta, fbank_delta_delta], dim=1)
+
+        return fbanks.numpy()
+
+class AudioDataset(Dataset):
+    """
+    A dataset class for loading and processing audio data from different data types
+    (train, validation, test). Supports audio processing and feature extraction
+    (e.g., waveform processing, Fbank feature extraction).
+
+    Parameters:
+    args: Arguments containing dataset configuration (paths, sampling rate, etc.).
+    data_type (str): The type of data to load (train, val, test).
+    """
+
+    def __init__(self, args, data_type):
+        self.args = args
+        self.sampling_rate = args.sampling_rate
+
+        # Read the list of audio files based on the data type.
+        if data_type == 'train':
+            self.wav_list = read_and_config_file(args.tr_list)
+        elif data_type == 'val':
+            self.wav_list = read_and_config_file(args.cv_list)
+        elif data_type == 'test':
+            self.wav_list = read_and_config_file(args.tt_list)
+        else:
+            print(f'Data type: {data_type} is unknown!')
+
+        # Initialize processors for waveform and Fbank features.
+        self.wav_processor = Wave_Processor()
+        self.fbank_processor = Fbank_Processor()
+
+        # Clip data to a fixed segment length based on the sampling rate and max length.
+        self.segment_length = self.sampling_rate * self.args.max_length
+        print(f'No. {data_type} files: {len(self.wav_list)}')
+
+    def __len__(self):
+        # Return the number of audio files in the dataset.
+        return len(self.wav_list)
+
+    def __getitem__(self, index):
+        # Get the input and label paths from the list.
+        data_info = self.wav_list[index]
+
+        # Process the waveform inputs and labels.
+        inputs, labels = self.wav_processor.process(
+            {'inputs': data_info['inputs'], 'labels': data_info['labels']},
+            self.segment_length,
+            self.sampling_rate
+        )
+
+        # Optionally load Fbank features if specified.
+        if self.args.load_fbank is not None:
+            fbanks = self.fbank_processor.process(inputs, self.args)
+            return inputs * MAX_WAV_VALUE, labels * MAX_WAV_VALUE, fbanks
+
+        return inputs, labels
+
+    def zero_pad_concat(self, inputs):
+        """
+        Concatenates a list of input arrays, applying zero-padding as needed to ensure
+        they all match the length of the longest input.
+
+        Parameters:
+        inputs (list of numpy arrays): List of input arrays to be concatenated.
+
+        Returns:
+        numpy.ndarray: A zero-padded array with concatenated inputs.
+        """
+
+        # Get the maximum length among all inputs.
+        max_t = max(inp.shape[0] for inp in inputs)
+
+        # Determine the shape of the output based on the input dimensions.
+        shape = None
+        if len(inputs[0].shape) == 1:
+            shape = (len(inputs), max_t)
+        elif len(inputs[0].shape) == 2:
+            shape = (len(inputs), max_t, inputs[0].shape[1])
+
+        # Initialize an array with zeros to hold the concatenated inputs.
+        input_mat = np.zeros(shape, dtype=np.float32)
+
+        # Copy the input data into the zero-padded array.
+        for e, inp in enumerate(inputs):
+            if len(inp.shape) == 1:
+                input_mat[e, :inp.shape[0]] = inp
+            elif len(inp.shape) == 2:
+                input_mat[e, :inp.shape[0], :] = inp
+
+        return input_mat
+
+def collate_fn_2x_wavs(data):
+    """
+    A custom collate function for combining batches of waveform input and label pairs.
+
+    Parameters:
+    data (list): List of tuples (inputs, labels).
+
+    Returns:
+    tuple: Batched inputs and labels as torch.FloatTensors.
+    """
+    inputs, labels = zip(*data)
+    x = torch.FloatTensor(inputs)
+    y = torch.FloatTensor(labels)
+    return x, y
+
+def collate_fn_2x_wavs_fbank(data):
+    """
+    A custom collate function for combining batches of waveform inputs, labels, and Fbank features.
+
+    Parameters:
+    data (list): List of tuples (inputs, labels, fbanks).
+
+    Returns:
+    tuple: Batched inputs, labels, and Fbank features as torch.FloatTensors.
+    """
+    inputs, labels, fbanks = zip(*data)
+    x = torch.FloatTensor(inputs)
+    y = torch.FloatTensor(labels)
+    z = torch.FloatTensor(fbanks)
+    return x, y, z
+
+class DistributedSampler(data.Sampler):
+    """
+    Sampler for distributed training. Divides the dataset among multiple replicas (processes),
+    ensuring that each process gets a unique subset of the data. It also supports shuffling
+    and managing epochs.
+
+    Parameters:
+    dataset (Dataset): The dataset to sample from.
+    num_replicas (int): Number of processes participating in the training.
+    rank (int): Rank of the current process.
+    shuffle (bool): Whether to shuffle the data or not.
+    seed (int): Random seed for reproducibility.
+    """
+
+    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0):
+        if num_replicas is None:
+            if not dist.is_available():
+                raise RuntimeError("Requires distributed package to be available")
+            num_replicas = dist.get_world_size()
+        if rank is None:
+            if not dist.is_available():
+                raise RuntimeError("Requires distributed package to be available")
+            rank = dist.get_rank()
+
+        self.dataset = dataset
+        self.num_replicas = num_replicas
+        self.rank = rank
+        self.epoch = 0
+        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
+        self.total_size = self.num_samples * self.num_replicas
+        self.shuffle = shuffle
+        self.seed = seed
+
+    def __iter__(self):
+        # Shuffle the indices based on the epoch and seed.
+        if self.shuffle:
+            g = torch.Generator()
+            g.manual_seed(self.seed + self.epoch)
+            ind = torch.randperm(int(len(self.dataset) / self.num_replicas), generator=g) * self.num_replicas
+            indices = []
+            for i in range(self.num_replicas):
+                indices = indices + (ind + i).tolist()
+        else:
+            indices = list(range(len(self.dataset)))
+
+        # Add extra samples to make the dataset evenly divisible.
+        indices += indices[:(self.total_size - len(indices))]
+        assert len(indices) == self.total_size
+
+        # Subsample for the current process.
+        indices = indices[self.rank * self.num_samples:(self.rank + 1) * self.num_samples]
+        assert len(indices) == self.num_samples
+
+        return iter(indices)
+
+    def __len__(self):
+        return self.num_samples
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
+
+def get_dataloader(args, data_type):
+    """
+    Creates and returns a data loader and sampler for the specified dataset type (train, validation, or test).
+
+    Parameters:
+    args (Namespace): Configuration arguments containing details such as batch size, sampling rate,
+                      network type, and whether distributed training is used.
+    data_type (str): The type of dataset to load ('train', 'val', 'test').
+
+    Returns:
+    sampler (DistributedSampler or None): The sampler for distributed training, or None if not used.
+    generator (DataLoader): The PyTorch DataLoader for the specified dataset.
+    """
+
+    # Initialize the dataset based on the given arguments and dataset type (train, val, or test).
+    datasets = AudioDataset(args=args, data_type=data_type)
+
+    # Create a distributed sampler if distributed training is enabled; otherwise, use no sampler.
+    sampler = DistributedSampler(
+        datasets,
+        num_replicas=args.world_size,  # Number of replicas in distributed training.
+        rank=args.local_rank  # Rank of the current process.
+    ) if args.distributed else None
+
+    # Select the appropriate collate function based on the network type.
+    if args.network == 'FRCRN_SE_16K' or args.network == 'MossFormerGAN_SE_16K':
+        # Use the collate function for two-channel waveform data (inputs and labels).
+        collate_fn = collate_fn_2x_wavs
+    elif args.network == 'MossFormer2_SE_48K':
+        # Use the collate function for waveforms along with Fbank features.
+        collate_fn = collate_fn_2x_wavs_fbank
+    else:
+        # Print an error message if the network type is unknown.
+        print(f'in dataloader, please specify a correct network type using args.network!')
+        return
+
+    # Create a DataLoader with the specified dataset, batch size, and worker configuration.
+    generator = data.DataLoader(
+        datasets,
+        batch_size=args.batch_size,  # Batch size for training.
+        shuffle=(sampler is None),  # Shuffle the data only if no sampler is used.
+        collate_fn=collate_fn,  # Use the selected collate function for batching data.
+        num_workers=args.num_workers,  # Number of workers for data loading.
+        sampler=sampler  # Use the distributed sampler if applicable.
+    )
+
+    # Return both the sampler and DataLoader (generator).
+    return sampler, generator
+
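A minimal sketch of driving the inference-side DataReader defined above. The args fields mirror the inference YAML configs in this commit and are assembled here with a SimpleNamespace purely for illustration; the directory path is hypothetical, and the real entry point that builds args is not part of this file.

    from types import SimpleNamespace
    from dataloader.dataloader import DataReader

    args = SimpleNamespace(
        task='speech_enhancement',
        input_path='path_to_input_wavs',   # a dir of .wav files, a single file, or an .scp list (hypothetical)
        sampling_rate=16000,
    )

    reader = DataReader(args)
    inputs, utt_id, length = reader[0]     # [1, num_samples] float32, filename, sample count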
dataloader/misc.py
ADDED
@@ -0,0 +1,84 @@
+
+#!/usr/bin/env python -u
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import torch
+import torch.nn as nn
+import numpy as np
+import os
+import sys
+import librosa
+
+def read_and_config_file(args, input_path, decode=0):
+    """
+    Reads and processes the input file or directory to extract audio file paths or configuration data.
+
+    Parameters:
+    args: The args
+    input_path (str): Path to a file or directory containing audio data or file paths.
+    decode (bool): If True (decode=1) for decoding, process the input as audio files directly (find .wav or .flac files) or from a .scp file.
+                   If False (decode=0) for training, assume the input file contains lines with paths to audio files.
+
+    Returns:
+    processed_list (list): A list of processed file paths or a list of dictionaries containing input
+                           and optional condition audio paths.
+    """
+    processed_list = []  # Initialize list to hold processed file paths or configurations
+
+    if decode:
+        if args.task == 'target_speaker_extraction':
+            if args.network_reference.cue== 'lip':
+                # If decode is True, find video files in a directory or single file
+                if os.path.isdir(input_path):
+                    # Find all .mp4, .mov, .avi files in the input directory
+                    processed_list = librosa.util.find_files(input_path, ext="mp4")
+                    processed_list += librosa.util.find_files(input_path, ext="avi")
+                    processed_list += librosa.util.find_files(input_path, ext="mov")
+                    processed_list += librosa.util.find_files(input_path, ext="MOV")
+                else:
+                    # If it's a single file and it's a .mp4/.avi/.mov, add to processed list
+                    if input_path.lower().endswith(".mp4") or input_path.lower().endswith(".avi") or input_path.lower().endswith(".mov"):
+                        processed_list.append(input_path)
+                    else:
+                        # Read file paths from the input text file (one path per line)
+                        with open(input_path) as fid:
+                            for line in fid:
+                                path_s = line.strip().split()  # Split paths (space-separated)
+                                processed_list.append(path_s[0])  # Add the first path (input audio path)
+                return processed_list
+
+        # If decode is True, find audio files in a directory or single file
+        if os.path.isdir(input_path):
+            # Find all .wav files in the input directory
+            processed_list = librosa.util.find_files(input_path, ext="wav")
+            if len(processed_list) == 0:
+                # If no .wav files, look for .flac files
+                processed_list = librosa.util.find_files(input_path, ext="flac")
+        else:
+            # If it's a single file and it's a .wav or .flac, add to processed list
+            if input_path.lower().endswith(".wav") or input_path.lower().endswith(".flac"):
+                processed_list.append(input_path)
+            else:
+                # Read file paths from the input text file (one path per line)
+                with open(input_path) as fid:
+                    for line in fid:
+                        path_s = line.strip().split()  # Split paths (space-separated)
+                        processed_list.append(path_s[0])  # Add the first path (input audio path)
+        return processed_list
+
+    # If decode is False, treat the input file as a configuration file
+    with open(input_path) as fid:
+        for line in fid:
+            tmp_paths = line.strip().split()  # Split paths (space-separated)
+            if len(tmp_paths) == 2:
+                # If two paths per line, treat the second as 'condition_audio'
+                sample = {'inputs': tmp_paths[0], 'condition_audio': tmp_paths[1]}
+            elif len(tmp_paths) == 1:
+                # If only one path per line, treat it as 'inputs'
+                sample = {'inputs': tmp_paths[0]}
+            processed_list.append(sample)  # Append processed sample to list
+    return processed_list
+
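A small worked example of the non-decode (training) branch of read_and_config_file above: with decode=0, each line of the list file becomes a dict, where two space-separated paths yield an extra 'condition_audio' entry and a single path yields only 'inputs'. The list contents here are hypothetical; passing None for args works only because the training branch never touches it.

    import tempfile, os
    from dataloader.misc import read_and_config_file

    # Write a tiny two-column list file (hypothetical paths) and parse it with decode=0.
    with tempfile.NamedTemporaryFile('w', suffix='.lst', delete=False) as f:
        f.write('mixture_001.wav enrol_001.wav\n')
        f.write('mixture_002.wav\n')
        lst_path = f.name

    samples = read_and_config_file(None, lst_path, decode=0)
    # -> [{'inputs': 'mixture_001.wav', 'condition_audio': 'enrol_001.wav'},
    #     {'inputs': 'mixture_002.wav'}]
    os.remove(lst_path)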
demo.py
ADDED
@@ -0,0 +1,70 @@
+from clearvoice import ClearVoice
+
+##-----------------demo one: use one model ----------------------------------
+if False:
+    myClearVoice = ClearVoice(task='speech_enhancement', model_names=['MossFormer2_SE_48K'])
+
+    ##1st calling method: process an input waveform and return output waveform, then write to output.wav
+    #output_wav = myClearVoice(input_path='input.wav', online_write=False)
+    #myClearVoice.write(output_wav, output_path='output.wav')
+
+    ##2nd calling method: process all wav files in 'path_to_input_wavs/' and write outputs to 'path_to_output_wavs'
+    myClearVoice(input_path='path_to_input_wavs', online_write=True, output_path='path_to_output_wavs')
+
+    ##3rd calling method: process wav files listed in .scp file, and write outputs to 'path_to_output_waves/'
+    #myClearVoice(input_path='scp/cv_webrtc_test_set_20200521_16k.scp', online_write=True, output_path='path_to_output_scp')
+
+
+##----------------Demo two: use multiple models -----------------------------------
+if False:
+    myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K']) #, 'MossFormerGAN_SE_16K'])
+
+    ##1st calling method: process the waveform from input.wav and return output waveform, then write to output.wav
+    #output_wav = myClearVoice(input_path='input.wav', online_write=False)
+    #myClearVoice.write(output_wav, output_path='output.wav')
+
+    ##2nd calling method: process all wav files in 'path_to_input_wavs/' and write outputs to 'path_to_output_wavs'
+    myClearVoice(input_path='path_to_input_wavs', online_write=True, output_path='path_to_output_wavs')
+
+    ##3rd calling method: process wav files listed in .scp file, and write outputs to 'path_to_output_waves/'
+    #myClearVoice(input_path='scp/cv_webrtc_test_set_20200521_16k.scp', online_write=True, output_path='path_to_output_scp')
+
+if False:
+    myClearVoice = ClearVoice(task='speech_enhancement', model_names=['MossFormerGAN_SE_16K'])
+
+    ##1st calling method: process the waveform from input.wav and return output waveform, then write to output.wav
+    #output_wav = myClearVoice(input_path='input.wav', online_write=False)
+    #myClearVoice.write(output_wav, output_path='output.wav')
+
+    ##2nd calling method: process all wav files in 'path_to_input_wavs/' and write outputs to 'path_to_output_wavs'
+    myClearVoice(input_path='path_to_input_wavs', online_write=True, output_path='path_to_output_wavs')
+
+    ##3rd calling method: process wav files listed in .scp file, and write outputs to 'path_to_output_waves/'
+    #myClearVoice(input_path='scp/cv_webrtc_test_set_20200521_16k.scp', online_write=True, output_path='path_to_output_scp')
+
+##----------------Demo three: use one model for speech separation -----------------------------------
+if True:
+    myClearVoice = ClearVoice(task='speech_separation', model_names=['MossFormer2_SS_16K'])
+
+    ##1st calling method: process an input waveform and return output waveform, then write to output.wav
+    #output_wav = myClearVoice(input_path='input.wav', online_write=False)
+    #myClearVoice.write(output_wav, output_path='output.wav')
+
+    #2nd calling method: process all wav files in 'path_to_input_wavs/' and write outputs to 'path_to_output_wavs'
+    #myClearVoice(input_path='path_to_input_wavs_ss', online_write=True, output_path='path_to_output_wavs')
+
+    ##3rd calling method: process wav files listed in .scp file, and write outputs to 'path_to_output_waves/'
+    myClearVoice(input_path='scp/libri_2mix_tt.scp', online_write=True, output_path='path_to_output_scp')
+
+##----------------Demo four: use one model for audio-visual target speaker extraction -----------------------------------
+if False:
+    myClearVoice = ClearVoice(task='target_speaker_extraction', model_names=['AV_MossFormer2_TSE_16K'])
+
+    # #1st calling method: process an input video and return output video, then write outputs to 'path_to_output_videos_tse'
+    # output_wav = myClearVoice(input_path='path_to_input_videos_tse/004.MOV', online_write=True, output_path='path_to_output_videos_tse')
+
+    #2nd calling method: process all video files in 'path_to_input_videos/' and write outputs to 'path_to_output_videos_tse'
+    myClearVoice(input_path='path_to_input_videos_tse', online_write=True, output_path='path_to_output_videos_tse')
+
+    # #3rd calling method: process video files listed in .scp file, and write outputs to 'path_to_output_videos_tse/'
+    # myClearVoice(input_path='scp/video_samples.scp', online_write=True, output_path='path_to_output_videos_tse')
demo_with_detailed_comments.py
ADDED
@@ -0,0 +1,61 @@
+from clearvoice import ClearVoice  # Import the ClearVoice class for speech processing tasks
+
+if __name__ == '__main__':
+    ## ----------------- Demo One: Using a Single Model ----------------------
+    if True:  # This block demonstrates how to use a single model for speech enhancement
+        # Initialize ClearVoice for the task of speech enhancement using the MossFormerGAN_SE_16K model
+        myClearVoice = ClearVoice(task='speech_enhancement', model_names=['MossFormerGAN_SE_16K'])
+
+        # 1st calling method:
+        # Process an input waveform and return the enhanced output waveform
+        # - input_path: Path to the input noisy audio file (input.wav)
+        # - The returned value is the enhanced output waveform
+        output_wav = myClearVoice(input_path='input.wav')
+        # Write the processed waveform to an output file
+        # - output_wav: The enhanced waveform data
+        # - output_path: Path to save the enhanced audio file (output.wav)
+        myClearVoice.write(output_wav, output_path='output.wav')
+
+        # 2nd calling method:
+        # Process and write audio files directly
+        # - input_path: Directory of input noisy audio files
+        # - online_write=True: Enables writing the enhanced audio directly to files during processing
+        # - output_path: Directory where the enhanced audio files will be saved
+        myClearVoice(input_path='path_to_input_wavs', online_write=True, output_path='path_to_output_wavs')
+
+        # 3rd calling method:
+        # Use an .scp file to specify input audio paths
+        # - input_path: Path to an .scp file listing multiple audio file paths
+        # - online_write=True: Directly writes the enhanced output during processing
+        # - output_path: Directory to save the enhanced output files
+        myClearVoice(input_path='data/cv_webrtc_test_set_20200521_16k.scp', online_write=True, output_path='path_to_output_waves')
+
+
+    ## ---------------- Demo Two: Using Multiple Models -----------------------
+    if False:  # This block demonstrates using multiple models for speech enhancement
+        # Initialize ClearVoice for the task of speech enhancement using two models: FRCRN_SE_16K and MossFormerGAN_SE_16K
+        myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K', 'MossFormerGAN_SE_16K'])
+
+        # 1st calling method:
+        # Process an input waveform using the multiple models and return the enhanced output waveform
+        # - input_path: Path to the input noisy audio file (input.wav)
+        # - The returned value is the enhanced output waveform after being processed by the models
+        output_wav = myClearVoice(input_path='input.wav')
+        # Write the processed waveform to an output file
+        # - output_wav: The enhanced waveform data
+        # - output_path: Path to save the enhanced audio file (output.wav)
+        myClearVoice.write(output_wav, output_path='output.wav')
+
+        # 2nd calling method:
+        # Process and write audio files directly using multiple models
+        # - input_path: Directory of input noisy audio files
+        # - online_write=True: Enables writing the enhanced audio directly to files during processing
+        # - output_path: Directory where the enhanced audio files will be saved
+        myClearVoice(input_path='path_to_input_wavs', online_write=True, output_path='path_to_output_wavs')
+
+        # 3rd calling method:
+        # Use an .scp file to specify input audio paths for multiple models
+        # - input_path: Path to an .scp file listing multiple audio file paths
+        # - online_write=True: Directly writes the enhanced output during processing
+        # - output_path: Directory to save the enhanced output files
+        myClearVoice(input_path='data/cv_webrtc_test_set_20200521_16k.scp', online_write=True, output_path='path_to_output_waves')
input.wav
ADDED
Binary file (76.8 kB)

models/.DS_Store
ADDED
Binary file (6.15 kB)

models/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (309 Bytes)

models/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (332 Bytes)

models/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (314 Bytes)

models/__pycache__/complex_nn.cpython-36.pyc
ADDED
Binary file (7.86 kB)

models/__pycache__/complex_nn.cpython-37.pyc
ADDED
Binary file (7.76 kB)

models/__pycache__/complex_nn.cpython-38.pyc
ADDED
Binary file (7.42 kB)

models/__pycache__/constant.cpython-36.pyc
ADDED
Binary file (394 Bytes)

models/__pycache__/constant.cpython-37.pyc
ADDED
Binary file (417 Bytes)

models/__pycache__/constant.cpython-38.pyc
ADDED
Binary file (419 Bytes)

models/__pycache__/conv_stft.cpython-38.pyc
ADDED
Binary file (5.05 kB)

models/__pycache__/criterion.cpython-36.pyc
ADDED
Binary file (1.63 kB)

models/__pycache__/criterion.cpython-37.pyc
ADDED
Binary file (1.63 kB)

models/__pycache__/criterion.cpython-38.pyc
ADDED
Binary file (1.65 kB)

models/__pycache__/frcrn.cpython-38.pyc
ADDED
Binary file (7.17 kB)

models/__pycache__/metric.cpython-36.pyc
ADDED
Binary file (1.07 kB)

models/__pycache__/noisedataset.cpython-36.pyc
ADDED
Binary file (2.82 kB)

models/__pycache__/noisedataset.cpython-37.pyc
ADDED
Binary file (2.78 kB)

models/__pycache__/noisedataset.cpython-38.pyc
ADDED
Binary file (2.79 kB)

models/__pycache__/phasen_dccrn.cpython-36.pyc
ADDED
Binary file (14.4 kB)

models/__pycache__/phasen_dccrn.cpython-37.pyc
ADDED
Binary file (14.2 kB)

models/__pycache__/phasen_dccrn.cpython-38.pyc
ADDED
Binary file (14.1 kB)