From 27f52b71d5405af62345b7d819db196240239cb4 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Thu, 18 Jun 2020 18:01:03 +0200 Subject: [PATCH] Fixing glitch issues with Istft --- spleeter/separator.py | 10 +++++++--- tests/test_eval.py | 26 +++++++++++--------------- tests/test_separator.py | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 18 deletions(-) diff --git a/spleeter/separator.py b/spleeter/separator.py index 35d8ae2..ceceeca 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -122,16 +122,20 @@ class Separator(object): assert not (inverse and length is None) data = np.asfortranarray(data) N = self._params["frame_length"] + pad_edges = int(N/4) H = self._params["frame_step"] win = hann(N, sym=False) fstft = istft if inverse else stft - win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N} + win_len_arg = {"win_length": None, "length": length + 2*pad_edges} if inverse else {"n_fft": N} n_channels = data.shape[-1] out = [] - for c in range(n_channels): - d = data[:, :, c].T if inverse else data[:, c] + for c in range(n_channels): + d = data[:, :, c].T if inverse else np.concatenate((np.zeros(pad_edges,), data[:,c], np.zeros(pad_edges,))) s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) + if inverse: + s = s[pad_edges:-pad_edges] s = np.expand_dims(s.T, 2-inverse) + out.append(s) if len(out) == 1: return out[0] diff --git a/tests/test_eval.py b/tests/test_eval.py index 298e421..e9bf762 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -99,18 +99,14 @@ def generate_fake_eval_dataset(path): aa.save(filename, data, fs) -@pytest.mark.parametrize('backend', TEST_CONFIGURATIONS) -def test_evaluate(backend): - with TemporaryDirectory() as directory: - - generate_fake_eval_dataset(directory) - p = create_argument_parser() - arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", directory, "-B", backend]) - params = 
load_configuration(arguments.configuration) - metrics = evaluate.entrypoint(arguments, params) - for instrument, metric in metrics.items(): - for metric, value in metric.items(): - assert np.allclose(np.median(value), res_4stems[backend][instrument][metric], atol=1e-3) - - -# test_evaluate("tensorflow") \ No newline at end of file +def test_evaluate(path="FAKE_MUSDB_DIR"): + generate_fake_eval_dataset(path) + p = create_argument_parser() + arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", path]) + params = load_configuration(arguments.configuration) + metrics = evaluate.entrypoint(arguments, params) + for instrument, metric in metrics.items(): + print(instrument), print(metric) + for m, value in metric.items(): + print(np.median(value)), print(res_4stems[instrument][m]) + assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3) diff --git a/tests/test_separator.py b/tests/test_separator.py index 7213af6..06f5676 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -38,6 +38,44 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__)) +@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS) +def test_separator_backends(test_file): + adapter = get_default_audio_adapter() + waveform, _ = adapter.load(test_file) + + separator_lib = Separator("spleeter:2stems", stft_backend="librosa") + separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow") + + # Test the stft and inverse stft provides exact reconstruction + stft_matrix = separator_lib._stft(waveform) + reconstructed = separator_lib._stft(stft_matrix, inverse=True, length= waveform.shape[0]) + assert np.allclose(reconstructed, waveform, atol=1e-2) + + # # now also test that tensorflow and librosa STFT provide same results + from spleeter.audio.spectrogram import compute_spectrogram_tf + tf_waveform = tf.convert_to_tensor(waveform, tf.float32) + 
spectrogram_tf = compute_spectrogram_tf(tf_waveform, + separator_tf._params['frame_length'], + separator_tf._params['frame_step'],) + with tf.Session() as sess: + spectrogram_tf_eval = spectrogram_tf.eval() + + # check that stfts are equivalent up to the padding in the librosa case + assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2 + assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:] + assert np.allclose(np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2) + + # compare both separation, it should be close + out_tf = separator_tf._separate_tensorflow(waveform, test_file) + out_lib = separator_lib._separate_librosa(waveform, test_file) + + for instrument in out_lib.keys(): + # test that both outputs are not null + assert np.sum(np.abs(out_tf[instrument])) > 1000 + assert np.sum(np.abs(out_lib[instrument])) > 1000 + max_diff = np.max(np.abs(out_tf[instrument] - out_lib[instrument])) + print(f"Max diff on {instrument} is {max_diff}") + assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1) @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) def test_separate(test_file, configuration, backend):