From 0016e3519274ec2dc766c2eac40539d9a44c0185 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Fri, 24 Jul 2020 16:32:32 +0200 Subject: [PATCH] align the padding in librosa to what is now done in tf backend --- spleeter/separator.py | 8 ++--- tests/test_eval.py | 74 +++++++++++++---------------------------- tests/test_separator.py | 18 +--------- 3 files changed, 27 insertions(+), 73 deletions(-) diff --git a/spleeter/separator.py b/spleeter/separator.py index 9318911..2c636b4 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -123,7 +123,7 @@ class Separator(object): data = np.asfortranarray(data) N = self._params["frame_length"] H = self._params["frame_step"] - F = int(N/2) + 1 + win = hann(N, sym=False) fstft = istft if inverse else stft win_len_arg = {"win_length": None, @@ -131,12 +131,10 @@ class Separator(object): n_channels = data.shape[-1] out = [] for c in range(n_channels): - d = np.concatenate((np.zeros((F, 1)), data[:, :, c].T, np.zeros( - (F, 1))), axis=1) if inverse else data[:, c] + d = np.concatenate((np.zeros((N, )), data[:, c], np.zeros((N, )))) if not inverse else data[:, :, c].T s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) if inverse: - s = s[H:] - s = s[:length] + s = s[N:N+length] s = np.expand_dims(s.T, 2-inverse) out.append(s) if len(out) == 1: diff --git a/tests/test_eval.py b/tests/test_eval.py index 547d1e5..97540a9 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -29,57 +29,29 @@ BACKENDS = ["tensorflow", "librosa"] TEST_CONFIGURATIONS = {el:el for el in BACKENDS} res_4stems = { - "librosa": { - "vocals": { - "SDR": 0.000, - "SAR": -16.212, - "SIR": -4.172, - "ISR": 0.000 - }, - "drums": { - "SDR": -0.077, - "SAR": -15.739, - "SIR": -5.045, - "ISR": 0.001 - }, - "bass":{ - "SDR": -0.000, - "SAR": -10.665, - "SIR": -5.646, - "ISR": -0.000 - }, - "other":{ - "SDR": -1.309, - "SAR": -14.573, - "SIR": -4.705, - "ISR": -0.014 - } + "vocals": { + "SDR": 3.25e-05, + "SAR": -11.153575, + "SIR": -1.3849, + "ISR": 2.75e-05 }, - "tensorflow": { - "vocals": { - "SDR": 3.25e-05, - "SAR": -11.153575, - "SIR": -1.3849, - "ISR": 2.75e-05 - }, - "drums": { - "SDR": -0.079505, - "SAR": -15.7073575, - "SIR": -4.972755, - "ISR": 0.0013575 - }, - "bass":{ - "SDR": 2.5e-06, - "SAR": -10.3520575, - "SIR": -4.272325, - "ISR": 2.5e-06 - }, - "other":{ - "SDR": -1.359175, - "SAR": -14.7076775, - "SIR": -4.761505, - "ISR": -0.01528 - } + "drums": { + "SDR": -0.079505, + "SAR": -15.7073575, + "SIR": -4.972755, + "ISR": 0.0013575 + }, + "bass":{ + "SDR": 2.5e-06, + "SAR": -10.3520575, + "SIR": -4.272325, + "ISR": 2.5e-06 + }, + "other":{ + "SDR": -1.359175, + "SAR": -14.7076775, + "SIR": -4.761505, + "ISR": -0.01528 } } @@ -109,4 +81,4 @@ def test_evaluate(backend): metrics = evaluate.entrypoint(arguments, params) for instrument, metric in metrics.items(): for m, value in metric.items(): - assert np.allclose(np.median(value), res_4stems[backend][instrument][m], atol=1e-3) + assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3) diff --git a/tests/test_separator.py b/tests/test_separator.py index d850e97..3094900 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -53,29 +53,13 @@ def test_separator_backends(test_file): stft_matrix, inverse=True, length=waveform.shape[0]) assert np.allclose(reconstructed, waveform, atol=3e-2) - # # now also test that tensorflow and librosa STFT provide same results - from spleeter.audio.spectrogram import compute_spectrogram_tf - tf_waveform = tf.convert_to_tensor(waveform, tf.float32) - spectrogram_tf = compute_spectrogram_tf(tf_waveform, - separator_tf._params['frame_length'], - separator_tf._params['frame_step'],) - with tf.Session() as sess: - spectrogram_tf_eval = spectrogram_tf.eval() - - # check that stfts are equivalent - assert stft_matrix.shape == spectrogram_tf_eval.shape - assert np.allclose( - np.abs(stft_matrix), spectrogram_tf_eval, atol=1e-2) - # compare both separation, it should be close out_tf = separator_tf._separate_tensorflow(waveform, test_file) out_lib = separator_lib._separate_librosa(waveform, test_file) for instrument in out_lib.keys(): # test that both outputs are close everywhere - assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.025) - # it should be even more similar outside edges zones - assert np.allclose(out_tf[instrument][4096:-4096,:], out_lib[instrument][4096:-4096,:], atol=0.002) + assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5) @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)