From 8d442c88ac001af00422acd1addba2629967dfd5 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Thu, 18 Jun 2020 18:01:03 +0200 Subject: [PATCH 1/6] Fixing gltches issues with Istft --- spleeter/separator.py | 10 +++++++--- tests/test_eval.py | 34 ++++++++++++++++++---------------- tests/test_separator.py | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/spleeter/separator.py b/spleeter/separator.py index e769c28..7b6dab7 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -122,16 +122,20 @@ class Separator(object): assert not (inverse and length is None) data = np.asfortranarray(data) N = self._params["frame_length"] + pad_edges = int(N/4) H = self._params["frame_step"] win = hann(N, sym=False) fstft = istft if inverse else stft - win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N} + win_len_arg = {"win_length": None, "length": length + 2*pad_edges} if inverse else {"n_fft": N} n_channels = data.shape[-1] out = [] - for c in range(n_channels): - d = data[:, :, c].T if inverse else data[:, c] + for c in range(n_channels): + d = data[:, :, c].T if inverse else np.concatenate((np.zeros(pad_edges,), data[:,c], np.zeros(pad_edges,))) s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) + if inverse: + s = s[pad_edges:-pad_edges] s = np.expand_dims(s.T, 2-inverse) + out.append(s) if len(out) == 1: return out[0] diff --git a/tests/test_eval.py b/tests/test_eval.py index 6a3634b..2227e18 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -26,28 +26,28 @@ from spleeter.commands import evaluate from spleeter.utils.configuration import load_configuration res_4stems = { "vocals": { - "SDR": -0.007, - "SAR": -19.231, - "SIR": -4.528, + "SDR": 0.000, + "SAR": -15.856, + "SIR": -6.861, "ISR": 0.000 }, "drums": { - "SDR": -0.071, - "SAR": -14.496, - "SIR": -4.987, + "SDR": -0.069, + "SAR": -15.769, + "SIR": -5.049, "ISR": 0.001 }, "bass":{ - "SDR": -0.001, - "SAR": -12.426, - "SIR": -7.198, - "ISR": -0.001 + "SDR": -0.000, + "SAR": -10.499, + "SIR": -6.988, + "ISR": -0.000 }, "other":{ - "SDR": -1.453, - "SAR": -14.899, - "SIR": -4.678, - "ISR": -0.015 + "SDR": -1.365, + "SAR": -14.591, + "SIR": -4.751, + "ISR": -0.016 } } @@ -75,5 +75,7 @@ def test_evaluate(path="FAKE_MUSDB_DIR"): params = load_configuration(arguments.configuration) metrics = evaluate.entrypoint(arguments, params) for instrument, metric in metrics.items(): - for metric, value in metric.items(): - assert np.allclose(np.median(value), res_4stems[instrument][metric], atol=1e-3) \ No newline at end of file + print(instrument), print(metric) + for m, value in metric.items(): + print(np.median(value)), print(res_4stems[instrument][m]) + assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3) \ No newline at end of file diff --git a/tests/test_separator.py b/tests/test_separator.py index 245571d..0b5b282 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -38,6 +38,44 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__)) +@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS) +def test_separator_backends(test_file): + adapter = get_default_audio_adapter() + waveform, _ = adapter.load(test_file) + + separator_lib = Separator("spleeter:2stems", stft_backend="librosa") + separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow") + + # Test the stft and inverse stft provides exact reconstruction + stft_matrix = separator_lib._stft(waveform) + reconstructed = separator_lib._stft(stft_matrix, inverse=True, length= waveform.shape[0]) + assert np.allclose(reconstructed, waveform, atol=1e-2) + + # # now also test that tensorflow and librosa STFT provide same results + from spleeter.audio.spectrogram import compute_spectrogram_tf + tf_waveform = tf.convert_to_tensor(waveform, tf.float32) + spectrogram_tf = compute_spectrogram_tf(tf_waveform, + separator_tf._params['frame_length'], + separator_tf._params['frame_step'],) + with tf.Session() as sess: + spectrogram_tf_eval = spectrogram_tf.eval() + + # check that stfts are equivalent up to the padding in the librosa case + assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2 + assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:] + assert np.allclose(np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2) + + # compare both separation, it should be close + out_tf = separator_tf._separate_tensorflow(waveform, test_file) + out_lib = separator_lib._separate_librosa(waveform, test_file) + + for instrument in out_lib.keys(): + # test that both outputs are not null + assert np.sum(np.abs(out_tf[instrument])) > 1000 + assert np.sum(np.abs(out_lib[instrument])) > 1000 + max_diff = np.max(np.abs(out_tf[instrument] - out_lib[instrument])) + print(f"Max diff on {instrument} is {max_diff}") + assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1) @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) def test_separate(test_file, configuration, backend): From bec0555a62bf310eb1a6d55c60bed4caa23fc482 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Thu, 18 Jun 2020 18:09:34 +0200 Subject: [PATCH 2/6] remove useless prints --- tests/test_eval.py | 2 -- tests/test_separator.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/test_eval.py b/tests/test_eval.py index 2227e18..a829706 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -75,7 +75,5 @@ def test_evaluate(path="FAKE_MUSDB_DIR"): params = load_configuration(arguments.configuration) metrics = evaluate.entrypoint(arguments, params) for instrument, metric in metrics.items(): - print(instrument), print(metric) for m, value in metric.items(): - print(np.median(value)), print(res_4stems[instrument][m]) assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3) \ No newline at end of file diff --git a/tests/test_separator.py b/tests/test_separator.py index 0b5b282..532d179 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -73,8 +73,6 @@ def test_separator_backends(test_file): # test that both outputs are not null assert np.sum(np.abs(out_tf[instrument])) > 1000 assert np.sum(np.abs(out_lib[instrument])) > 1000 - max_diff = np.max(np.abs(out_tf[instrument] - out_lib[instrument])) - print(f"Max diff on {instrument} is {max_diff}") assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1) @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) From 45dc3566c24ff1eb3100442655e44bad9287b7c1 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Thu, 18 Jun 2020 18:12:43 +0200 Subject: [PATCH 3/6] pep8 --- spleeter/separator.py | 9 +++++---- tests/test_separator.py | 12 ++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/spleeter/separator.py b/spleeter/separator.py index 7b6dab7..b204030 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -126,16 +126,17 @@ class Separator(object): H = self._params["frame_step"] win = hann(N, sym=False) fstft = istft if inverse else stft - win_len_arg = {"win_length": None, "length": length + 2*pad_edges} if inverse else {"n_fft": N} + win_len_arg = {"win_length": None, "length": length + + 2*pad_edges} if inverse else {"n_fft": N} n_channels = data.shape[-1] out = [] - for c in range(n_channels): - d = data[:, :, c].T if inverse else np.concatenate((np.zeros(pad_edges,), data[:,c], np.zeros(pad_edges,))) + for c in range(n_channels): + d = data[:, :, c].T if inverse else np.concatenate( + (np.zeros(pad_edges,), data[:, c], np.zeros(pad_edges,))) s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) if inverse: s = s[pad_edges:-pad_edges] s = np.expand_dims(s.T, 2-inverse) - out.append(s) if len(out) == 1: return out[0] diff --git a/tests/test_separator.py b/tests/test_separator.py index 532d179..93ad148 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -38,6 +38,7 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__)) + @pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS) def test_separator_backends(test_file): adapter = get_default_audio_adapter() @@ -48,22 +49,24 @@ def test_separator_backends(test_file): # Test the stft and inverse stft provides exact reconstruction stft_matrix = separator_lib._stft(waveform) - reconstructed = separator_lib._stft(stft_matrix, inverse=True, length= waveform.shape[0]) + reconstructed = separator_lib._stft( + stft_matrix, inverse=True, length=waveform.shape[0]) assert np.allclose(reconstructed, waveform, atol=1e-2) # # now also test that tensorflow and librosa STFT provide same results from spleeter.audio.spectrogram import compute_spectrogram_tf tf_waveform = tf.convert_to_tensor(waveform, tf.float32) spectrogram_tf = compute_spectrogram_tf(tf_waveform, - separator_tf._params['frame_length'], - separator_tf._params['frame_step'],) + separator_tf._params['frame_length'], + separator_tf._params['frame_step'],) with tf.Session() as sess: spectrogram_tf_eval = spectrogram_tf.eval() # check that stfts are equivalent up to the padding in the librosa case assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2 assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:] - assert np.allclose(np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2) + assert np.allclose( + np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2) # compare both separation, it should be close out_tf = separator_tf._separate_tensorflow(waveform, test_file) @@ -75,6 +78,7 @@ def test_separator_backends(test_file): assert np.sum(np.abs(out_lib[instrument])) > 1000 assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1) + @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) def test_separate(test_file, configuration, backend): """ Test separation from raw data. """ From 02f86e42b24a2d0028f08c1481491619ff95c035 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Thu, 18 Jun 2020 20:17:03 +0200 Subject: [PATCH 4/6] investigate diff values --- spleeter/separator.py | 2 +- tests/test_separator.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/spleeter/separator.py b/spleeter/separator.py index b204030..cc05a64 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -122,7 +122,7 @@ class Separator(object): assert not (inverse and length is None) data = np.asfortranarray(data) N = self._params["frame_length"] - pad_edges = int(N/4) + pad_edges = int(N/2) H = self._params["frame_step"] win = hann(N, sym=False) fstft = istft if inverse else stft diff --git a/tests/test_separator.py b/tests/test_separator.py index 93ad148..4c29dbf 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -74,9 +74,11 @@ def test_separator_backends(test_file): for instrument in out_lib.keys(): # test that both outputs are not null + print(np.sum(np.abs(out_tf[instrument]))) + print(np.sum(np.abs(out_lib[instrument]))) assert np.sum(np.abs(out_tf[instrument])) > 1000 assert np.sum(np.abs(out_lib[instrument])) > 1000 - assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1) + assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.01) @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) From 9510e2b5f7a47c968b4c6686d338f482bece56e9 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Fri, 19 Jun 2020 00:33:19 +0200 Subject: [PATCH 5/6] dont pad the signal, only the stft matrix before inversion --- spleeter/separator.py | 13 +++++++------ tests/test_separator.py | 12 ++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/spleeter/separator.py b/spleeter/separator.py index cc05a64..7cc9d9a 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -122,20 +122,21 @@ class Separator(object): assert not (inverse and length is None) data = np.asfortranarray(data) N = self._params["frame_length"] - pad_edges = int(N/2) H = self._params["frame_step"] + F = int(N/2) + 1 win = hann(N, sym=False) fstft = istft if inverse else stft - win_len_arg = {"win_length": None, "length": length + - 2*pad_edges} if inverse else {"n_fft": N} + win_len_arg = {"win_length": None, + "length": None} if inverse else {"n_fft": N} n_channels = data.shape[-1] out = [] for c in range(n_channels): - d = data[:, :, c].T if inverse else np.concatenate( - (np.zeros(pad_edges,), data[:, c], np.zeros(pad_edges,))) + d = np.concatenate((np.zeros((F, 1)), data[:, :, c].T, np.zeros( + (F, 1))), axis=1) if inverse else data[:, c] s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) if inverse: - s = s[pad_edges:-pad_edges] + s = s[H:] + s = s[:length] s = np.expand_dims(s.T, 2-inverse) out.append(s) if len(out) == 1: diff --git a/tests/test_separator.py b/tests/test_separator.py index 4c29dbf..882acd9 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -51,7 +51,7 @@ def test_separator_backends(test_file): stft_matrix = separator_lib._stft(waveform) reconstructed = separator_lib._stft( stft_matrix, inverse=True, length=waveform.shape[0]) - assert np.allclose(reconstructed, waveform, atol=1e-2) + assert np.allclose(reconstructed, waveform, atol=3e-2) # # now also test that tensorflow and librosa STFT provide same results from spleeter.audio.spectrogram import compute_spectrogram_tf @@ -62,11 +62,10 @@ def test_separator_backends(test_file): with tf.Session() as sess: spectrogram_tf_eval = spectrogram_tf.eval() - # check that stfts are equivalent up to the padding in the librosa case - assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2 - assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:] + # check that stfts are equivalent + assert stft_matrix.shape == spectrogram_tf_eval.shape assert np.allclose( - np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2) + np.abs(stft_matrix), spectrogram_tf_eval, atol=1e-2) # compare both separation, it should be close out_tf = separator_tf._separate_tensorflow(waveform, test_file) @@ -78,7 +77,8 @@ def test_separator_backends(test_file): print(np.sum(np.abs(out_lib[instrument]))) assert np.sum(np.abs(out_tf[instrument])) > 1000 assert np.sum(np.abs(out_lib[instrument])) > 1000 - assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.01) + print(np.max(out_tf[instrument]- out_lib[instrument])) + assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.025) @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) From af6dbec5cf2a8d2bbad728f9267ebf5204637053 Mon Sep 17 00:00:00 2001 From: mmoussallam Date: Fri, 19 Jun 2020 00:48:09 +0200 Subject: [PATCH 6/6] fix tests and more testing --- tests/test_eval.py | 22 +++++++++++----------- tests/test_separator.py | 9 +++------ 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/tests/test_eval.py b/tests/test_eval.py index a829706..c39ea76 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -27,27 +27,27 @@ from spleeter.utils.configuration import load_configuration res_4stems = { "vocals": { "SDR": 0.000, - "SAR": -15.856, - "SIR": -6.861, + "SAR": -16.212, + "SIR": -4.172, "ISR": 0.000 }, "drums": { - "SDR": -0.069, - "SAR": -15.769, - "SIR": -5.049, + "SDR": -0.077, + "SAR": -15.739, + "SIR": -5.045, "ISR": 0.001 }, "bass":{ "SDR": -0.000, - "SAR": -10.499, - "SIR": -6.988, + "SAR": -10.665, + "SIR": -5.646, "ISR": -0.000 }, "other":{ - "SDR": -1.365, - "SAR": -14.591, - "SIR": -4.751, - "ISR": -0.016 + "SDR": -1.309, + "SAR": -14.573, + "SIR": -4.705, + "ISR": -0.014 } } diff --git a/tests/test_separator.py b/tests/test_separator.py index 882acd9..21840b2 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -72,13 +72,10 @@ def test_separator_backends(test_file): out_lib = separator_lib._separate_librosa(waveform, test_file) for instrument in out_lib.keys(): - # test that both outputs are not null - print(np.sum(np.abs(out_tf[instrument]))) - print(np.sum(np.abs(out_lib[instrument]))) - assert np.sum(np.abs(out_tf[instrument])) > 1000 - assert np.sum(np.abs(out_lib[instrument])) > 1000 - print(np.max(out_tf[instrument]- out_lib[instrument])) + # test that both outputs are close everywhere assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.025) + # it should be even more similar outside edges zones + assert np.allclose(out_tf[instrument][4096:-4096,:], out_lib[instrument][4096:-4096,:], atol=0.002) @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)