diff --git a/CHANGELOG.md b/CHANGELOG.md index a9df7a2..a5f4162 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog History +## 1.5.4 + +First released, July 24th 2020 + +Add some padding of the input waveform to avoid separation artefacts on the edges due to instabilities in the inverse Fourier transforms. +Also add tests to ensure both librosa and tensorflow backends produce the same outputs. + ## 1.5.2 First released, May 15th 2020 diff --git a/spleeter/separator.py b/spleeter/separator.py index 35d8ae2..2c636b4 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -123,14 +123,18 @@ class Separator(object): data = np.asfortranarray(data) N = self._params["frame_length"] H = self._params["frame_step"] + win = hann(N, sym=False) fstft = istft if inverse else stft - win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N} + win_len_arg = {"win_length": None, + "length": None} if inverse else {"n_fft": N} n_channels = data.shape[-1] out = [] for c in range(n_channels): - d = data[:, :, c].T if inverse else data[:, c] + d = np.concatenate((np.zeros((N, )), data[:, c], np.zeros((N, )))) if not inverse else data[:, :, c].T s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) + if inverse: + s = s[N:N+length] s = np.expand_dims(s.T, 2-inverse) out.append(s) if len(out) == 1: diff --git a/tests/test_eval.py b/tests/test_eval.py index 298e421..97540a9 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -29,57 +29,29 @@ BACKENDS = ["tensorflow", "librosa"] TEST_CONFIGURATIONS = {el:el for el in BACKENDS} res_4stems = { - "librosa": { - "vocals": { - "SDR": -0.007, - "SAR": -19.231, - "SIR": -4.528, - "ISR": 0.000 - }, - "drums": { - "SDR": -0.071, - "SAR": -14.496, - "SIR": -4.987, - "ISR": 0.001 - }, - "bass":{ - "SDR": -0.001, - "SAR": -12.426, - "SIR": -7.198, - "ISR": -0.001 - }, - "other":{ - "SDR": -1.453, - "SAR": -14.899, - "SIR": -4.678, - "ISR": -0.015 - } + "vocals": { + "SDR": 3.25e-05, 
+ "SAR": -11.153575, + "SIR": -1.3849, + "ISR": 2.75e-05 }, - "tensorflow": { - "vocals": { - "SDR": 3.25e-05, - "SAR": -11.153575, - "SIR": -1.3849, - "ISR": 2.75e-05 - }, - "drums": { - "SDR": -0.079505, - "SAR": -15.7073575, - "SIR": -4.972755, - "ISR": 0.0013575 - }, - "bass":{ - "SDR": 2.5e-06, - "SAR": -10.3520575, - "SIR": -4.272325, - "ISR": 2.5e-06 - }, - "other":{ - "SDR": -1.359175, - "SAR": -14.7076775, - "SIR": -4.761505, - "ISR": -0.01528 - } + "drums": { + "SDR": -0.079505, + "SAR": -15.7073575, + "SIR": -4.972755, + "ISR": 0.0013575 + }, + "bass":{ + "SDR": 2.5e-06, + "SAR": -10.3520575, + "SIR": -4.272325, + "ISR": 2.5e-06 + }, + "other":{ + "SDR": -1.359175, + "SAR": -14.7076775, + "SIR": -4.761505, + "ISR": -0.01528 } } @@ -102,15 +74,11 @@ def generate_fake_eval_dataset(path): @pytest.mark.parametrize('backend', TEST_CONFIGURATIONS) def test_evaluate(backend): with TemporaryDirectory() as directory: - generate_fake_eval_dataset(directory) p = create_argument_parser() arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", directory, "-B", backend]) params = load_configuration(arguments.configuration) metrics = evaluate.entrypoint(arguments, params) for instrument, metric in metrics.items(): - for metric, value in metric.items(): - assert np.allclose(np.median(value), res_4stems[backend][instrument][metric], atol=1e-3) - - -# test_evaluate("tensorflow") \ No newline at end of file + for m, value in metric.items(): + assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3) diff --git a/tests/test_separator.py b/tests/test_separator.py index 7213af6..3094900 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -39,6 +39,29 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__)) +@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS) +def test_separator_backends(test_file): + adapter = 
get_default_audio_adapter() + waveform, _ = adapter.load(test_file) + + separator_lib = Separator("spleeter:2stems", stft_backend="librosa") + separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow") + + # Test the stft and inverse stft provides exact reconstruction + stft_matrix = separator_lib._stft(waveform) + reconstructed = separator_lib._stft( + stft_matrix, inverse=True, length=waveform.shape[0]) + assert np.allclose(reconstructed, waveform, atol=3e-2) + + # compare both separation, it should be close + out_tf = separator_tf._separate_tensorflow(waveform, test_file) + out_lib = separator_lib._separate_librosa(waveform, test_file) + + for instrument in out_lib.keys(): + # test that both outputs are close everywhere + assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5) + + @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) def test_separate(test_file, configuration, backend): """ Test separation from raw data. """