From 27f52b71d5405af62345b7d819db196240239cb4 Mon Sep 17 00:00:00 2001
From: mmoussallam <manuel.moussallam@deezer.com>
Date: Thu, 18 Jun 2020 18:01:03 +0200
Subject: [PATCH] Fixing glitch issues with ISTFT

---
 spleeter/separator.py   | 10 +++++++---
 tests/test_eval.py      | 26 +++++++++++---------------
 tests/test_separator.py | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/spleeter/separator.py b/spleeter/separator.py
index 35d8ae2..ceceeca 100644
--- a/spleeter/separator.py
+++ b/spleeter/separator.py
@@ -122,16 +122,20 @@ class Separator(object):
         assert not (inverse and length is None)
         data = np.asfortranarray(data)
         N = self._params["frame_length"]
+        pad_edges = int(N/4)
         H = self._params["frame_step"]
         win = hann(N, sym=False)
         fstft = istft if inverse else stft
-        win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N}
+        win_len_arg = {"win_length": None, "length": length + 2*pad_edges} if inverse else {"n_fft": N}
         n_channels = data.shape[-1]
         out = []
-        for c in range(n_channels):
-            d = data[:, :, c].T if inverse else data[:, c]
+        for c in range(n_channels):            
+            d = data[:, :, c].T if inverse else np.concatenate((np.zeros(pad_edges,), data[:,c], np.zeros(pad_edges,)))
             s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg)
+            if inverse:
+                s = s[pad_edges:-pad_edges]
             s = np.expand_dims(s.T, 2-inverse)
+
             out.append(s)
         if len(out) == 1:
             return out[0]
diff --git a/tests/test_eval.py b/tests/test_eval.py
index 298e421..e9bf762 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -99,18 +99,14 @@ def generate_fake_eval_dataset(path):
             aa.save(filename, data, fs)
 
 
-@pytest.mark.parametrize('backend', TEST_CONFIGURATIONS)
-def test_evaluate(backend):
-    with TemporaryDirectory() as directory:
-
-        generate_fake_eval_dataset(directory)
-        p = create_argument_parser()
-        arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", directory, "-B", backend])
-        params = load_configuration(arguments.configuration)
-        metrics = evaluate.entrypoint(arguments, params)
-        for instrument, metric in metrics.items():
-            for metric, value in metric.items():
-                assert np.allclose(np.median(value), res_4stems[backend][instrument][metric], atol=1e-3)
-
-
-# test_evaluate("tensorflow")
\ No newline at end of file
+def test_evaluate(path="FAKE_MUSDB_DIR"):
+    generate_fake_eval_dataset(path)
+    p = create_argument_parser()
+    arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", path])
+    params = load_configuration(arguments.configuration)
+    metrics = evaluate.entrypoint(arguments, params)
+    for instrument, metric in metrics.items():
+        print(instrument), print(metric)
+        for m, value in metric.items():
+            print(np.median(value)), print(res_4stems[instrument][m])
+            assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3)
diff --git a/tests/test_separator.py b/tests/test_separator.py
index 7213af6..06f5676 100644
--- a/tests/test_separator.py
+++ b/tests/test_separator.py
@@ -38,6 +38,44 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC
 
 print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__))
 
+@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS)
+def test_separator_backends(test_file):
+    adapter = get_default_audio_adapter()
+    waveform, _ = adapter.load(test_file)
+
+    separator_lib = Separator("spleeter:2stems", stft_backend="librosa")
+    separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow")
+
+    # Check that the STFT followed by the inverse STFT reconstructs the waveform (to within atol=1e-2)
+    stft_matrix = separator_lib._stft(waveform)
+    reconstructed = separator_lib._stft(stft_matrix, inverse=True, length= waveform.shape[0])
+    assert np.allclose(reconstructed, waveform, atol=1e-2)
+
+    # Also check that the TensorFlow and librosa STFTs give the same results
+    from spleeter.audio.spectrogram import compute_spectrogram_tf
+    tf_waveform = tf.convert_to_tensor(waveform, tf.float32)
+    spectrogram_tf = compute_spectrogram_tf(tf_waveform,
+        separator_tf._params['frame_length'],
+        separator_tf._params['frame_step'],)
+    with tf.Session() as sess:
+        spectrogram_tf_eval = spectrogram_tf.eval()
+
+    # Check that the STFTs are equivalent, apart from the two extra edge entries (first and last along axis 0) due to the padding in the librosa case
+    assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2
+    assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:]
+    assert np.allclose(np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2)
+
+    # Compare both separations; they should be close
+    out_tf = separator_tf._separate_tensorflow(waveform, test_file)
+    out_lib = separator_lib._separate_librosa(waveform, test_file)
+
+    for instrument in out_lib.keys():
+        # Check that neither output is null (summed magnitude must exceed 1000)
+        assert np.sum(np.abs(out_tf[instrument])) > 1000
+        assert np.sum(np.abs(out_lib[instrument])) > 1000
+        max_diff = np.max(np.abs(out_tf[instrument] - out_lib[instrument]))
+        print(f"Max diff on {instrument} is {max_diff}")
+        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1)
 
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)
 def test_separate(test_file, configuration, backend):