From 8d442c88ac001af00422acd1addba2629967dfd5 Mon Sep 17 00:00:00 2001
From: mmoussallam <manuel.moussallam@deezer.com>
Date: Thu, 18 Jun 2020 18:01:03 +0200
Subject: [PATCH 1/6] Fixing gltches issues with Istft

---
 spleeter/separator.py   | 10 +++++++---
 tests/test_eval.py      | 34 ++++++++++++++++++----------------
 tests/test_separator.py | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/spleeter/separator.py b/spleeter/separator.py
index e769c28..7b6dab7 100644
--- a/spleeter/separator.py
+++ b/spleeter/separator.py
@@ -122,16 +122,20 @@ class Separator(object):
         assert not (inverse and length is None)
         data = np.asfortranarray(data)
         N = self._params["frame_length"]
+        pad_edges = int(N/4)
         H = self._params["frame_step"]
         win = hann(N, sym=False)
         fstft = istft if inverse else stft
-        win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N}
+        win_len_arg = {"win_length": None, "length": length + 2*pad_edges} if inverse else {"n_fft": N}
         n_channels = data.shape[-1]
         out = []
-        for c in range(n_channels):
-            d = data[:, :, c].T if inverse else data[:, c]
+        for c in range(n_channels):            
+            d = data[:, :, c].T if inverse else np.concatenate((np.zeros(pad_edges,), data[:,c], np.zeros(pad_edges,)))
             s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg)
+            if inverse:
+                s = s[pad_edges:-pad_edges]
             s = np.expand_dims(s.T, 2-inverse)
+
             out.append(s)
         if len(out) == 1:
             return out[0]
diff --git a/tests/test_eval.py b/tests/test_eval.py
index 6a3634b..2227e18 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -26,28 +26,28 @@ from spleeter.commands import evaluate
 from spleeter.utils.configuration import load_configuration
 
 res_4stems = {  "vocals": {
-                    "SDR": -0.007,
-                    "SAR": -19.231,
-                    "SIR": -4.528,
+                    "SDR": 0.000,
+                    "SAR": -15.856,
+                    "SIR": -6.861,
                     "ISR": 0.000
                 },
                 "drums": {
-                    "SDR": -0.071,
-                    "SAR": -14.496,
-                    "SIR": -4.987,
+                    "SDR": -0.069,
+                    "SAR": -15.769,
+                    "SIR": -5.049,
                     "ISR": 0.001
                 },
                 "bass":{
-                    "SDR": -0.001,
-                    "SAR": -12.426,
-                    "SIR": -7.198,
-                    "ISR": -0.001
+                    "SDR": -0.000,
+                    "SAR": -10.499,
+                    "SIR": -6.988,
+                    "ISR": -0.000
                 },
                 "other":{
-                    "SDR": -1.453,
-                    "SAR": -14.899,
-                    "SIR": -4.678,
-                    "ISR": -0.015
+                    "SDR": -1.365,
+                    "SAR": -14.591,
+                    "SIR": -4.751,
+                    "ISR": -0.016
                 }
             }
 
@@ -75,5 +75,7 @@ def test_evaluate(path="FAKE_MUSDB_DIR"):
     params = load_configuration(arguments.configuration)
     metrics = evaluate.entrypoint(arguments, params)
     for instrument, metric in metrics.items():
-        for metric, value in metric.items():
-            assert np.allclose(np.median(value), res_4stems[instrument][metric], atol=1e-3)
\ No newline at end of file
+        print(instrument), print(metric)
+        for m, value in metric.items():
+            print(np.median(value)), print(res_4stems[instrument][m])
+            assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3)
\ No newline at end of file
diff --git a/tests/test_separator.py b/tests/test_separator.py
index 245571d..0b5b282 100644
--- a/tests/test_separator.py
+++ b/tests/test_separator.py
@@ -38,6 +38,44 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC
 
 print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__))
 
+@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS)
+def test_separator_backends(test_file):
+    adapter = get_default_audio_adapter()
+    waveform, _ = adapter.load(test_file)
+
+    separator_lib = Separator("spleeter:2stems", stft_backend="librosa")
+    separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow")
+
+    # Test the stft and inverse stft provides exact reconstruction
+    stft_matrix = separator_lib._stft(waveform)
+    reconstructed = separator_lib._stft(stft_matrix, inverse=True, length= waveform.shape[0])
+    assert np.allclose(reconstructed, waveform, atol=1e-2)
+
+    # # now also test that tensorflow and librosa STFT provide same results
+    from spleeter.audio.spectrogram import compute_spectrogram_tf
+    tf_waveform = tf.convert_to_tensor(waveform, tf.float32)
+    spectrogram_tf = compute_spectrogram_tf(tf_waveform,
+        separator_tf._params['frame_length'],
+        separator_tf._params['frame_step'],)
+    with tf.Session() as sess:
+        spectrogram_tf_eval = spectrogram_tf.eval()
+
+    # check that stfts are equivalent up to the padding in the librosa case
+    assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2
+    assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:]
+    assert np.allclose(np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2)
+
+    # compare both separation, it should be close
+    out_tf = separator_tf._separate_tensorflow(waveform, test_file)
+    out_lib = separator_lib._separate_librosa(waveform, test_file)
+
+    for instrument in out_lib.keys():
+        # test that both outputs are not null
+        assert np.sum(np.abs(out_tf[instrument])) > 1000
+        assert np.sum(np.abs(out_lib[instrument])) > 1000
+        max_diff = np.max(np.abs(out_tf[instrument] - out_lib[instrument]))
+        print(f"Max diff on {instrument} is {max_diff}")
+        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1)
 
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)
 def test_separate(test_file, configuration, backend):

From bec0555a62bf310eb1a6d55c60bed4caa23fc482 Mon Sep 17 00:00:00 2001
From: mmoussallam <manuel.moussallam@deezer.com>
Date: Thu, 18 Jun 2020 18:09:34 +0200
Subject: [PATCH 2/6] remove useless prints

---
 tests/test_eval.py      | 2 --
 tests/test_separator.py | 2 --
 2 files changed, 4 deletions(-)

diff --git a/tests/test_eval.py b/tests/test_eval.py
index 2227e18..a829706 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -75,7 +75,5 @@ def test_evaluate(path="FAKE_MUSDB_DIR"):
     params = load_configuration(arguments.configuration)
     metrics = evaluate.entrypoint(arguments, params)
     for instrument, metric in metrics.items():
-        print(instrument), print(metric)
         for m, value in metric.items():
-            print(np.median(value)), print(res_4stems[instrument][m])
             assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3)
\ No newline at end of file
diff --git a/tests/test_separator.py b/tests/test_separator.py
index 0b5b282..532d179 100644
--- a/tests/test_separator.py
+++ b/tests/test_separator.py
@@ -73,8 +73,6 @@ def test_separator_backends(test_file):
         # test that both outputs are not null
         assert np.sum(np.abs(out_tf[instrument])) > 1000
         assert np.sum(np.abs(out_lib[instrument])) > 1000
-        max_diff = np.max(np.abs(out_tf[instrument] - out_lib[instrument]))
-        print(f"Max diff on {instrument} is {max_diff}")
         assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1)
 
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)

From 45dc3566c24ff1eb3100442655e44bad9287b7c1 Mon Sep 17 00:00:00 2001
From: mmoussallam <manuel.moussallam@deezer.com>
Date: Thu, 18 Jun 2020 18:12:43 +0200
Subject: [PATCH 3/6] pep8

---
 spleeter/separator.py   |  9 +++++----
 tests/test_separator.py | 12 ++++++++----
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/spleeter/separator.py b/spleeter/separator.py
index 7b6dab7..b204030 100644
--- a/spleeter/separator.py
+++ b/spleeter/separator.py
@@ -126,16 +126,17 @@ class Separator(object):
         H = self._params["frame_step"]
         win = hann(N, sym=False)
         fstft = istft if inverse else stft
-        win_len_arg = {"win_length": None, "length": length + 2*pad_edges} if inverse else {"n_fft": N}
+        win_len_arg = {"win_length": None, "length": length +
+                       2*pad_edges} if inverse else {"n_fft": N}
         n_channels = data.shape[-1]
         out = []
-        for c in range(n_channels):            
-            d = data[:, :, c].T if inverse else np.concatenate((np.zeros(pad_edges,), data[:,c], np.zeros(pad_edges,)))
+        for c in range(n_channels):
+            d = data[:, :, c].T if inverse else np.concatenate(
+                (np.zeros(pad_edges,), data[:, c], np.zeros(pad_edges,)))
             s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg)
             if inverse:
                 s = s[pad_edges:-pad_edges]
             s = np.expand_dims(s.T, 2-inverse)
-
             out.append(s)
         if len(out) == 1:
             return out[0]
diff --git a/tests/test_separator.py b/tests/test_separator.py
index 532d179..93ad148 100644
--- a/tests/test_separator.py
+++ b/tests/test_separator.py
@@ -38,6 +38,7 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC
 
 print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__))
 
+
 @pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS)
 def test_separator_backends(test_file):
     adapter = get_default_audio_adapter()
@@ -48,22 +49,24 @@ def test_separator_backends(test_file):
 
     # Test the stft and inverse stft provides exact reconstruction
     stft_matrix = separator_lib._stft(waveform)
-    reconstructed = separator_lib._stft(stft_matrix, inverse=True, length= waveform.shape[0])
+    reconstructed = separator_lib._stft(
+        stft_matrix, inverse=True, length=waveform.shape[0])
     assert np.allclose(reconstructed, waveform, atol=1e-2)
 
     # # now also test that tensorflow and librosa STFT provide same results
     from spleeter.audio.spectrogram import compute_spectrogram_tf
     tf_waveform = tf.convert_to_tensor(waveform, tf.float32)
     spectrogram_tf = compute_spectrogram_tf(tf_waveform,
-        separator_tf._params['frame_length'],
-        separator_tf._params['frame_step'],)
+                                            separator_tf._params['frame_length'],
+                                            separator_tf._params['frame_step'],)
     with tf.Session() as sess:
         spectrogram_tf_eval = spectrogram_tf.eval()
 
     # check that stfts are equivalent up to the padding in the librosa case
     assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2
     assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:]
-    assert np.allclose(np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2)
+    assert np.allclose(
+        np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2)
 
     # compare both separation, it should be close
     out_tf = separator_tf._separate_tensorflow(waveform, test_file)
@@ -75,6 +78,7 @@ def test_separator_backends(test_file):
         assert np.sum(np.abs(out_lib[instrument])) > 1000
         assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1)
 
+
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)
 def test_separate(test_file, configuration, backend):
     """ Test separation from raw data. """

From 02f86e42b24a2d0028f08c1481491619ff95c035 Mon Sep 17 00:00:00 2001
From: mmoussallam <manuel.moussallam@deezer.com>
Date: Thu, 18 Jun 2020 20:17:03 +0200
Subject: [PATCH 4/6] investigate diff values

---
 spleeter/separator.py   | 2 +-
 tests/test_separator.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/spleeter/separator.py b/spleeter/separator.py
index b204030..cc05a64 100644
--- a/spleeter/separator.py
+++ b/spleeter/separator.py
@@ -122,7 +122,7 @@ class Separator(object):
         assert not (inverse and length is None)
         data = np.asfortranarray(data)
         N = self._params["frame_length"]
-        pad_edges = int(N/4)
+        pad_edges = int(N/2)
         H = self._params["frame_step"]
         win = hann(N, sym=False)
         fstft = istft if inverse else stft
diff --git a/tests/test_separator.py b/tests/test_separator.py
index 93ad148..4c29dbf 100644
--- a/tests/test_separator.py
+++ b/tests/test_separator.py
@@ -74,9 +74,11 @@ def test_separator_backends(test_file):
 
     for instrument in out_lib.keys():
         # test that both outputs are not null
+        print(np.sum(np.abs(out_tf[instrument])))
+        print(np.sum(np.abs(out_lib[instrument])))
         assert np.sum(np.abs(out_tf[instrument])) > 1000
         assert np.sum(np.abs(out_lib[instrument])) > 1000
-        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.1)
+        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.01)
 
 
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)

From 9510e2b5f7a47c968b4c6686d338f482bece56e9 Mon Sep 17 00:00:00 2001
From: mmoussallam <manuel.moussallam@deezer.com>
Date: Fri, 19 Jun 2020 00:33:19 +0200
Subject: [PATCH 5/6] dont pad the signal, only the stft matrix before
 inversion

---
 spleeter/separator.py   | 13 +++++++------
 tests/test_separator.py | 12 ++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/spleeter/separator.py b/spleeter/separator.py
index cc05a64..7cc9d9a 100644
--- a/spleeter/separator.py
+++ b/spleeter/separator.py
@@ -122,20 +122,21 @@ class Separator(object):
         assert not (inverse and length is None)
         data = np.asfortranarray(data)
         N = self._params["frame_length"]
-        pad_edges = int(N/2)
         H = self._params["frame_step"]
+        F = int(N/2) + 1
         win = hann(N, sym=False)
         fstft = istft if inverse else stft
-        win_len_arg = {"win_length": None, "length": length +
-                       2*pad_edges} if inverse else {"n_fft": N}
+        win_len_arg = {"win_length": None,
+                       "length": None} if inverse else {"n_fft": N}
         n_channels = data.shape[-1]
         out = []
         for c in range(n_channels):
-            d = data[:, :, c].T if inverse else np.concatenate(
-                (np.zeros(pad_edges,), data[:, c], np.zeros(pad_edges,)))
+            d = np.concatenate((np.zeros((F, 1)), data[:, :, c].T, np.zeros(
+                (F, 1))), axis=1) if inverse else data[:, c]
             s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg)
             if inverse:
-                s = s[pad_edges:-pad_edges]
+                s = s[H:]
+                s = s[:length]
             s = np.expand_dims(s.T, 2-inverse)
             out.append(s)
         if len(out) == 1:
diff --git a/tests/test_separator.py b/tests/test_separator.py
index 4c29dbf..882acd9 100644
--- a/tests/test_separator.py
+++ b/tests/test_separator.py
@@ -51,7 +51,7 @@ def test_separator_backends(test_file):
     stft_matrix = separator_lib._stft(waveform)
     reconstructed = separator_lib._stft(
         stft_matrix, inverse=True, length=waveform.shape[0])
-    assert np.allclose(reconstructed, waveform, atol=1e-2)
+    assert np.allclose(reconstructed, waveform, atol=3e-2)
 
     # # now also test that tensorflow and librosa STFT provide same results
     from spleeter.audio.spectrogram import compute_spectrogram_tf
@@ -62,11 +62,10 @@ def test_separator_backends(test_file):
     with tf.Session() as sess:
         spectrogram_tf_eval = spectrogram_tf.eval()
 
-    # check that stfts are equivalent up to the padding in the librosa case
-    assert stft_matrix.shape[0] == spectrogram_tf_eval.shape[0] + 2
-    assert stft_matrix.shape[1:] == spectrogram_tf_eval.shape[1:]
+    # check that stfts are equivalent
+    assert stft_matrix.shape == spectrogram_tf_eval.shape
     assert np.allclose(
-        np.abs(stft_matrix[1:-1]), spectrogram_tf_eval, atol=1e-2)
+        np.abs(stft_matrix), spectrogram_tf_eval, atol=1e-2)
 
     # compare both separation, it should be close
     out_tf = separator_tf._separate_tensorflow(waveform, test_file)
@@ -78,7 +77,8 @@ def test_separator_backends(test_file):
         print(np.sum(np.abs(out_lib[instrument])))
         assert np.sum(np.abs(out_tf[instrument])) > 1000
         assert np.sum(np.abs(out_lib[instrument])) > 1000
-        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.01)
+        print(np.max(out_tf[instrument]- out_lib[instrument]))
+        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.025)
 
 
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)

From af6dbec5cf2a8d2bbad728f9267ebf5204637053 Mon Sep 17 00:00:00 2001
From: mmoussallam <manuel.moussallam@deezer.com>
Date: Fri, 19 Jun 2020 00:48:09 +0200
Subject: [PATCH 6/6] fix tests and more testing

---
 tests/test_eval.py      | 22 +++++++++++-----------
 tests/test_separator.py |  9 +++------
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/tests/test_eval.py b/tests/test_eval.py
index a829706..c39ea76 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -27,27 +27,27 @@ from spleeter.utils.configuration import load_configuration
 
 res_4stems = {  "vocals": {
                     "SDR": 0.000,
-                    "SAR": -15.856,
-                    "SIR": -6.861,
+                    "SAR": -16.212,
+                    "SIR": -4.172,
                     "ISR": 0.000
                 },
                 "drums": {
-                    "SDR": -0.069,
-                    "SAR": -15.769,
-                    "SIR": -5.049,
+                    "SDR": -0.077,
+                    "SAR": -15.739,
+                    "SIR": -5.045,
                     "ISR": 0.001
                 },
                 "bass":{
                     "SDR": -0.000,
-                    "SAR": -10.499,
-                    "SIR": -6.988,
+                    "SAR": -10.665,
+                    "SIR": -5.646,
                     "ISR": -0.000
                 },
                 "other":{
-                    "SDR": -1.365,
-                    "SAR": -14.591,
-                    "SIR": -4.751,
-                    "ISR": -0.016
+                    "SDR": -1.309,
+                    "SAR": -14.573,
+                    "SIR": -4.705,
+                    "ISR": -0.014
                 }
             }
 
diff --git a/tests/test_separator.py b/tests/test_separator.py
index 882acd9..21840b2 100644
--- a/tests/test_separator.py
+++ b/tests/test_separator.py
@@ -72,13 +72,10 @@ def test_separator_backends(test_file):
     out_lib = separator_lib._separate_librosa(waveform, test_file)
 
     for instrument in out_lib.keys():
-        # test that both outputs are not null
-        print(np.sum(np.abs(out_tf[instrument])))
-        print(np.sum(np.abs(out_lib[instrument])))
-        assert np.sum(np.abs(out_tf[instrument])) > 1000
-        assert np.sum(np.abs(out_lib[instrument])) > 1000
-        print(np.max(out_tf[instrument]- out_lib[instrument]))
+        # test that both outputs are close everywhere
         assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.025)
+        # it should be even more similar outside edges zones
+        assert np.allclose(out_tf[instrument][4096:-4096,:], out_lib[instrument][4096:-4096,:], atol=0.002)
 
 
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)