Merge pull request #426 from deezer/fix_librosa_istft_edge

Fixing glitch issues with the ISTFT
This commit is contained in:
Moussallam
2020-07-24 17:05:58 +02:00
committed by GitHub
4 changed files with 60 additions and 58 deletions

View File

@@ -1,5 +1,12 @@
# Changelog History
## 1.5.4
First release, July 24th 2020
Add some padding to the input waveform to avoid separation artefacts at the edges caused by instabilities in the inverse Fourier transform.
Also add tests to ensure that the librosa and tensorflow backends produce the same outputs.
## 1.5.2
First released, May 15th 2020

View File

@@ -123,14 +123,18 @@ class Separator(object):
data = np.asfortranarray(data)
N = self._params["frame_length"]
H = self._params["frame_step"]
win = hann(N, sym=False)
fstft = istft if inverse else stft
win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N}
win_len_arg = {"win_length": None,
"length": None} if inverse else {"n_fft": N}
n_channels = data.shape[-1]
out = []
for c in range(n_channels):
d = data[:, :, c].T if inverse else data[:, c]
d = np.concatenate((np.zeros((N, )), data[:, c], np.zeros((N, )))) if not inverse else data[:, :, c].T
s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg)
if inverse:
s = s[N:N+length]
s = np.expand_dims(s.T, 2-inverse)
out.append(s)
if len(out) == 1:

View File

@@ -29,57 +29,29 @@ BACKENDS = ["tensorflow", "librosa"]
# Identity mapping of backend names (each name maps to itself), used as the
# parametrize argument for the backend-specific tests.
TEST_CONFIGURATIONS = dict(zip(BACKENDS, BACKENDS))
# Reference values for the evaluation regression test: for each instrument
# ("vocals", "drums", "bass", "other") the expected SDR / SAR / SIR / ISR figures.
# NOTE(review): this span appears to be a rendered diff with removed and added
# lines interleaved. It contains both a layout nested under "librosa" /
# "tensorflow" keys and a flat per-instrument layout, and the braces do not
# balance as shown. Confirm against the real file which entries survive.
res_4stems = {
"librosa": {
"vocals": {
"SDR": -0.007,
"SAR": -19.231,
"SIR": -4.528,
"ISR": 0.000
},
"drums": {
"SDR": -0.071,
"SAR": -14.496,
"SIR": -4.987,
"ISR": 0.001
},
"bass":{
"SDR": -0.001,
"SAR": -12.426,
"SIR": -7.198,
"ISR": -0.001
},
"other":{
"SDR": -1.453,
"SAR": -14.899,
"SIR": -4.678,
"ISR": -0.015
}
"vocals": {
"SDR": 3.25e-05,
"SAR": -11.153575,
"SIR": -1.3849,
"ISR": 2.75e-05
},
"tensorflow": {
"vocals": {
"SDR": 3.25e-05,
"SAR": -11.153575,
"SIR": -1.3849,
"ISR": 2.75e-05
},
"drums": {
"SDR": -0.079505,
"SAR": -15.7073575,
"SIR": -4.972755,
"ISR": 0.0013575
},
"bass":{
"SDR": 2.5e-06,
"SAR": -10.3520575,
"SIR": -4.272325,
"ISR": 2.5e-06
},
"other":{
"SDR": -1.359175,
"SAR": -14.7076775,
"SIR": -4.761505,
"ISR": -0.01528
}
"drums": {
"SDR": -0.079505,
"SAR": -15.7073575,
"SIR": -4.972755,
"ISR": 0.0013575
},
"bass":{
"SDR": 2.5e-06,
"SAR": -10.3520575,
"SIR": -4.272325,
"ISR": 2.5e-06
},
"other":{
"SDR": -1.359175,
"SAR": -14.7076775,
"SIR": -4.761505,
"ISR": -0.01528
}
}
@@ -102,15 +74,11 @@ def generate_fake_eval_dataset(path):
# Evaluation regression test, parametrized over TEST_CONFIGURATIONS: runs the
# "evaluate" command for the "spleeter:4stems" model on a generated dataset and
# compares the median of every returned metric with the values in res_4stems
# (absolute tolerance 1e-3).
# NOTE(review): this span looks like a rendered diff hunk with indentation
# stripped and removed/added lines interleaved; confirm against the real file.
@pytest.mark.parametrize('backend', TEST_CONFIGURATIONS)
def test_evaluate(backend):
with TemporaryDirectory() as directory:
# Build the dataset inside the temporary directory so it is discarded afterwards.
generate_fake_eval_dataset(directory)
p = create_argument_parser()
# Same arguments as the command line: evaluate -p spleeter:4stems --mus_dir <dir> -B <backend>
arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", directory, "-B", backend])
params = load_configuration(arguments.configuration)
metrics = evaluate.entrypoint(arguments, params)
for instrument, metric in metrics.items():
# NOTE(review): presumably the pre-change loop. It rebinds `metric` (the dict
# being iterated) as the loop variable and indexes res_4stems by backend first.
for metric, value in metric.items():
assert np.allclose(np.median(value), res_4stems[backend][instrument][metric], atol=1e-3)
# test_evaluate("tensorflow")
# NOTE(review): presumably the post-change loop. It uses a distinct loop
# variable `m` and indexes res_4stems by instrument only (no backend level).
for m, value in metric.items():
assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3)

View File

@@ -39,6 +39,29 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BAC
# Print the TensorFlow version in use so it is visible in the test output.
print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__))
@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS)
def test_separator_backends(test_file):
    """ Check that the librosa and tensorflow STFT backends agree.

    Two checks are run on the waveform loaded from ``test_file``:

    1. A forward STFT followed by an inverse STFT through the librosa
       backend must give back the input waveform within ``atol=3e-2``.
    2. The separation produced by the tensorflow backend and the one
       produced by the librosa backend must match, instrument by
       instrument, within ``atol=1e-5``.
    """
    audio_adapter = get_default_audio_adapter()
    waveform, _ = audio_adapter.load(test_file)

    librosa_separator = Separator("spleeter:2stems", stft_backend="librosa")
    tensorflow_separator = Separator(
        "spleeter:2stems", stft_backend="tensorflow")

    # Round trip through the librosa backend: STFT then inverse STFT,
    # trimmed back to the original number of samples.
    spectrogram = librosa_separator._stft(waveform)
    round_trip = librosa_separator._stft(
        spectrogram,
        inverse=True,
        length=waveform.shape[0])
    assert np.allclose(round_trip, waveform, atol=3e-2)

    # Run the full separation with each backend on the same input.
    tensorflow_output = tensorflow_separator._separate_tensorflow(
        waveform, test_file)
    librosa_output = librosa_separator._separate_librosa(waveform, test_file)

    # Every instrument estimated by the librosa backend must be matched
    # closely, sample for sample, by the tensorflow backend.
    for instrument in librosa_output.keys():
        expected = librosa_output[instrument]
        actual = tensorflow_output[instrument]
        assert np.allclose(actual, expected, atol=1e-5)
@pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)
def test_separate(test_file, configuration, backend):
""" Test separation from raw data. """