Mirror of https://github.com/YuzuZensai/spleeter.git, synced 2026-01-31 14:58:23 +00:00
Merge pull request #426 from deezer/fix_librosa_istft_edge
Fixing glitch issues with istft
@@ -1,5 +1,12 @@
 # Changelog History
 
+## 1.5.4
+
+First release, July 24th 2020
+
+Add some padding of the input waveform to avoid separation artefacts at the edges due to instabilities in the inverse Fourier transform.
+Also add tests to ensure that both the librosa and tensorflow backends have the same outputs.
+
 ## 1.5.2
 
 First released, May 15th 2020
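The idea behind the fix can be illustrated in isolation. The snippet below is a minimal sketch, not part of the commit; the frame sizes and the test signal are made up. It pads the waveform with frame_length zeros on each side so that every original sample is fully covered by analysis windows, runs the forward and inverse STFT with center=False, then drops the padding again.

import numpy as np
from scipy.signal.windows import hann
from librosa import stft, istft

N, H = 4096, 1024                                # illustrative frame_length / frame_step
x = np.random.randn(44100).astype(np.float32)    # one second of fake mono audio
win = hann(N, sym=False)

# Pad with N zeros on each side so the first and last samples are fully windowed.
padded = np.concatenate((np.zeros(N), x, np.zeros(N)))
S = stft(padded, n_fft=N, hop_length=H, window=win, center=False)

# Invert without asking librosa to trim, then drop the padding manually.
y = istft(S, hop_length=H, window=win, center=False)[N:N + x.shape[0]]

assert np.allclose(y, x, atol=3e-2)              # same tolerance as the new backend test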
@@ -123,14 +123,18 @@ class Separator(object):
         data = np.asfortranarray(data)
         N = self._params["frame_length"]
         H = self._params["frame_step"]
+
         win = hann(N, sym=False)
         fstft = istft if inverse else stft
-        win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N}
+        win_len_arg = {"win_length": None,
+                       "length": None} if inverse else {"n_fft": N}
         n_channels = data.shape[-1]
         out = []
         for c in range(n_channels):
-            d = data[:, :, c].T if inverse else data[:, c]
+            d = np.concatenate((np.zeros((N, )), data[:, c], np.zeros((N, )))) if not inverse else data[:, :, c].T
             s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg)
+            if inverse:
+                s = s[N:N+length]
             s = np.expand_dims(s.T, 2-inverse)
             out.append(s)
         if len(out) == 1:
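The slice s[N:N+length] mirrors the padding added on the forward pass: because N zeros were prepended, the original samples of the reconstruction sit at indices N through N+length-1. A toy illustration of that indexing (illustrative sizes, not from the commit):

import numpy as np

N, length = 4, 10                   # toy frame_length and waveform length
x = np.arange(length, dtype=float)  # stands in for one channel of the waveform

# The forward pass pads with N zeros on each side, as in the diff above.
padded = np.concatenate((np.zeros(N), x, np.zeros(N)))

# After a perfect inverse STFT of the padded signal, trimming recovers x.
assert np.array_equal(padded[N:N + length], x)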
@@ -29,57 +29,29 @@ BACKENDS = ["tensorflow", "librosa"]
 TEST_CONFIGURATIONS = {el:el for el in BACKENDS}
 
 res_4stems = {
-    "librosa": {
-        "vocals": {
-            "SDR": -0.007,
-            "SAR": -19.231,
-            "SIR": -4.528,
-            "ISR": 0.000
-        },
-        "drums": {
-            "SDR": -0.071,
-            "SAR": -14.496,
-            "SIR": -4.987,
-            "ISR": 0.001
-        },
-        "bass":{
-            "SDR": -0.001,
-            "SAR": -12.426,
-            "SIR": -7.198,
-            "ISR": -0.001
-        },
-        "other":{
-            "SDR": -1.453,
-            "SAR": -14.899,
-            "SIR": -4.678,
-            "ISR": -0.015
-        }
-    },
-    "tensorflow": {
-        "vocals": {
-            "SDR": 3.25e-05,
-            "SAR": -11.153575,
-            "SIR": -1.3849,
-            "ISR": 2.75e-05
-        },
-        "drums": {
-            "SDR": -0.079505,
-            "SAR": -15.7073575,
-            "SIR": -4.972755,
-            "ISR": 0.0013575
-        },
-        "bass":{
-            "SDR": 2.5e-06,
-            "SAR": -10.3520575,
-            "SIR": -4.272325,
-            "ISR": 2.5e-06
-        },
-        "other":{
-            "SDR": -1.359175,
-            "SAR": -14.7076775,
-            "SIR": -4.761505,
-            "ISR": -0.01528
-        }
-    }
+    "vocals": {
+        "SDR": 3.25e-05,
+        "SAR": -11.153575,
+        "SIR": -1.3849,
+        "ISR": 2.75e-05
+    },
+    "drums": {
+        "SDR": -0.079505,
+        "SAR": -15.7073575,
+        "SIR": -4.972755,
+        "ISR": 0.0013575
+    },
+    "bass":{
+        "SDR": 2.5e-06,
+        "SAR": -10.3520575,
+        "SIR": -4.272325,
+        "ISR": 2.5e-06
+    },
+    "other":{
+        "SDR": -1.359175,
+        "SAR": -14.7076775,
+        "SIR": -4.761505,
+        "ISR": -0.01528
+    }
 }
 
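The reference values above are median BSS-eval scores per instrument (SDR: signal-to-distortion, SIR: source-to-interference, SAR: source-to-artifacts, ISR: image-to-spatial-distortion). Since both backends now produce the same separation, a single table replaces the old per-backend ones. As a hedged sketch of where such numbers typically come from, the snippet below computes the four metrics for one synthetic track with museval; the diff does not show the evaluation internals, so treat the call as an assumption about the pipeline.

import numpy as np
import museval

rate = 44100
# Fake references and estimates with shape (n_sources, n_samples, n_channels).
references = np.random.randn(2, rate, 2)
estimates = references + 0.1 * np.random.randn(2, rate, 2)

# museval returns framewise SDR, ISR, SIR and SAR arrays of shape (n_sources, n_frames).
sdr, isr, sir, sar = museval.evaluate(references, estimates)
print(np.median(sdr, axis=1))   # per-source median, as asserted in test_evaluate below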
@@ -102,15 +74,11 @@ def generate_fake_eval_dataset(path):
 @pytest.mark.parametrize('backend', TEST_CONFIGURATIONS)
 def test_evaluate(backend):
     with TemporaryDirectory() as directory:
-
         generate_fake_eval_dataset(directory)
         p = create_argument_parser()
         arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", directory, "-B", backend])
         params = load_configuration(arguments.configuration)
         metrics = evaluate.entrypoint(arguments, params)
         for instrument, metric in metrics.items():
-            for metric, value in metric.items():
-                assert np.allclose(np.median(value), res_4stems[backend][instrument][metric], atol=1e-3)
-
-
-# test_evaluate("tensorflow")
+            for m, value in metric.items():
+                assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3)
@@ -39,6 +39,29 @@ TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BACKENDS))
 print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__))
 
 
+@pytest.mark.parametrize('test_file', TEST_AUDIO_DESCRIPTORS)
+def test_separator_backends(test_file):
+    adapter = get_default_audio_adapter()
+    waveform, _ = adapter.load(test_file)
+
+    separator_lib = Separator("spleeter:2stems", stft_backend="librosa")
+    separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow")
+
+    # Test that the stft and inverse stft provide exact reconstruction
+    stft_matrix = separator_lib._stft(waveform)
+    reconstructed = separator_lib._stft(
+        stft_matrix, inverse=True, length=waveform.shape[0])
+    assert np.allclose(reconstructed, waveform, atol=3e-2)
+
+    # Compare both separations, they should be close
+    out_tf = separator_tf._separate_tensorflow(waveform, test_file)
+    out_lib = separator_lib._separate_librosa(waveform, test_file)
+
+    for instrument in out_lib.keys():
+        # Test that both outputs are close everywhere
+        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5)
+
+
 @pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)
 def test_separate(test_file, configuration, backend):
     """ Test separation from raw data. """
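The new test above drives the private _separate_tensorflow and _separate_librosa methods directly. For everyday use, the same comparison can go through the public API; the sketch below assumes that Separator.separate dispatches to whichever backend the instance was built with, which is a reading of the test rather than something the diff states, and the silent input waveform is a placeholder.

import numpy as np
from spleeter.separator import Separator

# Hypothetical usage sketch: compare the two STFT backends via the public API.
waveform = np.zeros((44100, 2), dtype=np.float32)   # stand-in for a loaded stereo file

out_lib = Separator("spleeter:2stems", stft_backend="librosa").separate(waveform)
out_tf = Separator("spleeter:2stems", stft_backend="tensorflow").separate(waveform)

for instrument in out_lib:
    # With the padding fix, both backends should agree to within a tight tolerance.
    assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5)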