mirror of
https://github.com/YuzuZensai/spleeter.git
synced 2026-01-06 04:32:43 +00:00
Align the padding in the librosa backend with what the TensorFlow backend now does
This commit is contained in:
@@ -123,7 +123,7 @@ class Separator(object):
|
||||
data = np.asfortranarray(data)
|
||||
N = self._params["frame_length"]
|
||||
H = self._params["frame_step"]
|
||||
F = int(N/2) + 1
|
||||
|
||||
win = hann(N, sym=False)
|
||||
fstft = istft if inverse else stft
|
||||
win_len_arg = {"win_length": None,
|
||||
@@ -131,12 +131,10 @@ class Separator(object):
|
||||
n_channels = data.shape[-1]
|
||||
out = []
|
||||
for c in range(n_channels):
|
||||
d = np.concatenate((np.zeros((F, 1)), data[:, :, c].T, np.zeros(
|
||||
(F, 1))), axis=1) if inverse else data[:, c]
|
||||
d = np.concatenate((np.zeros((N, )), data[:, c], np.zeros((N, )))) if not inverse else data[:, :, c].T
|
||||
s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg)
|
||||
if inverse:
|
||||
s = s[H:]
|
||||
s = s[:length]
|
||||
s = s[N:N+length]
|
||||
s = np.expand_dims(s.T, 2-inverse)
|
||||
out.append(s)
|
||||
if len(out) == 1:
|
||||
|
||||
@@ -29,57 +29,29 @@ BACKENDS = ["tensorflow", "librosa"]
|
||||
TEST_CONFIGURATIONS = {el:el for el in BACKENDS}
|
||||
|
||||
res_4stems = {
|
||||
"librosa": {
|
||||
"vocals": {
|
||||
"SDR": 0.000,
|
||||
"SAR": -16.212,
|
||||
"SIR": -4.172,
|
||||
"ISR": 0.000
|
||||
},
|
||||
"drums": {
|
||||
"SDR": -0.077,
|
||||
"SAR": -15.739,
|
||||
"SIR": -5.045,
|
||||
"ISR": 0.001
|
||||
},
|
||||
"bass":{
|
||||
"SDR": -0.000,
|
||||
"SAR": -10.665,
|
||||
"SIR": -5.646,
|
||||
"ISR": -0.000
|
||||
},
|
||||
"other":{
|
||||
"SDR": -1.309,
|
||||
"SAR": -14.573,
|
||||
"SIR": -4.705,
|
||||
"ISR": -0.014
|
||||
}
|
||||
"vocals": {
|
||||
"SDR": 3.25e-05,
|
||||
"SAR": -11.153575,
|
||||
"SIR": -1.3849,
|
||||
"ISR": 2.75e-05
|
||||
},
|
||||
"tensorflow": {
|
||||
"vocals": {
|
||||
"SDR": 3.25e-05,
|
||||
"SAR": -11.153575,
|
||||
"SIR": -1.3849,
|
||||
"ISR": 2.75e-05
|
||||
},
|
||||
"drums": {
|
||||
"SDR": -0.079505,
|
||||
"SAR": -15.7073575,
|
||||
"SIR": -4.972755,
|
||||
"ISR": 0.0013575
|
||||
},
|
||||
"bass":{
|
||||
"SDR": 2.5e-06,
|
||||
"SAR": -10.3520575,
|
||||
"SIR": -4.272325,
|
||||
"ISR": 2.5e-06
|
||||
},
|
||||
"other":{
|
||||
"SDR": -1.359175,
|
||||
"SAR": -14.7076775,
|
||||
"SIR": -4.761505,
|
||||
"ISR": -0.01528
|
||||
}
|
||||
"drums": {
|
||||
"SDR": -0.079505,
|
||||
"SAR": -15.7073575,
|
||||
"SIR": -4.972755,
|
||||
"ISR": 0.0013575
|
||||
},
|
||||
"bass":{
|
||||
"SDR": 2.5e-06,
|
||||
"SAR": -10.3520575,
|
||||
"SIR": -4.272325,
|
||||
"ISR": 2.5e-06
|
||||
},
|
||||
"other":{
|
||||
"SDR": -1.359175,
|
||||
"SAR": -14.7076775,
|
||||
"SIR": -4.761505,
|
||||
"ISR": -0.01528
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,4 +81,4 @@ def test_evaluate(backend):
|
||||
metrics = evaluate.entrypoint(arguments, params)
|
||||
for instrument, metric in metrics.items():
|
||||
for m, value in metric.items():
|
||||
assert np.allclose(np.median(value), res_4stems[backend][instrument][m], atol=1e-3)
|
||||
assert np.allclose(np.median(value), res_4stems[instrument][m], atol=1e-3)
|
||||
|
||||
@@ -53,29 +53,13 @@ def test_separator_backends(test_file):
|
||||
stft_matrix, inverse=True, length=waveform.shape[0])
|
||||
assert np.allclose(reconstructed, waveform, atol=3e-2)
|
||||
|
||||
# Now also test that the TensorFlow and librosa STFTs provide the same results
|
||||
from spleeter.audio.spectrogram import compute_spectrogram_tf
|
||||
tf_waveform = tf.convert_to_tensor(waveform, tf.float32)
|
||||
spectrogram_tf = compute_spectrogram_tf(tf_waveform,
|
||||
separator_tf._params['frame_length'],
|
||||
separator_tf._params['frame_step'],)
|
||||
with tf.Session() as sess:
|
||||
spectrogram_tf_eval = spectrogram_tf.eval()
|
||||
|
||||
# check that stfts are equivalent
|
||||
assert stft_matrix.shape == spectrogram_tf_eval.shape
|
||||
assert np.allclose(
|
||||
np.abs(stft_matrix), spectrogram_tf_eval, atol=1e-2)
|
||||
|
||||
# compare both separation, it should be close
|
||||
out_tf = separator_tf._separate_tensorflow(waveform, test_file)
|
||||
out_lib = separator_lib._separate_librosa(waveform, test_file)
|
||||
|
||||
for instrument in out_lib.keys():
|
||||
# test that both outputs are close everywhere
|
||||
assert np.allclose(out_tf[instrument], out_lib[instrument], atol=0.025)
|
||||
# it should be even more similar outside edges zones
|
||||
assert np.allclose(out_tf[instrument][4096:-4096,:], out_lib[instrument][4096:-4096,:], atol=0.002)
|
||||
assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS)
|
||||
|
||||
Reference in New Issue
Block a user