mirror of
https://github.com/YuzuZensai/spleeter.git
synced 2026-01-31 14:58:23 +00:00
Merge pull request #444 from deezer/pad_waveform
Added padding at the beginning to avoid tf STFT reconstruction error
This commit is contained in:
@@ -170,6 +170,7 @@ def _create_evaluate_parser(parser_factory):
|
|||||||
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
|
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
|
||||||
parser.add_argument('--mus_dir', **OPT_MUSDB)
|
parser.add_argument('--mus_dir', **OPT_MUSDB)
|
||||||
parser.add_argument('-m', '--mwf', **OPT_MWF)
|
parser.add_argument('-m', '--mwf', **OPT_MWF)
|
||||||
|
parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ def _separate_evaluation_dataset(arguments, musdb_root_directory, params):
|
|||||||
bitrate='128k',
|
bitrate='128k',
|
||||||
MWF=arguments.MWF,
|
MWF=arguments.MWF,
|
||||||
verbose=arguments.verbose,
|
verbose=arguments.verbose,
|
||||||
stft_backend="auto"),
|
stft_backend=arguments.stft_backend),
|
||||||
params)
|
params)
|
||||||
return audio_output_directory
|
return audio_output_directory
|
||||||
|
|
||||||
|
|||||||
@@ -275,9 +275,16 @@ class EstimatorSpecBuilder(object):
|
|||||||
spec_name = self.spectrogram_name
|
spec_name = self.spectrogram_name
|
||||||
|
|
||||||
if stft_name not in self._features:
|
if stft_name not in self._features:
|
||||||
|
# pad input with a frame of zeros
|
||||||
|
waveform = tf.concat([
|
||||||
|
tf.zeros((self._frame_length, self._n_channels)),
|
||||||
|
self._features['waveform']
|
||||||
|
],
|
||||||
|
0
|
||||||
|
)
|
||||||
stft_feature = tf.transpose(
|
stft_feature = tf.transpose(
|
||||||
stft(
|
stft(
|
||||||
tf.transpose(self._features['waveform']),
|
tf.transpose(waveform),
|
||||||
self._frame_length,
|
self._frame_length,
|
||||||
self._frame_step,
|
self._frame_step,
|
||||||
window_fn=lambda frame_length, dtype: (
|
window_fn=lambda frame_length, dtype: (
|
||||||
@@ -341,7 +348,7 @@ class EstimatorSpecBuilder(object):
|
|||||||
reshaped = tf.transpose(inversed)
|
reshaped = tf.transpose(inversed)
|
||||||
if time_crop is None:
|
if time_crop is None:
|
||||||
time_crop = tf.shape(self._features['waveform'])[0]
|
time_crop = tf.shape(self._features['waveform'])[0]
|
||||||
return reshaped[:time_crop, :]
|
return reshaped[self._frame_length:self._frame_length+time_crop, :]
|
||||||
|
|
||||||
def _build_mwf_output_waveform(self):
|
def _build_mwf_output_waveform(self):
|
||||||
""" Perform separation with multichannel Wiener Filtering using Norbert.
|
""" Perform separation with multichannel Wiener Filtering using Norbert.
|
||||||
|
|||||||
@@ -25,33 +25,64 @@ from spleeter.commands import evaluate
|
|||||||
|
|
||||||
from spleeter.utils.configuration import load_configuration
|
from spleeter.utils.configuration import load_configuration
|
||||||
|
|
||||||
res_4stems = { "vocals": {
|
BACKENDS = ["tensorflow", "librosa"]
|
||||||
"SDR": -0.007,
|
TEST_CONFIGURATIONS = {el:el for el in BACKENDS}
|
||||||
"SAR": -19.231,
|
|
||||||
"SIR": -4.528,
|
res_4stems = {
|
||||||
"ISR": 0.000
|
"librosa": {
|
||||||
|
"vocals": {
|
||||||
|
"SDR": -0.007,
|
||||||
|
"SAR": -19.231,
|
||||||
|
"SIR": -4.528,
|
||||||
|
"ISR": 0.000
|
||||||
|
},
|
||||||
|
"drums": {
|
||||||
|
"SDR": -0.071,
|
||||||
|
"SAR": -14.496,
|
||||||
|
"SIR": -4.987,
|
||||||
|
"ISR": 0.001
|
||||||
|
},
|
||||||
|
"bass":{
|
||||||
|
"SDR": -0.001,
|
||||||
|
"SAR": -12.426,
|
||||||
|
"SIR": -7.198,
|
||||||
|
"ISR": -0.001
|
||||||
|
},
|
||||||
|
"other":{
|
||||||
|
"SDR": -1.453,
|
||||||
|
"SAR": -14.899,
|
||||||
|
"SIR": -4.678,
|
||||||
|
"ISR": -0.015
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"drums": {
|
"tensorflow": {
|
||||||
"SDR": -0.071,
|
"vocals": {
|
||||||
"SAR": -14.496,
|
"SDR": 3.25e-05,
|
||||||
"SIR": -4.987,
|
"SAR": -11.153575,
|
||||||
"ISR": 0.001
|
"SIR": -1.3849,
|
||||||
},
|
"ISR": 2.75e-05
|
||||||
"bass":{
|
},
|
||||||
"SDR": -0.001,
|
"drums": {
|
||||||
"SAR": -12.426,
|
"SDR": -0.079505,
|
||||||
"SIR": -7.198,
|
"SAR": -15.7073575,
|
||||||
"ISR": -0.001
|
"SIR": -4.972755,
|
||||||
},
|
"ISR": 0.0013575
|
||||||
"other":{
|
},
|
||||||
"SDR": -1.453,
|
"bass":{
|
||||||
"SAR": -14.899,
|
"SDR": 2.5e-06,
|
||||||
"SIR": -4.678,
|
"SAR": -10.3520575,
|
||||||
"ISR": -0.015
|
"SIR": -4.272325,
|
||||||
|
"ISR": 2.5e-06
|
||||||
|
},
|
||||||
|
"other":{
|
||||||
|
"SDR": -1.359175,
|
||||||
|
"SAR": -14.7076775,
|
||||||
|
"SIR": -4.761505,
|
||||||
|
"ISR": -0.01528
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def generate_fake_eval_dataset(path):
|
def generate_fake_eval_dataset(path):
|
||||||
aa = get_default_audio_adapter()
|
aa = get_default_audio_adapter()
|
||||||
n_songs = 2
|
n_songs = 2
|
||||||
@@ -68,12 +99,18 @@ def generate_fake_eval_dataset(path):
|
|||||||
aa.save(filename, data, fs)
|
aa.save(filename, data, fs)
|
||||||
|
|
||||||
|
|
||||||
def test_evaluate(path="FAKE_MUSDB_DIR"):
|
@pytest.mark.parametrize('backend', TEST_CONFIGURATIONS)
|
||||||
generate_fake_eval_dataset(path)
|
def test_evaluate(backend):
|
||||||
p = create_argument_parser()
|
with TemporaryDirectory() as directory:
|
||||||
arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", path])
|
|
||||||
params = load_configuration(arguments.configuration)
|
generate_fake_eval_dataset(directory)
|
||||||
metrics = evaluate.entrypoint(arguments, params)
|
p = create_argument_parser()
|
||||||
for instrument, metric in metrics.items():
|
arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", directory, "-B", backend])
|
||||||
for metric, value in metric.items():
|
params = load_configuration(arguments.configuration)
|
||||||
assert np.allclose(np.median(value), res_4stems[instrument][metric], atol=1e-3)
|
metrics = evaluate.entrypoint(arguments, params)
|
||||||
|
for instrument, metric in metrics.items():
|
||||||
|
for metric, value in metric.items():
|
||||||
|
assert np.allclose(np.median(value), res_4stems[backend][instrument][metric], atol=1e-3)
|
||||||
|
|
||||||
|
|
||||||
|
# test_evaluate("tensorflow")
|
||||||
Reference in New Issue
Block a user