diff --git a/audio_example_mono.mp3 b/audio_example_mono.mp3 new file mode 100644 index 0000000..7b134ac Binary files /dev/null and b/audio_example_mono.mp3 differ diff --git a/requirements.txt b/requirements.txt index 1fd76f7..c728b8c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ importlib_resources; python_version<'3.7' requests setuptools>=41.0.0 pandas==0.25.1 -tensorflow==1.15 +tensorflow==1.15.2 ffmpeg-python norbert==0.2.1 librosa==0.7.2 \ No newline at end of file diff --git a/setup.py b/setup.py index deb47cf..da31dce 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ __license__ = 'MIT License' project_name = 'spleeter' project_version = '1.5.0' tensorflow_dependency = 'tensorflow' -tensorflow_version = '1.15' +tensorflow_version = '1.15.2' here = path.abspath(path.dirname(__file__)) readme_path = path.join(here, 'README.md') with open(readme_path, 'r') as stream: diff --git a/spleeter/separator.py b/spleeter/separator.py index 174c73f..3488fb5 100644 --- a/spleeter/separator.py +++ b/spleeter/separator.py @@ -123,12 +123,16 @@ class Separator(object): win = hann(N, sym=False) fstft = istft if inverse else stft win_len_arg = {"win_length": None, "length": length} if inverse else {"n_fft": N} - dl, dr = (data[:, :, 0].T, data[:, :, 1].T) if inverse else (data[:, 0], data[:, 1]) - s1 = fstft(dl, hop_length=H, window=win, center=False, **win_len_arg) - s2 = fstft(dr, hop_length=H, window=win, center=False, **win_len_arg) - s1 = np.expand_dims(s1.T, 2-inverse) - s2 = np.expand_dims(s2.T, 2-inverse) - return np.concatenate([s1, s2], axis=2-inverse) + n_channels = data.shape[-1] + out = [] + for c in range(n_channels): + d = data[:, :, c].T if inverse else data[:, c] + s = fstft(d, hop_length=H, window=win, center=False, **win_len_arg) + s = np.expand_dims(s.T, 2-inverse) + out.append(s) + if len(out) == 1: + return out[0] + return np.concatenate(out, axis=2-inverse) def separate_librosa(self, waveform, audio_id): out = {} @@ -140,9 +144,13 @@ class Separator(object): # TODO: fix the logic, build sometimes return, sometimes set attribute outputs = builder.outputs + stft = self.stft(waveform) + if stft.shape[-1] == 1: + stft = np.concatenate([stft, stft], axis=-1) + elif stft.shape[-1] > 2: + stft = stft[:, :2] saver = tf.train.Saver() - stft = self.stft(waveform) with tf.Session() as sess: saver.restore(sess, latest_checkpoint) outputs = sess.run(outputs, feed_dict=input_provider.get_feed_dict(features, stft, audio_id)) diff --git a/tests/test_separator.py b/tests/test_separator.py index 271fdfb..245571d 100644 --- a/tests/test_separator.py +++ b/tests/test_separator.py @@ -8,83 +8,101 @@ __author__ = 'Deezer Research' __license__ = 'MIT License' import filecmp - +import itertools from os.path import splitext, basename, exists, join from tempfile import TemporaryDirectory import pytest import numpy as np +import tensorflow as tf + from spleeter import SpleeterError from spleeter.audio.adapter import get_default_audio_adapter from spleeter.separator import Separator -TEST_AUDIO_DESCRIPTOR = 'audio_example.mp3' -TEST_AUDIO_BASENAME = splitext(basename(TEST_AUDIO_DESCRIPTOR))[0] -TEST_CONFIGURATIONS = [ - ('spleeter:2stems', ('vocals', 'accompaniment'), 'tensorflow'), - ('spleeter:4stems', ('vocals', 'drums', 'bass', 'other'), 'tensorflow'), - ('spleeter:5stems', ('vocals', 'drums', 'bass', 'piano', 'other'), 'tensorflow'), - ('spleeter:2stems', ('vocals', 'accompaniment'), 'librosa'), - ('spleeter:4stems', ('vocals', 'drums', 'bass', 'other'), 'librosa'), - ('spleeter:5stems', ('vocals', 'drums', 'bass', 'piano', 'other'), 'librosa') -] +TEST_AUDIO_DESCRIPTORS = ['audio_example.mp3', 'audio_example_mono.mp3'] +BACKENDS = ["tensorflow", "librosa"] +MODELS = ['spleeter:2stems', 'spleeter:4stems', 'spleeter:5stems'] + +MODEL_TO_INST = { + 'spleeter:2stems': ('vocals', 'accompaniment'), + 'spleeter:4stems': ('vocals', 'drums', 'bass', 'other'), + 'spleeter:5stems': ('vocals', 'drums', 'bass', 'piano', 'other'), +} -@pytest.mark.parametrize('configuration, instruments, backend', TEST_CONFIGURATIONS) -def test_separate(configuration, instruments, backend): +MODELS_AND_TEST_FILES = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS)) +TEST_CONFIGURATIONS = list(itertools.product(TEST_AUDIO_DESCRIPTORS, MODELS, BACKENDS)) + + +print("RUNNING TESTS WITH TF VERSION {}".format(tf.__version__)) + + +@pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) +def test_separate(test_file, configuration, backend): """ Test separation from raw data. """ + tf.reset_default_graph() + instruments = MODEL_TO_INST[configuration] adapter = get_default_audio_adapter() - waveform, _ = adapter.load(TEST_AUDIO_DESCRIPTOR) + waveform, _ = adapter.load(test_file) separator = Separator(configuration, stft_backend=backend) - prediction = separator.separate(waveform, TEST_AUDIO_DESCRIPTOR) + prediction = separator.separate(waveform, test_file) assert len(prediction) == len(instruments) for instrument in instruments: assert instrument in prediction for instrument in instruments: track = prediction[instrument] - assert waveform.shape == track.shape + assert waveform.shape[:-1] == track.shape[:-1] assert not np.allclose(waveform, track) for compared in instruments: if instrument != compared: assert not np.allclose(track, prediction[compared]) -@pytest.mark.parametrize('configuration, instruments, backend', TEST_CONFIGURATIONS) -def test_separate_to_file(configuration, instruments, backend): +@pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) +def test_separate_to_file(test_file, configuration, backend): """ Test file based separation. """ + tf.reset_default_graph() + instruments = MODEL_TO_INST[configuration] separator = Separator(configuration, stft_backend=backend) + name = splitext(basename(test_file))[0] with TemporaryDirectory() as directory: separator.separate_to_file( - TEST_AUDIO_DESCRIPTOR, + test_file, directory) for instrument in instruments: assert exists(join( directory, - '{}/{}.wav'.format(TEST_AUDIO_BASENAME, instrument))) + '{}/{}.wav'.format(name, instrument))) -@pytest.mark.parametrize('configuration, instruments, backend', TEST_CONFIGURATIONS) -def test_filename_format(configuration, instruments, backend): +@pytest.mark.parametrize('test_file, configuration, backend', TEST_CONFIGURATIONS) +def test_filename_format(test_file, configuration, backend): """ Test custom filename format. """ + tf.reset_default_graph() + instruments = MODEL_TO_INST[configuration] separator = Separator(configuration, stft_backend=backend) + name = splitext(basename(test_file))[0] with TemporaryDirectory() as directory: separator.separate_to_file( - TEST_AUDIO_DESCRIPTOR, + test_file, directory, filename_format='export/{filename}/{instrument}.{codec}') for instrument in instruments: assert exists(join( directory, - 'export/{}/{}.wav'.format(TEST_AUDIO_BASENAME, instrument))) + 'export/{}/{}.wav'.format(name, instrument))) -def test_filename_conflict(): +@pytest.mark.parametrize('test_file, configuration', MODELS_AND_TEST_FILES) +def test_filename_conflict(test_file, configuration): """ Test error handling with static pattern. """ - separator = Separator(TEST_CONFIGURATIONS[0][0]) + tf.reset_default_graph() + separator = Separator(configuration) with TemporaryDirectory() as directory: with pytest.raises(SpleeterError): separator.separate_to_file( - TEST_AUDIO_DESCRIPTOR, + test_file, directory, filename_format='I wanna be your lover')