diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5e7368a..c8cdce3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -75,7 +75,27 @@ jobs:
           key: sdist-{{ .Branch }}-{{ checksum "setup.py" }}
       - run:
           name: upload to PyPi
+          # TODO: Infer destination depending on branch.
+          #       - master      => production PyPI
+          #       - development => testing PyPI
           command: pip install twine && twine upload dist/*
+
+  # =======================================================================================
+  # Conda distribution.
+  # =======================================================================================
+  conda-deploy:
+    docker:
+      - image: null # TODO: use CircleCI Python image.
+    steps:
+      - run:
+          name: checkout feedstock
+          command: git clone https://github.com/conda-forge/spleeter-feedstock
+      - run:
+          name: update version
+          command: sed -i 's///g'
+      - run:
+          name: deploy
+          command: git add recipe && git commit -m "" && git push
 workflows:
   version: 2
   spleeter-workflow:
@@ -91,6 +111,5 @@ workflows:
             branches:
               only:
                 - master
-                - development
           requires:
             - sdist
\ No newline at end of file
diff --git a/setup.py b/setup.py
index b9abf78..c4665e6 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ __license__ = 'MIT License'
 
 # Default project values.
 project_name = 'spleeter'
-project_version = '1.4.3'
+project_version = '1.4.4'
 device_target = 'cpu'
 tensorflow_dependency = 'tensorflow'
 tensorflow_version = '1.14.0'
diff --git a/spleeter/commands/__init__.py b/spleeter/commands/__init__.py
index 331ee2d..6428d6d 100644
--- a/spleeter/commands/__init__.py
+++ b/spleeter/commands/__init__.py
@@ -13,31 +13,31 @@ __email__ = 'research@deezer.com'
 __author__ = 'Deezer Research'
 __license__ = 'MIT License'
 
-# -i opt specification.
+# -i opt specification (separate).
 OPT_INPUT = {
-    'dest': 'audio_filenames',
+    'dest': 'inputs',
     'nargs': '+',
     'help': 'List of input audio filenames',
     'required': True
 }
 
-# -o opt specification.
+# -o opt specification (evaluate and separate).
 OPT_OUTPUT = {
     'dest': 'output_path',
     'default': join(gettempdir(), 'separated_audio'),
     'help': 'Path of the output directory to write audio files in'
 }
 
-# -p opt specification.
+# -p opt specification (train, evaluate and separate).
 OPT_PARAMS = {
-    'dest': 'params_filename',
+    'dest': 'configuration',
     'default': 'spleeter:2stems',
     'type': str,
     'action': 'store',
     'help': 'JSON filename that contains params'
 }
 
-# -n opt specification.
+# -n opt specification (separate).
 OPT_OUTPUT_NAMING = {
     'dest': 'output_naming',
     'default': 'filename',
@@ -54,26 +54,42 @@ OPT_OUTPUT_NAMING = {
         ', /audio/.wav)')
 }
 
+# -s opt specification (separate).
+OPT_OFFSET = {
+    'dest': 'offset',
+    'type': float,
+    'default': 0.,
+    'help': 'Set the starting offset to separate audio from.'
+}
+
 # -d opt specification (separate).
 OPT_DURATION = {
-    'dest': 'max_duration',
+    'dest': 'duration',
     'type': float,
     'default': 600.,
     'help': (
         'Set a maximum duration for processing audio '
-        '(only separate max_duration first seconds of '
+        '(only separate the first offset + duration seconds of '
         'the input file)')
 }
 
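Taken together, the new `-s/--offset` and `-d/--duration` options define a crop window: separation covers the input from `offset` up to `offset + duration` seconds. A minimal sketch of that window arithmetic, assuming (the diff does not say so) that the end is clamped to the file length:

```python
def crop_window(offset: float, duration: float, file_length: float):
    """Return the (start, end) pair, in seconds, that separation covers.

    Defaults mirror the CLI: offset=0., duration=600. Clamping to
    `file_length` is an assumption for illustration, not shown in the diff.
    """
    start = max(0., offset)
    end = min(file_length, offset + duration)
    return start, end


# e.g. `spleeter separate -s 30 -d 60 ...` on a 240s file covers 30s to 90s.
print(crop_window(30., 60., 240.))  # (30.0, 90.0)
```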
-# -c opt specification.
+# -c opt specification (separate).
 OPT_CODEC = {
-    'dest': 'audio_codec',
+    'dest': 'codec',
     'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'),
     'default': 'wav',
     'help': 'Audio codec to be used for the separated output'
 }
 
-# -m opt specification.
+# -b opt specification (separate).
+OPT_BITRATE = {
+    'dest': 'bitrate',
+    'type': str,
+    'default': '128k',
+    'help': 'Audio bitrate to be used for the separated output'
+}
+
+# -m opt specification (evaluate and separate).
 OPT_MWF = {
     'dest': 'MWF',
     'action': 'store_const',
@@ -82,7 +98,7 @@ OPT_MWF = {
     'help': 'Whether to use multichannel Wiener filtering for separation',
 }
 
-# --mus_dir opt specification.
+# --mus_dir opt specification (evaluate).
 OPT_MUSDB = {
     'dest': 'mus_dir',
     'type': str,
@@ -98,14 +114,14 @@ OPT_DATA = {
     'help': 'Path of the folder containing audio data for training'
 }
 
-# -a opt specification.
+# -a opt specification (train, evaluate and separate).
 OPT_ADAPTER = {
     'dest': 'audio_adapter',
     'type': str,
     'help': 'Name of the audio adapter to use for audio I/O'
 }
 
-# -a opt specification.
+# --verbose opt specification (train, evaluate and separate).
 OPT_VERBOSE = {
     'action': 'store_true',
     'help': 'Shows verbose logs'
@@ -158,11 +174,13 @@ def _create_separate_parser(parser_factory):
     """
     parser = parser_factory('separate', help='Separate audio files')
     _add_common_options(parser)
-    parser.add_argument('-i', '--audio_filenames', **OPT_INPUT)
+    parser.add_argument('-i', '--inputs', **OPT_INPUT)
     parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
     parser.add_argument('-n', '--output_naming', **OPT_OUTPUT_NAMING)
-    parser.add_argument('-d', '--max_duration', **OPT_DURATION)
-    parser.add_argument('-c', '--audio_codec', **OPT_CODEC)
+    parser.add_argument('-s', '--offset', **OPT_OFFSET)
+    parser.add_argument('-d', '--duration', **OPT_DURATION)
+    parser.add_argument('-c', '--codec', **OPT_CODEC)
+    parser.add_argument('-b', '--bitrate', **OPT_BITRATE)
     parser.add_argument('-m', '--mwf', **OPT_MWF)
     return parser
 
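Each `OPT_*` constant above is a plain dict of `argparse` keyword arguments that gets splatted into `add_argument`, so renaming a `dest` (e.g. `audio_filenames` to `inputs`) directly renames the attribute on the parsed namespace; that is what forces the entrypoint changes below. A self-contained sketch of the pattern, reusing two of the specifications from this diff with a throwaway parser:

```python
import argparse

# Copied from the diff above; any of the option spec dicts works the same way.
OPT_CODEC = {
    'dest': 'codec',
    'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'),
    'default': 'wav',
    'help': 'Audio codec to be used for the separated output'
}
OPT_BITRATE = {
    'dest': 'bitrate',
    'type': str,
    'default': '128k',
    'help': 'Audio bitrate to be used for the separated output'
}

parser = argparse.ArgumentParser(prog='separate')
parser.add_argument('-c', '--codec', **OPT_CODEC)
parser.add_argument('-b', '--bitrate', **OPT_BITRATE)

args = parser.parse_args(['-c', 'mp3', '-b', '192k'])
print(args.codec, args.bitrate)  # mp3 192k
```

Note that `bitrate` has to stay a string: ffmpeg-style values such as `128k` do not survive an `int` conversion, which is why the spec uses `'type': str`.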
diff --git a/spleeter/commands/separate.py b/spleeter/commands/separate.py
index 10da43c..df88229 100644
--- a/spleeter/commands/separate.py
+++ b/spleeter/commands/separate.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from ..audio.adapter import get_audio_adapter
 from ..audio.convertor import to_n_channels
+from ..separator import Separator
 from ..utils.estimator import create_estimator
 from ..utils.tensor import set_tensor_shape
 
@@ -30,149 +31,24 @@ __author__ = 'Deezer Research'
 __license__ = 'MIT License'
 
 
-def get_dataset(audio_adapter, filenames_and_crops, sample_rate, n_channels):
-    """"
-    Build a tensorflow dataset of waveform from a filename list wit crop
-    information.
-
-    Params:
-    - audio_adapter: An AudioAdapter instance to load audio from.
-    - filenames_and_crops: list of (audio_filename, start, duration)
-                           tuples separation is performed on each filaneme
-                           from start (in seconds) to start + duration
-                           (in seconds).
-    - sample_rate: audio sample_rate of the input and output audio
-                   signals
-    - n_channels: int, number of channels of the input and output
-                  audio signals
-
-    Returns
-    A tensorflow dataset of waveform to feed a tensorflow estimator in
-    predict mode.
-    """
-    filenames, starts, ends = list(zip(*filenames_and_crops))
-    dataset = tf.data.Dataset.from_tensor_slices({
-        'audio_id': list(filenames),
-        'start': list(starts),
-        'end': list(ends)
-    })
-    # Load waveform.
-    dataset = dataset.map(
-        lambda sample: dict(
-            sample,
-            **audio_adapter.load_tf_waveform(
-                sample['audio_id'],
-                sample_rate=sample_rate,
-                offset=sample['start'],
-                duration=sample['end'] - sample['start'])),
-        num_parallel_calls=2)
-    # Filter out error.
-    dataset = dataset.filter(
-        lambda sample: tf.logical_not(sample['waveform_error']))
-    # Convert waveform to the right number of channels.
-    dataset = dataset.map(
-        lambda sample: dict(
-            sample,
-            waveform=to_n_channels(sample['waveform'], n_channels)))
-    # Set number of channels (required for the model).
-    dataset = dataset.map(
-        lambda sample: dict(
-            sample,
-            waveform=set_tensor_shape(sample['waveform'], (None, n_channels))))
-    return dataset
-
-
-def process_audio(
-        audio_adapter,
-        filenames_and_crops, estimator, output_path,
-        sample_rate, n_channels, codec, output_naming):
-    """
-    Perform separation on a list of audio ids.
-
-    Params:
-    - audio_adapter: Audio adapter to use for audio I/O.
-    - filenames_and_crops: list of (audio_filename, start, duration)
-                           tuples separation is performed on each filaneme
-                           from start (in seconds) to start + duration
-                           (in seconds).
-    - estimator: the tensorflow estimator that performs the
-                 source separation.
-    - output_path: output_path where to export separated files.
-    - sample_rate: audio sample_rate of the input and output audio
-                   signals
-    - n_channels: int, number of channels of the input and output
-                  audio signals
-    - codec: string codec to be used for export (could be
-             "wav", "mp3", "ogg", "m4a") could be anything
-             supported by ffmpeg.
-    - output_naming: string (= "filename" of "directory")
-                     naming convention for output.
-                     for an input file /path/to/audio/input_file.wav:
-                     * if output_naming is equal to "filename":
-                       output files will be put in the directory /input_file
-                       (/input_file/.,
-                        /input_file/....).
-                     * if output_naming is equal to "directory":
-                       output files will be put in the directory /audio/
-                       (/audio/.,
-                        /audio/....)
-                     Use "directory" when separating the MusDB dataset.
-
-    """
-    # Get estimator
-    prediction = estimator.predict(
-        lambda: get_dataset(
-            audio_adapter,
-            filenames_and_crops,
-            sample_rate,
-            n_channels),
-        yield_single_examples=False)
-    # initialize pool for audio export
-    pool = Pool(16)
-    for sample in prediction:
-        sample_filename = sample.pop('audio_id', 'unknown_filename').decode()
-        input_directory, input_filename = split(sample_filename)
-        if output_naming == 'directory':
-            output_dirname = split(input_directory)[1]
-        elif output_naming == 'filename':
-            output_dirname = splitext(input_filename)[0]
-        else:
-            raise ValueError(f'Unknown output naming {output_naming}')
-        for instrument, waveform in sample.items():
-            filename = join(
-                output_path,
-                output_dirname,
-                f'{instrument}.{codec}')
-            pool.apply_async(
-                audio_adapter.save,
-                (filename, waveform, sample_rate, codec))
-    # Wait for everything to be written
-    pool.close()
-    pool.join()
-
-
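The deleted `process_audio` exported stems through a `multiprocessing.Pool`: one `apply_async` per instrument, then `close()`/`join()` to wait for every pending write. The `synchronous=False`/`separator.join()` pair visible in the new entrypoint suggests `Separator` keeps that queue-then-join shape internally. A standalone sketch of the old export pattern, with a stand-in `save` instead of spleeter's audio adapter:

```python
from multiprocessing import Pool


def save(filename, waveform):
    # Stand-in for audio_adapter.save: pretend to encode one stem to disk.
    print(f'writing {filename} ({len(waveform)} samples)')


if __name__ == '__main__':
    stems = {'vocals': [0.0] * 44100, 'accompaniment': [0.0] * 44100}
    pool = Pool(4)
    for instrument, waveform in stems.items():
        # Queue each export without blocking the prediction loop.
        pool.apply_async(save, (f'{instrument}.wav', waveform))
    pool.close()  # No further tasks will be submitted.
    pool.join()   # Block until every queued export has finished.
```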
 def entrypoint(arguments, params):
     """ Command entrypoint.
 
     :param arguments: Command line parsed argument as argparse.Namespace.
     :param params: Deserialized JSON configuration file provided in CLI args.
     """
+    # TODO: check with output naming.
     audio_adapter = get_audio_adapter(arguments.audio_adapter)
-    filenames = arguments.audio_filenames
-    output_path = arguments.output_path
-    max_duration = arguments.max_duration
-    audio_codec = arguments.audio_codec
-    output_naming = arguments.output_naming
-    estimator = create_estimator(params, arguments.MWF)
-    filenames_and_crops = [
-        (filename, 0., max_duration)
-        for filename in filenames]
-    process_audio(
-        audio_adapter,
-        filenames_and_crops,
-        estimator,
-        output_path,
-        params['sample_rate'],
-        params['n_channels'],
-        codec=audio_codec,
-        output_naming=output_naming)
+    separator = Separator(arguments.configuration, arguments.MWF)
+    for filename in arguments.inputs:
+        separator.separate_to_file(
+            filename,
+            arguments.output_path,
+            audio_adapter=audio_adapter,
+            offset=arguments.offset,
+            duration=arguments.duration,
+            codec=arguments.codec,
+            bitrate=arguments.bitrate,
+            synchronous=False
+        )
+    separator.join()
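The rewritten entrypoint delegates all dataset and estimator plumbing to `Separator`. Going only by the calls visible in this diff (a constructor taking the configuration descriptor and the MWF flag, `separate_to_file` with these keyword arguments, and a final `join()`), the equivalent library-level usage would look roughly like the sketch below; treat the exact signatures as assumptions to verify against `spleeter/separator.py`:

```python
from spleeter.separator import Separator

# Assumed API, inferred from the entrypoint above.
separator = Separator('spleeter:2stems', MWF=False)
separator.separate_to_file(
    'audio_example.mp3',         # input file
    '/tmp/separated_audio',      # output directory (the CLI default)
    offset=0.,                   # new -s/--offset option
    duration=600.,               # new -d/--duration option
    codec='wav',                 # -c/--codec option
    bitrate='128k',              # new -b/--bitrate option
    synchronous=False            # queue the export asynchronously...
)
separator.join()                 # ...then wait for all writes to finish
```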