From 1991b222e28ab0afe0c863e249763cc524786682 Mon Sep 17 00:00:00 2001
From: Faylixe
Date: Mon, 7 Dec 2020 20:21:05 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20=20typer=20finalization?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 spleeter/__main__.py          | 265 ++++++++++++++++++++--------------
 spleeter/commands/__init__.py | 197 -------------------------
 spleeter/commands/evaluate.py |   1 -
 spleeter/options.py           | 123 ++++++++++++++++
 4 files changed, 281 insertions(+), 305 deletions(-)
 delete mode 100644 spleeter/commands/__init__.py
 create mode 100644 spleeter/options.py

diff --git a/spleeter/__main__.py b/spleeter/__main__.py
index d60af24..0c49505 100644
--- a/spleeter/__main__.py
+++ b/spleeter/__main__.py
@@ -3,143 +3,194 @@
 """
 TO DOCUMENT
 """
+
+import json
+
+from functools import partial
+from glob import glob
 from pathlib import Path
 from os.path import join
-from spleeter.separator import STFTBackend
-from tempfile import gettempdir
 from typing import List
 
 from .audio import Codec
 from .audio.adapter import AudioAdapter
+from .options import *
+from .dataset import get_training_dataset, get_validation_dataset
+from .model import model_fn
+from .model.provider import ModelProvider
 from .separator import Separator
+from .utils.configuration import load_configuration
+from .utils.logging import get_logger
+
 # pyright: reportMissingImports=false
 # pylint: disable=import-error
+import numpy as np
+import tensorflow as tf
+
+from typer import Exit, Typer
 # pylint: enable=import-error
 
 spleeter: Typer = Typer()
 """ """
 
-AudioOutput: OptionInfo = Option(
-    join(gettempdir(), 'separated_audio'),
-    help='Path of the output directory to write audio files in')
-
-AudioSTFTBackend: OptionInfo = Option(
-    STFTBackend.AUTO,
-    '--stft-backend',
-    '-B',
-    case_sensitive=False,
-    help=(
-        'Who should be in charge of computing the stfts. Librosa is faster '
-        'than tensorflow on CPU and uses less memory. "auto" will use '
-        'tensorflow when GPU acceleration is available and librosa when not'))
-
-AudioAdapterDescriptor: OptionInfo = Option(
-    'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter',
-    help='Name of the audio adapter to use for audio I/O')
-
-MWF: OptionInfo = Option(
-    False,
-    '--mwf',
-    help='Whether to use multichannel Wiener filtering for separation')
-
-ModelParameters: OptionInfo = Option(
-    'spleeter:2stems',
-    help='JSON filename that contains params')
-
-Verbose: OptionInfo = Option(
-    False,
-    '--verbose',
-    help='Enable verbose logs')
-
 
 @spleeter.command()
 def train(
-        adapter=None,
-        verbose: bool = Verbose,
-        params_filename: str = ModelParameters,
-        data: Path = Option(
-            ...,
-            exists=True,
-            dir_okay=True,
-            file_okay=False,
-            readable=True,
-            resolve_path=True,
-            help='Path of the folder containing audio data for training')
-        ) -> None:
+        adapter: str = AudioAdapterOption,
+        data: Path = TrainingDataDirectoryOption,
+        params_filename: str = ModelParametersOption,
+        verbose: bool = VerboseOption) -> None:
     """ Train a source separation model """
-    pass
+    # TODO: try / catch or custom decorator for function handling.
+    # TODO: handle verbose flag?
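+    # Training pipeline: load the configuration, build a tf.estimator
+    # estimator from it, run train_and_evaluate over the training and
+    # validation datasets, then write the model probe file.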
+    audio_adapter = AudioAdapter.get(adapter)
+    audio_path = str(data)
+    params = load_configuration(params_filename)
+    # Use at most ~45% of the GPU memory for this process.
+    session_config = tf.compat.v1.ConfigProto()
+    session_config.gpu_options.per_process_gpu_memory_fraction = 0.45
+    estimator = tf.estimator.Estimator(
+        model_fn=model_fn,
+        model_dir=params['model_dir'],
+        params=params,
+        config=tf.estimator.RunConfig(
+            save_checkpoints_steps=params['save_checkpoints_steps'],
+            tf_random_seed=params['random_seed'],
+            save_summary_steps=params['save_summary_steps'],
+            session_config=session_config,
+            log_step_count_steps=10,
+            keep_checkpoint_max=2))
+    input_fn = partial(get_training_dataset, params, audio_adapter, audio_path)
+    train_spec = tf.estimator.TrainSpec(
+        input_fn=input_fn,
+        max_steps=params['train_max_steps'])
+    input_fn = partial(
+        get_validation_dataset,
+        params,
+        audio_adapter,
+        audio_path)
+    evaluation_spec = tf.estimator.EvalSpec(
+        input_fn=input_fn,
+        steps=None,
+        throttle_secs=params['throttle_secs'])
+    get_logger().info('Start model training')
+    tf.estimator.train_and_evaluate(estimator, train_spec, evaluation_spec)
+    ModelProvider.writeProbe(params['model_dir'])
+    get_logger().info('Model training done')
+
+_SPLIT = 'test'
+_MIXTURE = 'mixture.wav'
+_AUDIO_DIRECTORY = 'audio'
+_METRICS_DIRECTORY = 'metrics'
+_INSTRUMENTS = ('vocals', 'drums', 'bass', 'other')
+_METRICS = ('SDR', 'SAR', 'SIR', 'ISR')
+
+
+def _compute_musdb_metrics(
+        output_path,
+        musdb_root_directory,
+        audio_output_directory):
+    """ Generates musdb metrics for previously computed audio estimations.
+
+    :param output_path: Directory to write metrics in.
+    :param musdb_root_directory: Path of the musDB dataset directory.
+    :param audio_output_directory: Directory to get audio estimation from.
+    :returns: Path of generated metrics directory.
+    """
+    metrics_output_directory = join(
+        output_path,
+        _METRICS_DIRECTORY)
+    get_logger().info('Starting musdb evaluation (this could be long) ...')
+    try:
+        import musdb
+        import museval
+    except ImportError:
+        logger = get_logger()
+        logger.error('Extra dependencies musdb and museval not found')
+        logger.error('Please install musdb and museval first, abort')
+        raise Exit(10)
+    dataset = musdb.DB(
+        root=musdb_root_directory,
+        is_wav=True,
+        subsets=[_SPLIT])
+    museval.eval_mus_dir(
+        dataset=dataset,
+        estimates_dir=audio_output_directory,
+        output_dir=metrics_output_directory)
+    get_logger().info('musdb evaluation done')
+    return metrics_output_directory
+
+
+def _compile_metrics(metrics_output_directory):
+    """ Compiles metrics from given directory and returns
+    results as dict.
+
+    :param metrics_output_directory: Directory to get metrics from.
+    :returns: Compiled metrics as dict.
+    """
+    songs = glob(join(metrics_output_directory, 'test/*.json'))
+    metrics = {
+        instrument: {k: [] for k in _METRICS}
+        for instrument in _INSTRUMENTS}
+    for song in songs:
+        with open(song, 'r') as stream:
+            data = json.load(stream)
+        for target in data['targets']:
+            instrument = target['name']
+            for metric in _METRICS:
+                # Median of the frame-level values, ignoring NaN frames.
+                sdr_med = np.median([
+                    frame['metrics'][metric]
+                    for frame in target['frames']
+                    if not np.isnan(frame['metrics'][metric])])
+                metrics[instrument][metric].append(sdr_med)
+    return metrics
 
 
 @spleeter.command()
 def evaluate(
-        adapter: str = AudioAdapterDescriptor,
-        output_path: Path = AudioOutput,
-        stft_backend: STFTBackend = AudioSTFTBackend,
-        params_filename: str = ModelParameters,
-        mwf: bool = MWF,
-        verbose: bool = Verbose,
-        mus_dir: Path = Option(
-            ...,
-            '--mus_dir',
-            exists=True,
-            dir_okay=True,
-            file_okay=False,
-            readable=True,
-            resolve_path=True,
-            help='Path to musDB dataset directory')
-        ) -> None:
+        adapter: str = AudioAdapterOption,
+        output_path: Path = AudioOutputOption,
+        stft_backend: STFTBackend = AudioSTFTBackendOption,
+        params_filename: str = ModelParametersOption,
+        mus_dir: Path = MUSDBDirectoryOption,
+        mwf: bool = MWFOption,
+        verbose: bool = VerboseOption) -> None:
     """ Evaluate a model on the musDB test dataset """
-    pass
+    params = load_configuration(params_filename)
+    # Separate musdb sources.
+    # TODO: port _separate_evaluation_dataset from spleeter.commands.evaluate.
+    audio_output_directory = _separate_evaluation_dataset(
+        output_path,
+        mus_dir,
+        params)
+    # Compute metrics with musdb.
+    metrics_output_directory = _compute_musdb_metrics(
+        output_path,
+        mus_dir,
+        audio_output_directory)
+    # Compute and pretty print median metrics.
+    metrics = _compile_metrics(metrics_output_directory)
+    for instrument, instrument_metrics in metrics.items():
+        get_logger().info('%s:', instrument)
+        for metric, value in instrument_metrics.items():
+            get_logger().info('%s: %s', metric, f'{np.median(value):.3f}')
+    return metrics
 
 
 @spleeter.command()
 def separate(
-        adapter: str = AudioAdapterDescriptor,
-        output_path: Path = AudioOutput,
-        stft_backend: STFTBackend = AudioSTFTBackend,
-        params_filename: str = ModelParameters,
-        mwf: bool = MWF,
-        verbose: bool = Verbose,
-        files: List[Path] = Argument(
-            ...,
-            help='List of input audio file path',
-            exists=True,
-            file_okay=True,
-            dir_okay=False,
-            readable=True,
-            resolve_path=True),
-        filename_format: str = Option(
-            '{filename}/{instrument}.{codec}',
-            help=(
-                'Template string that will be formatted to generated'
-                'output filename. Such template should be Python formattable'
-                'string, and could use {filename}, {instrument}, and {codec}'
-                'variables')),
-        duration: float = Option(
-            600.,
-            help=(
-                'Set a maximum duration for processing audio '
-                '(only separate offset + duration first seconds of '
-                'the input file)')),
-        offset: float = Option(
-            0.,
-            '--offset',
-            '-s',
-            help='Set the starting offset to separate audio from'),
-        codec: Codec = Option(
-            Codec.WAV,
-            help='Audio codec to be used for the separated output'),
-        bitrate: str = Option(
-            '128k',
-            help='Audio bitrate to be used for the separated output')
-        ) -> None:
+        adapter: str = AudioAdapterOption,
+        bitrate: str = AudioBitrateOption,
+        codec: Codec = AudioCodecOption,
+        duration: float = AudioDurationOption,
+        files: List[Path] = AudioInputArgument,
+        offset: float = AudioOffsetOption,
+        output_path: Path = AudioOutputOption,
+        stft_backend: STFTBackend = AudioSTFTBackendOption,
+        filename_format: str = FilenameFormatOption,
+        params_filename: str = ModelParametersOption,
+        mwf: bool = MWFOption,
+        verbose: bool = VerboseOption) -> None:
     """ Separate audio file(s) """
@@ -153,7 +204,7 @@ def separate(
     audio_adapter: AudioAdapter = AudioAdapter.get(adapter)
     separator: Separator = Separator(
         params_filename,
-        MWF=MWF,
+        MWF=mwf,
         stft_backend=stft_backend)
     for filename in files:
         separator.separate_to_file(
diff --git a/spleeter/commands/__init__.py b/spleeter/commands/__init__.py
deleted file mode 100644
index db7d172..0000000
--- a/spleeter/commands/__init__.py
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-
-""" This modules provides spleeter command as well as CLI parsing methods. """
-
-from tempfile import gettempdir
-from os.path import join
-
-from ..separator import STFTBackend
-from ..audio import Codec
-
-from typer import Argument, Option
-from typer.models import ArgumentInfo, OptionInfo
-
-__email__ = 'spleeter@deezer.com'
-__author__ = 'Deezer Research'
-__license__ = 'MIT License'
-
-AudioInput: ArgumentInfo = Argument(
-    ...,
-    help='List of input audio file path',
-    exists=True,
-    file_okay=True,
-    dir_okay=False,
-    readable=True,
-    resolve_path=True)
-
-AudioOutput: OptionInfo = Option(
-    join(gettempdir(), 'separated_audio'),
-    '--output_path',
-    '-o',
-    help='Path of the output directory to write audio files in')
-
-AudioOffset: OptionInfo = Option(
-    0.,
-    '--offset',
-    '-s',
-    help='Set the starting offset to separate audio from')
-
-AudioDuration: OptionInfo = Option(
-    600.,
-    '--duration',
-    '-d',
-    help=(
-        'Set a maximum duration for processing audio '
-        '(only separate offset + duration first seconds of '
-        'the input file)'))
-
-FilenameFormat: OptionInfo = Option(
-    '{filename}/{instrument}.{codec}',
-    '--filename_format',
-    '-f',
-    help=(
-        'Template string that will be formatted to generated'
-        'output filename. Such template should be Python formattable'
-        'string, and could use {filename}, {instrument}, and {codec}'
-        'variables'))
-
-ModelParameters: OptionInfo = Option(
-    'spleeter:2stems',
-    '--params_filename',
-    '-p',
-    help='JSON filename that contains params')
-
-
-AudioSTFTBackend: OptionInfo = Option(
-    STFTBackend.AUTO,
-    '--stft-backend',
-    '-B',
-    case_sensitive=False,
-    help=(
-        'Who should be in charge of computing the stfts. Librosa is faster '
-        'than tensorflow on CPU and uses less memory. 
"auto" will use ' - 'tensorflow when GPU acceleration is available and librosa when not')) - -AudioCodec: OptionInfo = Option( - Codec.WAV, - '--codec', - '-c', - help='Audio codec to be used for the separated output') - -AudioBitrate: OptionInfo = Option( - '128k', - '--bitrate', - '-b', - help='Audio bitrate to be used for the separated output') - -MWF: OptionInfo = Option( - False, - '--mwf', - help='Whether to use multichannel Wiener filtering for separation') - -MUSDBDirectory: OptionInfo = Option( - ..., - '--mus_dir', - exists=True, - dir_okay=True, - file_okay=False, - readable=True, - resolve_path=True, - help='Path to musDB dataset directory') - -TrainingDataDirectory: OptionInfo = Option( - ..., - '--data', - '-d', - exists=True, - dir_okay=True, - file_okay=False, - readable=True, - resolve_path=True, - help='Path of the folder containing audio data for training') - -AudioAdapter: OptionInfo = Option( - 'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter', - '--adapter', - '-a', - help='Name of the audio adapter to use for audio I/O') - -Verbose: OptionInfo = Option( - False, - '--verbose', - help='Enable verbose logs') - - -def _add_common_options(parser): - """ Add common option to the given parser. - - :param parser: Parser to add common opt to. - """ - parser.add_argument('-a', '--adapter', **OPT_ADAPTER) - parser.add_argument('-p', '--params_filename', **OPT_PARAMS) - parser.add_argument('--verbose', **OPT_VERBOSE) - - -def _create_train_parser(parser_factory): - """ Creates an argparser for training command - - :param parser_factory: Factory to use to create parser instance. - :returns: Created and configured parser. - """ - parser = parser_factory('train', help='Train a source separation model') - _add_common_options(parser) - parser.add_argument('-d', '--data', **OPT_DATA) - return parser - - -def _create_evaluate_parser(parser_factory): - """ Creates an argparser for evaluation command - - :param parser_factory: Factory to use to create parser instance. - :returns: Created and configured parser. - """ - parser = parser_factory( - 'evaluate', - help='Evaluate a model on the musDB test dataset') - _add_common_options(parser) - parser.add_argument('-o', '--output_path', **OPT_OUTPUT) - parser.add_argument('--mus_dir', **OPT_MUSDB) - parser.add_argument('-m', '--mwf', **OPT_MWF) - parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND) - return parser - - -def _create_separate_parser(parser_factory): - """ Creates an argparser for separation command - - :param parser_factory: Factory to use to create parser instance. - :returns: Created and configured parser. - """ - parser = parser_factory('separate', help='Separate audio files') - _add_common_options(parser) - parser.add_argument('-i', '--inputs', **OPT_INPUT) - parser.add_argument('-o', '--output_path', **OPT_OUTPUT) - parser.add_argument('-f', '--filename_format', **OPT_FORMAT) - parser.add_argument('-d', '--duration', **OPT_DURATION) - parser.add_argument('-s', '--offset', **OPT_OFFSET) - parser.add_argument('-c', '--codec', **OPT_CODEC) - parser.add_argument('-b', '--birate', **OPT_BITRATE) - parser.add_argument('-m', '--mwf', **OPT_MWF) - parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND) - return parser - - -def create_argument_parser(): - """ Creates overall command line parser for Spleeter. - - :returns: Created argument parser. 
- """ - parser = ArgumentParser(prog='spleeter') - subparsers = parser.add_subparsers() - subparsers.dest = 'command' - subparsers.required = True - _create_separate_parser(subparsers.add_parser) - _create_train_parser(subparsers.add_parser) - _create_evaluate_parser(subparsers.add_parser) - return parser diff --git a/spleeter/commands/evaluate.py b/spleeter/commands/evaluate.py index 93e0990..d6cf7d6 100644 --- a/spleeter/commands/evaluate.py +++ b/spleeter/commands/evaluate.py @@ -163,5 +163,4 @@ def entrypoint(arguments, params): get_logger().info('%s:', instrument) for metric, value in metric.items(): get_logger().info('%s: %s', metric, f'{np.median(value):.3f}') - return metrics diff --git a/spleeter/options.py b/spleeter/options.py new file mode 100644 index 0000000..d5c89db --- /dev/null +++ b/spleeter/options.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# coding: utf8 + +""" This modules provides spleeter command as well as CLI parsing methods. """ + +from tempfile import gettempdir +from os.path import join + +from .separator import STFTBackend +from .audio import Codec + +from typer import Argument, Option +from typer.models import ArgumentInfo, OptionInfo + +__email__ = 'spleeter@deezer.com' +__author__ = 'Deezer Research' +__license__ = 'MIT License' + +AudioInputArgument: ArgumentInfo = Argument( + ..., + help='List of input audio file path', + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + resolve_path=True) + +AudioAdapterOption: OptionInfo = Option( + 'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter', + '--adapter', + '-a', + help='Name of the audio adapter to use for audio I/O') + +AudioOutputOption: OptionInfo = Option( + join(gettempdir(), 'separated_audio'), + '--output_path', + '-o', + help='Path of the output directory to write audio files in') + +AudioOffsetOption: OptionInfo = Option( + 0., + '--offset', + '-s', + help='Set the starting offset to separate audio from') + +AudioDurationOption: OptionInfo = Option( + 600., + '--duration', + '-d', + help=( + 'Set a maximum duration for processing audio ' + '(only separate offset + duration first seconds of ' + 'the input file)')) + +AudioSTFTBackendOption: OptionInfo = Option( + STFTBackend.AUTO, + '--stft-backend', + '-B', + case_sensitive=False, + help=( + 'Who should be in charge of computing the stfts. Librosa is faster ' + 'than tensorflow on CPU and uses less memory. "auto" will use ' + 'tensorflow when GPU acceleration is available and librosa when not')) + +AudioCodecOption: OptionInfo = Option( + Codec.WAV, + '--codec', + '-c', + help='Audio codec to be used for the separated output') + +AudioBitrateOption: OptionInfo = Option( + '128k', + '--bitrate', + '-b', + help='Audio bitrate to be used for the separated output') + +FilenameFormatOption: OptionInfo = Option( + '{filename}/{instrument}.{codec}', + '--filename_format', + '-f', + help=( + 'Template string that will be formatted to generated' + 'output filename. 
Such template should be Python formattable' + 'string, and could use {filename}, {instrument}, and {codec}' + 'variables')) + +ModelParametersOption: OptionInfo = Option( + 'spleeter:2stems', + '--params_filename', + '-p', + help='JSON filename that contains params') + + +MWFOption: OptionInfo = Option( + False, + '--mwf', + help='Whether to use multichannel Wiener filtering for separation') + +MUSDBDirectoryOption: OptionInfo = Option( + ..., + '--mus_dir', + exists=True, + dir_okay=True, + file_okay=False, + readable=True, + resolve_path=True, + help='Path to musDB dataset directory') + +TrainingDataDirectoryOption: OptionInfo = Option( + ..., + '--data', + '-d', + exists=True, + dir_okay=True, + file_okay=False, + readable=True, + resolve_path=True, + help='Path of the folder containing audio data for training') + +VerboseOption: OptionInfo = Option( + False, + '--verbose', + help='Enable verbose logs')