🚧 typer finalization

2026-01-06 04:32:43 +00:00 · 2020-12-07 20:21:05 +01:00
parent 4b1f1e192c
commit 1991b222e2
4 changed files with 281 additions and 305 deletions
--- a/spleeter/main.py
+++ b/spleeter/main.py
@@ -3,143 +3,194 @@

 """ TO DOCUMENT """

+
+from functools import partial
 from pathlib import Path
-from os.path import join
-from spleeter.separator import STFTBackend
-from tempfile import gettempdir
 from typing import List

 from .audio import Codec
 from .audio.adapter import AudioAdapter
+from .options import *
+from .dataset import get_training_dataset, get_validation_dataset
+from .model import model_fn
+from .model.provider import ModelProvider
 from .separator import Separator
+from .utils.configuration import load_configuration
+from .utils.logging import get_logger
+

 # pyright: reportMissingImports=false
 # pylint: disable=import-error
-from typer import Argument, Option, Typer
-from typer.models import OptionInfo
+import tensorflow as tf
+
+from typer import Exit, Typer
 # pylint: enable=import-error

 spleeter: Typer = Typer()
 """ """

-AudioOutput: OptionInfo = Option(
-    join(gettempdir(), 'separated_audio'),
-    help='Path of the output directory to write audio files in')
-
-AudioSTFTBackend: OptionInfo = Option(
-    STFTBackend.AUTO,
-    '--stft-backend',
-    '-B',
-    case_sensitive=False,
-    help=(
-        'Who should be in charge of computing the stfts. Librosa is faster '
-        'than tensorflow on CPU and uses  less memory. "auto" will use '
-        'tensorflow when GPU acceleration is available and librosa when not'))
-
-AudioAdapterDescriptor: OptionInfo = Option(
-    'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter',
-    help='Name of the audio adapter to use for audio I/O')
-
-MWF: OptionInfo = Option(
-    False,
-    '--mwf',
-    help='Whether to use multichannel Wiener filtering for separation')
-
-ModelParameters: OptionInfo = Option(
-    'spleeter:2stems',
-    help='JSON filename that contains params')
-
-Verbose: OptionInfo = Option(
-    False,
-    '--verbose',
-    help='Enable verbose logs')
-

@spleeter.command()
 def train(
-        adapter=None,
-        verbose: bool = Verbose,
-        params_filename: str = ModelParameters,
-        data: Path = Option(
-            ...,
-            exists=True,
-            dir_okay=True,
-            file_okay=False,
-            readable=True,
-            resolve_path=True,
-            help='Path of the folder containing audio data for training')
-        ) -> None:
+        adapter: str = AudioAdapterOption,
+        data: Path = TrainingDataDirectoryOption,
+        params_filename: str = ModelParametersOption,
+        verbose: bool = VerboseOption) -> None:
    """
        Train a source separation model
    """
-    pass
+    # TODO: try / catch or custom decorator for function handling.
+    # TODO: handle verbose flag ?
+    audio_adapter = AudioAdapter.get(adapter)
+    audio_path = str(data)
+    params = load_configuration(params_filename)
+    session_config = tf.compat.v1.ConfigProto()
+    session_config.gpu_options.per_process_gpu_memory_fraction = 0.45
+    estimator = tf.estimator.Estimator(
+        model_fn=model_fn,
+        model_dir=params['model_dir'],
+        params=params,
+        config=tf.estimator.RunConfig(
+            save_checkpoints_steps=params['save_checkpoints_steps'],
+            tf_random_seed=params['random_seed'],
+            save_summary_steps=params['save_summary_steps'],
+            session_config=session_config,
+            log_step_count_steps=10,
+            keep_checkpoint_max=2))
+    input_fn = partial(get_training_dataset, params, audio_adapter, audio_path)
+    train_spec = tf.estimator.TrainSpec(
+        input_fn=input_fn,
+        max_steps=params['train_max_steps'])
+    input_fn = partial(
+        get_validation_dataset,
+        params,
+        audio_adapter,
+        audio_path)
+    evaluation_spec = tf.estimator.EvalSpec(
+        input_fn=input_fn,
+        steps=None,
+        throttle_secs=params['throttle_secs'])
+    get_logger().info('Start model training')
+    tf.estimator.train_and_evaluate(estimator, train_spec, evaluation_spec)
+    ModelProvider.writeProbe(params['model_dir'])
+    get_logger().info('Model training done')
+
+# Name of the musdb subset handed to musdb.DB for the evaluation.
+_SPLIT = 'test'
+# NOTE(review): _MIXTURE and _AUDIO_DIRECTORY are not referenced in the
+# visible part of this module; presumably meant for the evaluation dataset
+# separation helper - confirm.
+_MIXTURE = 'mixture.wav'
+_AUDIO_DIRECTORY = 'audio'
+# Sub-directory of the output path that receives the computed metrics.
+_METRICS_DIRECTORY = 'metrics'
+# Keys of the compiled metrics dict: instrument first, then metric name.
+_INSTRUMENTS = ('vocals', 'drums', 'bass', 'other')
+_METRICS = ('SDR', 'SAR', 'SIR', 'ISR')
+
+
+def _compute_musdb_metrics(
+        arguments,
+        musdb_root_directory,
+        audio_output_directory):
+    """ Generates musdb metrics from previously computed audio estimation.
+
+    :param arguments: Entrypoint arguments, only output_path is read here.
+    :param musdb_root_directory: Root directory of the musdb dataset.
+    :param audio_output_directory: Directory to get audio estimation from.
+    :returns: Path of generated metrics directory.
+    :raises Exit: With code 10 when musdb or museval can not be imported.
+    """
+    target_directory = join(arguments.output_path, _METRICS_DIRECTORY)
+    get_logger().info('Starting musdb evaluation (this could be long) ...')
+    # Both packages are optional extras, hence the guarded local import.
+    try:
+        import musdb
+        import museval
+    except ImportError:
+        logger = get_logger()
+        for message in (
+                'Extra dependencies musdb and museval not found',
+                'Please install musdb and museval first, abort'):
+            logger.error(message)
+        raise Exit(10)
+    evaluation_subset = musdb.DB(
+        root=musdb_root_directory,
+        is_wav=True,
+        subsets=[_SPLIT])
+    museval.eval_mus_dir(
+        dataset=evaluation_subset,
+        estimates_dir=audio_output_directory,
+        output_dir=target_directory)
+    get_logger().info('musdb evaluation done')
+    return target_directory
+
+
+def _compile_metrics(metrics_output_directory):
+    """ Compiles metrics from given directory and returns
+    results as dict.
+
+    Every JSON file matching test/*.json under the given directory is read,
+    and for each target and each metric the median over the frames holding
+    a non NaN value is appended to the result.
+
+    :param metrics_output_directory: Directory to get metrics from.
+    :returns: Dict mapping instrument name to a dict that maps metric name
+              to the list of per song medians.
+    :raises KeyError: If a target name is not one of _INSTRUMENTS.
+    """
+    # Imported locally as this module does not import them at top level.
+    import json
+    from glob import glob
+    from os.path import join
+    import numpy as np
+
+    metrics = {
+        instrument: {metric: [] for metric in _METRICS}
+        for instrument in _INSTRUMENTS}
+    # glob returns files in arbitrary order, sort to get stable lists.
+    songs = sorted(glob(join(metrics_output_directory, 'test/*.json')))
+    for song in songs:
+        with open(song, 'r') as stream:
+            data = json.load(stream)
+        for target in data['targets']:
+            instrument = target['name']
+            for metric in _METRICS:
+                frame_values = [
+                    frame['metrics'][metric]
+                    for frame in target['frames']
+                    if not np.isnan(frame['metrics'][metric])]
+                metrics[instrument][metric].append(np.median(frame_values))
+    return metrics


@spleeter.command()
 def evaluate(
-        adapter: str = AudioAdapterDescriptor,
-        output_path: Path = AudioOutput,
-        stft_backend: STFTBackend = AudioSTFTBackend,
-        params_filename: str = ModelParameters,
-        mwf: bool = MWF,
-        verbose: bool = Verbose,
-        mus_dir: Path = Option(
-            ...,
-            '--mus_dir',
-            exists=True,
-            dir_okay=True,
-            file_okay=False,
-            readable=True,
-            resolve_path=True,
-            help='Path to musDB dataset directory')
-        ) -> None:
+        adapter: str = AudioAdapterOption,
+        output_path: Path = AudioAdapterOption,
+        stft_backend: STFTBackend = AudioSTFTBackendOption,
+        params_filename: str = ModelParametersOption,
+        mus_dir: Path = MUSDBDirectoryOption,
+        mwf: bool = MWFOption,
+        verbose: bool = VerboseOption) -> None:
    """
        Evaluate a model on the musDB test dataset
    """
-    pass
+    # Separate musdb sources.
+    audio_output_directory = _separate_evaluation_dataset(
+        arguments,
+        mus_dir,
+        params)
+   # Compute metrics with musdb.
+    metrics_output_directory = _compute_musdb_metrics(
+        arguments,
+        mus_dir,
+        audio_output_directory)
+    # Compute and pretty print median metrics.
+    metrics = _compile_metrics(metrics_output_directory)
+    for instrument, metric in metrics.items():
+        get_logger().info('%s:', instrument)
+        for metric, value in metric.items():
+            get_logger().info('%s: %s', metric, f'{np.median(value):.3f}')
+    return metrics


@spleeter.commmand()
 def separate(
-        adapter: str = AudioAdapterDescriptor,
-        output_path: Path = AudioOutput,
-        stft_backend: STFTBackend = AudioSTFTBackend,
-        params_filename: str = ModelParameters,
-        mwf: bool = MWF,
-        verbose: bool = Verbose,
-        files: List[Path] = Argument(
-            ...,
-            help='List of input audio file path',
-            exists=True,
-            file_okay=True,
-            dir_okay=False,
-            readable=True,
-            resolve_path=True),
-        filename_format: str = Option(
-            '{filename}/{instrument}.{codec}',
-            help=(
-                'Template string that will be formatted to generated'
-                'output filename. Such template should be Python formattable'
-                'string, and could use {filename}, {instrument}, and {codec}'
-                'variables')),
-        duration: float = Option(
-            600.,
-            help=(
-                'Set a maximum duration for processing audio '
-                '(only separate offset + duration first seconds of '
-                'the input file)')),
-        offset: float = Option(
-            0.,
-            '--offset',
-            '-s',
-            help='Set the starting offset to separate audio from'),
-        codec: Codec = Option(
-            Codec.WAV,
-            help='Audio codec to be used for the separated output'),
-        bitrate: str = Option(
-            '128k',
-            help='Audio bitrate to be used for the separated output')
-        ) -> None:
+        adapter: str = AudioAdapterOption,
+        bitrate: str = AudioBitrateOption,
+        codec: Codec = AudioCodecOption,
+        duration: float = AudioDurationOption,
+        files: List[Path] = AudioInputArgument,
+        offset: float = AudioOffsetOption,
+        output_path: Path = AudioAdapterOption,
+        stft_backend: STFTBackend = AudioSTFTBackendOption,
+        filename_format: str = FilenameFormatOption,
+        params_filename: str = ModelParametersOption,
+        mwf: bool = MWFOption,
+        verbose: bool = VerboseOption) -> None:
    """
        Separate audio file(s)
    """
@@ -153,7 +204,7 @@ def separate(
    audio_adapter: AudioAdapter = AudioAdapter.get(adapter)
    separator: Separator = Separator(
        params_filename,
-        MWF=MWF,
+        MWF=mwf,
        stft_backend=stft_backend)
    for filename in files:
        separator.separate_to_file(
--- a/spleeter/commands/init.py
+++ b/spleeter/commands/init.py
@@ -1,197 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-
-""" This modules provides spleeter command as well as CLI parsing methods. """
-
-from tempfile import gettempdir
-from os.path import join
-
-from ..separator import STFTBackend
-from ..audio import Codec
-
-from typer import Argument, Option
-from typer.models import ArgumentInfo, OptionInfo
-
-__email__ = 'spleeter@deezer.com'
-__author__ = 'Deezer Research'
-__license__ = 'MIT License'
-
-AudioInput: ArgumentInfo = Argument(
-    ...,
-    help='List of input audio file path',
-    exists=True,
-    file_okay=True,
-    dir_okay=False,
-    readable=True,
-    resolve_path=True)
-
-AudioOutput: OptionInfo = Option(
-    join(gettempdir(), 'separated_audio'),
-    '--output_path',
-    '-o',
-    help='Path of the output directory to write audio files in')
-
-AudioOffset: OptionInfo = Option(
-    0.,
-    '--offset',
-    '-s',
-    help='Set the starting offset to separate audio from')
-
-AudioDuration: OptionInfo = Option(
-    600.,
-    '--duration',
-    '-d',
-    help=(
-        'Set a maximum duration for processing audio '
-        '(only separate offset + duration first seconds of '
-        'the input file)'))
-
-FilenameFormat: OptionInfo = Option(
-    '{filename}/{instrument}.{codec}',
-    '--filename_format',
-    '-f',
-    help=(
-        'Template string that will be formatted to generated'
-        'output filename. Such template should be Python formattable'
-        'string, and could use {filename}, {instrument}, and {codec}'
-        'variables'))
-
-ModelParameters: OptionInfo = Option(
-    'spleeter:2stems',
-    '--params_filename',
-    '-p',
-    help='JSON filename that contains params')
-
-
-AudioSTFTBackend: OptionInfo = Option(
-    STFTBackend.AUTO,
-    '--stft-backend',
-    '-B',
-    case_sensitive=False,
-    help=(
-        'Who should be in charge of computing the stfts. Librosa is faster '
-        'than tensorflow on CPU and uses  less memory. "auto" will use '
-        'tensorflow when GPU acceleration is available and librosa when not'))
-
-AudioCodec: OptionInfo = Option(
-    Codec.WAV,
-    '--codec',
-    '-c',
-    help='Audio codec to be used for the separated output')
-
-AudioBitrate: OptionInfo = Option(
-    '128k',
-    '--bitrate',
-    '-b',
-    help='Audio bitrate to be used for the separated output')
-
-MWF: OptionInfo = Option(
-    False,
-    '--mwf',
-    help='Whether to use multichannel Wiener filtering for separation')
-
-MUSDBDirectory: OptionInfo = Option(
-    ...,
-    '--mus_dir',
-    exists=True,
-    dir_okay=True,
-    file_okay=False,
-    readable=True,
-    resolve_path=True,
-    help='Path to musDB dataset directory')
-
-TrainingDataDirectory: OptionInfo = Option(
-    ...,
-    '--data',
-    '-d',
-    exists=True,
-    dir_okay=True,
-    file_okay=False,
-    readable=True,
-    resolve_path=True,
-    help='Path of the folder containing audio data for training')
-
-AudioAdapter: OptionInfo = Option(
-    'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter',
-    '--adapter',
-    '-a',
-    help='Name of the audio adapter to use for audio I/O')
-
-Verbose: OptionInfo = Option(
-    False,
-    '--verbose',
-    help='Enable verbose logs')
-
-
-def _add_common_options(parser):
-    """ Add common option to the given parser.
-
-    :param parser: Parser to add common opt to.
-    """
-    parser.add_argument('-a', '--adapter', **OPT_ADAPTER)
-    parser.add_argument('-p', '--params_filename', **OPT_PARAMS)
-    parser.add_argument('--verbose', **OPT_VERBOSE)
-
-
-def _create_train_parser(parser_factory):
-    """ Creates an argparser for training command
-
-    :param parser_factory: Factory to use to create parser instance.
-    :returns: Created and configured parser.
-    """
-    parser = parser_factory('train', help='Train a source separation model')
-    _add_common_options(parser)
-    parser.add_argument('-d', '--data', **OPT_DATA)
-    return parser
-
-
-def _create_evaluate_parser(parser_factory):
-    """ Creates an argparser for evaluation command
-
-    :param parser_factory: Factory to use to create parser instance.
-    :returns: Created and configured parser.
-    """
-    parser = parser_factory(
-        'evaluate',
-        help='Evaluate a model on the musDB test dataset')
-    _add_common_options(parser)
-    parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
-    parser.add_argument('--mus_dir', **OPT_MUSDB)
-    parser.add_argument('-m', '--mwf', **OPT_MWF)
-    parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
-    return parser
-
-
-def _create_separate_parser(parser_factory):
-    """ Creates an argparser for separation command
-
-    :param parser_factory: Factory to use to create parser instance.
-    :returns: Created and configured parser.
-    """
-    parser = parser_factory('separate', help='Separate audio files')
-    _add_common_options(parser)
-    parser.add_argument('-i', '--inputs', **OPT_INPUT)
-    parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
-    parser.add_argument('-f', '--filename_format', **OPT_FORMAT)
-    parser.add_argument('-d', '--duration', **OPT_DURATION)
-    parser.add_argument('-s', '--offset', **OPT_OFFSET)
-    parser.add_argument('-c', '--codec', **OPT_CODEC)
-    parser.add_argument('-b', '--birate', **OPT_BITRATE)
-    parser.add_argument('-m', '--mwf', **OPT_MWF)
-    parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
-    return parser
-
-
-def create_argument_parser():
-    """ Creates overall command line parser for Spleeter.
-
-    :returns: Created argument parser.
-    """
-    parser = ArgumentParser(prog='spleeter')
-    subparsers = parser.add_subparsers()
-    subparsers.dest = 'command'
-    subparsers.required = True
-    _create_separate_parser(subparsers.add_parser)
-    _create_train_parser(subparsers.add_parser)
-    _create_evaluate_parser(subparsers.add_parser)
-    return parser
--- a/spleeter/commands/evaluate.py
+++ b/spleeter/commands/evaluate.py
@@ -163,5 +163,4 @@ def entrypoint(arguments, params):
        get_logger().info('%s:', instrument)
        for metric, value in metric.items():
            get_logger().info('%s: %s', metric, f'{np.median(value):.3f}')
-
    return metrics
--- a/spleeter/options.py
+++ b/spleeter/options.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# coding: utf8
+
+""" This module provides the options and arguments of the spleeter CLI. """
+
+from tempfile import gettempdir
+from os.path import join
+
+from .separator import STFTBackend
+from .audio import Codec
+
+from typer import Argument, Option
+from typer.models import ArgumentInfo, OptionInfo
+
+__email__ = 'spleeter@deezer.com'
+__author__ = 'Deezer Research'
+__license__ = 'MIT License'
+
+# Positional argument: input files must exist and be readable regular files.
+AudioInputArgument: ArgumentInfo = Argument(
+    ...,
+    help='List of input audio file path',
+    exists=True,
+    file_okay=True,
+    dir_okay=False,
+    readable=True,
+    resolve_path=True)
+
+AudioAdapterOption: OptionInfo = Option(
+    'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter',
+    '--adapter',
+    '-a',
+    help='Name of the audio adapter to use for audio I/O')
+
+# Defaults to a `separated_audio` folder in the system temporary directory.
+AudioOutputOption: OptionInfo = Option(
+    join(gettempdir(), 'separated_audio'),
+    '--output_path',
+    '-o',
+    help='Path of the output directory to write audio files in')
+
+AudioOffsetOption: OptionInfo = Option(
+    0.,
+    '--offset',
+    '-s',
+    help='Set the starting offset to separate audio from')
+
+# NOTE: shares the `-d` short flag with TrainingDataDirectoryOption, so both
+# must not be attached to the same command.
+AudioDurationOption: OptionInfo = Option(
+    600.,
+    '--duration',
+    '-d',
+    help=(
+        'Set a maximum duration for processing audio '
+        '(only separate offset + duration first seconds of '
+        'the input file)'))
+
+AudioSTFTBackendOption: OptionInfo = Option(
+    STFTBackend.AUTO,
+    '--stft-backend',
+    '-B',
+    case_sensitive=False,
+    help=(
+        'Who should be in charge of computing the stfts. Librosa is faster '
+        'than tensorflow on CPU and uses  less memory. "auto" will use '
+        'tensorflow when GPU acceleration is available and librosa when not'))
+
+AudioCodecOption: OptionInfo = Option(
+    Codec.WAV,
+    '--codec',
+    '-c',
+    help='Audio codec to be used for the separated output')
+
+AudioBitrateOption: OptionInfo = Option(
+    '128k',
+    '--bitrate',
+    '-b',
+    help='Audio bitrate to be used for the separated output')
+
+# Adjacent string literals are concatenated as is, hence the trailing
+# space at the end of each help fragment.
+FilenameFormatOption: OptionInfo = Option(
+    '{filename}/{instrument}.{codec}',
+    '--filename_format',
+    '-f',
+    help=(
+        'Template string that will be formatted to generated '
+        'output filename. Such template should be Python formattable '
+        'string, and could use {filename}, {instrument}, and {codec} '
+        'variables'))
+
+ModelParametersOption: OptionInfo = Option(
+    'spleeter:2stems',
+    '--params_filename',
+    '-p',
+    help='JSON filename that contains params')
+
+
+MWFOption: OptionInfo = Option(
+    False,
+    '--mwf',
+    help='Whether to use multichannel Wiener filtering for separation')
+
+# Required option (no default): must be an existing readable directory.
+MUSDBDirectoryOption: OptionInfo = Option(
+    ...,
+    '--mus_dir',
+    exists=True,
+    dir_okay=True,
+    file_okay=False,
+    readable=True,
+    resolve_path=True,
+    help='Path to musDB dataset directory')
+
+# Required option (no default): must be an existing readable directory.
+TrainingDataDirectoryOption: OptionInfo = Option(
+    ...,
+    '--data',
+    '-d',
+    exists=True,
+    dir_okay=True,
+    file_okay=False,
+    readable=True,
+    resolve_path=True,
+    help='Path of the folder containing audio data for training')
+
+VerboseOption: OptionInfo = Option(
+    False,
+    '--verbose',
+    help='Enable verbose logs')