🚧 typer finalization

This commit is contained in:
Faylixe
2020-12-07 20:21:05 +01:00
parent 4b1f1e192c
commit 1991b222e2
4 changed files with 281 additions and 305 deletions

View File

@@ -3,143 +3,194 @@
""" TO DOCUMENT """
from functools import partial
from pathlib import Path
from os.path import join
from spleeter.separator import STFTBackend
from tempfile import gettempdir
from typing import List
from .audio import Codec
from .audio.adapter import AudioAdapter
from .options import *
from .dataset import get_training_dataset, get_validation_dataset
from .model import model_fn
from .model.provider import ModelProvider
from .separator import Separator
from .utils.configuration import load_configuration
from .utils.logging import get_logger
# pyright: reportMissingImports=false
# pylint: disable=import-error
from typer import Argument, Option, Typer
from typer.models import OptionInfo
import tensorflow as tf
from typer import Exit, Typer
# pylint: enable=import-error
# Typer application object: CLI commands are registered on it through the
# @spleeter.command() decorator.
spleeter: Typer = Typer()
""" """
# Output directory option; defaults to a 'separated_audio' folder inside the
# system temporary directory.
AudioOutput: OptionInfo = Option(
join(gettempdir(), 'separated_audio'),
help='Path of the output directory to write audio files in')
# STFT backend selector exposed as --stft-backend / -B; values are matched
# case-insensitively and the default is STFTBackend.AUTO.
AudioSTFTBackend: OptionInfo = Option(
STFTBackend.AUTO,
'--stft-backend',
'-B',
case_sensitive=False,
help=(
'Who should be in charge of computing the stfts. Librosa is faster '
'than tensorflow on CPU and uses less memory. "auto" will use '
'tensorflow when GPU acceleration is available and librosa when not'))
# Name of the audio adapter used for audio I/O; defaults to the FFMPEG
# process adapter.
AudioAdapterDescriptor: OptionInfo = Option(
'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter',
help='Name of the audio adapter to use for audio I/O')
# Boolean --mwf flag, disabled by default.
MWF: OptionInfo = Option(
False,
'--mwf',
help='Whether to use multichannel Wiener filtering for separation')
# Model parameters descriptor; defaults to 'spleeter:2stems'.
ModelParameters: OptionInfo = Option(
'spleeter:2stems',
help='JSON filename that contains params')
# Boolean --verbose flag, disabled by default.
Verbose: OptionInfo = Option(
False,
'--verbose',
help='Enable verbose logs')
@spleeter.command()
def train(
        adapter: str = AudioAdapterOption,
        data: Path = TrainingDataDirectoryOption,
        params_filename: str = ModelParametersOption,
        verbose: bool = VerboseOption) -> None:
    """
    Train a source separation model

    Loads the model configuration, builds a TensorFlow estimator from it,
    runs training interleaved with evaluation, then calls
    ``ModelProvider.writeProbe`` on the model directory.

    :param adapter: Name of the audio adapter to use for audio I/O.
    :param data: Path of the folder containing audio data for training.
    :param params_filename: JSON filename that contains params.
    :param verbose: Enable verbose logs (not acted upon yet, see TODO).
    """
    # TODO: try / catch or custom decorator for function handling.
    # TODO: handle verbose flag ?
    audio_adapter = AudioAdapter.get(adapter)
    audio_path = str(data)
    params = load_configuration(params_filename)
    # Restrict the TensorFlow session to a 0.45 fraction of the GPU memory.
    session_config = tf.compat.v1.ConfigProto()
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.45
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=params['model_dir'],
        params=params,
        config=tf.estimator.RunConfig(
            save_checkpoints_steps=params['save_checkpoints_steps'],
            tf_random_seed=params['random_seed'],
            save_summary_steps=params['save_summary_steps'],
            session_config=session_config,
            log_step_count_steps=10,
            keep_checkpoint_max=2))
    # Training and validation datasets are built from the same parameters,
    # adapter and audio folder; only the dataset factory differs.
    input_fn = partial(get_training_dataset, params, audio_adapter, audio_path)
    train_spec = tf.estimator.TrainSpec(
        input_fn=input_fn,
        max_steps=params['train_max_steps'])
    input_fn = partial(
        get_validation_dataset,
        params,
        audio_adapter,
        audio_path)
    evaluation_spec = tf.estimator.EvalSpec(
        input_fn=input_fn,
        steps=None,
        throttle_secs=params['throttle_secs'])
    get_logger().info('Start model training')
    tf.estimator.train_and_evaluate(estimator, train_spec, evaluation_spec)
    ModelProvider.writeProbe(params['model_dir'])
    get_logger().info('Model training done')
# musdb subset to evaluate; passed as `subsets=[_SPLIT]` when opening the
# dataset.
_SPLIT = 'test'
# NOTE(review): not referenced in the visible code - presumably the name of
# the mixture file of each track; confirm against the separation helper.
_MIXTURE = 'mixture.wav'
# NOTE(review): not referenced in the visible code - presumably the
# sub-directory receiving the separated audio; confirm.
_AUDIO_DIRECTORY = 'audio'
# Sub-directory of the output path in which the metrics are written.
_METRICS_DIRECTORY = 'metrics'
# Instrument and metric names used as keys of the compiled metrics dict.
_INSTRUMENTS = ('vocals', 'drums', 'bass', 'other')
_METRICS = ('SDR', 'SAR', 'SIR', 'ISR')
def _compute_musdb_metrics(
        arguments,
        musdb_root_directory,
        audio_output_directory):
    """ Generates musdb metrics from previously computed audio estimation.

    :param arguments: Entrypoint arguments, read for ``output_path``.
    :param musdb_root_directory: Root directory the musdb dataset is
                                 opened from.
    :param audio_output_directory: Directory to get audio estimation from.
    :returns: Path of generated metrics directory.
    :raises Exit: With code 10 when musdb or museval cannot be imported.
    """
    destination = join(arguments.output_path, _METRICS_DIRECTORY)
    get_logger().info('Starting musdb evaluation (this could be long) ...')
    try:
        # Optional extras: only required when running an evaluation.
        import musdb
        import museval
    except ImportError:
        log = get_logger()
        for message in (
                'Extra dependencies musdb and museval not found',
                'Please install musdb and museval first, abort'):
            log.error(message)
        raise Exit(10)
    museval.eval_mus_dir(
        dataset=musdb.DB(
            root=musdb_root_directory,
            is_wav=True,
            subsets=[_SPLIT]),
        estimates_dir=audio_output_directory,
        output_dir=destination)
    get_logger().info('musdb evaluation done')
    return destination
def _compile_metrics(metrics_output_directory):
    """ Compiles metrics from given directory and returns
    results as dict.

    Every ``test/*.json`` file found under the given directory is read;
    for each target it lists and each metric, the per-frame values are
    reduced to their median, frames whose value is NaN being ignored.

    :param metrics_output_directory: Directory to get metrics from.
    :returns: Compiled metrics as dict, mapping each instrument to a dict
              of metric name to list of medians (one per song file that
              contains the instrument).
    """
    # Imported locally: the module has no top-level import for these names.
    import json
    from glob import glob
    import numpy as np

    metrics = {
        instrument: {metric: [] for metric in _METRICS}
        for instrument in _INSTRUMENTS}
    for song in glob(join(metrics_output_directory, 'test/*.json')):
        with open(song, 'r') as stream:
            data = json.load(stream)
        for target in data['targets']:
            instrument = target['name']
            for metric in _METRICS:
                values = [
                    frame['metrics'][metric]
                    for frame in target['frames']]
                median = np.median([
                    value for value in values
                    if not np.isnan(value)])
                metrics[instrument][metric].append(median)
    return metrics
@spleeter.command()
def evaluate(
        adapter: str = AudioAdapterOption,
        output_path: Path = AudioOutputOption,
        stft_backend: STFTBackend = AudioSTFTBackendOption,
        params_filename: str = ModelParametersOption,
        mus_dir: Path = MUSDBDirectoryOption,
        mwf: bool = MWFOption,
        verbose: bool = VerboseOption) -> None:
    """
    Evaluate a model on the musDB test dataset

    Separates the musDB sources, computes the metrics on the result, then
    logs the median of every metric for every instrument.

    :param adapter: Name of the audio adapter to use for audio I/O.
    :param output_path: Path of the output directory to write audio files in.
    :param stft_backend: Backend in charge of computing the STFTs.
    :param params_filename: JSON filename that contains params.
    :param mus_dir: Path to musDB dataset directory.
    :param mwf: Whether to use multichannel Wiener filtering for separation.
    :param verbose: Enable verbose logs.
    """
    # Imported locally: the module has no top-level numpy import.
    import numpy as np

    # NOTE(review): `arguments`, `params` and `_separate_evaluation_dataset`
    # are not defined in the visible part of this module (they look like
    # leftovers of the former `entrypoint(arguments, params)` API); confirm
    # how they should be derived from the typer parameters above.
    # Separate musdb sources.
    audio_output_directory = _separate_evaluation_dataset(
        arguments,
        mus_dir,
        params)
    # Compute metrics with musdb.
    metrics_output_directory = _compute_musdb_metrics(
        arguments,
        mus_dir,
        audio_output_directory)
    # Compute and pretty print median metrics.
    metrics = _compile_metrics(metrics_output_directory)
    for instrument, instrument_metrics in metrics.items():
        get_logger().info('%s:', instrument)
        for metric, values in instrument_metrics.items():
            get_logger().info('%s: %s', metric, f'{np.median(values):.3f}')
    # NOTE(review): the signature is annotated `-> None` although the
    # compiled metrics are returned; kept as in the original.
    return metrics
@spleeter.commmand()
def separate(
adapter: str = AudioAdapterDescriptor,
output_path: Path = AudioOutput,
stft_backend: STFTBackend = AudioSTFTBackend,
params_filename: str = ModelParameters,
mwf: bool = MWF,
verbose: bool = Verbose,
files: List[Path] = Argument(
...,
help='List of input audio file path',
exists=True,
file_okay=True,
dir_okay=False,
readable=True,
resolve_path=True),
filename_format: str = Option(
'{filename}/{instrument}.{codec}',
help=(
'Template string that will be formatted to generated'
'output filename. Such template should be Python formattable'
'string, and could use {filename}, {instrument}, and {codec}'
'variables')),
duration: float = Option(
600.,
help=(
'Set a maximum duration for processing audio '
'(only separate offset + duration first seconds of '
'the input file)')),
offset: float = Option(
0.,
'--offset',
'-s',
help='Set the starting offset to separate audio from'),
codec: Codec = Option(
Codec.WAV,
help='Audio codec to be used for the separated output'),
bitrate: str = Option(
'128k',
help='Audio bitrate to be used for the separated output')
) -> None:
adapter: str = AudioAdapterOption,
bitrate: str = AudioBitrateOption,
codec: Codec = AudioCodecOption,
duration: float = AudioDurationOption,
files: List[Path] = AudioInputArgument,
offset: float = AudioOffsetOption,
output_path: Path = AudioAdapterOption,
stft_backend: STFTBackend = AudioSTFTBackendOption,
filename_format: str = FilenameFormatOption,
params_filename: str = ModelParametersOption,
mwf: bool = MWFOption,
verbose: bool = VerboseOption) -> None:
"""
Separate audio file(s)
"""
@@ -153,7 +204,7 @@ def separate(
audio_adapter: AudioAdapter = AudioAdapter.get(adapter)
separator: Separator = Separator(
params_filename,
MWF=MWF,
MWF=mwf,
stft_backend=stft_backend)
for filename in files:
separator.separate_to_file(

View File

@@ -1,197 +0,0 @@
#!/usr/bin/env python
# coding: utf8
""" This modules provides spleeter command as well as CLI parsing methods. """
from tempfile import gettempdir
from os.path import join
from ..separator import STFTBackend
from ..audio import Codec
from typer import Argument, Option
from typer.models import ArgumentInfo, OptionInfo
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# Positional argument: input audio files, which must exist, be readable
# regular files, and are resolved to absolute paths.
AudioInput: ArgumentInfo = Argument(
    ...,
    help='List of input audio file path',
    exists=True,
    file_okay=True,
    dir_okay=False,
    readable=True,
    resolve_path=True)

# Output directory; defaults to 'separated_audio' in the system temp folder.
AudioOutput: OptionInfo = Option(
    join(gettempdir(), 'separated_audio'),
    '--output_path',
    '-o',
    help='Path of the output directory to write audio files in')

# Starting offset of the separation, 0 by default.
AudioOffset: OptionInfo = Option(
    0.,
    '--offset',
    '-s',
    help='Set the starting offset to separate audio from')

# Maximum processed duration, 600 by default.
AudioDuration: OptionInfo = Option(
    600.,
    '--duration',
    '-d',
    help=(
        'Set a maximum duration for processing audio '
        '(only separate offset + duration first seconds of '
        'the input file)'))

# Output filename template. Each help fragment ends with a space: adjacent
# string literals are concatenated verbatim, and the words were previously
# glued together in the rendered help.
FilenameFormat: OptionInfo = Option(
    '{filename}/{instrument}.{codec}',
    '--filename_format',
    '-f',
    help=(
        'Template string that will be formatted to generated '
        'output filename. Such template should be Python formattable '
        'string, and could use {filename}, {instrument}, and {codec} '
        'variables'))

# Model parameters descriptor; defaults to 'spleeter:2stems'.
ModelParameters: OptionInfo = Option(
    'spleeter:2stems',
    '--params_filename',
    '-p',
    help='JSON filename that contains params')

# STFT backend selector, matched case-insensitively.
AudioSTFTBackend: OptionInfo = Option(
    STFTBackend.AUTO,
    '--stft-backend',
    '-B',
    case_sensitive=False,
    help=(
        'Who should be in charge of computing the stfts. Librosa is faster '
        'than tensorflow on CPU and uses less memory. "auto" will use '
        'tensorflow when GPU acceleration is available and librosa when not'))

# Codec of the separated output, WAV by default.
AudioCodec: OptionInfo = Option(
    Codec.WAV,
    '--codec',
    '-c',
    help='Audio codec to be used for the separated output')

# Bitrate of the separated output, '128k' by default.
AudioBitrate: OptionInfo = Option(
    '128k',
    '--bitrate',
    '-b',
    help='Audio bitrate to be used for the separated output')

# Boolean --mwf flag, disabled by default.
MWF: OptionInfo = Option(
    False,
    '--mwf',
    help='Whether to use multichannel Wiener filtering for separation')

# Required option: existing, readable musDB dataset directory.
MUSDBDirectory: OptionInfo = Option(
    ...,
    '--mus_dir',
    exists=True,
    dir_okay=True,
    file_okay=False,
    readable=True,
    resolve_path=True,
    help='Path to musDB dataset directory')

# Required option: existing, readable training data directory.
TrainingDataDirectory: OptionInfo = Option(
    ...,
    '--data',
    '-d',
    exists=True,
    dir_okay=True,
    file_okay=False,
    readable=True,
    resolve_path=True,
    help='Path of the folder containing audio data for training')

# Name of the audio adapter; defaults to the FFMPEG process adapter.
AudioAdapter: OptionInfo = Option(
    'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter',
    '--adapter',
    '-a',
    help='Name of the audio adapter to use for audio I/O')

# Boolean --verbose flag, disabled by default.
Verbose: OptionInfo = Option(
    False,
    '--verbose',
    help='Enable verbose logs')
def _add_common_options(parser):
    """ Registers on the given parser the options shared by all commands.

    :param parser: Parser to add common opt to.
    """
    shared_options = (
        (('-a', '--adapter'), OPT_ADAPTER),
        (('-p', '--params_filename'), OPT_PARAMS),
        (('--verbose',), OPT_VERBOSE))
    for flags, settings in shared_options:
        parser.add_argument(*flags, **settings)
def _create_train_parser(parser_factory):
    """ Builds the parser of the ``train`` command: the common options
    plus the training data option.

    :param parser_factory: Factory to use to create parser instance.
    :returns: Created and configured parser.
    """
    train_parser = parser_factory(
        'train',
        help='Train a source separation model')
    _add_common_options(train_parser)
    train_parser.add_argument('-d', '--data', **OPT_DATA)
    return train_parser
def _create_evaluate_parser(parser_factory):
    """ Builds the parser of the ``evaluate`` command: the common options
    plus output path, musDB directory, MWF and STFT backend options.

    :param parser_factory: Factory to use to create parser instance.
    :returns: Created and configured parser.
    """
    description = 'Evaluate a model on the musDB test dataset'
    evaluate_parser = parser_factory('evaluate', help=description)
    _add_common_options(evaluate_parser)
    evaluate_parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
    evaluate_parser.add_argument('--mus_dir', **OPT_MUSDB)
    evaluate_parser.add_argument('-m', '--mwf', **OPT_MWF)
    evaluate_parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
    return evaluate_parser
def _create_separate_parser(parser_factory):
    """ Creates an argparser for separation command

    :param parser_factory: Factory to use to create parser instance.
    :returns: Created and configured parser.
    """
    parser = parser_factory('separate', help='Separate audio files')
    _add_common_options(parser)
    parser.add_argument('-i', '--inputs', **OPT_INPUT)
    parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
    parser.add_argument('-f', '--filename_format', **OPT_FORMAT)
    parser.add_argument('-d', '--duration', **OPT_DURATION)
    parser.add_argument('-s', '--offset', **OPT_OFFSET)
    parser.add_argument('-c', '--codec', **OPT_CODEC)
    # The long flag was misspelled '--birate'. The old spelling stays first
    # so existing command lines, and the destination argparse derives from
    # the first long flag when none is given explicitly, are unaffected;
    # '--bitrate' is added as the correctly spelled alias.
    parser.add_argument('-b', '--birate', '--bitrate', **OPT_BITRATE)
    parser.add_argument('-m', '--mwf', **OPT_MWF)
    parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
    return parser
def create_argument_parser():
    """ Creates overall command line parser for Spleeter.

    The parser exposes the ``separate``, ``train`` and ``evaluate``
    sub-commands; one of them is required and its name is stored under
    ``command``.

    :returns: Created argument parser.
    """
    # Imported locally: the module has no top-level import for argparse.
    from argparse import ArgumentParser

    parser = ArgumentParser(prog='spleeter')
    subparsers = parser.add_subparsers()
    subparsers.dest = 'command'
    subparsers.required = True
    for create_parser in (
            _create_separate_parser,
            _create_train_parser,
            _create_evaluate_parser):
        create_parser(subparsers.add_parser)
    return parser

View File

@@ -163,5 +163,4 @@ def entrypoint(arguments, params):
get_logger().info('%s:', instrument)
for metric, value in metric.items():
get_logger().info('%s: %s', metric, f'{np.median(value):.3f}')
return metrics

123
spleeter/options.py Normal file
View File

@@ -0,0 +1,123 @@
#!/usr/bin/env python
# coding: utf8
""" This modules provides spleeter command as well as CLI parsing methods. """
from tempfile import gettempdir
from os.path import join
from .separator import STFTBackend
from .audio import Codec
from typer import Argument, Option
from typer.models import ArgumentInfo, OptionInfo
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# Positional argument: input audio files, which must exist, be readable
# regular files, and are resolved to absolute paths.
AudioInputArgument: ArgumentInfo = Argument(
    ...,
    help='List of input audio file path',
    exists=True,
    file_okay=True,
    dir_okay=False,
    readable=True,
    resolve_path=True)

# Name of the audio adapter; defaults to the FFMPEG process adapter.
AudioAdapterOption: OptionInfo = Option(
    'spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter',
    '--adapter',
    '-a',
    help='Name of the audio adapter to use for audio I/O')

# Output directory; defaults to 'separated_audio' in the system temp folder.
AudioOutputOption: OptionInfo = Option(
    join(gettempdir(), 'separated_audio'),
    '--output_path',
    '-o',
    help='Path of the output directory to write audio files in')

# Starting offset of the separation, 0 by default.
AudioOffsetOption: OptionInfo = Option(
    0.,
    '--offset',
    '-s',
    help='Set the starting offset to separate audio from')

# Maximum processed duration, 600 by default.
AudioDurationOption: OptionInfo = Option(
    600.,
    '--duration',
    '-d',
    help=(
        'Set a maximum duration for processing audio '
        '(only separate offset + duration first seconds of '
        'the input file)'))

# STFT backend selector, matched case-insensitively.
AudioSTFTBackendOption: OptionInfo = Option(
    STFTBackend.AUTO,
    '--stft-backend',
    '-B',
    case_sensitive=False,
    help=(
        'Who should be in charge of computing the stfts. Librosa is faster '
        'than tensorflow on CPU and uses less memory. "auto" will use '
        'tensorflow when GPU acceleration is available and librosa when not'))

# Codec of the separated output, WAV by default.
AudioCodecOption: OptionInfo = Option(
    Codec.WAV,
    '--codec',
    '-c',
    help='Audio codec to be used for the separated output')

# Bitrate of the separated output, '128k' by default.
AudioBitrateOption: OptionInfo = Option(
    '128k',
    '--bitrate',
    '-b',
    help='Audio bitrate to be used for the separated output')

# Output filename template. Each help fragment ends with a space: adjacent
# string literals are concatenated verbatim, and the words were previously
# glued together in the rendered help.
FilenameFormatOption: OptionInfo = Option(
    '{filename}/{instrument}.{codec}',
    '--filename_format',
    '-f',
    help=(
        'Template string that will be formatted to generated '
        'output filename. Such template should be Python formattable '
        'string, and could use {filename}, {instrument}, and {codec} '
        'variables'))

# Model parameters descriptor; defaults to 'spleeter:2stems'.
ModelParametersOption: OptionInfo = Option(
    'spleeter:2stems',
    '--params_filename',
    '-p',
    help='JSON filename that contains params')

# Boolean --mwf flag, disabled by default.
MWFOption: OptionInfo = Option(
    False,
    '--mwf',
    help='Whether to use multichannel Wiener filtering for separation')

# Required option: existing, readable musDB dataset directory.
MUSDBDirectoryOption: OptionInfo = Option(
    ...,
    '--mus_dir',
    exists=True,
    dir_okay=True,
    file_okay=False,
    readable=True,
    resolve_path=True,
    help='Path to musDB dataset directory')

# Required option: existing, readable training data directory.
TrainingDataDirectoryOption: OptionInfo = Option(
    ...,
    '--data',
    '-d',
    exists=True,
    dir_okay=True,
    file_okay=False,
    readable=True,
    resolve_path=True,
    help='Path of the folder containing audio data for training')

# Boolean --verbose flag, disabled by default.
VerboseOption: OptionInfo = Option(
    False,
    '--verbose',
    help='Enable verbose logs')