diff --git a/conda/spleeter-cpu.yaml b/conda/spleeter-cpu.yaml index df9ed04..5137ba8 100644 --- a/conda/spleeter-cpu.yaml +++ b/conda/spleeter-cpu.yaml @@ -16,3 +16,4 @@ dependencies: - musdb==0.3.1 - norbert==0.2.1 - spleeter + - ffmpeg-python diff --git a/conda/spleeter-gpu.yaml b/conda/spleeter-gpu.yaml index b269a35..094fe7d 100644 --- a/conda/spleeter-gpu.yaml +++ b/conda/spleeter-gpu.yaml @@ -16,4 +16,5 @@ dependencies: - musdb==0.3.1 - norbert==0.2.1 - spleeter + - ffmpeg-python diff --git a/setup.py b/setup.py index 7ecb58a..7eb2839 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ __license__ = 'MIT License' # Default project values. project_name = 'spleeter' -project_version = '1.4.1' +project_version = '1.4.2' device_target = 'cpu' tensorflow_dependency = 'tensorflow' tensorflow_version = '1.14.0' @@ -63,14 +63,17 @@ setup( python_requires='>=3.6, <3.8', include_package_data=True, install_requires=[ + 'ffmpeg-python', 'importlib_resources ; python_version<"3.7"', - 'musdb==0.3.1', - 'museval==0.3.0', 'norbert==0.2.1', 'pandas==0.25.1', 'requests', + 'setuptools>=41.0.0', '{}=={}'.format(tensorflow_dependency, tensorflow_version), ], + extras_require={ + 'evaluation': ['musdb==0.3.1', 'museval==0.3.0'] + }, entry_points={ 'console_scripts': ['spleeter=spleeter.__main__:entrypoint'] }, diff --git a/spleeter/__main__.py b/spleeter/__main__.py index fde5292..5f72040 100644 --- a/spleeter/__main__.py +++ b/spleeter/__main__.py @@ -12,7 +12,7 @@ import warnings from .commands import create_argument_parser from .utils.configuration import load_configuration -from .utils.logging import enable_logging, enable_verbose_logging +from .utils.logging import enable_logging, enable_tensorflow_logging __email__ = 'research@deezer.com' __author__ = 'Deezer Research' @@ -28,10 +28,9 @@ def main(argv): """ parser = create_argument_parser() arguments = parser.parse_args(argv[1:]) + enable_logging() if arguments.verbose: - enable_verbose_logging() - else: - 
enable_logging() + enable_tensorflow_logging() if arguments.command == 'separate': from .commands.separate import entrypoint elif arguments.command == 'train': diff --git a/spleeter/commands/__init__.py b/spleeter/commands/__init__.py index 2577399..331ee2d 100644 --- a/spleeter/commands/__init__.py +++ b/spleeter/commands/__init__.py @@ -172,7 +172,7 @@ def create_argument_parser(): :returns: Created argument parser. """ - parser = ArgumentParser(prog='python -m spleeter') + parser = ArgumentParser(prog='spleeter') subparsers = parser.add_subparsers() subparsers.dest = 'command' subparsers.required = True diff --git a/spleeter/commands/evaluate.py b/spleeter/commands/evaluate.py index dc990ad..c2fe789 100644 --- a/spleeter/commands/evaluate.py +++ b/spleeter/commands/evaluate.py @@ -13,6 +13,7 @@ --mus_dir /path/to/musdb dataset """ +import sys import json from argparse import Namespace @@ -21,8 +22,6 @@ from glob import glob from os.path import join, exists # pylint: disable=import-error -import musdb -import museval import numpy as np import pandas as pd # pylint: enable=import-error @@ -30,6 +29,15 @@ import pandas as pd from .separate import entrypoint as separate_entrypoint from ..utils.logging import get_logger +try: + import musdb + import museval +except ImportError: + logger = get_logger() + logger.error('Extra dependencies musdb and museval not found') + logger.error('Please install musdb and museval first, abort') + sys.exit(1) + __email__ = 'research@deezer.com' __author__ = 'Deezer Research' __license__ = 'MIT License' diff --git a/spleeter/commands/separate.py b/spleeter/commands/separate.py index 0098351..71eed2d 100644 --- a/spleeter/commands/separate.py +++ b/spleeter/commands/separate.py @@ -129,7 +129,6 @@ def process_audio( yield_single_examples=False) # initialize pool for audio export pool = Pool(16) - tasks = [] for sample in prediction: sample_filename = sample.pop('audio_id', 'unknown_filename').decode() input_directory, input_filename = 
split(sample_filename) @@ -144,13 +143,12 @@ def process_audio( output_path, output_dirname, f'{instrument}.{codec}') - tasks.append( - pool.apply_async( - audio_adapter.save, - (filename, waveform, sample_rate, codec))) + pool.apply_async( + audio_adapter.save, + (filename, waveform, sample_rate, codec)) # Wait for everything to be written - for task in tasks: - task.wait(timeout=20) + pool.close() + pool.join() def entrypoint(arguments, params): diff --git a/spleeter/model/functions/unet.py b/spleeter/model/functions/unet.py index 245a5e5..245a247 100644 --- a/spleeter/model/functions/unet.py +++ b/spleeter/model/functions/unet.py @@ -3,10 +3,9 @@ """ This module contains building functions for U-net source separation source -separation models. -Each instrument is modeled by a single U-net convolutional/deconvolutional -network that take a mix spectrogram as input and the estimated sound spectrogram -as output. +separation models. Each instrument is modeled by a single U-net convolutional +/ deconvolutional network that takes a mix spectrogram as input and the +estimated sound spectrogram as output. 
""" from functools import partial diff --git a/spleeter/model/provider/github.py b/spleeter/model/provider/github.py index cc7028c..aad0c44 100644 --- a/spleeter/model/provider/github.py +++ b/spleeter/model/provider/github.py @@ -65,9 +65,9 @@ class GithubModelProvider(ModelProvider): raise IOError(f'Resource {url} not found') with TemporaryFile() as stream: copyfileobj(response.raw, stream) - get_logger().debug('Extracting downloaded archive') + get_logger().info('Extracting downloaded %s archive', name) stream.seek(0) tar = tarfile.open(fileobj=stream) tar.extractall(path=path) tar.close() - get_logger().debug('Model file extracted') + get_logger().info('%s model file(s) extracted', name) diff --git a/spleeter/utils/audio/adapter.py b/spleeter/utils/audio/adapter.py index b2d7cb1..d1a64c0 100644 --- a/spleeter/utils/audio/adapter.py +++ b/spleeter/utils/audio/adapter.py @@ -82,6 +82,7 @@ class AudioAdapter(ABC): duration.numpy(), sample_rate.numpy(), dtype=dtype.numpy()) + get_logger().info('Audio data loaded successfully') return (data, False) except Exception as e: get_logger().warning(e) diff --git a/spleeter/utils/audio/ffmpeg.py b/spleeter/utils/audio/ffmpeg.py index ad24e33..d9c5506 100644 --- a/spleeter/utils/audio/ffmpeg.py +++ b/spleeter/utils/audio/ffmpeg.py @@ -9,12 +9,11 @@ """ import os -import os.path -import platform -import re -import subprocess -import numpy as np # pylint: disable=import-error +# pylint: disable=import-error +import ffmpeg +import numpy as np +# pylint: enable=import-error from .adapter import AudioAdapter from ..logging import get_logger @@ -23,58 +22,9 @@ __email__ = 'research@deezer.com' __author__ = 'Deezer Research' __license__ = 'MIT License' -# Default FFMPEG binary name. -_UNIX_BINARY = 'ffmpeg' -_WINDOWS_BINARY = 'ffmpeg.exe' - - -def _which(program): - """ A pure python implementation of `which`command - for retrieving absolute path from command name or path. 
- - @see https://stackoverflow.com/a/377028/1211342 - - :param program: Program name or path to expend. - :returns: Absolute path of program if any, None otherwise. - """ - def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - fpath, _ = os.path.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ['PATH'].split(os.pathsep): - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - return None - - -def _get_ffmpeg_path(): - """ Retrieves FFMPEG binary path using ENVVAR if defined - or default binary name (Windows or UNIX style). - - :returns: Absolute path of FFMPEG binary. - :raise IOError: If FFMPEG binary cannot be found. - """ - ffmpeg_path = os.environ.get('FFMPEG_PATH', None) - if ffmpeg_path is None: - # Note: try to infer standard binary name regarding of platform. - if platform.system() == 'Windows': - ffmpeg_path = _WINDOWS_BINARY - else: - ffmpeg_path = _UNIX_BINARY - expended = _which(ffmpeg_path) - if expended is None: - raise IOError(f'FFMPEG binary ({ffmpeg_path}) not found') - return expended - def _to_ffmpeg_time(n): """ Format number of seconds to time expected by FFMPEG. - :param n: Time in seconds to format. :returns: Formatted time in FFMPEG format. """ @@ -83,56 +33,6 @@ def _to_ffmpeg_time(n): return '%d:%02d:%09.6f' % (h, m, s) -def _parse_ffmpg_results(stderr): - """ Extract number of channels and sample rate from - the given FFMPEG STDERR output line. - - :param stderr: STDERR output line to parse. - :returns: Parsed n_channels and sample_rate values. - """ - # Setup default value. - n_channels = 0 - sample_rate = 0 - # Find samplerate - match = re.search(r'(\d+) hz', stderr) - if match: - sample_rate = int(match.group(1)) - # Channel count. 
- match = re.search(r'hz, ([^,]+),', stderr) - if match: - mode = match.group(1) - if mode == 'stereo': - n_channels = 2 - else: - match = re.match(r'(\d+) ', mode) - n_channels = match and int(match.group(1)) or 1 - return n_channels, sample_rate - - -class _CommandBuilder(object): - """ A simple builder pattern class for CLI string. """ - - def __init__(self, binary): - """ Default constructor. """ - self._command = [binary] - - def flag(self, flag): - """ Add flag or unlabelled opt. """ - self._command.append(flag) - return self - - def opt(self, short, value, formatter=str): - """ Add option if value not None. """ - if value is not None: - self._command.append(short) - self._command.append(formatter(value)) - return self - - def command(self): - """ Build string command. """ - return self._command - - class FFMPEGProcessAudioAdapter(AudioAdapter): """ An AudioAdapter implementation that use FFMPEG binary through subprocess in order to perform I/O operation for audio processing. @@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter): FFMPEG_PATH environment variable. """ - def __init__(self): - """ Default constructor. """ - self._ffmpeg_path = _get_ffmpeg_path() - - def _get_command_builder(self): - """ Creates and returns a command builder using FFMPEG path. - - :returns: Built command builder. 
- """ - return _CommandBuilder(self._ffmpeg_path) - def load( self, path, offset=None, duration=None, sample_rate=None, dtype=np.float32): @@ -168,44 +57,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter): """ if not isinstance(path, str): path = path.decode() - command = ( - self._get_command_builder() - .opt('-ss', offset, formatter=_to_ffmpeg_time) - .opt('-t', duration, formatter=_to_ffmpeg_time) - .opt('-i', path) - .opt('-ar', sample_rate) - .opt('-f', 'f32le') - .flag('-') - .command()) - process = subprocess.Popen( - command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - buffer = process.stdout.read(-1) - # Read STDERR until end of the process detected. - while True: - status = process.stderr.readline() - if not status: - raise OSError('Stream info not found') - if isinstance(status, bytes): # Note: Python 3 compatibility. - status = status.decode('utf8', 'ignore') - status = status.strip().lower() - if 'no such file' in status: - raise IOError(f'File {path} not found') - elif 'invalid data found' in status: - raise IOError(f'FFMPEG error : {status}') - elif 'audio:' in status: - n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status) - if sample_rate is None: - sample_rate = ffmpeg_sample_rate - break - # Load waveform and clean process. 
+ probe = ffmpeg.probe(path) + if 'streams' not in probe or len(probe['streams']) == 0: + raise IOError('No stream was found with ffprobe') + metadata = next( + stream + for stream in probe['streams'] + if stream['codec_type'] == 'audio') + n_channels = metadata['channels'] + if sample_rate is None: + sample_rate = metadata['sample_rate'] + output_kwargs = {'format': 'f32le', 'ar': sample_rate} + if duration is not None: + output_kwargs['t'] = _to_ffmpeg_time(duration) + if offset is not None: + output_kwargs['ss'] = _to_ffmpeg_time(offset) + process = ( + ffmpeg + .input(path) + .output('pipe:', **output_kwargs) + .run_async(pipe_stdout=True, pipe_stderr=True)) + buffer, _ = process.communicate() waveform = np.frombuffer(buffer, dtype='