Merge pull request #41 from deezer/fix-ffmpeg

Fix ffmpeg
This commit is contained in:
fvoituret
2019-11-07 00:14:28 +01:00
committed by GitHub
13 changed files with 106 additions and 220 deletions

View File

@@ -16,3 +16,4 @@ dependencies:
- musdb==0.3.1
- norbert==0.2.1
- spleeter
- ffmpeg-python

View File

@@ -16,4 +16,5 @@ dependencies:
- musdb==0.3.1
- norbert==0.2.1
- spleeter
- ffmpeg-python

View File

@@ -14,7 +14,7 @@ __license__ = 'MIT License'
# Default project values.
project_name = 'spleeter'
project_version = '1.4.1'
project_version = '1.4.2'
device_target = 'cpu'
tensorflow_dependency = 'tensorflow'
tensorflow_version = '1.14.0'
@@ -63,14 +63,17 @@ setup(
python_requires='>=3.6, <3.8',
include_package_data=True,
install_requires=[
'ffmpeg-python',
'importlib_resources ; python_version<"3.7"',
'musdb==0.3.1',
'museval==0.3.0',
'norbert==0.2.1',
'pandas==0.25.1',
'requests',
'setuptools>=41.0.0',
'{}=={}'.format(tensorflow_dependency, tensorflow_version),
],
extras_require={
'evaluation': ['musdb==0.3.1', 'museval==0.3.0']
},
entry_points={
'console_scripts': ['spleeter=spleeter.__main__:entrypoint']
},

View File

@@ -12,7 +12,7 @@ import warnings
from .commands import create_argument_parser
from .utils.configuration import load_configuration
from .utils.logging import enable_logging, enable_verbose_logging
from .utils.logging import enable_logging, enable_tensorflow_logging
__email__ = 'research@deezer.com'
__author__ = 'Deezer Research'
@@ -28,10 +28,9 @@ def main(argv):
"""
parser = create_argument_parser()
arguments = parser.parse_args(argv[1:])
enable_logging()
if arguments.verbose:
enable_verbose_logging()
else:
enable_logging()
enable_tensorflow_logging()
if arguments.command == 'separate':
from .commands.separate import entrypoint
elif arguments.command == 'train':

View File

@@ -172,7 +172,7 @@ def create_argument_parser():
:returns: Created argument parser.
"""
parser = ArgumentParser(prog='python -m spleeter')
parser = ArgumentParser(prog='spleeter')
subparsers = parser.add_subparsers()
subparsers.dest = 'command'
subparsers.required = True

View File

@@ -13,6 +13,7 @@
--mus_dir /path/to/musdb dataset
"""
import sys
import json
from argparse import Namespace
@@ -21,8 +22,6 @@ from glob import glob
from os.path import join, exists
# pylint: disable=import-error
import musdb
import museval
import numpy as np
import pandas as pd
# pylint: enable=import-error
@@ -30,6 +29,15 @@ import pandas as pd
from .separate import entrypoint as separate_entrypoint
from ..utils.logging import get_logger
try:
import musdb
import museval
except ImportError:
logger = get_logger()
logger.error('Extra dependencies musdb and museval not found')
logger.error('Please install musdb and museval first, abort')
sys.exit(1)
__email__ = 'research@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'

View File

@@ -129,7 +129,6 @@ def process_audio(
yield_single_examples=False)
# initialize pool for audio export
pool = Pool(16)
tasks = []
for sample in prediction:
sample_filename = sample.pop('audio_id', 'unknown_filename').decode()
input_directory, input_filename = split(sample_filename)
@@ -144,13 +143,12 @@ def process_audio(
output_path,
output_dirname,
f'{instrument}.{codec}')
tasks.append(
pool.apply_async(
audio_adapter.save,
(filename, waveform, sample_rate, codec)))
pool.apply_async(
audio_adapter.save,
(filename, waveform, sample_rate, codec))
# Wait for everything to be written
for task in tasks:
task.wait(timeout=20)
pool.close()
pool.join()
def entrypoint(arguments, params):

View File

@@ -3,10 +3,9 @@
"""
This module contains building functions for U-net source separation source
separation models.
Each instrument is modeled by a single U-net convolutional/deconvolutional
network that take a mix spectrogram as input and the estimated sound spectrogram
as output.
separation models. Each instrument is modeled by a single U-net convolutional
/ deconvolutional network that takes a mix spectrogram as input and the
estimated sound spectrogram as output.
"""
from functools import partial

View File

@@ -65,9 +65,9 @@ class GithubModelProvider(ModelProvider):
raise IOError(f'Resource {url} not found')
with TemporaryFile() as stream:
copyfileobj(response.raw, stream)
get_logger().debug('Extracting downloaded archive')
get_logger().info('Extracting downloaded %s archive', name)
stream.seek(0)
tar = tarfile.open(fileobj=stream)
tar.extractall(path=path)
tar.close()
get_logger().debug('Model file extracted')
get_logger().info('%s model file(s) extracted', name)

View File

@@ -82,6 +82,7 @@ class AudioAdapter(ABC):
duration.numpy(),
sample_rate.numpy(),
dtype=dtype.numpy())
get_logger().info('Audio data loaded successfully')
return (data, False)
except Exception as e:
get_logger().warning(e)

View File

@@ -9,12 +9,11 @@
"""
import os
import os.path
import platform
import re
import subprocess
import numpy as np # pylint: disable=import-error
# pylint: disable=import-error
import ffmpeg
import numpy as np
# pylint: enable=import-error
from .adapter import AudioAdapter
from ..logging import get_logger
@@ -23,58 +22,9 @@ __email__ = 'research@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# Default FFMPEG binary name.
_UNIX_BINARY = 'ffmpeg'
_WINDOWS_BINARY = 'ffmpeg.exe'
def _which(program):
    """ A pure python implementation of the `which` command
    for retrieving absolute path from command name or path.

    A program given with a directory component is only checked as is,
    the PATH lookup being reserved to bare command names.

    @see https://stackoverflow.com/a/377028/1211342
    :param program: Program name or path to expand.
    :returns: Absolute path of program if any, None otherwise.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # Explicit path: it is never searched through PATH.
        return program if is_exe(program) else None
    # Note: PATH may be undefined (stripped environment), fall back to
    # the platform default search path instead of raising a KeyError.
    search_path = os.environ.get('PATH', os.defpath)
    for path in search_path.split(os.pathsep):
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file
    return None
def _get_ffmpeg_path():
    """ Resolves the FFMPEG binary location.

    The FFMPEG_PATH environment variable is used when defined, otherwise
    the default binary name for the running platform (Windows or UNIX
    style) is looked up.

    :returns: Absolute path of FFMPEG binary.
    :raise IOError: If FFMPEG binary cannot be found.
    """
    ffmpeg_path = os.environ.get('FFMPEG_PATH')
    if ffmpeg_path is None:
        # Note: infer the standard binary name from the platform.
        on_windows = platform.system() == 'Windows'
        ffmpeg_path = _WINDOWS_BINARY if on_windows else _UNIX_BINARY
    resolved = _which(ffmpeg_path)
    if resolved is not None:
        return resolved
    raise IOError(f'FFMPEG binary ({ffmpeg_path}) not found')
def _to_ffmpeg_time(n):
""" Format number of seconds to time expected by FFMPEG.
:param n: Time in seconds to format.
:returns: Formatted time in FFMPEG format.
"""
@@ -83,56 +33,6 @@ def _to_ffmpeg_time(n):
return '%d:%02d:%09.6f' % (h, m, s)
def _parse_ffmpg_results(stderr):
    """ Extract number of channels and sample rate from
    the given FFMPEG STDERR output line.

    The patterns are lowercase, so the line is expected to be
    lowercased by the caller beforehand.

    :param stderr: STDERR output line to parse.
    :returns: Parsed n_channels and sample_rate values, each being 0
              when the related information is not found in the line.
    """
    # Channel count of the layouts FFMPEG reports by name rather than
    # as '<n> channels'. Without it every named layout but 'stereo'
    # would be reported as a single channel.
    named_layouts = {
        'mono': 1,
        'stereo': 2,
        '2.1': 3,
        '3.0': 3,
        '3.1': 4,
        '4.0': 4,
        'quad': 4,
        '4.1': 5,
        '5.0': 5,
        '5.1': 6,
        '6.0': 6,
        '6.1': 7,
        '7.0': 7,
        '7.1': 8,
    }
    # Setup default value.
    n_channels = 0
    sample_rate = 0
    # Find samplerate
    match = re.search(r'(\d+) hz', stderr)
    if match:
        sample_rate = int(match.group(1))
    # Channel count.
    match = re.search(r'hz, ([^,]+),', stderr)
    if match:
        mode = match.group(1)
        # Drop variant suffix such as '(side)' or '(back)'.
        layout = mode.split('(', 1)[0].strip()
        if layout in named_layouts:
            n_channels = named_layouts[layout]
        else:
            count = re.match(r'(\d+) ', mode)
            # Unknown layout or explicit zero both default to one channel.
            n_channels = (int(count.group(1)) if count else 0) or 1
    return n_channels, sample_rate
class _CommandBuilder(object):
    """ A simple builder pattern class for CLI argument list.

    Each `flag` / `opt` call returns the builder itself so that calls
    can be chained, `command` giving the resulting argument list.
    """

    def __init__(self, binary):
        """ Default constructor.

        :param binary: Binary name or path used as first argument.
        """
        self._command = [binary]

    def flag(self, flag):
        """ Add flag or unlabelled opt.

        :param flag: Argument to append as is.
        :returns: This builder.
        """
        self._command.append(flag)
        return self

    def opt(self, short, value, formatter=str):
        """ Add option if value not None.

        :param short: Option name (e.g. '-ar').
        :param value: Option value, option being skipped when None.
        :param formatter: Callable converting value to its CLI form.
        :returns: This builder.
        """
        if value is not None:
            self._command.append(short)
            self._command.append(formatter(value))
        return self

    def command(self):
        """ Build command as a list of arguments.

        :returns: A new list holding the arguments added so far.
        """
        # Note: a copy is returned so that callers altering the result
        # do not corrupt the builder internal state.
        return list(self._command)
class FFMPEGProcessAudioAdapter(AudioAdapter):
""" An AudioAdapter implementation that use FFMPEG binary through
subprocess in order to perform I/O operation for audio processing.
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
FFMPEG_PATH environment variable.
"""
def __init__(self):
""" Default constructor. """
self._ffmpeg_path = _get_ffmpeg_path()
def _get_command_builder(self):
""" Creates and returns a command builder using FFMPEG path.
:returns: Built command builder.
"""
return _CommandBuilder(self._ffmpeg_path)
def load(
self, path, offset=None, duration=None,
sample_rate=None, dtype=np.float32):
@@ -168,44 +57,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
"""
if not isinstance(path, str):
path = path.decode()
command = (
self._get_command_builder()
.opt('-ss', offset, formatter=_to_ffmpeg_time)
.opt('-t', duration, formatter=_to_ffmpeg_time)
.opt('-i', path)
.opt('-ar', sample_rate)
.opt('-f', 'f32le')
.flag('-')
.command())
process = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
buffer = process.stdout.read(-1)
# Read STDERR until end of the process detected.
while True:
status = process.stderr.readline()
if not status:
raise OSError('Stream info not found')
if isinstance(status, bytes): # Note: Python 3 compatibility.
status = status.decode('utf8', 'ignore')
status = status.strip().lower()
if 'no such file' in status:
raise IOError(f'File {path} not found')
elif 'invalid data found' in status:
raise IOError(f'FFMPEG error : {status}')
elif 'audio:' in status:
n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status)
if sample_rate is None:
sample_rate = ffmpeg_sample_rate
break
# Load waveform and clean process.
probe = ffmpeg.probe(path)
if 'streams' not in probe or len(probe['streams']) == 0:
raise IOError('No stream was found with ffprobe')
metadata = next(
stream
for stream in probe['streams']
if stream['codec_type'] == 'audio')
n_channels = metadata['channels']
if sample_rate is None:
sample_rate = metadata['sample_rate']
output_kwargs = {'format': 'f32le', 'ar': sample_rate}
if duration is not None:
output_kwargs['t'] = _to_ffmpeg_time(duration)
if offset is not None:
output_kwargs['ss'] = _to_ffmpeg_time(offset)
process = (
ffmpeg
.input(path)
.output('pipe:', **output_kwargs)
.run_async(pipe_stdout=True, pipe_stderr=True))
buffer, _ = process.communicate()
waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
if not waveform.dtype == np.dtype(dtype):
waveform = waveform.astype(dtype)
process.stdout.close()
process.stderr.close()
del process
return (waveform, sample_rate)
def save(
@@ -225,39 +100,22 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
if not os.path.exists(directory):
os.makedirs(directory)
get_logger().debug('Writing file %s', path)
# NOTE: Tweak.
if codec == 'wav':
codec = None
command = (
self._get_command_builder()
.flag('-y')
.opt('-loglevel', 'error')
.opt('-f', 'f32le')
.opt('-ar', sample_rate)
.opt('-ac', data.shape[1])
.opt('-i', '-')
.flag('-vn')
.opt('-acodec', codec)
.opt('-ar', sample_rate) # Note: why twice ?
.opt('-strict', '-2') # Note: For 'aac' codec support.
.opt('-ab', bitrate)
.flag(path)
.command())
process = subprocess.Popen(
command,
stdout=open(os.devnull, 'wb'),
stdin=subprocess.PIPE,
stderr=subprocess.PIPE)
# Write data to STDIN.
input_kwargs = {'ar': sample_rate, 'ac': data.shape[1]}
output_kwargs = {'ar': sample_rate, 'strict': '-2'}
if bitrate:
output_kwargs['audio_bitrate'] = bitrate
if codec is not None and codec != 'wav':
output_kwargs['codec'] = codec
process = (
ffmpeg
.input('pipe:', format='f32le', **input_kwargs)
.output(path, **output_kwargs)
.overwrite_output()
.run_async(pipe_stdin=True, quiet=True))
try:
process.stdin.write(
data.astype('<f4').tostring())
process.stdin.write(data.astype('<f4').tobytes())
process.stdin.close()
process.wait()
except IOError:
raise IOError(f'FFMPEG error: {process.stderr.read()}')
# Clean process.
process.stdin.close()
if process.stderr is not None:
process.stderr.close()
process.wait()
del process
get_logger().info('File %s written', path)

View File

@@ -4,6 +4,8 @@
""" Utility functions for creating estimator. """
from pathlib import Path
from os.path import join
from tempfile import gettempdir
# pylint: disable=import-error
import tensorflow as tf
@@ -15,7 +17,7 @@ from ..model import model_fn
from ..model.provider import get_default_model_provider
# Default exporting directory for predictor.
DEFAULT_EXPORT_DIRECTORY = '/tmp/serving'
DEFAULT_EXPORT_DIRECTORY = join(gettempdir(), 'serving')
def create_estimator(params, MWF):

View File

@@ -3,12 +3,16 @@
""" Centralized logging facilities for Spleeter. """
import logging
from os import environ
__email__ = 'research@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
_FORMAT = '%(levelname)s:%(name)s:%(message)s'
class _LoggerHolder(object):
""" Logger singleton instance holder. """
@@ -16,30 +20,42 @@ class _LoggerHolder(object):
INSTANCE = None
def get_tensorflow_logger():
""" Returns the TensorFlow logging module.

The import is performed inside the function body, i.e. when the
function is called rather than when this module is imported.

:returns: The `tensorflow.compat.v1.logging` module.
"""
# pylint: disable=import-error
from tensorflow.compat.v1 import logging
# pylint: enable=import-error
return logging
def get_logger():
""" Returns library scoped logger.
:returns: Library logger.
"""
if _LoggerHolder.INSTANCE is None:
# pylint: disable=import-error
from tensorflow.compat.v1 import logging
# pylint: enable=import-error
_LoggerHolder.INSTANCE = logging
_LoggerHolder.INSTANCE.set_verbosity(_LoggerHolder.INSTANCE.ERROR)
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
formatter = logging.Formatter(_FORMAT)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger('spleeter')
logger.addHandler(handler)
logger.setLevel(logging.INFO)
_LoggerHolder.INSTANCE = logger
return _LoggerHolder.INSTANCE
def enable_logging():
""" Enable INFO level logging. """
def enable_tensorflow_logging():
""" Enable tensorflow logging. """
environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
tf_logger = get_tensorflow_logger()
tf_logger.set_verbosity(tf_logger.INFO)
logger = get_logger()
logger.set_verbosity(logger.INFO)
logger.setLevel(logging.DEBUG)
def enable_verbose_logging():
""" Enable DEBUG level logging. """
environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
logger = get_logger()
logger.set_verbosity(logger.DEBUG)
def enable_logging():
""" Configure default logging. """
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf_logger = get_tensorflow_logger()
tf_logger.set_verbosity(tf_logger.ERROR)