mirror of
https://github.com/YuzuZensai/spleeter.git
synced 2026-01-06 04:32:43 +00:00
@@ -16,3 +16,4 @@ dependencies:
|
||||
- musdb==0.3.1
|
||||
- norbert==0.2.1
|
||||
- spleeter
|
||||
- ffmpeg-python
|
||||
|
||||
@@ -16,4 +16,5 @@ dependencies:
|
||||
- musdb==0.3.1
|
||||
- norbert==0.2.1
|
||||
- spleeter
|
||||
- ffmpeg-python
|
||||
|
||||
|
||||
9
setup.py
9
setup.py
@@ -14,7 +14,7 @@ __license__ = 'MIT License'
|
||||
|
||||
# Default project values.
|
||||
project_name = 'spleeter'
|
||||
project_version = '1.4.1'
|
||||
project_version = '1.4.2'
|
||||
device_target = 'cpu'
|
||||
tensorflow_dependency = 'tensorflow'
|
||||
tensorflow_version = '1.14.0'
|
||||
@@ -63,14 +63,17 @@ setup(
|
||||
python_requires='>=3.6, <3.8',
|
||||
include_package_data=True,
|
||||
install_requires=[
|
||||
'ffmpeg-python',
|
||||
'importlib_resources ; python_version<"3.7"',
|
||||
'musdb==0.3.1',
|
||||
'museval==0.3.0',
|
||||
'norbert==0.2.1',
|
||||
'pandas==0.25.1',
|
||||
'requests',
|
||||
'setuptools>=41.0.0',
|
||||
'{}=={}'.format(tensorflow_dependency, tensorflow_version),
|
||||
],
|
||||
extras_require={
|
||||
'evaluation': ['musdb==0.3.1', 'museval==0.3.0']
|
||||
},
|
||||
entry_points={
|
||||
'console_scripts': ['spleeter=spleeter.__main__:entrypoint']
|
||||
},
|
||||
|
||||
@@ -12,7 +12,7 @@ import warnings
|
||||
|
||||
from .commands import create_argument_parser
|
||||
from .utils.configuration import load_configuration
|
||||
from .utils.logging import enable_logging, enable_verbose_logging
|
||||
from .utils.logging import enable_logging, enable_tensorflow_logging
|
||||
|
||||
__email__ = 'research@deezer.com'
|
||||
__author__ = 'Deezer Research'
|
||||
@@ -28,10 +28,9 @@ def main(argv):
|
||||
"""
|
||||
parser = create_argument_parser()
|
||||
arguments = parser.parse_args(argv[1:])
|
||||
enable_logging()
|
||||
if arguments.verbose:
|
||||
enable_verbose_logging()
|
||||
else:
|
||||
enable_logging()
|
||||
enable_tensorflow_logging()
|
||||
if arguments.command == 'separate':
|
||||
from .commands.separate import entrypoint
|
||||
elif arguments.command == 'train':
|
||||
|
||||
@@ -172,7 +172,7 @@ def create_argument_parser():
|
||||
|
||||
:returns: Created argument parser.
|
||||
"""
|
||||
parser = ArgumentParser(prog='python -m spleeter')
|
||||
parser = ArgumentParser(prog='spleeter')
|
||||
subparsers = parser.add_subparsers()
|
||||
subparsers.dest = 'command'
|
||||
subparsers.required = True
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
--mus_dir /path/to/musdb dataset
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
|
||||
from argparse import Namespace
|
||||
@@ -21,8 +22,6 @@ from glob import glob
|
||||
from os.path import join, exists
|
||||
|
||||
# pylint: disable=import-error
|
||||
import musdb
|
||||
import museval
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
# pylint: enable=import-error
|
||||
@@ -30,6 +29,15 @@ import pandas as pd
|
||||
from .separate import entrypoint as separate_entrypoint
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
try:
|
||||
import musdb
|
||||
import museval
|
||||
except ImportError:
|
||||
logger = get_logger()
|
||||
logger.error('Extra dependencies musdb and museval not found')
|
||||
logger.error('Please install musdb and museval first, abort')
|
||||
sys.exit(1)
|
||||
|
||||
__email__ = 'research@deezer.com'
|
||||
__author__ = 'Deezer Research'
|
||||
__license__ = 'MIT License'
|
||||
|
||||
@@ -129,7 +129,6 @@ def process_audio(
|
||||
yield_single_examples=False)
|
||||
# initialize pool for audio export
|
||||
pool = Pool(16)
|
||||
tasks = []
|
||||
for sample in prediction:
|
||||
sample_filename = sample.pop('audio_id', 'unknown_filename').decode()
|
||||
input_directory, input_filename = split(sample_filename)
|
||||
@@ -144,13 +143,12 @@ def process_audio(
|
||||
output_path,
|
||||
output_dirname,
|
||||
f'{instrument}.{codec}')
|
||||
tasks.append(
|
||||
pool.apply_async(
|
||||
audio_adapter.save,
|
||||
(filename, waveform, sample_rate, codec)))
|
||||
pool.apply_async(
|
||||
audio_adapter.save,
|
||||
(filename, waveform, sample_rate, codec))
|
||||
# Wait for everything to be written
|
||||
for task in tasks:
|
||||
task.wait(timeout=20)
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
|
||||
def entrypoint(arguments, params):
|
||||
|
||||
@@ -3,10 +3,9 @@
|
||||
|
||||
"""
|
||||
This module contains building functions for U-net source separation source
|
||||
separation models.
|
||||
Each instrument is modeled by a single U-net convolutional/deconvolutional
|
||||
network that take a mix spectrogram as input and the estimated sound spectrogram
|
||||
as output.
|
||||
separation models. Each instrument is modeled by a single U-net convolutional
|
||||
/ deconvolutional network that takes a mix spectrogram as input and the
|
||||
estimated sound spectrogram as output.
|
||||
"""
|
||||
|
||||
from functools import partial
|
||||
|
||||
@@ -65,9 +65,9 @@ class GithubModelProvider(ModelProvider):
|
||||
raise IOError(f'Resource {url} not found')
|
||||
with TemporaryFile() as stream:
|
||||
copyfileobj(response.raw, stream)
|
||||
get_logger().debug('Extracting downloaded archive')
|
||||
get_logger().info('Extracting downloaded %s archive', name)
|
||||
stream.seek(0)
|
||||
tar = tarfile.open(fileobj=stream)
|
||||
tar.extractall(path=path)
|
||||
tar.close()
|
||||
get_logger().debug('Model file extracted')
|
||||
get_logger().info('%s model file(s) extracted', name)
|
||||
|
||||
@@ -82,6 +82,7 @@ class AudioAdapter(ABC):
|
||||
duration.numpy(),
|
||||
sample_rate.numpy(),
|
||||
dtype=dtype.numpy())
|
||||
get_logger().info('Audio data loaded successfully')
|
||||
return (data, False)
|
||||
except Exception as e:
|
||||
get_logger().warning(e)
|
||||
|
||||
@@ -9,12 +9,11 @@
|
||||
"""
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
import numpy as np # pylint: disable=import-error
|
||||
# pylint: disable=import-error
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
# pylint: enable=import-error
|
||||
|
||||
from .adapter import AudioAdapter
|
||||
from ..logging import get_logger
|
||||
@@ -23,58 +22,9 @@ __email__ = 'research@deezer.com'
|
||||
__author__ = 'Deezer Research'
|
||||
__license__ = 'MIT License'
|
||||
|
||||
# Default FFMPEG binary name.
|
||||
_UNIX_BINARY = 'ffmpeg'
|
||||
_WINDOWS_BINARY = 'ffmpeg.exe'
|
||||
|
||||
|
||||
def _which(program):
    """ A pure python implementation of the `which` command
    for retrieving a program path from a command name or path.

    @see https://stackoverflow.com/a/377028/1211342

    :param program: Program name or path to expand.
    :returns: Path of program if an executable file was found, \
        None otherwise.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # Note: an explicit path is only checked, never searched in PATH.
        return program if is_exe(program) else None
    search_path = os.environ.get('PATH')
    if search_path is None:
        # Note: without PATH there is nothing to search, report the
        # program as missing instead of failing with a KeyError.
        return None
    for path in search_path.split(os.pathsep):
        candidate = os.path.join(path, program)
        if is_exe(candidate):
            return candidate
    return None
|
||||
|
||||
|
||||
def _get_ffmpeg_path():
    """ Resolves the FFMPEG binary location.

    The FFMPEG_PATH environment variable is used when defined, otherwise
    the default binary name of the running platform (Windows or UNIX
    style) is looked up.

    :returns: Path of FFMPEG binary as resolved by `_which`.
    :raise IOError: If FFMPEG binary cannot be found.
    """
    candidate = os.environ.get('FFMPEG_PATH', None)
    if candidate is None:
        # Note: fall back to the standard binary name of the platform.
        on_windows = platform.system() == 'Windows'
        candidate = _WINDOWS_BINARY if on_windows else _UNIX_BINARY
    resolved = _which(candidate)
    if resolved is None:
        raise IOError(f'FFMPEG binary ({candidate}) not found')
    return resolved
|
||||
|
||||
|
||||
def _to_ffmpeg_time(n):
|
||||
""" Format number of seconds to time expected by FFMPEG.
|
||||
|
||||
:param n: Time in seconds to format.
|
||||
:returns: Formatted time in FFMPEG format.
|
||||
"""
|
||||
@@ -83,56 +33,6 @@ def _to_ffmpeg_time(n):
|
||||
return '%d:%02d:%09.6f' % (h, m, s)
|
||||
|
||||
|
||||
def _parse_ffmpg_results(stderr):
    """ Extract number of channels and sample rate from
    the given FFMPEG STDERR output line.

    The channel description is the comma separated field that follows the
    sample rate. Supported forms are 'stereo', an explicit count such as
    '2 channels', and dotted layout names such as '5.1' or '5.1(side)'
    whose channel count is the sum of the dotted numbers. Any other
    description (e.g. 'mono') is reported as a single channel.

    :param stderr: STDERR output line to parse, patterns are matched \
        against lowercase text.
    :returns: Parsed n_channels and sample_rate values, each one being \
        0 when it could not be found in the line.
    """
    # Setup default value.
    n_channels = 0
    sample_rate = 0
    # Find samplerate.
    rate_match = re.search(r'(\d+) hz', stderr)
    if rate_match:
        sample_rate = int(rate_match.group(1))
    # Channel count.
    layout_match = re.search(r'hz, ([^,]+),', stderr)
    if layout_match:
        mode = layout_match.group(1)
        counted = re.match(r'(\d+) ', mode)
        dotted = re.match(r'\d+(?:\.\d+)+', mode)
        if mode == 'stereo':
            n_channels = 2
        elif counted:
            # Note: a zero count is reported as a single channel.
            n_channels = int(counted.group(1)) or 1
        elif dotted:
            # Note: '5.1' stands for 5 full range channels + 1 LFE.
            n_channels = sum(
                int(part) for part in dotted.group(0).split('.'))
        else:
            n_channels = 1
    return n_channels, sample_rate
|
||||
|
||||
|
||||
class _CommandBuilder(object):
    """ A simple builder pattern class for CLI argument list.

    Every mutator returns the builder itself so that calls can be chained.
    """

    def __init__(self, binary):
        """ Default constructor.

        :param binary: Binary name or path used as first command element.
        """
        self._command = [binary]

    def flag(self, flag):
        """ Add flag or unlabelled opt.

        :param flag: Element appended as is to the command.
        :returns: This builder.
        """
        self._command.append(flag)
        return self

    def opt(self, short, value, formatter=str):
        """ Add option if value not None.

        :param short: Option name (e.g. '-ar').
        :param value: Option value, option is skipped when None.
        :param formatter: Callable used to format value for the command.
        :returns: This builder.
        """
        if value is not None:
            self._command.append(short)
            self._command.append(formatter(value))
        return self

    def command(self):
        """ Build command.

        :returns: Command as a new list of arguments.
        """
        # Note: return a copy so that callers cannot alter builder state.
        return list(self._command)
|
||||
|
||||
|
||||
class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
""" An AudioAdapter implementation that use FFMPEG binary through
|
||||
subprocess in order to perform I/O operation for audio processing.
|
||||
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
FFMPEG_PATH environment variable.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
""" Default constructor. """
|
||||
self._ffmpeg_path = _get_ffmpeg_path()
|
||||
|
||||
def _get_command_builder(self):
|
||||
""" Creates and returns a command builder using FFMPEG path.
|
||||
|
||||
:returns: Built command builder.
|
||||
"""
|
||||
return _CommandBuilder(self._ffmpeg_path)
|
||||
|
||||
def load(
|
||||
self, path, offset=None, duration=None,
|
||||
sample_rate=None, dtype=np.float32):
|
||||
@@ -168,44 +57,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
"""
|
||||
if not isinstance(path, str):
|
||||
path = path.decode()
|
||||
command = (
|
||||
self._get_command_builder()
|
||||
.opt('-ss', offset, formatter=_to_ffmpeg_time)
|
||||
.opt('-t', duration, formatter=_to_ffmpeg_time)
|
||||
.opt('-i', path)
|
||||
.opt('-ar', sample_rate)
|
||||
.opt('-f', 'f32le')
|
||||
.flag('-')
|
||||
.command())
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
buffer = process.stdout.read(-1)
|
||||
# Read STDERR until end of the process detected.
|
||||
while True:
|
||||
status = process.stderr.readline()
|
||||
if not status:
|
||||
raise OSError('Stream info not found')
|
||||
if isinstance(status, bytes): # Note: Python 3 compatibility.
|
||||
status = status.decode('utf8', 'ignore')
|
||||
status = status.strip().lower()
|
||||
if 'no such file' in status:
|
||||
raise IOError(f'File {path} not found')
|
||||
elif 'invalid data found' in status:
|
||||
raise IOError(f'FFMPEG error : {status}')
|
||||
elif 'audio:' in status:
|
||||
n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status)
|
||||
if sample_rate is None:
|
||||
sample_rate = ffmpeg_sample_rate
|
||||
break
|
||||
# Load waveform and clean process.
|
||||
probe = ffmpeg.probe(path)
|
||||
if 'streams' not in probe or len(probe['streams']) == 0:
|
||||
raise IOError('No stream was found with ffprobe')
|
||||
metadata = next(
|
||||
stream
|
||||
for stream in probe['streams']
|
||||
if stream['codec_type'] == 'audio')
|
||||
n_channels = metadata['channels']
|
||||
if sample_rate is None:
|
||||
sample_rate = metadata['sample_rate']
|
||||
output_kwargs = {'format': 'f32le', 'ar': sample_rate}
|
||||
if duration is not None:
|
||||
output_kwargs['t'] = _to_ffmpeg_time(duration)
|
||||
if offset is not None:
|
||||
output_kwargs['ss'] = _to_ffmpeg_time(offset)
|
||||
process = (
|
||||
ffmpeg
|
||||
.input(path)
|
||||
.output('pipe:', **output_kwargs)
|
||||
.run_async(pipe_stdout=True, pipe_stderr=True))
|
||||
buffer, _ = process.communicate()
|
||||
waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
|
||||
if not waveform.dtype == np.dtype(dtype):
|
||||
waveform = waveform.astype(dtype)
|
||||
process.stdout.close()
|
||||
process.stderr.close()
|
||||
del process
|
||||
return (waveform, sample_rate)
|
||||
|
||||
def save(
|
||||
@@ -225,39 +100,22 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
if not os.path.exists(directory):
|
||||
os.makedirs(directory)
|
||||
get_logger().debug('Writing file %s', path)
|
||||
# NOTE: Tweak.
|
||||
if codec == 'wav':
|
||||
codec = None
|
||||
command = (
|
||||
self._get_command_builder()
|
||||
.flag('-y')
|
||||
.opt('-loglevel', 'error')
|
||||
.opt('-f', 'f32le')
|
||||
.opt('-ar', sample_rate)
|
||||
.opt('-ac', data.shape[1])
|
||||
.opt('-i', '-')
|
||||
.flag('-vn')
|
||||
.opt('-acodec', codec)
|
||||
.opt('-ar', sample_rate) # Note: why twice ?
|
||||
.opt('-strict', '-2') # Note: For 'aac' codec support.
|
||||
.opt('-ab', bitrate)
|
||||
.flag(path)
|
||||
.command())
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
stdout=open(os.devnull, 'wb'),
|
||||
stdin=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
# Write data to STDIN.
|
||||
input_kwargs = {'ar': sample_rate, 'ac': data.shape[1]}
|
||||
output_kwargs = {'ar': sample_rate, 'strict': '-2'}
|
||||
if bitrate:
|
||||
output_kwargs['audio_bitrate'] = bitrate
|
||||
if codec is not None and codec != 'wav':
|
||||
output_kwargs['codec'] = codec
|
||||
process = (
|
||||
ffmpeg
|
||||
.input('pipe:', format='f32le', **input_kwargs)
|
||||
.output(path, **output_kwargs)
|
||||
.overwrite_output()
|
||||
.run_async(pipe_stdin=True, quiet=True))
|
||||
try:
|
||||
process.stdin.write(
|
||||
data.astype('<f4').tostring())
|
||||
process.stdin.write(data.astype('<f4').tobytes())
|
||||
process.stdin.close()
|
||||
process.wait()
|
||||
except IOError:
|
||||
raise IOError(f'FFMPEG error: {process.stderr.read()}')
|
||||
# Clean process.
|
||||
process.stdin.close()
|
||||
if process.stderr is not None:
|
||||
process.stderr.close()
|
||||
process.wait()
|
||||
del process
|
||||
get_logger().info('File %s written', path)
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
""" Utility functions for creating estimator. """
|
||||
|
||||
from pathlib import Path
|
||||
from os.path import join
|
||||
from tempfile import gettempdir
|
||||
|
||||
# pylint: disable=import-error
|
||||
import tensorflow as tf
|
||||
@@ -15,7 +17,7 @@ from ..model import model_fn
|
||||
from ..model.provider import get_default_model_provider
|
||||
|
||||
# Default exporting directory for predictor.
|
||||
DEFAULT_EXPORT_DIRECTORY = '/tmp/serving'
|
||||
DEFAULT_EXPORT_DIRECTORY = join(gettempdir(), 'serving')
|
||||
|
||||
|
||||
def create_estimator(params, MWF):
|
||||
|
||||
@@ -3,12 +3,16 @@
|
||||
|
||||
""" Centralized logging facilities for Spleeter. """
|
||||
|
||||
import logging
|
||||
|
||||
from os import environ
|
||||
|
||||
__email__ = 'research@deezer.com'
|
||||
__author__ = 'Deezer Research'
|
||||
__license__ = 'MIT License'
|
||||
|
||||
_FORMAT = '%(levelname)s:%(name)s:%(message)s'
|
||||
|
||||
|
||||
class _LoggerHolder(object):
|
||||
""" Logger singleton instance holder. """
|
||||
@@ -16,30 +20,42 @@ class _LoggerHolder(object):
|
||||
INSTANCE = None
|
||||
|
||||
|
||||
def get_tensorflow_logger():
    """ Returns the tensorflow logging module.

    The import is performed at call time rather than at module level.

    :returns: The `tensorflow.compat.v1.logging` module.
    """
    # pylint: disable=import-error
    from tensorflow.compat.v1 import logging as tensorflow_logging
    # pylint: enable=import-error
    return tensorflow_logging
|
||||
|
||||
|
||||
def get_logger():
|
||||
""" Returns library scoped logger.
|
||||
|
||||
:returns: Library logger.
|
||||
"""
|
||||
if _LoggerHolder.INSTANCE is None:
|
||||
# pylint: disable=import-error
|
||||
from tensorflow.compat.v1 import logging
|
||||
# pylint: enable=import-error
|
||||
_LoggerHolder.INSTANCE = logging
|
||||
_LoggerHolder.INSTANCE.set_verbosity(_LoggerHolder.INSTANCE.ERROR)
|
||||
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
formatter = logging.Formatter(_FORMAT)
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(formatter)
|
||||
logger = logging.getLogger('spleeter')
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(logging.INFO)
|
||||
_LoggerHolder.INSTANCE = logger
|
||||
return _LoggerHolder.INSTANCE
|
||||
|
||||
|
||||
def enable_logging():
|
||||
""" Enable INFO level logging. """
|
||||
def enable_tensorflow_logging():
|
||||
""" Enable tensorflow logging. """
|
||||
environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
|
||||
tf_logger = get_tensorflow_logger()
|
||||
tf_logger.set_verbosity(tf_logger.INFO)
|
||||
logger = get_logger()
|
||||
logger.set_verbosity(logger.INFO)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
def enable_verbose_logging():
|
||||
""" Enable DEBUG level logging. """
|
||||
environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
|
||||
logger = get_logger()
|
||||
logger.set_verbosity(logger.DEBUG)
|
||||
def enable_logging():
|
||||
""" Configure default logging. """
|
||||
environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
tf_logger = get_tensorflow_logger()
|
||||
tf_logger.set_verbosity(tf_logger.ERROR)
|
||||
|
||||
Reference in New Issue
Block a user