refactor: use separator

feat: add bitrate opt
feat: add offset opt
This commit is contained in:
Félix Voituret
2019-11-14 13:03:55 -05:00
parent e912ae0fce
commit 4a743cd618
4 changed files with 71 additions and 158 deletions

View File

@@ -75,7 +75,27 @@ jobs:
key: sdist-{{ .Branch }}-{{ checksum "setup.py" }} key: sdist-{{ .Branch }}-{{ checksum "setup.py" }}
- run: - run:
name: upload to PyPi name: upload to PyPi
# TODO: Infer destination regarding of branch.
# - master => production PyPi
# - development => testing PyPi
command: pip install twine && twine upload dist/* command: pip install twine && twine upload dist/*
# =======================================================================================
# Conda distribution.
# =======================================================================================
conda-deploy:
docker:
- image: null # TODO: use circle ci python image.
steps:
- run:
name: checkout feedstock
command: git clone https://github.com/conda-forge/spleeter-feedstock
- run:
name: update version
command: sed -i 's///g'
- run:
name: deploy
command: git add recipe && git commit -m "" && git push
workflows: workflows:
version: 2 version: 2
spleeter-workflow: spleeter-workflow:
@@ -91,6 +111,5 @@ workflows:
branches: branches:
only: only:
- master - master
- development
requires: requires:
- sdist - sdist

View File

@@ -14,7 +14,7 @@ __license__ = 'MIT License'
# Default project values. # Default project values.
project_name = 'spleeter' project_name = 'spleeter'
project_version = '1.4.3' project_version = '1.4.4'
device_target = 'cpu' device_target = 'cpu'
tensorflow_dependency = 'tensorflow' tensorflow_dependency = 'tensorflow'
tensorflow_version = '1.14.0' tensorflow_version = '1.14.0'

View File

@@ -13,31 +13,31 @@ __email__ = 'research@deezer.com'
__author__ = 'Deezer Research' __author__ = 'Deezer Research'
__license__ = 'MIT License' __license__ = 'MIT License'
# -i opt specification. # -i opt specification (separate).
OPT_INPUT = { OPT_INPUT = {
'dest': 'audio_filenames', 'dest': 'inputs',
'nargs': '+', 'nargs': '+',
'help': 'List of input audio filenames', 'help': 'List of input audio filenames',
'required': True 'required': True
} }
# -o opt specification. # -o opt specification (evaluate and separate).
OPT_OUTPUT = { OPT_OUTPUT = {
'dest': 'output_path', 'dest': 'output_path',
'default': join(gettempdir(), 'separated_audio'), 'default': join(gettempdir(), 'separated_audio'),
'help': 'Path of the output directory to write audio files in' 'help': 'Path of the output directory to write audio files in'
} }
# -p opt specification. # -p opt specification (train, evaluate and separate).
OPT_PARAMS = { OPT_PARAMS = {
'dest': 'params_filename', 'dest': 'configuration',
'default': 'spleeter:2stems', 'default': 'spleeter:2stems',
'type': str, 'type': str,
'action': 'store', 'action': 'store',
'help': 'JSON filename that contains params' 'help': 'JSON filename that contains params'
} }
# -n opt specification. # -n opt specification (separate).
OPT_OUTPUT_NAMING = { OPT_OUTPUT_NAMING = {
'dest': 'output_naming', 'dest': 'output_naming',
'default': 'filename', 'default': 'filename',
@@ -54,26 +54,42 @@ OPT_OUTPUT_NAMING = {
', <output_path>/audio/<instument2>.wav)') ', <output_path>/audio/<instument2>.wav)')
} }
# -s opt specification (separate).
OPT_OFFSET = {
'dest': 'offset',
'type': float,
'default': 0.,
'help': 'Set the starting offset to separate audio from.'
}
# -d opt specification (separate). # -d opt specification (separate).
OPT_DURATION = { OPT_DURATION = {
'dest': 'max_duration', 'dest': 'duration',
'type': float, 'type': float,
'default': 600., 'default': 600.,
'help': ( 'help': (
'Set a maximum duration for processing audio ' 'Set a maximum duration for processing audio '
'(only separate max_duration first seconds of ' '(only separate offset + duration first seconds of '
'the input file)') 'the input file)')
} }
# -c opt specification. # -c opt specification (separate).
OPT_CODEC = { OPT_CODEC = {
'dest': 'audio_codec', 'dest': 'codec',
'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'), 'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'),
'default': 'wav', 'default': 'wav',
'help': 'Audio codec to be used for the separated output' 'help': 'Audio codec to be used for the separated output'
} }
# -m opt specification. # -b opt specification (separate).
OPT_BITRATE = {
'dest': 'bitrate',
'type': str,
'default': '128k',
'help': 'Audio bitrate to be used for the separated output'
}
# -m opt specification (evaluate and separate).
OPT_MWF = { OPT_MWF = {
'dest': 'MWF', 'dest': 'MWF',
'action': 'store_const', 'action': 'store_const',
@@ -82,7 +98,7 @@ OPT_MWF = {
'help': 'Whether to use multichannel Wiener filtering for separation', 'help': 'Whether to use multichannel Wiener filtering for separation',
} }
# --mus_dir opt specification. # --mus_dir opt specification (evaluate).
OPT_MUSDB = { OPT_MUSDB = {
'dest': 'mus_dir', 'dest': 'mus_dir',
'type': str, 'type': str,
@@ -98,14 +114,14 @@ OPT_DATA = {
'help': 'Path of the folder containing audio data for training' 'help': 'Path of the folder containing audio data for training'
} }
# -a opt specification. # -a opt specification (train, evaluate and separate).
OPT_ADAPTER = { OPT_ADAPTER = {
'dest': 'audio_adapter', 'dest': 'audio_adapter',
'type': str, 'type': str,
'help': 'Name of the audio adapter to use for audio I/O' 'help': 'Name of the audio adapter to use for audio I/O'
} }
# -a opt specification. # -a opt specification (train, evaluate and separate).
OPT_VERBOSE = { OPT_VERBOSE = {
'action': 'store_true', 'action': 'store_true',
'help': 'Shows verbose logs' 'help': 'Shows verbose logs'
@@ -158,11 +174,13 @@ def _create_separate_parser(parser_factory):
""" """
parser = parser_factory('separate', help='Separate audio files') parser = parser_factory('separate', help='Separate audio files')
_add_common_options(parser) _add_common_options(parser)
parser.add_argument('-i', '--audio_filenames', **OPT_INPUT) parser.add_argument('-i', '--inputs', **OPT_INPUT)
parser.add_argument('-o', '--output_path', **OPT_OUTPUT) parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
parser.add_argument('-n', '--output_naming', **OPT_OUTPUT_NAMING) parser.add_argument('-n', '--output_naming', **OPT_OUTPUT_NAMING)
parser.add_argument('-d', '--max_duration', **OPT_DURATION) parser.add_argument('-s', '--offset', **OPT_OFFSET)
parser.add_argument('-c', '--audio_codec', **OPT_CODEC) parser.add_argument('-d', '--duration', **OPT_DURATION)
parser.add_argument('-c', '--codec', **OPT_CODEC)
parser.add_argument('-b', '--bitrate', **OPT_BITRATE)
parser.add_argument('-m', '--mwf', **OPT_MWF) parser.add_argument('-m', '--mwf', **OPT_MWF)
return parser return parser

View File

@@ -22,6 +22,7 @@ import numpy as np
from ..audio.adapter import get_audio_adapter from ..audio.adapter import get_audio_adapter
from ..audio.convertor import to_n_channels from ..audio.convertor import to_n_channels
from ..separator import Separator
from ..utils.estimator import create_estimator from ..utils.estimator import create_estimator
from ..utils.tensor import set_tensor_shape from ..utils.tensor import set_tensor_shape
@@ -30,149 +31,24 @@ __author__ = 'Deezer Research'
__license__ = 'MIT License' __license__ = 'MIT License'
def get_dataset(audio_adapter, filenames_and_crops, sample_rate, n_channels):
"""
Build a tensorflow dataset of waveform from a filename list with crop
information.
Params:
- audio_adapter: An AudioAdapter instance to load audio from.
- filenames_and_crops: list of (audio_filename, start, duration)
tuples; separation is performed on each filename
from start (in seconds) to start + duration
(in seconds).
- sample_rate: audio sample_rate of the input and output audio
signals
- n_channels: int, number of channels of the input and output
audio signals
Returns
A tensorflow dataset of waveform to feed a tensorflow estimator in
predict mode.
"""
filenames, starts, ends = list(zip(*filenames_and_crops))
dataset = tf.data.Dataset.from_tensor_slices({
'audio_id': list(filenames),
'start': list(starts),
'end': list(ends)
})
# Load waveform.
dataset = dataset.map(
lambda sample: dict(
sample,
**audio_adapter.load_tf_waveform(
sample['audio_id'],
sample_rate=sample_rate,
offset=sample['start'],
duration=sample['end'] - sample['start'])),
num_parallel_calls=2)
# Filter out error.
dataset = dataset.filter(
lambda sample: tf.logical_not(sample['waveform_error']))
# Convert waveform to the right number of channels.
dataset = dataset.map(
lambda sample: dict(
sample,
waveform=to_n_channels(sample['waveform'], n_channels)))
# Set number of channels (required for the model).
dataset = dataset.map(
lambda sample: dict(
sample,
waveform=set_tensor_shape(sample['waveform'], (None, n_channels))))
return dataset
def process_audio(
audio_adapter,
filenames_and_crops, estimator, output_path,
sample_rate, n_channels, codec, output_naming):
"""
Perform separation on a list of audio ids.
Params:
- audio_adapter: Audio adapter to use for audio I/O.
- filenames_and_crops: list of (audio_filename, start, duration)
tuples; separation is performed on each filename
from start (in seconds) to start + duration
(in seconds).
- estimator: the tensorflow estimator that performs the
source separation.
- output_path: output_path where to export separated files.
- sample_rate: audio sample_rate of the input and output audio
signals
- n_channels: int, number of channels of the input and output
audio signals
- codec: string codec to be used for export (could be
"wav", "mp3", "ogg", "m4a") could be anything
supported by ffmpeg.
- output_naming: string (= "filename" or "directory")
naming convention for output.
for an input file /path/to/audio/input_file.wav:
* if output_naming is equal to "filename":
output files will be put in the directory <output_path>/input_file
(<output_path>/input_file/<instrument1>.<codec>,
<output_path>/input_file/<instrument2>.<codec>...).
* if output_naming is equal to "directory":
output files will be put in the directory <output_path>/audio/
(<output_path>/audio/<instrument1>.<codec>,
<output_path>/audio/<instrument2>.<codec>...)
Use "directory" when separating the MusDB dataset.
"""
# Get estimator
prediction = estimator.predict(
lambda: get_dataset(
audio_adapter,
filenames_and_crops,
sample_rate,
n_channels),
yield_single_examples=False)
# initialize pool for audio export
pool = Pool(16)
for sample in prediction:
sample_filename = sample.pop('audio_id', 'unknown_filename').decode()
input_directory, input_filename = split(sample_filename)
if output_naming == 'directory':
output_dirname = split(input_directory)[1]
elif output_naming == 'filename':
output_dirname = splitext(input_filename)[0]
else:
raise ValueError(f'Unknown output naming {output_naming}')
for instrument, waveform in sample.items():
filename = join(
output_path,
output_dirname,
f'{instrument}.{codec}')
pool.apply_async(
audio_adapter.save,
(filename, waveform, sample_rate, codec))
# Wait for everything to be written
pool.close()
pool.join()
def entrypoint(arguments, params): def entrypoint(arguments, params):
""" Command entrypoint. """ Command entrypoint.
:param arguments: Command line parsed argument as argparse.Namespace. :param arguments: Command line parsed argument as argparse.Namespace.
:param params: Deserialized JSON configuration file provided in CLI args. :param params: Deserialized JSON configuration file provided in CLI args.
""" """
# TODO: check with output naming.
audio_adapter = get_audio_adapter(arguments.audio_adapter) audio_adapter = get_audio_adapter(arguments.audio_adapter)
filenames = arguments.audio_filenames separator = Separator(arguments.configuration, arguments.MWF)
output_path = arguments.output_path for filename in arguments.inputs:
max_duration = arguments.max_duration separator.separate_to_file(
audio_codec = arguments.audio_codec filename,
output_naming = arguments.output_naming arguments.output_path,
estimator = create_estimator(params, arguments.MWF) audio_adapter=audio_adapter,
filenames_and_crops = [ offset=arguments.offset,
(filename, 0., max_duration) duration=arguments.duration,
for filename in filenames] codec=arguments.codec,
process_audio( bitrate=arguments.bitrate,
audio_adapter, synchronous=False
filenames_and_crops, )
estimator, separator.join()
output_path,
params['sample_rate'],
params['n_channels'],
codec=audio_codec,
output_naming=output_naming)