mirror of
https://github.com/YuzuZensai/spleeter.git
synced 2026-01-31 14:58:23 +00:00
refactor: use separator
feat: add bitrate opt
feat: add offset opt
This commit is contained in:
@@ -75,7 +75,27 @@ jobs:
|
|||||||
key: sdist-{{ .Branch }}-{{ checksum "setup.py" }}
|
key: sdist-{{ .Branch }}-{{ checksum "setup.py" }}
|
||||||
- run:
|
- run:
|
||||||
name: upload to PyPi
|
name: upload to PyPi
|
||||||
|
# TODO: Infer the destination from the branch:
|
||||||
|
# - master => production PyPi
|
||||||
|
# - development => testing PyPi
|
||||||
command: pip install twine && twine upload dist/*
|
command: pip install twine && twine upload dist/*
|
||||||
|
|
||||||
|
# =======================================================================================
|
||||||
|
# Conda distribution.
|
||||||
|
# =======================================================================================
|
||||||
|
conda-deploy:
|
||||||
|
docker:
|
||||||
|
- image: null # TODO: use circle ci python image.
|
||||||
|
steps:
|
||||||
|
- run:
|
||||||
|
name: checkout feedstock
|
||||||
|
command: git clone https://github.com/conda-forge/spleeter-feedstock
|
||||||
|
- run:
|
||||||
|
name: update version
|
||||||
|
command: sed -i 's///g'
|
||||||
|
- run:
|
||||||
|
name: deploy
|
||||||
|
command: git add recipe && git commit -m "" && git push
|
||||||
workflows:
|
workflows:
|
||||||
version: 2
|
version: 2
|
||||||
spleeter-workflow:
|
spleeter-workflow:
|
||||||
@@ -91,6 +111,5 @@ workflows:
|
|||||||
branches:
|
branches:
|
||||||
only:
|
only:
|
||||||
- master
|
- master
|
||||||
- development
|
|
||||||
requires:
|
requires:
|
||||||
- sdist
|
- sdist
|
||||||
2
setup.py
2
setup.py
@@ -14,7 +14,7 @@ __license__ = 'MIT License'
|
|||||||
|
|
||||||
# Default project values.
|
# Default project values.
|
||||||
project_name = 'spleeter'
|
project_name = 'spleeter'
|
||||||
project_version = '1.4.3'
|
project_version = '1.4.4'
|
||||||
device_target = 'cpu'
|
device_target = 'cpu'
|
||||||
tensorflow_dependency = 'tensorflow'
|
tensorflow_dependency = 'tensorflow'
|
||||||
tensorflow_version = '1.14.0'
|
tensorflow_version = '1.14.0'
|
||||||
|
|||||||
@@ -13,31 +13,31 @@ __email__ = 'research@deezer.com'
|
|||||||
__author__ = 'Deezer Research'
|
__author__ = 'Deezer Research'
|
||||||
__license__ = 'MIT License'
|
__license__ = 'MIT License'
|
||||||
|
|
||||||
# -i opt specification.
|
# -i opt specification (separate).
|
||||||
OPT_INPUT = {
|
OPT_INPUT = {
|
||||||
'dest': 'audio_filenames',
|
'dest': 'inputs',
|
||||||
'nargs': '+',
|
'nargs': '+',
|
||||||
'help': 'List of input audio filenames',
|
'help': 'List of input audio filenames',
|
||||||
'required': True
|
'required': True
|
||||||
}
|
}
|
||||||
|
|
||||||
# -o opt specification.
|
# -o opt specification (evaluate and separate).
|
||||||
OPT_OUTPUT = {
|
OPT_OUTPUT = {
|
||||||
'dest': 'output_path',
|
'dest': 'output_path',
|
||||||
'default': join(gettempdir(), 'separated_audio'),
|
'default': join(gettempdir(), 'separated_audio'),
|
||||||
'help': 'Path of the output directory to write audio files in'
|
'help': 'Path of the output directory to write audio files in'
|
||||||
}
|
}
|
||||||
|
|
||||||
# -p opt specification.
|
# -p opt specification (train, evaluate and separate).
|
||||||
OPT_PARAMS = {
|
OPT_PARAMS = {
|
||||||
'dest': 'params_filename',
|
'dest': 'configuration',
|
||||||
'default': 'spleeter:2stems',
|
'default': 'spleeter:2stems',
|
||||||
'type': str,
|
'type': str,
|
||||||
'action': 'store',
|
'action': 'store',
|
||||||
'help': 'JSON filename that contains params'
|
'help': 'JSON filename that contains params'
|
||||||
}
|
}
|
||||||
|
|
||||||
# -n opt specification.
|
# -n opt specification (separate).
|
||||||
OPT_OUTPUT_NAMING = {
|
OPT_OUTPUT_NAMING = {
|
||||||
'dest': 'output_naming',
|
'dest': 'output_naming',
|
||||||
'default': 'filename',
|
'default': 'filename',
|
||||||
@@ -54,26 +54,42 @@ OPT_OUTPUT_NAMING = {
|
|||||||
', <output_path>/audio/<instument2>.wav)')
|
', <output_path>/audio/<instument2>.wav)')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# -s opt specification (separate).
|
||||||
|
OPT_OFFSET = {
|
||||||
|
'dest': 'offset',
|
||||||
|
'type': float,
|
||||||
|
'default': 0.,
|
||||||
|
'help': 'Set the starting offset to separate audio from.'
|
||||||
|
}
|
||||||
|
|
||||||
# -d opt specification (separate).
|
# -d opt specification (separate).
|
||||||
OPT_DURATION = {
|
OPT_DURATION = {
|
||||||
'dest': 'max_duration',
|
'dest': 'duration',
|
||||||
'type': float,
|
'type': float,
|
||||||
'default': 600.,
|
'default': 600.,
|
||||||
'help': (
|
'help': (
|
||||||
'Set a maximum duration for processing audio '
|
'Set a maximum duration for processing audio '
|
||||||
'(only separate max_duration first seconds of '
|
'(only separate offset + duration first seconds of '
|
||||||
'the input file)')
|
'the input file)')
|
||||||
}
|
}
|
||||||
|
|
||||||
# -c opt specification.
|
# -c opt specification (separate).
|
||||||
OPT_CODEC = {
|
OPT_CODEC = {
|
||||||
'dest': 'audio_codec',
|
'dest': 'codec',
|
||||||
'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'),
|
'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'),
|
||||||
'default': 'wav',
|
'default': 'wav',
|
||||||
'help': 'Audio codec to be used for the separated output'
|
'help': 'Audio codec to be used for the separated output'
|
||||||
}
|
}
|
||||||
|
|
||||||
# -m opt specification.
|
# -b opt specification (separate).
|
||||||
|
OPT_BITRATE = {
|
||||||
|
'dest': 'bitrate',
|
||||||
|
'type': int,
|
||||||
|
'default': '128k',
|
||||||
|
'help': 'Audio bitrate to be used for the separated output'
|
||||||
|
}
|
||||||
|
|
||||||
|
# -m opt specification (evaluate and separate).
|
||||||
OPT_MWF = {
|
OPT_MWF = {
|
||||||
'dest': 'MWF',
|
'dest': 'MWF',
|
||||||
'action': 'store_const',
|
'action': 'store_const',
|
||||||
@@ -82,7 +98,7 @@ OPT_MWF = {
|
|||||||
'help': 'Whether to use multichannel Wiener filtering for separation',
|
'help': 'Whether to use multichannel Wiener filtering for separation',
|
||||||
}
|
}
|
||||||
|
|
||||||
# --mus_dir opt specification.
|
# --mus_dir opt specification (evaluate).
|
||||||
OPT_MUSDB = {
|
OPT_MUSDB = {
|
||||||
'dest': 'mus_dir',
|
'dest': 'mus_dir',
|
||||||
'type': str,
|
'type': str,
|
||||||
@@ -98,14 +114,14 @@ OPT_DATA = {
|
|||||||
'help': 'Path of the folder containing audio data for training'
|
'help': 'Path of the folder containing audio data for training'
|
||||||
}
|
}
|
||||||
|
|
||||||
# -a opt specification.
|
# -a opt specification (train, evaluate and separate).
|
||||||
OPT_ADAPTER = {
|
OPT_ADAPTER = {
|
||||||
'dest': 'audio_adapter',
|
'dest': 'audio_adapter',
|
||||||
'type': str,
|
'type': str,
|
||||||
'help': 'Name of the audio adapter to use for audio I/O'
|
'help': 'Name of the audio adapter to use for audio I/O'
|
||||||
}
|
}
|
||||||
|
|
||||||
# -a opt specification.
|
# Verbose opt specification (train, evaluate and separate).
|
||||||
OPT_VERBOSE = {
|
OPT_VERBOSE = {
|
||||||
'action': 'store_true',
|
'action': 'store_true',
|
||||||
'help': 'Shows verbose logs'
|
'help': 'Shows verbose logs'
|
||||||
@@ -158,11 +174,13 @@ def _create_separate_parser(parser_factory):
|
|||||||
"""
|
"""
|
||||||
parser = parser_factory('separate', help='Separate audio files')
|
parser = parser_factory('separate', help='Separate audio files')
|
||||||
_add_common_options(parser)
|
_add_common_options(parser)
|
||||||
parser.add_argument('-i', '--audio_filenames', **OPT_INPUT)
|
parser.add_argument('-i', '--inputs', **OPT_INPUT)
|
||||||
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
|
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
|
||||||
parser.add_argument('-n', '--output_naming', **OPT_OUTPUT_NAMING)
|
parser.add_argument('-n', '--output_naming', **OPT_OUTPUT_NAMING)
|
||||||
parser.add_argument('-d', '--max_duration', **OPT_DURATION)
|
parser.add_
|
||||||
parser.add_argument('-c', '--audio_codec', **OPT_CODEC)
|
parser.add_argument('-d', '--duration', **OPT_DURATION)
|
||||||
|
parser.add_argument('-c', '--codec', **OPT_CODEC)
|
||||||
|
parser.add_argument('-b', '--birate', **OPT_BITRATE)
|
||||||
parser.add_argument('-m', '--mwf', **OPT_MWF)
|
parser.add_argument('-m', '--mwf', **OPT_MWF)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import numpy as np
|
|||||||
|
|
||||||
from ..audio.adapter import get_audio_adapter
|
from ..audio.adapter import get_audio_adapter
|
||||||
from ..audio.convertor import to_n_channels
|
from ..audio.convertor import to_n_channels
|
||||||
|
from ..separator import Separator
|
||||||
from ..utils.estimator import create_estimator
|
from ..utils.estimator import create_estimator
|
||||||
from ..utils.tensor import set_tensor_shape
|
from ..utils.tensor import set_tensor_shape
|
||||||
|
|
||||||
@@ -30,149 +31,24 @@ __author__ = 'Deezer Research'
|
|||||||
__license__ = 'MIT License'
|
__license__ = 'MIT License'
|
||||||
|
|
||||||
|
|
||||||
def get_dataset(audio_adapter, filenames_and_crops, sample_rate, n_channels):
|
|
||||||
""""
|
|
||||||
Build a tensorflow dataset of waveform from a filename list with crop
|
|
||||||
information.
|
|
||||||
|
|
||||||
Params:
|
|
||||||
- audio_adapter: An AudioAdapter instance to load audio from.
|
|
||||||
- filenames_and_crops: list of (audio_filename, start, duration)
|
|
||||||
tuples; separation is performed on each filename
|
|
||||||
from start (in seconds) to start + duration
|
|
||||||
(in seconds).
|
|
||||||
- sample_rate: audio sample_rate of the input and output audio
|
|
||||||
signals
|
|
||||||
- n_channels: int, number of channels of the input and output
|
|
||||||
audio signals
|
|
||||||
|
|
||||||
Returns
|
|
||||||
A tensorflow dataset of waveform to feed a tensorflow estimator in
|
|
||||||
predict mode.
|
|
||||||
"""
|
|
||||||
filenames, starts, ends = list(zip(*filenames_and_crops))
|
|
||||||
dataset = tf.data.Dataset.from_tensor_slices({
|
|
||||||
'audio_id': list(filenames),
|
|
||||||
'start': list(starts),
|
|
||||||
'end': list(ends)
|
|
||||||
})
|
|
||||||
# Load waveform.
|
|
||||||
dataset = dataset.map(
|
|
||||||
lambda sample: dict(
|
|
||||||
sample,
|
|
||||||
**audio_adapter.load_tf_waveform(
|
|
||||||
sample['audio_id'],
|
|
||||||
sample_rate=sample_rate,
|
|
||||||
offset=sample['start'],
|
|
||||||
duration=sample['end'] - sample['start'])),
|
|
||||||
num_parallel_calls=2)
|
|
||||||
# Filter out error.
|
|
||||||
dataset = dataset.filter(
|
|
||||||
lambda sample: tf.logical_not(sample['waveform_error']))
|
|
||||||
# Convert waveform to the right number of channels.
|
|
||||||
dataset = dataset.map(
|
|
||||||
lambda sample: dict(
|
|
||||||
sample,
|
|
||||||
waveform=to_n_channels(sample['waveform'], n_channels)))
|
|
||||||
# Set number of channels (required for the model).
|
|
||||||
dataset = dataset.map(
|
|
||||||
lambda sample: dict(
|
|
||||||
sample,
|
|
||||||
waveform=set_tensor_shape(sample['waveform'], (None, n_channels))))
|
|
||||||
return dataset
|
|
||||||
|
|
||||||
|
|
||||||
def process_audio(
|
|
||||||
audio_adapter,
|
|
||||||
filenames_and_crops, estimator, output_path,
|
|
||||||
sample_rate, n_channels, codec, output_naming):
|
|
||||||
"""
|
|
||||||
Perform separation on a list of audio ids.
|
|
||||||
|
|
||||||
Params:
|
|
||||||
- audio_adapter: Audio adapter to use for audio I/O.
|
|
||||||
- filenames_and_crops: list of (audio_filename, start, duration)
|
|
||||||
tuples; separation is performed on each filename
|
|
||||||
from start (in seconds) to start + duration
|
|
||||||
(in seconds).
|
|
||||||
- estimator: the tensorflow estimator that performs the
|
|
||||||
source separation.
|
|
||||||
- output_path: output_path where to export separated files.
|
|
||||||
- sample_rate: audio sample_rate of the input and output audio
|
|
||||||
signals
|
|
||||||
- n_channels: int, number of channels of the input and output
|
|
||||||
audio signals
|
|
||||||
- codec: string codec to be used for export (could be
|
|
||||||
"wav", "mp3", "ogg", "m4a") could be anything
|
|
||||||
supported by ffmpeg.
|
|
||||||
- output_naming: string (= "filename" or "directory")
|
|
||||||
naming convention for output.
|
|
||||||
for an input file /path/to/audio/input_file.wav:
|
|
||||||
* if output_naming is equal to "filename":
|
|
||||||
output files will be put in the directory <output_path>/input_file
|
|
||||||
(<output_path>/input_file/<instrument1>.<codec>,
|
|
||||||
<output_path>/input_file/<instrument2>.<codec>...).
|
|
||||||
* if output_naming is equal to "directory":
|
|
||||||
output files will be put in the directory <output_path>/audio/
|
|
||||||
(<output_path>/audio/<instrument1>.<codec>,
|
|
||||||
<output_path>/audio/<instrument2>.<codec>...)
|
|
||||||
Use "directory" when separating the MusDB dataset.
|
|
||||||
|
|
||||||
"""
|
|
||||||
# Get estimator
|
|
||||||
prediction = estimator.predict(
|
|
||||||
lambda: get_dataset(
|
|
||||||
audio_adapter,
|
|
||||||
filenames_and_crops,
|
|
||||||
sample_rate,
|
|
||||||
n_channels),
|
|
||||||
yield_single_examples=False)
|
|
||||||
# initialize pool for audio export
|
|
||||||
pool = Pool(16)
|
|
||||||
for sample in prediction:
|
|
||||||
sample_filename = sample.pop('audio_id', 'unknown_filename').decode()
|
|
||||||
input_directory, input_filename = split(sample_filename)
|
|
||||||
if output_naming == 'directory':
|
|
||||||
output_dirname = split(input_directory)[1]
|
|
||||||
elif output_naming == 'filename':
|
|
||||||
output_dirname = splitext(input_filename)[0]
|
|
||||||
else:
|
|
||||||
raise ValueError(f'Unknown output naming {output_naming}')
|
|
||||||
for instrument, waveform in sample.items():
|
|
||||||
filename = join(
|
|
||||||
output_path,
|
|
||||||
output_dirname,
|
|
||||||
f'{instrument}.{codec}')
|
|
||||||
pool.apply_async(
|
|
||||||
audio_adapter.save,
|
|
||||||
(filename, waveform, sample_rate, codec))
|
|
||||||
# Wait for everything to be written
|
|
||||||
pool.close()
|
|
||||||
pool.join()
|
|
||||||
|
|
||||||
|
|
||||||
def entrypoint(arguments, params):
|
def entrypoint(arguments, params):
|
||||||
""" Command entrypoint.
|
""" Command entrypoint.
|
||||||
|
|
||||||
:param arguments: Command line parsed argument as argparse.Namespace.
|
:param arguments: Command line parsed argument as argparse.Namespace.
|
||||||
:param params: Deserialized JSON configuration file provided in CLI args.
|
:param params: Deserialized JSON configuration file provided in CLI args.
|
||||||
"""
|
"""
|
||||||
|
# TODO: check with output naming.
|
||||||
audio_adapter = get_audio_adapter(arguments.audio_adapter)
|
audio_adapter = get_audio_adapter(arguments.audio_adapter)
|
||||||
filenames = arguments.audio_filenames
|
separator = Separator(arguments.configuration, arguments.MWF)
|
||||||
output_path = arguments.output_path
|
for filename in arguments.audio_filenames:
|
||||||
max_duration = arguments.max_duration
|
separator.separate_to_file(
|
||||||
audio_codec = arguments.audio_codec
|
filename,
|
||||||
output_naming = arguments.output_naming
|
arguments.output_path,
|
||||||
estimator = create_estimator(params, arguments.MWF)
|
audio_adapter=audio_adapter,
|
||||||
filenames_and_crops = [
|
offset=arguments.offset,
|
||||||
(filename, 0., max_duration)
|
duration=arguments.max_duration,
|
||||||
for filename in filenames]
|
codec=arguments.codec,
|
||||||
process_audio(
|
bitrate=arguments.bitrate,
|
||||||
audio_adapter,
|
synchronous=False
|
||||||
filenames_and_crops,
|
)
|
||||||
estimator,
|
separator.join()
|
||||||
output_path,
|
|
||||||
params['sample_rate'],
|
|
||||||
params['n_channels'],
|
|
||||||
codec=audio_codec,
|
|
||||||
output_naming=output_naming)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user