refactor: use separator

feat: add bitrate opt
feat: add offset opt
Félix Voituret
2019-11-14 13:03:55 -05:00
parent e912ae0fce
commit 4a743cd618
4 changed files with 71 additions and 158 deletions

View File

@@ -75,7 +75,27 @@ jobs:
key: sdist-{{ .Branch }}-{{ checksum "setup.py" }}
- run:
name: upload to PyPi
# TODO: Infer destination depending on the branch.
# - master => production PyPi
# - development => testing PyPi
command: pip install twine && twine upload dist/*
# =======================================================================================
# Conda distribution.
# =======================================================================================
conda-deploy:
docker:
- image: null # TODO: use circle ci python image.
steps:
- run:
name: checkout feedstock
command: git clone https://github.com/conda-forge/spleeter-feedstock
- run:
name: update version
command: sed -i 's///g'
- run:
name: deploy
command: git add recipe && git commit -m "" && git push
workflows:
version: 2
spleeter-workflow:
@@ -91,6 +111,5 @@ workflows:
branches:
only:
- master
- development
requires:
- sdist

View File

@@ -14,7 +14,7 @@ __license__ = 'MIT License'
# Default project values.
project_name = 'spleeter'
project_version = '1.4.3'
project_version = '1.4.4'
device_target = 'cpu'
tensorflow_dependency = 'tensorflow'
tensorflow_version = '1.14.0'

View File

@@ -13,31 +13,31 @@ __email__ = 'research@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# -i opt specification.
# -i opt specification (separate).
OPT_INPUT = {
'dest': 'audio_filenames',
'dest': 'inputs',
'nargs': '+',
'help': 'List of input audio filenames',
'required': True
}
# -o opt specification.
# -o opt specification (evaluate and separate).
OPT_OUTPUT = {
'dest': 'output_path',
'default': join(gettempdir(), 'separated_audio'),
'help': 'Path of the output directory to write audio files in'
}
# -p opt specification.
# -p opt specification (train, evaluate and separate).
OPT_PARAMS = {
'dest': 'params_filename',
'dest': 'configuration',
'default': 'spleeter:2stems',
'type': str,
'action': 'store',
'help': 'JSON filename that contains params'
}
# -n opt specification.
# -n opt specification (separate).
OPT_OUTPUT_NAMING = {
'dest': 'output_naming',
'default': 'filename',
@@ -54,26 +54,42 @@ OPT_OUTPUT_NAMING = {
', <output_path>/audio/<instrument2>.wav)')
}
# -s opt specification (separate).
OPT_OFFSET = {
'dest': 'offset',
'type': float,
'default': 0.,
'help': 'Set the starting offset to separate audio from.'
}
# -d opt specification (separate).
OPT_DURATION = {
'dest': 'max_duration',
'dest': 'duration',
'type': float,
'default': 600.,
'help': (
'Set a maximum duration for processing audio '
'(only separate max_duration first seconds of '
'(only separate offset + duration first seconds of '
'the input file)')
}
# -c opt specification.
# -c opt specification (separate).
OPT_CODEC = {
'dest': 'audio_codec',
'dest': 'codec',
'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'),
'default': 'wav',
'help': 'Audio codec to be used for the separated output'
}
# -m opt specification.
# -b opt specification (separate).
OPT_BITRATE = {
'dest': 'bitrate',
'type': int,
'default': '128k',
'help': 'Audio bitrate to be used for the separated output'
}
# -m opt specification (evaluate and separate).
OPT_MWF = {
'dest': 'MWF',
'action': 'store_const',
@@ -82,7 +98,7 @@ OPT_MWF = {
'help': 'Whether to use multichannel Wiener filtering for separation',
}
# --mus_dir opt specification.
# --mus_dir opt specification (evaluate).
OPT_MUSDB = {
'dest': 'mus_dir',
'type': str,
@@ -98,14 +114,14 @@ OPT_DATA = {
'help': 'Path of the folder containing audio data for training'
}
# -a opt specification.
# -a opt specification (train, evaluate and separate).
OPT_ADAPTER = {
'dest': 'audio_adapter',
'type': str,
'help': 'Name of the audio adapter to use for audio I/O'
}
# -a opt specification.
# -a opt specification (train, evaluate and separate).
OPT_VERBOSE = {
'action': 'store_true',
'help': 'Shows verbose logs'
@@ -158,11 +174,13 @@ def _create_separate_parser(parser_factory):
"""
parser = parser_factory('separate', help='Separate audio files')
_add_common_options(parser)
parser.add_argument('-i', '--audio_filenames', **OPT_INPUT)
parser.add_argument('-i', '--inputs', **OPT_INPUT)
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
parser.add_argument('-n', '--output_naming', **OPT_OUTPUT_NAMING)
parser.add_argument('-d', '--max_duration', **OPT_DURATION)
parser.add_argument('-c', '--audio_codec', **OPT_CODEC)
parser.add_argument('-s', '--offset', **OPT_OFFSET)
parser.add_argument('-d', '--duration', **OPT_DURATION)
parser.add_argument('-c', '--codec', **OPT_CODEC)
parser.add_argument('-b', '--bitrate', **OPT_BITRATE)
parser.add_argument('-m', '--mwf', **OPT_MWF)
return parser
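
For illustration only, not part of the commit: a minimal sketch of how the reworked -s/-d/-b options behave once wired into argparse, using the OPT_OFFSET, OPT_DURATION and OPT_BITRATE specifications shown above. The standalone parser is an assumption made for the example; in the real code the parser comes from _create_separate_parser.

import argparse

# Option specifications as added in the diff above (help text abbreviated).
OPT_OFFSET = {
    'dest': 'offset', 'type': float, 'default': 0.,
    'help': 'Set the starting offset to separate audio from.'}
OPT_DURATION = {
    'dest': 'duration', 'type': float, 'default': 600.,
    'help': 'Set a maximum duration for processing audio'}
OPT_BITRATE = {
    'dest': 'bitrate', 'type': int, 'default': '128k',
    'help': 'Audio bitrate to be used for the separated output'}

parser = argparse.ArgumentParser(prog='separate')
parser.add_argument('-s', '--offset', **OPT_OFFSET)
parser.add_argument('-d', '--duration', **OPT_DURATION)
parser.add_argument('-b', '--bitrate', **OPT_BITRATE)

# Parsed values land on the dest names the new entrypoint reads.
args = parser.parse_args(['-s', '30', '-d', '60', '-b', '192'])
print(args.offset, args.duration, args.bitrate)  # 30.0 60.0 192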

View File

@@ -22,6 +22,7 @@ import numpy as np
from ..audio.adapter import get_audio_adapter
from ..audio.convertor import to_n_channels
from ..separator import Separator
from ..utils.estimator import create_estimator
from ..utils.tensor import set_tensor_shape
@@ -30,149 +31,24 @@ __author__ = 'Deezer Research'
__license__ = 'MIT License'
def get_dataset(audio_adapter, filenames_and_crops, sample_rate, n_channels):
""""
Build a tensorflow dataset of waveform from a filename list with crop
information.
Params:
- audio_adapter: An AudioAdapter instance to load audio from.
- filenames_and_crops: list of (audio_filename, start, duration)
tuples; separation is performed on each filename
from start (in seconds) to start + duration
(in seconds).
- sample_rate: audio sample_rate of the input and output audio
signals
- n_channels: int, number of channels of the input and output
audio signals
Returns
A tensorflow dataset of waveform to feed a tensorflow estimator in
predict mode.
"""
filenames, starts, ends = list(zip(*filenames_and_crops))
dataset = tf.data.Dataset.from_tensor_slices({
'audio_id': list(filenames),
'start': list(starts),
'end': list(ends)
})
# Load waveform.
dataset = dataset.map(
lambda sample: dict(
sample,
**audio_adapter.load_tf_waveform(
sample['audio_id'],
sample_rate=sample_rate,
offset=sample['start'],
duration=sample['end'] - sample['start'])),
num_parallel_calls=2)
# Filter out error.
dataset = dataset.filter(
lambda sample: tf.logical_not(sample['waveform_error']))
# Convert waveform to the right number of channels.
dataset = dataset.map(
lambda sample: dict(
sample,
waveform=to_n_channels(sample['waveform'], n_channels)))
# Set number of channels (required for the model).
dataset = dataset.map(
lambda sample: dict(
sample,
waveform=set_tensor_shape(sample['waveform'], (None, n_channels))))
return dataset
def process_audio(
audio_adapter,
filenames_and_crops, estimator, output_path,
sample_rate, n_channels, codec, output_naming):
"""
Perform separation on a list of audio ids.
Params:
- audio_adapter: Audio adapter to use for audio I/O.
- filenames_and_crops: list of (audio_filename, start, duration)
tuples; separation is performed on each filename
from start (in seconds) to start + duration
(in seconds).
- estimator: the tensorflow estimator that performs the
source separation.
- output_path: output_path where to export separated files.
- sample_rate: audio sample_rate of the input and output audio
signals
- n_channels: int, number of channels of the input and output
audio signals
- codec: string codec to be used for export (could be
"wav", "mp3", "ogg", "m4a") could be anything
supported by ffmpeg.
- output_naming: string (= "filename" or "directory")
naming convention for output.
for an input file /path/to/audio/input_file.wav:
* if output_naming is equal to "filename":
output files will be put in the directory <output_path>/input_file
(<output_path>/input_file/<instrument1>.<codec>,
<output_path>/input_file/<instrument2>.<codec>...).
* if output_naming is equal to "directory":
output files will be put in the directory <output_path>/audio/
(<output_path>/audio/<instrument1>.<codec>,
<output_path>/audio/<instrument2>.<codec>...)
Use "directory" when separating the MusDB dataset.
"""
# Get estimator
prediction = estimator.predict(
lambda: get_dataset(
audio_adapter,
filenames_and_crops,
sample_rate,
n_channels),
yield_single_examples=False)
# initialize pool for audio export
pool = Pool(16)
for sample in prediction:
sample_filename = sample.pop('audio_id', 'unknown_filename').decode()
input_directory, input_filename = split(sample_filename)
if output_naming == 'directory':
output_dirname = split(input_directory)[1]
elif output_naming == 'filename':
output_dirname = splitext(input_filename)[0]
else:
raise ValueError(f'Unknown output naming {output_naming}')
for instrument, waveform in sample.items():
filename = join(
output_path,
output_dirname,
f'{instrument}.{codec}')
pool.apply_async(
audio_adapter.save,
(filename, waveform, sample_rate, codec))
# Wait for everything to be written
pool.close()
pool.join()
def entrypoint(arguments, params):
""" Command entrypoint.
:param arguments: Command line parsed argument as argparse.Namespace.
:param params: Deserialized JSON configuration file provided in CLI args.
"""
# TODO: check with output naming.
audio_adapter = get_audio_adapter(arguments.audio_adapter)
filenames = arguments.audio_filenames
output_path = arguments.output_path
max_duration = arguments.max_duration
audio_codec = arguments.audio_codec
output_naming = arguments.output_naming
estimator = create_estimator(params, arguments.MWF)
filenames_and_crops = [
(filename, 0., max_duration)
for filename in filenames]
process_audio(
audio_adapter,
filenames_and_crops,
estimator,
output_path,
params['sample_rate'],
params['n_channels'],
codec=audio_codec,
output_naming=output_naming)
separator = Separator(arguments.configuration, arguments.MWF)
for filename in arguments.inputs:
separator.separate_to_file(
filename,
arguments.output_path,
audio_adapter=audio_adapter,
offset=arguments.offset,
duration=arguments.duration,
codec=arguments.codec,
bitrate=arguments.bitrate,
synchronous=False
)
separator.join()
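
A minimal sketch, not part of the commit, of driving the same Separator API directly rather than through the CLI. The constructor arguments and keyword arguments mirror the call in the new entrypoint above; the configuration string, adapter lookup and file paths are just the CLI defaults used as illustrative placeholders.

from spleeter.audio.adapter import get_audio_adapter
from spleeter.separator import Separator

# Same construction as the new entrypoint: configuration descriptor + MWF flag.
separator = Separator('spleeter:2stems', False)
audio_adapter = get_audio_adapter(None)  # assuming None falls back to the default adapter

# Non-blocking separation of a single (hypothetical) input file.
separator.separate_to_file(
    'audio_example.mp3',
    '/tmp/separated_audio',
    audio_adapter=audio_adapter,
    offset=0.,            # -s option
    duration=600.,        # -d option
    codec='wav',          # -c option
    bitrate='128k',       # -b option
    synchronous=False)
separator.join()          # wait for pending background exports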