refactor: ffmpeg adapter

2026-01-31 14:58:23 +00:00 · 2019-11-06 18:03:35 +01:00
parent 0a571c9fca
commit c9254625cd
1 changed files with 40 additions and 182 deletions
--- a/spleeter/utils/audio/ffmpeg.py
+++ b/spleeter/utils/audio/ffmpeg.py
@@ -9,12 +9,11 @@
 """
 import os
 import os.path
 import platform
 import re
 import subprocess
-import numpy as np  # pylint: disable=import-error
+# pylint: disable=import-error
 import ffmpeg
 import numpy as np  
 # pylint: enable=import-error
 from .adapter import AudioAdapter
 from ..logging import get_logger
@@ -23,58 +22,9 @@ __email__ = 'research@deezer.com'
 # Module metadata.
 __author__ = 'Deezer Research'
 __license__ = 'MIT License'
 # Default FFMPEG binary names, one per platform family. They are only
 # used as a fallback when the FFMPEG_PATH environment variable is unset.
 _UNIX_BINARY = 'ffmpeg'
 _WINDOWS_BINARY = 'ffmpeg.exe'
 def _which(program):
    """ A pure python implementation of the `which` command, used to
    resolve a command name or a path into the path of an executable.

    @see https://stackoverflow.com/a/377028/1211342

    :param program: Program name or path to expand.
    :returns: Path of the executable if found, None otherwise. A program
              given with a directory component is returned unchanged
              when it is an executable file; a bare name is joined with
              the first PATH entry that contains it.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # An explicit location was given: PATH is not consulted.
        return program if is_exe(program) else None
    # Note: PATH may be missing from the environment; fall back to the
    # platform default search path instead of raising KeyError, so that
    # the documented "None otherwise" contract holds.
    search_path = os.environ.get('PATH', os.defpath)
    for folder in search_path.split(os.pathsep):
        candidate = os.path.join(folder, program)
        if is_exe(candidate):
            return candidate
    return None
 def _get_ffmpeg_path():
    """ Locate the FFMPEG binary.

    The FFMPEG_PATH environment variable takes precedence when it is
    defined; otherwise the default binary name of the current platform
    (Windows or UNIX style) is looked up.

    :returns: Path of the FFMPEG binary as resolved by `_which`.
    :raise IOError: If FFMPEG binary cannot be found.
    """
    ffmpeg_path = os.environ.get('FFMPEG_PATH', None)
    if ffmpeg_path is None:
        # Note: no override given, infer the standard binary name
        # from the platform we are running on.
        on_windows = platform.system() == 'Windows'
        ffmpeg_path = _WINDOWS_BINARY if on_windows else _UNIX_BINARY
    resolved = _which(ffmpeg_path)
    if resolved is not None:
        return resolved
    raise IOError(f'FFMPEG binary ({ffmpeg_path}) not found')
 def _to_ffmpeg_time(n):
    """ Format number of seconds to time expected by FFMPEG.
    :param n: Time in seconds to format.
    :returns: Formatted time in FFMPEG format.
    """
@@ -83,56 +33,6 @@ def _to_ffmpeg_time(n):
    return '%d:%02d:%09.6f' % (h, m, s)
 # Channel count of the FFMPEG channel layouts that are identified by a
 # name rather than by a number.
 _NAMED_CHANNEL_LAYOUTS = {
    'mono': 1,
    'stereo': 2,
    'downmix': 2,
    'quad': 4,
    'hexagonal': 6,
    'octagonal': 8,
    'hexadecagonal': 16}


 def _parse_ffmpg_results(stderr):
    """ Extract number of channels and sample rate from
    the given FFMPEG STDERR output line.

    Matching is case sensitive and looks for a lowercase `hz`, so the
    line has to be lowercased before being passed in.

    Supported channel descriptions are named layouts (`mono`, `stereo`,
    `quad`, ...), explicit counts (`4 channels`) and dotted layouts
    (`5.1`, `7.1`, `5.1.2`), with or without a variant suffix such as
    `(side)`. Any other description is reported as a single channel.

    :param stderr: STDERR output line to parse.
    :returns: Parsed (n_channels, sample_rate) tuple, each value being 0
              when the corresponding information is not in the line.
    """
    # Setup default value.
    n_channels = 0
    sample_rate = 0
    # Find samplerate.
    rate_match = re.search(r'(\d+) hz', stderr)
    if rate_match:
        sample_rate = int(rate_match.group(1))
    # Channel count: the field following the sample rate.
    layout_match = re.search(r'hz, ([^,]+),', stderr)
    if layout_match:
        mode = layout_match.group(1)
        # Note: drop a variant suffix, '5.1(side)' -> '5.1'.
        base = mode.split('(', 1)[0].strip()
        counted = re.match(r'(\d+) ', mode)
        if base in _NAMED_CHANNEL_LAYOUTS:
            n_channels = _NAMED_CHANNEL_LAYOUTS[base]
        elif counted:
            # Explicit 'N channels' form; a zero count falls back to 1.
            n_channels = int(counted.group(1)) or 1
        elif re.fullmatch(r'\d+(\.\d+)+', base):
            # Dotted layout: the channel count is the sum of its
            # components, e.g. '5.1' is 5 full range channels + 1 LFE.
            n_channels = sum(int(part) for part in base.split('.'))
        else:
            n_channels = 1
    return n_channels, sample_rate
 class _CommandBuilder(object):
    """ Fluent builder that accumulates the argument list of a CLI call.

    Every mutator returns the builder itself so that calls can be
    chained; `command()` gives access to the accumulated list.
    """

    def __init__(self, binary):
        """ Default constructor.

        :param binary: Binary name or path, used as first argument.
        """
        self._command = [binary]

    def flag(self, flag):
        """ Add a flag or an unlabelled option.

        :param flag: Argument to append as is.
        :returns: This builder.
        """
        self._command += [flag]
        return self

    def opt(self, short, value, formatter=str):
        """ Add an option followed by its value, unless value is None.

        :param short: Option name to append (e.g. '-ar').
        :param value: Option value, the option is skipped when None.
        :param formatter: Callable turning the value into an argument.
        :returns: This builder.
        """
        if value is None:
            return self
        # Note: name is appended before the value gets formatted.
        self._command.append(short)
        self._command.append(formatter(value))
        return self

    def command(self):
        """ Give the built command.

        :returns: The internal argument list (not a copy).
        """
        return self._command
 class FFMPEGProcessAudioAdapter(AudioAdapter):
    """ An AudioAdapter implementation that use FFMPEG binary through
    subprocess in order to perform I/O operation for audio processing.
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
    FFMPEG_PATH environment variable.
    """
    def __init__(self):
        """ Default constructor, resolves the FFMPEG binary once and
        keeps its path on the instance.
        """
        binary_path = _get_ffmpeg_path()
        self._ffmpeg_path = binary_path
    def _get_command_builder(self):
        """ Start a new command line whose first argument is the
        FFMPEG path stored on this instance.

        :returns: A fresh command builder.
        """
        builder = _CommandBuilder(self._ffmpeg_path)
        return builder
    def load(
            self, path, offset=None, duration=None,
            sample_rate=None, dtype=np.float32):
@@ -166,46 +55,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
        :param dtype: (Optional) Numpy data type to use, default to float32.
        :returns: Loaded data a (waveform, sample_rate) tuple.
        """
-        if not isinstance(path, str):
+        probe = ffmpeg.probe(path)
-            path = path.decode()
+        if 'streams' not in probe or len(probe['streams']) == 0:
-        command = (
+            raise IOError('No stream was found with ffprobe')
-            self._get_command_builder()
+        metadata = next(
-            .opt('-ss', offset, formatter=_to_ffmpeg_time)
+            stream
-            .opt('-t', duration, formatter=_to_ffmpeg_time)
+            for stream in probe['streams']
-            .opt('-i', path)
+            if stream['codec_type'] == 'audio')
-            .opt('-ar', sample_rate)
+        n_channels = metadata['channels']
-            .opt('-f', 'f32le')
+        if sample_rate is None:
-            .flag('-')
+            sample_rate = metadata['sample_rate']
-            .command())
+        input_kwargs = {'format': 'f32le', 'ar': sample_rate}
-        process = subprocess.Popen(
+        if duration is not None:
-            command,
+            input_kwargs['t'] = _to_ffmpeg_time(duration)
-            stdout=subprocess.PIPE,
+        if offset is not None:
-            stderr=subprocess.PIPE)
+            input_kwargs['ss'] = _to_ffmpeg_time(offset)
-        buffer = process.stdout.read(-1)
+        buffer, _ = (
-        # Read STDERR until end of the process detected.
+            ffmpeg
-        while True:
+            .input(path, **input_kwargs)
-            status = process.stderr.readline()
+            .output('-', format='f32le')
-            if not status:
+            .overwrite_output()
-                raise OSError('Stream info not found')
+            .run(quiet=True))
            if isinstance(status, bytes):  # Note: Python 3 compatibility.
                status = status.decode('utf8', 'ignore')
            status = status.strip().lower()
            if 'no such file' in status:
                raise IOError(f'File {path} not found')
            elif 'invalid data found' in status:
                raise IOError(f'FFMPEG error : {status}')
            elif 'audio:' in status:
                n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status)
                if sample_rate is None:
                    sample_rate = ffmpeg_sample_rate
                break
        # Load waveform and clean process.
        waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
        if not waveform.dtype == np.dtype(dtype):
            waveform = waveform.astype(dtype)
        process.stdout.close()
        process.stderr.close()
        del process
        return (waveform, sample_rate)
    def save(
@@ -225,39 +98,24 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
        if not os.path.exists(directory):
            os.makedirs(directory)
        get_logger().debug('Writing file %s', path)
-        # NOTE: Tweak.
+        output_kwargs = {
-        if codec == 'wav':
+            'ar': sample_rate,
-            codec = None
+            'ac': data.shape[1],
-        command = (
+            'strict': '-2'}
-            self._get_command_builder()
+        if bitrate:
-            .flag('-y')
+            output_kwargs['audio_bitrate'] = bitrate
-            .opt('-loglevel', 'error')
+        if codec:
-            .opt('-f', 'f32le')
+            output_kwargs['codec'] = codec
-            .opt('-ar', sample_rate)
+        process = (
-            .opt('-ac', data.shape[1])
+            ffmpeg
-            .opt('-i', '-')
+            .input('pipe:', format='f32le')
-            .flag('-vn')
+            .output(path, format='f32le', **output_kwargs)
-            .opt('-acodec', codec)
+            .run_async(pipe_stdin=True))
            .opt('-ar', sample_rate)  # Note: why twice ?
            .opt('-strict', '-2')     # Note: For 'aac' codec support.
            .opt('-ab', bitrate)
            .flag(path)
            .command())
        process = subprocess.Popen(
            command,
            stdout=open(os.devnull, 'wb'),
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE)
        # Write data to STDIN.
        try:
            process.stdin.write(
                data.astype('<f4').tostring())
            process.stdin.close()
            process.wait()
        except IOError:
            raise IOError(f'FFMPEG error: {process.stderr.read()}')
        # Clean process.
        process.stdin.close()
        if process.stderr is not None:
            process.stderr.close()
        process.wait()
        del process
        get_logger().info('File %s written', path)