refactor: ffmpeg adapter

2026-01-30 12:22:58 +00:00 · 2019-11-06 18:03:35 +01:00
parent 0a571c9fca
commit c9254625cd
1 changed files with 40 additions and 182 deletions
--- a/spleeter/utils/audio/ffmpeg.py
+++ b/spleeter/utils/audio/ffmpeg.py
@@ -9,12 +9,11 @@
 """

 import os
-import os.path
-import platform
-import re
-import subprocess

-import numpy as np  # pylint: disable=import-error
+# pylint: disable=import-error
+import ffmpeg
+import numpy as np  
+# pylint: enable=import-error

 from .adapter import AudioAdapter
 from ..logging import get_logger
@@ -23,58 +22,9 @@ __email__ = 'research@deezer.com'
 __author__ = 'Deezer Research'
 __license__ = 'MIT License'

-# Default FFMPEG binary name.
-_UNIX_BINARY = 'ffmpeg'
-_WINDOWS_BINARY = 'ffmpeg.exe'
-
-
-def _which(program):
-    """ A pure python implementation of `which`command
-    for retrieving absolute path from command name or path.
-
-    @see https://stackoverflow.com/a/377028/1211342
-
-    :param program: Program name or path to expend.
-    :returns: Absolute path of program if any, None otherwise.
-    """
-    def is_exe(fpath):
-        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
-
-    fpath, _ = os.path.split(program)
-    if fpath:
-        if is_exe(program):
-            return program
-    else:
-        for path in os.environ['PATH'].split(os.pathsep):
-            exe_file = os.path.join(path, program)
-            if is_exe(exe_file):
-                return exe_file
-    return None
-
-
-def _get_ffmpeg_path():
-    """ Retrieves FFMPEG binary path using ENVVAR if defined
-    or default binary name (Windows or UNIX style).
-
-    :returns: Absolute path of FFMPEG binary.
-    :raise IOError: If FFMPEG binary cannot be found.
-    """
-    ffmpeg_path = os.environ.get('FFMPEG_PATH', None)
-    if ffmpeg_path is None:
-        # Note: try to infer standard binary name regarding of platform.
-        if platform.system() == 'Windows':
-            ffmpeg_path = _WINDOWS_BINARY
-        else:
-            ffmpeg_path = _UNIX_BINARY
-    expended = _which(ffmpeg_path)
-    if expended is None:
-        raise IOError(f'FFMPEG binary ({ffmpeg_path}) not found')
-    return expended
-

 def _to_ffmpeg_time(n):
    """ Format number of seconds to time expected by FFMPEG.
-
    :param n: Time in seconds to format.
    :returns: Formatted time in FFMPEG format.
    """
@@ -83,56 +33,6 @@ def _to_ffmpeg_time(n):
    return '%d:%02d:%09.6f' % (h, m, s)


-def _parse_ffmpg_results(stderr):
-    """ Extract number of channels and sample rate from
-    the given FFMPEG STDERR output line.
-
-    :param stderr: STDERR output line to parse.
-    :returns: Parsed n_channels and sample_rate values.
-    """
-    # Setup default value.
-    n_channels = 0
-    sample_rate = 0
-    # Find samplerate
-    match = re.search(r'(\d+) hz', stderr)
-    if match:
-        sample_rate = int(match.group(1))
-    # Channel count.
-    match = re.search(r'hz, ([^,]+),', stderr)
-    if match:
-        mode = match.group(1)
-        if mode == 'stereo':
-            n_channels = 2
-        else:
-            match = re.match(r'(\d+) ', mode)
-            n_channels = match and int(match.group(1)) or 1
-    return n_channels, sample_rate
-
-
-class _CommandBuilder(object):
-    """ A simple builder pattern class for CLI string. """
-
-    def __init__(self, binary):
-        """ Default constructor. """
-        self._command = [binary]
-
-    def flag(self, flag):
-        """ Add flag or unlabelled opt. """
-        self._command.append(flag)
-        return self
-
-    def opt(self, short, value, formatter=str):
-        """ Add option if value not None. """
-        if value is not None:
-            self._command.append(short)
-            self._command.append(formatter(value))
-        return self
-
-    def command(self):
-        """ Build string command. """
-        return self._command
-
-
 class FFMPEGProcessAudioAdapter(AudioAdapter):
    """ An AudioAdapter implementation that use FFMPEG binary through
    subprocess in order to perform I/O operation for audio processing.
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
    FFMPEG_PATH environment variable.
    """

-    def __init__(self):
-        """ Default constructor. """
-        self._ffmpeg_path = _get_ffmpeg_path()
-
-    def _get_command_builder(self):
-        """ Creates and returns a command builder using FFMPEG path.
-
-        :returns: Built command builder.
-        """
-        return _CommandBuilder(self._ffmpeg_path)
-
    def load(
            self, path, offset=None, duration=None,
            sample_rate=None, dtype=np.float32):
@@ -166,46 +55,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
        :param dtype: (Optional) Numpy data type to use, default to float32.
        :returns: Loaded data a (waveform, sample_rate) tuple.
        """
-        if not isinstance(path, str):
-            path = path.decode()
-        command = (
-            self._get_command_builder()
-            .opt('-ss', offset, formatter=_to_ffmpeg_time)
-            .opt('-t', duration, formatter=_to_ffmpeg_time)
-            .opt('-i', path)
-            .opt('-ar', sample_rate)
-            .opt('-f', 'f32le')
-            .flag('-')
-            .command())
-        process = subprocess.Popen(
-            command,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE)
-        buffer = process.stdout.read(-1)
-        # Read STDERR until end of the process detected.
-        while True:
-            status = process.stderr.readline()
-            if not status:
-                raise OSError('Stream info not found')
-            if isinstance(status, bytes):  # Note: Python 3 compatibility.
-                status = status.decode('utf8', 'ignore')
-            status = status.strip().lower()
-            if 'no such file' in status:
-                raise IOError(f'File {path} not found')
-            elif 'invalid data found' in status:
-                raise IOError(f'FFMPEG error : {status}')
-            elif 'audio:' in status:
-                n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status)
-                if sample_rate is None:
-                    sample_rate = ffmpeg_sample_rate
-                break
-        # Load waveform and clean process.
+        probe = ffmpeg.probe(path)
+        streams = probe.get('streams') or []  # Note: tolerate missing key.
+        metadata = next(
+            (s for s in streams if s.get('codec_type') == 'audio'), None)
+        if metadata is None:  # Note: no stream at all, or no audio stream.
+            raise IOError('No stream was found with ffprobe')
+        n_channels = metadata['channels']
+        if sample_rate is None:
+            # Note: ffprobe reports the sample rate as a string.
+            sample_rate = int(metadata['sample_rate'])
+        # Note: seeking applies to input, raw format and rate to output,
+        # otherwise FFMPEG would read the input file itself as raw f32le.
+        input_kwargs = {}
+        if duration is not None:
+            input_kwargs['t'] = _to_ffmpeg_time(duration)
+        if offset is not None:
+            input_kwargs['ss'] = _to_ffmpeg_time(offset)
+        buffer, _ = (
+            ffmpeg.input(path, **input_kwargs)
+            .output('-', format='f32le', ar=sample_rate)
+            .run(quiet=True, overwrite_output=True))
        waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
        if not waveform.dtype == np.dtype(dtype):
            waveform = waveform.astype(dtype)
-        process.stdout.close()
-        process.stderr.close()
-        del process
        return (waveform, sample_rate)

    def save(
@@ -225,39 +98,24 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
        if not os.path.exists(directory):
            os.makedirs(directory)
        get_logger().debug('Writing file %s', path)
-        # NOTE: Tweak.
-        if codec == 'wav':
-            codec = None
-        command = (
-            self._get_command_builder()
-            .flag('-y')
-            .opt('-loglevel', 'error')
-            .opt('-f', 'f32le')
-            .opt('-ar', sample_rate)
-            .opt('-ac', data.shape[1])
-            .opt('-i', '-')
-            .flag('-vn')
-            .opt('-acodec', codec)
-            .opt('-ar', sample_rate)  # Note: why twice ?
-            .opt('-strict', '-2')     # Note: For 'aac' codec support.
-            .opt('-ab', bitrate)
-            .flag(path)
-            .command())
-        process = subprocess.Popen(
-            command,
-            stdout=open(os.devnull, 'wb'),
-            stdin=subprocess.PIPE,
-            stderr=subprocess.PIPE)
-        # Write data to STDIN.
+        # Note: raw PCM layout (rate, channels) must be declared on the input.
+        input_kwargs = {'ar': sample_rate, 'ac': data.shape[1]}
+        output_kwargs = {'ar': sample_rate, 'strict': '-2'}  # Note: for aac.
+        if bitrate:
+            output_kwargs['audio_bitrate'] = bitrate
+        if codec and codec != 'wav':  # Note: 'wav' is not an encoder name.
+            output_kwargs['codec'] = codec
+        process = (
+            ffmpeg.input('pipe:', format='f32le', **input_kwargs)
+            .output(path, **output_kwargs)  # Note: container from extension.
+            .global_args('-loglevel', 'error')  # Note: keep STDERR small.
+            .overwrite_output()
+            .run_async(pipe_stdin=True, pipe_stderr=True))
        try:
            process.stdin.write(
                data.astype('<f4').tostring())
+            process.stdin.close()
+            process.wait()
        except IOError:
            raise IOError(f'FFMPEG error: {process.stderr.read()}')
-        # Clean process.
-        process.stdin.close()
-        if process.stderr is not None:
-            process.stderr.close()
-        process.wait()
-        del process
        get_logger().info('File %s written', path)