diff --git a/spleeter/utils/audio/ffmpeg.py b/spleeter/utils/audio/ffmpeg.py index ad24e33..437cdfc 100644 --- a/spleeter/utils/audio/ffmpeg.py +++ b/spleeter/utils/audio/ffmpeg.py @@ -9,12 +9,11 @@ """ import os -import os.path -import platform -import re -import subprocess -import numpy as np # pylint: disable=import-error +# pylint: disable=import-error +import ffmpeg +import numpy as np +# pylint: enable=import-error from .adapter import AudioAdapter from ..logging import get_logger @@ -23,58 +22,9 @@ __email__ = 'research@deezer.com' __author__ = 'Deezer Research' __license__ = 'MIT License' -# Default FFMPEG binary name. -_UNIX_BINARY = 'ffmpeg' -_WINDOWS_BINARY = 'ffmpeg.exe' - - -def _which(program): - """ A pure python implementation of `which`command - for retrieving absolute path from command name or path. - - @see https://stackoverflow.com/a/377028/1211342 - - :param program: Program name or path to expend. - :returns: Absolute path of program if any, None otherwise. - """ - def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - fpath, _ = os.path.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ['PATH'].split(os.pathsep): - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - return None - - -def _get_ffmpeg_path(): - """ Retrieves FFMPEG binary path using ENVVAR if defined - or default binary name (Windows or UNIX style). - - :returns: Absolute path of FFMPEG binary. - :raise IOError: If FFMPEG binary cannot be found. - """ - ffmpeg_path = os.environ.get('FFMPEG_PATH', None) - if ffmpeg_path is None: - # Note: try to infer standard binary name regarding of platform. 
- if platform.system() == 'Windows': - ffmpeg_path = _WINDOWS_BINARY - else: - ffmpeg_path = _UNIX_BINARY - expended = _which(ffmpeg_path) - if expended is None: - raise IOError(f'FFMPEG binary ({ffmpeg_path}) not found') - return expended - def _to_ffmpeg_time(n): """ Format number of seconds to time expected by FFMPEG. - :param n: Time in seconds to format. :returns: Formatted time in FFMPEG format. """ @@ -83,56 +33,6 @@ def _to_ffmpeg_time(n): return '%d:%02d:%09.6f' % (h, m, s) -def _parse_ffmpg_results(stderr): - """ Extract number of channels and sample rate from - the given FFMPEG STDERR output line. - - :param stderr: STDERR output line to parse. - :returns: Parsed n_channels and sample_rate values. - """ - # Setup default value. - n_channels = 0 - sample_rate = 0 - # Find samplerate - match = re.search(r'(\d+) hz', stderr) - if match: - sample_rate = int(match.group(1)) - # Channel count. - match = re.search(r'hz, ([^,]+),', stderr) - if match: - mode = match.group(1) - if mode == 'stereo': - n_channels = 2 - else: - match = re.match(r'(\d+) ', mode) - n_channels = match and int(match.group(1)) or 1 - return n_channels, sample_rate - - -class _CommandBuilder(object): - """ A simple builder pattern class for CLI string. """ - - def __init__(self, binary): - """ Default constructor. """ - self._command = [binary] - - def flag(self, flag): - """ Add flag or unlabelled opt. """ - self._command.append(flag) - return self - - def opt(self, short, value, formatter=str): - """ Add option if value not None. """ - if value is not None: - self._command.append(short) - self._command.append(formatter(value)) - return self - - def command(self): - """ Build string command. """ - return self._command - - class FFMPEGProcessAudioAdapter(AudioAdapter): """ An AudioAdapter implementation that use FFMPEG binary through subprocess in order to perform I/O operation for audio processing. 
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter): FFMPEG_PATH environment variable. """ - def __init__(self): - """ Default constructor. """ - self._ffmpeg_path = _get_ffmpeg_path() - - def _get_command_builder(self): - """ Creates and returns a command builder using FFMPEG path. - - :returns: Built command builder. - """ - return _CommandBuilder(self._ffmpeg_path) - def load( self, path, offset=None, duration=None, sample_rate=None, dtype=np.float32): @@ -166,46 +55,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter): :param dtype: (Optional) Numpy data type to use, default to float32. :returns: Loaded data a (waveform, sample_rate) tuple. """ - if not isinstance(path, str): - path = path.decode() - command = ( - self._get_command_builder() - .opt('-ss', offset, formatter=_to_ffmpeg_time) - .opt('-t', duration, formatter=_to_ffmpeg_time) - .opt('-i', path) - .opt('-ar', sample_rate) - .opt('-f', 'f32le') - .flag('-') - .command()) - process = subprocess.Popen( - command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - buffer = process.stdout.read(-1) - # Read STDERR until end of the process detected. - while True: - status = process.stderr.readline() - if not status: - raise OSError('Stream info not found') - if isinstance(status, bytes): # Note: Python 3 compatibility. - status = status.decode('utf8', 'ignore') - status = status.strip().lower() - if 'no such file' in status: - raise IOError(f'File {path} not found') - elif 'invalid data found' in status: - raise IOError(f'FFMPEG error : {status}') - elif 'audio:' in status: - n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status) - if sample_rate is None: - sample_rate = ffmpeg_sample_rate - break - # Load waveform and clean process. 
+ probe = ffmpeg.probe(path) + if 'streams' not in probe or len(probe['streams']) == 0: + raise IOError('No stream was found with ffprobe') + metadata = next( + stream + for stream in probe['streams'] + if stream['codec_type'] == 'audio') + n_channels = metadata['channels'] + if sample_rate is None: + sample_rate = int(metadata['sample_rate']) + input_kwargs = {} + if duration is not None: + input_kwargs['t'] = _to_ffmpeg_time(duration) + if offset is not None: + input_kwargs['ss'] = _to_ffmpeg_time(offset) + buffer, _ = ( + ffmpeg + .input(path, **input_kwargs) + .output('-', format='f32le', ar=sample_rate) + .overwrite_output() + .run(quiet=True)) waveform = np.frombuffer(buffer, dtype='