refactor: ffmpeg adapter

This commit is contained in:
Félix Voituret
2019-11-06 18:03:35 +01:00
parent 0a571c9fca
commit c9254625cd

View File

@@ -9,12 +9,11 @@
"""
import os
import os.path
import platform
import re
import subprocess
import numpy as np # pylint: disable=import-error
# pylint: disable=import-error
import ffmpeg
import numpy as np
# pylint: enable=import-error
from .adapter import AudioAdapter
from ..logging import get_logger
@@ -23,58 +22,9 @@ __email__ = 'research@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# Default FFMPEG binary name.
_UNIX_BINARY = 'ffmpeg'
_WINDOWS_BINARY = 'ffmpeg.exe'
def _which(program):
    """ A pure python implementation of the `which` command
    for retrieving path from command name or path.

    @see https://stackoverflow.com/a/377028/1211342

    :param program: Program name or path to expand.
    :returns: Path of program if any, None otherwise.
    """

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # Note: an explicit path was given, check it as is without
        # looking into PATH.
        return program if is_exe(program) else None
    search_path = os.environ.get('PATH')
    if search_path is None:
        # Note: PATH may be unset (stripped environment), in which case
        # there is nothing to search rather than a KeyError to raise.
        return None
    for path in search_path.split(os.pathsep):
        candidate = os.path.join(path, program)
        if is_exe(candidate):
            return candidate
    return None
def _get_ffmpeg_path():
    """ Resolve the FFMPEG binary location.

    The FFMPEG_PATH environment variable takes precedence when defined,
    otherwise the default binary name of the running platform
    (Windows or UNIX style) is used.

    :returns: Path of FFMPEG binary as resolved by `_which`.
    :raise IOError: If FFMPEG binary cannot be found.
    """
    candidate = os.environ.get('FFMPEG_PATH')
    if candidate is None:
        # Note: fall back to the standard binary name of the platform.
        on_windows = platform.system() == 'Windows'
        candidate = _WINDOWS_BINARY if on_windows else _UNIX_BINARY
    resolved = _which(candidate)
    if resolved is not None:
        return resolved
    raise IOError(f'FFMPEG binary ({candidate}) not found')
def _to_ffmpeg_time(n):
""" Format number of seconds to time expected by FFMPEG.
:param n: Time in seconds to format.
:returns: Formatted time in FFMPEG format.
"""
@@ -83,56 +33,6 @@ def _to_ffmpeg_time(n):
return '%d:%02d:%09.6f' % (h, m, s)
def _parse_ffmpg_results(stderr):
    """ Extract number of channels and sample rate from
    the given FFMPEG STDERR output line.

    Patterns are matched in lowercase (`hz`, `stereo`), hence the
    given line is expected to be lowercased by the caller.

    :param stderr: STDERR output line to parse.
    :returns: Parsed (n_channels, sample_rate) values, each one being
              0 when the related information is not found.
    """
    # Setup default value.
    n_channels = 0
    sample_rate = 0
    # Find samplerate.
    rate_match = re.search(r'(\d+) hz', stderr)
    if rate_match:
        sample_rate = int(rate_match.group(1))
    # Channel count, deduced from the layout that follows the samplerate.
    layout_match = re.search(r'hz, ([^,]+),', stderr)
    if layout_match:
        layout = layout_match.group(1)
        dotted = re.match(r'(\d+)\.(\d+)', layout)
        counted = re.match(r'(\d+) ', layout)
        if layout == 'stereo':
            n_channels = 2
        elif dotted:
            # Note: 'X.Y' layouts (e.g. '5.1' or '5.1(side)') hold X full
            # range channels plus Y low frequency ones.
            n_channels = int(dotted.group(1)) + int(dotted.group(2))
        elif counted:
            # Note: explicit count such as '4 channels', a zero count
            # falls back to a single channel.
            n_channels = int(counted.group(1)) or 1
        else:
            # Note: any other layout (e.g. 'mono') is considered
            # as a single channel.
            n_channels = 1
    return n_channels, sample_rate
class _CommandBuilder(object):
    """ Fluent builder that accumulates the tokens of a CLI command. """

    def __init__(self, binary):
        """ Start a command with the given binary as first token.

        :param binary: Name or path of the binary to run.
        """
        self._command = [binary]

    def flag(self, flag):
        """ Append a flag or an unlabelled option.

        :param flag: Token to append as is.
        :returns: This builder, for chaining.
        """
        self._command += [flag]
        return self

    def opt(self, short, value, formatter=str):
        """ Append an option followed by its value, unless value is None.

        :param short: Option name (e.g. '-i').
        :param value: Option value, the option is skipped when None.
        :param formatter: Callable used to convert value into a token.
        :returns: This builder, for chaining.
        """
        if value is None:
            return self
        tokens = self._command
        tokens.append(short)
        tokens.append(formatter(value))
        return self

    def command(self):
        """ Give the accumulated command as a list of tokens.

        :returns: The command token list.
        """
        return self._command
class FFMPEGProcessAudioAdapter(AudioAdapter):
""" An AudioAdapter implementation that use FFMPEG binary through
subprocess in order to perform I/O operation for audio processing.
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
FFMPEG_PATH environment variable.
"""
def __init__(self):
    """ Default constructor, resolves the FFMPEG binary path once. """
    resolved_path = _get_ffmpeg_path()
    self._ffmpeg_path = resolved_path
def _get_command_builder(self):
    """ Instantiate a fresh command builder bound to the FFMPEG path.

    :returns: Command builder starting with the FFMPEG binary path.
    """
    binary = self._ffmpeg_path
    return _CommandBuilder(binary)
def load(
self, path, offset=None, duration=None,
sample_rate=None, dtype=np.float32):
@@ -166,46 +55,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
:param dtype: (Optional) Numpy data type to use, default to float32.
:returns: Loaded data a (waveform, sample_rate) tuple.
"""
if not isinstance(path, str):
path = path.decode()
command = (
self._get_command_builder()
.opt('-ss', offset, formatter=_to_ffmpeg_time)
.opt('-t', duration, formatter=_to_ffmpeg_time)
.opt('-i', path)
.opt('-ar', sample_rate)
.opt('-f', 'f32le')
.flag('-')
.command())
process = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
buffer = process.stdout.read(-1)
# Read STDERR until end of the process detected.
while True:
status = process.stderr.readline()
if not status:
raise OSError('Stream info not found')
if isinstance(status, bytes): # Note: Python 3 compatibility.
status = status.decode('utf8', 'ignore')
status = status.strip().lower()
if 'no such file' in status:
raise IOError(f'File {path} not found')
elif 'invalid data found' in status:
raise IOError(f'FFMPEG error : {status}')
elif 'audio:' in status:
n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status)
if sample_rate is None:
sample_rate = ffmpeg_sample_rate
break
# Load waveform and clean process.
# Note: the file is inspected with ffprobe first, as the channel count
# is required to reshape the decoded buffer.
probe = ffmpeg.probe(path)
if 'streams' not in probe or len(probe['streams']) == 0:
    raise IOError('No stream was found with ffprobe')
# Note: ffprobe lists streams under the 'streams' key, a default is
# given to next() so a file without audio raises IOError instead of
# leaking StopIteration.
metadata = next(
    (
        stream
        for stream in probe['streams']
        if stream.get('codec_type') == 'audio'),
    None)
if metadata is None:
    raise IOError('No audio stream was found with ffprobe')
n_channels = int(metadata['channels'])
if sample_rate is None:
    # Note: ffprobe serializes the sample rate as a string.
    sample_rate = int(metadata['sample_rate'])
# Note: seeking options apply to the input, whereas raw format and
# sample rate describe the decoded output written to STDOUT. Forcing
# 'f32le' on the input would make FFMPEG read the file as raw PCM.
input_kwargs = {}
if duration is not None:
    input_kwargs['t'] = _to_ffmpeg_time(duration)
if offset is not None:
    input_kwargs['ss'] = _to_ffmpeg_time(offset)
output_kwargs = {'format': 'f32le', 'ar': sample_rate}
buffer, _ = (
    ffmpeg
    .input(path, **input_kwargs)
    .output('-', **output_kwargs)
    .overwrite_output()
    .run(quiet=True))
waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
if not waveform.dtype == np.dtype(dtype):
waveform = waveform.astype(dtype)
process.stdout.close()
process.stderr.close()
del process
return (waveform, sample_rate)
def save(
@@ -225,39 +98,24 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
if not os.path.exists(directory):
os.makedirs(directory)
get_logger().debug('Writing file %s', path)
# NOTE: Tweak.
if codec == 'wav':
codec = None
command = (
self._get_command_builder()
.flag('-y')
.opt('-loglevel', 'error')
.opt('-f', 'f32le')
.opt('-ar', sample_rate)
.opt('-ac', data.shape[1])
.opt('-i', '-')
.flag('-vn')
.opt('-acodec', codec)
.opt('-ar', sample_rate) # Note: why twice ?
.opt('-strict', '-2') # Note: For 'aac' codec support.
.opt('-ab', bitrate)
.flag(path)
.command())
process = subprocess.Popen(
command,
stdout=open(os.devnull, 'wb'),
stdin=subprocess.PIPE,
stderr=subprocess.PIPE)
# Write data to STDIN.
# Note: raw PCM carries no header, hence sample rate and channel count
# must be declared on the input side for FFMPEG to interpret STDIN.
input_kwargs = {'ar': sample_rate, 'ac': data.shape[1]}
# Note: 'strict' set to '-2' is for 'aac' codec support. Output format
# is not forced so FFMPEG infers the container from the path extension.
output_kwargs = {'ar': sample_rate, 'strict': '-2'}
if bitrate:
    output_kwargs['audio_bitrate'] = bitrate
# Note: 'wav' is not an FFMPEG encoder name, the container is deduced
# from the output path instead.
if codec and codec != 'wav':
    output_kwargs['codec'] = codec
# Note: overwrite is mandatory as STDIN is used for audio data and can
# not answer the overwrite prompt. STDERR is piped so that it can be
# read on failure, and log level is reduced to keep that pipe small.
process = (
    ffmpeg
    .input('pipe:', format='f32le', **input_kwargs)
    .output(path, **output_kwargs)
    .global_args('-loglevel', 'error')
    .overwrite_output()
    .run_async(pipe_stdin=True, pipe_stderr=True))
try:
    # Note: tobytes() replaces the deprecated tostring() alias.
    process.stdin.write(data.astype('<f4').tobytes())
    process.stdin.close()
    process.wait()
except IOError as error:
    raise IOError(f'FFMPEG error: {process.stderr.read()}') from error
# Clean process.
process.stdin.close()
if process.stderr is not None:
process.stderr.close()
process.wait()
del process
get_logger().info('File %s written', path)