mirror of
https://github.com/YuzuZensai/spleeter.git
synced 2026-01-30 12:22:58 +00:00
refactor: ffmpeg adapter
This commit is contained in:
@@ -9,12 +9,11 @@
|
||||
"""
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
import numpy as np # pylint: disable=import-error
|
||||
# pylint: disable=import-error
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
# pylint: enable=import-error
|
||||
|
||||
from .adapter import AudioAdapter
|
||||
from ..logging import get_logger
|
||||
@@ -23,58 +22,9 @@ __email__ = 'research@deezer.com'
|
||||
__author__ = 'Deezer Research'
|
||||
__license__ = 'MIT License'
|
||||
|
||||
# Default FFMPEG binary name.
|
||||
_UNIX_BINARY = 'ffmpeg'
|
||||
_WINDOWS_BINARY = 'ffmpeg.exe'
|
||||
|
||||
|
||||
def _which(program):
    """ A pure python implementation of the `which` command
    for resolving an executable from a command name or a path.

    @see https://stackoverflow.com/a/377028/1211342

    :param program: Program name or path to expand.
    :returns: Path of the executable if any (the given path itself when
              it contains a directory part, otherwise the first matching
              search path entry joined with the name), None otherwise.
    """
    def is_exe(fpath):
        # Only a regular file the current user may execute is accepted.
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # An explicit path is checked as is and never searched in PATH.
        return program if is_exe(program) else None
    # PATH can be undefined (stripped down environment): fall back to
    # the platform default search path instead of raising a KeyError.
    search_path = os.environ.get('PATH', os.defpath)
    for path in search_path.split(os.pathsep):
        candidate = os.path.join(path, program)
        if is_exe(candidate):
            return candidate
    return None
|
||||
|
||||
|
||||
def _get_ffmpeg_path():
    """ Resolves the FFMPEG binary to use.

    The FFMPEG_PATH environment variable is used when defined, otherwise
    the default binary name of the running platform (Windows or UNIX
    style) is looked up.

    :returns: Path of FFMPEG binary as resolved by `_which`.
    :raise IOError: If FFMPEG binary cannot be found.
    """
    ffmpeg_path = os.environ.get('FFMPEG_PATH')
    if ffmpeg_path is None:
        # Note: no override given, infer the standard binary name
        #       from the running platform.
        ffmpeg_path = (
            _WINDOWS_BINARY
            if platform.system() == 'Windows'
            else _UNIX_BINARY)
    resolved = _which(ffmpeg_path)
    if resolved is not None:
        return resolved
    raise IOError(f'FFMPEG binary ({ffmpeg_path}) not found')
|
||||
|
||||
|
||||
def _to_ffmpeg_time(n):
|
||||
""" Format number of seconds to time expected by FFMPEG.
|
||||
|
||||
:param n: Time in seconds to format.
|
||||
:returns: Formatted time in FFMPEG format.
|
||||
"""
|
||||
@@ -83,56 +33,6 @@ def _to_ffmpeg_time(n):
|
||||
return '%d:%02d:%09.6f' % (h, m, s)
|
||||
|
||||
|
||||
def _parse_ffmpg_results(stderr):
    """ Extract number of channels and sample rate from
    the given FFMPEG STDERR output line.

    The line is expected to be lower cased already, e.g.
    `stream #0:0: audio: mp3, 44100 hz, stereo, fltp, 128 kb/s`.

    :param stderr: STDERR output line to parse.
    :returns: Parsed n_channels and sample_rate values, each one being
              0 when the related information is not found in the line.
    """
    # Channel count of the layout names FFMPEG prints instead of an
    # explicit "N channels" description.
    layouts = {
        'mono': 1, 'stereo': 2, 'downmix': 2,
        '2.1': 3, '3.0': 3,
        '3.1': 4, '4.0': 4, 'quad': 4,
        '4.1': 5, '5.0': 5,
        '5.1': 6, '6.0': 6, 'hexagonal': 6,
        '6.1': 7, '7.0': 7,
        '7.1': 8, 'octagonal': 8,
        'hexadecagonal': 16}
    # Setup default value.
    n_channels = 0
    sample_rate = 0
    # Find samplerate.
    match = re.search(r'(\d+) hz', stderr)
    if match:
        sample_rate = int(match.group(1))
    # Channel count.
    match = re.search(r'hz, ([^,]+),', stderr)
    if match:
        mode = match.group(1).strip()
        # Variants such as "5.1(side)" or "quad(side)" have the same
        # channel count as their base layout.
        layout = mode.split('(', 1)[0]
        if layout in layouts:
            n_channels = layouts[layout]
        else:
            # Non standard layouts are printed as "N channels".
            explicit = re.match(r'(\d+) ', mode)
            count = int(explicit.group(1)) if explicit else 0
            # Unknown description: assume a single channel.
            n_channels = count or 1
    return n_channels, sample_rate
|
||||
|
||||
|
||||
class _CommandBuilder(object):
    """ A simple builder pattern class for CLI argument list.

    Every mutator returns the builder itself so calls can be chained.
    """

    def __init__(self, binary):
        """ Default constructor.

        :param binary: Binary name or path used as first argument.
        """
        self._command = [binary]

    def flag(self, flag):
        """ Add flag or unlabelled opt.

        :param flag: Argument appended as is to the command.
        :returns: This builder.
        """
        self._command.append(flag)
        return self

    def opt(self, short, value, formatter=str):
        """ Add option if value not None.

        :param short: Option name (e.g. `-ar`).
        :param value: Option value, the option is skipped when None.
        :param formatter: (Optional) Callable converting value into the
                          argument to append, default to str.
        :returns: This builder.
        """
        if value is not None:
            self._command.append(short)
            self._command.append(formatter(value))
        return self

    def command(self):
        """ Build command as a list of arguments.

        :returns: A new list holding the arguments added so far.
        """
        # Return a copy so that reusing the builder afterwards cannot
        # alter a command that was already handed out.
        return list(self._command)
|
||||
|
||||
|
||||
class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
""" An AudioAdapter implementation that use FFMPEG binary through
|
||||
subprocess in order to perform I/O operation for audio processing.
|
||||
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
FFMPEG_PATH environment variable.
|
||||
"""
|
||||
|
||||
def __init__(self):
    """ Default constructor, resolves the FFMPEG binary once and
    keeps its path for the lifetime of the adapter.
    """
    binary_path = _get_ffmpeg_path()
    self._ffmpeg_path = binary_path
|
||||
|
||||
def _get_command_builder(self):
    """ Instantiates a command builder whose first argument is the
    FFMPEG path stored on this adapter.

    :returns: Built command builder.
    """
    builder = _CommandBuilder(self._ffmpeg_path)
    return builder
|
||||
|
||||
def load(
|
||||
self, path, offset=None, duration=None,
|
||||
sample_rate=None, dtype=np.float32):
|
||||
@@ -166,46 +55,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
:param dtype: (Optional) Numpy data type to use, default to float32.
|
||||
:returns: Loaded data a (waveform, sample_rate) tuple.
|
||||
"""
|
||||
if not isinstance(path, str):
|
||||
path = path.decode()
|
||||
command = (
|
||||
self._get_command_builder()
|
||||
.opt('-ss', offset, formatter=_to_ffmpeg_time)
|
||||
.opt('-t', duration, formatter=_to_ffmpeg_time)
|
||||
.opt('-i', path)
|
||||
.opt('-ar', sample_rate)
|
||||
.opt('-f', 'f32le')
|
||||
.flag('-')
|
||||
.command())
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
buffer = process.stdout.read(-1)
|
||||
# Read STDERR until end of the process detected.
|
||||
while True:
|
||||
status = process.stderr.readline()
|
||||
if not status:
|
||||
raise OSError('Stream info not found')
|
||||
if isinstance(status, bytes): # Note: Python 3 compatibility.
|
||||
status = status.decode('utf8', 'ignore')
|
||||
status = status.strip().lower()
|
||||
if 'no such file' in status:
|
||||
raise IOError(f'File {path} not found')
|
||||
elif 'invalid data found' in status:
|
||||
raise IOError(f'FFMPEG error : {status}')
|
||||
elif 'audio:' in status:
|
||||
n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status)
|
||||
if sample_rate is None:
|
||||
sample_rate = ffmpeg_sample_rate
|
||||
break
|
||||
# Load waveform and clean process.
|
||||
probe = ffmpeg.probe(path)
|
||||
if 'streams' not in probe or len(probe['streams']) == 0:
|
||||
raise IOError('No stream was found with ffprobe')
|
||||
metadata = next(
|
||||
stream
|
||||
for stream in probe['stream']
|
||||
if stream['codec_type'] == 'audio')
|
||||
n_channels = metadata['channels']
|
||||
if sample_rate is None:
|
||||
sample_rate = metadata['sample_rate']
|
||||
input_kwargs = {'format': 'f32le', 'ar': sample_rate}
|
||||
if duration is not None:
|
||||
input_kwargs['t'] = _to_ffmpeg_time(duration)
|
||||
if offset is not None:
|
||||
input_kwargs['ss'] = _to_ffmpeg_time(offset)
|
||||
buffer, _ = (
|
||||
ffmpeg
|
||||
.input(path, **input_kwargs)
|
||||
.output('-', format='f32le')
|
||||
.overwrite_output()
|
||||
.run(quiet=True))
|
||||
waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
|
||||
if not waveform.dtype == np.dtype(dtype):
|
||||
waveform = waveform.astype(dtype)
|
||||
process.stdout.close()
|
||||
process.stderr.close()
|
||||
del process
|
||||
return (waveform, sample_rate)
|
||||
|
||||
def save(
|
||||
@@ -225,39 +98,24 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||
if not os.path.exists(directory):
|
||||
os.makedirs(directory)
|
||||
get_logger().debug('Writing file %s', path)
|
||||
# NOTE: Tweak.
|
||||
if codec == 'wav':
|
||||
codec = None
|
||||
command = (
|
||||
self._get_command_builder()
|
||||
.flag('-y')
|
||||
.opt('-loglevel', 'error')
|
||||
.opt('-f', 'f32le')
|
||||
.opt('-ar', sample_rate)
|
||||
.opt('-ac', data.shape[1])
|
||||
.opt('-i', '-')
|
||||
.flag('-vn')
|
||||
.opt('-acodec', codec)
|
||||
.opt('-ar', sample_rate) # Note: why twice ?
|
||||
.opt('-strict', '-2') # Note: For 'aac' codec support.
|
||||
.opt('-ab', bitrate)
|
||||
.flag(path)
|
||||
.command())
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
stdout=open(os.devnull, 'wb'),
|
||||
stdin=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
# Write data to STDIN.
|
||||
output_kwargs = {
|
||||
'ar': sample_rate,
|
||||
'ac': data.shape[1],
|
||||
'strict': '-2'}
|
||||
if bitrate:
|
||||
output_kwargs['audio_bitrate'] = bitrate
|
||||
if codec:
|
||||
output_kwargs['codec'] = codec
|
||||
process = (
|
||||
ffmpeg
|
||||
.input('pipe:', format='f32le')
|
||||
.output(path, format='f32le', **output_kwargs)
|
||||
.run_async(pipe_stdin=True))
|
||||
try:
|
||||
process.stdin.write(
|
||||
data.astype('<f4').tostring())
|
||||
process.stdin.close()
|
||||
process.wait()
|
||||
except IOError:
|
||||
raise IOError(f'FFMPEG error: {process.stderr.read()}')
|
||||
# Clean process.
|
||||
process.stdin.close()
|
||||
if process.stderr is not None:
|
||||
process.stderr.close()
|
||||
process.wait()
|
||||
del process
|
||||
get_logger().info('File %s written', path)
|
||||
|
||||
Reference in New Issue
Block a user