mirror of
https://github.com/YuzuZensai/spleeter.git
synced 2026-01-31 14:58:23 +00:00
refactor: ffmpeg adapter
This commit is contained in:
@@ -9,12 +9,11 @@
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import os.path
|
|
||||||
import platform
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
import numpy as np # pylint: disable=import-error
|
# pylint: disable=import-error
|
||||||
|
import ffmpeg
|
||||||
|
import numpy as np
|
||||||
|
# pylint: enable=import-error
|
||||||
|
|
||||||
from .adapter import AudioAdapter
|
from .adapter import AudioAdapter
|
||||||
from ..logging import get_logger
|
from ..logging import get_logger
|
||||||
@@ -23,58 +22,9 @@ __email__ = 'research@deezer.com'
|
|||||||
__author__ = 'Deezer Research'
|
__author__ = 'Deezer Research'
|
||||||
__license__ = 'MIT License'
|
__license__ = 'MIT License'
|
||||||
|
|
||||||
# Default FFMPEG binary name.
|
|
||||||
_UNIX_BINARY = 'ffmpeg'
|
|
||||||
_WINDOWS_BINARY = 'ffmpeg.exe'
|
|
||||||
|
|
||||||
|
|
||||||
def _which(program):
    """ A pure python implementation of the `which` command, resolving
    a command name or path to the path of an executable file.

    @see https://stackoverflow.com/a/377028/1211342

    :param program: Program name or path to expand.
    :returns: Path of the executable if one is found, None otherwise.
              The path is returned as given (or as joined with a PATH
              entry), it is not normalized to an absolute path.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # An explicit location was given: PATH is never searched for it.
        return program if is_exe(program) else None
    # PATH may be undefined (e.g. stripped service environments); fall back
    # to the platform default search path instead of raising KeyError.
    search_path = os.environ.get('PATH', os.defpath)
    for path in search_path.split(os.pathsep):
        candidate = os.path.join(path, program)
        if is_exe(candidate):
            return candidate
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def _get_ffmpeg_path():
    """ Resolve the location of the FFMPEG binary.

    The FFMPEG_PATH environment variable is used when it is defined,
    otherwise the default binary name for the running platform
    (Windows or UNIX style) is looked up.

    :returns: Path of the FFMPEG binary as resolved by `_which`.
    :raise IOError: If FFMPEG binary cannot be found.
    """
    ffmpeg_path = os.environ.get('FFMPEG_PATH', None)
    if ffmpeg_path is None:
        # No override provided: pick the standard name for this platform.
        running_on_windows = platform.system() == 'Windows'
        ffmpeg_path = _WINDOWS_BINARY if running_on_windows else _UNIX_BINARY
    resolved = _which(ffmpeg_path)
    if resolved is not None:
        return resolved
    raise IOError(f'FFMPEG binary ({ffmpeg_path}) not found')
|
|
||||||
|
|
||||||
|
|
||||||
def _to_ffmpeg_time(n):
|
def _to_ffmpeg_time(n):
|
||||||
""" Format number of seconds to time expected by FFMPEG.
|
""" Format number of seconds to time expected by FFMPEG.
|
||||||
|
|
||||||
:param n: Time in seconds to format.
|
:param n: Time in seconds to format.
|
||||||
:returns: Formatted time in FFMPEG format.
|
:returns: Formatted time in FFMPEG format.
|
||||||
"""
|
"""
|
||||||
@@ -83,56 +33,6 @@ def _to_ffmpeg_time(n):
|
|||||||
return '%d:%02d:%09.6f' % (h, m, s)
|
return '%d:%02d:%09.6f' % (h, m, s)
|
||||||
|
|
||||||
|
|
||||||
def _parse_ffmpg_results(stderr):
    """ Extract number of channels and sample rate from
    the given FFMPEG STDERR output line.

    The patterns are lowercase, so the line is expected to be lowercased
    by the caller (e.g. '... audio: mp3, 44100 hz, stereo, fltp, ...').

    :param stderr: STDERR output line to parse.
    :returns: Parsed (n_channels, sample_rate) tuple. Each value is 0 when
              the corresponding information is not present in the line.
    """
    # Channel layout names that do not spell out their channel count.
    named_layouts = {
        'mono': 1,
        'stereo': 2,
        'downmix': 2,
        'quad': 4,
        'hexagonal': 6,
        'octagonal': 8}
    # Sample rate.
    sample_rate = 0
    rate_match = re.search(r'(\d+) hz', stderr)
    if rate_match:
        sample_rate = int(rate_match.group(1))
    # Channel count, taken from the field following the sample rate.
    layout_match = re.search(r'hz, ([^,]+),', stderr)
    if layout_match is None:
        return 0, sample_rate
    mode = layout_match.group(1)
    # Drop variant suffixes such as '(side)' or '(back)'.
    layout = mode.split('(', 1)[0].strip()
    if layout in named_layouts:
        return named_layouts[layout], sample_rate
    # Dotted layouts ('5.1', '2.1', '4.0'): full-range channels + LFE.
    dotted = re.fullmatch(r'(\d+)\.(\d+)', layout)
    if dotted:
        return int(dotted.group(1)) + int(dotted.group(2)), sample_rate
    # Explicit count such as '3 channels'; anything unknown is
    # treated as a single channel.
    counted = re.match(r'(\d+) ', mode)
    n_channels = int(counted.group(1)) if counted else 0
    return n_channels or 1, sample_rate
|
|
||||||
|
|
||||||
|
|
||||||
class _CommandBuilder:
    """ A simple builder pattern class for CLI argument lists. """

    def __init__(self, binary):
        """ Default constructor.

        :param binary: Program name or path used as first argument.
        """
        self._command = [binary]

    def flag(self, flag):
        """ Add flag or unlabelled opt.

        :param flag: Argument appended as is.
        :returns: This builder, to allow call chaining.
        """
        self._command.append(flag)
        return self

    def opt(self, short, value, formatter=str):
        """ Add option if value not None.

        :param short: Option name (e.g. '-ar').
        :param value: Option value, the option is skipped when None.
        :param formatter: Callable applied to value before it is appended.
        :returns: This builder, to allow call chaining.
        """
        if value is not None:
            self._command.append(short)
            self._command.append(formatter(value))
        return self

    def command(self):
        """ Build the command as a list of arguments.

        :returns: A new list, so that mutating it can not alter
                  the state of this builder.
        """
        return list(self._command)
|
|
||||||
|
|
||||||
|
|
||||||
class FFMPEGProcessAudioAdapter(AudioAdapter):
|
class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||||
""" An AudioAdapter implementation that use FFMPEG binary through
|
""" An AudioAdapter implementation that use FFMPEG binary through
|
||||||
subprocess in order to perform I/O operation for audio processing.
|
subprocess in order to perform I/O operation for audio processing.
|
||||||
@@ -142,17 +42,6 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
|||||||
FFMPEG_PATH environment variable.
|
FFMPEG_PATH environment variable.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
""" Default constructor. """
|
|
||||||
self._ffmpeg_path = _get_ffmpeg_path()
|
|
||||||
|
|
||||||
def _get_command_builder(self):
|
|
||||||
""" Creates and returns a command builder using FFMPEG path.
|
|
||||||
|
|
||||||
:returns: Built command builder.
|
|
||||||
"""
|
|
||||||
return _CommandBuilder(self._ffmpeg_path)
|
|
||||||
|
|
||||||
def load(
|
def load(
|
||||||
self, path, offset=None, duration=None,
|
self, path, offset=None, duration=None,
|
||||||
sample_rate=None, dtype=np.float32):
|
sample_rate=None, dtype=np.float32):
|
||||||
@@ -166,46 +55,30 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
|||||||
:param dtype: (Optional) Numpy data type to use, default to float32.
|
:param dtype: (Optional) Numpy data type to use, default to float32.
|
||||||
:returns: Loaded data a (waveform, sample_rate) tuple.
|
:returns: Loaded data a (waveform, sample_rate) tuple.
|
||||||
"""
|
"""
|
||||||
if not isinstance(path, str):
|
probe = ffmpeg.probe(path)
|
||||||
path = path.decode()
|
if 'streams' not in probe or len(probe['streams']) == 0:
|
||||||
command = (
|
raise IOError('No stream was found with ffprobe')
|
||||||
self._get_command_builder()
|
metadata = next(
|
||||||
.opt('-ss', offset, formatter=_to_ffmpeg_time)
|
stream
|
||||||
.opt('-t', duration, formatter=_to_ffmpeg_time)
|
for stream in probe['stream']
|
||||||
.opt('-i', path)
|
if stream['codec_type'] == 'audio')
|
||||||
.opt('-ar', sample_rate)
|
n_channels = metadata['channels']
|
||||||
.opt('-f', 'f32le')
|
if sample_rate is None:
|
||||||
.flag('-')
|
sample_rate = metadata['sample_rate']
|
||||||
.command())
|
input_kwargs = {'format': 'f32le', 'ar': sample_rate}
|
||||||
process = subprocess.Popen(
|
if duration is not None:
|
||||||
command,
|
input_kwargs['t'] = _to_ffmpeg_time(duration)
|
||||||
stdout=subprocess.PIPE,
|
if offset is not None:
|
||||||
stderr=subprocess.PIPE)
|
input_kwargs['ss'] = _to_ffmpeg_time(offset)
|
||||||
buffer = process.stdout.read(-1)
|
buffer, _ = (
|
||||||
# Read STDERR until end of the process detected.
|
ffmpeg
|
||||||
while True:
|
.input(path, **input_kwargs)
|
||||||
status = process.stderr.readline()
|
.output('-', format='f32le')
|
||||||
if not status:
|
.overwrite_output()
|
||||||
raise OSError('Stream info not found')
|
.run(quiet=True))
|
||||||
if isinstance(status, bytes): # Note: Python 3 compatibility.
|
|
||||||
status = status.decode('utf8', 'ignore')
|
|
||||||
status = status.strip().lower()
|
|
||||||
if 'no such file' in status:
|
|
||||||
raise IOError(f'File {path} not found')
|
|
||||||
elif 'invalid data found' in status:
|
|
||||||
raise IOError(f'FFMPEG error : {status}')
|
|
||||||
elif 'audio:' in status:
|
|
||||||
n_channels, ffmpeg_sample_rate = _parse_ffmpg_results(status)
|
|
||||||
if sample_rate is None:
|
|
||||||
sample_rate = ffmpeg_sample_rate
|
|
||||||
break
|
|
||||||
# Load waveform and clean process.
|
|
||||||
waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
|
waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
|
||||||
if not waveform.dtype == np.dtype(dtype):
|
if not waveform.dtype == np.dtype(dtype):
|
||||||
waveform = waveform.astype(dtype)
|
waveform = waveform.astype(dtype)
|
||||||
process.stdout.close()
|
|
||||||
process.stderr.close()
|
|
||||||
del process
|
|
||||||
return (waveform, sample_rate)
|
return (waveform, sample_rate)
|
||||||
|
|
||||||
def save(
|
def save(
|
||||||
@@ -225,39 +98,24 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
|||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
get_logger().debug('Writing file %s', path)
|
get_logger().debug('Writing file %s', path)
|
||||||
# NOTE: Tweak.
|
output_kwargs = {
|
||||||
if codec == 'wav':
|
'ar': sample_rate,
|
||||||
codec = None
|
'ac': data.shape[1],
|
||||||
command = (
|
'strict': '-2'}
|
||||||
self._get_command_builder()
|
if bitrate:
|
||||||
.flag('-y')
|
output_kwargs['audio_bitrate'] = bitrate
|
||||||
.opt('-loglevel', 'error')
|
if codec:
|
||||||
.opt('-f', 'f32le')
|
output_kwargs['codec'] = codec
|
||||||
.opt('-ar', sample_rate)
|
process = (
|
||||||
.opt('-ac', data.shape[1])
|
ffmpeg
|
||||||
.opt('-i', '-')
|
.input('pipe:', format='f32le')
|
||||||
.flag('-vn')
|
.output(path, format='f32le', **output_kwargs)
|
||||||
.opt('-acodec', codec)
|
.run_async(pipe_stdin=True))
|
||||||
.opt('-ar', sample_rate) # Note: why twice ?
|
|
||||||
.opt('-strict', '-2') # Note: For 'aac' codec support.
|
|
||||||
.opt('-ab', bitrate)
|
|
||||||
.flag(path)
|
|
||||||
.command())
|
|
||||||
process = subprocess.Popen(
|
|
||||||
command,
|
|
||||||
stdout=open(os.devnull, 'wb'),
|
|
||||||
stdin=subprocess.PIPE,
|
|
||||||
stderr=subprocess.PIPE)
|
|
||||||
# Write data to STDIN.
|
|
||||||
try:
|
try:
|
||||||
process.stdin.write(
|
process.stdin.write(
|
||||||
data.astype('<f4').tostring())
|
data.astype('<f4').tostring())
|
||||||
|
process.stdin.close()
|
||||||
|
process.wait()
|
||||||
except IOError:
|
except IOError:
|
||||||
raise IOError(f'FFMPEG error: {process.stderr.read()}')
|
raise IOError(f'FFMPEG error: {process.stderr.read()}')
|
||||||
# Clean process.
|
|
||||||
process.stdin.close()
|
|
||||||
if process.stderr is not None:
|
|
||||||
process.stderr.close()
|
|
||||||
process.wait()
|
|
||||||
del process
|
|
||||||
get_logger().info('File %s written', path)
|
get_logger().info('File %s written', path)
|
||||||
|
|||||||
Reference in New Issue
Block a user