mirror of
https://github.com/YuzuZensai/spleeter.git
synced 2026-01-31 14:58:23 +00:00
🎨 finalizes audio package
This commit is contained in:
@@ -7,7 +7,7 @@ from abc import ABC, abstractmethod
|
|||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from spleeter.audio import Codec
|
from spleeter.audio import Codec
|
||||||
from typing import Any, Dict, List, Union
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
from .. import SpleeterError
|
from .. import SpleeterError
|
||||||
from ..types import AudioDescriptor, Signal
|
from ..types import AudioDescriptor, Signal
|
||||||
@@ -35,9 +35,9 @@ class AudioAdapter(ABC):
|
|||||||
def load(
|
def load(
|
||||||
self,
|
self,
|
||||||
audio_descriptor: AudioDescriptor,
|
audio_descriptor: AudioDescriptor,
|
||||||
offset: float,
|
offset: Optional[float] = None,
|
||||||
duration: float,
|
duration: Optional[float] = None,
|
||||||
sample_rate: float,
|
sample_rate: Optional[float] = None,
|
||||||
dtype: np.dtype = np.float32) -> Signal:
|
dtype: np.dtype = np.float32) -> Signal:
|
||||||
"""
|
"""
|
||||||
Loads the audio file denoted by the given audio descriptor and
|
Loads the audio file denoted by the given audio descriptor and
|
||||||
@@ -47,11 +47,11 @@ class AudioAdapter(ABC):
|
|||||||
audio_descriptor (AudioDescriptor):
|
audio_descriptor (AudioDescriptor):
|
||||||
Describe song to load, in case of file based audio adapter,
|
Describe song to load, in case of file based audio adapter,
|
||||||
such descriptor would be a file path.
|
such descriptor would be a file path.
|
||||||
offset (float):
|
offset (Optional[float]):
|
||||||
Start offset to load from in seconds.
|
Start offset to load from in seconds.
|
||||||
duration (float):
|
duration (Optional[float]):
|
||||||
Duration to load in seconds.
|
Duration to load in seconds.
|
||||||
sample_rate (float):
|
sample_rate (Optional[float]):
|
||||||
Sample rate to load audio with.
|
Sample rate to load audio with.
|
||||||
dtype (numpy.dtype):
|
dtype (numpy.dtype):
|
||||||
(Optional) Numpy data type to use, default to `float32`.
|
(Optional) Numpy data type to use, default to `float32`.
|
||||||
@@ -136,7 +136,7 @@ class AudioAdapter(ABC):
|
|||||||
data: np.ndarray,
|
data: np.ndarray,
|
||||||
sample_rate: float,
|
sample_rate: float,
|
||||||
codec: Codec = None,
|
codec: Codec = None,
|
||||||
bitrate: str = None):
|
bitrate: str = None) -> None:
|
||||||
"""
|
"""
|
||||||
Save the given audio data to the file denoted by the given path.
|
Save the given audio data to the file denoted by the given path.
|
||||||
|
|
||||||
|
|||||||
@@ -8,76 +8,92 @@
|
|||||||
used within this library.
|
used within this library.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import datetime as dt
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Optional, Union
|
||||||
|
|
||||||
|
from . import Codec
|
||||||
|
from .adapter import AudioAdapter
|
||||||
|
from .. import SpleeterError
|
||||||
|
from ..types import Signal
|
||||||
|
from ..utils.logging import get_logger
|
||||||
|
|
||||||
|
# pyright: reportMissingImports=false
|
||||||
# pylint: disable=import-error
|
# pylint: disable=import-error
|
||||||
import ffmpeg
|
import ffmpeg
|
||||||
import numpy as np
|
import numpy as np
|
||||||
# pylint: enable=import-error
|
# pylint: enable=import-error
|
||||||
|
|
||||||
from .adapter import AudioAdapter
|
|
||||||
from .. import SpleeterError
|
|
||||||
from ..utils.logging import get_logger
|
|
||||||
|
|
||||||
__email__ = 'spleeter@deezer.com'
|
__email__ = 'spleeter@deezer.com'
|
||||||
__author__ = 'Deezer Research'
|
__author__ = 'Deezer Research'
|
||||||
__license__ = 'MIT License'
|
__license__ = 'MIT License'
|
||||||
|
|
||||||
|
|
||||||
def _check_ffmpeg_install():
|
|
||||||
""" Ensure FFMPEG binaries are available.
|
|
||||||
|
|
||||||
:raise SpleeterError: If ffmpeg or ffprobe is not found.
|
|
||||||
"""
|
|
||||||
for binary in ('ffmpeg', 'ffprobe'):
|
|
||||||
if shutil.which(binary) is None:
|
|
||||||
raise SpleeterError('{} binary not found'.format(binary))
|
|
||||||
|
|
||||||
|
|
||||||
def _to_ffmpeg_time(n):
|
|
||||||
""" Format number of seconds to time expected by FFMPEG.
|
|
||||||
:param n: Time in seconds to format.
|
|
||||||
:returns: Formatted time in FFMPEG format.
|
|
||||||
"""
|
|
||||||
m, s = divmod(n, 60)
|
|
||||||
h, m = divmod(m, 60)
|
|
||||||
return '%d:%02d:%09.6f' % (h, m, s)
|
|
||||||
|
|
||||||
|
|
||||||
def _to_ffmpeg_codec(codec):
|
|
||||||
ffmpeg_codecs = {
|
|
||||||
'm4a': 'aac',
|
|
||||||
'ogg': 'libvorbis',
|
|
||||||
'wma': 'wmav2',
|
|
||||||
}
|
|
||||||
return ffmpeg_codecs.get(codec) or codec
|
|
||||||
|
|
||||||
|
|
||||||
class FFMPEGProcessAudioAdapter(AudioAdapter):
|
class FFMPEGProcessAudioAdapter(AudioAdapter):
|
||||||
""" An AudioAdapter implementation that use FFMPEG binary through
|
|
||||||
subprocess in order to perform I/O operation for audio processing.
|
|
||||||
|
|
||||||
When created, FFMPEG binary path will be checked and expended,
|
|
||||||
raising exception if not found. Such path could be infered using
|
|
||||||
FFMPEG_PATH environment variable.
|
|
||||||
"""
|
"""
|
||||||
|
An AudioAdapter implementation that uses the FFMPEG binary through
|
||||||
|
subprocess in order to perform I/O operation for audio processing.
|
||||||
|
|
||||||
|
When created, FFMPEG binary path will be checked and expanded,
|
||||||
|
raising exception if not found. Such path could be inferred using
|
||||||
|
`FFMPEG_PATH` environment variable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
SUPPORTED_CODECS: Dict[Codec, str] = {
|
||||||
|
Codec.M4A: 'aac',
|
||||||
|
Codec.OGG: 'libvorbis',
|
||||||
|
Codec.WMA: 'wmav2'
|
||||||
|
}
|
||||||
|
""" FFMPEG codec name mapping. """
|
||||||
|
|
||||||
|
def __init__(_) -> None:
|
||||||
|
"""
|
||||||
|
Default constructor, ensure FFMPEG binaries are available.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SpleeterError:
|
||||||
|
If ffmpeg or ffprobe is not found.
|
||||||
|
"""
|
||||||
|
for binary in ('ffmpeg', 'ffprobe'):
|
||||||
|
if shutil.which(binary) is None:
|
||||||
|
raise SpleeterError('{} binary not found'.format(binary))
|
||||||
|
|
||||||
def load(
|
def load(
|
||||||
self, path, offset=None, duration=None,
|
_,
|
||||||
sample_rate=None, dtype=np.float32):
|
path: Union[Path, str],
|
||||||
""" Loads the audio file denoted by the given path
|
offset: Optional[float] = None,
|
||||||
and returns it data as a waveform.
|
duration: Optional[float] = None,
|
||||||
|
sample_rate: Optional[float] = None,
|
||||||
:param path: Path of the audio file to load data from.
|
dtype: np.dtype = np.float32) -> Signal:
|
||||||
:param offset: (Optional) Start offset to load from in seconds.
|
|
||||||
:param duration: (Optional) Duration to load in seconds.
|
|
||||||
:param sample_rate: (Optional) Sample rate to load audio with.
|
|
||||||
:param dtype: (Optional) Numpy data type to use, default to float32.
|
|
||||||
:returns: Loaded data a (waveform, sample_rate) tuple.
|
|
||||||
:raise SpleeterError: If any error occurs while loading audio.
|
|
||||||
"""
|
"""
|
||||||
_check_ffmpeg_install()
|
Loads the audio file denoted by the given path
|
||||||
|
and returns its data as a waveform.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
path (Union[Path, str]):
|
||||||
|
Path of the audio file to load data from.
|
||||||
|
offset (Optional[float]):
|
||||||
|
Start offset to load from in seconds.
|
||||||
|
duration (Optional[float]):
|
||||||
|
Duration to load in seconds.
|
||||||
|
sample_rate (Optional[float]):
|
||||||
|
Sample rate to load audio with.
|
||||||
|
dtype (numpy.dtype):
|
||||||
|
(Optional) Numpy data type to use, default to `float32`.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Signal:
|
||||||
|
Loaded data as a (waveform, sample_rate) tuple.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SpleeterError:
|
||||||
|
If any error occurs while loading audio.
|
||||||
|
"""
|
||||||
|
if isinstance(path, Path):
|
||||||
|
path = str(path)
|
||||||
if not isinstance(path, str):
|
if not isinstance(path, str):
|
||||||
path = path.decode()
|
path = path.decode()
|
||||||
try:
|
try:
|
||||||
@@ -97,9 +113,9 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
|||||||
sample_rate = metadata['sample_rate']
|
sample_rate = metadata['sample_rate']
|
||||||
output_kwargs = {'format': 'f32le', 'ar': sample_rate}
|
output_kwargs = {'format': 'f32le', 'ar': sample_rate}
|
||||||
if duration is not None:
|
if duration is not None:
|
||||||
output_kwargs['t'] = _to_ffmpeg_time(duration)
|
output_kwargs['t'] = str(dt.timedelta(seconds=duration))
|
||||||
if offset is not None:
|
if offset is not None:
|
||||||
output_kwargs['ss'] = _to_ffmpeg_time(offset)
|
output_kwargs['ss'] = str(dt.timedelta(seconds=offset))
|
||||||
process = (
|
process = (
|
||||||
ffmpeg
|
ffmpeg
|
||||||
.input(path)
|
.input(path)
|
||||||
@@ -112,29 +128,46 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
|||||||
return (waveform, sample_rate)
|
return (waveform, sample_rate)
|
||||||
|
|
||||||
def save(
|
def save(
|
||||||
self, path, data, sample_rate,
|
self,
|
||||||
codec=None, bitrate=None):
|
path: Union[Path, str],
|
||||||
""" Write waveform data to the file denoted by the given path
|
data: np.ndarray,
|
||||||
using FFMPEG process.
|
sample_rate: float,
|
||||||
|
codec: Codec = None,
|
||||||
:param path: Path of the audio file to save data in.
|
bitrate: str = None) -> None:
|
||||||
:param data: Waveform data to write.
|
|
||||||
:param sample_rate: Sample rate to write file in.
|
|
||||||
:param codec: (Optional) Writing codec to use.
|
|
||||||
:param bitrate: (Optional) Bitrate of the written audio file.
|
|
||||||
:raise IOError: If any error occurs while using FFMPEG to write data.
|
|
||||||
"""
|
"""
|
||||||
_check_ffmpeg_install()
|
Write waveform data to the file denoted by the given path using
|
||||||
|
FFMPEG process.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
path (Union[Path, str]):
|
||||||
|
Path like of the audio file to save data in.
|
||||||
|
data (numpy.ndarray):
|
||||||
|
Waveform data to write.
|
||||||
|
sample_rate (float):
|
||||||
|
Sample rate to write file in.
|
||||||
|
codec (Codec):
|
||||||
|
(Optional) Writing codec to use, default to `None`.
|
||||||
|
bitrate (str):
|
||||||
|
(Optional) Bitrate of the written audio file, default to
|
||||||
|
`None`.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
IOError:
|
||||||
|
If any error occurs while using FFMPEG to write data.
|
||||||
|
"""
|
||||||
|
if isinstance(path, Path):
|
||||||
|
path = str(path)
|
||||||
directory = os.path.dirname(path)
|
directory = os.path.dirname(path)
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
raise SpleeterError(f'output directory does not exists: {directory}')
|
raise SpleeterError(
|
||||||
get_logger().debug('Writing file %s', path)
|
f'output directory does not exists: {directory}')
|
||||||
|
get_logger().debug(f'Writing file {path}')
|
||||||
input_kwargs = {'ar': sample_rate, 'ac': data.shape[1]}
|
input_kwargs = {'ar': sample_rate, 'ac': data.shape[1]}
|
||||||
output_kwargs = {'ar': sample_rate, 'strict': '-2'}
|
output_kwargs = {'ar': sample_rate, 'strict': '-2'}
|
||||||
if bitrate:
|
if bitrate:
|
||||||
output_kwargs['audio_bitrate'] = bitrate
|
output_kwargs['audio_bitrate'] = bitrate
|
||||||
if codec is not None and codec != 'wav':
|
if codec is not None and codec != 'wav':
|
||||||
output_kwargs['codec'] = _to_ffmpeg_codec(codec)
|
output_kwargs['codec'] = self.SUPPORTED_CODECS.get(codec, codec)
|
||||||
process = (
|
process = (
|
||||||
ffmpeg
|
ffmpeg
|
||||||
.input('pipe:', format='f32le', **input_kwargs)
|
.input('pipe:', format='f32le', **input_kwargs)
|
||||||
@@ -147,4 +180,4 @@ class FFMPEGProcessAudioAdapter(AudioAdapter):
|
|||||||
process.wait()
|
process.wait()
|
||||||
except IOError:
|
except IOError:
|
||||||
raise SpleeterError(f'FFMPEG error: {process.stderr.read()}')
|
raise SpleeterError(f'FFMPEG error: {process.stderr.read()}')
|
||||||
get_logger().info('File %s written succesfully', path)
|
get_logger().info(f'File {path} written succesfully')
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
|
|
||||||
""" Spectrogram specific data augmentation """
|
""" Spectrogram specific data augmentation. """
|
||||||
|
|
||||||
# pyright: reportMissingImports=false
|
# pyright: reportMissingImports=false
|
||||||
# pylint: disable=import-error
|
# pylint: disable=import-error
|
||||||
@@ -17,25 +17,35 @@ __license__ = 'MIT License'
|
|||||||
|
|
||||||
|
|
||||||
def compute_spectrogram_tf(
|
def compute_spectrogram_tf(
|
||||||
waveform,
|
waveform: tf.Tensor,
|
||||||
frame_length=2048, frame_step=512,
|
frame_length: int = 2048,
|
||||||
spec_exponent=1., window_exponent=1.):
|
frame_step: int = 512,
|
||||||
""" Compute magnitude / power spectrogram from waveform as
|
spec_exponent: float = 1.,
|
||||||
a n_samples x n_channels tensor.
|
window_exponent: float = 1.) -> tf.Tensor:
|
||||||
|
|
||||||
:param waveform: Input waveform as (times x number of channels)
|
|
||||||
tensor.
|
|
||||||
:param frame_length: Length of a STFT frame to use.
|
|
||||||
:param frame_step: HOP between successive frames.
|
|
||||||
:param spec_exponent: Exponent of the spectrogram (usually 1 for
|
|
||||||
magnitude spectrogram, or 2 for power spectrogram).
|
|
||||||
:param window_exponent: Exponent applied to the Hann windowing function
|
|
||||||
(may be useful for making perfect STFT/iSTFT
|
|
||||||
reconstruction).
|
|
||||||
:returns: Computed magnitude / power spectrogram as a
|
|
||||||
(T x F x n_channels) tensor.
|
|
||||||
"""
|
"""
|
||||||
stft_tensor = tf.transpose(
|
Compute magnitude / power spectrogram from a waveform given as a
|
||||||
|
`n_samples x n_channels` tensor.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
waveform (tensorflow.Tensor):
|
||||||
|
Input waveform as `(times x number of channels)` tensor.
|
||||||
|
frame_length (int):
|
||||||
|
Length of a STFT frame to use.
|
||||||
|
frame_step (int):
|
||||||
|
HOP between successive frames.
|
||||||
|
spec_exponent (float):
|
||||||
|
Exponent of the spectrogram (usually 1 for magnitude
|
||||||
|
spectrogram, or 2 for power spectrogram).
|
||||||
|
window_exponent (float):
|
||||||
|
Exponent applied to the Hann windowing function (may be
|
||||||
|
useful for making perfect STFT/iSTFT reconstruction).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tensorflow.Tensor:
|
||||||
|
Computed magnitude / power spectrogram as a
|
||||||
|
`(T x F x n_channels)` tensor.
|
||||||
|
"""
|
||||||
|
stft_tensor: tf.Tensor = tf.transpose(
|
||||||
stft(
|
stft(
|
||||||
tf.transpose(waveform),
|
tf.transpose(waveform),
|
||||||
frame_length,
|
frame_length,
|
||||||
@@ -49,16 +59,25 @@ def compute_spectrogram_tf(
|
|||||||
|
|
||||||
|
|
||||||
def time_stretch(
|
def time_stretch(
|
||||||
spectrogram,
|
spectrogram: tf.Tensor,
|
||||||
factor=1.0,
|
factor: float = 1.0,
|
||||||
method=tf.image.ResizeMethod.BILINEAR):
|
method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR
|
||||||
""" Time stretch a spectrogram preserving shape in tensorflow. Note that
|
) -> tf.Tensor:
|
||||||
this is an approximation in the frequency domain.
|
"""
|
||||||
|
Time stretch a spectrogram preserving shape in tensorflow. Note that
|
||||||
|
this is an approximation in the frequency domain.
|
||||||
|
|
||||||
:param spectrogram: Input spectrogram to be time stretched as tensor.
|
Parameters:
|
||||||
:param factor: (Optional) Time stretch factor, must be >0, default to 1.
|
spectrogram (tensorflow.Tensor):
|
||||||
:param mehtod: (Optional) Interpolation method, default to BILINEAR.
|
Input spectrogram to be time stretched as tensor.
|
||||||
:returns: Time stretched spectrogram as tensor with same shape.
|
factor (float):
|
||||||
|
(Optional) Time stretch factor, must be > 0, default to `1`.
|
||||||
|
method (tensorflow.image.ResizeMethod):
|
||||||
|
(Optional) Interpolation method, default to `BILINEAR`.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tensorflow.Tensor:
|
||||||
|
Time stretched spectrogram as tensor with same shape.
|
||||||
"""
|
"""
|
||||||
T = tf.shape(spectrogram)[0]
|
T = tf.shape(spectrogram)[0]
|
||||||
T_ts = tf.cast(tf.cast(T, tf.float32) * factor, tf.int32)[0]
|
T_ts = tf.cast(tf.cast(T, tf.float32) * factor, tf.int32)[0]
|
||||||
@@ -71,15 +90,27 @@ def time_stretch(
|
|||||||
return tf.image.resize_image_with_crop_or_pad(ts_spec, T, F)
|
return tf.image.resize_image_with_crop_or_pad(ts_spec, T, F)
|
||||||
|
|
||||||
|
|
||||||
def random_time_stretch(spectrogram, factor_min=0.9, factor_max=1.1, **kwargs):
|
def random_time_stretch(
|
||||||
""" Time stretch a spectrogram preserving shape with random ratio in
|
spectrogram: tf.Tensor,
|
||||||
tensorflow. Applies time_stretch to spectrogram with a random ratio drawn
|
factor_min: float = 0.9,
|
||||||
uniformly in [factor_min, factor_max].
|
factor_max: float = 1.1,
|
||||||
|
**kwargs) -> tf.Tensor:
|
||||||
|
"""
|
||||||
|
Time stretch a spectrogram preserving shape with random ratio in
|
||||||
|
tensorflow. Applies time_stretch to spectrogram with a random ratio
|
||||||
|
drawn uniformly in `[factor_min, factor_max]`.
|
||||||
|
|
||||||
:param spectrogram: Input spectrogram to be time stretched as tensor.
|
Parameters:
|
||||||
:param factor_min: (Optional) Min time stretch factor, default to 0.9.
|
spectrogram (tensorflow.Tensor):
|
||||||
:param factor_max: (Optional) Max time stretch factor, default to 1.1.
|
Input spectrogram to be time stretched as tensor.
|
||||||
:returns: Randomly time stretched spectrogram as tensor with same shape.
|
factor_min (float):
|
||||||
|
(Optional) Min time stretch factor, default to `0.9`.
|
||||||
|
factor_max (float):
|
||||||
|
(Optional) Max time stretch factor, default to `1.1`.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tensorflow.Tensor:
|
||||||
|
Randomly time stretched spectrogram as tensor with same shape.
|
||||||
"""
|
"""
|
||||||
factor = tf.random_uniform(
|
factor = tf.random_uniform(
|
||||||
shape=(1,),
|
shape=(1,),
|
||||||
@@ -88,16 +119,25 @@ def random_time_stretch(spectrogram, factor_min=0.9, factor_max=1.1, **kwargs):
|
|||||||
|
|
||||||
|
|
||||||
def pitch_shift(
|
def pitch_shift(
|
||||||
spectrogram,
|
spectrogram: tf.Tensor,
|
||||||
semitone_shift=0.0,
|
semitone_shift: float = 0.0,
|
||||||
method=tf.image.ResizeMethod.BILINEAR):
|
method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR
|
||||||
""" Pitch shift a spectrogram preserving shape in tensorflow. Note that
|
) -> tf.Tensor:
|
||||||
this is an approximation in the frequency domain.
|
"""
|
||||||
|
Pitch shift a spectrogram preserving shape in tensorflow. Note that
|
||||||
|
this is an approximation in the frequency domain.
|
||||||
|
|
||||||
:param spectrogram: Input spectrogram to be pitch shifted as tensor.
|
Parameters:
|
||||||
:param semitone_shift: (Optional) Pitch shift in semitone, default to 0.0.
|
spectrogram (tensorflow.Tensor):
|
||||||
:param mehtod: (Optional) Interpolation method, default to BILINEAR.
|
Input spectrogram to be pitch shifted as tensor.
|
||||||
:returns: Pitch shifted spectrogram (same shape as spectrogram).
|
semitone_shift (float):
|
||||||
|
(Optional) Pitch shift in semitone, default to `0.0`.
|
||||||
|
method (tensorflow.image.ResizeMethod):
|
||||||
|
(Optional) Interpolation method, default to `BILINEAR`.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tensorflow.Tensor:
|
||||||
|
Pitch shifted spectrogram (same shape as spectrogram).
|
||||||
"""
|
"""
|
||||||
factor = 2 ** (semitone_shift / 12.)
|
factor = 2 ** (semitone_shift / 12.)
|
||||||
T = tf.shape(spectrogram)[0]
|
T = tf.shape(spectrogram)[0]
|
||||||
@@ -112,16 +152,28 @@ def pitch_shift(
|
|||||||
return tf.pad(ps_spec[:, :F, :], paddings, 'CONSTANT')
|
return tf.pad(ps_spec[:, :F, :], paddings, 'CONSTANT')
|
||||||
|
|
||||||
|
|
||||||
def random_pitch_shift(spectrogram, shift_min=-1., shift_max=1., **kwargs):
|
def random_pitch_shift(
|
||||||
""" Pitch shift a spectrogram preserving shape with random ratio in
|
spectrogram: tf.Tensor,
|
||||||
tensorflow. Applies pitch_shift to spectrogram with a random shift
|
shift_min: float = -1.,
|
||||||
amount (expressed in semitones) drawn uniformly in [shift_min, shift_max].
|
shift_max: float = 1.,
|
||||||
|
**kwargs) -> tf.Tensor:
|
||||||
|
"""
|
||||||
|
Pitch shift a spectrogram preserving shape with random ratio in
|
||||||
|
tensorflow. Applies pitch_shift to spectrogram with a random shift
|
||||||
|
amount (expressed in semitones) drawn uniformly in
|
||||||
|
`[shift_min, shift_max]`.
|
||||||
|
|
||||||
:param spectrogram: Input spectrogram to be pitch shifted as tensor.
|
Parameters:
|
||||||
|
spectrogram (tensorflow.Tensor):
|
||||||
|
Input spectrogram to be pitch shifted as tensor.
|
||||||
|
shift_min (float):
|
||||||
|
(Optional) Min pitch shift in semitone, default to -1.
|
||||||
|
shift_max (float):
|
||||||
|
(Optional) Max pitch shift in semitone, default to 1.
|
||||||
|
|
||||||
:param shift_min: (Optional) Min pitch shift in semitone, default to -1.
|
Returns:
|
||||||
:param shift_max: (Optional) Max pitch shift in semitone, default to 1.
|
tensorflow.Tensor:
|
||||||
:returns: Randomly pitch shifted spectrogram (same shape as spectrogram).
|
Randomly pitch shifted spectrogram (same shape as spectrogram).
|
||||||
"""
|
"""
|
||||||
semitone_shift = tf.random_uniform(
|
semitone_shift = tf.random_uniform(
|
||||||
shape=(1,),
|
shape=(1,),
|
||||||
|
|||||||
Reference in New Issue
Block a user