# pipでインストール
!pip install librosa
Requirement already satisfied: librosa in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (0.6.0) Requirement already satisfied: scipy>=0.14.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (1.1.0) Requirement already satisfied: resampy>=0.2.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (0.2.0) Requirement already satisfied: numpy>=1.8.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (1.14.2) Requirement already satisfied: decorator>=3.0.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (4.3.0) Requirement already satisfied: audioread>=2.0.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (2.1.5) Requirement already satisfied: six>=1.3 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (1.11.0) Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (0.19.1) Requirement already satisfied: joblib>=0.7.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from librosa) (0.11) Requirement already satisfied: numba>=0.32 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from resampy>=0.2.0->librosa) (0.38.0) Requirement already satisfied: llvmlite>=0.23.0dev0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from numba>=0.32->resampy>=0.2.0->librosa) (0.23.0)
# 本発表では以下を仮定
import numpy as np
import scipy as sp
import scipy.signal as sg
import scipy.fftpack as fft
import matplotlib as mpl
import matplotlib.pyplot as plt
import IPython.display
# version
print("numpy version", np.version.full_version)
print("scipy version", sp.version.full_version)
print("matplotlib version", mpl.__version__)
numpy version 1.14.2 scipy version 1.1.0 matplotlib version 2.2.2
# librosa以下の各モジュールをimport
import librosa
import librosa.display
import librosa.feature
import librosa.effects
import librosa.onset
import librosa.beat
librosa.load
audioread
ライブラリを使用librosa.display.waveplot
で波形データの可視化# デモ音源のロード&再生
# y: モノラル音声データ, sr: サンプリングレート
y, sr = librosa.load(librosa.util.example_audio_file())
IPython.display.Audio(data=y, rate=sr)
# 波形の描画
# 縦軸レンジを自動的に指定し、横軸を秒数表示にしてくれる
librosa.display.waveplot(y, sr)
<matplotlib.collections.PolyCollection at 0x115239668>
b_pt = int(10.0*sr); e_pt = int((10.0+0.002)*sr)
fig = plt.figure(1, figsize=(14,6)); ax = fig.add_subplot(111)
ax.plot(y[b_pt:e_pt]); ax.plot(y[b_pt:e_pt], "o"); ax.grid();
ax.set_xlabel("time [pt]"); ax.set_ylabel("amplitude")
Text(0,0.5,'amplitude')
# サンプリングレート: 1秒辺りのサンプル数
print("sampling rate: {0}".format(sr))
print("length: {0}[pt]={1}[s]".format(y.shape[0], y.shape[0]/sr))
sampling rate: 22050 length: 1355168[pt]=61.45886621315193[s]
# 周波数分析時には通常窓関数を乗算した短時間波形を使用
# 切り出しはここでは1/2区間をオーバーラップさせている
plot_overlap_waves()
/Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Passing one of 'on', 'true', 'off', 'false' as a boolean is deprecated; use an actual boolean (True/False) instead. warnings.warn(message, mplDeprecation, stacklevel=1)
sr
= sampling rate [Hz = pt/s]frame
= 各短時間波形n_fft
= 各フレームに含まれるサンプル数hop_length
= フレーム間のサンプル数# 各短時間波形を周波数分析したグラフを描画
# 縦軸が周波数、横軸が振幅スペクトル
plot_overlap_freqs()
/Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Passing one of 'on', 'true', 'off', 'false' as a boolean is deprecated; use an actual boolean (True/False) instead. warnings.warn(message, mplDeprecation, stacklevel=1)
# 短時間波形を周波数分析したものをスペクトログラムとして表示
plot_spectrogram()
librosa.stft
librosa.cqt
librosa.specshow
# librosaでは短時間波形分析をメソッド一発で実行可能
D = librosa.stft(y)
# D[k, l]
# k: 周波数ビン l: フレームインデックス
# kは実信号に対する2048ptフーリエ変換の結果なので1025ptのみ保持
D.shape
(1025, 2647)
* $x[n]$は入力信号、$w[n]$は窓関数
* 周波数ビン$k$、フレームインデックス$l$、$M$はhop length
* $lM$を中心に前後$N/2$点に渡り離散フーリエ変換を実行
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)
log_power = librosa.amplitude_to_db(np.abs(D), ref=np.max)
librosa.display.specshow(log_power, x_axis="time", y_axis="linear")
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x115a2aba8>
# 縦軸を対数表示
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)
log_power = librosa.amplitude_to_db(np.abs(D), ref=np.max)
librosa.display.specshow(log_power, x_axis="time", y_axis="log")
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x1c3344cef0>
C = librosa.cqt(y, sr)
# STFTと比較して縦軸の周波数データ数は圧倒的に少ない
# STFTは周波数間隔が等幅なのに対してCQTだと対数間隔となるため
print(D.shape)
print(C.shape)
(1025, 2647) (84, 2647)
fmin = 32.70 # C1に相当するHz
fmax = fmin * ((2**(1/12)) ** 84)
print(fmax)
print(librosa.hz_to_note(fmin))
print(librosa.hz_to_note(fmax))
4185.600000000015 C1 C8
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)
log_cqt_power = librosa.amplitude_to_db(np.abs(C), ref=np.max)
librosa.display.specshow(log_cqt_power, x_axis="time", y_axis="cqt_hz")
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x115933cf8>
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)
log_cqt_power = librosa.amplitude_to_db(np.abs(C), ref=np.max)
librosa.display.specshow(log_cqt_power, x_axis="time", y_axis="cqt_note")
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x11589d518>
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)
# デフォルトだと80dBまでだがtop_dbを付けるとレンジが縮まる
log_cqt_power = librosa.amplitude_to_db(np.abs(C), ref=np.max, top_db=40)
librosa.display.specshow(log_cqt_power, x_axis="time", y_axis="cqt_note")
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x1c1f823518>
librosa.feature.chroma_cqt
chroma = librosa.feature.chroma_cqt(C=C, sr=sr)
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)
librosa.display.specshow(chroma, x_axis="time", y_axis="chroma")
plt.colorbar()
/Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages/librosa/display.py:657: UserWarning: Trying to display complex-valued input. Showing magnitude instead. warnings.warn('Trying to display complex-valued input. '
<matplotlib.colorbar.Colorbar at 0x1159a3128>
M = librosa.feature.melspectrogram(y=y, sr=sr)
mfcc = librosa.feature.mfcc(y=y, sr=sr)
tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
librosa.effects
モジュール¶librosa.hpss
librosa.time_stretch
librosa.pitch_shift
y_harmonic, y_percussive = librosa.effects.hpss(y)
IPython.display.Audio(y, rate=sr)
IPython.display.Audio(y_harmonic, rate=sr)
IPython.display.Audio(y_percussive, rate=sr)
# それぞれに対するCQTを求める
C = librosa.cqt(y=y, sr=sr)
C_harm = librosa.cqt(y=y_harmonic, sr=sr)
C_perc = librosa.cqt(y=y_percussive, sr=sr)
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)
log_cqt_power= librosa.amplitude_to_db(np.abs(C), ref=np.max)
log_cqt_harm = librosa.amplitude_to_db(np.abs(C_harm), ref=np.max(np.abs(C)))
log_cqt_perc = librosa.amplitude_to_db(np.abs(C_perc), ref=np.max(np.abs(C)))
ax = fig.add_subplot(3,1,1); librosa.display.specshow(log_cqt_power, x_axis="time", y_axis="cqt_hz")
ax = fig.add_subplot(3,1,2); librosa.display.specshow(log_cqt_harm , x_axis="time", y_axis="cqt_hz")
ax = fig.add_subplot(3,1,3); librosa.display.specshow(log_cqt_perc , x_axis="time", y_axis="cqt_hz")
<matplotlib.axes._subplots.AxesSubplot at 0x1139ae2e8>
# 2倍の速度に時間伸縮
y_fast = librosa.effects.time_stretch(y, 2.0)
IPython.display.Audio(y_fast, rate=sr)
# 3度上にピッチシフト
y_third = librosa.effects.pitch_shift(y, sr, n_steps=4)
IPython.display.Audio(y_third, rate=sr)
# 減五度にピッチシフト
y_tritone = librosa.effects.pitch_shift(y, sr, n_steps=-6)
IPython.display.Audio(y_tritone, rate=sr)
librosa.onset
モジュール、librosa.beat
モジュールを使用onset.onset_strength
で算出onset.onset_detect
で検出beat.beat_track
でテンポとビートを推定onset_envelope = librosa.onset.onset_strength(y, sr)
onsets = librosa.onset.onset_detect(onset_envelope=onset_envelope)
fig = plt.figure(1, figsize=(12,8))
ax = fig.add_subplot(2, 1, 1)
ax.plot(onset_envelope, label="onset strength")
ax.vlines(onsets, 0, np.max(onset_envelope), color="red", alpha=0.25, label="onsets")
ax.set_xlim([0, len(onset_envelope)]); ax.legend(loc=1)
ax = fig.add_subplot(2, 1, 2)
librosa.display.waveplot(y, sr)
<matplotlib.collections.PolyCollection at 0x113aa6c88>
tempo, beats = librosa.beat.beat_track(onset_envelope=onset_envelope)
print(tempo)
129.19921875
fig = plt.figure(1, figsize=(12,8))
ax = fig.add_subplot(1, 1, 1)
ax.plot(onset_envelope, label="onset strength")
ax.vlines(onsets, 0, 0.8 * np.max(onset_envelope), color="red", alpha=0.5, linestyle="dotted", label="onsets")
ax.vlines(beats, 0, np.max(onset_envelope), color="black", alpha=0.25, label="beats")
ax.set_xlim([0, len(onset_envelope)]); ax.legend(loc=1)
<matplotlib.legend.Legend at 0x114fc1128>
mir_eval
ライブラリを使用!pip install mir_eval
Requirement already satisfied: mir_eval in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (0.4) Requirement already satisfied: six in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from mir_eval) (1.11.0) Requirement already satisfied: numpy>=1.7.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from mir_eval) (1.14.2) Requirement already satisfied: scipy>=0.9.0 in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from mir_eval) (1.1.0) Requirement already satisfied: future in /Users/wrist/.pyenv/versions/miniconda3-4.3.30/envs/py36/lib/python3.6/site-packages (from mir_eval) (0.16.0)
# http://colinraffel.com/publications/ismir2014mir_eval.pdf
import mir_eval
beat_times = librosa.frames_to_time(beats)
y_click = mir_eval.sonify.clicks(beat_times, sr, length=len(y))
IPython.display.Audio(y_click, rate=sr)
IPython.display.Audio(y + y_click, rate=sr)
librosa.util.sync
を用いるとビートとその他特徴量を同期させることができるtempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr)
# 13次元のMFCCおよびΔを抽出
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
mfcc_delta = librosa.feature.delta(mfcc)
print(beat_frames.shape)
print(mfcc.shape)
print(mfcc_delta.shape)
(98,) (13, 2647) (13, 2647)
# ビート間の平均値を用いてMFCCとΔMFCCをビートに同期
beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]),
beat_frames)
beat_mfcc_delta.shape
(26, 99)
# 調波成分からクロマベクトルを推定
chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
# ビート間の中央値を用いてクロマベクトルをビートに同期
beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate=np.median)
# ビート同期特徴量(クロマベクトル、MFCC、ΔMFCC)を結合
beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
# MFCC 13dim, ΔMFCC 13dim, クロマベクトル(12音階; 12dim)
beat_features.shape
(38, 99)
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(211)
librosa.display.specshow(log_cqt_power, x_axis="time", y_axis="cqt_hz")
log_beat_features = librosa.amplitude_to_db(np.abs(beat_features), ref=np.max)
ax = fig.add_subplot(212)
ax.pcolormesh(log_beat_features)
<matplotlib.collections.QuadMesh at 0x113a0ac50>
librosa.decompose
による音源分離¶scikit-learn
のNMFに投げるD = librosa.stft(y)
S, phase = librosa.magphase(D)
comps, acts = librosa.decompose.decompose(S, n_components=8, sort=True)
S_approx = comps @ acts
print(comps.shape)
print(acts.shape)
print(S.shape, S_approx.shape)
(1025, 8) (8, 2647) (1025, 2647) (1025, 2647)
plot_decomposed(S, comps, acts, S_approx)
S_partly = comps[:, :3] @ acts[:3, :]
plot_decomposed(S, comps[:, :3], acts[:3, :], S_partly)
y_partly = librosa.istft(S_partly * phase)
IPython.display.Audio(y_partly, rate=sr)