import pandas as pd
import numpy as np
import plotly.express as px
import librosa as lb
from scipy.signal.windows import tukey
from pathlib import Path
from bacpipe import settings
# [docs] -- Sphinx "viewcode" link marker left over from the rendered HTML page; not part of the module.
class SpectrogramPlot:
    """Spectrogram figure and audio playback helper for clicked embeddings.

    The object caches, per model, the sample rate and segment length found
    in the loader's metadata. For a clicked data point it loads the
    matching audio excerpt, renders its spectrogram as a plotly figure and
    writes a short description into a text widget.

    Options are read from ``**kwargs`` at call time:

    - ``spec_colorscale``: colour scale passed to ``px.imshow``.
    - ``spectrogram_plot_height``: height of the spectrogram figure.
    - ``bool_slowdown``: if truthy, audio is loaded via the slowdown path.
    - ``slowdown_rate``: divisor applied to offsets and to the file's
      sample rate on the slowdown path.
    - ``padding``: ``mode`` passed to ``librosa.util.fix_length`` when an
      excerpt is shorter than the model's segment length (required in
      that case).
    """

    # Title of the placeholder figure shown while nothing usable is selected.
    _PLACEHOLDER_TITLE = "Click an embedding to see the corresponding spectrogram"

    def __init__(self, audio_dir, loader, model_name, panel_static_text, **kwargs):
        """Collect per-model audio parameters from ``loader``.

        Parameters
        ----------
        audio_dir : str or path-like
            Directory that the clicked points' file names are relative to.
        loader : object
            Must provide ``get_data(model, key)`` and, afterwards,
            ``embeds[model]['metadata']`` containing the keys
            ``'sample_rate (Hz)'`` and ``'segment_length (samples)'``.
            NOTE(review): ``get_data`` presumably populates ``embeds`` --
            confirm against the loader implementation.
        model_name : object
            Widget-like object whose ``options`` attribute iterates over
            the available model names.
        panel_static_text : object
            Widget-like object whose ``visible`` and ``value`` attributes
            are set by :meth:`update_text`.
        **kwargs
            Display and loading options, see the class docstring.
        """
        self.audio_dir = audio_dir
        self.panel_static_text = panel_static_text
        # Stored before the loop so it exists even if there are no models.
        self.kwargs = kwargs
        self.all_sample_rates = {}
        self.all_segment_lengths = {}
        for model in model_name.options:
            loader.get_data(model, 'time_of_day')
            metadata = loader.embeds[model]['metadata']
            self.all_sample_rates[model] = metadata['sample_rate (Hz)']
            self.all_segment_lengths[model] = metadata['segment_length (samples)']

    def _update_spec_obj(self, model, bool_autoplay_audio):
        """Select ``model`` as the active model and store the autoplay flag.

        Sets ``model_name``, ``sample_rate`` and ``segment_length`` from the
        values cached in ``__init__``; raises ``KeyError`` for a model that
        was not among the options seen there.
        """
        self.model_name = model
        self.sample_rate = self.all_sample_rates[model]
        self.segment_length = self.all_segment_lengths[model]
        self.bool_autoplay_audio = bool_autoplay_audio

    def _cache_selected_points(self, selected_points):
        """Remember the current selection on ``self.selected_points``."""
        self.selected_points = selected_points

    @staticmethod
    def dummy_image(title):
        """Return an all-black 100x100 placeholder figure titled ``title``.

        Both axes are hidden and the height is taken from
        ``settings.spectrogram_plot_height``.
        """
        fig = px.imshow(np.zeros((100, 100, 3), dtype=np.uint8))
        fig.update_layout(
            title=title,
            margin=dict(l=20, r=20, t=40, b=20),
            height=settings.spectrogram_plot_height,
            xaxis={'visible': False},
            yaxis={'visible': False},
        )
        return fig

    def update_spectrogram(
        self, clickData=None, play_btn=None, autoplay_radio=None
    ):
        """Return the spectrogram figure for a clicked point.

        Parameters
        ----------
        clickData : mapping, optional
            Click payload whose ``'customdata'`` entry holds six values:
            ``(audiofilename, start_s, end_s, idx, label, label_id)``.
        play_btn, autoplay_radio : optional
            Accepted for signature compatibility; not used here.

        Returns
        -------
        plotly figure
            The spectrogram of the selected excerpt, or the black
            placeholder image when there is no click payload or it
            carries no ``'customdata'``.
        """
        # Show a black image until something has been clicked.
        if not clickData:
            return SpectrogramPlot.dummy_image(self._PLACEHOLDER_TITLE)

        # Extract data from the click. A payload without customdata cannot
        # be resolved to an audio file, so fall back to the placeholder
        # instead of failing later on a ``None`` file name.
        point_data = clickData.get('customdata')
        if point_data is None:
            return SpectrogramPlot.dummy_image(self._PLACEHOLDER_TITLE)
        audiofilename, start_s, end_s, _idx, label, _label_id = point_data

        # Load the audio, render it, then describe it in the text widget.
        audio, _ = self.load_audio(start_s, end_s, audiofilename)
        spec_fig = self.create_specs(audio)
        self.update_text(start_s, end_s, audiofilename, label)
        return spec_fig

    def update_text(self, start_s, end_s, audiofilename, label):
        """Make the text widget visible and fill in the excerpt's details.

        The text is an HTML snippet listing the active model's sample rate
        and segment length, the file name, the offset ``start_s``, the
        duration ``end_s - start_s`` and the label.
        """
        self.panel_static_text.visible = True
        self.panel_static_text.value = (
            "\n"
            f"<b>model sample rate</b> = {self.sample_rate} Hz;\n"
            f"<b>model segment_length</b> = {self.segment_length} samples; <br>\n"
            f"<b>filename</b> = {audiofilename}; <br>\n"
            f"<b>offset</b> = {start_s} s;\n"
            f"<b>duration</b> = {end_s - start_s} s;\n"
            f"<b>label</b> = {label}\n"
        )

    def create_specs(self, audio):
        """Render ``audio`` as a dB-scaled magnitude spectrogram figure.

        The STFT uses ``win_length=1024`` and magnitudes are converted to dB
        relative to the maximum. The frequency axis runs from 0 to the
        value returned by :meth:`set_axis_lims_dep_sr`; the time axis runs
        from 0 to ``segment_length / sample_rate`` seconds.
        """
        S = np.abs(lb.stft(audio, win_length=1024))
        S_dB = lb.amplitude_to_db(S, ref=np.max)
        f_max, S_dB = self.set_axis_lims_dep_sr(S_dB)

        n_freq_bins, n_frames = S_dB.shape[0], S_dB.shape[1]
        fig = px.imshow(
            S_dB,
            origin='lower',
            aspect='auto',
            y=np.linspace(0, f_max, n_freq_bins),
            x=np.linspace(0, self.segment_length / self.sample_rate, n_frames),
            labels={'x': 'time (s)', 'y': 'freq (Hz)'},
            color_continuous_scale=self.kwargs.get('spec_colorscale'),
        )
        fig.update_layout(
            height=self.kwargs.get('spectrogram_plot_height'),
            margin=dict(l=20, r=20, t=20, b=20),
        )
        return fig

    def play_audio(self, event):
        """Play the most recently loaded excerpt through ``sounddevice``.

        Does nothing if no audio has been loaded yet. ``event`` is accepted
        for callback compatibility and is not used.
        """
        if not hasattr(self, 'audio'):
            return
        # Imported only when there is something to play, so the no-op path
        # does not depend on the optional audio backend being installed.
        import sounddevice as sd

        # Slight taper at both ends before playback.
        audio = tukey(len(self.audio), alpha=0.01) * self.audio
        sd.play(audio, self.sample_rate)

    def load_audio(self, start, end, filename):
        """Load the excerpt ``[start, end]`` of ``filename`` at the model rate.

        Parameters
        ----------
        start, end : number or numeric string
            Excerpt boundaries in seconds; converted with ``float``.
        filename : str or path-like
            File name relative to ``self.audio_dir``.

        Returns
        -------
        tuple
            ``(audio, stem)``: the samples (also stored on ``self.audio``)
            and the file name without directory and suffix.

        Notes
        -----
        Without ``bool_slowdown`` the file is loaded directly at
        ``self.sample_rate``. With it, offset and duration are divided by
        ``slowdown_rate``, the file is loaded at its native rate, and the
        samples are resampled to ``self.sample_rate`` as if they had been
        recorded at ``native_rate / slowdown_rate``.

        If the requested duration covers fewer than ``segment_length``
        samples, the excerpt is tapered with a Tukey window and padded to
        ``segment_length`` using ``kwargs['padding']`` as the padding mode.
        """
        path = Path(self.audio_dir) / filename
        # Convert once so both loading modes accept the same input types.
        start_s, end_s = float(start), float(end)

        if not self.kwargs.get('bool_slowdown'):
            audio, self.orig_sr = lb.load(
                path,
                sr=self.sample_rate,
                offset=start_s,
                duration=end_s - start_s,
            )
        else:
            slowdown_rate = self.kwargs.get('slowdown_rate')
            audio, self.orig_sr = lb.load(
                path,
                sr=None,
                offset=start_s / slowdown_rate,
                duration=end_s / slowdown_rate - start_s / slowdown_rate,
            )
            audio = lb.resample(
                audio,
                orig_sr=int(self.orig_sr / slowdown_rate),
                target_sr=self.sample_rate,
            )

        if (end_s - start_s) * self.sample_rate < self.segment_length:
            # Taper before padding to soften the jump at the excerpt's edges.
            audio = tukey(len(audio), alpha=0.01) * audio
            audio = lb.util.fix_length(
                audio,
                size=self.segment_length,
                mode=self.kwargs['padding'],
            )

        self.audio = audio
        return audio, path.stem

    def set_axis_lims_dep_sr(self, S_dB):
        """Return the upper frequency limit and the spectrogram cropped to it.

        Parameters
        ----------
        S_dB : 2-D array
            Spectrogram; rows are cropped, columns are kept.

        Returns
        -------
        tuple
            ``(f_max, S_dB)`` where ``f_max`` is ``sample_rate / 2``.
        """
        f_max = self.sample_rate / 2
        # With f_max at half the sample rate this factor is always 1, so
        # every row is currently kept.
        crop_factor = self.sample_rate / (f_max * 2)
        n_rows = int(S_dB.shape[0] / crop_factor)
        return f_max, S_dB[:n_rows, :]