# Source code for bacpipe.model_pipelines.feature_extractors.birdnet

import tensorflow as tf
import keras
import pandas as pd
import numpy as np

SAMPLE_RATE = 48000
LENGTH_IN_SAMPLES = 144000

from ..model_utils import ModelBaseClass


class Model(ModelBaseClass):
    """BirdNET wrapper exposing preprocessing, embeddings and class predictions.

    The base class is initialised with ``sr=SAMPLE_RATE`` (48000) and
    ``segment_length=LENGTH_IN_SAMPLES`` (144000 samples, i.e. 3 s at that
    rate). The loaded Keras model is split into two sub-models:

    * ``self.embeds``: model input -> output of the third-to-last layer.
    * ``self.classifier``: a fresh input of that embedding shape passed
      through the last two layers of the loaded model.
    """

    # Maximum number of segments sent through the saved preprocessor per call.
    # Value kept from the original implementation; presumably a limit of the
    # exported preprocessor graph -- TODO confirm before changing.
    _PREPROCESS_BATCH_SIZE = 511

    def __init__(self, **kwargs):
        """Load the BirdNET model, its preprocessor and the class labels.

        Args:
            **kwargs: Forwarded unchanged to ``ModelBaseClass.__init__``.
        """
        super().__init__(sr=SAMPLE_RATE, segment_length=LENGTH_IN_SAMPLES, **kwargs)

        # Two serialisations of the model are shipped; the directory-style
        # one is used only for exactly TensorFlow 2.15.1.
        if tf.__version__ == '2.15.1':
            self.model = tf.keras.models.load_model(
                self.model_base_path / "birdnet/birdnet_tf215", compile=False
            )
        else:
            self.model = tf.keras.models.load_model(
                self.model_base_path / "birdnet/birdnetv2.4.keras", compile=False
            )

        loaded_preprocessor = tf.saved_model.load(
            self.model_base_path / "birdnet/BirdNET_Preprocessor",
        )
        # The SavedModel signature returns a dict; only its 'concatenate'
        # entry is used downstream.
        self.preprocessor = lambda x: (
            loaded_preprocessor.signatures['serving_default'](x)['concatenate']
        )

        all_classes = pd.read_csv(
            self.model_utils_base_path
            / "birdnet/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt",
            header=None,
        )
        # Keep only the text after the last underscore of every label row.
        self.classes = [s.split("_")[-1] for s in all_classes.values.squeeze()]

        self.embeds = tf.keras.Model(
            inputs=self.model.input,
            outputs=self.model.layers[-3].output,
            name="embeddings_model",
        )

        # Re-apply the final two layers on a standalone input so predictions
        # can be computed from stored embeddings.
        x = keras.Input(shape=self.model.layers[-3].output.shape[1:])
        y = self.model.layers[-2](x)
        y = self.model.layers[-1](y)
        self.classifier = tf.keras.Model(x, y, name="classifier_model")

    def preprocess(self, audio):
        """Run the saved preprocessor over ``audio`` in fixed-size chunks.

        Args:
            audio: Batch of audio segments, sliced along its first axis. It
                must provide ``.cpu()`` and ``.shape`` (presumably a torch
                tensor -- confirm against callers).

        Returns:
            A ``tf.float32`` tensor holding the preprocessor outputs of all
            chunks stacked along the first axis.

        Raises:
            ValueError: If ``audio`` has no entries along its first axis.
                (Previously this surfaced as an ``UnboundLocalError``.)
        """
        audio = audio.cpu()
        n_segments = audio.shape[0]
        if n_segments == 0:
            raise ValueError("preprocess() received an empty audio batch")

        step = self._PREPROCESS_BATCH_SIZE
        chunks = [
            self.preprocessor(
                tf.convert_to_tensor(audio[start:start + step], dtype=tf.float32)
            ).numpy()
            for start in range(0, n_segments, step)
        ]
        # Stack once at the end instead of re-copying the accumulated array
        # on every iteration; a single chunk is passed through untouched.
        processed = chunks[0] if len(chunks) == 1 else np.vstack(chunks)
        return tf.convert_to_tensor(processed, dtype=tf.float32)

    def __call__(self, input):
        """Return the output of the embeddings sub-model for ``input``.

        The sub-model is called with ``training=False``.
        """
        return self.embeds(input, training=False)

    def classifier_predictions(self, embeddings):
        """Return the sigmoid of the classifier sub-model output as a NumPy array.

        Args:
            embeddings: Input accepted by ``self.classifier`` (built with the
                shape of the embedding layer output).
        """
        logits = self.classifier(embeddings).numpy()
        return tf.nn.sigmoid(logits).numpy()
class Rebuilder:
    """Re-create a loaded Keras model from its fifth layer onwards.

    Layers are re-applied to a fresh input tensor by walking the graph
    backwards from the last layer. While doing so, a plain-dict description
    of every visited layer is collected in ``self.layer_confs``.
    """

    def __init__(self, model):
        """Prepare a new input tensor and empty bookkeeping containers.

        Args:
            model: Loaded Keras model whose layers from index 4 on are reused.
        """
        # Start at layer 4: the MelSpecLayerSimple classes in front of it
        # are not deserializable by keras 3.11. They only do preprocessing,
        # so the rebuilt graph begins right after the concatenation of the
        # two spectrograms.
        self.input_layer = tf.keras.Input(
            shape=model.layers[4].input.shape[1:],
            name='inputs',
            dtype=tf.float32,
        )
        self.layer_map = {lyr.name: lyr for lyr in model.layers[4:]}
        # Outputs already computed, keyed by layer name; caching them stops
        # infinite recursion.
        self.output_cache = {}
        self.layer_confs = []

    def rebuild_layer(self, layer):
        """Apply ``layer`` on top of its (recursively rebuilt) inputs.

        Args:
            layer: Layer to re-apply. Its producers are found through
                ``layer.input`` and each tensor's ``_keras_history``.

        Returns:
            The output of calling ``layer``; repeated calls for the same
            layer name return the cached output.
        """
        cache = self.output_cache
        if layer.name in cache:
            return cache[layer.name]

        conf = {
            "name": layer.name,
            "class_name": type(layer).__name__,
            "config": layer.get_config(),
            "inbound_nodes": [],
        }
        settings = conf["config"]
        # A non-integer `axis` entry is reduced to its first element.
        if 'axis' in settings and not isinstance(settings['axis'], int):
            settings['axis'] = settings['axis'][0]

        source = layer.input
        parent_names = []
        if isinstance(source, list):
            # Several inputs: rebuild each producer in order.
            upstream = []
            for tensor in source:
                parent = tensor._keras_history.layer
                parent_names.append(parent.name)
                upstream.append(self.rebuild_layer(parent))
            call_arg = upstream if len(upstream) > 1 else upstream[0]
        else:
            parent = source._keras_history.layer
            parent_names.append(parent.name)
            if source.name.startswith(("concat", "INPUT")):
                # Fed by the original front end: attach to the new input
                # tensor instead of recursing any further.
                call_arg = self.input_layer
            else:
                call_arg = self.rebuild_layer(parent)

        conf["inbound_nodes"] = parent_names
        result = layer(call_arg)
        cache[layer.name] = result
        self.layer_confs.append(conf)
        return result

    def build_model(self, model):
        """Rebuild the graph ending at the last layer of ``model``.

        Returns:
            A 2-tuple ``(description, rebuilt_model)``. ``description`` is a
            dict with the keys ``"input_shape"`` and ``"layer_confs"``;
            ``rebuilt_model`` is a ``keras.Model`` named ``"rebuilt_model"``.
        """
        # Recursing from the final layer pulls in everything it depends on.
        final_output = self.rebuild_layer(model.layers[-1])
        description = {
            "input_shape": self.input_layer.shape,
            "layer_confs": self.layer_confs,
        }
        rebuilt = keras.Model(self.input_layer, final_output, name="rebuilt_model")
        return description, rebuilt