Source code for bacpipe.model_pipelines.feature_extractors.birdnet

import tensorflow as tf
import keras
import pandas as pd
import numpy as np

# Sampling rate, passed to ModelBaseClass as ``sr`` by Model.__init__.
SAMPLE_RATE = 48000
# Segment length in samples, passed to ModelBaseClass as ``segment_length``;
# 144000 samples at 48000 samples per second corresponds to 3 seconds.
LENGTH_IN_SAMPLES = 144000

from ..model_utils import ModelBaseClass



[docs]
class Model(ModelBaseClass):

    def __init__(self, **kwargs):
        """Load the BirdNET network, its preprocessor and its label list.

        After construction the instance exposes:

        * ``self.model`` -- the Keras model loaded from ``model_base_path``.
        * ``self.preprocessor`` -- callable running the saved preprocessor's
          ``serving_default`` signature and returning its ``concatenate``
          output.
        * ``self.classes`` -- for every row of the labels file, the text
          after the last underscore.
        * ``self.embeds`` -- model mapping the network input to the output
          of its third-to-last layer.
        * ``self.classifier`` -- model applying the last two layers of the
          network to such an embedding.

        Args:
            **kwargs: Forwarded unchanged to ``ModelBaseClass.__init__``.
        """
        super().__init__(sr=SAMPLE_RATE, segment_length=LENGTH_IN_SAMPLES, **kwargs)

        # TensorFlow 2.15.1 gets its own export of the network; every other
        # version loads the ``.keras`` file.
        if tf.__version__ == '2.15.1':
            model_file = "birdnet/birdnet_tf215"
        else:
            model_file = "birdnet/birdnetv2.4.keras"
        self.model = tf.keras.models.load_model(
            self.model_base_path / model_file, compile=False
        )

        preprocessor_dir = self.model_base_path / "birdnet/BirdNET_Preprocessor"
        loaded_preprocessor = tf.saved_model.load(preprocessor_dir)

        def run_preprocessor(x):
            serving_fn = loaded_preprocessor.signatures['serving_default']
            return serving_fn(x)['concatenate']

        self.preprocessor = run_preprocessor

        labels_file = (
            self.model_utils_base_path /
            "birdnet/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt"
        )
        label_table = pd.read_csv(labels_file, header=None)
        # Keep only the part of each label that follows the last underscore.
        self.classes = [
            entry.rsplit("_", 1)[-1] for entry in label_table.values.squeeze()
        ]

        # Everything up to (and including) the third-to-last layer yields the
        # embedding; the remaining two layers form the classifier head.
        embedding_output = self.model.layers[-3].output
        self.embeds = tf.keras.Model(
            inputs=self.model.input,
            outputs=embedding_output,
            name="embeddings_model",
        )

        head_input = keras.Input(shape=embedding_output.shape[1:])
        head_hidden = self.model.layers[-2](head_input)
        head_output = self.model.layers[-1](head_hidden)
        self.classifier = tf.keras.Model(
            head_input, head_output, name="classifier_model"
        )


[docs]
    def preprocess(self, audio):
        """Run the saved preprocessor over ``audio`` in fixed-size chunks.

        ``audio`` is moved to the CPU, sliced along its first axis into
        chunks of at most 511 rows, each chunk is converted to a
        ``tf.float32`` tensor and passed through ``self.preprocessor``, and
        the per-chunk results are stacked along axis 0.

        Args:
            audio: Batch of audio frames exposing ``.cpu()``, ``.shape`` and
                slicing along the first axis (presumably a torch tensor of
                shape (n_frames, n_samples) -- TODO confirm with the caller).

        Returns:
            A ``tf.float32`` tensor with the preprocessor output of all
            frames, in input order.

        Raises:
            ValueError: If ``audio`` has no rows along its first axis.
        """
        # NOTE(review): 511 is taken over unchanged from the previous
        # implementation; presumably a batch limit of the saved
        # preprocessor -- confirm before changing it.
        chunk_size = 511

        audio = audio.cpu()
        n_frames = audio.shape[0]
        if n_frames == 0:
            # Previously this case surfaced as an UnboundLocalError.
            raise ValueError("preprocess() received an empty audio batch")

        # Collect all chunk outputs first and stack once, instead of
        # re-copying the growing result on every iteration.
        chunks = [
            self.preprocessor(
                tf.convert_to_tensor(
                    audio[start:start + chunk_size], dtype=tf.float32
                )
            ).numpy()
            for start in range(0, n_frames, chunk_size)
        ]
        # A single chunk is passed through untouched, exactly as before.
        processed = chunks[0] if len(chunks) == 1 else np.vstack(chunks)
        return tf.convert_to_tensor(processed, dtype=tf.float32)


    def __call__(self, input):
        """Return the embeddings for ``input``.

        Args:
            input: Input accepted by ``self.embeds`` (presumably the tensor
                returned by ``preprocess`` -- TODO confirm with the caller).

        Returns:
            The output of ``self.embeds`` called with ``training=False``.
        """
        embeddings = self.embeds(input, training=False)
        return embeddings


[docs]
    def classifier_predictions(self, embeddings):
        """Turn embeddings into per-class scores.

        Args:
            embeddings: Input accepted by ``self.classifier``.

        Returns:
            NumPy array holding the element-wise sigmoid of the classifier
            head's output.
        """
        head_output = self.classifier(embeddings)
        logits = head_output.numpy()
        scores = tf.nn.sigmoid(logits)
        return scores.numpy()







[docs]
class Rebuilder:
    def __init__(self, model):
        """Prepare the state needed to rebuild ``model`` from layer 4 on.

        Args:
            model: Keras model whose layers from index 4 onwards are to be
                re-wired onto a fresh input.
        """
        # The first four layers are skipped: the MelSpecLayerSimple classes
        # can't be deserialized by keras 3.11, and they are only part of the
        # preprocessing anyway. The rebuilt graph therefore starts right
        # after the concatenation of the two spectrograms.
        first_kept = 4

        entry_shape = model.layers[first_kept].input.shape[1:]
        self.input_layer = tf.keras.Input(
            shape=entry_shape, name='inputs', dtype=tf.float32
        )

        self.layer_map = {}
        for kept_layer in model.layers[first_kept:]:
            self.layer_map[kept_layer.name] = kept_layer

        # Outputs of layers that were already rebuilt, keyed by layer name,
        # so that no layer is processed twice during the recursion.
        self.output_cache = {}
        # One config entry per rebuilt layer, in the order they were rebuilt.
        self.layer_confs = []


[docs]
    def rebuild_layer(self, layer):
        if layer.name in self.output_cache:
            return self.output_cache[layer.name]
        
        layer_config = {
            "name": layer.name,
            "class_name": layer.__class__.__name__,
            "config": layer.get_config(),
            "inbound_nodes": []
        }
        
        if 'axis' in layer_config['config']:
            if not isinstance(layer_config['config']['axis'], int):
                layer_config['config']['axis'] = layer_config['config']['axis'][0]

        # Handle multiple inputs
        inbound = []
        if isinstance(layer.input, list):
            inputs = []
            for inp in layer.input:
                inbound.append(inp._keras_history.layer.name)
                inputs.append(self.rebuild_layer(inp._keras_history.layer))
        else:
            inp = layer.input
            inbound.append(inp._keras_history.layer.name)
            if inp.name.startswith("concat") or inp.name.startswith("INPUT"):
                layer_config['inbound_nodes'] = inbound
                
                out = layer(self.input_layer)
                self.output_cache[layer.name] = out
                self.layer_confs.append(layer_config)
                return out
            inputs = [self.rebuild_layer(inp._keras_history.layer)]
        layer_config['inbound_nodes'] = inbound
        
        out = layer(inputs if len(inputs) > 1 else inputs[0])
        self.output_cache[layer.name] = out
        self.layer_confs.append(layer_config)
        return out



[docs]
    def build_model(self, model):
        """Rebuild ``model`` on the new input and describe the result.

        Args:
            model: The model whose last layer is the output of the rebuilt
                graph.

        Returns:
            A 2-tuple ``(description, rebuilt_model)``. ``description`` is a
            dict with the new input's shape under ``"input_shape"`` and the
            collected layer configs under ``"layer_confs"``;
            ``rebuilt_model`` is a ``keras.Model`` named ``"rebuilt_model"``
            going from ``self.input_layer`` to the rebuilt final output.
        """
        # Starting the recursion at the last layer pulls in every layer it
        # depends on.
        final_output = self.rebuild_layer(model.layers[-1])

        description = {
            "input_shape": self.input_layer.shape,
            "layer_confs": self.layer_confs,
        }
        rebuilt_model = keras.Model(
            self.input_layer, final_output, name="rebuilt_model"
        )
        return description, rebuilt_model