bootshorn_investigation/process_chunks.py

#!/usr/bin/env python3
import concurrent.futures
import datetime
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import scipy.signal
import soundfile
from scipy.fft import fft, fftfreq, rfft, rfftfreq

# Directory that main() scans for ".flac" recordings.
RECORDINGS_DIR = "recordings"
# Created by main() at startup; not otherwise used in the visible code
# (presumably the destination for already-processed chunks -- TODO confirm).
PROCESSED_RECORDINGS_DIR = "recordings/processed"
# NOTE(review): unused in the visible code; presumably where detected events
# are meant to be stored -- confirm.
DETECTIONS_DIR = "events"
# NOTE(review): unused in the visible code; presumably the lower/upper bound
# (in Hz) of the frequency band to detect -- confirm.
DETECT_FREQUENCY_FROM = 210
DETECT_FREQUENCY_TO = 212
# Length, in seconds, of each block analysed by process_chunk()
# (multiplied by the file's sample rate to get the block size in frames).
CLIP_SECONDS = 1
# NOTE(review): the tuning values below are unused in the visible code;
# their exact meaning and units cannot be determined from here.
THRESHOLD_BASE = 0.1
OCTAVE_FACTOR = 0.1
CLIP_PADDING_BEFORE = 1
CLIP_PADDING_AFTER = 6


def _dominant_frequency(block: np.ndarray, samplerate: int) -> float:
    """Return the frequency (Hz) with the largest spectral magnitude in *block*.

    *block* is either a 1-D array of samples (mono) or a 2-D array shaped
    (frames, channels), which is what soundfile yields for multi-channel
    files. The transform is taken along the frame axis and the per-channel
    magnitudes are summed, so the result is a single frequency regardless
    of the channel count.
    """
    # Real-input FFT: only non-negative frequencies are produced, so the
    # reported peak can never be the mirrored negative-frequency bin.
    magnitudes = np.abs(rfft(block, axis=0))
    if magnitudes.ndim > 1:
        # Sum magnitudes (not raw samples) across channels so that
        # out-of-phase channels cannot cancel each other out.
        magnitudes = magnitudes.sum(axis=1)
    frequencies = rfftfreq(len(block), d=1 / samplerate)
    return frequencies[np.argmax(magnitudes)]


def process_chunk(filename: str) -> None:
    """Print the dominant frequency of each block of a recording.

    The file *filename* inside RECORDINGS_DIR is read in consecutive,
    non-overlapping blocks of CLIP_SECONDS seconds each (the final block
    may be shorter). For every block the frequency with the strongest
    spectral magnitude is printed.

    Args:
        filename: Name of the audio file, relative to RECORDINGS_DIR.
    """
    print('processing', filename)
    path = os.path.join(RECORDINGS_DIR, filename)
    info = soundfile.info(path)
    samplerate = info.samplerate
    blocksize = int(CLIP_SECONDS * samplerate)

    print(info)

    for block in soundfile.blocks(path, blocksize=blocksize, overlap=0):
        print(_dominant_frequency(block, samplerate))

def main():
    """Ensure the working directories exist, then process every FLAC chunk.

    Both RECORDINGS_DIR and PROCESSED_RECORDINGS_DIR are created if missing.
    Each entry of RECORDINGS_DIR whose name ends in ".flac" is then handed
    to process_chunk(), in the order os.listdir() returns them.
    """
    for directory in (RECORDINGS_DIR, PROCESSED_RECORDINGS_DIR):
        os.makedirs(directory, exist_ok=True)

    flac_names = (
        entry for entry in os.listdir(RECORDINGS_DIR) if entry.endswith(".flac")
    )
    for chunk_name in flac_names:
        process_chunk(chunk_name)


# Run the chunk processing only when executed as a script, not on import.
if __name__ == "__main__":
    main()