#!/usr/bin/env python3
"""Detect a 211 Hz tone together with its 422 Hz overtone in an audio file.

The recording is scanned in fixed-length chunks. A WAV clip and a PNG
spectrogram are written only for chunks in which the tone is detected.
After a detection, further hits are suppressed for SKIP_SECONDS.
"""
import os

import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from scipy.fft import fft, fftfreq

# === Configuration ===
FILENAME = "1b.flac"
TARGET_FREQ = 211                     # fundamental to look for (Hz)
OCTAVE_FREQ = TARGET_FREQ * 2         # overtone that must also be present (Hz)
TOLERANCE = 1                         # half-width of each frequency band (Hz)
THRESHOLD_BASE = 0.3                  # min. band mean / spectrum peak, fundamental
THRESHOLD_OCT = THRESHOLD_BASE / 10   # min. band mean / spectrum peak, overtone
CHUNK_SECONDS = 2                     # length of one analysis chunk (s)
CLIP_PADDING_BEFORE = 2               # audio kept before the chunk start (s)
CLIP_PADDING_AFTER = 8                # audio kept after the chunk start (s)
SKIP_SECONDS = 10                     # dead time after a detection (s)
OUTDIR = "events"


def _band_mask(freqs: np.ndarray, center: float) -> np.ndarray:
    """Return a boolean mask of the bins within TOLERANCE Hz of *center*."""
    return (freqs >= center - TOLERANCE) & (freqs <= center + TOLERANCE)


def _relative_band_energy(magnitudes: np.ndarray, mask: np.ndarray, peak_mag):
    """Return the mean magnitude inside *mask*, normalised by *peak_mag*.

    Returns 0.0 when the band contains no bins (for example when it lies
    above the Nyquist frequency) instead of taking the mean of an empty
    array, which would yield NaN and a RuntimeWarning.
    """
    if not mask.any():
        return 0.0
    return np.mean(magnitudes[mask]) / peak_mag


def _save_event(data, rate, timestamp, energy_base, energy_oct, peak_freq):
    """Write the WAV clip and the PNG spectrogram for one detection.

    The clip spans CLIP_PADDING_BEFORE seconds before to CLIP_PADDING_AFTER
    seconds after *timestamp*, clamped to the bounds of *data*.
    """
    start = max(0, int((timestamp - CLIP_PADDING_BEFORE) * rate))
    end = min(len(data), int((timestamp + CLIP_PADDING_AFTER) * rate))
    # Keep the float samples: casting to int16 first would throw away the
    # extra resolution of the 24-bit output file.
    clip = data[start:end]

    base_filename = os.path.join(OUTDIR, f"event_{int(timestamp):04}s")
    wav_name = f"{base_filename}.wav"
    png_name = f"{base_filename}.png"

    # Save WAV
    sf.write(wav_name, clip, rate, subtype="PCM_24")
    print(f"🟢 WAV gespeichert: {wav_name} (211Hz: {energy_base:.4f}, 422Hz: {energy_oct:.4f}, Peak: {peak_freq:.1f} Hz)")

    # Save PNG spectrogram; the fixed dB colour range keeps plots comparable.
    plt.figure(figsize=(10, 4))
    plt.specgram(clip, NFFT=32768, Fs=rate, noverlap=512, cmap="plasma", vmin=-80, vmax=-35)
    plt.title(f"Ereignis @ {timestamp:.2f}s")
    plt.xlabel("Zeit (s)")
    plt.ylabel("Frequenz (Hz)")
    plt.ylim(0, 1000)
    plt.colorbar(label="Intensität (dB)")
    plt.tight_layout()
    plt.savefig(png_name)
    plt.close()
    print(f"📷 PNG gespeichert: {png_name}")


def main():
    """Scan FILENAME chunk by chunk, save every detection and print a summary."""
    os.makedirs(OUTDIR, exist_ok=True)

    # === Load audio file ===
    data, rate = sf.read(FILENAME, dtype="float32")
    if data.ndim > 1:
        data = data.mean(axis=1)  # average the channels down to mono

    samples_per_chunk = int(rate * CHUNK_SECONDS)
    # Floor division: a trailing partial chunk is not analysed.
    total_chunks = len(data) // samples_per_chunk

    # Every chunk has the same length, so the frequency axis and the band
    # masks are computed once instead of once per chunk.
    freqs = fftfreq(samples_per_chunk, d=1 / rate)
    pos_mask = freqs > 0
    freqs = freqs[pos_mask]
    mask_base = _band_mask(freqs, TARGET_FREQ)
    mask_oct = _band_mask(freqs, OCTAVE_FREQ)

    detections = []
    next_allowed_time = 0  # chunks starting before this time are skipped

    # === Analysis loop ===
    for i in range(total_chunks):
        timestamp = i * CHUNK_SECONDS
        if timestamp < next_allowed_time:
            continue

        segment = data[i * samples_per_chunk:(i + 1) * samples_per_chunk]
        fft_vals = np.abs(fft(segment))[pos_mask]

        peak_index = np.argmax(fft_vals)
        peak_mag = fft_vals[peak_index]
        if peak_mag == 0:
            # Completely silent chunk: nothing to normalise against, and it
            # cannot contain the tone.
            continue
        peak_freq = freqs[peak_index]

        # Band energies, normalised by the strongest spectral line
        energy_base = _relative_band_energy(fft_vals, mask_base, peak_mag)
        energy_oct = _relative_band_energy(fft_vals, mask_oct, peak_mag)

        is_peak_near_target = TARGET_FREQ - TOLERANCE <= peak_freq <= TARGET_FREQ + TOLERANCE
        detected = (
            is_peak_near_target
            and energy_base > THRESHOLD_BASE
            and energy_oct > THRESHOLD_OCT
        )
        if not detected:
            continue

        detections.append(
            (timestamp, round(energy_base, 4), round(energy_oct, 4), round(peak_freq, 2))
        )
        next_allowed_time = timestamp + SKIP_SECONDS
        _save_event(data, rate, timestamp, energy_base, energy_oct, peak_freq)

    # === Summary ===
    print("\n🎯 Erkennungen:")
    for ts, eb, eo, pf in detections:
        print(f"- {ts:.2f}s | 211Hz: {eb} | 422Hz: {eo} | Peak: {pf:.1f} Hz")
    if not detections:
        print("→ Keine gültigen Ereignisse erkannt.")


if __name__ == "__main__":
    main()