diff --git a/process_chunk.py b/process_chunk.py
deleted file mode 100755
index b187c9e..0000000
--- a/process_chunk.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env python3
-"""
-Detects 211 Hz + 422 Hz (overtone) in WAV files.
-Saves WAV + PNG only on detection.
-Blocks follow-up hits for a defined time (SKIP_SECONDS).
-"""
-
-import numpy as np
-import soundfile as sf
-from scipy.fft import fft, fftfreq
-import matplotlib.pyplot as plt
-import os
-
-# === Configuration ===
-FILENAME = "1b.flac"
-TARGET_FREQ = 211
-OCTAVE_FREQ = TARGET_FREQ * 2
-TOLERANCE = 1
-THRESHOLD_BASE = 0.3
-THRESHOLD_OCT = THRESHOLD_BASE / 10
-CHUNK_SECONDS = 2
-CLIP_PADDING_BEFORE = 2
-CLIP_PADDING_AFTER = 8
-SKIP_SECONDS = 10
-OUTDIR = "events"
-
-os.makedirs(OUTDIR, exist_ok=True)
-
-# === Load WAV/audio file ===
-data, rate = sf.read(FILENAME, dtype='float32')
-if data.ndim > 1:
-    data = data.mean(axis=1)
-
-samples_per_chunk = int(rate * CHUNK_SECONDS)
-total_chunks = len(data) // samples_per_chunk
-
-detections = []
-next_allowed_time = 0  # for the skip logic
-
-# === Analysis loop ===
-for i in range(total_chunks):
-    timestamp = i * CHUNK_SECONDS
-    if timestamp < next_allowed_time:
-        continue
-
-    segment = data[i * samples_per_chunk : (i + 1) * samples_per_chunk]
-    if len(segment) == 0:
-        continue
-
-    freqs = fftfreq(len(segment), d=1/rate)
-    fft_vals = np.abs(fft(segment))
-
-    pos_mask = freqs > 0
-    freqs = freqs[pos_mask]
-    fft_vals = fft_vals[pos_mask]
-
-    peak_freq = freqs[np.argmax(fft_vals)]
-    peak_mag = np.max(fft_vals)
-
-    # Band energies, normalized to the peak magnitude
-    mask_base = (freqs >= TARGET_FREQ - TOLERANCE) & (freqs <= TARGET_FREQ + TOLERANCE)
-    energy_base = np.mean(fft_vals[mask_base]) / peak_mag
-
-    mask_oct = (freqs >= OCTAVE_FREQ - TOLERANCE) & (freqs <= OCTAVE_FREQ + TOLERANCE)
-    energy_oct = np.mean(fft_vals[mask_oct]) / peak_mag
-
-    is_peak_near_target = TARGET_FREQ - TOLERANCE <= peak_freq <= TARGET_FREQ + TOLERANCE
-    detected = is_peak_near_target and energy_base > THRESHOLD_BASE and energy_oct > THRESHOLD_OCT
-
-    if detected:
-        detections.append((timestamp, round(energy_base, 4), round(energy_oct, 4), round(peak_freq, 2)))
-        next_allowed_time = timestamp + SKIP_SECONDS
-
-        # Extract the clip around the event
-        start = max(0, int((timestamp - CLIP_PADDING_BEFORE) * rate))
-        end = min(len(data), int((timestamp + CLIP_PADDING_AFTER) * rate))
-        clip = (data[start:end] * 32767).astype(np.int16)
-
-        base_filename = os.path.join(OUTDIR, f"event_{int(timestamp):04}s")
-        wav_name = f"{base_filename}.wav"
-        png_name = f"{base_filename}.png"
-
-        # Save WAV
-        sf.write(wav_name, clip, rate, subtype="PCM_24")
-        print(f"🟢 WAV saved: {wav_name} (211Hz: {energy_base:.4f}, 422Hz: {energy_oct:.4f}, Peak: {peak_freq:.1f} Hz)")
-
-        # PNG spectrogram
-        plt.figure(figsize=(10, 4))
-        # Rescale the int16 clip back to float; the fixed vmin/vmax range keeps weak events visible in the dB spectrum
-        plt.specgram((clip / 32767.0), NFFT=32768, Fs=rate, noverlap=512, cmap="plasma", vmin=-80, vmax=-35)
-        plt.title(f"Event @ {timestamp:.2f}s")
-        plt.xlabel("Time (s)")
-        plt.ylabel("Frequency (Hz)")
-        plt.ylim(0, 1000)
-        plt.colorbar(label="Intensity (dB)")
-        plt.tight_layout()
-        plt.savefig(png_name)
-        plt.close()
-        print(f"📷 PNG saved: {png_name}")
-
-# === Summary ===
-print("\n🎯 Detections:")
-for ts, eb, eo, pf in detections:
-    print(f"- {ts:.2f}s | 211Hz: {eb} | 422Hz: {eo} | Peak: {pf:.1f} Hz")
-
-if not detections:
-    print("→ No valid events detected.")
\ No newline at end of file