From 38faeedee1a2cae9e42360ccd21112509cd92497 Mon Sep 17 00:00:00 2001
From: CroneKorkN
Date: Fri, 30 May 2025 19:33:56 +0200
Subject: [PATCH] wip

---
 process_chunk.py  | 133 ++++++++++++++++++++++++++++++++++++++++++++++
 process_chunks.py |   9 +++-
 2 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100755 process_chunk.py

diff --git a/process_chunk.py b/process_chunk.py
new file mode 100755
index 0000000..b187c9e
--- /dev/null
+++ b/process_chunk.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""
+Detect a 211 Hz tone together with its 422 Hz overtone in an audio file.
+
+The recording is analysed in fixed-length chunks. A WAV clip and a PNG
+spectrogram are written only when an event is detected. After a hit, further
+hits are suppressed for SKIP_SECONDS.
+"""
+
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+import soundfile as sf
+from scipy.fft import rfft, rfftfreq
+
+# === Configuration ===
+FILENAME = "1b.flac"
+TARGET_FREQ = 211  # fundamental to detect (Hz)
+OCTAVE_FREQ = TARGET_FREQ * 2  # first overtone (Hz)
+TOLERANCE = 1  # half-width of each search band (Hz)
+THRESHOLD_BASE = 0.3  # minimum band level relative to the spectral peak
+THRESHOLD_OCT = THRESHOLD_BASE / 10
+CHUNK_SECONDS = 2  # length of one analysis window
+CLIP_PADDING_BEFORE = 2  # seconds exported before the start of the matching chunk
+CLIP_PADDING_AFTER = 8  # seconds exported after the start of the matching chunk
+SKIP_SECONDS = 10  # dead time after a detection
+OUTDIR = "events"
+
+
+def band_mask(freqs, center):
+    """Return a boolean mask selecting the bins within TOLERANCE of *center*."""
+    return (freqs >= center - TOLERANCE) & (freqs <= center + TOLERANCE)
+
+
+def detect(segment, freqs, mask_base, mask_oct):
+    """Check one chunk for the target tone and its overtone.
+
+    Returns (energy_base, energy_oct, peak_freq) when the strongest spectral
+    component lies in the target band and both band levels, normalised to that
+    peak, exceed their thresholds; otherwise None.
+    """
+    # One-sided spectrum of the real signal; [1:] drops the DC bin, matching freqs.
+    magnitudes = np.abs(rfft(segment))[1:]
+    peak_index = np.argmax(magnitudes)
+    peak_mag = magnitudes[peak_index]
+    if peak_mag == 0:
+        return None  # digital silence: nothing to normalise against
+
+    peak_freq = freqs[peak_index]
+    if not TARGET_FREQ - TOLERANCE <= peak_freq <= TARGET_FREQ + TOLERANCE:
+        return None
+
+    energy_base = np.mean(magnitudes[mask_base]) / peak_mag
+    energy_oct = np.mean(magnitudes[mask_oct]) / peak_mag
+    if energy_base > THRESHOLD_BASE and energy_oct > THRESHOLD_OCT:
+        return energy_base, energy_oct, peak_freq
+    return None
+
+
+def save_event(data, rate, timestamp, energy_base, energy_oct, peak_freq):
+    """Write the WAV clip and the PNG spectrogram for one detection."""
+    start = max(0, int((timestamp - CLIP_PADDING_BEFORE) * rate))
+    end = min(len(data), int((timestamp + CLIP_PADDING_AFTER) * rate))
+    # Keep the float samples: quantising to int16 first would throw away the
+    # extra resolution of the 24-bit output format.
+    clip = data[start:end]
+
+    base_filename = os.path.join(OUTDIR, f"event_{int(timestamp):04}s")
+    wav_name = f"{base_filename}.wav"
+    png_name = f"{base_filename}.png"
+
+    sf.write(wav_name, clip, rate, subtype="PCM_24")
+    print(f"🟢 WAV gespeichert: {wav_name} (211Hz: {energy_base:.4f}, 422Hz: {energy_oct:.4f}, Peak: {peak_freq:.1f} Hz)")
+
+    plt.figure(figsize=(10, 4))
+    plt.specgram(clip, NFFT=32768, Fs=rate, noverlap=512, cmap="plasma", vmin=-80, vmax=-35)
+    plt.title(f"Ereignis @ {timestamp:.2f}s")
+    plt.xlabel("Zeit (s)")
+    plt.ylabel("Frequenz (Hz)")
+    plt.ylim(0, 1000)
+    plt.colorbar(label="Intensität (dB)")
+    plt.tight_layout()
+    plt.savefig(png_name)
+    plt.close()
+    print(f"📷 PNG gespeichert: {png_name}")
+
+
+def main():
+    """Scan FILENAME chunk by chunk, export every detection and print a summary."""
+    os.makedirs(OUTDIR, exist_ok=True)
+
+    data, rate = sf.read(FILENAME, dtype="float32")
+    if data.ndim > 1:
+        data = data.mean(axis=1)  # downmix to mono
+
+    samples_per_chunk = int(rate * CHUNK_SECONDS)
+    total_chunks = len(data) // samples_per_chunk
+
+    # All chunks have the same length, so the frequency axis and the band
+    # masks are computed once instead of per chunk.
+    freqs = rfftfreq(samples_per_chunk, d=1 / rate)[1:]
+    mask_base = band_mask(freqs, TARGET_FREQ)
+    mask_oct = band_mask(freqs, OCTAVE_FREQ)
+
+    detections = []
+    next_allowed_time = 0  # earliest timestamp at which a new hit is accepted
+
+    for i in range(total_chunks):
+        timestamp = i * CHUNK_SECONDS
+        if timestamp < next_allowed_time:
+            continue
+
+        segment = data[i * samples_per_chunk:(i + 1) * samples_per_chunk]
+        hit = detect(segment, freqs, mask_base, mask_oct)
+        if hit is None:
+            continue
+
+        energy_base, energy_oct, peak_freq = hit
+        detections.append((timestamp, round(energy_base, 4), round(energy_oct, 4), round(peak_freq, 2)))
+        next_allowed_time = timestamp + SKIP_SECONDS
+        save_event(data, rate, timestamp, energy_base, energy_oct, peak_freq)
+
+    print("\n🎯 Erkennungen:")
+    for ts, eb, eo, pf in detections:
+        print(f"- {ts:.2f}s | 211Hz: {eb} | 422Hz: {eo} | Peak: {pf:.1f} Hz")
+
+    if not detections:
+        print("→ Keine gültigen Ereignisse erkannt.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/process_chunks.py b/process_chunks.py
index f45253e..fb91aec 100755
--- a/process_chunks.py
+++ b/process_chunks.py
@@ -5,6 +5,7 @@ import numpy as np
 import matplotlib.pyplot as plt
 import soundfile as sf
 import scipy.signal
+from scipy.fft import fft, fftfreq
 from datetime import datetime
 import shutil
 
@@ -31,7 +32,13 @@ def detect_event(chunk, samplerate):
     base_energy = np.mean(Sxx[idx_base])
     oct_energy = np.mean(Sxx[idx_oct])
     total_energy = np.mean(Sxx, axis=0).max()
-    return base_energy > THRESHOLD_BASE * total_energy and oct_energy > THRESHOLD_OCT * total_energy
+
+    fft_vals = np.abs(fft(chunk))
+    freqs = fftfreq(len(chunk), 1/samplerate)
+    peak_freq = freqs[np.argmax(fft_vals)]
+    is_peak_near_target = TARGET_FREQ - TOLERANCE <= peak_freq <= TARGET_FREQ + TOLERANCE
+
+    return is_peak_near_target and base_energy > THRESHOLD_BASE * total_energy and oct_energy > THRESHOLD_OCT * total_energy
 
 def process_chunk(filename):
     input_path = os.path.join(INPUT_DIR, filename)