This commit is contained in:
CroneKorkN 2025-05-30 19:12:51 +02:00
parent f99412c537
commit bf16f740cd
Signed by: cronekorkn
SSH key fingerprint: SHA256:v0410ZKfuO1QHdgKBsdQNF64xmTxOF8osF1LIqwTcVw

View file

@ -1,107 +1,92 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""
Erkennt 211 Hz + 422 Hz (Oberton) in WAV-Dateien.
Speichert WAV + PNG nur bei Erkennung.
Blockiert Folgetreffer für definierte Zeit (SKIP_SECONDS).
"""
import numpy as np
import soundfile as sf
from scipy.fft import fft, fftfreq
import matplotlib.pyplot as plt
import os import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import scipy.signal
from datetime import datetime
import shutil
# === Konfiguration === OUTDIR = "chunks_unprocessed"
FILENAME = "1b.flac" PROCESSED_DIR = "chunks_processed"
TARGET_FREQ = 211
OCTAVE_FREQ = TARGET_FREQ * 2
TOLERANCE = 1
THRESHOLD_BASE = 0.3
THRESHOLD_OCT = THRESHOLD_BASE / 10
CHUNK_SECONDS = 2
CLIP_PADDING_BEFORE = 2
CLIP_PADDING_AFTER = 8
SKIP_SECONDS = 10
OUTDIR = "events"
def _unique_stem(directory, stem):
    """Return a file stem that does not collide with existing clips.

    The stem is returned unchanged unless ``<stem>.wav`` or ``<stem>.png``
    already exists in *directory*; in that case ``-2``, ``-3``, ... is
    appended until both names are free.
    """
    candidate = stem
    counter = 2
    while any(
        os.path.exists(os.path.join(directory, candidate + ext))
        for ext in (".wav", ".png")
    ):
        candidate = f"{stem}-{counter}"
        counter += 1
    return candidate


def process_chunk(filename):
    """Scan one audio file from OUTDIR for the 211 Hz tone and archive it.

    The file is read with ``soundfile`` (first channel only when it has
    several) and examined in consecutive, non-overlapping 1-second windows.
    A window counts as an event when the mean spectrogram power in the
    211 +/- 1 Hz band and in the 422 +/- 1 Hz overtone band both exceed their
    thresholds relative to the window's reference level.

    For every event a clip (1 s before the window up to 6 s after it) is
    written to PROCESSED_DIR as a WAV file together with a PNG spectrogram,
    and the scan jumps 10 s ahead so the same event is not reported twice.
    Once the whole file has been scanned it is moved into PROCESSED_DIR.

    Args:
        filename: Name of the audio file, relative to OUTDIR.
    """
    input_path = os.path.join(OUTDIR, filename)
    print(f"🔍 Verarbeite {input_path}...")

    # Clips, spectrograms and the final move all target PROCESSED_DIR, so make
    # sure it exists before the first write instead of failing mid-run.
    os.makedirs(PROCESSED_DIR, exist_ok=True)

    # Frequency analysis and event detection
    data, samplerate = sf.read(input_path)
    if data.ndim > 1:
        data = data[:, 0]  # first channel only

    CHUNK_SECONDS = 1
    TOLERANCE = 1
    THRESHOLD_BASE = 0.5
    THRESHOLD_OCT = THRESHOLD_BASE / 10
    CLIP_PADDING_BEFORE = 1
    CLIP_PADDING_AFTER = 6
    TARGET_FREQ = 211
    OVERTONE_FREQ = TARGET_FREQ * 2
    NFFT = 32768
    SKIP_SECONDS = 10

    # Convert all durations from seconds to sample counts once.
    chunk_samples = int(CHUNK_SECONDS * samplerate)
    skip_samples = int(SKIP_SECONDS * samplerate)
    padding_before = int(CLIP_PADDING_BEFORE * samplerate)
    padding_after = int(CLIP_PADDING_AFTER * samplerate)

    def detect_event(chunk):
        """Return True if *chunk* contains both the base tone and its overtone."""
        freqs, times, Sxx = scipy.signal.spectrogram(chunk, samplerate, nperseg=NFFT)
        idx_base = np.where((freqs >= TARGET_FREQ - TOLERANCE) & (freqs <= TARGET_FREQ + TOLERANCE))[0]
        idx_oct = np.where((freqs >= OVERTONE_FREQ - TOLERANCE) & (freqs <= OVERTONE_FREQ + TOLERANCE))[0]
        # No frequency bin falls inside one of the bands: nothing to compare.
        if len(idx_base) == 0 or len(idx_oct) == 0:
            return False
        base_energy = np.mean(Sxx[idx_base])
        oct_energy = np.mean(Sxx[idx_oct])
        # Reference level: the largest per-time-slice mean over all frequencies.
        total_energy = np.mean(Sxx, axis=0).max()
        return base_energy > THRESHOLD_BASE * total_energy and oct_energy > THRESHOLD_OCT * total_energy

    base_name = os.path.splitext(filename)[0]

    i = 0
    # Start far enough in the past that the very first window may trigger.
    last_event = -skip_samples
    while i + chunk_samples <= len(data):
        chunk = data[i:i + chunk_samples]
        if i - last_event >= skip_samples and detect_event(chunk):
            clip_start = max(0, i - padding_before)
            clip_end = min(len(data), i + chunk_samples + padding_after)
            clip = data[clip_start:clip_end]

            # NOTE: this is the wall-clock time of processing, not the position
            # of the event inside the recording.
            event_time = datetime.now().strftime("%Y%m%d-%H%M%S")
            # Several events of one file can be handled within the same second;
            # disambiguate so a later clip never overwrites an earlier one.
            stem = _unique_stem(PROCESSED_DIR, f"{base_name}_{event_time}")
            wav_out = os.path.join(PROCESSED_DIR, f"{stem}.wav")
            png_out = os.path.join(PROCESSED_DIR, f"{stem}.png")
            sf.write(wav_out, clip, samplerate)

            plt.figure()
            try:
                plt.specgram(clip, Fs=samplerate, NFFT=NFFT, noverlap=NFFT//2, cmap='inferno', vmin=-90, vmax=-20)
                plt.title(f"Spectrogram: {stem}")
                plt.xlabel("Time (s)")
                plt.ylabel("Frequency (Hz)")
                plt.colorbar(label="dB")
                plt.savefig(png_out)
            finally:
                # Release the figure even if rendering or saving fails.
                plt.close()

            print(f"🎯 Ereignis erkannt bei {event_time}, gespeichert: {wav_out}, {png_out}")
            last_event = i
            # Skip ahead so follow-up windows of the same event are ignored.
            i += skip_samples
        else:
            i += chunk_samples

    # Move the fully scanned file out of the input directory
    output_path = os.path.join(PROCESSED_DIR, filename)
    shutil.move(input_path, output_path)
    print(f"✅ Verschoben nach {output_path}")
def main():
    """Run ``process_chunk`` on every ``.flac`` file listed in OUTDIR."""
    flac_names = [entry for entry in os.listdir(OUTDIR) if entry.endswith(".flac")]
    for flac_name in flac_names:
        process_chunk(flac_name)


# Only start processing when executed as a script, not when imported.
if __name__ == "__main__":
    main()