wip
This commit is contained in:
parent
f99412c537
commit
bf16f740cd
1 changed files with 76 additions and 91 deletions
|
@ -1,107 +1,92 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
|
||||||
Erkennt 211 Hz + 422 Hz (Oberton) in WAV-Dateien.
|
|
||||||
Speichert WAV + PNG nur bei Erkennung.
|
|
||||||
Blockiert Folgetreffer für definierte Zeit (SKIP_SECONDS).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import soundfile as sf
|
|
||||||
from scipy.fft import fft, fftfreq
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import os
|
import os
|
||||||
|
import datetime
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import soundfile as sf
|
||||||
|
import scipy.signal
|
||||||
|
from datetime import datetime
|
||||||
|
import shutil
|
||||||
|
|
||||||
# === Konfiguration ===
|
OUTDIR = "chunks_unprocessed"
|
||||||
FILENAME = "1b.flac"
|
PROCESSED_DIR = "chunks_processed"
|
||||||
TARGET_FREQ = 211
|
|
||||||
OCTAVE_FREQ = TARGET_FREQ * 2
|
|
||||||
TOLERANCE = 1
|
|
||||||
THRESHOLD_BASE = 0.3
|
|
||||||
THRESHOLD_OCT = THRESHOLD_BASE / 10
|
|
||||||
CHUNK_SECONDS = 2
|
|
||||||
CLIP_PADDING_BEFORE = 2
|
|
||||||
CLIP_PADDING_AFTER = 8
|
|
||||||
SKIP_SECONDS = 10
|
|
||||||
OUTDIR = "events"
|
|
||||||
|
|
||||||
os.makedirs(OUTDIR, exist_ok=True)
|
def process_chunk(filename):
|
||||||
|
input_path = os.path.join(OUTDIR, filename)
|
||||||
|
print(f"🔍 Verarbeite {input_path}...")
|
||||||
|
|
||||||
# === WAV/Audio-Datei laden ===
|
# Frequenzanalyse und Event-Erkennung
|
||||||
data, rate = sf.read(FILENAME, dtype='float32')
|
data, samplerate = sf.read(input_path)
|
||||||
if data.ndim > 1:
|
if data.ndim > 1:
|
||||||
data = data.mean(axis=1)
|
data = data[:, 0] # nur Kanal 1
|
||||||
|
|
||||||
samples_per_chunk = int(rate * CHUNK_SECONDS)
|
CHUNK_SECONDS = 1
|
||||||
total_chunks = len(data) // samples_per_chunk
|
TOLERANCE = 1
|
||||||
|
THRESHOLD_BASE = 0.5
|
||||||
|
THRESHOLD_OCT = THRESHOLD_BASE / 10
|
||||||
|
CLIP_PADDING_BEFORE = 1
|
||||||
|
CLIP_PADDING_AFTER = 6
|
||||||
|
TARGET_FREQ = 211
|
||||||
|
OVERTONE_FREQ = TARGET_FREQ * 2
|
||||||
|
NFFT = 32768
|
||||||
|
SKIP_SECONDS = 10
|
||||||
|
|
||||||
detections = []
|
chunk_samples = int(CHUNK_SECONDS * samplerate)
|
||||||
next_allowed_time = 0 # für Skip-Logik
|
skip_samples = int(SKIP_SECONDS * samplerate)
|
||||||
|
padding_before = int(CLIP_PADDING_BEFORE * samplerate)
|
||||||
|
padding_after = int(CLIP_PADDING_AFTER * samplerate)
|
||||||
|
|
||||||
# === Analyse-Loop ===
|
def detect_event(chunk):
|
||||||
for i in range(total_chunks):
|
freqs, times, Sxx = scipy.signal.spectrogram(chunk, samplerate, nperseg=NFFT)
|
||||||
timestamp = i * CHUNK_SECONDS
|
idx_base = np.where((freqs >= TARGET_FREQ - TOLERANCE) & (freqs <= TARGET_FREQ + TOLERANCE))[0]
|
||||||
if timestamp < next_allowed_time:
|
idx_oct = np.where((freqs >= OVERTONE_FREQ - TOLERANCE) & (freqs <= OVERTONE_FREQ + TOLERANCE))[0]
|
||||||
continue
|
if len(idx_base) == 0 or len(idx_oct) == 0:
|
||||||
|
return False
|
||||||
|
base_energy = np.mean(Sxx[idx_base])
|
||||||
|
oct_energy = np.mean(Sxx[idx_oct])
|
||||||
|
total_energy = np.mean(Sxx, axis=0).max()
|
||||||
|
return base_energy > THRESHOLD_BASE * total_energy and oct_energy > THRESHOLD_OCT * total_energy
|
||||||
|
|
||||||
segment = data[i * samples_per_chunk : (i + 1) * samples_per_chunk]
|
i = 0
|
||||||
if len(segment) == 0:
|
last_event = -skip_samples
|
||||||
continue
|
while i + chunk_samples <= len(data):
|
||||||
|
chunk = data[i:i+chunk_samples]
|
||||||
|
if i - last_event >= skip_samples and detect_event(chunk):
|
||||||
|
clip_start = max(0, i - padding_before)
|
||||||
|
clip_end = min(len(data), i + chunk_samples + padding_after)
|
||||||
|
clip = data[clip_start:clip_end]
|
||||||
|
|
||||||
freqs = fftfreq(len(segment), d=1/rate)
|
event_time = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||||
fft_vals = np.abs(fft(segment))
|
base_name = os.path.splitext(filename)[0]
|
||||||
|
wav_out = os.path.join(PROCESSED_DIR, f"{base_name}_{event_time}.wav")
|
||||||
|
png_out = os.path.join(PROCESSED_DIR, f"{base_name}_{event_time}.png")
|
||||||
|
sf.write(wav_out, clip, samplerate)
|
||||||
|
|
||||||
pos_mask = freqs > 0
|
plt.figure()
|
||||||
freqs = freqs[pos_mask]
|
plt.specgram(clip, Fs=samplerate, NFFT=NFFT, noverlap=NFFT//2, cmap='inferno', vmin=-90, vmax=-20)
|
||||||
fft_vals = fft_vals[pos_mask]
|
plt.title(f"Spectrogram: {base_name}_{event_time}")
|
||||||
|
plt.xlabel("Time (s)")
|
||||||
|
plt.ylabel("Frequency (Hz)")
|
||||||
|
plt.colorbar(label="dB")
|
||||||
|
plt.savefig(png_out)
|
||||||
|
plt.close()
|
||||||
|
|
||||||
peak_freq = freqs[np.argmax(fft_vals)]
|
print(f"🎯 Ereignis erkannt bei {event_time}, gespeichert: {wav_out}, {png_out}")
|
||||||
peak_mag = np.max(fft_vals)
|
last_event = i
|
||||||
|
i += skip_samples
|
||||||
|
else:
|
||||||
|
i += chunk_samples
|
||||||
|
|
||||||
# Energien normiert
|
# Datei verschieben
|
||||||
mask_base = (freqs >= TARGET_FREQ - TOLERANCE) & (freqs <= TARGET_FREQ + TOLERANCE)
|
output_path = os.path.join(PROCESSED_DIR, filename)
|
||||||
energy_base = np.mean(fft_vals[mask_base]) / peak_mag
|
shutil.move(input_path, output_path)
|
||||||
|
print(f"✅ Verschoben nach {output_path}")
|
||||||
|
|
||||||
mask_oct = (freqs >= OCTAVE_FREQ - TOLERANCE) & (freqs <= OCTAVE_FREQ + TOLERANCE)
|
|
||||||
energy_oct = np.mean(fft_vals[mask_oct]) / peak_mag
|
|
||||||
|
|
||||||
is_peak_near_target = TARGET_FREQ - TOLERANCE <= peak_freq <= TARGET_FREQ + TOLERANCE
|
def main():
|
||||||
detected = is_peak_near_target and energy_base > THRESHOLD_BASE and energy_oct > THRESHOLD_OCT
|
for filename in os.listdir(OUTDIR):
|
||||||
|
if filename.endswith(".flac"):
|
||||||
|
process_chunk(filename)
|
||||||
|
|
||||||
if detected:
|
if __name__ == "__main__":
|
||||||
detections.append((timestamp, round(energy_base, 4), round(energy_oct, 4), round(peak_freq, 2)))
|
main()
|
||||||
next_allowed_time = timestamp + SKIP_SECONDS
|
|
||||||
|
|
||||||
# Ausschnitt extrahieren
|
|
||||||
start = max(0, int((timestamp - CLIP_PADDING_BEFORE) * rate))
|
|
||||||
end = min(len(data), int((timestamp + CLIP_PADDING_AFTER) * rate))
|
|
||||||
clip = (data[start:end] * 32767).astype(np.int16)
|
|
||||||
|
|
||||||
base_filename = os.path.join(OUTDIR, f"event_{int(timestamp):04}s")
|
|
||||||
wav_name = f"{base_filename}.wav"
|
|
||||||
png_name = f"{base_filename}.png"
|
|
||||||
|
|
||||||
# WAV speichern
|
|
||||||
sf.write(wav_name, clip, rate, subtype="PCM_24")
|
|
||||||
print(f"🟢 WAV gespeichert: {wav_name} (211Hz: {energy_base:.4f}, 422Hz: {energy_oct:.4f}, Peak: {peak_freq:.1f} Hz)")
|
|
||||||
|
|
||||||
# PNG Spektrogramm
|
|
||||||
plt.figure(figsize=(10, 4))
|
|
||||||
# Verstärke das Signal künstlich, um schwache Ereignisse im dB-Spektrum sichtbarer zu machen
|
|
||||||
plt.specgram((clip / 32767.0), NFFT=32768, Fs=rate, noverlap=512, cmap="plasma", vmin=-80, vmax=-35)
|
|
||||||
plt.title(f"Ereignis @ {timestamp:.2f}s")
|
|
||||||
plt.xlabel("Zeit (s)")
|
|
||||||
plt.ylabel("Frequenz (Hz)")
|
|
||||||
plt.ylim(0, 1000)
|
|
||||||
plt.colorbar(label="Intensität (dB)")
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(png_name)
|
|
||||||
plt.close()
|
|
||||||
print(f"📷 PNG gespeichert: {png_name}")
|
|
||||||
|
|
||||||
# === Zusammenfassung ===
|
|
||||||
print("\n🎯 Erkennungen:")
|
|
||||||
for ts, eb, eo, pf in detections:
|
|
||||||
print(f"- {ts:.2f}s | 211Hz: {eb} | 422Hz: {eo} | Peak: {pf:.1f} Hz")
|
|
||||||
|
|
||||||
if not detections:
|
|
||||||
print("→ Keine gültigen Ereignisse erkannt.")
|
|
Loading…
Reference in a new issue