left4me/speech-recognition/left4dead_voices
mwiegand a4f87b0d88
wip
2022-09-11 14:49:01 +02:00

29 lines
841 B
Python
Executable file

#!/usr/bin/python3
# download and put in folder 'audio': https://cloud.sublimity.de/s/2QP6PAWGTK57iSt
# unpack and rename folder to 'model': https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip
from vosk import Model, KaldiRecognizer
import sys
import json
import os
from glob import glob
from random import random
import wave
# Load the Vosk model from the local 'model' folder once; it is reused for
# every audio file below.
model = Model('model')

# Walk every WAV file under 'audio' (recursively). Sorting with a random key
# visits the files in a shuffled order on each run.
for path in sorted(glob('audio/**/*.wav', recursive=True), key=lambda k: random()):
    with wave.open(path, "rb") as wav:
        frame_rate = wav.getframerate()
        # Build the recognizer with the file's own sample rate instead of a
        # hard-coded 44100, so files recorded at other rates are decoded
        # correctly.
        # NOTE(review): Vosk's examples expect mono 16-bit PCM input --
        # confirm the audio files match that format.
        rec = KaldiRecognizer(model, frame_rate)
        # Feed the audio in chunks of 2000 frames; readframes() returns an
        # empty bytes object once the end of the file is reached.
        while True:
            data = wav.readframes(2000)
            if not data:
                break
            rec.AcceptWaveform(data)
    # FinalResult() returns a JSON document; its 'text' field is printed
    # next to the left-aligned "path (rate):" label.
    print(f'{path} ({frame_rate}):'.ljust(70, ' '), json.loads(rec.FinalResult())['text'])