2024-09-26 13:08:29 +02:00
|
|
|
import io
|
|
|
|
import numpy as np
|
2024-09-27 13:59:27 +02:00
|
|
|
from faster_whisper import WhisperModel
|
2024-09-26 13:08:29 +02:00
|
|
|
from pydub import AudioSegment
|
2024-09-24 09:55:23 +02:00
|
|
|
|
|
|
|
|
|
|
|
class VoiceRecognition:
    """Transcribe OGG audio (e.g. chat voice messages) to text with faster-whisper."""

    # Cached model instance. Loading Whisper weights is expensive, so do it
    # once per process instead of on every recognition() call.
    _model = None

    @staticmethod
    def _get_model():
        """Return the shared WhisperModel, creating it lazily on first use."""
        if VoiceRecognition._model is None:
            # "base" model on CPU with int8 quantization keeps memory modest.
            VoiceRecognition._model = WhisperModel(
                "base", device="cpu", compute_type="int8"
            )
        return VoiceRecognition._model

    @staticmethod
    def recognition(audio):
        """Transcribe an OGG audio stream and return the recognized text.

        Args:
            audio: a file-like object (supporting ``.read()``) whose contents
                are OGG-encoded audio.

        Returns:
            str: the transcription, with surrounding whitespace stripped.

        Raises:
            RuntimeError: if the OGG input cannot be decoded/converted to WAV.
        """
        audio_buffer = io.BytesIO(audio.read())

        try:
            # pydub/ffmpeg decodes the OGG container and re-exports as WAV,
            # which is a format Whisper consumes reliably.
            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
            wav_io = io.BytesIO()
            audio_segment.export(wav_io, format="wav")
            wav_io.seek(0)
        except Exception as exc:
            # The original code printed "audio to wav failed" and fell
            # through, which then crashed with a NameError on wav_io.
            # Fail loudly and explicitly instead, preserving the cause.
            raise RuntimeError("audio to wav failed") from exc

        segments, _ = VoiceRecognition._get_model().transcribe(wav_io)

        # str.join avoids quadratic += concatenation; result is identical to
        # appending each segment text plus a space and stripping at the end.
        result = " ".join(segment.text for segment in segments).strip()
        print(result)
        return result
|
2024-09-26 16:01:40 +02:00
|
|
|
|
|
|
|
# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
|