interstellar_ai/py/voice.py
2024-09-30 11:47:58 +02:00

25 lines
704 B
Python

import io
from faster_whisper import WhisperModel
from pydub import AudioSegment
class VoiceRecognition:
    """Transcribe Ogg audio to text with a faster-whisper model."""

    # Model name handed to WhisperModel.
    _MODEL_SIZE = "base"
    # Shared model instance, created on first use. Constructing a
    # WhisperModel on every call repeats the model load for each request.
    _model = None

    @classmethod
    def _get_model(cls):
        """Return the shared WhisperModel, creating it on first use."""
        # NOTE(review): two threads racing here could each build a model; the
        # later assignment simply wins. Confirm concurrent transcribe() calls
        # on one instance are acceptable for the deployment.
        if cls._model is None:
            cls._model = WhisperModel(
                cls._MODEL_SIZE, device="cpu", compute_type="int8"
            )
        return cls._model

    @staticmethod
    def recognition(audio):
        """Return the transcription of an Ogg audio stream.

        Args:
            audio: Object with a ``read()`` method returning the bytes of an
                Ogg-encoded recording.

        Returns:
            The text of all recognised segments joined by single spaces, with
            leading and trailing whitespace removed. Empty string when no
            segment is produced.
        """
        # Buffer the payload so the decoder gets an in-memory, seekable stream.
        ogg_buffer = io.BytesIO(audio.read())
        audio_segment = AudioSegment.from_file(ogg_buffer, format="ogg")

        # Re-encode as WAV in memory before handing it to the model.
        wav_io = io.BytesIO()
        audio_segment.export(wav_io, format="wav")
        wav_io.seek(0)  # export leaves the cursor at the end of the buffer

        segments, _ = VoiceRecognition._get_model().transcribe(wav_io)
        return " ".join(segment.text for segment in segments).strip()