From 770fca579e3f3abf4e851922d7ca2aa5af333924 Mon Sep 17 00:00:00 2001 From: sageTheDM Date: Fri, 11 Oct 2024 10:14:31 +0200 Subject: [PATCH] voice.py comments --- py/voice.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/py/voice.py b/py/voice.py index 8aeb3e0..536407f 100644 --- a/py/voice.py +++ b/py/voice.py @@ -6,20 +6,28 @@ from pydub import AudioSegment class VoiceRecognition: @staticmethod def recognition(audio): + # Read the audio file into a BytesIO buffer audio_buffer = io.BytesIO(audio.read()) + # Load the audio file using pydub audio_segment = AudioSegment.from_file(audio_buffer, format="ogg") + # Export the audio to a WAV format in a BytesIO buffer wav_io = io.BytesIO() audio_segment.export(wav_io, format="wav") - wav_io.seek(0) + wav_io.seek(0) # Reset the buffer pointer to the start - model_size = "base" + # Load the Whisper model + model_size = "base" # Specify the model size model = WhisperModel(model_size, device="cpu", compute_type="int8") + # Transcribe the audio segments, _ = model.transcribe(wav_io) transcription = "" + + # Combine the transcribed segments into a single string for segment in segments: transcription += segment.text + " " - result = transcription.strip() + + result = transcription.strip() # Strip any leading/trailing whitespace return result