voice.py comments

2024-10-11 10:14:31 +02:00 · 2024-10-11 10:14:31 +02:00 · 770fca579e
commit 770fca579e
parent d47ef91162
1 changed file with 11 additions and 3 deletions
--- a/py/voice.py
+++ b/py/voice.py
@@ -6,20 +6,28 @@ from pydub import AudioSegment
 class VoiceRecognition:
    @staticmethod
    def recognition(audio):
+        # Read the audio file into a BytesIO buffer
        audio_buffer = io.BytesIO(audio.read())

+        # Load the audio file using pydub
        audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")

+        # Export the audio to a WAV format in a BytesIO buffer
        wav_io = io.BytesIO()
        audio_segment.export(wav_io, format="wav")
-        wav_io.seek(0)
+        wav_io.seek(0)  # Reset the buffer pointer to the start

-        model_size = "base"
+        # Load the Whisper model
+        model_size = "base"  # Specify the model size
        model = WhisperModel(model_size, device="cpu", compute_type="int8")

+        # Transcribe the audio
        segments, _ = model.transcribe(wav_io)
        transcription = ""
+
+        # Combine the transcribed segments into a single string
        for segment in segments:
            transcription += segment.text + " "
-        result = transcription.strip()
+        
+        result = transcription.strip()  # Strip any leading/trailing whitespace
        return result