From 770fca579e3f3abf4e851922d7ca2aa5af333924 Mon Sep 17 00:00:00 2001
From: sageTheDM <info@photofuel.tech>
Date: Fri, 11 Oct 2024 10:14:31 +0200
Subject: [PATCH] Add explanatory comments to py/voice.py

---
 py/voice.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/py/voice.py b/py/voice.py
index 8aeb3e0..536407f 100644
--- a/py/voice.py
+++ b/py/voice.py
@@ -6,20 +6,28 @@ from pydub import AudioSegment
 class VoiceRecognition:
     @staticmethod
     def recognition(audio):
+        # Read the audio file into a BytesIO buffer
         audio_buffer = io.BytesIO(audio.read())
 
+        # Decode the OGG audio from the buffer using pydub
         audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
 
+        # Export the audio as WAV into an in-memory BytesIO buffer
         wav_io = io.BytesIO()
         audio_segment.export(wav_io, format="wav")
-        wav_io.seek(0)
+        wav_io.seek(0)  # Reset the buffer pointer to the start
 
-        model_size = "base"
+        # Load the Whisper model
+        model_size = "base"  # Specify the model size
         model = WhisperModel(model_size, device="cpu", compute_type="int8")
 
+        # Transcribe the audio
         segments, _ = model.transcribe(wav_io)
         transcription = ""
+
+        # Combine the transcribed segments into a single string
         for segment in segments:
             transcription += segment.text + " "
-        result = transcription.strip()
+
+        result = transcription.strip()  # Strip any leading/trailing whitespace
         return result