voice.py comments

This commit is contained in:
sageTheDM 2024-10-11 10:14:31 +02:00
parent d47ef91162
commit 770fca579e

View file

@ -6,20 +6,28 @@ from pydub import AudioSegment
class VoiceRecognition:
    """Speech-to-text helper that converts OGG audio to WAV and transcribes it."""

    @staticmethod
    def recognition(audio):
        """Transcribe an OGG audio stream and return the recognized text.

        Args:
            audio: Binary file-like object exposing ``read()``; its contents
                are decoded as OGG.

        Returns:
            The text of every transcribed segment joined by single spaces,
            with leading and trailing whitespace removed.
        """
        # Work on an in-memory copy of the uploaded data and decode it as OGG.
        ogg_stream = io.BytesIO(audio.read())
        decoded = AudioSegment.from_file(ogg_stream, format="ogg")

        # Re-encode as WAV in memory, then rewind so the model reads from
        # the first byte instead of the end of the freshly written buffer.
        wav_stream = io.BytesIO()
        decoded.export(wav_stream, format="wav")
        wav_stream.seek(0)

        # A new "base" model is constructed on each call, running on the CPU
        # with int8 compute.
        whisper = WhisperModel("base", device="cpu", compute_type="int8")

        # transcribe() yields a pair; only the segments (first item) are used.
        pieces, _unused = whisper.transcribe(wav_stream)
        return " ".join(piece.text for piece in pieces).strip()