interstellar_ai/py/voice.py

import io
from faster_whisper import WhisperModel
from pydub import AudioSegment


class VoiceRecognition:
    """Speech-to-text helper: decodes audio with pydub, transcribes with Whisper."""

    # Whisper models that have already been loaded, keyed by model size.
    # Constructing a WhisperModel is expensive, so each size is loaded once
    # and reused by later calls instead of being rebuilt on every request.
    _models = {}

    @staticmethod
    def _get_model(model_size):
        """Return the CPU/int8 Whisper model for *model_size*, loading it on first use."""
        model = VoiceRecognition._models.get(model_size)
        if model is None:
            model = WhisperModel(model_size, device="cpu", compute_type="int8")
            VoiceRecognition._models[model_size] = model
        return model

    @staticmethod
    def recognition(audio, audio_format="ogg", model_size="base") -> str:
        """Transcribe an audio stream to text.

        Args:
            audio: Object exposing ``read()`` that returns the encoded audio
                bytes (the whole stream is read into memory).
            audio_format: Container format handed to pydub for decoding.
                Defaults to ``"ogg"``, the format this method always assumed.
            model_size: Whisper model size to load. Defaults to ``"base"``.

        Returns:
            The text of all transcribed segments joined by spaces, with
            leading and trailing whitespace removed.
        """
        # Buffer the upload so pydub gets a seekable in-memory file.
        audio_buffer = io.BytesIO(audio.read())

        # Decode the source audio with pydub.
        audio_segment = AudioSegment.from_file(audio_buffer, format=audio_format)

        # Re-encode as WAV into a second in-memory buffer.
        wav_io = io.BytesIO()
        audio_segment.export(wav_io, format="wav")
        wav_io.seek(0)  # Rewind so the transcriber reads from the start

        # Reuse the already-loaded model when there is one.
        model = VoiceRecognition._get_model(model_size)

        # The second value returned by transcribe() is not needed here.
        segments, _ = model.transcribe(wav_io)

        # Combine the segment texts into one string and trim the ends.
        return " ".join(segment.text for segment in segments).strip()
voice recognition trial 5 2024-09-26 13:08:29 +02:00			`import io`
voice recognition kinda works?? 2024-09-27 13:59:27 +02:00			`from faster_whisper import WhisperModel`
voice recognition trial 5 2024-09-26 13:08:29 +02:00			`from pydub import AudioSegment`
google-help-me 2024-09-24 09:55:23 +02:00

			`class VoiceRecognition:`
whisper is better 2024-09-26 11:01:15 +02:00			`@staticmethod`
			`def recognition(audio):`
voice.py comments 2024-10-11 10:14:31 +02:00			`# Read the audio file into a BytesIO buffer`
voice recognition kinda works?? 2024-09-27 13:59:27 +02:00			`audio_buffer = io.BytesIO(audio.read())`
Hello World 2024-09-30 11:47:58 +02:00
voice.py comments 2024-10-11 10:14:31 +02:00			`# Load the audio file using pydub`
Hello World 2024-09-30 11:47:58 +02:00			`audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")`

voice.py comments 2024-10-11 10:14:31 +02:00			`# Export the audio to a WAV format in a BytesIO buffer`
Hello World 2024-09-30 11:47:58 +02:00			`wav_io = io.BytesIO()`
			`audio_segment.export(wav_io, format="wav")`
voice.py comments 2024-10-11 10:14:31 +02:00			`wav_io.seek(0) # Reset the buffer pointer to the start`
Hello World 2024-09-30 11:47:58 +02:00
voice.py comments 2024-10-11 10:14:31 +02:00			`# Load the Whisper model`
			`model_size = "base" # Specify the model size`
Hello World 2024-09-30 11:47:58 +02:00			`model = WhisperModel(model_size, device="cpu", compute_type="int8")`

voice.py comments 2024-10-11 10:14:31 +02:00			`# Transcribe the audio`
voice recognition kinda works?? 2024-09-27 13:59:27 +02:00			`segments, _ = model.transcribe(wav_io)`
			`transcription = ""`
voice.py comments 2024-10-11 10:14:31 +02:00
			`# Combine the transcribed segments into a single string`
voice recognition kinda works?? 2024-09-27 13:59:27 +02:00			`for segment in segments:`
			`transcription += segment.text + " "`
voice.py comments 2024-10-11 10:14:31 +02:00
			`result = transcription.strip() # Strip any leading/trailing whitespace`
voice recognition kinda works?? 2024-09-27 13:59:27 +02:00			`return result`