sageTheDM
91353bd051
Reviewed-on: https://interstellardevelopment.org/code/code/React-Group/interstellar_ai/pulls/137 Reviewed-by: Patrick <patrick_pluto@noreply.localhost> Co-authored-by: sageTheDM <info@photofuel.tech> Co-committed-by: sageTheDM <info@photofuel.tech>
33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
import io
|
|
from faster_whisper import WhisperModel
|
|
from pydub import AudioSegment
|
|
|
|
|
|
class VoiceRecognition:
    """Speech-to-text helper that transcribes Ogg audio with faster-whisper."""

    # Whisper model size handed to WhisperModel.
    MODEL_SIZE = "base"

    # Shared WhisperModel instance; stays None until the first transcription.
    _model = None

    @classmethod
    def _get_model(cls):
        """Return the shared WhisperModel, creating it on first use.

        Loading the model is by far the most expensive step, so it is done
        once and reused instead of being repeated for every request.
        NOTE(review): no lock is taken here; concurrent first calls may each
        load a model (the last one assigned wins) -- confirm this is
        acceptable for the hosting server.
        """
        if cls._model is None:
            cls._model = WhisperModel(
                cls.MODEL_SIZE, device="cpu", compute_type="int8"
            )
        return cls._model

    @staticmethod
    def recognition(audio):
        """Transcribe an Ogg audio stream to text.

        Args:
            audio: A file-like object whose ``read()`` returns the bytes of
                an Ogg-encoded recording.

        Returns:
            The transcribed text of all segments joined by single spaces,
            with leading and trailing whitespace removed. Empty string when
            no segments are produced.
        """
        # Read the upload fully into memory so pydub gets a seekable buffer.
        audio_buffer = io.BytesIO(audio.read())

        # Decode the Ogg data with pydub.
        audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")

        # Re-encode as WAV into an in-memory buffer for the transcriber.
        wav_io = io.BytesIO()
        audio_segment.export(wav_io, format="wav")
        wav_io.seek(0)  # Rewind so the model reads from the start.

        # Transcribe with the cached model; the second return value
        # (transcription info) is not needed here.
        segments, _ = VoiceRecognition._get_model().transcribe(wav_io)

        # Join lazily produced segments in one pass instead of repeated "+=".
        return " ".join(segment.text for segment in segments).strip()
|