import io

from faster_whisper import WhisperModel
from pydub import AudioSegment


class VoiceRecognition:
    @staticmethod
    def recognition(audio):
        """Transcribe an OGG audio stream with faster-whisper and return the text."""
        audio_buffer = io.BytesIO(audio.read())

        # Convert the OGG input to WAV in memory (pydub requires ffmpeg to be installed).
        try:
            audio_segment = AudioSegment.from_file(audio_buffer, format="ogg")
            wav_io = io.BytesIO()
            audio_segment.export(wav_io, format="wav")
            wav_io.seek(0)
        except Exception as exc:
            print(f"audio to wav conversion failed: {exc}")
            return ""

        # Note: loading the model on every call is slow; cache it if the method
        # is called repeatedly.
        model_size = "base"
        model = WhisperModel(model_size, device="cpu", compute_type="int8")

        segments, _ = model.transcribe(wav_io)
        transcription = " ".join(segment.text for segment in segments).strip()
        print(transcription)
        return transcription


# npm install @ffmpeg/ffmpeg @ffmpeg/util @ffmpeg/types @ffmpeg/core-mt
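
# A minimal usage sketch (an assumption, not part of the original module): it
# reads an OGG recording saved locally as "voice_message.ogg" (hypothetical
# filename). recognition() only needs an object exposing .read(), e.g. an open
# file handle or a web framework's uploaded-file object.
if __name__ == "__main__":
    with open("voice_message.ogg", "rb") as audio_file:
        text = VoiceRecognition.recognition(audio_file)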