# pip install faster-whisper
from api import API
import os
import wave

import pyaudio
from faster_whisper import WhisperModel


def transcribe_chunk(model, file_path):
    """Transcribe the audio file at *file_path* and return its text.

    Args:
        model: A loaded faster-whisper ``WhisperModel``.
        file_path: Path to the WAV file to transcribe.

    Returns:
        All segment texts joined with single spaces, stripped of
        surrounding whitespace.
    """
    print("transcribing...")
    segments, _ = model.transcribe(file_path)
    # Single join instead of quadratic `+=` string concatenation.
    return " ".join(segment.text for segment in segments).strip()


def record_chunk(p, stream, file_path, chunk_length=2, rate=16000,
                 frames_per_buffer=1024):
    """Record *chunk_length* seconds from *stream* into a mono WAV file.

    Args:
        p: The PyAudio instance (queried for the int16 sample width).
        stream: An open PyAudio input stream (assumed mono at *rate* Hz —
            the caller is responsible for matching these settings).
        file_path: Destination WAV path (overwritten if it exists).
        chunk_length: Duration to record, in seconds.
        rate: Sample rate in Hz (default matches the original hard-coded
            16000; keyword-only in practice, backward compatible).
        frames_per_buffer: Frames read per ``stream.read`` call.
    """
    frames = [stream.read(frames_per_buffer)
              for _ in range(int(rate / frames_per_buffer * chunk_length))]
    # Context manager guarantees the file is closed even if a write raises.
    with wave.open(file_path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))


def listen():
    """Record one chunk from the microphone, transcribe it, return the text.

    Returns:
        The transcription string on success, the sentinel ``"ERROR"`` if
        transcription raised, or ``None`` if interrupted (Ctrl-C) before a
        result was produced.
    """
    # Model size options: tiny, base, small, medium, large.
    model_size = "medium"
    # device="cuda" for GPU; int8 keeps CPU memory usage reasonable.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)
    try:
        print("Recording...")
        # CHANGE TEMP FILE PATH
        chunk_file = "temp_chunk.wav"
        record_chunk(p, stream, chunk_file)
        try:
            # BUG FIX: the original wrapped only `return transcription` in
            # this try (a bare return cannot raise, so the except was dead
            # code) and had `break` inside the finally, which silently
            # discarded the pending return — listen() always returned None.
            # Transcription now happens inside the try so failures actually
            # hit the except, and the finally only cleans up the temp file.
            transcription = transcribe_chunk(model, chunk_file)
            print(transcription)
            return transcription
        except Exception:
            # Best-effort sentinel instead of crashing the caller.
            return "ERROR"
        finally:
            if os.path.exists(chunk_file):
                os.remove(chunk_file)
    except KeyboardInterrupt:
        print("Stopping...")
    finally:
        stream.stop_stream()
        stream.close()
        p.terminate()