61 lines
No EOL
1.8 KiB
Python
61 lines
No EOL
1.8 KiB
Python
# Install dependencies: pip install faster-whisper pyaudio
|
|
from api import API
|
|
import os
|
|
import wave
|
|
from faster_whisper import WhisperModel
|
|
import pyaudio
|
|
|
|
def transcribe_chunk(model, file_path):
    """Transcribe the audio file at ``file_path`` and return its text.

    Args:
        model: Object exposing ``transcribe(path)`` that returns a
            ``(segments, info)`` pair, where every segment has a ``text``
            attribute (``listen`` passes a ``WhisperModel``).
        file_path: Path of the audio file to transcribe.

    Returns:
        The segment texts joined by single spaces, with leading and
        trailing whitespace stripped; ``""`` when there are no segments.
    """
    print("transcribing...")
    segments, _ = model.transcribe(file_path)
    # Join in one pass instead of growing a string with "+=" per segment;
    # the final strip() makes the result identical to the "+=" version.
    return " ".join(segment.text for segment in segments).strip()
|
|
|
|
def record_chunk(p, stream, file_path, chunk_length=2, *, sample_rate=16000,
                 frames_per_buffer=1024):
    """Record audio from ``stream`` and write it to ``file_path`` as a WAV file.

    Args:
        p: Object providing ``get_sample_size(format)``; ``listen`` passes
            its ``pyaudio.PyAudio`` instance.
        stream: Open input stream providing ``read(num_frames)``.
        file_path: Destination path of the WAV file (overwritten if present).
        chunk_length: Duration to record, in seconds.
        sample_rate: Frame rate written to the WAV header and used to work
            out how many buffers to read. It should match the rate the
            stream was opened with.
        frames_per_buffer: Number of frames requested per ``stream.read``.

    The number of reads is truncated to a whole number of buffers, so the
    recording can be slightly shorter than ``chunk_length``.
    """
    buffer_count = int(sample_rate / frames_per_buffer * chunk_length)
    frames = [stream.read(frames_per_buffer) for _ in range(buffer_count)]

    # The context manager closes the file even if writing the data fails.
    with wave.open(file_path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))
|
|
|
|
def listen():
    """Record one audio chunk from the microphone and return its transcription.

    Loads a Whisper model, opens a mono 16 kHz PyAudio input stream, records
    a single chunk with ``record_chunk`` into a temporary WAV file, and
    transcribes it with ``transcribe_chunk``.

    Returns:
        The transcribed text, or ``None`` if the user interrupts with
        Ctrl+C (in which case "Stopping..." is printed).

    Errors raised while recording or transcribing propagate to the caller.
    The audio stream, the PyAudio instance and the temporary file are
    released in every case.
    """
    # Model settings (tiny, base, small, medium, large).
    model_size = "medium"
    # What it should run on ("cpu", or "cuda" for GPU).
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    # CHANGE TEMP FILE PATH
    chunk_file = "temp_chunk.wav"

    p = pyaudio.PyAudio()
    stream = None
    try:
        # Opened inside the try so PyAudio is still terminated if this fails.
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                        input=True, frames_per_buffer=1024)
        print("Recording...")
        record_chunk(p, stream, chunk_file)
        transcription = transcribe_chunk(model, chunk_file)
        print(transcription)
        return transcription
    except KeyboardInterrupt:
        print("Stopping...")
        return None
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
        # Remove the temporary recording whether or not transcription worked.
        try:
            os.remove(chunk_file)
        except FileNotFoundError:
            pass