voice recognition with whisper
This commit is contained in:
parent
c85e981fce
commit
06f263b417
1 changed files with 58 additions and 28 deletions
|
@ -1,31 +1,61 @@
|
|||
import speech_recognition as sr
|
||||
#pip install faster-whisper
|
||||
from api import API
|
||||
import os
|
||||
import wave
|
||||
from faster_whisper import WhisperModel
|
||||
import pyaudio
|
||||
|
||||
class Voice: #create Class
|
||||
@staticmethod
|
||||
def listen(): #define function listen()
|
||||
recognizer = sr.Recognizer()
|
||||
def transcribe_chunk(model, file_path):
    """Transcribe one audio file and return its text as a single string.

    Args:
        model: Object whose ``transcribe(path)`` returns a
            ``(segments, info)`` pair; every segment must expose a
            ``text`` attribute.
        file_path: Path of the audio file to transcribe.

    Returns:
        The segment texts joined by single spaces, or an empty string
        when the model yields no segments.
    """
    print("transcribing...")
    segments, _ = model.transcribe(file_path)
    # Segment text may carry its own leading/trailing whitespace; strip each
    # piece before joining so the result never contains doubled spaces.
    pieces = (segment.text.strip() for segment in segments)
    return " ".join(piece for piece in pieces if piece)
|
||||
|
||||
try:
|
||||
with sr.Microphone() as source:
|
||||
print("Adjusting for ambient noise...")
|
||||
recognizer.adjust_for_ambient_noise(source, duration=0.5) #listen to surrounding for .5sec to adjust backgroundnoise
|
||||
print("Listening...")
|
||||
audio_data = recognizer.listen(source) #listen to user until user stops speaking
|
||||
print("Audio captured")
|
||||
try:
|
||||
text = recognizer.recognize_sphinx(audio_data) # Using Sphinx convert audio to text (also works offline)
|
||||
#if any exception or error occurs => return ERROR
|
||||
except sr.UnknownValueError:
|
||||
text = "ERROR"
|
||||
except sr.RequestError as e:
|
||||
text = "ERROR"
|
||||
#Record voice
|
||||
#chunk_length = duration to record in seconds
|
||||
def record_chunk(p, stream, file_path, chunk_length=2, rate=16000,
                 frames_per_buffer=1024):
    """Record a fixed-length chunk from ``stream`` and save it as a WAV file.

    The file is written as mono audio with the sample width that ``p``
    reports for ``pyaudio.paInt16``.

    Args:
        p: Object providing ``get_sample_size(format)``.
        stream: Open input stream whose ``read(n)`` returns raw audio bytes.
        file_path: Destination path of the WAV file.
        chunk_length: Duration to record, in seconds.
        rate: Sample rate in Hz written to the WAV header; should match the
            rate the stream was opened with.
        frames_per_buffer: Number of frames requested per ``stream.read``.
    """
    # Number of whole buffers that fit into the requested duration.
    buffer_count = int(rate / frames_per_buffer * chunk_length)
    frames = [stream.read(frames_per_buffer) for _ in range(buffer_count)]

    # The context manager closes the file even if a write fails.
    with wave.open(file_path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
|
||||
|
||||
def listen():
    """Record one short audio chunk from the microphone and transcribe it.

    Returns:
        The transcribed text, ``"ERROR"`` if opening the stream, recording
        or transcribing fails, or ``None`` if interrupted with Ctrl+C.
    """
    # model settings (tiny, base, small, medium, large)
    model_size = "medium"
    # what should it run on (cpu or cuda for gpu)
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    # CHANGE TEMP FILE PATH
    chunk_file = "temp_chunk.wav"

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                        input=True, frames_per_buffer=1024)
        print("Recording...")
        record_chunk(p, stream, chunk_file)
        transcription = transcribe_chunk(model, chunk_file)
        print(transcription)
        return transcription
    except KeyboardInterrupt:
        print("Stopping...")
        return None
    except Exception as e:
        # Callers receive the "ERROR" sentinel; report the cause instead of
        # swallowing it silently.
        print(f"Voice recognition failed: {e}")
        return "ERROR"
    finally:
        # Always remove the temporary recording and release the audio device,
        # whichever path was taken above.
        if os.path.exists(chunk_file):
            os.remove(chunk_file)
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
|
Loading…
Reference in a new issue