voice recognition with whisper #40
					 1 changed file with 58 additions and 28 deletions
				
			
		|  | @ -1,31 +1,61 @@ | |||
| import speech_recognition as sr | ||||
| #pip install faster-whisper | ||||
| from api import API | ||||
| import os | ||||
| import wave | ||||
| from faster_whisper import WhisperModel | ||||
| import pyaudio | ||||
| 
 | ||||
| class Voice: #create Class | ||||
|     @staticmethod | ||||
|     def listen():   #define function listen() | ||||
|         recognizer = sr.Recognizer() | ||||
def transcribe_chunk(model, file_path):
    """Transcribe one audio file with the given faster-whisper model.

    Collects the text of every transcribed segment, joins the pieces with
    single spaces, and returns the result stripped of surrounding
    whitespace (empty string if the model yields no segments).
    """
    print("transcribing...")
    segments, _ = model.transcribe(file_path)
    # One space between segments, outer whitespace trimmed — identical to
    # appending "text + ' '" per segment and stripping at the end.
    pieces = [segment.text for segment in segments]
    return " ".join(pieces).strip()
| 
 | ||||
|         try: | ||||
|             with sr.Microphone() as source: | ||||
|                 print("Adjusting for ambient noise...") | ||||
|                 recognizer.adjust_for_ambient_noise(source, duration=0.5) #listen to surrounding for .5sec to adjust backgroundnoise | ||||
|                 print("Listening...") | ||||
|                 audio_data = recognizer.listen(source) #listen to user until user stops speaking | ||||
|                 print("Audio captured") | ||||
|                 try: | ||||
|                     text = recognizer.recognize_sphinx(audio_data)  # Using Sphinx convert audio to text (also works offline) | ||||
|                 #if any Exceptions or Errors eccur => return ERROR | ||||
|                 except sr.UnknownValueError: | ||||
|                     text = "ERROR" | ||||
|                 except sr.RequestError as e: | ||||
|                     text = "ERROR" | ||||
#Record voice
#chunk_length = duration to record in seconds
def record_chunk(p, stream, file_path, chunk_length=2):
    """Record `chunk_length` seconds of 16 kHz mono audio from `stream`
    and write it to `file_path` as 16-bit PCM WAV.

    p            -- PyAudio instance (used only for the sample-width lookup)
    stream       -- an open PyAudio input stream (16 kHz, mono, paInt16,
                    1024-frame buffer) — assumed; TODO confirm with caller
    file_path    -- destination WAV file path (overwritten if it exists)
    chunk_length -- recording duration in seconds (default 2)
    """
    frames = []
    # 16000 samples/s read in 1024-sample buffers -> number of reads needed
    # to cover chunk_length seconds.
    for _ in range(int(16000 / 1024 * chunk_length)):
        frames.append(stream.read(1024))

    # Context manager guarantees the WAV file is closed even if one of the
    # set*/writeframes calls raises (the original open/close pair leaked
    # the handle on error).
    with wave.open(file_path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(16000)
        wf.writeframes(b''.join(frames))
|      | ||||
def listen():
    """Record one chunk of microphone audio, transcribe it with
    faster-whisper, print the transcription, and return it.

    Returns None if recording is interrupted with Ctrl-C. The temporary
    WAV file and the audio stream/device are always released, whichever
    way the function exits.
    """
    #model settings (tiny, base, small, medium, large)
    model_size = "medium"
    #what should it run on (cpu or cuda for gpu)
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=1024)

    # CHANGE TEMP FILE PATH
    chunk_file = "temp_chunk.wav"
    try:
        # The original wrapped a bare `return` in try/except with an
        # unreachable `break` inside `while True`; one chunk is recorded
        # and returned, so the loop and dead handlers are removed.
        print("Recording...")
        record_chunk(p, stream, chunk_file)
        transcription = transcribe_chunk(model, chunk_file)
        print(transcription)
        return transcription
    except KeyboardInterrupt:
        print("Stopping...")
    finally:
        # Cleanup runs on normal return, Ctrl-C, and any error.
        if os.path.exists(chunk_file):
            os.remove(chunk_file)
        stream.stop_stream()
        stream.close()
        p.terminate()
		Reference in a new issue