Merge pull request 'voice recognition with whisper' (#40) from YasinOnm08/ai-virtual-assistant:main into main

Reviewed-on: https://interstellardevelopment.org/code/code/React-Group/ai-virtual-assistant/pulls/40
Patrick 2024-09-18 09:19:51 +02:00
commit de12ebfa03


@@ -1,31 +1,61 @@
-import speech_recognition as sr
-from api import API
-
-class Voice: #create Class
-    @staticmethod
-    def listen(): #define function listen()
-        recognizer = sr.Recognizer()
-
-        try:
-            with sr.Microphone() as source:
-                print("Adjusting for ambient noise...")
-                recognizer.adjust_for_ambient_noise(source, duration=0.5) #listen to surroundings for 0.5 s to adjust for background noise
-                print("Listening...")
-                audio_data = recognizer.listen(source) #listen to the user until they stop speaking
-                print("Audio captured")
-                try:
-                    text = recognizer.recognize_sphinx(audio_data) #use Sphinx to convert audio to text (also works offline)
-                #if any exceptions or errors occur => return ERROR
-                except sr.UnknownValueError:
-                    text = "ERROR"
-                except sr.RequestError as e:
-                    text = "ERROR"
-        except sr.RequestError as e:
-            text = "ERROR"
-        except sr.UnknownValueError:
-            text = "ERROR"
-        except Exception as e:
-            text = "ERROR"
-        return text
+#pip install faster-whisper
+import os
+import wave
+from faster_whisper import WhisperModel
+import pyaudio
+
+def transcribe_chunk(model, file_path):
+    print("transcribing...")
+    segments, _ = model.transcribe(file_path)
+    transcription = ""
+    for segment in segments:
+        transcription += segment.text + " "
+    return transcription.strip()
+
+#Record voice
+#chunk_length = duration to record in seconds
+def record_chunk(p, stream, file_path, chunk_length=2):
+    frames = []
+    for _ in range(0, int(16000 / 1024 * chunk_length)):
+        data = stream.read(1024)
+        frames.append(data)
+
+    wf = wave.open(file_path, 'wb')
+    wf.setnchannels(1)
+    wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
+    wf.setframerate(16000)
+    wf.writeframes(b''.join(frames))
+    wf.close()
+
+def listen():
+    #model settings (tiny, base, small, medium, large)
+    model_size = "medium"
+    #what should it run on (cpu or cuda for gpu)
+    model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+    p = pyaudio.PyAudio()
+    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
+
+    try:
+        while True:
+            print("Recording...")
+            # CHANGE TEMP FILE PATH
+            chunk_file = "temp_chunk.wav"
+            record_chunk(p, stream, chunk_file)
+            transcription = transcribe_chunk(model, chunk_file)
+            print(transcription)
+            try:
+                return transcription
+            except Exception as e:
+                return "ERROR"
+            finally:
+                if os.path.exists(chunk_file):
+                    os.remove(chunk_file)
+                break
+    except KeyboardInterrupt:
+        print("Stopping...")
+    finally:
+        stream.stop_stream()
+        stream.close()
+        p.terminate()
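
For orientation, here is a minimal, hypothetical way the rest of the assistant could call the new whisper-based recorder; it is a sketch, not part of this commit. The module name voice and the __main__ guard are assumptions. The result check is deliberately defensive: listen() is meant to hand back one chunk's transcription or "ERROR", but a break inside a finally block can discard the pending return in Python, in which case the caller would see None.

# Hypothetical usage sketch (assumes the changed file is importable as voice.py).
import voice

if __name__ == "__main__":
    text = voice.listen()  # records a ~2 s chunk via pyaudio, transcribes it with faster-whisper
    if text and text != "ERROR":
        print(f"You said: {text}")
    else:
        # None or "ERROR": nothing usable came back from the recorder
        print("No transcription captured.")
    # For GPU inference, the comments in the diff note that device="cuda"
    # can replace device="cpu" when constructing WhisperModel inside listen().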