interstellar_ai/py/voice.py

45 lines
1.3 KiB
Python
Raw Normal View History

2024-09-24 09:55:23 +02:00
import speech_recognition as sr
2024-09-26 10:43:39 +02:00
from pydub import AudioSegment
2024-09-24 09:55:23 +02:00
class VoiceRecognition:
    """Speech-to-text helper built on ``speech_recognition`` and ``pydub``."""

    def check_audio_format(self, file_path):
        """Report whether pydub can decode the given audio as OGG.

        Args:
            file_path: Filesystem path or binary file-like object; it is
                forwarded unchanged to ``AudioSegment.from_ogg``.

        Returns:
            True if the audio was decoded, False if decoding raised (the
            error is printed, never propagated).
        """
        try:
            audio = AudioSegment.from_ogg(file_path)
        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic probe and
            # must never abort the recognition request that called it.
            print(f"Error reading audio file: {e}")
            return False

        # AudioSegment exposes no ``format`` attribute, so the previous
        # ``audio.format`` lookup always failed and made this method return
        # False even for valid files. Report properties that do exist.
        print(
            f"Audio format: {audio.channels} channel(s), "
            f"{audio.frame_rate} Hz, {audio.sample_width * 8}-bit"
        )
        return True

    def basic_recognition(self, audio, option):
        """Transcribe an uploaded audio stream.

        Args:
            audio: Binary file-like object exposing ``read()`` (the original
                code comments describe it as a FileStorage object).
            option: ``"online"`` to use Google Cloud Speech, ``"offline"`` to
                use PocketSphinx.

        Returns:
            The recognized text, or False when ``option`` is neither
            ``"online"`` nor ``"offline"``.

        Exceptions raised by ``speech_recognition`` while opening or
        recognizing the audio are not caught here and propagate to the caller.
        """
        # Function-scope import keeps this block self-contained.
        import io

        print(type(audio))
        print("preparing")

        # Read the upload exactly once. The stream is at EOF afterwards, so
        # every consumer below gets its own fresh in-memory stream instead of
        # the exhausted ``audio`` object.
        audio_data = audio.read()

        # Debug copy of the raw upload, kept from the original behavior.
        # NOTE(review): fixed path in the working directory; concurrent
        # requests will overwrite each other's file.
        with open('output.wav', 'wb') as file:
            file.write(audio_data)

        self.check_audio_format(io.BytesIO(audio_data))

        if option not in ("online", "offline"):
            print("nothing")
            return False

        r = sr.Recognizer()
        # NOTE(review): sr.AudioFile only accepts WAV/AIFF/FLAC, while the
        # probe above decodes OGG; confirm which container callers send.
        with sr.AudioFile(io.BytesIO(audio_data)) as source:
            print(type(source))
            print(option)
            # recognize_* expect AudioData, not the AudioFile source itself,
            # so the stream has to be captured with record() first.
            recorded = r.record(source)

        if option == "online":
            text = r.recognize_google_cloud(recorded)
        else:
            text = r.recognize_sphinx(recorded)
        print("recognized as: " + text)
        return text