forked from React-Group/interstellar_ai
		
	Switch voice recognition to Whisper, replacing speech_recognition/pydub (whisper is better)
This commit is contained in:
		
							parent
							
								
									8cc9b1b015
								
							
						
					
					
						commit
						ab12b796ec
					
				
					 2 changed files with 9 additions and 50 deletions
				
			
		
							
								
								
									
										11
									
								
								py/api.py
									
										
									
									
									
								
							
							
						
						
									
										11
									
								
								py/api.py
									
										
									
									
									
								
							|  | @ -99,16 +99,9 @@ class API: | ||||||
| 
 | 
 | ||||||
|         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST']) |         @self.app.route('/interstellar_ai/api/voice_recognition', methods=['POST']) | ||||||
|         def voice_recognition(): |         def voice_recognition(): | ||||||
|             print(request.args) |  | ||||||
|             recog_type = request.form.get('type') |  | ||||||
|             print(recog_type) |  | ||||||
|             audio = request.files.get('audio') |             audio = request.files.get('audio') | ||||||
|             option = request.form.get('option') |             text = self.voice.recognition(audio) | ||||||
|             if recog_type == "basic": |             return jsonify({'status': 200, 'response': text}) | ||||||
|                 text = self.voice.basic_recognition(audio, option) |  | ||||||
|                 return jsonify({'status': 200, 'response': text}) |  | ||||||
|             else: |  | ||||||
|                 return jsonify({'status': 401, 'response': "Invalid type"}) |  | ||||||
| 
 | 
 | ||||||
|         @self.app.route('/interstellar_ai/api/weather', methods=['POST']) |         @self.app.route('/interstellar_ai/api/weather', methods=['POST']) | ||||||
|         def get_weather(): |         def get_weather(): | ||||||
|  |  | ||||||
							
								
								
									
										48
									
								
								py/voice.py
									
										
									
									
									
								
							
							
						
						
									
										48
									
								
								py/voice.py
									
										
									
									
									
								
							|  | @ -1,44 +1,10 @@ | ||||||
| import speech_recognition as sr | import whisper | ||||||
| from pydub import AudioSegment |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class VoiceRecognition: | class VoiceRecognition: | ||||||
|     def check_audio_format(self, file_path): |     @staticmethod | ||||||
|         try: |     def recognition(audio): | ||||||
|             audio = AudioSegment.from_ogg(file_path) |         model = whisper.load_model("base") | ||||||
|             print(f"Audio format: {audio.format}") |         result = model.transcribe(audio) | ||||||
|             return True |         print(result["text"]) | ||||||
|         except Exception as e: |         return result["text"] | ||||||
|             print(f"Error reading audio file: {e}") |  | ||||||
|             return False |  | ||||||
| 
 |  | ||||||
|     def basic_recognition(self, audio, option): |  | ||||||
|         print(type(audio)) |  | ||||||
|         print("preparing") |  | ||||||
|         r = sr.Recognizer() |  | ||||||
| 
 |  | ||||||
|         # Read the data from the FileStorage object |  | ||||||
|         audio_data = audio.read() |  | ||||||
| 
 |  | ||||||
|         # Write the audio data to a file |  | ||||||
|         with open('output.wav', 'wb') as file: |  | ||||||
|             file.write(audio_data) |  | ||||||
| 
 |  | ||||||
|         self.check_audio_format(audio) |  | ||||||
|         if option == "online": |  | ||||||
|             with sr.AudioFile(audio) as source: |  | ||||||
|                 print(type(source)) |  | ||||||
|                 print("online") |  | ||||||
|                 text = r.recognize_google_cloud(source) |  | ||||||
|                 print("recognized as: " + text) |  | ||||||
|                 return text |  | ||||||
|         elif option == "offline": |  | ||||||
|             with sr.AudioFile(audio) as source: |  | ||||||
|                 print(type(source)) |  | ||||||
|                 print("offline") |  | ||||||
|                 text = r.recognize_sphinx(source) |  | ||||||
|                 print("recognized as: " + text) |  | ||||||
|                 return text |  | ||||||
| 
 |  | ||||||
|         print("nothing") |  | ||||||
|         return False |  | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Patrick_Pluto
						Patrick_Pluto