main #54
4 changed files with 18 additions and 7 deletions
@@ -151,7 +151,7 @@ const InputOutputBackend: React.FC = () => {
     }
 
     mediaRecorder.onstop = () => {
-      const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" })
+      const audioBlob = new Blob(audioChunks.current, { type: "audio/ogg" })
       const url = URL.createObjectURL(audioBlob)
       console.log(url);
       setAudioURL(url)
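Worth noting: the `type` passed to the `Blob` constructor only labels the bytes; the actual container is whatever `MediaRecorder` was started with. A minimal server-side sanity check, assuming the upload really should be Ogg (the `looks_like_ogg` helper is hypothetical, not part of this change):

```python
# Hypothetical helper, not part of this PR: Ogg streams begin with the
# magic bytes b"OggS", so a cheap container check before decoding is:
def looks_like_ogg(data: bytes) -> bool:
    return data[:4] == b"OggS"


if __name__ == "__main__":
    print(looks_like_ogg(b"OggS\x00rest-of-stream"))  # True
    print(looks_like_ogg(b"\x1a\x45\xdf\xa3"))        # False (WebM/Matroska magic)
```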
@@ -4,13 +4,9 @@ import axios from "axios";
 class VoiceSend {
     sendToVoiceRecognition(audio_data: Blob) {
         console.log("sending recording...");
-        console.log(typeof (audio_data));
-        console.log(audio_data instanceof Blob);
-
         const formdata = new FormData()
         formdata.append("audio", audio_data)
         formdata.append("option", "offline")
         formdata.append("type", "basic")
 
-        const dataSend = { option:"offline", type:"basic",audio:audio_data }
         axios.post("http://localhost:5000/interstellar_ai/api/voice_recognition", formdata)
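The endpoint receiving this `FormData` is not part of the diff; for context, a minimal Flask sketch of what it might look like (the route path is taken from the URL above, everything else is an assumption):

```python
# Hypothetical server counterpart (assumption: the real backend may differ).
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/interstellar_ai/api/voice_recognition", methods=["POST"])
def voice_recognition():
    audio = request.files["audio"]        # the recorded Blob
    option = request.form.get("option")   # "offline"
    mode = request.form.get("type")       # "basic"
    # hand the file object to the recognizer from py/voice.py, e.g.:
    # text = VoiceRecognition.recognition(audio)
    return jsonify({"status": "ok"})

if __name__ == "__main__":
    app.run(port=5000)
```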
@@ -15,3 +15,4 @@ pyttsx3
 pip-licenses
 openai-whisper
 pydub
+ffmpeg
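One caveat: pydub decodes Ogg by shelling out to an ffmpeg (or avconv) executable, which a pip package alone may not provide; if the binary lives outside `PATH`, pydub can be pointed at it explicitly. A minimal sketch (the path is a placeholder):

```python
from pydub import AudioSegment

# Assumption: ffmpeg is installed system-wide; adjust the path as needed.
AudioSegment.converter = "/usr/bin/ffmpeg"
```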
py/voice.py
@@ -1,10 +1,24 @@
+import io
+import numpy as np
 import whisper
+from pydub import AudioSegment
 
 
 class VoiceRecognition:
     @staticmethod
     def recognition(audio):
+        audio_data = audio.read()
+        with io.BytesIO(audio_data) as audio_buffer:
+            audio_segment = AudioSegment.from_ogg(audio_buffer)
+
+            raw_data = np.array(audio_segment.get_array_of_samples())
+
+            if audio_segment.channels > 1:
+                raw_data = raw_data.reshape((-1, audio_segment.channels))[:, 0]
+
+            audio_data = raw_data.astype(np.float32) / (2 ** (audio_segment.sample_width * 8 - 1))
+
         model = whisper.load_model("base")
-        result = model.transcribe(audio)
+        result = model.transcribe(audio_data)
         print(result["text"])
         return result["text"]
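The division by `2 ** (sample_width * 8 - 1)` scales the integer samples into the [-1.0, 1.0] float32 range that Whisper accepts for NumPy input; note that Whisper assumes 16 kHz audio, so a `set_frame_rate(16000)` call on the segment may still be needed. A quick way to exercise the new path locally, assuming an Ogg file on disk (the filename is a placeholder):

```python
# Hypothetical local test, not part of this PR: run an Ogg file through
# the same code path the upload handler uses.
from voice import VoiceRecognition  # assumes the working directory is py/

with open("sample.ogg", "rb") as f:
    text = VoiceRecognition.recognition(f)
    print(text)
```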