Diffstat (limited to 'agl_service_voiceagent/utils')
-rw-r--r--   agl_service_voiceagent/utils/audio_recorder.py |  4
-rw-r--r--   agl_service_voiceagent/utils/stt_model.py      | 44
2 files changed, 25 insertions, 23 deletions
diff --git a/agl_service_voiceagent/utils/audio_recorder.py b/agl_service_voiceagent/utils/audio_recorder.py
index 49716c9..e362480 100644
--- a/agl_service_voiceagent/utils/audio_recorder.py
+++ b/agl_service_voiceagent/utils/audio_recorder.py
@@ -64,7 +64,7 @@ class AudioRecorder:
         """
         print("Creating pipeline for audio recording in {} mode...".format(self.mode))
         self.pipeline = Gst.Pipeline()
-        autoaudiosrc = Gst.ElementFactory.make("autoaudiosrc", None)
+        autoaudiosrc = Gst.ElementFactory.make("alsasrc", None)
         queue = Gst.ElementFactory.make("queue", None)
         queue.set_property("max-size-buffers", 0)
         queue.set_property("max-size-bytes", 0)
@@ -109,6 +109,7 @@ class AudioRecorder:
         Start recording audio using the GStreamer pipeline.
         """
         self.pipeline.set_state(Gst.State.PLAYING)
+        self.loop.run()
         print("Recording Voice Input...")
 
@@ -186,3 +187,4 @@ class AudioRecorder:
         print("Pipeline cleanup complete!")
         self.bus = None
         self.pipeline = None
+        self.loop.quit()
\ No newline at end of file
diff --git a/agl_service_voiceagent/utils/stt_model.py b/agl_service_voiceagent/utils/stt_model.py
index 7e8ad8b..0a092ea 100644
--- a/agl_service_voiceagent/utils/stt_model.py
+++ b/agl_service_voiceagent/utils/stt_model.py
@@ -21,7 +21,7 @@ import wave
 from agl_service_voiceagent.utils.common import generate_unique_uuid
 
 # import the whisper model
-import whisper
+# import whisper
 # for whisper timeout feature
 from concurrent.futures import ThreadPoolExecutor
 import subprocess
@@ -93,28 +93,28 @@ class STTModel:
         return result
 
     # Recognize speech using the whisper model
-    def recognize_using_whisper(self,filename,language = None,timeout = 5,fp16=False):
-        """
-        Recognize speech and return the result as a JSON object.
-
-        Args:
-            filename (str): The path to the audio file.
-            timeout (int, optional): The timeout for recognition (default is 5 seconds).
-            fp16 (bool, optional): If True, use 16-bit floating point precision, (default is False) because cuda is not supported.
-            language (str, optional): The language code for recognition (default is None).
-
-        Returns:
-            dict: A JSON object containing recognition results.
-        """
-        def transcribe_with_whisper():
-            return self.whisper_model.transcribe(filename, language = language,fp16=fp16)
+    # def recognize_using_whisper(self,filename,language = None,timeout = 5,fp16=False):
+    #     """
+    #     Recognize speech and return the result as a JSON object.
+
+    #     Args:
+    #         filename (str): The path to the audio file.
+    #         timeout (int, optional): The timeout for recognition (default is 5 seconds).
+    #         fp16 (bool, optional): If True, use 16-bit floating point precision, (default is False) because cuda is not supported.
+    #         language (str, optional): The language code for recognition (default is None).
+
+    #     Returns:
+    #         dict: A JSON object containing recognition results.
+    #     """
+    #     def transcribe_with_whisper():
+    #         return self.whisper_model.transcribe(filename, language = language,fp16=fp16)
 
-        with ThreadPoolExecutor() as executor:
-            future = executor.submit(transcribe_with_whisper)
-            try:
-                return future.result(timeout=timeout)
-            except TimeoutError:
-                return {"error": "Transcription with Whisper exceeded the timeout."}
+        # with ThreadPoolExecutor() as executor:
+        #     future = executor.submit(transcribe_with_whisper)
+        #     try:
+        #         return future.result(timeout=timeout)
+        #     except TimeoutError:
+        #         return {"error": "Transcription with Whisper exceeded the timeout."}
 
     def recognize_using_whisper_cpp(self,filename):
         command = self.whisper_cpp_path
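Note: with the Python whisper path commented out, recognition falls back to recognize_using_whisper_cpp(), which invokes a whisper.cpp binary through subprocess. The sketch below shows one way such a wrapper can look with a timeout; the binary path, model path, and CLI flags are assumptions here and depend on the whisper.cpp build that is actually installed.

# Sketch: wrap a whisper.cpp command-line transcription with a timeout.
import subprocess

WHISPER_CPP_BIN = "/usr/bin/whisper-cpp"                 # assumed install path
WHISPER_CPP_MODEL = "/usr/share/whisper/ggml-tiny.bin"   # assumed model path

def recognize_using_whisper_cpp(filename, timeout=10):
    # Flag names vary between whisper.cpp versions; these are illustrative.
    command = [
        WHISPER_CPP_BIN,
        "-m", WHISPER_CPP_MODEL,   # model file
        "-f", filename,            # input WAV file (16 kHz mono expected)
        "-nt",                     # no timestamps, plain text output
    ]
    try:
        result = subprocess.run(
            command, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return {"error": "Transcription with whisper.cpp exceeded the timeout."}
    if result.returncode != 0:
        return {"error": result.stderr.strip()}
    return {"text": result.stdout.strip()}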