7 files changed, 86 insertions, 73 deletions
diff --git a/agl_service_voiceagent/config.ini b/agl_service_voiceagent/config.ini
index d6d695e..e4f6313 100644
--- a/agl_service_voiceagent/config.ini
+++ b/agl_service_voiceagent/config.ini
@@ -9,7 +9,7 @@ snips_model_path = /usr/share/nlu/snips/model/
 channels = 1
 sample_rate = 16000
 bits_per_sample = 16
-wake_word = hello
+wake_word = hey automotive
 server_port = 51053
 server_address = 127.0.0.1
 rasa_model_path = /usr/share/nlu/rasa/models/
@@ -17,7 +17,7 @@ rasa_server_port = 51054
 rasa_detached_mode = 1
 base_log_dir = /usr/share/nlu/logs/
 store_voice_commands = 0
-online_mode = 1
+online_mode = 0
 online_mode_address = 65.108.107.216
 online_mode_port = 50051
 online_mode_timeout = 15
diff --git a/agl_service_voiceagent/generated/voice_agent_pb2.py b/agl_service_voiceagent/generated/voice_agent_pb2.py
index 4606f60..d978664 100644
--- a/agl_service_voiceagent/generated/voice_agent_pb2.py
+++ b/agl_service_voiceagent/generated/voice_agent_pb2.py
@@ -14,49 +14,49 @@ _sym_db = _symbol_database.Default()
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11voice_agent.proto\"\x07\n\x05\x45mpty\"C\n\rServiceStatus\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\x08\x12\x11\n\twake_word\x18\x03 \x01(\t\"^\n\nVoiceAudio\x12\x13\n\x0b\x61udio_chunk\x18\x01 \x01(\x0c\x12\x14\n\x0c\x61udio_format\x18\x02 \x01(\t\x12\x13\n\x0bsample_rate\x18\x03 \x01(\x05\x12\x10\n\x08language\x18\x04 \x01(\t\" \n\x0eWakeWordStatus\x12\x0e\n\x06status\x18\x01 \x01(\x08\"\x93\x01\n\x17S_RecognizeVoiceControl\x12!\n\x0c\x61udio_stream\x18\x01 \x01(\x0b\x32\x0b.VoiceAudio\x12\x1c\n\tnlu_model\x18\x02 \x01(\x0e\x32\t.NLUModel\x12\x11\n\tstream_id\x18\x03 \x01(\t\x12$\n\rstt_framework\x18\x04 \x01(\x0e\x32\r.STTFramework\"\xd1\x01\n\x15RecognizeVoiceControl\x12\x1d\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\r.RecordAction\x12\x1c\n\tnlu_model\x18\x02 \x01(\x0e\x32\t.NLUModel\x12 \n\x0brecord_mode\x18\x03 \x01(\x0e\x32\x0b.RecordMode\x12\x11\n\tstream_id\x18\x04 \x01(\t\x12$\n\rstt_framework\x18\x05 \x01(\x0e\x32\r.STTFramework\x12 \n\x0bonline_mode\x18\x06 \x01(\x0e\x32\x0b.OnlineMode\"J\n\x14RecognizeTextControl\x12\x14\n\x0ctext_command\x18\x01 \x01(\t\x12\x1c\n\tnlu_model\x18\x02 \x01(\x0e\x32\t.NLUModel\")\n\nIntentSlot\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\"\x8e\x01\n\x0fRecognizeResult\x12\x0f\n\x07\x63ommand\x18\x01 \x01(\t\x12\x0e\n\x06intent\x18\x02 \x01(\t\x12!\n\x0cintent_slots\x18\x03 \x03(\x0b\x32\x0b.IntentSlot\x12\x11\n\tstream_id\x18\x04 \x01(\t\x12$\n\x06status\x18\x05 \x01(\x0e\x32\x14.RecognizeStatusType\"A\n\x0c\x45xecuteInput\x12\x0e\n\x06intent\x18\x01 \x01(\t\x12!\n\x0cintent_slots\x18\x02 \x03(\x0b\x32\x0b.IntentSlot\"E\n\rExecuteResult\x12\x10\n\x08response\x18\x01 \x01(\t\x12\"\n\x06status\x18\x02 
\x01(\x0e\x32\x12.ExecuteStatusType*%\n\x0cSTTFramework\x12\x08\n\x04VOSK\x10\x00\x12\x0b\n\x07WHISPER\x10\x01*%\n\nOnlineMode\x12\n\n\x06ONLINE\x10\x00\x12\x0b\n\x07OFFLINE\x10\x01*#\n\x0cRecordAction\x12\t\n\x05START\x10\x00\x12\x08\n\x04STOP\x10\x01*\x1f\n\x08NLUModel\x12\t\n\x05SNIPS\x10\x00\x12\x08\n\x04RASA\x10\x01*\"\n\nRecordMode\x12\n\n\x06MANUAL\x10\x00\x12\x08\n\x04\x41UTO\x10\x01*\xb4\x01\n\x13RecognizeStatusType\x12\r\n\tREC_ERROR\x10\x00\x12\x0f\n\x0bREC_SUCCESS\x10\x01\x12\x12\n\x0eREC_PROCESSING\x10\x02\x12\x18\n\x14VOICE_NOT_RECOGNIZED\x10\x03\x12\x19\n\x15INTENT_NOT_RECOGNIZED\x10\x04\x12\x17\n\x13TEXT_NOT_RECOGNIZED\x10\x05\x12\x1b\n\x17NLU_MODEL_NOT_SUPPORTED\x10\x06*\x82\x01\n\x11\x45xecuteStatusType\x12\x0e\n\nEXEC_ERROR\x10\x00\x12\x10\n\x0c\x45XEC_SUCCESS\x10\x01\x12\x14\n\x10KUKSA_CONN_ERROR\x10\x02\x12\x18\n\x14INTENT_NOT_SUPPORTED\x10\x03\x12\x1b\n\x17INTENT_SLOTS_INCOMPLETE\x10\x04\x32\xa4\x03\n\x11VoiceAgentService\x12,\n\x12\x43heckServiceStatus\x12\x06.Empty\x1a\x0e.ServiceStatus\x12\x34\n\x10S_DetectWakeWord\x12\x0b.VoiceAudio\x1a\x0f.WakeWordStatus(\x01\x30\x01\x12+\n\x0e\x44\x65tectWakeWord\x12\x06.Empty\x1a\x0f.WakeWordStatus0\x01\x12G\n\x17S_RecognizeVoiceCommand\x12\x18.S_RecognizeVoiceControl\x1a\x10.RecognizeResult(\x01\x12\x43\n\x15RecognizeVoiceCommand\x12\x16.RecognizeVoiceControl\x1a\x10.RecognizeResult(\x01\x12?\n\x14RecognizeTextCommand\x12\x15.RecognizeTextControl\x1a\x10.RecognizeResult\x12/\n\x0e\x45xecuteCommand\x12\r.ExecuteInput\x1a\x0e.ExecuteResultb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11voice_agent.proto\"\x07\n\x05\x45mpty\"X\n\rServiceStatus\x12\x0f\n\x07version\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\x08\x12\x11\n\twake_word\x18\x03 \x01(\t\x12\x13\n\x0bonline_mode\x18\x04 \x01(\x08\"^\n\nVoiceAudio\x12\x13\n\x0b\x61udio_chunk\x18\x01 \x01(\x0c\x12\x14\n\x0c\x61udio_format\x18\x02 \x01(\t\x12\x13\n\x0bsample_rate\x18\x03 \x01(\x05\x12\x10\n\x08language\x18\x04 \x01(\t\" \n\x0eWakeWordStatus\x12\x0e\n\x06status\x18\x01 \x01(\x08\"\x93\x01\n\x17S_RecognizeVoiceControl\x12!\n\x0c\x61udio_stream\x18\x01 \x01(\x0b\x32\x0b.VoiceAudio\x12\x1c\n\tnlu_model\x18\x02 \x01(\x0e\x32\t.NLUModel\x12\x11\n\tstream_id\x18\x03 \x01(\t\x12$\n\rstt_framework\x18\x04 \x01(\x0e\x32\r.STTFramework\"\xd1\x01\n\x15RecognizeVoiceControl\x12\x1d\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\r.RecordAction\x12\x1c\n\tnlu_model\x18\x02 \x01(\x0e\x32\t.NLUModel\x12 \n\x0brecord_mode\x18\x03 \x01(\x0e\x32\x0b.RecordMode\x12\x11\n\tstream_id\x18\x04 \x01(\t\x12$\n\rstt_framework\x18\x05 \x01(\x0e\x32\r.STTFramework\x12 \n\x0bonline_mode\x18\x06 \x01(\x0e\x32\x0b.OnlineMode\"J\n\x14RecognizeTextControl\x12\x14\n\x0ctext_command\x18\x01 \x01(\t\x12\x1c\n\tnlu_model\x18\x02 \x01(\x0e\x32\t.NLUModel\")\n\nIntentSlot\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\"\x8e\x01\n\x0fRecognizeResult\x12\x0f\n\x07\x63ommand\x18\x01 \x01(\t\x12\x0e\n\x06intent\x18\x02 \x01(\t\x12!\n\x0cintent_slots\x18\x03 \x03(\x0b\x32\x0b.IntentSlot\x12\x11\n\tstream_id\x18\x04 \x01(\t\x12$\n\x06status\x18\x05 \x01(\x0e\x32\x14.RecognizeStatusType\"A\n\x0c\x45xecuteInput\x12\x0e\n\x06intent\x18\x01 \x01(\t\x12!\n\x0cintent_slots\x18\x02 \x03(\x0b\x32\x0b.IntentSlot\"E\n\rExecuteResult\x12\x10\n\x08response\x18\x01 \x01(\t\x12\"\n\x06status\x18\x02 
\x01(\x0e\x32\x12.ExecuteStatusType*%\n\x0cSTTFramework\x12\x08\n\x04VOSK\x10\x00\x12\x0b\n\x07WHISPER\x10\x01*%\n\nOnlineMode\x12\n\n\x06ONLINE\x10\x00\x12\x0b\n\x07OFFLINE\x10\x01*#\n\x0cRecordAction\x12\t\n\x05START\x10\x00\x12\x08\n\x04STOP\x10\x01*\x1f\n\x08NLUModel\x12\t\n\x05SNIPS\x10\x00\x12\x08\n\x04RASA\x10\x01*\"\n\nRecordMode\x12\n\n\x06MANUAL\x10\x00\x12\x08\n\x04\x41UTO\x10\x01*\xb4\x01\n\x13RecognizeStatusType\x12\r\n\tREC_ERROR\x10\x00\x12\x0f\n\x0bREC_SUCCESS\x10\x01\x12\x12\n\x0eREC_PROCESSING\x10\x02\x12\x18\n\x14VOICE_NOT_RECOGNIZED\x10\x03\x12\x19\n\x15INTENT_NOT_RECOGNIZED\x10\x04\x12\x17\n\x13TEXT_NOT_RECOGNIZED\x10\x05\x12\x1b\n\x17NLU_MODEL_NOT_SUPPORTED\x10\x06*\x82\x01\n\x11\x45xecuteStatusType\x12\x0e\n\nEXEC_ERROR\x10\x00\x12\x10\n\x0c\x45XEC_SUCCESS\x10\x01\x12\x14\n\x10KUKSA_CONN_ERROR\x10\x02\x12\x18\n\x14INTENT_NOT_SUPPORTED\x10\x03\x12\x1b\n\x17INTENT_SLOTS_INCOMPLETE\x10\x04\x32\xa4\x03\n\x11VoiceAgentService\x12,\n\x12\x43heckServiceStatus\x12\x06.Empty\x1a\x0e.ServiceStatus\x12\x34\n\x10S_DetectWakeWord\x12\x0b.VoiceAudio\x1a\x0f.WakeWordStatus(\x01\x30\x01\x12+\n\x0e\x44\x65tectWakeWord\x12\x06.Empty\x1a\x0f.WakeWordStatus0\x01\x12G\n\x17S_RecognizeVoiceCommand\x12\x18.S_RecognizeVoiceControl\x1a\x10.RecognizeResult(\x01\x12\x43\n\x15RecognizeVoiceCommand\x12\x16.RecognizeVoiceControl\x1a\x10.RecognizeResult(\x01\x12?\n\x14RecognizeTextCommand\x12\x15.RecognizeTextControl\x1a\x10.RecognizeResult\x12/\n\x0e\x45xecuteCommand\x12\r.ExecuteInput\x1a\x0e.ExecuteResultb\x06proto3')
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'voice_agent_pb2', _globals)
 if not _descriptor._USE_C_DESCRIPTORS:
   DESCRIPTOR._loaded_options = None
-  _globals['_STTFRAMEWORK']._serialized_start=993
-  _globals['_STTFRAMEWORK']._serialized_end=1030
-  _globals['_ONLINEMODE']._serialized_start=1032
-  _globals['_ONLINEMODE']._serialized_end=1069
-  _globals['_RECORDACTION']._serialized_start=1071
-  _globals['_RECORDACTION']._serialized_end=1106
-  _globals['_NLUMODEL']._serialized_start=1108
-  _globals['_NLUMODEL']._serialized_end=1139
-  _globals['_RECORDMODE']._serialized_start=1141
-  _globals['_RECORDMODE']._serialized_end=1175
-  _globals['_RECOGNIZESTATUSTYPE']._serialized_start=1178
-  _globals['_RECOGNIZESTATUSTYPE']._serialized_end=1358
-  _globals['_EXECUTESTATUSTYPE']._serialized_start=1361
-  _globals['_EXECUTESTATUSTYPE']._serialized_end=1491
+  _globals['_STTFRAMEWORK']._serialized_start=1014
+  _globals['_STTFRAMEWORK']._serialized_end=1051
+  _globals['_ONLINEMODE']._serialized_start=1053
+  _globals['_ONLINEMODE']._serialized_end=1090
+  _globals['_RECORDACTION']._serialized_start=1092
+  _globals['_RECORDACTION']._serialized_end=1127
+  _globals['_NLUMODEL']._serialized_start=1129
+  _globals['_NLUMODEL']._serialized_end=1160
+  _globals['_RECORDMODE']._serialized_start=1162
+  _globals['_RECORDMODE']._serialized_end=1196
+  _globals['_RECOGNIZESTATUSTYPE']._serialized_start=1199
+  _globals['_RECOGNIZESTATUSTYPE']._serialized_end=1379
+  _globals['_EXECUTESTATUSTYPE']._serialized_start=1382
+  _globals['_EXECUTESTATUSTYPE']._serialized_end=1512
   _globals['_EMPTY']._serialized_start=21
   _globals['_EMPTY']._serialized_end=28
   _globals['_SERVICESTATUS']._serialized_start=30
-  _globals['_SERVICESTATUS']._serialized_end=97
-  _globals['_VOICEAUDIO']._serialized_start=99
-  _globals['_VOICEAUDIO']._serialized_end=193
-  _globals['_WAKEWORDSTATUS']._serialized_start=195
-  _globals['_WAKEWORDSTATUS']._serialized_end=227
-  _globals['_S_RECOGNIZEVOICECONTROL']._serialized_start=230
-  _globals['_S_RECOGNIZEVOICECONTROL']._serialized_end=377
-  _globals['_RECOGNIZEVOICECONTROL']._serialized_start=380
-  _globals['_RECOGNIZEVOICECONTROL']._serialized_end=589
-  _globals['_RECOGNIZETEXTCONTROL']._serialized_start=591
-  _globals['_RECOGNIZETEXTCONTROL']._serialized_end=665
-  _globals['_INTENTSLOT']._serialized_start=667
-  _globals['_INTENTSLOT']._serialized_end=708
-  _globals['_RECOGNIZERESULT']._serialized_start=711
-  _globals['_RECOGNIZERESULT']._serialized_end=853
-  _globals['_EXECUTEINPUT']._serialized_start=855
-  _globals['_EXECUTEINPUT']._serialized_end=920
-  _globals['_EXECUTERESULT']._serialized_start=922
-  _globals['_EXECUTERESULT']._serialized_end=991
-  _globals['_VOICEAGENTSERVICE']._serialized_start=1494
-  _globals['_VOICEAGENTSERVICE']._serialized_end=1914
+  _globals['_SERVICESTATUS']._serialized_end=118
+  _globals['_VOICEAUDIO']._serialized_start=120
+  _globals['_VOICEAUDIO']._serialized_end=214
+  _globals['_WAKEWORDSTATUS']._serialized_start=216
+  _globals['_WAKEWORDSTATUS']._serialized_end=248
+  _globals['_S_RECOGNIZEVOICECONTROL']._serialized_start=251
+  _globals['_S_RECOGNIZEVOICECONTROL']._serialized_end=398
+  _globals['_RECOGNIZEVOICECONTROL']._serialized_start=401
+  _globals['_RECOGNIZEVOICECONTROL']._serialized_end=610
+  _globals['_RECOGNIZETEXTCONTROL']._serialized_start=612
+  _globals['_RECOGNIZETEXTCONTROL']._serialized_end=686
+  _globals['_INTENTSLOT']._serialized_start=688
+  _globals['_INTENTSLOT']._serialized_end=729
+  _globals['_RECOGNIZERESULT']._serialized_start=732
+  _globals['_RECOGNIZERESULT']._serialized_end=874
+  _globals['_EXECUTEINPUT']._serialized_start=876
+  _globals['_EXECUTEINPUT']._serialized_end=941
+  _globals['_EXECUTERESULT']._serialized_start=943
+  _globals['_EXECUTERESULT']._serialized_end=1012
+  _globals['_VOICEAGENTSERVICE']._serialized_start=1515
+  _globals['_VOICEAGENTSERVICE']._serialized_end=1935
 # @@protoc_insertion_point(module_scope)
diff --git a/agl_service_voiceagent/nlu/snips_interface.py b/agl_service_voiceagent/nlu/snips_interface.py
index a32f574..25ad05b 100644
--- a/agl_service_voiceagent/nlu/snips_interface.py
+++ b/agl_service_voiceagent/nlu/snips_interface.py
@@ -46,8 +46,7 @@ class SnipsInterface:
         preprocessed_text = text.lower().strip()
         # remove special characters, punctuation, and extra whitespaces
         preprocessed_text = re.sub(r'[^\w\s]', '', preprocessed_text).strip()
-        # replace % with " precent"
-        preprocessed_text = re.sub(r'%', ' percent', preprocessed_text)
+        preprocessed_text = re.sub(r'\s*\bpercent\b', '', preprocessed_text)  # drop only the whole word "percent" and the whitespace before it
         # replace ° with " degrees"
         preprocessed_text = re.sub(r'°', ' degrees ', preprocessed_text)
         return preprocessed_text
diff --git a/agl_service_voiceagent/protos/voice_agent.proto b/agl_service_voiceagent/protos/voice_agent.proto
index bd2daa2..72d48c6 100644
--- a/agl_service_voiceagent/protos/voice_agent.proto
+++ b/agl_service_voiceagent/protos/voice_agent.proto
@@ -61,6 +61,7 @@ message ServiceStatus {
   string version = 1;
   bool status = 2;  
   string wake_word = 3;
+  bool online_mode = 4;
 }
 
 message VoiceAudio {
diff --git a/agl_service_voiceagent/servicers/voice_agent_servicer.py b/agl_service_voiceagent/servicers/voice_agent_servicer.py
index 2a4de33..c149b6d 100644
--- a/agl_service_voiceagent/servicers/voice_agent_servicer.py
+++ b/agl_service_voiceagent/servicers/voice_agent_servicer.py
@@ -199,6 +199,7 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
             version=self.service_version,
             status=True,
             wake_word=self.wake_word,
+            online_mode = self.online_mode
         )
 
         # Convert the response object to a JSON string and log it
@@ -280,8 +281,12 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
                         "recorder": recorder,
                         "audio_file": audio_file
                     }
-                    
-                    recorder.start_recording()
+
+                    # start_recording() is handed to a worker thread. Passing the
+                    # bound method (rather than a closure) fixes the target now,
+                    # so a later rebinding of `recorder` cannot change it.
+                    record_thread = threading.Thread(target=recorder.start_recording)
+                    record_thread.start()
 
                 elif request.action == voice_agent_pb2.STOP:
                     stream_uuid = request.stream_id
@@ -294,6 +299,7 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
                     recorder = self.rvc_stream_uuids[stream_uuid]["recorder"]
                     audio_file = self.rvc_stream_uuids[stream_uuid]["audio_file"]
                     del self.rvc_stream_uuids[stream_uuid]
+                    print(use_online_mode)
 
                     recorder.stop_recording()
                                       
@@ -316,12 +322,19 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
                         recognizer_uuid = self.stt_model.setup_vosk_recognizer()
                         stt = self.stt_model.recognize_from_file(recognizer_uuid, audio_file,stt_framework=stt_framework)
                         used_kaldi = True
-
                     print(stt)
                     if stt not in ["FILE_NOT_FOUND", "FILE_FORMAT_INVALID", "VOICE_NOT_RECOGNIZED", ""]:
                         if request.nlu_model == voice_agent_pb2.SNIPS:
-                            extracted_intent = self.snips_interface.extract_intent(stt)
-                            intent, intent_actions = self.snips_interface.process_intent(extracted_intent)
+                            try:
+                                extracted_intent = self.snips_interface.extract_intent(stt)
+                            except Exception as e:
+                                print(e)
+                                extracted_intent = ""
+                            if extracted_intent != "":
+                                intent, intent_actions = self.snips_interface.process_intent(extracted_intent)
+                            else:
+                                intent = ""
+                                intent_actions = {}
                             if not intent or intent == "":
                                 status = voice_agent_pb2.INTENT_NOT_RECOGNIZED
                             
@@ -346,14 +359,12 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
                             status = voice_agent_pb2.NLU_MODEL_NOT_SUPPORTED
 
                     else:
-                        stt = ""
                         status = voice_agent_pb2.VOICE_NOT_RECOGNIZED
                     
                     # cleanup the kaldi recognizer
                     if used_kaldi:
                         self.stt_model.cleanup_recognizer(recognizer_uuid)
                         used_kaldi = False
-
                     # delete the audio file
                     if not self.store_voice_command:   
                         delete_file(audio_file)
@@ -516,7 +527,7 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
                     exec_response = "Uh oh, I failed to stop the media."
                     exec_status = voice_agent_pb2.EXEC_ERROR
             else:
-                exec_response = "Sorry, I failed to execute command against intent 'MediaControl'. Maybe try again with more specific instructions."
+                exec_response = "Sorry, I failed to execute command."
                 exec_status = voice_agent_pb2.EXEC_ERROR
             
 
@@ -572,7 +583,7 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
                     if "value" in execution_item:
                         value = execution_item["value"]
                         if self.set_current_values(signal, value):
-                            exec_response = f"Yay, I successfully updated the intent '{intent}' to value '{value}'."
+                            exec_response = f"Yay, I successfully updated '{intent}' to value '{value}'."
                             exec_status = voice_agent_pb2.EXEC_SUCCESS
                     
                     elif "factor" in execution_item:
@@ -593,7 +604,7 @@ class VoiceAgentServicer(voice_agent_pb2_grpc.VoiceAgentServiceServicer):
                                     value = current_value - factor
                                     value = str(value)
                                 if self.set_current_values(signal, value):
-                                    exec_response = f"Yay, I successfully updated the intent '{intent}' to value '{value}'."
+                                    exec_response = f"Yay, I successfully updated '{intent}' to value '{value}'."
                                     exec_status = voice_agent_pb2.EXEC_SUCCESS
 
                             else:
diff --git a/agl_service_voiceagent/utils/audio_recorder.py b/agl_service_voiceagent/utils/audio_recorder.py
index 49716c9..e362480 100644
--- a/agl_service_voiceagent/utils/audio_recorder.py
+++ b/agl_service_voiceagent/utils/audio_recorder.py
@@ -64,7 +64,7 @@ class AudioRecorder:
         """
         print("Creating pipeline for audio recording in {} mode...".format(self.mode))
         self.pipeline = Gst.Pipeline()
-        autoaudiosrc = Gst.ElementFactory.make("autoaudiosrc", None)
+        autoaudiosrc = Gst.ElementFactory.make("alsasrc", None)
         queue = Gst.ElementFactory.make("queue", None)
         queue.set_property("max-size-buffers", 0)
         queue.set_property("max-size-bytes", 0)
@@ -109,6 +109,7 @@ class AudioRecorder:
         Start recording audio using the GStreamer pipeline.
         """
         self.pipeline.set_state(Gst.State.PLAYING)
+        self.loop.run()
         print("Recording Voice Input...")
 
 
@@ -186,3 +187,4 @@ class AudioRecorder:
             print("Pipeline cleanup complete!")
             self.bus = None
             self.pipeline = None
+            self.loop.quit()
+\ No newline at end of file
diff --git a/agl_service_voiceagent/utils/stt_model.py b/agl_service_voiceagent/utils/stt_model.py
index 7e8ad8b..0a092ea 100644
--- a/agl_service_voiceagent/utils/stt_model.py
+++ b/agl_service_voiceagent/utils/stt_model.py
@@ -21,7 +21,7 @@ import wave
 from agl_service_voiceagent.utils.common import generate_unique_uuid
 
 # import the whisper model
-import whisper
+# import whisper
 # for whisper timeout feature
 from concurrent.futures import ThreadPoolExecutor  
 import subprocess
@@ -93,28 +93,28 @@ class STTModel:
         return result
     
     # Recognize speech using the whisper model
-    def recognize_using_whisper(self,filename,language = None,timeout = 5,fp16=False):
-        """
-        Recognize speech and return the result as a JSON object.
-
-        Args:
-            filename (str): The path to the audio file.
-            timeout (int, optional): The timeout for recognition (default is 5 seconds).
-            fp16 (bool, optional): If True, use 16-bit floating point precision, (default is False) because cuda is not supported.
-            language (str, optional): The language code for recognition (default is None).
-
-        Returns:
-            dict: A JSON object containing recognition results.
-        """
-        def transcribe_with_whisper():
-            return self.whisper_model.transcribe(filename, language = language,fp16=fp16)
+    # def recognize_using_whisper(self,filename,language = None,timeout = 5,fp16=False):
+    #     """
+    #     Recognize speech and return the result as a JSON object.
+
+    #     Args:
+    #         filename (str): The path to the audio file.
+    #         timeout (int, optional): The timeout for recognition (default is 5 seconds).
+    #         fp16 (bool, optional): If True, use 16-bit floating point precision, (default is False) because cuda is not supported.
+    #         language (str, optional): The language code for recognition (default is None).
+
+    #     Returns:
+    #         dict: A JSON object containing recognition results.
+    #     """
+    #     def transcribe_with_whisper():
+    #         return self.whisper_model.transcribe(filename, language = language,fp16=fp16)
         
-        with ThreadPoolExecutor() as executor:
-            future = executor.submit(transcribe_with_whisper)
-            try:
-                return future.result(timeout=timeout)
-            except TimeoutError:
-                return {"error": "Transcription with Whisper exceeded the timeout."}
+    #     with ThreadPoolExecutor() as executor:
+    #         future = executor.submit(transcribe_with_whisper)
+    #         try:
+    #             return future.result(timeout=timeout)
+    #         except TimeoutError:
+    #             return {"error": "Transcription with Whisper exceeded the timeout."}
             
     def recognize_using_whisper_cpp(self,filename):
         command = self.whisper_cpp_path