1 files changed, 64 insertions, 10 deletions
diff --git a/agl_service_voiceagent/service.py b/agl_service_voiceagent/service.py
index baf7b02..b5fb50e 100644
--- a/agl_service_voiceagent/service.py
+++ b/agl_service_voiceagent/service.py
@@ -23,6 +23,7 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
 generated_dir = os.path.join(current_dir, "generated")
 # Add the "generated" folder to sys.path
 sys.path.append(generated_dir)
+sys.path.append("../")
 
 import argparse
 from agl_service_voiceagent.utils.config import set_config_path, load_config, update_config_value, get_config_value, get_logger
@@ -49,8 +50,9 @@ def main():
     # Add the arguments for the server
     server_parser.add_argument('--default', action='store_true', help='Starts the server based on default config file.')
     server_parser.add_argument('--config', required=False, help='Path to a config file. Server is started based on this config file.')
-    server_parser.add_argument('--stt-model-path', required=False, help='Path to the Speech To Text model for Voice Commad detection. Currently only supports VOSK Kaldi.')
-    server_parser.add_argument('--ww-model-path', required=False, help='Path to the Speech To Text model for Wake Word detection. Currently only supports VOSK Kaldi. Defaults to the same model as --stt-model-path if not provided.')
+    server_parser.add_argument('--vosk-model-path', required=False, help='Path to the Vosk Speech To Text model for Voice Commad detection.')
+    server_parser.add_argument('--whisper-model-path', required=False, help='Path to the Whisper Speech To Text model for Voice Commad detection.')
+    server_parser.add_argument('--ww-model-path', required=False, help='Path to the Speech To Text model for Wake Word detection. Currently only supports VOSK Kaldi. Defaults to the same model as --vosk-model-path if not provided.')
     server_parser.add_argument('--snips-model-path', required=False, help='Path to the Snips NLU model.')
     server_parser.add_argument('--rasa-model-path', required=False, help='Path to the RASA NLU model.')
     server_parser.add_argument('--rasa-detached-mode', required=False, help='Assume that the RASA server is already running and does not start it as a sub process.')
@@ -59,6 +61,13 @@ def main():
     server_parser.add_argument('--audio-store-dir', required=False, help='Directory to store the generated audio files.')
     server_parser.add_argument('--log-store-dir', required=False, help='Directory to store the generated log files.')
 
+    # Arguments for online mode
+    server_parser.add_argument('--online-mode', required=False, help='Enable online mode for the Voice Agent Service (default is False).')
+    server_parser.add_argument('--online-mode-address', required=False, help='URL of the online server to connect to.')
+    server_parser.add_argument('--online-mode-port', required=False, help='Port of the online server to connect to.')
+    server_parser.add_argument('--online-mode-timeout', required=False, help='Timeout value in seconds for the online server connection.')
+
+
     # Add the arguments for the client
     client_parser.add_argument('--server-address', required=True, help='Address of the gRPC server running the Voice Agent Service.')
     client_parser.add_argument('--server-port', required=True, help='Port of the gRPC server running the Voice Agent Service.')
@@ -66,6 +75,10 @@ def main():
     client_parser.add_argument('--mode', help='Mode to run the client in. Supported modes: "auto" and "manual".')
     client_parser.add_argument('--nlu', help='NLU engine/model to use. Supported NLU engines: "snips" and "rasa".')
     client_parser.add_argument('--recording-time', help='Number of seconds to continue recording the voice command. Required by the \'manual\' mode. Defaults to 10 seconds.')
+    client_parser.add_argument('--stt-framework', help='STT framework to use. Supported frameworks: "vosk". Defaults to "vosk".')
+
+    # Arguments for online mode in client as --online-mode is a reserved keyword
+    client_parser.add_argument('--online-mode', required=False, help='Enable online mode for the Voice Agent Service (default is False).')
 
     args = parser.parse_args()
     
@@ -74,8 +87,12 @@ def main():
 
     elif args.subcommand == 'run-server':
         if not args.default and not args.config:
-            if not args.stt_model_path:
-                print("Error: The --stt-model-path is missing. Please provide a value. Use --help to see available options.")
+            if not args.vosk_model_path:
+                print("Error: The --vosk-model-path is missing. Please provide a value. Use --help to see available options.")
+                exit(1)
+
+            if not args.whisper_model_path:
+                print("Error: The --whisper-model-path is missing. Please provide a value. Use --help to see available options.")
                 exit(1)
             
             if not args.snips_model_path:
@@ -94,6 +111,16 @@ def main():
                 print("Error: The --vss-signals-spec-path is missing. Please provide a value. Use --help to see available options.")
                 exit(1)
             
+            # Error check for online mode
+            if args.online_mode:
+                if not args.online_mode_address:
+                    print("Error: The --online-mode-address is missing. Please provide a value. Use --help to see available options.")
+                    exit(1)
+                
+                if not args.online_mode_port:
+                    print("Error: The --online-mode-port is missing. Please provide a value. Use --help to see available options.")
+                    exit(1)    
+                           
             # Contruct the default config file path
             config_path = os.path.join(current_dir, "config.ini")
 
@@ -105,21 +132,36 @@ def main():
             logger.info("Starting Voice Agent Service in server mode using CLI provided params...")
             
             # Get the values provided by the user
-            stt_path = args.stt_model_path
+            vosk_path = args.vosk_model_path
+            whisper_path = args.whisper_model_path
             snips_model_path = args.snips_model_path
             rasa_model_path = args.rasa_model_path
             intents_vss_map_path = args.intents_vss_map_path
             vss_signals_spec_path = args.vss_signals_spec_path
             
+            # Get the values for online mode
+            online_mode = False
+            if args.online_mode:
+                online_mode = True
+                online_mode_address = args.online_mode_address
+                online_mode_port = args.online_mode_port
+                online_mode_timeout = args.online_mode_timeout or 5
+                update_config_value('1', 'ONLINE_MODE')
+                update_config_value(online_mode_address, 'ONLINE_MODE_ADDRESS')
+                update_config_value(online_mode_port, 'ONLINE_MODE_PORT')
+                update_config_value(online_mode_timeout, 'ONLINE_MODE_TIMEOUT')
+            
             # Convert to an absolute path if it's a relative path
-            stt_path = add_trailing_slash(os.path.abspath(stt_path)) if not os.path.isabs(stt_path) else stt_path
+            vosk_path = add_trailing_slash(os.path.abspath(vosk_path)) if not os.path.isabs(vosk_path) else vosk_path
+            whisper_path = add_trailing_slash(os.path.abspath(whisper_path)) if not os.path.isabs(whisper_path) else whisper_path
             snips_model_path = add_trailing_slash(os.path.abspath(snips_model_path)) if not os.path.isabs(snips_model_path) else snips_model_path
             rasa_model_path = add_trailing_slash(os.path.abspath(rasa_model_path)) if not os.path.isabs(rasa_model_path) else rasa_model_path
             intents_vss_map_path = os.path.abspath(intents_vss_map_path) if not os.path.isabs(intents_vss_map_path) else intents_vss_map_path
             vss_signals_spec_path = os.path.abspath(vss_signals_spec_path) if not os.path.isabs(vss_signals_spec_path) else vss_signals_spec_path
             
             # Also update the config.ini file
-            update_config_value(stt_path, 'STT_MODEL_PATH')
+            update_config_value(vosk_path, 'VOSK_MODEL_PATH')
+            update_config_value(whisper_path, 'WHISPER_MODEL_PATH')
             update_config_value(snips_model_path, 'SNIPS_MODEL_PATH')
             update_config_value(rasa_model_path, 'RASA_MODEL_PATH')
             update_config_value(intents_vss_map_path, 'INTENTS_VSS_MAP')
@@ -162,7 +204,6 @@ def main():
 
             logger = get_logger()
             logger.info(f"Starting Voice Agent Service in server mode using the default config file...")
-        
         # create the base audio dir if not exists
         if not os.path.exists(get_config_value('BASE_AUDIO_DIR')):
             os.makedirs(get_config_value('BASE_AUDIO_DIR'))
@@ -176,6 +217,8 @@ def main():
         mode = ""
         action = args.action
         recording_time = 5 # seconds
+        stt_framework = args.stt_framework or "vosk"
+        online_mode = args.online_mode or False
 
         if action not in ["GetStatus", "DetectWakeWord", "ExecuteVoiceCommand", "ExecuteTextCommand"]:
             print("Error: Invalid value for --action. Supported actions: 'GetStatus', 'DetectWakeWord', 'ExecuteVoiceCommand' and 'ExecuteTextCommand'. Use --help to see available options.")
@@ -199,8 +242,19 @@ def main():
             mode = args.mode
             if mode == "manual" and args.recording_time:
                 recording_time = int(args.recording_time)
-        
-        run_client(server_address, server_port, action, mode, nlu_engine, recording_time)
+            if args.stt_framework and args.stt_framework not in ['vosk', 'whisper']:
+                print("Error: Invalid value for --stt-framework. Supported frameworks: 'vosk' and 'whisper'. Use --help to see available options.")
+                exit(1)
+            if args.stt_framework:
+                stt_framework = args.stt_framework
+            
+            if args.online_mode and args.online_mode not in ['True', 'False', 'true', 'false', '1', '0']:
+                print("Error: Invalid value for --online-mode. Supported values: 'True' and 'False'. Use --help to see available options.")
+                exit(1)
+            if args.online_mode:
+                online_mode = True if args.online_mode in ['True', 'true', '1'] else False
+
+        run_client(server_address, server_port, action, mode, nlu_engine, recording_time, stt_framework, online_mode)
 
     else:
         print_version()