diff options
Diffstat (limited to 'agl_service_voiceagent/service.py')
-rw-r--r-- | agl_service_voiceagent/service.py | 74 |
1 files changed, 64 insertions, 10 deletions
diff --git a/agl_service_voiceagent/service.py b/agl_service_voiceagent/service.py index baf7b02..b5fb50e 100644 --- a/agl_service_voiceagent/service.py +++ b/agl_service_voiceagent/service.py @@ -23,6 +23,7 @@ current_dir = os.path.dirname(os.path.abspath(__file__)) generated_dir = os.path.join(current_dir, "generated") # Add the "generated" folder to sys.path sys.path.append(generated_dir) +sys.path.append("../") import argparse from agl_service_voiceagent.utils.config import set_config_path, load_config, update_config_value, get_config_value, get_logger @@ -49,8 +50,9 @@ def main(): # Add the arguments for the server server_parser.add_argument('--default', action='store_true', help='Starts the server based on default config file.') server_parser.add_argument('--config', required=False, help='Path to a config file. Server is started based on this config file.') - server_parser.add_argument('--stt-model-path', required=False, help='Path to the Speech To Text model for Voice Commad detection. Currently only supports VOSK Kaldi.') - server_parser.add_argument('--ww-model-path', required=False, help='Path to the Speech To Text model for Wake Word detection. Currently only supports VOSK Kaldi. Defaults to the same model as --stt-model-path if not provided.') + server_parser.add_argument('--vosk-model-path', required=False, help='Path to the Vosk Speech To Text model for Voice Commad detection.') + server_parser.add_argument('--whisper-model-path', required=False, help='Path to the Whisper Speech To Text model for Voice Commad detection.') + server_parser.add_argument('--ww-model-path', required=False, help='Path to the Speech To Text model for Wake Word detection. Currently only supports VOSK Kaldi. Defaults to the same model as --vosk-model-path if not provided.') server_parser.add_argument('--snips-model-path', required=False, help='Path to the Snips NLU model.') server_parser.add_argument('--rasa-model-path', required=False, help='Path to the RASA NLU model.') server_parser.add_argument('--rasa-detached-mode', required=False, help='Assume that the RASA server is already running and does not start it as a sub process.') @@ -59,6 +61,13 @@ def main(): server_parser.add_argument('--audio-store-dir', required=False, help='Directory to store the generated audio files.') server_parser.add_argument('--log-store-dir', required=False, help='Directory to store the generated log files.') + # Arguments for online mode + server_parser.add_argument('--online-mode', required=False, help='Enable online mode for the Voice Agent Service (default is False).') + server_parser.add_argument('--online-mode-address', required=False, help='URL of the online server to connect to.') + server_parser.add_argument('--online-mode-port', required=False, help='Port of the online server to connect to.') + server_parser.add_argument('--online-mode-timeout', required=False, help='Timeout value in seconds for the online server connection.') + + # Add the arguments for the client client_parser.add_argument('--server-address', required=True, help='Address of the gRPC server running the Voice Agent Service.') client_parser.add_argument('--server-port', required=True, help='Port of the gRPC server running the Voice Agent Service.') @@ -66,6 +75,10 @@ def main(): client_parser.add_argument('--mode', help='Mode to run the client in. Supported modes: "auto" and "manual".') client_parser.add_argument('--nlu', help='NLU engine/model to use. Supported NLU engines: "snips" and "rasa".') client_parser.add_argument('--recording-time', help='Number of seconds to continue recording the voice command. Required by the \'manual\' mode. Defaults to 10 seconds.') + client_parser.add_argument('--stt-framework', help='STT framework to use. Supported frameworks: "vosk". Defaults to "vosk".') + + # Arguments for online mode in client as --online-mode is a reserved keyword + client_parser.add_argument('--online-mode', required=False, help='Enable online mode for the Voice Agent Service (default is False).') args = parser.parse_args() @@ -74,8 +87,12 @@ def main(): elif args.subcommand == 'run-server': if not args.default and not args.config: - if not args.stt_model_path: - print("Error: The --stt-model-path is missing. Please provide a value. Use --help to see available options.") + if not args.vosk_model_path: + print("Error: The --vosk-model-path is missing. Please provide a value. Use --help to see available options.") + exit(1) + + if not args.whisper_model_path: + print("Error: The --whisper-model-path is missing. Please provide a value. Use --help to see available options.") exit(1) if not args.snips_model_path: @@ -94,6 +111,16 @@ def main(): print("Error: The --vss-signals-spec-path is missing. Please provide a value. Use --help to see available options.") exit(1) + # Error check for online mode + if args.online_mode: + if not args.online_mode_address: + print("Error: The --online-mode-address is missing. Please provide a value. Use --help to see available options.") + exit(1) + + if not args.online_mode_port: + print("Error: The --online-mode-port is missing. Please provide a value. Use --help to see available options.") + exit(1) + # Contruct the default config file path config_path = os.path.join(current_dir, "config.ini") @@ -105,21 +132,36 @@ def main(): logger.info("Starting Voice Agent Service in server mode using CLI provided params...") # Get the values provided by the user - stt_path = args.stt_model_path + vosk_path = args.vosk_model_path + whisper_path = args.whisper_model_path snips_model_path = args.snips_model_path rasa_model_path = args.rasa_model_path intents_vss_map_path = args.intents_vss_map_path vss_signals_spec_path = args.vss_signals_spec_path + # Get the values for online mode + online_mode = False + if args.online_mode: + online_mode = True + online_mode_address = args.online_mode_address + online_mode_port = args.online_mode_port + online_mode_timeout = args.online_mode_timeout or 5 + update_config_value('1', 'ONLINE_MODE') + update_config_value(online_mode_address, 'ONLINE_MODE_ADDRESS') + update_config_value(online_mode_port, 'ONLINE_MODE_PORT') + update_config_value(online_mode_timeout, 'ONLINE_MODE_TIMEOUT') + # Convert to an absolute path if it's a relative path - stt_path = add_trailing_slash(os.path.abspath(stt_path)) if not os.path.isabs(stt_path) else stt_path + vosk_path = add_trailing_slash(os.path.abspath(vosk_path)) if not os.path.isabs(vosk_path) else vosk_path + whisper_path = add_trailing_slash(os.path.abspath(whisper_path)) if not os.path.isabs(whisper_path) else whisper_path snips_model_path = add_trailing_slash(os.path.abspath(snips_model_path)) if not os.path.isabs(snips_model_path) else snips_model_path rasa_model_path = add_trailing_slash(os.path.abspath(rasa_model_path)) if not os.path.isabs(rasa_model_path) else rasa_model_path intents_vss_map_path = os.path.abspath(intents_vss_map_path) if not os.path.isabs(intents_vss_map_path) else intents_vss_map_path vss_signals_spec_path = os.path.abspath(vss_signals_spec_path) if not os.path.isabs(vss_signals_spec_path) else vss_signals_spec_path # Also update the config.ini file - update_config_value(stt_path, 'STT_MODEL_PATH') + update_config_value(vosk_path, 'VOSK_MODEL_PATH') + update_config_value(whisper_path, 'WHISPER_MODEL_PATH') update_config_value(snips_model_path, 'SNIPS_MODEL_PATH') update_config_value(rasa_model_path, 'RASA_MODEL_PATH') update_config_value(intents_vss_map_path, 'INTENTS_VSS_MAP') @@ -162,7 +204,6 @@ def main(): logger = get_logger() logger.info(f"Starting Voice Agent Service in server mode using the default config file...") - # create the base audio dir if not exists if not os.path.exists(get_config_value('BASE_AUDIO_DIR')): os.makedirs(get_config_value('BASE_AUDIO_DIR')) @@ -176,6 +217,8 @@ def main(): mode = "" action = args.action recording_time = 5 # seconds + stt_framework = args.stt_framework or "vosk" + online_mode = args.online_mode or False if action not in ["GetStatus", "DetectWakeWord", "ExecuteVoiceCommand", "ExecuteTextCommand"]: print("Error: Invalid value for --action. Supported actions: 'GetStatus', 'DetectWakeWord', 'ExecuteVoiceCommand' and 'ExecuteTextCommand'. Use --help to see available options.") @@ -199,8 +242,19 @@ def main(): mode = args.mode if mode == "manual" and args.recording_time: recording_time = int(args.recording_time) - - run_client(server_address, server_port, action, mode, nlu_engine, recording_time) + if args.stt_framework and args.stt_framework not in ['vosk', 'whisper']: + print("Error: Invalid value for --stt-framework. Supported frameworks: 'vosk' and 'whisper'. Use --help to see available options.") + exit(1) + if args.stt_framework: + stt_framework = args.stt_framework + + if args.online_mode and args.online_mode not in ['True', 'False', 'true', 'false', '1', '0']: + print("Error: Invalid value for --online-mode. Supported values: 'True' and 'False'. Use --help to see available options.") + exit(1) + if args.online_mode: + online_mode = True if args.online_mode in ['True', 'true', '1'] else False + + run_client(server_address, server_port, action, mode, nlu_engine, recording_time, stt_framework, online_mode) else: print_version() |