aboutsummaryrefslogtreecommitdiffstats
path: root/agl_service_voiceagent/service.py
diff options
context:
space:
mode:
Diffstat (limited to 'agl_service_voiceagent/service.py')
-rw-r--r--agl_service_voiceagent/service.py74
1 files changed, 64 insertions, 10 deletions
diff --git a/agl_service_voiceagent/service.py b/agl_service_voiceagent/service.py
index baf7b02..b5fb50e 100644
--- a/agl_service_voiceagent/service.py
+++ b/agl_service_voiceagent/service.py
@@ -23,6 +23,7 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
generated_dir = os.path.join(current_dir, "generated")
# Add the "generated" folder to sys.path
sys.path.append(generated_dir)
+sys.path.append("../")
import argparse
from agl_service_voiceagent.utils.config import set_config_path, load_config, update_config_value, get_config_value, get_logger
@@ -49,8 +50,9 @@ def main():
# Add the arguments for the server
server_parser.add_argument('--default', action='store_true', help='Starts the server based on default config file.')
server_parser.add_argument('--config', required=False, help='Path to a config file. Server is started based on this config file.')
- server_parser.add_argument('--stt-model-path', required=False, help='Path to the Speech To Text model for Voice Commad detection. Currently only supports VOSK Kaldi.')
- server_parser.add_argument('--ww-model-path', required=False, help='Path to the Speech To Text model for Wake Word detection. Currently only supports VOSK Kaldi. Defaults to the same model as --stt-model-path if not provided.')
+ server_parser.add_argument('--vosk-model-path', required=False, help='Path to the Vosk Speech To Text model for Voice Commad detection.')
+ server_parser.add_argument('--whisper-model-path', required=False, help='Path to the Whisper Speech To Text model for Voice Commad detection.')
+ server_parser.add_argument('--ww-model-path', required=False, help='Path to the Speech To Text model for Wake Word detection. Currently only supports VOSK Kaldi. Defaults to the same model as --vosk-model-path if not provided.')
server_parser.add_argument('--snips-model-path', required=False, help='Path to the Snips NLU model.')
server_parser.add_argument('--rasa-model-path', required=False, help='Path to the RASA NLU model.')
server_parser.add_argument('--rasa-detached-mode', required=False, help='Assume that the RASA server is already running and does not start it as a sub process.')
@@ -59,6 +61,13 @@ def main():
server_parser.add_argument('--audio-store-dir', required=False, help='Directory to store the generated audio files.')
server_parser.add_argument('--log-store-dir', required=False, help='Directory to store the generated log files.')
+ # Arguments for online mode
+ server_parser.add_argument('--online-mode', required=False, help='Enable online mode for the Voice Agent Service (default is False).')
+ server_parser.add_argument('--online-mode-address', required=False, help='URL of the online server to connect to.')
+ server_parser.add_argument('--online-mode-port', required=False, help='Port of the online server to connect to.')
+ server_parser.add_argument('--online-mode-timeout', required=False, help='Timeout value in seconds for the online server connection.')
+
+
# Add the arguments for the client
client_parser.add_argument('--server-address', required=True, help='Address of the gRPC server running the Voice Agent Service.')
client_parser.add_argument('--server-port', required=True, help='Port of the gRPC server running the Voice Agent Service.')
@@ -66,6 +75,10 @@ def main():
client_parser.add_argument('--mode', help='Mode to run the client in. Supported modes: "auto" and "manual".')
client_parser.add_argument('--nlu', help='NLU engine/model to use. Supported NLU engines: "snips" and "rasa".')
client_parser.add_argument('--recording-time', help='Number of seconds to continue recording the voice command. Required by the \'manual\' mode. Defaults to 10 seconds.')
+ client_parser.add_argument('--stt-framework', help='STT framework to use. Supported frameworks: "vosk". Defaults to "vosk".')
+
+ # Arguments for online mode in client as --online-mode is a reserved keyword
+ client_parser.add_argument('--online-mode', required=False, help='Enable online mode for the Voice Agent Service (default is False).')
args = parser.parse_args()
@@ -74,8 +87,12 @@ def main():
elif args.subcommand == 'run-server':
if not args.default and not args.config:
- if not args.stt_model_path:
- print("Error: The --stt-model-path is missing. Please provide a value. Use --help to see available options.")
+ if not args.vosk_model_path:
+ print("Error: The --vosk-model-path is missing. Please provide a value. Use --help to see available options.")
+ exit(1)
+
+ if not args.whisper_model_path:
+ print("Error: The --whisper-model-path is missing. Please provide a value. Use --help to see available options.")
exit(1)
if not args.snips_model_path:
@@ -94,6 +111,16 @@ def main():
print("Error: The --vss-signals-spec-path is missing. Please provide a value. Use --help to see available options.")
exit(1)
+ # Error check for online mode
+ if args.online_mode:
+ if not args.online_mode_address:
+ print("Error: The --online-mode-address is missing. Please provide a value. Use --help to see available options.")
+ exit(1)
+
+ if not args.online_mode_port:
+ print("Error: The --online-mode-port is missing. Please provide a value. Use --help to see available options.")
+ exit(1)
+
# Construct the default config file path
config_path = os.path.join(current_dir, "config.ini")
@@ -105,21 +132,36 @@ def main():
logger.info("Starting Voice Agent Service in server mode using CLI provided params...")
# Get the values provided by the user
- stt_path = args.stt_model_path
+ vosk_path = args.vosk_model_path
+ whisper_path = args.whisper_model_path
snips_model_path = args.snips_model_path
rasa_model_path = args.rasa_model_path
intents_vss_map_path = args.intents_vss_map_path
vss_signals_spec_path = args.vss_signals_spec_path
+ # Get the values for online mode
+ online_mode = False
+ if args.online_mode:
+ online_mode = True
+ online_mode_address = args.online_mode_address
+ online_mode_port = args.online_mode_port
+ online_mode_timeout = args.online_mode_timeout or 5
+ update_config_value('1', 'ONLINE_MODE')
+ update_config_value(online_mode_address, 'ONLINE_MODE_ADDRESS')
+ update_config_value(online_mode_port, 'ONLINE_MODE_PORT')
+ update_config_value(online_mode_timeout, 'ONLINE_MODE_TIMEOUT')
+
# Convert to an absolute path if it's a relative path
- stt_path = add_trailing_slash(os.path.abspath(stt_path)) if not os.path.isabs(stt_path) else stt_path
+ vosk_path = add_trailing_slash(os.path.abspath(vosk_path)) if not os.path.isabs(vosk_path) else vosk_path
+ whisper_path = add_trailing_slash(os.path.abspath(whisper_path)) if not os.path.isabs(whisper_path) else whisper_path
snips_model_path = add_trailing_slash(os.path.abspath(snips_model_path)) if not os.path.isabs(snips_model_path) else snips_model_path
rasa_model_path = add_trailing_slash(os.path.abspath(rasa_model_path)) if not os.path.isabs(rasa_model_path) else rasa_model_path
intents_vss_map_path = os.path.abspath(intents_vss_map_path) if not os.path.isabs(intents_vss_map_path) else intents_vss_map_path
vss_signals_spec_path = os.path.abspath(vss_signals_spec_path) if not os.path.isabs(vss_signals_spec_path) else vss_signals_spec_path
# Also update the config.ini file
- update_config_value(stt_path, 'STT_MODEL_PATH')
+ update_config_value(vosk_path, 'VOSK_MODEL_PATH')
+ update_config_value(whisper_path, 'WHISPER_MODEL_PATH')
update_config_value(snips_model_path, 'SNIPS_MODEL_PATH')
update_config_value(rasa_model_path, 'RASA_MODEL_PATH')
update_config_value(intents_vss_map_path, 'INTENTS_VSS_MAP')
@@ -162,7 +204,6 @@ def main():
logger = get_logger()
logger.info(f"Starting Voice Agent Service in server mode using the default config file...")
-
# Create the base audio dir if it does not exist
if not os.path.exists(get_config_value('BASE_AUDIO_DIR')):
os.makedirs(get_config_value('BASE_AUDIO_DIR'))
@@ -176,6 +217,8 @@ def main():
mode = ""
action = args.action
recording_time = 5 # seconds
+ stt_framework = args.stt_framework or "vosk"
+ online_mode = args.online_mode or False
if action not in ["GetStatus", "DetectWakeWord", "ExecuteVoiceCommand", "ExecuteTextCommand"]:
print("Error: Invalid value for --action. Supported actions: 'GetStatus', 'DetectWakeWord', 'ExecuteVoiceCommand' and 'ExecuteTextCommand'. Use --help to see available options.")
@@ -199,8 +242,19 @@ def main():
mode = args.mode
if mode == "manual" and args.recording_time:
recording_time = int(args.recording_time)
-
- run_client(server_address, server_port, action, mode, nlu_engine, recording_time)
+ if args.stt_framework and args.stt_framework not in ['vosk', 'whisper']:
+ print("Error: Invalid value for --stt-framework. Supported frameworks: 'vosk' and 'whisper'. Use --help to see available options.")
+ exit(1)
+ if args.stt_framework:
+ stt_framework = args.stt_framework
+
+ if args.online_mode and args.online_mode not in ['True', 'False', 'true', 'false', '1', '0']:
+ print("Error: Invalid value for --online-mode. Supported values: 'True' and 'False'. Use --help to see available options.")
+ exit(1)
+ if args.online_mode:
+ online_mode = True if args.online_mode in ['True', 'true', '1'] else False
+
+ run_client(server_address, server_port, action, mode, nlu_engine, recording_time, stt_framework, online_mode)
else:
print_version()