diff options
author | Malik Talha <talhamalik727x@gmail.com> | 2023-09-14 22:41:26 +0500 |
---|---|---|
committer | Malik Talha <talhamalik727x@gmail.com> | 2023-09-25 00:40:38 +0500 |
commit | a10c988b5480ca5b937a2793b450cfa01f569d76 (patch) | |
tree | 23c032557a36afd671c7b7db9d6dd843253ae835 /agl_service_voiceagent/nlu | |
parent | 3e300cdc7fff19e5f338b282266444061f74506e (diff) |
Add gRPC-based voice agent service for AGL
Introducing a gRPC-based voice agent service for Automotive Grade Linux
(AGL) that leverages GStreamer, Vosk, Snips, and RASA. It seamlessly
processes user voice commands, converting spoken words to text,
extracting intents, and performing actions via the Kuksa interface (WIP).
Bug-AGL: SPEC-4906
Signed-off-by: Malik Talha <talhamalik727x@gmail.com>
Change-Id: I47e61c66149c67bb97fecc745e4c3afd79f447a5
Diffstat (limited to 'agl_service_voiceagent/nlu')
-rw-r--r-- | agl_service_voiceagent/nlu/__init__.py | 0 | ||||
-rw-r--r-- | agl_service_voiceagent/nlu/rasa_interface.py | 87 | ||||
-rw-r--r-- | agl_service_voiceagent/nlu/snips_interface.py | 46 |
3 files changed, 133 insertions, 0 deletions
diff --git a/agl_service_voiceagent/nlu/__init__.py b/agl_service_voiceagent/nlu/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/agl_service_voiceagent/nlu/__init__.py diff --git a/agl_service_voiceagent/nlu/rasa_interface.py b/agl_service_voiceagent/nlu/rasa_interface.py new file mode 100644 index 0000000..0232126 --- /dev/null +++ b/agl_service_voiceagent/nlu/rasa_interface.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023 Malik Talha +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import re
import subprocess
import time
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import wait as wait_for_futures


class RASAInterface:
    """Run a local ``rasa run`` server and query it for intents.

    The server is launched as a child process from a worker thread; its
    stdout and stderr are written to ``<log_dir>/rasa_server_logs.txt``.
    Intents are obtained by POSTing text to the server's ``/model/parse``
    endpoint on ``localhost``.
    """

    LOG_FILE_NAME = "rasa_server_logs.txt"

    def __init__(self, port, model_path, log_dir, max_threads=5):
        """Store the server settings; nothing is started here.

        Args:
            port: TCP port the RASA server should listen on.
            model_path: Path of the model passed to ``rasa run -m``.
            log_dir: Directory that receives the server log file. A
                trailing path separator is optional.
            max_threads: Size of the worker pool used to host the server
                process.
        """
        self.port = port
        self.model_path = model_path
        self.max_threads = max_threads
        self.server_process = None
        self.thread_pool = ThreadPoolExecutor(max_workers=max_threads)
        # os.path.join keeps the path correct whether or not log_dir ends
        # with a separator (plain concatenation did not).
        self.log_file = os.path.join(log_dir, self.LOG_FILE_NAME)

    def _build_command(self):
        """Return the ``rasa run`` command as an argument list."""
        return [
            "rasa",
            "run",
            "--enable-api",
            "-m",
            str(self.model_path),
            "-p",
            str(self.port),
        ]

    def _start_server(self):
        """Launch the server and block until it exits (runs in the pool)."""
        with open(self.log_file, "w") as output_file:
            try:
                # An argument list without a shell means model_path is
                # never interpreted by a shell, and terminate() signals
                # the rasa process itself rather than an intermediate sh.
                self.server_process = subprocess.Popen(
                    self._build_command(),
                    stdout=output_file,
                    stderr=subprocess.STDOUT,
                )
            except OSError as error:
                # Without a shell there is nobody else to report a missing
                # executable, so record the failure in the log file.
                output_file.write(f"Failed to launch RASA server: {error}\n")
                return
            self.server_process.wait()  # Wait for the server process to finish

    def start_server(self, startup_wait=25):
        """Start the server in the background and give it time to come up.

        Args:
            startup_wait: Maximum number of seconds to wait for start-up.

        Returns:
            True if the server process was still running when the wait
            ended, False if the launch task had already finished (the
            process exited or could not be started).
        """
        launcher = self.thread_pool.submit(self._start_server)
        # The launch task only finishes when the server process ends, so
        # this waits the full period for a healthy server but returns
        # immediately when the launch failed.
        finished, _ = wait_for_futures([launcher], timeout=startup_wait)
        return not finished

    def stop_server(self):
        """Terminate the server process, if any, and reset the worker pool."""
        process = self.server_process
        if process is not None:
            process.terminate()
            process.wait()
            self.server_process = None
        self.thread_pool.shutdown(wait=True)
        # A shut-down executor rejects new work; replace it so that
        # start_server() can be called again on this instance.
        self.thread_pool = ThreadPoolExecutor(max_workers=self.max_threads)

    def preprocess_text(self, text):
        """Normalise text before it is sent to the NLU model.

        Lower-cases the text, drops every character that is neither a
        word character nor whitespace, and collapses each run of
        whitespace into a single space with no leading or trailing space.
        """
        lowered = text.lower()
        without_punctuation = re.sub(r"[^\w\s]", "", lowered)
        return " ".join(without_punctuation.split())

    def extract_intent(self, text, timeout=10):
        """Ask the running server to parse ``text``.

        Args:
            text: Raw user utterance.
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            The decoded JSON response on HTTP 200, otherwise ``None``
            (non-200 status, unreachable server, timeout, or a body that
            is not valid JSON).
        """
        # Imported here rather than at module level so the text helpers
        # stay usable where the HTTP client is not installed.
        import requests

        url = f"http://localhost:{self.port}/model/parse"
        payload = {"text": self.preprocess_text(text)}
        try:
            response = requests.post(url, json=payload, timeout=timeout)
        except requests.RequestException:
            return None
        if response.status_code != 200:
            return None
        try:
            return response.json()
        except ValueError:
            return None

    def process_intent(self, intent_output):
        """Reduce a parse result to ``(intent_name, {entity: value})``.

        Args:
            intent_output: Value returned by ``extract_intent``; may be
                ``None`` when the request failed.

        Returns:
            A tuple of the intent name (``None`` if unavailable) and a
            dict mapping each entity name to its value. When an entity
            name occurs more than once the last value wins.
        """
        if not intent_output:
            return None, {}
        intent = (intent_output.get("intent") or {}).get("name")
        entities = {
            entity["entity"]: entity["value"]
            for entity in intent_output.get("entities") or []
        }
        return intent, entities
# File: agl_service_voiceagent/nlu/snips_interface.py
#
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2023 Malik Talha
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Text


class SnipsInterface:
    """Thin wrapper around a Snips NLU engine loaded from disk."""

    def __init__(self, model_path: Text):
        """Load the engine stored at ``model_path``."""
        # Imported here rather than at module level so the text helpers
        # stay usable where the inference package is not installed.
        from snips_inference_agl import SnipsNLUEngine

        self.engine = SnipsNLUEngine.from_path(model_path)

    def preprocess_text(self, text):
        """Normalise text before it is handed to the engine.

        Lower-cases the text, drops every character that is neither a
        word character nor whitespace, and collapses each run of
        whitespace into a single space with no leading or trailing space.
        """
        lowered = text.lower()
        without_punctuation = re.sub(r'[^\w\s]', '', lowered)
        return " ".join(without_punctuation.split())

    def extract_intent(self, text: Text):
        """Return the engine's parse result for the preprocessed ``text``."""
        return self.engine.parse(self.preprocess_text(text))

    def process_intent(self, intent_output):
        """Reduce a parse result to ``(intent_name, {entity: value})``.

        Args:
            intent_output: Mapping returned by ``extract_intent``.

        Returns:
            A tuple of the intent name (``None`` if unavailable) and a
            dict mapping each slot's entity to its value. When an entity
            occurs more than once the last value wins.
        """
        if not intent_output:
            return None, {}
        intent = (intent_output.get('intent') or {}).get('intentName')
        intent_actions = {}
        for slot in intent_output.get('slots') or []:
            resolved = slot.get('value')
            if isinstance(resolved, dict) and 'value' in resolved:
                value = resolved['value']
            else:
                # Some resolved values carry no 'value' key; fall back to
                # the raw matched text instead of raising KeyError.
                # NOTE(review): assumes slots expose 'rawValue' as in the
                # upstream Snips NLU output format - confirm for this fork.
                value = slot.get('rawValue')
            intent_actions[slot['entity']] = value
        return intent, intent_actions
\ No newline at end of file |