1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2023 Malik Talha
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import time
import requests
import subprocess
from concurrent.futures import ThreadPoolExecutor
class RASAInterface:
"""
RASAInterface is a class for interfacing with a Rasa NLU server to extract intents and entities from text input.
"""
def __init__(self, port, model_path, log_dir, max_threads=5):
"""
Initialize the RASAInterface instance with the provided parameters.
Args:
port (int): The port number on which the Rasa NLU server will run.
model_path (str): The path to the Rasa NLU model.
log_dir (str): The directory where server logs will be saved.
max_threads (int, optional): The maximum number of concurrent threads (default is 5).
"""
self.port = port
self.model_path = model_path
self.max_threads = max_threads
self.server_process = None
self.thread_pool = ThreadPoolExecutor(max_workers=max_threads)
self.log_file = log_dir+"rasa_server_logs.txt"
def _start_server(self):
"""
Start the Rasa NLU server in a subprocess and redirect its output to the log file.
"""
command = (
f"rasa run --enable-api -m \"{self.model_path}\" -p {self.port}"
)
# Redirect stdout and stderr to capture the output
with open(self.log_file, "w") as output_file:
self.server_process = subprocess.Popen(command, shell=True, stdout=output_file, stderr=subprocess.STDOUT)
self.server_process.wait() # Wait for the server process to finish
def start_server(self):
"""
Start the Rasa NLU server in a separate thread and wait for it to initialize.
"""
self.thread_pool.submit(self._start_server)
# Wait for a brief moment to allow the server to start
time.sleep(25)
def stop_server(self):
"""
Stop the Rasa NLU server and shut down the thread pool.
"""
if self.server_process:
self.server_process.terminate()
self.server_process.wait()
self.server_process = None
self.thread_pool.shutdown(wait=True)
def preprocess_text(self, text):
"""
Preprocess the input text by converting it to lowercase, removing leading/trailing spaces,
and removing special characters and punctuation.
Args:
text (str): The input text to preprocess.
Returns:
str: The preprocessed text.
"""
# text to lower case and remove trailing and leading spaces
preprocessed_text = text.lower().strip()
# remove special characters, punctuation, and extra whitespaces
preprocessed_text = re.sub(r'[^\w\s]', '', preprocessed_text).strip()
return preprocessed_text
def extract_intent(self, text):
preprocessed_text = self.preprocess_text(text)
url = f"http://localhost:{self.port}/model/parse"
data = {
"text": preprocessed_text
}
response = requests.post(url, json=data)
if response.status_code == 200:
return response.json()
else:
return None
def process_intent(self, intent_output):
"""
Extract intents and entities from preprocessed text using the Rasa NLU server.
Args:
text (str): The preprocessed input text.
Returns:
dict: Intent and entity extraction result as a dictionary.
"""
intent = intent_output["intent"]["name"]
entities = {}
for entity in intent_output["entities"]:
entity_name = entity["entity"]
entity_value = entity["value"]
entities[entity_name] = entity_value
return intent, entities
|