author     Malik Talha <talhamalik727x@gmail.com>  2023-10-22 21:06:23 +0500
committer  Jan-Simon Moeller <jsmoeller@linuxfoundation.org>  2023-10-23 14:38:13 +0000
commit     697a1adce1e463079e640b55d6386cf82d7bd6bc (patch)
tree       86e299cc7fe12b10c2e549f640924b61c7d07a95 /snips_inference_agl/slot_filler/features_utils.py
parent     97029ab8141e654a170a2282106f854037da294f (diff)
Add Snips Inference Module
Add a slightly modified version of the original Snips NLU library. This
module adds support for Python versions up to 3.10.

Bug-AGL: SPEC-4856
Signed-off-by: Malik Talha <talhamalik727x@gmail.com>
Change-Id: I6d7e9eb181e6ff4aed9b6291027877ccb9f0d846
Diffstat (limited to 'snips_inference_agl/slot_filler/features_utils.py')
-rw-r--r--  snips_inference_agl/slot_filler/features_utils.py  47
1 file changed, 47 insertions(+), 0 deletions(-)
diff --git a/snips_inference_agl/slot_filler/features_utils.py b/snips_inference_agl/slot_filler/features_utils.py
new file mode 100644
index 0000000..483e9c0
--- /dev/null
+++ b/snips_inference_agl/slot_filler/features_utils.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+from copy import deepcopy
+
+from snips_inference_agl.common.dict_utils import LimitedSizeDict
+from snips_inference_agl.constants import END, RES_MATCH_RANGE, START
+
+_NGRAMS_CACHE = LimitedSizeDict(size_limit=1000)
+
+
+def get_all_ngrams(tokens):
+    from snips_nlu_utils import compute_all_ngrams
+
+    if not tokens:
+        return []
+    key = "<||>".join(tokens)
+    if key not in _NGRAMS_CACHE:
+        ngrams = compute_all_ngrams(tokens, len(tokens))
+        _NGRAMS_CACHE[key] = ngrams
+    return deepcopy(_NGRAMS_CACHE[key])
+
+
+def get_word_chunk(word, chunk_size, chunk_start, reverse=False):
+    if chunk_size < 1:
+        raise ValueError("chunk size should be >= 1")
+    if chunk_size > len(word):
+        return None
+    start = chunk_start - chunk_size if reverse else chunk_start
+    end = chunk_start if reverse else chunk_start + chunk_size
+    return word[start:end]
+
+
+def initial_string_from_tokens(tokens):
+    current_index = 0
+    s = ""
+    for t in tokens:
+        if t.start > current_index:
+            s += " " * (t.start - current_index)
+        s += t.value
+        current_index = t.end
+    return s
+
+
+def entity_filter(entity, start, end):
+    entity_start = entity[RES_MATCH_RANGE][START]
+    entity_end = entity[RES_MATCH_RANGE][END]
+    return entity_start <= start < end <= entity_end
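
For orientation, the sketch below exercises the new helpers outside of the slot filler. The Token namedtuple and the example values are assumptions made purely for illustration (the library defines its own token type); get_all_ngrams is left out because it additionally depends on the native snips_nlu_utils package.

from collections import namedtuple

from snips_inference_agl.constants import END, RES_MATCH_RANGE, START
from snips_inference_agl.slot_filler.features_utils import (
    entity_filter, get_word_chunk, initial_string_from_tokens)

# Stand-in for the library's token type: only value/start/end are needed here.
Token = namedtuple("Token", ["value", "start", "end"])

tokens = [Token("turn", 0, 4), Token("on", 5, 7), Token("lights", 8, 14)]

# Rebuilds the utterance, padding the gaps between tokens with spaces.
assert initial_string_from_tokens(tokens) == "turn on lights"

# Leading 3-character chunk, then trailing 3-character chunk (reverse=True).
assert get_word_chunk("lights", 3, 0) == "lig"
assert get_word_chunk("lights", 3, len("lights"), reverse=True) == "hts"
# Chunks longer than the word yield None rather than a partial slice.
assert get_word_chunk("on", 5, 0) is None

# An entity match covering characters 8..14 contains the token span 8..14,
# but not a span that starts before the match.
entity = {RES_MATCH_RANGE: {START: 8, END: 14}}
assert entity_filter(entity, 8, 14)
assert not entity_filter(entity, 5, 14)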