author    | Malik Talha <talhamalik727x@gmail.com>            | 2023-10-22 21:06:23 +0500
committer | Jan-Simon Moeller <jsmoeller@linuxfoundation.org> | 2023-10-23 14:38:13 +0000
commit    | 697a1adce1e463079e640b55d6386cf82d7bd6bc (patch)
tree      | 86e299cc7fe12b10c2e549f640924b61c7d07a95 /snips_inference_agl/slot_filler/features_utils.py
parent    | 97029ab8141e654a170a2282106f854037da294f (diff)
Add Snips Inference Module
Add a slightly modified version of the original Snips NLU
library. This module adds support for Python up to version
3.10.
Bug-AGL: SPEC-4856
Signed-off-by: Malik Talha <talhamalik727x@gmail.com>
Change-Id: I6d7e9eb181e6ff4aed9b6291027877ccb9f0d846
Diffstat (limited to 'snips_inference_agl/slot_filler/features_utils.py')
-rw-r--r-- | snips_inference_agl/slot_filler/features_utils.py | 47
1 file changed, 47 insertions, 0 deletions
diff --git a/snips_inference_agl/slot_filler/features_utils.py b/snips_inference_agl/slot_filler/features_utils.py
new file mode 100644
index 0000000..483e9c0
--- /dev/null
+++ b/snips_inference_agl/slot_filler/features_utils.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+from copy import deepcopy
+
+from snips_inference_agl.common.dict_utils import LimitedSizeDict
+from snips_inference_agl.constants import END, RES_MATCH_RANGE, START
+
+_NGRAMS_CACHE = LimitedSizeDict(size_limit=1000)
+
+
+def get_all_ngrams(tokens):
+    from snips_nlu_utils import compute_all_ngrams
+
+    if not tokens:
+        return []
+    key = "<||>".join(tokens)
+    if key not in _NGRAMS_CACHE:
+        ngrams = compute_all_ngrams(tokens, len(tokens))
+        _NGRAMS_CACHE[key] = ngrams
+    return deepcopy(_NGRAMS_CACHE[key])
+
+
+def get_word_chunk(word, chunk_size, chunk_start, reverse=False):
+    if chunk_size < 1:
+        raise ValueError("chunk size should be >= 1")
+    if chunk_size > len(word):
+        return None
+    start = chunk_start - chunk_size if reverse else chunk_start
+    end = chunk_start if reverse else chunk_start + chunk_size
+    return word[start:end]
+
+
+def initial_string_from_tokens(tokens):
+    current_index = 0
+    s = ""
+    for t in tokens:
+        if t.start > current_index:
+            s += " " * (t.start - current_index)
+        s += t.value
+        current_index = t.end
+    return s
+
+
+def entity_filter(entity, start, end):
+    entity_start = entity[RES_MATCH_RANGE][START]
+    entity_end = entity[RES_MATCH_RANGE][END]
+    return entity_start <= start < end <= entity_end
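
For orientation, a short usage sketch of the helpers added above (not part of the commit). The Token namedtuple is a hypothetical stand-in modeling only the value/start/end attributes that initial_string_from_tokens actually reads; the library's real token type lives elsewhere. get_all_ngrams is left out because it requires the external snips_nlu_utils package; note that it memoizes results per token sequence (keyed by joining tokens on "<||>") in a LimitedSizeDict capped at 1000 entries and returns a deepcopy so callers cannot mutate the cached value.

    from collections import namedtuple

    from snips_inference_agl.constants import END, RES_MATCH_RANGE, START
    from snips_inference_agl.slot_filler.features_utils import (
        entity_filter, get_word_chunk, initial_string_from_tokens)

    # Hypothetical stand-in for the library's token type.
    Token = namedtuple("Token", ["value", "start", "end"])

    # get_word_chunk slices a fixed-size window out of a word:
    # forward from chunk_start, or (reverse=True) ending at chunk_start.
    assert get_word_chunk("lights", 3, 0) == "lig"                            # prefix
    assert get_word_chunk("lights", 3, len("lights"), reverse=True) == "hts"  # suffix
    assert get_word_chunk("on", 5, 0) is None                                 # chunk larger than word

    # initial_string_from_tokens rebuilds the raw text, re-inserting the
    # whitespace implied by gaps between token offsets.
    tokens = [Token("turn", 0, 4), Token("on", 5, 7), Token("lights", 10, 16)]
    assert initial_string_from_tokens(tokens) == "turn on   lights"

    # entity_filter is True iff the half-open span [start, end) falls
    # entirely inside the entity's resolved match range.
    entity = {RES_MATCH_RANGE: {START: 0, END: 16}}
    assert entity_filter(entity, 5, 7)        # token "on" lies inside the match
    assert not entity_filter(entity, 14, 20)  # span overruns the match range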