aboutsummaryrefslogtreecommitdiffstats
path: root/snips_inference_agl/entity_parser/entity_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'snips_inference_agl/entity_parser/entity_parser.py')
-rw-r--r--snips_inference_agl/entity_parser/entity_parser.py85
1 files changed, 85 insertions, 0 deletions
diff --git a/snips_inference_agl/entity_parser/entity_parser.py b/snips_inference_agl/entity_parser/entity_parser.py
new file mode 100644
index 0000000..46de55e
--- /dev/null
+++ b/snips_inference_agl/entity_parser/entity_parser.py
@@ -0,0 +1,85 @@
+# coding=utf-8
+from __future__ import unicode_literals
+
+from abc import ABCMeta, abstractmethod
+
+from future.builtins import object
+from future.utils import with_metaclass
+
+from snips_inference_agl.common.dict_utils import LimitedSizeDict
+
+# pylint: disable=ungrouped-imports
+
+try:
+ from abc import abstractclassmethod
+except ImportError:
+ from snips_inference_agl.common.abc_utils import abstractclassmethod
+
+
+# pylint: enable=ungrouped-imports
+
+
+class EntityParser(with_metaclass(ABCMeta, object)):
+ """Abstraction of a entity parser implementing some basic caching
+ """
+
+ def __init__(self):
+ self._cache = LimitedSizeDict(size_limit=1000)
+
+ def parse(self, text, scope=None, use_cache=True):
+ """Search the given text for entities defined in the scope. If no
+ scope is provided, search for all kinds of entities.
+
+ Args:
+ text (str): input text
+ scope (list or set of str, optional): if provided the parser
+ will only look for entities which entity kind is given in
+ the scope. By default the scope is None and the parser
+ will search for all kinds of supported entities
+ use_cache (bool): if False the internal cache will not be use,
+ this can be useful if the output of the parser depends on
+ the current timestamp. Defaults to True.
+
+ Returns:
+ list of dict: list of the parsed entities formatted as a dict
+ containing the string value, the resolved value, the
+ entity kind and the entity range
+ """
+ if not use_cache:
+ return self._parse(text, scope)
+ scope_key = tuple(sorted(scope)) if scope is not None else scope
+ cache_key = (text, scope_key)
+ if cache_key not in self._cache:
+ parser_result = self._parse(text, scope)
+ self._cache[cache_key] = parser_result
+ return self._cache[cache_key]
+
+ @abstractmethod
+ def _parse(self, text, scope=None):
+ """Internal parse method to implement in each subclass of
+ :class:`.EntityParser`
+
+ Args:
+ text (str): input text
+ scope (list or set of str, optional): if provided the parser
+ will only look for entities which entity kind is given in
+ the scope. By default the scope is None and the parser
+ will search for all kinds of supported entities
+ use_cache (bool): if False the internal cache will not be use,
+ this can be useful if the output of the parser depends on
+ the current timestamp. Defaults to True.
+
+ Returns:
+ list of dict: list of the parsed entities. These entity must
+ have the same output format as the
+ :func:`snips_inference_agl.utils.result.parsed_entity` function
+ """
+ pass
+
+ @abstractmethod
+ def persist(self, path):
+ pass
+
+ @abstractclassmethod
+ def from_path(cls, path):
+ pass