diff options
Diffstat (limited to 'snips_inference_agl/entity_parser/entity_parser.py')
-rw-r--r-- | snips_inference_agl/entity_parser/entity_parser.py | 85 |
1 files changed, 85 insertions, 0 deletions
# coding=utf-8
# Recovered from the diff of
# snips_inference_agl/entity_parser/entity_parser.py (new file, 46de55e).
from __future__ import unicode_literals

from abc import ABCMeta, abstractmethod

from future.builtins import object
from future.utils import with_metaclass

from snips_inference_agl.common.dict_utils import LimitedSizeDict

# pylint: disable=ungrouped-imports

try:
    from abc import abstractclassmethod
except ImportError:
    from snips_inference_agl.common.abc_utils import abstractclassmethod


# pylint: enable=ungrouped-imports


class EntityParser(with_metaclass(ABCMeta, object)):
    """Abstract base for entity parsers, with basic result caching.

    Results returned by :meth:`parse` are memoized in a
    :class:`LimitedSizeDict` created with ``size_limit=1000``. Subclasses
    supply the actual parsing logic through :meth:`_parse`.
    """

    def __init__(self):
        self._cache = LimitedSizeDict(size_limit=1000)

    def parse(self, text, scope=None, use_cache=True):
        """Search the given text for the entities defined in the scope.

        When no scope is provided, all kinds of entities are searched.

        Args:
            text (str): input text
            scope (list or set of str, optional): when provided, the parser
                only looks for entities whose entity kind is listed in the
                scope. Defaults to None, in which case all kinds of
                supported entities are searched
            use_cache (bool): when False the internal cache is bypassed,
                which can be useful if the output of the parser depends on
                the current timestamp. Defaults to True.

        Returns:
            list of dict: list of the parsed entities formatted as a dict
                containing the string value, the resolved value, the
                entity kind and the entity range
        """
        if use_cache:
            # Sort the scope so that the same kinds given in a different
            # order (or as a set) map to the same hashable cache key.
            normalized_scope = None if scope is None else tuple(sorted(scope))
            key = (text, normalized_scope)
            if key not in self._cache:
                entities = self._parse(text, scope)
                self._cache[key] = entities
            return self._cache[key]
        return self._parse(text, scope)

    @abstractmethod
    def _parse(self, text, scope=None):
        """Internal parse method to implement in each subclass of
        :class:`.EntityParser`

        Args:
            text (str): input text
            scope (list or set of str, optional): when provided, the parser
                only looks for entities whose entity kind is listed in the
                scope. Defaults to None, in which case all kinds of
                supported entities are searched

        Returns:
            list of dict: list of the parsed entities. These entities must
                have the same output format as the
                :func:`snips_inference_agl.utils.result.parsed_entity`
                function
        """

    @abstractmethod
    def persist(self, path):
        """Abstract hook that every subclass must implement.

        NOTE(review): presumably serializes the parser to ``path``; the
        exact on-disk format is defined by the subclasses -- confirm there.
        """

    @abstractclassmethod
    def from_path(cls, path):
        """Abstract alternate constructor that every subclass must implement.

        NOTE(review): presumably the counterpart of :meth:`persist`, building
        a parser from ``path`` -- confirm against the subclasses.
        """