aboutsummaryrefslogtreecommitdiffstats
path: root/snips_inference_agl/entity_parser/entity_parser.py
blob: 46de55e1e89def0103ae75d7a4779a21387c247f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# coding=utf-8
from __future__ import unicode_literals

from abc import ABCMeta, abstractmethod

from future.builtins import object
from future.utils import with_metaclass

from snips_inference_agl.common.dict_utils import LimitedSizeDict

# pylint: disable=ungrouped-imports

try:
    from abc import abstractclassmethod
except ImportError:
    from snips_inference_agl.common.abc_utils import abstractclassmethod


# pylint: enable=ungrouped-imports


class EntityParser(with_metaclass(ABCMeta, object)):
    """Abstract base class of the entity parsers, with basic result caching

    Subclasses have to implement :meth:`_parse`, :meth:`persist` and
    :meth:`from_path`. The public entry point is :meth:`parse`, which wraps
    :meth:`_parse` with a cache lookup.
    """

    def __init__(self):
        # Parsing results, keyed by (text, normalized scope). The container is
        # a LimitedSizeDict created with size_limit=1000.
        self._cache = LimitedSizeDict(size_limit=1000)

    def parse(self, text, scope=None, use_cache=True):
        """Extract from the text the entities whose kind belongs to the scope

        Unless caching is disabled, results are stored per (text, scope) pair
        and reused on subsequent calls. The scope is sorted before being used
        in the cache key, hence the order of its items does not matter.

            Args:
                text (str): text to parse
                scope (list or set of str, optional): entity kinds to look
                    for. When None (the default), no restriction is passed to
                    the underlying parser, which is then expected to look for
                    all kinds of supported entities
                use_cache (bool): when False the internal cache is neither
                    read nor written, which can be useful if the output of
                    the parser depends on the current timestamp. Defaults to
                    True.

            Returns:
                list of dict: the parsed entities, each of them formatted as
                    a dict containing the string value, the resolved value,
                    the entity kind and the entity range. When the cache is
                    used, the object held by the cache is returned as is (no
                    copy is made here)
        """
        if not use_cache:
            return self._parse(text, scope)

        # Lists and sets are not hashable: turn the scope into a sorted tuple
        # so that it can be part of the cache key
        if scope is None:
            scope_key = None
        else:
            scope_key = tuple(sorted(scope))

        key = (text, scope_key)
        if key in self._cache:
            return self._cache[key]

        # Cache miss: delegate to the subclass and remember the outcome
        self._cache[key] = self._parse(text, scope)
        return self._cache[key]

    @abstractmethod
    def _parse(self, text, scope=None):
        """Actual parsing logic, to be implemented in each subclass of
         :class:`.EntityParser`

        This method is called by :meth:`parse`, which takes care of the
        caching.

            Args:
                text (str): text to parse
                scope (list or set of str, optional): entity kinds to look
                    for. When None (the default), the parser is expected to
                    search for all kinds of supported entities

            Returns:
                list of dict: list of the parsed entities. These entities
                    must have the same output format as the
                    :func:`snips_inference_agl.utils.result.parsed_entity` function
        """

    @abstractmethod
    def persist(self, path):
        """Abstract method to implement in each subclass

        NOTE(review): presumably saves the parser at the given *path*; the
        exact contract is defined by the subclasses -- to be confirmed.
        """

    @abstractclassmethod
    def from_path(cls, path):
        """Abstract class method to implement in each subclass

        NOTE(review): presumably builds a parser from what is stored at the
        given *path*; the exact contract is defined by the subclasses -- to be
        confirmed.
        """