aboutsummaryrefslogtreecommitdiffstats
path: root/snips_inference_agl/languages.py
blob: cc205a306f3b8e2ae459f733bba3545444803006 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from __future__ import unicode_literals

import re
import string

# Per-language cache of compiled punctuation regexes, filled lazily by
# get_punctuation_regex.
_PUNCTUATION_REGEXES = {}
# Per-language cache of booleans recording whether num2words accepted the
# language, filled lazily by supports_num2words.
_NUM2WORDS_SUPPORT = {}


# pylint:disable=unused-argument
def get_default_sep(language):
    """Return the default separator string, a single space.

    The ``language`` argument is accepted but not consulted: the same
    separator is returned whatever value is passed.
    """
    separator = " "
    return separator


# pylint:enable=unused-argument

# pylint:disable=unused-argument
def get_punctuation(language):
    """Return the punctuation characters as a single string.

    The value is ``string.punctuation``; the ``language`` argument is
    accepted but not consulted.
    """
    punctuation_chars = string.punctuation
    return punctuation_chars


# pylint:enable=unused-argument


def get_punctuation_regex(language):
    """Return a compiled regex matching any single punctuation character.

    The pattern is an alternation of every character returned by
    ``get_punctuation(language)``, each passed through ``re.escape``. The
    compiled regex is stored in ``_PUNCTUATION_REGEXES`` under ``language``
    so later calls for the same language reuse the same object.
    """
    # The cache is only mutated, never rebound, so no ``global`` is needed.
    compiled = _PUNCTUATION_REGEXES.get(language)
    if compiled is None:
        escaped_symbols = [
            re.escape(symbol) for symbol in get_punctuation(language)
        ]
        compiled = re.compile(r"|".join(escaped_symbols))
        _PUNCTUATION_REGEXES[language] = compiled
    return compiled


def supports_num2words(language):
    """Tell whether ``num2words`` can convert numbers for ``language``.

    Support is probed by calling ``num2words(0, lang=language)``: a
    ``NotImplementedError`` is recorded as "not supported", a successful
    call as "supported". The answer is stored in ``_NUM2WORDS_SUPPORT`` so
    the probe runs at most once per language. Any other exception raised
    by the probe propagates and nothing is cached.

    Returns:
        bool: True if the probe call succeeded, False if it raised
        ``NotImplementedError``.
    """
    # Imported inside the function, so the import runs on each call
    # rather than when this module is loaded.
    from num2words import num2words

    if language in _NUM2WORDS_SUPPORT:
        return _NUM2WORDS_SUPPORT[language]

    try:
        num2words(0, lang=language)
    except NotImplementedError:
        is_supported = False
    else:
        is_supported = True

    _NUM2WORDS_SUPPORT[language] = is_supported
    return is_supported