blob: cc205a306f3b8e2ae459f733bba3545444803006 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
from __future__ import unicode_literals
import re
import string
# Per-language cache of compiled punctuation patterns; populated lazily by
# get_punctuation_regex() the first time a language is requested.
_PUNCTUATION_REGEXES = dict()
# Per-language cache of booleans recording the outcome of the num2words probe;
# populated lazily by supports_num2words().
_NUM2WORDS_SUPPORT = dict()
def get_default_sep(language):  # pylint: disable=unused-argument
    """Return the default token separator for ``language``.

    The ``language`` argument is accepted but not consulted: a single
    space is returned for every input.
    """
    return " "
def get_punctuation(language):  # pylint: disable=unused-argument
    """Return the punctuation characters for ``language`` as one string.

    The ``language`` argument is accepted but not consulted: the result is
    always ``string.punctuation``.
    """
    return string.punctuation
def get_punctuation_regex(language):
    """Return a compiled regex matching any one punctuation character.

    The pattern is an alternation of the escaped characters returned by
    ``get_punctuation(language)``. Compiled patterns are memoised per
    language in the module-level ``_PUNCTUATION_REGEXES`` dict, so repeated
    calls with the same language return the same pattern object.
    """
    # Cache hit: hand back the previously compiled pattern.
    if language in _PUNCTUATION_REGEXES:
        return _PUNCTUATION_REGEXES[language]

    # Cache miss: escape each character so regex metacharacters such as
    # "|" or "." are matched literally, then join them into an alternation.
    escaped_chars = map(re.escape, get_punctuation(language))
    compiled = re.compile(r"|".join(escaped_chars))
    _PUNCTUATION_REGEXES[language] = compiled
    return compiled
def supports_num2words(language):
    """Return True if ``num2words`` can convert numbers for ``language``.

    Support is probed by converting ``0`` with ``lang=language``; a
    ``NotImplementedError`` from that call is treated as "not supported".
    Any other exception propagates to the caller and nothing is cached.
    The outcome is memoised per language in the module-level
    ``_NUM2WORDS_SUPPORT`` dict, so the probe runs at most once per language.
    """
    # Imported inside the function, as in the original, so the module can be
    # imported without num2words being loaded up front.
    from num2words import num2words

    # Cache hit: reuse the earlier probe result.
    if language in _NUM2WORDS_SUPPORT:
        return _NUM2WORDS_SUPPORT[language]

    try:
        num2words(0, lang=language)
    except NotImplementedError:
        is_supported = False
    else:
        is_supported = True

    _NUM2WORDS_SUPPORT[language] = is_supported
    return is_supported
|