aboutsummaryrefslogtreecommitdiffstats
path: root/snips_inference_agl/default_configs/config_pt_br.py
blob: 450f0dbba7a950b1c26c5a7e86faf98e2e8add4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from __future__ import unicode_literals

def _build_config():
    """Assemble the nested configuration dict exposed as ``CONFIG``.

    The result has two top-level keys: ``"unit_name"`` and
    ``"intent_parsers_configs"``, the latter holding one config dict for the
    ``lookup_intent_parser`` unit followed by one for the
    ``probabilistic_intent_parser`` unit. Every dict and list in the result
    is created fresh by this call; nothing is shared between entries.
    """

    def feature(factory_name, offsets, args=None, drop_out=None):
        # Build one entry of "feature_factory_configs". The "drop_out" key is
        # only present when a value is supplied.
        entry = {
            "args": {} if args is None else args,
            "factory_name": factory_name,
            "offsets": list(offsets),
        }
        if drop_out is not None:
            entry["drop_out"] = drop_out
        return entry

    def stemmed_ngram(n, offsets):
        # "ngram" entry; both entries in this config differ only in "n" and
        # "offsets".
        ngram_args = {
            "common_words_gazetteer_name": "top_5000_words_stemmed",
            "use_stemming": True,
            "n": n,
        }
        return feature("ngram", offsets, args=ngram_args)

    def entity_match(automatically_extensible, drop_out=None):
        # "entity_match" entry; the two entries in this config differ in the
        # "automatically_extensible" filter value and in "drop_out".
        match_args = {
            "use_stemming": True,
            "tagging_scheme_code": 2,
            "entity_filter": {
                "automatically_extensible": automatically_extensible,
            },
        }
        return feature(
            "entity_match", (-2, -1, 0), args=match_args, drop_out=drop_out)

    # --- lookup intent parser -------------------------------------------
    lookup_parser = {
        "unit_name": "lookup_intent_parser",
        "ignore_stop_words": True,
    }

    # --- CRF slot filler -------------------------------------------------
    feature_factories = [
        stemmed_ngram(1, (-2, -1, 0, 1, 2)),
        stemmed_ngram(2, (-2, 1)),
        feature("is_digit", (-1, 0, 1)),
        feature("is_first", (-2, -1, 0)),
        feature("is_last", (0, 1, 2)),
        feature("shape_ngram", (0,), args={"n": 1}),
        feature("shape_ngram", (-1, 0), args={"n": 2}),
        feature("shape_ngram", (-1,), args={"n": 3}),
        entity_match(False),
        entity_match(True, drop_out=0.5),
        feature(
            "builtin_entity_match",
            (-2, -1, 0),
            args={"tagging_scheme_code": 1},
        ),
    ]

    slot_filler = {
        "unit_name": "crf_slot_filler",
        "feature_factory_configs": feature_factories,
        "crf_args": {"c1": 0.1, "c2": 0.1, "algorithm": "lbfgs"},
        "tagging_scheme": 1,
        "data_augmentation_config": {
            "min_utterances": 200,
            "capitalization_ratio": 0.2,
            "add_builtin_entities_examples": True,
        },
    }

    # --- logistic-regression intent classifier ---------------------------
    tfidf_vectorizer = {
        "unit_name": "tfidf_vectorizer",
        "use_stemming": True,
        "word_clusters_name": None,
    }
    cooccurrence_vectorizer = {
        "unit_name": "cooccurrence_vectorizer",
        "window_size": None,
        "filter_stop_words": True,
        "unknown_words_replacement_string": None,
        "keep_order": True,
    }
    featurizer = {
        "unit_name": "featurizer",
        "pvalue_threshold": 0.4,
        "added_cooccurrence_feature_ratio": 0.0,
        "tfidf_vectorizer_config": tfidf_vectorizer,
        "cooccurrence_vectorizer_config": cooccurrence_vectorizer,
    }
    intent_classifier = {
        "unit_name": "log_reg_intent_classifier",
        "data_augmentation_config": {
            "min_utterances": 20,
            "noise_factor": 5,
            "add_builtin_entities_examples": False,
            "max_unknown_words": None,
            "unknown_word_prob": 0.0,
            "unknown_words_replacement_string": None,
        },
        "featurizer_config": featurizer,
    }

    # --- probabilistic intent parser -------------------------------------
    probabilistic_parser = {
        "unit_name": "probabilistic_intent_parser",
        "slot_filler_config": slot_filler,
        "intent_classifier_config": intent_classifier,
        "noise_reweight_factor": 1,
    }

    return {
        "unit_name": "nlu_engine",
        # Order matters to readers of this list: lookup parser first, then
        # the probabilistic parser, exactly as listed here.
        "intent_parsers_configs": [lookup_parser, probabilistic_parser],
    }


# Module-level configuration dict; built once at import time.
CONFIG = _build_config()