1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
from __future__ import unicode_literals
# NLU engine configuration, written as a single nested literal.
#
# Layout of the literal:
#   * "intent_parsers_configs" lists two parser configs, in this order:
#       1. "lookup_intent_parser"
#       2. "probabilistic_intent_parser", which nests a "crf_slot_filler"
#          config and a "log_reg_intent_classifier" config.
#   * Each entry of "feature_factory_configs" carries "args",
#     "factory_name" and "offsets"; one entry additionally sets "drop_out".
CONFIG = {
    "unit_name": "nlu_engine",
    "intent_parsers_configs": [
        # --- Parser 1 of 2 -------------------------------------------------
        {
            "unit_name": "lookup_intent_parser",
            "ignore_stop_words": True,
        },
        # --- Parser 2 of 2 -------------------------------------------------
        {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {
                "unit_name": "crf_slot_filler",
                "feature_factory_configs": [
                    # ngram features, n = 1
                    {
                        "args": {
                            "common_words_gazetteer_name":
                                "top_5000_words_stemmed",
                            "use_stemming": True,
                            "n": 1,
                        },
                        "factory_name": "ngram",
                        "offsets": [-2, -1, 0, 1, 2],
                    },
                    # ngram features, n = 2 (same gazetteer, fewer offsets)
                    {
                        "args": {
                            "common_words_gazetteer_name":
                                "top_5000_words_stemmed",
                            "use_stemming": True,
                            "n": 2,
                        },
                        "factory_name": "ngram",
                        "offsets": [-2, 1],
                    },
                    # Argument-less factories
                    {
                        "args": {},
                        "factory_name": "is_digit",
                        "offsets": [-1, 0, 1],
                    },
                    {
                        "args": {},
                        "factory_name": "is_first",
                        "offsets": [-2, -1, 0],
                    },
                    {
                        "args": {},
                        "factory_name": "is_last",
                        "offsets": [0, 1, 2],
                    },
                    # shape_ngram features for n = 1, 2 and 3
                    {
                        "args": {
                            "n": 1,
                        },
                        "factory_name": "shape_ngram",
                        "offsets": [0],
                    },
                    {
                        "args": {
                            "n": 2,
                        },
                        "factory_name": "shape_ngram",
                        "offsets": [-1, 0],
                    },
                    {
                        "args": {
                            "n": 3,
                        },
                        "factory_name": "shape_ngram",
                        "offsets": [-1],
                    },
                    # entity_match with the filter
                    # automatically_extensible = False
                    {
                        "args": {
                            "use_stemming": True,
                            "tagging_scheme_code": 2,
                            "entity_filter": {
                                "automatically_extensible": False,
                            },
                        },
                        "factory_name": "entity_match",
                        "offsets": [-2, -1, 0],
                    },
                    # Same entity_match settings as the entry above, except
                    # that the filter is automatically_extensible = True and
                    # this entry also sets a drop_out of 0.5.
                    {
                        "args": {
                            "use_stemming": True,
                            "tagging_scheme_code": 2,
                            "entity_filter": {
                                "automatically_extensible": True,
                            },
                        },
                        "factory_name": "entity_match",
                        "offsets": [-2, -1, 0],
                        "drop_out": 0.5,
                    },
                    # builtin_entity_match uses tagging_scheme_code 1,
                    # whereas both entity_match entries use 2.
                    {
                        "args": {
                            "tagging_scheme_code": 1,
                        },
                        "factory_name": "builtin_entity_match",
                        "offsets": [-2, -1, 0],
                    },
                ],
                "crf_args": {
                    "c1": 0.1,
                    "c2": 0.1,
                    "algorithm": "lbfgs",
                },
                "tagging_scheme": 1,
                "data_augmentation_config": {
                    "min_utterances": 200,
                    "capitalization_ratio": 0.2,
                    "add_builtin_entities_examples": True,
                },
            },
            "intent_classifier_config": {
                "unit_name": "log_reg_intent_classifier",
                "data_augmentation_config": {
                    "min_utterances": 20,
                    "noise_factor": 5,
                    "add_builtin_entities_examples": False,
                    "max_unknown_words": None,
                    "unknown_word_prob": 0.0,
                    "unknown_words_replacement_string": None,
                },
                "featurizer_config": {
                    "unit_name": "featurizer",
                    "pvalue_threshold": 0.4,
                    "added_cooccurrence_feature_ratio": 0.0,
                    "tfidf_vectorizer_config": {
                        "unit_name": "tfidf_vectorizer",
                        "use_stemming": True,
                        "word_clusters_name": None,
                    },
                    "cooccurrence_vectorizer_config": {
                        "unit_name": "cooccurrence_vectorizer",
                        "window_size": None,
                        "filter_stop_words": True,
                        "unknown_words_replacement_string": None,
                        "keep_order": True,
                    },
                },
            },
            "noise_reweight_factor": 1,
        },
    ],
}
|