about summary refs log tree commit diff stats
path: root/snips_inference_agl/default_configs/config_it.py
diff options
context:
space:
mode:
Diffstat (limited to 'snips_inference_agl/default_configs/config_it.py')
-rw-r--r-- snips_inference_agl/default_configs/config_it.py 137
1 files changed, 137 insertions, 0 deletions
diff --git a/snips_inference_agl/default_configs/config_it.py b/snips_inference_agl/default_configs/config_it.py
new file mode 100644
index 0000000..a2da590
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_it.py
@@ -0,0 +1,137 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # Default configuration dict exported by this module ("it" in the file name is presumably Italian -- confirm).
+ "unit_name": "nlu_engine",  # Every nested config dict names its unit through a "unit_name" key.
+ "intent_parsers_configs": [  # Two parser configs, listed in this order: lookup, then probabilistic.
+ {
+ "unit_name": "lookup_intent_parser",
+ "ignore_stop_words": True
+ },
+ {
+ "unit_name": "probabilistic_intent_parser",  # Holds a slot filler config and an intent classifier config.
+ "slot_filler_config": {
+ "unit_name": "crf_slot_filler",
+ "feature_factory_configs": [  # Each entry has "args", "factory_name" and "offsets"; one entry also sets "drop_out".
+ {
+ "args": {
+ "common_words_gazetteer_name":
+ "top_10000_words_stemmed",
+ "use_stemming": True,
+ "n": 1
+ },
+ "factory_name": "ngram",
+ "offsets": [-2, -1, 0, 1, 2]  # NOTE(review): presumably token positions relative to the current token -- confirm.
+ },
+ {
+ "args": {
+ "common_words_gazetteer_name":
+ "top_10000_words_stemmed",
+ "use_stemming": True,
+ "n": 2
+ },
+ "factory_name": "ngram",
+ "offsets": [-2, 1]  # Only two offsets for n=2, versus five for the n=1 entry above.
+ },
+ {
+ "args": {},
+ "factory_name": "is_digit",
+ "offsets": [-1, 0, 1]
+ },
+ {
+ "args": {},
+ "factory_name": "is_first",
+ "offsets": [-2, -1, 0]
+ },
+ {
+ "args": {},
+ "factory_name": "is_last",
+ "offsets": [0, 1, 2]
+ },
+ {
+ "args": {"n": 1},  # shape_ngram is configured three times below, with n = 1, 2 and 3.
+ "factory_name": "shape_ngram",
+ "offsets": [0]
+ },
+ {
+ "args": {"n": 2},
+ "factory_name": "shape_ngram",
+ "offsets": [-1, 0]
+ },
+ {
+ "args": {"n": 3},
+ "factory_name": "shape_ngram",
+ "offsets": [-1]
+ },
+ {
+ "args": {
+ "use_stemming": True,
+ "tagging_scheme_code": 2,  # NOTE(review): numeric code; its meaning is defined outside this file.
+ "entity_filter": {
+ "automatically_extensible": False  # First entity_match entry: filter flag is False, no "drop_out" key.
+ }
+ },
+ "factory_name": "entity_match",
+ "offsets": [-2, -1, 0]
+ },
+ {
+ "args": {
+ "use_stemming": True,
+ "tagging_scheme_code": 2,
+ "entity_filter": {
+ "automatically_extensible": True  # Second entity_match entry: same args except this flag is True.
+ }
+ },
+ "factory_name": "entity_match",
+ "offsets": [-2, -1, 0],
+ "drop_out": 0.5  # The only feature factory entry in this list that sets "drop_out".
+ },
+ {
+ "args": {"tagging_scheme_code": 1},  # Uses code 1, unlike the two entity_match entries (code 2).
+ "factory_name": "builtin_entity_match",
+ "offsets": [-2, -1, 0]
+ }
+ ],
+ "crf_args": {  # NOTE(review): presumably passed to the CRF trainer; key meanings are not shown here.
+ "c1": 0.1,
+ "c2": 0.1,
+ "algorithm": "lbfgs"
+ },
+ "tagging_scheme": 1,  # NOTE(review): numeric code; its meaning is defined outside this file.
+ "data_augmentation_config": {  # Slot filler augmentation settings; separate from the classifier's block below.
+ "min_utterances": 200,
+ "capitalization_ratio": 0.2,
+ "add_builtin_entities_examples": True
+ }
+ },
+ "intent_classifier_config": {
+ "unit_name": "log_reg_intent_classifier",
+ "data_augmentation_config": {  # Classifier augmentation: min_utterances is 20 here, versus 200 for the slot filler.
+ "min_utterances": 20,
+ "noise_factor": 5,
+ "add_builtin_entities_examples": False,
+ "max_unknown_words": None,  # The unknown-word settings are None / 0.0 in this config.
+ "unknown_word_prob": 0.0,
+ "unknown_words_replacement_string": None
+ },
+ "featurizer_config": {  # Nests a tfidf vectorizer config and a cooccurrence vectorizer config.
+ "unit_name": "featurizer",
+ "pvalue_threshold": 0.4,
+ "added_cooccurrence_feature_ratio": 0.0,
+ "tfidf_vectorizer_config": {
+ "unit_name": "tfidf_vectorizer",
+ "use_stemming": True,
+ "word_clusters_name": None
+ },
+ "cooccurrence_vectorizer_config": {
+ "unit_name": "cooccurrence_vectorizer",
+ "window_size": None,
+ "filter_stop_words": True,
+ "unknown_words_replacement_string": None,
+ "keep_order": True
+ }
+ },
+ "noise_reweight_factor": 1,
+ }
+ }
+ ]
+}