aboutsummaryrefslogtreecommitdiffstats
path: root/snips_inference_agl/default_configs/config_en.py
diff options
context:
space:
mode:
authorMalik Talha <talhamalik727x@gmail.com>2023-10-22 21:06:23 +0500
committerJan-Simon Moeller <jsmoeller@linuxfoundation.org>2023-10-23 14:38:13 +0000
commit697a1adce1e463079e640b55d6386cf82d7bd6bc (patch)
tree86e299cc7fe12b10c2e549f640924b61c7d07a95 /snips_inference_agl/default_configs/config_en.py
parent97029ab8141e654a170a2282106f854037da294f (diff)
Add Snips Inference Module
Add a slightly modified version of the original Snips NLU library. This module adds support for Python versions up to 3.10. Bug-AGL: SPEC-4856 Signed-off-by: Malik Talha <talhamalik727x@gmail.com> Change-Id: I6d7e9eb181e6ff4aed9b6291027877ccb9f0d846
Diffstat (limited to 'snips_inference_agl/default_configs/config_en.py')
-rw-r--r--snips_inference_agl/default_configs/config_en.py145
1 files changed, 145 insertions, 0 deletions
diff --git a/snips_inference_agl/default_configs/config_en.py b/snips_inference_agl/default_configs/config_en.py
new file mode 100644
index 0000000..12f7ae1
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_en.py
@@ -0,0 +1,145 @@
+from __future__ import unicode_literals
+
+CONFIG = {
+ "unit_name": "nlu_engine",
+ "intent_parsers_configs": [
+ {
+ "unit_name": "lookup_intent_parser",
+ "ignore_stop_words": True
+ },
+ {
+ "unit_name": "probabilistic_intent_parser",
+ "slot_filler_config": {
+ "unit_name": "crf_slot_filler",
+ "feature_factory_configs": [
+ {
+ "args": {
+ "common_words_gazetteer_name":
+ "top_10000_words_stemmed",
+ "use_stemming": True,
+ "n": 1
+ },
+ "factory_name": "ngram",
+ "offsets": [-2, -1, 0, 1, 2]
+ },
+ {
+ "args": {
+ "common_words_gazetteer_name":
+ "top_10000_words_stemmed",
+ "use_stemming": True,
+ "n": 2
+ },
+ "factory_name": "ngram",
+ "offsets": [-2, 1]
+ },
+ {
+ "args": {},
+ "factory_name": "is_digit",
+ "offsets": [-1, 0, 1]
+ },
+ {
+ "args": {},
+ "factory_name": "is_first",
+ "offsets": [-2, -1, 0]
+ },
+ {
+ "args": {},
+ "factory_name": "is_last",
+ "offsets": [0, 1, 2]
+ },
+ {
+ "args": {"n": 1},
+ "factory_name": "shape_ngram",
+ "offsets": [0]
+ },
+ {
+ "args": {"n": 2},
+ "factory_name": "shape_ngram",
+ "offsets": [-1, 0]
+ },
+ {
+ "args": {"n": 3},
+ "factory_name": "shape_ngram",
+ "offsets": [-1]
+ },
+ {
+ "args": {
+ "use_stemming": True,
+ "tagging_scheme_code": 2,
+ "entity_filter": {
+ "automatically_extensible": False
+ }
+ },
+ "factory_name": "entity_match",
+ "offsets": [-2, -1, 0]
+ },
+ {
+ "args": {
+ "use_stemming": True,
+ "tagging_scheme_code": 2,
+ "entity_filter": {
+ "automatically_extensible": True
+ }
+ },
+ "factory_name": "entity_match",
+ "offsets": [-2, -1, 0],
+ "drop_out": 0.5
+ },
+ {
+ "args": {"tagging_scheme_code": 1},
+ "factory_name": "builtin_entity_match",
+ "offsets": [-2, -1, 0]
+ },
+ {
+ "args": {
+ "cluster_name": "brown_clusters",
+ "use_stemming": False
+ },
+ "factory_name": "word_cluster",
+ "offsets": [-2, -1, 0, 1]
+ }
+ ],
+ "crf_args": {
+ "c1": 0.1,
+ "c2": 0.1,
+ "algorithm": "lbfgs"
+ },
+ "tagging_scheme": 1,
+ "data_augmentation_config": {
+ "min_utterances": 200,
+ "capitalization_ratio": 0.2,
+ "add_builtin_entities_examples": True
+ }
+ },
+ "intent_classifier_config": {
+ "unit_name": "log_reg_intent_classifier",
+ "data_augmentation_config": {
+ "min_utterances": 20,
+ "noise_factor": 5,
+ "add_builtin_entities_examples": False,
+ "max_unknown_words": None,
+ "unknown_word_prob": 0.0,
+ "unknown_words_replacement_string": None
+ },
+ "featurizer_config": {
+ "unit_name": "featurizer",
+ "pvalue_threshold": 0.4,
+ "added_cooccurrence_feature_ratio": 0.0,
+ "tfidf_vectorizer_config": {
+ "unit_name": "tfidf_vectorizer",
+ "use_stemming": False,
+ "word_clusters_name": None
+ },
+ "cooccurrence_vectorizer_config": {
+ "unit_name": "cooccurrence_vectorizer",
+ "window_size": None,
+ "filter_stop_words": True,
+ "unknown_words_replacement_string": None,
+ "keep_order": True
+ }
+ },
+ "noise_reweight_factor": 1,
+ }
+ }
+ ]
+}