diff options
author | Malik Talha <talhamalik727x@gmail.com> | 2023-10-22 21:06:23 +0500 |
---|---|---|
committer | Jan-Simon Moeller <jsmoeller@linuxfoundation.org> | 2023-10-23 14:38:13 +0000 |
commit | 697a1adce1e463079e640b55d6386cf82d7bd6bc (patch) | |
tree | 86e299cc7fe12b10c2e549f640924b61c7d07a95 /snips_inference_agl/default_configs/config_en.py | |
parent | 97029ab8141e654a170a2282106f854037da294f (diff) |
Add Snips Inference Module
Add slightly modified version of the original Snips NLU
library. This module adds support for Python up to version
3.10.
Bug-AGL: SPEC-4856
Signed-off-by: Malik Talha <talhamalik727x@gmail.com>
Change-Id: I6d7e9eb181e6ff4aed9b6291027877ccb9f0d846
Diffstat (limited to 'snips_inference_agl/default_configs/config_en.py')
-rw-r--r-- | snips_inference_agl/default_configs/config_en.py | 145 |
1 files changed, 145 insertions, 0 deletions
# snips_inference_agl/default_configs/config_en.py
# (new file in this commit, mode 100644, blob 12f7ae1)
"""Default configuration dictionary for the ``nlu_engine`` unit.

``CONFIG`` is a plain nested dict. It declares two intent parsers, in this
order: a ``lookup_intent_parser`` and a ``probabilistic_intent_parser``. The
latter is made of a CRF slot filler and a logistic-regression intent
classifier.

NOTE(review): the ``_en`` suffix presumably means "English"; how the engine
consumes this dict is not visible from this file -- confirm against the
loader.
"""

from __future__ import unicode_literals

CONFIG = {
    "unit_name": "nlu_engine",
    "intent_parsers_configs": [
        {
            "unit_name": "lookup_intent_parser",
            "ignore_stop_words": True
        },
        {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {
                "unit_name": "crf_slot_filler",
                # Each entry names a feature factory, its arguments and the
                # token offsets it is configured with.
                "feature_factory_configs": [
                    {
                        "args": {
                            "common_words_gazetteer_name":
                                "top_10000_words_stemmed",
                            "use_stemming": True,
                            "n": 1
                        },
                        "factory_name": "ngram",
                        "offsets": [-2, -1, 0, 1, 2]
                    },
                    {
                        "args": {
                            "common_words_gazetteer_name":
                                "top_10000_words_stemmed",
                            "use_stemming": True,
                            "n": 2
                        },
                        "factory_name": "ngram",
                        "offsets": [-2, 1]
                    },
                    {
                        "args": {},
                        "factory_name": "is_digit",
                        "offsets": [-1, 0, 1]
                    },
                    {
                        "args": {},
                        "factory_name": "is_first",
                        "offsets": [-2, -1, 0]
                    },
                    {
                        "args": {},
                        "factory_name": "is_last",
                        "offsets": [0, 1, 2]
                    },
                    {
                        "args": {"n": 1},
                        "factory_name": "shape_ngram",
                        "offsets": [0]
                    },
                    {
                        "args": {"n": 2},
                        "factory_name": "shape_ngram",
                        "offsets": [-1, 0]
                    },
                    {
                        "args": {"n": 3},
                        "factory_name": "shape_ngram",
                        "offsets": [-1]
                    },
                    # entity_match restricted to entities that are NOT
                    # automatically extensible.
                    {
                        "args": {
                            "use_stemming": True,
                            "tagging_scheme_code": 2,
                            "entity_filter": {
                                "automatically_extensible": False
                            }
                        },
                        "factory_name": "entity_match",
                        "offsets": [-2, -1, 0]
                    },
                    # Same feature for automatically extensible entities;
                    # this is the only entry that sets "drop_out".
                    {
                        "args": {
                            "use_stemming": True,
                            "tagging_scheme_code": 2,
                            "entity_filter": {
                                "automatically_extensible": True
                            }
                        },
                        "factory_name": "entity_match",
                        "offsets": [-2, -1, 0],
                        "drop_out": 0.5
                    },
                    {
                        "args": {"tagging_scheme_code": 1},
                        "factory_name": "builtin_entity_match",
                        "offsets": [-2, -1, 0]
                    },
                    {
                        "args": {
                            "cluster_name": "brown_clusters",
                            "use_stemming": False
                        },
                        "factory_name": "word_cluster",
                        "offsets": [-2, -1, 0, 1]
                    }
                ],
                "crf_args": {
                    "c1": 0.1,
                    "c2": 0.1,
                    "algorithm": "lbfgs"
                },
                "tagging_scheme": 1,
                "data_augmentation_config": {
                    "min_utterances": 200,
                    "capitalization_ratio": 0.2,
                    "add_builtin_entities_examples": True
                }
            },
            "intent_classifier_config": {
                "unit_name": "log_reg_intent_classifier",
                "data_augmentation_config": {
                    "min_utterances": 20,
                    "noise_factor": 5,
                    "add_builtin_entities_examples": False,
                    "max_unknown_words": None,
                    "unknown_word_prob": 0.0,
                    "unknown_words_replacement_string": None
                },
                "featurizer_config": {
                    "unit_name": "featurizer",
                    "pvalue_threshold": 0.4,
                    "added_cooccurrence_feature_ratio": 0.0,
                    "tfidf_vectorizer_config": {
                        "unit_name": "tfidf_vectorizer",
                        "use_stemming": False,
                        "word_clusters_name": None
                    },
                    "cooccurrence_vectorizer_config": {
                        "unit_name": "cooccurrence_vectorizer",
                        "window_size": None,
                        "filter_stop_words": True,
                        "unknown_words_replacement_string": None,
                        "keep_order": True
                    }
                },
                "noise_reweight_factor": 1,
            }
        }
    ]
}