From 697a1adce1e463079e640b55d6386cf82d7bd6bc Mon Sep 17 00:00:00 2001
From: Malik Talha
Date: Sun, 22 Oct 2023 21:06:23 +0500
Subject: Add Snips Inference Module

Add slightly modified version of the original Snips NLU library. This module adds support for Python upto version 3.10.

Bug-AGL: SPEC-4856

Signed-off-by: Malik Talha
Change-Id: I6d7e9eb181e6ff4aed9b6291027877ccb9f0d846
---
 snips_inference_agl/default_configs/config_ko.py | 155 +++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 snips_inference_agl/default_configs/config_ko.py

(limited to 'snips_inference_agl/default_configs/config_ko.py')

diff --git a/snips_inference_agl/default_configs/config_ko.py b/snips_inference_agl/default_configs/config_ko.py
new file mode 100644
index 0000000..1630796
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_ko.py
@@ -0,0 +1,155 @@
+from __future__ import unicode_literals
+
+CONFIG = {
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [
+        {
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": False
+        },
+        {
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [
+                    {
+                        "args": {
+                            "common_words_gazetteer_name": None,
+                            "use_stemming": False,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    },
+                    {
+                        "args": {
+                            "common_words_gazetteer_name": None,
+                            "use_stemming": False,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {
+                        "args": {"prefix_size": 1},
+                        "factory_name": "prefix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"prefix_size": 2},
+                        "factory_name": "prefix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"suffix_size": 1},
+                        "factory_name": "suffix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"suffix_size": 2},
+                        "factory_name": "suffix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {
+                        "args": {
+                            "use_stemming": False,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {
+                            "use_stemming": False,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,
+                "data_augmentation_config": {
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                }
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": False,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,
+            }
+        }
+    ]
+}
-- 
cgit 1.2.3-korg