Add Snips Inference Module

Add a slightly modified version of the original Snips NLU library. This module adds support for Python up to version 3.10. Bug-AGL: SPEC-4856 Signed-off-by: Malik Talha <talhamalik727x@gmail.com> Change-Id: I6d7e9eb181e6ff4aed9b6291027877ccb9f0d846
author: Malik Talha <talhamalik727x@gmail.com> 2023-10-22 21:06:23 +0500
committer: Jan-Simon Moeller <jsmoeller@linuxfoundation.org> 2023-10-23 14:38:13 +0000
commit: 697a1adce1e463079e640b55d6386cf82d7bd6bc (patch)
tree: 86e299cc7fe12b10c2e549f640924b61c7d07a95 /snips_inference_agl/default_configs/config_fr.py
parent: 97029ab8141e654a170a2282106f854037da294f (diff)
1 files changed, 137 insertions, 0 deletions
diff --git a/snips_inference_agl/default_configs/config_fr.py b/snips_inference_agl/default_configs/config_fr.py
new file mode 100644
index 0000000..a2da590
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_fr.py
@@ -0,0 +1,137 @@
+from __future__ import unicode_literals
+
+CONFIG = {
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [
+        {
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [
+                    {
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    },
+                    {
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,
+                "data_augmentation_config": {
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                }
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": True,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,
+            }
+        }
+    ]
+}
author	Malik Talha <talhamalik727x@gmail.com>	2023-10-22 21:06:23 +0500
committer	Jan-Simon Moeller <jsmoeller@linuxfoundation.org>	2023-10-23 14:38:13 +0000
commit	697a1adce1e463079e640b55d6386cf82d7bd6bc (patch)
tree	86e299cc7fe12b10c2e549f640924b61c7d07a95 /snips_inference_agl/default_configs/config_fr.py
parent	97029ab8141e654a170a2282106f854037da294f (diff)