10 files changed, 1335 insertions, 0 deletions
diff --git a/snips_inference_agl/default_configs/__init__.py b/snips_inference_agl/default_configs/__init__.py
new file mode 100644
index 0000000..fc66d33
--- /dev/null
+++ b/snips_inference_agl/default_configs/__init__.py
@@ -0,0 +1,26 @@
+from __future__ import unicode_literals
+
+from snips_inference_agl.constants import (
+    LANGUAGE_DE, LANGUAGE_EN, LANGUAGE_ES, LANGUAGE_FR, LANGUAGE_IT,
+    LANGUAGE_JA, LANGUAGE_KO, LANGUAGE_PT_BR, LANGUAGE_PT_PT)
+from .config_de import CONFIG as CONFIG_DE
+from .config_en import CONFIG as CONFIG_EN
+from .config_es import CONFIG as CONFIG_ES
+from .config_fr import CONFIG as CONFIG_FR
+from .config_it import CONFIG as CONFIG_IT
+from .config_ja import CONFIG as CONFIG_JA
+from .config_ko import CONFIG as CONFIG_KO
+from .config_pt_br import CONFIG as CONFIG_PT_BR
+from .config_pt_pt import CONFIG as CONFIG_PT_PT
+
+DEFAULT_CONFIGS = {  # language constant -> CONFIG dict of config_<lang>.py
+    LANGUAGE_DE: CONFIG_DE,
+    LANGUAGE_EN: CONFIG_EN,
+    LANGUAGE_ES: CONFIG_ES,
+    LANGUAGE_FR: CONFIG_FR,
+    LANGUAGE_IT: CONFIG_IT,
+    LANGUAGE_JA: CONFIG_JA,
+    LANGUAGE_KO: CONFIG_KO,
+    LANGUAGE_PT_BR: CONFIG_PT_BR,
+    LANGUAGE_PT_PT: CONFIG_PT_PT,
+}
diff --git a/snips_inference_agl/default_configs/config_de.py b/snips_inference_agl/default_configs/config_de.py
new file mode 100644
index 0000000..200fc30
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_de.py
@@ -0,0 +1,159 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # defaults registered under LANGUAGE_DE in __init__.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two parser configs, in this order
+        {  # 1) lookup intent parser
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {  # 2) probabilistic parser: CRF slot filler + log-reg classifier
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 15 feature factories
+                    {  # unigrams (n=1); en/es/fr/it use the 10000 list
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_200000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    },
+                    {  # bigrams (n=2), same gazetteer
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_200000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {  # prefix, size 2 (no prefix/suffix in en/es/fr/it)
+                        "args": {
+                            "prefix_size": 2
+                        },
+                        "factory_name": "prefix",
+                        "offsets": [0]
+                    },
+                    {  # prefix, size 5
+                        "args": {"prefix_size": 5},
+                        "factory_name": "prefix",
+                        "offsets": [0]
+                    },
+                    {  # suffix, size 2
+                        "args": {"suffix_size": 2},
+                        "factory_name": "suffix",
+                        "offsets": [0]
+                    },
+                    {  # suffix, size 5
+                        "args": {"suffix_size": 5},
+                        "factory_name": "suffix",
+                        "offsets": [0]
+                    },
+                    {  # is_digit flag, no arguments
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {  # is_first flag, no arguments
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # is_last flag, no arguments
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {  # shape_ngram, n=1
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {  # shape_ngram, n=2
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {  # shape_ngram, n=3
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, filter automatically_extensible=False
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, extensible=True, with drop_out 0.5
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {  # builtin_entity_match, tagging_scheme_code 1
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {  # c1/c2: presumably L1/L2 penalties -- confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric code; meaning defined elsewhere
+                "data_augmentation_config": {  # slot-filler augmentation
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                }
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {  # classifier augmentation
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {  # tf-idf + cooccurrence vectorizers
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": True,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_en.py b/snips_inference_agl/default_configs/config_en.py
new file mode 100644
index 0000000..12f7ae1
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_en.py
@@ -0,0 +1,145 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # defaults registered under LANGUAGE_EN in __init__.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two parser configs, in this order
+        {  # 1) lookup intent parser
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {  # 2) probabilistic parser: CRF slot filler + log-reg classifier
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 12 feature factories
+                    {  # unigrams (n=1), stemmed
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    },
+                    {  # bigrams (n=2), same gazetteer
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {  # is_digit flag, no arguments
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {  # is_first flag, no arguments
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # is_last flag, no arguments
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {  # shape_ngram, n=1
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {  # shape_ngram, n=2
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {  # shape_ngram, n=3
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, filter automatically_extensible=False
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, extensible=True, with drop_out 0.5
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {  # builtin_entity_match, tagging_scheme_code 1
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # word_cluster: not present in config_de/es/fr/it
+                        "args": {
+                            "cluster_name": "brown_clusters",
+                            "use_stemming": False
+                        },
+                        "factory_name": "word_cluster",
+                        "offsets": [-2, -1, 0, 1]
+                    }
+                ],
+                "crf_args": {  # c1/c2: presumably L1/L2 penalties -- confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric code; meaning defined elsewhere
+                "data_augmentation_config": {  # slot-filler augmentation
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                }
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {  # classifier augmentation
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {  # tf-idf + cooccurrence vectorizers
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": False,  # de/es/fr/it use True
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_es.py b/snips_inference_agl/default_configs/config_es.py
new file mode 100644
index 0000000..28969ce
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_es.py
@@ -0,0 +1,138 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # defaults registered under LANGUAGE_ES in __init__.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two parser configs, in this order
+        {  # 1) lookup intent parser
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {  # 2) probabilistic parser: CRF slot filler + log-reg classifier
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 11 feature factories
+                    {  # unigrams (n=1), stemmed
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    },
+                    {  # bigrams (n=2), same gazetteer
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {  # is_digit flag, no arguments
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {  # is_first flag, no arguments
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # is_last flag, no arguments
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {  # shape_ngram, n=1
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {  # shape_ngram, n=2
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {  # shape_ngram, n=3
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, filter automatically_extensible=False
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, extensible=True, with drop_out 0.5
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {  # builtin_entity_match, tagging_scheme_code 1
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {  # c1/c2: presumably L1/L2 penalties -- confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric code; meaning defined elsewhere
+                "data_augmentation_config": {  # slot-filler augmentation
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                },
+
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {  # classifier augmentation
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {  # tf-idf + cooccurrence vectorizers
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": True,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_fr.py b/snips_inference_agl/default_configs/config_fr.py
new file mode 100644
index 0000000..a2da590
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_fr.py
@@ -0,0 +1,137 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # LANGUAGE_FR defaults; same values as config_it.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two parser configs, in this order
+        {  # 1) lookup intent parser
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {  # 2) probabilistic parser: CRF slot filler + log-reg classifier
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 11 feature factories
+                    {  # unigrams (n=1), stemmed
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    },
+                    {  # bigrams (n=2), same gazetteer
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {  # is_digit flag, no arguments
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {  # is_first flag, no arguments
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # is_last flag, no arguments
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {  # shape_ngram, n=1
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {  # shape_ngram, n=2
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {  # shape_ngram, n=3
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, filter automatically_extensible=False
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, extensible=True, with drop_out 0.5
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {  # builtin_entity_match, tagging_scheme_code 1
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {  # c1/c2: presumably L1/L2 penalties -- confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric code; meaning defined elsewhere
+                "data_augmentation_config": {  # slot-filler augmentation
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                }
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {  # classifier augmentation
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {  # tf-idf + cooccurrence vectorizers
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": True,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_it.py b/snips_inference_agl/default_configs/config_it.py
new file mode 100644
index 0000000..a2da590
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_it.py
@@ -0,0 +1,137 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # LANGUAGE_IT defaults; same values as config_fr.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two parser configs, in this order
+        {  # 1) lookup intent parser
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {  # 2) probabilistic parser: CRF slot filler + log-reg classifier
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 11 feature factories
+                    {  # unigrams (n=1), stemmed
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    },
+                    {  # bigrams (n=2), same gazetteer
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_10000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {  # is_digit flag, no arguments
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {  # is_first flag, no arguments
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # is_last flag, no arguments
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {  # shape_ngram, n=1
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {  # shape_ngram, n=2
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {  # shape_ngram, n=3
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, filter automatically_extensible=False
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, extensible=True, with drop_out 0.5
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {  # builtin_entity_match, tagging_scheme_code 1
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {  # c1/c2: presumably L1/L2 penalties -- confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric code; meaning defined elsewhere
+                "data_augmentation_config": {  # slot-filler augmentation
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                }
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {  # classifier augmentation
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {  # tf-idf + cooccurrence vectorizers
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": True,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_ja.py b/snips_inference_agl/default_configs/config_ja.py
new file mode 100644
index 0000000..b28791f
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_ja.py
@@ -0,0 +1,164 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # default NLU engine config; exposed as CONFIG_JA / LANGUAGE_JA in __init__.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two entries, in this order: lookup, probabilistic
+        {
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": False
+        },
+        {
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 16 factory entries follow
+                    {
+                        "args": {
+                            "common_words_gazetteer_name": None,
+                            "use_stemming": False,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]  # presumably positions relative to the current token -- TODO confirm
+                    },
+                    {
+                        "args": {
+                            "common_words_gazetteer_name": None,
+                            "use_stemming": False,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 0, 1, 2]  # NOTE(review): -1 is absent here; confirm intentional
+                    },
+                    {
+                        "args": {"prefix_size": 1},
+                        "factory_name": "prefix",
+                        "offsets": [0, 1]
+                    },
+                    {
+                        "args": {"prefix_size": 2},
+                        "factory_name": "prefix",
+                        "offsets": [0, 1]
+                    },
+                    {
+                        "args": {"suffix_size": 1},
+                        "factory_name": "suffix",
+                        "offsets": [0, 1]
+                    },
+                    {
+                        "args": {"suffix_size": 2},
+                        "factory_name": "suffix",
+                        "offsets": [0, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, first entry: no drop_out key
+                        "args": {
+                            "use_stemming": False,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-1, 0, 1, 2],
+                    },
+                    {  # entity_match, second entry: same args as the first, plus drop_out
+                        "args": {
+                            "use_stemming": False,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False  # NOTE(review): config_ko/pt_br/pt_pt use True in their drop_out entry -- confirm
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-1, 0, 1, 2],
+                        "drop_out": 0.5
+                    },
+                    {
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-1, 0, 1, 2]
+                    },
+                    {
+                        "args": {
+                            "cluster_name": "w2v_clusters",  # same name as word_clusters_name in tfidf_vectorizer_config below
+                            "use_stemming": False
+                        },
+                        "factory_name": "word_cluster",
+                        "offsets": [-2, -1, 0, 1, 2]
+                    }
+                ],
+                "crf_args": {  # presumably forwarded to the CRF trainer -- TODO confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric scheme code; its meaning is defined outside this file
+                "data_augmentation_config": {
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                },
+
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.9,  # config_ko/pt_br/pt_pt use 0.4 here
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": False,
+                        "word_clusters_name": "w2v_clusters"
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,  # nested in the classifier config, not at parser level
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_ko.py b/snips_inference_agl/default_configs/config_ko.py
new file mode 100644
index 0000000..1630796
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_ko.py
@@ -0,0 +1,155 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # default NLU engine config; exposed as CONFIG_KO / LANGUAGE_KO in __init__.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two entries, in this order: lookup, probabilistic
+        {
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": False
+        },
+        {
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 15 factory entries follow
+                    {
+                        "args": {
+                            "common_words_gazetteer_name": None,
+                            "use_stemming": False,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]  # presumably positions relative to the current token -- TODO confirm
+                    },
+                    {
+                        "args": {
+                            "common_words_gazetteer_name": None,
+                            "use_stemming": False,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {
+                        "args": {"prefix_size": 1},
+                        "factory_name": "prefix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"prefix_size": 2},
+                        "factory_name": "prefix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"suffix_size": 1},
+                        "factory_name": "suffix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"suffix_size": 2},
+                        "factory_name": "suffix",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, first entry: filter False, no drop_out key
+                        "args": {
+                            "use_stemming": False,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, second entry: filter True, with drop_out
+                        "args": {
+                            "use_stemming": False,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {  # presumably forwarded to the CRF trainer -- TODO confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric scheme code; its meaning is defined outside this file
+                "data_augmentation_config": {
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                }
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": False,
+                        "word_clusters_name": None  # no word_cluster feature is listed in the slot filler above either
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,  # nested in the classifier config, not at parser level
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_pt_br.py b/snips_inference_agl/default_configs/config_pt_br.py
new file mode 100644
index 0000000..450f0db
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_pt_br.py
@@ -0,0 +1,137 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # default NLU engine config; exposed as CONFIG_PT_BR / LANGUAGE_PT_BR in __init__.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two entries, in this order: lookup, probabilistic
+        {
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 11 factory entries follow
+                    {
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_5000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]  # presumably positions relative to the current token -- TODO confirm
+                    },
+                    {
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_5000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, first entry: filter False, no drop_out key
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, second entry: filter True, with drop_out
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {  # presumably forwarded to the CRF trainer -- TODO confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric scheme code; its meaning is defined outside this file
+                "data_augmentation_config": {
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                },
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": True,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,  # classifier-level key, same nesting as config_ja/config_ko
+            }
+        }
+    ]
+}
diff --git a/snips_inference_agl/default_configs/config_pt_pt.py b/snips_inference_agl/default_configs/config_pt_pt.py
new file mode 100644
index 0000000..450f0db
--- /dev/null
+++ b/snips_inference_agl/default_configs/config_pt_pt.py
@@ -0,0 +1,137 @@
+from __future__ import unicode_literals
+
+CONFIG = {  # default NLU engine config; exposed as CONFIG_PT_PT / LANGUAGE_PT_PT in __init__.py
+    "unit_name": "nlu_engine",
+    "intent_parsers_configs": [  # two entries, in this order: lookup, probabilistic
+        {
+            "unit_name": "lookup_intent_parser",
+            "ignore_stop_words": True
+        },
+        {
+            "unit_name": "probabilistic_intent_parser",
+            "slot_filler_config": {
+                "unit_name": "crf_slot_filler",
+                "feature_factory_configs": [  # 11 factory entries follow
+                    {
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_5000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 1
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, -1, 0, 1, 2]  # presumably positions relative to the current token -- TODO confirm
+                    },
+                    {
+                        "args": {
+                            "common_words_gazetteer_name":
+                                "top_5000_words_stemmed",
+                            "use_stemming": True,
+                            "n": 2
+                        },
+                        "factory_name": "ngram",
+                        "offsets": [-2, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_digit",
+                        "offsets": [-1, 0, 1]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_first",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {
+                        "args": {},
+                        "factory_name": "is_last",
+                        "offsets": [0, 1, 2]
+                    },
+                    {
+                        "args": {"n": 1},
+                        "factory_name": "shape_ngram",
+                        "offsets": [0]
+                    },
+                    {
+                        "args": {"n": 2},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1, 0]
+                    },
+                    {
+                        "args": {"n": 3},
+                        "factory_name": "shape_ngram",
+                        "offsets": [-1]
+                    },
+                    {  # entity_match, first entry: filter False, no drop_out key
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": False
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0]
+                    },
+                    {  # entity_match, second entry: filter True, with drop_out
+                        "args": {
+                            "use_stemming": True,
+                            "tagging_scheme_code": 2,
+                            "entity_filter": {
+                                "automatically_extensible": True
+                            }
+                        },
+                        "factory_name": "entity_match",
+                        "offsets": [-2, -1, 0],
+                        "drop_out": 0.5
+                    },
+                    {
+                        "args": {"tagging_scheme_code": 1},
+                        "factory_name": "builtin_entity_match",
+                        "offsets": [-2, -1, 0]
+                    }
+                ],
+                "crf_args": {  # presumably forwarded to the CRF trainer -- TODO confirm
+                    "c1": 0.1,
+                    "c2": 0.1,
+                    "algorithm": "lbfgs"
+                },
+                "tagging_scheme": 1,  # numeric scheme code; its meaning is defined outside this file
+                "data_augmentation_config": {
+                    "min_utterances": 200,
+                    "capitalization_ratio": 0.2,
+                    "add_builtin_entities_examples": True
+                },
+            },
+            "intent_classifier_config": {
+                "unit_name": "log_reg_intent_classifier",
+                "data_augmentation_config": {
+                    "min_utterances": 20,
+                    "noise_factor": 5,
+                    "add_builtin_entities_examples": False,
+                    "max_unknown_words": None,
+                    "unknown_word_prob": 0.0,
+                    "unknown_words_replacement_string": None
+                },
+                "featurizer_config": {
+                    "unit_name": "featurizer",
+                    "pvalue_threshold": 0.4,
+                    "added_cooccurrence_feature_ratio": 0.0,
+                    "tfidf_vectorizer_config": {
+                        "unit_name": "tfidf_vectorizer",
+                        "use_stemming": True,
+                        "word_clusters_name": None
+                    },
+                    "cooccurrence_vectorizer_config": {
+                        "unit_name": "cooccurrence_vectorizer",
+                        "window_size": None,
+                        "filter_stop_words": True,
+                        "unknown_words_replacement_string": None,
+                        "keep_order": True
+                    }
+                },
+                "noise_reweight_factor": 1,  # classifier-level key, same nesting as config_ja/config_ko
+            }
+        }
+    ]
+}