from __future__ import unicode_literals

def _feature(factory_name, offsets, args=None, drop_out=None):
    """Build one entry of the slot filler's ``feature_factory_configs``.

    A fresh empty ``args`` dict is created when none is supplied, and the
    ``drop_out`` key is only present when a value is given.
    """
    feature = {
        "args": {} if args is None else args,
        "factory_name": factory_name,
        "offsets": offsets,
    }
    if drop_out is not None:
        feature["drop_out"] = drop_out
    return feature


def _ngram_args(n):
    """Return a new ``args`` dict for an ``ngram`` feature with value ``n``."""
    return {
        "common_words_gazetteer_name": "top_10000_words_stemmed",
        "use_stemming": True,
        "n": n,
    }


def _entity_match_args(automatically_extensible):
    """Return a new ``args`` dict for an ``entity_match`` feature.

    ``automatically_extensible`` is the value stored in the entity filter.
    """
    return {
        "use_stemming": True,
        "tagging_scheme_code": 2,
        "entity_filter": {
            "automatically_extensible": automatically_extensible,
        },
    }


# Feature factories of the CRF slot filler, in their original order.
_SLOT_FILLER_FEATURES = [
    _feature("ngram", [-2, -1, 0, 1, 2], args=_ngram_args(1)),
    _feature("ngram", [-2, 1], args=_ngram_args(2)),
    _feature("is_digit", [-1, 0, 1]),
    _feature("is_first", [-2, -1, 0]),
    _feature("is_last", [0, 1, 2]),
    _feature("shape_ngram", [0], args={"n": 1}),
    _feature("shape_ngram", [-1, 0], args={"n": 2}),
    _feature("shape_ngram", [-1], args={"n": 3}),
    _feature(
        "entity_match", [-2, -1, 0],
        args=_entity_match_args(False),
    ),
    # Only this entry carries a "drop_out" value.
    _feature(
        "entity_match", [-2, -1, 0],
        args=_entity_match_args(True),
        drop_out=0.5,
    ),
    _feature(
        "builtin_entity_match", [-2, -1, 0],
        args={"tagging_scheme_code": 1},
    ),
]

_SLOT_FILLER_CONFIG = {
    "unit_name": "crf_slot_filler",
    "feature_factory_configs": _SLOT_FILLER_FEATURES,
    "crf_args": {"c1": 0.1, "c2": 0.1, "algorithm": "lbfgs"},
    "tagging_scheme": 1,
    "data_augmentation_config": {
        "min_utterances": 200,
        "capitalization_ratio": 0.2,
        "add_builtin_entities_examples": True,
    },
}

_FEATURIZER_CONFIG = {
    "unit_name": "featurizer",
    "pvalue_threshold": 0.4,
    "added_cooccurrence_feature_ratio": 0.0,
    "tfidf_vectorizer_config": {
        "unit_name": "tfidf_vectorizer",
        "use_stemming": True,
        "word_clusters_name": None,
    },
    "cooccurrence_vectorizer_config": {
        "unit_name": "cooccurrence_vectorizer",
        "window_size": None,
        "filter_stop_words": True,
        "unknown_words_replacement_string": None,
        "keep_order": True,
    },
}

_INTENT_CLASSIFIER_CONFIG = {
    "unit_name": "log_reg_intent_classifier",
    "data_augmentation_config": {
        "min_utterances": 20,
        "noise_factor": 5,
        "add_builtin_entities_examples": False,
        "max_unknown_words": None,
        "unknown_word_prob": 0.0,
        "unknown_words_replacement_string": None,
    },
    "featurizer_config": _FEATURIZER_CONFIG,
    "noise_reweight_factor": 1,
}

# Configuration of the "nlu_engine" unit: a lookup intent parser followed by
# a probabilistic intent parser (CRF slot filler + log-reg intent classifier).
CONFIG = {
    "unit_name": "nlu_engine",
    "intent_parsers_configs": [
        {
            "unit_name": "lookup_intent_parser",
            "ignore_stop_words": True,
        },
        {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": _SLOT_FILLER_CONFIG,
            "intent_classifier_config": _INTENT_CLASSIFIER_CONFIG,
        },
    ],
}