diff options
Diffstat (limited to 'roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx')
8 files changed, 1414 insertions, 0 deletions
diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/COPYING b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/COPYING new file mode 100644 index 000000000..07394df7c --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/COPYING @@ -0,0 +1,24 @@ +This is the esaxx copyright. + +Copyright (c) 2010 Daisuke Okanohara All Rights Reserved. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/README b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/README new file mode 100644 index 000000000..c3afa09de --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/README @@ -0,0 +1,34 @@ +ESAXX +---------------------- + +This library provides the implementation of enhanced suffix array. +For an input text of length N, this library builds an enhanced suffix array in O(N) time +using 20N bytes. + +For a suffix array construction, I use sais.hxx, the induced sorting algorithm +implemented by Yuta Mori. + +It also provides the program to enumerate the statistics of all substrings in the text. + +> enum_substring + Enumerate all substring +> enum_substring -w + Input are words separated by space. + +Example: +------------------ +$ cat abra +abracadabra +$ enum_substring < abra + n:11 +alpha:256 + node:5 +0 2 4 abra +1 5 1 a +2 2 3 bra +3 2 2 ra +4 11 0 + +$ enum_substring -w < wiki.txt > + +Daisuke Okanohara <daisuke dot okanohara at gmail.com> diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/cmdline.h b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/cmdline.h new file mode 100644 index 000000000..2fc0260cb --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/cmdline.h @@ -0,0 +1,704 @@ +/* +Copyright (c) 2009, Hideyuki Tanaka +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the <organization> nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY <copyright holder> ''AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include <iostream> +#include <sstream> +#include <vector> +#include <map> +#include <string> +#include <stdexcept> +#include <typeinfo> +#include <cstring> +#include <algorithm> +#include <cxxabi.h> + +namespace cmdline{ + +namespace detail{ + +template <typename Target, typename Source, bool Same> +class lexical_cast_t{ +public: + static Target cast(const Source &arg){ + Target ret; + std::stringstream ss; + if (!(ss<<arg && ss>>ret && ss.eof())) + throw std::bad_cast(); + + return ret; + } +}; + +template <typename Target, typename Source> +class lexical_cast_t<Target, Source, true>{ +public: + static Target cast(const Source &arg){ + return arg; + } +}; + +template <typename Source> +class lexical_cast_t<std::string, Source, false>{ +public: + static std::string cast(const Source &arg){ + std::ostringstream ss; + ss<<arg; + return ss.str(); + } +}; + +template <typename Target> +class lexical_cast_t<Target, std::string, false>{ +public: + static Target cast(const std::string &arg){ + Target ret; + std::istringstream ss(arg); + if (!(ss>>ret && ss.eof())) + throw std::bad_cast(); + return ret; + } +}; + +template <typename T1, typename T2> +struct is_same { + static const bool value = false; +}; + +template <typename T> +struct is_same<T, T>{ + static const bool value = true; +}; + +template<typename Target, typename Source> +Target lexical_cast(const Source &arg) +{ + return lexical_cast_t<Target, Source, detail::is_same<Target, Source>::value>::cast(arg); +} + +static inline std::string demangle(const std::string &name) +{ + int status=0; + char *p=abi::__cxa_demangle(name.c_str(), 0, 0, &status); + std::string ret(p); + free(p); + return ret; +} + +template <class T> +std::string readable_typename() +{ + return demangle(typeid(T).name()); +} + +template <> +std::string readable_typename<std::string>() +{ + return "string"; +} + +} // detail + +//----- + +class cmdline_error : public std::exception { +public: + cmdline_error(const std::string &msg): msg(msg){} + ~cmdline_error() throw() {} + const char *what() const throw() { return msg.c_str(); } +private: + std::string msg; +}; + +template <class T> +struct default_reader{ + T operator()(const std::string &str){ + return detail::lexical_cast<T>(str); + } +}; + +template <class T> +struct range_reader{ + range_reader(const T &low, const T &high): low(low), high(high) {} + T operator()(const std::string &s) const { + T ret=default_reader<T>()(s); + if (!(ret>=low && ret<=high)) throw cmdline::cmdline_error("range_error"); + return ret; + } +private: + T low, high; +}; + +template <class T> +range_reader<T> range(const T &low, const T &high) +{ + return range_reader<T>(low, high); +} + +template <class T> +struct oneof_reader{ + T operator()(const std::string &s){ + T ret=default_reader<T>()(s); + if (std::find(alt.begin(), alt.end(), s)==alt.end()) + throw cmdline_error(""); + return ret; + } + void add(const T &v){ alt.push_back(v); } +private: + std::vector<T> alt; +}; + +template <class T> +oneof_reader<T> oneof(T a1) +{ + oneof_reader<T> ret; + ret.add(a1); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3, T a4) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + ret.add(a7); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + ret.add(a7); + ret.add(a8); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + ret.add(a7); + ret.add(a8); + ret.add(a9); + return ret; +} + +template <class T> +oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9, T a10) +{ + oneof_reader<T> ret; + ret.add(a1); + ret.add(a2); + ret.add(a3); + ret.add(a4); + ret.add(a5); + ret.add(a6); + ret.add(a7); + ret.add(a8); + ret.add(a9); + ret.add(a10); + return ret; +} + +//----- + +class parser{ +public: + parser(){ + } + ~parser(){ + for (std::map<std::string, option_base*>::iterator p=options.begin(); + p!=options.end(); p++) + delete p->second; + } + + void add(const std::string &name, + char short_name=0, + const std::string &desc=""){ + if (options.count(name)) throw cmdline_error("multiple definition: "+name); + options[name]=new option_without_value(name, short_name, desc); + ordered.push_back(options[name]); + } + + template <class T> + void add(const std::string &name, + char short_name=0, + const std::string &desc="", + bool need=true, + const T def=T()){ + add(name, short_name, desc, need, def, default_reader<T>()); + } + + template <class T, class F> + void add(const std::string &name, + char short_name=0, + const std::string &desc="", + bool need=true, + const T def=T(), + F reader=F()){ + if (options.count(name)) throw cmdline_error("multiple definition: "+name); + options[name]=new option_with_value_with_reader<T, F>(name, short_name, need, def, desc, reader); + ordered.push_back(options[name]); + } + + void footer(const std::string &f){ + ftr=f; + } + + void set_program_name(const std::string &name){ + prog_name=name; + } + + bool exist(const std::string &name){ + if (options.count(name)==0) throw cmdline_error("there is no flag: --"+name); + return options[name]->has_set(); + } + + template <class T> + const T &get(const std::string &name) const { + if (options.count(name)==0) throw cmdline_error("there is no flag: --"+name); + const option_with_value<T> *p=dynamic_cast<const option_with_value<T>*>(options.find(name)->second); + if (p==NULL) throw cmdline_error("type mismatch flag '"+name+"'"); + return p->get(); + } + + const std::vector<std::string> &rest() const { + return others; + } + + bool parse(int argc, char *argv[]){ + errors.clear(); + others.clear(); + + if (argc<1){ + errors.push_back("argument number must be longer than 0"); + return false; + } + if (prog_name=="") + prog_name=argv[0]; + + std::map<char, std::string> lookup; + for (std::map<std::string, option_base*>::iterator p=options.begin(); + p!=options.end(); p++){ + if (p->first.length()==0) continue; + char initial=p->second->short_name(); + if (initial){ + if (lookup.count(initial)>0){ + lookup[initial]=""; + errors.push_back(std::string("short option '")+initial+"' is ambiguous"); + return false; + } + else lookup[initial]=p->first; + } + } + + for (int i=1; i<argc; i++){ + if (strncmp(argv[i], "--", 2)==0){ + char *p=strchr(argv[i]+2, '='); + if (p){ + std::string name(argv[i]+2, p); + std::string val(p+1); + set_option(name, val); + } + else{ + std::string name(argv[i]+2); + set_option(name); + } + } + else if (strncmp(argv[i], "-", 1)==0){ + if (!argv[i][1]) continue; + char last=argv[i][1]; + for (int j=2; argv[i][j]; j++){ + last=argv[i][j]; + if (lookup.count(argv[i][j-1])==0){ + errors.push_back(std::string("undefined short option: -")+argv[i][j-1]); + continue; + } + if (lookup[argv[i][j-1]]==""){ + errors.push_back(std::string("ambiguous short option: -")+argv[i][j-1]); + continue; + } + set_option(lookup[argv[i][j-1]]); + } + + if (lookup.count(last)==0){ + errors.push_back(std::string("undefined short option: -")+last); + continue; + } + if (lookup[last]==""){ + errors.push_back(std::string("ambiguous short option: -")+last); + continue; + } + + if (i+1<argc && options[lookup[last]]->has_value()){ + set_option(lookup[last], argv[i+1]); + i++; + } + else{ + set_option(lookup[last]); + } + } + else{ + others.push_back(argv[i]); + } + } + + for (std::map<std::string, option_base*>::iterator p=options.begin(); + p!=options.end(); p++) + if (!p->second->valid()) + errors.push_back("need option: --"+std::string(p->first)); + + return errors.size()==0; + } + + std::string error() const{ + return errors.size()>0?errors[0]:""; + } + + std::string error_full() const{ + std::ostringstream oss; + for (size_t i=0; i<errors.size(); i++) + oss<<errors[i]<<std::endl; + return oss.str(); + } + + std::string usage() const { + std::ostringstream oss; + oss<<"usage: "<<prog_name<<" "; + for (size_t i=0; i<ordered.size(); i++){ + if (ordered[i]->must()) + oss<<ordered[i]->short_description()<<" "; + } + + oss<<"[options] ... "<<ftr<<std::endl; + oss<<"options:"<<std::endl; + + size_t max_width=0; + for (size_t i=0; i<ordered.size(); i++){ + max_width=std::max(max_width, ordered[i]->name().length()); + } + for (size_t i=0; i<ordered.size(); i++){ + if (ordered[i]->short_name()){ + oss<<" -"<<ordered[i]->short_name()<<", "; + } + else{ + oss<<" "; + } + + oss<<"--"<<ordered[i]->name(); + for (size_t j=ordered[i]->name().length(); j<max_width+4; j++) + oss<<' '; + oss<<ordered[i]->description()<<std::endl; + } + return oss.str(); + } + +private: + + void set_option(const std::string &name){ + if (options.count(name)==0){ + errors.push_back("undefined option: --"+name); + return; + } + if (!options[name]->set()){ + errors.push_back("option needs value: --"+name); + return; + } + } + + void set_option(const std::string &name, const std::string &value){ + if (options.count(name)==0){ + errors.push_back("undefined option: --"+name); + return; + } + if (!options[name]->set(value)){ + errors.push_back("option value is invalid: --"+name+"="+value); + return; + } + } + + class option_base{ + public: + virtual ~option_base(){} + + virtual bool has_value() const=0; + virtual bool set()=0; + virtual bool set(const std::string &value)=0; + virtual bool has_set() const=0; + virtual bool valid() const=0; + virtual bool must() const=0; + + virtual const std::string &name() const=0; + virtual char short_name() const=0; + virtual const std::string &description() const=0; + virtual std::string short_description() const=0; + }; + + class option_without_value : public option_base { + public: + option_without_value(const std::string &name, + char short_name, + const std::string &desc) + :nam(name), snam(short_name), desc(desc), has(false){ + } + ~option_without_value(){} + + bool has_value() const { return false; } + + bool set(){ + has=true; + return true; + } + + bool set(const std::string &){ + return false; + } + + bool has_set() const { + return has; + } + + bool valid() const{ + return true; + } + + bool must() const{ + return false; + } + + const std::string &name() const{ + return nam; + } + + char short_name() const{ + return snam; + } + + const std::string &description() const { + return desc; + } + + std::string short_description() const{ + return "--"+nam; + } + + private: + std::string nam; + char snam; + std::string desc; + bool has; + }; + + template <class T> + class option_with_value : public option_base { + public: + option_with_value(const std::string &name, + char short_name, + bool need, + const T &def, + const std::string &desc) + : nam(name), snam(short_name), need(need), has(false) + , def(def), actual(def) { + this->desc=full_description(desc); + } + ~option_with_value(){} + + const T &get() const { + return actual; + } + + bool has_value() const { return true; } + + bool set(){ + return false; + } + + bool set(const std::string &value){ + try{ + actual=read(value); + has=true; + } + catch(const std::exception &e){ + return false; + } + return true; + } + + bool has_set() const{ + return has; + } + + bool valid() const{ + if (need && !has) return false; + return true; + } + + bool must() const{ + return need; + } + + const std::string &name() const{ + return nam; + } + + char short_name() const{ + return snam; + } + + const std::string &description() const { + return desc; + } + + std::string short_description() const{ + return "--"+nam+"="+detail::readable_typename<T>(); + } + + protected: + std::string full_description(const std::string &desc){ + return + desc+" ("+detail::readable_typename<T>()+ + (need?"":" [="+detail::lexical_cast<std::string>(def)+"]") + +")"; + } + + virtual T read(const std::string &s)=0; + + std::string nam; + char snam; + bool need; + std::string desc; + + bool has; + T def; + T actual; + }; + + template <class T, class F> + class option_with_value_with_reader : public option_with_value<T> { + public: + option_with_value_with_reader(const std::string &name, + char short_name, + bool need, + const T def, + const std::string &desc, + F reader) + : option_with_value<T>(name, short_name, need, def, desc), reader(reader){ + } + + private: + T read(const std::string &s){ + return reader(s); + } + + F reader; + }; + + std::map<std::string, option_base*> options; + std::vector<option_base*> ordered; + std::string ftr; + + std::string prog_name; + std::vector<std::string> others; + + std::vector<std::string> errors; +}; + +} // cmdline diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/enumSubstring.cpp b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/enumSubstring.cpp new file mode 100644 index 000000000..e34842fd2 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/enumSubstring.cpp @@ -0,0 +1,140 @@ +#include <iostream> +#include <string> +#include <vector> +#include <map> +#include "cmdline.h" +#include "esa.hxx" + +using namespace std; + +int readFile(const char* fn, vector<int>& T){ + FILE* fp = fopen(fn, "rb"); + if (fp == NULL){ + cerr << "cannot open " << fn << endl; + return -1; + } + + if (fseek(fp, 0, SEEK_END) != 0){ + cerr << "cannot fseek " << fn << endl; + fclose(fp); + return -1; + } + int n = ftell(fp); + rewind(fp); + if (n < 0){ + cerr << "cannot ftell " << fn << endl; + fclose(fp); + return -1; + } + T.resize(n); + if (fread(&T[0], sizeof(unsigned char), (size_t)n, fp) != (size_t) n){ + cerr << "fread error " << fn << endl; + fclose(fp); + return -1; + } + + fclose(fp); + return 0; +} + +int getID(const string& str, map<string, int>& word2id){ + map<string, int>::const_iterator it = word2id.find(str); + if (it == word2id.end()){ + int newID = (int)word2id.size(); + word2id[str] = newID; + return newID; + } else { + return it->second; + } +} + +void printSnipet(const vector<int>& T, const int beg, const int len, const vector<string>& id2word){ + for (int i = 0; i < len; ++i){ + int c = T[beg + i]; + if (id2word.size() > 0){ + cout << id2word[c] << " "; + } else { + cout << (isspace((char)c) ? '_' : (char)c); + } + } +} + +int main(int argc, char* argv[]){ + cmdline::parser p; + p.add("word", 'w', "word type"); + + if (!p.parse(argc, argv)){ + cerr << p.error() << endl + << p.usage() << endl; + return -1; + } + + if (p.rest().size() > 0){ + cerr << p.usage() << endl; + return -1; + } + + vector<int> T; + + bool isWord = p.exist("word"); + map<string, int> word2id; + istreambuf_iterator<char> isit(cin); + istreambuf_iterator<char> end; + + size_t origLen = 0; + if (isWord){ + string word; + while (isit != end){ + char c = *isit++; + if (!isspace(c)){ + word += c; + } else if (word.size() > 0){ + T.push_back(getID(word, word2id)); + word = ""; + } + ++origLen; + } + if (word.size() > 0){ + T.push_back(getID(word, word2id)); + } + } else { + while (isit != end){ + T.push_back((unsigned char)(*isit++)); + } + origLen = T.size(); + } + + vector<string> id2word(word2id.size()); + for (map<string, int>::const_iterator it = word2id.begin(); + it != word2id.end(); ++it){ + id2word[it->second] = it->first; + } + + vector<int> SA(T.size()); + vector<int> L (T.size()); + vector<int> R (T.size()); + vector<int> D (T.size()); + + int k = (isWord) ? (int)id2word.size() : 0x100; + if (isWord){ + cerr << "origN:" << origLen << endl; + } + cerr << " n:" << T.size() << endl; + cerr << "alpha:" << k << endl; + + int nodeNum = 0; + if (esaxx(T.begin(), SA.begin(), + L.begin(), R.begin(), D.begin(), + (int)T.size(), k, nodeNum) == -1){ + return -1; + } + cerr << " node:" << nodeNum << endl; + + for (int i = 0; i < nodeNum; ++i){ + cout << i << "\t" << R[i] - L[i] << "\t" << D[i] << "\t"; + printSnipet(T, SA[L[i]], D[i], id2word); + cout << endl; + } + + return 0; +} diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/esa.hxx b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/esa.hxx new file mode 100644 index 000000000..acb5c7a1b --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/esa.hxx @@ -0,0 +1,125 @@ +/* + * esa.hxx + * Copyright (c) 2010 Daisuke Okanohara All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _ESA_HXX +#define _ESA_HXX + +#include <vector> +#include <utility> +#include <cassert> +#include "sais.hxx" + +namespace esaxx_private { +template<typename string_type, typename sarray_type, typename index_type> +index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, index_type n){ + if (n == 0){ + return 0; + } + sarray_type Psi = L; + Psi[SA[0]] = SA[n-1]; + for (index_type i = 1; i < n; ++i){ + Psi[SA[i]] = SA[i-1]; + } + + // Compare at most 2n log n charcters. Practically fastest + // "Permuted Longest-Common-Prefix Array", Juha Karkkainen, CPM 09 + sarray_type PLCP = R; + index_type h = 0; + for (index_type i = 0; i < n; ++i){ + index_type j = Psi[i]; + while (i+h < n && j+h < n && + T[i+h] == T[j+h]){ + ++h; + } + PLCP[i] = h; + if (h > 0) --h; + } + + sarray_type H = L; + for (index_type i = 0; i < n; ++i){ + H[i] = PLCP[SA[i]]; + } + H[0] = -1; + + std::vector<std::pair<index_type, index_type> > S; + S.push_back(std::make_pair((index_type)-1, (index_type)-1)); + size_t nodeNum = 0; + for (index_type i = 0; ; ++i){ + std::pair<index_type, index_type> cur (i, (i == n) ? -1 : H[i]); + std::pair<index_type, index_type> cand(S.back()); + while (cand.second > cur.second){ + if (i - cand.first > 1){ + L[nodeNum] = cand.first; + R[nodeNum] = i; + D[nodeNum] = cand.second; + ++nodeNum; + } + cur.first = cand.first; + S.pop_back(); + cand = S.back(); + } + if (cand.second < cur.second){ + S.push_back(cur); + } + if (i == n) break; + S.push_back(std::make_pair(i, n - SA[i] + 1)); + } + return nodeNum; +} +} + +/** + * @brief Build an enhanced suffix array of a given string in linear time + * For an input text T, esaxx() builds an enhancd suffix array in linear time. + * i-th internal node is represented as a triple (L[i], R[i], D[i]); + * L[i] and R[i] is the left/right boundary of the suffix array as SA[L[i]....R[i]-1] + * D[i] is the depth of the internal node + * The number of internal node is at most N-1 and return the actual number by + * @param T[0...n-1] The input string. (random access iterator) + * @param SA[0...n-1] The output suffix array (random access iterator) + * @param L[0...n-1] The output left boundary of internal node (random access iterator) + * @param R[0...n-1] The output right boundary of internal node (random access iterator) + * @param D[0...n-1] The output depth of internal node (random access iterator) + * @param n The length of the input string + * @param k The alphabet size + * @pram nodeNum The output the number of internal node + * @return 0 if succeded, -1 or -2 otherwise + */ + +template<typename string_type, typename sarray_type, typename index_type> +int esaxx(string_type T, sarray_type SA, sarray_type L, sarray_type R, sarray_type D, + index_type n, index_type k, index_type& nodeNum) { + if ((n < 0) || (k <= 0)) return -1; + int err = saisxx(T, SA, n, k); + if (err != 0){ + return err; + } + nodeNum = esaxx_private::suffixtree(T, SA, L, R, D, n); + return 0; +} + + +#endif // _ESA_HXX diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/sais.hxx b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/sais.hxx new file mode 100644 index 000000000..20e69dfec --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/sais.hxx @@ -0,0 +1,364 @@ +/* + * sais.hxx for sais-lite + * Copyright (c) 2008-2009 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _SAIS_HXX +#define _SAIS_HXX 1 +#ifdef __cplusplus + +#ifdef __INTEL_COMPILER +#pragma warning(disable : 383 981 1418) +// for icc 64-bit +//#define __builtin_vsnprintf(a, b, c, d) __builtin_vsnprintf(a, b, c, (char *)d) +#endif + +#include <iterator> +#ifdef _OPENMP +# include <omp.h> +#endif + +namespace saisxx_private { + +/* find the start or end of each bucket */ +template<typename string_type, typename bucket_type, typename index_type> +void +getCounts(const string_type T, bucket_type C, index_type n, index_type k) { +#ifdef _OPENMP + bucket_type D; + index_type i, j, p, sum, first, last; + int thnum, maxthreads = omp_get_max_threads(); +#pragma omp parallel default(shared) private(D, i, thnum, first, last) + { + thnum = omp_get_thread_num(); + D = C + thnum * k; + first = n / maxthreads * thnum; + last = (thnum < (maxthreads - 1)) ? n / maxthreads * (thnum + 1) : n; + for(i = 0; i < k; ++i) { D[i] = 0; } + for(i = first; i < last; ++i) { ++D[T[i]]; } + } + if(1 < maxthreads) { +#pragma omp parallel for default(shared) private(i, j, p, sum) + for(i = 0; i < k; ++i) { + for(j = 1, p = i + k, sum = C[i]; j < maxthreads; ++j, p += k) { + sum += C[p]; + } + C[i] = sum; + } + } +#else + index_type i; + for(i = 0; i < k; ++i) { C[i] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } +#endif +} +template<typename bucket_type, typename index_type> +void +getBuckets(const bucket_type C, bucket_type B, index_type k, bool end) { + index_type i, sum = 0; + if(end) { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } } + else { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } } +} + +/* compute SA and BWT */ +template<typename string_type, typename sarray_type, + typename bucket_type, typename index_type> +void +induceSA(string_type T, sarray_type SA, bucket_type C, bucket_type B, + index_type n, index_type k) { +typedef typename std::iterator_traits<string_type>::value_type char_type; + sarray_type b; + index_type i, j; + char_type c0, c1; + /* compute SAl */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, false); /* find starts of buckets */ + b = SA + B[c1 = T[j = n - 1]]; + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + for(i = 0; i < n; ++i) { + j = SA[i], SA[i] = ~j; + if(0 < j) { + if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + } + } + /* compute SAs */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, true); /* find ends of buckets */ + for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) { + if(0 < (j = SA[i])) { + if((c0 = T[--j]) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *--b = ((j == 0) || (T[j - 1] > c1)) ? ~j : j; + } else { + SA[i] = ~j; + } + } +} +template<typename string_type, typename sarray_type, + typename bucket_type, typename index_type> +int +computeBWT(string_type T, sarray_type SA, bucket_type C, bucket_type B, + index_type n, index_type k) { +typedef typename std::iterator_traits<string_type>::value_type char_type; + sarray_type b; + index_type i, j, pidx = -1; + char_type c0, c1; + /* compute SAl */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, false); /* find starts of buckets */ + b = SA + B[c1 = T[j = n - 1]]; + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + for(i = 0; i < n; ++i) { + if(0 < (j = SA[i])) { + SA[i] = ~(c0 = T[--j]); + if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *b++ = ((0 < j) && (T[j - 1] < c1)) ? ~j : j; + } else if(j != 0) { + SA[i] = ~j; + } + } + /* compute SAs */ + if(C == B) { getCounts(T, C, n, k); } + getBuckets(C, B, k, true); /* find ends of buckets */ + for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) { + if(0 < (j = SA[i])) { + SA[i] = (c0 = T[--j]); + if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; } + *--b = ((0 < j) && (T[j - 1] > c1)) ? ~((index_type)T[j - 1]) : j; + } else if(j != 0) { + SA[i] = ~j; + } else { + pidx = i; + } + } + return pidx; +} + +/* find the suffix array SA of T[0..n-1] in {0..k}^n + use a working space (excluding s and SA) of at most 2n+O(1) for a constant alphabet */ +template<typename string_type, typename sarray_type, typename index_type> +int +suffixsort(string_type T, sarray_type SA, + index_type fs, index_type n, index_type k, + bool isbwt) { +typedef typename std::iterator_traits<string_type>::value_type char_type; + sarray_type RA; + index_type i, j, m, p, q, plen, qlen, name, pidx = 0; + bool diff; + int c; +#ifdef _OPENMP + int maxthreads = omp_get_max_threads(); +#else +# define maxthreads 1 +#endif + char_type c0, c1; + + /* stage 1: reduce the problem by at least 1/2 + sort all the S-substrings */ + if(fs < (maxthreads * k)) { + index_type *C, *B; + if((C = new index_type[maxthreads * k]) == 0) { return -2; } + B = (1 < maxthreads) ? C + k : C; + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = 0; i < n; ++i) { SA[i] = 0; } + for(i = n - 2, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { SA[--B[c1]] = i + 1, c = 0; } + } + induceSA(T, SA, C, B, n, k); + delete [] C; + } else { + sarray_type C, B; + C = SA + n; + B = ((1 < maxthreads) || (k <= (fs - k))) ? C + k : C; + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = 0; i < n; ++i) { SA[i] = 0; } + for(i = n - 2, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { SA[--B[c1]] = i + 1, c = 0; } + } + induceSA(T, SA, C, B, n, k); + } + + /* compact all the sorted substrings into the first m items of SA + 2*m must be not larger than n (proveable) */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i, j, p, c0, c1) + for(i = 0; i < n; ++i) { + p = SA[i]; + if((0 < p) && (T[p - 1] > (c0 = T[p]))) { + for(j = p + 1; (j < n) && (c0 == (c1 = T[j])); ++j) { } + if((j < n) && (c0 < c1)) { SA[i] = ~p; } + } + } + for(i = 0, m = 0; i < n; ++i) { if((p = SA[i]) < 0) { SA[m++] = ~p; } } +#else + for(i = 0, m = 0; i < n; ++i) { + p = SA[i]; + if((0 < p) && (T[p - 1] > (c0 = T[p]))) { + for(j = p + 1; (j < n) && (c0 == (c1 = T[j])); ++j) { } + if((j < n) && (c0 < c1)) { SA[m++] = p; } + } + } +#endif + j = m + (n >> 1); +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = m; i < j; ++i) { SA[i] = 0; } /* init the name array buffer */ + /* store the length of all substrings */ + for(i = n - 2, j = n, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { SA[m + ((i + 1) >> 1)] = j - i - 1; j = i + 1; c = 0; } + } + /* find the lexicographic names of all substrings */ + for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) { + p = SA[i], plen = SA[m + (p >> 1)], diff = true; + if(plen == qlen) { + for(j = 0; (j < plen) && (T[p + j] == T[q + j]); ++j) { } + if(j == plen) { diff = false; } + } + if(diff != false) { ++name, q = p, qlen = plen; } + SA[m + (p >> 1)] = name; + } + + /* stage 2: solve the reduced problem + recurse if names are not yet unique */ + if(name < m) { + RA = SA + n + fs - m; + for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) { + if(SA[i] != 0) { RA[j--] = SA[i] - 1; } + } + if(suffixsort(RA, SA, fs + n - m * 2, m, name, false) != 0) { return -2; } + for(i = n - 2, j = m - 1, c = 0, c1 = T[n - 1]; 0 <= i; --i, c1 = c0) { + if((c0 = T[i]) < (c1 + c)) { c = 1; } + else if(c != 0) { RA[j--] = i + 1, c = 0; } /* get p1 */ + } +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; } /* get index in s */ + } + + /* stage 3: induce the result for the original problem */ + if(fs < (maxthreads * k)) { + index_type *B, *C; + if((C = new index_type[maxthreads * k]) == 0) { return -2; } + B = (1 < maxthreads) ? C + k : C; + /* put all left-most S characters into their buckets */ + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */ + for(i = m - 1; 0 <= i; --i) { + j = SA[i], SA[i] = 0; + SA[--B[T[j]]] = j; + } + if(isbwt == false) { induceSA(T, SA, C, B, n, k); } + else { pidx = computeBWT(T, SA, C, B, n, k); } + delete [] C; + } else { + sarray_type C, B; + C = SA + n; + B = ((1 < maxthreads) || (k <= (fs - k))) ? C + k : C; + /* put all left-most S characters into their buckets */ + getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ +#ifdef _OPENMP +#pragma omp parallel for default(shared) private(i) +#endif + for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */ + for(i = m - 1; 0 <= i; --i) { + j = SA[i], SA[i] = 0; + SA[--B[T[j]]] = j; + } + if(isbwt == false) { induceSA(T, SA, C, B, n, k); } + else { pidx = computeBWT(T, SA, C, B, n, k); } + } + + return pidx; +#ifndef _OPENMP +# undef maxthreads +#endif +} + +} /* namespace saisxx_private */ + + +/** + * @brief Constructs the suffix array of a given string in linear time. + * @param T[0..n-1] The input string. (random access iterator) + * @param SA[0..n-1] The output array of suffixes. (random access iterator) + * @param n The length of the given string. + * @param k The alphabet size. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +template<typename string_type, typename sarray_type, typename index_type> +int +saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) { + int err; + if((n < 0) || (k <= 0)) { return -1; } + if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; } + try { err = saisxx_private::suffixsort(T, SA, 0, n, k, false); } + catch(...) { err = -2; } + return err; +} + +/** + * @brief Constructs the burrows-wheeler transformed string of a given string in linear time. + * @param T[0..n-1] The input string. (random access iterator) + * @param U[0..n-1] The output string. (random access iterator) + * @param A[0..n-1] The temporary array. (random access iterator) + * @param n The length of the given string. + * @param k The alphabet size. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +template<typename string_type, typename sarray_type, typename index_type> +index_type +saisxx_bwt(string_type T, string_type U, sarray_type A, index_type n, index_type k = 256) { +typedef typename std::iterator_traits<string_type>::value_type char_type; + index_type i, pidx; + if((n < 0) || (k <= 0)) { return -1; } + if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + try { + pidx = saisxx_private::suffixsort(T, A, 0, n, k, true); + if(0 <= pidx) { + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; } + for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; } + pidx += 1; + } + } catch(...) { pidx = -2; } + return pidx; +} + + +#endif /* __cplusplus */ +#endif /* _SAIS_HXX */ diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/waf b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/waf Binary files differnew file mode 100755 index 000000000..d99146fa4 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/waf diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/wscript b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/wscript new file mode 100644 index 000000000..a58f45826 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/esaxx/wscript @@ -0,0 +1,23 @@ +VERSION= '0.0.4' +APPNAME= 'esaxx' + +srcdir= '.' +blddir= 'bin' + +def set_options(ctx): + ctx.tool_options('compiler_cxx') + +def configure(ctx): + ctx.check_tool('compiler_cxx') + ctx.env.CXXFLAGS += ['-O2', '-Wall', '-g'] + +def build(bld): + task1= bld(features='cxx cprogram', + source = 'enumSubstring.cpp', + name = 'enum_substring', + target = 'enum_substring', + includes = '.') + +def dist_hook(): + import os + os.remove('googlecode_upload.py') |