diff options
Diffstat (limited to 'roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/find_opt_references.cc')
-rw-r--r-- | roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/find_opt_references.cc | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/find_opt_references.cc b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/find_opt_references.cc new file mode 100644 index 000000000..378d3b521 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Library/BrotliCustomDecompressLib/brotli/research/find_opt_references.cc @@ -0,0 +1,267 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + Author: zip753@gmail.com (Ivan Nikulin) + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Tool for generating optimal backward references for the input file. Uses + sais-lite library for building suffix array. */ + +#include <algorithm> +#include <cassert> +#include <cstdio> +#include <cstring> +#include <functional> +#include <utility> +#include <vector> + +#include <gflags/gflags.h> +using gflags::ParseCommandLineFlags; + +#include "./esaxx/sais.hxx" + +DEFINE_bool(advanced, false, "Advanced searching mode: finds all longest " + "matches at positions that are not covered by matches of length at least " + "max_length. WARNING: uses much more memory than simple mode, especially " + "for small values of min_length."); +DEFINE_int32(min_length, 1, "Minimal length of found backward references."); +/* For advanced mode. */ +DEFINE_int32(long_length, 32, + "Maximal length of found backward references for advanced mode."); +DEFINE_int32(skip, 1, "Number of bytes to skip."); + +const size_t kFileBufferSize = (1 << 16); // 64KB + +typedef int sarray_type; // Can't make it unsigned because of templates :( +typedef uint8_t input_type; +typedef uint32_t lcp_type; +typedef std::pair<int, std::vector<int> > entry_type; +typedef std::function<void(sarray_type*, lcp_type*, size_t, int, int, int, int, + int)> Fn; + +void ReadInput(FILE* fin, input_type* storage, size_t input_size) { + size_t last_pos = 0; + size_t available_in; + fseek(fin, 0, SEEK_SET); + do { + available_in = fread(storage + last_pos, 1, kFileBufferSize, fin); + last_pos += available_in; + } while (available_in != 0); + assert(last_pos == input_size); +} + +void BuildLCP(input_type* storage, sarray_type* sarray, lcp_type* lcp, + size_t size, uint32_t* pos) { + for (int i = 0; i < size; ++i) { + pos[sarray[i]] = i; + } + uint32_t k = 0; + lcp[size - 1] = 0; + for (int i = 0; i < size; ++i) { + if (pos[i] == size - 1) { + k = 0; + continue; + } + uint32_t j = sarray[pos[i] + 1]; // Suffix which follow i-th suffix in SA. + while (i + k < size && j + k < size && storage[i + k] == storage[j + k]) { + ++k; + } + lcp[pos[i]] = k; + if (k > 0) --k; + } +} + +inline void PrintReference(sarray_type* sarray, lcp_type* lcp, size_t size, + int idx, int left_ix, int right_ix, int left_lcp, + int right_lcp, FILE* fout) { + int max_lcp_ix; + if (right_ix == size - 1 || (left_ix >= 0 && left_lcp >= right_lcp)) { + max_lcp_ix = left_ix; + } else { + max_lcp_ix = right_ix; + } + int dist = idx - sarray[max_lcp_ix]; + assert(dist > 0); + fputc(1, fout); + fwrite(&idx, sizeof(int), 1, fout); // Position in input. + fwrite(&dist, sizeof(int), 1, fout); // Backward distance. +} + +inline void GoLeft(sarray_type* sarray, lcp_type* lcp, int idx, int left_ix, + int left_lcp, entry_type* entry) { + entry->first = left_lcp; + if (left_lcp > FLAGS_long_length) return; + for (; left_ix >= 0; --left_ix) { + if (lcp[left_ix] < left_lcp) break; + if (sarray[left_ix] < idx) { + entry->second.push_back(idx - sarray[left_ix]); + } + } +} + +inline void GoRight(sarray_type* sarray, lcp_type* lcp, int idx, size_t size, + int right_ix, int right_lcp, entry_type* entry) { + entry->first = right_lcp; + if (right_lcp > FLAGS_long_length) return; + for (; right_ix < size - 1; ++right_ix) { + if (lcp[right_ix] < right_lcp) break; + if (sarray[right_ix] < idx) { + entry->second.push_back(idx - sarray[right_ix]); + } + } +} + +inline void StoreReference(sarray_type* sarray, lcp_type* lcp, size_t size, + int idx, int left_ix, int right_ix, int left_lcp, + int right_lcp, entry_type* entries) { + if (right_ix == size - 1 || (left_ix >= 0 && left_lcp > right_lcp)) { + // right is invalid or left is better + GoLeft(sarray, lcp, idx, left_ix, left_lcp, &entries[idx]); + } else if (left_ix < 0 || (right_ix < size - 1 && right_lcp > left_lcp)) { + // left is invalid or right is better + GoRight(sarray, lcp, idx, size, right_ix, right_lcp, &entries[idx]); + } else { // both are valid and of equal length + GoLeft(sarray, lcp, idx, left_ix, left_lcp, &entries[idx]); + GoRight(sarray, lcp, idx, size, right_ix, right_lcp, &entries[idx]); + } +} + +void ProcessReferences(sarray_type* sarray, lcp_type* lcp, size_t size, + uint32_t* pos, const Fn& process_output) { + int min_length = FLAGS_min_length; + for (int idx = FLAGS_skip; idx < size; ++idx) { + int left_lcp = -1; + int left_ix; + for (left_ix = pos[idx] - 1; left_ix >= 0; --left_ix) { + if (left_lcp == -1 || left_lcp > lcp[left_ix]) { + left_lcp = lcp[left_ix]; + } + if (left_lcp == 0) break; + if (sarray[left_ix] < idx) break; + } + + int right_lcp = -1; + int right_ix; + for (right_ix = pos[idx]; right_ix < size - 1; ++right_ix) { + if (right_lcp == -1 || right_lcp > lcp[right_ix]) { + right_lcp = lcp[right_ix]; + } + // Stop if we have better result from the left side already. + if (right_lcp < left_lcp && left_ix >= 0) break; + if (right_lcp == 0) break; + if (sarray[right_ix] < idx) break; + } + + if ((left_ix >= 0 && left_lcp >= min_length) || + (right_ix < size - 1 && right_lcp >= min_length)) { + process_output(sarray, lcp, size, idx, left_ix, right_ix, left_lcp, + right_lcp); + } + } +} + +void ProcessEntries(entry_type* entries, size_t size, FILE* fout) { + int long_length = FLAGS_long_length; + std::vector<std::pair<int, int> > segments; + size_t idx; + for (idx = 0; idx < size;) { + entry_type& entry = entries[idx]; + if (entry.first > long_length) { + // Add segment. + if (segments.empty() || segments.back().second < idx) { + segments.push_back({idx, idx + entry.first}); + } else { + segments.back().second = idx + entry.first; + } + } + ++idx; + } + printf("Segments generated.\n"); + size_t segments_ix = 0; + for (idx = 0; idx < size;) { + if (idx == segments[segments_ix].first) { + // Skip segment. + idx = segments[segments_ix].second; + } else { + for (auto& dist : entries[idx].second) { + fputc(1, fout); + fwrite(&idx, sizeof(int), 1, fout); // Position in input. + fwrite(&dist, sizeof(int), 1, fout); // Backward distance. + } + ++idx; + } + } +} + +int main(int argc, char* argv[]) { + ParseCommandLineFlags(&argc, &argv, true); + if (argc != 3) { + printf("usage: %s input_file output_file\n", argv[0]); + return 1; + } + + FILE* fin = fopen(argv[1], "rb"); + FILE* fout = fopen(argv[2], "w"); + + fseek(fin, 0, SEEK_END); + int input_size = ftell(fin); + fseek(fin, 0, SEEK_SET); + printf("The file size is %u bytes\n", input_size); + + input_type* storage = new input_type[input_size]; + + ReadInput(fin, storage, input_size); + fclose(fin); + + sarray_type* sarray = new sarray_type[input_size]; + saisxx(storage, sarray, input_size); + printf("Suffix array calculated.\n"); + + // Inverse suffix array. + uint32_t* pos = new uint32_t[input_size]; + + lcp_type* lcp = new lcp_type[input_size]; + BuildLCP(storage, sarray, lcp, input_size, pos); + printf("LCP array constructed.\n"); + delete[] storage; + + using std::placeholders::_1; + using std::placeholders::_2; + using std::placeholders::_3; + using std::placeholders::_4; + using std::placeholders::_5; + using std::placeholders::_6; + using std::placeholders::_7; + using std::placeholders::_8; + entry_type* entries; + if (FLAGS_advanced) { + entries = new entry_type[input_size]; + for (size_t i = 0; i < input_size; ++i) entries[i].first = -1; + } + Fn print = std::bind(PrintReference, _1, _2, _3, _4, _5, _6, _7, _8, fout); + Fn store = std::bind(StoreReference, _1, _2, _3, _4, _5, _6, _7, _8, entries); + + ProcessReferences(sarray, lcp, input_size, pos, + FLAGS_advanced ? store : print); + printf("References processed.\n"); + + if (FLAGS_advanced) { + int good_cnt = 0; + uint64_t avg_cnt = 0; + for (size_t i = 0; i < input_size; ++i) { + if (entries[i].first != -1) { + ++good_cnt; + avg_cnt += entries[i].second.size(); + } + } + printf("Number of covered positions = %d\n", good_cnt); + printf("Average number of references per covered position = %.4lf\n", + static_cast<double>(avg_cnt) / good_cnt); + ProcessEntries(entries, input_size, fout); + printf("Entries processed.\n"); + } + + fclose(fout); + return 0; +} |