diff options
Diffstat (limited to 'roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/sample/encode.c')
-rw-r--r-- | roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/sample/encode.c | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/sample/encode.c b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/sample/encode.c new file mode 100644 index 000000000..c5d4771b3 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/sample/encode.c @@ -0,0 +1,212 @@ +/* + * encode.c + */ +#include <stdio.h> +#include "oniguruma.h" + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end) +{ + int r; + unsigned char *start, *range; + OnigRegion *region; + + region = onig_region_new(); + + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d (%s)\n", r, + ONIGENC_NAME(onig_get_encoding(reg))); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail (%s)\n", + ONIGENC_NAME(onig_get_encoding(reg))); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(stderr, "ERROR: %s\n", s); + fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return 0; +} + +static int +exec(OnigEncoding enc, OnigOptionType options, + char* apattern, char* astr) +{ + int r; + unsigned char *end; + regex_t* reg; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + + onig_initialize(&enc, 1); + + r = onig_new(®, pattern, + pattern + onigenc_str_bytelen_null(enc, pattern), + options, enc, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + end = str + onigenc_str_bytelen_null(enc, str); + r = search(reg, str, end); + + onig_free(reg); + onig_end(); + return 0; +} + +extern int main(int argc, char* argv[]) +{ + int r; + /* ISO 8859-1 test */ + static unsigned char str[] = { 0xc7, 0xd6, 0xfe, 0xea, 0xe0, 0xe2, 0x00 }; + static unsigned char pattern[] = { 0xe7, 0xf6, 0xde, '\\', 'w', '+', 0x00 }; + + r = exec(ONIG_ENCODING_SJIS, ONIG_OPTION_NONE, + "^a\\p{Hiragana}c$", "a\202\274c"); + + r = exec(ONIG_ENCODING_EUC_JP, ONIG_OPTION_NONE, + "^a\\p{Hiragana}c$", "a\244\276c"); + + r = exec(ONIG_ENCODING_CP1251, ONIG_OPTION_IGNORECASE, + "aBc", " AbC"); + + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + " [a-c\337z] ", " SS "); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + " [\330-\341] ", " SS "); + + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "\337 ", " Ss "); + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "SS ", " \337 "); + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "\\A\\S\\z", "ss"); + + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + + r = exec(ONIG_ENCODING_ISO_8859_3, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_4, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_5, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_6, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_7, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_8, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_9, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_10, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_11, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_13, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_14, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_15, ONIG_OPTION_IGNORECASE, + (char* )pattern, (char* )str); + r = exec(ONIG_ENCODING_ISO_8859_16, ONIG_OPTION_IGNORECASE, + (char* )pattern, (char* )str); + + r = exec(ONIG_ENCODING_KOI8_R, ONIG_OPTION_NONE, "a+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_EUC_TW, ONIG_OPTION_NONE, "b*a+?c+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_EUC_KR, ONIG_OPTION_NONE, "a+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_EUC_CN, ONIG_OPTION_NONE, "c+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_BIG5, ONIG_OPTION_NONE, "a+", "bbbaaaccc"); + + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "\337", "SS"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "SS", "\337"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "SSb\337ssc", "a\337bSS\337cd"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "[a\337]{0,2}", "aSS"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "is", "iss"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE, + "\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000", + "\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000"); + /* 0x0a3d == \012\075 : is not alnum */ + /* 0x0a3e == \012\076 : is alnum */ + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE, + "\000\\\000d\000+\000\000", + "\0003\0001\377\020\377\031\377\032\000\000"); + + r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE, + "(Aa\\d)+", "BaA5Aa0234"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + "\000[\000\337\000]\000\000", "\000S\000S\000\000"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + "\000[\000\337\000]\000\000", "\000s\000S\000\000"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + "\000^\000[\000\001\000-\377\375\000]\000$\000\000", + "\000s\000S\000\000"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + "\000S\000S\000\000", + "\000S\000T\000\337\000\000"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + "\000S\000T\000S\000S\000\000", + "\000S\000t\000s\000S\000\000"); + + { + UChar pat[] = { 0x1f, 0xfc, 0x00, 0x00 }; + UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 }; + UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 }; + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + (char* )pat, (char* )str1); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + (char* )pat, (char* )str2); + } + +#if 0 + /* You should define USE_UNICODE_CASE_FOLD_TURKISH_AZERI in regenc.h. */ + + set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI); + + r = exec(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8, ONIG_OPTION_IGNORECASE, + "Ii", "\304\261\304\260"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + "\000I\000i\000\000", "\001\061\001\060\000\000"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE, + "\001\061\001\060\000\000", "\000I\000i\000\000"); + + set_case_fold(ONIGENC_CASE_FOLD_MIN); +#endif + + return r; +} |