diff options
author | 2023-10-10 14:33:42 +0000 | |
---|---|---|
committer | 2023-10-10 14:33:42 +0000 | |
commit | af1a266670d040d2f4083ff309d732d648afba2a (patch) | |
tree | 2fc46203448ddcc6f81546d379abfaeb323575e9 /roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc | |
parent | e02cda008591317b1625707ff8e115a4841aa889 (diff) |
Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec
Diffstat (limited to 'roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc')
12 files changed, 6002 insertions, 0 deletions
diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/API b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/API new file mode 100644 index 000000000..5795e1327 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/API @@ -0,0 +1,982 @@ +Oniguruma API Version 6.9.5 2020/02/19 + +#include <oniguruma.h> + + +# int onig_initialize(OnigEncoding use_encodings[], int num_encodings) + + Initialize library. + + You have to call it explicitly. + + * onig_init() is deprecated. + + arguments + 1 use_encodings: array of encodings used in application. + 2 num_encodings: number of encodings. + + +# int onig_error_code_to_str(UChar* err_buf, int err_code, ...) + + Get error message string. + If this function is used for onig_new(), + don't call this after the pattern argument of onig_new() is freed. + + normal return: error message string length + + arguments + 1 err_buf: error message string buffer. + (required size: ONIG_MAX_ERROR_MESSAGE_LEN) + 2 err_code: error code returned by other API functions. + 3 err_info (optional): error info returned by onig_new(). + + +# void onig_set_warn_func(OnigWarnFunc func) + + Set warning function. + + WARNING: + '[', '-', ']' in character class without escape. + ']' in pattern without escape. + + arguments + 1 func: function pointer. void (*func)(char* warning_message) + + +# void onig_set_verb_warn_func(OnigWarnFunc func) + + Set verbose warning function. + + WARNING: + redundant nested repeat operator. + + arguments + 1 func: function pointer. void (*func)(char* warning_message) + + +# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Create a regex object. + + normal return: ONIG_NORMAL + + arguments + 1 reg: return regex object's address. + 2 pattern: regex pattern string. + 3 pattern_end: terminate address of pattern. 
(pattern + pattern length) + 4 option: compile time options. + + ONIG_OPTION_NONE no option + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' + ONIG_OPTION_MULTILINE '.' match with newline + ONIG_OPTION_IGNORECASE ambiguity match on + ONIG_OPTION_EXTEND extended pattern form + ONIG_OPTION_FIND_LONGEST find longest match + ONIG_OPTION_FIND_NOT_EMPTY ignore empty match + ONIG_OPTION_NEGATE_SINGLELINE + clear ONIG_OPTION_SINGLELINE which is enabled on + ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA + + ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured. + ONIG_OPTION_CAPTURE_GROUP named and no-named group captured. + + ONIG_OPTION_WORD_IS_ASCII ASCII only word (\w, \p{Word}, [[:word:]]) + ASCII only word bound (\b) + ONIG_OPTION_DIGIT_IS_ASCII ASCII only digit (\d, \p{Digit}, [[:digit:]]) + ONIG_OPTION_SPACE_IS_ASCII ASCII only space (\s, \p{Space}, [[:space:]]) + ONIG_OPTION_POSIX_IS_ASCII ASCII only POSIX properties + (includes word, digit, space) + (alnum, alpha, blank, cntrl, digit, graph, + lower, print, punct, space, upper, xdigit, + word) + ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER Extended Grapheme Cluster mode + ONIG_OPTION_TEXT_SEGMENT_WORD Word mode + + 5 enc: character encoding. 
+ + ONIG_ENCODING_ASCII ASCII + ONIG_ENCODING_ISO_8859_1 ISO 8859-1 + ONIG_ENCODING_ISO_8859_2 ISO 8859-2 + ONIG_ENCODING_ISO_8859_3 ISO 8859-3 + ONIG_ENCODING_ISO_8859_4 ISO 8859-4 + ONIG_ENCODING_ISO_8859_5 ISO 8859-5 + ONIG_ENCODING_ISO_8859_6 ISO 8859-6 + ONIG_ENCODING_ISO_8859_7 ISO 8859-7 + ONIG_ENCODING_ISO_8859_8 ISO 8859-8 + ONIG_ENCODING_ISO_8859_9 ISO 8859-9 + ONIG_ENCODING_ISO_8859_10 ISO 8859-10 + ONIG_ENCODING_ISO_8859_11 ISO 8859-11 + ONIG_ENCODING_ISO_8859_13 ISO 8859-13 + ONIG_ENCODING_ISO_8859_14 ISO 8859-14 + ONIG_ENCODING_ISO_8859_15 ISO 8859-15 + ONIG_ENCODING_ISO_8859_16 ISO 8859-16 + ONIG_ENCODING_UTF8 UTF-8 + ONIG_ENCODING_UTF16_BE UTF-16BE + ONIG_ENCODING_UTF16_LE UTF-16LE + ONIG_ENCODING_UTF32_BE UTF-32BE + ONIG_ENCODING_UTF32_LE UTF-32LE + ONIG_ENCODING_EUC_JP EUC-JP + ONIG_ENCODING_EUC_TW EUC-TW + ONIG_ENCODING_EUC_KR EUC-KR + ONIG_ENCODING_EUC_CN EUC-CN + ONIG_ENCODING_SJIS Shift_JIS + ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_CP1251 CP1251 + ONIG_ENCODING_BIG5 Big5 + ONIG_ENCODING_GB18030 GB18030 + + or any OnigEncodingType data address defined by user. + + 6 syntax: address of pattern syntax definition. + + ONIG_SYNTAX_ASIS plain text + ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE + ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE + ONIG_SYNTAX_EMACS Emacs + ONIG_SYNTAX_GREP grep + ONIG_SYNTAX_GNU_REGEX GNU regex + ONIG_SYNTAX_JAVA Java (Sun java.util.regex) + ONIG_SYNTAX_PERL Perl + ONIG_SYNTAX_PERL_NG Perl + named group + ONIG_SYNTAX_RUBY Ruby + ONIG_SYNTAX_ONIGURUMA Oniguruma + ONIG_SYNTAX_DEFAULT default (== ONIG_SYNTAX_ONIGURUMA) + onig_set_default_syntax() + + or any OnigSyntaxType data address defined by user. + + 7 err_info: address for return optional error info. + Use this value as 3rd argument of onig_error_code_to_str(). 
+ + + +# int onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + Create a regex object. + reg object area is not allocated in this function. + + normal return: ONIG_NORMAL + + + +# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) + + This function is deprecated, and it does not allow the case where + the encoding of pattern and target is different. + + Create a regex object. + This function is deluxe version of onig_new(). + + normal return: ONIG_NORMAL + + arguments + 1 reg: return address of regex object. + 2 pattern: regex pattern string. + 3 pattern_end: terminate address of pattern. (pattern + pattern length) + 4 ci: compile time info. + + ci->num_of_elements: number of elements in ci. (current version: 5) + ci->pattern_enc: pattern string character encoding. + ci->target_enc: target string character encoding. + ci->syntax: address of pattern syntax definition. + ci->option: compile time option. + ci->case_fold_flag: character matching case fold bit flag for + ONIG_OPTION_IGNORECASE mode. + + ONIGENC_CASE_FOLD_MIN: minimum + ONIGENC_CASE_FOLD_DEFAULT: minimum + onig_set_default_case_fold_flag() + + 5 err_info: address for return optional error info. + Use this value as 3rd argument of onig_error_code_to_str(). + + + Different character encoding combination is allowed for + the following cases only. + + pattern_enc: ASCII, ISO_8859_1 + target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE + + pattern_enc: UTF16_BE/LE + target_enc: UTF16_LE/BE + + pattern_enc: UTF32_BE/LE + target_enc: UTF32_LE/BE + + +# void onig_free(regex_t* reg) + + Free memory used by regex object. + + arguments + 1 reg: regex object. + + +# void onig_free_body(regex_t* reg) + + Free memory used by regex object. (Except reg oneself.) + + arguments + 1 reg: regex object. 
+ + +# OnigMatchParam* onig_new_match_param() + + Allocate a OnigMatchParam object and initialize the contents by + onig_initialize_match_param(). + + +# void onig_free_match_param(OnigMatchParam* mp) + + Free memory used by a OnigMatchParam object. + + arguments + 1 mp: OnigMatchParam object + + +# void onig_initialize_match_param(OnigMatchParam* mp) + + Set match-param fields to default values. + Match-param is used in onig_match_with_param() and onig_search_with_param(). + + arguments + 1 mp: match-param pointer + + +# int onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* mp, unsigned int limit) + + Set a maximum number of match-stack depth. + 0 means unlimited. + + arguments + 1 mp: match-param pointer + 2 limit: number of limit + + normal return: ONIG_NORMAL + + +# int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) + + Set a retry limit count of a match process. + + arguments + 1 mp: match-param pointer + 2 limit: number of limit + + normal return: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* mp, unsigned long limit) + + Set a retry limit count of a search process. + 0 means unlimited. + + arguments + 1 mp: match-param pointer + 2 limit: number of limit + + normal return: ONIG_NORMAL + + +# int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + Set a function for callouts of contents in progress. + If 0 (NULL) is set, never called in progress. + + arguments + 1 mp: match-param pointer + 2 f: function + + normal return: ONIG_NORMAL + + +# int onig_set_retraction_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + Set a function for callouts of contents in retraction (backtrack). + If 0 (NULL) is set, never called in retraction. 
+ + arguments + 1 mp: match-param pointer + 2 f: function + + normal return: ONIG_NORMAL + + + +# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, + const UChar* range, OnigRegion* region, OnigOptionType option) + + Search string and return search result and matching region. + Do not pass invalid byte string in the regex character encoding. + + normal return: match position offset (i.e. p - str >= 0) + not found: ONIG_MISMATCH (< 0) + error: error code (< 0) + + arguments + 1 reg: regex object + 2 str: target string + 3 end: terminate address of target string + 4 start: search start address of target string + 5 range: search terminate address of target string + in forward search (start <= searched string < range) + in backward search (range <= searched string <= start) + 6 region: address for return group match range info (NULL is allowed) + 7 option: search time option + + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API. + + +# int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + Search string and return search result and matching region. + Do not pass invalid byte string in the regex character encoding. + + arguments + 1-7: same as onig_search() + 8 mp: match parameter values (match_stack_limit, retry_limit_in_match, retry_limit_in_search) + + +# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, + OnigRegion* region, OnigOptionType option) + + Match string and return result and matching region. + Do not pass invalid byte string in the regex character encoding. 
+ + normal return: match length (>= 0) + not match: ONIG_MISMATCH (< 0) + error: error code (< 0) + + arguments + 1 reg: regex object + 2 str: target string + 3 end: terminate address of target string + 4 at: match address of target string + 5 region: address for return group match range info (NULL is allowed) + 6 option: search time option + + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API. + + +# int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + Match string and return result and matching region. + Do not pass invalid byte string in the regex character encoding. + + arguments + 1-6: same as onig_match() + 7 mp: match parameter values (match_stack_limit, retry_limit_in_match, retry_limit_in_search) + + +# int onig_scan(regex_t* reg, const UChar* str, const UChar* end, + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(int, int, OnigRegion*, void*), + void* callback_arg) + + Scan string and callback with matching region. + Do not pass invalid byte string in the regex character encoding. + + normal return: number of matching times + error: error code + interruption: return value of callback function (!= 0) + + arguments + 1 reg: regex object + 2 str: target string + 3 end: terminate address of target string + 4 region: address for return group match range info (NULL is allowed) + 5 option: search time option + 6 scan_callback: callback function (defined by user) + 7 callback_arg: optional argument passed to callback + + +# int onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[]) + + Create a regset object. + All regex objects must have the same character encoding. + All regex objects are prohibited from having the ONIG_OPTION_FIND_LONGEST option. 
+ + arguments + 1 rset: return address of regset object + 2 n: number of regex in regs + 3 regs: array of regex + + normal return: ONIG_NORMAL + + +# int onig_regset_add(OnigRegSet* set, regex_t* reg) + + Add a regex into regset. + The regex object must have the same character encoding with the regset. + The regex object is prohibited from having the ONIG_OPTION_FIND_LONGEST option. + + arguments + 1 set: regset object + 2 reg: regex object + + normal return: ONIG_NORMAL + + +# int onig_regset_replace(OnigRegSet* set, int at, regex_t* reg) + + Replace a regex in regset with another one. + If the reg argument value is NULL, then remove at-th regex. (and indexes of other regexes are changed) + + arguments + 1 set: regset object + 2 at: index of regex (zero origin) + 3 reg: regex object + + normal return: ONIG_NORMAL + + +# void onig_regset_free(OnigRegSet* set) + + Free memory used by regset object and regex objects in the regset. + If the same regex object is registered twice, the situation becomes destructive. + + arguments + 1 set: regset object + + +# int onig_regset_number_of_regex(OnigRegSet* set) + + Returns number of regex objects in the regset. + + arguments + 1 set: regset object + + +# regex_t* onig_regset_get_regex(OnigRegSet* set, int at) + + Returns the regex object corresponding to the at-th regex. + + arguments + 1 set: regset object + 2 at: index of regex array (zero origin) + + +# OnigRegion* onig_regset_get_region(OnigRegSet* set, int at) + + Returns the region object corresponding to the at-th regex. + + arguments + 1 set: regset object + 2 at: index of regex array (zero origin) + + +# int onig_regset_search(OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos) + + Perform a search with regset. 
+ + return value: + normal return: index of match regex (zero origin) + not found: ONIG_MISMATCH (< 0) + error: error code (< 0) + + arguments + 1 set: regset object + 2 str: target string + 3 end: terminate address of target string + 4 start: search start address of target string + 5 range: search terminate address of target string + 6 lead: outer loop element + ONIG_REGSET_POSITION_LEAD (returns most left position) + ONIG_REGSET_REGEX_LEAD (returns most left position) + ONIG_REGSET_PRIORITY_TO_REGEX_ORDER (returns first match regex) + 7 option: search time option + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + 8 rmatch_pos: return address of match position (match_address - str) + + * ONIG_REGSET_POSITION_LEAD and ONIG_REGSET_REGEX_LEAD return the same result. + These differences only appear in search time. + In most cases, ONIG_REGSET_POSITION_LEAD seems to be faster. + + +# int onig_regset_search_with_param(OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos) + + Perform a search with regset and match-params. 
+ + return value: + normal return: index of match regex (zero origin) + not found: ONIG_MISMATCH (< 0) + error: error code (< 0) + + arguments + 1 set: regset object + 2 str: target string + 3 end: terminate address of target string + 4 start: search start address of target string + 5 range: search terminate address of target string + 6 lead: outer loop element + ONIG_REGSET_POSITION_LEAD (returns most left position) + ONIG_REGSET_REGEX_LEAD (returns most left position) + ONIG_REGSET_PRIORITY_TO_REGEX_ORDER (returns first match regex) + 7 option: search time option + ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line + ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line + 8 mps: array of match-params + 9 rmatch_pos: return address of match position (match_address - str) + + +# OnigRegion* onig_region_new(void) + + Create a region. + + +# void onig_region_free(OnigRegion* region, int free_self) + + Free memory used by region. + + arguments + 1 region: target region + 2 free_self: [1: free all, 0: free memory used in region but not self] + + +# void onig_region_copy(OnigRegion* to, OnigRegion* from) + + Copy contents of region. + + arguments + 1 to: target region + 2 from: source region + + +# void onig_region_clear(OnigRegion* region) + + Clear contents of region. + + arguments + 1 region: target region + + +# int onig_region_resize(OnigRegion* region, int n) + + Resize group range area of region. + + normal return: ONIG_NORMAL + + arguments + 1 region: target region + 2 n: new size + + +# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, + int** num_list) + + Return the group number list of the name. + Named subexp is defined by (?<name>....). + + normal return: number of groups for the name. + (ex. /(?<x>..)(?<x>..)/ ==> 2) + name not found: -1 + + arguments + 1 reg: regex object. + 2 name: group name. + 3 name_end: terminate address of group name. + 4 num_list: return list of group number. 
+ + +# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, + OnigRegion *region) + + Return the group number corresponding to the named backref (\k<name>). + If two or more regions for the groups of the name are effective, + the greatest group number among them is returned. + + normal return: group number. + + arguments + 1 reg: regex object. + 2 name: group name. + 3 name_end: terminate address of group name. + 4 region: search/match result region. + + +# int onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) + + Iterate function call for all names. + + normal return: 0 + error: func's return value. + + arguments + 1 reg: regex object. + 2 func: callback function. + func(name, name_end, <number of groups>, <group number's list>, + reg, arg); + if func does not return 0, then iteration is stopped. + 3 arg: argument for func. + + +# int onig_number_of_names(regex_t* reg) + + Return the number of names defined in the pattern. + Multiple definitions of one name are counted as one. + + arguments + 1 reg: regex object. + + +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) + + Return a value of the regex object. + + arguments + 1 reg: regex object. + + +# int onig_number_of_captures(regex_t* reg) + + Return the number of capture groups in the pattern. + + arguments + 1 reg: regex object. + + +# int onig_number_of_capture_histories(regex_t* reg) + + Return the number of capture histories defined in the pattern. + + You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY + is disabled in the pattern syntax. (It is disabled in the default syntax.) + + arguments + 1 reg: regex object. + + + +# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) + + Return the root node of capture history data tree. 
+ + This value is undefined if matching has failed. + + arguments + 1 region: matching result. + + +# int onig_capture_tree_traverse(OnigRegion* region, int at, + int(*func)(int,int,int,int,int,void*), void* arg) + + Traverse and callback in capture history data tree. + + normal return: 0 + error: callback func's return value. + + arguments + 1 region: match region data. + 2 at: callback position. + + ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children. + ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback. + ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children, + and at last callback again. + + 3 func: callback function. + if func does not return 0, then traverse is stopped. + + int func(int group, int beg, int end, int level, int at, + void* arg) + + group: group number + beg: capture start position + end: capture end position + level: nest level (from 0) + at: callback position + ONIG_TRAVERSE_CALLBACK_AT_FIRST + ONIG_TRAVERSE_CALLBACK_AT_LAST + arg: optional callback argument + + 4 arg: optional callback argument. + + +# int onig_noname_group_capture_is_active(regex_t* reg) + + Return noname group capture activity. + + active: 1 + inactive: 0 + + arguments + 1 reg: regex object. + + if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON + --> inactive + + if the regex pattern has a named group + and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON + and option ONIG_OPTION_CAPTURE_GROUP == OFF + --> inactive + + else --> active + + +# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) + + Return previous character head address. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + Return left-adjusted head address of a character. 
+ + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + Return right-adjusted head address of a character. + + arguments + 1 enc: character encoding + 2 start: string address + 3 s: target address of string + + +# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) + + Return number of characters in the string. + + +# int onigenc_strlen_null(OnigEncoding enc, const UChar* s) + + Return number of characters in the string. + Do not pass invalid byte string in the character encoding. + + +# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) + + Return number of bytes in the string. + Do not pass invalid byte string in the character encoding. + + +# int onig_set_default_syntax(OnigSyntaxType* syntax) + + Set default syntax. + + arguments + 1 syntax: address of pattern syntax definition. + + +# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) + + Copy syntax. + + arguments + 1 to: destination address. + 2 from: source address. + + +# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax) +# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax) + +# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) + + Get/Set elements of the syntax. + + arguments + 1 syntax: syntax + 2 op, op2, behavior, options: value of element. + + +# void onig_copy_encoding(OnigEncoding to, OnigEncoding from) + + Copy encoding. + + arguments + 1 to: destination address. + 2 from: source address. 
+ + +# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, + OnigCodePoint code) + + Set a variable meta character to the code point value. + Except for an escape character, this meta character specification + does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not effective + in the syntax. (It is not effective in the built-in syntaxes.) + + normal return: ONIG_NORMAL + + arguments + 1 syntax: target syntax + 2 what: specifies which meta character it is. + + ONIG_META_CHAR_ESCAPE + ONIG_META_CHAR_ANYCHAR + ONIG_META_CHAR_ANYTIME + ONIG_META_CHAR_ZERO_OR_ONE_TIME + ONIG_META_CHAR_ONE_OR_MORE_TIME + ONIG_META_CHAR_ANYCHAR_ANYTIME + + 3 code: meta character or ONIG_INEFFECTIVE_META_CHAR. + + +# OnigCaseFoldType onig_get_default_case_fold_flag() + + Get default case fold flag. + + +# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) + + Set default case fold flag. + + 1 case_fold_flag: case fold flag + + +# unsigned int onig_get_match_stack_limit_size(void) + + Return the maximum match-stack size. + (default: 0 == unlimited) + + +# int onig_set_match_stack_limit_size(unsigned int size) + + Set the maximum match-stack size. + (size = 0: unlimited) + + normal return: ONIG_NORMAL + + +# unsigned long onig_get_retry_limit_in_match(void) + + Return the limit of retry counts in a matching process. + (default: 10000000) + + normal return: current limit value + + +# unsigned long onig_get_retry_limit_in_search(void) + + Return the limit of retry counts in a search process. + 0 means unlimited. + (default: 0) + + normal return: current limit value + + +# int onig_set_retry_limit_in_match(unsigned long limit) + + Set the limit of retry counts in a matching process. + + normal return: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search(unsigned long limit) + + Set a retry limit count of a search process. + 0 means unlimited. 
+ + normal return: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_progress_callout(void) + + Get a function for callouts of contents in progress. + + +# int onig_set_progress_callout(OnigCalloutFunc f) + + Set a function for callouts of contents in progress. + If 0 (NULL) is set, never called in progress. + + normal return: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_retraction_callout(void) + + Get a function for callouts of contents in retraction (backtrack). + + +# int onig_set_retraction_callout(OnigCalloutFunc f) + + Set a function for callouts of contents in retraction (backtrack). + If 0 (NULL) is set, never called in retraction. + + normal return: ONIG_NORMAL + + +# int onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) + + Define a new Unicode property. + (This function is not thread safe.) + + arguments + 1 name: property name (ASCII only. The characters ' ', '-', '_' are ignored.) + 2 ranges: property code point ranges + (first element is number of ranges.) + + [num-of-ranges, 1st-range-start, 1st-range-end, 2nd-range-start... ] + + * Don't destroy the ranges after having called this function. + + normal return: ONIG_NORMAL + + +# unsigned int onig_get_parse_depth_limit(void) + + Return the maximum depth of parser recursion. + (default: DEFAULT_PARSE_DEPTH_LIMIT defined in regint.h. Currently 4096.) + + +# int onig_set_parse_depth_limit(unsigned int depth) + + Set the maximum depth of parser recursion. + (depth = 0: Set to the default value defined in regint.h.) + + normal return: ONIG_NORMAL + + +# int onig_end(void) + + The use of this library is finished. + + normal return: ONIG_NORMAL + + It is not allowed to use regex objects which were created + before the onig_end() call. + + +# const char* onig_version(void) + + Return version string. (ex. 
"5.0.3") + +// END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/API.ja b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/API.ja new file mode 100644 index 000000000..044d1256e --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/API.ja @@ -0,0 +1,989 @@ +鬼車インターフェース Version 6.9.5 2020/02/19 + +#include <oniguruma.h> + + +# int onig_initialize(OnigEncoding use_encodings[], int num_encodings) + + ライブラリの初期化 + 最初に呼び出す必要がある。 + + * onig_init() は廃止 + + 引数 + 1 use_encodings: 使用する文字エンコーディングの配列 + 2 num_encodings: 文字エンコーディングの数 + + +# int onig_error_code_to_str(UChar* err_buf, int err_code, ...) + + エラーメッセージを取得する。 + + この関数を、onig_new()の結果に対して呼び出す場合には、onig_new()のpattern引数を + メモリ解放するよりも前に呼び出さなければならない。 + + 正常終了戻り値: エラーメッセージ文字列のバイト長 + + 引数 + 1 err_buf: エラーメッセージを格納する領域 + (必要なサイズ: ONIG_MAX_ERROR_MESSAGE_LEN) + 2 err_code: エラーコード + 3 err_info (optional): onig_new()のerr_info + + +# void onig_set_warn_func(OnigWarnFunc func) + + 警告通知関数をセットする。 + + 警告: + '[', '-', ']' in character class without escape. + ']' in pattern without escape. + + 引数 + 1 func: 警告関数 void (*func)(char* warning_message) + + +# void onig_set_verb_warn_func(OnigWarnFunc func) + + 詳細警告通知関数をセットする。 + + 詳細警告: + redundant nested repeat operator. 
+ + 引数 + 1 func: 詳細警告関数 void (*func)(char* warning_message) + + +# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + 正規表現オブジェクト(regex)を作成する。 + + 正常終了戻り値: ONIG_NORMAL + + 引数 + 1 reg: 作成された正規表現オブジェクトを返すアドレス + 2 pattern: 正規表現パターン文字列 + 3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length) + 4 option: 正規表現コンパイル時オプション + + ONIG_OPTION_NONE オプションなし + ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' + ONIG_OPTION_MULTILINE '.'が改行にマッチする + ONIG_OPTION_IGNORECASE 曖昧マッチ オン + ONIG_OPTION_EXTEND パターン拡張形式 + ONIG_OPTION_FIND_LONGEST 最長マッチ + ONIG_OPTION_FIND_NOT_EMPTY 空マッチを無視 + ONIG_OPTION_NEGATE_SINGLELINE + ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVAで + デフォルトで有効なONIG_OPTION_SINGLELINEをクリアする。 + + ONIG_OPTION_DONT_CAPTURE_GROUP 名前付き捕獲式集合のみ捕獲 + ONIG_OPTION_CAPTURE_GROUP 名前無し捕獲式集合も捕獲 + ONIG_OPTION_WORD_IS_ASCII wordがASCIIのみ (\w, \p{Word}, [[:word:]]) + word boundがASCIIのみ (\b) + ONIG_OPTION_DIGIT_IS_ASCII digitがASCIIのみ (\d, \p{Digit}, [[:digit:]]) + ONIG_OPTION_SPACE_IS_ASCII spaceがASCIIのみ (\s, \p{Space}, [[:space:]]) + ONIG_OPTION_POSIX_IS_ASCII POSIXプロパティがASCIIのみ + (word, digit, spaceを全て含んでいる) + (alnum, alpha, blank, cntrl, digit, graph, + lower, print, punct, space, upper, xdigit, + word) + ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER 拡張書記素房モード + ONIG_OPTION_TEXT_SEGMENT_WORD 単語モード + + 5 enc: 文字エンコーディング + + ONIG_ENCODING_ASCII ASCII + ONIG_ENCODING_ISO_8859_1 ISO 8859-1 + ONIG_ENCODING_ISO_8859_2 ISO 8859-2 + ONIG_ENCODING_ISO_8859_3 ISO 8859-3 + ONIG_ENCODING_ISO_8859_4 ISO 8859-4 + ONIG_ENCODING_ISO_8859_5 ISO 8859-5 + ONIG_ENCODING_ISO_8859_6 ISO 8859-6 + ONIG_ENCODING_ISO_8859_7 ISO 8859-7 + ONIG_ENCODING_ISO_8859_8 ISO 8859-8 + ONIG_ENCODING_ISO_8859_9 ISO 8859-9 + ONIG_ENCODING_ISO_8859_10 ISO 8859-10 + ONIG_ENCODING_ISO_8859_11 ISO 8859-11 + ONIG_ENCODING_ISO_8859_13 ISO 
8859-13 + ONIG_ENCODING_ISO_8859_14 ISO 8859-14 + ONIG_ENCODING_ISO_8859_15 ISO 8859-15 + ONIG_ENCODING_ISO_8859_16 ISO 8859-16 + ONIG_ENCODING_UTF8 UTF-8 + ONIG_ENCODING_UTF16_BE UTF-16BE + ONIG_ENCODING_UTF16_LE UTF-16LE + ONIG_ENCODING_UTF32_BE UTF-32BE + ONIG_ENCODING_UTF32_LE UTF-32LE + ONIG_ENCODING_EUC_JP EUC-JP + ONIG_ENCODING_EUC_TW EUC-TW + ONIG_ENCODING_EUC_KR EUC-KR + ONIG_ENCODING_EUC_CN EUC-CN + ONIG_ENCODING_SJIS Shift_JIS + ONIG_ENCODING_KOI8_R KOI8-R + ONIG_ENCODING_CP1251 CP1251 + ONIG_ENCODING_BIG5 Big5 + ONIG_ENCODING_GB18030 GB18030 + + または、ユーザが定義したOnigEncodingTypeデータのアドレス + + 6 syntax: 正規表現パターン文法定義 + + ONIG_SYNTAX_ASIS plain text + ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE + ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE + ONIG_SYNTAX_EMACS Emacs + ONIG_SYNTAX_GREP grep + ONIG_SYNTAX_GNU_REGEX GNU regex + ONIG_SYNTAX_JAVA Java (Sun java.util.regex) + ONIG_SYNTAX_PERL Perl + ONIG_SYNTAX_PERL_NG Perl + 名前付き捕獲式集合 + ONIG_SYNTAX_RUBY Ruby + ONIG_SYNTAX_ONIGURUMA Oniguruma + ONIG_SYNTAX_DEFAULT default (== ONIG_SYNTAX_ONIGURUMA) + onig_set_default_syntax() + + または、ユーザが定義したOnigSyntaxTypeデータのアドレス + + 7 err_info: エラー情報を返すためのアドレス + onig_error_code_to_str()の三番目の引数として使用する + + + +# int onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* err_info) + + 正規表現オブジェクト(regex)を作成する。 + regの領域を内部で割り当てない。 + + 正常終了戻り値: ONIG_NORMAL + + + +# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) + + この関数は廃止予定。 + パターンと対象文字列の文字エンコーディングが異なる場合を許さなくなった。 + + 正規表現オブジェクト(regex)を作成する。 + この関数は、onig_new()のデラックス版。 + + 正常終了戻り値: ONIG_NORMAL + + 引数 + 1 reg: 作成された正規表現オブジェクトを返すアドレス + 2 pattern: 正規表現パターン文字列 + 3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length) + 4 ci: コンパイル情報 + + ci->num_of_elements: ciの要素数 (現在の版では: 5) + ci->pattern_enc: パターン文字列の文字エンコーディング + ci->target_enc: 対象文字列の文字エンコーディング + 
ci->syntax: 正規表現パターン文法定義 + ci->option: 正規表現コンパイル時オプション + ci->case_fold_flag: ONIG_OPTION_IGNORECASEモードでの + 文字曖昧マッチ指定ビットフラグ + + ONIGENC_CASE_FOLD_MIN: 最小 + ONIGENC_CASE_FOLD_DEFAULT: 最小 + onig_set_default_case_fold_flag() + + 5 err_info: エラー情報を返すためのアドレス + onig_error_code_to_str()の三番目の引数として使用する + + + 異なる文字エンコーディングの組み合わせは、以下の場合にのみ許される。 + + pattern_enc: ASCII, ISO_8859_1 + target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE + + pattern_enc: UTF16_BE/LE + target_enc: UTF16_LE/BE + + pattern_enc: UTF32_BE/LE + target_enc: UTF32_LE/BE + + +# void onig_free(regex_t* reg) + + 正規表現オブジェクトのメモリを解放する。 + + 引数 + 1 reg: 正規表現オブジェクト + + +# void onig_free_body(regex_t* reg) + + 正規表現オブジェクトのメモリを解放する。(reg自身の領域を除いて) + + 引数 + 1 reg: 正規表現オブジェクト + + +# OnigMatchParam* onig_new_match_param() + + OnigMatchParamオブジェクトを生成し、onig_initialize_match_param()を使用して + 中身を初期化する。 + + +# void onig_free_match_param(OnigMatchParam* mp) + + OnigMatchParamオブジェクトで使用しているメモリを開放する。 + + 引数 + 1 mp: OnigMatchParamオブジェクト + + +# void onig_initialize_match_param(OnigMatchParam* mp) + + マッチパラメタ構造体にデフォルト値をセットする。 + マッチパラメタは、onig_match_with_param(), onig_search_with_param()で + 使用される。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + + +# int onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* mp, unsigned int limit) + + マッチスタックの最大深さをセットする。 + 0は、無制限を表す。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 limit: 制限数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) + + 一回のマッチでのリトライ数の制限値をセットする。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 limit: 制限回数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* mp, unsigned long limit) + + 一回の検索でのリトライ数の制限値をセットする。 + 0は無制限を意味する。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 limit: 制限回数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + 前進時の内容の呼び出し(callouts)で呼び出される関数をセットする。 + もし0(NULL)がセットされると、前進時に呼び出しは起こらない。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 
2 f: 呼び出される関数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retraction_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + 後退時の内容の呼び出し(callouts)で呼び出される関数をセットする。 + もし0(NULL)がセットされると、後退時に呼び出しは起こらない。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 f: 呼び出される関数 + + 正常終了戻り値: ONIG_NORMAL + + + +# int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, + const UChar* range, OnigRegion* region, OnigOptionType option) + + 正規表現で文字列を検索し、検索結果とマッチ領域を返す。 + 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。 + + 正常終了戻り値: マッチ位置 (p - str >= 0) + 検索失敗: ONIG_MISMATCH (< 0) + + 引数 + 1 reg: 正規表現オブジェクト + 2 str: 検索対象文字列 + 3 end: 検索対象文字列の終端アドレス + 4 start: 検索対象文字列の検索先頭位置アドレス + 5 range: 検索対象文字列の検索終了位置アドレス + 前方探索 (start <= 探索される文字列 < range) + 後方探索 (range <= 探索される文字列 <= start) + 6 region: マッチ領域情報(region) (NULLも許される) + 7 option: 検索時オプション + + ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない + ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない + ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする + + +# int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + 正規表現で文字列を検索し、検索結果とマッチ領域を返す。 + 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。 + + 引数 + 1-7: onig_search()と同じ + 8 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match, retry_limit_in_search) + + +# int onig_match(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, OnigOptionType option) + + 文字列の指定位置でマッチングを行い、結果とマッチ領域を返す。 + 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。 + + 正常終了戻り値: マッチしたバイト長 (>= 0) + not match: ONIG_MISMATCH ( < 0) + + 引数 + 1 reg: 正規表現オブジェクト + 2 str: 検索対象文字列 + 3 end: 検索対象文字列の終端アドレス + 4 at: 検索対象文字列の検索アドレス + 5 region: マッチ領域情報(region) (NULLも許される) + 6 option: 検索時オプション + + ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない + ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない + ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする + + +# int 
onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) + + 文字列の指定位置でマッチングを行い、結果とマッチ領域を返す。 + 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。 + + 引数 + 1-6: onig_match()と同じ + 7 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match, retry_limit_in_search) + + +# int onig_scan(regex_t* reg, const UChar* str, const UChar* end, + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(int, int, OnigRegion*, void*), + void* callback_arg) + + 正規表現で文字列をスキャンして、マッチングする毎にコールバック関数を呼び出す。 + 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。 + + 正常終了: マッチ回数 (0回も含める) + エラー: エラーコード (< 0) + 中断: コールバック関数が0以外の戻り値を返したとき、その値を戻り値として中断 + + 引数 + 1 reg: 正規表現オブジェクト + 2 str: 検索対象文字列 + 3 end: 検索対象文字列の終端アドレス + 4 region: マッチ領域情報(region) (NULLも許される) + 5 option: 検索時オプション + 6 scan_callback: コールバック関数 + 7 callback_arg: コールバック関数に渡される付加引数値 + + +# int onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[]) + + regsetオブジェクトを生成する。 + 全ての正規表現オブジェクトは、同じ文字エンコーディングでなければならない。 + 全ての正規表現オブジェクトは、ONIG_OPTION_FIND_LONGESTオプションでコンパイルされていてはならない。 + + 引数 + 1 rset: regsetオブジェクトを返すためのアドレス + 2 n: 正規表現の個数 + 3 regs: 正規表現オブジェクトの配列 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_regset_add(OnigRegSet* set, regex_t* reg) + + regsetオブジェクトに正規表現を追加する。 + 正規表現オブジェクトは、regsetと同じ文字エンコーディングでなければならない。 + 正規表現オブジェクトは、ONIG_OPTION_FIND_LONGESTオプションでコンパイルされていてはならない。 + + 引数 + 1 set: regsetオブジェクト + 2 reg: 正規表現オブジェクト + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_regset_replace(OnigRegSet* set, int at, regex_t* reg) + + regsetの中の一個の正規表現オブジェクトを別のものに変更する。 + 若しreg引数の値がNULLであれば、at番目の正規表現オブジェクトを外す。(そして、以降の正規表現オブジェクトのインデックスは変化する) + + 引数 + 1 set: regsetオブジェクト + 2 at: 変更する場所のインデックス + 3 reg: 正規表現オブジェクト + + 正常終了戻り値: ONIG_NORMAL + + +# void onig_regset_free(OnigRegSet* set) + + regsetオブジェクトとその中の正規表現オブジェクトの使用メモリを解放する。 + 若し、同一の正規表現オブジェクトを重複して登録していれば、破壊的な状況になる。 + + 引数 + 1 set: regsetオブジェクト + + +# int onig_regset_number_of_regex(OnigRegSet* set) + + 
regsetの中の正規表現オブジェクトの個数を返す。 + + 引数 + 1 set: regsetオブジェクト + + +# regex_t* onig_regset_get_regex(OnigRegSet* set, int at) + + regsetのat番目の正規表現を返す。 + + 引数 + 1 set: regsetオブジェクト + 2 at: 正規表現オブジェクトのインデックス (ゼロ開始) + + +# OnigRegion* onig_regset_get_region(OnigRegSet* set, int at) + + regsetのat番目の正規表現に対応する領域を返す。 + + 引数 + 1 set: regsetオブジェクト + 2 at: 正規表現オブジェクトのインデックス (ゼロ開始) + + +# int onig_regset_search(OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos) + + regsetによる検索を実行する。 + + 戻り値: + 検索成功: マッチした正規表現オブジェクトのインデックス (ゼロ開始) + 検索失敗: ONIG_MISMATCH (< 0) + エラー: エラーコード (< 0) + + 引数 + 1 set: regsetオブジェクト + 2 str: 検索対象文字列 + 3 end: 検索対象文字列の終端アドレス + 4 start: 検索対象文字列の検索先頭位置アドレス + 5 range: 検索対象文字列の検索終了位置アドレス + (start <= 探索される文字列 < range) + 6 lead: 外側のループ要素 + ONIG_REGSET_POSITION_LEAD (最左位置でマッチした結果を返す) + ONIG_REGSET_REGEX_LEAD (最左位置でマッチした結果を返す) + ONIG_REGSET_PRIORITY_TO_REGEX_ORDER (最初にマッチした正規表現の結果を返す) + 7 option: 検索時オプション + ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない + ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない + 8 rmatch_pos: マッチした位置を返すためのアドレス (match_address - str) + + * ONIG_REGSET_POSITION_LEADとONIG_REGSET_REGEX_LEADは同じ結果を返す。 + これらの違いは検索時間にしか現れない。 + ほとんどの場合、ONIG_REGSET_POSITION_LEADのほうが速いと思われる。 + + +# int onig_regset_search_with_param(OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos) + + regsetとOnigMatchParamオブジェクトによる検索を実行する。 + + 戻り値: + 検索成功: マッチした正規表現オブジェクトのインデックス (ゼロ開始) + 検索失敗: ONIG_MISMATCH (< 0) + エラー: エラーコード (< 0) + + 引数 + 1 set: regsetオブジェクト + 2 str: 検索対象文字列 + 3 end: 検索対象文字列の終端アドレス + 4 start: 検索対象文字列の検索先頭位置アドレス + 5 range: 検索対象文字列の検索終了位置アドレス + (start <= 探索される文字列 < range) + 6 lead: 外側のループ要素 + ONIG_REGSET_POSITION_LEAD (最左位置でマッチした結果を返す) + ONIG_REGSET_REGEX_LEAD (最左位置でマッチした結果を返す) + ONIG_REGSET_PRIORITY_TO_REGEX_ORDER 
(最初にマッチした正規表現の結果を返す) + 7 option: 検索時オプション + ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない + ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない + 8 mps: OnigMatchParamオブジェクトの配列 + 9 rmatch_pos: マッチした位置を返すためのアドレス (match_address - str) + + +# OnigRegion* onig_region_new(void) + + マッチ領域情報(region)を作成する。 + + +# void onig_region_free(OnigRegion* region, int free_self) + + マッチ領域情報(region)で使用されているメモリを解放する。 + + 引数 + 1 region: マッチ領域情報オブジェクト + 2 free_self: [1: region自身を含めて全て解放, 0: region自身は解放しない] + + +# void onig_region_copy(OnigRegion* to, OnigRegion* from) + + マッチ領域情報(region)を複製する。 + + 引数 + 1 to: 対象領域 + 2 from: 元領域 + + +# void onig_region_clear(OnigRegion* region) + + マッチ領域情報(region)の中味をクリアする。 + + 引数 + 1 region: 対象領域 + + +# int onig_region_resize(OnigRegion* region, int n) + + マッチ領域情報(region)の捕獲式集合(グループ)数を変更する。 + + 正常終了戻り値: ONIG_NORMAL + + 引数 + 1 region: 対象領域 + 2 n: 新しいサイズ + + +# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, + int** num_list) + + 指定した名前に対する名前付き捕獲式集合(グループ)の + グループ番号リストを返す。 + 名前付き捕獲式集合は、(?<name>....)によって定義できる。 + + 正常終了戻り値: 指定された名前に対するグループ数 + (例 /(?<x>..)(?<x>..)/ ==> 2) + 名前に対するグループが存在しない: -1 + + 引数 + 1 reg: 正規表現オブジェクト + 2 name: 捕獲式集合(グループ)名 + 3 name_end: 捕獲式集合(グループ)名の終端アドレス + 4 num_list: 番号リストを返すアドレス + + +# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, + OnigRegion *region) + + 指定された名前の後方参照(\k<name>)に対する捕獲式集合(グループ)の番号を返す。 + 名前に対して、複数のマッチ領域が有効であれば、その中の最大の番号を返す。 + 名前に対する捕獲式集合が一個しかないときには、対応するマッチ領域が有効か + どうかに関係なく、その番号を返す。(従って、regionにはNULLを渡してもよい。) + + 正常終了戻り値: 番号 + + 引数 + 1 reg: 正規表現オブジェクト + 2 name: 捕獲式集合(グループ)名 + 3 name_end: 捕獲式集合(グループ)名の終端アドレス + 4 region: search/match結果のマッチ領域 + + +# int onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) + + 全ての名前に対してコールバック関数呼び出しを実行する。 + + 正常終了戻り値: 0 + エラー: コールバック関数の戻り値 + + 引数 + 1 reg: 正規表現オブジェクト + 2 func: コールバック関数 + func(name, name_end, <number of groups>, <group number's list>, + reg, arg); + + 
funcが0以外の値を返すと、それ以降のコールバックは行なわずに + 終了する。 + + 3 arg: funcに対する追加引数 + + +# int onig_number_of_names(regex_t* reg) + + パターン中で定義された名前の数を返す。 + 一個の名前の多重定義は一個と看做す。 + + 引数 + 1 reg: 正規表現オブジェクト + + +# OnigEncoding onig_get_encoding(regex_t* reg) +# OnigOptionType onig_get_options(regex_t* reg) +# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg) +# OnigSyntaxType* onig_get_syntax(regex_t* reg) + + 正規表現オブジェクトに対して、対応する値を返す。 + + 引数 + 1 reg: 正規表現オブジェクト + + +# int onig_number_of_captures(regex_t* reg) + + パターン中で定義された捕獲グループの数を返す。 + + 引数 + 1 reg: 正規表現オブジェクト + + +# int onig_number_of_capture_histories(regex_t* reg) + + パターン中で定義された捕獲履歴(?@...)の数を返す。 + + 使用する文法で捕獲履歴機能が有効(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY) + でなければ、捕獲履歴機能は使用できない。 + + 引数 + 1 reg: 正規表現オブジェクト + + +# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) + + 捕獲履歴データのルートノードを返す。 + + マッチが失敗している場合には、この値は不定である。 + + 引数 + 1 region: マッチ領域 + + +# int onig_capture_tree_traverse(OnigRegion* region, int at, + int(*func)(int,int,int,int,int,void*), void* arg) + + 捕獲履歴データ木を巡回してコールバックする。 + + 正常終了戻り値: 0 + エラー: コールバック関数の戻り値 + + 引数 + 1 region: マッチ領域 + 2 at: コールバックを行なうタイミング + + ONIG_TRAVERSE_CALLBACK_AT_FIRST: + 最初にコールバックして、子ノードを巡回 + ONIG_TRAVERSE_CALLBACK_AT_LAST: + 子ノードを巡回して、コールバック + ONIG_TRAVERSE_CALLBACK_AT_BOTH: + 最初にコールバックして、子ノードを巡回、最後にもう一度コールバック + + 3 func: コールバック関数 + funcが0以外の値を返すと、それ以降の巡回は行なわずに + 終了する。 + + int func(int group, int beg, int end, int level, int at, + void* arg) + group: グループ番号 + beg: マッチ開始位置 + end: マッチ終了位置 + level: ネストレベル (0から) + at: コールバックが呼び出されたタイミング + ONIG_TRAVERSE_CALLBACK_AT_FIRST + ONIG_TRAVERSE_CALLBACK_AT_LAST + arg: 追加引数 + + 4 arg: funcに対する追加引数 + + +# int onig_noname_group_capture_is_active(regex_t* reg) + + 名前なし式集合の捕獲機能が有効かどうかを返す。 + + 有効: 1 + 無効: 0 + + 引数 + 1 reg: 正規表現オブジェクト + + + オプションのONIG_OPTION_DONT_CAPTURE_GROUPがON --> 無効 + + パターンが名前つき式集合を使用している + AND 使用文法で、ONIG_SYN_CAPTURE_ONLY_NAMED_GROUPがON + AND オプションのONIG_OPTION_CAPTURE_GROUPがOFF + --> 無効 + + 上記以外の場合 --> 有効 + + +# UChar* 
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) + + 文字一個分前の文字列位置を返す。 + + 引数 + 1 enc: 文字エンコーディング + 2 start: 文字列の先頭アドレス + 3 s: 文字列中の位置 + + +# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + 文字の先頭バイト位置になるように左側に調整したアドレスを返す。 + + 引数 + 1 enc: 文字エンコーディング + 2 start: 文字列の先頭アドレス + 3 s: 文字列中の位置 + + +# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, + const UChar* start, const UChar* s) + + 文字の先頭バイト位置になるように右側に調整したアドレスを返す。 + + 引数 + 1 enc: 文字エンコーディング + 2 start: 文字列の先頭アドレス + 3 s: 文字列中の位置 + + +# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) + + 文字列の文字数を返す。 + + +# int onigenc_strlen_null(OnigEncoding enc, const UChar* s) + + 文字列の文字数を返す。 + 文字エンコーディングに対して、不正な文字列を渡してはいけない。 + + +# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) + + 文字列のバイト数を返す。 + 文字エンコーディングに対して、不正な文字列を渡してはいけない。 + + +# int onig_set_default_syntax(OnigSyntaxType* syntax) + + デフォルトの正規表現パターン文法をセットする。 + + 引数 + 1 syntax: 正規表現パターン文法 + + +# void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) + + 正規表現パターン文法をコピーする。 + + 引数 + 1 to: 対象 + 2 from: 元 + + +# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax) +# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax) +# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax) + +# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) + + 正規表現パターン文法の要素を参照/取得する。 + + 引数 + 1 syntax: 正規表現パターン文法 + 2 op, op2, behavior, options: 要素の値 + + +# void onig_copy_encoding(OnigEncoding to, OnigEncoding from) + + 文字エンコーディングをコピーする。 + + 引数 + 1 to: 対象 + 2 from: 元 + + +# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, + 
OnigCodePoint code) + + メタ文字を指定したコードポイント値にセットする。 + ONIG_SYN_OP_VARIABLE_META_CHARACTERSが正規表現パターン文法で有効に + なっていない場合には、エスケープ文字を除いて、ここで指定したメタ文字は + 機能しない。(組込みの文法では有効にしていない。) + + 正常終了戻り値: ONIG_NORMAL + + 引数 + 1 syntax: 対象文法 + 2 what: メタ文字機能の指定 + + ONIG_META_CHAR_ESCAPE + ONIG_META_CHAR_ANYCHAR + ONIG_META_CHAR_ANYTIME + ONIG_META_CHAR_ZERO_OR_ONE_TIME + ONIG_META_CHAR_ONE_OR_MORE_TIME + ONIG_META_CHAR_ANYCHAR_ANYTIME + + 3 code: メタ文字のコードポイント または ONIG_INEFFECTIVE_META_CHAR. + + +# OnigCaseFoldType onig_get_default_case_fold_flag() + + デフォルトのcase foldフラグを取得する。 + + +# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) + + デフォルトのcase foldフラグをセットする。 + + 引数 + 1 case_fold_flag: case foldフラグ + + +# unsigned int onig_get_match_stack_limit_size(void) + + マッチスタックサイズの最大値を返す。 + (デフォルト: 0 == 無制限) + + +# int onig_set_match_stack_limit_size(unsigned int size) + + マッチスタックサイズの最大値を指定する。 + (size = 0: 無制限) + + 正常終了戻り値: ONIG_NORMAL + + +# unsigned long onig_get_retry_limit_in_match(void) + + 一回のマッチングでのリトライ数の制限値を返す。 + (デフォルト: 10000000) + + 正常終了戻り値: 制限値 + + +# unsigned long onig_get_retry_limit_in_search(void) + + 一回の検索でのリトライ数の制限値を返す。 + 0は無制限を意味する。 + (デフォルト: 0) + + 正常終了戻り値: 制限値 + + +# int onig_set_retry_limit_in_match(unsigned long limit) + + 一回のマッチング内でのリトライ数の制限値を指定する。 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search(unsigned long limit) + + 一回の検索でのリトライ数の制限値をセットする。 + 0は無制限を意味する。 + (デフォルト: 0) + + 正常終了戻り値: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_progress_callout(void) + + 前進時の内容の呼び出しで呼び出される関数を返す。 + + +# int onig_set_progress_callout(OnigCalloutFunc f) + + 前進時の内容の呼び出しで呼び出される関数を指定する。 + もし0(NULL)を指定すると、前進時の内容の呼び出しで呼び出しは起こらない。 + + 正常終了戻り値: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_retraction_callout(void) + + 後退時の内容の呼び出しで呼び出される関数を返す。 + + +# int onig_set_retraction_callout(OnigCalloutFunc f) + + 後退時の内容の呼び出しで呼び出される関数を指定する。 + もし0(NULL)を指定すると、後退時の内容の呼び出しで呼び出しは起こらない。 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_unicode_define_user_property(const char* name, 
OnigCodePoint* ranges) + + 新しいUnicodeプロパティを定義する。 + (この関数はスレッドセーフではない) + + 引数 + 1 name: プロパティ名 (ASCIIコードのみ。 文字 ' ', '-', '_' は無視される。) + 2 ranges: プロパティコードポイント範囲 + (最初の要素は範囲の数) + + [num-of-ranges, 1st-range-start, 1st-range-end, 2nd-range-start... ] + + * この関数を呼んだ後で、rangesを変更/破壊しないこと + + 正常終了戻り値: ONIG_NORMAL + + +# unsigned int onig_get_parse_depth_limit(void) + + 再帰パース処理の最大深さを返す。 + (デフォルト: regint.h で定義されている DEFAULT_PARSE_DEPTH_LIMIT。現在は 4096) + + +# int onig_set_parse_depth_limit(unsigned int depth) + + 再帰パース処理の最大深さを指定する。 + (depth = 0: regint.h で定義されたデフォルト値に設定する。) + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_end(void) + + ライブラリの使用を終了する。 + + 正常終了戻り値: ONIG_NORMAL + + onig_init()を再度呼び出しても、以前に作成した正規表現オブジェクト + を使用することはできない。 + + +# const char* onig_version(void) + + バージョン文字列を返す。(例 "5.0.3") + +// END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.API b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.API new file mode 100644 index 000000000..057a054d8 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.API @@ -0,0 +1,385 @@ +Callouts API Version 6.8.2 2018/06/08 + +#include <oniguruma.h> + +(1) Callout functions +(2) Set/Get functions for Callouts of contents +(3) Set functions for Callouts of name +(4) User data +(5) Get values from OnigCalloutArgs +(6) Tag +(7) Callout data (used in callout functions) +(8) Callout data (used in applications) +(9) Miscellaneous functions + + +(1) Callout functions + + type: OnigCalloutFunc + + typedef int (*OnigCalloutFunc)(OnigCalloutArgs* args, void* user_data); + + If 0 (NULL) is set as a callout function value, it is never called. + + + * Callout function return value (int) + + ONIG_CALLOUT_FAIL (== 1): fail + ONIG_CALLOUT_SUCCESS (== 0): success + less than -1: error code (terminate search/match) + + ONIG_CALLOUT_FAIL/SUCCESS values are ignored in retractions, + because retraction is a part of recovery process after failure. 
+ + * Example of callout function + + extern int always_success(OnigCalloutArgs* args, void* user_data) + { + return ONIG_CALLOUT_SUCCESS; + } + + + +(2) Set/Get functions for Callouts of contents + +# OnigCalloutFunc onig_get_progress_callout(void) + + Get a function for callouts of contents in progress. + + +# int onig_set_progress_callout(OnigCalloutFunc f) + + Set a function for callouts of contents in progress. + This value set in onig_initialize_match_param() as a default + callout function. + + normal return: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_retraction_callout(void) + + Get a function for callouts of contents in retraction (backtrack). + + +# int onig_set_retraction_callout(OnigCalloutFunc f) + + Set a function for callouts of contents in retraction (backtrack). + This value set in onig_initialize_match_param() as a default + callout function. + + normal return: ONIG_NORMAL + + +# int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + Set a function for callouts of contents in progress. + + arguments + 1 mp: match-param pointer + 2 f: function + + normal return: ONIG_NORMAL + + +# int onig_set_retraction_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + Set a function for callouts of contents in retraction (backtrack). + + arguments + 1 mp: match-param pointer + 2 f: function + + normal return: ONIG_NORMAL + + + +(3) Set functions for Callouts of name + +# int onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int opt_arg_num, OnigValue opt_defaults[]) + + Set a function for callouts of name. + Allowed name string characters: _ A-Z a-z 0-9 (* first character: _ A-Z a-z) + + (enc, name) pair is used as key value to find callout function. + You have to call this function for every encoding used in your applications. 
+ But if enc is ASCII compatible and (enc, name) entry is not found, + then (ASCII, name) entry is used. + Therefore, if you use ASCII compatible encodings only, it is enough to call + this function one time for (ASCII, name). + + arguments + 1 enc: character encoding + 2 type: callout type (currently ONIG_CALLOUT_TYPE_SINGLE only supported) + 3 name: name string address (the string is encoded by enc) + 4 name_end: name string end address + 5 callout_in: direction (ONIG_CALLOUT_IN_PROGRESS/RETRACTION/BOTH) + 6 callout: callout function + 7 end_callout: * not used currently (set 0) + 8 arg_num: number of arguments (*limit by ONIG_CALLOUT_MAX_ARGS_NUM == 4) + 9 arg_types: type array of arguments + 10 opt_arg_num: number of optional arguments + 11 opt_defaults: default values array of optional arguments + + normal return: ONIG_NORMAL + error: + ONIGERR_INVALID_CALLOUT_NAME + ONIGERR_INVALID_ARGUMENT + ONIGERR_INVALID_CALLOUT_ARG + + + +(4) User data + +# int onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data) + + Set a user_data value which passed as second argument of callout. + + normal return: ONIG_NORMAL + + + +(5) Get values from OnigCalloutArgs + +# int onig_get_callout_num_by_callout_args(OnigCalloutArgs* args) + + Returns callout number of this callout. + "Callout number" is an identifier of callout in a regex pattern. + + +# OnigCalloutIn onig_get_callout_in_by_callout_args(OnigCalloutArgs* args) + + Returns the direction of this callout. + (ONIG_CALLOUT_IN_PROGRESS or ONIG_CALLOUT_IN_RETRACTION) + + +# int onig_get_name_id_by_callout_args(OnigCalloutArgs* args) + + Returns the name identifier of this callout. + If this callout is callout of contents, then returns ONIG_NON_NAME_ID. + + +# const OnigUChar* onig_get_contents_by_callout_args(OnigCalloutArgs* args) + + Returns the contents string of this callout. (NULL terminated string) + If this callout is callout of name, then returns NULL. 
+ + +# const OnigUChar* onig_get_contents_end_by_callout_args(OnigCalloutArgs* args) + + Returns the end of contents string of this callout. + If this callout is callout of name, then returns NULL. + + +# int onig_get_args_num_by_callout_args(OnigCalloutArgs* args) + + Returns the number of args of this callout. + It includes optional arguments that were not passed in the regex pattern. + If this callout is callout of contents, then returns + ONIGERR_INVALID_ARGUMENT. + + +# int onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) + + Returns the number of args that were actually passed in the regex pattern. + If this callout is callout of contents, then returns + ONIGERR_INVALID_ARGUMENT. + + +# int onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, OnigType* type, OnigValue* val) + + Returns a value and a type of the callout argument. + If this callout is callout of contents, then returns + ONIGERR_INVALID_ARGUMENT. + + normal return: ONIG_NORMAL + + +# const OnigUChar* onig_get_string_by_callout_args(OnigCalloutArgs* args) + + Returns the subject string address. + This is the second argument(str) of onig_search(). + + +# const OnigUChar* onig_get_string_end_by_callout_args(OnigCalloutArgs* args) + + Returns the end address of subject string. + This is the third argument(end) of onig_search(). + + +# const OnigUChar* onig_get_start_by_callout_args(OnigCalloutArgs* args) + + Returns the start address of subject string in current match process. + + +# const OnigUChar* onig_get_right_range_by_callout_args(OnigCalloutArgs* args) + + Returns the right range address of subject string. + + +# const OnigUChar* onig_get_current_by_callout_args(OnigCalloutArgs* args) + + Returns the current address of subject string in current match process. + + +# OnigRegex onig_get_regex_by_callout_args(OnigCalloutArgs* args) + + Returns the regex object address of this callout. 
+ + +# unsigned long onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args) + + Returns the current counter value for retry-limit-in-match. + + + +(6) Tag + + "Tag" is a name assigned to a callout in regexp pattern. + Allowed tag string characters: _ A-Z a-z 0-9 (* first character: _ A-Z a-z) + + +# int onig_callout_tag_is_exist_at_callout_num(OnigRegex reg, int callout_num) + + Returns 1 if tag is assigned for the callout, else returns 0. + + +# int onig_get_callout_num_by_tag(OnigRegex reg, const OnigUChar* tag, const OnigUChar* tag_end) + + Returns the callout number for the tag. + + +# const OnigUChar* onig_get_callout_tag_start(OnigRegex reg, int callout_num) + + Returns the start address of tag string for the callout. + (NULL terminated string) + + +# const OnigUChar* onig_get_callout_tag_end(OnigRegex reg, int callout_num) + + Returns the end address of tag string for the callout. + + + +(7) Callout data (used in callout functions) + + "Callout data" is ONIG_CALLOUT_DATA_SLOT_NUM(5) values area + for each callout in each search process. + Each value area in a callout is indicated by "slot" number (0 - 4). + Callout data are used for any purpose by callout function implementers. + + +# int onig_get_callout_data_by_callout_args(OnigCalloutArgs* args, int callout_num, int slot, OnigType* type, OnigValue* val) + + Returns the callout data value/type for a callout slot indicated by + callout_num/slot. + + normal return: ONIG_NORMAL + 1: not yet set (type is ONIG_TYPE_VOID) + < 0: error code + + +# int onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val) + + Returns self callout data value/type. + + normal return: ONIG_NORMAL + 1: not yet set (type is ONIG_TYPE_VOID) + < 0: error code + + +# int onig_set_callout_data_by_callout_args(OnigCalloutArgs* args, int callout_num, int slot, OnigType type, OnigValue* val) + + Set the callout data value/type for a callout slot indicated by callout_num/slot. 
+ + normal return: ONIG_NORMAL + < 0: error code + + +# int onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, int slot, OnigType type, OnigValue* val) + + Set self callout data value/type for a callout slot indicated by slot. + + normal return: ONIG_NORMAL + < 0: error code + + +# int onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val) + + This function is almost the same as onig_get_callout_data_by_callout_args_self(). + But this function doesn't clear values which were set in a previous failed match process. + Other onig_get_callout_data_xxxx() functions clear all values which were set + in a previous failed match process. + + For example, Builtin callout (*TOTAL_COUNT) is implemented by using this + function to accumulate the count of all match processes in a search process. + Builtin callout (*COUNT) returns the count in the last successful match process only, + because it doesn't use this function. + + +(8) Callout data (used in applications) + +# int onig_get_callout_data(OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val) + + Returns the callout data value/type for a callout slot indicated by + callout_num/slot. + + normal return: ONIG_NORMAL + 1: not yet set (type is ONIG_TYPE_VOID) + < 0: error code + + +# int onig_get_callout_data_by_tag(OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType* type, OnigValue* val) + + Returns the callout data value/type for a callout slot indicated by tag/slot. + + normal return: ONIG_NORMAL + 1: not yet set (type is ONIG_TYPE_VOID) + < 0: error code + + +# int onig_set_callout_data(OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType type, OnigValue* val) + + Set the callout data value/type for a callout slot indicated by callout_num/slot. 
+ + normal return: ONIG_NORMAL + < 0: error code + + +# int onig_set_callout_data_by_tag(OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType type, OnigValue* val) + + Set the callout data value/type for a callout slot indicated by tag/slot. + + normal return: ONIG_NORMAL + < 0: error code + + +# int onig_get_callout_data_dont_clear_old(OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val) + + There is no need to use this function. + It will be abolished. + + + +(9) Miscellaneous functions + +# OnigUChar* onig_get_callout_name_by_name_id(int name_id) + + Returns callout name of the name id. + If an invalid name id is passed, returns 0. + + +# int onig_get_capture_range_in_callout(OnigCalloutArgs* args, int mem_num, int* begin, int* end) + + Returns current capture range position. + Position is byte length offset from subject string. + For uncaptured mem_num, ONIG_REGION_NOTPOS is set. + + +# int onig_get_used_stack_size_in_callout(OnigCalloutArgs* args, int* used_num, int* used_bytes) + + Returns current used match-stack size. 
+ + used_num: number of match-stack elements + used_bytes: used byte size of match-stack + +//END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.API.ja b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.API.ja new file mode 100644 index 000000000..c56555ad1 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.API.ja @@ -0,0 +1,382 @@ +Callouts API Version 6.8.2 2018/06/08 + +#include <oniguruma.h> + +(1) 呼び出し関数 +(2) 内容の呼び出し関数の設定/取得 +(3) 名前の呼び出し関数の設定 +(4) ユーザデータ +(5) OnigCalloutArgsからの値の取得 +(6) 名札 +(7) 呼び出しデータ (呼び出し関数内から使用される) +(8) 呼び出しデータ (アプリケーションから使用される) +(9) その他の関数 + + +(1) 呼び出し関数 + + 型: OnigCalloutFunc + + typedef int (*OnigCalloutFunc)(OnigCalloutArgs* args, void* user_data); + + 若し呼び出し関数として0(NULL)がセットされると、呼ばれることはない + + + * 呼び出し関数の戻り値 (int) + + ONIG_CALLOUT_FAIL (== 1): 失敗 + ONIG_CALLOUT_SUCCESS (== 0): 成功 + -1未満: エラーコード (検索/照合の終了) + + ONIG_CALLOUT_FAIL/SUCCESSは、後退中の呼び出しでは無視される。 + 後退は失敗の回復過程なので。 + + * 呼び出し関数の例 + + extern int always_success(OnigCalloutArgs* args, void* user_data) + { + return ONIG_CALLOUT_SUCCESS; + } + + + +(2) 内容の呼び出し関数の設定/取得 + +# OnigCalloutFunc onig_get_progress_callout(void) + + 内容の呼び出し関数(前進中)を返す + + +# int onig_set_progress_callout(OnigCalloutFunc f) + + 内容の呼び出し関数(前進中)をセットする。 + この値はonig_initialize_match_param()の中でデフォルトの呼び出し関数として + セットされる。 + + 正常終了: ONIG_NORMAL + + +# OnigCalloutFunc onig_get_retraction_callout(void) + + 内容の呼び出し関数(後退中)を返す + + +# int onig_set_retraction_callout(OnigCalloutFunc f) + + 内容の呼び出し関数(後退中)をセットする。 + この値はonig_initialize_match_param()の中でデフォルトの呼び出し関数として + セットされる。 + + 正常終了: ONIG_NORMAL + + +# int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + 内容の呼び出し関数(前進中)をセットする。 + + 引数 + 1 mp: match-paramアドレス + 2 f: 関数 + + 正常終了: ONIG_NORMAL + + +# int onig_set_retraction_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) + + 内容の呼び出し関数(後退中)をセットする。 + + 引数 + 1 mp: 
match-paramアドレス + 2 f: 関数 + + 正常終了: ONIG_NORMAL + + + +(3) 名前の呼び出し関数の設定 + +# int onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int opt_arg_num, OnigValue opt_defaults[]) + + 名前の呼び出し関数をセットする。 + 名前に許される文字: _ A-Z a-z 0-9 (* 最初の文字: _ A-Z a-z) + + (enc, name)のペアが、呼び出し関数を見つけるためのキーとして使用される。 + アプリケーションで使用される各エンコーディングに対してこの関数を呼ぶ必要がある。 + しかし若しencエンコーディングがASCII互換であり、(enc, name)に対するエントリが + 見つからない場合には、(ASCII, name)エントリが参照される。 + 従って、若しASCII互換エンコーディングのみ使用している場合には、この関数を(ASCII, name) + について一回呼べば十分である。 + + 引数 + 1 enc: 文字エンコーディング + 2 type: 呼び出し型 (現在は ONIG_CALLOUT_TYPE_SINGLE のみサポート) + 3 name: 名前のアドレス (encでエンコーディングされている文字列) + 4 name_end: 名前の終端アドレス + 5 callout_in: 方向フラグ (ONIG_CALLOUT_IN_PROGRESS/RETRACTION/BOTH) + 6 callout: 呼び出し関数 + 7 end_callout: *まだ使用していない (0をセット) + 8 arg_num: 引数の数 (* 最大値 ONIG_CALLOUT_MAX_ARGS_NUM == 4) + 9 arg_types: 引数の型の配列 + 10 opt_arg_num: オプション引数の数 + 11 opt_defaults: オプション引数のデフォルト値 + + 正常終了: ONIG_NORMAL + error: + ONIGERR_INVALID_CALLOUT_NAME + ONIGERR_INVALID_ARGUMENT + ONIGERR_INVALID_CALLOUT_ARG + + + +(4) ユーザデータ + +# int onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data) + + 呼び出し関数の引数として渡されるユーザデータをセットする。 + + 正常終了: ONIG_NORMAL + + + +(5) OnigCalloutArgsからの値の取得 + +# int onig_get_callout_num_by_callout_args(OnigCalloutArgs* args) + + この呼び出しの呼び出し番号を返す。 + "呼び出し番号"とは、正規表現パターンの中の呼び出しに対する識別子である。 + + +# OnigCalloutIn onig_get_callout_in_by_callout_args(OnigCalloutArgs* args) + + この呼び出しが起きた時の方向(前進中/後退中)を返す。 + (ONIG_CALLOUT_IN_PROGRESS か ONIG_CALLOUT_IN_RETRACTION) + + +# int onig_get_name_id_by_callout_args(OnigCalloutArgs* args) + + この呼び出しの名前(name)の識別子を返す。 + 若しこの呼び出しが内容の呼び出しのときには、ONIG_NON_NAME_IDが返される。 + + +# const OnigUChar* onig_get_contents_by_callout_args(OnigCalloutArgs* args) + + この呼び出しの内容文字列(NULL終端あり)を返す。 + 若しこの呼び出しが名前の呼び出しのときには、NULLを返す。 + + +# const OnigUChar* 
onig_get_contents_end_by_callout_args(OnigCalloutArgs* args) + + この呼び出しの内容(contents)の終端を返す。 + 若しこの呼び出しが名前の呼び出しのときには、NULLを返す。 + + +# int onig_get_args_num_by_callout_args(OnigCalloutArgs* args) + + この呼び出しの引数の数を返す。 + 正規表現パターンの中で渡されなかったオプション引数も含む。 + 若しこの呼び出しが内容の呼び出しのときには、ONIGERR_INVALID_ARGUMENTが返される。 + + +# int onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) + + この呼び出しの本当に渡された引数の数を返す。 + 若しこの呼び出しが内容の呼び出しのときには、ONIGERR_INVALID_ARGUMENTが返される。 + + +# int onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, OnigType* type, OnigValue* val) + + この呼び出しの一個の引数の値と型を返す。 + 若しこの呼び出しが内容の呼び出しのときには、ONIGERR_INVALID_ARGUMENTが返される。 + + 正常終了: ONIG_NORMAL + + +# const OnigUChar* onig_get_string_by_callout_args(OnigCalloutArgs* args) + + 対象文字列のアドレスを返す。 + onig_search()の二番目の引数(str)である。 + + +# const OnigUChar* onig_get_string_end_by_callout_args(OnigCalloutArgs* args) + + 対象文字列の終端アドレスを返す。 + onig_search()の三番目の引数(end)である。 + + +# const OnigUChar* onig_get_start_by_callout_args(OnigCalloutArgs* args) + + 対象文字列の現在の照合処理開始アドレスを返す。 + + +# const OnigUChar* onig_get_right_range_by_callout_args(OnigCalloutArgs* args) + + 対象文字列の現在の照合範囲アドレスを返す。 + + +# const OnigUChar* onig_get_current_by_callout_args(OnigCalloutArgs* args) + + 対象文字列の現在の照合位置アドレスを返す。 + + +# OnigRegex onig_get_regex_by_callout_args(OnigCalloutArgs* args) + + この呼び出しの正規表現オブジェクトのアドレスを返す。 + + +# unsigned long onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args) + + retry-limit-in-matchのためのリトライカウンタの現在値を返す。 + + + +(6) 名札 + + "Tag" (名札)とは、正規表現パターンの中で呼び出しに割り当てられた名前である。 + tag文字列に使用できる文字: _ A-Z a-z 0-9 (* 先頭の文字: _ A-Z a-z) + + +# int onig_callout_tag_is_exist_at_callout_num(OnigRegex reg, int callout_num) + + その呼び出しにtagが割り当てられていれば1を返す、そうでなければ0を返す。 + + +# const OnigUChar* onig_get_callout_tag_start(OnigRegex reg, int callout_num) + + その呼び出しに対するtag文字列(NULL終端あり)の先頭アドレスを返す。 + + +# const OnigUChar* onig_get_callout_tag_end(OnigRegex reg, int callout_num) + + その呼び出しに対するtag文字列の終端アドレスを返す。 + + +# int 
onig_get_callout_num_by_tag(OnigRegex reg, const OnigUChar* tag, const OnigUChar* tag_end) + + そのtagに対する呼び出し番号を返す。 + + + +(7) 呼び出しデータ (呼び出し関数内から使用される) + + "呼び出しデータ" (callout data)とは、 + それぞれの呼び出しに対してそれぞれの検索処理の中で割り当てられた、 + ONIG_CALLOUT_DATA_SLOT_NUM(== 5)個の値の領域である。 + 一個の呼び出しに対する各値の領域は、"スロット"(slot)番号(0 - 4)によって示される。 + 呼び出しデータは呼び出し関数の実装者によって任意の目的に使用される。 + + +# int onig_get_callout_data_by_callout_args(OnigCalloutArgs* args, int callout_num, int slot, OnigType* type, OnigValue* val) + + callout_num/slotによって示された呼び出しスロットに対するデータの値/型を返す。 + + 正常終了: ONIG_NORMAL + 1: 値が未セット (typeは ONIG_TYPE_VOID) + < 0: エラーコード + + +# int onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val) + + 自分自身の呼び出しのslotによって示されたスロットに対するデータの値/型を返す。 + + 正常終了: ONIG_NORMAL + 1: 値が未セット (typeは ONIG_TYPE_VOID) + < 0: エラーコード + + +# int onig_set_callout_data_by_callout_args(OnigCalloutArgs* args, int callout_num, int slot, OnigType type, OnigValue* val) + + callout_num/slotによって示された呼び出しスロットに対する値/型をセットする。。 + + 正常終了: ONIG_NORMAL + < 0: エラーコード + + +# int onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, int slot, OnigType type, OnigValue* val) + + 自分自身の呼び出しのslotによって示されたスロットに対する値/型をセットする。。 + + 正常終了: ONIG_NORMAL + < 0: エラーコード + + +# int onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val) + + この関数は、onig_get_callout_data_by_callout_args_self()とほぼ同じである。 + しかしこの関数は、現在の照合処理以前の失敗した照合処理の中でセットされた値を + クリアしない。 + 他のonig_get_callout_data_xxxx()関数は、以前の失敗した照合処理の中でセットされた値を + クリアする。 + + 例えば、組み込み呼び出し(*TOTAL_COUNT)は、検索処理の中の全ての照合処理の積算カウントを + 得るためにこの関数を使用して実装されている。 + 組み込む呼び出し(*COUNT)は、この関数を使用しないので、最後の成功した照合処理だけの + カウントを返す。 + + + +(8) 呼び出しデータ (アプリケーションから使用される) + +# int onig_get_callout_data(OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val) + + callout_num/slotによって示された呼び出しスロットに対するデータの値/型を返す。 + + 正常終了: ONIG_NORMAL + 1: 値が未セット (typeは ONIG_TYPE_VOID) + < 0: 
エラーコード + + +# int onig_get_callout_data_by_tag(OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType* type, OnigValue* val) + + tag/slotによって示された呼び出しスロットに対するデータの値/型を返す。 + + 正常終了: ONIG_NORMAL + 1: 値が未セット (typeは ONIG_TYPE_VOID) + < 0: エラーコード + + +# int onig_set_callout_data(OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType type, OnigValue* val) + + callout_num/slotによって示された呼び出しスロットに対する値/型をセットする。。 + + 正常終了: ONIG_NORMAL + < 0: エラーコード + + +# int onig_set_callout_data_by_tag(OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType type, OnigValue* val) + + tag/slotによって示された呼び出しスロットに対する値/型をセットする。。 + + 正常終了: ONIG_NORMAL + < 0: エラーコード + + +# int onig_get_callout_data_dont_clear_old(OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val) + + この関数を使用する必要はないと思われる。 + 廃止予定。 + + + +(9) その他の関数 + +# OnigUChar* onig_get_callout_name_by_name_id(int name_id) + + 名前の識別子に対する名前を返す。 + 不正な識別子が渡された場合には0を返す。 + + +# int onig_get_capture_range_in_callout(OnigCalloutArgs* args, int mem_num, int* begin, int* end) + + 現在の捕獲範囲を返す。 + 位置は、対象文字列に対するバイト単位で表される。 + 未捕獲のmem_numに対しては、ONIG_REGION_NOTPOSがセットされる。 + + +# int onig_get_used_stack_size_in_callout(OnigCalloutArgs* args, int* used_num, int* used_bytes) + + 現在使用されている照合処理用スタックサイズを返す。 + + used_num: 要素数 + used_bytes: バイト数 + +//END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.BUILTIN b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.BUILTIN new file mode 100644 index 000000000..26840e74c --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.BUILTIN @@ -0,0 +1,95 @@ +CALLOUTS.BUILTIN 2018/03/26 + + +* FAIL (progress) + + (*FAIL) + + Always fail. + + +* MISMATCH (progress) + + (*MISMATCH) + + Terminates Match process. + Continues Search process. 
+ + +* ERROR (progress) + + (*ERROR{n::LONG}) + + Terminates Search/Match process. + + Return value is the argument 'n'. (The value must be less than -1) + 'n' is an optional argument. (default value is ONIG_ABORT) + + +* MAX (progress/retraction) + + (*MAX{n::LONG/TAG, c::CHAR}) + + Restricts the maximum count of success(default), progress or retraction. + If 'n' type is tag, slot 0 value of the tag are used. + Depends on 'c' argument, the slot 0 value changes. + 'c' is an optional argument, default value is 'X'. + + (* success count = progress count - retraction count) + + + ex. "(?:(*COUNT[T]{X})a)*(?:(*MAX{T})c)*" + + [callout data] + slot 0: '>': progress count, '<': retraction count, 'X': success count (default) + + +* COUNT (progress/retraction) + + (*COUNT{c::CHAR}) + + Counter. + Depends on 'c' argument, the slot 0 value changes. + 'c' is an optional argument, default value is '>'. + + [callout data] + slot 0: '>': progress count (default), '<': retraction count, 'X': success count + slot 1: progress count + slot 2: retraction count + + ** If option ONIG_OPTION_FIND_LONGEST or ONIG_OPTION_FIND_NOT_EMPTY is used, + counts are not accurate. + + +* TOTAL_COUNT (progress/retraction) + + (*TOTAL_COUNT{c::CHAR}) + + It's the almost same as COUNT. + But the counts are integrated in a search process. + 'c' is an optional argument, default value is '>'. + + [callout data] + slot 0: '>': progress count (default), '<': retraction count, 'X': success count + slot 1: progress count + slot 2: retraction count + + ** If option ONIG_OPTION_FIND_LONGEST or ONIG_OPTION_FIND_NOT_EMPTY is used, + counts are not accurate. + + +* CMP (progress) + + (*CMP{x::TAG/LONG, op::STRING, y::TAG/LONG}) + + Compares x value and y value with op operator. + If x and y types are tag, slot 0 value of the tag are used. + + op: '==', '!=', '>', '<', '>=', '<=' + + ex. 
"(?:(*MAX[TA]{7})a|(*MAX[TB]{5})b)*(*CMP{TA,>=,4})" + + [callout data] + slot 0: op value (enum OP_CMP in src/regexec.c) + +//END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.BUILTIN.ja b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.BUILTIN.ja new file mode 100644 index 000000000..d371beb6e --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/CALLOUTS.BUILTIN.ja @@ -0,0 +1,93 @@ +CALLOUTS.BUILTIN.ja 2018/03/26 + + +* FAIL (前進) + + (*FAIL) + + 常に失敗する + + +* MISMATCH (前進) + + (*MISMATCH) + + 照合を中止する + 検索は継続する + + +* ERROR (前進) + + (*ERROR{n::LONG}) + + 検索/照合を中止する + 戻り値は引数'n'の値。(-1より小さい負の値でなければならない) + 'n'はオプション引数で、デフォルト値はONIG_ABORT + + +* MAX (前進/後退) + + (*MAX{n::LONG/TAG, c::CHAR}) + + 成功(デフォルト)、前進または後退回数を制限する + 'n'がTAGのときは、そのTAGのcalloutのslot 0の値が使用される + 'c'引数の値によって、slot 0の値が変化する + 'c'はオプション引数で、デフォルト値は'X' + + 例: "(?:(*COUNT[T]{X})a)*(?:(*MAX{T})c)*" + + [callout data] + slot 0: '>': 前進回数, '<': 後退回数, 'X': 成功回数(デフォルト) + + +* COUNT (前進/後退) + + (*COUNT{c::CHAR}) + + カウンタ + 'c'引数の値によって、slot 0の値が変化する + 'c'はオプション引数で、デフォルト値は'>' + + [callout data] + slot 0: '>': 前進回数(デフォルト), '<': 後退回数, 'X': 成功回数 + slot 1: 前進回数 + slot 2: 後退回数 + + (* 成功回数 = 前進回数 - 後退回数) + + ** ONIG_OPTION_FIND_LONGEST または ONIG_OPTION_FIND_NOT_EMPTY が使用されると + 正確な動作ができなくなる + + +* TOTAL_COUNT (前進/後退) + + (*TOTAL_COUNT{c::CHAR}) + + これはCOUNTとほとんど同じ + しかし、カウントが検索過程で積算される + 'c'はオプション引数で、デフォルト値は'>' + + [callout data] + slot 0: '>': 前進回数(デフォルト), '<': 後退回数, 'X': 成功回数 + slot 1: 前進回数 + slot 2: 後退回数 + + ** ONIG_OPTION_FIND_LONGEST または ONIG_OPTION_FIND_NOT_EMPTY が使用されると + 正確な動作ができなくなる + + +* CMP (前進) + + (*CMP{x::TAG/LONG, op::STRING, y::TAG/LONG}) + + xの値とyの値をop演算子で比較する + x, yがTAGのときにはそのcalloutのslot 0の値が参照される + + op: '==', '!=', '>', '<', '>=', '<=' + + 例: "(?:(*MAX[TA]{7})a|(*MAX[TB]{5})b)*(*CMP{TA,>=,4})" + + [callout data] + slot 0: op値 (src/regexec.c の中の enum OP_CMP) + +//END diff --git 
a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/FAQ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/FAQ new file mode 100644 index 000000000..dfbd7fac7 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/FAQ @@ -0,0 +1,12 @@ +FAQ 2006/11/14 + +1. Longest match + + You can execute the longest match by using ONIG_OPTION_FIND_LONGEST option + in onig_new(). + +2. Mailing list + + There is no mailing list for Oniguruma. + +// END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/FAQ.ja b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/FAQ.ja new file mode 100644 index 000000000..ffb25f4a6 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/FAQ.ja @@ -0,0 +1,22 @@ +FAQ 2016/04/06 + +1. 最長マッチ + + onig_new()の中で、ONIG_OPTION_FIND_LONGESTオプション + を使用すれば最長マッチになる。 + + +2. CR + LF + + DOSの改行(CR(0x0c) + LF(0x0a)の連続) + + regenc.hの中の、以下の部分を有効にする。 + + /* #define USE_CRNL_AS_LINE_TERMINATOR */ + + +3. メーリングリスト + + 鬼車に関するメーリングリストは存在しない。 + +//END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/RE b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/RE new file mode 100644 index 000000000..4561698a7 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/RE @@ -0,0 +1,578 @@ +Oniguruma Regular Expressions Version 6.9.5 2020/01/28 + +syntax: ONIG_SYNTAX_ONIGURUMA (default) + + +1. Syntax elements + + \ escape (enable or disable meta character) + | alternation + (...) group + [...] character class + + +2. 
Characters + + \t horizontal tab (0x09) + \v vertical tab (0x0B) + \n newline (line feed) (0x0A) + \r carriage return (0x0D) + \b backspace (0x08) + \f form feed (0x0C) + \a bell (0x07) + \e escape (0x1B) + \nnn octal char (encoded byte value) + \o{17777777777} wide octal char (character code point value) + \uHHHH wide hexadecimal char (character code point value) + \xHH hexadecimal char (encoded byte value) + \x{7HHHHHHH} wide hexadecimal char (character code point value) + \cx control char (character code point value) + \C-x control char (character code point value) + \M-x meta (x|0x80) (character code point value) + \M-\C-x meta control char (character code point value) + + (* \b as backspace is effective in character class only) + + +3. Character types + + . any character (except newline) + + \w word character + + Not Unicode: + alphanumeric, "_" and multibyte char. + + Unicode: + General_Category -- (Letter|Mark|Number|Connector_Punctuation) + + \W non-word char + + \s whitespace char + + Not Unicode: + \t, \n, \v, \f, \r, \x20 + + Unicode case: + U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL), + General_Category -- Line_Separator + -- Paragraph_Separator + -- Space_Separator + + \S non-whitespace char + + \d decimal digit char + + Unicode: General_Category -- Decimal_Number + + \D non-decimal-digit char + + \h hexadecimal digit char [0-9a-fA-F] + + \H non-hexdigit char + + \R general newline (* can't be used in character-class) + "\r\n" or \n,\v,\f,\r (* but doesn't backtrack from \r\n to \r) + + Unicode case: + "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029 + + \N negative newline (?-m:.) + + \O true anychar (?m:.) (* original function) + + \X Text Segment \X === (?>\O(?:\Y\O)*) + + The meaning of this operator changes depending on the setting of + the option (?y{..}). + + \X doesn't check whether matching start position is boundary or not. + Please write as \y\X if you want to ensure it. 
+ + [Extended Grapheme Cluster mode] (default) + Unicode case: + See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + Not Unicode case: \X === (?>\r\n|\O) + + [Word mode] + Currently, this mode is supported in Unicode only. + See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + + Character Property + + * \p{property-name} + * \p{^property-name} (negative) + * \P{property-name} (negative) + + property-name: + + + works on all encodings + Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, + Print, Punct, Space, Upper, XDigit, Word, ASCII + + + works on EUC_JP, Shift_JIS + Hiragana, Katakana + + + works on UTF8, UTF16, UTF32 + See doc/UNICODE_PROPERTIES. + + + +4. Quantifier + + greedy + + ? 1 or 0 times + * 0 or more times + + 1 or more times + {n,m} (n <= m) at least n but no more than m times + {n,} at least n times + {,n} at least 0 but no more than n times ({0,n}) + {n} n times + + + reluctant + + ?? 0 or 1 times + *? 0 or more times + +? 1 or more times + {n,m}? (n <= m) at least n but not more than m times + {n,}? at least n times + {,n}? at least 0 but not more than n times (== {0,n}?) + + {n}? is reluctant operator in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL only. + (In that case, it doesn't make sense to write so.) + In default syntax, /a{n}?/ === /(?:a{n})?/ + + + possessive (greedy and does not backtrack once match) + + ?+ 1 or 0 times + *+ 0 or more times + ++ 1 or more times + {n,m} (n > m) at least m but not more than n times + + {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and + ONIG_SYNTAX_PERL only. + + ex. /a*+/ === /(?>a*)/ + + +5. 
Anchors + + ^ beginning of the line + $ end of the line + \b word boundary + \B non-word boundary + + \A beginning of string + \Z end of string, or before newline at the end + \z end of string + \G where the current search attempt begins + \K keep (keep start position of the result string) + + + \y Text Segment boundary + \Y Text Segment non-boundary + + The meaning of these operators(\y, \Y) changes depending on the setting + of the option (?y{..}). + + [Extended Grapheme Cluster mode] (default) + Unicode case: + See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + Not Unicode: + All positions except between \r and \n. + + [Word mode] + Currently, this mode is supported in Unicode only. + See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + + +6. Character class + + ^... negative class (lowest precedence) + x-y range from x to y + [...] set (character class in character class) + ..&&.. intersection (low precedence, only higher than ^) + + ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w] + + * If you want to use '[', '-', or ']' as a normal character + in character class, you should escape them with '\'. 
+ + + POSIX bracket ([:xxxxx:], negate [:^xxxxx:]) + + Not Unicode Case: + + alnum alphabet or digit char + alpha alphabet + ascii code value: [0 - 127] + blank \t, \x20 + cntrl + digit 0-9 + graph include all of multibyte encoded characters + lower + print include all of multibyte encoded characters + punct + space \t, \n, \v, \f, \r, \x20 + upper + xdigit 0-9, a-f, A-F + word alphanumeric, "_" and multibyte characters + + + Unicode Case: + + alnum Letter | Mark | Decimal_Number + alpha Letter | Mark + ascii 0000 - 007F + blank Space_Separator | 0009 + cntrl Control | Format | Unassigned | Private_Use | Surrogate + digit Decimal_Number + graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate + lower Lowercase_Letter + print [[:graph:]] | [[:space:]] + punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | + Final_Punctuation | Initial_Punctuation | Other_Punctuation | + Open_Punctuation + space Space_Separator | Line_Separator | Paragraph_Separator | + U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 + upper Uppercase_Letter + xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066 + (0-9, a-f, A-F) + word Letter | Mark | Decimal_Number | Connector_Punctuation + + + +7. Extended groups + + (?#...) comment + + (?imxWDSPy-imxWDSP:subexp) option on/off for subexp + + i: ignore case + m: multi-line (dot (.) also matches newline) + x: extended form + W: ASCII only word (\w, \p{Word}, [[:word:]]) + ASCII only word bound (\b) + D: ASCII only digit (\d, \p{Digit}, [[:digit:]]) + S: ASCII only space (\s, \p{Space}, [[:space:]]) + P: ASCII only POSIX properties (includes W,D,S) + (alnum, alpha, blank, cntrl, digit, graph, + lower, print, punct, space, upper, xdigit, word) + + y{?}: Text Segment mode + This option changes the meaning of \X, \y, \Y. + Currently, this option is supported in Unicode only. 
+ + y{g}: Extended Grapheme Cluster mode (default) + y{w}: Word mode + See [Unicode Standard Annex #29] + + (?imxWDSPy-imxWDSP) isolated option + + * It makes a group to the next ')' or end of the pattern. + /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/ + + + (?:subexp) non-capturing group + (subexp) capturing group + + (?=subexp) look-ahead + (?!subexp) negative look-ahead + + (?<=subexp) look-behind + (?<!subexp) negative look-behind + + * Cannot use Absent stopper (?~|expr) and Range clear + (?~|) operators in look-behind and negative look-behind. + + * In look-behind and negative look-behind, support for + ignore-case option is limited. Only supports conversion + between single characters. (Does not support conversion + of multiple characters in Unicode) + + (?>subexp) atomic group + no backtracks in subexp. + + (?<name>subexp), (?'name'subexp) + define named group + (Each character of the name must be a word character.) + + Not only a name but a number is assigned like a capturing + group. + + Assigning the same name to two or more subexps is allowed. + + + <Callouts> + + * Callouts of contents + (?{...contents...}) callout in progress + (?{...contents...}D) D is a direction flag char + D = 'X': in progress and retraction + '<': in retraction only + '>': in progress only + (?{...contents...}[tag]) tag assigned + (?{...contents...}[tag]D) + + * Escape characters have no effects in contents. + * contents is not allowed to start with '{'. + + (?{{{...contents...}}}) n times continuations '}' in contents is allowed in + (n+1) times continuations {{{...}}}. 
+ + Allowed tag string characters: _ A-Z a-z 0-9 (* first character: _ A-Z a-z) + + + * Callouts of name + (*name) + (*name{args...}) with args + (*name[tag]) tag assigned + (*name[tag]{args...}) + + Allowed name string characters: _ A-Z a-z 0-9 (* first character: _ A-Z a-z) + Allowed tag string characters: _ A-Z a-z 0-9 (* first character: _ A-Z a-z) + + + <Absent functions> + + (?~absent) Absent repeater (* proposed by Tanaka Akira) + This works like .* (more precisely \O*), but it is + limited by the range that does not include the string + match with <absent>. + This is a written abbreviation of (?~|(?:absent)|\O*). + \O* is used as a repeater. + + (?~|absent|exp) Absent expression (* original) + This works like "exp", but it is limited by the range + that does not include the string match with <absent>. + + ex. (?~|345|\d*) "12345678" ==> "12", "1", "" + + (?~|absent) Absent stopper (* original) + After passed this operator, string right range is limited + at the point that does not include the string match whth + <absent>. + + (?~|) Range clear + Clear the effects caused by Absent stoppers. + + * Nested Absent functions are not supported and the behavior + is undefined. + + + <if-then-else> + + (?(condition_exp)then_exp|else_exp) if-then-else + (?(condition_exp)then_exp) if-then + + condition_exp can be a backreference number/name or a normal + regular expression. + When condition_exp is a backreference number/name, both then_exp and + else_exp can be omitted. + Then it works as a backreference validity checker. + + [ Backreference validity checker ] (* original) + + (?(n)), (?(-n)), (?(+n)), (?(n+level)) ... + (?(<n>)), (?('-n')), (?(<+n>)) ... + (?(<name>)), (?('name')), (?(<name+level>)) ... + + + +8. Backreferences + + When we say "backreference a group," it actually means, "re-match the same + text matched by the subexp in that group." 
+ + \n \k<n> \k'n' (n >= 1) backreference the nth group in the regexp + \k<-n> \k'-n' (n >= 1) backreference the nth group counting + backwards from the referring position + \k<+n> \k'+n' (n >= 1) backreference the nth group counting + forwards from the referring position + \k<name> \k'name' backreference a group with the specified name + + When backreferencing with a name that is assigned to more than one groups, + the last group with the name is checked first, if not matched then the + previous one with the name, and so on, until there is a match. + + * Backreference by number is forbidden if any named group is defined and + ONIG_OPTION_CAPTURE_GROUP is not set. + + + backreference with recursion level + + (n >= 1, level >= 0) + + \k<n+level> \k'n+level' + \k<n-level> \k'n-level' + + \k<name+level> \k'name+level' + \k<name-level> \k'name-level' + + Destine a group on the recursion level relative to the referring position. + + ex 1. + + /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b>))\z/.match("reee") + /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer") + + \k<b+0> refers to the (?<b>.) on the same recursion level with it. + + ex 2. + + r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED) + (?<element> \g<stag> \g<content>* \g<etag> ){0} + (?<stag> < \g<name> \s* > ){0} + (?<name> [a-zA-Z_:]+ ){0} + (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0} + (?<etag> </ \k<name+1> >){0} + \g<element> + __REGEXP__ + + p r.match("<foo>f<bar>bbb</bar>f</foo>").captures + + +9. Subexp calls ("Tanaka Akira special") (* original function) + + When we say "call a group," it actually means, "re-execute the subexp in + that group." 
+ + \g<n> \g'n' (n >= 1) call the nth group + \g<0> \g'0' call zero (call the total regexp) + \g<-n> \g'-n' (n >= 1) call the nth group counting backwards from + the calling position + \g<+n> \g'+n' (n >= 1) call the nth group counting forwards from + the calling position + \g<name> \g'name' call the group with the specified name + + * Left-most recursive calls are not allowed. + + ex. (?<name>a|\g<name>b) => error + (?<name>a|b\g<name>c) => OK + + * Calls with a name that is assigned to more than one groups are not + allowed. + + * Call by number is forbidden if any named group is defined and + ONIG_OPTION_CAPTURE_GROUP is not set. + + * The option status of the called group is always effective. + + ex. /(?-i:\g<name>)(?i:(?<name>a)){0}/.match("A") + + +10. Captured group + + Behavior of an unnamed group (...) changes with the following conditions. + (But named group is not changed.) + + case 1. /.../ (named group is not used, no option) + + (...) is treated as a capturing group. + + case 2. /.../g (named group is not used, 'g' option) + + (...) is treated as a non-capturing group (?:...). + + case 3. /..(?<name>..)../ (named group is used, no option) + + (...) is treated as a non-capturing group. + numbered-backref/call is not allowed. + + case 4. /..(?<name>..)../G (named group is used, 'G' option) + + (...) is treated as a capturing group. + numbered-backref/call is allowed. + + where + g: ONIG_OPTION_DONT_CAPTURE_GROUP + G: ONIG_OPTION_CAPTURE_GROUP + + ('g' and 'G' options are argued in ruby-dev ML) + + + +----------------------------- +A-1. Syntax-dependent options + + + ONIG_SYNTAX_ONIGURUMA + (?m): dot (.) also matches newline + + + ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA + (?s): dot (.) also matches newline + (?m): ^ matches after newline, $ matches before newline + + +A-2. 
Original extensions + + + hexadecimal digit char type \h, \H + + true anychar \O + + text segment boundary \y, \Y + + backreference validity checker (?(...)) + + named group (?<name>...), (?'name'...) + + named backref \k<name> + + subexp call \g<name>, \g<group-num> + + absent expression (?~|...|...) + + absent stopper (?|...) + + +A-3. Missing features compared with perl 5.8.0 + + + \N{name} + + \l,\u,\L,\U,\C + + (??{code}) + + * \Q...\E + This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. + + +A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8 + + + add character property (\p{property}, \P{property}) + + add hexadecimal digit char type (\h, \H) + + add look-behind + (?<=fixed-width-pattern), (?<!fixed-width-pattern) + + add possessive quantifier. ?+, *+, ++ + + add operations in character class. [], && + ('[' must be escaped as an usual char in character class.) + + add named group and subexp call. + + octal or hexadecimal number sequence can be treated as + a multibyte code char in character class if multibyte encoding + is specified. + (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + + allow the range of single byte char and multibyte char in character + class. + ex. /[a-<<any EUC-JP character>>]/ in EUC-JP encoding. + + effect range of isolated option is to next ')'. + ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b). + + isolated option is not transparent to previous pattern. + ex. a(?i)* is a syntax error pattern. + + allowed unpaired left brace as a normal character. + ex. /{/, /({)/, /a{2,3/ etc... + + negative POSIX bracket [:^xxxx:] is supported. + + POSIX bracket [:ascii:] is added. + + repeat of look-ahead is not allowed. + ex. /(?=a)*/, /(?!b){5}/ + + Ignore case option is effective to escape sequence. + ex. /\x61/i =~ "A" + + In the range quantifier, the number of the minimum is optional. + /a{,n}/ == /a{0,n}/ + The omission of both minimum and maximum values is not allowed. 
+ /a{,}/ + + /{n}?/ is not a reluctant quantifier. + /a{n}?/ == /(?:a{n})?/ + + invalid back reference is checked and raises error. + /\1/, /(a)\2/ + + Zero-width match in an infinite loop stops the repeat, + then changes of the capture group status are checked as stop condition. + /(?:()|())*\1\2/ =~ "" + /(?:\1a|())*/ =~ "a" + +// END diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/RE.ja b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/RE.ja new file mode 100644 index 000000000..12c7df40d --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/RE.ja @@ -0,0 +1,585 @@ +鬼車 正規表現 Version 6.9.5 2020/01/28 + +使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) + + +1. 基本要素 + + \ 退避修飾 (エスケープ) 正規表現記号の有効/無効の制御 + | 選択子 + (...) 式集合 (グループ) + [...] 文字集合 (文字クラス) + + +2. 文字 + + \t 水平タブ (0x09) + \v 垂直タブ (0x0B) + \n 改行 (0x0A) + \r 復帰 (0x0D) + \b 後退空白 (0x08) + \f 改頁 (0x0C) + \a 鐘 (0x07) + \e 退避修飾 (0x1B) + \nnn 八進数表現 符号化バイト値 + \o{17777777777} 拡張八進数表現 コードポイント値 + \uHHHH 拡張十六進数表現 コードポイント値 + \xHH 十六進数表現 符号化バイト値 + \x{7HHHHHHH} 拡張十六進数表現 コードポイント値 + \cx 制御文字表現 コードポイント値 + \C-x 制御文字表現 コードポイント値 + \M-x 超 (x|0x80) コードポイント値 + \M-\C-x 超 + 制御文字表現 コードポイント値 + + ※ \bは、文字集合内でのみ有効 + + +3. 文字種 + + . 任意文字 (改行を除く: オプションに依存) + + \w 単語構成文字 + + Unicode以外の場合: + 英数字, "_" および 多バイト文字。 + + Unicodeの場合: + General_Category -- (Letter|Mark|Number|Connector_Punctuation) + + \W 非単語構成文字 + + \s 空白文字 + + Unicode以外の場合: + \t, \n, \v, \f, \r, \x20 + + Unicodeの場合: + U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL), + General_Category -- Line_Separator + -- Paragraph_Separator + -- Space_Separator + + \S 非空白文字 + + \d 10進数字 + + Unicodeの場合: General_Category -- Decimal_Number + + \D 非10進数字 + + \h 16進数字 [0-9a-fA-F] + + \H 非16進数字 + + \R 汎改行 (* 文字集合の中では使用できない) + "\r\n" or \n,\v,\f,\r (* 但し \r\nから\rにはバックトラックしない) + + Unicodeの場合: + "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029 + + \N 非改行文字 (?-m:.) + + \O 真任意文字 (?m:.) 
(* 原作) + + \X 文章区分 \X === (?>\O(?:\Y\O)*) + + この演算子の意味は、オプション (?y{..})の設定によって変化する。 + + \Xは照合の開始位置が区分の境界かどうかを確認しない。 + それを確実にしたければ、\y\Xと書けば良い。 + + [拡張書記素房-状態のとき] (デフォルト) + Unicodeの場合: + 参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + Unicode以外の場合: \X === (?>\r\n|\O) + + [単語-状態のとき] + 現在、Unicodeしかサポートしていない。 + 参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + + Character Property + + * \p{property-name} + * \p{^property-name} (negative) + * \P{property-name} (negative) + + property-name: + + + 全てのエンコーディングで有効 + Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, + Print, Punct, Space, Upper, XDigit, Word, ASCII, + + + EUC-JP, Shift_JISで有効 + Hiragana, Katakana + + + UTF8, UTF16, UTF32で有効 + doc/UNICODE_PROPERTIES参照 + + + +4. 量指定子 + + 欲張り + + ? 一回または零回 + * 零回以上 + + 一回以上 + {n,m} (n <= m) n回以上 かつ m回以下 + {n,} n回以上 + {,n} 零回以上n回以下 ({0,n}) + {n} n回 + + + 無欲 + + ?? 零回または一回 + *? 零回以上 + +? 一回以上 + {n,m}? (n <= m) n回以上 かつ m回以下 + {n,}? n回以上 + {,n}? 零回以上n回以下 (== {0,n}?) + + {n}? はONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ無欲な指定子 + (その場合には、態々そう書く意味はないが) + デフォルトの文法では、/a{n}?/ === /(?:a{n})?/ + + + 強欲 (欲張りで、繰り返しに成功した後は回数を減らすような後退再試行をしない) + + ?+ 一回または零回 + *+ 零回以上 + ++ 一回以上 + {n,m} (n > m) m回以上 かつ n回以下 + + {n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ + 強欲な指定子 + + 例. /a*+/ === /(?>a*)/ + + +5. 錨 + + ^ 行頭 + $ 行末 + \b 単語境界 + \B 非単語境界 + + \A 文字列先頭 + \Z 文字列末尾、または文字列末尾の改行の直前 + \z 文字列末尾 + \G 探索開始位置 + \K 保持 (結果の開始位置をこの位置に保つ) + + + \y 文章区分 境界 + \Y 文章区分 非境界 + + この演算子の意味は、オプション (?y{..})の設定によって変化する。 + + [拡張書記素房-状態のとき] (デフォルト) + Unicodeの場合: + 参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + Unicode以外の場合: + \rと\nの間を除く全ての位置 + + [単語-状態のとき] + 現在、Unicodeしかサポートしていない。 + 参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + + +6. 文字集合 + + ^... 否定 (最低優先度演算子) + x-y 範囲 (xからyまで) + [...] 集合 (文字集合内文字集合) + ..&&.. 積演算 (^の次に優先度が低い演算子) + + 例. 
[a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w] + + ※ '[', '-', ']'を、文字集合内で通常文字の意味で使用したい場合には、 + これらの文字を'\'で退避修飾しなければならない。 + + + POSIXブラケット ([:xxxxx:], 否定 [:^xxxxx:]) + + Unicode以外の場合: + + alnum 英数字 + alpha 英字 + ascii 0 - 127 + blank \t, \x20 + cntrl + digit 0-9 + graph 多バイト文字全部を含む + lower + print 多バイト文字全部を含む + punct + space \t, \n, \v, \f, \r, \x20 + upper + xdigit 0-9, a-f, A-F + word 英数字, "_" および 多バイト文字 + + Unicodeの場合: + + alnum Letter | Mark | Decimal_Number + alpha Letter | Mark + ascii 0000 - 007F + blank Space_Separator | 0009 + cntrl Control | Format | Unassigned | Private_Use | Surrogate + digit Decimal_Number + graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate + lower Lowercase_Letter + print [[:graph:]] | [[:space:]] + punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation | + Final_Punctuation | Initial_Punctuation | Other_Punctuation | + Open_Punctuation + space Space_Separator | Line_Separator | Paragraph_Separator | + U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 + upper Uppercase_Letter + xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066 + (0-9, a-f, A-F) + word Letter | Mark | Decimal_Number | Connector_Punctuation + + + +7. 拡張式集合 + + (?#...) 
注釈 + + (?imxWDSPy-imxWDSP:式) 式オプション + + i: 大文字小文字照合 + m: 複数行 + x: 拡張形式 + W: wordがASCIIのみ (\w, \p{Word}, [[:word:]]) + word境界がASCIIのみ (\b) + D: digitがASCIIのみ (\d, \p{Digit}, [[:digit:]]) + S: spaceがASCIIのみ (\s, \p{Space}, [[:space:]]) + P: POSIXプロパティがASCIIのみ (W,D,Sを全て含んでいる) + (alnum, alpha, blank, cntrl, digit, graph, + lower, print, punct, space, upper, xdigit, word) + + y{?}: 文章区分状態 + このオプションは\X, \y, \Yの意味を変更する。 + 現在このオプションはUnicodeでしかサポートしていない + y{g}: 拡張書記素房-状態 (デフォルト) + y{w}: 単語-状態 + 参照 [Unicode Standard Annex #29] + + (?imxWDSPy-imxWDSP) 孤立オプション + + * これは次の')'またはパターンの終わりまでのグループを形成する + /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/ + + + (式) 捕獲式集合 + (?:式) 非捕獲式集合 + + (?=式) 先読み + (?!式) 否定先読み + + (?<=式) 戻り読み + (?<!式) 否定戻り読み + + * 戻り読み、否定戻り読みの式の中では、不在停止演算子 + (?~|expr)と範囲消去演算子(?~|)を使用することはできない + + * 戻り読み、否定戻り読みの中では、ignore-caseオプションの + 対応が制限される。一文字と一文字の間の変換しか対応しない。 + (Unicodeでの複数文字の変換に対応しない) + + (?>式) 原子的式集合 + 式全体を通過したとき、式の中での後退再試行を行なわない + + (?<name>式), (?'name'式) + 名前付き捕獲式集合 + 式集合に名前を割り当てる(定義する)。 + (名前は単語構成文字でなければならない。) + + 名前だけでなく、捕獲式集合と同様に番号も割り当てられる。 + 番号指定が禁止されていない状態 (10. 
捕獲式集合 を参照) + のときは、名前を使わないで番号でも参照できる。 + + 複数の式集合に同じ名前を与えることは許されている。 + この場合には、この名前を使用した後方参照は可能であるが、 + 部分式呼出しはできない。 + + + <呼び出し> + + * 内容の呼び出し + (?{...contents...}) 前進中のみの呼び出し + (?{...contents...}D) Dは方向指定文字 + D = 'X': 前進中および後退中 + '<': 後退中のみ + '>': 前進中のみ + (?{...contents...}[tag]) 名札付き + (?{...contents...}[tag]D) + + * エスケープ文字はcontentsの中で何の機能も持たない + * contentsは、'{'文字で始まってはならない + + (?{{{...contents...}}}) contentsの中のn個連続の'}'は、(n+1)個連続の{{{...}}} + の中で許される + + tagに許される文字: _ A-Z a-z 0-9 (* 最初の文字: _ A-Z a-z) + + + * 名前の呼び出し + (*name) + (*name{args...}) 引数付き + (*name[tag]) 名札付き + (*name[tag]{args...}) + + nameに許される文字: _ A-Z a-z 0-9 (* 最初の文字: _ A-Z a-z) + tag に許される文字: _ A-Z a-z 0-9 (* 最初の文字: _ A-Z a-z) + + + + <不在機能群> + + (?~不在) 不在繰り返し (*原案 田中哲) + これは .*(より正確には\O*)のように動作するが、<不在>に + 適合する文字列を含まない範囲に制限される。 + これは(?~|(?:不在)|\O*)の省略表記である。 + + (?~|不在|式) 不在式 (* 原作) + これは<式>のように動作するが、<不在>に適合する文字列を + 含まない範囲に制限される。 + + 例 (?~|345|\d*) "12345678" ==> "12", "1", "" + + (?~|不在) 不在停止 (* 原作) + この演算子を通過した後は、対象文字列の適合範囲が + <不在>に適合する文字列を含まない範囲に制限される。 + + (?~|) 範囲消去 + 不在停止の効果を消して、それ以前の状態にする。 + + * 不在機能の入れ子には対応しておらず、その場合の挙動は不定とする。 + + + + <条件文> + + (?(条件式)成功式|失敗式) 条件式が成功すれば成功式、失敗すれば失敗式を実行する + この機能の存在理由は、成功式が失敗しても失敗式には + 行かないこと。これは他の正規表現で書くことができない。 + もうひとつは、条件式が後方参照の番号/名前のとき、 + 後方参照値の有効性を調べる(文字列と照合はしない) + 意味になる。 + + (?(条件式)成功式) 条件式が成功すれば成功式を実行する + (条件式が通常の式のときには、この構文は不必要だが + 今のところエラーにはしない。) + + + 条件式は後方参照の番号/名前または普通の式を使用できる。 + 条件式が後方参照の場合、成功式と失敗式の両方を省略可能であり、 + この場合、後方参照値有効性を調べる(成功/失敗)機能のみになる。 + + [後方参照値有効性確認器] (* 原作) + (?(n)), (?(-n)), (?(+n)), (?(n+level)) ... + (?(<n>)), (?('-n')), (?(<+n>)) ... + (?(<name>)), (?('name')), (?(<name+level>)) ... + + + +8. 
後方参照 + + \n 番号指定参照 (n >= 1) + \k<n> 番号指定参照 (n >= 1) + \k'n' 番号指定参照 (n >= 1) + \k<-n> 相対番号指定参照 (n >= 1) + \k'-n' 相対番号指定参照 (n >= 1) + \k<+n> 相対番号指定参照 (n >= 1) + \k'+n' 相対番号指定参照 (n >= 1) + \k<name> 名前指定参照 + \k'name' 名前指定参照 + + 名前指定参照で、その名前が複数の式集合で多重定義されている場合には、 + 番号の大きい式集合から優先的に参照される。 + (マッチしないときには番号の小さい式集合が参照される) + + ※ 番号指定参照は、名前付き捕獲式集合が定義され、 + かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、 + 禁止される。(10. 捕獲式集合 を参照) + + + ネストレベル付き後方参照 + + level: 0, 1, 2, ... + + \k<n+level> (n >= 1) + \k<n-level> (n >= 1) + \k'n+level' (n >= 1) + \k'n-level' (n >= 1) + + \k<name+level> + \k<name-level> + \k'name+level' + \k'name-level' + + 後方参照の位置から相対的な部分式呼出しネストレベルを指定して、そのレベルでの + 捕獲値を参照する。 + + 例-1. + + /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer") + + 例-2. + + r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED) + (?<element> \g<stag> \g<content>* \g<etag> ){0} + (?<stag> < \g<name> \s* > ){0} + (?<name> [a-zA-Z_:]+ ){0} + (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0} + (?<etag> </ \k<name+1> >){0} + \g<element> + __REGEXP__ + + p r.match('<foo>f<bar>bbb</bar>f</foo>').captures + + + +9. 部分式呼出し ("田中哲スペシャル") (* 原作) + + \g<name> 名前指定呼出し + \g'name' 名前指定呼出し + \g<n> 番号指定呼出し (n >= 1) + \g'n' 番号指定呼出し (n >= 1) + \g<0> 番号指定呼出し(全体呼び出し) + \g'0' 番号指定呼出し(全体呼び出し) + \g<-n> 相対番号指定呼出し (n >= 1) + \g'-n' 相対番号指定呼出し (n >= 1) + \g<+n> 相対番号指定呼出し (n >= 1) + \g'+n' 相対番号指定呼出し (n >= 1) + + ※ 最左位置での再帰呼出しは禁止される。 + 例. (?<name>a|\g<name>b) => error + (?<name>a|b\g<name>c) => OK + + ※ 番号指定呼出しは、名前付き捕獲式集合が定義され、 + かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、 + 禁止される。 (10. 捕獲式集合 を参照) + + ※ 呼び出された式集合のオプション状態が呼出し側のオプション状態と異なっている + とき、呼び出された側のオプション状態が有効である。 + + 例. (?-i:\g<name>)(?i:(?<name>a)){0} は "A" に照合成功する。 + + +10. 捕獲式集合 + + 捕獲式集合(...)は、以下の条件に応じて振舞が変化する。 + (名前付き捕獲式集合は変化しない) + + case 1. /.../ (名前付き捕獲式集合は不使用、オプションなし) + + (...) は、捕獲式集合として扱われる。 + + case 2. /.../g (名前付き捕獲式集合は不使用、オプション 'g'を指定) + + (...) は、非捕獲式集合として扱われる。 + + case 3. /..(?<name>..)../ (名前付き捕獲式集合は使用、オプションなし) + + (...) 
は、非捕獲式集合として扱われる。 + 番号指定参照/呼び出しは不許可。 + + case 4. /..(?<name>..)../G (名前付き捕獲式集合は使用、オプション 'G'を指定) + + (...) は、捕獲式集合として扱われる。 + 番号指定参照/呼び出しは許可。 + + 但し + g: ONIG_OPTION_DONT_CAPTURE_GROUP + G: ONIG_OPTION_CAPTURE_GROUP + ('g'と'G'オプションは、ruby-dev MLで議論された。) + + これらの振舞の意味は、 + 名前付き捕獲と名前無し捕獲を同時に使用する必然性のある場面は少ないであろう + という理由から考えられたものである。 + + +----------------------------- +補記 1. 文法依存オプション + + + ONIG_SYNTAX_ONIGURUMA + (?m): 終止符記号(.)は改行と照合成功 + + + ONIG_SYNTAX_PERL と ONIG_SYNTAX_JAVA + (?s): 終止符記号(.)は改行と照合成功 + (?m): ^ は改行の直後に照合する、$ は改行の直前に照合する + + +補記 2. 独自拡張機能 + + + 16進数数字、非16進数字 \h, \H + + 真任意文字 \O + + 文章区分境界 \y, \Y + + 後方参照値有効性確認器 (?(...)) + + 名前付き捕獲式集合 (?<name>...), (?'name'...) + + 名前指定後方参照 \k<name> + + 部分式呼出し \g<name>, \g<group-num> + + 不在式 (?~|...|...) + + 不在停止 (?|...) + + +補記 3. Perl 5.8.0と比較して存在しない機能 + + + \N{name} + + \l,\u,\L,\U,\C + + (??{code}) + + * \Q...\E + 但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効 + + +補記 4. Ruby 1.8 の日本語化 GNU regex(version 0.12)との違い + + + 文字Property機能追加 (\p{property}, \P{Property}) + + 16進数字タイプ追加 (\h, \H) + + 戻り読み機能を追加 + + 強欲な繰り返し指定子を追加 (?+, *+, ++) + + 文字集合の中の演算子を追加 ([...], &&) + ('[' は、文字集合の中で通常の文字として使用するときには + 退避修飾しなければならない) + + 名前付き捕獲式集合と、部分式呼出し機能追加 + + 多バイト文字コードが指定されているとき、 + 文字集合の中で八進数または十六進数表現の連続は、多バイト符号で表現された + 一個の文字と解釈される + (例. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + + 文字集合の中で、一バイト文字と多バイト文字の範囲指定は許される。 + ex. /[a-あ]/ + + 孤立オプションの有効範囲は、その孤立オプションを含んでいる式集合の + 終わりまでである + 例. (?:(?i)a|b) は (?:(?i:a|b)) と解釈される、(?:(?i:a)|b)ではない + + 孤立オプションはその前の式に対して透過的ではない + 例. /a(?i)*/ は文法エラーとなる + + 不完全な繰り返し範囲指定子は通常の文字列として許可される + 例. /{/, /({)/, /a{2,3/ + + 否定的POSIXブラケット [:^xxxx:] を追加 + + POSIXブラケット [:ascii:] を追加 + + 先読みの繰り返しは不許可 + 例. /(?=a)*/, /(?!b){5}/ + + 数値で指定された文字に対しても、大文字小文字照合オプションは有効 + 例. 
/\x61/i =~ "A" + + 繰り返し回数指定で、最低回数の省略(0回)ができる + /a{,n}/ == /a{0,n}/ + 最低回数と最大回数の同時省略は許されない。(/a{,}/) + + /a{n}?/は無欲な演算子ではない。 + /a{n}?/ == /(?:a{n})?/ + + 無効な後方参照をチェックしてエラーにする。 + /\1/, /(a)\2/ + + 無限繰り返しの中で、長さ零での照合成功は繰り返しを中断させるが、 + このとき、中断すべきかどうかの判定として、捕獲式集合の捕獲状態の + 変化まで考慮している + /(?:()|())*\1\2/ =~ "" + /(?:\1a|())*/ =~ "a" + +終り diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/SYNTAX.md b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/SYNTAX.md new file mode 100644 index 000000000..c38e5c873 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/SYNTAX.md @@ -0,0 +1,1091 @@ + +# Oniguruma syntax (operator) configuration + +_Documented for Oniguruma 6.9.5 (2020/01/23)_ + + +---------- + + +## Overview + +This document details how to configure Oniguruma's syntax, by describing the desired +syntax operators and behaviors in an instance of the OnigSyntaxType struct, just like +the built-in Oniguruma syntaxes do. + +Configuration operators are bit flags, and are broken into multiple groups, somewhat arbitrarily, +because Oniguruma takes its configuration as a trio of 32-bit `unsigned int` values, assigned as +the first three fields in an `OnigSyntaxType` struct: + +```C +typedef struct { + unsigned int op; + unsigned int op2; + unsigned int behavior; + OnigOptionType options; /* default option */ + OnigMetaCharTableType meta_char_table; +} OnigSyntaxType; +``` + +The first group of configuration flags (`op`) roughly corresponds to the +configuration for "basic regex." The second group (`op2`) roughly corresponds +to the configuration for "advanced regex." And the third group (`behavior`) +describes more-or-less what to do for broken input, bad input, or other corner-case +regular expressions whose meaning is not well-defined. These three groups of +flags are described in full below, and tables of their usages for various syntaxes +follow. 
+ +The `options` field describes the default compile options to use if the caller does +not specify any options when invoking `onig_new()`. + +The `meta_char_table` field is used exclusively by the ONIG_SYN_OP_VARIABLE_META_CHARACTERS +option, which allows the various regex metacharacters, like `*` and `?`, to be replaced +with alternates (for example, SQL typically uses `%` instead of `.*` and `_` instead of `?`). + + +---------- + + +## Group One Flags (op) + + +This group contains "basic regex" constructs, features common to most regex systems. + + +### 0. ONIG_SYN_OP_VARIABLE_META_CHARACTERS + +_Set in: none_ + +Enables support for `onig_set_meta_char()`, which allows you to provide alternate +characters that will be used instead of the six special characters that are normally +these characters below: + + - `ONIG_META_CHAR_ESCAPE`: `\` + - `ONIG_META_CHAR_ANYCHAR`: `.` + - `ONIG_META_CHAR_ANYTIME`: `*` + - `ONIG_META_CHAR_ZERO_OR_ONE_TIME`: `?` + - `ONIG_META_CHAR_ONE_OR_MORE_TIME`: `+` + - `ONIG_META_CHAR_ANYCHAR_ANYTIME`: Equivalent in normal regex to `.*`, but supported + explicitly so that Oniguruma can support matching SQL `%` wildcards or shell `*` wildcards. + +If this flag is set, then the values defined using `onig_set_meta_char()` will be used; +if this flag is clear, then the default regex characters will be used instead, and +data set by `onig_set_meta_char()` will be ignored. + + +### 1. ONIG_SYN_OP_DOT_ANYCHAR (enable `.`) + +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Java, Perl, Perl_NG, Ruby_ + +Enables support for the standard `.` metacharacter, meaning "any one character." You +usually want this flag on unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` +so that you can use a metacharacter other than `.` instead. + + +### 2. 
ONIG_SYN_OP_ASTERISK_ZERO_INF (enable `r*`) + +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the standard `r*` metacharacter, meaning "zero or more r's." +You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` +so that you can use a metacharacter other than `*` instead. + + +### 3. ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (enable `r\*`) + +_Set in: none_ + +Enables support for an escaped `r\*` metacharacter, meaning "zero or more r's." This is +useful if you have disabled support for the normal `r*` metacharacter because you want `*` +to simply match a literal `*` character, but you still want some way of activating "zero or more" +behavior. + + +### 4. ONIG_SYN_OP_PLUS_ONE_INF (enable `r+`) + +_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the standard `r+` metacharacter, meaning "one or more r's." +You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` +so that you can use a metacharacter other than `+` instead. + + +### 5. ONIG_SYN_OP_ESC_PLUS_ONE_INF (enable `r\+`) + +_Set in: Grep_ + +Enables support for an escaped `r\+` metacharacter, meaning "one or more r's." This is +useful if you have disabled support for the normal `r+` metacharacter because you want `+` +to simply match a literal `+` character, but you still want some way of activating "one or more" +behavior. + + +### 6. ONIG_SYN_OP_QMARK_ZERO_ONE (enable `r?`) + +_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the standard `r?` metacharacter, meaning "zero or one r" or "an optional r." +You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` +so that you can use a metacharacter other than `?` instead. + + +### 7. 
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (enable `r\?`) + +_Set in: Grep_ + +Enables support for an escaped `r\?` metacharacter, meaning "zero or one r" or "an optional +r." This is useful if you have disabled support for the normal `r?` metacharacter because +you want `?` to simply match a literal `?` character, but you still want some way of activating +"optional" behavior. + + +### 8. ONIG_SYN_OP_BRACE_INTERVAL (enable `r{l,u}`) + +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the `r{lower,upper}` range form, common to more advanced +regex engines, which lets you specify precisely a minimum and maximum range on how many r's +must match (and not simply "zero or more"). + +This form also allows `r{count}` to specify a precise count of r's that must match. + +This form also allows `r{lower,}` to be equivalent to `r{lower,infinity}`. + +If and only if the `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV` behavior flag is set, +this form also allows `r{,upper}` to be equivalent to `r{0,upper}`; otherwise, +`r{,upper}` will be treated as an error. + + +### 9. ONIG_SYN_OP_ESC_BRACE_INTERVAL (enable `\{` and `\}`) + +_Set in: PosixBasic, Emacs, Grep_ + +Enables support for an escaped `r\{lower,upper\}` range form. This is useful if you +have disabled support for the normal `r{...}` range form and want curly braces to simply +match literal curly brace characters, but you still want some way of activating +"range" behavior. + + +### 10. ONIG_SYN_OP_VBAR_ALT (enable `r|s`) + +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the common `r|s` alternation operator. You usually want this +flag set. + + +### 11. ONIG_SYN_OP_ESC_VBAR_ALT (enable `\|`) + +_Set in: Emacs, Grep_ + +Enables support for an escaped `r\|s` alternation form. 
This is useful if you +have disabled support for the normal `r|s` alternation form and want `|` to simply +match a literal `|` character, but you still want some way of activating "alternate" behavior. + + +### 12. ONIG_SYN_OP_LPAREN_SUBEXP (enable `(r)`) + +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the common `(...)` grouping-and-capturing operators. You usually +want this flag set. + + +### 13. ONIG_SYN_OP_ESC_LPAREN_SUBEXP (enable `\(` and `\)`) + +_Set in: PosixBasic, Emacs, Grep_ + +Enables support for escaped `\(...\)` grouping-and-capturing operators. This is useful if you +have disabled support for the normal `(...)` grouping-and-capturing operators and want +parentheses to simply match literal parenthesis characters, but you still want some way of +activating "grouping" or "capturing" behavior. + + +### 14. ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (enable `\A` and `\Z` and `\z`) + +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the anchors `\A` (start-of-string), `\Z` (end-of-string or +newline-at-end-of-string), and `\z` (end-of-string) escapes. + +(If the escape metacharacter has been changed from the default of `\`, this +option will recognize that metacharacter instead.) + + +### 15. ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (enable `\G`) + +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the special anchor `\G` (start-of-previous-match). + +(If the escape metacharacter has been changed from the default of `\`, this +option will recognize that metacharacter instead.) + +Note that `OnigRegex`/`regex_t` are not stateful objects, and do _not_ record +the location of the previous match. 
The `\G` flag uses the `start` parameter +explicitly passed to `onig_search()` (or `onig_search_with_param()` to determine +the "start of the previous match," so if the caller always passes the start of +the entire buffer as the function's `start` parameter, then `\G` will behave +exactly the same as `\A`. + + +### 16. ONIG_SYN_OP_DECIMAL_BACKREF (enable `\num`) + +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for subsequent matches to back references to prior capture groups `(...)` using +the common `\num` syntax (like `\3`). + +If this flag is clear, then a numeric escape like `\3` will either be treated as a literal `3`, +or, if `ONIG_SYN_OP_ESC_OCTAL3` is set, will be treated as an octal character code `\3`. + +You usually want this enabled, and it is enabled by default in every built-in syntax. + + +### 17. ONIG_SYN_OP_BRACKET_CC (enable `[...]`) + +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for recognizing character classes, like `[a-z]`. If this flag is not set, `[` +and `]` will be treated as ordinary literal characters instead of as metacharacters. + +You usually want this enabled, and it is enabled by default in every built-in syntax. + + +### 18. ONIG_SYN_OP_ESC_W_WORD (enable `\w` and `\W`) + +_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the common `\w` and `\W` shorthand forms. These match "word characters," +whose meaning varies depending on the encoding being used. + +In ASCII encoding, `\w` is equivalent to `[A-Za-z0-9_]`. 
+ +In most other encodings, `\w` matches many more characters, including accented letters, Greek letters, +Cyrillic letters, Braille letters and numbers, Runic letters, Hebrew letters, Arabic letters and numerals, +Chinese Han ideographs, Japanese Katakana and Hiragana, Korean Hangul, and generally any symbol that +could qualify as a phonetic "letter" or counting "number" in any language. (Note that emoji are _not_ +considered "word characters.") + +`\W` always matches the opposite of whatever `\w` matches. + + +### 19. ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (enable `\<` and `\>`) + +_Set in: Grep, GnuRegex_ + +Enables support for the GNU-specific `\<` and `\>` word-boundary metacharacters. These work like +the `\b` word-boundary metacharacter, but only match at one end of the word or the other: `\<` +only matches at a transition from a non-word character to a word character (i.e., at the start +of a word), and `\>` only matches at a transition from a word character to a non-word character +(i.e., at the end of a word). + +Most regex syntaxes do _not_ support these metacharacters. + + +### 20. ONIG_SYN_OP_ESC_B_WORD_BOUND (enable `\b` and `\B`) + +_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the common `\b` and `\B` word-boundary metacharacters. The `\b` metacharacter +matches a zero-width position at a transition from word-characters to non-word-characters, or vice +versa. The `\B` metacharacter matches at all positions _not_ matched by `\b`. + +See details in `ONIG_SYN_OP_ESC_W_WORD` above for an explanation as to which characters +are considered "word characters." + + +### 21. ONIG_SYN_OP_ESC_S_WHITE_SPACE (enable `\s` and `\S`) + +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the common `\s` and `\S` whitespace-matching metacharacters. 
+ +The `\s` metacharacter in ASCII encoding is exactly equivalent to the character class +`[\t\n\v\f\r ]`, or characters codes 9 through 13 (inclusive), and 32. + +The `\s` metacharacter in Unicode is exactly equivalent to the character class +`[\t\n\v\f\r \x85\xA0\x1680\x2000-\x200A\x2028-\x2029\x202F\x205F\x3000]` — that is, it matches +the same as ASCII, plus U+0085 (next line), U+00A0 (nonbreaking space), U+1680 (Ogham space mark), +U+2000 (en quad) through U+200A (hair space) (this range includes several widths of Unicode spaces), +U+2028 (line separator) through U+2029 (paragraph separator), +U+202F (narrow no-break space), U+205F (medium mathematical space), and U+3000 (CJK ideographic space). + +All non-Unicode encodings are handled by converting their code points to the appropriate +Unicode-equivalent code points, and then matching according to Unicode rules. + +`\S` always matches any one character that is _not_ in the set matched by `\s`. + + +### 22. ONIG_SYN_OP_ESC_D_DIGIT (enable `\d` and `\D`) + +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the common `\d` and `\D` digit-matching metacharacters. + +The `\d` metacharacter in ASCII encoding is exactly equivalent to the character class +`[0-9]`, or characters codes 48 through 57 (inclusive). + +The `\d` metacharacter in Unicode matches `[0-9]`, as well as digits in Arabic, Devanagari, +Bengali, Laotian, Mongolian, CJK fullwidth numerals, and many more. + +All non-Unicode encodings are handled by converting their code points to the appropriate +Unicode-equivalent code points, and then matching according to Unicode rules. + +`\D` always matches any one character that is _not_ in the set matched by `\d`. + + +### 23. ONIG_SYN_OP_LINE_ANCHOR (enable `^r` and `r$`) + +_Set in: Oniguruma, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the common `^` and `$` line-anchor metacharacters. 
+ +In single-line mode, `^` matches the start of the input buffer, and `$` matches +the end of the input buffer. In multi-line mode, `^` matches if the preceding +character is `\n`; and `$` matches if the following character is `\n`. + +(Note that Oniguruma does not recognize other newline types: It only matches +`^` and `$` against `\n`: not `\r`, not `\r\n`, not the U+2028 line separator, +and not any other form.) + + +### 24. ONIG_SYN_OP_POSIX_BRACKET (enable POSIX `[:xxxx:]`) + +_Set in: Oniguruma, PosixBasic, PosixExtended, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ + +Enables support for the POSIX `[:xxxx:]` character classes, like `[:alpha:]` and `[:digit:]`. +The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `digit`, +`graph`, `lower`, `print`, `punct`, `space`, `upper`, `xdigit`, `ascii`, `word`. + + +### 25. ONIG_SYN_OP_QMARK_NON_GREEDY (enable `r??`, `r*?`, `r+?`, and `r{n,m}?`) + +_Set in: Oniguruma, Perl, Java, Perl_NG, Ruby_ + +Enables support for lazy (non-greedy) quantifiers: That is, if you append a `?` after +another quantifier such as `?`, `*`, `+`, or `{n,m}`, Oniguruma will try to match +as _little_ as possible instead of as _much_ as possible. + + +### 26. ONIG_SYN_OP_ESC_CONTROL_CHARS (enable `\n`, `\r`, `\t`, etc.) + +_Set in: Oniguruma, PosixBasic, PosixExtended, Java, Perl, Perl_NG, Ruby_ + +Enables support for C-style control-code escapes, like `\n` and `\r`. Specifically, +this recognizes `\a` (7), `\b` (8), `\t` (9), `\n` (10), `\f` (12), `\r` (13), and +`\e` (27). If ONIG_SYN_OP2_ESC_V_VTAB is enabled (see below), this also enables +support for recognizing `\v` as code point 11. + + +### 27. ONIG_SYN_OP_ESC_C_CONTROL (enable `\cx` control codes) + +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ + +Enables support for named control-code escapes, like `\cm` or `\cM` for code-point +13. 
In this shorthand form, control codes may be specified by `\c` (for "Control") +followed by an alphabetic letter, a-z or A-Z, indicating which code point to represent +(1 through 26). So `\cA` is code point 1, and `\cZ` is code point 26. + + +### 28. ONIG_SYN_OP_ESC_OCTAL3 (enable `\OOO` octal codes) + +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ + +Enables support for octal-style escapes of up to three digits, like `\1` for code +point 1, and `\177` for code point 127. Octal values greater than 255 will result +in an error message. + + +### 29. ONIG_SYN_OP_ESC_X_HEX2 (enable `\xHH` hex codes) + +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ + +Enables support for hexadecimal-style escapes of up to two digits, like `\x1` for code +point 1, and `\x7F` for code point 127. + + +### 30. ONIG_SYN_OP_ESC_X_BRACE_HEX8 (enable `\x{7HHHHHHH}` hex codes) + +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ + +Enables support for brace-wrapped hexadecimal-style escapes of up to eight digits, +like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. + + +### 31. ONIG_SYN_OP_ESC_O_BRACE_OCTAL (enable `\o{1OOOOOOOOOO}` octal codes) + +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ + +Enables support for brace-wrapped octal-style escapes of up to eleven digits, +like `\o{1}` for code point 1, and `\o{177776}` for code point 65534. + +(New feature as of Oniguruma 6.3.) + + +---------- + + +## Group Two Flags (op2) + + +This group contains support for lesser-known regex syntax constructs. + + +### 0. ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (enable `\Q...\E`) + +_Set in: Java, Perl, Perl_NG_ + +Enables support for "quoted" parts of a pattern: Between `\Q` and `\E`, all +syntax parsing is turned off, so that metacharacters like `*` and `+` will no +longer be treated as metacharacters, and instead will be matched as literal +`*` and `+`, as if they had been escaped with `\*` and `\+`. + + +### 1. 
ONIG_SYN_OP2_QMARK_GROUP_EFFECT (enable `(?...)`) + +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ + +Enables support for the fairly-common `(?...)` grouping operator, which +controls precedence but which does _not_ capture its contents. + + +### 2. ONIG_SYN_OP2_OPTION_PERL (enable options `(?imsx)` and `(?-imsx)`) + +_Set in: Java, Perl, Perl_NG_ + +Enables support of regex options. (i,m,s,x) +The supported toggle-able options for this flag are: + + - `i` - Case-insensitivity + - `m` - Multi-line mode (`^` and `$` match at `\n` as well as start/end of buffer) + - `s` - Single-line mode (`.` can match `\n`) + - `x` - Extended pattern (free-formatting: whitespace will ignored) + + +### 3. ONIG_SYN_OP2_OPTION_RUBY (enable options `(?imx)` and `(?-imx)`) + +_Set in: Oniguruma, Ruby_ + +Enables support of regex options. (i,m,x) +The supported toggle-able options for this flag are: + + - `i` - Case-insensitivity + - `m` - Multi-line mode (`.` can match `\n`) + - `x` - Extended pattern (free-formatting: whitespace will ignored) + + +### 4. ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (enable `r?+`, `r*+`, and `r++`) + +_Set in: Oniguruma, Ruby_ + +Enables support for the _possessive_ quantifiers `?+`, `*+`, and `++`, which +work similarly to `?` and `*` and `+`, respectively, but which do not backtrack +after matching: Like the normal greedy quantifiers, they match as much as +possible, but they do not attempt to match _less_ than their maximum possible +extent if subsequent parts of the pattern fail to match. + + +### 5. ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (enable `r{n,m}+`) + +_Set in: Java_ + +Enables support for the _possessive_ quantifier `{n,m}+`, which +works similarly to `{n,m}`, but which does not backtrack +after matching: Like the normal greedy quantifier, it matches as much as +possible, but it do not attempt to match _less_ than its maximum possible +extent if subsequent parts of the pattern fail to match. + + +### 6. 
ONIG_SYN_OP2_CCLASS_SET_OP (enable `&&` within `[...]`) + +_Set in: Oniguruma, Java, Ruby_ + +Enables support for character-class _intersection_. For example, with this +feature enabled, you can write `[a-z&&[^aeiou]]` to produce a character class +of only consonants, or `[\0-\37&&[^\n\r]]` to produce a character class of +all control codes _except_ newlines. + + +### 7. ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (enable named captures `(?<name>...)`) + +_Set in: Oniguruma, Perl_NG, Ruby_ + +Enables support for _naming_ capture groups, so that instead of having to +refer to captures by position (like `\3` or `$3`), you can refer to them by names +(like `server` and `path`). This supports the Perl/Ruby naming syntaxes `(?<name>...)` +and `(?'name'...)`, but not the Python `(?P<name>...)` syntax. + + +### 8. ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (enable named backreferences `\k<name>`) + +_Set in: Oniguruma, Perl_NG, Ruby_ + +Enables support for substituted backreferences by name, not just by position. +This supports using `\k'name'` in addition to supporting `\k<name>`. This also +supports an Oniguruma-specific extension that lets you specify the _distance_ of +the match, if the capture matched multiple times, by writing `\k<name+n>` or +`\k<name-n>`. + + +### 9. ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (enable backreferences `\g<name>` and `\g<n>`) + +_Set in: Oniguruma, Perl_NG, Ruby_ + +Enables support for substituted backreferences by both name and position using +the same syntax. This supports using `\g'name'` and `\g'1'` in addition to +supporting `\g<name>` and `\g<1>`. + + +### 10. ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (enable `(?@...)` and `(?@<name>...)`) + +_Set in: none_ + +Enables support for _capture history_, which can answer via the `onig_*capture*()` +functions exactly which captures were matched, how many times, and where in the +input they were matched, by placing `?@` in front of the capture. 
Per Oniguruma's +regex syntax documentation (appendix A-5): + +`/(?@a)*/.match("aaa")` ==> `[<0-1>, <1-2>, <2-3>]` + +This can require substantial memory, is primarily useful for debugging, and is not +enabled by default in any syntax. + + +### 11. ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (enable `\C-x`) + +_Set in: Oniguruma, Ruby_ + +Enables support for Ruby legacy control-code escapes, like `\C-m` or `\C-M` for code-point +13. In this shorthand form, control codes may be specified by `\C-` (for "Control") +followed by a single character (or equivalent), indicating which code point to represent, +based on that character's lowest five bits. So, like `\c`, you can represent code-point +10 with `\C-j`, but you can also represent it with `\C-*` as well. + +See also ONIG_SYN_OP_ESC_C_CONTROL, which enables the more-common `\cx` syntax. + + +### 12. ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (enable `\M-x`) + +_Set in: Oniguruma, Ruby_ + +Enables support for Ruby legacy meta-code escapes. When you write `\M-x`, Oniguruma +will match an `x` whose 8th bit is set (i.e., the character code of `x` will be or'ed +with `0x80`). So, for example, you can match `\x81` using `\x81`, or you can write +`\M-\1`. This is mostly useful when working with legacy 8-bit character encodings. + + +### 13. ONIG_SYN_OP2_ESC_V_VTAB (enable `\v` as vertical tab) + +_Set in: Oniguruma, Java, Ruby_ + +Enables support for a C-style `\v` escape code, meaning "vertical tab." If enabled, +`\v` will be equivalent to ASCII code point 11. + + +### 14. ONIG_SYN_OP2_ESC_U_HEX4 (enable `\uHHHH` for Unicode) + +_Set in: Oniguruma, Java, Ruby_ + +Enables support for a Java-style `\uHHHH` escape code for representing Unicode +code-points by number, using up to four hexadecimal digits (up to `\uFFFF`). So, +for example, `\u221E` will match an infinity symbol, `∞`. 
+ +For code points larger than four digits, like the emoji `🚡` (aerial tramway, or code +point U+1F6A1), you must either represent the character directly using an encoding like +UTF-8, or you must enable support for ONIG_SYN_OP_ESC_X_BRACE_HEX8 or +ONIG_SYN_OP_ESC_O_BRACE_OCTAL, which support more than four digits. + +(New feature as of Oniguruma 6.7.) + + +### 15. ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (enable ``\` `` and `\'` anchors) + +_Set in: Emacs_ + +This flag makes the ``\` `` and `\'` escapes function identically to +`\A` and `\z`, respectively (when ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR is enabled). + +These anchor forms are very obscure, and rarely supported by other regex libraries. + + +### 16. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (enable `\p{...}` and `\P{...}`) + +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ + +Enables support for an alternate syntax for POSIX character classes; instead of +writing `[:alpha:]` when this is enabled, you can instead write `\p{alpha}`. + +See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. + + +### 17. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (enable `\p{^...}` and `\P{^...}`) + +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ + +Enables support for an alternate syntax for POSIX character classes; instead of +writing `[:^alpha:]` when this is enabled, you can instead write `\p{^alpha}`. + +See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. + + +### 18. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS + +_(not presently used)_ + + +### 19. ONIG_SYN_OP2_ESC_H_XDIGIT (enable `\h` and `\H`) + +_Set in: Oniguruma, Ruby_ + +Enables support for the Ruby-specific shorthand `\h` and `\H` metacharacters. +Somewhat like `\d` matches decimal digits, `\h` matches hexadecimal digits — that is, +characters in `[0-9a-fA-F]`. + +`\H` matches the opposite of whatever `\h` matches. + + +### 20. 
ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (disable `\`) + +_Set in: As-is_ + +If set, this disables all escape codes, shorthands, and metacharacters that start +with `\` (or whatever the configured escape character is), allowing `\` to be treated +as a literal `\`. + +You usually do not want this flag to be enabled. + + +### 21. ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (enable `(?(...)then|else)`) + +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ + +Enables support for conditional inclusion of subsequent regex patterns based on whether +a prior named or numbered capture matched, or based on whether a pattern will +match. This supports many different forms, including: + + - `(?(<foo>)then|else)` - condition based on a capture by name. + - `(?('foo')then|else)` - condition based on a capture by name. + - `(?(3)then|else)` - condition based on a capture by number. + - `(?(+3)then|else)` - forward conditional to a future match, by relative position. + - `(?(-3)then|else)` - backward conditional to a prior match, by relative position. + - `(?(foo)then|else)` - this matches a pattern `foo`. (foo is any sub-expression) + +(New feature as of Oniguruma 6.5.) + + +### 22. ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (enable `\K`) + +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ + +Enables support for `\K`, which excludes all content before it from the overall +regex match (i.e., capture #0). So, for example, pattern `foo\Kbar` would match +`foobar`, but capture #0 would only include `bar`. + +(New feature as of Oniguruma 6.5.) + + +### 23. ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (enable `\R`) + +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ + +Enables support for `\R`, the "general newline" shorthand, which matches +`(\r\n|[\n\v\f\r\u0085\u2028\u2029])` (obviously, the Unicode values are cannot be +matched in ASCII encodings). + +(New feature as of Oniguruma 6.5.) + + +### 24. ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (enable `\N` and `\O`) + +_Set in: Oniguruma, Perl, Perl_NG_ + +Enables support for `\N` and `\O`. 
`\N` is "not a line break," which is much +like the standard `.` metacharacter, except that while `.` can be affected by +the single-line setting, `\N` always matches exactly one character that is not +one of the various line-break characters (like `\n` and `\r`). + +`\O` matches exactly one character, regardless of whether single-line or +multi-line mode are enabled or disabled. + +(New feature as of Oniguruma 6.5.) + + +### 25. ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (enable `(?~...)`) + +_Set in: Oniguruma, Ruby_ + +Enables support for the `(?~r)` "absent operator" syntax, which matches +as much as possible as long as the result _doesn't_ match pattern `r`. This is +_not_ the same as negative lookahead or negative lookbehind. + +Among the most useful examples of this is `\/\*(?~\*\/)\*\/`, which matches +C-style comments by simply saying "starts with /*, ends with */, and _doesn't_ +contain a */ in between." + +A full explanation of this feature is complicated, but it is useful, and an +excellent article about it is [available on Medium](https://medium.com/rubyinside/the-new-absent-operator-in-ruby-s-regular-expressions-7c3ef6cd0b99). + +(New feature as of Oniguruma 6.5.) + + +### 26. ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT (enable `\X` and `\Y` and `\y`) + +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ + +`\X` is another variation on `.`, designed to support Unicode, in that it matches +a full _grapheme cluster_. In Unicode, `à` can be encoded as one code point, +`U+00E0`, or as two, `U+0061 U+0300`. If those are further escaped using UTF-8, +the former becomes two bytes, and the latter becomes three. Unfortunately, `.` +would naively match only one or two bytes, depending on the encoding, and would +likely incorrectly match anything from just `a` to a broken half of a code point. +`\X` is designed to fix this: It matches the full `à`, no matter how `à` is +encoded or decomposed. 
+ +`\y` matches a cluster boundary, i.e., a zero-width position between +graphemes, somewhat like `\b` matches boundaries between words. `\Y` matches +the _opposite_ of `\y`, that is, a zero-width position between code points in +the _middle_ of a grapheme. + +(New feature as of Oniguruma 6.6.) + + +### 27. ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (enable `(?R)` and `(?&name)`) + +_Set in: Perl_NG_ + +Enables support for substituted backreferences by both name and position using +Perl-5-specific syntax. This supports using `(?R3)` and `(?&name)` to reference +previous (and future) matches, similar to the more-common `\g<3>` and `\g<name>` +backreferences. + +(New feature as of Oniguruma 6.7.) + + +### 28. ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (enable `(?{...})`) + +_Set in: Oniguruma, Perl, Perl_NG_ + +Enables support for Perl-style "callouts" — pattern substitutions that result from +invoking a callback method. When `(?{foo})` is reached in a pattern, the callback +function set in `onig_set_progress_callout()` will be invoked, and be able to perform +custom computation during the pattern match (and during backtracking). + +Full documentation for this advanced feature can be found in the Oniguruma +`docs/CALLOUT.md` file, with an example in `samples/callout.c`. + +(New feature as of Oniguruma 6.8.) + + +### 29. ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (enable `(*name)`) + +_Set in: Oniguruma, Perl, Perl_NG_ + +Enables support for Perl-style "callouts" — pattern substitutions that result from +invoking a callback method. When `(*foo)` is reached in a pattern, the callback +function set in `onig_set_callout_of_name()` will be invoked, passing the given name +`foo` to it, and it can perform custom computation during the pattern match (and +during backtracking). + +Full documentation for this advanced feature can be found in the Oniguruma +`docs/CALLOUT.md` file, with an example in `samples/callout.c`. + +(New feature as of Oniguruma 6.8.) + + +### 30. 
ONIG_SYN_OP2_OPTION_ONIGURUMA (enable options `(?imxWSDPy)` and `(?-imxWDSP)`) + +_Set in: Oniguruma_ + +Enables support of regex options. (i,m,x,W,S,D,P,y) + +(New feature as of Oniguruma 6.9.2) + + - `i` - Case-insensitivity + - `m` - Multi-line mode (`.` can match `\n`) + - `x` - Extended pattern (free-formatting: whitespace will be ignored) + - `W` - ASCII only word. + - `D` - ASCII only digit. + - `S` - ASCII only space. + - `P` - ASCII only POSIX properties. (includes W,D,S) + +---------- + + +## Syntax Flags (syn) + + +This group contains rules to handle corner cases and constructs that are errors in +some syntaxes but not in others. + +### 0. ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (independent `?`, `*`, `+`, `{n,m}`) + +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ + +This flag specifies how to handle operators like `?` and `*` when they aren't +directly attached to an operand, as in `^*` or `(*)`: Are they an error, are +they discarded, or are they taken as literals? If this flag is clear, they +are taken as literals; otherwise, the ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS flag +determines if they are errors or if they are discarded. + +### 1. ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (error or ignore independent operators) + +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ + +If ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS is set, this flag controls what happens when +independent operators appear in a pattern: If this flag is set, then independent +operators produce an error message; if this flag is clear, then independent +operators are silently discarded. + +### 2. ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (allow `...)...`) + +_Set in: PosixExtended_ + +This flag, if set, causes a `)` character without a preceding `(` to be treated as +a literal `)`, equivalent to `\)`. If this flag is clear, then an unmatched `)` +character will produce an error message. + +### 3. 
ONIG_SYN_ALLOW_INVALID_INTERVAL (allow `{???`) + +_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ + +This flag, if set, causes an invalid range, like `foo{bar}` or `foo{}`, to be +silently discarded, as if `foo` had been written instead. If clear, an invalid +range will produce an error message. + +### 4. ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (allow `{,n}` to mean `{0,n}`) + +_Set in: Oniguruma, Ruby_ + +If this flag is set, then `r{,n}` will be treated as equivalent to writing +`r{0,n}`. If this flag is clear, then `r{,n}` will produce an error message. + +Note that regardless of whether this flag is set or clear, if +ONIG_SYN_OP_BRACE_INTERVAL is enabled, then `r{n,}` will always be legal: This +flag *only* controls the behavior of the opposite form, `r{,n}`. + +### 5. ONIG_SYN_STRICT_CHECK_BACKREF (error on invalid backrefs) + +_Set in: none_ + +If this flag is set, an invalid backref, like `\1` in a pattern with no captures, +will produce an error. If this flag is clear, then an invalid backref will be +equivalent to the empty string. + +No built-in syntax has this flag enabled. + +### 6. ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (allow `(?<=a|bc)`) + +_Set in: Oniguruma, Java, Ruby_ + +If this flag is set, lookbehind patterns with alternate options may have differing +lengths among those options. If this flag is clear, lookbehind patterns with options +must have each option have identical length to the other options. + +Oniguruma can handle either form, but not all regex engines can, so for compatibility, +Oniguruma allows you to cause regexes for other regex engines to fail if they might +depend on this rule. + +### 7. 
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (prefer `\k<name>` over `\3`) + +_Set in: Oniguruma, Perl_NG, Ruby_ + +If this flag is set on the syntax *and* ONIG_OPTION_CAPTURE_GROUP is set when calling +Oniguruma, then if a name is used on any capture, all captures must also use names: A +single use of a named capture prohibits the use of numbered captures. + +### 8. ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (allow `(?<x>)...(?<x>)`) + +_Set in: Oniguruma, Perl_NG, Ruby_ + +If this flag is set, multiple capture groups may use the same name. If this flag is +clear, then reuse of a name will produce an error message. + +### 9. ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (`a{n}?` is equivalent to `(?:a{n})?`) + +_Set in: Oniguruma, Ruby_ + +If this flag is set, then intervals of a fixed size will ignore a lazy (non-greedy) +`?` quantifier and treat it as an optional match (an ordinary `r?`), since "match as +little as possible" is meaningless for a fixed-size interval. If this flag is clear, +then `r{n}?` will mean the same as `r{n}`, and the useless `?` will be discarded. + +### 10. ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (`..(?i)..`) + +_Set in: Perl, Perl_NG, Java_ + +If this flag is set, then an isolated option doesn't break the branch and remains in effect until the end of the group (or end of the pattern). +If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. /a(?i)b|c/ ==> /a(?i:b|c)/ + +### 11. ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND (`(?<=...a+...)`) + +_Set in: Oniguruma, Java_ + +If this flag is set, then variable-length expressions are allowed in look-behind. + +### 20. ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (add `\n` to `[^...]`) + +_Set in: Grep_ + +If this flag is set, all newline characters (like `\n`) will be excluded from a negative +character class automatically, as if the pattern had been written as `[^...\n]`. 
If this +flag is clear, negative character classes do not automatically exclude newlines, and +only exclude those characters and ranges written in them. + +### 21. ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (allow `[...\w...]`) + +_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ + +If this flag is set, shorthands like `\w` are allowed to describe characters in character +classes. If this flag is clear, shorthands like `\w` are treated as a redundantly-escaped +literal `w`. + +### 22. ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (silently discard `[z-a]`) + +_Set in: Emacs, Grep_ + +If this flag is set, then character ranges like `[z-a]` that are broken or contain no +characters will be silently ignored. If this flag is clear, then broken or empty +character ranges will produce an error message. + +### 23. ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (treat `[0-9-a]` as `[0-9\-a]`) + +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ + +If this flag is set, then a trailing `-` after a character range will be taken as a +literal `-`, as if it had been escaped as `\-`. If this flag is clear, then a trailing +`-` after a character range will produce an error message. + +### 24. ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (warn on `[[...]` and `[-x]`) + +_Set in: Oniguruma, Ruby_ + +If this flag is set, Oniguruma will be stricter about warning for bad forms in +character classes: `[[...]` will produce a warning, but `[\[...]` will not; +`[-x]` will produce a warning, but `[\-x]` will not; `[x&&-y]` will produce a warning, +while `[x&&\-y]` will not; and so on. If this flag is clear, all of these warnings +will be silently discarded. + +### 25. ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (warn on `(?:a*)+`) + +_Set in: Oniguruma, Ruby_ + +If this flag is set, Oniguruma will warn about nested repeat operators that have no meaning, like `(?:a*)+`. +If this flag is clear, Oniguruma will allow the nested repeat operators without warning about them. + +### 26. 
ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (allow [a-\x{7fffffff}]) + +_Set in: Oniguruma_ + +If this flag is set, then invalid code points at the end of range in character class are allowed. + +### 31. ONIG_SYN_CONTEXT_INDEP_ANCHORS + +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ + +Not currently used, and does nothing. (But still set in several syntaxes for some +reason.) + +---------- + +## Usage tables + +These tables show which of the built-in syntaxes use which flags and options, for easy comparison between them. + +### Group One Flags (op) + +| ID | Option | PosB | PosEx | Emacs | Grep | Gnu | Java | Perl | PeNG | Ruby | Onig | +| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +| 0 | `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` | - | - | - | - | - | - | - | - | - | - | +| 1 | `ONIG_SYN_OP_DOT_ANYCHAR` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +| 2 | `ONIG_SYN_OP_ASTERISK_ZERO_INF` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +| 3 | `ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF` | - | - | - | - | - | - | - | - | - | - | +| 4 | `ONIG_SYN_OP_PLUS_ONE_INF` | - | Yes | Yes | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 5 | `ONIG_SYN_OP_ESC_PLUS_ONE_INF` | - | - | - | Yes | - | - | - | - | - | - | +| 6 | `ONIG_SYN_OP_QMARK_ZERO_ONE` | - | Yes | Yes | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 7 | `ONIG_SYN_OP_ESC_QMARK_ZERO_ONE` | - | - | - | Yes | - | - | - | - | - | - | +| 8 | `ONIG_SYN_OP_BRACE_INTERVAL` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 9 | `ONIG_SYN_OP_ESC_BRACE_INTERVAL` | Yes | - | Yes | Yes | - | - | - | - | - | - | +| 10 | `ONIG_SYN_OP_VBAR_ALT` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 11 | `ONIG_SYN_OP_ESC_VBAR_ALT` | - | - | Yes | Yes | - | - | - | - | - | - | +| 12 | `ONIG_SYN_OP_LPAREN_SUBEXP` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 13 | 
`ONIG_SYN_OP_ESC_LPAREN_SUBEXP` | Yes | - | Yes | Yes | - | - | - | - | - | - | +| 14 | `ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 15 | `ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 16 | `ONIG_SYN_OP_DECIMAL_BACKREF` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +| 17 | `ONIG_SYN_OP_BRACKET_CC` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +| 18 | `ONIG_SYN_OP_ESC_W_WORD` | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +| 19 | `ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END` | - | - | - | Yes | Yes | - | - | - | - | - | +| 20 | `ONIG_SYN_OP_ESC_B_WORD_BOUND` | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +| 21 | `ONIG_SYN_OP_ESC_S_WHITE_SPACE` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 22 | `ONIG_SYN_OP_ESC_D_DIGIT` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 23 | `ONIG_SYN_OP_LINE_ANCHOR` | - | - | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +| 24 | `ONIG_SYN_OP_POSIX_BRACKET` | Yes | Yes | Yes | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 25 | `ONIG_SYN_OP_QMARK_NON_GREEDY` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | +| 26 | `ONIG_SYN_OP_ESC_CONTROL_CHARS` | Yes | Yes | - | - | - | Yes | Yes | Yes | Yes | Yes | +| 27 | `ONIG_SYN_OP_ESC_C_CONTROL` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | +| 28 | `ONIG_SYN_OP_ESC_OCTAL3` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | +| 29 | `ONIG_SYN_OP_ESC_X_HEX2` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | +| 30 | `ONIG_SYN_OP_ESC_X_BRACE_HEX8` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | +| 31 | `ONIG_SYN_OP_ESC_O_BRACE_OCTAL` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | + +### Group Two Flags (op2) + +| ID | Option | PosB | PosEx | Emacs | Grep | Gnu | Java | Perl | PeNG | Ruby | Onig | +| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | 
+| 0 | `ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE` | - | - | - | - | - | Yes | Yes | Yes | - | - | +| 1 | `ONIG_SYN_OP2_QMARK_GROUP_EFFECT` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | +| 2 | `ONIG_SYN_OP2_OPTION_PERL` | - | - | - | - | - | Yes | Yes | Yes | - | - | +| 3 | `ONIG_SYN_OP2_OPTION_RUBY` | - | - | - | - | - | - | - | - | Yes | - | +| 4 | `ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT` | - | - | - | - | - | - | - | - | Yes | Yes | +| 5 | `ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL` | - | - | - | - | - | Yes | - | - | - | - | +| 6 | `ONIG_SYN_OP2_CCLASS_SET_OP` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 7 | `ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 8 | `ONIG_SYN_OP2_ESC_K_NAMED_BACKREF` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 9 | `ONIG_SYN_OP2_ESC_G_SUBEXP_CALL` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 10 | `ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY` | - | - | - | - | - | - | - | - | - | - | +| 11 | `ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL` | - | - | - | - | - | - | - | - | Yes | Yes | +| 12 | `ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META` | - | - | - | - | - | - | - | - | Yes | Yes | +| 13 | `ONIG_SYN_OP2_ESC_V_VTAB` | - | - | - | - | - | Yes | - | - | Yes | Yes | +| 14 | `ONIG_SYN_OP2_ESC_U_HEX4` | - | - | - | - | - | Yes | - | - | Yes | Yes | +| 15 | `ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR` | - | - | Yes | - | - | - | - | - | - | - | +| 16 | `ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY` | - | - | - | - | - | Yes | Yes | Yes | Yes | Yes | +| 17 | `ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | +| 18 | `ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS` | - | - | - | - | - | - | - | - | - | - | +| 19 | `ONIG_SYN_OP2_ESC_H_XDIGIT` | - | - | - | - | - | - | - | - | Yes | Yes | +| 20 | `ONIG_SYN_OP2_INEFFECTIVE_ESCAPE` | - | - | - | - | - | - | - | - | - | - | +| 21 | `ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | +| 22 | `ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP` | - 
| - | - | - | - | - | Yes | Yes | Yes | Yes | +| 23 | `ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | +| 24 | `ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT` | - | - | - | - | - | - | Yes | Yes | - | Yes | +| 25 | `ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP` | - | - | - | - | - | - | - | - | Yes | Yes | +| 26 | `ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT` | - | - | - | - | - | - | Yes | Yes | Yes | Yes | +| 27 | `ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL` | - | - | - | - | - | - | - | Yes | - | - | +| 28 | `ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS` | - | - | - | - | - | - | Yes | Yes | Yes | - | +| 29 | `ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME` | - | - | - | - | - | - | Yes | Yes | Yes | - | +| 30 | `ONIG_SYN_OP2_OPTION_ONIGURUMA` | - | - | - | - | - | - | - | - | - | Yes | + +### Syntax Flags (syn) + +| ID | Option | PosB | PosEx | Emacs | Grep | Gnu | Java | Perl | PeNG | Ruby | Onig | +| ----- | --------------------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +| 0 | `ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 1 | `ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 2 | `ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP` | - | Yes | - | - | - | - | - | - | - | - | +| 3 | `ONIG_SYN_ALLOW_INVALID_INTERVAL` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 4 | `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV` | - | - | - | - | - | - | - | - | Yes | Yes | +| 5 | `ONIG_SYN_STRICT_CHECK_BACKREF` | - | - | - | - | - | - | - | - | - | - | +| 6 | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | Yes | Yes | +| 7 | `ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 8 | `ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 9 | `ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY` | - | - | - | - | - | - | - | - | Yes | Yes | +| 10 | 
`ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH` | - | - | - | - | - | Yes | Yes | Yes | - | - | +| 11 | `ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | - | Yes | +| 20 | `ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC` | - | - | - | Yes | - | - | - | - | - | - | +| 21 | `ONIG_SYN_BACKSLASH_ESCAPE_IN_CC` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 22 | `ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC` | - | - | Yes | Yes | - | - | - | - | - | - | +| 23 | `ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | +| 24 | `ONIG_SYN_WARN_CC_OP_NOT_ESCAPED` | - | - | - | - | - | - | - | - | Yes | Yes | +| 25 | `ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT` | - | - | - | - | - | - | - | - | Yes | Yes | +| 26 | `ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC` | - | - | - | - | - | - | - | - | - | Yes | +| 31 | `ONIG_SYN_CONTEXT_INDEP_ANCHORS` | - | Yes | - | - | Yes | Yes | Yes | Yes | Yes | Yes | diff --git a/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/UNICODE_PROPERTIES b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/UNICODE_PROPERTIES new file mode 100644 index 000000000..24c203156 --- /dev/null +++ b/roms/edk2/MdeModulePkg/Universal/RegularExpressionDxe/oniguruma/doc/UNICODE_PROPERTIES @@ -0,0 +1,788 @@ +Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1) + + 15: ASCII_Hex_Digit + 16: Adlam + 17: Ahom + 18: Alphabetic + 19: Anatolian_Hieroglyphs + 20: Any + 21: Arabic + 22: Armenian + 23: Assigned + 24: Avestan + 25: Balinese + 26: Bamum + 27: Bassa_Vah + 28: Batak + 29: Bengali + 30: Bhaiksuki + 31: Bidi_Control + 32: Bopomofo + 33: Brahmi + 34: Braille + 35: Buginese + 36: Buhid + 37: C + 38: Canadian_Aboriginal + 39: Carian + 40: Case_Ignorable + 41: Cased + 42: Caucasian_Albanian + 43: Cc + 44: Cf + 45: Chakma + 46: Cham + 47: Changes_When_Casefolded + 48: Changes_When_Casemapped + 49: Changes_When_Lowercased + 50: Changes_When_Titlecased + 51: Changes_When_Uppercased + 52: 
Cherokee + 53: Cn + 54: Co + 55: Common + 56: Coptic + 57: Cs + 58: Cuneiform + 59: Cypriot + 60: Cyrillic + 61: Dash + 62: Default_Ignorable_Code_Point + 63: Deprecated + 64: Deseret + 65: Devanagari + 66: Diacritic + 67: Dogra + 68: Duployan + 69: Egyptian_Hieroglyphs + 70: Elbasan + 71: Elymaic + 72: Emoji + 73: Emoji_Component + 74: Emoji_Modifier + 75: Emoji_Modifier_Base + 76: Emoji_Presentation + 77: Ethiopic + 78: Extended_Pictographic + 79: Extender + 80: Georgian + 81: Glagolitic + 82: Gothic + 83: Grantha + 84: Grapheme_Base + 85: Grapheme_Extend + 86: Grapheme_Link + 87: Greek + 88: Gujarati + 89: Gunjala_Gondi + 90: Gurmukhi + 91: Han + 92: Hangul + 93: Hanifi_Rohingya + 94: Hanunoo + 95: Hatran + 96: Hebrew + 97: Hex_Digit + 98: Hiragana + 99: Hyphen +100: IDS_Binary_Operator +101: IDS_Trinary_Operator +102: ID_Continue +103: ID_Start +104: Ideographic +105: Imperial_Aramaic +106: Inherited +107: Inscriptional_Pahlavi +108: Inscriptional_Parthian +109: Javanese +110: Join_Control +111: Kaithi +112: Kannada +113: Katakana +114: Kayah_Li +115: Kharoshthi +116: Khmer +117: Khojki +118: Khudawadi +119: L +120: LC +121: Lao +122: Latin +123: Lepcha +124: Limbu +125: Linear_A +126: Linear_B +127: Lisu +128: Ll +129: Lm +130: Lo +131: Logical_Order_Exception +132: Lowercase +133: Lt +134: Lu +135: Lycian +136: Lydian +137: M +138: Mahajani +139: Makasar +140: Malayalam +141: Mandaic +142: Manichaean +143: Marchen +144: Masaram_Gondi +145: Math +146: Mc +147: Me +148: Medefaidrin +149: Meetei_Mayek +150: Mende_Kikakui +151: Meroitic_Cursive +152: Meroitic_Hieroglyphs +153: Miao +154: Mn +155: Modi +156: Mongolian +157: Mro +158: Multani +159: Myanmar +160: N +161: Nabataean +162: Nandinagari +163: Nd +164: New_Tai_Lue +165: Newa +166: Nko +167: Nl +168: No +169: Noncharacter_Code_Point +170: Nushu +171: Nyiakeng_Puachue_Hmong +172: Ogham +173: Ol_Chiki +174: Old_Hungarian +175: Old_Italic +176: Old_North_Arabian +177: Old_Permic +178: Old_Persian +179: 
Old_Sogdian +180: Old_South_Arabian +181: Old_Turkic +182: Oriya +183: Osage +184: Osmanya +185: Other_Alphabetic +186: Other_Default_Ignorable_Code_Point +187: Other_Grapheme_Extend +188: Other_ID_Continue +189: Other_ID_Start +190: Other_Lowercase +191: Other_Math +192: Other_Uppercase +193: P +194: Pahawh_Hmong +195: Palmyrene +196: Pattern_Syntax +197: Pattern_White_Space +198: Pau_Cin_Hau +199: Pc +200: Pd +201: Pe +202: Pf +203: Phags_Pa +204: Phoenician +205: Pi +206: Po +207: Prepended_Concatenation_Mark +208: Ps +209: Psalter_Pahlavi +210: Quotation_Mark +211: Radical +212: Regional_Indicator +213: Rejang +214: Runic +215: S +216: Samaritan +217: Saurashtra +218: Sc +219: Sentence_Terminal +220: Sharada +221: Shavian +222: Siddham +223: SignWriting +224: Sinhala +225: Sk +226: Sm +227: So +228: Soft_Dotted +229: Sogdian +230: Sora_Sompeng +231: Soyombo +232: Sundanese +233: Syloti_Nagri +234: Syriac +235: Tagalog +236: Tagbanwa +237: Tai_Le +238: Tai_Tham +239: Tai_Viet +240: Takri +241: Tamil +242: Tangut +243: Telugu +244: Terminal_Punctuation +245: Thaana +246: Thai +247: Tibetan +248: Tifinagh +249: Tirhuta +250: Ugaritic +251: Unified_Ideograph +252: Unknown +253: Uppercase +254: Vai +255: Variation_Selector +256: Wancho +257: Warang_Citi +258: White_Space +259: XID_Continue +260: XID_Start +261: Yi +262: Z +263: Zanabazar_Square +264: Zl +265: Zp +266: Zs + 16: Adlm + 42: Aghb + 15: AHex + 21: Arab +105: Armi + 22: Armn + 24: Avst + 25: Bali + 26: Bamu + 27: Bass + 28: Batk + 29: Beng + 30: Bhks + 31: Bidi_C + 32: Bopo + 33: Brah + 34: Brai + 35: Bugi + 36: Buhd + 45: Cakm + 38: Cans + 39: Cari +120: Cased_Letter + 52: Cher + 40: CI +201: Close_Punctuation +137: Combining_Mark +199: Connector_Punctuation + 43: Control + 56: Copt + 59: Cprt +218: Currency_Symbol + 47: CWCF + 48: CWCM + 49: CWL + 50: CWT + 51: CWU + 60: Cyrl +200: Dash_Punctuation +163: Decimal_Number + 63: Dep + 65: Deva + 62: DI + 66: Dia + 67: Dogr + 64: Dsrt + 68: Dupl + 69: Egyp + 
70: Elba + 71: Elym +147: Enclosing_Mark + 77: Ethi + 79: Ext +202: Final_Punctuation + 44: Format + 80: Geor + 81: Glag + 89: Gong +144: Gonm + 82: Goth + 83: Gran + 84: Gr_Base + 87: Grek + 85: Gr_Ext + 86: Gr_Link + 88: Gujr + 90: Guru + 92: Hang + 91: Hani + 94: Hano + 95: Hatr + 96: Hebr + 97: Hex + 98: Hira + 19: Hluw +194: Hmng +171: Hmnp +174: Hung +102: IDC +104: Ideo +103: IDS +100: IDSB +101: IDST +205: Initial_Punctuation +175: Ital +109: Java +110: Join_C +114: Kali +113: Kana +115: Khar +116: Khmr +117: Khoj +112: Knda +111: Kthi +238: Lana +121: Laoo +122: Latn +123: Lepc +119: Letter +167: Letter_Number +124: Limb +125: Lina +126: Linb +264: Line_Separator +131: LOE +128: Lowercase_Letter +135: Lyci +136: Lydi +138: Mahj +139: Maka +141: Mand +142: Mani +143: Marc +137: Mark +226: Math_Symbol +148: Medf +150: Mend +151: Merc +152: Mero +140: Mlym +129: Modifier_Letter +225: Modifier_Symbol +156: Mong +157: Mroo +149: Mtei +158: Mult +159: Mymr +162: Nand +176: Narb +161: Nbat +169: NChar +166: Nkoo +154: Nonspacing_Mark +170: Nshu +160: Number +185: OAlpha +186: ODI +172: Ogam +187: OGr_Ext +188: OIDC +189: OIDS +173: Olck +190: OLower +191: OMath +208: Open_Punctuation +181: Orkh +182: Orya +183: Osge +184: Osma + 37: Other +130: Other_Letter +168: Other_Number +206: Other_Punctuation +227: Other_Symbol +192: OUpper +195: Palm +265: Paragraph_Separator +196: Pat_Syn +197: Pat_WS +198: Pauc +207: PCM +177: Perm +203: Phag +107: Phli +209: Phlp +204: Phnx +153: Plrd + 54: Private_Use +108: Prti +193: Punctuation + 56: Qaac +106: Qaai +210: QMark +212: RI +213: Rjng + 93: Rohg +214: Runr +216: Samr +180: Sarb +217: Saur +228: SD +262: Separator +223: Sgnw +221: Shaw +220: Shrd +222: Sidd +118: Sind +224: Sinh +229: Sogd +179: Sogo +230: Sora +231: Soyo +266: Space_Separator +146: Spacing_Mark +219: STerm +232: Sund + 57: Surrogate +233: Sylo +215: Symbol +234: Syrc +236: Tagb +240: Takr +237: Tale +164: Talu +241: Taml +242: Tang +239: Tavt +243: Telu 
+244: Term +248: Tfng +235: Tglg +245: Thaa +247: Tibt +249: Tirh +133: Titlecase_Letter +250: Ugar +251: UIdeo + 53: Unassigned +134: Uppercase_Letter +254: Vaii +255: VS +257: Wara +256: Wcho +258: WSpace +259: XIDC +260: XIDS +178: Xpeo + 58: Xsux +261: Yiii +263: Zanb +106: Zinh + 55: Zyyy +252: Zzzz +267: In_Basic_Latin +268: In_Latin_1_Supplement +269: In_Latin_Extended_A +270: In_Latin_Extended_B +271: In_IPA_Extensions +272: In_Spacing_Modifier_Letters +273: In_Combining_Diacritical_Marks +274: In_Greek_and_Coptic +275: In_Cyrillic +276: In_Cyrillic_Supplement +277: In_Armenian +278: In_Hebrew +279: In_Arabic +280: In_Syriac +281: In_Arabic_Supplement +282: In_Thaana +283: In_NKo +284: In_Samaritan +285: In_Mandaic +286: In_Syriac_Supplement +287: In_Arabic_Extended_A +288: In_Devanagari +289: In_Bengali +290: In_Gurmukhi +291: In_Gujarati +292: In_Oriya +293: In_Tamil +294: In_Telugu +295: In_Kannada +296: In_Malayalam +297: In_Sinhala +298: In_Thai +299: In_Lao +300: In_Tibetan +301: In_Myanmar +302: In_Georgian +303: In_Hangul_Jamo +304: In_Ethiopic +305: In_Ethiopic_Supplement +306: In_Cherokee +307: In_Unified_Canadian_Aboriginal_Syllabics +308: In_Ogham +309: In_Runic +310: In_Tagalog +311: In_Hanunoo +312: In_Buhid +313: In_Tagbanwa +314: In_Khmer +315: In_Mongolian +316: In_Unified_Canadian_Aboriginal_Syllabics_Extended +317: In_Limbu +318: In_Tai_Le +319: In_New_Tai_Lue +320: In_Khmer_Symbols +321: In_Buginese +322: In_Tai_Tham +323: In_Combining_Diacritical_Marks_Extended +324: In_Balinese +325: In_Sundanese +326: In_Batak +327: In_Lepcha +328: In_Ol_Chiki +329: In_Cyrillic_Extended_C +330: In_Georgian_Extended +331: In_Sundanese_Supplement +332: In_Vedic_Extensions +333: In_Phonetic_Extensions +334: In_Phonetic_Extensions_Supplement +335: In_Combining_Diacritical_Marks_Supplement +336: In_Latin_Extended_Additional +337: In_Greek_Extended +338: In_General_Punctuation +339: In_Superscripts_and_Subscripts +340: In_Currency_Symbols +341: 
In_Combining_Diacritical_Marks_for_Symbols +342: In_Letterlike_Symbols +343: In_Number_Forms +344: In_Arrows +345: In_Mathematical_Operators +346: In_Miscellaneous_Technical +347: In_Control_Pictures +348: In_Optical_Character_Recognition +349: In_Enclosed_Alphanumerics +350: In_Box_Drawing +351: In_Block_Elements +352: In_Geometric_Shapes +353: In_Miscellaneous_Symbols +354: In_Dingbats +355: In_Miscellaneous_Mathematical_Symbols_A +356: In_Supplemental_Arrows_A +357: In_Braille_Patterns +358: In_Supplemental_Arrows_B +359: In_Miscellaneous_Mathematical_Symbols_B +360: In_Supplemental_Mathematical_Operators +361: In_Miscellaneous_Symbols_and_Arrows +362: In_Glagolitic +363: In_Latin_Extended_C +364: In_Coptic +365: In_Georgian_Supplement +366: In_Tifinagh +367: In_Ethiopic_Extended +368: In_Cyrillic_Extended_A +369: In_Supplemental_Punctuation +370: In_CJK_Radicals_Supplement +371: In_Kangxi_Radicals +372: In_Ideographic_Description_Characters +373: In_CJK_Symbols_and_Punctuation +374: In_Hiragana +375: In_Katakana +376: In_Bopomofo +377: In_Hangul_Compatibility_Jamo +378: In_Kanbun +379: In_Bopomofo_Extended +380: In_CJK_Strokes +381: In_Katakana_Phonetic_Extensions +382: In_Enclosed_CJK_Letters_and_Months +383: In_CJK_Compatibility +384: In_CJK_Unified_Ideographs_Extension_A +385: In_Yijing_Hexagram_Symbols +386: In_CJK_Unified_Ideographs +387: In_Yi_Syllables +388: In_Yi_Radicals +389: In_Lisu +390: In_Vai +391: In_Cyrillic_Extended_B +392: In_Bamum +393: In_Modifier_Tone_Letters +394: In_Latin_Extended_D +395: In_Syloti_Nagri +396: In_Common_Indic_Number_Forms +397: In_Phags_pa +398: In_Saurashtra +399: In_Devanagari_Extended +400: In_Kayah_Li +401: In_Rejang +402: In_Hangul_Jamo_Extended_A +403: In_Javanese +404: In_Myanmar_Extended_B +405: In_Cham +406: In_Myanmar_Extended_A +407: In_Tai_Viet +408: In_Meetei_Mayek_Extensions +409: In_Ethiopic_Extended_A +410: In_Latin_Extended_E +411: In_Cherokee_Supplement +412: In_Meetei_Mayek +413: In_Hangul_Syllables 
+414: In_Hangul_Jamo_Extended_B +415: In_High_Surrogates +416: In_High_Private_Use_Surrogates +417: In_Low_Surrogates +418: In_Private_Use_Area +419: In_CJK_Compatibility_Ideographs +420: In_Alphabetic_Presentation_Forms +421: In_Arabic_Presentation_Forms_A +422: In_Variation_Selectors +423: In_Vertical_Forms +424: In_Combining_Half_Marks +425: In_CJK_Compatibility_Forms +426: In_Small_Form_Variants +427: In_Arabic_Presentation_Forms_B +428: In_Halfwidth_and_Fullwidth_Forms +429: In_Specials +430: In_Linear_B_Syllabary +431: In_Linear_B_Ideograms +432: In_Aegean_Numbers +433: In_Ancient_Greek_Numbers +434: In_Ancient_Symbols +435: In_Phaistos_Disc +436: In_Lycian +437: In_Carian +438: In_Coptic_Epact_Numbers +439: In_Old_Italic +440: In_Gothic +441: In_Old_Permic +442: In_Ugaritic +443: In_Old_Persian +444: In_Deseret +445: In_Shavian +446: In_Osmanya +447: In_Osage +448: In_Elbasan +449: In_Caucasian_Albanian +450: In_Linear_A +451: In_Cypriot_Syllabary +452: In_Imperial_Aramaic +453: In_Palmyrene +454: In_Nabataean +455: In_Hatran +456: In_Phoenician +457: In_Lydian +458: In_Meroitic_Hieroglyphs +459: In_Meroitic_Cursive +460: In_Kharoshthi +461: In_Old_South_Arabian +462: In_Old_North_Arabian +463: In_Manichaean +464: In_Avestan +465: In_Inscriptional_Parthian +466: In_Inscriptional_Pahlavi +467: In_Psalter_Pahlavi +468: In_Old_Turkic +469: In_Old_Hungarian +470: In_Hanifi_Rohingya +471: In_Rumi_Numeral_Symbols +472: In_Old_Sogdian +473: In_Sogdian +474: In_Elymaic +475: In_Brahmi +476: In_Kaithi +477: In_Sora_Sompeng +478: In_Chakma +479: In_Mahajani +480: In_Sharada +481: In_Sinhala_Archaic_Numbers +482: In_Khojki +483: In_Multani +484: In_Khudawadi +485: In_Grantha +486: In_Newa +487: In_Tirhuta +488: In_Siddham +489: In_Modi +490: In_Mongolian_Supplement +491: In_Takri +492: In_Ahom +493: In_Dogra +494: In_Warang_Citi +495: In_Nandinagari +496: In_Zanabazar_Square +497: In_Soyombo +498: In_Pau_Cin_Hau +499: In_Bhaiksuki +500: In_Marchen +501: 
In_Masaram_Gondi +502: In_Gunjala_Gondi +503: In_Makasar +504: In_Tamil_Supplement +505: In_Cuneiform +506: In_Cuneiform_Numbers_and_Punctuation +507: In_Early_Dynastic_Cuneiform +508: In_Egyptian_Hieroglyphs +509: In_Egyptian_Hieroglyph_Format_Controls +510: In_Anatolian_Hieroglyphs +511: In_Bamum_Supplement +512: In_Mro +513: In_Bassa_Vah +514: In_Pahawh_Hmong +515: In_Medefaidrin +516: In_Miao +517: In_Ideographic_Symbols_and_Punctuation +518: In_Tangut +519: In_Tangut_Components +520: In_Kana_Supplement +521: In_Kana_Extended_A +522: In_Small_Kana_Extension +523: In_Nushu +524: In_Duployan +525: In_Shorthand_Format_Controls +526: In_Byzantine_Musical_Symbols +527: In_Musical_Symbols +528: In_Ancient_Greek_Musical_Notation +529: In_Mayan_Numerals +530: In_Tai_Xuan_Jing_Symbols +531: In_Counting_Rod_Numerals +532: In_Mathematical_Alphanumeric_Symbols +533: In_Sutton_SignWriting +534: In_Glagolitic_Supplement +535: In_Nyiakeng_Puachue_Hmong +536: In_Wancho +537: In_Mende_Kikakui +538: In_Adlam +539: In_Indic_Siyaq_Numbers +540: In_Ottoman_Siyaq_Numbers +541: In_Arabic_Mathematical_Alphabetic_Symbols +542: In_Mahjong_Tiles +543: In_Domino_Tiles +544: In_Playing_Cards +545: In_Enclosed_Alphanumeric_Supplement +546: In_Enclosed_Ideographic_Supplement +547: In_Miscellaneous_Symbols_and_Pictographs +548: In_Emoticons +549: In_Ornamental_Dingbats +550: In_Transport_and_Map_Symbols +551: In_Alchemical_Symbols +552: In_Geometric_Shapes_Extended +553: In_Supplemental_Arrows_C +554: In_Supplemental_Symbols_and_Pictographs +555: In_Chess_Symbols +556: In_Symbols_and_Pictographs_Extended_A +557: In_CJK_Unified_Ideographs_Extension_B +558: In_CJK_Unified_Ideographs_Extension_C +559: In_CJK_Unified_Ideographs_Extension_D +560: In_CJK_Unified_Ideographs_Extension_E +561: In_CJK_Unified_Ideographs_Extension_F +562: In_CJK_Compatibility_Ideographs_Supplement +563: In_Tags +564: In_Variation_Selectors_Supplement +565: In_Supplementary_Private_Use_Area_A +566: 
In_Supplementary_Private_Use_Area_B +567: In_No_Block |