34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
39#ifndef USE_TOKEN_THREADED_VM
41# define USE_TOKEN_THREADED_VM 1
43# define USE_TOKEN_THREADED_VM 0
/* Bit 24: flag bit that the check on the following line masks out of
 * enc->ruby_encoding_index; an encoding with this bit set is not treated
 * as ASCII-compatible there. */
48# define ENC_DUMMY_FLAG (1<<24)
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
/*
 * Replace any earlier ONIGENC_IS_MBC_ASCII_WORD definition.
 * When rb_enc_asciicompat(enc) holds, the byte at s is tested directly
 * (ISALNUM or '_'); otherwise the character is decoded with
 * ONIGENC_MBC_TO_CODE and onigenc_ascii_is_code_ctype() is asked whether
 * it belongs to ONIGENC_CTYPE_WORD.
 * NOTE(review): `s` is expanded as `*s` without parentheses and is
 * evaluated more than once -- pass a plain pointer variable with no side
 * effects.
 */
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
/*
 * True when the character at p decodes to 13 (CR) and the character right
 * after it -- p advanced by enclen(enc, p, end) -- decodes to 10 (LF).
 * NOTE(review): the macro itself does not check that the second position
 * is still before `end`; confirm ONIGENC_MBC_TO_CODE tolerates that.
 */
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
/*
 * Newline test that forwards all six arguments, each parenthesised, to
 * is_mbc_newline_ex().
 * NOTE(review): appears to be the variant selected under
 * USE_CRNL_AS_LINE_TERMINATOR -- the conditional structure is only partly
 * visible here; confirm.
 */
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
68is_mbc_newline_ex(
OnigEncoding enc,
const UChar *p,
const UChar *start,
69 const UChar *end, OnigOptionType option,
int check_prev)
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
84 const UChar *pnext = p + enclen(enc, p, end);
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
/*
 * Alternative definition of ONIGENC_IS_MBC_NEWLINE_EX: it keeps the
 * six-parameter shape but only uses enc, p and end, delegating to
 * ONIGENC_IS_MBC_NEWLINE; start, option and check_prev are ignored.
 */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
107history_tree_clear(OnigCaptureTreeNode* node)
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
125 node->childs = (OnigCaptureTreeNode** )0;
130history_tree_free(OnigCaptureTreeNode* node)
132 history_tree_clear(node);
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
145static OnigCaptureTreeNode*
146history_node_new(
void)
148 OnigCaptureTreeNode* node;
150 node = (OnigCaptureTreeNode* )
xmalloc(
sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
154 node->num_childs = 0;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
167 if (parent->num_childs >= parent->allocated) {
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
173 (OnigCaptureTreeNode** )
xmalloc(
sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
180 (OnigCaptureTreeNode** )
xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
186 parent->childs = tmp;
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
191 parent->allocated = n;
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
203 OnigCaptureTreeNode *clone, *child;
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
216 r = history_tree_add_child(clone, child);
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
227extern OnigCaptureTreeNode*
230 return region->history_root;
234#ifdef USE_CACHE_MATCH_OPT
238count_num_cache_opcode(
regex_t* reg,
long* num,
long* table_size)
241 UChar* pend = p + reg->used;
244 MemNumType current_mem = -1;
245 long current_mem_num = 0;
257 case OP_EXACT1: p++;
break;
258 case OP_EXACT2: p += 2;
break;
259 case OP_EXACT3: p += 3;
break;
260 case OP_EXACT4: p += 4;
break;
261 case OP_EXACT5: p += 5;
break;
263 GET_LENGTH_INC(len, p); p += len;
break;
264 case OP_EXACTMB2N1: p += 2;
break;
265 case OP_EXACTMB2N2: p += 4;
break;
266 case OP_EXACTMB2N3: p += 6;
break;
268 GET_LENGTH_INC(len, p); p += len * 2;
break;
270 GET_LENGTH_INC(len, p); p += len * 3;
break;
274 GET_LENGTH_INC(mb_len, p);
275 GET_LENGTH_INC(len, p);
281 len = enclen(enc, p, pend); p += len;
break;
283 GET_LENGTH_INC(len, p); p += len;
break;
287 p += SIZE_BITSET;
break;
289 case OP_CCLASS_MB_NOT:
290 GET_LENGTH_INC(len, p); p += len;
break;
292 case OP_CCLASS_MIX_NOT:
294 GET_LENGTH_INC(len, p);
301 case OP_ANYCHAR_STAR:
302 case OP_ANYCHAR_ML_STAR:
303 *num += 1; *table_size += 1;
break;
304 case OP_ANYCHAR_STAR_PEEK_NEXT:
305 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
306 p++; *num += 1; *table_size += 1;
break;
311 case OP_NOT_WORD_BOUND:
317 case OP_NOT_ASCII_WORD:
318 case OP_ASCII_WORD_BOUND:
319 case OP_NOT_ASCII_WORD_BOUND:
320 case OP_ASCII_WORD_BEGIN:
321 case OP_ASCII_WORD_END:
328 case OP_SEMI_END_BUF:
329 case OP_BEGIN_POSITION:
336 case OP_BACKREF_MULTI:
337 case OP_BACKREF_MULTI_IC:
338 case OP_BACKREF_WITH_LEVEL:
341 case OP_MEMORY_START:
342 case OP_MEMORY_START_PUSH:
343 case OP_MEMORY_END_PUSH:
344 case OP_MEMORY_END_PUSH_REC:
346 case OP_MEMORY_END_REC:
347 p += SIZE_MEMNUM;
break;
364 case OP_PUSH_OR_JUMP_EXACT1:
365 case OP_PUSH_IF_PEEK_NEXT:
366 p += SIZE_RELADDR + 1; *num += 1; *table_size += 1;
break;
369 if (current_mem != -1) {
373 GET_MEMNUM_INC(mem, p);
375 if (reg->repeat_range[mem].lower == 0) {
379 reg->repeat_range[mem].base_num = *num;
381 current_mem_num = *num;
384 case OP_REPEAT_INC_NG:
385 GET_MEMNUM_INC(mem, p);
386 if (mem != current_mem) {
391 long inner_num = *num - current_mem_num;
393 repeat_range->inner_num = inner_num;
395 *num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
396 if (repeat_range->lower < repeat_range->upper) {
403 case OP_REPEAT_INC_SG:
404 case OP_REPEAT_INC_NG_SG:
407 case OP_NULL_CHECK_START:
408 case OP_NULL_CHECK_END:
409 case OP_NULL_CHECK_END_MEMST:
410 case OP_NULL_CHECK_END_MEMST_PUSH:
411 p += SIZE_MEMNUM;
break;
415 case OP_PUSH_POS_NOT:
417 case OP_PUSH_STOP_BT:
420 case OP_PUSH_LOOK_BEHIND_NOT:
421 case OP_FAIL_LOOK_BEHIND_NOT:
422 case OP_PUSH_ABSENT_POS:
434 case OP_STATE_CHECK_PUSH:
435 case OP_STATE_CHECK_PUSH_OR_JUMP:
437 case OP_STATE_CHECK_ANYCHAR_STAR:
438 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
441 case OP_SET_OPTION_PUSH:
454 *num = NUM_CACHE_OPCODE_FAIL;
458 return ONIGERR_UNDEFINED_BYTECODE;
466 UChar* pend = p + reg->used;
469 MemNumType current_mem = -1;
471 long current_mem_num = 0;
481 case OP_EXACT1: p++;
break;
482 case OP_EXACT2: p += 2;
break;
483 case OP_EXACT3: p += 3;
break;
484 case OP_EXACT4: p += 4;
break;
485 case OP_EXACT5: p += 5;
break;
487 GET_LENGTH_INC(len, p); p += len;
break;
488 case OP_EXACTMB2N1: p += 2;
break;
489 case OP_EXACTMB2N2: p += 4;
break;
490 case OP_EXACTMB2N3: p += 6;
break;
492 GET_LENGTH_INC(len, p); p += len * 2;
break;
494 GET_LENGTH_INC(len, p); p += len * 3;
break;
498 GET_LENGTH_INC(mb_len, p);
499 GET_LENGTH_INC(len, p);
505 len = enclen(enc, p, pend); p += len;
break;
507 GET_LENGTH_INC(len, p); p += len;
break;
511 p += SIZE_BITSET;
break;
513 case OP_CCLASS_MB_NOT:
514 GET_LENGTH_INC(len, p); p += len;
break;
516 case OP_CCLASS_MIX_NOT:
518 GET_LENGTH_INC(len, p);
525 case OP_ANYCHAR_STAR:
526 case OP_ANYCHAR_ML_STAR:
527 table->addr = pbegin;
528 table->num = num - current_mem_num;
529 table->outer_repeat = current_mem;
533 case OP_ANYCHAR_STAR_PEEK_NEXT:
534 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
536 table->addr = pbegin;
537 table->num = num - current_mem_num;
538 table->outer_repeat = current_mem;
546 case OP_NOT_WORD_BOUND:
552 case OP_NOT_ASCII_WORD:
553 case OP_ASCII_WORD_BOUND:
554 case OP_NOT_ASCII_WORD_BOUND:
555 case OP_ASCII_WORD_BEGIN:
556 case OP_ASCII_WORD_END:
563 case OP_SEMI_END_BUF:
564 case OP_BEGIN_POSITION:
571 case OP_BACKREF_MULTI:
572 case OP_BACKREF_MULTI_IC:
573 case OP_BACKREF_WITH_LEVEL:
574 goto unexpected_bytecode_error;
576 case OP_MEMORY_START:
577 case OP_MEMORY_START_PUSH:
578 case OP_MEMORY_END_PUSH:
579 case OP_MEMORY_END_PUSH_REC:
581 case OP_MEMORY_END_REC:
582 p += SIZE_MEMNUM;
break;
594 table->addr = pbegin;
595 table->num = num - current_mem_num;
596 table->outer_repeat = current_mem;
602 case OP_PUSH_OR_JUMP_EXACT1:
603 case OP_PUSH_IF_PEEK_NEXT:
604 p += SIZE_RELADDR + 1;
605 table->addr = pbegin;
606 table->num = num - current_mem_num;
607 table->outer_repeat = current_mem;
613 GET_MEMNUM_INC(mem, p);
615 if (reg->repeat_range[mem].lower == 0) {
616 table->addr = pbegin;
617 table->num = num - current_mem_num;
618 table->outer_repeat = -1;
623 current_mem_num = num;
626 case OP_REPEAT_INC_NG:
627 GET_MEMNUM_INC(mem, p);
629 long inner_num = num - current_mem_num;
631 if (repeat_range->lower < repeat_range->upper) {
632 table->addr = pbegin;
633 table->num = num - current_mem_num;
634 table->outer_repeat = mem;
638 num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
643 case OP_REPEAT_INC_SG:
644 case OP_REPEAT_INC_NG_SG:
646 goto unexpected_bytecode_error;
647 case OP_NULL_CHECK_START:
648 case OP_NULL_CHECK_END:
649 case OP_NULL_CHECK_END_MEMST:
650 case OP_NULL_CHECK_END_MEMST_PUSH:
651 p += SIZE_MEMNUM;
break;
655 case OP_PUSH_POS_NOT:
657 case OP_PUSH_STOP_BT:
660 case OP_PUSH_LOOK_BEHIND_NOT:
661 case OP_FAIL_LOOK_BEHIND_NOT:
662 case OP_PUSH_ABSENT_POS:
665 goto unexpected_bytecode_error;
669 goto unexpected_bytecode_error;
672 goto unexpected_bytecode_error;
674 case OP_STATE_CHECK_PUSH:
675 case OP_STATE_CHECK_PUSH_OR_JUMP:
677 case OP_STATE_CHECK_ANYCHAR_STAR:
678 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
679 goto unexpected_bytecode_error;
681 case OP_SET_OPTION_PUSH:
693unexpected_bytecode_error:
694 return ONIGERR_UNEXPECTED_BYTECODE;
697 return ONIGERR_UNDEFINED_BYTECODE;
701count_num_cache_opcode(
regex_t* reg,
long* num,
long* table_size)
703 *num = NUM_CACHE_OPCODE_FAIL;
711 long num = 0, table_size = 0;
712 count_num_cache_opcode(reg, &num, &table_size);
713 return num != NUM_CACHE_OPCODE_FAIL;
721 for (i = 0; i < region->num_regs; i++) {
722 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
724#ifdef USE_CAPTURE_HISTORY
725 history_root_free(region);
732 region->num_regs = n;
734 if (n < ONIG_NREGION)
737 if (region->allocated == 0) {
738 region->beg = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
739 if (region->beg == 0)
740 return ONIGERR_MEMORY;
742 region->end = (OnigPosition* )
xmalloc(n *
sizeof(OnigPosition));
743 if (region->end == 0) {
745 return ONIGERR_MEMORY;
748 region->allocated = n;
750 else if (region->allocated < n) {
753 region->allocated = 0;
754 tmp = (OnigPosition* )
xrealloc(region->beg, n *
sizeof(OnigPosition));
758 return ONIGERR_MEMORY;
761 tmp = (OnigPosition* )
xrealloc(region->end, n *
sizeof(OnigPosition));
765 return ONIGERR_MEMORY;
769 region->allocated = n;
776onig_region_resize_clear(
OnigRegion* region,
int n)
780 r = onig_region_resize(region, n);
781 if (r != 0)
return r;
782 onig_region_clear(region);
787onig_region_set(
OnigRegion* region,
int at,
int beg,
int end)
789 if (at < 0)
return ONIGERR_INVALID_ARGUMENT;
791 if (at >= region->allocated) {
792 int r = onig_region_resize(region, at + 1);
796 region->beg[at] = beg;
797 region->end[at] = end;
804 region->num_regs = 0;
805 region->allocated = 0;
806 region->beg = (OnigPosition* )0;
807 region->end = (OnigPosition* )0;
808#ifdef USE_CAPTURE_HISTORY
809 region->history_root = (OnigCaptureTreeNode* )0;
828 if (r->allocated > 0) {
829 if (r->beg)
xfree(r->beg);
830 if (r->end)
xfree(r->end);
833#ifdef USE_CAPTURE_HISTORY
834 history_root_free(r);
836 if (free_self)
xfree(r);
843#define RREGC_SIZE (sizeof(int) * from->num_regs)
846 if (to == from)
return;
848 r = onig_region_resize(to, from->num_regs);
851 for (i = 0; i < from->num_regs; i++) {
852 to->beg[i] = from->beg[i];
853 to->end[i] = from->end[i];
855 to->num_regs = from->num_regs;
857#ifdef USE_CAPTURE_HISTORY
858 history_root_free(to);
860 if (IS_NOT_NULL(from->history_root)) {
861 to->history_root = history_tree_clone(from->history_root);
/* Sentinel stack index: STACK_PUSH_MEM_START stores it in mem_end_stk[]
 * and the NULL_CHECK_MEMST macros compare u.mem.end against it. */
868#define INVALID_STACK_INDEX -1
/* Stack entry type tags (stored in stk->type).
 * The next three have a non-zero low byte, so they match
 * STK_MASK_POP_USED -- the mask STACK_POP tests to decide where to stop. */
872#define STK_ALT 0x0001
873#define STK_LOOK_BEHIND_NOT 0x0002
874#define STK_POS_NOT 0x0003
/* The remaining tags keep the low byte clear.  STK_MEM_END and
 * STK_MEM_END_MARK both carry bit 0x8000 (STK_MASK_MEM_END_OR_MARK);
 * STK_STATE_CHECK_MARK is bit 0x1000, which STK_MASK_TO_VOID_TARGET
 * includes. */
876#define STK_MEM_START 0x0100
877#define STK_MEM_END 0x8200
878#define STK_REPEAT_INC 0x0300
879#define STK_STATE_CHECK_MARK 0x1000
881#define STK_NULL_CHECK_START 0x3000
882#define STK_NULL_CHECK_END 0x5000
883#define STK_MEM_END_MARK 0x8400
884#define STK_POS 0x0500
885#define STK_STOP_BT 0x0600
886#define STK_REPEAT 0x0700
887#define STK_CALL_FRAME 0x0800
888#define STK_RETURN 0x0900
889#define STK_VOID 0x0a00
890#define STK_ABSENT_POS 0x0b00
891#define STK_ABSENT 0x0c00
/* Masks applied to stk->type:
 *   STK_MASK_POP_USED        - tested by STACK_POP (non-zero => stop popping)
 *   STK_MASK_TO_VOID_TARGET  - tested by IS_TO_VOID_TARGET
 *   STK_MASK_MEM_END_OR_MARK - tested by STACK_GET_MEM_START */
894#define STK_MASK_POP_USED 0x00ff
895#define STK_MASK_TO_VOID_TARGET 0x10ff
896#define STK_MASK_MEM_END_OR_MARK 0x8000
898#ifdef USE_CACHE_MATCH_OPT
899#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\
900 (msa).enable_cache_match_opt = 0;\
902 (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\
903 (msa).num_cache_table = 0;\
904 (msa).cache_index_table = (OnigCacheIndex *)0;\
905 (msa).match_cache = (uint8_t *)0;\
907#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\
908 if ((msa).cache_index_table) xfree((msa).cache_index_table);\
909 if ((msa).match_cache) xfree((msa).match_cache);\
912#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa)
913#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa)
916#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
917# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
918 (msa).stack_p = (void* )0;\
919 (msa).options = (arg_option);\
920 (msa).region = (arg_region);\
921 (msa).start = (arg_start);\
922 (msa).gpos = (arg_gpos);\
923 (msa).best_len = ONIG_MISMATCH;\
926 MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
929# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
930 (msa).stack_p = (void* )0;\
931 (msa).options = (arg_option);\
932 (msa).region = (arg_region);\
933 (msa).start = (arg_start);\
934 (msa).gpos = (arg_gpos);\
937 MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
941#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Buffer size (in bytes) from which STATE_CHECK_BUFF_INIT obtains the
 * state-check buffer with xmalloc() instead of xalloca(); MATCH_ARG_FREE
 * uses the same threshold to decide whether the buffer must be xfree()d. */
943# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
945# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
946 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
947 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
948 offset = ((offset) * (state_num)) >> 3;\
949 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
950 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
951 (msa).state_check_buff = (void* )xmalloc(size);\
952 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
955 (msa).state_check_buff = (void* )xalloca(size);\
956 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
957 (size_t )(size - (offset))); \
958 (msa).state_check_buff_size = size;\
961 (msa).state_check_buff = (void* )0;\
962 (msa).state_check_buff_size = 0;\
966 (msa).state_check_buff = (void* )0;\
967 (msa).state_check_buff_size = 0;\
971# define MATCH_ARG_FREE(msa) do {\
972 if ((msa).stack_p) xfree((msa).stack_p);\
973 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
974 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
976 MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
979# define MATCH_ARG_FREE(msa) do {\
980 if ((msa).stack_p) xfree((msa).stack_p);\
981 MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
/* STACK_INIT compares ptr_num against this: above it the OnigStackIndex
 * area comes from xmalloc(), otherwise from xalloca(). */
987#define MAX_PTR_NUM 100
989#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
990 if (ptr_num > MAX_PTR_NUM) {\
991 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
992 heap_addr = alloc_addr;\
994 stk_alloc = (OnigStackType* )(msa->stack_p);\
995 stk_base = stk_alloc;\
997 stk_end = stk_base + msa->stack_n;\
999 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
1000 stk_base = stk_alloc;\
1002 stk_end = stk_base + (stack_num);\
1004 } else if (msa->stack_p) {\
1005 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
1007 stk_alloc = (OnigStackType* )(msa->stack_p);\
1008 stk_base = stk_alloc;\
1010 stk_end = stk_base + msa->stack_n;\
1013 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
1014 + sizeof(OnigStackType) * (stack_num));\
1016 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
1017 stk_base = stk_alloc;\
1019 stk_end = stk_base + (stack_num);\
1023#define STACK_SAVE do{\
1024 if (stk_base != stk_alloc) {\
1025 msa->stack_p = stk_base;\
1026 msa->stack_n = stk_end - stk_base; \
/* File-scope limit on match-stack growth, read and written through
 * onig_get_match_stack_limit_size() / onig_set_match_stack_limit_size().
 * The growth path only enforces it when the value is non-zero. */
1030static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1033onig_get_match_stack_limit_size(
void)
1035 return MatchStackLimitSize;
1039onig_set_match_stack_limit_size(
unsigned int size)
1041 MatchStackLimitSize = size;
1052 stk_base = *arg_stk_base;
1053 stk_end = *arg_stk_end;
1056 n = stk_end - stk_base;
1057 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1061 return ONIGERR_MEMORY;
1067 unsigned int limit_size = MatchStackLimitSize;
1069 if (limit_size != 0 && n > limit_size) {
1070 if ((
unsigned int )(stk_end - stk_base) == limit_size)
1071 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1078 return ONIGERR_MEMORY;
1081 *arg_stk = x + (stk - stk_base);
1083 *arg_stk_end = x + n;
1087#define STACK_ENSURE(n) do {\
1088 if (stk_end - stk < (n)) {\
1089 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
1092 if (xmalloc_base) xfree(xmalloc_base);\
/* Index <-> pointer conversion relative to the local `stk_base`:
 * STACK_AT turns an index into an entry pointer, GET_STACK_INDEX turns an
 * entry pointer back into its index.  Both rely on `stk_base` being in
 * scope at the expansion site. */
1098#define STACK_AT(index) (stk_base + (index))
1099#define GET_STACK_INDEX(stk) ((stk) - stk_base)
1101#define STACK_PUSH_TYPE(stack_type) do {\
1103 stk->type = (stack_type);\
1104 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
/* Non-zero when the entry's type has any bit of STK_MASK_TO_VOID_TARGET
 * set; STACK_POS_END and STACK_STOP_BT_END retag such entries as
 * STK_VOID. */
1108#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1110#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit position inside state_check_buff for string position s and state
 * number snum: (offset of s from `str`) * num_comb_exp_check + (snum - 1).
 * Users split the result into a byte index (x/8) and a bit (x%8).
 * Relies on `str` and `num_comb_exp_check` being in scope. */
1111# define STATE_CHECK_POS(s,snum) \
1112 (((s) - str) * num_comb_exp_check + ((snum) - 1))
1113# define STATE_CHECK_VAL(v,snum) do {\
1114 if (state_check_buff != NULL) {\
1115 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
1116 (v) = state_check_buff[x/8] & (1<<(x%8));\
1122# define ELSE_IF_STATE_CHECK_MARK(stk) \
1123 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
1124 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
1125 state_check_buff[x/8] |= (1<<(x%8)); \
1128# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1130 stk->type = (stack_type);\
1131 stk->u.state.pcode = (pat);\
1132 stk->u.state.pstr = (s);\
1133 stk->u.state.pstr_prev = (sprev);\
1134 stk->u.state.state_check = 0;\
1135 stk->u.state.pkeep = (keep);\
1139# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1140 stk->type = (stack_type);\
1141 stk->u.state.pcode = (pat);\
1142 stk->u.state.state_check = 0;\
1146# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
1148 stk->type = STK_ALT;\
1149 stk->u.state.pcode = (pat);\
1150 stk->u.state.pstr = (s);\
1151 stk->u.state.pstr_prev = (sprev);\
1152 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
1153 stk->u.state.pkeep = (keep);\
1157# define STACK_PUSH_STATE_CHECK(s,snum) do {\
1158 if (state_check_buff != NULL) {\
1160 stk->type = STK_STATE_CHECK_MARK;\
1161 stk->u.state.pstr = (s);\
1162 stk->u.state.state_check = (snum);\
1169# define ELSE_IF_STATE_CHECK_MARK(stk)
1171# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
1173 stk->type = (stack_type);\
1174 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1175 stk->u.state.pcode = (pat);\
1176 stk->u.state.pstr = (s);\
1177 stk->u.state.pstr_prev = (sprev);\
1178 stk->u.state.pkeep = (keep);\
1182# define STACK_PUSH_ENSURED(stack_type,pat) do {\
1183 stk->type = (stack_type);\
1184 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1185 stk->u.state.pcode = (pat);\
/* Convenience wrappers that fix the entry type and forward to STACK_PUSH
 * (entries carrying pcode/pstr/pstr_prev/pkeep) or to STACK_PUSH_TYPE
 * (type-only entries: STK_ABSENT, STK_STOP_BT).  STACK_PUSH_POS passes
 * NULL_UCHARP as the pattern pointer. */
1190#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
1191#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
1192#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
1193#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
1194#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
1195#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
1196 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1198#ifdef USE_CACHE_MATCH_OPT
1200#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\
1202 long cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\
1203 if (cache_index >= 0) {\
1204 long key = (num_cache_size) * (long)(pos) + cache_index;\
1205 long index = key >> 3;\
1206 long mask = 1 << (key & 7);\
1207 if ((match_cache)[index] & mask) {\
1210 (match_cache)[index] |= mask;\
/*
 * NOTE(review): only part of this routine is visible here -- its header and
 * several interior lines (loop header, braces, some returns) are missing.
 * It is presumably the body of find_cache_index_table(), the helper that
 * DO_CACHE_MATCH_OPT calls; confirm against the full file.
 */
1218 long l = 0, r = num_cache_table - 1, m = 0;
/* Set when the opcode at p is OP_REPEAT_INC or OP_REPEAT_INC_NG. */
1223 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
/* Probe table[] for the entry whose addr equals p; l/r/m look like
 * binary-search bounds, but the loop header is not visible. */
1227 if (table[m].addr == p)
break;
1228 if (table[m].addr < p) l = m + 1;
/* Reject the probe unless m is in range and really points at p. */
1232 if (!(0 <= m && m < num_cache_table && table[m].addr == p)) {
/* outer_repeat == -1: presumably no enclosing repeat was recorded for
 * this entry -- confirm against the table builder. */
1237 if (item->outer_repeat == -1) {
/* Range descriptor of the enclosing repeat, taken from the regex. */
1241 range = &reg->repeat_range[item->outer_repeat];
/* Stack entry of the enclosing repeat; for the REPEAT_INC opcodes the
 * stored count is reduced by one before it is used. */
1243 stkp = &stk[repeat_stk[item->outer_repeat]];
1244 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
/* Iterations below the lower bound: inner_num slots per iteration. */
1246 if (count < range->lower) {
1247 return range->base_num + range->inner_num * count + item->num;
/* upper == 0x7fffffff: presumably the "no upper bound" marker -- confirm. */
1250 if (range->upper == 0x7fffffff) {
1251 return range->base_num + range->inner_num * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) + item->num;
/* Bounded repeat at or past the lower bound: inner_num + 1 slots per
 * extra iteration. */
1254 return range->base_num + range->inner_num * (range->lower - 1) + (range->inner_num + 1) * (count - range->lower + 1) + item->num;
1258reset_match_cache(
regex_t* reg, UChar* pbegin, UChar* pend,
long pos, uint8_t* match_cache,
OnigCacheIndex *table,
long num_cache_size,
long num_cache_table)
1260 long l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0;
1261 int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG;
1267 if (table[m1].addr == pbegin)
break;
1268 if (table[m1].addr < pbegin) l = m1 + 1;
1272 l = 0, r = num_cache_table - 1;
1275 if (table[m2].addr == pend)
break;
1276 if (table[m2].addr < pend) l = m2 + 1;
1280 if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++;
1281 if (table[m2].addr > pend && m2 - 1 > 0) m2--;
1286 if (item1->outer_repeat < 0) k1 = item1->num;
1287 else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num;
1289 if (item2->outer_repeat < 0) k2 = item2->num;
1292 if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num;
1293 else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num;
1296 base = pos * num_cache_size;
1300 if ((k1 >> 3) == (k2 >> 3)) {
1301 match_cache[k1 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1);
1305 match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1;
1308 if (i < (k2 >> 3)) {
1309 xmemset(&match_cache[i], 0, (k2 >> 3) - i);
1311 match_cache[k2 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1));
1318#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache)
1321#define STACK_PUSH_REPEAT(id, pat) do {\
1323 stk->type = STK_REPEAT;\
1324 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1325 stk->u.repeat.num = (id);\
1326 stk->u.repeat.pcode = (pat);\
1327 stk->u.repeat.count = 0;\
1331#define STACK_PUSH_REPEAT_INC(sindex) do {\
1333 stk->type = STK_REPEAT_INC;\
1334 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1335 stk->u.repeat_inc.si = (sindex);\
1339#define STACK_PUSH_MEM_START(mnum, s) do {\
1341 stk->type = STK_MEM_START;\
1342 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1343 stk->u.mem.num = (mnum);\
1344 stk->u.mem.pstr = (s);\
1345 stk->u.mem.start = mem_start_stk[mnum];\
1346 stk->u.mem.end = mem_end_stk[mnum];\
1347 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1348 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1352#define STACK_PUSH_MEM_END(mnum, s) do {\
1354 stk->type = STK_MEM_END;\
1355 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1356 stk->u.mem.num = (mnum);\
1357 stk->u.mem.pstr = (s);\
1358 stk->u.mem.start = mem_start_stk[mnum];\
1359 stk->u.mem.end = mem_end_stk[mnum];\
1360 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1364#define STACK_PUSH_MEM_END_MARK(mnum) do {\
1366 stk->type = STK_MEM_END_MARK;\
1367 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1368 stk->u.mem.num = (mnum);\
1372#define STACK_GET_MEM_START(mnum, k) do {\
1375 while (k > stk_base) {\
1377 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1378 && k->u.mem.num == (mnum)) {\
1381 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1382 if (level == 0) break;\
1388#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1391 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1392 if (level == 0) (start) = k->u.mem.pstr;\
1395 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1398 (end) = k->u.mem.pstr;\
1406#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
1408 stk->type = STK_NULL_CHECK_START;\
1409 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1410 stk->u.null_check.num = (cnum);\
1411 stk->u.null_check.pstr = (s);\
1415#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
1417 stk->type = STK_NULL_CHECK_END;\
1418 stk->null_check = (OnigStackIndex)(stk - stk_base);\
1419 stk->u.null_check.num = (cnum);\
1423#define STACK_PUSH_CALL_FRAME(pat) do {\
1425 stk->type = STK_CALL_FRAME;\
1426 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1427 stk->u.call_frame.ret_addr = (pat);\
1431#define STACK_PUSH_RETURN do {\
1433 stk->type = STK_RETURN;\
1434 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1438#define STACK_PUSH_ABSENT_POS(start, end) do {\
1440 stk->type = STK_ABSENT_POS;\
1441 stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
1442 stk->u.absent_pos.abs_pstr = (start);\
1443 stk->u.absent_pos.end_pstr = (end);\
1449# define STACK_BASE_CHECK(p, at) \
1450 if ((p) < stk_base) {\
1451 fprintf(stderr, "at %s\n", at);\
1455# define STACK_BASE_CHECK(p, at)
1458#define STACK_POP_ONE do {\
1460 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1463#define STACK_POP do {\
1464 switch (pop_level) {\
1465 case STACK_POP_LEVEL_FREE:\
1468 STACK_BASE_CHECK(stk, "STACK_POP"); \
1469 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1470 ELSE_IF_STATE_CHECK_MARK(stk);\
1473 case STACK_POP_LEVEL_MEM_START:\
1476 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1477 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1478 else if (stk->type == STK_MEM_START) {\
1479 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1480 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1482 ELSE_IF_STATE_CHECK_MARK(stk);\
1488 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1489 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1490 else if (stk->type == STK_MEM_START) {\
1491 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1492 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1494 else if (stk->type == STK_REPEAT_INC) {\
1495 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1497 else if (stk->type == STK_MEM_END) {\
1498 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1499 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1501 ELSE_IF_STATE_CHECK_MARK(stk);\
1507#define STACK_POP_TIL_POS_NOT do {\
1510 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
1511 if (stk->type == STK_POS_NOT) break;\
1512 else if (stk->type == STK_MEM_START) {\
1513 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1514 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1516 else if (stk->type == STK_REPEAT_INC) {\
1517 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1519 else if (stk->type == STK_MEM_END) {\
1520 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1521 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1523 ELSE_IF_STATE_CHECK_MARK(stk);\
1527#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
1530 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
1531 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
1532 else if (stk->type == STK_MEM_START) {\
1533 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1534 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1536 else if (stk->type == STK_REPEAT_INC) {\
1537 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1539 else if (stk->type == STK_MEM_END) {\
1540 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1541 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1543 ELSE_IF_STATE_CHECK_MARK(stk);\
1547#define STACK_POP_TIL_ABSENT do {\
1550 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
1551 if (stk->type == STK_ABSENT) break;\
1552 else if (stk->type == STK_MEM_START) {\
1553 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1554 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1556 else if (stk->type == STK_REPEAT_INC) {\
1557 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1559 else if (stk->type == STK_MEM_END) {\
1560 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
1561 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
1563 ELSE_IF_STATE_CHECK_MARK(stk);\
1567#define STACK_POP_ABSENT_POS(start, end) do {\
1569 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
1570 (start) = stk->u.absent_pos.abs_pstr;\
1571 (end) = stk->u.absent_pos.end_pstr;\
1574#define STACK_POS_END(k) do {\
1578 STACK_BASE_CHECK(k, "STACK_POS_END"); \
1579 if (IS_TO_VOID_TARGET(k)) {\
1580 k->type = STK_VOID;\
1582 else if (k->type == STK_POS) {\
1583 k->type = STK_VOID;\
1589#define STACK_STOP_BT_END do {\
1590 OnigStackType *k = stk;\
1593 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
1594 if (IS_TO_VOID_TARGET(k)) {\
1595 k->type = STK_VOID;\
1597 else if (k->type == STK_STOP_BT) {\
1598 k->type = STK_VOID;\
1604#define STACK_NULL_CHECK(isnull,id,s) do {\
1605 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1608 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
1609 if (k->type == STK_NULL_CHECK_START) {\
1610 if (k->u.null_check.num == (id)) {\
1611 (isnull) = (k->u.null_check.pstr == (s));\
1618#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
1620 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1623 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
1624 if (k->type == STK_NULL_CHECK_START) {\
1625 if (k->u.null_check.num == (id)) {\
1627 (isnull) = (k->u.null_check.pstr == (s));\
1633 else if (k->type == STK_NULL_CHECK_END) {\
1639#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do {\
1640 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1643 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
1644 if (k->type == STK_NULL_CHECK_START) {\
1645 if (k->u.null_check.num == (id)) {\
1646 if (k->u.null_check.pstr != (s)) {\
1654 if (k->type == STK_MEM_START) {\
1655 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1656 (isnull) = 0; (ischange) = 1; break;\
1658 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1659 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1661 endp = (UChar* )k->u.mem.end;\
1662 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1663 (isnull) = 0; (ischange) = 1; break;\
1665 else if (endp != s) {\
1678#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1680 OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
1683 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1684 if (k->type == STK_NULL_CHECK_START) {\
1685 if (k->u.null_check.num == (id)) {\
1687 if (k->u.null_check.pstr != (s)) {\
1695 if (k->type == STK_MEM_START) {\
1696 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1697 (isnull) = 0; break;\
1699 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1700 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1702 endp = (UChar* )k->u.mem.end;\
1703 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1704 (isnull) = 0; break;\
1706 else if (endp != s) {\
1720 else if (k->type == STK_NULL_CHECK_END) {\
1721 if (k->u.null_check.num == (id)) level++;\
1726#define STACK_GET_REPEAT(id, k) do {\
1731 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1732 if (k->type == STK_REPEAT) {\
1734 if (k->u.repeat.num == (id)) {\
1739 else if (k->type == STK_CALL_FRAME) level--;\
1740 else if (k->type == STK_RETURN) level++;\
1744#define STACK_RETURN(addr) do {\
1746 OnigStackType* k = stk;\
1749 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1750 if (k->type == STK_CALL_FRAME) {\
1752 (addr) = k->u.call_frame.ret_addr;\
1757 else if (k->type == STK_RETURN)\
1763#define STRING_CMP(s1,s2,len) do {\
1764 while (len-- > 0) {\
1765 if (*s1++ != *s2++) goto fail;\
1769#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1770 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/*
 * Compare two strings after case folding.
 *
 *   enc             encoding whose case-fold routine is applied
 *   case_fold_flag  passed through to ONIGENC_MBC_CASE_FOLD
 *   s1, ps2         the two strings; the second is passed by address
 *                   (NOTE(review): presumably so the caller's pointer can
 *                   be advanced on success -- confirm)
 *   mblen           length argument supplied by the caller
 *   text_end        limit handed to the case-fold routine for both sides
 *
 * Returns 0 as soon as the folded forms differ in length or in any byte.
 */
1774static int string_cmp_ic(
OnigEncoding enc,
int case_fold_flag,
1775 UChar* s1, UChar** ps2, OnigDistance mblen,
const UChar* text_end)
/* Scratch buffers that receive the folded bytes of each side. */
1777 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1778 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1779 UChar *p1, *p2, *end1, *s2;
/* Case-fold from each string into its own buffer; the lengths must agree. */
1785 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1786 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1787 if (len1 != len2)
return 0;
/* Then the folded bytes themselves must agree. */
1790 while (len1-- > 0) {
1791 if (*p1 != *p2)
return 0;
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail): like STRING_CMP but reports the
 * result instead of jumping: on the first differing byte it sets is_fail
 * to 1 and leaves the loop. s1, s2 and len are modified in place.
 */
1801#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1803 while (len-- > 0) {\
1804 if (*s1++ != *s2++) {\
1805 is_fail = 1; break;\
/* STRING_CMP_VALUE_IC: case-folding variant of STRING_CMP_VALUE; it reacts when string_cmp_ic() returns 0. Uses `encode` from the expanding scope. */
1810#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1811 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
/* Subject-boundary predicates; `str` and `end` come from the expanding scope. */
1818#define IS_EMPTY_STR (str == end)
1819#define ON_STR_BEGIN(s) ((s) == str)
1820#define ON_STR_END(s) ((s) == end)
/*
 * DATA_ENSURE*: check that n more bytes are available at the cursor `s`.
 * DATA_ENSURE_CHECK1 / DATA_ENSURE_CHECK are expressions; DATA_ENSURE jumps
 * to `fail` and DATA_ENSURE_CONTINUE issues `continue` when the check
 * fails. With USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE the limit
 * is `right_range`.
 */
1821#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1822# define DATA_ENSURE_CHECK1 (s < right_range)
1823# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1824# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1825# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1826# define ABSENT_END_POS right_range
/* The same set of checks with `end` as the limit. */
1828# define DATA_ENSURE_CHECK1 (s < end)
1829# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1830# define DATA_ENSURE(n) if (s + (n) > end) goto fail
1831# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1832# define ABSENT_END_POS end
/*
 * Capture-history support (USE_CAPTURE_HISTORY only).
 *
 * make_capture_history_tree() fills in the tree below `node` from stack
 * entries, scanning `k` while it is below `stk_top`. `kp` is handed on
 * unchanged to the recursive call. All recorded positions are offsets
 * from `str`. A non-zero result from the recursive call is returned to
 * the caller as-is.
 */
1836#ifdef USE_CAPTURE_HISTORY
1838make_capture_history_tree(OnigCaptureTreeNode* node,
OnigStackType** kp,
1842 OnigCaptureTreeNode* child;
1845 while (k < stk_top) {
1846 if (k->type == STK_MEM_START) {
/* Only groups up to ONIG_MAX_CAPTURE_HISTORY_GROUP whose bit is set in reg->capture_history get a node. */
1848 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1849 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1850 child = history_node_new();
1851 CHECK_NULL_RETURN_MEMERR(child);
1853 child->beg = k->u.mem.pstr - str;
1854 r = history_tree_add_child(node, child);
/* NOTE(review): presumably the failure path of history_tree_add_child() -- the child is released here. */
1856 history_tree_free(child);
/* Recurse with `child` as the parent so nested groups hang below it. */
1860 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1861 if (r != 0)
return r;
1864 child->end = k->u.mem.pstr - str;
1867 else if (k->type == STK_MEM_END) {
/* An end entry for this node's own group records the node's end offset. */
1868 if (k->u.mem.num == node->group) {
1869 node->end = k->u.mem.pstr - str;
/*
 * Back-references with nesting level (USE_BACKREF_WITH_LEVEL only).
 *
 * mem_is_in_memp() returns 1 when group number `mem` is found among the
 * `num` group numbers read one after another from `memp` with
 * GET_MEMNUM_INC.
 */
1881#ifdef USE_BACKREF_WITH_LEVEL
1883mem_is_in_memp(
int mem,
int num, UChar* memp)
1888 for (i = 0; i < num; i++) {
1889 GET_MEMNUM_INC(m, memp);
1890 if (mem == (
int )m)
return 1;
/*
 * Back-reference match against a capture recorded at call-nesting level
 * `nest`.
 *
 * Loops over stack entries while k >= stk_base. STK_CALL_FRAME and
 * STK_RETURN entries have their own branches; capture entries are only
 * considered while `level == nest`. A STK_MEM_END that belongs to one of
 * the `mem_num` groups listed in `memp` supplies `pend`; a STK_MEM_START
 * of such a group supplies `pstart`, and once `pend` is known the
 * captured bytes [pstart, pend) are compared with the subject at *s,
 * through string_cmp_ic() when ignore_case is non-zero and byte-by-byte
 * in the other visible comparison.
 *
 * Returns 0 when the capture is longer than the remaining input
 * (send - *s) or when a byte differs.
 */
1895static int backref_match_at_nested_level(
regex_t* reg,
1897 int ignore_case,
int case_fold_flag,
1898 int nest,
int mem_num, UChar* memp, UChar** s,
const UChar* send)
1900 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1907 while (k >= stk_base) {
1908 if (k->type == STK_CALL_FRAME) {
1911 else if (k->type == STK_RETURN) {
1914 else if (level == nest) {
1915 if (k->type == STK_MEM_START) {
1916 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1917 pstart = k->u.mem.pstr;
/* A start is only usable once the matching end has been seen. */
1918 if (pend != NULL_UCHARP) {
/* Not enough subject left to hold the captured text. */
1919 if (pend - pstart > send - *s)
return 0;
1923 if (ignore_case != 0) {
1924 if (string_cmp_ic(reg->enc, case_fold_flag,
1925 pstart, &ss, pend - pstart, send) == 0)
1930 if (*p++ != *ss++)
return 0;
1939 else if (k->type == STK_MEM_END) {
1940 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1941 pend = k->u.mem.pstr;
/*
 * Per-opcode profiling, compiled only with ONIG_DEBUG_STATISTICS.
 * GETTIME(t) samples a clock into `t` and TIMEDIFF(te, ts) yields the
 * elapsed amount between two samples; three clock sources are provided.
 */
1953#ifdef ONIG_DEBUG_STATISTICS
/* Windows: performance-counter ticks scaled by 1000000 / freq. */
1956# include <windows.h>
1957static LARGE_INTEGER ts, te, freq;
1958# define GETTIME(t) QueryPerformanceCounter(&(t))
1959# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1960 * 1000000 / freq.QuadPart)
1963# define USE_TIMEOFDAY
/* gettimeofday(): microsecond difference built from tv_sec and tv_usec. */
1965# ifdef USE_TIMEOFDAY
1966# ifdef HAVE_SYS_TIME_H
1967# include <sys/time.h>
1969# ifdef HAVE_UNISTD_H
1973# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1974# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1975 (((te).tv_sec - (ts).tv_sec)*1000000))
/* times(): difference of the tms_utime fields. */
1977# ifdef HAVE_SYS_TIMES_H
1978# include <sys/times.h>
1980static struct tms ts, te;
1981# define GETTIME(t) times(&(t))
1982# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
/*
 * Counters, each table indexed by opcode value (256 slots):
 *   OpCounter      incremented by MOP_IN for the opcode it is given
 *   OpPrevCounter  incremented by MOP_IN, at index OpCurr, when the opcode
 *                  it is given equals OpPrevTarget
 *   OpTime         accumulated TIMEDIFF(te, ts), added by MOP_OUT at
 *                  index OpCurr
 * MaxStackDepth holds the largest stk - stk_base seen by STACK_INC.
 */
1987static int OpCounter[256];
1988static int OpPrevCounter[256];
1989static unsigned long OpTime[256];
1990static int OpCurr = OP_FINISH;
1991static int OpPrevTarget = OP_FAIL;
1992static int MaxStackDepth = 0;
1994# define MOP_IN(opcode) do {\
1995 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1997 OpCounter[opcode]++;\
2001# define MOP_OUT do {\
2003 OpTime[OpCurr] += TIMEDIFF(te, ts);\
/* onig_statistics_init(): zeroes the three tables; also queries the performance-counter frequency into `freq`. */
2007onig_statistics_init(
void)
2010 for (i = 0; i < 256; i++) {
2011 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2015 QueryPerformanceFrequency(&freq);
/* onig_print_statistics(f): writes one row per OnigOpInfo entry (the list stops at the first entry with a negative opcode), then the max stack depth. */
2020onig_print_statistics(
FILE* f)
2023 fprintf(f,
" count prev time\n");
2024 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2025 fprintf(f,
"%8d: %8d: %10lu: %s\n",
2026 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2028 fprintf(f,
"\nmax stack depth: %d\n", MaxStackDepth);
/* Statistics flavour of STACK_INC: records a new MaxStackDepth when the current depth exceeds it. */
2031# define STACK_INC do {\
2033 if (stk - stk_base > MaxStackDepth) \
2034 MaxStackDepth = stk - stk_base;\
/* Plain definitions: STACK_INC only advances stk and MOP_IN expands to nothing. */
2038# define STACK_INC stk++
2040# define MOP_IN(opcode)
/*
 * Debug helper, compiled only with ONIG_DEBUG_MATCH.
 * Maps a STK_* stack-entry type to a short printable label; any value
 * without its own case gets the blank label from `default`.
 */
2045#ifdef ONIG_DEBUG_MATCH
2047stack_type_str(
int stack_type)
2049 switch (stack_type) {
2050 case STK_ALT:
return "Alt ";
2051 case STK_LOOK_BEHIND_NOT:
return "LBNot ";
2052 case STK_POS_NOT:
return "PosNot";
2053 case STK_MEM_START:
return "MemS ";
2054 case STK_MEM_END:
return "MemE ";
2055 case STK_REPEAT_INC:
return "RepInc";
2056 case STK_STATE_CHECK_MARK:
return "StChMk";
2057 case STK_NULL_CHECK_START:
return "NulChS";
2058 case STK_NULL_CHECK_END:
return "NulChE";
2059 case STK_MEM_END_MARK:
return "MemEMk";
2060 case STK_POS:
return "Pos ";
2061 case STK_STOP_BT:
return "StopBt";
2062 case STK_REPEAT:
return "Rep ";
2063 case STK_CALL_FRAME:
return "Call ";
2064 case STK_RETURN:
return "Ret ";
2065 case STK_VOID:
return "Void ";
2066 case STK_ABSENT_POS:
return "AbsPos";
2067 case STK_ABSENT:
return "Absent";
/* Unrecognised type: blank label. */
2068 default:
return " ";
2076match_at(
regex_t* reg,
const UChar* str,
const UChar* end,
2077#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2078 const UChar* right_range,
2082 static const UChar FinishCode[] = { OP_FINISH };
2084 int i, num_mem, pop_level;
2085 ptrdiff_t n, best_len;
2086 LengthType tlen, tlen2;
2089 OnigOptionType option = reg->options;
2091 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2092 UChar *s, *q, *sbegin;
2097 char *xmalloc_base = NULL;
2101 OnigStackIndex *repeat_stk;
2102 OnigStackIndex *mem_start_stk, *mem_end_stk;
2103#ifdef USE_COMBINATION_EXPLOSION_CHECK
2105 unsigned char* state_check_buff = msa->state_check_buff;
2106 int num_comb_exp_check = reg->num_comb_exp_check;
2109#if USE_TOKEN_THREADED_VM
2111# define VM_LOOP JUMP;
2113# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2114# define DEFAULT L_DEFAULT:
2115# define NEXT sprev = sbegin; JUMP
2116# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2142 &&L_OP_CCLASS_MB_NOT,
2143 &&L_OP_CCLASS_MIX_NOT,
2147 &&L_OP_ANYCHAR_STAR,
2148 &&L_OP_ANYCHAR_ML_STAR,
2149 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2150 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2155 &&L_OP_NOT_WORD_BOUND,
2156# ifdef USE_WORD_BEGIN_END
2164 &&L_OP_NOT_ASCII_WORD,
2165 &&L_OP_ASCII_WORD_BOUND,
2166 &&L_OP_NOT_ASCII_WORD_BOUND,
2167# ifdef USE_WORD_BEGIN_END
2168 &&L_OP_ASCII_WORD_BEGIN,
2169 &&L_OP_ASCII_WORD_END,
2179 &&L_OP_SEMI_END_BUF,
2180 &&L_OP_BEGIN_POSITION,
2186 &&L_OP_BACKREF_MULTI,
2187 &&L_OP_BACKREF_MULTI_IC,
2188# ifdef USE_BACKREF_WITH_LEVEL
2189 &&L_OP_BACKREF_WITH_LEVEL,
2193 &&L_OP_MEMORY_START,
2194 &&L_OP_MEMORY_START_PUSH,
2195 &&L_OP_MEMORY_END_PUSH,
2196# ifdef USE_SUBEXP_CALL
2197 &&L_OP_MEMORY_END_PUSH_REC,
2202# ifdef USE_SUBEXP_CALL
2203 &&L_OP_MEMORY_END_REC,
2214# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2215 &&L_OP_PUSH_OR_JUMP_EXACT1,
2219 &&L_OP_PUSH_IF_PEEK_NEXT,
2223 &&L_OP_REPEAT_INC_NG,
2224 &&L_OP_REPEAT_INC_SG,
2225 &&L_OP_REPEAT_INC_NG_SG,
2226 &&L_OP_NULL_CHECK_START,
2227 &&L_OP_NULL_CHECK_END,
2228# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2229 &&L_OP_NULL_CHECK_END_MEMST,
2233# ifdef USE_SUBEXP_CALL
2234 &&L_OP_NULL_CHECK_END_MEMST_PUSH,
2241 &&L_OP_PUSH_POS_NOT,
2243 &&L_OP_PUSH_STOP_BT,
2246 &&L_OP_PUSH_LOOK_BEHIND_NOT,
2247 &&L_OP_FAIL_LOOK_BEHIND_NOT,
2248 &&L_OP_PUSH_ABSENT_POS,
2252# ifdef USE_SUBEXP_CALL
2261# ifdef USE_COMBINATION_EXPLOSION_CHECK
2262 &&L_OP_STATE_CHECK_PUSH,
2263 &&L_OP_STATE_CHECK_PUSH_OR_JUMP,
2270# ifdef USE_COMBINATION_EXPLOSION_CHECK
2271 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2272 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2279 &&L_OP_SET_OPTION_PUSH,
2295# define VM_LOOP_END } sprev = sbegin; }
2296# define CASE(x) case x:
2297# define DEFAULT default:
2299# define JUMP continue; break
2303#ifdef USE_SUBEXP_CALL
2306# define ADD_NUMMEM 1
2309# define ADD_NUMMEM 0
2312 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2314 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2315 pop_level = reg->stack_pop_level;
2316 num_mem = reg->num_mem;
2317 repeat_stk = (OnigStackIndex* )alloca_base;
2319 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2320 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2322 OnigStackIndex *pp = mem_start_stk;
2323 for (; pp < repeat_stk + n; pp += 2) {
2324 pp[0] = INVALID_STACK_INDEX;
2325 pp[1] = INVALID_STACK_INDEX;
2328#ifndef USE_SUBEXP_CALL
2335#ifdef ONIG_DEBUG_MATCH
2336 fprintf(stderr,
"match_at: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), start: %"PRIuPTR
" (%p), sprev: %"PRIuPTR
" (%p)\n",
2337 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2338 fprintf(stderr,
"size: %d, start offset: %d\n",
2339 (
int )(end - str), (
int )(sstart - str));
2340 fprintf(stderr,
"\n ofs> str stk:type addr:opcode\n");
2343 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode);
2344 best_len = ONIG_MISMATCH;
2345 s = (UChar* )sstart;
2346 pkeep = (UChar* )sstart;
2349#ifdef ONIG_DEBUG_MATCH
2350# define OPCODE_EXEC_HOOK \
2352 UChar *op, *q, *bp, buf[50]; \
2354 op = p - OP_OFFSET; \
2355 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2358 if (*op != OP_FINISH) { \
2359 for (i = 0; i < 7 && q < end; i++) { \
2360 len = enclen(encode, q, end); \
2361 while (len-- > 0) *bp++ = *q++; \
2363 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2365 xmemcpy(bp, "\"", 1); bp += 1; \
2367 fputs((char* )buf, stderr); \
2368 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2369 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2370 stk - stk_base - 1, \
2371 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2372 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2373 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2374 fprintf(stderr, "\n"); \
2377# define OPCODE_EXEC_HOOK ((void) 0)
2382 CASE(OP_END) MOP_IN(OP_END);
2386#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2387 if (IS_FIND_LONGEST(option)) {
2388 if (n > msa->best_len) {
2390 msa->best_s = (UChar* )sstart;
2397 region = msa->region;
2399 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2400 region->end[0] = s - str;
2401 for (i = 1; i <= num_mem; i++) {
2402 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2403 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2404 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2406 region->beg[i] = (UChar* )((
void* )mem_start_stk[i]) - str;
2408 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2409 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2410 : (UChar* )((
void* )mem_end_stk[i])) - str;
2413 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2417#ifdef USE_CAPTURE_HISTORY
2418 if (reg->capture_history != 0) {
2420 OnigCaptureTreeNode* node;
2422 if (IS_NULL(region->history_root)) {
2423 region->history_root = node = history_node_new();
2424 CHECK_NULL_RETURN_MEMERR(node);
2427 node = region->history_root;
2428 history_tree_clear(node);
2432 node->beg = ((pkeep > s) ? s : pkeep) - str;
2433 node->end = s - str;
2436 r = make_capture_history_tree(region->history_root, &stkp,
2437 stk, (UChar* )str, reg);
2447#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2452 if (IS_FIND_CONDITION(option)) {
2453 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2454 best_len = ONIG_MISMATCH;
2457 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2466 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2468 if (*p != *s)
goto fail;
2473 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2476 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2479 len = ONIGENC_MBC_CASE_FOLD(encode,
2495 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2497 if (*p != *s)
goto fail;
2499 if (*p != *s)
goto fail;
2505 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2507 if (*p != *s)
goto fail;
2509 if (*p != *s)
goto fail;
2511 if (*p != *s)
goto fail;
2517 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2519 if (*p != *s)
goto fail;
2521 if (*p != *s)
goto fail;
2523 if (*p != *s)
goto fail;
2525 if (*p != *s)
goto fail;
2531 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2533 if (*p != *s)
goto fail;
2535 if (*p != *s)
goto fail;
2537 if (*p != *s)
goto fail;
2539 if (*p != *s)
goto fail;
2541 if (*p != *s)
goto fail;
2547 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2548 GET_LENGTH_INC(tlen, p);
2550 while (tlen-- > 0) {
2551 if (*p++ != *s++)
goto fail;
2557 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2560 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2562 GET_LENGTH_INC(tlen, p);
2568 len = ONIGENC_MBC_CASE_FOLD(encode,
2575 if (*p != *q)
goto fail;
2584 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2586 if (*p != *s)
goto fail;
2588 if (*p != *s)
goto fail;
2593 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2595 if (*p != *s)
goto fail;
2597 if (*p != *s)
goto fail;
2600 if (*p != *s)
goto fail;
2602 if (*p != *s)
goto fail;
2607 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2609 if (*p != *s)
goto fail;
2611 if (*p != *s)
goto fail;
2613 if (*p != *s)
goto fail;
2615 if (*p != *s)
goto fail;
2618 if (*p != *s)
goto fail;
2620 if (*p != *s)
goto fail;
2625 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2626 GET_LENGTH_INC(tlen, p);
2627 DATA_ENSURE(tlen * 2);
2628 while (tlen-- > 0) {
2629 if (*p != *s)
goto fail;
2631 if (*p != *s)
goto fail;
2638 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2639 GET_LENGTH_INC(tlen, p);
2640 DATA_ENSURE(tlen * 3);
2641 while (tlen-- > 0) {
2642 if (*p != *s)
goto fail;
2644 if (*p != *s)
goto fail;
2646 if (*p != *s)
goto fail;
2653 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2654 GET_LENGTH_INC(tlen, p);
2655 GET_LENGTH_INC(tlen2, p);
2658 while (tlen2-- > 0) {
2659 if (*p != *s)
goto fail;
2666 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2668 if (BITSET_AT(((BitSetRef )p), *s) == 0)
goto fail;
2670 s += enclen(encode, s, end);
2674 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2675 if (! ONIGENC_IS_MBC_HEAD(encode, s, end))
goto fail;
2678 GET_LENGTH_INC(tlen, p);
2685 mb_len = enclen(encode, s, end);
2686 DATA_ENSURE(mb_len);
2689 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2691#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2692 if (! onig_is_in_code_range(p, code))
goto fail;
2696 if (! onig_is_in_code_range(q, code))
goto fail;
2703 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2705 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2710 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2714 GET_LENGTH_INC(tlen, p);
2721 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2723 if (BITSET_AT(((BitSetRef )p), *s) != 0)
goto fail;
2725 s += enclen(encode, s, end);
2729 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2731 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2733 GET_LENGTH_INC(tlen, p);
2735 goto cc_mb_not_success;
2739 GET_LENGTH_INC(tlen, p);
2743 int mb_len = enclen(encode, s, end);
2745 if (! DATA_ENSURE_CHECK(mb_len)) {
2749 goto cc_mb_not_success;
2754 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2756#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2757 if (onig_is_in_code_range(p, code))
goto fail;
2761 if (onig_is_in_code_range(q, code))
goto fail;
2770 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2772 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2777 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2781 GET_LENGTH_INC(tlen, p);
2788 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2790 n = enclen(encode, s, end);
2792 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2797 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2799 n = enclen(encode, s, end);
2805 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2806 while (DATA_ENSURE_CHECK1) {
2807 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2808 STACK_PUSH_ALT(p, s, sprev, pkeep);
2809 n = enclen(encode, s, end);
2811 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2818 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2819 while (DATA_ENSURE_CHECK1) {
2820 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2821 STACK_PUSH_ALT(p, s, sprev, pkeep);
2822 n = enclen(encode, s, end);
2836 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2837 while (DATA_ENSURE_CHECK1) {
2838 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
2840 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2844#ifdef USE_CACHE_MATCH_OPT
2848 n = enclen(encode, s, end);
2850 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2858 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2859 while (DATA_ENSURE_CHECK1) {
2860 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2862 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2866#ifdef USE_CACHE_MATCH_OPT
2870 n = enclen(encode, s, end);
2885#ifdef USE_COMBINATION_EXPLOSION_CHECK
2886 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2887 GET_STATE_CHECK_NUM_INC(mem, p);
2888 while (DATA_ENSURE_CHECK1) {
2889 STATE_CHECK_VAL(scv, mem);
2892 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2893 n = enclen(encode, s, end);
2895 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))
goto fail;
2902 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2903 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2905 GET_STATE_CHECK_NUM_INC(mem, p);
2906 while (DATA_ENSURE_CHECK1) {
2907 STATE_CHECK_VAL(scv, mem);
2910 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2911 n = enclen(encode, s, end);
2926 CASE(OP_WORD) MOP_IN(OP_WORD);
2928 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2931 s += enclen(encode, s, end);
2935 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2937 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2940 s += enclen(encode, s, end);
2944 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2946 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2949 s += enclen(encode, s, end);
2953 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2955 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2958 s += enclen(encode, s, end);
2962 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2963 if (ON_STR_BEGIN(s)) {
2965 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2968 else if (ON_STR_END(s)) {
2969 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2973 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2974 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2980 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2981 if (ON_STR_BEGIN(s)) {
2983 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2986 else if (ON_STR_END(s)) {
2987 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2991 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2992 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2998 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2999 if (ON_STR_BEGIN(s)) {
3000 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3003 else if (ON_STR_END(s)) {
3004 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3008 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3009 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3015 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3016 if (ON_STR_BEGIN(s)) {
3017 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3020 else if (ON_STR_END(s)) {
3021 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3025 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3026 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3032#ifdef USE_WORD_BEGIN_END
3033 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3034 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3035 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3043 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3044 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3045 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3053 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3054 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3055 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3063 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3064 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3065 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3074 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3075 if (! ON_STR_BEGIN(s))
goto fail;
3076 if (IS_NOTBOS(msa->options))
goto fail;
3081 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3082 if (! ON_STR_END(s))
goto fail;
3083 if (IS_NOTEOS(msa->options))
goto fail;
3088 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3089 if (ON_STR_BEGIN(s)) {
3090 if (IS_NOTBOL(msa->options))
goto fail;
3094 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3095#ifdef USE_CRNL_AS_LINE_TERMINATOR
3096 && !(IS_NEWLINE_CRLF(option)
3097 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3099 && !ON_STR_END(s)) {
3106 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3107 if (ON_STR_END(s)) {
3108#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3109 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3111 if (IS_NOTEOL(msa->options))
goto fail;
3114#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3118 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3125 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3126 if (ON_STR_END(s)) {
3127#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3128 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3130 if (IS_NOTEOL(msa->options))
goto fail;
3133#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3137 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3138 UChar* ss = s + enclen(encode, s, end);
3139 if (ON_STR_END(ss)) {
3143#ifdef USE_CRNL_AS_LINE_TERMINATOR
3144 else if (IS_NEWLINE_CRLF(option)
3145 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3146 ss += enclen(encode, ss, end);
3147 if (ON_STR_END(ss)) {
3157 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3164 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3165 GET_MEMNUM_INC(mem, p);
3166 STACK_PUSH_MEM_START(mem, s);
3170 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3171 GET_MEMNUM_INC(mem, p);
3172 mem_start_stk[mem] = (OnigStackIndex )((
void* )s);
3173 mem_end_stk[mem] = INVALID_STACK_INDEX;
3177 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3178 GET_MEMNUM_INC(mem, p);
3179 STACK_PUSH_MEM_END(mem, s);
3183 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3184 GET_MEMNUM_INC(mem, p);
3185 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
3189 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3194#ifdef USE_SUBEXP_CALL
3195 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3196 GET_MEMNUM_INC(mem, p);
3197 STACK_GET_MEM_START(mem, stkp);
3198 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3199 STACK_PUSH_MEM_END(mem, s);
3203 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3204 GET_MEMNUM_INC(mem, p);
3205 mem_end_stk[mem] = (OnigStackIndex )((
void* )s);
3206 STACK_GET_MEM_START(mem, stkp);
3208 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3209 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3211 mem_start_stk[mem] = (OnigStackIndex )((
void* )stkp->u.mem.pstr);
3213 STACK_PUSH_MEM_END_MARK(mem);
3218 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3223 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3228 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3229 GET_MEMNUM_INC(mem, p);
3233 UChar *pstart, *pend;
3237 if (mem > num_mem)
goto fail;
3238 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3239 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3241 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3242 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3244 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3246 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3247 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3248 : (UChar* )((
void* )mem_end_stk[mem]));
3252 STRING_CMP(pstart, s, n);
3253 while (sprev + (len = enclen(encode, sprev, end)) < s)
3260 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3261 GET_MEMNUM_INC(mem, p);
3264 UChar *pstart, *pend;
3268 if (mem > num_mem)
goto fail;
3269 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3270 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
goto fail;
3272 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3273 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3275 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3277 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3278 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3279 : (UChar* )((
void* )mem_end_stk[mem]));
3283 STRING_CMP_IC(case_fold_flag, pstart, &s, (
int)n, end);
3284 while (sprev + (len = enclen(encode, sprev, end)) < s)
3292 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3295 UChar *pstart, *pend, *swork;
3297 GET_LENGTH_INC(tlen, p);
3298 for (i = 0; i < tlen; i++) {
3299 GET_MEMNUM_INC(mem, p);
3301 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
3302 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
3304 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3305 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3307 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3309 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3310 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3311 : (UChar* )((
void* )mem_end_stk[mem]));
3313 DATA_ENSURE_CONTINUE(n);
3316 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3317 if (is_fail)
continue;
3319 while (sprev + (len = enclen(encode, sprev, end)) < s)
3322 p += (SIZE_MEMNUM * (tlen - i - 1));
3325 if (i == tlen)
goto fail;
3331 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3334 UChar *pstart, *pend, *swork;
3336 GET_LENGTH_INC(tlen, p);
3337 for (i = 0; i < tlen; i++) {
3338 GET_MEMNUM_INC(mem, p);
3340 if (mem_end_stk[mem] == INVALID_STACK_INDEX)
continue;
3341 if (mem_start_stk[mem] == INVALID_STACK_INDEX)
continue;
3343 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3344 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3346 pstart = (UChar* )((
void* )mem_start_stk[mem]);
3348 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3349 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3350 : (UChar* )((
void* )mem_end_stk[mem]));
3352 DATA_ENSURE_CONTINUE(n);
3355 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3356 if (is_fail)
continue;
3358 while (sprev + (len = enclen(encode, sprev, end)) < s)
3361 p += (SIZE_MEMNUM * (tlen - i - 1));
3364 if (i == tlen)
goto fail;
3369#ifdef USE_BACKREF_WITH_LEVEL
3370 CASE(OP_BACKREF_WITH_LEVEL)
3376 GET_OPTION_INC(ic, p);
3377 GET_LENGTH_INC(level, p);
3378 GET_LENGTH_INC(tlen, p);
3381 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3382 case_fold_flag, (
int )level, (
int )tlen, p, &s, end)) {
3383 while (sprev + (len = enclen(encode, sprev, end)) < s)
3386 p += (SIZE_MEMNUM * tlen);
3398 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3399 GET_OPTION_INC(option, p);
3400 STACK_PUSH_ALT(p, s, sprev, pkeep);
3401 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3405 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3406 GET_OPTION_INC(option, p);
3411 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3412 GET_MEMNUM_INC(mem, p);
3413 STACK_PUSH_NULL_CHECK_START(mem, s);
3417 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3421 GET_MEMNUM_INC(mem, p);
3422 STACK_NULL_CHECK(isnull, mem, s);
3424#ifdef ONIG_DEBUG_MATCH
3425 fprintf(stderr,
"NULL_CHECK_END: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3426 (
int )mem, (uintptr_t )s, s);
3436 case OP_REPEAT_INC_NG:
3437 case OP_REPEAT_INC_SG:
3438 case OP_REPEAT_INC_NG_SG:
3442 goto unexpected_bytecode_error;
3450#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3451 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3456 GET_MEMNUM_INC(mem, p);
3457 STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg);
3459# ifdef ONIG_DEBUG_MATCH
3460 fprintf(stderr,
"NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3461 (
int )mem, (uintptr_t )s, s);
3463 if (isnull == -1)
goto fail;
3464 goto null_check_found;
3466# ifdef USE_CACHE_MATCH_OPT
3467 if (ischanged && msa->enable_cache_match_opt) {
3475 GET_RELADDR_INC(rel, tmp);
3479 case OP_REPEAT_INC_NG:
3480 GET_MEMNUM_INC(mem, tmp);
3481 addr = STACK_AT(repeat_stk[mem])->u.repeat.pcode;
3484 goto unexpected_bytecode_error;
3486 reset_match_cache(reg, addr, pbegin, (
long)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_opcode, msa->num_cache_table);
3494#ifdef USE_SUBEXP_CALL
3495 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3496 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3500 GET_MEMNUM_INC(mem, p);
3501# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3502 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3504 STACK_NULL_CHECK_REC(isnull, mem, s);
3507# ifdef ONIG_DEBUG_MATCH
3508 fprintf(stderr,
"NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR
" (%p)\n",
3509 (
int )mem, (uintptr_t )s, s);
3511 if (isnull == -1)
goto fail;
3512 goto null_check_found;
3515 STACK_PUSH_NULL_CHECK_END(mem);
3522 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3523 GET_RELADDR_INC(addr, p);
3526 CHECK_INTERRUPT_IN_MATCH_AT;
3529 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3530 GET_RELADDR_INC(addr, p);
3531 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3532 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3536#ifdef USE_COMBINATION_EXPLOSION_CHECK
3537 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3538 GET_STATE_CHECK_NUM_INC(mem, p);
3539 STATE_CHECK_VAL(scv, mem);
3542 GET_RELADDR_INC(addr, p);
3543 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3547 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3548 GET_STATE_CHECK_NUM_INC(mem, p);
3549 GET_RELADDR_INC(addr, p);
3550 STATE_CHECK_VAL(scv, mem);
3555 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3560 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3561 GET_STATE_CHECK_NUM_INC(mem, p);
3562 STATE_CHECK_VAL(scv, mem);
3565 STACK_PUSH_STATE_CHECK(s, mem);
3570 CASE(OP_POP) MOP_IN(OP_POP);
3574#ifdef USE_CACHE_MATCH_OPT
3580#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3581 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3582 GET_RELADDR_INC(addr, p);
3583 if (*p == *s && DATA_ENSURE_CHECK1) {
3585 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3586 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3595 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3596 GET_RELADDR_INC(addr, p);
3599 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3600 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3608 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3610 GET_MEMNUM_INC(mem, p);
3611 GET_RELADDR_INC(addr, p);
3614 repeat_stk[mem] = GET_STACK_INDEX(stk);
3615 STACK_PUSH_REPEAT(mem, p);
3617 if (reg->repeat_range[mem].lower == 0) {
3618 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
3619 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3625 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3627 GET_MEMNUM_INC(mem, p);
3628 GET_RELADDR_INC(addr, p);
3631 repeat_stk[mem] = GET_STACK_INDEX(stk);
3632 STACK_PUSH_REPEAT(mem, p);
3634 if (reg->repeat_range[mem].lower == 0) {
3635 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3636 STACK_PUSH_ALT(p, s, sprev, pkeep);
3643 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3644 GET_MEMNUM_INC(mem, p);
3645 si = repeat_stk[mem];
3646 stkp = STACK_AT(si);
3649 stkp->u.repeat.count++;
3650 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3653 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3654 if (*pbegin == OP_REPEAT_INC) {
3655 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3657 STACK_PUSH_ALT(p, s, sprev, pkeep);
3658 p = STACK_AT(si)->u.repeat.pcode;
3661 p = stkp->u.repeat.pcode;
3663 STACK_PUSH_REPEAT_INC(si);
3665 CHECK_INTERRUPT_IN_MATCH_AT;
3668 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3669 GET_MEMNUM_INC(mem, p);
3670 STACK_GET_REPEAT(mem, stkp);
3671 si = GET_STACK_INDEX(stkp);
3675 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3676 GET_MEMNUM_INC(mem, p);
3677 si = repeat_stk[mem];
3678 stkp = STACK_AT(si);
3681 stkp->u.repeat.count++;
3682 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3683 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3684 UChar* pcode = stkp->u.repeat.pcode;
3686 STACK_PUSH_REPEAT_INC(si);
3687 if (*pbegin == OP_REPEAT_INC_NG) {
3688 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3690 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3693 p = stkp->u.repeat.pcode;
3694 STACK_PUSH_REPEAT_INC(si);
3697 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3698 STACK_PUSH_REPEAT_INC(si);
3701 CHECK_INTERRUPT_IN_MATCH_AT;
3704 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3705 GET_MEMNUM_INC(mem, p);
3706 STACK_GET_REPEAT(mem, stkp);
3707 si = GET_STACK_INDEX(stkp);
3711 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3712 STACK_PUSH_POS(s, sprev, pkeep);
3716 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3718 STACK_POS_END(stkp);
3719 s = stkp->u.state.pstr;
3720 sprev = stkp->u.state.pstr_prev;
3725 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3726 GET_RELADDR_INC(addr, p);
3727 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3731 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3732 STACK_POP_TIL_POS_NOT;
3736 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3741 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3746 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3747 GET_LENGTH_INC(tlen, p);
3748 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
3749 if (IS_NULL(s))
goto fail;
3750 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3754 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3755 GET_RELADDR_INC(addr, p);
3756 GET_LENGTH_INC(tlen, p);
3757 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (
int )tlen);
3765 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3767 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3772 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3773 STACK_POP_TIL_LOOK_BEHIND_NOT;
3777 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3779 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3783 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3785 const UChar* aend = ABSENT_END_POS;
3787 UChar* selfp = p - 1;
3789 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS);
3790 GET_RELADDR_INC(addr, p);
3791#ifdef ONIG_DEBUG_MATCH
3792 fprintf(stderr,
"ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3794 if ((absent > aend) && (s > absent)) {
3800 else if ((s >= aend) && (s > absent)) {
3810 else if (s == end) {
3816 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3817 n = enclen(encode, s, end);
3818 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS);
3819 STACK_PUSH_ALT(selfp, s + n, s, pkeep);
3821 ABSENT_END_POS = aend;
3827 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3830 if (sprev < ABSENT_END_POS)
3831 ABSENT_END_POS = sprev;
3832#ifdef ONIG_DEBUG_MATCH
3833 fprintf(stderr,
"ABSENT_END: end:%p\n", ABSENT_END_POS);
3835 STACK_POP_TIL_ABSENT;
3839#ifdef USE_SUBEXP_CALL
3840 CASE(OP_CALL) MOP_IN(OP_CALL);
3841 GET_ABSADDR_INC(addr, p);
3842 STACK_PUSH_CALL_FRAME(p);
3847 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3854 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3855 GET_MEMNUM_INC(mem, p);
3856 GET_RELADDR_INC(addr, p);
3857 if ((mem > num_mem) ||
3858 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3859 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3877 p = stk->u.state.pcode;
3878 s = stk->u.state.pstr;
3879 sprev = stk->u.state.pstr_prev;
3880 pkeep = stk->u.state.pkeep;
3882#ifdef USE_CACHE_MATCH_OPT
3883 if (++msa->num_fail >= (
long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3884 msa->enable_cache_match_opt = 1;
3885 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3886 OnigPosition r = count_num_cache_opcode(reg, &msa->num_cache_opcode, &msa->num_cache_table);
3887 if (r < 0)
goto bytecode_error;
3889 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) {
3890 msa->enable_cache_match_opt = 0;
3891 goto fail_match_cache_opt;
3893 if (msa->cache_index_table == NULL) {
3895 if (table == NULL) {
3896 return ONIGERR_MEMORY;
3898 OnigPosition r = init_cache_index_table(reg, table);
3900 if (r == ONIGERR_UNEXPECTED_BYTECODE)
goto unexpected_bytecode_error;
3901 else goto bytecode_error;
3903 msa->cache_index_table = table;
3905 size_t len = (end - str) + 1;
3906 size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len;
3908 if (match_cache_size8 / len != (
size_t)msa->num_cache_opcode) {
3909 return ONIGERR_MEMORY;
3912 if (match_cache_size8 >= LONG_MAX_LIMIT) {
3913 return ONIGERR_MEMORY;
3915 size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0);
3916 msa->match_cache = (uint8_t*)
xmalloc(match_cache_size *
sizeof(uint8_t));
3917 if (msa->match_cache == NULL) {
3918 return ONIGERR_MEMORY;
3920 xmemset(msa->match_cache, 0, match_cache_size *
sizeof(uint8_t));
3922 fail_match_cache_opt:
3925#ifdef USE_COMBINATION_EXPLOSION_CHECK
3926 if (stk->u.state.state_check != 0) {
3927 stk->type = STK_STATE_CHECK_MARK;
3933 CHECK_INTERRUPT_IN_MATCH_AT;
3937 goto bytecode_error;
3942 if (xmalloc_base)
xfree(xmalloc_base);
3948 if (xmalloc_base)
xfree(xmalloc_base);
3949 return ONIGERR_STACK_BUG;
3954 if (xmalloc_base)
xfree(xmalloc_base);
3955 return ONIGERR_UNDEFINED_BYTECODE;
3957 unexpected_bytecode_error:
3959 if (xmalloc_base)
xfree(xmalloc_base);
3960 return ONIGERR_UNEXPECTED_BYTECODE;
/*
 * slow_search: forward scan of the text for the byte string
 * [target, target_end).
 *
 * Visible logic: `end` starts at text_end, is pulled back by
 * (target length - 1), and is then compared against text_range.  One path
 * is taken when enc->max_enc_len == enc->min_enc_len; the other advances
 * with enclen().  In both, a position is a candidate when *s equals the
 * first target byte, and it is accepted when t has reached target_end or
 * the remaining bytes compare equal with memcmp().  Each path ends in
 * `return NULL`, presumably the not-found result.
 */
3965slow_search(
OnigEncoding enc, UChar* target, UChar* target_end,
3966 const UChar* text,
const UChar* text_end, UChar* text_range)
3968 UChar *t, *p, *s, *end;
3970 end = (UChar* )text_end;
/* Pull the scan limit back by (target length - 1). */
3971 end -= target_end - target - 1;
3972 if (end > text_range)
/* Path for encodings whose minimum and maximum character lengths agree. */
3977 if (enc->max_enc_len == enc->min_enc_len) {
3978 int n = enc->max_enc_len;
3981 if (*s == *target) {
3984 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3989 return (UChar* )NULL;
3992 if (*s == *target) {
3995 if (target_end == t || memcmp(t, p, target_end - t) == 0)
/* Variable-width path: step by the length of the character at s. */
3998 s += enclen(enc, s, text_end);
4001 return (UChar* )NULL;
/*
 * str_lower_case_match: compares the bytes at t (bounded by tend) against
 * the case-folded form of the text starting at p (bounded by end).
 *
 * Each step folds one character of the text into lowbuf with
 * ONIGENC_MBC_CASE_FOLD (which is handed &p, so it can advance p) and then
 * compares the folded bytes one by one against *t.  Returns 0 on the first
 * mismatching byte.  The success return is not visible in this listing.
 */
4005str_lower_case_match(
OnigEncoding enc,
int case_fold_flag,
4006 const UChar* t,
const UChar* tend,
4007 const UChar* p,
const UChar* end)
/* Scratch buffer sized for the longest case-folded character. */
4010 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4013 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4015 while (lowlen > 0) {
4016 if (*t++ != *q++)
return 0;
/*
 * NOTE(review): the line naming this function is not shown here; it is
 * presumably slow_search_ic, whose call in forward_search_range passes
 * (enc, case_fold_flag, exact, exact_end, p, end, range) -- confirm.
 *
 * Visible logic: the scan limit `end` is text_end pulled back by
 * (target length - 1) and compared against text_range; candidates are
 * tested with str_lower_case_match(); the position advances by enclen();
 * the function ends in `return NULL`.
 */
4026 UChar* target, UChar* target_end,
4027 const UChar* text,
const UChar* text_end, UChar* text_range)
4031 end = (UChar* )text_end;
4032 end -= target_end - target - 1;
4033 if (end > text_range)
4039 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4043 s += enclen(enc, s, text_end);
4046 return (UChar* )NULL;
/*
 * slow_search_backward: backward scan for the byte string
 * [target, target_end).
 *
 * Visible logic: s is first set to text_end minus the target length; a
 * second assignment sets it to text_start (the condition selecting between
 * them is not shown), and it is then aligned to a character head with
 * ONIGENC_LEFT_ADJUST_CHAR_HEAD relative to adjust_text.  A candidate needs
 * *s to equal the first target byte and t to reach target_end.  The
 * position moves to the previous character head with
 * onigenc_get_prev_char_head().  Ends in `return NULL`.
 */
4050slow_search_backward(
OnigEncoding enc, UChar* target, UChar* target_end,
4051 const UChar* text,
const UChar* adjust_text,
4052 const UChar* text_end,
const UChar* text_start)
4056 s = (UChar* )text_end;
4057 s -= (target_end - target);
4059 s = (UChar* )text_start;
4061 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4064 if (*s == *target) {
4067 while (t < target_end) {
/* t reaching target_end means every target byte was compared. */
4072 if (t == target_end)
4075 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4078 return (UChar* )NULL;
/*
 * slow_search_backward_ic: case-insensitive counterpart of the backward
 * scan.
 *
 * Visible logic: s is set up from text_end minus the target length (with a
 * second assignment to text_start whose condition is not shown) and
 * aligned with ONIGENC_LEFT_ADJUST_CHAR_HEAD.  Candidates are tested with
 * str_lower_case_match(); the position moves back one character with
 * onigenc_get_prev_char_head().  Ends in `return NULL`.
 */
4082slow_search_backward_ic(
OnigEncoding enc,
int case_fold_flag,
4083 UChar* target, UChar* target_end,
4084 const UChar* text,
const UChar* adjust_text,
4085 const UChar* text_end,
const UChar* text_start)
4089 s = (UChar* )text_end;
4090 s -= (target_end - target);
4092 s = (UChar* )text_start;
4094 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4097 if (str_lower_case_match(enc, case_fold_flag,
4098 target, target_end, s, text_end))
4101 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4104 return (UChar* )NULL;
4107#ifndef USE_SUNDAY_QUICK_SEARCH
/*
 * bm_search_notrev (variant that follows `#ifndef
 * USE_SUNDAY_QUICK_SEARCH`): skip-table search for [target, target_end).
 *
 * Visible logic: tlen1 is (target length - 1); `end` is clamped so that
 * end + tlen1 does not exceed text_end.  The skip distance is read from
 * reg->map when reg->int_map is NULL and from reg->int_map otherwise, in
 * both cases indexed by the byte *se.  The position then advances one
 * character at a time (enclen) until it has moved at least `skip` bytes
 * past t or reached end.  Returns s when t has walked back to target, and
 * NULL at the end.
 */
4110bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4111 const UChar* text,
const UChar* text_end,
4112 const UChar* text_range)
4114 const UChar *s, *se, *t, *p, *end;
4116 ptrdiff_t skip, tlen1;
4118# ifdef ONIG_DEBUG_SEARCH
4119 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4120 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4123 tail = target_end - 1;
4124 tlen1 = tail - target;
/* Keep end + tlen1 within the text. */
4126 if (end + tlen1 > text_end)
4127 end = text_end - tlen1;
/* No int_map: use the table in reg->map. */
4131 if (IS_NULL(reg->int_map)) {
4136 if (t == target)
return (UChar* )s;
4139 skip = reg->map[*se];
4142 s += enclen(reg->enc, s, end);
4143 }
while ((s - t) < skip && s < end);
4147# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4152 if (t == target)
return (UChar* )s;
4155 skip = reg->int_map[*se];
4158 s += enclen(reg->enc, s, end);
4159 }
while ((s - t) < skip && s < end);
4164 return (UChar* )NULL;
/*
 * bm_search (variant that follows `#ifndef USE_SUNDAY_QUICK_SEARCH`):
 * skip-table search for [target, target_end).
 *
 * Visible logic: `end` is text_range plus (target length - 1); s starts at
 * text plus (target length - 1), i.e. at the byte aligned with the last
 * target byte (`tail`).  Returns p when t has walked back to target.  In
 * the int_map path the position advances by reg->int_map[*s]; the advance
 * for the reg->map path is not shown in this listing.  Ends in
 * `return NULL`.
 */
4169bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4170 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4172 const UChar *s, *t, *p, *end;
4175# ifdef ONIG_DEBUG_SEARCH
4176 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4177 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4180 end = text_range + (target_end - target) - 1;
4184 tail = target_end - 1;
/* Start at the text byte lined up with the last byte of the target. */
4185 s = text + (target_end - target) - 1;
4186 if (IS_NULL(reg->int_map)) {
4190# ifdef ONIG_DEBUG_SEARCH
4191 fprintf(stderr,
"bm_search_loop: pos: %"PRIdPTR
" %s\n",
4192 (intptr_t )(s - text), s);
4195 if (t == target)
return (UChar* )p;
4202# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4207 if (t == target)
return (UChar* )p;
4210 s += reg->int_map[*s];
4214 return (UChar* )NULL;
/*
 * bm_search_notrev_ic (variant that follows `#ifndef
 * USE_SUNDAY_QUICK_SEARCH`): case-insensitive skip-table search.
 *
 * Visible logic: same setup as the case-sensitive notrev search (tlen1 is
 * target length - 1, `end` is clamped so end + tlen1 stays within
 * text_end), but candidates are tested with str_lower_case_match() using
 * reg->case_fold_flag.  The skip distance comes from reg->map[*se] or
 * reg->int_map[*se], and the position advances character by character
 * until it has moved `skip` bytes or reached end.  Ends in `return NULL`.
 */
4219bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4220 const UChar* text,
const UChar* text_end,
4221 const UChar* text_range)
4223 const UChar *s, *se, *t, *end;
4225 ptrdiff_t skip, tlen1;
4227 int case_fold_flag = reg->case_fold_flag;
4229# ifdef ONIG_DEBUG_SEARCH
/* NOTE(review): this debug print casts pointers to int and formats them
 * with %d, while other debug prints in this file use uintptr_t with
 * PRIuPTR; the int cast loses bits where pointers are wider than int.
 * Debug-only, but worth aligning -- confirm. */
4230 fprintf(stderr,
"bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4231 (
int )text, text, (
int )text_end, text_end, (
int )text_range, text_range);
4234 tail = target_end - 1;
4235 tlen1 = tail - target;
4237 if (end + tlen1 > text_end)
4238 end = text_end - tlen1;
4242 if (IS_NULL(reg->int_map)) {
4245 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4248 skip = reg->map[*se];
4251 s += enclen(reg->enc, s, end);
4252 }
while ((s - t) < skip && s < end);
4256# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4259 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4262 skip = reg->int_map[*se];
4265 s += enclen(reg->enc, s, end);
4266 }
while ((s - t) < skip && s < end);
4271 return (UChar* )NULL;
/*
 * bm_search_ic (variant that follows `#ifndef USE_SUNDAY_QUICK_SEARCH`):
 * case-insensitive skip-table search.
 *
 * Visible logic: `end` is text_range plus (target length - 1) and s starts
 * at text plus (target length - 1).  For each position, p is the start of
 * the window ending at s, and the window is tested with
 * str_lower_case_match().  In the int_map path the position advances by
 * reg->int_map[*s]; the advance for the reg->map path is not shown in this
 * listing.  Ends in `return NULL`.
 */
4276bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4277 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4279 const UChar *s, *p, *end;
4282 int case_fold_flag = reg->case_fold_flag;
4284# ifdef ONIG_DEBUG_SEARCH
/* NOTE(review): pointers are cast to int and printed with %d here, while
 * other debug prints in this file use uintptr_t with PRIuPTR; the int
 * cast loses bits where pointers are wider than int -- confirm. */
4285 fprintf(stderr,
"bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4286 (
int )text, text, (
int )text_end, text_end, (
int )text_range, text_range);
4289 end = text_range + (target_end - target) - 1;
4293 tail = target_end - 1;
4294 s = text + (target_end - target) - 1;
4295 if (IS_NULL(reg->int_map)) {
/* p is the first byte of the target-length window that ends at s. */
4297 p = s - (target_end - target) + 1;
4298 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4305# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4307 p = s - (target_end - target) + 1;
4308 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4311 s += reg->int_map[*s];
4315 return (UChar* )NULL;
/*
 * bm_search_notrev (second definition in this file; presumably the branch
 * used when USE_SUNDAY_QUICK_SEARCH is defined -- the directive separating
 * the two definitions is not shown here).
 *
 * Visible differences from the earlier definition: the skip distance is
 * indexed by se[1], the byte after se, and a guard breaks out when
 * s + 1 >= end before that byte is read.  Setup is otherwise the same:
 * tlen1 is (target length - 1), `end` is clamped so end + tlen1 stays
 * within text_end, s is returned when t has walked back to target, and
 * the function ends in `return NULL`.
 */
4322bm_search_notrev(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4323 const UChar* text,
const UChar* text_end,
4324 const UChar* text_range)
4326 const UChar *s, *se, *t, *p, *end;
4328 ptrdiff_t skip, tlen1;
4331# ifdef ONIG_DEBUG_SEARCH
4332 fprintf(stderr,
"bm_search_notrev: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4333 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4336 tail = target_end - 1;
4337 tlen1 = tail - target;
4339 if (end + tlen1 > text_end)
4340 end = text_end - tlen1;
4344 if (IS_NULL(reg->int_map)) {
4349 if (t == target)
return (UChar* )s;
/* Stop before reading se[1] once s + 1 reaches end. */
4352 if (s + 1 >= end)
break;
4353 skip = reg->map[se[1]];
4356 s += enclen(enc, s, end);
4357 }
while ((s - t) < skip && s < end);
4361# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4366 if (t == target)
return (UChar* )s;
4369 if (s + 1 >= end)
break;
4370 skip = reg->int_map[se[1]];
4373 s += enclen(enc, s, end);
4374 }
while ((s - t) < skip && s < end);
4379 return (UChar* )NULL;
/*
 * bm_search (second definition in this file; presumably the branch used
 * when USE_SUNDAY_QUICK_SEARCH is defined -- the directive separating the
 * two definitions is not shown here).
 *
 * Visible logic: tlen1 is (target length - 1) and `end` is
 * text_range + tlen1.  p is returned when t has walked back to target.
 * Otherwise the position advances by the table entry for s[1], the byte
 * after the current one (reg->map or reg->int_map), with a guard that
 * breaks out when s + 1 >= end.  Ends in `return NULL`.
 */
4384bm_search(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4385 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4387 const UChar *s, *t, *p, *end;
4391# ifdef ONIG_DEBUG_SEARCH
4392 fprintf(stderr,
"bm_search: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4393 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4396 tail = target_end - 1;
4397 tlen1 = tail - target;
4398 end = text_range + tlen1;
4403 if (IS_NULL(reg->int_map)) {
4408 if (t == target)
return (UChar* )p;
/* Stop before reading s[1] once s + 1 reaches end. */
4411 if (s + 1 >= end)
break;
4412 s += reg->map[s[1]];
4416# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4421 if (t == target)
return (UChar* )p;
4424 if (s + 1 >= end)
break;
4425 s += reg->int_map[s[1]];
4429 return (UChar* )NULL;
/*
 * bm_search_notrev_ic (second definition in this file; presumably the
 * branch used when USE_SUNDAY_QUICK_SEARCH is defined -- the directive
 * separating the two definitions is not shown here).
 *
 * Visible logic: candidates are tested with str_lower_case_match() using
 * reg->case_fold_flag.  The skip distance is indexed by se[1] (reg->map or
 * reg->int_map), guarded by a break when s + 1 >= end.  The position then
 * advances character by character until it has moved `skip` bytes past t
 * or reached end.  Ends in `return NULL`.
 */
4434bm_search_notrev_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4435 const UChar* text,
const UChar* text_end,
4436 const UChar* text_range)
4438 const UChar *s, *se, *t, *end;
4440 ptrdiff_t skip, tlen1;
4442 int case_fold_flag = reg->case_fold_flag;
4444# ifdef ONIG_DEBUG_SEARCH
4445 fprintf(stderr,
"bm_search_notrev_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4446 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4449 tail = target_end - 1;
4450 tlen1 = tail - target;
/* Keep end + tlen1 within the text. */
4452 if (end + tlen1 > text_end)
4453 end = text_end - tlen1;
4457 if (IS_NULL(reg->int_map)) {
4460 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4463 if (s + 1 >= end)
break;
4464 skip = reg->map[se[1]];
4467 s += enclen(enc, s, end);
4468 }
while ((s - t) < skip && s < end);
4472# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4475 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4478 if (s + 1 >= end)
break;
4479 skip = reg->int_map[se[1]];
4482 s += enclen(enc, s, end);
4483 }
while ((s - t) < skip && s < end);
4488 return (UChar* )NULL;
/*
 * bm_search_ic (second definition in this file; presumably the branch used
 * when USE_SUNDAY_QUICK_SEARCH is defined -- the directive separating the
 * two definitions is not shown here).
 *
 * Visible logic: tlen1 is (target length - 1) and `end` is
 * text_range + tlen1.  Candidates are tested with str_lower_case_match()
 * using reg->case_fold_flag.  The position advances by the table entry for
 * s[1] (reg->map or reg->int_map), guarded by a break when s + 1 >= end.
 * Ends in `return NULL`.
 */
4493bm_search_ic(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4494 const UChar* text,
const UChar* text_end,
const UChar* text_range)
4496 const UChar *s, *p, *end;
4500 int case_fold_flag = reg->case_fold_flag;
4502# ifdef ONIG_DEBUG_SEARCH
4503 fprintf(stderr,
"bm_search_ic: text: %"PRIuPTR
" (%p), text_end: %"PRIuPTR
" (%p), text_range: %"PRIuPTR
" (%p)\n",
4504 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4507 tail = target_end - 1;
4508 tlen1 = tail - target;
4509 end = text_range + tlen1;
4514 if (IS_NULL(reg->int_map)) {
4517 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
/* Stop before reading s[1] once s + 1 reaches end. */
4520 if (s + 1 >= end)
break;
4521 s += reg->map[s[1]];
4525# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4528 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4531 if (s + 1 >= end)
break;
4532 s += reg->int_map[s[1]];
4536 return (UChar* )NULL;
4540#ifdef USE_INT_MAP_BACKWARD
/*
 * set_bm_backward_skip: prepares the skip table used by the backward
 * skip-table search for the pattern bytes [s, end).
 *
 * Visible logic: if *skip is NULL, a table of ONIG_CHAR_TABLE_SIZE ints is
 * allocated with xmalloc, and ONIGERR_MEMORY is returned when that fails.
 * len is the pattern length in bytes.  Two loops follow, one over every
 * table slot and one over pattern indices from len - 1 down to 1; their
 * bodies are not shown in this listing.  The enc parameter is marked
 * ARG_UNUSED.
 */
4542set_bm_backward_skip(UChar* s, UChar* end,
OnigEncoding enc ARG_UNUSED,
/* Allocate the table only on first use. */
4547 if (IS_NULL(*skip)) {
4548 *skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4549 if (IS_NULL(*skip))
return ONIGERR_MEMORY;
4552 len = (int )(end - s);
4553 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4556 for (i = len - 1; i > 0; i--)
/*
 * bm_search_backward: backward skip-table search for
 * [target, target_end).
 *
 * Visible logic: s starts at text_end minus the target length and is
 * aligned to a character head with ONIGENC_LEFT_ADJUST_CHAR_HEAD.  At each
 * position the target is compared byte by byte; t reaching target_end is
 * the check that follows the comparison.  Otherwise s moves back by
 * reg->int_map_backward[*s] and is re-aligned to a character head.  Ends
 * in `return NULL`.
 */
4563bm_search_backward(
regex_t* reg,
const UChar* target,
const UChar* target_end,
4564 const UChar* text,
const UChar* adjust_text,
4565 const UChar* text_end,
const UChar* text_start)
4567 const UChar *s, *t, *p;
4569 s = text_end - (target_end - target);
4573 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4578 while (t < target_end && *p == *t) {
4581 if (t == target_end)
/* Step back by the table entry, then realign to a character head. */
4584 s -= reg->int_map_backward[*s];
4585 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4588 return (UChar* )NULL;
/*
 * NOTE(review): the line naming this function is not shown here; it is
 * presumably map_search, which forward_search_range calls as
 * (enc, map, p, range, end) -- confirm.
 *
 * Visible logic: walks forward from text while s < text_range and returns
 * the first position whose leading byte has a non-zero entry in map.  The
 * position advances by enclen().  Returns NULL when the range is
 * exhausted.
 */
4594 const UChar* text,
const UChar* text_range,
const UChar* text_end)
4596 const UChar *s = text;
4598 while (s < text_range) {
4599 if (map[*s])
return (UChar* )s;
4601 s += enclen(enc, s, text_end);
4603 return (UChar* )NULL;
/*
 * NOTE(review): the line naming this function is not shown here; it is
 * presumably map_search_backward, which backward_search_range calls as
 * (enc, map, range, adjrange, p, end) -- confirm.
 *
 * Visible logic: starts at text_start and returns the first position whose
 * leading byte has a non-zero entry in map.  The position moves to the
 * previous character head with onigenc_get_prev_char_head() relative to
 * adjust_text.  The loop condition is not shown.  Ends in `return NULL`.
 */
4608 const UChar* text,
const UChar* adjust_text,
4609 const UChar* text_start,
const UChar* text_end)
4611 const UChar *s = text_start;
4614 if (map[*s])
return (UChar* )s;
4616 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4618 return (UChar* )NULL;
/*
 * onig_match: runs the matcher at the single position `at` within
 * [str, end).
 *
 * Visible steps: initialise the match arguments with `at` passed twice;
 * when USE_COMBINATION_EXPLOSION_CHECK is defined, set up the state-check
 * buffer from the text length and the offset of `at`; resize and clear
 * `region` for reg->num_mem + 1 entries; compute the character head
 * preceding `at`; call match_at(); release the match arguments with
 * MATCH_ARG_FREE.  The handling of a failed region resize and the final
 * return are not shown in this listing.
 */
4622onig_match(
regex_t* reg,
const UChar* str,
const UChar* end,
const UChar* at,
OnigRegion* region,
4623 OnigOptionType option)
4629 MATCH_ARG_INIT(msa, option, region, at, at);
4630#ifdef USE_COMBINATION_EXPLOSION_CHECK
4632 ptrdiff_t offset = at - str;
4633 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
/* One region slot per capture group plus one more. */
4638 r = onig_region_resize_clear(region, reg->num_mem + 1);
4644 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4645 r = match_at(reg, str, end,
4646#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4652 MATCH_ARG_FREE(msa);
/*
 * forward_search_range: uses the regex's precomputed optimisation
 * (reg->optimize) to find the next place in [s, range) where a match
 * could begin, and reports the window through *low / *high (and *low_prev
 * when provided).
 *
 * Visible steps:
 *   - compare reg->dmin against the input length (the branch body is not
 *     shown);
 *   - when reg->dmin > 0, advance p by dmin, stepping by whole characters
 *     for multi-byte encodings and returning 0 if that reaches end;
 *   - dispatch on reg->optimize to the exact / skip-table / map search;
 *   - on a hit inside range, check reg->sub_anchor (begin-line / end-line)
 *     against the neighbouring characters;
 *   - derive *low, *low_prev and *high from p, reg->dmin and reg->dmax.
 */
4657forward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end, UChar* s,
4658 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4660 UChar *p, *pprev = (UChar* )NULL;
4661 size_t input_len = end - str;
4663#ifdef ONIG_DEBUG_SEARCH
4664 fprintf(stderr,
"forward_search_range: str: %"PRIuPTR
" (%p), end: %"PRIuPTR
" (%p), s: %"PRIuPTR
" (%p), range: %"PRIuPTR
" (%p)\n",
4665 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4668 if (reg->dmin > input_len) {
4673 if (reg->dmin > 0) {
4674 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4678 UChar *q = p + reg->dmin;
4680 if (q >= end)
return 0;
/* Multi-byte case: reach q by whole characters so p stays on a head. */
4681 while (p < q) p += enclen(reg->enc, p, end);
/* Pick the search routine that matches the compiled optimisation. */
4686 switch (reg->optimize) {
4687 case ONIG_OPTIMIZE_EXACT:
4688 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4690 case ONIG_OPTIMIZE_EXACT_IC:
4691 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4692 reg->exact, reg->exact_end, p, end, range);
4695 case ONIG_OPTIMIZE_EXACT_BM:
4696 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4699 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4700 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4703 case ONIG_OPTIMIZE_EXACT_BM_IC:
4704 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4707 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4708 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4711 case ONIG_OPTIMIZE_MAP:
4712 p = map_search(reg->enc, reg->map, p, range, end);
4716 if (p && p < range) {
4717 if (p - reg->dmin < s) {
4720 p += enclen(reg->enc, p, end);
4724 if (reg->sub_anchor) {
4727 switch (reg->sub_anchor) {
4728 case ANCHOR_BEGIN_LINE:
4729 if (!ON_STR_BEGIN(p)) {
4730 prev = onigenc_get_prev_char_head(reg->enc,
4731 (pprev ? pprev : str), p, end);
4732 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4737 case ANCHOR_END_LINE:
4738 if (ON_STR_END(p)) {
4739#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
/* NOTE(review): this call passes three arguments, while every other
 * onigenc_get_prev_char_head call in this function passes four (ending
 * with `end`).  It is only compiled when
 * USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE is undefined -- confirm
 * whether `end` should be added. */
4740 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4741 (pprev ? pprev : str), p);
4742 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
4746 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
4752 if (reg->dmax == 0) {
4756 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
4758 *low_prev = onigenc_get_prev_char_head(reg->enc,
4759 (pprev ? pprev : str), p, end);
4763 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
/* Less than dmax bytes precede p, so the window starts at str. */
4764 if (p < str + reg->dmax) {
4765 *low = (UChar* )str;
4767 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4770 *low = p - reg->dmax;
4772 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4773 *low, end, (
const UChar** )low_prev);
4774 if (low_prev && IS_NULL(*low_prev))
4775 *low_prev = onigenc_get_prev_char_head(reg->enc,
4776 (pprev ? pprev : s), *low, end);
4780 *low_prev = onigenc_get_prev_char_head(reg->enc,
4781 (pprev ? pprev : str), *low, end);
4787 *high = p - reg->dmin;
4789#ifdef ONIG_DEBUG_SEARCH
4791 "forward_search_range success: low: %"PRIdPTR
", high: %"PRIdPTR
", dmin: %"PRIdPTR
", dmax: %"PRIdPTR
"\n",
4792 *low - str, *high - str, reg->dmin, reg->dmax);
/* Compared against (s - range) in backward_search_range before the
 * backward skip table is built. */
4800#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
/*
 * backward_search_range: backward counterpart of the optimised range
 * search; reports the candidate window through *low / *high.
 *
 * Visible steps:
 *   - compare reg->dmin against the input length (the branch body is not
 *     shown);
 *   - dispatch on reg->optimize: exact search uses slow_search_backward;
 *     all case-insensitive kinds use slow_search_backward_ic; the
 *     skip-table kinds use bm_search_backward when USE_INT_MAP_BACKWARD is
 *     defined, building reg->int_map_backward on demand via
 *     set_bm_backward_skip; map search uses map_search_backward;
 *   - check reg->sub_anchor (begin-line / end-line), moving p back a
 *     character or jumping to `fail` when no previous character exists;
 *   - when reg->dmax is finite, set *low = p - dmax and *high = p - dmin,
 *     then right-adjust *high to a character head.
 */
4803backward_search_range(
regex_t* reg,
const UChar* str,
const UChar* end,
4804 UChar* s,
const UChar* range, UChar* adjrange,
4805 UChar** low, UChar** high)
4808 size_t input_len = end - str;
4810 if (reg->dmin > input_len) {
4818 switch (reg->optimize) {
4819 case ONIG_OPTIMIZE_EXACT:
4821 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4822 range, adjrange, end, p);
4825 case ONIG_OPTIMIZE_EXACT_IC:
4826 case ONIG_OPTIMIZE_EXACT_BM_IC:
4827 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4828 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4829 reg->exact, reg->exact_end,
4830 range, adjrange, end, p);
4833 case ONIG_OPTIMIZE_EXACT_BM:
4834 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4835#ifdef USE_INT_MAP_BACKWARD
/* The backward skip table is created lazily on first use. */
4836 if (IS_NULL(reg->int_map_backward)) {
4838 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4841 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4842 &(reg->int_map_backward));
4845 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4852 case ONIG_OPTIMIZE_MAP:
4853 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4858 if (reg->sub_anchor) {
4861 switch (reg->sub_anchor) {
4862 case ANCHOR_BEGIN_LINE:
4863 if (!ON_STR_BEGIN(p)) {
4864 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4865 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4872 case ANCHOR_END_LINE:
4873 if (ON_STR_END(p)) {
4874#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
/* NOTE(review): this call passes three arguments, while the other
 * onigenc_get_prev_char_head calls in this function pass four (ending
 * with `end`).  It is only compiled when
 * USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE is undefined -- confirm
 * whether `end` should be added. */
4875 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4876 if (IS_NULL(prev))
goto fail;
4877 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4883 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4884 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4885 if (IS_NULL(p))
goto fail;
4893 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4894 *low = p - reg->dmax;
4895 *high = p - reg->dmin;
4896 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4899#ifdef ONIG_DEBUG_SEARCH
4900 fprintf(stderr,
"backward_search_range: low: %d, high: %d\n",
4901 (
int )(*low - str), (
int )(*high - str));
4907#ifdef ONIG_DEBUG_SEARCH
4908 fprintf(stderr,
"backward_search_range: fail.\n");
/*
 * onig_search: thin wrapper over onig_search_gpos().
 *
 * Forwards every argument unchanged and passes `start` twice, so that it
 * serves as both the global_pos and the start argument of
 * onig_search_gpos().  Returns whatever onig_search_gpos() returns.
 */
4915onig_search(
regex_t* reg,
const UChar* str,
const UChar* end,
4916 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
4918 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4922onig_search_gpos(
regex_t* reg,
const UChar* str,
const UChar* end,
4923 const UChar* global_pos,
4924 const UChar* start,
const UChar* range,
OnigRegion* region, OnigOptionType option)
4929#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4930 const UChar *orig_start = start;
4931 const UChar *orig_range = range;
4934#ifdef ONIG_DEBUG_SEARCH
4936 "onig_search (entry point): str: %"PRIuPTR
" (%p), end: %"PRIuPTR
", start: %"PRIuPTR
", range: %"PRIuPTR
"\n",
4937 (uintptr_t )str, str, end - str, start - str, range - str);
4941 r = onig_region_resize_clear(region, reg->num_mem + 1);
4942 if (r)
goto finish_no_msa;
4945 if (start > end || start < str)
goto mismatch_no_msa;
4948#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4949# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4950# define MATCH_AND_RETURN_CHECK(upper_range) \
4951 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4952 if (r != ONIG_MISMATCH) {\
4954 if (! IS_FIND_LONGEST(reg->options)) {\
4961# define MATCH_AND_RETURN_CHECK(upper_range) \
4962 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4963 if (r != ONIG_MISMATCH) {\
4971# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4972# define MATCH_AND_RETURN_CHECK(none) \
4973 r = match_at(reg, str, end, s, prev, &msa);\
4974 if (r != ONIG_MISMATCH) {\
4976 if (! IS_FIND_LONGEST(reg->options)) {\
4983# define MATCH_AND_RETURN_CHECK(none) \
4984 r = match_at(reg, str, end, s, prev, &msa);\
4985 if (r != ONIG_MISMATCH) {\
4996 if (reg->anchor != 0 && str < end) {
4997 UChar *min_semi_end, *max_semi_end;
4999 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5004 if (global_pos > start)
5006 if (global_pos < range)
5007 range = global_pos + 1;
5015 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5017 if (range > start) {
5018 if (start != str)
goto mismatch_no_msa;
5027 goto mismatch_no_msa;
5030 else if (reg->anchor & ANCHOR_END_BUF) {
5031 min_semi_end = max_semi_end = (UChar* )end;
5034 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5035 goto mismatch_no_msa;
5037 if (range > start) {
5038 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5039 start = min_semi_end - reg->anchor_dmax;
5041 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5043 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5044 range = max_semi_end - reg->anchor_dmin + 1;
5047 if (start > range)
goto mismatch_no_msa;
5052 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5053 range = min_semi_end - reg->anchor_dmax;
5055 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5056 start = max_semi_end - reg->anchor_dmin;
5057 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5059 if (range > start)
goto mismatch_no_msa;
5062 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5063 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5065 max_semi_end = (UChar* )end;
5066 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5067 min_semi_end = pre_end;
5069#ifdef USE_CRNL_AS_LINE_TERMINATOR
5070 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5071 if (IS_NOT_NULL(pre_end) &&
5072 IS_NEWLINE_CRLF(reg->options) &&
5073 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5074 min_semi_end = pre_end;
5077 if (min_semi_end > str && start <= min_semi_end) {
5082 min_semi_end = (UChar* )end;
5086 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5087 goto begin_position;
5090 else if (str == end) {
5091 static const UChar address_for_empty_string[] =
"";
5093#ifdef ONIG_DEBUG_SEARCH
5094 fprintf(stderr,
"onig_search: empty string.\n");
5097 if (reg->threshold_len == 0) {
5098 start = end = str = address_for_empty_string;
5100 prev = (UChar* )NULL;
5102 MATCH_ARG_INIT(msa, option, region, start, start);
5103#ifdef USE_COMBINATION_EXPLOSION_CHECK
5104 msa.state_check_buff = (
void* )0;
5105 msa.state_check_buff_size = 0;
5107 MATCH_AND_RETURN_CHECK(end);
5110 goto mismatch_no_msa;
5113#ifdef ONIG_DEBUG_SEARCH
5114 fprintf(stderr,
"onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5115 (
int )(end - str), (
int )(start - str), (
int )(range - str));
5118 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5119#ifdef USE_COMBINATION_EXPLOSION_CHECK
5121 ptrdiff_t offset = (MIN(start, range) - str);
5122 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5127 if (range > start) {
5129 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5131 prev = (UChar* )NULL;
5133 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5134 UChar *sch_range, *low, *high, *low_prev;
5136 sch_range = (UChar* )range;
5137 if (reg->dmax != 0) {
5138 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5139 sch_range = (UChar* )end;
5141 sch_range += reg->dmax;
5142 if (sch_range > end) sch_range = (UChar* )end;
5146 if ((end - start) < reg->threshold_len)
5149 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5151 if (! forward_search_range(reg, str, end, s, sch_range,
5152 &low, &high, &low_prev))
goto mismatch;
5158 MATCH_AND_RETURN_CHECK(orig_range);
5160 s += enclen(reg->enc, s, end);
5162 }
while (s < range);
5166 if (! forward_search_range(reg, str, end, s, sch_range,
5167 &low, &high, (UChar** )NULL))
goto mismatch;
5169 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5171 MATCH_AND_RETURN_CHECK(orig_range);
5173 s += enclen(reg->enc, s, end);
5175 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5176 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5179 s += enclen(reg->enc, s, end);
5182 }
while (s < range);
5189 MATCH_AND_RETURN_CHECK(orig_range);
5191 s += enclen(reg->enc, s, end);
5192 }
while (s < range);
5195 MATCH_AND_RETURN_CHECK(orig_range);
5199 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5200 UChar *low, *high, *adjrange, *sch_start;
5203 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5205 adjrange = (UChar* )end;
5207 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5208 (end - range) >= reg->threshold_len) {
5210 sch_start = s + reg->dmax;
5211 if (sch_start > end) sch_start = (UChar* )end;
5212 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5220 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5221 MATCH_AND_RETURN_CHECK(orig_start);
5224 }
while (s >= range);
5228 if ((end - range) < reg->threshold_len)
goto mismatch;
5231 if (reg->dmax != 0) {
5232 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5233 sch_start = (UChar* )end;
5235 sch_start += reg->dmax;
5236 if (sch_start > end) sch_start = (UChar* )end;
5238 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5239 start, sch_start, end);
5242 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5243 &low, &high) <= 0)
goto mismatch;
5248 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5249 MATCH_AND_RETURN_CHECK(orig_start);
5251 }
while (s >= range);
5255#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5256 if (IS_FIND_LONGEST(reg->options)) {
5257 if (msa.best_len >= 0) {
5266 MATCH_ARG_FREE(msa);
5270 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5271 onig_region_clear(region);
5275 if (r != ONIG_MISMATCH)
5276 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
5284 if (r != ONIG_MISMATCH)
5285 fprintf(stderr,
"onig_search: error %"PRIdPTRDIFF
"\n", r);
5290 MATCH_ARG_FREE(msa);
/*
 * onig_scan: run onig_search() on the subject and pass each result to
 * scan_callback.
 *
 *   reg           - compiled pattern, forwarded to onig_search()
 *   str, end      - forwarded to onig_search() as its second and third
 *                   arguments (presumably the subject's start and end)
 *   scan_callback - invoked as scan_callback(n, r, region, callback_arg),
 *                   where r is the value returned by onig_search()
 *
 * NOTE(review): this listing omits several source lines (the remaining
 * parameters, the local declarations, the enclosing loop and the return
 * statements), so only the visible steps are described below.
 */
5295onig_scan(
regex_t* reg,
const UChar* str,
const UChar* end,
5297 int (*scan_callback)(OnigPosition, OnigPosition,
OnigRegion*,
void*),
/* Search again from the current position `start`; `end` is passed both as
 * the subject end and as the fifth argument. */
5308 r = onig_search(reg, str, end, start, end, region, option);
/* Hand the result to the caller's callback; rs receives its return value. */
5310 rs = scan_callback(n, r, region, callback_arg);
/* region->end[0] equals the offset of `start` within str, i.e. the scan
 * position did not advance (presumably an empty match at `start` - confirm).
 * Stop if `start` has reached `end`, otherwise step over one character so
 * the next search begins further along. */
5315 if (region->end[0] == start - str) {
5316 if (start >= end)
break;
5317 start += enclen(reg->enc, start, end);
/* Resume from offset region->end[0] within str (presumably the alternative
 * branch; the line between is not shown in this listing). */
5320 start = str + region->end[0];
/* onig_search() returned ONIG_MISMATCH. */
5325 else if (r == ONIG_MISMATCH) {
/*
 * onig_get_encoding: accessor taking a compiled regex.
 * NOTE(review): the return type and body are not shown in this listing;
 * presumably it returns the regex's encoding (reg->enc is read elsewhere in
 * this file) - TODO confirm.
 */
5337onig_get_encoding(
const regex_t* reg)
/*
 * onig_get_options: return the option flags stored in the compiled regex
 * (reg->options). The regex is not modified.
 */
5342extern OnigOptionType
5343onig_get_options(
const regex_t* reg)
5345 return reg->options;
/*
 * onig_get_case_fold_flag: return the case-fold flag stored in the compiled
 * regex (reg->case_fold_flag). The regex is not modified.
 */
5348extern OnigCaseFoldType
5349onig_get_case_fold_flag(
const regex_t* reg)
5351 return reg->case_fold_flag;
/*
 * onig_get_syntax: accessor taking a compiled regex.
 * NOTE(review): the return type and body are not shown in this listing;
 * presumably it returns the syntax definition the regex was compiled with -
 * TODO confirm.
 */
5355onig_get_syntax(
const regex_t* reg)
/*
 * onig_number_of_captures: return reg->num_mem from the compiled regex
 * (presumably the number of capture groups, going by the function name -
 * confirm against the regex_t definition).
 */
5361onig_number_of_captures(
const regex_t* reg)
5363 return reg->num_mem;
/*
 * onig_number_of_capture_histories: inspects reg->capture_history when
 * USE_CAPTURE_HISTORY is defined.
 * NOTE(review): the counter, the return statements and the
 * non-USE_CAPTURE_HISTORY branch are not shown in this listing; presumably
 * the function counts the bits that are set - TODO confirm.
 */
5367onig_number_of_capture_histories(
const regex_t* reg)
5369#ifdef USE_CAPTURE_HISTORY
/* Visit every index from 0 through ONIG_MAX_CAPTURE_HISTORY_GROUP
 * (inclusive) and test its bit in reg->capture_history. */
5373 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5374 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.