Ruby 3.4.8p72 (2025-12-17 revision 995b59f66677d44767ce9faac6957e5543617ff9)
regcomp.c
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(void)
37{
38 return OnigDefaultCaseFoldFlag;
39}
40
41extern int
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
114 return ONIG_INFINITE_DISTANCE;
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141// Attempt to right size allocated buffers for a regex post compile
142static void
143onig_reg_resize(regex_t *reg)
144{
145 do {
146 if (!reg->used) {
147 xfree(reg->p);
148 reg->alloc = 0;
149 reg->p = 0;
150 }
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153 // Skip the right size optimization if memory allocation fails
154 if (new_ptr) {
155 reg->alloc = reg->used;
156 reg->p = new_ptr;
157 }
158 }
159 } while ((reg = reg->chain) != 0);
160}
161
162extern int
163onig_bbuf_init(BBuf* buf, OnigDistance size)
164{
165 if (size <= 0) {
166 size = 0;
167 buf->p = NULL;
168 }
169 else {
170 buf->p = (UChar* )xmalloc(size);
171 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172 }
173
174 buf->alloc = (unsigned int )size;
175 buf->used = 0;
176 return 0;
177}
178
179
180#ifdef USE_SUBEXP_CALL
181
182static int
183unset_addr_list_init(UnsetAddrList* uslist, int size)
184{
185 UnsetAddr* p;
186
187 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
188 CHECK_NULL_RETURN_MEMERR(p);
189 uslist->num = 0;
190 uslist->alloc = size;
191 uslist->us = p;
192 return 0;
193}
194
195static void
196unset_addr_list_end(UnsetAddrList* uslist)
197{
198 xfree(uslist->us);
199}
200
201static int
202unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
203{
204 UnsetAddr* p;
205 int size;
206
207 if (uslist->num >= uslist->alloc) {
208 size = uslist->alloc * 2;
209 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
210 CHECK_NULL_RETURN_MEMERR(p);
211 uslist->alloc = size;
212 uslist->us = p;
213 }
214
215 uslist->us[uslist->num].offset = offset;
216 uslist->us[uslist->num].target = node;
217 uslist->num++;
218 return 0;
219}
220#endif /* USE_SUBEXP_CALL */
221
222
223static int
224add_opcode(regex_t* reg, int opcode)
225{
226 BBUF_ADD1(reg, opcode);
227 return 0;
228}
229
230#ifdef USE_COMBINATION_EXPLOSION_CHECK
231static int
232add_state_check_num(regex_t* reg, int num)
233{
234 StateCheckNumType n = (StateCheckNumType )num;
235
236 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
237 return 0;
238}
239#endif
240
241static int
242add_rel_addr(regex_t* reg, int addr)
243{
244 RelAddrType ra = (RelAddrType )addr;
245
246 BBUF_ADD(reg, &ra, SIZE_RELADDR);
247 return 0;
248}
249
250static int
251add_abs_addr(regex_t* reg, int addr)
252{
253 AbsAddrType ra = (AbsAddrType )addr;
254
255 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
256 return 0;
257}
258
259static int
260add_length(regex_t* reg, OnigDistance len)
261{
262 LengthType l = (LengthType )len;
263
264 BBUF_ADD(reg, &l, SIZE_LENGTH);
265 return 0;
266}
267
268static int
269add_mem_num(regex_t* reg, int num)
270{
271 MemNumType n = (MemNumType )num;
272
273 BBUF_ADD(reg, &n, SIZE_MEMNUM);
274 return 0;
275}
276
277#if 0
278static int
279add_pointer(regex_t* reg, void* addr)
280{
281 PointerType ptr = (PointerType )addr;
282
283 BBUF_ADD(reg, &ptr, SIZE_POINTER);
284 return 0;
285}
286#endif
287
288static int
289add_option(regex_t* reg, OnigOptionType option)
290{
291 BBUF_ADD(reg, &option, SIZE_OPTION);
292 return 0;
293}
294
295static int
296add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
297{
298 int r;
299
300 r = add_opcode(reg, opcode);
301 if (r) return r;
302 r = add_rel_addr(reg, addr);
303 return r;
304}
305
306static int
307add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
308{
309 BBUF_ADD(reg, bytes, len);
310 return 0;
311}
312
313static int
314add_bitset(regex_t* reg, BitSetRef bs)
315{
316 BBUF_ADD(reg, bs, SIZE_BITSET);
317 return 0;
318}
319
320static int
321add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
322{
323 int r;
324
325 r = add_opcode(reg, opcode);
326 if (r) return r;
327 r = add_option(reg, option);
328 return r;
329}
330
331static int compile_length_tree(Node* node, regex_t* reg);
332static int compile_tree(Node* node, regex_t* reg);
333
334
/* Non-zero for the exact-string opcodes that carry an explicit length
   operand: add_compile_string() writes one for them and
   add_compile_string_length() reserves SIZE_LENGTH for it. */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
  ((op) == OP_EXACTN    || (op) == OP_EXACTN_IC ||\
   (op) == OP_EXACTMB2N || (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN)
338
339static int
340select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
341{
342 int op;
343 OnigDistance str_len = roomof(byte_len, mb_len);
344
345 if (ignore_case) {
346 switch (str_len) {
347 case 1: op = OP_EXACT1_IC; break;
348 default: op = OP_EXACTN_IC; break;
349 }
350 }
351 else {
352 switch (mb_len) {
353 case 1:
354 switch (str_len) {
355 case 1: op = OP_EXACT1; break;
356 case 2: op = OP_EXACT2; break;
357 case 3: op = OP_EXACT3; break;
358 case 4: op = OP_EXACT4; break;
359 case 5: op = OP_EXACT5; break;
360 default: op = OP_EXACTN; break;
361 }
362 break;
363
364 case 2:
365 switch (str_len) {
366 case 1: op = OP_EXACTMB2N1; break;
367 case 2: op = OP_EXACTMB2N2; break;
368 case 3: op = OP_EXACTMB2N3; break;
369 default: op = OP_EXACTMB2N; break;
370 }
371 break;
372
373 case 3:
374 op = OP_EXACTMB3N;
375 break;
376
377 default:
378 op = OP_EXACTMBN;
379 break;
380 }
381 }
382 return op;
383}
384
385static int
386compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
387{
388 int r;
389 int saved_num_null_check = reg->num_null_check;
390
391 if (empty_info != 0) {
392 r = add_opcode(reg, OP_NULL_CHECK_START);
393 if (r) return r;
394 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
395 if (r) return r;
396 reg->num_null_check++;
397 }
398
399 r = compile_tree(node, reg);
400 if (r) return r;
401
402 if (empty_info != 0) {
403 if (empty_info == NQ_TARGET_IS_EMPTY)
404 r = add_opcode(reg, OP_NULL_CHECK_END);
405 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
406 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
407 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
408 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
409
410 if (r) return r;
411 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
412 }
413 return r;
414}
415
416#ifdef USE_SUBEXP_CALL
417static int
418compile_call(CallNode* node, regex_t* reg)
419{
420 int r;
421
422 r = add_opcode(reg, OP_CALL);
423 if (r) return r;
424 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
425 node->target);
426 if (r) return r;
427 r = add_abs_addr(reg, 0 /*dummy addr.*/);
428 return r;
429}
430#endif
431
432static int
433compile_tree_n_times(Node* node, int n, regex_t* reg)
434{
435 int i, r;
436
437 for (i = 0; i < n; i++) {
438 r = compile_tree(node, reg);
439 if (r) return r;
440 }
441 return 0;
442}
443
444static int
445add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
446 regex_t* reg ARG_UNUSED, int ignore_case)
447{
448 int len;
449 int op = select_str_opcode(mb_len, byte_len, ignore_case);
450
451 len = SIZE_OPCODE;
452
453 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
454 if (IS_NEED_STR_LEN_OP_EXACT(op))
455 len += SIZE_LENGTH;
456
457 len += (int )byte_len;
458 return len;
459}
460
461static int
462add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
463 regex_t* reg, int ignore_case)
464{
465 int op = select_str_opcode(mb_len, byte_len, ignore_case);
466 add_opcode(reg, op);
467
468 if (op == OP_EXACTMBN)
469 add_length(reg, mb_len);
470
471 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
472 if (op == OP_EXACTN_IC)
473 add_length(reg, byte_len);
474 else
475 add_length(reg, byte_len / mb_len);
476 }
477
478 add_bytes(reg, s, byte_len);
479 return 0;
480}
481
482
483static int
484compile_length_string_node(Node* node, regex_t* reg)
485{
486 int rlen, r, len, prev_len, blen, ambig;
487 OnigEncoding enc = reg->enc;
488 UChar *p, *prev;
489 StrNode* sn;
490
491 sn = NSTR(node);
492 if (sn->end <= sn->s)
493 return 0;
494
495 ambig = NSTRING_IS_AMBIG(node);
496
497 p = prev = sn->s;
498 prev_len = enclen(enc, p, sn->end);
499 p += prev_len;
500 blen = prev_len;
501 rlen = 0;
502
503 for (; p < sn->end; ) {
504 len = enclen(enc, p, sn->end);
505 if (len == prev_len || ambig) {
506 blen += len;
507 }
508 else {
509 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
510 rlen += r;
511 prev = p;
512 blen = len;
513 prev_len = len;
514 }
515 p += len;
516 }
517 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
518 rlen += r;
519 return rlen;
520}
521
522static int
523compile_length_string_raw_node(StrNode* sn, regex_t* reg)
524{
525 if (sn->end <= sn->s)
526 return 0;
527
528 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
529}
530
531static int
532compile_string_node(Node* node, regex_t* reg)
533{
534 int r, len, prev_len, blen, ambig;
535 OnigEncoding enc = reg->enc;
536 UChar *p, *prev, *end;
537 StrNode* sn;
538
539 sn = NSTR(node);
540 if (sn->end <= sn->s)
541 return 0;
542
543 end = sn->end;
544 ambig = NSTRING_IS_AMBIG(node);
545
546 p = prev = sn->s;
547 prev_len = enclen(enc, p, end);
548 p += prev_len;
549 blen = prev_len;
550
551 for (; p < end; ) {
552 len = enclen(enc, p, end);
553 if (len == prev_len || ambig) {
554 blen += len;
555 }
556 else {
557 r = add_compile_string(prev, prev_len, blen, reg, ambig);
558 if (r) return r;
559
560 prev = p;
561 blen = len;
562 prev_len = len;
563 }
564
565 p += len;
566 }
567 return add_compile_string(prev, prev_len, blen, reg, ambig);
568}
569
570static int
571compile_string_raw_node(StrNode* sn, regex_t* reg)
572{
573 if (sn->end <= sn->s)
574 return 0;
575
576 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
577}
578
579static int
580add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
581{
582#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
583 add_length(reg, mbuf->used);
584 return add_bytes(reg, mbuf->p, mbuf->used);
585#else
586 int r, pad_size;
587 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
588
589 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
590 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
591 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
592
593 r = add_bytes(reg, mbuf->p, mbuf->used);
594
595 /* padding for return value from compile_length_cclass_node() to be fix. */
596 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
597 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
598 return r;
599#endif
600}
601
602static int
603compile_length_cclass_node(CClassNode* cc, regex_t* reg)
604{
605 int len;
606
607 if (IS_NULL(cc->mbuf)) {
608 len = SIZE_OPCODE + SIZE_BITSET;
609 }
610 else {
611 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
612 len = SIZE_OPCODE;
613 }
614 else {
615 len = SIZE_OPCODE + SIZE_BITSET;
616 }
617#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
618 len += SIZE_LENGTH + cc->mbuf->used;
619#else
620 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
621#endif
622 }
623
624 return len;
625}
626
627static int
628compile_cclass_node(CClassNode* cc, regex_t* reg)
629{
630 int r;
631
632 if (IS_NULL(cc->mbuf)) {
633 if (IS_NCCLASS_NOT(cc))
634 add_opcode(reg, OP_CCLASS_NOT);
635 else
636 add_opcode(reg, OP_CCLASS);
637
638 r = add_bitset(reg, cc->bs);
639 }
640 else {
641 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
642 if (IS_NCCLASS_NOT(cc))
643 add_opcode(reg, OP_CCLASS_MB_NOT);
644 else
645 add_opcode(reg, OP_CCLASS_MB);
646
647 r = add_multi_byte_cclass(cc->mbuf, reg);
648 }
649 else {
650 if (IS_NCCLASS_NOT(cc))
651 add_opcode(reg, OP_CCLASS_MIX_NOT);
652 else
653 add_opcode(reg, OP_CCLASS_MIX);
654
655 r = add_bitset(reg, cc->bs);
656 if (r) return r;
657 r = add_multi_byte_cclass(cc->mbuf, reg);
658 }
659 }
660
661 return r;
662}
663
664static int
665entry_repeat_range(regex_t* reg, int id, int lower, int upper)
666{
667#define REPEAT_RANGE_ALLOC 4
668
670
671 if (reg->repeat_range_alloc == 0) {
672 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
673 CHECK_NULL_RETURN_MEMERR(p);
674 reg->repeat_range = p;
675 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
676 }
677 else if (reg->repeat_range_alloc <= id) {
678 int n;
679 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
680 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
681 sizeof(OnigRepeatRange) * n);
682 CHECK_NULL_RETURN_MEMERR(p);
683 reg->repeat_range = p;
684 reg->repeat_range_alloc = n;
685 }
686 else {
687 p = reg->repeat_range;
688 }
689
690 p[id].lower = lower;
691 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
692 return 0;
693}
694
695static int
696compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
697 regex_t* reg)
698{
699 int r;
700 int num_repeat = reg->num_repeat;
701
702 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
703 if (r) return r;
704 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
705 reg->num_repeat++;
706 if (r) return r;
707 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
708 if (r) return r;
709
710 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
711 if (r) return r;
712
713 r = compile_tree_empty_check(qn->target, reg, empty_info);
714 if (r) return r;
715
716 if (
717#ifdef USE_SUBEXP_CALL
718 reg->num_call > 0 ||
719#endif
720 IS_QUANTIFIER_IN_REPEAT(qn)) {
721 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
722 }
723 else {
724 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
725 }
726 if (r) return r;
727 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
728 return r;
729}
730
731static int
732is_anychar_star_quantifier(QtfrNode* qn)
733{
734 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
735 NTYPE(qn->target) == NT_CANY)
736 return 1;
737 else
738 return 0;
739}
740
/* Size threshold compared against compiled target lengths when the
   quantifier compilers decide whether to unroll a repeat inline. */
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
/* True when the local variable `ckn` of the enclosing function is positive;
   only usable where such a local is in scope. */
#define CKN_ON   (ckn > 0)
743
744#ifdef USE_COMBINATION_EXPLOSION_CHECK
745
746static int
747compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
748{
749 int len, mod_tlen, cklen;
750 int ckn;
751 int infinite = IS_REPEAT_INFINITE(qn->upper);
752 int empty_info = qn->target_empty_info;
753 int tlen = compile_length_tree(qn->target, reg);
754
755 if (tlen < 0) return tlen;
756
757 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
758
759 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
760
761 /* anychar repeat */
762 if (NTYPE(qn->target) == NT_CANY) {
763 if (qn->greedy && infinite) {
764 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
765 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
766 else
767 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
768 }
769 }
770
771 if (empty_info != 0)
772 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
773 else
774 mod_tlen = tlen;
775
776 if (infinite && qn->lower <= 1) {
777 if (qn->greedy) {
778 if (qn->lower == 1)
779 len = SIZE_OP_JUMP;
780 else
781 len = 0;
782
783 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
784 }
785 else {
786 if (qn->lower == 0)
787 len = SIZE_OP_JUMP;
788 else
789 len = 0;
790
791 len += mod_tlen + SIZE_OP_PUSH + cklen;
792 }
793 }
794 else if (qn->upper == 0) {
795 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
796 len = SIZE_OP_JUMP + tlen;
797 else
798 len = 0;
799 }
800 else if (qn->upper == 1 && qn->greedy) {
801 if (qn->lower == 0) {
802 if (CKN_ON) {
803 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
804 }
805 else {
806 len = SIZE_OP_PUSH + tlen;
807 }
808 }
809 else {
810 len = tlen;
811 }
812 }
813 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
814 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
815 }
816 else {
817 len = SIZE_OP_REPEAT_INC
818 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
819 if (CKN_ON)
820 len += SIZE_OP_STATE_CHECK;
821 }
822
823 return len;
824}
825
826static int
827compile_quantifier_node(QtfrNode* qn, regex_t* reg)
828{
829 int r, mod_tlen;
830 int ckn;
831 int infinite = IS_REPEAT_INFINITE(qn->upper);
832 int empty_info = qn->target_empty_info;
833 int tlen = compile_length_tree(qn->target, reg);
834
835 if (tlen < 0) return tlen;
836
837 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
838
839 if (is_anychar_star_quantifier(qn)) {
840 r = compile_tree_n_times(qn->target, qn->lower, reg);
841 if (r) return r;
842 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
843 if (IS_MULTILINE(reg->options))
844 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
845 else
846 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
847 if (r) return r;
848 if (CKN_ON) {
849 r = add_state_check_num(reg, ckn);
850 if (r) return r;
851 }
852
853 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
854 }
855 else {
856 if (IS_MULTILINE(reg->options)) {
857 r = add_opcode(reg, (CKN_ON ?
858 OP_STATE_CHECK_ANYCHAR_ML_STAR
859 : OP_ANYCHAR_ML_STAR));
860 }
861 else {
862 r = add_opcode(reg, (CKN_ON ?
863 OP_STATE_CHECK_ANYCHAR_STAR
864 : OP_ANYCHAR_STAR));
865 }
866 if (r) return r;
867 if (CKN_ON)
868 r = add_state_check_num(reg, ckn);
869
870 return r;
871 }
872 }
873
874 if (empty_info != 0)
875 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
876 else
877 mod_tlen = tlen;
878
879 if (infinite && qn->lower <= 1) {
880 if (qn->greedy) {
881 if (qn->lower == 1) {
882 r = add_opcode_rel_addr(reg, OP_JUMP,
883 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
884 if (r) return r;
885 }
886
887 if (CKN_ON) {
888 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
889 if (r) return r;
890 r = add_state_check_num(reg, ckn);
891 if (r) return r;
892 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
893 }
894 else {
895 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
896 }
897 if (r) return r;
898 r = compile_tree_empty_check(qn->target, reg, empty_info);
899 if (r) return r;
900 r = add_opcode_rel_addr(reg, OP_JUMP,
901 -(mod_tlen + (int )SIZE_OP_JUMP
902 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
903 }
904 else {
905 if (qn->lower == 0) {
906 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
907 if (r) return r;
908 }
909 r = compile_tree_empty_check(qn->target, reg, empty_info);
910 if (r) return r;
911 if (CKN_ON) {
912 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
913 if (r) return r;
914 r = add_state_check_num(reg, ckn);
915 if (r) return r;
916 r = add_rel_addr(reg,
917 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
918 }
919 else
920 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
921 }
922 }
923 else if (qn->upper == 0) {
924 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
925 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
926 if (r) return r;
927 r = compile_tree(qn->target, reg);
928 }
929 else
930 r = 0;
931 }
932 else if (qn->upper == 1 && qn->greedy) {
933 if (qn->lower == 0) {
934 if (CKN_ON) {
935 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
936 if (r) return r;
937 r = add_state_check_num(reg, ckn);
938 if (r) return r;
939 r = add_rel_addr(reg, tlen);
940 }
941 else {
942 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
943 }
944 if (r) return r;
945 }
946
947 r = compile_tree(qn->target, reg);
948 }
949 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
950 if (CKN_ON) {
951 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
952 if (r) return r;
953 r = add_state_check_num(reg, ckn);
954 if (r) return r;
955 r = add_rel_addr(reg, SIZE_OP_JUMP);
956 }
957 else {
958 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
959 }
960
961 if (r) return r;
962 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
963 if (r) return r;
964 r = compile_tree(qn->target, reg);
965 }
966 else {
967 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
968 if (CKN_ON) {
969 if (r) return r;
970 r = add_opcode(reg, OP_STATE_CHECK);
971 if (r) return r;
972 r = add_state_check_num(reg, ckn);
973 }
974 }
975 return r;
976}
977
978#else /* USE_COMBINATION_EXPLOSION_CHECK */
979
980static int
981compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
982{
983 int len, mod_tlen;
984 int infinite = IS_REPEAT_INFINITE(qn->upper);
985 int empty_info = qn->target_empty_info;
986 int tlen = compile_length_tree(qn->target, reg);
987
988 if (tlen < 0) return tlen;
989
990 /* anychar repeat */
991 if (NTYPE(qn->target) == NT_CANY) {
992 if (qn->greedy && infinite) {
993 if (IS_NOT_NULL(qn->next_head_exact))
994 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
995 else
996 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
997 }
998 }
999
1000 if (empty_info != 0)
1001 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1002 else
1003 mod_tlen = tlen;
1004
1005 if (infinite &&
1006 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1007 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1008 len = SIZE_OP_JUMP;
1009 }
1010 else {
1011 len = tlen * qn->lower;
1012 }
1013
1014 if (qn->greedy) {
1015#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1016 if (IS_NOT_NULL(qn->head_exact))
1017 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1018 else
1019#endif
1020 if (IS_NOT_NULL(qn->next_head_exact))
1021 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1022 else
1023 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1024 }
1025 else
1026 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1027 }
1028 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1029 len = SIZE_OP_JUMP + tlen;
1030 }
1031 else if (!infinite && qn->greedy &&
1032 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1033 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1034 len = tlen * qn->lower;
1035 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1036 }
1037 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1038 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1039 }
1040 else {
1041 len = SIZE_OP_REPEAT_INC
1042 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1043 }
1044
1045 return len;
1046}
1047
1048static int
1049compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1050{
1051 int i, r, mod_tlen;
1052 int infinite = IS_REPEAT_INFINITE(qn->upper);
1053 int empty_info = qn->target_empty_info;
1054 int tlen = compile_length_tree(qn->target, reg);
1055
1056 if (tlen < 0) return tlen;
1057
1058 if (is_anychar_star_quantifier(qn)) {
1059 r = compile_tree_n_times(qn->target, qn->lower, reg);
1060 if (r) return r;
1061 if (IS_NOT_NULL(qn->next_head_exact)) {
1062 if (IS_MULTILINE(reg->options))
1063 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1064 else
1065 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1066 if (r) return r;
1067 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1068 }
1069 else {
1070 if (IS_MULTILINE(reg->options))
1071 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1072 else
1073 return add_opcode(reg, OP_ANYCHAR_STAR);
1074 }
1075 }
1076
1077 if (empty_info != 0)
1078 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1079 else
1080 mod_tlen = tlen;
1081
1082 if (infinite &&
1083 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1084 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1085 if (qn->greedy) {
1086#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1087 if (IS_NOT_NULL(qn->head_exact))
1088 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1089 else
1090#endif
1091 if (IS_NOT_NULL(qn->next_head_exact))
1092 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1093 else
1094 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1095 }
1096 else {
1097 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1098 }
1099 if (r) return r;
1100 }
1101 else {
1102 r = compile_tree_n_times(qn->target, qn->lower, reg);
1103 if (r) return r;
1104 }
1105
1106 if (qn->greedy) {
1107#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1108 if (IS_NOT_NULL(qn->head_exact)) {
1109 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1110 mod_tlen + SIZE_OP_JUMP);
1111 if (r) return r;
1112 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1114 if (r) return r;
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1117 }
1118 else
1119#endif
1120 if (IS_NOT_NULL(qn->next_head_exact)) {
1121 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1122 mod_tlen + SIZE_OP_JUMP);
1123 if (r) return r;
1124 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1125 r = compile_tree_empty_check(qn->target, reg, empty_info);
1126 if (r) return r;
1127 r = add_opcode_rel_addr(reg, OP_JUMP,
1128 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1129 }
1130 else {
1131 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1132 if (r) return r;
1133 r = compile_tree_empty_check(qn->target, reg, empty_info);
1134 if (r) return r;
1135 r = add_opcode_rel_addr(reg, OP_JUMP,
1136 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1137 }
1138 }
1139 else {
1140 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1141 if (r) return r;
1142 r = compile_tree_empty_check(qn->target, reg, empty_info);
1143 if (r) return r;
1144 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1145 }
1146 }
1147 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1148 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1149 if (r) return r;
1150 r = compile_tree(qn->target, reg);
1151 }
1152 else if (!infinite && qn->greedy &&
1153 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1154 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1155 int n = qn->upper - qn->lower;
1156
1157 r = compile_tree_n_times(qn->target, qn->lower, reg);
1158 if (r) return r;
1159
1160 for (i = 0; i < n; i++) {
1161 r = add_opcode_rel_addr(reg, OP_PUSH,
1162 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1163 if (r) return r;
1164 r = compile_tree(qn->target, reg);
1165 if (r) return r;
1166 }
1167 }
1168 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1169 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1170 if (r) return r;
1171 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1172 if (r) return r;
1173 r = compile_tree(qn->target, reg);
1174 }
1175 else {
1176 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1177 }
1178 return r;
1179}
1180#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1181
1182static int
1183compile_length_option_node(EncloseNode* node, regex_t* reg)
1184{
1185 int tlen;
1186 OnigOptionType prev = reg->options;
1187
1188 reg->options = node->option;
1189 tlen = compile_length_tree(node->target, reg);
1190 reg->options = prev;
1191
1192 if (tlen < 0) return tlen;
1193
1194 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1195 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1196 + tlen + SIZE_OP_SET_OPTION;
1197 }
1198 else
1199 return tlen;
1200}
1201
1202static int
1203compile_option_node(EncloseNode* node, regex_t* reg)
1204{
1205 int r;
1206 OnigOptionType prev = reg->options;
1207
1208 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1209 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1210 if (r) return r;
1211 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1212 if (r) return r;
1213 r = add_opcode(reg, OP_FAIL);
1214 if (r) return r;
1215 }
1216
1217 reg->options = node->option;
1218 r = compile_tree(node->target, reg);
1219 reg->options = prev;
1220
1221 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1222 if (r) return r;
1223 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1224 }
1225 return r;
1226}
1227
1228static int
1229compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1230{
1231 int len;
1232 int tlen;
1233
1234 if (node->type == ENCLOSE_OPTION)
1235 return compile_length_option_node(node, reg);
1236
1237 if (node->target) {
1238 tlen = compile_length_tree(node->target, reg);
1239 if (tlen < 0) return tlen;
1240 }
1241 else
1242 tlen = 0;
1243
1244 switch (node->type) {
1245 case ENCLOSE_MEMORY:
1246#ifdef USE_SUBEXP_CALL
1247 if (IS_ENCLOSE_CALLED(node)) {
1248 len = SIZE_OP_MEMORY_START_PUSH + tlen
1249 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1250 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1251 len += (IS_ENCLOSE_RECURSION(node)
1252 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1253 else
1254 len += (IS_ENCLOSE_RECURSION(node)
1255 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1256 }
1257 else if (IS_ENCLOSE_RECURSION(node)) {
1258 len = SIZE_OP_MEMORY_START_PUSH;
1259 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1260 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1261 }
1262 else
1263#endif
1264 {
1265 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1266 len = SIZE_OP_MEMORY_START_PUSH;
1267 else
1268 len = SIZE_OP_MEMORY_START;
1269
1270 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1271 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1272 }
1273 break;
1274
1275 case ENCLOSE_STOP_BACKTRACK:
1276 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1277 /* optimization because the match cache optimization pushes an extra item to */
1278 /* the stack and it breaks the assumption for this optimization. */
1279#ifndef USE_MATCH_CACHE
1280 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1281 QtfrNode* qn = NQTFR(node->target);
1282 tlen = compile_length_tree(qn->target, reg);
1283 if (tlen < 0) return tlen;
1284
1285 len = tlen * qn->lower
1286 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1287 }
1288 else {
1289#endif
1290 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1291#ifndef USE_MATCH_CACHE
1292 }
1293#endif
1294 break;
1295
1296 case ENCLOSE_CONDITION:
1297 len = SIZE_OP_CONDITION;
1298 if (NTYPE(node->target) == NT_ALT) {
1299 Node* x = node->target;
1300
1301 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1302 if (tlen < 0) return tlen;
1303 len += tlen + SIZE_OP_JUMP;
1304 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1305 x = NCDR(x);
1306 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1307 if (tlen < 0) return tlen;
1308 len += tlen;
1309 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1310 }
1311 else {
1312 return ONIGERR_PARSER_BUG;
1313 }
1314 break;
1315
1316 case ENCLOSE_ABSENT:
1317 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1318 break;
1319
1320 default:
1321 return ONIGERR_TYPE_BUG;
1322 break;
1323 }
1324
1325 return len;
1326}
1327
1328static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1329
1330static int
1331compile_enclose_node(EncloseNode* node, regex_t* reg)
1332{
1333 int r, len;
1334
1335 if (node->type == ENCLOSE_OPTION)
1336 return compile_option_node(node, reg);
1337
1338 switch (node->type) {
1339 case ENCLOSE_MEMORY:
1340#ifdef USE_SUBEXP_CALL
1341 if (IS_ENCLOSE_CALLED(node)) {
1342 r = add_opcode(reg, OP_CALL);
1343 if (r) return r;
1344 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1345 node->state |= NST_ADDR_FIXED;
1346 r = add_abs_addr(reg, (int )node->call_addr);
1347 if (r) return r;
1348 len = compile_length_tree(node->target, reg);
1349 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1350 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1351 len += (IS_ENCLOSE_RECURSION(node)
1352 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1353 else
1354 len += (IS_ENCLOSE_RECURSION(node)
1355 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1356
1357 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1358 if (r) return r;
1359 }
1360#endif
1361 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1362 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1363 else
1364 r = add_opcode(reg, OP_MEMORY_START);
1365 if (r) return r;
1366 r = add_mem_num(reg, node->regnum);
1367 if (r) return r;
1368 r = compile_tree(node->target, reg);
1369 if (r) return r;
1370#ifdef USE_SUBEXP_CALL
1371 if (IS_ENCLOSE_CALLED(node)) {
1372 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1373 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1374 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1375 else
1376 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1377 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1378
1379 if (r) return r;
1380 r = add_mem_num(reg, node->regnum);
1381 if (r) return r;
1382 r = add_opcode(reg, OP_RETURN);
1383 }
1384 else if (IS_ENCLOSE_RECURSION(node)) {
1385 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1386 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1387 else
1388 r = add_opcode(reg, OP_MEMORY_END_REC);
1389 if (r) return r;
1390 r = add_mem_num(reg, node->regnum);
1391 }
1392 else
1393#endif
1394 {
1395 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1396 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1397 else
1398 r = add_opcode(reg, OP_MEMORY_END);
1399 if (r) return r;
1400 r = add_mem_num(reg, node->regnum);
1401 }
1402 break;
1403
1404 case ENCLOSE_STOP_BACKTRACK:
1405 /* Disable POP_STOP_BT optimization for simple repeat under the match cache */
1406 /* optimization because the match cache optimization pushes an extra item to */
1407 /* the stack and it breaks the assumption for this optimization. */
1408#ifndef USE_MATCH_CACHE
1409 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1410 QtfrNode* qn = NQTFR(node->target);
1411 r = compile_tree_n_times(qn->target, qn->lower, reg);
1412 if (r) return r;
1413
1414 len = compile_length_tree(qn->target, reg);
1415 if (len < 0) return len;
1416
1417 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1418 if (r) return r;
1419 r = compile_tree(qn->target, reg);
1420 if (r) return r;
1421 r = add_opcode(reg, OP_POP);
1422 if (r) return r;
1423 r = add_opcode_rel_addr(reg, OP_JUMP,
1424 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1425 }
1426 else {
1427#endif
1428 r = add_opcode(reg, OP_PUSH_STOP_BT);
1429 if (r) return r;
1430 r = compile_tree(node->target, reg);
1431 if (r) return r;
1432 r = add_opcode(reg, OP_POP_STOP_BT);
1433#ifndef USE_MATCH_CACHE
1434 }
1435#endif
1436 break;
1437
1438 case ENCLOSE_CONDITION:
1439 r = add_opcode(reg, OP_CONDITION);
1440 if (r) return r;
1441 r = add_mem_num(reg, node->regnum);
1442 if (r) return r;
1443
1444 if (NTYPE(node->target) == NT_ALT) {
1445 Node* x = node->target;
1446 int len2;
1447
1448 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1449 if (len < 0) return len;
1450 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1451 x = NCDR(x);
1452 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1453 if (len2 < 0) return len2;
1454 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1455
1456 x = node->target;
1457 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1458 if (r) return r;
1459 r = compile_tree(NCAR(x), reg); /* yes-node */
1460 if (r) return r;
1461 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1462 if (r) return r;
1463 x = NCDR(x);
1464 r = compile_tree(NCAR(x), reg); /* no-node */
1465 }
1466 else {
1467 return ONIGERR_PARSER_BUG;
1468 }
1469 break;
1470
1471 case ENCLOSE_ABSENT:
1472 len = compile_length_tree(node->target, reg);
1473 if (len < 0) return len;
1474
1475 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1476 if (r) return r;
1477 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1478 if (r) return r;
1479 r = compile_tree(node->target, reg);
1480 if (r) return r;
1481 r = add_opcode(reg, OP_ABSENT_END);
1482 break;
1483
1484 default:
1485 return ONIGERR_TYPE_BUG;
1486 break;
1487 }
1488
1489 return r;
1490}
1491
1492static int
1493compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1494{
1495 int len;
1496 int tlen = 0;
1497
1498 if (node->target) {
1499 tlen = compile_length_tree(node->target, reg);
1500 if (tlen < 0) return tlen;
1501 }
1502
1503 switch (node->type) {
1504 case ANCHOR_PREC_READ:
1505 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1506 break;
1507 case ANCHOR_PREC_READ_NOT:
1508 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1509 break;
1510 case ANCHOR_LOOK_BEHIND:
1511 len = SIZE_OP_LOOK_BEHIND + tlen;
1512 break;
1513 case ANCHOR_LOOK_BEHIND_NOT:
1514 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1515 break;
1516
1517 default:
1518 len = SIZE_OPCODE;
1519 break;
1520 }
1521
1522 return len;
1523}
1524
1525static int
1526compile_anchor_node(AnchorNode* node, regex_t* reg)
1527{
1528 int r, len;
1529
1530 switch (node->type) {
1531 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1532 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1533 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1534 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1535 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1536 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1537
1538 case ANCHOR_WORD_BOUND:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1540 else r = add_opcode(reg, OP_WORD_BOUND);
1541 break;
1542 case ANCHOR_NOT_WORD_BOUND:
1543 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1544 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1545 break;
1546#ifdef USE_WORD_BEGIN_END
1547 case ANCHOR_WORD_BEGIN:
1548 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1549 else r = add_opcode(reg, OP_WORD_BEGIN);
1550 break;
1551 case ANCHOR_WORD_END:
1552 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1553 else r = add_opcode(reg, OP_WORD_END);
1554 break;
1555#endif
1556 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1557
1558 case ANCHOR_PREC_READ:
1559 r = add_opcode(reg, OP_PUSH_POS);
1560 if (r) return r;
1561 r = compile_tree(node->target, reg);
1562 if (r) return r;
1563 r = add_opcode(reg, OP_POP_POS);
1564 break;
1565
1566 case ANCHOR_PREC_READ_NOT:
1567 len = compile_length_tree(node->target, reg);
1568 if (len < 0) return len;
1569 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1570 if (r) return r;
1571 r = compile_tree(node->target, reg);
1572 if (r) return r;
1573 r = add_opcode(reg, OP_FAIL_POS);
1574 break;
1575
1576 case ANCHOR_LOOK_BEHIND:
1577 {
1578 int n;
1579 r = add_opcode(reg, OP_LOOK_BEHIND);
1580 if (r) return r;
1581 if (node->char_len < 0) {
1582 r = get_char_length_tree(node->target, reg, &n);
1583 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1584 }
1585 else
1586 n = node->char_len;
1587 r = add_length(reg, n);
1588 if (r) return r;
1589 r = compile_tree(node->target, reg);
1590 }
1591 break;
1592
1593 case ANCHOR_LOOK_BEHIND_NOT:
1594 {
1595 int n;
1596 len = compile_length_tree(node->target, reg);
1597 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1598 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1599 if (r) return r;
1600 if (node->char_len < 0) {
1601 r = get_char_length_tree(node->target, reg, &n);
1602 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1603 }
1604 else
1605 n = node->char_len;
1606 r = add_length(reg, n);
1607 if (r) return r;
1608 r = compile_tree(node->target, reg);
1609 if (r) return r;
1610 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1611 }
1612 break;
1613
1614 default:
1615 return ONIGERR_TYPE_BUG;
1616 break;
1617 }
1618
1619 return r;
1620}
1621
1622static int
1623compile_length_tree(Node* node, regex_t* reg)
1624{
1625 int len, type, r;
1626
1627 type = NTYPE(node);
1628 switch (type) {
1629 case NT_LIST:
1630 len = 0;
1631 do {
1632 r = compile_length_tree(NCAR(node), reg);
1633 if (r < 0) return r;
1634 len += r;
1635 } while (IS_NOT_NULL(node = NCDR(node)));
1636 r = len;
1637 break;
1638
1639 case NT_ALT:
1640 {
1641 int n = 0;
1642 len = 0;
1643 do {
1644 r = compile_length_tree(NCAR(node), reg);
1645 if (r < 0) return r;
1646 len += r;
1647 n++;
1648 } while (IS_NOT_NULL(node = NCDR(node)));
1649 r = len;
1650 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1651 }
1652 break;
1653
1654 case NT_STR:
1655 if (NSTRING_IS_RAW(node))
1656 r = compile_length_string_raw_node(NSTR(node), reg);
1657 else
1658 r = compile_length_string_node(node, reg);
1659 break;
1660
1661 case NT_CCLASS:
1662 r = compile_length_cclass_node(NCCLASS(node), reg);
1663 break;
1664
1665 case NT_CTYPE:
1666 case NT_CANY:
1667 r = SIZE_OPCODE;
1668 break;
1669
1670 case NT_BREF:
1671 {
1672 BRefNode* br = NBREF(node);
1673
1674#ifdef USE_BACKREF_WITH_LEVEL
1675 if (IS_BACKREF_NEST_LEVEL(br)) {
1676 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1677 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1678 }
1679 else
1680#endif
1681 if (br->back_num == 1) {
1682 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1683 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1684 }
1685 else {
1686 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1687 }
1688 }
1689 break;
1690
1691#ifdef USE_SUBEXP_CALL
1692 case NT_CALL:
1693 r = SIZE_OP_CALL;
1694 break;
1695#endif
1696
1697 case NT_QTFR:
1698 r = compile_length_quantifier_node(NQTFR(node), reg);
1699 break;
1700
1701 case NT_ENCLOSE:
1702 r = compile_length_enclose_node(NENCLOSE(node), reg);
1703 break;
1704
1705 case NT_ANCHOR:
1706 r = compile_length_anchor_node(NANCHOR(node), reg);
1707 break;
1708
1709 default:
1710 return ONIGERR_TYPE_BUG;
1711 break;
1712 }
1713
1714 return r;
1715}
1716
1717static int
1718compile_tree(Node* node, regex_t* reg)
1719{
1720 int n, type, len, pos, r = 0;
1721
1722 type = NTYPE(node);
1723 switch (type) {
1724 case NT_LIST:
1725 do {
1726 r = compile_tree(NCAR(node), reg);
1727 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1728 break;
1729
1730 case NT_ALT:
1731 {
1732 Node* x = node;
1733 len = 0;
1734 do {
1735 len += compile_length_tree(NCAR(x), reg);
1736 if (NCDR(x) != NULL) {
1737 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1738 }
1739 } while (IS_NOT_NULL(x = NCDR(x)));
1740 pos = reg->used + len; /* goal position */
1741
1742 do {
1743 len = compile_length_tree(NCAR(node), reg);
1744 if (IS_NOT_NULL(NCDR(node))) {
1745 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1746 if (r) break;
1747 }
1748 r = compile_tree(NCAR(node), reg);
1749 if (r) break;
1750 if (IS_NOT_NULL(NCDR(node))) {
1751 len = pos - (reg->used + SIZE_OP_JUMP);
1752 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1753 if (r) break;
1754 }
1755 } while (IS_NOT_NULL(node = NCDR(node)));
1756 }
1757 break;
1758
1759 case NT_STR:
1760 if (NSTRING_IS_RAW(node))
1761 r = compile_string_raw_node(NSTR(node), reg);
1762 else
1763 r = compile_string_node(node, reg);
1764 break;
1765
1766 case NT_CCLASS:
1767 r = compile_cclass_node(NCCLASS(node), reg);
1768 break;
1769
1770 case NT_CTYPE:
1771 {
1772 int op;
1773
1774 switch (NCTYPE(node)->ctype) {
1775 case ONIGENC_CTYPE_WORD:
1776 if (NCTYPE(node)->ascii_range != 0) {
1777 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1778 else op = OP_ASCII_WORD;
1779 }
1780 else {
1781 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1782 else op = OP_WORD;
1783 }
1784 break;
1785 default:
1786 return ONIGERR_TYPE_BUG;
1787 break;
1788 }
1789 r = add_opcode(reg, op);
1790 }
1791 break;
1792
1793 case NT_CANY:
1794 if (IS_MULTILINE(reg->options))
1795 r = add_opcode(reg, OP_ANYCHAR_ML);
1796 else
1797 r = add_opcode(reg, OP_ANYCHAR);
1798 break;
1799
1800 case NT_BREF:
1801 {
1802 BRefNode* br = NBREF(node);
1803
1804#ifdef USE_BACKREF_WITH_LEVEL
1805 if (IS_BACKREF_NEST_LEVEL(br)) {
1806 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1807 if (r) return r;
1808 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1809 if (r) return r;
1810 r = add_length(reg, br->nest_level);
1811 if (r) return r;
1812
1813 goto add_bacref_mems;
1814 }
1815 else
1816#endif
1817 if (br->back_num == 1) {
1818 n = br->back_static[0];
1819 if (IS_IGNORECASE(reg->options)) {
1820 r = add_opcode(reg, OP_BACKREFN_IC);
1821 if (r) return r;
1822 r = add_mem_num(reg, n);
1823 }
1824 else {
1825 switch (n) {
1826 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1827 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1828 default:
1829 r = add_opcode(reg, OP_BACKREFN);
1830 if (r) return r;
1831 r = add_mem_num(reg, n);
1832 break;
1833 }
1834 }
1835 }
1836 else {
1837 int i;
1838 int* p;
1839
1840 if (IS_IGNORECASE(reg->options)) {
1841 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1842 }
1843 else {
1844 r = add_opcode(reg, OP_BACKREF_MULTI);
1845 }
1846 if (r) return r;
1847
1848#ifdef USE_BACKREF_WITH_LEVEL
1849 add_bacref_mems:
1850#endif
1851 r = add_length(reg, br->back_num);
1852 if (r) return r;
1853 p = BACKREFS_P(br);
1854 for (i = br->back_num - 1; i >= 0; i--) {
1855 r = add_mem_num(reg, p[i]);
1856 if (r) return r;
1857 }
1858 }
1859 }
1860 break;
1861
1862#ifdef USE_SUBEXP_CALL
1863 case NT_CALL:
1864 r = compile_call(NCALL(node), reg);
1865 break;
1866#endif
1867
1868 case NT_QTFR:
1869 r = compile_quantifier_node(NQTFR(node), reg);
1870 break;
1871
1872 case NT_ENCLOSE:
1873 r = compile_enclose_node(NENCLOSE(node), reg);
1874 break;
1875
1876 case NT_ANCHOR:
1877 r = compile_anchor_node(NANCHOR(node), reg);
1878 break;
1879
1880 default:
1881#ifdef ONIG_DEBUG
1882 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1883#endif
1884 break;
1885 }
1886
1887 return r;
1888}
1889
1890#ifdef USE_NAMED_GROUP
1891
1892static int
1893noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1894{
1895 int r = 0;
1896 Node* node = *plink;
1897
1898 switch (NTYPE(node)) {
1899 case NT_LIST:
1900 case NT_ALT:
1901 do {
1902 r = noname_disable_map(&(NCAR(node)), map, counter);
1903 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1904 break;
1905
1906 case NT_QTFR:
1907 {
1908 Node** ptarget = &(NQTFR(node)->target);
1909 Node* old = *ptarget;
1910 r = noname_disable_map(ptarget, map, counter);
1911 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1912 onig_reduce_nested_quantifier(node, *ptarget);
1913 }
1914 }
1915 break;
1916
1917 case NT_ENCLOSE:
1918 {
1919 EncloseNode* en = NENCLOSE(node);
1920 if (en->type == ENCLOSE_MEMORY) {
1921 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1922 (*counter)++;
1923 map[en->regnum].new_val = *counter;
1924 en->regnum = *counter;
1925 }
1926 else if (en->regnum != 0) {
1927 *plink = en->target;
1928 en->target = NULL_NODE;
1929 onig_node_free(node);
1930 r = noname_disable_map(plink, map, counter);
1931 break;
1932 }
1933 }
1934 r = noname_disable_map(&(en->target), map, counter);
1935 }
1936 break;
1937
1938 case NT_ANCHOR:
1939 if (NANCHOR(node)->target)
1940 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1941 break;
1942
1943 default:
1944 break;
1945 }
1946
1947 return r;
1948}
1949
1950static int
1951renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1952{
1953 int i, pos, n, old_num;
1954 int *backs;
1955 BRefNode* bn = NBREF(node);
1956
1957 if (! IS_BACKREF_NAME_REF(bn))
1958 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1959
1960 old_num = bn->back_num;
1961 if (IS_NULL(bn->back_dynamic))
1962 backs = bn->back_static;
1963 else
1964 backs = bn->back_dynamic;
1965
1966 for (i = 0, pos = 0; i < old_num; i++) {
1967 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1968 n = map[backs[i]].new_val;
1969 if (n > 0) {
1970 backs[pos] = n;
1971 pos++;
1972 }
1973 }
1974
1975 bn->back_num = pos;
1976 return 0;
1977}
1978
1979static int
1980renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1981{
1982 int r = 0;
1983
1984 switch (NTYPE(node)) {
1985 case NT_LIST:
1986 case NT_ALT:
1987 do {
1988 r = renumber_by_map(NCAR(node), map, num_mem);
1989 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1990 break;
1991 case NT_QTFR:
1992 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1993 break;
1994 case NT_ENCLOSE:
1995 {
1996 EncloseNode* en = NENCLOSE(node);
1997 if (en->type == ENCLOSE_CONDITION) {
1998 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1999 en->regnum = map[en->regnum].new_val;
2000 }
2001 r = renumber_by_map(en->target, map, num_mem);
2002 }
2003 break;
2004
2005 case NT_BREF:
2006 r = renumber_node_backref(node, map, num_mem);
2007 break;
2008
2009 case NT_ANCHOR:
2010 if (NANCHOR(node)->target)
2011 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2012 break;
2013
2014 default:
2015 break;
2016 }
2017
2018 return r;
2019}
2020
2021static int
2022numbered_ref_check(Node* node)
2023{
2024 int r = 0;
2025
2026 switch (NTYPE(node)) {
2027 case NT_LIST:
2028 case NT_ALT:
2029 do {
2030 r = numbered_ref_check(NCAR(node));
2031 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2032 break;
2033 case NT_QTFR:
2034 r = numbered_ref_check(NQTFR(node)->target);
2035 break;
2036 case NT_ENCLOSE:
2037 r = numbered_ref_check(NENCLOSE(node)->target);
2038 break;
2039
2040 case NT_BREF:
2041 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2042 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2043 break;
2044
2045 case NT_ANCHOR:
2046 if (NANCHOR(node)->target)
2047 r = numbered_ref_check(NANCHOR(node)->target);
2048 break;
2049
2050 default:
2051 break;
2052 }
2053
2054 return r;
2055}
2056
2057static int
2058disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2059{
2060 int r, i, pos, counter;
2061 BitStatusType loc;
2062 GroupNumRemap* map;
2063
2064 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2065 CHECK_NULL_RETURN_MEMERR(map);
2066 for (i = 1; i <= env->num_mem; i++) {
2067 map[i].new_val = 0;
2068 }
2069 counter = 0;
2070 r = noname_disable_map(root, map, &counter);
2071 if (r != 0) return r;
2072
2073 r = renumber_by_map(*root, map, env->num_mem);
2074 if (r != 0) return r;
2075
2076 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2077 if (map[i].new_val > 0) {
2078 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2079 pos++;
2080 }
2081 }
2082
2083 loc = env->capture_history;
2084 BIT_STATUS_CLEAR(env->capture_history);
2085 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2086 if (BIT_STATUS_AT(loc, i)) {
2087 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2088 }
2089 }
2090
2091 env->num_mem = env->num_named;
2092 reg->num_mem = env->num_named;
2093
2094 return onig_renumber_name_table(reg, map);
2095}
2096#endif /* USE_NAMED_GROUP */
2097
2098#ifdef USE_SUBEXP_CALL
2099static int
2100unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2101{
2102 int i, offset;
2103 EncloseNode* en;
2104 AbsAddrType addr;
2105
2106 for (i = 0; i < uslist->num; i++) {
2107 en = NENCLOSE(uslist->us[i].target);
2108 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2109 addr = en->call_addr;
2110 offset = uslist->us[i].offset;
2111
2112 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2113 }
2114 return 0;
2115}
2116#endif
2117
2118#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2119static int
2120quantifiers_memory_node_info(Node* node)
2121{
2122 int r = 0;
2123
2124 switch (NTYPE(node)) {
2125 case NT_LIST:
2126 case NT_ALT:
2127 {
2128 int v;
2129 do {
2130 v = quantifiers_memory_node_info(NCAR(node));
2131 if (v > r) r = v;
2132 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2133 }
2134 break;
2135
2136# ifdef USE_SUBEXP_CALL
2137 case NT_CALL:
2138 if (IS_CALL_RECURSION(NCALL(node))) {
2139 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2140 }
2141 else
2142 r = quantifiers_memory_node_info(NCALL(node)->target);
2143 break;
2144# endif
2145
2146 case NT_QTFR:
2147 {
2148 QtfrNode* qn = NQTFR(node);
2149 if (qn->upper != 0) {
2150 r = quantifiers_memory_node_info(qn->target);
2151 }
2152 }
2153 break;
2154
2155 case NT_ENCLOSE:
2156 {
2157 EncloseNode* en = NENCLOSE(node);
2158 switch (en->type) {
2159 case ENCLOSE_MEMORY:
2160 return NQ_TARGET_IS_EMPTY_MEM;
2161 break;
2162
2163 case ENCLOSE_OPTION:
2164 case ENCLOSE_STOP_BACKTRACK:
2165 case ENCLOSE_CONDITION:
2166 case ENCLOSE_ABSENT:
2167 r = quantifiers_memory_node_info(en->target);
2168 break;
2169 default:
2170 break;
2171 }
2172 }
2173 break;
2174
2175 case NT_BREF:
2176 case NT_STR:
2177 case NT_CTYPE:
2178 case NT_CCLASS:
2179 case NT_CANY:
2180 case NT_ANCHOR:
2181 default:
2182 break;
2183 }
2184
2185 return r;
2186}
2187#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2188
2189static int
2190get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2191{
2192 OnigDistance tmin;
2193 int r = 0;
2194
2195 *min = 0;
2196 switch (NTYPE(node)) {
2197 case NT_BREF:
2198 {
2199 int i;
2200 int* backs;
2201 Node** nodes = SCANENV_MEM_NODES(env);
2202 BRefNode* br = NBREF(node);
2203 if (br->state & NST_RECURSION) break;
2204
2205 backs = BACKREFS_P(br);
2206 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2207 r = get_min_match_length(nodes[backs[0]], min, env);
2208 if (r != 0) break;
2209 for (i = 1; i < br->back_num; i++) {
2210 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2211 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2212 if (r != 0) break;
2213 if (*min > tmin) *min = tmin;
2214 }
2215 }
2216 break;
2217
2218#ifdef USE_SUBEXP_CALL
2219 case NT_CALL:
2220 if (IS_CALL_RECURSION(NCALL(node))) {
2221 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2222 if (IS_ENCLOSE_MIN_FIXED(en))
2223 *min = en->min_len;
2224 }
2225 else
2226 r = get_min_match_length(NCALL(node)->target, min, env);
2227 break;
2228#endif
2229
2230 case NT_LIST:
2231 do {
2232 r = get_min_match_length(NCAR(node), &tmin, env);
2233 if (r == 0) *min += tmin;
2234 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2235 break;
2236
2237 case NT_ALT:
2238 {
2239 Node *x, *y;
2240 y = node;
2241 do {
2242 x = NCAR(y);
2243 r = get_min_match_length(x, &tmin, env);
2244 if (r != 0) break;
2245 if (y == node) *min = tmin;
2246 else if (*min > tmin) *min = tmin;
2247 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2248 }
2249 break;
2250
2251 case NT_STR:
2252 {
2253 StrNode* sn = NSTR(node);
2254 *min = sn->end - sn->s;
2255 }
2256 break;
2257
2258 case NT_CTYPE:
2259 *min = 1;
2260 break;
2261
2262 case NT_CCLASS:
2263 case NT_CANY:
2264 *min = 1;
2265 break;
2266
2267 case NT_QTFR:
2268 {
2269 QtfrNode* qn = NQTFR(node);
2270
2271 if (qn->lower > 0) {
2272 r = get_min_match_length(qn->target, min, env);
2273 if (r == 0)
2274 *min = distance_multiply(*min, qn->lower);
2275 }
2276 }
2277 break;
2278
2279 case NT_ENCLOSE:
2280 {
2281 EncloseNode* en = NENCLOSE(node);
2282 switch (en->type) {
2283 case ENCLOSE_MEMORY:
2284 if (IS_ENCLOSE_MIN_FIXED(en))
2285 *min = en->min_len;
2286 else {
2287 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2288 *min = 0; /* recursive */
2289 else {
2290 SET_ENCLOSE_STATUS(node, NST_MARK1);
2291 r = get_min_match_length(en->target, min, env);
2292 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2293 if (r == 0) {
2294 en->min_len = *min;
2295 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2296 }
2297 }
2298 }
2299 break;
2300
2301 case ENCLOSE_OPTION:
2302 case ENCLOSE_STOP_BACKTRACK:
2303 case ENCLOSE_CONDITION:
2304 r = get_min_match_length(en->target, min, env);
2305 break;
2306
2307 case ENCLOSE_ABSENT:
2308 break;
2309 }
2310 }
2311 break;
2312
2313 case NT_ANCHOR:
2314 default:
2315 break;
2316 }
2317
2318 return r;
2319}
2320
2321static int
2322get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2323{
2324 OnigDistance tmax;
2325 int r = 0;
2326
2327 *max = 0;
2328 switch (NTYPE(node)) {
2329 case NT_LIST:
2330 do {
2331 r = get_max_match_length(NCAR(node), &tmax, env);
2332 if (r == 0)
2333 *max = distance_add(*max, tmax);
2334 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2335 break;
2336
2337 case NT_ALT:
2338 do {
2339 r = get_max_match_length(NCAR(node), &tmax, env);
2340 if (r == 0 && *max < tmax) *max = tmax;
2341 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2342 break;
2343
2344 case NT_STR:
2345 {
2346 StrNode* sn = NSTR(node);
2347 *max = sn->end - sn->s;
2348 }
2349 break;
2350
2351 case NT_CTYPE:
2352 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2353 break;
2354
2355 case NT_CCLASS:
2356 case NT_CANY:
2357 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2358 break;
2359
2360 case NT_BREF:
2361 {
2362 int i;
2363 int* backs;
2364 Node** nodes = SCANENV_MEM_NODES(env);
2365 BRefNode* br = NBREF(node);
2366 if (br->state & NST_RECURSION) {
2367 *max = ONIG_INFINITE_DISTANCE;
2368 break;
2369 }
2370 backs = BACKREFS_P(br);
2371 for (i = 0; i < br->back_num; i++) {
2372 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2373 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2374 if (r != 0) break;
2375 if (*max < tmax) *max = tmax;
2376 }
2377 }
2378 break;
2379
2380#ifdef USE_SUBEXP_CALL
2381 case NT_CALL:
2382 if (! IS_CALL_RECURSION(NCALL(node)))
2383 r = get_max_match_length(NCALL(node)->target, max, env);
2384 else
2385 *max = ONIG_INFINITE_DISTANCE;
2386 break;
2387#endif
2388
2389 case NT_QTFR:
2390 {
2391 QtfrNode* qn = NQTFR(node);
2392
2393 if (qn->upper != 0) {
2394 r = get_max_match_length(qn->target, max, env);
2395 if (r == 0 && *max != 0) {
2396 if (! IS_REPEAT_INFINITE(qn->upper))
2397 *max = distance_multiply(*max, qn->upper);
2398 else
2399 *max = ONIG_INFINITE_DISTANCE;
2400 }
2401 }
2402 }
2403 break;
2404
2405 case NT_ENCLOSE:
2406 {
2407 EncloseNode* en = NENCLOSE(node);
2408 switch (en->type) {
2409 case ENCLOSE_MEMORY:
2410 if (IS_ENCLOSE_MAX_FIXED(en))
2411 *max = en->max_len;
2412 else {
2413 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2414 *max = ONIG_INFINITE_DISTANCE;
2415 else {
2416 SET_ENCLOSE_STATUS(node, NST_MARK1);
2417 r = get_max_match_length(en->target, max, env);
2418 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2419 if (r == 0) {
2420 en->max_len = *max;
2421 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2422 }
2423 }
2424 }
2425 break;
2426
2427 case ENCLOSE_OPTION:
2428 case ENCLOSE_STOP_BACKTRACK:
2429 case ENCLOSE_CONDITION:
2430 r = get_max_match_length(en->target, max, env);
2431 break;
2432
2433 case ENCLOSE_ABSENT:
2434 break;
2435 }
2436 }
2437 break;
2438
2439 case NT_ANCHOR:
2440 default:
2441 break;
2442 }
2443
2444 return r;
2445}
2446
2447#define GET_CHAR_LEN_VARLEN -1
2448#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2449
2450/* fixed size pattern node only */
2451static int
2452get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2453{
2454 int tlen;
2455 int r = 0;
2456
2457 level++;
2458 *len = 0;
2459 switch (NTYPE(node)) {
2460 case NT_LIST:
2461 do {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2463 if (r == 0)
2464 *len = (int )distance_add(*len, tlen);
2465 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2466 break;
2467
2468 case NT_ALT:
2469 {
2470 int tlen2;
2471 int varlen = 0;
2472
2473 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2474 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2475 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2476 if (r == 0) {
2477 if (tlen != tlen2)
2478 varlen = 1;
2479 }
2480 }
2481 if (r == 0) {
2482 if (varlen != 0) {
2483 if (level == 1)
2484 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2485 else
2486 r = GET_CHAR_LEN_VARLEN;
2487 }
2488 else
2489 *len = tlen;
2490 }
2491 }
2492 break;
2493
2494 case NT_STR:
2495 {
2496 StrNode* sn = NSTR(node);
2497 UChar *s = sn->s;
2498 while (s < sn->end) {
2499 s += enclen(reg->enc, s, sn->end);
2500 (*len)++;
2501 }
2502 }
2503 break;
2504
2505 case NT_QTFR:
2506 {
2507 QtfrNode* qn = NQTFR(node);
2508 if (qn->lower == qn->upper) {
2509 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2510 if (r == 0)
2511 *len = (int )distance_multiply(tlen, qn->lower);
2512 }
2513 else
2514 r = GET_CHAR_LEN_VARLEN;
2515 }
2516 break;
2517
2518#ifdef USE_SUBEXP_CALL
2519 case NT_CALL:
2520 if (! IS_CALL_RECURSION(NCALL(node)))
2521 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2522 else
2523 r = GET_CHAR_LEN_VARLEN;
2524 break;
2525#endif
2526
2527 case NT_CTYPE:
2528 *len = 1;
2529 break;
2530
2531 case NT_CCLASS:
2532 case NT_CANY:
2533 *len = 1;
2534 break;
2535
2536 case NT_ENCLOSE:
2537 {
2538 EncloseNode* en = NENCLOSE(node);
2539 switch (en->type) {
2540 case ENCLOSE_MEMORY:
2541#ifdef USE_SUBEXP_CALL
2542 if (IS_ENCLOSE_CLEN_FIXED(en))
2543 *len = en->char_len;
2544 else {
2545 r = get_char_length_tree1(en->target, reg, len, level);
2546 if (r == 0) {
2547 en->char_len = *len;
2548 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2549 }
2550 }
2551 break;
2552#endif
2553 case ENCLOSE_OPTION:
2554 case ENCLOSE_STOP_BACKTRACK:
2555 case ENCLOSE_CONDITION:
2556 r = get_char_length_tree1(en->target, reg, len, level);
2557 break;
2558 case ENCLOSE_ABSENT:
2559 default:
2560 break;
2561 }
2562 }
2563 break;
2564
2565 case NT_ANCHOR:
2566 break;
2567
2568 default:
2569 r = GET_CHAR_LEN_VARLEN;
2570 break;
2571 }
2572
2573 return r;
2574}
2575
2576static int
2577get_char_length_tree(Node* node, regex_t* reg, int* len)
2578{
2579 return get_char_length_tree1(node, reg, len, 0);
2580}
2581
/*
 * x is not included y ==> 1 : 0
 *
 * Conservative test on two head-value nodes.  Returns 1 only when one of the
 * checks below establishes that x and y have no value in common; returns 0
 * in every other case, including node-type combinations that are not
 * examined at all.
 *
 * Only NT_CTYPE, NT_CCLASS and NT_STR operands are analysed.  Pairs that are
 * handled in the opposite order are exchanged at the "swap" label and the
 * test restarts from "retry".
 */
static int
is_not_included(Node* x, Node* y, regex_t* reg)
{
  int i;
  OnigDistance len;
  OnigCodePoint code;
  UChar *p;
  int ytype;

 retry:
  ytype = NTYPE(y);
  switch (NTYPE(x)) {
  case NT_CTYPE:
    {
      switch (ytype) {
      case NT_CTYPE:
        /* same ctype and ascii_range but opposite negation: disjoint */
        if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
            NCTYPE(y)->not   != NCTYPE(x)->not &&
            NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
          return 1;
        else
          return 0;
        break;

      case NT_CCLASS:
      swap:
        /* shared by every "goto swap": exchange the operands and redo the
           test so the pair is handled by the case written for it */
        {
          Node* tmp;
          tmp = x; x = y; y = tmp;
          goto retry;
        }
        break;

      case NT_STR:
        goto swap;
        break;

      default:
        break;
      }
    }
    break;

  case NT_CCLASS:
    {
      CClassNode* xc = NCCLASS(x);
      switch (ytype) {
      case NT_CTYPE:
        switch (NCTYPE(y)->ctype) {
        case ONIGENC_CTYPE_WORD:
          if (NCTYPE(y)->not == 0) {
            /* y is a non-negated word type: only decided for a non-negated
               class without a multibyte buffer; any set single-byte code
               that is a word character means overlap */
            if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
              for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
                if (BITSET_AT(xc->bs, i)) {
                  if (NCTYPE(y)->ascii_range) {
                    if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
                  }
                  else {
                    if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
                  }
                }
              }
              return 1;
            }
            return 0;
          }
          else {
            /* y is a negated word type: give up when x has a multibyte
               buffer; otherwise any non-word single-byte code that x
               accepts (taking x's own negation into account) means
               overlap */
            if (IS_NOT_NULL(xc->mbuf)) return 0;
            for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
              int is_word;
              if (NCTYPE(y)->ascii_range)
                is_word = IS_CODE_SB_WORD(reg->enc, i);
              else
                is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
              if (! is_word) {
                if (!IS_NCCLASS_NOT(xc)) {
                  if (BITSET_AT(xc->bs, i))
                    return 0;
                }
                else {
                  if (! BITSET_AT(xc->bs, i))
                    return 0;
                }
              }
            }
            return 1;
          }
          break;

        default:
          break;
        }
        break;

      case NT_CCLASS:
        {
          int v;
          CClassNode* yc = NCCLASS(y);

          /* a single-byte code accepted by both classes (each bitset read
             together with its NOT flag) means overlap */
          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
            v = BITSET_AT(xc->bs, i);
            if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
                (v == 0 && IS_NCCLASS_NOT(xc))) {
              v = BITSET_AT(yc->bs, i);
              if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
                  (v == 0 && IS_NCCLASS_NOT(yc)))
                return 0;
            }
          }
          /* no single-byte overlap: disjoint only if at least one class is
             non-negated and has no multibyte buffer */
          if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
              (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
            return 1;
          return 0;
        }
        break;

      case NT_STR:
        goto swap;
        break;

      default:
        break;
      }
    }
    break;

  case NT_STR:
    {
      StrNode* xs = NSTR(x);
      /* an empty string decides nothing: fall out and return 0 */
      if (NSTRING_LEN(x) == 0)
        break;

      switch (ytype) {
      case NT_CTYPE:
        switch (NCTYPE(y)->ctype) {
        case ONIGENC_CTYPE_WORD:
          /* test the character at the start of the string against the word
             type; the answer is flipped by y's negation flag */
          if (NCTYPE(y)->ascii_range) {
            if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
              return NCTYPE(y)->not;
            else
              return !(NCTYPE(y)->not);
          }
          else {
            if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
              return NCTYPE(y)->not;
            else
              return !(NCTYPE(y)->not);
          }
          break;
        default:
          break;
        }
        break;

      case NT_CCLASS:
        {
          CClassNode* cc = NCCLASS(y);

          /* decode the code point at the start of the string and test it
             for membership in the class */
          /* NOTE(review): the end pointer passed to the decoder is
             xs->s + ONIGENC_MBC_MAXLEN, not xs->end; presumably the string
             buffer is always large enough for that -- confirm. */
          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
          return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
        }
        break;

      case NT_STR:
        {
          UChar *q;
          StrNode* ys = NSTR(y);
          /* compare over the length of the shorter string */
          len = NSTRING_LEN(x);
          if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
          if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
            /* tiny version */
            return 0;
          }
          else {
            /* byte-wise comparison; the first differing byte proves the
               strings are distinct.  An equal prefix falls through to the
               final "return 0". */
            for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
              if (*p != *q) return 1;
            }
          }
        }
        break;

      default:
        break;
      }
    }
    break;

  default:
    break;
  }

  return 0;
}
2777
2778static Node*
2779get_head_value_node(Node* node, int exact, regex_t* reg)
2780{
2781 Node* n = NULL_NODE;
2782
2783 switch (NTYPE(node)) {
2784 case NT_BREF:
2785 case NT_ALT:
2786 case NT_CANY:
2787#ifdef USE_SUBEXP_CALL
2788 case NT_CALL:
2789#endif
2790 break;
2791
2792 case NT_CTYPE:
2793 case NT_CCLASS:
2794 if (exact == 0) {
2795 n = node;
2796 }
2797 break;
2798
2799 case NT_LIST:
2800 n = get_head_value_node(NCAR(node), exact, reg);
2801 break;
2802
2803 case NT_STR:
2804 {
2805 StrNode* sn = NSTR(node);
2806
2807 if (sn->end <= sn->s)
2808 break;
2809
2810 if (exact != 0 &&
2811 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2812 }
2813 else {
2814 n = node;
2815 }
2816 }
2817 break;
2818
2819 case NT_QTFR:
2820 {
2821 QtfrNode* qn = NQTFR(node);
2822 if (qn->lower > 0) {
2823#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2824 if (IS_NOT_NULL(qn->head_exact))
2825 n = qn->head_exact;
2826 else
2827#endif
2828 n = get_head_value_node(qn->target, exact, reg);
2829 }
2830 }
2831 break;
2832
2833 case NT_ENCLOSE:
2834 {
2835 EncloseNode* en = NENCLOSE(node);
2836 switch (en->type) {
2837 case ENCLOSE_OPTION:
2838 {
2839 OnigOptionType options = reg->options;
2840
2841 reg->options = NENCLOSE(node)->option;
2842 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2843 reg->options = options;
2844 }
2845 break;
2846
2847 case ENCLOSE_MEMORY:
2848 case ENCLOSE_STOP_BACKTRACK:
2849 case ENCLOSE_CONDITION:
2850 n = get_head_value_node(en->target, exact, reg);
2851 break;
2852
2853 case ENCLOSE_ABSENT:
2854 break;
2855 }
2856 }
2857 break;
2858
2859 case NT_ANCHOR:
2860 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2861 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2862 break;
2863
2864 default:
2865 break;
2866 }
2867
2868 return n;
2869}
2870
2871static int
2872check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2873{
2874 int type, r = 0;
2875
2876 type = NTYPE(node);
2877 if ((NTYPE2BIT(type) & type_mask) == 0)
2878 return 1;
2879
2880 switch (type) {
2881 case NT_LIST:
2882 case NT_ALT:
2883 do {
2884 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2885 anchor_mask);
2886 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2887 break;
2888
2889 case NT_QTFR:
2890 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2891 anchor_mask);
2892 break;
2893
2894 case NT_ENCLOSE:
2895 {
2896 EncloseNode* en = NENCLOSE(node);
2897 if ((en->type & enclose_mask) == 0)
2898 return 1;
2899
2900 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2901 }
2902 break;
2903
2904 case NT_ANCHOR:
2905 type = NANCHOR(node)->type;
2906 if ((type & anchor_mask) == 0)
2907 return 1;
2908
2909 if (NANCHOR(node)->target)
2910 r = check_type_tree(NANCHOR(node)->target,
2911 type_mask, enclose_mask, anchor_mask);
2912 break;
2913
2914 default:
2915 break;
2916 }
2917 return r;
2918}
2919
2920#ifdef USE_SUBEXP_CALL
2921
2922# define RECURSION_EXIST 1
2923# define RECURSION_INFINITE 2
2924
2925static int
2926subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2927{
2928 int type;
2929 int r = 0;
2930
2931 type = NTYPE(node);
2932 switch (type) {
2933 case NT_LIST:
2934 {
2935 Node *x;
2936 OnigDistance min;
2937 int ret;
2938
2939 x = node;
2940 do {
2941 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2942 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2943 r |= ret;
2944 if (head) {
2945 ret = get_min_match_length(NCAR(x), &min, env);
2946 if (ret != 0) return ret;
2947 if (min != 0) head = 0;
2948 }
2949 } while (IS_NOT_NULL(x = NCDR(x)));
2950 }
2951 break;
2952
2953 case NT_ALT:
2954 {
2955 int ret;
2956 r = RECURSION_EXIST;
2957 do {
2958 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2959 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2960 r &= ret;
2961 } while (IS_NOT_NULL(node = NCDR(node)));
2962 }
2963 break;
2964
2965 case NT_QTFR:
2966 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2967 if (r == RECURSION_EXIST) {
2968 if (NQTFR(node)->lower == 0) r = 0;
2969 }
2970 break;
2971
2972 case NT_ANCHOR:
2973 {
2974 AnchorNode* an = NANCHOR(node);
2975 switch (an->type) {
2976 case ANCHOR_PREC_READ:
2977 case ANCHOR_PREC_READ_NOT:
2978 case ANCHOR_LOOK_BEHIND:
2979 case ANCHOR_LOOK_BEHIND_NOT:
2980 r = subexp_inf_recursive_check(an->target, env, head);
2981 break;
2982 }
2983 }
2984 break;
2985
2986 case NT_CALL:
2987 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2988 break;
2989
2990 case NT_ENCLOSE:
2991 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2992 return 0;
2993 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2994 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2995 else {
2996 SET_ENCLOSE_STATUS(node, NST_MARK2);
2997 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2998 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2999 }
3000 break;
3001
3002 default:
3003 break;
3004 }
3005
3006 return r;
3007}
3008
3009static int
3010subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
3011{
3012 int type;
3013 int r = 0;
3014
3015 type = NTYPE(node);
3016 switch (type) {
3017 case NT_LIST:
3018 case NT_ALT:
3019 do {
3020 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3021 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3022 break;
3023
3024 case NT_QTFR:
3025 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3026 break;
3027
3028 case NT_ANCHOR:
3029 {
3030 AnchorNode* an = NANCHOR(node);
3031 switch (an->type) {
3032 case ANCHOR_PREC_READ:
3033 case ANCHOR_PREC_READ_NOT:
3034 case ANCHOR_LOOK_BEHIND:
3035 case ANCHOR_LOOK_BEHIND_NOT:
3036 r = subexp_inf_recursive_check_trav(an->target, env);
3037 break;
3038 }
3039 }
3040 break;
3041
3042 case NT_ENCLOSE:
3043 {
3044 EncloseNode* en = NENCLOSE(node);
3045
3046 if (IS_ENCLOSE_RECURSION(en)) {
3047 SET_ENCLOSE_STATUS(node, NST_MARK1);
3048 r = subexp_inf_recursive_check(en->target, env, 1);
3049 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3050 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3051 }
3052 r = subexp_inf_recursive_check_trav(en->target, env);
3053 }
3054
3055 break;
3056
3057 default:
3058 break;
3059 }
3060
3061 return r;
3062}
3063
3064static int
3065subexp_recursive_check(Node* node)
3066{
3067 int r = 0;
3068
3069 switch (NTYPE(node)) {
3070 case NT_LIST:
3071 case NT_ALT:
3072 do {
3073 r |= subexp_recursive_check(NCAR(node));
3074 } while (IS_NOT_NULL(node = NCDR(node)));
3075 break;
3076
3077 case NT_QTFR:
3078 r = subexp_recursive_check(NQTFR(node)->target);
3079 break;
3080
3081 case NT_ANCHOR:
3082 {
3083 AnchorNode* an = NANCHOR(node);
3084 switch (an->type) {
3085 case ANCHOR_PREC_READ:
3086 case ANCHOR_PREC_READ_NOT:
3087 case ANCHOR_LOOK_BEHIND:
3088 case ANCHOR_LOOK_BEHIND_NOT:
3089 r = subexp_recursive_check(an->target);
3090 break;
3091 }
3092 }
3093 break;
3094
3095 case NT_CALL:
3096 r = subexp_recursive_check(NCALL(node)->target);
3097 if (r != 0) SET_CALL_RECURSION(node);
3098 break;
3099
3100 case NT_ENCLOSE:
3101 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3102 return 0;
3103 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3104 return 1; /* recursion */
3105 else {
3106 SET_ENCLOSE_STATUS(node, NST_MARK2);
3107 r = subexp_recursive_check(NENCLOSE(node)->target);
3108 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3109 }
3110 break;
3111
3112 default:
3113 break;
3114 }
3115
3116 return r;
3117}
3118
3119
3120static int
3121subexp_recursive_check_trav(Node* node, ScanEnv* env)
3122{
3123# define FOUND_CALLED_NODE 1
3124
3125 int type;
3126 int r = 0;
3127
3128 type = NTYPE(node);
3129 switch (type) {
3130 case NT_LIST:
3131 case NT_ALT:
3132 {
3133 int ret;
3134 do {
3135 ret = subexp_recursive_check_trav(NCAR(node), env);
3136 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3137 else if (ret < 0) return ret;
3138 } while (IS_NOT_NULL(node = NCDR(node)));
3139 }
3140 break;
3141
3142 case NT_QTFR:
3143 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3144 if (NQTFR(node)->upper == 0) {
3145 if (r == FOUND_CALLED_NODE)
3146 NQTFR(node)->is_referred = 1;
3147 }
3148 break;
3149
3150 case NT_ANCHOR:
3151 {
3152 AnchorNode* an = NANCHOR(node);
3153 switch (an->type) {
3154 case ANCHOR_PREC_READ:
3155 case ANCHOR_PREC_READ_NOT:
3156 case ANCHOR_LOOK_BEHIND:
3157 case ANCHOR_LOOK_BEHIND_NOT:
3158 r = subexp_recursive_check_trav(an->target, env);
3159 break;
3160 }
3161 }
3162 break;
3163
3164 case NT_ENCLOSE:
3165 {
3166 EncloseNode* en = NENCLOSE(node);
3167
3168 if (! IS_ENCLOSE_RECURSION(en)) {
3169 if (IS_ENCLOSE_CALLED(en)) {
3170 SET_ENCLOSE_STATUS(node, NST_MARK1);
3171 r = subexp_recursive_check(en->target);
3172 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3173 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3174 }
3175 }
3176 r = subexp_recursive_check_trav(en->target, env);
3177 if (IS_ENCLOSE_CALLED(en))
3178 r |= FOUND_CALLED_NODE;
3179 }
3180 break;
3181
3182 default:
3183 break;
3184 }
3185
3186 return r;
3187}
3188
/*
 * Resolve every call node in the tree rooted at node to the group node it
 * refers to.
 *
 * For each NT_CALL node the target is looked up in the scan environment's
 * memory-node table (by number, or by name when group_num is 0), stored in
 * cn->target, and the target group is flagged NST_CALLED.
 *
 * Returns 0 on success or a negative ONIGERR_* code; for undefined or
 * multiply defined references the offending name is recorded through
 * onig_scan_env_set_error_string().
 */
static int
setup_subexp_call(Node* node, ScanEnv* env)
{
  int type;
  int r = 0;

  type = NTYPE(node);
  switch (type) {
  case NT_LIST:
    do {
      r = setup_subexp_call(NCAR(node), env);
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_ALT:
    do {
      r = setup_subexp_call(NCAR(node), env);
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_QTFR:
    r = setup_subexp_call(NQTFR(node)->target, env);
    break;
  case NT_ENCLOSE:
    r = setup_subexp_call(NENCLOSE(node)->target, env);
    break;

  case NT_CALL:
    {
      CallNode* cn = NCALL(node);
      Node** nodes = SCANENV_MEM_NODES(env);

      if (cn->group_num != 0) {
        /* call by group number */
        int gnum = cn->group_num;

# ifdef USE_NAMED_GROUP
        /* numbered calls are rejected when named groups exist, the syntax
           captures only named groups, and ONIG_OPTION_CAPTURE_GROUP is off */
        if (env->num_named > 0 &&
            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
            !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
        }
# endif
        if (gnum > env->num_mem) {
          onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_GROUP_REFERENCE;
        }

# ifdef USE_NAMED_GROUP
      set_call_attr:
        /* also entered by goto from the name-based branches below once
           cn->group_num is settled */
# endif
        cn->target = nodes[cn->group_num];
        if (IS_NULL(cn->target)) {
          onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_NAME_REFERENCE;
        }
        SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
        BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
        cn->unset_addr_list = env->unset_addr_list;
      }
# ifdef USE_NAMED_GROUP
#  ifdef USE_PERL_SUBEXP_CALL
      else if (cn->name == cn->name_end) {
        /* group_num is 0 and the name is empty: use nodes[0] */
        goto set_call_attr;
      }
#  endif
      else {
        /* call by name: translate the name into group number(s) */
        int *refs;

        int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
                                           &refs);
        if (n <= 0) {
          onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_NAME_REFERENCE;
        }
        else if (n > 1 &&
            ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
          /* a name bound to several groups is only callable when the
             syntax allows it */
          onig_scan_env_set_error_string(env,
            ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
          return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
        }
        else {
          /* the first listed group number is used */
          cn->group_num = refs[0];
          goto set_call_attr;
        }
      }
# endif
    }
    break;

  case NT_ANCHOR:
    {
      AnchorNode* an = NANCHOR(node);

      /* only look-ahead / look-behind anchors are descended into */
      switch (an->type) {
      case ANCHOR_PREC_READ:
      case ANCHOR_PREC_READ_NOT:
      case ANCHOR_LOOK_BEHIND:
      case ANCHOR_LOOK_BEHIND_NOT:
        r = setup_subexp_call(an->target, env);
        break;
      }
    }
    break;

  default:
    break;
  }

  return r;
}
3302#endif
3303
3304#define IN_ALT (1<<0)
3305#define IN_NOT (1<<1)
3306#define IN_REPEAT (1<<2)
3307#define IN_VAR_REPEAT (1<<3)
3308#define IN_CALL (1<<4)
3309#define IN_RECCALL (1<<5)
3310#define IN_LOOK_BEHIND (1<<6)
3311
3312/* divide different length alternatives in look-behind.
3313 (?<=A|B) ==> (?<=A)|(?<=B)
3314 (?<!A|B) ==> (?<!A)(?<!B)
3315*/
3316static int
3317divide_look_behind_alternatives(Node* node)
3318{
3319 Node *head, *np, *insert_node;
3320 AnchorNode* an = NANCHOR(node);
3321 int anc_type = an->type;
3322
3323 head = an->target;
3324 np = NCAR(head);
3325 swap_node(node, head);
3326 NCAR(node) = head;
3327 NANCHOR(head)->target = np;
3328
3329 np = node;
3330 while ((np = NCDR(np)) != NULL_NODE) {
3331 insert_node = onig_node_new_anchor(anc_type);
3332 CHECK_NULL_RETURN_MEMERR(insert_node);
3333 NANCHOR(insert_node)->target = NCAR(np);
3334 NCAR(np) = insert_node;
3335 }
3336
3337 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3338 np = node;
3339 do {
3340 SET_NTYPE(np, NT_LIST); /* alt -> list */
3341 } while ((np = NCDR(np)) != NULL_NODE);
3342 }
3343 return 0;
3344}
3345
3346static int
3347setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3348{
3349 int r, len;
3350 AnchorNode* an = NANCHOR(node);
3351
3352 r = get_char_length_tree(an->target, reg, &len);
3353 if (r == 0)
3354 an->char_len = len;
3355 else if (r == GET_CHAR_LEN_VARLEN)
3356 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3357 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3358 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3359 r = divide_look_behind_alternatives(node);
3360 else
3361 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3362 }
3363
3364 return r;
3365}
3366
3367static int
3368next_setup(Node* node, Node* next_node, regex_t* reg)
3369{
3370 int type;
3371
3372 retry:
3373 type = NTYPE(node);
3374 if (type == NT_QTFR) {
3375 QtfrNode* qn = NQTFR(node);
3376 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3377#ifdef USE_QTFR_PEEK_NEXT
3378 Node* n = get_head_value_node(next_node, 1, reg);
3379 /* '\0': for UTF-16BE etc... */
3380 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3381 qn->next_head_exact = n;
3382 }
3383#endif
3384 /* automatic possessification a*b ==> (?>a*)b */
3385 if (qn->lower <= 1) {
3386 int ttype = NTYPE(qn->target);
3387 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3388 Node *x, *y;
3389 x = get_head_value_node(qn->target, 0, reg);
3390 if (IS_NOT_NULL(x)) {
3391 y = get_head_value_node(next_node, 0, reg);
3392 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3393 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3394 CHECK_NULL_RETURN_MEMERR(en);
3395 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3396 swap_node(node, en);
3397 NENCLOSE(node)->target = en;
3398 }
3399 }
3400 }
3401 }
3402 }
3403 }
3404 else if (type == NT_ENCLOSE) {
3405 EncloseNode* en = NENCLOSE(node);
3406 if (en->type == ENCLOSE_MEMORY && !IS_ENCLOSE_CALLED(en)) {
3407 node = en->target;
3408 goto retry;
3409 }
3410 }
3411 return 0;
3412}
3413
3414
3415static int
3416update_string_node_case_fold(regex_t* reg, Node *node)
3417{
3418 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3419 UChar *sbuf, *ebuf, *sp;
3420 int r, i, len;
3421 OnigDistance sbuf_size;
3422 StrNode* sn = NSTR(node);
3423
3424 end = sn->end;
3425 sbuf_size = (end - sn->s) * 2;
3426 sbuf = (UChar* )xmalloc(sbuf_size);
3427 CHECK_NULL_RETURN_MEMERR(sbuf);
3428 ebuf = sbuf + sbuf_size;
3429
3430 sp = sbuf;
3431 p = sn->s;
3432 while (p < end) {
3433 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3434 for (i = 0; i < len; i++) {
3435 if (sp >= ebuf) {
3436 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3437 if (IS_NULL(p)) {
3438 xfree(sbuf);
3439 return ONIGERR_MEMORY;
3440 }
3441 sbuf = p;
3442 sp = sbuf + sbuf_size;
3443 sbuf_size *= 2;
3444 ebuf = sbuf + sbuf_size;
3445 }
3446
3447 *sp++ = buf[i];
3448 }
3449 }
3450
3451 r = onig_node_str_set(node, sbuf, sp);
3452
3453 xfree(sbuf);
3454 return r;
3455}
3456
3457static int
3458expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3459 regex_t* reg)
3460{
3461 int r;
3462 Node *node;
3463
3464 node = onig_node_new_str(s, end);
3465 if (IS_NULL(node)) return ONIGERR_MEMORY;
3466
3467 r = update_string_node_case_fold(reg, node);
3468 if (r != 0) {
3469 onig_node_free(node);
3470 return r;
3471 }
3472
3473 NSTRING_SET_AMBIG(node);
3474 NSTRING_SET_DONT_GET_OPT_INFO(node);
3475 *rnode = node;
3476 return 0;
3477}
3478
3479static int
3480is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3481 int slen)
3482{
3483 int i;
3484
3485 for (i = 0; i < item_num; i++) {
3486 if (items[i].byte_len != slen) {
3487 return 1;
3488 }
3489 if (items[i].code_len != 1) {
3490 return 1;
3491 }
3492 }
3493 return 0;
3494}
3495
3496static int
3497expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3498 UChar *p, int slen, UChar *end,
3499 regex_t* reg, Node **rnode)
3500{
3501 int r, i, j, len, varlen;
3502 Node *anode, *var_anode, *snode, *xnode, *an;
3503 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3504
3505 *rnode = var_anode = NULL_NODE;
3506
3507 varlen = 0;
3508 for (i = 0; i < item_num; i++) {
3509 if (items[i].byte_len != slen) {
3510 varlen = 1;
3511 break;
3512 }
3513 }
3514
3515 if (varlen != 0) {
3516 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3518
3519 xnode = onig_node_new_list(NULL, NULL);
3520 if (IS_NULL(xnode)) goto mem_err;
3521 NCAR(var_anode) = xnode;
3522
3523 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3524 if (IS_NULL(anode)) goto mem_err;
3525 NCAR(xnode) = anode;
3526 }
3527 else {
3528 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3529 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3530 }
3531
3532 snode = onig_node_new_str(p, p + slen);
3533 if (IS_NULL(snode)) goto mem_err;
3534
3535 NCAR(anode) = snode;
3536
3537 for (i = 0; i < item_num; i++) {
3538 snode = onig_node_new_str(NULL, NULL);
3539 if (IS_NULL(snode)) goto mem_err;
3540
3541 for (j = 0; j < items[i].code_len; j++) {
3542 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3543 if (len < 0) {
3544 r = len;
3545 goto mem_err2;
3546 }
3547
3548 r = onig_node_str_cat(snode, buf, buf + len);
3549 if (r != 0) goto mem_err2;
3550 }
3551
3552 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3553 if (IS_NULL(an)) {
3554 goto mem_err2;
3555 }
3556
3557 if (items[i].byte_len != slen) {
3558 Node *rem;
3559 UChar *q = p + items[i].byte_len;
3560
3561 if (q < end) {
3562 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3563 if (r != 0) {
3564 onig_node_free(an);
3565 goto mem_err2;
3566 }
3567
3568 xnode = onig_node_list_add(NULL_NODE, snode);
3569 if (IS_NULL(xnode)) {
3570 onig_node_free(an);
3571 onig_node_free(rem);
3572 goto mem_err2;
3573 }
3574 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3575 onig_node_free(an);
3576 onig_node_free(xnode);
3577 onig_node_free(rem);
3578 goto mem_err;
3579 }
3580
3581 NCAR(an) = xnode;
3582 }
3583 else {
3584 NCAR(an) = snode;
3585 }
3586
3587 NCDR(var_anode) = an;
3588 var_anode = an;
3589 }
3590 else {
3591 NCAR(an) = snode;
3592 NCDR(anode) = an;
3593 anode = an;
3594 }
3595 }
3596
3597 return varlen;
3598
3599 mem_err2:
3600 onig_node_free(snode);
3601
3602 mem_err:
3603 onig_node_free(*rnode);
3604
3605 return ONIGERR_MEMORY;
3606}
3607
3608#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3609
3610static int
3611expand_case_fold_string(Node* node, regex_t* reg, int state)
3612{
3613 int r, n, len, alt_num;
3614 int varlen = 0;
3615 int is_in_look_behind;
3616 UChar *start, *end, *p;
3617 Node *top_root, *root, *snode, *prev_node;
3618 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3619 StrNode* sn;
3620
3621 if (NSTRING_IS_AMBIG(node)) return 0;
3622
3623 sn = NSTR(node);
3624
3625 start = sn->s;
3626 end = sn->end;
3627 if (start >= end) return 0;
3628
3629 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3630
3631 r = 0;
3632 top_root = root = prev_node = snode = NULL_NODE;
3633 alt_num = 1;
3634 p = start;
3635 while (p < end) {
3636 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3637 p, end, items);
3638 if (n < 0) {
3639 r = n;
3640 goto err;
3641 }
3642
3643 len = enclen(reg->enc, p, end);
3644
3645 varlen = is_case_fold_variable_len(n, items, len);
3646 if (n == 0 || varlen == 0 || is_in_look_behind) {
3647 if (IS_NULL(snode)) {
3648 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3649 onig_node_free(top_root);
3650 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3651 if (IS_NULL(root)) {
3652 onig_node_free(prev_node);
3653 goto mem_err;
3654 }
3655 }
3656
3657 prev_node = snode = onig_node_new_str(NULL, NULL);
3658 if (IS_NULL(snode)) goto mem_err;
3659 if (IS_NOT_NULL(root)) {
3660 if (IS_NULL(onig_node_list_add(root, snode))) {
3661 onig_node_free(snode);
3662 goto mem_err;
3663 }
3664 }
3665 }
3666
3667 r = onig_node_str_cat(snode, p, p + len);
3668 if (r != 0) goto err;
3669 }
3670 else {
3671 alt_num *= (n + 1);
3672 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3673
3674 if (IS_NOT_NULL(snode)) {
3675 r = update_string_node_case_fold(reg, snode);
3676 if (r == 0) {
3677 NSTRING_SET_AMBIG(snode);
3678 }
3679 }
3680 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3681 onig_node_free(top_root);
3682 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3683 if (IS_NULL(root)) {
3684 onig_node_free(prev_node);
3685 goto mem_err;
3686 }
3687 }
3688
3689 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3690 if (r < 0) goto mem_err;
3691 if (r == 1) {
3692 if (IS_NULL(root)) {
3693 top_root = prev_node;
3694 }
3695 else {
3696 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3697 onig_node_free(prev_node);
3698 goto mem_err;
3699 }
3700 }
3701
3702 root = NCAR(prev_node);
3703 }
3704 else { /* r == 0 */
3705 if (IS_NOT_NULL(root)) {
3706 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3707 onig_node_free(prev_node);
3708 goto mem_err;
3709 }
3710 }
3711 }
3712
3713 snode = NULL_NODE;
3714 }
3715
3716 p += len;
3717 }
3718 if (IS_NOT_NULL(snode)) {
3719 r = update_string_node_case_fold(reg, snode);
3720 if (r == 0) {
3721 NSTRING_SET_AMBIG(snode);
3722 }
3723 }
3724
3725 if (p < end) {
3726 Node *srem;
3727
3728 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3729 if (r != 0) goto mem_err;
3730
3731 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3732 onig_node_free(top_root);
3733 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3734 if (IS_NULL(root)) {
3735 onig_node_free(srem);
3736 onig_node_free(prev_node);
3737 goto mem_err;
3738 }
3739 }
3740
3741 if (IS_NULL(root)) {
3742 prev_node = srem;
3743 }
3744 else {
3745 if (IS_NULL(onig_node_list_add(root, srem))) {
3746 onig_node_free(srem);
3747 goto mem_err;
3748 }
3749 }
3750 }
3751
3752 /* ending */
3753 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3754 swap_node(node, top_root);
3755 onig_node_free(top_root);
3756 return 0;
3757
3758 mem_err:
3759 r = ONIGERR_MEMORY;
3760
3761 err:
3762 onig_node_free(top_root);
3763 return r;
3764}
3765
3766
3767#ifdef USE_COMBINATION_EXPLOSION_CHECK
3768
3769# define CEC_THRES_NUM_BIG_REPEAT 512
3770# define CEC_INFINITE_NUM 0x7fffffff
3771
3772# define CEC_IN_INFINITE_REPEAT (1<<0)
3773# define CEC_IN_FINITE_REPEAT (1<<1)
3774# define CEC_CONT_BIG_REPEAT (1<<2)
3775
/*
 * Walk the tree rooted at node and assign combination-explosion check
 * numbers to quantifiers.
 *
 * state is a set of CEC_* flags describing the enclosing context.  A
 * quantifier receives the next check number (env->num_comb_exp_check) when
 * it varies in repeat count inside an infinite repeat, or when it is a big
 * repeat following another big repeat; quantifiers inside a finite repeat
 * get comb_exp_check_num = -1.
 *
 * Returns the CEC_* state to be passed on to the following list element.
 */
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
  int type;
  int r = state;

  type = NTYPE(node);
  switch (type) {
  case NT_LIST:
    {
      /* the state produced by one element is the input of the next */
      do {
        r = setup_comb_exp_check(NCAR(node), r, env);
      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
    }
    break;

  case NT_ALT:
    {
      int ret;
      /* every branch starts from the same state; results are OR-ed */
      do {
        ret = setup_comb_exp_check(NCAR(node), state, env);
        r |= ret;
      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
    }
    break;

  case NT_QTFR:
    {
      int child_state = state;
      int add_state = 0;
      QtfrNode* qn = NQTFR(node);
      Node* target = qn->target;
      int var_num;

      if (! IS_REPEAT_INFINITE(qn->upper)) {
        if (qn->upper > 1) {
          /* {0,1}, {1,1} are allowed */
          child_state |= CEC_IN_FINITE_REPEAT;

          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
          /* only done when no group is back-referenced */
          if (env->backrefed_mem == 0) {
            if (NTYPE(qn->target) == NT_ENCLOSE) {
              EncloseNode* en = NENCLOSE(qn->target);
              if (en->type == ENCLOSE_MEMORY) {
                if (NTYPE(en->target) == NT_QTFR) {
                  QtfrNode* q = NQTFR(en->target);
                  if (IS_REPEAT_INFINITE(q->upper)
                      && q->greedy == qn->greedy) {
                    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
                    /* reduced to a single repetition: the target is no
                       longer inside a finite repeat */
                    if (qn->upper == 1)
                      child_state = state;
                  }
                }
              }
            }
          }
        }
      }

      if (state & CEC_IN_FINITE_REPEAT) {
        qn->comb_exp_check_num = -1;
      }
      else {
        /* var_num: number of different repeat counts this node allows */
        if (IS_REPEAT_INFINITE(qn->upper)) {
          var_num = CEC_INFINITE_NUM;
          child_state |= CEC_IN_INFINITE_REPEAT;
        }
        else {
          var_num = qn->upper - qn->lower;
        }

        if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
          add_state |= CEC_CONT_BIG_REPEAT;

        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
            ((state & CEC_CONT_BIG_REPEAT) != 0 &&
             var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
          /* assign a check number once per quantifier */
          if (qn->comb_exp_check_num == 0) {
            env->num_comb_exp_check++;
            qn->comb_exp_check_num = env->num_comb_exp_check;
            if (env->curr_max_regnum > env->comb_exp_max_regnum)
              env->comb_exp_max_regnum = env->curr_max_regnum;
          }
        }
      }

      r = setup_comb_exp_check(target, child_state, env);
      r |= add_state;
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* en = NENCLOSE(node);

      switch (en->type) {
      case ENCLOSE_MEMORY:
        {
          /* remember the highest capture group number seen so far */
          if (env->curr_max_regnum < en->regnum)
            env->curr_max_regnum = en->regnum;

          r = setup_comb_exp_check(en->target, state, env);
        }
        break;

      default:
        r = setup_comb_exp_check(en->target, state, env);
        break;
      }
    }
    break;

# ifdef USE_SUBEXP_CALL
  case NT_CALL:
    /* recursive calls are only recorded; other calls are followed */
    if (IS_CALL_RECURSION(NCALL(node)))
      env->has_recursion = 1;
    else
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
    break;
# endif

  default:
    break;
  }

  return r;
}
3903#endif
3904
/* setup_tree does the following work.
 1. check empty loop. (set qn->target_empty_info)
 2. expand ignore-case in char class.
 3. set memory status bit flags. (reg->mem_stats)
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
 5. find invalid patterns in look-behind.
 6. expand repeated string.

 Parameters:
   node  - root of the (sub)tree to process; it is walked recursively and
           may be rewritten in place (see the NT_QTFR string expansion).
   reg   - regex being compiled; reg->options is read, and temporarily
           replaced while the target of an ENCLOSE_OPTION node is processed.
   state - bit set of IN_* flags describing the context of `node`
           (IN_ALT, IN_NOT, IN_REPEAT, IN_VAR_REPEAT, IN_CALL, IN_RECCALL,
           IN_LOOK_BEHIND are the ones set or tested here).
   env   - scan environment; backrefed_mem / bt_mem_start / bt_mem_end are
           updated here.

 Returns 0 on success.  Otherwise returns the first non-zero value produced
 by a helper, or one of ONIGERR_MEMORY, ONIGERR_INVALID_BACKREF,
 ONIGERR_INVALID_LOOK_BEHIND_PATTERN,
 ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED.
 */
static int
setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
  int type;
  int r = 0;

restart:
  type = NTYPE(node);
  switch (type) {
  case NT_LIST:
    {
      /* Set up every element; next_setup() is given each adjacent
         (previous, current) pair once the current element succeeded. */
      Node* prev = NULL_NODE;
      do {
        r = setup_tree(NCAR(node), reg, state, env);
        if (IS_NOT_NULL(prev) && r == 0) {
          r = next_setup(prev, NCAR(node), reg);
        }
        prev = NCAR(node);
      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
    }
    break;

  case NT_ALT:
    /* Every branch is processed with IN_ALT added to the state. */
    do {
      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_CCLASS:
    break;

  case NT_STR:
    /* Raw strings are never case-fold expanded. */
    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
      r = expand_case_fold_string(node, reg, state);
    }
    break;

  case NT_CTYPE:
  case NT_CANY:
    break;

#ifdef USE_SUBEXP_CALL
  case NT_CALL:
    break;
#endif

  case NT_BREF:
    {
      int i;
      int* p;
      Node** nodes = SCANENV_MEM_NODES(env);
      BRefNode* br = NBREF(node);
      p = BACKREFS_P(br);
      for (i = 0; i < br->back_num; i++) {
        /* A reference to a group number beyond num_mem is an error. */
        if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
        BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
        BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
#ifdef USE_BACKREF_WITH_LEVEL
        if (IS_BACKREF_NEST_LEVEL(br)) {
          BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
        }
#endif
        SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
      }
    }
    break;

  case NT_QTFR:
    {
      OnigDistance d;
      QtfrNode* qn = NQTFR(node);
      Node* target = qn->target;

      if ((state & IN_REPEAT) != 0) {
        qn->state |= NST_IN_REPEAT;
      }

      /* Empty-loop check: record whether the target can match with
         minimum length 0. */
      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
        r = get_min_match_length(target, &d, env);
        if (r) break;
        if (d == 0) {
          qn->target_empty_info = NQ_TARGET_IS_EMPTY;
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
          r = quantifiers_memory_node_info(target);
          if (r < 0) break;
          if (r > 0) {
            qn->target_empty_info = r;
          }
#endif
#if 0
          r = get_max_match_length(target, &d, env);
          if (r == 0 && d == 0) {
            /*  ()* ==> ()?, ()+ ==> ()  */
            qn->upper = 1;
            if (qn->lower > 1) qn->lower = 1;
            if (NTYPE(target) == NT_STR) {
              qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
            }
          }
#endif
        }
      }

      /* The target is processed as being inside a repeat; a repeat whose
         bounds differ additionally sets IN_VAR_REPEAT. */
      state |= IN_REPEAT;
      if (qn->lower != qn->upper)
        state |= IN_VAR_REPEAT;
      r = setup_tree(target, reg, state, env);
      if (r) break;

      /* expand string */
#define EXPAND_STRING_MAX_LENGTH  100
      if (NTYPE(target) == NT_STR) {
        if (qn->lower > 1) {
          int i, n = qn->lower;
          OnigDistance len = NSTRING_LEN(target);
          StrNode* sn = NSTR(target);
          Node* np;

          /* np starts as one copy of the string; further copies are
             appended while fewer than `lower` copies exist and the result
             stays within EXPAND_STRING_MAX_LENGTH.  After the loop, `i`
             is the number of copies held by np. */
          np = onig_node_new_str(sn->s, sn->end);
          if (IS_NULL(np)) return ONIGERR_MEMORY;
          NSTR(np)->flag = sn->flag;

          for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
            r = onig_node_str_cat(np, sn->s, sn->end);
            if (r) {
              onig_node_free(np);
              return r;
            }
          }
          if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
            Node *np1, *np2;

            /* Repetitions remain: reduce the quantifier bounds by the
               copies already expanded and turn `node` into a list holding
               the expanded string followed by the reduced quantifier. */
            qn->lower -= i;
            if (! IS_REPEAT_INFINITE(qn->upper))
              qn->upper -= i;

            np1 = onig_node_new_list(np, NULL);
            if (IS_NULL(np1)) {
              onig_node_free(np);
              return ONIGERR_MEMORY;
            }
            swap_node(np1, node);
            np2 = onig_node_list_add(node, np1);
            if (IS_NULL(np2)) {
              onig_node_free(np1);
              return ONIGERR_MEMORY;
            }
          }
          else {
            /* Fully expanded: the string replaces the quantifier node and
               the swapped-out contents are freed. */
            swap_node(np, node);
            onig_node_free(np);
          }
          break; /* break case NT_QTFR: */
        }
      }

#ifdef USE_OP_PUSH_OR_JUMP_EXACT
      if (qn->greedy && (qn->target_empty_info != 0)) {
        if (NTYPE(target) == NT_QTFR) {
          /* Take over head_exact from a nested quantifier target. */
          QtfrNode* tqn = NQTFR(target);
          if (IS_NOT_NULL(tqn->head_exact)) {
            qn->head_exact  = tqn->head_exact;
            tqn->head_exact = NULL;
          }
        }
        else {
          qn->head_exact = get_head_value_node(qn->target, 1, reg);
        }
      }
#endif
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* en = NENCLOSE(node);

      switch (en->type) {
      case ENCLOSE_OPTION:
        {
          /* Process the target under the group's options, then restore
             the saved ones. */
          OnigOptionType options = reg->options;
          reg->options = NENCLOSE(node)->option;
          r = setup_tree(NENCLOSE(node)->target, reg, state, env);
          reg->options = options;
        }
        break;

      case ENCLOSE_MEMORY:
        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
          BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
          /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
        }
        if (IS_ENCLOSE_CALLED(en))
          state |= IN_CALL;
        if (IS_ENCLOSE_RECURSION(en))
          state |= IN_RECCALL;
        else if ((state & IN_RECCALL) != 0)
          SET_CALL_RECURSION(node);
        r = setup_tree(en->target, reg, state, env);
        break;

      case ENCLOSE_STOP_BACKTRACK:
        {
          Node* target = en->target;
          r = setup_tree(target, reg, state, env);
          if (NTYPE(target) == NT_QTFR) {
            QtfrNode* tqn = NQTFR(target);
            if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
                tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
              int qtype = NTYPE(tqn->target);
              if (IS_NODE_TYPE_SIMPLE(qtype))
                SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
            }
          }
        }
        break;

      case ENCLOSE_CONDITION:
#ifdef USE_NAMED_GROUP
        /* A condition that is not a name reference is rejected when named
           groups exist, the syntax captures only named groups, and
           ONIG_OPTION_CAPTURE_GROUP is off. */
        if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
            env->num_named > 0 &&
            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
            !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
        }
#endif
        if (NENCLOSE(node)->regnum > env->num_mem)
          return ONIGERR_INVALID_BACKREF;
        r = setup_tree(NENCLOSE(node)->target, reg, state, env);
        break;

      case ENCLOSE_ABSENT:
        r = setup_tree(NENCLOSE(node)->target, reg, state, env);
        break;
      }
    }
    break;

  case NT_ANCHOR:
    {
      AnchorNode* an = NANCHOR(node);

      switch (an->type) {
      case ANCHOR_PREC_READ:
        r = setup_tree(an->target, reg, state, env);
        break;
      case ANCHOR_PREC_READ_NOT:
        r = setup_tree(an->target, reg, (state | IN_NOT), env);
        break;

/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB  \
  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )

#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
#define ALLOWED_ENCLOSE_IN_LB_NOT   ENCLOSE_OPTION

#define ALLOWED_ANCHOR_IN_LB \
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
#define ALLOWED_ANCHOR_IN_LB_NOT \
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )

      case ANCHOR_LOOK_BEHIND:
        {
          /* Negative result: error code; positive result: a node kind that
             is not allowed inside look-behind. */
          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
                              ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
          if (r < 0) return r;
          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
          /* NOTE(review): this re-dispatches when `node` is no longer an
             anchor; presumably the node can have been rewritten in place
             by this point -- confirm which callee does that. */
          if (NTYPE(node) != NT_ANCHOR) goto restart;
          r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
          if (r != 0) return r;
          r = setup_look_behind(node, reg, env);
        }
        break;

      case ANCHOR_LOOK_BEHIND_NOT:
        {
          /* Same checks as above with the *_NOT allow-lists (captures are
             not in the allowed enclose set here). */
          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
                      ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
          if (r < 0) return r;
          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
          if (NTYPE(node) != NT_ANCHOR) goto restart;
          r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
                         env);
          if (r != 0) return r;
          r = setup_look_behind(node, reg, env);
        }
        break;
      }
    }
    break;

  default:
    break;
  }

  return r;
}
4218
4219#ifndef USE_SUNDAY_QUICK_SEARCH
/* set skip map for Boyer-Moore search */
/*
 * Builds the skip table for the literal [s, end).
 *
 * Every table entry defaults to the literal length `len`.  For each byte at
 * offset `o` before the final character, the entry is set to `len - 1 - o`.
 * When `ignore_case` is non-zero, the same distance is stored for the
 * corresponding byte of every case-fold alternative of that character.
 *
 * The table is `skip[]` (UChar entries) when len < ONIG_CHAR_TABLE_SIZE;
 * otherwise `*int_skip` (int entries), which is allocated here if it is
 * still NULL.
 *
 * Returns 0 on success, 1 when a case-fold alternative is not a single
 * code point with the same byte length as the original character,
 * ONIGERR_MEMORY on allocation failure, and ONIGERR_TYPE_BUG in the branch
 * marked "should not happen".
 *
 * NOTE(review): the loop bound `len - 1` is computed in OnigDistance; if
 * that type is unsigned, an empty literal would wrap -- presumably callers
 * never pass one; confirm.
 */
static int
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
            UChar skip[], int** int_skip, int ignore_case)
{
  OnigDistance i, len;
  int clen, flen, n, j, k;
  UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
  OnigEncoding enc = reg->enc;

  len = end - s;
  if (len < ONIG_CHAR_TABLE_SIZE) {
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;

    /* n stays 0 (no fold alternatives) unless ignore_case is set. */
    n = 0;
    for (i = 0; i < len - 1; i += clen) {
      p = s + i;
      if (ignore_case)
        n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
                                               p, end, items);
      /* Character length, clamped so it never runs past `end`. */
      clen = enclen(enc, p, end);
      if (p + clen > end)
        clen = (int )(end - p);

      /* Encode each fold alternative into buf[j]; only same-length,
         single-code-point alternatives are supported. */
      for (j = 0; j < n; j++) {
        if ((items[j].code_len != 1) || (items[j].byte_len != clen))
          return 1;  /* different length isn't supported. */
        flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
        if (flen != clen)
          return 1;  /* different length isn't supported. */
      }
      for (j = 0; j < clen; j++) {
        skip[s[i + j]] = (UChar )(len - 1 - i - j);
        for (k = 0; k < n; k++) {
          skip[buf[k][j]] = (UChar )(len - 1 - i - j);
        }
      }
    }
  }
  else {
# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
    /* This should not happen. */
    return ONIGERR_TYPE_BUG;
# else
    if (IS_NULL(*int_skip)) {
      *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
      if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
    }
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;

    /* Same procedure as the UChar table above, with int entries. */
    n = 0;
    for (i = 0; i < len - 1; i += clen) {
      p = s + i;
      if (ignore_case)
        n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
                                               p, end, items);
      clen = enclen(enc, p, end);
      if (p + clen > end)
        clen = (int )(end - p);

      for (j = 0; j < n; j++) {
        if ((items[j].code_len != 1) || (items[j].byte_len != clen))
          return 1;  /* different length isn't supported. */
        flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
        if (flen != clen)
          return 1;  /* different length isn't supported. */
      }
      for (j = 0; j < clen; j++) {
        (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
        for (k = 0; k < n; k++) {
          (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
        }
      }
    }
# endif
  }
  return 0;
}
4299
4300#else /* USE_SUNDAY_QUICK_SEARCH */
4301
/* set skip map for Sunday's quick search */
/*
 * Builds the skip table for the literal [s, end).
 *
 * Every table entry defaults to `len + 1`.  For each byte at offset `o` of
 * the literal (the final character included), the entry is set to
 * `len - o`.  When `ignore_case` is non-zero, the same distance is stored
 * for the corresponding byte of every case-fold alternative of that
 * character.
 *
 * The table is `skip[]` (UChar entries) when len < ONIG_CHAR_TABLE_SIZE;
 * otherwise `*int_skip` (int entries), which is allocated here if it is
 * still NULL.
 *
 * Returns 0 on success, 1 when a case-fold alternative is not a single
 * code point with the same byte length as the original character,
 * ONIGERR_MEMORY on allocation failure, and ONIGERR_TYPE_BUG in the branch
 * marked "should not happen".
 */
static int
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
            UChar skip[], int** int_skip, int ignore_case)
{
  OnigDistance i, len;
  int clen, flen, n, j, k;
  UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
  OnigEncoding enc = reg->enc;

  len = end - s;
  if (len < ONIG_CHAR_TABLE_SIZE) {
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);

    /* n stays 0 (no fold alternatives) unless ignore_case is set. */
    n = 0;
    for (i = 0; i < len; i += clen) {
      p = s + i;
      if (ignore_case)
        n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
                                               p, end, items);
      /* Character length, clamped so it never runs past `end`. */
      clen = enclen(enc, p, end);
      if (p + clen > end)
        clen = (int )(end - p);

      /* Encode each fold alternative into buf[j]; only same-length,
         single-code-point alternatives are supported. */
      for (j = 0; j < n; j++) {
        if ((items[j].code_len != 1) || (items[j].byte_len != clen))
          return 1;  /* different length isn't supported. */
        flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
        if (flen != clen)
          return 1;  /* different length isn't supported. */
      }
      for (j = 0; j < clen; j++) {
        skip[s[i + j]] = (UChar )(len - i - j);
        for (k = 0; k < n; k++) {
          skip[buf[k][j]] = (UChar )(len - i - j);
        }
      }
    }
  }
  else {
# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
    /* This should not happen. */
    return ONIGERR_TYPE_BUG;
# else
    if (IS_NULL(*int_skip)) {
      *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
      if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
    }
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);

    /* Same procedure as the UChar table above, with int entries. */
    n = 0;
    for (i = 0; i < len; i += clen) {
      p = s + i;
      if (ignore_case)
        n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
                                               p, end, items);
      clen = enclen(enc, p, end);
      if (p + clen > end)
        clen = (int )(end - p);

      for (j = 0; j < n; j++) {
        if ((items[j].code_len != 1) || (items[j].byte_len != clen))
          return 1;  /* different length isn't supported. */
        flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
        if (flen != clen)
          return 1;  /* different length isn't supported. */
      }
      for (j = 0; j < clen; j++) {
        (*int_skip)[s[i + j]] = (int )(len - i - j);
        for (k = 0; k < n; k++) {
          (*int_skip)[buf[k][j]] = (int )(len - i - j);
        }
      }
    }
# endif
  }
  return 0;
}
4381#endif /* USE_SUNDAY_QUICK_SEARCH */
4382
/* A [min, max] range of byte lengths.  distance_value() treats
   max == ONIG_INFINITE_DISTANCE as "no upper bound". */
typedef struct {
  OnigDistance min;  /* min byte length */
  OnigDistance max;  /* max byte length */
} MinMaxLen;
4387
/* Context handed down through optimize_node_left(). */
typedef struct {
  MinMaxLen        mmd;            /* lengths accumulated for the preceding
                                      nodes of a list; copied into the
                                      "info position" fields of each
                                      NodeOptInfo */
  OnigEncoding     enc;            /* encoding used for enclen() etc. */
  OnigOptionType   options;
  OnigCaseFoldType case_fold_flag; /* passed to case-fold code lookups */
  ScanEnv*         scan_env;       /* used for back-reference lookups */
} OptEnv;
4395
/* Two bit sets of ANCHOR_* flags.  add_opt_anc_info() stores a flag in
   left_anchor when is_left_anchor() accepts it, in right_anchor otherwise. */
typedef struct {
  int left_anchor;
  int right_anchor;
} OptAncInfo;
4400
/* A candidate exact (literal) byte string collected for optimization. */
typedef struct {
  MinMaxLen  mmd;  /* info position */
  OptAncInfo anc;

  int   reach_end;     /* non-zero while s[] still covers the source string
                          up to its end (cleared when truncated or when
                          something of non-zero length follows) */
  int   ignore_case;   /* -1: unset, 0: case sensitive, 1: ignore case */
  int   len;           /* number of bytes stored in s[] */
  UChar s[OPT_EXACT_MAXLEN];
} OptExactInfo;
4410
/* A set of candidate first bytes together with its accumulated weight. */
typedef struct {
  MinMaxLen mmd;    /* info position */
  OptAncInfo anc;

  int   value;      /* weighted value */
  UChar map[ONIG_CHAR_TABLE_SIZE];  /* map[b] is non-zero when byte b is
                                       in the set */
} OptMapInfo;
4418
/* Optimization info computed for one node by optimize_node_left(). */
typedef struct {
  MinMaxLen    len;   /* min/max byte length matched by the node */

  OptAncInfo   anc;
  OptExactInfo exb;    /* boundary */
  OptExactInfo exm;    /* middle */
  OptExactInfo expr;   /* prec read (?=...) */

  OptMapInfo   map;    /* boundary */
} NodeOptInfo;
4429
4430
4431static int
4432map_position_value(OnigEncoding enc, int i)
4433{
4434 static const short int ByteValTable[] = {
4435 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4436 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4437 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4438 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4439 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4440 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4441 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4442 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4443 };
4444
4445 if (i < numberof(ByteValTable)) {
4446 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4447 return 20;
4448 else
4449 return (int )ByteValTable[i];
4450 }
4451 else
4452 return 4; /* Take it easy. */
4453}
4454
4455static int
4456distance_value(MinMaxLen* mm)
4457{
4458 /* 1000 / (min-max-dist + 1) */
4459 static const short int dist_vals[] = {
4460 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4461 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4462 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4463 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4464 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4465 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4466 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4467 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4468 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4469 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4470 };
4471
4472 OnigDistance d;
4473
4474 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4475
4476 d = mm->max - mm->min;
4477 if (d < numberof(dist_vals))
4478 /* return dist_vals[d] * 16 / (mm->min + 12); */
4479 return (int )dist_vals[d];
4480 else
4481 return 1;
4482}
4483
4484static int
4485comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4486{
4487 if (v2 <= 0) return -1;
4488 if (v1 <= 0) return 1;
4489
4490 v1 *= distance_value(d1);
4491 v2 *= distance_value(d2);
4492
4493 if (v2 > v1) return 1;
4494 if (v2 < v1) return -1;
4495
4496 if (d2->min < d1->min) return 1;
4497 if (d2->min > d1->min) return -1;
4498 return 0;
4499}
4500
4501static int
4502is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4503{
4504 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4505}
4506
4507
4508static void
4509set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4510{
4511 mml->min = min;
4512 mml->max = max;
4513}
4514
4515static void
4516clear_mml(MinMaxLen* mml)
4517{
4518 mml->min = mml->max = 0;
4519}
4520
4521static void
4522copy_mml(MinMaxLen* to, MinMaxLen* from)
4523{
4524 to->min = from->min;
4525 to->max = from->max;
4526}
4527
4528static void
4529add_mml(MinMaxLen* to, MinMaxLen* from)
4530{
4531 to->min = distance_add(to->min, from->min);
4532 to->max = distance_add(to->max, from->max);
4533}
4534
#if 0
/* Compiled out.  Adds the single length `len` to both bounds of *to via
   distance_add(). */
static void
add_len_mml(MinMaxLen* to, OnigDistance len)
{
  to->min = distance_add(to->min, len);
  to->max = distance_add(to->max, len);
}
#endif
4543
4544static void
4545alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4546{
4547 if (to->min > from->min) to->min = from->min;
4548 if (to->max < from->max) to->max = from->max;
4549}
4550
/* Copy the whole optimizer environment *from into *to (struct assignment). */
static void
copy_opt_env(OptEnv* to, OptEnv* from)
{
  *to = *from;
}
4556
4557static void
4558clear_opt_anc_info(OptAncInfo* anc)
4559{
4560 anc->left_anchor = 0;
4561 anc->right_anchor = 0;
4562}
4563
4564static void
4565copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4566{
4567 *to = *from;
4568}
4569
4570static void
4571concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4572 OnigDistance left_len, OnigDistance right_len)
4573{
4574 clear_opt_anc_info(to);
4575
4576 to->left_anchor = left->left_anchor;
4577 if (left_len == 0) {
4578 to->left_anchor |= right->left_anchor;
4579 }
4580
4581 to->right_anchor = right->right_anchor;
4582 if (right_len == 0) {
4583 to->right_anchor |= left->right_anchor;
4584 }
4585 else {
4586 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4587 }
4588}
4589
4590static int
4591is_left_anchor(int anc)
4592{
4593 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4594 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4595 anc == ANCHOR_PREC_READ_NOT)
4596 return 0;
4597
4598 return 1;
4599}
4600
4601static int
4602is_set_opt_anc_info(OptAncInfo* to, int anc)
4603{
4604 if ((to->left_anchor & anc) != 0) return 1;
4605
4606 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4607}
4608
4609static void
4610add_opt_anc_info(OptAncInfo* to, int anc)
4611{
4612 if (is_left_anchor(anc))
4613 to->left_anchor |= anc;
4614 else
4615 to->right_anchor |= anc;
4616}
4617
4618static void
4619remove_opt_anc_info(OptAncInfo* to, int anc)
4620{
4621 if (is_left_anchor(anc))
4622 to->left_anchor &= ~anc;
4623 else
4624 to->right_anchor &= ~anc;
4625}
4626
4627static void
4628alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4629{
4630 to->left_anchor &= add->left_anchor;
4631 to->right_anchor &= add->right_anchor;
4632}
4633
4634static int
4635is_full_opt_exact_info(OptExactInfo* ex)
4636{
4637 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4638}
4639
4640static void
4641clear_opt_exact_info(OptExactInfo* ex)
4642{
4643 clear_mml(&ex->mmd);
4644 clear_opt_anc_info(&ex->anc);
4645 ex->reach_end = 0;
4646 ex->ignore_case = -1; /* unset */
4647 ex->len = 0;
4648 ex->s[0] = '\0';
4649}
4650
/* Copy the whole exact-string info *from into *to (struct assignment,
   string buffer included). */
static void
copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
{
  *to = *from;
}
4656
4657static void
4658concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4659{
4660 int i, j, len;
4661 UChar *p, *end;
4662 OptAncInfo tanc;
4663
4664 if (to->ignore_case < 0)
4665 to->ignore_case = add->ignore_case;
4666 else if (to->ignore_case != add->ignore_case)
4667 return ; /* avoid */
4668
4669 p = add->s;
4670 end = p + add->len;
4671 for (i = to->len; p < end; ) {
4672 len = enclen(enc, p, end);
4673 if (i + len > OPT_EXACT_MAXLEN) break;
4674 for (j = 0; j < len && p < end; j++)
4675 to->s[i++] = *p++;
4676 }
4677
4678 to->len = i;
4679 to->reach_end = (p == end ? add->reach_end : 0);
4680
4681 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4682 if (! to->reach_end) tanc.right_anchor = 0;
4683 copy_opt_anc_info(&to->anc, &tanc);
4684}
4685
4686static void
4687concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4688 int raw ARG_UNUSED, OnigEncoding enc)
4689{
4690 int i, j, len;
4691 UChar *p;
4692
4693 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4694 len = enclen(enc, p, end);
4695 if (i + len > OPT_EXACT_MAXLEN) break;
4696 for (j = 0; j < len && p < end; j++)
4697 to->s[i++] = *p++;
4698 }
4699
4700 to->len = i;
4701}
4702
4703static void
4704alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4705{
4706 int i, j, len;
4707
4708 if (add->len == 0 || to->len == 0) {
4709 clear_opt_exact_info(to);
4710 return ;
4711 }
4712
4713 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4714 clear_opt_exact_info(to);
4715 return ;
4716 }
4717
4718 for (i = 0; i < to->len && i < add->len; ) {
4719 if (to->s[i] != add->s[i]) break;
4720 len = enclen(env->enc, to->s + i, to->s + to->len);
4721
4722 for (j = 1; j < len; j++) {
4723 if (to->s[i+j] != add->s[i+j]) break;
4724 }
4725 if (j < len) break;
4726 i += len;
4727 }
4728
4729 if (! add->reach_end || i < add->len || i < to->len) {
4730 to->reach_end = 0;
4731 }
4732 to->len = i;
4733 if (to->ignore_case < 0)
4734 to->ignore_case = add->ignore_case;
4735 else if (add->ignore_case >= 0)
4736 to->ignore_case |= add->ignore_case;
4737
4738 alt_merge_opt_anc_info(&to->anc, &add->anc);
4739 if (! to->reach_end) to->anc.right_anchor = 0;
4740}
4741
4742static void
4743select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4744{
4745 int v1, v2;
4746
4747 v1 = now->len;
4748 v2 = alt->len;
4749
4750 if (v2 == 0) {
4751 return ;
4752 }
4753 else if (v1 == 0) {
4754 copy_opt_exact_info(now, alt);
4755 return ;
4756 }
4757 else if (v1 <= 2 && v2 <= 2) {
4758 /* ByteValTable[x] is big value --> low price */
4759 v2 = map_position_value(enc, now->s[0]);
4760 v1 = map_position_value(enc, alt->s[0]);
4761
4762 if (now->len > 1) v1 += 5;
4763 if (alt->len > 1) v2 += 5;
4764 }
4765
4766 if (now->ignore_case <= 0) v1 *= 2;
4767 if (alt->ignore_case <= 0) v2 *= 2;
4768
4769 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4770 copy_opt_exact_info(now, alt);
4771}
4772
4773static void
4774clear_opt_map_info(OptMapInfo* map)
4775{
4776 static const OptMapInfo clean_info = {
4777 {0, 0}, {0, 0}, 0,
4778 {
4779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4781 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4782 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4783 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4784 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4785 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4786 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4787 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4788 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4789 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4790 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4791 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4792 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4793 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4794 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4795 }
4796 };
4797
4798 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4799}
4800
/* Copy the whole map info *from into *to (struct assignment, byte map
   included). */
static void
copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
{
  *to = *from;
}
4806
4807static void
4808add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4809{
4810 if (map->map[c] == 0) {
4811 map->map[c] = 1;
4812 map->value += map_position_value(enc, c);
4813 }
4814}
4815
4816static int
4817add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4818 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4819{
4820 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4821 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4822 int i, n;
4823
4824 add_char_opt_map_info(map, p[0], enc);
4825
4826 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4827 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4828 if (n < 0) return n;
4829
4830 for (i = 0; i < n; i++) {
4831 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4832 add_char_opt_map_info(map, buf[0], enc);
4833 }
4834
4835 return 0;
4836}
4837
4838static void
4839select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4840{
4841 const int z = 1<<15; /* 32768: something big value */
4842
4843 int v1, v2;
4844
4845 if (alt->value == 0) return ;
4846 if (now->value == 0) {
4847 copy_opt_map_info(now, alt);
4848 return ;
4849 }
4850
4851 v1 = z / now->value;
4852 v2 = z / alt->value;
4853 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4854 copy_opt_map_info(now, alt);
4855}
4856
4857static int
4858comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4859{
4860#define COMP_EM_BASE 20
4861 int ve, vm;
4862
4863 if (m->value <= 0) return -1;
4864
4865 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4866 vm = COMP_EM_BASE * 5 * 2 / m->value;
4867 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4868}
4869
4870static void
4871alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4872{
4873 int i, val;
4874
4875 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4876 if (to->value == 0) return ;
4877 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4878 clear_opt_map_info(to);
4879 return ;
4880 }
4881
4882 alt_merge_mml(&to->mmd, &add->mmd);
4883
4884 val = 0;
4885 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4886 if (add->map[i])
4887 to->map[i] = 1;
4888
4889 if (to->map[i])
4890 val += map_position_value(enc, i);
4891 }
4892 to->value = val;
4893
4894 alt_merge_opt_anc_info(&to->anc, &add->anc);
4895}
4896
4897static void
4898set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4899{
4900 copy_mml(&(opt->exb.mmd), mmd);
4901 copy_mml(&(opt->expr.mmd), mmd);
4902 copy_mml(&(opt->map.mmd), mmd);
4903}
4904
4905static void
4906clear_node_opt_info(NodeOptInfo* opt)
4907{
4908 clear_mml(&opt->len);
4909 clear_opt_anc_info(&opt->anc);
4910 clear_opt_exact_info(&opt->exb);
4911 clear_opt_exact_info(&opt->exm);
4912 clear_opt_exact_info(&opt->expr);
4913 clear_opt_map_info(&opt->map);
4914}
4915
/* Copy the whole node optimization info *from into *to (struct
   assignment). */
static void
copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
{
  *to = *from;
}
4921
/*
 * Fold the info of `add` (the next element of a concatenation) into `to`
 * (the info accumulated for the elements before it).
 *
 * Both arguments are modified: besides updating `to`, this adjusts
 * add->exb.anc and add->map.anc and may clear add->exb once it has been
 * appended to an exact string of `to`.  Statement order matters, because
 * later steps read fields written by earlier ones.
 */
static void
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
{
  int exb_reach, exm_reach;
  OptAncInfo tanc;

  /* Anchors of the concatenation as a whole. */
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
  copy_opt_anc_info(&to->anc, &tanc);

  /* While everything so far has zero maximum length, the anchors collected
     so far are merged into add's boundary exact string ... */
  if (add->exb.len > 0 && to->len.max == 0) {
    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
                        to->len.max, add->len.max);
    copy_opt_anc_info(&add->exb.anc, &tanc);
  }

  /* ... and the left anchors into add's map when that map sits at
     position 0. */
  if (add->map.value > 0 && to->len.max == 0) {
    if (add->map.mmd.max == 0)
      add->map.anc.left_anchor |= to->anc.left_anchor;
  }

  /* Remember the reach_end flags before they are reset below. */
  exb_reach = to->exb.reach_end;
  exm_reach = to->exm.reach_end;

  if (add->len.max != 0)
    to->exb.reach_end = to->exm.reach_end = 0;

  /* If an exact string of `to` reached its end, add's boundary string is
     appended to it (boundary string first, else middle string) and then
     cleared. */
  if (add->exb.len > 0) {
    if (exb_reach) {
      concat_opt_exact_info(&to->exb, &add->exb, enc);
      clear_opt_exact_info(&add->exb);
    }
    else if (exm_reach) {
      concat_opt_exact_info(&to->exm, &add->exb, enc);
      clear_opt_exact_info(&add->exb);
    }
  }
  /* The middle candidate becomes the best of itself and add's strings. */
  select_opt_exact_info(enc, &to->exm, &add->exb);
  select_opt_exact_info(enc, &to->exm, &add->exm);

  if (to->expr.len > 0) {
    if (add->len.max > 0) {
      /* Truncate the prec-read string to add's maximum length, then offer
         it as a boundary candidate (position 0) or a middle candidate. */
      if (to->expr.len > (int )add->len.max)
        to->expr.len = (int )add->len.max;

      if (to->expr.mmd.max == 0)
        select_opt_exact_info(enc, &to->exb, &to->expr);
      else
        select_opt_exact_info(enc, &to->exm, &to->expr);
    }
  }
  else if (add->expr.len > 0) {
    copy_opt_exact_info(&to->expr, &add->expr);
  }

  select_opt_map_info(&to->map, &add->map);

  /* Finally extend the accumulated length by add's length. */
  add_mml(&to->len, &add->len);
}
4980
4981static void
4982alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4983{
4984 alt_merge_opt_anc_info (&to->anc, &add->anc);
4985 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4986 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4987 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4988 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4989
4990 alt_merge_mml(&to->len, &add->len);
4991}
4992
4993
4994#define MAX_NODE_OPT_INFO_REF_COUNT 5
4995
4996static int
4997optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4998{
4999 int type;
5000 int r = 0;
5001
5002 clear_node_opt_info(opt);
5003 set_bound_node_opt_info(opt, &env->mmd);
5004
5005 type = NTYPE(node);
5006 switch (type) {
5007 case NT_LIST:
5008 {
5009 OptEnv nenv;
5010 NodeOptInfo nopt;
5011 Node* nd = node;
5012
5013 copy_opt_env(&nenv, env);
5014 do {
5015 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
5016 if (r == 0) {
5017 add_mml(&nenv.mmd, &nopt.len);
5018 concat_left_node_opt_info(env->enc, opt, &nopt);
5019 }
5020 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
5021 }
5022 break;
5023
5024 case NT_ALT:
5025 {
5026 NodeOptInfo nopt;
5027 Node* nd = node;
5028
5029 do {
5030 r = optimize_node_left(NCAR(nd), &nopt, env);
5031 if (r == 0) {
5032 if (nd == node) copy_node_opt_info(opt, &nopt);
5033 else alt_merge_node_opt_info(opt, &nopt, env);
5034 }
5035 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
5036 }
5037 break;
5038
5039 case NT_STR:
5040 {
5041 StrNode* sn = NSTR(node);
5042 OnigDistance slen = sn->end - sn->s;
5043 int is_raw = NSTRING_IS_RAW(node);
5044
5045 if (! NSTRING_IS_AMBIG(node)) {
5046 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5047 is_raw, env->enc);
5048 opt->exb.ignore_case = 0;
5049 if (slen > 0) {
5050 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5051 }
5052 set_mml(&opt->len, slen, slen);
5053 }
5054 else {
5055 OnigDistance max;
5056
5057 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5058 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5059 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5060 }
5061 else {
5062 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5063 is_raw, env->enc);
5064 opt->exb.ignore_case = 1;
5065
5066 if (slen > 0) {
5067 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5068 env->enc, env->case_fold_flag);
5069 if (r != 0) break;
5070 }
5071
5072 max = slen;
5073 }
5074
5075 set_mml(&opt->len, slen, max);
5076 }
5077
5078 if ((OnigDistance )opt->exb.len == slen)
5079 opt->exb.reach_end = 1;
5080 }
5081 break;
5082
5083 case NT_CCLASS:
5084 {
5085 int i, z;
5086 CClassNode* cc = NCCLASS(node);
5087
5088 /* no need to check ignore case. (set in setup_tree()) */
5089
5090 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5091 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5092 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5093
5094 set_mml(&opt->len, min, max);
5095 }
5096 else {
5097 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5098 z = BITSET_AT(cc->bs, i);
5099 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5100 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5101 }
5102 }
5103 set_mml(&opt->len, 1, 1);
5104 }
5105 }
5106 break;
5107
5108 case NT_CTYPE:
5109 {
5110 int i, min, max;
5111 int maxcode;
5112
5113 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5114
5115 if (max == 1) {
5116 min = 1;
5117
5118 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5119 switch (NCTYPE(node)->ctype) {
5120 case ONIGENC_CTYPE_WORD:
5121 if (NCTYPE(node)->not != 0) {
5122 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5123 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5124 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5125 }
5126 }
5127 }
5128 else {
5129 for (i = 0; i < maxcode; i++) {
5130 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5131 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5132 }
5133 }
5134 }
5135 break;
5136 }
5137 }
5138 else {
5139 min = ONIGENC_MBC_MINLEN(env->enc);
5140 }
5141 set_mml(&opt->len, min, max);
5142 }
5143 break;
5144
5145 case NT_CANY:
5146 {
5147 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5148 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5149 set_mml(&opt->len, min, max);
5150 }
5151 break;
5152
5153 case NT_ANCHOR:
5154 switch (NANCHOR(node)->type) {
5155 case ANCHOR_BEGIN_BUF:
5156 case ANCHOR_BEGIN_POSITION:
5157 case ANCHOR_BEGIN_LINE:
5158 case ANCHOR_END_BUF:
5159 case ANCHOR_SEMI_END_BUF:
5160 case ANCHOR_END_LINE:
5161 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5162 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5163 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5164 break;
5165
5166 case ANCHOR_PREC_READ:
5167 {
5168 NodeOptInfo nopt;
5169
5170 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5171 if (r == 0) {
5172 if (nopt.exb.len > 0)
5173 copy_opt_exact_info(&opt->expr, &nopt.exb);
5174 else if (nopt.exm.len > 0)
5175 copy_opt_exact_info(&opt->expr, &nopt.exm);
5176
5177 opt->expr.reach_end = 0;
5178
5179 if (nopt.map.value > 0)
5180 copy_opt_map_info(&opt->map, &nopt.map);
5181 }
5182 }
5183 break;
5184
5185 case ANCHOR_LOOK_BEHIND_NOT:
5186 break;
5187 }
5188 break;
5189
5190 case NT_BREF:
5191 {
5192 int i;
5193 int* backs;
5194 OnigDistance min, max, tmin, tmax;
5195 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5196 BRefNode* br = NBREF(node);
5197
5198 if (br->state & NST_RECURSION) {
5199 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5200 break;
5201 }
5202 backs = BACKREFS_P(br);
5203 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5204 if (r != 0) break;
5205 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5206 if (r != 0) break;
5207 for (i = 1; i < br->back_num; i++) {
5208 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5209 if (r != 0) break;
5210 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5211 if (r != 0) break;
5212 if (min > tmin) min = tmin;
5213 if (max < tmax) max = tmax;
5214 }
5215 if (r == 0) set_mml(&opt->len, min, max);
5216 }
5217 break;
5218
5219#ifdef USE_SUBEXP_CALL
5220 case NT_CALL:
5221 if (IS_CALL_RECURSION(NCALL(node)))
5222 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5223 else {
5224 OnigOptionType save = env->options;
5225 env->options = NENCLOSE(NCALL(node)->target)->option;
5226 r = optimize_node_left(NCALL(node)->target, opt, env);
5227 env->options = save;
5228 }
5229 break;
5230#endif
5231
5232 case NT_QTFR:
5233 {
5234 int i;
5235 OnigDistance min, max;
5236 NodeOptInfo nopt;
5237 QtfrNode* qn = NQTFR(node);
5238
5239 r = optimize_node_left(qn->target, &nopt, env);
5240 if (r) break;
5241
5242 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5243 if (env->mmd.max == 0 &&
5244 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5245 if (IS_MULTILINE(env->options))
5246 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5247 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5248 else
5249 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5250 }
5251 }
5252 else {
5253 if (qn->lower > 0) {
5254 copy_node_opt_info(opt, &nopt);
5255 if (nopt.exb.len > 0) {
5256 if (nopt.exb.reach_end) {
5257 for (i = 2; i <= qn->lower &&
5258 ! is_full_opt_exact_info(&opt->exb); i++) {
5259 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5260 }
5261 if (i < qn->lower) {
5262 opt->exb.reach_end = 0;
5263 }
5264 }
5265 }
5266
5267 if (qn->lower != qn->upper) {
5268 opt->exb.reach_end = 0;
5269 opt->exm.reach_end = 0;
5270 }
5271 if (qn->lower > 1)
5272 opt->exm.reach_end = 0;
5273 }
5274 }
5275
5276 min = distance_multiply(nopt.len.min, qn->lower);
5277 if (IS_REPEAT_INFINITE(qn->upper))
5278 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5279 else
5280 max = distance_multiply(nopt.len.max, qn->upper);
5281
5282 set_mml(&opt->len, min, max);
5283 }
5284 break;
5285
5286 case NT_ENCLOSE:
5287 {
5288 EncloseNode* en = NENCLOSE(node);
5289
5290 switch (en->type) {
5291 case ENCLOSE_OPTION:
5292 {
5293 OnigOptionType save = env->options;
5294
5295 env->options = en->option;
5296 r = optimize_node_left(en->target, opt, env);
5297 env->options = save;
5298 }
5299 break;
5300
5301 case ENCLOSE_MEMORY:
5302#ifdef USE_SUBEXP_CALL
5303 en->opt_count++;
5304 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5305 OnigDistance min, max;
5306
5307 min = 0;
5308 max = ONIG_INFINITE_DISTANCE;
5309 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5310 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5311 set_mml(&opt->len, min, max);
5312 }
5313 else
5314#endif
5315 {
5316 r = optimize_node_left(en->target, opt, env);
5317
5318 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5319 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5320 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5321 }
5322 }
5323 break;
5324
5325 case ENCLOSE_STOP_BACKTRACK:
5326 case ENCLOSE_CONDITION:
5327 r = optimize_node_left(en->target, opt, env);
5328 break;
5329
5330 case ENCLOSE_ABSENT:
5331 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5332 break;
5333 }
5334 }
5335 break;
5336
5337 default:
5338#ifdef ONIG_DEBUG
5339 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5340 NTYPE(node));
5341#endif
5342 r = ONIGERR_TYPE_BUG;
5343 break;
5344 }
5345
5346 return r;
5347}
5348
5349static int
5350set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5351{
5352 int r;
5353 int allow_reverse;
5354
5355 if (e->len == 0) return 0;
5356
5357 reg->exact = (UChar* )xmalloc(e->len);
5358 CHECK_NULL_RETURN_MEMERR(reg->exact);
5359 xmemcpy(reg->exact, e->s, e->len);
5360 reg->exact_end = reg->exact + e->len;
5361
5362 allow_reverse =
5363 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5364
5365 if (e->ignore_case > 0) {
5366 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5367 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5368 reg->map, &(reg->int_map), 1);
5369 if (r == 0) {
5370 reg->optimize = (allow_reverse != 0
5371 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5372 }
5373 else {
5374 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5375 }
5376 }
5377 else {
5378 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5379 }
5380 }
5381 else {
5382 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5383 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5384 reg->map, &(reg->int_map), 0);
5385 if (r == 0) {
5386 reg->optimize = (allow_reverse != 0
5387 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5388 }
5389 else {
5390 reg->optimize = ONIG_OPTIMIZE_EXACT;
5391 }
5392 }
5393 else {
5394 reg->optimize = ONIG_OPTIMIZE_EXACT;
5395 }
5396 }
5397
5398 reg->dmin = e->mmd.min;
5399 reg->dmax = e->mmd.max;
5400
5401 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5402 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5403 }
5404
5405 return 0;
5406}
5407
5408static void
5409set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5410{
5411 int i;
5412
5413 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5414 reg->map[i] = m->map[i];
5415
5416 reg->optimize = ONIG_OPTIMIZE_MAP;
5417 reg->dmin = m->mmd.min;
5418 reg->dmax = m->mmd.max;
5419
5420 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5421 reg->threshold_len = (int )(reg->dmin + 1);
5422 }
5423}
5424
5425static void
5426set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5427{
5428 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5429 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5430}
5431
5432#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5433static void print_optimize_info(FILE* f, regex_t* reg);
5434#endif
5435
5436static int
5437set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5438{
5439
5440 int r;
5441 NodeOptInfo opt;
5442 OptEnv env;
5443
5444 env.enc = reg->enc;
5445 env.options = reg->options;
5446 env.case_fold_flag = reg->case_fold_flag;
5447 env.scan_env = scan_env;
5448 clear_mml(&env.mmd);
5449
5450 r = optimize_node_left(node, &opt, &env);
5451 if (r) return r;
5452
5453 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5454 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5455 ANCHOR_LOOK_BEHIND);
5456
5457 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5458 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5459
5460 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5461 ANCHOR_PREC_READ_NOT);
5462
5463 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5464 reg->anchor_dmin = opt.len.min;
5465 reg->anchor_dmax = opt.len.max;
5466 }
5467
5468 if (opt.exb.len > 0 || opt.exm.len > 0) {
5469 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5470 if (opt.map.value > 0 &&
5471 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5472 goto set_map;
5473 }
5474 else {
5475 r = set_optimize_exact_info(reg, &opt.exb);
5476 set_sub_anchor(reg, &opt.exb.anc);
5477 }
5478 }
5479 else if (opt.map.value > 0) {
5480 set_map:
5481 set_optimize_map_info(reg, &opt.map);
5482 set_sub_anchor(reg, &opt.map.anc);
5483 }
5484 else {
5485 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5486 if (opt.len.max == 0)
5487 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5488 }
5489
5490#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5491 print_optimize_info(stderr, reg);
5492#endif
5493 return r;
5494}
5495
5496static void
5497clear_optimize_info(regex_t* reg)
5498{
5499 reg->optimize = ONIG_OPTIMIZE_NONE;
5500 reg->anchor = 0;
5501 reg->anchor_dmin = 0;
5502 reg->anchor_dmax = 0;
5503 reg->sub_anchor = 0;
5504 reg->exact_end = (UChar* )NULL;
5505 reg->threshold_len = 0;
5506 xfree(reg->exact);
5507 reg->exact = (UChar* )NULL;
5508}
5509
5510#ifdef ONIG_DEBUG
5511
5512static void print_enc_string(FILE* fp, OnigEncoding enc,
5513 const UChar *s, const UChar *end)
5514{
5515 fprintf(fp, "\nPATTERN: /");
5516
5517 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5518 const UChar *p;
5519 OnigCodePoint code;
5520
5521 p = s;
5522 while (p < end) {
5523 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5524 if (code >= 0x80) {
5525 fprintf(fp, " 0x%04x ", (int )code);
5526 }
5527 else {
5528 fputc((int )code, fp);
5529 }
5530
5531 p += enclen(enc, p, end);
5532 }
5533 }
5534 else {
5535 while (s < end) {
5536 fputc((int )*s, fp);
5537 s++;
5538 }
5539 }
5540
5541 fprintf(fp, "/ (%s)\n", enc->name);
5542}
5543#endif /* ONIG_DEBUG */
5544
5545#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5546static void
5547print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5548{
5549 if (a == ONIG_INFINITE_DISTANCE)
5550 fputs("inf", f);
5551 else
5552 fprintf(f, "(%"PRIuPTR")", a);
5553
5554 fputs("-", f);
5555
5556 if (b == ONIG_INFINITE_DISTANCE)
5557 fputs("inf", f);
5558 else
5559 fprintf(f, "(%"PRIuPTR")", b);
5560}
5561
5562static void
5563print_anchor(FILE* f, int anchor)
5564{
5565 int q = 0;
5566
5567 fprintf(f, "[");
5568
5569 if (anchor & ANCHOR_BEGIN_BUF) {
5570 fprintf(f, "begin-buf");
5571 q = 1;
5572 }
5573 if (anchor & ANCHOR_BEGIN_LINE) {
5574 if (q) fprintf(f, ", ");
5575 q = 1;
5576 fprintf(f, "begin-line");
5577 }
5578 if (anchor & ANCHOR_BEGIN_POSITION) {
5579 if (q) fprintf(f, ", ");
5580 q = 1;
5581 fprintf(f, "begin-pos");
5582 }
5583 if (anchor & ANCHOR_END_BUF) {
5584 if (q) fprintf(f, ", ");
5585 q = 1;
5586 fprintf(f, "end-buf");
5587 }
5588 if (anchor & ANCHOR_SEMI_END_BUF) {
5589 if (q) fprintf(f, ", ");
5590 q = 1;
5591 fprintf(f, "semi-end-buf");
5592 }
5593 if (anchor & ANCHOR_END_LINE) {
5594 if (q) fprintf(f, ", ");
5595 q = 1;
5596 fprintf(f, "end-line");
5597 }
5598 if (anchor & ANCHOR_ANYCHAR_STAR) {
5599 if (q) fprintf(f, ", ");
5600 q = 1;
5601 fprintf(f, "anychar-star");
5602 }
5603 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5604 if (q) fprintf(f, ", ");
5605 fprintf(f, "anychar-star-ml");
5606 }
5607
5608 fprintf(f, "]");
5609}
5610
5611static void
5612print_optimize_info(FILE* f, regex_t* reg)
5613{
5614 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5615 "EXACT_IC", "MAP",
5616 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5617
5618 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5619 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5620 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5621 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5622 fprintf(f, "\n");
5623
5624 if (reg->optimize) {
5625 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5626 fprintf(f, "\n");
5627 }
5628 fprintf(f, "\n");
5629
5630 if (reg->exact) {
5631 UChar *p;
5632 fprintf(f, "exact: [");
5633 for (p = reg->exact; p < reg->exact_end; p++) {
5634 fputc(*p, f);
5635 }
5636 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5637 }
5638 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5639 int c, i, n = 0;
5640
5641 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5642 if (reg->map[i]) n++;
5643
5644 fprintf(f, "map: n=%d\n", n);
5645 if (n > 0) {
5646 c = 0;
5647 fputc('[', f);
5648 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5649 if (reg->map[i] != 0) {
5650 if (c > 0) fputs(", ", f);
5651 c++;
5652 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5653 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5654 fputc(i, f);
5655 else
5656 fprintf(f, "%d", i);
5657 }
5658 }
5659 fprintf(f, "]\n");
5660 }
5661 }
5662}
5663#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5664
5665
5666extern void
5667onig_free_body(regex_t* reg)
5668{
5669 if (IS_NOT_NULL(reg)) {
5670 xfree(reg->p);
5671 xfree(reg->exact);
5672 xfree(reg->int_map);
5673 xfree(reg->int_map_backward);
5674 xfree(reg->repeat_range);
5675 onig_free(reg->chain);
5676
5677#ifdef USE_NAMED_GROUP
5678 onig_names_free(reg);
5679#endif
5680 }
5681}
5682
5683extern void
5684onig_free(regex_t* reg)
5685{
5686 if (IS_NOT_NULL(reg)) {
5687 onig_free_body(reg);
5688 xfree(reg);
5689 }
5690}
5691
/*
 * Allocate `size` bytes and fill them with a copy of `ptr`.
 * Returns the new buffer, or NULL when the allocation fails.
 */
static void*
dup_copy(const void *ptr, size_t size)
{
  void *copy = xmalloc(size);

  if (IS_NULL(copy)) return copy;

  memcpy(copy, ptr, size);
  return copy;
}
5701
5702extern int
5703onig_reg_copy(regex_t** nreg, regex_t* oreg)
5704{
5705 if (IS_NOT_NULL(oreg)) {
5706 regex_t *reg = *nreg = (regex_t* )xmalloc(sizeof(regex_t));
5707 if (IS_NULL(reg)) return ONIGERR_MEMORY;
5708
5709 *reg = *oreg;
5710
5711# define COPY_FAILED(mem, size) IS_NULL(reg->mem = dup_copy(reg->mem, size))
5712
5713 if (IS_NOT_NULL(reg->exact)) {
5714 size_t exact_size = reg->exact_end - reg->exact;
5715 if (COPY_FAILED(exact, exact_size))
5716 goto err;
5717 (reg)->exact_end = (reg)->exact + exact_size;
5718 }
5719
5720 if (IS_NOT_NULL(reg->int_map)) {
5721 if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5722 goto err_int_map;
5723 }
5724 if (IS_NOT_NULL(reg->int_map_backward)) {
5725 if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE))
5726 goto err_int_map_backward;
5727 }
5728 if (IS_NOT_NULL(reg->p)) {
5729 if (COPY_FAILED(p, reg->alloc))
5730 goto err_p;
5731 }
5732 if (IS_NOT_NULL(reg->repeat_range)) {
5733 if (COPY_FAILED(repeat_range, reg->repeat_range_alloc * sizeof(OnigRepeatRange)))
5734 goto err_repeat_range;
5735 }
5736 if (IS_NOT_NULL(reg->name_table)) {
5737 if (onig_names_copy(reg, oreg))
5738 goto err_name_table;
5739 }
5740 if (IS_NOT_NULL(reg->chain)) {
5741 if (onig_reg_copy(&reg->chain, reg->chain))
5742 goto err_chain;
5743 }
5744 return 0;
5745# undef COPY_FAILED
5746
5747 err_chain:
5748 onig_names_free(reg);
5749 err_name_table:
5750 xfree(reg->repeat_range);
5751 err_repeat_range:
5752 xfree(reg->p);
5753 err_p:
5754 xfree(reg->int_map_backward);
5755 err_int_map_backward:
5756 xfree(reg->int_map);
5757 err_int_map:
5758 xfree(reg->exact);
5759 err:
5760 xfree(reg);
5761 return ONIGERR_MEMORY;
5762 }
5763 return 0;
5764}
5765
5766#ifdef RUBY
5767size_t
5768onig_memsize(const regex_t *reg)
5769{
5770 size_t size = sizeof(regex_t);
5771 if (IS_NULL(reg)) return 0;
5772 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5773 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5774 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5775 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5776 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5777 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5778
5779 return size;
5780}
5781
5782size_t
5783onig_region_memsize(const OnigRegion *regs)
5784{
5785 size_t size = sizeof(*regs);
5786 if (IS_NULL(regs)) return 0;
5787 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5788 return size;
5789}
5790#endif
5791
/*
 * REGEX_TRANSFER(to, from): release the buffers owned by `to`
 * (onig_free_body), copy the whole regex_t from `from` over it, then free
 * the `from` struct itself.  The copy is shallow, so the buffers that
 * `from` pointed to are afterwards referenced by `to`.
 */
#define REGEX_TRANSFER(to,from) do {\
  onig_free_body(to);\
  xmemcpy(to, from, sizeof(regex_t));\
  xfree(from);\
} while (0)

#if 0
/* Function form of REGEX_TRANSFER; currently compiled out. */
extern void
onig_transfer(regex_t* to, regex_t* from)
{
  REGEX_TRANSFER(to, from);
}
#endif
5805
5806#ifdef ONIG_DEBUG_COMPILE
5807static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5808#endif
5809#ifdef ONIG_DEBUG_PARSE_TREE
5810static void print_tree(FILE* f, Node* node);
5811#endif
5812
#ifdef RUBY
/*
 * Compile `pattern` into `reg` without source-location information:
 * forwards to onig_compile_ruby() with a NULL source file and line 0.
 */
extern int
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
             OnigErrorInfo* einfo)
{
  const char *no_sourcefile = NULL;
  const int no_sourceline = 0;

  return onig_compile_ruby(reg, pattern, pattern_end, einfo,
                           no_sourcefile, no_sourceline);
}
#endif
5821
5822#ifdef RUBY
5823extern int
5824onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5825 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5826#else
5827extern int
5828onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5829 OnigErrorInfo* einfo)
5830#endif
5831{
5832#define COMPILE_INIT_SIZE 20
5833
5834 int r;
5835 OnigDistance init_size;
5836 Node* root;
5837 ScanEnv scan_env = {0};
5838#ifdef USE_SUBEXP_CALL
5839 UnsetAddrList uslist;
5840#endif
5841
5842 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5843
5844#ifdef RUBY
5845 scan_env.sourcefile = sourcefile;
5846 scan_env.sourceline = sourceline;
5847#endif
5848
5849#ifdef ONIG_DEBUG
5850 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5851#endif
5852
5853 if (reg->alloc == 0) {
5854 init_size = (pattern_end - pattern) * 2;
5855 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5856 r = BBUF_INIT(reg, init_size);
5857 if (r != 0) goto end;
5858 }
5859 else
5860 reg->used = 0;
5861
5862 reg->num_mem = 0;
5863 reg->num_repeat = 0;
5864 reg->num_null_check = 0;
5865 reg->repeat_range_alloc = 0;
5866 reg->repeat_range = (OnigRepeatRange* )NULL;
5867#ifdef USE_COMBINATION_EXPLOSION_CHECK
5868 reg->num_comb_exp_check = 0;
5869#endif
5870
5871 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5872 if (r != 0) goto err;
5873
5874#ifdef ONIG_DEBUG_PARSE_TREE
5875# if 0
5876 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5877 print_tree(stderr, root);
5878# endif
5879#endif
5880
5881#ifdef USE_NAMED_GROUP
5882 /* mixed use named group and no-named group */
5883 if (scan_env.num_named > 0 &&
5884 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5885 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5886 if (scan_env.num_named != scan_env.num_mem)
5887 r = disable_noname_group_capture(&root, reg, &scan_env);
5888 else
5889 r = numbered_ref_check(root);
5890
5891 if (r != 0) goto err;
5892 }
5893#endif
5894
5895#ifdef USE_SUBEXP_CALL
5896 if (scan_env.num_call > 0) {
5897 r = unset_addr_list_init(&uslist, scan_env.num_call);
5898 if (r != 0) goto err;
5899 scan_env.unset_addr_list = &uslist;
5900 r = setup_subexp_call(root, &scan_env);
5901 if (r != 0) goto err_unset;
5902 r = subexp_recursive_check_trav(root, &scan_env);
5903 if (r < 0) goto err_unset;
5904 r = subexp_inf_recursive_check_trav(root, &scan_env);
5905 if (r != 0) goto err_unset;
5906
5907 reg->num_call = scan_env.num_call;
5908 }
5909 else
5910 reg->num_call = 0;
5911#endif
5912
5913 r = setup_tree(root, reg, 0, &scan_env);
5914 if (r != 0) goto err_unset;
5915
5916#ifdef ONIG_DEBUG_PARSE_TREE
5917 print_tree(stderr, root);
5918#endif
5919
5920 reg->capture_history = scan_env.capture_history;
5921 reg->bt_mem_start = scan_env.bt_mem_start;
5922 reg->bt_mem_start |= reg->capture_history;
5923 if (IS_FIND_CONDITION(reg->options))
5924 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5925 else {
5926 reg->bt_mem_end = scan_env.bt_mem_end;
5927 reg->bt_mem_end |= reg->capture_history;
5928 }
5929
5930#ifdef USE_COMBINATION_EXPLOSION_CHECK
5931 if (scan_env.backrefed_mem == 0
5932# ifdef USE_SUBEXP_CALL
5933 || scan_env.num_call == 0
5934# endif
5935 ) {
5936 setup_comb_exp_check(root, 0, &scan_env);
5937# ifdef USE_SUBEXP_CALL
5938 if (scan_env.has_recursion != 0) {
5939 scan_env.num_comb_exp_check = 0;
5940 }
5941 else
5942# endif
5943 if (scan_env.comb_exp_max_regnum > 0) {
5944 int i;
5945 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5946 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5947 scan_env.num_comb_exp_check = 0;
5948 break;
5949 }
5950 }
5951 }
5952 }
5953
5954 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5955#endif
5956
5957 clear_optimize_info(reg);
5958#ifndef ONIG_DONT_OPTIMIZE
5959 r = set_optimize_info_from_tree(root, reg, &scan_env);
5960 if (r != 0) goto err_unset;
5961#endif
5962
5963 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5964 xfree(scan_env.mem_nodes_dynamic);
5965 scan_env.mem_nodes_dynamic = (Node** )NULL;
5966 }
5967
5968 r = compile_tree(root, reg);
5969 if (r == 0) {
5970 r = add_opcode(reg, OP_END);
5971#ifdef USE_SUBEXP_CALL
5972 if (scan_env.num_call > 0) {
5973 r = unset_addr_list_fix(&uslist, reg);
5974 unset_addr_list_end(&uslist);
5975 if (r) goto err;
5976 }
5977#endif
5978
5979 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5980 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5981 else {
5982 if (reg->bt_mem_start != 0)
5983 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5984 else
5985 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5986 }
5987 }
5988#ifdef USE_SUBEXP_CALL
5989 else if (scan_env.num_call > 0) {
5990 unset_addr_list_end(&uslist);
5991 }
5992#endif
5993 onig_node_free(root);
5994
5995#ifdef ONIG_DEBUG_COMPILE
5996# ifdef USE_NAMED_GROUP
5997 onig_print_names(stderr, reg);
5998# endif
5999 print_compiled_byte_code_list(stderr, reg);
6000#endif
6001
6002 end:
6003 onig_reg_resize(reg);
6004 return r;
6005
6006 err_unset:
6007#ifdef USE_SUBEXP_CALL
6008 if (scan_env.num_call > 0) {
6009 unset_addr_list_end(&uslist);
6010 }
6011#endif
6012 err:
6013 if (IS_NOT_NULL(scan_env.error)) {
6014 if (IS_NOT_NULL(einfo)) {
6015 einfo->enc = scan_env.enc;
6016 einfo->par = scan_env.error;
6017 einfo->par_end = scan_env.error_end;
6018 }
6019 }
6020
6021 onig_node_free(root);
6022 xfree(scan_env.mem_nodes_dynamic);
6023
6024 return r;
6025}
6026
/* Non-zero once onig_init() has run; reset to 0 by onig_end(). */
static int onig_inited = 0;
6028
6029extern int
6030onig_reg_init(regex_t* reg, OnigOptionType option,
6031 OnigCaseFoldType case_fold_flag,
6032 OnigEncoding enc, const OnigSyntaxType* syntax)
6033{
6034 if (! onig_inited)
6035 onig_init();
6036
6037 if (IS_NULL(reg))
6038 return ONIGERR_INVALID_ARGUMENT;
6039
6040 if (ONIGENC_IS_UNDEF(enc))
6041 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
6042
6043 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6044 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6045 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6046 }
6047
6048 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6049 option |= syntax->options;
6050 option &= ~ONIG_OPTION_SINGLELINE;
6051 }
6052 else
6053 option |= syntax->options;
6054
6055 (reg)->enc = enc;
6056 (reg)->options = option;
6057 (reg)->syntax = syntax;
6058 (reg)->optimize = 0;
6059 (reg)->exact = (UChar* )NULL;
6060 (reg)->int_map = (int* )NULL;
6061 (reg)->int_map_backward = (int* )NULL;
6062 (reg)->chain = (regex_t* )NULL;
6063
6064 (reg)->p = (UChar* )NULL;
6065 (reg)->alloc = 0;
6066 (reg)->used = 0;
6067 (reg)->name_table = (void* )NULL;
6068
6069 (reg)->case_fold_flag = case_fold_flag;
6070
6071 (reg)->timelimit = 0;
6072
6073 return 0;
6074}
6075
6076extern int
6077onig_new_without_alloc(regex_t* reg, const UChar* pattern,
6078 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
6079 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6080{
6081 int r;
6082
6083 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6084 if (r) return r;
6085
6086 r = onig_compile(reg, pattern, pattern_end, einfo);
6087 return r;
6088}
6089
6090extern int
6091onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6092 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
6093 OnigErrorInfo* einfo)
6094{
6095 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6096 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6097
6098 int r = onig_new_without_alloc(*reg, pattern, pattern_end, option, enc, syntax, einfo);
6099 if (r) {
6100 onig_free(*reg);
6101 *reg = NULL;
6102 }
6103
6104 return r;
6105}
6106
6107extern int
6108onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
6109{
6110 return onig_init();
6111}
6112
6113extern int
6114onig_init(void)
6115{
6116 if (onig_inited != 0)
6117 return 0;
6118
6119 onig_inited = 1;
6120
6121#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6122 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6123#endif
6124
6125 onigenc_init();
6126 /* onigenc_set_default_caseconv_table((UChar* )0); */
6127
6128#ifdef ONIG_DEBUG_STATISTICS
6129 onig_statistics_init();
6130#endif
6131
6132 return 0;
6133}
6134
6135
/* Head of the singly linked list of callbacks registered with
 * onig_add_end_call(); new items are pushed at the front and the list is
 * drained by exec_end_call_list(). */
static OnigEndCallListItemType* EndCallTop;
6137
6138extern void onig_add_end_call(void (*func)(void))
6139{
6140 OnigEndCallListItemType* item;
6141
6142 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6143 if (item == 0) return ;
6144
6145 item->next = EndCallTop;
6146 item->func = func;
6147
6148 EndCallTop = item;
6149}
6150
6151static void
6152exec_end_call_list(void)
6153{
6154 OnigEndCallListItemType* prev;
6155 void (*func)(void);
6156
6157 while (EndCallTop != 0) {
6158 func = EndCallTop->func;
6159 (*func)();
6160
6161 prev = EndCallTop;
6162 EndCallTop = EndCallTop->next;
6163 xfree(prev);
6164 }
6165}
6166
6167extern int
6168onig_end(void)
6169{
6170 exec_end_call_list();
6171
6172#ifdef ONIG_DEBUG_STATISTICS
6173 onig_print_statistics(stderr);
6174#endif
6175
6176#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6177 _CrtDumpMemoryLeaks();
6178#endif
6179
6180 onig_inited = 0;
6181
6182 return 0;
6183}
6184
6185extern int
6186onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6187{
6188 OnigCodePoint n, *data;
6189 OnigCodePoint low, high, x;
6190
6191 GET_CODE_POINT(n, p);
6192 data = (OnigCodePoint* )p;
6193 data++;
6194
6195 for (low = 0, high = n; low < high; ) {
6196 x = (low + high) >> 1;
6197 if (code > data[x * 2 + 1])
6198 low = x + 1;
6199 else
6200 high = x;
6201 }
6202
6203 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6204}
6205
6206extern int
6207onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6208{
6209 int found;
6210
6211 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6212 if (IS_NULL(cc->mbuf)) {
6213 found = 0;
6214 }
6215 else {
6216 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6217 }
6218 }
6219 else {
6220 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6221 }
6222
6223 if (IS_NCCLASS_NOT(cc))
6224 return !found;
6225 else
6226 return found;
6227}
6228
6229extern int
6230onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6231{
6232 int len;
6233
6234 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6235 len = 2;
6236 }
6237 else {
6238 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6239 }
6240 return onig_is_code_in_cc_len(len, code, cc);
6241}
6242
6243
6244#ifdef ONIG_DEBUG
6245
/* arguments type */
/* Argument kinds stored in the third column of OnigOpInfo[] and returned
 * by op2arg_type(). */
# define ARG_SPECIAL     -1
# define ARG_NON          0
# define ARG_RELADDR      1
# define ARG_ABSADDR      2
# define ARG_LENGTH       3
# define ARG_MEMNUM       4
# define ARG_OPTION       5
# define ARG_STATE_CHECK  6

/*
 * Debug table with one row per opcode: { opcode, printable name, argument
 * kind }.  The final row has opcode -1 and marks the end of the table;
 * op2name() and op2arg_type() stop scanning when they reach it.
 */
OnigOpInfoType OnigOpInfo[] = {
  { OP_FINISH,            "finish",          ARG_NON },
  { OP_END,               "end",             ARG_NON },
  { OP_EXACT1,            "exact1",          ARG_SPECIAL },
  { OP_EXACT2,            "exact2",          ARG_SPECIAL },
  { OP_EXACT3,            "exact3",          ARG_SPECIAL },
  { OP_EXACT4,            "exact4",          ARG_SPECIAL },
  { OP_EXACT5,            "exact5",          ARG_SPECIAL },
  { OP_EXACTN,            "exactn",          ARG_SPECIAL },
  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
  { OP_CCLASS,            "cclass",          ARG_SPECIAL },
  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
  { OP_ANYCHAR,           "anychar",         ARG_NON },
  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
  { OP_WORD,                "word",            ARG_NON },
  { OP_NOT_WORD,            "not-word",        ARG_NON },
  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
  { OP_WORD_END,            "word-end",        ARG_NON },
  { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
  { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
  { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
  { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
  { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
  { OP_END_BUF,             "end-buf",         ARG_NON },
  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
  { OP_END_LINE,            "end-line",        ARG_NON },
  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
  { OP_BACKREF1,            "backref1",             ARG_NON },
  { OP_BACKREF2,            "backref2",             ARG_NON },
  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
  { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
  { OP_KEEP,                "keep",                 ARG_NON },
  { OP_FAIL,                "fail",                 ARG_NON },
  { OP_JUMP,                "jump",                 ARG_RELADDR },
  { OP_PUSH,                "push",                 ARG_RELADDR },
  { OP_POP,                 "pop",                  ARG_NON },
  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
  { OP_PUSH_POS,             "push-pos",             ARG_NON },
  { OP_POP_POS,              "pop-pos",              ARG_NON },
  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
  { OP_PUSH_ABSENT_POS,      "push-absent-pos",      ARG_NON },
  { OP_ABSENT,               "absent",               ARG_RELADDR },
  { OP_ABSENT_END,           "absent-end",           ARG_NON },
  { OP_CALL,                 "call",                 ARG_ABSADDR },
  { OP_RETURN,               "return",               ARG_NON },
  { OP_CONDITION,            "condition",            ARG_SPECIAL },
  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
    "state-check-anychar-ml*", ARG_STATE_CHECK },
  { -1, "", ARG_NON }
};
6358
6359static const char*
6360op2name(int opcode)
6361{
6362 int i;
6363
6364 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6365 if (opcode == OnigOpInfo[i].opcode)
6366 return OnigOpInfo[i].name;
6367 }
6368 return "";
6369}
6370
6371static int
6372op2arg_type(int opcode)
6373{
6374 int i;
6375
6376 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6377 if (opcode == OnigOpInfo[i].opcode)
6378 return OnigOpInfo[i].arg_type;
6379 }
6380 return ARG_SPECIAL;
6381}
6382
6383# ifdef ONIG_DEBUG_PARSE_TREE
/* Write `indent` space characters to f.
 * Nothing is written when indent is zero or negative. */
static void
Indent(FILE* f, int indent)
{
  int remaining;

  for (remaining = indent; remaining > 0; remaining--) {
    putc(' ', f);
  }
}
6390# endif /* ONIG_DEBUG_PARSE_TREE */
6391
6392static void
6393p_string(FILE* f, ptrdiff_t len, UChar* s)
6394{
6395 fputs(":", f);
6396 while (len-- > 0) { fputc(*s++, f); }
6397}
6398
6399static void
6400p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6401{
6402 int x = len * mb_len;
6403
6404 fprintf(f, ":%d:", len);
6405 while (x-- > 0) { fputc(*s++, f); }
6406}
6407
6408extern void
6409onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6410 OnigEncoding enc)
6411{
6412 int i, n, arg_type;
6413 RelAddrType addr;
6414 LengthType len;
6415 MemNumType mem;
6416 StateCheckNumType scn;
6417 OnigCodePoint code;
6418 UChar *q;
6419
6420 fprintf(f, "[%s", op2name(*bp));
6421 arg_type = op2arg_type(*bp);
6422 if (arg_type != ARG_SPECIAL) {
6423 bp++;
6424 switch (arg_type) {
6425 case ARG_NON:
6426 break;
6427 case ARG_RELADDR:
6428 GET_RELADDR_INC(addr, bp);
6429 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6430 break;
6431 case ARG_ABSADDR:
6432 GET_ABSADDR_INC(addr, bp);
6433 fprintf(f, ":(%d)", addr);
6434 break;
6435 case ARG_LENGTH:
6436 GET_LENGTH_INC(len, bp);
6437 fprintf(f, ":%d", len);
6438 break;
6439 case ARG_MEMNUM:
6440 mem = *((MemNumType* )bp);
6441 bp += SIZE_MEMNUM;
6442 fprintf(f, ":%d", mem);
6443 break;
6444 case ARG_OPTION:
6445 {
6446 OnigOptionType option = *((OnigOptionType* )bp);
6447 bp += SIZE_OPTION;
6448 fprintf(f, ":%d", option);
6449 }
6450 break;
6451
6452 case ARG_STATE_CHECK:
6453 scn = *((StateCheckNumType* )bp);
6454 bp += SIZE_STATE_CHECK_NUM;
6455 fprintf(f, ":%d", scn);
6456 break;
6457 }
6458 }
6459 else {
6460 switch (*bp++) {
6461 case OP_EXACT1:
6462 case OP_ANYCHAR_STAR_PEEK_NEXT:
6463 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6464 p_string(f, 1, bp++); break;
6465 case OP_EXACT2:
6466 p_string(f, 2, bp); bp += 2; break;
6467 case OP_EXACT3:
6468 p_string(f, 3, bp); bp += 3; break;
6469 case OP_EXACT4:
6470 p_string(f, 4, bp); bp += 4; break;
6471 case OP_EXACT5:
6472 p_string(f, 5, bp); bp += 5; break;
6473 case OP_EXACTN:
6474 GET_LENGTH_INC(len, bp);
6475 p_len_string(f, len, 1, bp);
6476 bp += len;
6477 break;
6478
6479 case OP_EXACTMB2N1:
6480 p_string(f, 2, bp); bp += 2; break;
6481 case OP_EXACTMB2N2:
6482 p_string(f, 4, bp); bp += 4; break;
6483 case OP_EXACTMB2N3:
6484 p_string(f, 6, bp); bp += 6; break;
6485 case OP_EXACTMB2N:
6486 GET_LENGTH_INC(len, bp);
6487 p_len_string(f, len, 2, bp);
6488 bp += len * 2;
6489 break;
6490 case OP_EXACTMB3N:
6491 GET_LENGTH_INC(len, bp);
6492 p_len_string(f, len, 3, bp);
6493 bp += len * 3;
6494 break;
6495 case OP_EXACTMBN:
6496 {
6497 int mb_len;
6498
6499 GET_LENGTH_INC(mb_len, bp);
6500 GET_LENGTH_INC(len, bp);
6501 fprintf(f, ":%d:%d:", mb_len, len);
6502 n = len * mb_len;
6503 while (n-- > 0) { fputc(*bp++, f); }
6504 }
6505 break;
6506
6507 case OP_EXACT1_IC:
6508 len = enclen(enc, bp, bpend);
6509 p_string(f, len, bp);
6510 bp += len;
6511 break;
6512 case OP_EXACTN_IC:
6513 GET_LENGTH_INC(len, bp);
6514 p_len_string(f, len, 1, bp);
6515 bp += len;
6516 break;
6517
6518 case OP_CCLASS:
6519 n = bitset_on_num((BitSetRef )bp);
6520 bp += SIZE_BITSET;
6521 fprintf(f, ":%d", n);
6522 break;
6523
6524 case OP_CCLASS_NOT:
6525 n = bitset_on_num((BitSetRef )bp);
6526 bp += SIZE_BITSET;
6527 fprintf(f, ":%d", n);
6528 break;
6529
6530 case OP_CCLASS_MB:
6531 case OP_CCLASS_MB_NOT:
6532 GET_LENGTH_INC(len, bp);
6533 q = bp;
6534# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6535 ALIGNMENT_RIGHT(q);
6536# endif
6537 GET_CODE_POINT(code, q);
6538 bp += len;
6539 fprintf(f, ":%d:%d", (int )code, len);
6540 break;
6541
6542 case OP_CCLASS_MIX:
6543 case OP_CCLASS_MIX_NOT:
6544 n = bitset_on_num((BitSetRef )bp);
6545 bp += SIZE_BITSET;
6546 GET_LENGTH_INC(len, bp);
6547 q = bp;
6548# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6549 ALIGNMENT_RIGHT(q);
6550# endif
6551 GET_CODE_POINT(code, q);
6552 bp += len;
6553 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6554 break;
6555
6556 case OP_BACKREFN_IC:
6557 mem = *((MemNumType* )bp);
6558 bp += SIZE_MEMNUM;
6559 fprintf(f, ":%d", mem);
6560 break;
6561
6562 case OP_BACKREF_MULTI_IC:
6563 case OP_BACKREF_MULTI:
6564 fputs(" ", f);
6565 GET_LENGTH_INC(len, bp);
6566 for (i = 0; i < len; i++) {
6567 GET_MEMNUM_INC(mem, bp);
6568 if (i > 0) fputs(", ", f);
6569 fprintf(f, "%d", mem);
6570 }
6571 break;
6572
6573 case OP_BACKREF_WITH_LEVEL:
6574 {
6575 OnigOptionType option;
6576 LengthType level;
6577
6578 GET_OPTION_INC(option, bp);
6579 fprintf(f, ":%d", option);
6580 GET_LENGTH_INC(level, bp);
6581 fprintf(f, ":%d", level);
6582
6583 fputs(" ", f);
6584 GET_LENGTH_INC(len, bp);
6585 for (i = 0; i < len; i++) {
6586 GET_MEMNUM_INC(mem, bp);
6587 if (i > 0) fputs(", ", f);
6588 fprintf(f, "%d", mem);
6589 }
6590 }
6591 break;
6592
6593 case OP_REPEAT:
6594 case OP_REPEAT_NG:
6595 {
6596 mem = *((MemNumType* )bp);
6597 bp += SIZE_MEMNUM;
6598 addr = *((RelAddrType* )bp);
6599 bp += SIZE_RELADDR;
6600 fprintf(f, ":%d:%d", mem, addr);
6601 }
6602 break;
6603
6604 case OP_PUSH_OR_JUMP_EXACT1:
6605 case OP_PUSH_IF_PEEK_NEXT:
6606 addr = *((RelAddrType* )bp);
6607 bp += SIZE_RELADDR;
6608 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6609 p_string(f, 1, bp);
6610 bp += 1;
6611 break;
6612
6613 case OP_LOOK_BEHIND:
6614 GET_LENGTH_INC(len, bp);
6615 fprintf(f, ":%d", len);
6616 break;
6617
6618 case OP_PUSH_LOOK_BEHIND_NOT:
6619 GET_RELADDR_INC(addr, bp);
6620 GET_LENGTH_INC(len, bp);
6621 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6622 break;
6623
6624 case OP_STATE_CHECK_PUSH:
6625 case OP_STATE_CHECK_PUSH_OR_JUMP:
6626 scn = *((StateCheckNumType* )bp);
6627 bp += SIZE_STATE_CHECK_NUM;
6628 addr = *((RelAddrType* )bp);
6629 bp += SIZE_RELADDR;
6630 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6631 break;
6632
6633 case OP_CONDITION:
6634 GET_MEMNUM_INC(mem, bp);
6635 GET_RELADDR_INC(addr, bp);
6636 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6637 break;
6638
6639 default:
6640 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6641 bp[-1]);
6642 }
6643 }
6644 fputs("]", f);
6645 if (nextp) *nextp = bp;
6646}
6647
6648# ifdef ONIG_DEBUG_COMPILE
6649static void
6650print_compiled_byte_code_list(FILE* f, regex_t* reg)
6651{
6652 int ncode;
6653 UChar* bp = reg->p;
6654 UChar* end = reg->p + reg->used;
6655
6656 fprintf(f, "code length: %d", reg->used);
6657
6658 ncode = -1;
6659 while (bp < end) {
6660 ncode++;
6661 if (ncode % 5 == 0)
6662 fprintf(f, "\n%ld:", bp - reg->p);
6663 else
6664 fprintf(f, " %ld:", bp - reg->p);
6665 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6666 }
6667
6668 fprintf(f, "\n");
6669}
6670# endif /* ONIG_DEBUG_COMPILE */
6671
6672# ifdef ONIG_DEBUG_PARSE_TREE
6673static void
6674print_indent_tree(FILE* f, Node* node, int indent)
6675{
6676 int i, type, container_p = 0;
6677 int add = 3;
6678 UChar* p;
6679
6680 Indent(f, indent);
6681 if (IS_NULL(node)) {
6682 fprintf(f, "ERROR: null node!!!\n");
6683 exit (0);
6684 }
6685
6686 type = NTYPE(node);
6687 switch (type) {
6688 case NT_LIST:
6689 case NT_ALT:
6690 if (NTYPE(node) == NT_LIST)
6691 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6692 else
6693 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6694
6695 print_indent_tree(f, NCAR(node), indent + add);
6696 while (IS_NOT_NULL(node = NCDR(node))) {
6697 if (NTYPE(node) != type) {
6698 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6699 exit(0);
6700 }
6701 print_indent_tree(f, NCAR(node), indent + add);
6702 }
6703 break;
6704
6705 case NT_STR:
6706 fprintf(f, "<string%s:%"PRIxPTR">",
6707 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6708 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6709 if (*p >= 0x20 && *p < 0x7f)
6710 fputc(*p, f);
6711 else {
6712 fprintf(f, " 0x%02x", *p);
6713 }
6714 }
6715 break;
6716
6717 case NT_CCLASS:
6718 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6719 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6720 if (NCCLASS(node)->mbuf) {
6721 BBuf* bbuf = NCCLASS(node)->mbuf;
6722 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6723 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6724 fprintf(f, "%d", *data++);
6725 for (; data < end; data+=2) {
6726 fprintf(f, ",");
6727 fprintf(f, "%04x-%04x", data[0], data[1]);
6728 }
6729 }
6730 break;
6731
6732 case NT_CTYPE:
6733 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6734 switch (NCTYPE(node)->ctype) {
6735 case ONIGENC_CTYPE_WORD:
6736 if (NCTYPE(node)->not != 0)
6737 fputs("not word", f);
6738 else
6739 fputs("word", f);
6740 break;
6741
6742 default:
6743 fprintf(f, "ERROR: undefined ctype.\n");
6744 exit(0);
6745 }
6746 break;
6747
6748 case NT_CANY:
6749 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6750 break;
6751
6752 case NT_ANCHOR:
6753 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6754 switch (NANCHOR(node)->type) {
6755 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6756 case ANCHOR_END_BUF: fputs("end buf", f); break;
6757 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6758 case ANCHOR_END_LINE: fputs("end line", f); break;
6759 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6760 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6761
6762 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6763 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6764# ifdef USE_WORD_BEGIN_END
6765 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6766 case ANCHOR_WORD_END: fputs("word end", f); break;
6767# endif
6768 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6769 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6770 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6771 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6772 case ANCHOR_KEEP: fputs("keep",f); break;
6773
6774 default:
6775 fprintf(f, "ERROR: undefined anchor type.\n");
6776 break;
6777 }
6778 break;
6779
6780 case NT_BREF:
6781 {
6782 int* p;
6783 BRefNode* br = NBREF(node);
6784 p = BACKREFS_P(br);
6785 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6786 for (i = 0; i < br->back_num; i++) {
6787 if (i > 0) fputs(", ", f);
6788 fprintf(f, "%d", p[i]);
6789 }
6790 }
6791 break;
6792
6793# ifdef USE_SUBEXP_CALL
6794 case NT_CALL:
6795 {
6796 CallNode* cn = NCALL(node);
6797 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6798 p_string(f, cn->name_end - cn->name, cn->name);
6799 }
6800 break;
6801# endif
6802
6803 case NT_QTFR:
6804 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6805 NQTFR(node)->lower, NQTFR(node)->upper,
6806 (NQTFR(node)->greedy ? "" : "?"));
6807 print_indent_tree(f, NQTFR(node)->target, indent + add);
6808 break;
6809
6810 case NT_ENCLOSE:
6811 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6812 switch (NENCLOSE(node)->type) {
6813 case ENCLOSE_OPTION:
6814 fprintf(f, "option:%d", NENCLOSE(node)->option);
6815 break;
6816 case ENCLOSE_MEMORY:
6817 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6818 break;
6819 case ENCLOSE_STOP_BACKTRACK:
6820 fprintf(f, "stop-bt");
6821 break;
6822 case ENCLOSE_CONDITION:
6823 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6824 break;
6825 case ENCLOSE_ABSENT:
6826 fprintf(f, "absent");
6827 break;
6828
6829 default:
6830 break;
6831 }
6832 fprintf(f, "\n");
6833 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6834 break;
6835
6836 default:
6837 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6838 break;
6839 }
6840
6841 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6842 type != NT_ENCLOSE)
6843 fprintf(f, "\n");
6844
6845 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6846
6847 fflush(f);
6848}
6849
6850static void
6851print_tree(FILE* f, Node* node)
6852{
6853 print_indent_tree(f, node, 0);
6854}
6855# endif /* ONIG_DEBUG_PARSE_TREE */
6856#endif /* ONIG_DEBUG */
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
int len
Length of the buffer.
Definition io.h:8
VALUE type(ANYARGS)
ANYARGS-ed function type.