vim/7.3.1037
2013-06-04 12:06:13 +02:00

409 lines
11 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

To: vim_dev@googlegroups.com
Subject: Patch 7.3.1037
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------
Patch 7.3.1037
Problem: Look-behind matching is very slow on long lines.
Solution: Add a byte limit to how far back an attempt is made.
Files: src/regexp.c, src/regexp_nfa.c, src/testdir/test64.in,
src/testdir/test64.ok
*** ../vim-7.3.1036/src/regexp.c 2013-05-21 21:37:01.000000000 +0200
--- src/regexp.c 2013-05-29 14:34:51.000000000 +0200
***************
*** 701,706 ****
--- 701,707 ----
# define CASEMBC(x)
#endif
static void reginsert __ARGS((int, char_u *));
+ static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
static void reginsert_limits __ARGS((int, long, long, char_u *));
static char_u *re_put_long __ARGS((char_u *pr, long_u val));
static int read_limits __ARGS((long *, long *));
***************
*** 1781,1787 ****
--- 1782,1790 ----
case Magic('@'):
{
int lop = END;
+ int nr;
+ nr = getdecchrs();
switch (no_Magic(getchr()))
{
case '=': lop = MATCH; break; /* \@= */
***************
*** 1803,1809 ****
*flagp |= HASLOOKBH;
}
regtail(ret, regnode(END)); /* operand ends */
! reginsert(lop, ret);
break;
}
--- 1806,1819 ----
*flagp |= HASLOOKBH;
}
regtail(ret, regnode(END)); /* operand ends */
! if (lop == BEHIND || lop == NOBEHIND)
! {
! if (nr < 0)
! nr = 0; /* no limit is same as zero limit */
! reginsert_nr(lop, nr, ret);
! }
! else
! reginsert(lop, ret);
break;
}
***************
*** 2780,2785 ****
--- 2790,2827 ----
/*
* Insert an operator in front of already-emitted operand.
+ * Add a number to the operator.
+ */
+ static void
+ reginsert_nr(op, val, opnd)
+ int op;
+ long val;
+ char_u *opnd;
+ {
+ char_u *src;
+ char_u *dst;
+ char_u *place;
+
+ if (regcode == JUST_CALC_SIZE)
+ {
+ regsize += 7;
+ return;
+ }
+ src = regcode;
+ regcode += 7;
+ dst = regcode;
+ while (src > opnd)
+ *--dst = *--src;
+
+ place = opnd; /* Op node, where operand used to be. */
+ *place++ = op;
+ *place++ = NUL;
+ *place++ = NUL;
+ place = re_put_long(place, (long_u)val);
+ }
+
+ /*
+ * Insert an operator in front of already-emitted operand.
* The operator has the given limit values as operands. Also set next pointer.
*
* Means relocating the operand.
***************
*** 3182,3188 ****
}
/*
! * get and return the value of the decimal string immediately after the
* current position. Return -1 for invalid. Consumes all digits.
*/
static int
--- 3224,3230 ----
}
/*
! * Get and return the value of the decimal string immediately after the
* current position. Return -1 for invalid. Consumes all digits.
*/
static int
***************
*** 3200,3205 ****
--- 3242,3248 ----
nr *= 10;
nr += c - '0';
++regparse;
+ curchr = -1; /* no longer valid */
}
if (i == 0)
***************
*** 5432,5438 ****
/* save the position after the found match for next */
reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
! /* start looking for a match with operand at the current
* position. Go back one character until we find the
* result, hitting the start of the line or the previous
* line (for multi-line matching).
--- 5475,5481 ----
/* save the position after the found match for next */
reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
! /* Start looking for a match with operand at the current
* position. Go back one character until we find the
* result, hitting the start of the line or the previous
* line (for multi-line matching).
***************
*** 5444,5450 ****
rp->rs_state = RS_BEHIND2;
reg_restore(&rp->rs_un.regsave, &backpos);
! scan = OPERAND(rp->rs_scan);
}
break;
--- 5487,5493 ----
rp->rs_state = RS_BEHIND2;
reg_restore(&rp->rs_un.regsave, &backpos);
! scan = OPERAND(rp->rs_scan) + 4;
}
break;
***************
*** 5472,5480 ****
--- 5515,5526 ----
}
else
{
+ long limit;
+
/* No match or a match that doesn't end where we want it: Go
* back one character. May go to previous line once. */
no = OK;
+ limit = OPERAND_MIN(rp->rs_scan);
if (REG_MULTI)
{
if (rp->rs_un.regsave.rs_u.pos.col == 0)
***************
*** 5493,5519 ****
}
}
else
#ifdef FEAT_MBYTE
! if (has_mbyte)
! rp->rs_un.regsave.rs_u.pos.col -=
! (*mb_head_off)(regline, regline
+ rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
! else
#endif
! --rp->rs_un.regsave.rs_u.pos.col;
}
else
{
if (rp->rs_un.regsave.rs_u.ptr == regline)
no = FAIL;
else
! --rp->rs_un.regsave.rs_u.ptr;
}
if (no == OK)
{
/* Advanced, prepare for finding match again. */
reg_restore(&rp->rs_un.regsave, &backpos);
! scan = OPERAND(rp->rs_scan);
if (status == RA_MATCH)
{
/* We did match, so subexpr may have been changed,
--- 5539,5579 ----
}
}
else
+ {
#ifdef FEAT_MBYTE
! if (has_mbyte)
! rp->rs_un.regsave.rs_u.pos.col -=
! (*mb_head_off)(regline, regline
+ rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
! else
#endif
! --rp->rs_un.regsave.rs_u.pos.col;
! if (limit > 0
! && ((rp->rs_un.regsave.rs_u.pos.lnum
! < behind_pos.rs_u.pos.lnum
! ? (colnr_T)STRLEN(regline)
! : behind_pos.rs_u.pos.col)
! - rp->rs_un.regsave.rs_u.pos.col > limit))
! no = FAIL;
! }
}
else
{
if (rp->rs_un.regsave.rs_u.ptr == regline)
no = FAIL;
else
! {
! mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
! if (limit > 0 && (long)(behind_pos.rs_u.ptr
! - rp->rs_un.regsave.rs_u.ptr) > limit)
! no = FAIL;
! }
}
if (no == OK)
{
/* Advanced, prepare for finding match again. */
reg_restore(&rp->rs_un.regsave, &backpos);
! scan = OPERAND(rp->rs_scan) + 4;
if (status == RA_MATCH)
{
/* We did match, so subexpr may have been changed,
***************
*** 7773,7779 ****
#ifdef DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
! "BACKTACKING Regexp Engine",
"NFA Regexp Engine"
};
#endif
--- 7833,7839 ----
#ifdef DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
! "BACKTRACKING Regexp Engine",
"NFA Regexp Engine"
};
#endif
*** ../vim-7.3.1036/src/regexp_nfa.c 2013-05-28 22:52:11.000000000 +0200
--- src/regexp_nfa.c 2013-05-29 16:31:13.000000000 +0200
***************
*** 1331,1336 ****
--- 1331,1346 ----
case '=':
EMIT(NFA_PREV_ATOM_NO_WIDTH);
break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
case '!':
case '<':
case '>':
***************
*** 3817,3823 ****
* because recursive calls should only start in the first position.
* Also don't start a match past the first line. */
if (nfa_match == FALSE && start->c == NFA_MOPEN + 0
! && reglnum == 0 && clen != 0)
{
#ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n");
--- 3827,3835 ----
* because recursive calls should only start in the first position.
* Also don't start a match past the first line. */
if (nfa_match == FALSE && start->c == NFA_MOPEN + 0
! && reglnum == 0 && clen != 0
! && (ireg_maxcol == 0
! || (colnr_T)(reginput - regline) < ireg_maxcol))
{
#ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n");
*** ../vim-7.3.1036/src/testdir/test64.in 2013-05-28 22:03:13.000000000 +0200
--- src/testdir/test64.in 2013-05-29 14:56:44.000000000 +0200
***************
*** 336,341 ****
--- 336,349 ----
:"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
:call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
:"
+ :"""" Look-behind with limit
+ :call add(tl, [0, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
+ :call add(tl, [0, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
+ :call add(tl, [0, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
+ :call add(tl, [0, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
+ :call add(tl, [0, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
+ :call add(tl, [0, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
+ :"
:"""" Run the tests
:"
:"
***************
*** 406,411 ****
--- 414,425 ----
y$Gop:"
:"
:"
+ :" Check a pattern with a look behind crossing a line boundary
+ /^Behind:
+ /\(<\_[xy]\+\)\@3<=start
+ :.yank
+ Gop:"
+ :"
:/\%#=1^Results/,$wq! test.out
ENDTEST
***************
*** 423,426 ****
--- 437,448 ----
xjk
lmn
+ Behind:
+ asdfasd<yyy
+ xxstart1
+ asdfasd<yy
+ xxxxstart2
+ asdfasd<yy
+ xxxstart3
+
Results of test64:
*** ../vim-7.3.1036/src/testdir/test64.ok 2013-05-28 22:03:13.000000000 +0200
--- src/testdir/test64.ok 2013-05-29 14:59:37.000000000 +0200
***************
*** 719,724 ****
--- 719,736 ----
OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
+ OK 0 - <\@<=span.
+ OK 1 - <\@<=span.
+ OK 0 - <\@1<=span.
+ OK 1 - <\@1<=span.
+ OK 0 - <\@2<=span.
+ OK 1 - <\@2<=span.
+ OK 0 - \(<<\)\@<=span.
+ OK 1 - \(<<\)\@<=span.
+ OK 0 - \(<<\)\@1<=span.
+ OK 1 - \(<<\)\@1<=span.
+ OK 0 - \(<<\)\@2<=span.
+ OK 1 - \(<<\)\@2<=span.
192.168.0.1
192.168.0.1
192.168.0.1
***************
*** 726,728 ****
--- 738,742 ----
<T="5">Ta 5</Title>
<T="7">Ac 7</Title>
ghi
+
+ xxxstart3
*** ../vim-7.3.1036/src/version.c 2013-05-28 22:52:11.000000000 +0200
--- src/version.c 2013-05-29 13:20:48.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1037,
/**/
--
hundred-and-one symptoms of being an internet addict:
11. You find yourself typing "com" after every period when using a word
processor.com
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///