313 lines
9.5 KiB
Plaintext
To: vim-dev@vim.org
|
|
Subject: Patch 7.2.312
|
|
Fcc: outbox
|
|
From: Bram Moolenaar <Bram@moolenaar.net>
|
|
Mime-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
------------
|
|
|
|
Patch 7.2.312
|
|
Problem: iconv() returns an invalid character sequence when conversion
         fails. It should return an empty string. (Yongwei Wu)
|
|
Solution: Be more strict about invalid characters in the input.
|
|
Files: src/mbyte.c
|
|
|
|
|
|
*** ../vim-7.2.311/src/mbyte.c 2009-06-16 15:23:07.000000000 +0200
|
|
--- src/mbyte.c 2009-11-25 16:10:44.000000000 +0100
|
|
***************
|
|
*** 133,154 ****
|
|
static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
|
|
static int dbcs_ptr2char __ARGS((char_u *p));
|
|
|
|
! /* Lookup table to quickly get the length in bytes of a UTF-8 character from
|
|
! * the first byte of a UTF-8 string. Bytes which are illegal when used as the
|
|
! * first byte have a one, because these will be used separately. */
|
|
static char utf8len_tab[256] =
|
|
{
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
|
|
! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
|
|
};
|
|
|
|
/*
|
|
* XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
|
|
* in the "xim.log" file.
|
|
*/
|
|
--- 133,172 ----
|
|
static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
|
|
static int dbcs_ptr2char __ARGS((char_u *p));
|
|
|
|
! /*
|
|
! * Lookup table to quickly get the length in bytes of a UTF-8 character from
|
|
! * the first byte of a UTF-8 string.
|
|
! * Bytes which are illegal when used as the first byte have a 1.
|
|
! * The NUL byte has length 1.
|
|
! */
|
|
static char utf8len_tab[256] =
|
|
{
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
|
|
};
|
|
|
|
/*
|
|
+ * Like utf8len_tab above, but using a zero for illegal lead bytes.
|
|
+ */
|
|
+ static char utf8len_tab_zero[256] =
|
|
+ {
|
|
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,
|
|
+ };
|
|
+
|
|
+ /*
|
|
* XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
|
|
* in the "xim.log" file.
|
|
*/
|
|
***************
|
|
*** 1352,1358 ****
|
|
if (size > 0 && *p >= 0x80)
|
|
{
|
|
if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
|
|
! return 1;
|
|
c = utf_ptr2char(p);
|
|
/* An illegal byte is displayed as <xx>. */
|
|
if (utf_ptr2len(p) == 1 || c == NUL)
|
|
--- 1370,1376 ----
|
|
if (size > 0 && *p >= 0x80)
|
|
{
|
|
if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
|
|
! return 1; /* truncated */
|
|
c = utf_ptr2char(p);
|
|
/* An illegal byte is displayed as <xx>. */
|
|
if (utf_ptr2len(p) == 1 || c == NUL)
|
|
***************
|
|
*** 1473,1479 ****
|
|
if (p[0] < 0x80) /* be quick for ASCII */
|
|
return p[0];
|
|
|
|
! len = utf8len_tab[p[0]];
|
|
if (len > 1 && (p[1] & 0xc0) == 0x80)
|
|
{
|
|
if (len == 2)
|
|
--- 1491,1497 ----
|
|
if (p[0] < 0x80) /* be quick for ASCII */
|
|
return p[0];
|
|
|
|
! len = utf8len_tab_zero[p[0]];
|
|
if (len > 1 && (p[1] & 0xc0) == 0x80)
|
|
{
|
|
if (len == 2)
|
|
***************
|
|
*** 1723,1728 ****
|
|
--- 1741,1747 ----
|
|
/*
|
|
* Return length of UTF-8 character, obtained from the first byte.
|
|
* "b" must be between 0 and 255!
|
|
+ * Returns 1 for an invalid first byte value.
|
|
*/
|
|
int
|
|
utf_byte2len(b)
|
|
***************
|
|
*** 1737,1742 ****
|
|
--- 1756,1762 ----
|
|
* Returns 1 for "".
|
|
* Returns 1 for an illegal byte sequence (also in incomplete byte seq.).
|
|
* Returns number > "size" for an incomplete byte sequence.
|
|
+ * Never returns zero.
|
|
*/
|
|
int
|
|
utf_ptr2len_len(p, size)
|
|
***************
|
|
*** 1747,1757 ****
|
|
int i;
|
|
int m;
|
|
|
|
! if (*p == NUL)
|
|
! return 1;
|
|
! m = len = utf8len_tab[*p];
|
|
if (len > size)
|
|
m = size; /* incomplete byte sequence. */
|
|
for (i = 1; i < m; ++i)
|
|
if ((p[i] & 0xc0) != 0x80)
|
|
return 1;
|
|
--- 1767,1779 ----
|
|
int i;
|
|
int m;
|
|
|
|
! len = utf8len_tab[*p];
|
|
! if (len == 1)
|
|
! return 1; /* NUL, ascii or illegal lead byte */
|
|
if (len > size)
|
|
m = size; /* incomplete byte sequence. */
|
|
+ else
|
|
+ m = len;
|
|
for (i = 1; i < m; ++i)
|
|
if ((p[i] & 0xc0) != 0x80)
|
|
return 1;
|
|
***************
|
|
*** 2505,2510 ****
|
|
--- 2527,2533 ----
|
|
/*
|
|
* mb_head_off() function pointer.
|
|
* Return offset from "p" to the first byte of the character it points into.
|
|
+ * If "p" points to the NUL at the end of the string return 0.
|
|
* Returns 0 when already at the first byte of a character.
|
|
*/
|
|
int
|
|
***************
|
|
*** 2524,2530 ****
|
|
|
|
/* It can't be a trailing byte when not using DBCS, at the start of the
|
|
* string or the previous byte can't start a double-byte. */
|
|
! if (p <= base || MB_BYTE2LEN(p[-1]) == 1)
|
|
return 0;
|
|
|
|
/* This is slow: need to start at the base and go forward until the
|
|
--- 2547,2553 ----
|
|
|
|
/* It can't be a trailing byte when not using DBCS, at the start of the
|
|
* string or the previous byte can't start a double-byte. */
|
|
! if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
|
|
return 0;
|
|
|
|
/* This is slow: need to start at the base and go forward until the
|
|
***************
|
|
*** 2552,2558 ****
|
|
* lead byte in the current cell. */
|
|
if (p <= base
|
|
|| (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
|
|
! || MB_BYTE2LEN(p[-1]) == 1)
|
|
return 0;
|
|
|
|
/* This is slow: need to start at the base and go forward until the
|
|
--- 2575,2582 ----
|
|
* lead byte in the current cell. */
|
|
if (p <= base
|
|
|| (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
|
|
! || MB_BYTE2LEN(p[-1]) == 1
|
|
! || *p == NUL)
|
|
return 0;
|
|
|
|
/* This is slow: need to start at the base and go forward until the
|
|
***************
|
|
*** 2578,2583 ****
|
|
--- 2602,2608 ----
|
|
char_u *q;
|
|
char_u *s;
|
|
int c;
|
|
+ int len;
|
|
#ifdef FEAT_ARABIC
|
|
char_u *j;
|
|
#endif
|
|
***************
|
|
*** 2597,2604 ****
|
|
--q;
|
|
/* Check for illegal sequence. Do allow an illegal byte after where we
|
|
* started. */
|
|
! if (utf8len_tab[*q] != (int)(s - q + 1)
|
|
! && utf8len_tab[*q] != (int)(p - q + 1))
|
|
return 0;
|
|
|
|
if (q <= base)
|
|
--- 2622,2629 ----
|
|
--q;
|
|
/* Check for illegal sequence. Do allow an illegal byte after where we
|
|
* started. */
|
|
! len = utf8len_tab[*q];
|
|
! if (len != (int)(s - q + 1) && len != (int)(p - q + 1))
|
|
return 0;
|
|
|
|
if (q <= base)
|
|
***************
|
|
*** 2810,2818 ****
|
|
|
|
while (end == NULL ? *p != NUL : p < end)
|
|
{
|
|
! if ((*p & 0xc0) == 0x80)
|
|
return FALSE; /* invalid lead byte */
|
|
- l = utf8len_tab[*p];
|
|
if (end != NULL && p + l > end)
|
|
return FALSE; /* incomplete byte sequence */
|
|
++p;
|
|
--- 2835,2843 ----
|
|
|
|
while (end == NULL ? *p != NUL : p < end)
|
|
{
|
|
! l = utf8len_tab_zero[*p];
|
|
! if (l == 0)
|
|
return FALSE; /* invalid lead byte */
|
|
if (end != NULL && p + l > end)
|
|
return FALSE; /* incomplete byte sequence */
|
|
++p;
|
|
***************
|
|
*** 6117,6128 ****
|
|
d = retval;
|
|
for (i = 0; i < len; ++i)
|
|
{
|
|
! l = utf_ptr2len(ptr + i);
|
|
if (l == 0)
|
|
*d++ = NUL;
|
|
else if (l == 1)
|
|
{
|
|
! if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i)
|
|
{
|
|
/* Incomplete sequence at the end. */
|
|
*unconvlenp = len - i;
|
|
--- 6142,6161 ----
|
|
d = retval;
|
|
for (i = 0; i < len; ++i)
|
|
{
|
|
! l = utf_ptr2len_len(ptr + i, len - i);
|
|
if (l == 0)
|
|
*d++ = NUL;
|
|
else if (l == 1)
|
|
{
|
|
! int l_w = utf8len_tab_zero[ptr[i]];
|
|
!
|
|
! if (l_w == 0)
|
|
! {
|
|
! /* Illegal utf-8 byte cannot be converted */
|
|
! vim_free(retval);
|
|
! return NULL;
|
|
! }
|
|
! if (unconvlenp != NULL && l_w > len - i)
|
|
{
|
|
/* Incomplete sequence at the end. */
|
|
*unconvlenp = len - i;
|
|
*** ../vim-7.2.311/src/version.c 2009-12-02 13:32:10.000000000 +0100
|
|
--- src/version.c 2009-12-02 15:00:23.000000000 +0100
|
|
***************
|
|
*** 683,684 ****
|
|
--- 683,686 ----
|
|
{ /* Add new patch number below this line */
|
|
+ /**/
|
|
+ 312,
|
|
/**/
|
|
|
|
--
|
|
hundred-and-one symptoms of being an internet addict:
|
|
6. You refuse to go to a vacation spot with no electricity and no phone lines.
|
|
|
|
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
|
|
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
|
|
\\\ download, build and distribute -- http://www.A-A-P.org ///
|
|
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///
|