284 lines
7.8 KiB
Plaintext
To: vim-dev@vim.org
|
|
Subject: Patch 7.1.310
|
|
Fcc: outbox
|
|
From: Bram Moolenaar <Bram@moolenaar.net>
|
|
Mime-Version: 1.0
|
|
Content-Type: text/plain; charset=ISO-8859-1
|
|
Content-Transfer-Encoding: 8bit
|
|
------------
|
|
|
|
Patch 7.1.310
|
|
Problem: An incomplete UTF-8 byte sequence at the end of the file is not detected.
|
|
Memory that was never written is accessed.
|
|
Solution: Check that the last bytes in the buffer form a valid UTF-8
|
|
character. (mostly by Ben Schmidt)
|
|
Also fix the wrong line number that was reported for the error.
|
|
Files: src/fileio.c
|
|
|
|
|
|
*** ../vim-7.1.309/src/fileio.c Wed May 7 19:05:55 2008
|
|
--- src/fileio.c Wed Jun 4 18:28:48 2008
|
|
***************
|
|
*** 1288,1299 ****
|
|
#ifdef FEAT_MBYTE
|
|
else if (conv_restlen > 0)
|
|
{
|
|
! /* Reached end-of-file but some trailing bytes could
|
|
! * not be converted. Truncated file? */
|
|
! if (conv_error == 0)
|
|
! conv_error = linecnt;
|
|
! if (bad_char_behavior != BAD_DROP)
|
|
{
|
|
fio_flags = 0; /* don't convert this */
|
|
# ifdef USE_ICONV
|
|
if (iconv_fd != (iconv_t)-1)
|
|
--- 1288,1336 ----
|
|
#ifdef FEAT_MBYTE
|
|
else if (conv_restlen > 0)
|
|
{
|
|
! /*
|
|
! * Reached end-of-file but some trailing bytes could
|
|
! * not be converted. Truncated file?
|
|
! */
|
|
!
|
|
! /* When we did a conversion report an error. */
|
|
! if (fio_flags != 0
|
|
! # ifdef USE_ICONV
|
|
! || iconv_fd != (iconv_t)-1
|
|
! # endif
|
|
! )
|
|
{
|
|
+ if (conv_error == 0)
|
|
+ conv_error = curbuf->b_ml.ml_line_count
|
|
+ - linecnt + 1;
|
|
+ }
|
|
+ /* Remember the first linenr with an illegal byte */
|
|
+ else if (illegal_byte == 0)
|
|
+ illegal_byte = curbuf->b_ml.ml_line_count
|
|
+ - linecnt + 1;
|
|
+ if (bad_char_behavior == BAD_DROP)
|
|
+ {
|
|
+ *(ptr - conv_restlen) = NUL;
|
|
+ conv_restlen = 0;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ /* Replace the trailing bytes with the replacement
|
|
+ * character if we were converting; if we weren't,
|
|
+ * leave the UTF8 checking code to do it, as it
|
|
+ * works slightly differently. */
|
|
+ if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
|
|
+ # ifdef USE_ICONV
|
|
+ || iconv_fd != (iconv_t)-1
|
|
+ # endif
|
|
+ ))
|
|
+ {
|
|
+ while (conv_restlen > 0)
|
|
+ {
|
|
+ *(--ptr) = bad_char_behavior;
|
|
+ --conv_restlen;
|
|
+ }
|
|
+ }
|
|
fio_flags = 0; /* don't convert this */
|
|
# ifdef USE_ICONV
|
|
if (iconv_fd != (iconv_t)-1)
|
|
***************
|
|
*** 1302,1321 ****
|
|
iconv_fd = (iconv_t)-1;
|
|
}
|
|
# endif
|
|
- if (bad_char_behavior == BAD_KEEP)
|
|
- {
|
|
- /* Keep the trailing bytes as-is. */
|
|
- size = conv_restlen;
|
|
- ptr -= conv_restlen;
|
|
- }
|
|
- else
|
|
- {
|
|
- /* Replace the trailing bytes with the
|
|
- * replacement character. */
|
|
- size = 1;
|
|
- *--ptr = bad_char_behavior;
|
|
- }
|
|
- conv_restlen = 0;
|
|
}
|
|
}
|
|
#endif
|
|
--- 1339,1344 ----
|
|
***************
|
|
*** 1397,1402 ****
|
|
--- 1420,1430 ----
|
|
goto retry;
|
|
}
|
|
}
|
|
+
|
|
+ /* Include not converted bytes. */
|
|
+ ptr -= conv_restlen;
|
|
+ size += conv_restlen;
|
|
+ conv_restlen = 0;
|
|
#endif
|
|
/*
|
|
* Break here for a read error or end-of-file.
|
|
***************
|
|
*** 1406,1416 ****
|
|
|
|
#ifdef FEAT_MBYTE
|
|
|
|
- /* Include not converted bytes. */
|
|
- ptr -= conv_restlen;
|
|
- size += conv_restlen;
|
|
- conv_restlen = 0;
|
|
-
|
|
# ifdef USE_ICONV
|
|
if (iconv_fd != (iconv_t)-1)
|
|
{
|
|
--- 1434,1439 ----
|
|
***************
|
|
*** 1872,1883 ****
|
|
size = (long)((ptr + real_size) - dest);
|
|
ptr = dest;
|
|
}
|
|
! else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
|
|
{
|
|
! /* Reading UTF-8: Check if the bytes are valid UTF-8.
|
|
! * Need to start before "ptr" when part of the character was
|
|
! * read in the previous read() call. */
|
|
! for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
|
|
{
|
|
int todo = (int)((ptr + size) - p);
|
|
int l;
|
|
--- 1895,1906 ----
|
|
size = (long)((ptr + real_size) - dest);
|
|
ptr = dest;
|
|
}
|
|
! else if (enc_utf8 && !curbuf->b_p_bin)
|
|
{
|
|
! int incomplete_tail = FALSE;
|
|
!
|
|
! /* Reading UTF-8: Check if the bytes are valid UTF-8. */
|
|
! for (p = ptr; ; ++p)
|
|
{
|
|
int todo = (int)((ptr + size) - p);
|
|
int l;
|
|
***************
|
|
*** 1891,1933 ****
|
|
* read() will get the next bytes, we'll check it
|
|
* then. */
|
|
l = utf_ptr2len_len(p, todo);
|
|
! if (l > todo)
|
|
{
|
|
! /* Incomplete byte sequence, the next read()
|
|
! * should get them and check the bytes. */
|
|
! p += todo;
|
|
! break;
|
|
}
|
|
! if (l == 1)
|
|
{
|
|
/* Illegal byte. If we can try another encoding
|
|
! * do that. */
|
|
! if (can_retry)
|
|
break;
|
|
-
|
|
- /* Remember the first linenr with an illegal byte */
|
|
- if (illegal_byte == 0)
|
|
- illegal_byte = readfile_linenr(linecnt, ptr, p);
|
|
# ifdef USE_ICONV
|
|
/* When we did a conversion report an error. */
|
|
if (iconv_fd != (iconv_t)-1 && conv_error == 0)
|
|
conv_error = readfile_linenr(linecnt, ptr, p);
|
|
# endif
|
|
|
|
/* Drop, keep or replace the bad byte. */
|
|
if (bad_char_behavior == BAD_DROP)
|
|
{
|
|
! mch_memmove(p, p+1, todo - 1);
|
|
--p;
|
|
--size;
|
|
}
|
|
else if (bad_char_behavior != BAD_KEEP)
|
|
*p = bad_char_behavior;
|
|
}
|
|
! p += l - 1;
|
|
}
|
|
}
|
|
! if (p < ptr + size)
|
|
{
|
|
/* Detected a UTF-8 error. */
|
|
rewind_retry:
|
|
--- 1914,1969 ----
|
|
* read() will get the next bytes, we'll check it
|
|
* then. */
|
|
l = utf_ptr2len_len(p, todo);
|
|
! if (l > todo && !incomplete_tail)
|
|
{
|
|
! /* Avoid retrying with a different encoding when
|
|
! * a truncated file is more likely, or attempting
|
|
! * to read the rest of an incomplete sequence when
|
|
! * we have already done so. */
|
|
! if (p > ptr || filesize > 0)
|
|
! incomplete_tail = TRUE;
|
|
! /* Incomplete byte sequence, move it to conv_rest[]
|
|
! * and try to read the rest of it, unless we've
|
|
! * already done so. */
|
|
! if (p > ptr)
|
|
! {
|
|
! conv_restlen = todo;
|
|
! mch_memmove(conv_rest, p, conv_restlen);
|
|
! size -= conv_restlen;
|
|
! break;
|
|
! }
|
|
}
|
|
! if (l == 1 || l > todo)
|
|
{
|
|
/* Illegal byte. If we can try another encoding
|
|
! * do that, unless at EOF where a truncated
|
|
! * file is more likely than a conversion error. */
|
|
! if (can_retry && !incomplete_tail)
|
|
break;
|
|
# ifdef USE_ICONV
|
|
/* When we did a conversion report an error. */
|
|
if (iconv_fd != (iconv_t)-1 && conv_error == 0)
|
|
conv_error = readfile_linenr(linecnt, ptr, p);
|
|
# endif
|
|
+ /* Remember the first linenr with an illegal byte */
|
|
+ if (conv_error == 0 && illegal_byte == 0)
|
|
+ illegal_byte = readfile_linenr(linecnt, ptr, p);
|
|
|
|
/* Drop, keep or replace the bad byte. */
|
|
if (bad_char_behavior == BAD_DROP)
|
|
{
|
|
! mch_memmove(p, p + 1, todo - 1);
|
|
--p;
|
|
--size;
|
|
}
|
|
else if (bad_char_behavior != BAD_KEEP)
|
|
*p = bad_char_behavior;
|
|
}
|
|
! else
|
|
! p += l - 1;
|
|
}
|
|
}
|
|
! if (p < ptr + size && !incomplete_tail)
|
|
{
|
|
/* Detected a UTF-8 error. */
|
|
rewind_retry:
|
|
*** ../vim-7.1.309/src/version.c Wed Jun 4 15:27:43 2008
|
|
--- src/version.c Wed Jun 4 19:35:16 2008
|
|
***************
|
|
*** 668,669 ****
|
|
--- 673,676 ----
|
|
{ /* Add new patch number below this line */
|
|
+ /**/
|
|
+ 310,
|
|
/**/
|
|
|
|
--
|
|
Normal people believe that if it ain't broke, don't fix it. Engineers believe
|
|
that if it ain't broke, it doesn't have enough features yet.
|
|
(Scott Adams - The Dilbert principle)
|
|
|
|
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
|
|
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
|
|
\\\ download, build and distribute -- http://www.A-A-P.org ///
|
|
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///
|