diffutils/diffutils-2.8.4-i18n.patch
2007-08-14 09:51:22 +00:00

807 lines
18 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

--- diffutils-2.8.1/src/diff.c.i18n 2002-03-24 07:35:28.000000000 +0000
+++ diffutils-2.8.1/src/diff.c 2007-08-14 10:39:40.000000000 +0100
@@ -273,6 +273,13 @@
re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
excluded = new_exclude ();
+#ifdef HANDLE_MULTIBYTE
+ if (MB_CUR_MAX > 1)
+ lines_differ = lines_differ_multibyte;
+ else
+#endif
+ lines_differ = lines_differ_singlebyte;
+
/* Decode the options. */
while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
--- diffutils-2.8.1/src/diff.h.i18n 2002-03-11 21:24:42.000000000 +0000
+++ diffutils-2.8.1/src/diff.h 2007-08-14 10:41:21.000000000 +0100
@@ -25,6 +25,19 @@
#define TAB_WIDTH 8
+/* For platform which support the ISO C amendement 1 functionality we
+ support user defined character classes. */
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
+# include <wchar.h>
+# include <wctype.h>
+# if defined (HAVE_MBRTOWC)
+# define HANDLE_MULTIBYTE 1
+# endif
+#endif
+
+#define TAB_WIDTH 8
+
/* What kind of changes a hunk contains. */
enum changes
{
@@ -349,7 +362,13 @@
extern char const pr_program[];
char *concat (char const *, char const *, char const *);
char *dir_file_pathname (char const *, char const *);
-bool lines_differ (char const *, char const *);
+
+bool (*lines_differ) (char const *, size_t, char const *, size_t);
+bool lines_differ_singlebyte (char const *, size_t, char const *, size_t);
+#ifdef HANDLE_MULTIBYTE
+bool lines_differ_multibyte (char const *, size_t, char const *, size_t);
+#endif
+
lin translate_line_number (struct file_data const *, lin);
struct change *find_change (struct change *);
struct change *find_reverse_change (struct change *);
--- diffutils-2.8.1/src/io.c.i18n 2002-03-25 04:58:36.000000000 +0000
+++ diffutils-2.8.1/src/io.c 2007-08-14 10:40:05.000000000 +0100
@@ -25,6 +25,7 @@
#include <regex.h>
#include <setmode.h>
#include <xalloc.h>
+#include <assert.h>
/* Rotate an unsigned value to the left. */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
@@ -212,6 +213,28 @@
/* Split the file into lines, simultaneously computing the equivalence
class for each line. */
+#ifdef HANDLE_MULTIBYTE
+# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \
+do \
+{ \
+ mbstate_t state_bak = STATE; \
+ \
+ CONVFAIL = 0; \
+ MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \
+ \
+ switch (MBLENGTH) \
+ { \
+ case (size_t)-2: \
+ case (size_t)-1: \
+ STATE = state_bak; \
+ ++CONVFAIL; \
+ /* Fall through. */ \
+ case 0: \
+ MBLENGTH = 1; \
+ } \
+} \
+while (0)
+#endif
static void
find_and_hash_each_line (struct file_data *current)
@@ -238,12 +261,280 @@
bool same_length_diff_contents_compare_anyway =
diff_length_compare_anyway | ignore_case;
+#ifdef HANDLE_MULTIBYTE
+ wchar_t wc;
+ size_t mblength;
+ mbstate_t state;
+ int convfail;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+#endif
+
while ((char const *) p < suffix_begin)
{
char const *ip = (char const *) p;
h = 0;
+#ifdef HANDLE_MULTIBYTE
+ if (MB_CUR_MAX > 1)
+ {
+ wchar_t lo_wc;
+ char mbc[MB_LEN_MAX];
+ mbstate_t state_wc;
+
+ /* Hash this line until we find a newline. */
+ switch (ignore_white_space)
+ {
+ case IGNORE_ALL_SPACE:
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (convfail)
+ mbc[0] = *p++;
+ else if (!iswspace (wc))
+ {
+ bool flag = 0;
+
+ if (ignore_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ flag = 1;
+
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!flag)
+ {
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+ }
+ }
+ else
+ {
+ p += mblength;
+ continue;
+ }
+
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ break;
+
+ case IGNORE_SPACE_CHANGE:
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (!convfail && iswspace (wc))
+ {
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ goto hashing_done;
+ }
+
+ p += mblength;
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+ if (convfail || (!convfail && !iswspace (wc)))
+ break;
+ }
+ h = HASH (h, ' ');
+ }
+
+ /* WC is now the first non-space. */
+ if (convfail)
+ mbc[0] = *p++;
+ else
+ {
+ bool flag = 0;
+
+ if (ignore_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ flag = 1;
+
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!flag)
+ {
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+ }
+ }
+
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ break;
+
+ case IGNORE_TAB_EXPANSION:
+ {
+ size_t column = 0;
+
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (convfail)
+ {
+ h = HASH (h, *p++);
+ ++column;
+ }
+ else
+ {
+ bool flag;
+
+ switch (wc)
+ {
+ case L'\b':
+ column -= 0 < column;
+ h = HASH (h, '\b');
+ ++p;
+ break;
+
+ case L'\t':
+ {
+ int repetitions;
+
+ repetitions = TAB_WIDTH - column % TAB_WIDTH;
+ column += repetitions;
+ do
+ h = HASH (h, ' ');
+ while (--repetitions != 0);
+ ++p;
+ }
+ break;
+
+ case L'\r':
+ column = 0;
+ h = HASH (h, '\r');
+ ++p;
+ break;
+
+ default:
+ flag = 0;
+ column += wcwidth (wc);
+ if (ignore_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ flag = 1;
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!flag)
+ {
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+ }
+
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ while (1)
+ {
+ if (*p == '\n')
+ {
+ ++p;
+ break;
+ }
+
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
+
+ if (convfail)
+ mbc[0] = *p++;
+ else
+ {
+ int flag = 0;
+
+ if (ignore_case)
+ {
+ lo_wc = towlower (wc);
+ if (lo_wc != wc)
+ {
+ flag = 1;
+ p += mblength;
+ memset (&state_wc, '\0', sizeof(mbstate_t));
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
+
+ assert (mblength != (size_t)-1 &&
+ mblength != (size_t)-2);
+
+ mblength = (mblength < 1) ? 1 : mblength;
+ }
+ }
+
+ if (!flag)
+ {
+ for (i = 0; i < mblength; i++)
+ mbc[i] = *p++;
+ }
+ }
+
+ for (i = 0; i < mblength; i++)
+ h = HASH (h, mbc[i]);
+ }
+ }
+ }
+ else
+#endif
/* Hash this line until we find a newline. */
if (ignore_case)
switch (ignore_white_space)
@@ -438,7 +729,7 @@
else if (!diff_length_compare_anyway)
continue;
- if (! lines_differ (eqline, ip))
+ if (! lines_differ (eqline, eqs[i].length, ip, length))
break;
}
--- diffutils-2.8.1/src/side.c.i18n 2002-02-07 18:17:04.000000000 +0000
+++ diffutils-2.8.1/src/side.c 2007-08-14 10:39:40.000000000 +0100
@@ -74,11 +74,72 @@
register unsigned int out_position = 0;
register char const *text_pointer = line[0];
register char const *text_limit = line[1];
+#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
+ unsigned char mbc[MB_LEN_MAX];
+ wchar_t wc;
+ mbstate_t state, state_bak;
+ size_t mbc_pos, mblength;
+ int mbc_loading_flag = 0;
+ int wc_width;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+#endif
while (text_pointer < text_limit)
{
register unsigned char c = *text_pointer++;
+#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
+ if (MB_CUR_MAX > 1 && mbc_loading_flag)
+ {
+ mbc_loading_flag = 0;
+ state_bak = state;
+ mbc[mbc_pos++] = c;
+
+process_mbc:
+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
+
+ switch (mblength)
+ {
+ case (size_t)-2: /* Incomplete multibyte character. */
+ mbc_loading_flag = 1;
+ state = state_bak;
+ break;
+
+ case (size_t)-1: /* Invalid as a multibyte character. */
+ if (in_position++ < out_bound)
+ {
+ out_position = in_position;
+ putc (mbc[0], out);
+ }
+ memmove (mbc, mbc + 1, --mbc_pos);
+ if (mbc_pos > 0)
+ {
+ mbc[mbc_pos] = '\0';
+ goto process_mbc;
+ }
+ break;
+
+ default:
+ wc_width = wcwidth (wc);
+ if (wc_width < 1) /* Unprintable multibyte character. */
+ {
+ if (in_position <= out_bound)
+ fprintf (out, "%lc", (wint_t)wc);
+ }
+ else /* Printable multibyte character. */
+ {
+ in_position += wc_width;
+ if (in_position <= out_bound)
+ {
+ out_position = in_position;
+ fprintf (out, "%lc", (wint_t)wc);
+ }
+ }
+ }
+ continue;
+ }
+#endif
switch (c)
{
case '\t':
@@ -136,8 +197,39 @@
break;
default:
- if (! ISPRINT (c))
- goto control_char;
+#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
+ if (MB_CUR_MAX > 1)
+ {
+ memset (mbc, '\0', MB_LEN_MAX);
+ mbc_pos = 0;
+ mbc[mbc_pos++] = c;
+ state_bak = state;
+
+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
+
+ /* The value of mblength is always less than 2 here. */
+ switch (mblength)
+ {
+ case (size_t)-2: /* Incomplete multibyte character. */
+ state = state_bak;
+ mbc_loading_flag = 1;
+ continue;
+
+ case (size_t)-1: /* Invalid as a multibyte character. */
+ state = state_bak;
+ break;
+
+ default:
+ if (! iswprint (wc))
+ goto control_char;
+ }
+ }
+ else
+#endif
+ {
+ if (! ISPRINT (c))
+ goto control_char;
+ }
/* falls through */
case ' ':
if (in_position++ < out_bound)
--- diffutils-2.8.1/src/util.c.i18n 2002-02-28 04:23:10.000000000 +0000
+++ diffutils-2.8.1/src/util.c 2007-08-14 10:42:19.000000000 +0100
@@ -312,7 +312,8 @@
Return nonzero if the lines differ. */
bool
-lines_differ (char const *s1, char const *s2)
+lines_differ_singlebyte (char const *s1, size_t s1len,
+ char const *s2, size_t s2len)
{
register unsigned char const *t1 = (unsigned char const *) s1;
register unsigned char const *t2 = (unsigned char const *) s2;
@@ -441,6 +442,294 @@
return 1;
}
+
+#ifdef HANDLE_MULTIBYTE
+# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \
+do \
+{ \
+ mbstate_t bak = STATE; \
+ \
+ CONVFAIL = 0; \
+ MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \
+ \
+ switch (MBLENGTH) \
+ { \
+ case (size_t)-2: \
+ case (size_t)-1: \
+ STATE = bak; \
+ ++CONVFAIL; \
+ /* Fall through. */ \
+ case 0: \
+ MBLENGTH = 1; \
+ } \
+} \
+while (0)
+
+bool
+lines_differ_multibyte (char const *s1, size_t s1len,
+ char const *s2, size_t s2len)
+{
+ unsigned char const *end1, *end2;
+ unsigned char c1, c2;
+ wchar_t wc1, wc2, wc1_bak, wc2_bak;
+ size_t mblen1, mblen2;
+ mbstate_t state1, state2, state1_bak, state2_bak;
+ int convfail1, convfail2, convfail1_bak, convfail2_bak;
+
+ unsigned char const *t1 = (unsigned char const *) s1;
+ unsigned char const *t2 = (unsigned char const *) s2;
+ unsigned char const *t1_bak, *t2_bak;
+ size_t column = 0;
+
+ if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case)
+ {
+ while (*t1 != '\n')
+ if (*t1++ != * t2++)
+ return 1;
+ return 0;
+ }
+
+ memset (&state1, '\0', sizeof (mbstate_t));
+ memset (&state2, '\0', sizeof (mbstate_t));
+
+ end1 = s1 + s1len;
+ end2 = s2 + s2len;
+
+ while (1)
+ {
+ c1 = *t1;
+ c2 = *t2;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+
+ /* Test for exact char equality first, since it's a common case. */
+ if (convfail1 ^ convfail2)
+ break;
+ else if (convfail1 && convfail2 && c1 != c2)
+ break;
+ else if (!convfail1 && !convfail2 && wc1 != wc2)
+ {
+ switch (ignore_white_space)
+ {
+ case IGNORE_ALL_SPACE:
+ /* For -w, just skip past any white space. */
+ while (1)
+ {
+ if (convfail1)
+ break;
+ else if (wc1 == L'\n' || !iswspace (wc1))
+ break;
+
+ t1 += mblen1;
+ c1 = *t1;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ }
+
+ while (1)
+ {
+ if (convfail2)
+ break;
+ else if (wc2 == L'\n' || !iswspace (wc2))
+ break;
+
+ t2 += mblen2;
+ c2 = *t2;
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+ }
+ t1 += mblen1;
+ t2 += mblen2;
+ break;
+
+ case IGNORE_SPACE_CHANGE:
+ /* For -b, advance past any sequence of white space in
+ line 1 and consider it just one space, or nothing at
+ all if it is at the end of the line. */
+ if (wc1 != L'\n' && iswspace (wc1))
+ {
+ size_t mblen_bak;
+ mbstate_t state_bak;
+
+ do
+ {
+ t1 += mblen1;
+ mblen_bak = mblen1;
+ state_bak = state1;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ }
+ while (!convfail1 && (wc1 != L'\n' && iswspace (wc1)));
+
+ state1 = state_bak;
+ mblen1 = mblen_bak;
+ t1 -= mblen1;
+ convfail1 = 0;
+ wc1 = L' ';
+ }
+
+ /* Likewise for line 2. */
+ if (wc2 != L'\n' && iswspace (wc2))
+ {
+ size_t mblen_bak;
+ mbstate_t state_bak;
+
+ do
+ {
+ t2 += mblen2;
+ mblen_bak = mblen2;
+ state_bak = state2;
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+ }
+ while (!convfail2 && (wc2 != L'\n' && iswspace (wc2)));
+
+ state2 = state_bak;
+ mblen2 = mblen_bak;
+ t2 -= mblen2;
+ convfail2 = 0;
+ wc2 = L' ';
+ }
+
+ if (wc1 != wc2)
+ {
+ if (wc2 == L' ' && wc1 != L'\n' &&
+ t1 > (unsigned char const *)s1 &&
+ !convfail1_bak && iswspace (wc1_bak))
+ {
+ t1 = t1_bak;
+ wc1 = wc1_bak;
+ state1 = state1_bak;
+ convfail1 = convfail1_bak;
+ continue;
+ }
+ if (wc1 == L' ' && wc2 != L'\n'
+ && t2 > (unsigned char const *)s2
+ && !convfail2_bak && iswspace (wc2_bak))
+ {
+ t2 = t2_bak;
+ wc2 = wc2_bak;
+ state2 = state2_bak;
+ convfail2 = convfail2_bak;
+ continue;
+ }
+ }
+
+ t1_bak = t1; t2_bak = t2;
+ wc1_bak = wc1; wc2_bak = wc2;
+ state1_bak = state1; state2_bak = state2;
+ convfail1_bak = convfail1; convfail2_bak = convfail2;
+
+ if (wc1 == L'\n')
+ wc1 = L' ';
+ else
+ t1 += mblen1;
+
+ if (wc2 == L'\n')
+ wc2 = L' ';
+ else
+ t2 += mblen2;
+
+ break;
+
+ case IGNORE_TAB_EXPANSION:
+ if ((wc1 == L' ' && wc2 == L'\t')
+ || (wc1 == L'\t' && wc2 == L' '))
+ {
+ size_t column2 = column;
+
+ while (1)
+ {
+ if (convfail1)
+ {
+ ++t1;
+ break;
+ }
+ else if (wc1 == L' ')
+ column++;
+ else if (wc1 == L'\t')
+ column += TAB_WIDTH - column % TAB_WIDTH;
+ else
+ {
+ t1 += mblen1;
+ break;
+ }
+
+ t1 += mblen1;
+ c1 = *t1;
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
+ }
+
+ while (1)
+ {
+ if (convfail2)
+ {
+ ++t2;
+ break;
+ }
+ else if (wc2 == L' ')
+ column2++;
+ else if (wc2 == L'\t')
+ column2 += TAB_WIDTH - column2 % TAB_WIDTH;
+ else
+ {
+ t2 += mblen2;
+ break;
+ }
+
+ t2 += mblen2;
+ c2 = *t2;
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
+ }
+
+ if (column != column2)
+ return 1;
+ }
+ else
+ {
+ t1 += mblen1;
+ t2 += mblen2;
+ }
+ break;
+
+ case IGNORE_NO_WHITE_SPACE:
+ t1 += mblen1;
+ t2 += mblen2;
+ break;
+ }
+
+ /* Lowercase all letters if -i is specified. */
+ if (ignore_case)
+ {
+ if (!convfail1)
+ wc1 = towlower (wc1);
+ if (!convfail2)
+ wc2 = towlower (wc2);
+ }
+
+ if (convfail1 ^ convfail2)
+ break;
+ else if (convfail1 && convfail2 && c1 != c2)
+ break;
+ else if (!convfail1 && !convfail2 && wc1 != wc2)
+ break;
+ }
+ else
+ {
+ t1_bak = t1; t2_bak = t2;
+ wc1_bak = wc1; wc2_bak = wc2;
+ state1_bak = state1; state2_bak = state2;
+ convfail1_bak = convfail1; convfail2_bak = convfail2;
+
+ t1 += mblen1; t2 += mblen2;
+ }
+
+ if (!convfail1 && wc1 == L'\n')
+ return 0;
+
+ column += convfail1 ? 1 :
+ (wc1 == L'\t') ? TAB_WIDTH - column % TAB_WIDTH : wcwidth (wc1);
+ }
+
+ return 1;
+}
+#endif
/* Find the consecutive changes at the start of the script START.
Return the last link before the first gap. */