- Multibyte speed improvement (bug #252117).

This commit is contained in:
Tim Waugh 2007-08-14 09:51:22 +00:00
parent c9a8fb4efe
commit 8f1372fa1e
2 changed files with 45 additions and 31 deletions

View File

@ -1,6 +1,6 @@
--- diffutils-2.8.4/src/diff.c.i18n 2002-06-17 01:55:42.000000000 -0400 --- diffutils-2.8.1/src/diff.c.i18n 2002-03-24 07:35:28.000000000 +0000
+++ diffutils-2.8.4/src/diff.c 2002-11-16 18:41:37.000000000 -0500 +++ diffutils-2.8.1/src/diff.c 2007-08-14 10:39:40.000000000 +0100
@@ -275,6 +275,13 @@ @@ -273,6 +273,13 @@
re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
excluded = new_exclude (); excluded = new_exclude ();
@ -14,11 +14,11 @@
/* Decode the options. */ /* Decode the options. */
while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1) while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
--- diffutils-2.8.4/src/diff.h.i18n 2002-11-16 18:31:32.000000000 -0500 --- diffutils-2.8.1/src/diff.h.i18n 2002-03-11 21:24:42.000000000 +0000
+++ diffutils-2.8.4/src/diff.h 2002-11-16 18:48:58.000000000 -0500 +++ diffutils-2.8.1/src/diff.h 2007-08-14 10:41:21.000000000 +0100
@@ -23,6 +23,19 @@ @@ -25,6 +25,19 @@
#include "system.h"
#include <stdio.h> #define TAB_WIDTH 8
+/* For platforms which support the ISO C Amendment 1 functionality we +/* For platforms which support the ISO C Amendment 1 functionality we
+ support user defined character classes. */ + support user defined character classes. */
@ -36,24 +36,24 @@
/* What kind of changes a hunk contains. */ /* What kind of changes a hunk contains. */
enum changes enum changes
{ {
@@ -350,7 +363,13 @@ @@ -349,7 +362,13 @@
extern char const pr_program[]; extern char const pr_program[];
char *concat (char const *, char const *, char const *); char *concat (char const *, char const *, char const *);
char *dir_file_pathname (char const *, char const *); char *dir_file_pathname (char const *, char const *);
-bool lines_differ (char const *, char const *); -bool lines_differ (char const *, char const *);
+ +
+bool (*lines_differ) (char const *, char const *); +bool (*lines_differ) (char const *, size_t, char const *, size_t);
+bool lines_differ_singlebyte (char const *, char const *); +bool lines_differ_singlebyte (char const *, size_t, char const *, size_t);
+#ifdef HANDLE_MULTIBYTE +#ifdef HANDLE_MULTIBYTE
+bool lines_differ_multibyte (char const *, char const *); +bool lines_differ_multibyte (char const *, size_t, char const *, size_t);
+#endif +#endif
+ +
lin translate_line_number (struct file_data const *, lin); lin translate_line_number (struct file_data const *, lin);
struct change *find_change (struct change *); struct change *find_change (struct change *);
struct change *find_reverse_change (struct change *); struct change *find_reverse_change (struct change *);
--- diffutils-2.8.4/src/io.c.i18n 2002-06-11 02:06:32.000000000 -0400 --- diffutils-2.8.1/src/io.c.i18n 2002-03-25 04:58:36.000000000 +0000
+++ diffutils-2.8.4/src/io.c 2002-11-16 18:57:30.000000000 -0500 +++ diffutils-2.8.1/src/io.c 2007-08-14 10:40:05.000000000 +0100
@@ -26,6 +26,7 @@ @@ -25,6 +25,7 @@
#include <regex.h> #include <regex.h>
#include <setmode.h> #include <setmode.h>
#include <xalloc.h> #include <xalloc.h>
@ -61,7 +61,7 @@
/* Rotate an unsigned value to the left. */ /* Rotate an unsigned value to the left. */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
@@ -213,6 +214,28 @@ @@ -212,6 +213,28 @@
/* Split the file into lines, simultaneously computing the equivalence /* Split the file into lines, simultaneously computing the equivalence
class for each line. */ class for each line. */
@ -90,7 +90,7 @@
static void static void
find_and_hash_each_line (struct file_data *current) find_and_hash_each_line (struct file_data *current)
@@ -239,12 +262,280 @@ @@ -238,12 +261,280 @@
bool same_length_diff_contents_compare_anyway = bool same_length_diff_contents_compare_anyway =
diff_length_compare_anyway | ignore_case; diff_length_compare_anyway | ignore_case;
@ -371,10 +371,19 @@
/* Hash this line until we find a newline. */ /* Hash this line until we find a newline. */
if (ignore_case) if (ignore_case)
switch (ignore_white_space) switch (ignore_white_space)
--- diffutils-2.8.4/src/side.c.i18n 2002-06-11 02:06:32.000000000 -0400 @@ -438,7 +729,7 @@
+++ diffutils-2.8.4/src/side.c 2002-11-16 18:41:37.000000000 -0500 else if (!diff_length_compare_anyway)
@@ -73,11 +73,72 @@ continue;
register size_t out_position = 0;
- if (! lines_differ (eqline, ip))
+ if (! lines_differ (eqline, eqs[i].length, ip, length))
break;
}
--- diffutils-2.8.1/src/side.c.i18n 2002-02-07 18:17:04.000000000 +0000
+++ diffutils-2.8.1/src/side.c 2007-08-14 10:39:40.000000000 +0100
@@ -74,11 +74,72 @@
register unsigned int out_position = 0;
register char const *text_pointer = line[0]; register char const *text_pointer = line[0];
register char const *text_limit = line[1]; register char const *text_limit = line[1];
+#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
@ -446,7 +455,7 @@
switch (c) switch (c)
{ {
case '\t': case '\t':
@@ -135,8 +196,39 @@ @@ -136,8 +197,39 @@
break; break;
default: default:
@ -488,18 +497,19 @@
/* falls through */ /* falls through */
case ' ': case ' ':
if (in_position++ < out_bound) if (in_position++ < out_bound)
--- diffutils-2.8.4/src/util.c.i18n 2002-06-11 02:06:32.000000000 -0400 --- diffutils-2.8.1/src/util.c.i18n 2002-02-28 04:23:10.000000000 +0000
+++ diffutils-2.8.4/src/util.c 2002-11-16 18:41:37.000000000 -0500 +++ diffutils-2.8.1/src/util.c 2007-08-14 10:42:19.000000000 +0100
@@ -321,7 +321,7 @@ @@ -312,7 +312,8 @@
Return nonzero if the lines differ. */ Return nonzero if the lines differ. */
bool bool
-lines_differ (char const *s1, char const *s2) -lines_differ (char const *s1, char const *s2)
+lines_differ_singlebyte (char const *s1, char const *s2) +lines_differ_singlebyte (char const *s1, size_t s1len,
+ char const *s2, size_t s2len)
{ {
register unsigned char const *t1 = (unsigned char const *) s1; register unsigned char const *t1 = (unsigned char const *) s1;
register unsigned char const *t2 = (unsigned char const *) s2; register unsigned char const *t2 = (unsigned char const *) s2;
@@ -450,6 +450,293 @@ @@ -441,6 +442,294 @@
return 1; return 1;
} }
@ -527,7 +537,8 @@
+while (0) +while (0)
+ +
+bool +bool
+lines_differ_multibyte (char const *s1, char const *s2) +lines_differ_multibyte (char const *s1, size_t s1len,
+ char const *s2, size_t s2len)
+{ +{
+ unsigned char const *end1, *end2; + unsigned char const *end1, *end2;
+ unsigned char c1, c2; + unsigned char c1, c2;
@ -552,8 +563,8 @@
+ memset (&state1, '\0', sizeof (mbstate_t)); + memset (&state1, '\0', sizeof (mbstate_t));
+ memset (&state2, '\0', sizeof (mbstate_t)); + memset (&state2, '\0', sizeof (mbstate_t));
+ +
+ end1 = s1 + strlen (s1); + end1 = s1 + s1len;
+ end2 = s2 + strlen (s2); + end2 = s2 + s2len;
+ +
+ while (1) + while (1)
+ { + {

View File

@ -1,7 +1,7 @@
Summary: A GNU collection of diff utilities. Summary: A GNU collection of diff utilities.
Name: diffutils Name: diffutils
Version: 2.8.1 Version: 2.8.1
Release: 16%{?dist} Release: 17%{?dist}
Group: Applications/Text Group: Applications/Text
URL: http://www.gnu.org/software/diffutils/diffutils.html URL: http://www.gnu.org/software/diffutils/diffutils.html
Source: ftp://ftp.gnu.org/gnu/diffutils/diffutils-%{version}.tar.gz Source: ftp://ftp.gnu.org/gnu/diffutils/diffutils-%{version}.tar.gz
@ -77,6 +77,9 @@ rm -rf $RPM_BUILD_ROOT
%{_infodir}/diff.info*gz %{_infodir}/diff.info*gz
%changelog %changelog
* Tue Aug 14 2007 Tim Waugh <twaugh@redhat.com> 2.8.1-17
- Multibyte speed improvement (bug #252117).
* Mon Jan 22 2007 Tim Waugh <twaugh@redhat.com> 2.8.1-16 * Mon Jan 22 2007 Tim Waugh <twaugh@redhat.com> 2.8.1-16
- Make scriptlet unconditionally succeed (bug #223683). - Make scriptlet unconditionally succeed (bug #223683).