Use the new i18n implementation for expand/unexpand

This commit is contained in:
Ondrej Oprala 2015-12-01 09:40:25 +01:00
parent 8e9b3fd0aa
commit 32b1e5a154
3 changed files with 1459 additions and 454 deletions

File diff suppressed because it is too large Load Diff

View File

@ -596,201 +596,6 @@ diff -urNp coreutils-8.24-orig/src/cut.c coreutils-8.24/src/cut.c
}
if (optind == argc)
diff -urNp coreutils-8.24-orig/src/expand.c coreutils-8.24/src/expand.c
--- coreutils-8.24-orig/src/expand.c 2015-06-26 19:05:22.000000000 +0200
+++ coreutils-8.24/src/expand.c 2015-07-05 09:04:33.028546950 +0200
@@ -37,12 +37,34 @@
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
+
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
+/* Get iswblank(). */
+#if HAVE_WCTYPE_H
+# include <wctype.h>
+#endif
+
#include "system.h"
#include "error.h"
#include "fadvise.h"
#include "quote.h"
#include "xstrndup.h"
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+ installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "expand"
@@ -357,6 +379,142 @@ expand (void)
}
}
+#if HAVE_MBRTOWC
+/* Multibyte-aware counterpart of expand ().  Copy every input file handed
+   out by next_file () to standard output, replacing each tab that is still
+   subject to conversion with spaces up to the next tab stop.  Input is
+   decoded with mbrtowc () and the column advances by wcwidth () of each
+   character.  Bytes that do not decode to a character are copied through
+   unchanged and counted as one column each.  */
+static void
+expand_multibyte (void)
+{
+  FILE *fp;                     /* Input stream.  */
+  mbstate_t i_state;            /* Current shift state of the input stream.  */
+  mbstate_t i_state_bak;        /* Back up the I_STATE.  */
+  mbstate_t o_state;            /* Shift state of the output stream; it is
+                                   only initialised, never consulted below.  */
+  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence.  */
+  char *bufpos = buf;           /* Next read position of BUF.  */
+  size_t buflen = 0;            /* The length of the byte sequence in buf.  */
+  wchar_t wc;                   /* A gotten wide character.  */
+  size_t mblength;              /* The byte size of a multibyte character
+                                   which shows as same character as WC.  */
+  int tab_index = 0;            /* Index in `tab_list' of next tabstop.  */
+  int column = 0;               /* Column on screen of the next char.  */
+  int next_tab_column;          /* Column the next tab stop is on.  */
+  int convert = 1;              /* If nonzero, perform translations.  */
+
+  fp = next_file ((FILE *) NULL);
+  if (fp == NULL)
+    return;
+
+  memset (&o_state, '\0', sizeof (mbstate_t));
+  memset (&i_state, '\0', sizeof (mbstate_t));
+
+  for (;;)
+    {
+      /* Refill the buffer BUF.  Fewer than MB_LEN_MAX bytes are carried
+         over and at most BUFSIZ are appended, so BUF cannot overflow.  */
+      if (buflen < MB_LEN_MAX && !feof (fp) && !ferror (fp))
+        {
+          memmove (buf, bufpos, buflen);
+          buflen += fread (buf + buflen, sizeof (char), BUFSIZ, fp);
+          bufpos = buf;
+        }
+
+      /* No character is left in BUF.  */
+      if (buflen < 1)
+        {
+          fp = next_file (fp);
+
+          if (fp == NULL)
+            break;              /* No more files.  */
+          else
+            {
+              /* Each file starts in the initial shift state.  */
+              memset (&i_state, '\0', sizeof (mbstate_t));
+              continue;
+            }
+        }
+
+      /* Get a wide character.  */
+      i_state_bak = i_state;
+      mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
+
+      switch (mblength)
+        {
+        case (size_t) -1:       /* Illegal byte sequence.  */
+        case (size_t) -2:       /* Incomplete byte sequence.  */
+          /* Pass exactly one byte through and resume decoding after it
+             with the shift state that was in effect before the attempt.  */
+          mblength = 1;
+          i_state = i_state_bak;
+          if (convert)
+            {
+              ++column;
+              /* The byte may be >= 0x80; <ctype.h> functions require a
+                 value representable as unsigned char (or EOF).  */
+              if (convert_entire_line == 0
+                  && !isblank ((unsigned char) *bufpos))
+                convert = 0;
+            }
+          putchar (*bufpos);
+          break;
+
+        case 0:                 /* Null.  */
+          /* mbrtowc () reports 0 for a null character; it still occupies
+             one input byte.  */
+          mblength = 1;
+          if (convert && convert_entire_line == 0)
+            convert = 0;
+          putchar ('\0');
+          break;
+
+        default:
+          if (wc == L'\n')      /* LF.  */
+            {
+              /* A new line restarts tab-stop tracking and conversion.  */
+              tab_index = 0;
+              column = 0;
+              convert = 1;
+              putchar ('\n');
+            }
+          else if (wc == L'\t' && convert)      /* Tab.  */
+            {
+              if (tab_size == 0)
+                {
+                  /* Do not let tab_index == first_free_tab;
+                     stop when it is 1 less.  */
+                  while (tab_index < first_free_tab - 1
+                         && column >= tab_list[tab_index])
+                    tab_index++;
+                  next_tab_column = tab_list[tab_index];
+                  if (tab_index < first_free_tab - 1)
+                    tab_index++;
+                  /* Past the last listed stop a tab becomes one space.  */
+                  if (column >= next_tab_column)
+                    next_tab_column = column + 1;
+                }
+              else
+                next_tab_column = column + tab_size - column % tab_size;
+
+              while (column < next_tab_column)
+                {
+                  putchar (' ');
+                  ++column;
+                }
+            }
+          else                  /* Others.  */
+            {
+              if (convert)
+                {
+                  if (wc == L'\b')
+                    {
+                      if (column > 0)
+                        --column;
+                    }
+                  else
+                    {
+                      int width;        /* The width of WC.  */
+
+                      /* wcwidth () may return -1; such characters do not
+                         advance the column.  */
+                      width = wcwidth (wc);
+                      column += (width > 0) ? width : 0;
+                      if (convert_entire_line == 0 && !iswblank (wc))
+                        convert = 0;
+                    }
+                }
+              /* Copy the character's original bytes unchanged.  */
+              fwrite (bufpos, sizeof (char), mblength, stdout);
+            }
+        }
+      buflen -= mblength;
+      bufpos += mblength;
+    }
+}
+#endif
+
int
main (int argc, char **argv)
{
@@ -421,7 +579,12 @@ main (int argc, char **argv)
file_list = (optind < argc ? &argv[optind] : stdin_argv);
- expand ();
+#if HAVE_MBRTOWC
+ if (MB_CUR_MAX > 1)
+ expand_multibyte ();
+ else
+#endif
+ expand ();
if (have_read_stdin && fclose (stdin) != 0)
error (EXIT_FAILURE, errno, "-");
diff -urNp coreutils-8.24-orig/src/fold.c coreutils-8.24/src/fold.c
--- coreutils-8.24-orig/src/fold.c 2015-06-26 19:05:22.000000000 +0200
+++ coreutils-8.24/src/fold.c 2015-07-05 09:04:33.029546958 +0200
@ -3480,264 +3285,6 @@ diff -urNp coreutils-8.24-orig/src/sort.c coreutils-8.24/src/sort.c
if (have_read_stdin && fclose (stdin) == EOF)
die (_("close failed"), "-");
diff -urNp coreutils-8.24-orig/src/unexpand.c coreutils-8.24/src/unexpand.c
--- coreutils-8.24-orig/src/unexpand.c 2015-06-26 19:05:22.000000000 +0200
+++ coreutils-8.24/src/unexpand.c 2015-07-05 09:04:33.032546980 +0200
@@ -38,12 +38,29 @@
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
+
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
+#if HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+
#include "system.h"
#include "error.h"
#include "fadvise.h"
#include "quote.h"
#include "xstrndup.h"
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
+ installation; work around this configuration error. */
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
+# define MB_LEN_MAX 16
+#endif
+
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
+#if HAVE_MBRTOWC && defined mbstate_t
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
+#endif
+
/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "unexpand"
@@ -103,6 +120,210 @@ static struct option const longopts[] =
{NULL, 0, NULL, 0}
};
+static FILE *next_file (FILE *fp);
+
+#if HAVE_MBRTOWC
+/* Multibyte-aware counterpart of unexpand ().  Copy every input file handed
+   out by next_file () to standard output, replacing runs of spaces and tabs
+   that are still subject to conversion with as many tabs as fit, followed
+   by the remaining spaces.  Input is decoded with mbrtowc () and the column
+   advances by wcwidth () of each character.  Bytes that do not decode to a
+   character are copied through unchanged and counted as one column each.  */
+static void
+unexpand_multibyte (void)
+{
+  FILE *fp;                     /* Input stream.  */
+  mbstate_t i_state;            /* Current shift state of the input stream.  */
+  mbstate_t i_state_bak;        /* Back up the I_STATE.  */
+  mbstate_t o_state;            /* Shift state of the output stream; it is
+                                   only initialised, never consulted below.  */
+  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence.  */
+  char *bufpos = buf;           /* Next read position of BUF.  */
+  size_t buflen = 0;            /* The length of the byte sequence in buf.  */
+  wint_t wc;                    /* A gotten wide character, or WEOF once
+                                   the current file is exhausted.  */
+  size_t mblength;              /* The byte size of a multibyte character
+                                   which shows as same character as WC.  */
+  bool prev_tab = false;        /* True if the previous character was a
+                                   tab.  */
+
+  /* Index in `tab_list' of next tabstop: */
+  int tab_index = 0;            /* For calculating width of pending tabs.  */
+  int print_tab_index = 0;      /* For printing as many tabs as possible.  */
+  unsigned int column = 0;      /* Column on screen of next char.  */
+  int next_tab_column;          /* Column the next tab stop is on.  */
+  int convert = 1;              /* If nonzero, perform translations.  */
+  unsigned int pending = 0;     /* Pending columns of blanks.  */
+
+  fp = next_file ((FILE *) NULL);
+  if (fp == NULL)
+    return;
+
+  memset (&o_state, '\0', sizeof (mbstate_t));
+  memset (&i_state, '\0', sizeof (mbstate_t));
+
+  for (;;)
+    {
+      /* Refill the buffer BUF.  Fewer than MB_LEN_MAX bytes are carried
+         over and at most BUFSIZ are appended, so BUF cannot overflow.  */
+      if (buflen < MB_LEN_MAX && !feof (fp) && !ferror (fp))
+        {
+          memmove (buf, bufpos, buflen);
+          buflen += fread (buf + buflen, sizeof (char), BUFSIZ, fp);
+          bufpos = buf;
+        }
+
+      /* Get a wide character.  */
+      if (buflen < 1)
+        {
+          /* End of this file: use WEOF so that the blanks still pending
+             are flushed below before moving to the next file.  */
+          mblength = 1;
+          wc = WEOF;
+        }
+      else
+        {
+          i_state_bak = i_state;
+          mblength = mbrtowc ((wchar_t *) &wc, bufpos, buflen, &i_state);
+        }
+
+      if (mblength == (size_t) -1 || mblength == (size_t) -2)
+        {
+          /* Undecodable input: restore the shift state and give WC a
+             value that matches none of the blank tests below.  */
+          i_state = i_state_bak;
+          wc = L'\0';
+        }
+
+      if (wc == L' ' && convert && column < INT_MAX)
+        {
+          ++pending;
+          ++column;
+        }
+      else if (wc == L'\t' && convert)
+        {
+          if (tab_size == 0)
+            {
+              /* Do not let tab_index == first_free_tab;
+                 stop when it is 1 less.  */
+              while (tab_index < first_free_tab - 1
+                     && column >= tab_list[tab_index])
+                tab_index++;
+              next_tab_column = tab_list[tab_index];
+              if (tab_index < first_free_tab - 1)
+                tab_index++;
+              if (column >= next_tab_column)
+                {
+                  convert = 0;  /* Ran out of tab stops.  */
+                  goto flush_pend_mb;
+                }
+            }
+          else
+            {
+              next_tab_column = column + tab_size - column % tab_size;
+            }
+          pending += next_tab_column - column;
+          column = next_tab_column;
+        }
+      else
+        {
+flush_pend_mb:
+          /* Flush pending spaces.  Print as many tabs as possible,
+             then print the rest as spaces.  */
+          if (pending == 1 && column != 1 && !prev_tab)
+            {
+              /* A lone space that is neither at the start of the line nor
+                 preceded by a tab is written back as a space.  */
+              putchar (' ');
+              pending = 0;
+            }
+          /* Rewind to the column where the pending blanks began.  */
+          column -= pending;
+          while (pending > 0)
+            {
+              if (tab_size == 0)
+                {
+                  /* Do not let print_tab_index == first_free_tab;
+                     stop when it is 1 less.  */
+                  while (print_tab_index < first_free_tab - 1
+                         && column >= tab_list[print_tab_index])
+                    print_tab_index++;
+                  next_tab_column = tab_list[print_tab_index];
+                  if (print_tab_index < first_free_tab - 1)
+                    print_tab_index++;
+                }
+              else
+                {
+                  next_tab_column =
+                    column + tab_size - column % tab_size;
+                }
+              if (next_tab_column - column <= pending)
+                {
+                  putchar ('\t');
+                  pending -= next_tab_column - column;
+                  column = next_tab_column;
+                }
+              else
+                {
+                  /* The next stop is out of reach: undo the index
+                     advance and write the remainder as spaces.  */
+                  --print_tab_index;
+                  column += pending;
+                  while (pending != 0)
+                    {
+                      putchar (' ');
+                      pending--;
+                    }
+                }
+            }
+
+          if (wc == WEOF)
+            {
+              fp = next_file (fp);
+              if (fp == NULL)
+                break;          /* No more files.  */
+              else
+                {
+                  /* Each file starts in the initial shift state.  */
+                  memset (&i_state, '\0', sizeof (mbstate_t));
+                  continue;
+                }
+            }
+
+          if (mblength == (size_t) -1 || mblength == (size_t) -2)
+            {
+              if (convert)
+                {
+                  ++column;
+                  if (convert_entire_line == 0)
+                    convert = 0;
+                }
+              mblength = 1;
+              /* Emit the offending byte itself.  BUFPOS, not BUF, marks
+                 the current position; BUF[0] is the current byte only
+                 immediately after a refill.  */
+              putchar (*bufpos);
+            }
+          else if (mblength == 0)
+            {
+              /* mbrtowc () reports 0 for a null character; it still
+                 occupies one input byte.  */
+              if (convert && convert_entire_line == 0)
+                convert = 0;
+              mblength = 1;
+              putchar ('\0');
+            }
+          else
+            {
+              if (convert)
+                {
+                  if (wc == L'\b')
+                    {
+                      if (column > 0)
+                        --column;
+                    }
+                  else
+                    {
+                      int width;        /* The width of WC.  */
+
+                      /* wcwidth () may return -1; such characters do not
+                         advance the column.  */
+                      width = wcwidth (wc);
+                      column += (width > 0) ? width : 0;
+                      if (convert_entire_line == 0)
+                        convert = 0;
+                    }
+                }
+
+              if (wc == L'\n')
+                {
+                  /* A new line restarts tab-stop tracking and
+                     conversion.  */
+                  tab_index = print_tab_index = 0;
+                  column = pending = 0;
+                  convert = 1;
+                }
+              /* Copy the character's original bytes unchanged.  */
+              fwrite (bufpos, sizeof (char), mblength, stdout);
+            }
+        }
+      prev_tab = wc == L'\t';
+      buflen -= mblength;
+      bufpos += mblength;
+    }
+}
+#endif
+
+
void
usage (int status)
{
@@ -523,7 +744,12 @@ main (int argc, char **argv)
file_list = (optind < argc ? &argv[optind] : stdin_argv);
- unexpand ();
+#if HAVE_MBRTOWC
+ if (MB_CUR_MAX > 1)
+ unexpand_multibyte ();
+ else
+#endif
+ unexpand ();
if (have_read_stdin && fclose (stdin) != 0)
error (EXIT_FAILURE, errno, "-");
diff -urNp coreutils-8.24-orig/src/uniq.c coreutils-8.24/src/uniq.c
--- coreutils-8.24-orig/src/uniq.c 2015-06-26 19:04:19.000000000 +0200
+++ coreutils-8.24/src/uniq.c 2015-07-05 09:04:33.032546980 +0200

View File

@ -1,7 +1,7 @@
Summary: A set of basic GNU tools commonly used in shell scripts
Name: coreutils
Version: 8.24
Release: 101%{?dist}
Release: 102%{?dist}
License: GPLv3+
Group: System Environment/Base
Url: http://www.gnu.org/software/coreutils/
@ -41,6 +41,8 @@ Patch713: coreutils-4.5.3-langinfo.patch
# (sb) lin18nux/lsb compliance - multibyte functionality patch
Patch800: coreutils-i18n.patch
# (sb) lin18nux/lsb compliance - expand/unexpand
Patch801: coreutils-i18n-expand-unexpand.patch
#getgrouplist() patch from Ulrich Drepper.
Patch908: coreutils-getgrouplist.patch
@ -171,6 +173,7 @@ including documentation and translations.
# li18nux/lsb
%patch800 -p1 -b .i18n
%patch801 -p1 -b .i18n-expand
# Coreutils
%patch908 -p1 -b .getgrouplist
@ -339,6 +342,9 @@ fi
%{_mandir}/man*/*
%changelog
* Tue Dec 01 2015 Ondrej Oprala <ooprala@redhat.com> - 8.24-102
- Use the new i18n implementation for expand/unexpand
* Mon Nov 30 2015 Ondrej Vasik <ovasik@redhat.com> - 8.24-101
- coreutils-single should provide versioned coreutils (#1286338)