From b0302efc5081ac24f2740a331d35bf7b7a42c0ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Va=C5=A1=C3=ADk?= Date: Sat, 5 Mar 2016 15:53:55 +0100 Subject: [PATCH] adjust old i18n patch cut support for coreutils-8.25 --- coreutils-i18n-cut-old.patch | 564 +++++++++++++++++++++++++++++++++++ coreutils.spec | 2 + 2 files changed, 566 insertions(+) create mode 100644 coreutils-i18n-cut-old.patch diff --git a/coreutils-i18n-cut-old.patch b/coreutils-i18n-cut-old.patch new file mode 100644 index 0000000..29c69fc --- /dev/null +++ b/coreutils-i18n-cut-old.patch @@ -0,0 +1,564 @@ +diff -urNp coreutils-8.25-orig/src/cut.c coreutils-8.25/src/cut.c +--- coreutils-8.25-orig/src/cut.c 2015-06-26 19:05:22.000000000 +0200 ++++ coreutils-8.25/src/cut.c 2015-07-05 09:04:33.028546950 +0200 +@@ -28,6 +28,11 @@ + #include + #include + #include ++ ++/* Get mbstate_t, mbrtowc(). */ ++#if HAVE_WCHAR_H ++# include ++#endif + #include "system.h" + + #include "error.h" +@@ -38,6 +43,18 @@ + + #include "set-fields.h" + ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC ++ installation; work around this configuration error. */ ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 ++# undef MB_LEN_MAX ++# define MB_LEN_MAX 16 ++#endif ++ ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ + /* The official name of this program (e.g., no 'g' prefix). */ + #define PROGRAM_NAME "cut" + +@@ -54,6 +71,52 @@ + } \ + while (0) + ++/* Refill the buffer BUF to get a multibyte character. */ ++#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ ++ do \ ++ { \ ++ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ ++ { \ ++ memmove (BUF, BUFPOS, BUFLEN); \ ++ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ ++ BUFPOS = BUF; \ ++ } \ ++ } \ ++ while (0) ++ ++/* Get wide character on BUFPOS. BUFPOS is not included after that. ++ If byte sequence is not valid as a character, CONVFAIL is true. Otherwise false. */ ++#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ ++ do \ ++ { \ ++ mbstate_t state_bak; \ ++ \ ++ if (BUFLEN < 1) \ ++ { \ ++ WC = WEOF; \ ++ break; \ ++ } \ ++ \ ++ /* Get a wide character. */ \ ++ CONVFAIL = false; \ ++ state_bak = STATE; \ ++ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ ++ \ ++ switch (MBLENGTH) \ ++ { \ ++ case (size_t)-1: \ ++ case (size_t)-2: \ ++ CONVFAIL = true; \ ++ STATE = state_bak; \ ++ /* Fall througn. */ \ ++ \ ++ case 0: \ ++ MBLENGTH = 1; \ ++ break; \ ++ } \ ++ } \ ++ while (0) ++ + + /* Pointer inside RP. When checking if a byte or field is selected + by a finite range, we check if it is between CURRENT_RP.LO +@@ -61,6 +124,9 @@ + CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ + static struct field_range_pair *current_rp; + ++/* Length of the delimiter given as argument to -d. */ ++size_t delimlen; ++ + /* This buffer is used to support the semantics of the -s option + (or lack of same) when the specified field list includes (does + not include) the first field. In both of those cases, the entire +@@ -77,15 +143,25 @@ enum operating_mode + { + undefined_mode, + +- /* Output characters that are in the given bytes. */ ++ /* Output bytes that are at the given positions. */ + byte_mode, + ++ /* Output characters that are at the given positions. */ ++ character_mode, ++ + /* Output the given delimiter-separated fields. */ + field_mode + }; + + static enum operating_mode operating_mode; + ++/* If nonzero, when in byte mode, don't split multibyte characters. */ ++static int byte_mode_character_aware; ++ ++/* If nonzero, the function for single byte locale is work ++ if this program runs on multibyte locale. */ ++static int force_singlebyte_mode; ++ + /* If true do not output lines containing no delimiter characters. + Otherwise, all such lines are printed. This option is valid only + with field mode. */ +@@ -97,6 +173,9 @@ static bool complement; + + /* The delimiter character for field mode. */ + static unsigned char delim; ++#if HAVE_WCHAR_H ++static wchar_t wcdelim; ++#endif + + /* The delimiter for each line/record. */ + static unsigned char line_delim = '\n'; +@@ -164,7 +243,7 @@ Print selected parts of lines from each + -f, --fields=LIST select only these fields; also print any line\n\ + that contains no delimiter character, unless\n\ + the -s option is specified\n\ +- -n (ignored)\n\ ++ -n with -b: don't split multibyte characters\n\ + "), stdout); + fputs (_("\ + --complement complement the set of selected bytes, characters\n\ +@@ -280,6 +359,82 @@ cut_bytes (FILE *stream) + } + } + ++#if HAVE_MBRTOWC ++/* This function is in use for the following case. ++ ++ 1. Read from the stream STREAM, printing to standard output any selected ++ characters. ++ ++ 2. Read from stream STREAM, printing to standard output any selected bytes, ++ without splitting multibyte characters. */ ++ ++static void ++cut_characters_or_cut_bytes_no_split (FILE *stream) ++{ ++ size_t idx; /* number of bytes or characters in the line so far. */ ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ ++ char *bufpos; /* Next read position of BUF. */ ++ size_t buflen; /* The length of the byte sequence in buf. */ ++ wint_t wc; /* A gotten wide character. */ ++ size_t mblength; /* The byte size of a multibyte character which shows ++ as same character as WC. */ ++ mbstate_t state; /* State of the stream. */ ++ bool convfail = false; /* true, when conversion failed. Otherwise false. */ ++ /* Whether to begin printing delimiters between ranges for the current line. ++ Set after we've begun printing data corresponding to the first range. */ ++ bool print_delimiter = false; ++ ++ idx = 0; ++ buflen = 0; ++ bufpos = buf; ++ memset (&state, '\0', sizeof(mbstate_t)); ++ ++ current_rp = frp; ++ ++ while (1) ++ { ++ REFILL_BUFFER (buf, bufpos, buflen, stream); ++ ++ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); ++ (void) convfail; /* ignore unused */ ++ ++ if (wc == WEOF) ++ { ++ if (idx > 0) ++ putchar (line_delim); ++ break; ++ } ++ else if (wc == line_delim) ++ { ++ putchar (line_delim); ++ idx = 0; ++ print_delimiter = false; ++ current_rp = frp; ++ } ++ else ++ { ++ next_item (&idx); ++ if (print_kth (idx)) ++ { ++ if (output_delimiter_specified) ++ { ++ if (print_delimiter && is_range_start_index (idx)) ++ { ++ fwrite (output_delimiter_string, sizeof (char), ++ output_delimiter_length, stdout); ++ } ++ print_delimiter = true; ++ } ++ fwrite (bufpos, mblength, sizeof(char), stdout); ++ } ++ } ++ ++ buflen -= mblength; ++ bufpos += mblength; ++ } ++} ++#endif ++ + /* Read from stream STREAM, printing to standard output any selected fields. */ + + static void +@@ -425,13 +580,211 @@ cut_fields (FILE *stream) + } + } + ++#if HAVE_MBRTOWC ++static void ++cut_fields_mb (FILE *stream) ++{ ++ int c; ++ size_t field_idx; ++ int found_any_selected_field; ++ int buffer_first_field; ++ int empty_input; ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ ++ char *bufpos; /* Next read position of BUF. */ ++ size_t buflen; /* The length of the byte sequence in buf. */ ++ wint_t wc = 0; /* A gotten wide character. */ ++ size_t mblength; /* The byte size of a multibyte character which shows ++ as same character as WC. */ ++ mbstate_t state; /* State of the stream. */ ++ bool convfail = false; /* true, when conversion failed. Otherwise false. */ ++ ++ current_rp = frp; ++ ++ found_any_selected_field = 0; ++ field_idx = 1; ++ bufpos = buf; ++ buflen = 0; ++ memset (&state, '\0', sizeof(mbstate_t)); ++ ++ c = getc (stream); ++ empty_input = (c == EOF); ++ if (c != EOF) ++ { ++ ungetc (c, stream); ++ wc = 0; ++ } ++ else ++ wc = WEOF; ++ ++ /* To support the semantics of the -s flag, we may have to buffer ++ all of the first field to determine whether it is `delimited.' ++ But that is unnecessary if all non-delimited lines must be printed ++ and the first field has been selected, or if non-delimited lines ++ must be suppressed and the first field has *not* been selected. ++ That is because a non-delimited line has exactly one field. */ ++ buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); ++ ++ while (1) ++ { ++ if (field_idx == 1 && buffer_first_field) ++ { ++ int len = 0; ++ ++ while (1) ++ { ++ REFILL_BUFFER (buf, bufpos, buflen, stream); ++ ++ GET_NEXT_WC_FROM_BUFFER ++ (wc, bufpos, buflen, mblength, state, convfail); ++ ++ if (wc == WEOF) ++ break; ++ ++ field_1_buffer = xrealloc (field_1_buffer, len + mblength); ++ memcpy (field_1_buffer + len, bufpos, mblength); ++ len += mblength; ++ buflen -= mblength; ++ bufpos += mblength; ++ ++ if (!convfail && (wc == line_delim || wc == wcdelim)) ++ break; ++ } ++ ++ if (len <= 0 && wc == WEOF) ++ break; ++ ++ /* If the first field extends to the end of line (it is not ++ delimited) and we are printing all non-delimited lines, ++ print this one. */ ++ if (convfail || (!convfail && wc != wcdelim)) ++ { ++ if (suppress_non_delimited) ++ { ++ /* Empty. */ ++ } ++ else ++ { ++ fwrite (field_1_buffer, sizeof (char), len, stdout); ++ /* Make sure the output line is newline terminated. */ ++ if (convfail || (!convfail && wc != line_delim)) ++ putchar (line_delim); ++ } ++ continue; ++ } ++ ++ if (print_kth (1)) ++ { ++ /* Print the field, but not the trailing delimiter. */ ++ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); ++ found_any_selected_field = 1; ++ } ++ next_item (&field_idx); ++ } ++ ++ if (wc != WEOF) ++ { ++ if (print_kth (field_idx)) ++ { ++ if (found_any_selected_field) ++ { ++ fwrite (output_delimiter_string, sizeof (char), ++ output_delimiter_length, stdout); ++ } ++ found_any_selected_field = 1; ++ } ++ ++ while (1) ++ { ++ REFILL_BUFFER (buf, bufpos, buflen, stream); ++ ++ GET_NEXT_WC_FROM_BUFFER ++ (wc, bufpos, buflen, mblength, state, convfail); ++ ++ if (wc == WEOF) ++ break; ++ else if (!convfail && (wc == wcdelim || wc == line_delim)) ++ { ++ buflen -= mblength; ++ bufpos += mblength; ++ break; ++ } ++ ++ if (print_kth (field_idx)) ++ fwrite (bufpos, mblength, sizeof(char), stdout); ++ ++ buflen -= mblength; ++ bufpos += mblength; ++ } ++ } ++ ++ if ((!convfail || wc == line_delim) && buflen < 1) ++ wc = WEOF; ++ ++ if (!convfail && wc == wcdelim) ++ next_item (&field_idx); ++ else if (wc == WEOF || (!convfail && wc == line_delim)) ++ { ++ if (found_any_selected_field ++ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) ++ putchar (line_delim); ++ if (wc == WEOF) ++ break; ++ field_idx = 1; ++ current_rp = frp; ++ found_any_selected_field = 0; ++ } ++ } ++} ++#endif ++ + static void + cut_stream (FILE *stream) + { +- if (operating_mode == byte_mode) +- cut_bytes (stream); ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) ++ { ++ switch (operating_mode) ++ { ++ case byte_mode: ++ if (byte_mode_character_aware) ++ cut_characters_or_cut_bytes_no_split (stream); ++ else ++ cut_bytes (stream); ++ break; ++ ++ case character_mode: ++ cut_characters_or_cut_bytes_no_split (stream); ++ break; ++ ++ case field_mode: ++ if (delimlen == 1) ++ { ++ /* Check if we have utf8 multibyte locale, so we can use this ++ optimization because of uniqueness of characters, which is ++ not true for e.g. SJIS */ ++ char * loc = setlocale(LC_CTYPE, NULL); ++ if (loc && (strstr (loc, "UTF-8") || strstr (loc, "utf-8") || ++ strstr (loc, "UTF8") || strstr (loc, "utf8"))) ++ { ++ cut_fields (stream); ++ break; ++ } ++ } ++ cut_fields_mb (stream); ++ break; ++ ++ default: ++ abort (); ++ } ++ } + else +- cut_fields (stream); ++#endif ++ { ++ if (operating_mode == field_mode) ++ cut_fields (stream); ++ else ++ cut_bytes (stream); ++ } + } + + /* Process file FILE to standard output. +@@ -483,6 +836,7 @@ main (int argc, char **argv) + bool ok; + bool delim_specified = false; + char *spec_list_string IF_LINT ( = NULL); ++ char mbdelim[MB_LEN_MAX + 1]; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); +@@ -505,7 +859,6 @@ main (int argc, char **argv) + switch (optc) + { + case 'b': +- case 'c': + /* Build the byte list. */ + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); +@@ -513,6 +866,14 @@ main (int argc, char **argv) + spec_list_string = optarg; + break; + ++ case 'c': ++ /* Build the character list. */ ++ if (operating_mode != undefined_mode) ++ FATAL_ERROR (_("only one type of list may be specified")); ++ operating_mode = character_mode; ++ spec_list_string = optarg; ++ break; ++ + case 'f': + /* Build the field list. */ + if (operating_mode != undefined_mode) +@@ -524,10 +885,38 @@ main (int argc, char **argv) + case 'd': + /* New delimiter. */ + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ +- if (optarg[0] != '\0' && optarg[1] != '\0') +- FATAL_ERROR (_("the delimiter must be a single character")); +- delim = optarg[0]; +- delim_specified = true; ++ { ++#if HAVE_MBRTOWC ++ if(MB_CUR_MAX > 1) ++ { ++ mbstate_t state; ++ ++ memset (&state, '\0', sizeof(mbstate_t)); ++ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); ++ ++ if (delimlen == (size_t)-1 || delimlen == (size_t)-2) ++ ++force_singlebyte_mode; ++ else ++ { ++ delimlen = (delimlen < 1) ? 1 : delimlen; ++ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') ++ FATAL_ERROR (_("the delimiter must be a single character")); ++ memcpy (mbdelim, optarg, delimlen); ++ mbdelim[delimlen] = '\0'; ++ if (delimlen == 1) ++ delim = *optarg; ++ } ++ } ++ ++ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) ++#endif ++ { ++ if (optarg[0] != '\0' && optarg[1] != '\0') ++ FATAL_ERROR (_("the delimiter must be a single character")); ++ delim = (unsigned char) optarg[0]; ++ } ++ delim_specified = true; ++ } + break; + + case OUTPUT_DELIMITER_OPTION: +@@ -540,6 +929,7 @@ main (int argc, char **argv) + break; + + case 'n': ++ byte_mode_character_aware = 1; + break; + + case 's': +@@ -579,15 +969,34 @@ main (int argc, char **argv) + | (complement ? SETFLD_COMPLEMENT : 0) ); + + if (!delim_specified) +- delim = '\t'; ++ { ++ delim = '\t'; ++#ifdef HAVE_MBRTOWC ++ wcdelim = L'\t'; ++ mbdelim[0] = '\t'; ++ mbdelim[1] = '\0'; ++ delimlen = 1; ++#endif ++ } + + if (output_delimiter_string == NULL) + { +- static char dummy[2]; +- dummy[0] = delim; +- dummy[1] = '\0'; +- output_delimiter_string = dummy; +- output_delimiter_length = 1; ++#ifdef HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) ++ { ++ output_delimiter_string = xstrdup(mbdelim); ++ output_delimiter_length = delimlen; ++ } ++ ++ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) ++#endif ++ { ++ static char dummy[2]; ++ dummy[0] = delim; ++ dummy[1] = '\0'; ++ output_delimiter_string = dummy; ++ output_delimiter_length = 1; ++ } + } + + if (optind == argc) diff --git a/coreutils.spec b/coreutils.spec index 528b1fa..a056fe0 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -46,6 +46,7 @@ Patch801: coreutils-i18n-expand-unexpand.patch # (sb) lin18nux/lsb compliance - cut - not stable enough, not applied Patch802: coreutils-i18n-cut.patch # i18n patch for cut - old version - used +Patch804: coreutils-i18n-cut-old.patch # The unexpand patch above is not correct. Sent to the patch authors Patch803: coreutils-i18n-fix-unexpand.patch @@ -179,6 +180,7 @@ including documentation and translations. %patch801 -p1 -b .i18n-expand #%%patch802 -p1 -b .i18n-cut %patch803 -p1 -b .i18n-fix-expand +%patch804 -p1 -b .i18n-cutold # Coreutils %patch908 -p1 -b .getgrouplist