15422 lines
452 KiB
Diff
15422 lines
452 KiB
Diff
diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h
|
||
--- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200
|
||
+++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -21,6 +21,11 @@
|
||
|
||
# include <stdio.h>
|
||
|
||
+/* Get mbstate_t. */
|
||
+# if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+# endif
|
||
+
|
||
/* A `struct linebuffer' holds a line of text. */
|
||
|
||
struct linebuffer
|
||
@@ -28,6 +33,9 @@ struct linebuffer
|
||
size_t size; /* Allocated. */
|
||
size_t length; /* Used. */
|
||
char *buffer;
|
||
+# if HAVE_WCHAR_H
|
||
+ mbstate_t state;
|
||
+# endif
|
||
};
|
||
|
||
/* Initialize linebuffer LINEBUFFER for use. */
|
||
diff -urNp coreutils-8.0-orig/lib/linebuffer.h.orig coreutils-8.0/lib/linebuffer.h.orig
|
||
--- coreutils-8.0-orig/lib/linebuffer.h.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/lib/linebuffer.h.orig 2009-10-06 10:59:48.000000000 +0200
|
||
@@ -0,0 +1,53 @@
|
||
+/* linebuffer.h -- declarations for reading arbitrarily long lines
|
||
+
|
||
+ Copyright (C) 1986, 1991, 1998, 1999, 2002, 2003, 2007 Free Software
|
||
+ Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation; either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if !defined LINEBUFFER_H
|
||
+# define LINEBUFFER_H
|
||
+
|
||
+# include <stdio.h>
|
||
+
|
||
+/* A `struct linebuffer' holds a line of text. */
|
||
+
|
||
+struct linebuffer
|
||
+{
|
||
+ size_t size; /* Allocated. */
|
||
+ size_t length; /* Used. */
|
||
+ char *buffer;
|
||
+};
|
||
+
|
||
+/* Initialize linebuffer LINEBUFFER for use. */
|
||
+void initbuffer (struct linebuffer *linebuffer);
|
||
+
|
||
+/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
|
||
+ Consider lines to be terminated by DELIMITER.
|
||
+ Keep the delimiter; append DELIMITER if we reach EOF and it wasn't
|
||
+ the last character in the file. Do not NUL-terminate.
|
||
+ Return LINEBUFFER, except at end of file return NULL. */
|
||
+struct linebuffer *readlinebuffer_delim (struct linebuffer *linebuffer,
|
||
+ FILE *stream, char delimiter);
|
||
+
|
||
+/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
|
||
+ Keep the newline; append a newline if it's the last line of a file
|
||
+ that ends in a non-newline character. Do not NUL-terminate.
|
||
+ Return LINEBUFFER, except at end of file return NULL. */
|
||
+struct linebuffer *readlinebuffer (struct linebuffer *linebuffer, FILE *stream);
|
||
+
|
||
+/* Free linebuffer LINEBUFFER and its data, all allocated with malloc. */
|
||
+void freebuffer (struct linebuffer *);
|
||
+
|
||
+#endif /* LINEBUFFER_H */
|
||
diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c
|
||
--- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200
|
||
+++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -28,6 +28,11 @@
|
||
#include <assert.h>
|
||
#include <getopt.h>
|
||
#include <sys/types.h>
|
||
+
|
||
+/* Get mbstate_t, mbrtowc(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
#include "system.h"
|
||
|
||
#include "error.h"
|
||
@@ -36,6 +41,18 @@
|
||
#include "quote.h"
|
||
#include "xstrndup.h"
|
||
|
||
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
|
||
+ installation; work around this configuration error. */
|
||
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
|
||
+# undef MB_LEN_MAX
|
||
+# define MB_LEN_MAX 16
|
||
+#endif
|
||
+
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
/* The official name of this program (e.g., no `g' prefix). */
|
||
#define PROGRAM_NAME "cut"
|
||
|
||
@@ -71,6 +88,52 @@
|
||
} \
|
||
while (0)
|
||
|
||
+/* Refill the buffer BUF to get a multibyte character. */
|
||
+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
|
||
+ do \
|
||
+ { \
|
||
+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
|
||
+ { \
|
||
+ memmove (BUF, BUFPOS, BUFLEN); \
|
||
+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
|
||
+ BUFPOS = BUF; \
|
||
+ } \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+/* Get the wide character at BUFPOS. BUFPOS itself is not advanced.
|
||
+ CONVFAIL is set to 1 if the byte sequence is not a valid character, otherwise 0. */
|
||
+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
|
||
+ do \
|
||
+ { \
|
||
+ mbstate_t state_bak; \
|
||
+ \
|
||
+ if (BUFLEN < 1) \
|
||
+ { \
|
||
+ WC = WEOF; \
|
||
+ break; \
|
||
+ } \
|
||
+ \
|
||
+ /* Get a wide character. */ \
|
||
+ CONVFAIL = 0; \
|
||
+ state_bak = STATE; \
|
||
+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
|
||
+ \
|
||
+ switch (MBLENGTH) \
|
||
+ { \
|
||
+ case (size_t)-1: \
|
||
+ case (size_t)-2: \
|
||
+ CONVFAIL++; \
|
||
+ STATE = state_bak; \
|
||
+ /* Fall through. */ \
|
||
+ \
|
||
+ case 0: \
|
||
+ MBLENGTH = 1; \
|
||
+ break; \
|
||
+ } \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
struct range_pair
|
||
{
|
||
size_t lo;
|
||
@@ -89,7 +152,7 @@ static char *field_1_buffer;
|
||
/* The number of bytes allocated for FIELD_1_BUFFER. */
|
||
static size_t field_1_bufsize;
|
||
|
||
-/* The largest field or byte index used as an endpoint of a closed
|
||
+/* The largest byte, character or field index used as an endpoint of a closed
|
||
or degenerate range specification; this doesn't include the starting
|
||
index of right-open-ended ranges. For example, with either range spec
|
||
`2-5,9-', `2-3,5,9-' this variable would be set to 5. */
|
||
@@ -101,10 +164,11 @@ static size_t eol_range_start;
|
||
|
||
/* This is a bit vector.
|
||
In byte mode, which bytes to output.
|
||
+ In character mode, which characters to output.
|
||
In field mode, which DELIM-separated fields to output.
|
||
- Both bytes and fields are numbered starting with 1,
|
||
+ Bytes, characters and fields are numbered starting with 1,
|
||
so the zeroth bit of this array is unused.
|
||
- A field or byte K has been selected if
|
||
+ A byte, character or field K has been selected if
|
||
(K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
|
||
|| (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
|
||
static unsigned char *printable_field;
|
||
@@ -113,15 +177,25 @@ enum operating_mode
|
||
{
|
||
undefined_mode,
|
||
|
||
- /* Output characters that are in the given bytes. */
|
||
+ /* Output bytes that are at the given positions. */
|
||
byte_mode,
|
||
|
||
+ /* Output characters that are at the given positions. */
|
||
+ character_mode,
|
||
+
|
||
/* Output the given delimeter-separated fields. */
|
||
field_mode
|
||
};
|
||
|
||
static enum operating_mode operating_mode;
|
||
|
||
+/* If nonzero, when in byte mode, don't split multibyte characters. */
|
||
+static int byte_mode_character_aware;
|
||
+
|
||
+/* If nonzero, use the single-byte functions even when this program
|
||
+ runs in a multibyte locale. */
|
||
+static int force_singlebyte_mode;
|
||
+
|
||
/* If true do not output lines containing no delimeter characters.
|
||
Otherwise, all such lines are printed. This option is valid only
|
||
with field mode. */
|
||
@@ -133,6 +207,9 @@ static bool complement;
|
||
|
||
/* The delimeter character for field mode. */
|
||
static unsigned char delim;
|
||
+#if HAVE_WCHAR_H
|
||
+static wchar_t wcdelim;
|
||
+#endif
|
||
|
||
/* True if the --output-delimiter=STRING option was specified. */
|
||
static bool output_delimiter_specified;
|
||
@@ -206,7 +283,7 @@ Mandatory arguments to long options are
|
||
-f, --fields=LIST select only these fields; also print any line\n\
|
||
that contains no delimiter character, unless\n\
|
||
the -s option is specified\n\
|
||
- -n (ignored)\n\
|
||
+ -n with -b: don't split multibyte characters\n\
|
||
"), stdout);
|
||
fputs (_("\
|
||
--complement complement the set of selected bytes, characters\n\
|
||
@@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
|
||
in_digits = false;
|
||
/* Starting a range. */
|
||
if (dash_found)
|
||
- FATAL_ERROR (_("invalid byte or field list"));
|
||
+ FATAL_ERROR (_("invalid byte, character or field list"));
|
||
dash_found = true;
|
||
fieldstr++;
|
||
|
||
@@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
|
||
if (!rhs_specified)
|
||
{
|
||
/* `n-'. From `initial' to end of line. */
|
||
- eol_range_start = initial;
|
||
+ if (eol_range_start == 0 ||
|
||
+ (eol_range_start != 0 && eol_range_start > initial))
|
||
+ eol_range_start = initial;
|
||
field_found = true;
|
||
}
|
||
else
|
||
{
|
||
/* `m-n' or `-n' (1-n). */
|
||
if (value < initial)
|
||
- FATAL_ERROR (_("invalid decreasing range"));
|
||
+ FATAL_ERROR (_("invalid byte, character or field list"));
|
||
|
||
/* Is there already a range going to end of line? */
|
||
if (eol_range_start != 0)
|
||
@@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
|
||
if (operating_mode == byte_mode)
|
||
error (0, 0,
|
||
_("byte offset %s is too large"), quote (bad_num));
|
||
+ else if (operating_mode == character_mode)
|
||
+ error (0, 0,
|
||
+ _("character offset %s is too large"), quote (bad_num));
|
||
else
|
||
error (0, 0,
|
||
_("field number %s is too large"), quote (bad_num));
|
||
@@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
|
||
fieldstr++;
|
||
}
|
||
else
|
||
- FATAL_ERROR (_("invalid byte or field list"));
|
||
+ FATAL_ERROR (_("invalid byte, character or field list"));
|
||
}
|
||
|
||
max_range_endpoint = 0;
|
||
@@ -579,6 +661,63 @@ cut_bytes (FILE *stream)
|
||
}
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+/* This function is used in the following cases.
|
||
+
|
||
+ 1. Read from the stream STREAM, printing to standard output any selected
|
||
+ characters.
|
||
+
|
||
+ 2. Read from stream STREAM, printing to standard output any selected bytes,
|
||
+ without splitting multibyte characters. */
|
||
+
|
||
+static void
|
||
+cut_characters_or_cut_bytes_no_split (FILE *stream)
|
||
+{
|
||
+ int idx; /* number of bytes or characters in the line so far. */
|
||
+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
|
||
+ char *bufpos; /* Next read position of BUF. */
|
||
+ size_t buflen; /* The length of the byte sequence in buf. */
|
||
+ wint_t wc; /* A gotten wide character. */
|
||
+ size_t mblength; /* The byte size of a multibyte character which shows
|
||
+ as same character as WC. */
|
||
+ mbstate_t state; /* State of the stream. */
|
||
+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
|
||
+
|
||
+ idx = 0;
|
||
+ buflen = 0;
|
||
+ bufpos = buf;
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ while (1)
|
||
+ {
|
||
+ REFILL_BUFFER (buf, bufpos, buflen, stream);
|
||
+
|
||
+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
|
||
+
|
||
+ if (wc == WEOF)
|
||
+ {
|
||
+ if (idx > 0)
|
||
+ putchar ('\n');
|
||
+ break;
|
||
+ }
|
||
+ else if (wc == L'\n')
|
||
+ {
|
||
+ putchar ('\n');
|
||
+ idx = 0;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ idx += (operating_mode == byte_mode) ? mblength : 1;
|
||
+ if (print_kth (idx, NULL))
|
||
+ fwrite (bufpos, mblength, sizeof(char), stdout);
|
||
+ }
|
||
+
|
||
+ buflen -= mblength;
|
||
+ bufpos += mblength;
|
||
+ }
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Read from stream STREAM, printing to standard output any selected fields. */
|
||
|
||
static void
|
||
@@ -701,13 +840,192 @@ cut_fields (FILE *stream)
|
||
}
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static void
|
||
+cut_fields_mb (FILE *stream)
|
||
+{
|
||
+ int c;
|
||
+ unsigned int field_idx;
|
||
+ int found_any_selected_field;
|
||
+ int buffer_first_field;
|
||
+ int empty_input;
|
||
+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
|
||
+ char *bufpos; /* Next read position of BUF. */
|
||
+ size_t buflen; /* The length of the byte sequence in buf. */
|
||
+ wint_t wc = 0; /* A gotten wide character. */
|
||
+ size_t mblength; /* The byte size of a multibyte character which shows
|
||
+ as same character as WC. */
|
||
+ mbstate_t state; /* State of the stream. */
|
||
+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
|
||
+
|
||
+ found_any_selected_field = 0;
|
||
+ field_idx = 1;
|
||
+ bufpos = buf;
|
||
+ buflen = 0;
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ c = getc (stream);
|
||
+ empty_input = (c == EOF);
|
||
+ if (c != EOF)
|
||
+ ungetc (c, stream);
|
||
+ else
|
||
+ wc = WEOF;
|
||
+
|
||
+ /* To support the semantics of the -s flag, we may have to buffer
|
||
+ all of the first field to determine whether it is `delimited.'
|
||
+ But that is unnecessary if all non-delimited lines must be printed
|
||
+ and the first field has been selected, or if non-delimited lines
|
||
+ must be suppressed and the first field has *not* been selected.
|
||
+ That is because a non-delimited line has exactly one field. */
|
||
+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
|
||
+
|
||
+ while (1)
|
||
+ {
|
||
+ if (field_idx == 1 && buffer_first_field)
|
||
+ {
|
||
+ int len = 0;
|
||
+
|
||
+ while (1)
|
||
+ {
|
||
+ REFILL_BUFFER (buf, bufpos, buflen, stream);
|
||
+
|
||
+ GET_NEXT_WC_FROM_BUFFER
|
||
+ (wc, bufpos, buflen, mblength, state, convfail);
|
||
+
|
||
+ if (wc == WEOF)
|
||
+ break;
|
||
+
|
||
+ field_1_buffer = xrealloc (field_1_buffer, len + mblength);
|
||
+ memcpy (field_1_buffer + len, bufpos, mblength);
|
||
+ len += mblength;
|
||
+ buflen -= mblength;
|
||
+ bufpos += mblength;
|
||
+
|
||
+ if (!convfail && (wc == L'\n' || wc == wcdelim))
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (wc == WEOF)
|
||
+ break;
|
||
+
|
||
+ /* If the first field extends to the end of line (it is not
|
||
+ delimited) and we are printing all non-delimited lines,
|
||
+ print this one. */
|
||
+ if (convfail || (!convfail && wc != wcdelim))
|
||
+ {
|
||
+ if (suppress_non_delimited)
|
||
+ {
|
||
+ /* Empty. */
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ fwrite (field_1_buffer, sizeof (char), len, stdout);
|
||
+ /* Make sure the output line is newline terminated. */
|
||
+ if (convfail || (!convfail && wc != L'\n'))
|
||
+ putchar ('\n');
|
||
+ }
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ if (print_kth (1, NULL))
|
||
+ {
|
||
+ /* Print the field, but not the trailing delimiter. */
|
||
+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
|
||
+ found_any_selected_field = 1;
|
||
+ }
|
||
+ ++field_idx;
|
||
+ }
|
||
+
|
||
+ if (wc != WEOF)
|
||
+ {
|
||
+ if (print_kth (field_idx, NULL))
|
||
+ {
|
||
+ if (found_any_selected_field)
|
||
+ {
|
||
+ fwrite (output_delimiter_string, sizeof (char),
|
||
+ output_delimiter_length, stdout);
|
||
+ }
|
||
+ found_any_selected_field = 1;
|
||
+ }
|
||
+
|
||
+ while (1)
|
||
+ {
|
||
+ REFILL_BUFFER (buf, bufpos, buflen, stream);
|
||
+
|
||
+ GET_NEXT_WC_FROM_BUFFER
|
||
+ (wc, bufpos, buflen, mblength, state, convfail);
|
||
+
|
||
+ if (wc == WEOF)
|
||
+ break;
|
||
+ else if (!convfail && (wc == wcdelim || wc == L'\n'))
|
||
+ {
|
||
+ buflen -= mblength;
|
||
+ bufpos += mblength;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (print_kth (field_idx, NULL))
|
||
+ fwrite (bufpos, mblength, sizeof(char), stdout);
|
||
+
|
||
+ buflen -= mblength;
|
||
+ bufpos += mblength;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if ((!convfail || wc == L'\n') && buflen < 1)
|
||
+ wc = WEOF;
|
||
+
|
||
+ if (!convfail && wc == wcdelim)
|
||
+ ++field_idx;
|
||
+ else if (wc == WEOF || (!convfail && wc == L'\n'))
|
||
+ {
|
||
+ if (found_any_selected_field
|
||
+ || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
|
||
+ putchar ('\n');
|
||
+ if (wc == WEOF)
|
||
+ break;
|
||
+ field_idx = 1;
|
||
+ found_any_selected_field = 0;
|
||
+ }
|
||
+ }
|
||
+}
|
||
+#endif
|
||
+
|
||
static void
|
||
cut_stream (FILE *stream)
|
||
{
|
||
- if (operating_mode == byte_mode)
|
||
- cut_bytes (stream);
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
|
||
+ {
|
||
+ switch (operating_mode)
|
||
+ {
|
||
+ case byte_mode:
|
||
+ if (byte_mode_character_aware)
|
||
+ cut_characters_or_cut_bytes_no_split (stream);
|
||
+ else
|
||
+ cut_bytes (stream);
|
||
+ break;
|
||
+
|
||
+ case character_mode:
|
||
+ cut_characters_or_cut_bytes_no_split (stream);
|
||
+ break;
|
||
+
|
||
+ case field_mode:
|
||
+ cut_fields_mb (stream);
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ abort ();
|
||
+ }
|
||
+ }
|
||
else
|
||
- cut_fields (stream);
|
||
+#endif
|
||
+ {
|
||
+ if (operating_mode == field_mode)
|
||
+ cut_fields (stream);
|
||
+ else
|
||
+ cut_bytes (stream);
|
||
+ }
|
||
}
|
||
|
||
/* Process file FILE to standard output.
|
||
@@ -757,6 +1075,8 @@ main (int argc, char **argv)
|
||
bool ok;
|
||
bool delim_specified = false;
|
||
char *spec_list_string IF_LINT(= NULL);
|
||
+ char mbdelim[MB_LEN_MAX + 1];
|
||
+ size_t delimlen = 0;
|
||
|
||
initialize_main (&argc, &argv);
|
||
set_program_name (argv[0]);
|
||
@@ -779,7 +1099,6 @@ main (int argc, char **argv)
|
||
switch (optc)
|
||
{
|
||
case 'b':
|
||
- case 'c':
|
||
/* Build the byte list. */
|
||
if (operating_mode != undefined_mode)
|
||
FATAL_ERROR (_("only one type of list may be specified"));
|
||
@@ -787,6 +1106,14 @@ main (int argc, char **argv)
|
||
spec_list_string = optarg;
|
||
break;
|
||
|
||
+ case 'c':
|
||
+ /* Build the character list. */
|
||
+ if (operating_mode != undefined_mode)
|
||
+ FATAL_ERROR (_("only one type of list may be specified"));
|
||
+ operating_mode = character_mode;
|
||
+ spec_list_string = optarg;
|
||
+ break;
|
||
+
|
||
case 'f':
|
||
/* Build the field list. */
|
||
if (operating_mode != undefined_mode)
|
||
@@ -798,10 +1125,35 @@ main (int argc, char **argv)
|
||
case 'd':
|
||
/* New delimiter. */
|
||
/* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
|
||
- if (optarg[0] != '\0' && optarg[1] != '\0')
|
||
- FATAL_ERROR (_("the delimiter must be a single character"));
|
||
- delim = optarg[0];
|
||
- delim_specified = true;
|
||
+ {
|
||
+#if HAVE_MBRTOWC
|
||
+ if(MB_CUR_MAX > 1)
|
||
+ {
|
||
+ mbstate_t state;
|
||
+
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
|
||
+
|
||
+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
|
||
+ ++force_singlebyte_mode;
|
||
+ else
|
||
+ {
|
||
+ delimlen = (delimlen < 1) ? 1 : delimlen;
|
||
+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
|
||
+ FATAL_ERROR (_("the delimiter must be a single character"));
|
||
+ memcpy (mbdelim, optarg, delimlen);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
|
||
+#endif
|
||
+ {
|
||
+ if (optarg[0] != '\0' && optarg[1] != '\0')
|
||
+ FATAL_ERROR (_("the delimiter must be a single character"));
|
||
+ delim = (unsigned char) optarg[0];
|
||
+ }
|
||
+ delim_specified = true;
|
||
+ }
|
||
break;
|
||
|
||
case OUTPUT_DELIMITER_OPTION:
|
||
@@ -814,6 +1166,7 @@ main (int argc, char **argv)
|
||
break;
|
||
|
||
case 'n':
|
||
+ byte_mode_character_aware = 1;
|
||
break;
|
||
|
||
case 's':
|
||
@@ -836,7 +1189,7 @@ main (int argc, char **argv)
|
||
if (operating_mode == undefined_mode)
|
||
FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
|
||
|
||
- if (delim != '\0' && operating_mode != field_mode)
|
||
+ if (delim_specified && operating_mode != field_mode)
|
||
FATAL_ERROR (_("an input delimiter may be specified only\
|
||
when operating on fields"));
|
||
|
||
@@ -863,15 +1216,34 @@ main (int argc, char **argv)
|
||
}
|
||
|
||
if (!delim_specified)
|
||
- delim = '\t';
|
||
+ {
|
||
+ delim = '\t';
|
||
+#ifdef HAVE_MBRTOWC
|
||
+ wcdelim = L'\t';
|
||
+ mbdelim[0] = '\t';
|
||
+ mbdelim[1] = '\0';
|
||
+ delimlen = 1;
|
||
+#endif
|
||
+ }
|
||
|
||
if (output_delimiter_string == NULL)
|
||
{
|
||
- static char dummy[2];
|
||
- dummy[0] = delim;
|
||
- dummy[1] = '\0';
|
||
- output_delimiter_string = dummy;
|
||
- output_delimiter_length = 1;
|
||
+#ifdef HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
|
||
+ {
|
||
+ output_delimiter_string = xstrdup(mbdelim);
|
||
+ output_delimiter_length = delimlen;
|
||
+ }
|
||
+
|
||
+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
|
||
+#endif
|
||
+ {
|
||
+ static char dummy[2];
|
||
+ dummy[0] = delim;
|
||
+ dummy[1] = '\0';
|
||
+ output_delimiter_string = dummy;
|
||
+ output_delimiter_length = 1;
|
||
+ }
|
||
}
|
||
|
||
if (optind == argc)
|
||
diff -urNp coreutils-8.0-orig/src/cut.c.orig coreutils-8.0/src/cut.c.orig
|
||
--- coreutils-8.0-orig/src/cut.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/cut.c.orig 2009-09-23 10:25:44.000000000 +0200
|
||
@@ -0,0 +1,893 @@
|
||
+/* cut - remove parts of lines of files
|
||
+ Copyright (C) 1997-2009 Free Software Foundation, Inc.
|
||
+ Copyright (C) 1984 David M. Ihnat
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* Written by David Ihnat. */
|
||
+
|
||
+/* POSIX changes, bug fixes, long-named options, and cleanup
|
||
+ by David MacKenzie <djm@gnu.ai.mit.edu>.
|
||
+
|
||
+ Rewrite cut_fields and cut_bytes -- Jim Meyering. */
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <stdio.h>
|
||
+#include <assert.h>
|
||
+#include <getopt.h>
|
||
+#include <sys/types.h>
|
||
+#include "system.h"
|
||
+
|
||
+#include "error.h"
|
||
+#include "getndelim2.h"
|
||
+#include "hash.h"
|
||
+#include "quote.h"
|
||
+#include "xstrndup.h"
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "cut"
|
||
+
|
||
+#define AUTHORS \
|
||
+ proper_name ("David M. Ihnat"), \
|
||
+ proper_name ("David MacKenzie"), \
|
||
+ proper_name ("Jim Meyering")
|
||
+
|
||
+#define FATAL_ERROR(Message) \
|
||
+ do \
|
||
+ { \
|
||
+ error (0, 0, (Message)); \
|
||
+ usage (EXIT_FAILURE); \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+/* Append LOW, HIGH to the list RP of range pairs, allocating additional
|
||
+ space if necessary. Update local variable N_RP. When allocating,
|
||
+ update global variable N_RP_ALLOCATED. */
|
||
+
|
||
+#define ADD_RANGE_PAIR(rp, low, high) \
|
||
+ do \
|
||
+ { \
|
||
+ if (low == 0 || high == 0) \
|
||
+ FATAL_ERROR (_("fields and positions are numbered from 1")); \
|
||
+ if (n_rp >= n_rp_allocated) \
|
||
+ { \
|
||
+ (rp) = X2NREALLOC (rp, &n_rp_allocated); \
|
||
+ } \
|
||
+ rp[n_rp].lo = (low); \
|
||
+ rp[n_rp].hi = (high); \
|
||
+ ++n_rp; \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+struct range_pair
|
||
+ {
|
||
+ size_t lo;
|
||
+ size_t hi;
|
||
+ };
|
||
+
|
||
+/* This buffer is used to support the semantics of the -s option
|
||
+ (or lack of same) when the specified field list includes (does
|
||
+ not include) the first field. In both of those cases, the entire
|
||
+ first field must be read into this buffer to determine whether it
|
||
+ is followed by a delimiter or a newline before any of it may be
|
||
+ output. Otherwise, cut_fields can do the job without using this
|
||
+ buffer. */
|
||
+static char *field_1_buffer;
|
||
+
|
||
+/* The number of bytes allocated for FIELD_1_BUFFER. */
|
||
+static size_t field_1_bufsize;
|
||
+
|
||
+/* The largest field or byte index used as an endpoint of a closed
|
||
+ or degenerate range specification; this doesn't include the starting
|
||
+ index of right-open-ended ranges. For example, with either range spec
|
||
+ `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
|
||
+static size_t max_range_endpoint;
|
||
+
|
||
+/* If nonzero, this is the index of the first field in a range that goes
|
||
+ to end of line. */
|
||
+static size_t eol_range_start;
|
||
+
|
||
+/* This is a bit vector.
|
||
+ In byte mode, which bytes to output.
|
||
+ In field mode, which DELIM-separated fields to output.
|
||
+ Both bytes and fields are numbered starting with 1,
|
||
+ so the zeroth bit of this array is unused.
|
||
+ A field or byte K has been selected if
|
||
+ (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
|
||
+ || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
|
||
+static unsigned char *printable_field;
|
||
+
|
||
+enum operating_mode
|
||
+ {
|
||
+ undefined_mode,
|
||
+
|
||
+ /* Output characters that are in the given bytes. */
|
||
+ byte_mode,
|
||
+
|
||
+ /* Output the given delimeter-separated fields. */
|
||
+ field_mode
|
||
+ };
|
||
+
|
||
+static enum operating_mode operating_mode;
|
||
+
|
||
+/* If true do not output lines containing no delimeter characters.
|
||
+ Otherwise, all such lines are printed. This option is valid only
|
||
+ with field mode. */
|
||
+static bool suppress_non_delimited;
|
||
+
|
||
+/* If nonzero, print all bytes, characters, or fields _except_
|
||
+ those that were specified. */
|
||
+static bool complement;
|
||
+
|
||
+/* The delimeter character for field mode. */
|
||
+static unsigned char delim;
|
||
+
|
||
+/* True if the --output-delimiter=STRING option was specified. */
|
||
+static bool output_delimiter_specified;
|
||
+
|
||
+/* The length of output_delimiter_string. */
|
||
+static size_t output_delimiter_length;
|
||
+
|
||
+/* The output field separator string. Defaults to the 1-character
|
||
+ string consisting of the input delimiter. */
|
||
+static char *output_delimiter_string;
|
||
+
|
||
+/* True if we have ever read standard input. */
|
||
+static bool have_read_stdin;
|
||
+
|
||
+#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
|
||
+
|
||
+/* The set of range-start indices. For example, given a range-spec list like
|
||
+ `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
|
||
+ Note that although `4' looks like a range-start index, it is in the middle
|
||
+ of the `3-5' range, so it doesn't count.
|
||
+ This table is created/used IFF output_delimiter_specified is set. */
|
||
+static Hash_table *range_start_ht;
|
||
+
|
||
+/* For long options that have no equivalent short option, use a
|
||
+ non-character as a pseudo short option, starting with CHAR_MAX + 1. */
|
||
+enum
|
||
+{
|
||
+ OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
|
||
+ COMPLEMENT_OPTION
|
||
+};
|
||
+
|
||
+static struct option const longopts[] =
|
||
+{
|
||
+ {"bytes", required_argument, NULL, 'b'},
|
||
+ {"characters", required_argument, NULL, 'c'},
|
||
+ {"fields", required_argument, NULL, 'f'},
|
||
+ {"delimiter", required_argument, NULL, 'd'},
|
||
+ {"only-delimited", no_argument, NULL, 's'},
|
||
+ {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
|
||
+ {"complement", no_argument, NULL, COMPLEMENT_OPTION},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0}
|
||
+};
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s OPTION... [FILE]...\n\
|
||
+"),
|
||
+ program_name);
|
||
+ fputs (_("\
|
||
+Print selected parts of lines from each FILE to standard output.\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Mandatory arguments to long options are mandatory for short options too.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -b, --bytes=LIST select only these bytes\n\
|
||
+ -c, --characters=LIST select only these characters\n\
|
||
+ -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -f, --fields=LIST select only these fields; also print any line\n\
|
||
+ that contains no delimiter character, unless\n\
|
||
+ the -s option is specified\n\
|
||
+ -n (ignored)\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ --complement complement the set of selected bytes, characters\n\
|
||
+ or fields\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -s, --only-delimited do not print lines not containing delimiters\n\
|
||
+ --output-delimiter=STRING use STRING as the output delimiter\n\
|
||
+ the default is to use the input delimiter\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (_("\
|
||
+\n\
|
||
+Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
|
||
+range, or many ranges separated by commas. Selected input is written\n\
|
||
+in the same order that it is read, and is written exactly once.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Each range is one of:\n\
|
||
+\n\
|
||
+ N N'th byte, character or field, counted from 1\n\
|
||
+ N- from N'th byte, character or field, to end of line\n\
|
||
+ N-M from N'th to M'th (included) byte, character or field\n\
|
||
+ -M from first to M'th (included) byte, character or field\n\
|
||
+\n\
|
||
+With no FILE, or when FILE is -, read standard input.\n\
|
||
+"), stdout);
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+ exit (status);
|
||
+}
|
||
+
|
||
+static inline void
|
||
+mark_range_start (size_t i)
|
||
+{
|
||
+ /* Record the fact that `i' is a range-start index. */
|
||
+ void *ent_from_table = hash_insert (range_start_ht, (void*) i);
|
||
+ if (ent_from_table == NULL)
|
||
+ {
|
||
+ /* Insertion failed due to lack of memory. */
|
||
+ xalloc_die ();
|
||
+ }
|
||
+ assert ((size_t) ent_from_table == i);
|
||
+}
|
||
+
|
||
+static inline void
|
||
+mark_printable_field (size_t i)
|
||
+{
|
||
+ size_t n = i / CHAR_BIT;
|
||
+ printable_field[n] |= (1 << (i % CHAR_BIT));
|
||
+}
|
||
+
|
||
+static inline bool
|
||
+is_printable_field (size_t i)
|
||
+{
|
||
+ size_t n = i / CHAR_BIT;
|
||
+ return (printable_field[n] >> (i % CHAR_BIT)) & 1;
|
||
+}
|
||
+
|
||
+static size_t
|
||
+hash_int (const void *x, size_t tablesize)
|
||
+{
|
||
+#ifdef UINTPTR_MAX
|
||
+ uintptr_t y = (uintptr_t) x;
|
||
+#else
|
||
+ size_t y = (size_t) x;
|
||
+#endif
|
||
+ return y % tablesize;
|
||
+}
|
||
+
|
||
+static bool
|
||
+hash_compare_ints (void const *x, void const *y)
|
||
+{
|
||
+ return (x == y) ? true : false;
|
||
+}
|
||
+
|
||
+static bool
|
||
+is_range_start_index (size_t i)
|
||
+{
|
||
+ return hash_lookup (range_start_ht, (void *) i) ? true : false;
|
||
+}
|
||
+
|
||
+/* Return nonzero if the K'th field or byte is printable.
|
||
+ When returning nonzero, if RANGE_START is non-NULL,
|
||
+ set *RANGE_START to true if K is the beginning of a range, and to
|
||
+ false otherwise. */
|
||
+
|
||
+static bool
|
||
+print_kth (size_t k, bool *range_start)
|
||
+{
|
||
+ bool k_selected
|
||
+ = ((0 < eol_range_start && eol_range_start <= k)
|
||
+ || (k <= max_range_endpoint && is_printable_field (k)));
|
||
+
|
||
+ bool is_selected = k_selected ^ complement;
|
||
+ if (range_start && is_selected)
|
||
+ *range_start = is_range_start_index (k);
|
||
+
|
||
+ return is_selected;
|
||
+}
|
||
+
|
||
+/* Comparison function for qsort to order the list of
|
||
+ struct range_pairs. */
|
||
+static int
|
||
+compare_ranges (const void *a, const void *b)
|
||
+{
|
||
+ int a_start = ((const struct range_pair *) a)->lo;
|
||
+ int b_start = ((const struct range_pair *) b)->lo;
|
||
+ return a_start < b_start ? -1 : a_start > b_start;
|
||
+}
|
||
+
|
||
+/* Given the list of field or byte range specifications FIELDSTR, set
|
||
+ MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD
|
||
+ array. If there is a right-open-ended range, set EOL_RANGE_START
|
||
+ to its starting index. FIELDSTR should be composed of one or more
|
||
+ numbers or ranges of numbers, separated by blanks or commas.
|
||
+ Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
|
||
+ through end of line. Return true if FIELDSTR contains at least
|
||
+ one field specification, false otherwise. */
|
||
+
|
||
+/* FIXME-someday: What if the user wants to cut out the 1,000,000-th
|
||
+ field of some huge input file? This function shouldn't have to
|
||
+ allocate a table of a million bits just so we can test every
|
||
+ field < 10^6 with an array dereference. Instead, consider using
|
||
+ an adaptive approach: if the range of selected fields is too large,
|
||
+ but only a few fields/byte-offsets are actually selected, use a
|
||
+ hash table. If the range of selected fields is too large, and
|
||
+ too many are selected, then resort to using the range-pairs (the
|
||
+ `rp' array) directly. */
|
||
+
|
||
+static bool
|
||
+set_fields (const char *fieldstr)
|
||
+{
|
||
+ size_t initial = 1; /* Value of first number in a range. */
|
||
+ size_t value = 0; /* If nonzero, a number being accumulated. */
|
||
+ bool lhs_specified = false;
|
||
+ bool rhs_specified = false;
|
||
+ bool dash_found = false; /* True if a '-' is found in this field. */
|
||
+ bool field_found = false; /* True if at least one field spec
|
||
+ has been processed. */
|
||
+
|
||
+ struct range_pair *rp = NULL;
|
||
+ size_t n_rp = 0;
|
||
+ size_t n_rp_allocated = 0;
|
||
+ size_t i;
|
||
+ bool in_digits = false;
|
||
+
|
||
+ /* Collect and store in RP the range end points.
|
||
+ It also sets EOL_RANGE_START if appropriate. */
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ if (*fieldstr == '-')
|
||
+ {
|
||
+ in_digits = false;
|
||
+ /* Starting a range. */
|
||
+ if (dash_found)
|
||
+ FATAL_ERROR (_("invalid byte or field list"));
|
||
+ dash_found = true;
|
||
+ fieldstr++;
|
||
+
|
||
+ initial = (lhs_specified ? value : 1);
|
||
+ value = 0;
|
||
+ }
|
||
+ else if (*fieldstr == ',' ||
|
||
+ isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
|
||
+ {
|
||
+ in_digits = false;
|
||
+ /* Ending the string, or this field/byte sublist. */
|
||
+ if (dash_found)
|
||
+ {
|
||
+ dash_found = false;
|
||
+
|
||
+ if (!lhs_specified && !rhs_specified)
|
||
+ FATAL_ERROR (_("invalid range with no endpoint: -"));
|
||
+
|
||
+ /* A range. Possibilities: -n, m-n, n-.
|
||
+ In any case, `initial' contains the start of the range. */
|
||
+ if (!rhs_specified)
|
||
+ {
|
||
+ /* `n-'. From `initial' to end of line. */
|
||
+ eol_range_start = initial;
|
||
+ field_found = true;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* `m-n' or `-n' (1-n). */
|
||
+ if (value < initial)
|
||
+ FATAL_ERROR (_("invalid decreasing range"));
|
||
+
|
||
+ /* Is there already a range going to end of line? */
|
||
+ if (eol_range_start != 0)
|
||
+ {
|
||
+ /* Yes. Is the new sequence already contained
|
||
+ in the old one? If so, no processing is
|
||
+ necessary. */
|
||
+ if (initial < eol_range_start)
|
||
+ {
|
||
+ /* No, the new sequence starts before the
|
||
+ old. Does the old range going to end of line
|
||
+ extend into the new range? */
|
||
+ if (eol_range_start <= value)
|
||
+ {
|
||
+ /* Yes. Simply move the end of line marker. */
|
||
+ eol_range_start = initial;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* No. A simple range, before and disjoint from
|
||
+ the range going to end of line. Fill it. */
|
||
+ ADD_RANGE_PAIR (rp, initial, value);
|
||
+ }
|
||
+
|
||
+ /* In any case, some fields were selected. */
|
||
+ field_found = true;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* There is no range going to end of line. */
|
||
+ ADD_RANGE_PAIR (rp, initial, value);
|
||
+ field_found = true;
|
||
+ }
|
||
+ value = 0;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* A simple field number, not a range. */
|
||
+ ADD_RANGE_PAIR (rp, value, value);
|
||
+ value = 0;
|
||
+ field_found = true;
|
||
+ }
|
||
+
|
||
+ if (*fieldstr == '\0')
|
||
+ {
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ fieldstr++;
|
||
+ lhs_specified = false;
|
||
+ rhs_specified = false;
|
||
+ }
|
||
+ else if (ISDIGIT (*fieldstr))
|
||
+ {
|
||
+ /* Record beginning of digit string, in case we have to
|
||
+ complain about it. */
|
||
+ static char const *num_start;
|
||
+ if (!in_digits || !num_start)
|
||
+ num_start = fieldstr;
|
||
+ in_digits = true;
|
||
+
|
||
+ if (dash_found)
|
||
+ rhs_specified = 1;
|
||
+ else
|
||
+ lhs_specified = 1;
|
||
+
|
||
+ /* Detect overflow. */
|
||
+ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
|
||
+ {
|
||
+ /* In case the user specified -c$(echo 2^64|bc),22,
|
||
+ complain only about the first number. */
|
||
+ /* Determine the length of the offending number. */
|
||
+ size_t len = strspn (num_start, "0123456789");
|
||
+ char *bad_num = xstrndup (num_start, len);
|
||
+ if (operating_mode == byte_mode)
|
||
+ error (0, 0,
|
||
+ _("byte offset %s is too large"), quote (bad_num));
|
||
+ else
|
||
+ error (0, 0,
|
||
+ _("field number %s is too large"), quote (bad_num));
|
||
+ free (bad_num);
|
||
+ exit (EXIT_FAILURE);
|
||
+ }
|
||
+
|
||
+ fieldstr++;
|
||
+ }
|
||
+ else
|
||
+ FATAL_ERROR (_("invalid byte or field list"));
|
||
+ }
|
||
+
|
||
+ max_range_endpoint = 0;
|
||
+ for (i = 0; i < n_rp; i++)
|
||
+ {
|
||
+ if (rp[i].hi > max_range_endpoint)
|
||
+ max_range_endpoint = rp[i].hi;
|
||
+ }
|
||
+
|
||
+ /* Allocate an array large enough so that it may be indexed by
|
||
+ the field numbers corresponding to all finite ranges
|
||
+ (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. */
|
||
+
|
||
+ printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
|
||
+
|
||
+ qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
|
||
+
|
||
+ /* Set the array entries corresponding to integers in the ranges of RP. */
|
||
+ for (i = 0; i < n_rp; i++)
|
||
+ {
|
||
+ size_t j;
|
||
+ size_t rsi_candidate;
|
||
+
|
||
+ /* Record the range-start indices, i.e., record each start
|
||
+ index that is not part of any other (lo..hi] range. */
|
||
+ rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
|
||
+ if (output_delimiter_specified
|
||
+ && !is_printable_field (rsi_candidate))
|
||
+ mark_range_start (rsi_candidate);
|
||
+
|
||
+ for (j = rp[i].lo; j <= rp[i].hi; j++)
|
||
+ mark_printable_field (j);
|
||
+ }
|
||
+
|
||
+ if (output_delimiter_specified
|
||
+ && !complement
|
||
+ && eol_range_start && !is_printable_field (eol_range_start))
|
||
+ mark_range_start (eol_range_start);
|
||
+
|
||
+ free (rp);
|
||
+
|
||
+ return field_found;
|
||
+}
|
||
+
|
||
+/* Read from stream STREAM, printing to standard output any selected bytes. */
|
||
+
|
||
+static void
|
||
+cut_bytes (FILE *stream)
|
||
+{
|
||
+ size_t byte_idx; /* Number of bytes in the line so far. */
|
||
+ /* Whether to begin printing delimiters between ranges for the current line.
|
||
+ Set after we've begun printing data corresponding to the first range. */
|
||
+ bool print_delimiter;
|
||
+
|
||
+ byte_idx = 0;
|
||
+ print_delimiter = false;
|
||
+ while (1)
|
||
+ {
|
||
+ int c; /* Each character from the file. */
|
||
+
|
||
+ c = getc (stream);
|
||
+
|
||
+ if (c == '\n')
|
||
+ {
|
||
+ putchar ('\n');
|
||
+ byte_idx = 0;
|
||
+ print_delimiter = false;
|
||
+ }
|
||
+ else if (c == EOF)
|
||
+ {
|
||
+ if (byte_idx > 0)
|
||
+ putchar ('\n');
|
||
+ break;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ bool range_start;
|
||
+ bool *rs = output_delimiter_specified ? &range_start : NULL;
|
||
+ if (print_kth (++byte_idx, rs))
|
||
+ {
|
||
+ if (rs && *rs && print_delimiter)
|
||
+ {
|
||
+ fwrite (output_delimiter_string, sizeof (char),
|
||
+ output_delimiter_length, stdout);
|
||
+ }
|
||
+ print_delimiter = true;
|
||
+ putchar (c);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Read from stream STREAM, printing to standard output any selected fields. */
|
||
+
|
||
+static void
|
||
+cut_fields (FILE *stream)
|
||
+{
|
||
+ int c;
|
||
+ size_t field_idx = 1;
|
||
+ bool found_any_selected_field = false;
|
||
+ bool buffer_first_field;
|
||
+
|
||
+ c = getc (stream);
|
||
+ if (c == EOF)
|
||
+ return;
|
||
+
|
||
+ ungetc (c, stream);
|
||
+
|
||
+ /* To support the semantics of the -s flag, we may have to buffer
|
||
+ all of the first field to determine whether it is `delimited.'
|
||
+ But that is unnecessary if all non-delimited lines must be printed
|
||
+ and the first field has been selected, or if non-delimited lines
|
||
+ must be suppressed and the first field has *not* been selected.
|
||
+ That is because a non-delimited line has exactly one field. */
|
||
+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
|
||
+
|
||
+ while (1)
|
||
+ {
|
||
+ if (field_idx == 1 && buffer_first_field)
|
||
+ {
|
||
+ ssize_t len;
|
||
+ size_t n_bytes;
|
||
+
|
||
+ len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
|
||
+ GETNLINE_NO_LIMIT, delim, '\n', stream);
|
||
+ if (len < 0)
|
||
+ {
|
||
+ free (field_1_buffer);
|
||
+ field_1_buffer = NULL;
|
||
+ if (ferror (stream) || feof (stream))
|
||
+ break;
|
||
+ xalloc_die ();
|
||
+ }
|
||
+
|
||
+ n_bytes = len;
|
||
+ assert (n_bytes != 0);
|
||
+
|
||
+ /* If the first field extends to the end of line (it is not
|
||
+ delimited) and we are printing all non-delimited lines,
|
||
+ print this one. */
|
||
+ if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
|
||
+ {
|
||
+ if (suppress_non_delimited)
|
||
+ {
|
||
+ /* Empty. */
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
|
||
+ /* Make sure the output line is newline terminated. */
|
||
+ if (field_1_buffer[n_bytes - 1] != '\n')
|
||
+ putchar ('\n');
|
||
+ }
|
||
+ continue;
|
||
+ }
|
||
+ if (print_kth (1, NULL))
|
||
+ {
|
||
+ /* Print the field, but not the trailing delimiter. */
|
||
+ fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
|
||
+ found_any_selected_field = true;
|
||
+ }
|
||
+ ++field_idx;
|
||
+ }
|
||
+
|
||
+ if (c != EOF)
|
||
+ {
|
||
+ if (print_kth (field_idx, NULL))
|
||
+ {
|
||
+ if (found_any_selected_field)
|
||
+ {
|
||
+ fwrite (output_delimiter_string, sizeof (char),
|
||
+ output_delimiter_length, stdout);
|
||
+ }
|
||
+ found_any_selected_field = true;
|
||
+
|
||
+ while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
|
||
+ {
|
||
+ putchar (c);
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
|
||
+ {
|
||
+ /* Empty. */
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (c == '\n')
|
||
+ {
|
||
+ c = getc (stream);
|
||
+ if (c != EOF)
|
||
+ {
|
||
+ ungetc (c, stream);
|
||
+ c = '\n';
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (c == delim)
|
||
+ ++field_idx;
|
||
+ else if (c == '\n' || c == EOF)
|
||
+ {
|
||
+ if (found_any_selected_field
|
||
+ || !(suppress_non_delimited && field_idx == 1))
|
||
+ putchar ('\n');
|
||
+ if (c == EOF)
|
||
+ break;
|
||
+ field_idx = 1;
|
||
+ found_any_selected_field = false;
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+static void
|
||
+cut_stream (FILE *stream)
|
||
+{
|
||
+ if (operating_mode == byte_mode)
|
||
+ cut_bytes (stream);
|
||
+ else
|
||
+ cut_fields (stream);
|
||
+}
|
||
+
|
||
+/* Process file FILE to standard output.
|
||
+ Return true if successful. */
|
||
+
|
||
+static bool
|
||
+cut_file (char const *file)
|
||
+{
|
||
+ FILE *stream;
|
||
+
|
||
+ if (STREQ (file, "-"))
|
||
+ {
|
||
+ have_read_stdin = true;
|
||
+ stream = stdin;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ stream = fopen (file, "r");
|
||
+ if (stream == NULL)
|
||
+ {
|
||
+ error (0, errno, "%s", file);
|
||
+ return false;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ cut_stream (stream);
|
||
+
|
||
+ if (ferror (stream))
|
||
+ {
|
||
+ error (0, errno, "%s", file);
|
||
+ return false;
|
||
+ }
|
||
+ if (STREQ (file, "-"))
|
||
+ clearerr (stream); /* Also clear EOF. */
|
||
+ else if (fclose (stream) == EOF)
|
||
+ {
|
||
+ error (0, errno, "%s", file);
|
||
+ return false;
|
||
+ }
|
||
+ return true;
|
||
+}
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ int optc;
|
||
+ bool ok;
|
||
+ bool delim_specified = false;
|
||
+ char *spec_list_string IF_LINT(= NULL);
|
||
+
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+
|
||
+ atexit (close_stdout);
|
||
+
|
||
+ operating_mode = undefined_mode;
|
||
+
|
||
+ /* By default, all non-delimited lines are printed. */
|
||
+ suppress_non_delimited = false;
|
||
+
|
||
+ delim = '\0';
|
||
+ have_read_stdin = false;
|
||
+
|
||
+ while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
|
||
+ {
|
||
+ switch (optc)
|
||
+ {
|
||
+ case 'b':
|
||
+ case 'c':
|
||
+ /* Build the byte list. */
|
||
+ if (operating_mode != undefined_mode)
|
||
+ FATAL_ERROR (_("only one type of list may be specified"));
|
||
+ operating_mode = byte_mode;
|
||
+ spec_list_string = optarg;
|
||
+ break;
|
||
+
|
||
+ case 'f':
|
||
+ /* Build the field list. */
|
||
+ if (operating_mode != undefined_mode)
|
||
+ FATAL_ERROR (_("only one type of list may be specified"));
|
||
+ operating_mode = field_mode;
|
||
+ spec_list_string = optarg;
|
||
+ break;
|
||
+
|
||
+ case 'd':
|
||
+ /* New delimiter. */
|
||
+ /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
|
||
+ if (optarg[0] != '\0' && optarg[1] != '\0')
|
||
+ FATAL_ERROR (_("the delimiter must be a single character"));
|
||
+ delim = optarg[0];
|
||
+ delim_specified = true;
|
||
+ break;
|
||
+
|
||
+ case OUTPUT_DELIMITER_OPTION:
|
||
+ output_delimiter_specified = true;
|
||
+ /* Interpret --output-delimiter='' to mean
|
||
+ `use the NUL byte as the delimiter.' */
|
||
+ output_delimiter_length = (optarg[0] == '\0'
|
||
+ ? 1 : strlen (optarg));
|
||
+ output_delimiter_string = xstrdup (optarg);
|
||
+ break;
|
||
+
|
||
+ case 'n':
|
||
+ break;
|
||
+
|
||
+ case 's':
|
||
+ suppress_non_delimited = true;
|
||
+ break;
|
||
+
|
||
+ case COMPLEMENT_OPTION:
|
||
+ complement = true;
|
||
+ break;
|
||
+
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+
|
||
+ default:
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (operating_mode == undefined_mode)
|
||
+ FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
|
||
+
|
||
+ if (delim != '\0' && operating_mode != field_mode)
|
||
+ FATAL_ERROR (_("an input delimiter may be specified only\
|
||
+ when operating on fields"));
|
||
+
|
||
+ if (suppress_non_delimited && operating_mode != field_mode)
|
||
+ FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
|
||
+\tonly when operating on fields"));
|
||
+
|
||
+ if (output_delimiter_specified)
|
||
+ {
|
||
+ range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY,
|
||
+ NULL, hash_int,
|
||
+ hash_compare_ints, NULL);
|
||
+ if (range_start_ht == NULL)
|
||
+ xalloc_die ();
|
||
+
|
||
+ }
|
||
+
|
||
+ if (! set_fields (spec_list_string))
|
||
+ {
|
||
+ if (operating_mode == field_mode)
|
||
+ FATAL_ERROR (_("missing list of fields"));
|
||
+ else
|
||
+ FATAL_ERROR (_("missing list of positions"));
|
||
+ }
|
||
+
|
||
+ if (!delim_specified)
|
||
+ delim = '\t';
|
||
+
|
||
+ if (output_delimiter_string == NULL)
|
||
+ {
|
||
+ static char dummy[2];
|
||
+ dummy[0] = delim;
|
||
+ dummy[1] = '\0';
|
||
+ output_delimiter_string = dummy;
|
||
+ output_delimiter_length = 1;
|
||
+ }
|
||
+
|
||
+ if (optind == argc)
|
||
+ ok = cut_file ("-");
|
||
+ else
|
||
+ for (ok = true; optind < argc; optind++)
|
||
+ ok &= cut_file (argv[optind]);
|
||
+
|
||
+ if (range_start_ht)
|
||
+ hash_free (range_start_ht);
|
||
+
|
||
+ if (have_read_stdin && fclose (stdin) == EOF)
|
||
+ {
|
||
+ error (0, errno, "-");
|
||
+ ok = false;
|
||
+ }
|
||
+
|
||
+ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/src/expand.c coreutils-8.0/src/expand.c
|
||
--- coreutils-8.0-orig/src/expand.c 2009-09-29 15:27:54.000000000 +0200
|
||
+++ coreutils-8.0/src/expand.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -37,11 +37,28 @@
|
||
#include <stdio.h>
|
||
#include <getopt.h>
|
||
#include <sys/types.h>
|
||
+
|
||
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+
|
||
#include "system.h"
|
||
#include "error.h"
|
||
#include "quote.h"
|
||
#include "xstrndup.h"
|
||
|
||
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
|
||
+ installation; work around this configuration error. */
|
||
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
|
||
+# define MB_LEN_MAX 16
|
||
+#endif
|
||
+
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
/* The official name of this program (e.g., no `g' prefix). */
|
||
#define PROGRAM_NAME "expand"
|
||
|
||
@@ -357,6 +374,142 @@ expand (void)
|
||
}
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static void
+expand_multibyte (void)
+{
+  FILE *fp;			/* Input stream. */
+  mbstate_t i_state;		/* Current shift state of the input stream. */
+  mbstate_t i_state_bak;	/* Backup of I_STATE, restored on error. */
+  mbstate_t o_state;		/* Shift state of the output stream (unused). */
+  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence. */
+  char *bufpos = buf;		/* Next read position of BUF. */
+  size_t buflen = 0;		/* The length of the byte sequence in buf. */
+  wchar_t wc;			/* The wide character just decoded. */
+  size_t mblength;		/* The byte length of the multibyte character
+				   that was decoded into WC. */
+  int tab_index = 0;		/* Index in `tab_list' of next tabstop. */
+  int column = 0;		/* Column on screen of the next char. */
+  int next_tab_column;		/* Column the next tab stop is on. */
+  int convert = 1;		/* If nonzero, perform translations. */
+
+  fp = next_file ((FILE *) NULL);
+  if (fp == NULL)
+    return;
+
+  memset (&o_state, '\0', sizeof(mbstate_t));
+  memset (&i_state, '\0', sizeof(mbstate_t));
+
+  for (;;)
+    {
+      /* Refill BUF, moving any unread tail to its start first. */
+      if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
+	{
+	  memmove (buf, bufpos, buflen);
+	  buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
+	  bufpos = buf;
+	}
+
+      /* No character is left in BUF. */
+      if (buflen < 1)
+	{
+	  fp = next_file (fp);
+
+	  if (fp == NULL)
+	    break;		/* No more files. */
+	  else
+	    {
+	      memset (&i_state, '\0', sizeof(mbstate_t));
+	      continue;
+	    }
+	}
+
+      /* Get a wide character. */
+      i_state_bak = i_state;
+      mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
+
+      switch (mblength)
+	{
+	case (size_t)-1:	/* illegal byte sequence. */
+	case (size_t)-2:	/* incomplete byte sequence. */
+	  mblength = 1;
+	  i_state = i_state_bak;
+	  if (convert)
+	    {
+	      ++column;
+	      if (convert_entire_line == 0)
+		convert = 0;
+	    }
+	  putchar (*bufpos);
+	  break;
+
+	case 0:		/* null. */
+	  mblength = 1;
+	  if (convert && convert_entire_line == 0)
+	    convert = 0;
+	  putchar ('\0');
+	  break;
+
+	default:
+	  if (wc == L'\n')		/* LF. */
+	    {
+	      tab_index = 0;
+	      column = 0;
+	      convert = 1;
+	      putchar ('\n');
+	    }
+	  else if (wc == L'\t' && convert)	/* Tab. */
+	    {
+	      if (tab_size == 0)
+		{
+		  /* Do not let tab_index == first_free_tab;
+		     stop when it is 1 less. */
+		  while (tab_index < first_free_tab - 1
+		      && column >= tab_list[tab_index])
+		    tab_index++;
+		  next_tab_column = tab_list[tab_index];
+		  if (tab_index < first_free_tab - 1)
+		    tab_index++;
+		  if (column >= next_tab_column)
+		    next_tab_column = column + 1;
+		}
+	      else
+		next_tab_column = column + tab_size - column % tab_size;
+
+	      while (column < next_tab_column)
+		{
+		  putchar (' ');
+		  ++column;
+		}
+	    }
+	  else  /* Others. */
+	    {
+	      if (convert)
+		{
+		  if (wc == L'\b')
+		    {
+		      if (column > 0)
+			--column;
+		    }
+		  else
+		    {
+		      int width;		/* The width of WC. */
+
+		      width = wcwidth (wc);
+		      column += (width > 0) ? width : 0;
+		      if (convert_entire_line == 0 && wc != L' ')
+		        convert = 0;	/* -i: stop at first non-space. */
+		    }
+		}
+	      fwrite (bufpos, sizeof(char), mblength, stdout);
+	    }
+	}
+      buflen -= mblength;
+      bufpos += mblength;
+    }
+}
|
||
+#endif
|
||
+
|
||
int
|
||
main (int argc, char **argv)
|
||
{
|
||
@@ -421,7 +574,12 @@ main (int argc, char **argv)
|
||
|
||
file_list = (optind < argc ? &argv[optind] : stdin_argv);
|
||
|
||
- expand ();
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ expand_multibyte ();
|
||
+ else
|
||
+#endif
|
||
+ expand ();
|
||
|
||
if (have_read_stdin && fclose (stdin) != 0)
|
||
error (EXIT_FAILURE, errno, "-");
|
||
diff -urNp coreutils-8.0-orig/src/expand.c.orig coreutils-8.0/src/expand.c.orig
|
||
--- coreutils-8.0-orig/src/expand.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/expand.c.orig 2009-09-29 15:27:54.000000000 +0200
|
||
@@ -0,0 +1,430 @@
|
||
+/* expand - convert tabs to spaces
|
||
+ Copyright (C) 89, 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* By default, convert all tabs to spaces.
|
||
+ Preserves backspace characters in the output; they decrement the
|
||
+ column count for tab calculations.
|
||
+ The default action is equivalent to -8.
|
||
+
|
||
+ Options:
|
||
+ --tabs=tab1[,tab2[,...]]
|
||
+ -t tab1[,tab2[,...]]
|
||
+ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
|
||
+ columns apart instead of the default 8. Otherwise,
|
||
+ set the tabs at columns tab1, tab2, etc. (numbered from
|
||
+ 0); replace any tabs beyond the tab stops given with
|
||
+ single spaces.
|
||
+ --initial
|
||
+ -i Only convert initial tabs on each line to spaces.
|
||
+
|
||
+ David MacKenzie <djm@gnu.ai.mit.edu> */
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <stdio.h>
|
||
+#include <getopt.h>
|
||
+#include <sys/types.h>
|
||
+#include "system.h"
|
||
+#include "error.h"
|
||
+#include "quote.h"
|
||
+#include "xstrndup.h"
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "expand"
|
||
+
|
||
+#define AUTHORS proper_name ("David MacKenzie")
|
||
+
|
||
+/* If true, convert blanks even after nonblank characters have been
|
||
+ read on the line. */
|
||
+static bool convert_entire_line;
|
||
+
|
||
+/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
|
||
+static uintmax_t tab_size;
|
||
+
|
||
+/* Array of the explicit column numbers of the tab stops;
|
||
+ after `tab_list' is exhausted, each additional tab is replaced
|
||
+ by a space. The first column is column 0. */
|
||
+static uintmax_t *tab_list;
|
||
+
|
||
+/* The number of allocated entries in `tab_list'. */
|
||
+static size_t n_tabs_allocated;
|
||
+
|
||
+/* The index of the first invalid element of `tab_list',
|
||
+ where the next element can be added. */
|
||
+static size_t first_free_tab;
|
||
+
|
||
+/* Null-terminated array of input filenames. */
|
||
+static char **file_list;
|
||
+
|
||
+/* Default for `file_list' if no files are given on the command line. */
|
||
+static char *stdin_argv[] =
|
||
+{
|
||
+ (char *) "-", NULL
|
||
+};
|
||
+
|
||
+/* True if we have ever read standard input. */
|
||
+static bool have_read_stdin;
|
||
+
|
||
+/* The desired exit status. */
|
||
+static int exit_status;
|
||
+
|
||
+static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::";
|
||
+
|
||
+static struct option const longopts[] =
|
||
+{
|
||
+ {"tabs", required_argument, NULL, 't'},
|
||
+ {"initial", no_argument, NULL, 'i'},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0}
|
||
+};
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s [OPTION]... [FILE]...\n\
|
||
+"),
|
||
+ program_name);
|
||
+ fputs (_("\
|
||
+Convert tabs in each FILE to spaces, writing to standard output.\n\
|
||
+With no FILE, or when FILE is -, read standard input.\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Mandatory arguments to long options are mandatory for short options too.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -i, --initial do not convert tabs after non blanks\n\
|
||
+ -t, --tabs=NUMBER have tabs NUMBER characters apart, not 8\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -t, --tabs=LIST use comma separated list of explicit tab positions\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+ exit (status);
|
||
+}
|
||
+
|
||
+/* Add tab stop TABVAL to the end of `tab_list'. */
|
||
+
|
||
+static void
|
||
+add_tab_stop (uintmax_t tabval)
|
||
+{
|
||
+ if (first_free_tab == n_tabs_allocated)
|
||
+ tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
|
||
+ tab_list[first_free_tab++] = tabval;
|
||
+}
|
||
+
|
||
+/* Add the comma or blank separated list of tab stops STOPS
|
||
+ to the list of tab stops. */
|
||
+
|
||
+static void
|
||
+parse_tab_stops (char const *stops)
|
||
+{
|
||
+ bool have_tabval = false;
|
||
+ uintmax_t tabval IF_LINT (= 0);
|
||
+ char const *num_start IF_LINT (= NULL);
|
||
+ bool ok = true;
|
||
+
|
||
+ for (; *stops; stops++)
|
||
+ {
|
||
+ if (*stops == ',' || isblank (to_uchar (*stops)))
|
||
+ {
|
||
+ if (have_tabval)
|
||
+ add_tab_stop (tabval);
|
||
+ have_tabval = false;
|
||
+ }
|
||
+ else if (ISDIGIT (*stops))
|
||
+ {
|
||
+ if (!have_tabval)
|
||
+ {
|
||
+ tabval = 0;
|
||
+ have_tabval = true;
|
||
+ num_start = stops;
|
||
+ }
|
||
+
|
||
+ /* Detect overflow. */
|
||
+ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
|
||
+ {
|
||
+ size_t len = strspn (num_start, "0123456789");
|
||
+ char *bad_num = xstrndup (num_start, len);
|
||
+ error (0, 0, _("tab stop is too large %s"), quote (bad_num));
|
||
+ free (bad_num);
|
||
+ ok = false;
|
||
+ stops = num_start + len - 1;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ error (0, 0, _("tab size contains invalid character(s): %s"),
|
||
+ quote (stops));
|
||
+ ok = false;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (!ok)
|
||
+ exit (EXIT_FAILURE);
|
||
+
|
||
+ if (have_tabval)
|
||
+ add_tab_stop (tabval);
|
||
+}
|
||
+
|
||
+/* Check that the list of tab stops TABS, with ENTRIES entries,
|
||
+ contains only nonzero, ascending values. */
|
||
+
|
||
+static void
|
||
+validate_tab_stops (uintmax_t const *tabs, size_t entries)
|
||
+{
|
||
+ uintmax_t prev_tab = 0;
|
||
+ size_t i;
|
||
+
|
||
+ for (i = 0; i < entries; i++)
|
||
+ {
|
||
+ if (tabs[i] == 0)
|
||
+ error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
|
||
+ if (tabs[i] <= prev_tab)
|
||
+ error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
|
||
+ prev_tab = tabs[i];
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Close the old stream pointer FP if it is non-NULL,
|
||
+ and return a new one opened to read the next input file.
|
||
+ Open a filename of `-' as the standard input.
|
||
+ Return NULL if there are no more input files. */
|
||
+
|
||
+static FILE *
|
||
+next_file (FILE *fp)
|
||
+{
|
||
+ static char *prev_file;
|
||
+ char *file;
|
||
+
|
||
+ if (fp)
|
||
+ {
|
||
+ if (ferror (fp))
|
||
+ {
|
||
+ error (0, errno, "%s", prev_file);
|
||
+ exit_status = EXIT_FAILURE;
|
||
+ }
|
||
+ if (STREQ (prev_file, "-"))
|
||
+ clearerr (fp); /* Also clear EOF. */
|
||
+ else if (fclose (fp) != 0)
|
||
+ {
|
||
+ error (0, errno, "%s", prev_file);
|
||
+ exit_status = EXIT_FAILURE;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ while ((file = *file_list++) != NULL)
|
||
+ {
|
||
+ if (STREQ (file, "-"))
|
||
+ {
|
||
+ have_read_stdin = true;
|
||
+ prev_file = file;
|
||
+ return stdin;
|
||
+ }
|
||
+ fp = fopen (file, "r");
|
||
+ if (fp)
|
||
+ {
|
||
+ prev_file = file;
|
||
+ return fp;
|
||
+ }
|
||
+ error (0, errno, "%s", file);
|
||
+ exit_status = EXIT_FAILURE;
|
||
+ }
|
||
+ return NULL;
|
||
+}
|
||
+
|
||
+/* Change tabs to spaces, writing to stdout.
|
||
+ Read each file in `file_list', in order. */
|
||
+
|
||
+static void
|
||
+expand (void)
|
||
+{
|
||
+ /* Input stream. */
|
||
+ FILE *fp = next_file (NULL);
|
||
+
|
||
+ if (!fp)
|
||
+ return;
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ /* Input character, or EOF. */
|
||
+ int c;
|
||
+
|
||
+ /* If true, perform translations. */
|
||
+ bool convert = true;
|
||
+
|
||
+
|
||
+ /* The following variables have valid values only when CONVERT
|
||
+ is true: */
|
||
+
|
||
+ /* Column of next input character. */
|
||
+ uintmax_t column = 0;
|
||
+
|
||
+ /* Index in TAB_LIST of next tab stop to examine. */
|
||
+ size_t tab_index = 0;
|
||
+
|
||
+
|
||
+ /* Convert a line of text. */
|
||
+
|
||
+ do
|
||
+ {
|
||
+ while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
|
||
+ continue;
|
||
+
|
||
+ if (convert)
|
||
+ {
|
||
+ if (c == '\t')
|
||
+ {
|
||
+ /* Column the next input tab stop is on. */
|
||
+ uintmax_t next_tab_column;
|
||
+
|
||
+ if (tab_size)
|
||
+ next_tab_column = column + (tab_size - column % tab_size);
|
||
+ else
|
||
+ for (;;)
|
||
+ if (tab_index == first_free_tab)
|
||
+ {
|
||
+ next_tab_column = column + 1;
|
||
+ break;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ uintmax_t tab = tab_list[tab_index++];
|
||
+ if (column < tab)
|
||
+ {
|
||
+ next_tab_column = tab;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (next_tab_column < column)
|
||
+ error (EXIT_FAILURE, 0, _("input line is too long"));
|
||
+
|
||
+ while (++column < next_tab_column)
|
||
+ if (putchar (' ') < 0)
|
||
+ error (EXIT_FAILURE, errno, _("write error"));
|
||
+
|
||
+ c = ' ';
|
||
+ }
|
||
+ else if (c == '\b')
|
||
+ {
|
||
+ /* Go back one column, and force recalculation of the
|
||
+ next tab stop. */
|
||
+ column -= !!column;
|
||
+ tab_index -= !!tab_index;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ column++;
|
||
+ if (!column)
|
||
+ error (EXIT_FAILURE, 0, _("input line is too long"));
|
||
+ }
|
||
+
|
||
+ convert &= convert_entire_line || !! isblank (c);
|
||
+ }
|
||
+
|
||
+ if (c < 0)
|
||
+ return;
|
||
+
|
||
+ if (putchar (c) < 0)
|
||
+ error (EXIT_FAILURE, errno, _("write error"));
|
||
+ }
|
||
+ while (c != '\n');
|
||
+ }
|
||
+}
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ int c;
|
||
+
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+
|
||
+ atexit (close_stdout);
|
||
+
|
||
+ have_read_stdin = false;
|
||
+ exit_status = EXIT_SUCCESS;
|
||
+ convert_entire_line = true;
|
||
+ tab_list = NULL;
|
||
+ first_free_tab = 0;
|
||
+
|
||
+ while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
|
||
+ {
|
||
+ switch (c)
|
||
+ {
|
||
+ case 'i':
|
||
+ convert_entire_line = false;
|
||
+ break;
|
||
+
|
||
+ case 't':
|
||
+ parse_tab_stops (optarg);
|
||
+ break;
|
||
+
|
||
+ case '0': case '1': case '2': case '3': case '4':
|
||
+ case '5': case '6': case '7': case '8': case '9':
|
||
+ if (optarg)
|
||
+ parse_tab_stops (optarg - 1);
|
||
+ else
|
||
+ {
|
||
+ char tab_stop[2];
|
||
+ tab_stop[0] = c;
|
||
+ tab_stop[1] = '\0';
|
||
+ parse_tab_stops (tab_stop);
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+
|
||
+ default:
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ validate_tab_stops (tab_list, first_free_tab);
|
||
+
|
||
+ if (first_free_tab == 0)
|
||
+ tab_size = 8;
|
||
+ else if (first_free_tab == 1)
|
||
+ tab_size = tab_list[0];
|
||
+ else
|
||
+ tab_size = 0;
|
||
+
|
||
+ file_list = (optind < argc ? &argv[optind] : stdin_argv);
|
||
+
|
||
+ expand ();
|
||
+
|
||
+ if (have_read_stdin && fclose (stdin) != 0)
|
||
+ error (EXIT_FAILURE, errno, "-");
|
||
+
|
||
+ exit (exit_status);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/src/fold.c coreutils-8.0/src/fold.c
|
||
--- coreutils-8.0-orig/src/fold.c 2009-09-23 10:25:44.000000000 +0200
|
||
+++ coreutils-8.0/src/fold.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -22,11 +22,33 @@
|
||
#include <getopt.h>
|
||
#include <sys/types.h>
|
||
|
||
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+
|
||
+/* Get iswprint(), iswblank(). */
|
||
+#if HAVE_WCTYPE_H
|
||
+# include <wctype.h>
|
||
+#endif
|
||
+
|
||
#include "system.h"
|
||
#include "error.h"
|
||
#include "quote.h"
|
||
#include "xstrtol.h"
|
||
|
||
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
|
||
+ installation; work around this configuration error. */
|
||
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
|
||
+# undef MB_LEN_MAX
|
||
+# define MB_LEN_MAX 16
|
||
+#endif
|
||
+
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
#define TAB_WIDTH 8
|
||
|
||
/* The official name of this program (e.g., no `g' prefix). */
|
||
@@ -34,20 +56,41 @@
|
||
|
||
#define AUTHORS proper_name ("David MacKenzie")
|
||
|
||
+#define FATAL_ERROR(Message) /* Report Message, then leave via usage (2). */ \
|
||
+ do \
|
||
+ { \
|
||
+ error (0, 0, "%s", (Message)); /* Message is data, never a format. */ \
|
||
+ usage (2); \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+enum operating_mode
|
||
+{
|
||
+ /* Count screen columns; this is the mode main selects by default. */
|
||
+ column_mode,
|
||
+
|
||
+ /* Count bytes (-b, --bytes). */
|
||
+ byte_mode,
|
||
+
|
||
+ /* Count characters (-c, --characters). */
|
||
+ character_mode,
|
||
+};
|
||
+
|
||
+/* The current folding mode; main initializes it to column_mode. */
|
||
+static enum operating_mode operating_mode;
|
||
+
|
||
/* If nonzero, try to break on whitespace. */
|
||
static bool break_spaces;
|
||
|
||
-/* If nonzero, count bytes, not column positions. */
|
||
-static bool count_bytes;
|
||
-
|
||
/* If nonzero, at least one of the files we read was standard input. */
|
||
static bool have_read_stdin;
|
||
|
||
-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
|
||
+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
|
||
|
||
static struct option const longopts[] =
|
||
{
|
||
{"bytes", no_argument, NULL, 'b'},
|
||
+ {"characters", no_argument, NULL, 'c'},
|
||
{"spaces", no_argument, NULL, 's'},
|
||
{"width", required_argument, NULL, 'w'},
|
||
{GETOPT_HELP_OPTION_DECL},
|
||
@@ -77,6 +120,7 @@ Mandatory arguments to long options are
|
||
"), stdout);
|
||
fputs (_("\
|
||
-b, --bytes count bytes rather than columns\n\
|
||
+ -c, --characters count characters rather than columns\n\
|
||
-s, --spaces break at spaces\n\
|
||
-w, --width=WIDTH use WIDTH columns instead of 80\n\
|
||
"), stdout);
|
||
@@ -94,7 +138,7 @@ Mandatory arguments to long options are
|
||
static size_t
|
||
adjust_column (size_t column, char c)
|
||
{
|
||
- if (!count_bytes)
|
||
+ if (operating_mode != byte_mode)
|
||
{
|
||
if (c == '\b')
|
||
{
|
||
@@ -117,30 +161,14 @@ adjust_column (size_t column, char c)
|
||
to stdout, with maximum line length WIDTH.
|
||
Return true if successful. */
|
||
|
||
-static bool
|
||
-fold_file (char const *filename, size_t width)
|
||
+static void
|
||
+fold_text (FILE *istream, size_t width, int *saved_errno)
|
||
{
|
||
- FILE *istream;
|
||
int c;
|
||
size_t column = 0; /* Screen column where next char will go. */
|
||
size_t offset_out = 0; /* Index in `line_out' for next char. */
|
||
static char *line_out = NULL;
|
||
static size_t allocated_out = 0;
|
||
- int saved_errno;
|
||
-
|
||
- if (STREQ (filename, "-"))
|
||
- {
|
||
- istream = stdin;
|
||
- have_read_stdin = true;
|
||
- }
|
||
- else
|
||
- istream = fopen (filename, "r");
|
||
-
|
||
- if (istream == NULL)
|
||
- {
|
||
- error (0, errno, "%s", filename);
|
||
- return false;
|
||
- }
|
||
|
||
while ((c = getc (istream)) != EOF)
|
||
{
|
||
@@ -168,6 +196,15 @@ fold_file (char const *filename, size_t
|
||
bool found_blank = false;
|
||
size_t logical_end = offset_out;
|
||
|
||
+ /* If LINE_OUT is still empty there is nothing
|
||
+ to flush, so store C in LINE_OUT even though
|
||
+ COLUMN is already bigger than WIDTH. */
|
||
+ if (offset_out == 0)
|
||
+ {
|
||
+ line_out[offset_out++] = c;
|
||
+ continue;
|
||
+ }
|
||
+
|
||
/* Look for the last blank. */
|
||
while (logical_end)
|
||
{
|
||
@@ -214,11 +251,222 @@ fold_file (char const *filename, size_t
|
||
line_out[offset_out++] = c;
|
||
}
|
||
|
||
- saved_errno = errno;
|
||
+ *saved_errno = errno;
|
||
|
||
if (offset_out)
|
||
fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
|
||
|
||
+}
|
||
+
|
||
+#if HAVE_MBRTOWC
|
||
+static void
|
||
+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
|
||
+{ /* Multibyte-aware counterpart of fold_text: fold ISTREAM to stdout at WIDTH. */
|
||
+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
|
||
+ size_t buflen = 0; /* The length of the byte sequence in buf. */
|
||
+ char *bufpos = buf; /* Next read position of BUF; never NULL for memmove. */
|
||
+ wchar_t wc; /* The wide character just decoded by mbrtowc. */
|
||
+ size_t mblength; /* The number of bytes in BUF that make up the
|
||
+ character stored in WC (1 on failure). */
|
||
+ mbstate_t state, state_bak; /* State of the stream. */
|
||
+ int convfail; /* Nonzero when mbrtowc could not decode the bytes. */
|
||
+
|
||
+ static char *line_out = NULL;
|
||
+ size_t offset_out = 0; /* Index in `line_out' for next char. */
|
||
+ static size_t allocated_out = 0;
|
||
+
|
||
+ int increment; /* How far the current character moves COLUMN. */
|
||
+ size_t column = 0;
|
||
+
|
||
+ size_t last_blank_pos; /* Offset in LINE_OUT just past the last blank. */
|
||
+ size_t last_blank_column; /* COLUMN just after the last blank. */
|
||
+ int is_blank_seen;
|
||
+ int last_blank_increment = 0;
|
||
+ int is_bs_following_last_blank;
|
||
+ size_t bs_following_last_blank_num;
|
||
+ int is_cr_after_last_blank;
|
||
+
|
||
+#define CLEAR_FLAGS \
|
||
+ do \
|
||
+ { \
|
||
+ last_blank_pos = 0; \
|
||
+ last_blank_column = 0; \
|
||
+ is_blank_seen = 0; \
|
||
+ is_bs_following_last_blank = 0; \
|
||
+ bs_following_last_blank_num = 0; \
|
||
+ is_cr_after_last_blank = 0; \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+#define START_NEW_LINE \
|
||
+ do \
|
||
+ { \
|
||
+ putchar ('\n'); \
|
||
+ column = 0; \
|
||
+ offset_out = 0; \
|
||
+ CLEAR_FLAGS; \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+ CLEAR_FLAGS;
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ for (;; bufpos += mblength, buflen -= mblength)
|
||
+ {
|
||
+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
|
||
+ {
|
||
+ memmove (buf, bufpos, buflen);
|
||
+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
|
||
+ bufpos = buf;
|
||
+ }
|
||
+
|
||
+ if (buflen < 1)
|
||
+ break;
|
||
+
|
||
+ /* Get a wide character. */
|
||
+ convfail = 0;
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, bufpos, buflen, &state);
|
||
+
|
||
+ switch (mblength)
|
||
+ {
|
||
+ case (size_t)-1:
|
||
+ case (size_t)-2:
|
||
+ convfail++;
|
||
+ state = state_bak;
|
||
+ /* Fall through. */
|
||
+
|
||
+ case 0:
|
||
+ mblength = 1;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+rescan:
|
||
+ if (operating_mode == byte_mode) /* byte mode */
|
||
+ increment = mblength;
|
||
+ else if (operating_mode == character_mode) /* character mode */
|
||
+ increment = 1;
|
||
+ else /* column mode */
|
||
+ {
|
||
+ if (convfail)
|
||
+ increment = 1;
|
||
+ else
|
||
+ {
|
||
+ switch (wc)
|
||
+ {
|
||
+ case L'\n':
|
||
+ fwrite (line_out, sizeof(char), offset_out, stdout);
|
||
+ START_NEW_LINE;
|
||
+ continue;
|
||
+
|
||
+ case L'\b':
|
||
+ increment = (column > 0) ? -1 : 0;
|
||
+ break;
|
||
+
|
||
+ case L'\r':
|
||
+ increment = -1 * column;
|
||
+ break;
|
||
+
|
||
+ case L'\t':
|
||
+ increment = TAB_WIDTH - column % TAB_WIDTH;
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ increment = wcwidth (wc);
|
||
+ increment = (increment < 0) ? 0 : increment;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (column + increment > width && break_spaces && last_blank_pos)
|
||
+ {
|
||
+ fwrite (line_out, sizeof(char), last_blank_pos, stdout);
|
||
+ putchar ('\n');
|
||
+
|
||
+ offset_out = offset_out - last_blank_pos;
|
||
+ column = column - last_blank_column + ((is_cr_after_last_blank)
|
||
+ ? last_blank_increment : bs_following_last_blank_num);
|
||
+ memmove (line_out, line_out + last_blank_pos, offset_out);
|
||
+ CLEAR_FLAGS;
|
||
+ goto rescan;
|
||
+ }
|
||
+
|
||
+ if (column + increment > width && column != 0)
|
||
+ {
|
||
+ fwrite (line_out, sizeof(char), offset_out, stdout);
|
||
+ START_NEW_LINE;
|
||
+ goto rescan;
|
||
+ }
|
||
+
|
||
+ if (allocated_out < offset_out + mblength)
|
||
+ {
|
||
+ line_out = X2REALLOC (line_out, &allocated_out);
|
||
+ }
|
||
+
|
||
+ memcpy (line_out + offset_out, bufpos, mblength);
|
||
+ offset_out += mblength;
|
||
+ column += increment;
|
||
+
|
||
+ if (is_blank_seen && !convfail && wc == L'\r')
|
||
+ is_cr_after_last_blank = 1;
|
||
+
|
||
+ if (is_bs_following_last_blank && !convfail && wc == L'\b')
|
||
+ ++bs_following_last_blank_num;
|
||
+ else
|
||
+ is_bs_following_last_blank = 0;
|
||
+
|
||
+ if (break_spaces && !convfail && iswblank (wc))
|
||
+ {
|
||
+ last_blank_pos = offset_out;
|
||
+ last_blank_column = column;
|
||
+ is_blank_seen = 1;
|
||
+ last_blank_increment = increment;
|
||
+ is_bs_following_last_blank = 1;
|
||
+ bs_following_last_blank_num = 0;
|
||
+ is_cr_after_last_blank = 0;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ *saved_errno = errno; /* Saved before the final fwrite below. */
|
||
+
|
||
+ if (offset_out)
|
||
+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
|
||
+
|
||
+}
|
||
+#endif
|
||
+
|
||
+/* Fold file FILENAME, or standard input if FILENAME is "-",
|
||
+ to stdout, with maximum line length WIDTH.
|
||
+ Return true if successful, false if an error occurs. */
|
||
+
|
||
+static bool
|
||
+fold_file (char const *filename, size_t width)
|
||
+{
|
||
+ FILE *istream;
|
||
+ int saved_errno;
|
||
+
|
||
+ if (STREQ (filename, "-"))
|
||
+ {
|
||
+ istream = stdin;
|
||
+ have_read_stdin = true;
|
||
+ }
|
||
+ else
|
||
+ istream = fopen (filename, "r");
|
||
+
|
||
+ if (istream == NULL)
|
||
+ {
|
||
+ error (0, errno, "%s", filename);
|
||
+ return false; /* Failure must yield false so main exits nonzero. */
|
||
+ }
|
||
+
|
||
+ /* Choose the multibyte or the single-byte folding routine. */
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ fold_multibyte_text (istream, width, &saved_errno);
|
||
+ else
|
||
+#endif
|
||
+ fold_text (istream, width, &saved_errno);
|
||
+
|
||
if (ferror (istream))
|
||
{
|
||
error (0, saved_errno, "%s", filename);
|
||
@@ -251,7 +499,8 @@ main (int argc, char **argv)
|
||
|
||
atexit (close_stdout);
|
||
|
||
- break_spaces = count_bytes = have_read_stdin = false;
|
||
+ operating_mode = column_mode;
|
||
+ break_spaces = have_read_stdin = false;
|
||
|
||
while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
|
||
{
|
||
@@ -260,7 +509,15 @@ main (int argc, char **argv)
|
||
switch (optc)
|
||
{
|
||
case 'b': /* Count bytes rather than columns. */
|
||
- count_bytes = true;
|
||
+ if (operating_mode != column_mode)
|
||
+ FATAL_ERROR (_("only one way of folding may be specified"));
|
||
+ operating_mode = byte_mode;
|
||
+ break;
|
||
+
|
||
+ case 'c':
|
||
+ if (operating_mode != column_mode)
|
||
+ FATAL_ERROR (_("only one way of folding may be specified"));
|
||
+ operating_mode = character_mode;
|
||
break;
|
||
|
||
case 's': /* Break at word boundaries. */
|
||
diff -urNp coreutils-8.0-orig/src/fold.c.orig coreutils-8.0/src/fold.c.orig
|
||
--- coreutils-8.0-orig/src/fold.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/fold.c.orig 2009-09-23 10:25:44.000000000 +0200
|
||
@@ -0,0 +1,314 @@
|
||
+/* fold -- wrap each input line to fit in specified width.
|
||
+ Copyright (C) 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* Written by David MacKenzie, djm@gnu.ai.mit.edu. */
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <stdio.h>
|
||
+#include <getopt.h>
|
||
+#include <sys/types.h>
|
||
+
|
||
+#include "system.h"
|
||
+#include "error.h"
|
||
+#include "quote.h"
|
||
+#include "xstrtol.h"
|
||
+
|
||
+#define TAB_WIDTH 8
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "fold"
|
||
+
|
||
+#define AUTHORS proper_name ("David MacKenzie")
|
||
+
|
||
+/* If nonzero, try to break on whitespace. */
|
||
+static bool break_spaces;
|
||
+
|
||
+/* If nonzero, count bytes, not column positions. */
|
||
+static bool count_bytes;
|
||
+
|
||
+/* If nonzero, at least one of the files we read was standard input. */
|
||
+static bool have_read_stdin;
|
||
+
|
||
+static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
|
||
+
|
||
+static struct option const longopts[] =
|
||
+{
|
||
+ {"bytes", no_argument, NULL, 'b'},
|
||
+ {"spaces", no_argument, NULL, 's'},
|
||
+ {"width", required_argument, NULL, 'w'},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0}
|
||
+};
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s [OPTION]... [FILE]...\n\
|
||
+"),
|
||
+ program_name);
|
||
+ fputs (_("\
|
||
+Wrap input lines in each FILE (standard input by default), writing to\n\
|
||
+standard output.\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Mandatory arguments to long options are mandatory for short options too.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -b, --bytes count bytes rather than columns\n\
|
||
+ -s, --spaces break at spaces\n\
|
||
+ -w, --width=WIDTH use WIDTH columns instead of 80\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+ exit (status);
|
||
+}
|
||
+
|
||
+/* Assuming the current column is COLUMN, return the column that
|
||
+ printing C will move the cursor to.
|
||
+ The first column is 0. */
|
||
+
|
||
+static size_t
|
||
+adjust_column (size_t column, char c)
|
||
+{
|
||
+ if (!count_bytes)
|
||
+ {
|
||
+ if (c == '\b')
|
||
+ {
|
||
+ if (column > 0)
|
||
+ column--;
|
||
+ }
|
||
+ else if (c == '\r')
|
||
+ column = 0;
|
||
+ else if (c == '\t')
|
||
+ column += TAB_WIDTH - column % TAB_WIDTH;
|
||
+ else /* if (isprint (c)) */
|
||
+ column++;
|
||
+ }
|
||
+ else
|
||
+ column++;
|
||
+ return column;
|
||
+}
|
||
+
|
||
+/* Fold file FILENAME, or standard input if FILENAME is "-",
|
||
+ to stdout, with maximum line length WIDTH.
|
||
+ Return true if successful. */
|
||
+
|
||
+static bool
|
||
+fold_file (char const *filename, size_t width)
|
||
+{
|
||
+ FILE *istream;
|
||
+ int c;
|
||
+ size_t column = 0; /* Screen column where next char will go. */
|
||
+ size_t offset_out = 0; /* Index in `line_out' for next char. */
|
||
+ static char *line_out = NULL;
|
||
+ static size_t allocated_out = 0;
|
||
+ int saved_errno;
|
||
+
|
||
+ if (STREQ (filename, "-"))
|
||
+ {
|
||
+ istream = stdin;
|
||
+ have_read_stdin = true;
|
||
+ }
|
||
+ else
|
||
+ istream = fopen (filename, "r");
|
||
+
|
||
+ if (istream == NULL)
|
||
+ {
|
||
+ error (0, errno, "%s", filename);
|
||
+ return false;
|
||
+ }
|
||
+
|
||
+ while ((c = getc (istream)) != EOF)
|
||
+ {
|
||
+ if (offset_out + 1 >= allocated_out)
|
||
+ line_out = X2REALLOC (line_out, &allocated_out);
|
||
+
|
||
+ if (c == '\n')
|
||
+ {
|
||
+ line_out[offset_out++] = c;
|
||
+ fwrite (line_out, sizeof (char), offset_out, stdout);
|
||
+ column = offset_out = 0;
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ rescan:
|
||
+ column = adjust_column (column, c);
|
||
+
|
||
+ if (column > width)
|
||
+ {
|
||
+ /* This character would make the line too long.
|
||
+ Print the line plus a newline, and make this character
|
||
+ start the next line. */
|
||
+ if (break_spaces)
|
||
+ {
|
||
+ bool found_blank = false;
|
||
+ size_t logical_end = offset_out;
|
||
+
|
||
+ /* Look for the last blank. */
|
||
+ while (logical_end)
|
||
+ {
|
||
+ --logical_end;
|
||
+ if (isblank (to_uchar (line_out[logical_end])))
|
||
+ {
|
||
+ found_blank = true;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (found_blank)
|
||
+ {
|
||
+ size_t i;
|
||
+
|
||
+ /* Found a blank. Don't output the part after it. */
|
||
+ logical_end++;
|
||
+ fwrite (line_out, sizeof (char), (size_t) logical_end,
|
||
+ stdout);
|
||
+ putchar ('\n');
|
||
+ /* Move the remainder to the beginning of the next line.
|
||
+ The areas being copied here might overlap. */
|
||
+ memmove (line_out, line_out + logical_end,
|
||
+ offset_out - logical_end);
|
||
+ offset_out -= logical_end;
|
||
+ for (column = i = 0; i < offset_out; i++)
|
||
+ column = adjust_column (column, line_out[i]);
|
||
+ goto rescan;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (offset_out == 0)
|
||
+ {
|
||
+ line_out[offset_out++] = c;
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ line_out[offset_out++] = '\n';
|
||
+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
|
||
+ column = offset_out = 0;
|
||
+ goto rescan;
|
||
+ }
|
||
+
|
||
+ line_out[offset_out++] = c;
|
||
+ }
|
||
+
|
||
+ saved_errno = errno;
|
||
+
|
||
+ if (offset_out)
|
||
+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
|
||
+
|
||
+ if (ferror (istream))
|
||
+ {
|
||
+ error (0, saved_errno, "%s", filename);
|
||
+ if (!STREQ (filename, "-"))
|
||
+ fclose (istream);
|
||
+ return false;
|
||
+ }
|
||
+ if (!STREQ (filename, "-") && fclose (istream) == EOF)
|
||
+ {
|
||
+ error (0, errno, "%s", filename);
|
||
+ return false;
|
||
+ }
|
||
+
|
||
+ return true;
|
||
+}
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ size_t width = 80;
|
||
+ int i;
|
||
+ int optc;
|
||
+ bool ok;
|
||
+
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+
|
||
+ atexit (close_stdout);
|
||
+
|
||
+ break_spaces = count_bytes = have_read_stdin = false;
|
||
+
|
||
+ while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
|
||
+ {
|
||
+ char optargbuf[2];
|
||
+
|
||
+ switch (optc)
|
||
+ {
|
||
+ case 'b': /* Count bytes rather than columns. */
|
||
+ count_bytes = true;
|
||
+ break;
|
||
+
|
||
+ case 's': /* Break at word boundaries. */
|
||
+ break_spaces = true;
|
||
+ break;
|
||
+
|
||
+ case '0': case '1': case '2': case '3': case '4':
|
||
+ case '5': case '6': case '7': case '8': case '9':
|
||
+ if (optarg)
|
||
+ optarg--;
|
||
+ else
|
||
+ {
|
||
+ optargbuf[0] = optc;
|
||
+ optargbuf[1] = '\0';
|
||
+ optarg = optargbuf;
|
||
+ }
|
||
+ /* Fall through. */
|
||
+ case 'w': /* Line width. */
|
||
+ {
|
||
+ unsigned long int tmp_ulong;
|
||
+ if (! (xstrtoul (optarg, NULL, 10, &tmp_ulong, "") == LONGINT_OK
|
||
+ && 0 < tmp_ulong && tmp_ulong < SIZE_MAX - TAB_WIDTH))
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("invalid number of columns: %s"), quote (optarg));
|
||
+ width = tmp_ulong;
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+
|
||
+ default:
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (argc == optind)
|
||
+ ok = fold_file ("-", width);
|
||
+ else
|
||
+ {
|
||
+ ok = true;
|
||
+ for (i = optind; i < argc; i++)
|
||
+ ok &= fold_file (argv[i], width);
|
||
+ }
|
||
+
|
||
+ if (have_read_stdin && fclose (stdin) == EOF)
|
||
+ error (EXIT_FAILURE, errno, "-");
|
||
+
|
||
+ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/src/join.c coreutils-8.0/src/join.c
|
||
--- coreutils-8.0-orig/src/join.c 2009-09-23 10:25:44.000000000 +0200
|
||
+++ coreutils-8.0/src/join.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -22,17 +22,31 @@
|
||
#include <sys/types.h>
|
||
#include <getopt.h>
|
||
|
||
+/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+
|
||
+/* Get iswblank(), towupper. */
|
||
+#if HAVE_WCTYPE_H
|
||
+# include <wctype.h>
|
||
+#endif
|
||
+
|
||
#include "system.h"
|
||
#include "error.h"
|
||
#include "hard-locale.h"
|
||
#include "linebuffer.h"
|
||
-#include "memcasecmp.h"
|
||
#include "quote.h"
|
||
#include "stdio--.h"
|
||
#include "xmemcoll.h"
|
||
#include "xstrtol.h"
|
||
#include "argmatch.h"
|
||
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
/* The official name of this program (e.g., no `g' prefix). */
|
||
#define PROGRAM_NAME "join"
|
||
|
||
@@ -121,10 +135,12 @@ static struct outlist outlist_head;
|
||
/* Last element in `outlist', where a new element can be added. */
|
||
static struct outlist *outlist_end = &outlist_head;
|
||
|
||
-/* Tab character separating fields. If negative, fields are separated
|
||
- by any nonempty string of blanks, otherwise by exactly one
|
||
- tab character whose value (when cast to unsigned char) equals TAB. */
|
||
-static int tab = -1;
|
||
+/* Tab character separating fields. If NULL, fields are separated
|
||
+ by any nonempty string of blanks. */
|
||
+static char *tab = NULL;
|
||
+
|
||
+/* The number of bytes used for tab. */
|
||
+static size_t tablen = 0;
|
||
|
||
/* If nonzero, check that the input is correctly ordered. */
|
||
static enum
|
||
@@ -239,10 +255,11 @@ xfields (struct line *line)
|
||
if (ptr == lim)
|
||
return;
|
||
|
||
- if (0 <= tab)
|
||
+ if (tab != NULL)
|
||
{
|
||
+ unsigned char t = tab[0];
|
||
char *sep;
|
||
- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
|
||
+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
|
||
extract_field (line, ptr, sep - ptr);
|
||
}
|
||
else
|
||
@@ -269,6 +286,148 @@ xfields (struct line *line)
|
||
extract_field (line, ptr, lim - ptr);
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static void
|
||
+xfields_multibyte (struct line *line)
|
||
+{
|
||
+ char *ptr = line->buf.buffer;
|
||
+ char const *lim = ptr + line->buf.length - 1;
|
||
+ wchar_t wc = 0;
|
||
+ size_t mblength = 1;
|
||
+ mbstate_t state, state_bak;
|
||
+
|
||
+ memset (&state, 0, sizeof (mbstate_t));
|
||
+
|
||
+ if (ptr >= lim)
|
||
+ return;
|
||
+
|
||
+ if (tab != NULL)
|
||
+ {
|
||
+ unsigned char t = tab[0];
|
||
+ char *sep = ptr;
|
||
+ for (; ptr < lim; ptr = sep + mblength)
|
||
+ {
|
||
+ sep = ptr;
|
||
+ while (sep < lim)
|
||
+ {
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
|
||
+
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ mblength = 1;
|
||
+ state = state_bak;
|
||
+ }
|
||
+ mblength = (mblength < 1) ? 1 : mblength;
|
||
+
|
||
+ if (mblength == tablen && !memcmp (sep, tab, mblength))
|
||
+ break;
|
||
+ else
|
||
+ {
|
||
+ sep += mblength;
|
||
+ continue;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (sep >= lim)
|
||
+ break;
|
||
+
|
||
+ extract_field (line, ptr, sep - ptr);
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* Skip leading blanks before the first field. */
|
||
+ while(ptr < lim)
|
||
+ {
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
|
||
+
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ mblength = 1;
|
||
+ state = state_bak;
|
||
+ break;
|
||
+ }
|
||
+ mblength = (mblength < 1) ? 1 : mblength;
|
||
+
|
||
+ if (!iswblank(wc))
|
||
+ break;
|
||
+ ptr += mblength;
|
||
+ }
|
||
+
|
||
+ do
|
||
+ {
|
||
+ char *sep;
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ mblength = 1;
|
||
+ state = state_bak;
|
||
+ break;
|
||
+ }
|
||
+ mblength = (mblength < 1) ? 1 : mblength;
|
||
+
|
||
+ sep = ptr + mblength;
|
||
+ while (sep < lim)
|
||
+ {
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ mblength = 1;
|
||
+ state = state_bak;
|
||
+ break;
|
||
+ }
|
||
+ mblength = (mblength < 1) ? 1 : mblength;
|
||
+
|
||
+ if (iswblank (wc))
|
||
+ break;
|
||
+
|
||
+ sep += mblength;
|
||
+ }
|
||
+
|
||
+ extract_field (line, ptr, sep - ptr);
|
||
+ if (sep >= lim)
|
||
+ return;
|
||
+
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ mblength = 1;
|
||
+ state = state_bak;
|
||
+ break;
|
||
+ }
|
||
+ mblength = (mblength < 1) ? 1 : mblength;
|
||
+
|
||
+ ptr = sep + mblength;
|
||
+ while (ptr < lim)
|
||
+ {
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ mblength = 1;
|
||
+ state = state_bak;
|
||
+ break;
|
||
+ }
|
||
+ mblength = (mblength < 1) ? 1 : mblength;
|
||
+
|
||
+ if (!iswblank (wc))
|
||
+ break;
|
||
+
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ }
|
||
+ while (ptr < lim);
|
||
+ }
|
||
+
|
||
+ extract_field (line, ptr, lim - ptr);
|
||
+}
|
||
+#endif
|
||
+
|
||
static void
|
||
freeline (struct line *line)
|
||
{
|
||
@@ -287,56 +446,115 @@ keycmp (struct line const *line1, struct
|
||
size_t jf_1, size_t jf_2)
|
||
{
|
||
/* Start of field to compare in each file. */
|
||
- char *beg1;
|
||
- char *beg2;
|
||
-
|
||
- size_t len1;
|
||
- size_t len2; /* Length of fields to compare. */
|
||
+ char *beg[2];
|
||
+ char *copy[2];
|
||
+ size_t len[2]; /* Length of fields to compare. */
|
||
int diff;
|
||
+ int i, j;
|
||
|
||
if (jf_1 < line1->nfields)
|
||
{
|
||
- beg1 = line1->fields[jf_1].beg;
|
||
- len1 = line1->fields[jf_1].len;
|
||
+ beg[0] = line1->fields[jf_1].beg;
|
||
+ len[0] = line1->fields[jf_1].len;
|
||
}
|
||
else
|
||
{
|
||
- beg1 = NULL;
|
||
- len1 = 0;
|
||
+ beg[0] = NULL;
|
||
+ len[0] = 0;
|
||
}
|
||
|
||
if (jf_2 < line2->nfields)
|
||
{
|
||
- beg2 = line2->fields[jf_2].beg;
|
||
- len2 = line2->fields[jf_2].len;
|
||
+ beg[1] = line2->fields[jf_2].beg;
|
||
+ len[1] = line2->fields[jf_2].len;
|
||
}
|
||
else
|
||
{
|
||
- beg2 = NULL;
|
||
- len2 = 0;
|
||
+ beg[1] = NULL;
|
||
+ len[1] = 0;
|
||
}
|
||
|
||
- if (len1 == 0)
|
||
- return len2 == 0 ? 0 : -1;
|
||
- if (len2 == 0)
|
||
+ if (len[0] == 0)
|
||
+ return len[1] == 0 ? 0 : -1;
|
||
+ if (len[1] == 0)
|
||
return 1;
|
||
|
||
if (ignore_case)
|
||
{
|
||
- /* FIXME: ignore_case does not work with NLS (in particular,
|
||
- with multibyte chars). */
|
||
- diff = memcasecmp (beg1, beg2, MIN (len1, len2));
|
||
+#ifdef HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ size_t mblength;
|
||
+ wchar_t wc, uwc;
|
||
+ mbstate_t state, state_bak;
|
||
+
|
||
+ memset (&state, '\0', sizeof (mbstate_t));
|
||
+
|
||
+ for (i = 0; i < 2; i++)
|
||
+ {
|
||
+ copy[i] = alloca (len[i] + 1);
|
||
+
|
||
+ for (j = 0; j < MIN (len[0], len[1]);)
|
||
+ {
|
||
+ state_bak = state;
|
||
+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
|
||
+
|
||
+ switch (mblength)
|
||
+ {
|
||
+ case (size_t) -1:
|
||
+ case (size_t) -2:
|
||
+ state = state_bak;
|
||
+ /* Fall through */
|
||
+ case 0:
|
||
+ mblength = 1;
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ uwc = towupper (wc);
|
||
+
|
||
+ if (uwc != wc)
|
||
+ {
|
||
+ mbstate_t state_wc;
|
||
+
|
||
+ memset (&state_wc, '\0', sizeof (mbstate_t));
|
||
+ wcrtomb (copy[i] + j, uwc, &state_wc);
|
||
+ }
|
||
+ else
|
||
+ memcpy (copy[i] + j, beg[i] + j, mblength);
|
||
+ }
|
||
+ j += mblength;
|
||
+ }
|
||
+ copy[i][j] = '\0';
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
+ {
|
||
+ for (i = 0; i < 2; i++)
|
||
+ {
|
||
+ copy[i] = alloca (len[i] + 1);
|
||
+
|
||
+ for (j = 0; j < MIN (len[0], len[1]); j++)
|
||
+ copy[i][j] = toupper (beg[i][j]);
|
||
+
|
||
+ copy[i][j] = '\0';
|
||
+ }
|
||
+ }
|
||
}
|
||
else
|
||
{
|
||
- if (hard_LC_COLLATE)
|
||
- return xmemcoll (beg1, len1, beg2, len2);
|
||
- diff = memcmp (beg1, beg2, MIN (len1, len2));
|
||
+ copy[0] = (unsigned char *) beg[0];
|
||
+ copy[1] = (unsigned char *) beg[1];
|
||
}
|
||
|
||
+ if (hard_LC_COLLATE)
|
||
+ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
|
||
+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
|
||
+
|
||
+
|
||
if (diff)
|
||
return diff;
|
||
- return len1 < len2 ? -1 : len1 != len2;
|
||
+ return len[0] - len[1];
|
||
}
|
||
|
||
/* Check that successive input lines PREV and CURRENT from input file
|
||
@@ -417,6 +635,11 @@ get_line (FILE *fp, struct line **linep,
|
||
return false;
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ xfields_multibyte (line);
|
||
+ else
|
||
+#endif
|
||
xfields (line);
|
||
|
||
if (prevline[which - 1])
|
||
@@ -518,11 +741,18 @@ prfield (size_t n, struct line const *li
|
||
|
||
/* Print the join of LINE1 and LINE2. */
|
||
|
||
+#define PUT_TAB_CHAR /* Write the TABLEN bytes of TAB, or a space if TAB is NULL. */ \
|
||
+ do \
|
||
+ { \
|
||
+ if (tab == NULL) putchar (' '); \
|
||
+ else fwrite (tab, sizeof (char), tablen, stdout); \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
static void
|
||
prjoin (struct line const *line1, struct line const *line2)
|
||
{
|
||
const struct outlist *outlist;
|
||
- char output_separator = tab < 0 ? ' ' : tab;
|
||
|
||
outlist = outlist_head.next;
|
||
if (outlist)
|
||
@@ -557,7 +787,7 @@ prjoin (struct line const *line1, struct
|
||
o = o->next;
|
||
if (o == NULL)
|
||
break;
|
||
- putchar (output_separator);
|
||
+ PUT_TAB_CHAR;
|
||
}
|
||
putchar ('\n');
|
||
}
|
||
@@ -575,23 +805,23 @@ prjoin (struct line const *line1, struct
|
||
prfield (join_field_1, line1);
|
||
for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
|
||
{
|
||
- putchar (output_separator);
|
||
+ PUT_TAB_CHAR;
|
||
prfield (i, line1);
|
||
}
|
||
for (i = join_field_1 + 1; i < line1->nfields; ++i)
|
||
{
|
||
- putchar (output_separator);
|
||
+ PUT_TAB_CHAR;
|
||
prfield (i, line1);
|
||
}
|
||
|
||
for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
|
||
{
|
||
- putchar (output_separator);
|
||
+ PUT_TAB_CHAR;
|
||
prfield (i, line2);
|
||
}
|
||
for (i = join_field_2 + 1; i < line2->nfields; ++i)
|
||
{
|
||
- putchar (output_separator);
|
||
+ PUT_TAB_CHAR;
|
||
prfield (i, line2);
|
||
}
|
||
putchar ('\n');
|
||
@@ -1022,20 +1252,41 @@ main (int argc, char **argv)
|
||
|
||
case 't':
|
||
{
|
||
- unsigned char newtab = optarg[0];
|
||
- if (! newtab)
|
||
+ char *newtab;
|
||
+ size_t newtablen;
|
||
+ if (! optarg[0])
|
||
error (EXIT_FAILURE, 0, _("empty tab"));
|
||
- if (optarg[1])
|
||
+ newtab = xstrdup (optarg);
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ mbstate_t state;
|
||
+
|
||
+ memset (&state, 0, sizeof (mbstate_t));
|
||
+ newtablen = mbrtowc (NULL, newtab,
|
||
+ strnlen (newtab, MB_LEN_MAX),
|
||
+ &state);
|
||
+ if (newtablen == (size_t) 0
|
||
+ || newtablen == (size_t) -1
|
||
+ || newtablen == (size_t) -2)
|
||
+ newtablen = 1;
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
+ newtablen = 1;
|
||
+
|
||
+ if (newtablen == 1 && newtab[1])
|
||
+ {
|
||
+ if (STREQ (newtab, "\\0"))
|
||
+ newtab[0] = '\0';
|
||
+ }
|
||
+ if (tab != NULL && strcmp (tab, newtab))
|
||
{
|
||
- if (STREQ (optarg, "\\0"))
|
||
- newtab = '\0';
|
||
- else
|
||
- error (EXIT_FAILURE, 0, _("multi-character tab %s"),
|
||
- quote (optarg));
|
||
+ free (newtab);
|
||
+ error (EXIT_FAILURE, 0, _("incompatible tabs"));
|
||
}
|
||
- if (0 <= tab && tab != newtab)
|
||
- error (EXIT_FAILURE, 0, _("incompatible tabs"));
|
||
tab = newtab;
|
||
+ tablen = newtablen;
|
||
}
|
||
break;
|
||
|
||
diff -urNp coreutils-8.0-orig/src/join.c.orig coreutils-8.0/src/join.c.orig
|
||
--- coreutils-8.0-orig/src/join.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/join.c.orig 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -0,0 +1,1360 @@
|
||
+/* join - join lines of two files on a common field
|
||
+ Copyright (C) 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
+
|
||
+ Written by Mike Haertel, mike@gnu.ai.mit.edu. */
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <assert.h>
|
||
+#include <sys/types.h>
|
||
+#include <getopt.h>
|
||
+
|
||
+/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+
|
||
+/* Get iswblank(), towupper. */
|
||
+#if HAVE_WCTYPE_H
|
||
+# include <wctype.h>
|
||
+#endif
|
||
+
|
||
+#include "system.h"
|
||
+#include "error.h"
|
||
+#include "hard-locale.h"
|
||
+#include "linebuffer.h"
|
||
+#include "quote.h"
|
||
+#include "stdio--.h"
|
||
+#include "xmemcoll.h"
|
||
+#include "xstrtol.h"
|
||
+#include "argmatch.h"
|
||
+
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "join"
|
||
+
|
||
+#define AUTHORS proper_name ("Mike Haertel")
|
||
+
|
||
+#define join system_join
|
||
+
|
||
+/* Exchange the two `struct line *' lvalues A and B.  The expansion is a
+   single statement without a trailing semicolon, so the macro can be
+   used safely as the body of an unbraced `if ... else'.  */
+#define SWAPLINES(a, b) do { \
+  struct line *tmp = a; \
+  a = b; \
+  b = tmp; \
+} while (0)
|
||
+
|
||
+/* An element of the list identifying which fields to print for each
|
||
+ output line. */
|
||
+struct outlist
|
||
+ {
|
||
+ /* File number: 0, 1, or 2. 0 means use the join field.
|
||
+ 1 means use the first file argument, 2 the second. */
|
||
+ int file;
|
||
+
|
||
+ /* Field index (zero-based), specified only when FILE is 1 or 2. */
|
||
+ size_t field;
|
||
+
|
||
+ struct outlist *next;
|
||
+ };
|
||
+
|
||
+/* A field of a line. */
|
||
+struct field
|
||
+ {
|
||
+ char *beg; /* First character in field. */
|
||
+ size_t len; /* The length of the field. */
|
||
+ };
|
||
+
|
||
+/* A line read from an input file. */
|
||
+struct line
|
||
+ {
|
||
+ struct linebuffer buf; /* The line itself. */
|
||
+ size_t nfields; /* Number of elements in `fields'. */
|
||
+ size_t nfields_allocated; /* Number of elements allocated for `fields'. */
|
||
+ struct field *fields;
|
||
+ };
|
||
+
|
||
+/* One or more consecutive lines read from a file that all have the
|
||
+ same join field value. */
|
||
+struct seq
|
||
+ {
|
||
+ size_t count; /* Elements used in `lines'. */
|
||
+ size_t alloc; /* Elements allocated in `lines'. */
|
||
+ struct line **lines;
|
||
+ };
|
||
+
|
||
+/* The previous line read from each file. */
|
||
+static struct line *prevline[2] = {NULL, NULL};
|
||
+
|
||
+/* This provides an extra line buffer for each file. We need these if we
|
||
+ try to read two consecutive lines into the same buffer, since we don't
|
||
+ want to overwrite the previous buffer before we check order. */
|
||
+static struct line *spareline[2] = {NULL, NULL};
|
||
+
|
||
+/* True if the LC_COLLATE locale is hard. */
|
||
+static bool hard_LC_COLLATE;
|
||
+
|
||
+/* If nonzero, print unpairable lines in file 1 or 2. */
|
||
+static bool print_unpairables_1, print_unpairables_2;
|
||
+
|
||
+/* If nonzero, print pairable lines. */
|
||
+static bool print_pairables;
|
||
+
|
||
+/* If nonzero, we have seen at least one unpairable line. */
|
||
+static bool seen_unpairable;
|
||
+
|
||
+/* If nonzero, we have warned about disorder in that file. */
|
||
+static bool issued_disorder_warning[2];
|
||
+
|
||
+/* Empty output field filler. */
|
||
+static char const *empty_filler;
|
||
+
|
||
+/* Field to join on; SIZE_MAX means they haven't been determined yet. */
|
||
+static size_t join_field_1 = SIZE_MAX;
|
||
+static size_t join_field_2 = SIZE_MAX;
|
||
+
|
||
+/* List of fields to print. */
|
||
+static struct outlist outlist_head;
|
||
+
|
||
+/* Last element in `outlist', where a new element can be added. */
|
||
+static struct outlist *outlist_end = &outlist_head;
|
||
+
|
||
+/* Tab character separating fields. If NULL, fields are separated
|
||
+ by any nonempty string of blanks. */
|
||
+static char *tab = NULL;
|
||
+
|
||
+/* The number of bytes used for tab. */
|
||
+static size_t tablen = 0;
|
||
+
|
||
+/* If nonzero, check that the input is correctly ordered. */
|
||
+static enum
|
||
+ {
|
||
+ CHECK_ORDER_DEFAULT,
|
||
+ CHECK_ORDER_ENABLED,
|
||
+ CHECK_ORDER_DISABLED
|
||
+ } check_input_order;
|
||
+
|
||
+enum
|
||
+{
|
||
+ CHECK_ORDER_OPTION = CHAR_MAX + 1,
|
||
+ NOCHECK_ORDER_OPTION
|
||
+};
|
||
+
|
||
+
|
||
+static struct option const longopts[] =
|
||
+{
|
||
+ {"ignore-case", no_argument, NULL, 'i'},
|
||
+ {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
|
||
+ {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0}
|
||
+};
|
||
+
|
||
+/* Used to print non-joining lines */
|
||
+static struct line uni_blank;
|
||
+
|
||
+/* If nonzero, ignore case when comparing join fields. */
|
||
+static bool ignore_case;
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s [OPTION]... FILE1 FILE2\n\
|
||
+"),
|
||
+ program_name);
|
||
+ fputs (_("\
|
||
+For each pair of input lines with identical join fields, write a line to\n\
|
||
+standard output. The default join field is the first, delimited\n\
|
||
+by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
|
||
+\n\
|
||
+ -a FILENUM print unpairable lines coming from file FILENUM, where\n\
|
||
+ FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
|
||
+ -e EMPTY replace missing input fields with EMPTY\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -i, --ignore-case ignore differences in case when comparing fields\n\
|
||
+ -j FIELD equivalent to `-1 FIELD -2 FIELD'\n\
|
||
+ -o FORMAT obey FORMAT while constructing output line\n\
|
||
+ -t CHAR use CHAR as input and output field separator\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -v FILENUM like -a FILENUM, but suppress joined output lines\n\
|
||
+ -1 FIELD join on this FIELD of file 1\n\
|
||
+ -2 FIELD join on this FIELD of file 2\n\
|
||
+ --check-order check that the input is correctly sorted, even\n\
|
||
+ if all input lines are pairable\n\
|
||
+ --nocheck-order do not check that the input is correctly sorted\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (_("\
|
||
+\n\
|
||
+Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
|
||
+else fields are separated by CHAR. Any FIELD is a field number counted\n\
|
||
+from 1. FORMAT is one or more comma or blank separated specifications,\n\
|
||
+each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\
|
||
+the remaining fields from FILE1, the remaining fields from FILE2, all\n\
|
||
+separated by CHAR.\n\
|
||
+\n\
|
||
+Important: FILE1 and FILE2 must be sorted on the join fields.\n\
|
||
+E.g., use `sort -k 1b,1' if `join' has no options.\n\
|
||
+Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
|
||
+If the input is not sorted and some lines cannot be joined, a\n\
|
||
+warning message will be given.\n\
|
||
+"), stdout);
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+ exit (status);
|
||
+}
|
||
+
|
||
+/* Append to LINE's field table an entry describing the LEN bytes that
+   start at FIELD, enlarging the table first when it is already full.  */
+
+static void
+extract_field (struct line *line, char *field, size_t len)
+{
+  struct field *slot;
+
+  if (line->nfields_allocated <= line->nfields)
+    line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
+
+  slot = &line->fields[line->nfields];
+  slot->beg = field;
+  slot->len = len;
+  line->nfields++;
+}
|
||
+
|
||
+/* Split the text held in LINE into fields and record each one with
+   extract_field.  The last byte of the buffer is never part of a field
+   (presumably it is the line terminator -- confirm against
+   readlinebuffer).  When `tab' is set, fields are delimited by its first
+   byte; otherwise they are delimited by runs of blanks, and leading
+   blanks are skipped.  */
+
+static void
+xfields (struct line *line)
+{
+  char *cur = line->buf.buffer;
+  char const *end = cur + line->buf.length - 1;
+
+  /* Nothing but the final byte: no fields at all.  */
+  if (cur == end)
+    return;
+
+  if (tab == NULL)
+    {
+      /* A line made only of blanks has no fields either.  */
+      while (isblank (to_uchar (*cur)))
+        {
+          ++cur;
+          if (cur == end)
+            return;
+        }
+
+      do
+        {
+          /* Find the end of the field that starts at CUR.  */
+          char *stop = cur + 1;
+          while (stop != end && ! isblank (to_uchar (*stop)))
+            stop++;
+
+          extract_field (line, cur, stop - cur);
+          if (stop == end)
+            return;
+
+          /* Step over the run of blanks that follows it.  */
+          cur = stop + 1;
+          while (cur != end && isblank (to_uchar (*cur)))
+            cur++;
+        }
+      while (cur != end);
+    }
+  else
+    {
+      unsigned char delim = tab[0];
+      char *hit;
+
+      while ((hit = memchr (cur, delim, end - cur)) != NULL)
+        {
+          extract_field (line, cur, hit - cur);
+          cur = hit + 1;
+        }
+    }
+
+  /* Whatever remains (possibly nothing) is the final field.  */
+  extract_field (line, cur, end - cur);
+}
|
||
+
|
||
+#if HAVE_MBRTOWC
+/* Decode the multibyte character that starts at S, letting mbrtowc look
+   at the bytes up to and including the one at LIM.  Store the wide
+   character in *PWC and return the number of bytes it occupies, never
+   less than 1.  On an invalid or incomplete sequence restore *PS to its
+   value on entry, set *VALID to false and return 1 (leaving *PWC
+   untouched); otherwise set *VALID to true.  */
+
+static size_t
+next_mbchar (wchar_t *pwc, char const *s, char const *lim,
+             mbstate_t *ps, bool *valid)
+{
+  mbstate_t saved = *ps;
+  size_t n = mbrtowc (pwc, s, lim - s + 1, ps);
+
+  if (n == (size_t) -1 || n == (size_t) -2)
+    {
+      *ps = saved;
+      *valid = false;
+      return 1;
+    }
+
+  *valid = true;
+  return n < 1 ? 1 : n;
+}
+
+/* Multibyte-aware counterpart of xfields: split the text held in LINE
+   into fields and record each one with extract_field.  When `tab' is
+   set, a field ends at a character whose bytes equal the TABLEN bytes of
+   `tab'; otherwise fields are separated by runs of characters for which
+   iswblank is true.  An undecodable byte is skipped as a one-byte
+   character in the `tab' case and stops the scan in the blank case.  */
+
+static void
+xfields_multibyte (struct line *line)
+{
+  char *ptr = line->buf.buffer;
+  char const *lim = ptr + line->buf.length - 1;
+  wchar_t wc = 0;
+  size_t mblength = 1;
+  bool valid;
+  mbstate_t state;
+
+  memset (&state, 0, sizeof (mbstate_t));
+
+  if (ptr >= lim)
+    return;
+
+  if (tab != NULL)
+    {
+      char *sep = ptr;
+      for (; ptr < lim; ptr = sep + mblength)
+        {
+          /* Advance SEP to the next separator, or to LIM if none.  */
+          sep = ptr;
+          while (sep < lim)
+            {
+              mblength = next_mbchar (&wc, sep, lim, &state, &valid);
+              if (mblength == tablen && !memcmp (sep, tab, mblength))
+                break;
+              sep += mblength;
+            }
+
+          if (sep >= lim)
+            break;
+
+          extract_field (line, ptr, sep - ptr);
+        }
+    }
+  else
+    {
+      /* Skip leading blanks before the first field.  */
+      while (ptr < lim)
+        {
+          mblength = next_mbchar (&wc, ptr, lim, &state, &valid);
+          if (!valid || !iswblank (wc))
+            break;
+          ptr += mblength;
+        }
+
+      /* A line consisting only of blanks has no fields, exactly as in
+         xfields; without this check the loop below would record the
+         byte at LIM as a field.  */
+      if (ptr >= lim)
+        return;
+
+      do
+        {
+          char *sep;
+
+          /* Step over the first character of the field.  */
+          mblength = next_mbchar (&wc, ptr, lim, &state, &valid);
+          if (!valid)
+            break;
+
+          /* Find the blank (or LIM) that ends the field.  */
+          sep = ptr + mblength;
+          while (sep < lim)
+            {
+              mblength = next_mbchar (&wc, sep, lim, &state, &valid);
+              if (!valid || iswblank (wc))
+                break;
+              sep += mblength;
+            }
+
+          extract_field (line, ptr, sep - ptr);
+          if (sep >= lim)
+            return;
+
+          /* Step over the separator found at SEP ...  */
+          mblength = next_mbchar (&wc, sep, lim, &state, &valid);
+          if (!valid)
+            break;
+
+          /* ... and over any further blanks that follow it.  */
+          ptr = sep + mblength;
+          while (ptr < lim)
+            {
+              mblength = next_mbchar (&wc, ptr, lim, &state, &valid);
+              if (!valid || !iswblank (wc))
+                break;
+              ptr += mblength;
+            }
+        }
+      while (ptr < lim);
+    }
+
+  /* Whatever remains (possibly nothing) is the final field.  */
+  extract_field (line, ptr, lim - ptr);
+}
+#endif
|
||
+
|
||
+/* Release the field table and the text buffer owned by LINE; LINE
+   itself stays allocated.  A null LINE is ignored.  The freed members
+   are reset so that calling freeline on the same line again is
+   harmless instead of freeing the field table a second time.  */
+
+static void
+freeline (struct line *line)
+{
+  if (line == NULL)
+    return;
+
+  free (line->fields);
+  line->fields = NULL;
+  line->nfields = 0;
+  line->nfields_allocated = 0;
+
+  free (line->buf.buffer);
+  line->buf.buffer = NULL;
+}
|
||
+
|
||
+/* Store in DST an upper-cased copy of the LEN bytes that start at SRC,
+   followed by a NUL byte; DST must have room for LEN + 1 bytes.
+   When MB_CUR_MAX > 1 each character is decoded with mbrtowc and mapped
+   with towupper.  A character whose upper-case form does not occupy the
+   same number of bytes is copied unchanged, so the copy is always
+   exactly LEN bytes long.  Bytes that do not decode are copied as is.  */
+
+static void
+upcase_field (char *dst, char const *src, size_t len)
+{
+  size_t j = 0;
+
+#if HAVE_MBRTOWC
+  if (MB_CUR_MAX > 1)
+    {
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+
+      while (j < len)
+        {
+          wchar_t wc;
+          mbstate_t state_bak = state;
+          size_t mblength = mbrtowc (&wc, src + j, len - j, &state);
+          bool converted = false;
+
+          if (mblength == (size_t) -1 || mblength == (size_t) -2)
+            {
+              /* Invalid or truncated sequence: take one byte verbatim.  */
+              state = state_bak;
+              mblength = 1;
+            }
+          else if (mblength == 0)
+            mblength = 1;       /* An embedded NUL occupies one byte.  */
+          else
+            {
+              wchar_t uwc = towupper (wc);
+
+              if (uwc != wc)
+                {
+                  char upper[MB_LEN_MAX];
+                  mbstate_t state_wc;
+
+                  memset (&state_wc, '\0', sizeof (mbstate_t));
+                  if (wcrtomb (upper, uwc, &state_wc) == mblength)
+                    {
+                      memcpy (dst + j, upper, mblength);
+                      converted = true;
+                    }
+                }
+            }
+
+          if (!converted)
+            memcpy (dst + j, src + j, mblength);
+          j += mblength;
+        }
+    }
+  else
+#endif
+    {
+      for (; j < len; j++)
+        dst[j] = toupper (to_uchar (src[j]));
+    }
+
+  dst[len] = '\0';
+}
+
+/* Return <0 if the join field in LINE1 compares less than the one in LINE2;
+   >0 if it compares greater; 0 if it compares equal.
+   Report an error and exit if the comparison fails.
+   Use join fields JF_1 and JF_2 respectively.
+   A field that is missing or empty compares less than any nonempty
+   field.  When `ignore_case' is set, the comparison is made on
+   upper-cased copies of the complete fields.  */
+
+static int
+keycmp (struct line const *line1, struct line const *line2,
+        size_t jf_1, size_t jf_2)
+{
+  /* Start of field to compare in each file.  */
+  char *beg[2];
+  /* What is actually compared: BEG itself or an upper-cased copy.  */
+  char *copy[2];
+  size_t len[2];        /* Length of fields to compare.  */
+  int diff;
+  int i;
+
+  if (jf_1 < line1->nfields)
+    {
+      beg[0] = line1->fields[jf_1].beg;
+      len[0] = line1->fields[jf_1].len;
+    }
+  else
+    {
+      beg[0] = NULL;
+      len[0] = 0;
+    }
+
+  if (jf_2 < line2->nfields)
+    {
+      beg[1] = line2->fields[jf_2].beg;
+      len[1] = line2->fields[jf_2].len;
+    }
+  else
+    {
+      beg[1] = NULL;
+      len[1] = 0;
+    }
+
+  if (len[0] == 0)
+    return len[1] == 0 ? 0 : -1;
+  if (len[1] == 0)
+    return 1;
+
+  if (ignore_case)
+    {
+      /* Convert each field in full: xmemcoll below is handed len[i]
+         bytes of copy[i], not just the length of the shorter field.  */
+      for (i = 0; i < 2; i++)
+        {
+          copy[i] = alloca (len[i] + 1);
+          upcase_field (copy[i], beg[i], len[i]);
+        }
+    }
+  else
+    {
+      copy[0] = beg[0];
+      copy[1] = beg[1];
+    }
+
+  if (hard_LC_COLLATE)
+    return xmemcoll (copy[0], len[0], copy[1], len[1]);
+
+  diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
+  if (diff)
+    return diff;
+
+  /* Equal up to the shorter length: the shorter field sorts first.
+     Only the sign is returned, so a large size_t difference cannot be
+     mangled by conversion to int.  */
+  return len[0] < len[1] ? -1 : len[0] != len[1];
+}
|
||
+
|
||
+/* Verify that CURRENT does not sort before PREV, both being successive
+   lines of input file WHATFILE (1 or 2), compared on that file's join
+   field.
+
+   Nothing is checked when order checking is disabled, nor, in the
+   default mode, before an unpairable line has been seen.  With order
+   checking explicitly enabled a disorder is a fatal error; otherwise
+   it is reported as a warning.
+
+   At most one message is issued per input file.  */
+
+static void
+check_order (const struct line *prev,
+             const struct line *current,
+             int whatfile)
+{
+  size_t join_field;
+
+  if (check_input_order == CHECK_ORDER_DISABLED)
+    return;
+  if (check_input_order != CHECK_ORDER_ENABLED && !seen_unpairable)
+    return;
+  if (issued_disorder_warning[whatfile - 1])
+    return;
+
+  join_field = (whatfile == 1) ? join_field_1 : join_field_2;
+  if (keycmp (prev, current, join_field, join_field) <= 0)
+    return;
+
+  error ((check_input_order == CHECK_ORDER_ENABLED
+          ? EXIT_FAILURE : 0),
+         0, _("file %d is not in sorted order"), whatfile);
+
+  /* Reaching this point means the message was only a warning;
+     remember it so that it is not repeated for this file.  */
+  issued_disorder_warning[whatfile - 1] = true;
+}
|
||
+
|
||
+/* Mark LINE as holding no fields; its allocated storage is kept.  */
+
+static inline void
+reset_line (struct line *line)
+{
+  line->nfields = 0;
+}
|
||
+
|
||
+/* Allocate a zero-filled `struct line', store its address in *LINEP
+   and return it.  */
+
+static struct line *
+init_linep (struct line **linep)
+{
+  struct line *fresh = xmalloc (sizeof *fresh);
+
+  memset (fresh, '\0', sizeof *fresh);
+  *linep = fresh;
+  return fresh;
+}
|
||
+
|
||
+/* Read the next line of file number WHICH (1 or 2) from FP into *LINEP
+   and split it into fields.  If *LINEP is the line remembered as the
+   previous one for that file, it is first exchanged with the file's
+   spare line so that the previous line survives for the order check.
+   A line is allocated when the slot is empty.  Return true on success;
+   at end of input release the line's storage and return false.  A read
+   error is fatal.  */
+
+static bool
+get_line (FILE *fp, struct line **linep, int which)
+{
+  int const slot = which - 1;
+  struct line *cur = *linep;
+
+  if (cur == prevline[slot])
+    {
+      struct line *spare = spareline[slot];
+      spareline[slot] = cur;
+      cur = spare;
+      *linep = cur;
+    }
+
+  if (cur == NULL)
+    cur = init_linep (linep);
+  else
+    reset_line (cur);
+
+  if (! readlinebuffer (&cur->buf, fp))
+    {
+      if (ferror (fp))
+        error (EXIT_FAILURE, errno, _("read error"));
+      freeline (cur);
+      return false;
+    }
+
+  xfields (cur);
+
+  if (prevline[slot] != NULL)
+    check_order (prevline[slot], cur, which);
+
+  prevline[slot] = cur;
+  return true;
+}
|
||
+
|
||
+/* Release every spare line that was allocated, together with the
+   storage it owns.  */
+
+static void
+free_spareline (void)
+{
+  size_t k;
+
+  for (k = 0; k < ARRAY_CARDINALITY (spareline); k++)
+    {
+      struct line *spare = spareline[k];
+
+      if (spare == NULL)
+        continue;
+
+      freeline (spare);
+      free (spare);
+    }
+}
|
||
+
|
||
+/* Make SEQ an empty sequence that owns no storage.  */
+
+static void
+initseq (struct seq *seq)
+{
+  seq->lines = NULL;
+  seq->alloc = 0;
+  seq->count = 0;
+}
|
||
+
|
||
+/* Read the next line of file number WHICHFILE from FP into the first
+   unused slot of SEQ, growing the slot array (new slots start out
+   null) when it is full.  Return true and count the line on success,
+   false otherwise.  */
+
+static bool
+getseq (FILE *fp, struct seq *seq, int whichfile)
+{
+  if (seq->alloc == seq->count)
+    {
+      size_t k = seq->count;
+
+      seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
+      while (k < seq->alloc)
+        seq->lines[k++] = NULL;
+    }
+
+  if (! get_line (fp, &seq->lines[seq->count], whichfile))
+    return false;
+
+  ++seq->count;
+  return true;
+}
|
||
+
|
||
+/* Read a line of file number WHICHFILE from FP into SEQ.  When FIRST is
+   true the sequence is emptied beforehand, so the line becomes its
+   first item; otherwise the line is appended.  Return what getseq
+   returns.  */
+static bool
+advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
+{
+  seq->count = first ? 0 : seq->count;
+  return getseq (fp, seq, whichfile);
+}
|
||
+
|
||
+/* Free every line held in SEQ's slot array, then the array itself.
+   The contents of a line are released only while it still has a text
+   buffer.  */
+
+static void
+delseq (struct seq *seq)
+{
+  size_t k;
+
+  for (k = 0; k < seq->alloc; k++)
+    {
+      struct line *entry = seq->lines[k];
+
+      if (entry == NULL)
+        continue;
+
+      if (entry->buf.buffer != NULL)
+        freeline (entry);
+      free (entry);
+    }
+
+  free (seq->lines);
+}
|
||
+
|
||
+
|
||
+/* Write field N of LINE to standard output.  When LINE has no such
+   field, or the field is empty, write `empty_filler' instead if one
+   is set, else nothing.  */
+
+static void
+prfield (size_t n, struct line const *line)
+{
+  size_t len = (n < line->nfields) ? line->fields[n].len : 0;
+
+  if (len != 0)
+    fwrite (line->fields[n].beg, 1, len, stdout);
+  else if (empty_filler)
+    fputs (empty_filler, stdout);
+}
|
||
+
|
||
+/* Write the output field separator: the TABLEN bytes of `tab' when it
+   is set, otherwise a single space.  */
+
+#define PUT_TAB_CHAR \
+  do \
+    { \
+      if (tab == NULL) \
+        putchar (' '); \
+      else \
+        fwrite (tab, sizeof (char), tablen, stdout); \
+    } \
+  while (0)
+
+/* Write fields FROM .. TO - 1 of LINE, each preceded by a separator.  */
+
+static void
+prfields_between (struct line const *line, size_t from, size_t to)
+{
+  size_t i;
+
+  for (i = from; i < to; ++i)
+    {
+      PUT_TAB_CHAR;
+      prfield (i, line);
+    }
+}
+
+/* Print the join of LINE1 and LINE2 followed by a newline.  Either
+   argument may be `uni_blank', standing for a missing line.  With an
+   output list, print the listed fields in order; without one, print
+   the join field and then the remaining fields of each line.  */
+
+static void
+prjoin (struct line const *line1, struct line const *line2)
+{
+  const struct outlist *spec = outlist_head.next;
+
+  if (spec == NULL)
+    {
+      /* Default format.  Put the real line first when the first one
+         is the blank placeholder.  */
+      if (line1 == &uni_blank)
+        {
+          line1 = line2;
+          line2 = &uni_blank;
+        }
+
+      prfield (join_field_1, line1);
+      prfields_between (line1, 0, MIN (join_field_1, line1->nfields));
+      prfields_between (line1, join_field_1 + 1, line1->nfields);
+
+      prfields_between (line2, 0, MIN (join_field_2, line2->nfields));
+      prfields_between (line2, join_field_2 + 1, line2->nfields);
+      putchar ('\n');
+      return;
+    }
+
+  for (;;)
+    {
+      if (spec->file != 0)
+        prfield (spec->field, spec->file == 1 ? line1 : line2);
+      else if (line1 == &uni_blank)
+        /* File 0 is the join field; take it from whichever line is
+           real.  */
+        prfield (join_field_2, line2);
+      else
+        prfield (join_field_1, line1);
+
+      spec = spec->next;
+      if (spec == NULL)
+        break;
+      PUT_TAB_CHAR;
+    }
+  putchar ('\n');
+}
|
||
+
|
||
+/* Print the join of the files in FP1 and FP2. */
|
||
+
|
||
+static void
|
||
+join (FILE *fp1, FILE *fp2)
|
||
+{
|
||
+ struct seq seq1, seq2;
|
||
+ struct line **linep = xmalloc (sizeof *linep);
|
||
+ int diff;
|
||
+ bool eof1, eof2, checktail;
|
||
+
|
||
+ *linep = NULL;
|
||
+
|
||
+ /* Read the first line of each file. */
|
||
+ initseq (&seq1);
|
||
+ getseq (fp1, &seq1, 1);
|
||
+ initseq (&seq2);
|
||
+ getseq (fp2, &seq2, 2);
|
||
+
|
||
+ while (seq1.count && seq2.count)
|
||
+ {
|
||
+ size_t i;
|
||
+ diff = keycmp (seq1.lines[0], seq2.lines[0],
|
||
+ join_field_1, join_field_2);
|
||
+ if (diff < 0)
|
||
+ {
|
||
+ if (print_unpairables_1)
|
||
+ prjoin (seq1.lines[0], &uni_blank);
|
||
+ advance_seq (fp1, &seq1, true, 1);
|
||
+ seen_unpairable = true;
|
||
+ continue;
|
||
+ }
|
||
+ if (diff > 0)
|
||
+ {
|
||
+ if (print_unpairables_2)
|
||
+ prjoin (&uni_blank, seq2.lines[0]);
|
||
+ advance_seq (fp2, &seq2, true, 2);
|
||
+ seen_unpairable = true;
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ /* Keep reading lines from file1 as long as they continue to
|
||
+ match the current line from file2. */
|
||
+ eof1 = false;
|
||
+ do
|
||
+ if (!advance_seq (fp1, &seq1, false, 1))
|
||
+ {
|
||
+ eof1 = true;
|
||
+ ++seq1.count;
|
||
+ break;
|
||
+ }
|
||
+ while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
|
||
+ join_field_1, join_field_2));
|
||
+
|
||
+ /* Keep reading lines from file2 as long as they continue to
|
||
+ match the current line from file1. */
|
||
+ eof2 = false;
|
||
+ do
|
||
+ if (!advance_seq (fp2, &seq2, false, 2))
|
||
+ {
|
||
+ eof2 = true;
|
||
+ ++seq2.count;
|
||
+ break;
|
||
+ }
|
||
+ while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
|
||
+ join_field_1, join_field_2));
|
||
+
|
||
+ if (print_pairables)
|
||
+ {
|
||
+ for (i = 0; i < seq1.count - 1; ++i)
|
||
+ {
|
||
+ size_t j;
|
||
+ for (j = 0; j < seq2.count - 1; ++j)
|
||
+ prjoin (seq1.lines[i], seq2.lines[j]);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (!eof1)
|
||
+ {
|
||
+ SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
|
||
+ seq1.count = 1;
|
||
+ }
|
||
+ else
|
||
+ seq1.count = 0;
|
||
+
|
||
+ if (!eof2)
|
||
+ {
|
||
+ SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
|
||
+ seq2.count = 1;
|
||
+ }
|
||
+ else
|
||
+ seq2.count = 0;
|
||
+ }
|
||
+
|
||
+  /* If the user did not specify --nocheck-order, then we read the
+     tail ends of both inputs to verify that they are in order.  We
+     skip the rest of the tail once we have issued a warning for that
+     file, unless we actually need to print the unpairable lines.  */
|
||
+ if (check_input_order != CHECK_ORDER_DISABLED
|
||
+ && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
|
||
+ checktail = true;
|
||
+ else
|
||
+ checktail = false;
|
||
+
|
||
+ if ((print_unpairables_1 || checktail) && seq1.count)
|
||
+ {
|
||
+ if (print_unpairables_1)
|
||
+ prjoin (seq1.lines[0], &uni_blank);
|
||
+ seen_unpairable = true;
|
||
+ while (get_line (fp1, linep, 1))
|
||
+ {
|
||
+ if (print_unpairables_1)
|
||
+ prjoin (*linep, &uni_blank);
|
||
+ if (issued_disorder_warning[0] && !print_unpairables_1)
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if ((print_unpairables_2 || checktail) && seq2.count)
|
||
+ {
|
||
+ if (print_unpairables_2)
|
||
+ prjoin (&uni_blank, seq2.lines[0]);
|
||
+ seen_unpairable = true;
|
||
+ while (get_line (fp2, linep, 2))
|
||
+ {
|
||
+ if (print_unpairables_2)
|
||
+ prjoin (&uni_blank, *linep);
|
||
+ if (issued_disorder_warning[1] && !print_unpairables_2)
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ free (*linep);
|
||
+
|
||
+ free (linep);
|
||
+ delseq (&seq1);
|
||
+ delseq (&seq2);
|
||
+}
|
||
+
|
||
+/* Append to the output list a spec for field FIELD of file FILE.
+   FILE is 0 (the join field, in which case FIELD must be 0), 1 or 2.  */
+
+static void
+add_field (int file, size_t field)
+{
+  struct outlist *node;
+
+  assert (file == 0 || file == 1 || file == 2);
+  assert (file != 0 || field == 0);
+
+  node = xmalloc (sizeof *node);
+  node->next = NULL;
+  node->field = field;
+  node->file = file;
+
+  /* Link at the tail so that fields come out in the order given.  */
+  outlist_end->next = node;
+  outlist_end = node;
+}
|
||
+
|
||
+/* Parse STR, a 1-based field number written in decimal, and return the
+   corresponding zero-based field number.  A value too large to
+   represent is silently treated as SIZE_MAX, giving SIZE_MAX - 1.  A
+   zero or otherwise unparsable value is diagnosed and the program
+   exits.  */
+
+static size_t
+string_to_join_field (char const *str)
+{
+  unsigned long int val;
+  verify (SIZE_MAX <= ULONG_MAX);
+
+  strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
+
+  if (s_err == LONGINT_OK)
+    {
+      if (val == 0)
+        error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
+      if (SIZE_MAX < val)
+        val = SIZE_MAX;
+    }
+  else if (s_err == LONGINT_OVERFLOW)
+    val = SIZE_MAX;
+  else
+    error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
+
+  return val - 1;
+}
|
||
+
|
||
+/* Decode the single field specifier S, which is either `0' or
+   `FILENUM.FIELD' with FILENUM 1 or 2, into *FILE_INDEX and
+   *FIELD_INDEX.  FIELD is 1-based in S; *FIELD_INDEX is zero-based.
+   An invalid specifier is diagnosed and the program exits.  */
+
+static void
+decode_field_spec (const char *s, int *file_index, size_t *field_index)
+{
+  char const which = s[0];
+
+  if (which == '0')
+    {
+      /* `0' must be all alone -- no `.FIELD'.  */
+      if (s[1] != '\0')
+        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
+      *file_index = 0;
+      *field_index = 0;
+    }
+  else if (which == '1' || which == '2')
+    {
+      if (s[1] != '.')
+        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
+      *file_index = which - '0';
+      *field_index = string_to_join_field (s + 2);
+    }
+  else
+    {
+      error (EXIT_FAILURE, 0,
+             _("invalid file number in field spec: %s"), quote (s));
+
+      /* Not reached.  The abort tells the compiler so, which avoids a
+         warning that the caller's *file_index and *field_index might
+         be used uninitialized.  */
+      abort ();
+    }
+}
|
||
+
|
||
+/* Decode each field spec in STR, where specs are separated by a comma,
+   a space or a tab, and append it to the output list.  STR is modified:
+   every separator found is overwritten with a NUL byte.  */
+
+static void
+add_field_list (char *str)
+{
+  char *rest = str;
+
+  do
+    {
+      char const *item = rest;
+      int file_index;
+      size_t field_index;
+
+      /* Cut the current item off at the next separator, if any.  */
+      rest = strpbrk (rest, ", \t");
+      if (rest != NULL)
+        {
+          *rest = '\0';
+          rest++;
+        }
+
+      decode_field_spec (item, &file_index, &field_index);
+      add_field (file_index, field_index);
+    }
+  while (rest != NULL);
+}
|
||
+
|
||
+/* Store VAL in the join field *VAR.  If *VAR already holds a different
+   value (SIZE_MAX meaning "not set yet"), report the two conflicting
+   1-based field numbers and exit.  */
+
+static void
+set_join_field (size_t *var, size_t val)
+{
+  bool const conflict = (*var != SIZE_MAX && *var != val);
+
+  if (conflict)
+    {
+      unsigned long int old_number = *var + 1;
+      unsigned long int new_number = val + 1;
+      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
+             old_number, new_number);
+    }
+
+  *var = val;
+}
|
||
+
|
||
+/* Status of command-line arguments. */
|
||
+
|
||
+enum operand_status
|
||
+ {
|
||
+ /* This argument must be an operand, i.e., one of the files to be
|
||
+ joined. */
|
||
+ MUST_BE_OPERAND,
|
||
+
|
||
+ /* This might be the argument of the preceding -j1 or -j2 option,
|
||
+ or it might be an operand. */
|
||
+ MIGHT_BE_J1_ARG,
|
||
+ MIGHT_BE_J2_ARG,
|
||
+
|
||
+ /* This might be the argument of the preceding -o option, or it might be
|
||
+ an operand. */
|
||
+ MIGHT_BE_O_ARG
|
||
+ };
|
||
+
|
||
+/* Record NAME as an input file name in NAMES, with *PREV_OPTC_STATUS
+   as its entry in OPERAND_STATUS; *NFILES is the number of names
+   recorded so far and is updated.
+
+   When two names are already recorded, one of them is reinterpreted
+   as the argument of the option its status names (-j1, -j2 or -o) and
+   dropped from the list: entry 1 if entry 0 is definitely an operand,
+   otherwise entry 0.  If the chosen entry is itself definitely an
+   operand, NAME is an extra operand and the program exits via usage.
+
+   If *PREV_OPTC_STATUS is MIGHT_BE_O_ARG, *OPTC_STATUS is set to
+   MIGHT_BE_O_ARG as well.  */
+
+static void
+add_file_name (char *name, char *names[2],
+               int operand_status[2], int joption_count[2], int *nfiles,
+               int *prev_optc_status, int *optc_status)
+{
+  int count = *nfiles;
+
+  if (count == 2)
+    {
+      bool first_is_operand = (operand_status[0] == MUST_BE_OPERAND);
+      char *arg = names[first_is_operand];
+
+      switch (operand_status[first_is_operand])
+        {
+        case MUST_BE_OPERAND:
+          error (0, 0, _("extra operand %s"), quote (name));
+          usage (EXIT_FAILURE);
+
+        case MIGHT_BE_J1_ARG:
+          --joption_count[0];
+          set_join_field (&join_field_1, string_to_join_field (arg));
+          break;
+
+        case MIGHT_BE_J2_ARG:
+          --joption_count[1];
+          set_join_field (&join_field_2, string_to_join_field (arg));
+          break;
+
+        case MIGHT_BE_O_ARG:
+          add_field_list (arg);
+          break;
+        }
+
+      /* Entry 0 was consumed: move entry 1 down into its place.  */
+      if (!first_is_operand)
+        {
+          names[0] = names[1];
+          operand_status[0] = operand_status[1];
+        }
+      count = 1;
+    }
+
+  names[count] = name;
+  operand_status[count] = *prev_optc_status;
+  *nfiles = count + 1;
+
+  if (*prev_optc_status == MIGHT_BE_O_ARG)
+    *optc_status = MIGHT_BE_O_ARG;
+}
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ int optc_status;
|
||
+ int prev_optc_status = MUST_BE_OPERAND;
|
||
+ int operand_status[2];
|
||
+ int joption_count[2] = { 0, 0 };
|
||
+ char *names[2];
|
||
+ FILE *fp1, *fp2;
|
||
+ int optc;
|
||
+ int nfiles = 0;
|
||
+ int i;
|
||
+ /* Overview: set up locale and exit handlers, parse options and operands, open both inputs, run join, and exit with failure if a disorder warning was issued. */
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+ hard_LC_COLLATE = hard_locale (LC_COLLATE);
|
||
+
|
||
+ atexit (close_stdout);
|
||
+ atexit (free_spareline);
|
||
+
|
||
+ print_pairables = true;
|
||
+ seen_unpairable = false;
|
||
+ issued_disorder_warning[0] = issued_disorder_warning[1] = false;
|
||
+ check_input_order = CHECK_ORDER_DEFAULT;
|
||
+ /* The leading '-' in the option string makes getopt_long report each non-option argument as option 1 (handled by case 1 below). */
|
||
+ while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
|
||
+ longopts, NULL))
|
||
+ != -1)
|
||
+ {
|
||
+ optc_status = MUST_BE_OPERAND; /* Default; the -j, -o and non-option cases below may override it. */
|
||
+
|
||
+ switch (optc)
|
||
+ {
|
||
+ case 'v':
|
||
+ print_pairables = false;
|
||
+ /* Fall through. */
|
||
+
|
||
+ case 'a':
|
||
+ {
|
||
+ unsigned long int val;
|
||
+ if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
|
||
+ || (val != 1 && val != 2))
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("invalid field number: %s"), quote (optarg));
|
||
+ if (val == 1)
|
||
+ print_unpairables_1 = true;
|
||
+ else
|
||
+ print_unpairables_2 = true;
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case 'e':
|
||
+ if (empty_filler && ! STREQ (empty_filler, optarg))
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("conflicting empty-field replacement strings"));
|
||
+ empty_filler = optarg;
|
||
+ break;
|
||
+
|
||
+ case 'i':
|
||
+ ignore_case = true;
|
||
+ break;
|
||
+
|
||
+ case '1':
|
||
+ set_join_field (&join_field_1, string_to_join_field (optarg));
|
||
+ break;
|
||
+
|
||
+ case '2':
|
||
+ set_join_field (&join_field_2, string_to_join_field (optarg));
|
||
+ break;
|
||
+
|
||
+ case 'j':
|
||
+ if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
|
||
+ && optarg == argv[optind - 1] + 2)
|
||
+ {
|
||
+ /* The argument was either "-j1" or "-j2". */
|
||
+ bool is_j2 = (optarg[0] == '2');
|
||
+ joption_count[is_j2]++;
|
||
+ optc_status = MIGHT_BE_J1_ARG + is_j2; /* MIGHT_BE_J2_ARG directly follows MIGHT_BE_J1_ARG in the enum. */
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ set_join_field (&join_field_1, string_to_join_field (optarg));
|
||
+ set_join_field (&join_field_2, join_field_1);
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case 'o':
|
||
+ add_field_list (optarg);
|
||
+ optc_status = MIGHT_BE_O_ARG;
|
||
+ break;
|
||
+
|
||
+ case 't':
|
||
+ {
|
||
+ char *newtab;
|
||
+ size_t newtablen;
|
||
+ if (! optarg[0])
|
||
+ error (EXIT_FAILURE, 0, _("empty tab"));
|
||
+ newtab = xstrdup (optarg);
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ mbstate_t state;
|
||
+
|
||
+ memset (&state, 0, sizeof (mbstate_t));
|
||
+ newtablen = mbrtowc (NULL, newtab,
|
||
+ strnlen (newtab, MB_LEN_MAX),
|
||
+ &state);
|
||
+ if (newtablen == (size_t) 0
|
||
+ || newtablen == (size_t) -1
|
||
+ || newtablen == (size_t) -2)
|
||
+ newtablen = 1; /* Null, invalid or incomplete sequence: treat the tab as one byte. */
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
+ newtablen = 1;
|
||
+
|
||
+ if (newtablen == 1 && newtab[1])
|
||
+ {
|
||
+ if (STREQ (newtab, "\\0"))
|
||
+ newtab[0] = '\0';
|
||
+ }
|
||
+ if (tab != NULL && strcmp (tab, newtab))
|
||
+ {
|
||
+ free (newtab);
|
||
+ error (EXIT_FAILURE, 0, _("incompatible tabs"));
|
||
+ }
|
||
+ tab = newtab;
|
||
+ tablen = newtablen;
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case NOCHECK_ORDER_OPTION:
|
||
+ check_input_order = CHECK_ORDER_DISABLED;
|
||
+ break;
|
||
+
|
||
+ case CHECK_ORDER_OPTION:
|
||
+ check_input_order = CHECK_ORDER_ENABLED;
|
||
+ break;
|
||
+
|
||
+ case 1: /* Non-option argument. */
|
||
+ add_file_name (optarg, names, operand_status, joption_count,
|
||
+ &nfiles, &prev_optc_status, &optc_status);
|
||
+ break;
|
||
+
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+
|
||
+ default:
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ /* Remember this argument's status; add_file_name reads it for the next non-option argument. */
|
||
+ prev_optc_status = optc_status;
|
||
+ }
|
||
+
|
||
+ /* Process any operands after "--". */
|
||
+ prev_optc_status = MUST_BE_OPERAND;
|
||
+ while (optind < argc)
|
||
+ add_file_name (argv[optind++], names, operand_status, joption_count,
|
||
+ &nfiles, &prev_optc_status, &optc_status);
|
||
+
|
||
+ if (nfiles != 2)
|
||
+ {
|
||
+ if (nfiles == 0)
|
||
+ error (0, 0, _("missing operand"));
|
||
+ else
|
||
+ error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+
|
||
+ /* If "-j1" was specified and it turns out not to have had an argument,
|
||
+ treat it as "-j 1". Likewise for -j2. */
|
||
+ for (i = 0; i < 2; i++)
|
||
+ if (joption_count[i] != 0)
|
||
+ {
|
||
+ set_join_field (&join_field_1, i); /* I is 0 for -j1 and 1 for -j2. */
|
||
+ set_join_field (&join_field_2, i);
|
||
+ }
|
||
+
|
||
+ if (join_field_1 == SIZE_MAX)
|
||
+ join_field_1 = 0;
|
||
+ if (join_field_2 == SIZE_MAX)
|
||
+ join_field_2 = 0;
|
||
+
|
||
+ fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r");
|
||
+ if (!fp1)
|
||
+ error (EXIT_FAILURE, errno, "%s", names[0]);
|
||
+ fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r");
|
||
+ if (!fp2)
|
||
+ error (EXIT_FAILURE, errno, "%s", names[1]);
|
||
+ if (fp1 == fp2) /* NOTE(review): nothing on this path appears to set errno -- confirm that passing errno (not 0) below is intended. */
|
||
+ error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
|
||
+ join (fp1, fp2);
|
||
+
|
||
+ if (fclose (fp1) != 0)
|
||
+ error (EXIT_FAILURE, errno, "%s", names[0]);
|
||
+ if (fclose (fp2) != 0)
|
||
+ error (EXIT_FAILURE, errno, "%s", names[1]);
|
||
+ /* Exit with failure when a disorder warning was issued for either input. */
|
||
+ if (issued_disorder_warning[0] || issued_disorder_warning[1])
|
||
+ exit (EXIT_FAILURE);
|
||
+ else
|
||
+ exit (EXIT_SUCCESS);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/src/pr.c coreutils-8.0/src/pr.c
|
||
--- coreutils-8.0-orig/src/pr.c 2009-09-29 15:27:54.000000000 +0200
|
||
+++ coreutils-8.0/src/pr.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -312,6 +312,32 @@
|
||
|
||
#include <getopt.h>
|
||
#include <sys/types.h>
|
||
+
|
||
+/* Get MB_LEN_MAX. */
|
||
+#include <limits.h>
|
||
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
|
||
+ installation; work around this configuration error. */
|
||
+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
|
||
+# define MB_LEN_MAX 16
|
||
+#endif
|
||
+
|
||
+/* Get MB_CUR_MAX. */
|
||
+#include <stdlib.h>
|
||
+
|
||
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
|
||
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+
|
||
+/* Get iswprint(). -- for wcwidth(). */
|
||
+#if HAVE_WCTYPE_H
|
||
+# include <wctype.h>
|
||
+#endif
|
||
+#if !defined iswprint && !HAVE_ISWPRINT
|
||
+# define iswprint(wc) 1
|
||
+#endif
|
||
+
|
||
#include "system.h"
|
||
#include "error.h"
|
||
#include "hard-locale.h"
|
||
@@ -322,6 +348,18 @@
|
||
#include "strftime.h"
|
||
#include "xstrtol.h"
|
||
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
+#ifndef HAVE_DECL_WCWIDTH
|
||
+"this configure-time declaration test was not run"
|
||
+#endif
|
||
+#if !HAVE_DECL_WCWIDTH
|
||
+extern int wcwidth ();
|
||
+#endif
|
||
+
|
||
/* The official name of this program (e.g., no `g' prefix). */
|
||
#define PROGRAM_NAME "pr"
|
||
|
||
@@ -414,7 +452,20 @@ struct COLUMN
|
||
|
||
typedef struct COLUMN COLUMN;
|
||
|
||
-static int char_to_clump (char c);
|
||
+/* Function pointers to switch functions for single byte locale or for
|
||
+ multibyte locale. If multibyte functions do not exist in your system,
|
||
+ these pointers always point to the function for single byte locale. */
|
||
+static void (*print_char) (char c);
|
||
+static int (*char_to_clump) (char c);
|
||
+
|
||
+/* Functions for single byte locale. */
|
||
+static void print_char_single (char c);
|
||
+static int char_to_clump_single (char c);
|
||
+
|
||
+/* Functions for multibyte locale. */
|
||
+static void print_char_multi (char c);
|
||
+static int char_to_clump_multi (char c);
|
||
+
|
||
static bool read_line (COLUMN *p);
|
||
static bool print_page (void);
|
||
static bool print_stored (COLUMN *p);
|
||
@@ -424,6 +475,7 @@ static void print_header (void);
|
||
static void pad_across_to (int position);
|
||
static void add_line_number (COLUMN *p);
|
||
static void getoptarg (char *arg, char switch_char, char *character,
|
||
+ int *character_length, int *character_width,
|
||
int *number);
|
||
void usage (int status);
|
||
static void print_files (int number_of_files, char **av);
|
||
@@ -438,7 +490,6 @@ static void store_char (char c);
|
||
static void pad_down (int lines);
|
||
static void read_rest_of_line (COLUMN *p);
|
||
static void skip_read (COLUMN *p, int column_number);
|
||
-static void print_char (char c);
|
||
static void cleanup (void);
|
||
static void print_sep_string (void);
|
||
static void separator_string (const char *optarg_S);
|
||
@@ -450,7 +501,7 @@ static COLUMN *column_vector;
|
||
we store the leftmost columns contiguously in buff.
|
||
To print a line from buff, get the index of the first character
|
||
from line_vector[i], and print up to line_vector[i + 1]. */
|
||
-static char *buff;
|
||
+static unsigned char *buff;
|
||
|
||
/* Index of the position in buff where the next character
|
||
will be stored. */
|
||
@@ -554,7 +605,7 @@ static int chars_per_column;
|
||
static bool untabify_input = false;
|
||
|
||
/* (-e) The input tab character. */
|
||
-static char input_tab_char = '\t';
|
||
+static char input_tab_char[MB_LEN_MAX] = "\t";
|
||
|
||
/* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
|
||
where the leftmost column is 1. */
|
||
@@ -564,7 +615,10 @@ static int chars_per_input_tab = 8;
|
||
static bool tabify_output = false;
|
||
|
||
/* (-i) The output tab character. */
|
||
-static char output_tab_char = '\t';
|
||
+static char output_tab_char[MB_LEN_MAX] = "\t";
|
||
+
|
||
+/* (-i) The byte length of output tab character. */
|
||
+static int output_tab_char_length = 1;
|
||
|
||
/* (-i) The width of the output tab. */
|
||
static int chars_per_output_tab = 8;
|
||
@@ -638,7 +692,13 @@ static int power_10;
|
||
static bool numbered_lines = false;
|
||
|
||
/* (-n) Character which follows each line number. */
|
||
-static char number_separator = '\t';
|
||
+static char number_separator[MB_LEN_MAX] = "\t";
|
||
+
|
||
+/* (-n) The byte length of the character which follows each line number. */
|
||
+static int number_separator_length = 1;
|
||
+
|
||
+/* (-n) The character width of the character which follows each line number. */
|
||
+static int number_separator_width = 0;
|
||
|
||
/* (-n) line counting starts with 1st line of input file (not with 1st
|
||
line of 1st page printed). */
|
||
@@ -691,6 +751,7 @@ static bool use_col_separator = false;
|
||
-a|COLUMN|-m is a `space' and with the -J option a `tab'. */
|
||
static char *col_sep_string = (char *) "";
|
||
static int col_sep_length = 0;
|
||
+static int col_sep_width = 0;
|
||
static char *column_separator = (char *) " ";
|
||
static char *line_separator = (char *) "\t";
|
||
|
||
@@ -847,6 +908,13 @@ separator_string (const char *optarg_S)
|
||
col_sep_length = (int) strlen (optarg_S);
|
||
col_sep_string = xmalloc (col_sep_length + 1);
|
||
strcpy (col_sep_string, optarg_S);
|
||
+
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ col_sep_width = mbswidth (col_sep_string, 0);
|
||
+ else
|
||
+#endif
|
||
+ col_sep_width = col_sep_length;
|
||
}
|
||
|
||
int
|
||
@@ -871,6 +939,21 @@ main (int argc, char **argv)
|
||
|
||
atexit (close_stdout);
|
||
|
||
+/* Define which functions are used, the ones for single byte locale or the ones
|
||
+ for multibyte locale. */
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ print_char = print_char_multi;
|
||
+ char_to_clump = char_to_clump_multi;
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
+ {
|
||
+ print_char = print_char_single;
|
||
+ char_to_clump = char_to_clump_single;
|
||
+ }
|
||
+
|
||
n_files = 0;
|
||
file_names = (argc > 1
|
||
? xmalloc ((argc - 1) * sizeof (char *))
|
||
@@ -947,8 +1030,12 @@ main (int argc, char **argv)
|
||
break;
|
||
case 'e':
|
||
if (optarg)
|
||
- getoptarg (optarg, 'e', &input_tab_char,
|
||
- &chars_per_input_tab);
|
||
+ {
|
||
+ int dummy_length, dummy_width;
|
||
+
|
||
+ getoptarg (optarg, 'e', input_tab_char, &dummy_length,
|
||
+ &dummy_width, &chars_per_input_tab);
|
||
+ }
|
||
/* Could check tab width > 0. */
|
||
untabify_input = true;
|
||
break;
|
||
@@ -961,8 +1048,12 @@ main (int argc, char **argv)
|
||
break;
|
||
case 'i':
|
||
if (optarg)
|
||
- getoptarg (optarg, 'i', &output_tab_char,
|
||
- &chars_per_output_tab);
|
||
+ {
|
||
+ int dummy_width;
|
||
+
|
||
+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
|
||
+ &dummy_width, &chars_per_output_tab);
|
||
+ }
|
||
/* Could check tab width > 0. */
|
||
tabify_output = true;
|
||
break;
|
||
@@ -989,8 +1080,8 @@ main (int argc, char **argv)
|
||
case 'n':
|
||
numbered_lines = true;
|
||
if (optarg)
|
||
- getoptarg (optarg, 'n', &number_separator,
|
||
- &chars_per_number);
|
||
+ getoptarg (optarg, 'n', number_separator, &number_separator_length,
|
||
+ &number_separator_width, &chars_per_number);
|
||
break;
|
||
case 'N':
|
||
skip_count = false;
|
||
@@ -1029,7 +1120,7 @@ main (int argc, char **argv)
|
||
old_s = false;
|
||
/* Reset an additional input of -s, -S dominates -s */
|
||
col_sep_string = bad_cast ("");
|
||
- col_sep_length = 0;
|
||
+ col_sep_length = col_sep_width = 0;
|
||
use_col_separator = true;
|
||
if (optarg)
|
||
separator_string (optarg);
|
||
@@ -1186,10 +1277,45 @@ main (int argc, char **argv)
|
||
a number. */
|
||
|
||
static void
|
||
-getoptarg (char *arg, char switch_char, char *character, int *number)
|
||
+getoptarg (char *arg, char switch_char, char *character, int *character_length,
|
||
+ int *character_width, int *number)
|
||
{
|
||
if (!ISDIGIT (*arg))
|
||
- *character = *arg++;
|
||
+ {
|
||
+#ifdef HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1) /* for multibyte locale. */
|
||
+ {
|
||
+ wchar_t wc;
|
||
+ size_t mblength;
|
||
+ int width;
|
||
+ mbstate_t state = {'\0'};
|
||
+
|
||
+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
|
||
+
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ *character_length = 1;
|
||
+ *character_width = 1;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ *character_length = (mblength < 1) ? 1 : mblength;
|
||
+ width = wcwidth (wc);
|
||
+ *character_width = (width < 0) ? 0 : width;
|
||
+ }
|
||
+
|
||
+ strncpy (character, arg, *character_length);
|
||
+ arg += *character_length;
|
||
+ }
|
||
+ else /* for single byte locale. */
|
||
+#endif
|
||
+ {
|
||
+ *character = *arg++;
|
||
+ *character_length = 1;
|
||
+ *character_width = 1;
|
||
+ }
|
||
+ }
|
||
+
|
||
if (*arg)
|
||
{
|
||
long int tmp_long;
|
||
@@ -1248,7 +1374,7 @@ init_parameters (int number_of_files)
|
||
else
|
||
col_sep_string = column_separator;
|
||
|
||
- col_sep_length = 1;
|
||
+ col_sep_length = col_sep_width = 1;
|
||
use_col_separator = true;
|
||
}
|
||
/* It's rather pointless to define a TAB separator with column
|
||
@@ -1279,11 +1405,11 @@ init_parameters (int number_of_files)
|
||
TAB_WIDTH (chars_per_input_tab, chars_per_number); */
|
||
|
||
/* Estimate chars_per_text without any margin and keep it constant. */
|
||
- if (number_separator == '\t')
|
||
+ if (number_separator[0] == '\t')
|
||
number_width = chars_per_number +
|
||
TAB_WIDTH (chars_per_default_tab, chars_per_number);
|
||
else
|
||
- number_width = chars_per_number + 1;
|
||
+ number_width = chars_per_number + number_separator_width;
|
||
|
||
/* The number is part of the column width unless we are
|
||
printing files in parallel. */
|
||
@@ -1298,7 +1424,7 @@ init_parameters (int number_of_files)
|
||
}
|
||
|
||
chars_per_column = (chars_per_line - chars_used_by_number -
|
||
- (columns - 1) * col_sep_length) / columns;
|
||
+ (columns - 1) * col_sep_width) / columns;
|
||
|
||
if (chars_per_column < 1)
|
||
error (EXIT_FAILURE, 0, _("page width too narrow"));
|
||
@@ -1423,7 +1549,7 @@ init_funcs (void)
|
||
|
||
/* Enlarge p->start_position of first column to use the same form of
|
||
padding_not_printed with all columns. */
|
||
- h = h + col_sep_length;
|
||
+ h = h + col_sep_width;
|
||
|
||
/* This loop takes care of all but the rightmost column. */
|
||
|
||
@@ -1457,7 +1583,7 @@ init_funcs (void)
|
||
}
|
||
else
|
||
{
|
||
- h = h_next + col_sep_length;
|
||
+ h = h_next + col_sep_width;
|
||
h_next = h + chars_per_column;
|
||
}
|
||
}
|
||
@@ -1747,9 +1873,9 @@ static void
|
||
align_column (COLUMN *p)
|
||
{
|
||
padding_not_printed = p->start_position;
|
||
- if (padding_not_printed - col_sep_length > 0)
|
||
+ if (padding_not_printed - col_sep_width > 0)
|
||
{
|
||
- pad_across_to (padding_not_printed - col_sep_length);
|
||
+ pad_across_to (padding_not_printed - col_sep_width);
|
||
padding_not_printed = ANYWHERE;
|
||
}
|
||
|
||
@@ -2020,13 +2146,13 @@ store_char (char c)
|
||
/* May be too generous. */
|
||
buff = X2REALLOC (buff, &buff_allocated);
|
||
}
|
||
- buff[buff_current++] = c;
|
||
+ buff[buff_current++] = (unsigned char) c;
|
||
}
|
||
|
||
static void
|
||
add_line_number (COLUMN *p)
|
||
{
|
||
- int i;
|
||
+ int i, j;
|
||
char *s;
|
||
int left_cut;
|
||
|
||
@@ -2049,22 +2175,24 @@ add_line_number (COLUMN *p)
|
||
/* Tabification is assumed for multiple columns, also for n-separators,
|
||
but `default n-separator = TAB' hasn't been given priority over
|
||
equal column_width also specified by POSIX. */
|
||
- if (number_separator == '\t')
|
||
+ if (number_separator[0] == '\t')
|
||
{
|
||
i = number_width - chars_per_number;
|
||
while (i-- > 0)
|
||
(p->char_func) (' ');
|
||
}
|
||
else
|
||
- (p->char_func) (number_separator);
|
||
+ for (j = 0; j < number_separator_length; j++)
|
||
+ (p->char_func) (number_separator[j]);
|
||
}
|
||
else
|
||
/* To comply with POSIX, we avoid any expansion of default TAB
|
||
separator with a single column output. No column_width requirement
|
||
has to be considered. */
|
||
{
|
||
- (p->char_func) (number_separator);
|
||
- if (number_separator == '\t')
|
||
+ for (j = 0; j < number_separator_length; j++)
|
||
+ (p->char_func) (number_separator[j]);
|
||
+ if (number_separator[0] == '\t')
|
||
output_position = POS_AFTER_TAB (chars_per_output_tab,
|
||
output_position);
|
||
}
|
||
@@ -2225,7 +2353,7 @@ print_white_space (void)
|
||
while (goal - h_old > 1
|
||
&& (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
|
||
{
|
||
- putchar (output_tab_char);
|
||
+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
|
||
h_old = h_new;
|
||
}
|
||
while (++h_old <= goal)
|
||
@@ -2245,6 +2373,7 @@ print_sep_string (void)
|
||
{
|
||
char *s;
|
||
int l = col_sep_length;
|
||
+ int not_space_flag;
|
||
|
||
s = col_sep_string;
|
||
|
||
@@ -2258,6 +2387,7 @@ print_sep_string (void)
|
||
{
|
||
for (; separators_not_printed > 0; --separators_not_printed)
|
||
{
|
||
+ not_space_flag = 0;
|
||
while (l-- > 0)
|
||
{
|
||
/* 3 types of sep_strings: spaces only, spaces and chars,
|
||
@@ -2271,12 +2401,15 @@ print_sep_string (void)
|
||
}
|
||
else
|
||
{
|
||
+ not_space_flag = 1;
|
||
if (spaces_not_printed > 0)
|
||
print_white_space ();
|
||
putchar (*s++);
|
||
- ++output_position;
|
||
}
|
||
}
|
||
+ if (not_space_flag)
|
||
+ output_position += col_sep_width;
|
||
+
|
||
/* sep_string ends with some spaces */
|
||
if (spaces_not_printed > 0)
|
||
print_white_space ();
|
||
@@ -2304,7 +2437,7 @@ print_clump (COLUMN *p, int n, char *clu
|
||
required number of tabs and spaces. */
|
||
|
||
static void
|
||
-print_char (char c)
|
||
+print_char_single (char c)
|
||
{
|
||
if (tabify_output)
|
||
{
|
||
@@ -2328,6 +2461,74 @@ print_char (char c)
|
||
putchar (c);
|
||
}
|
||
|
||
+#ifdef HAVE_MBRTOWC
|
||
+/* Multibyte counterpart of print_char_single.  When tabifying, buffer
|
||
+   byte C until mbrtowc decodes a character, then print it and advance
|
||
+   OUTPUT_POSITION by its screen width; otherwise just print C.  */
|
||
+static void
|
||
+print_char_multi (char c)
|
||
+{
|
||
+  static size_t mbc_pos = 0;
|
||
+  static char mbc[MB_LEN_MAX] = {'\0'};
|
||
+  static mbstate_t state = {'\0'};
|
||
+  mbstate_t state_bak;
|
||
+  wchar_t wc;
|
||
+  size_t mblength;
|
||
+  int width;
|
||
+
|
||
+  if (!tabify_output)
|
||
+    {
|
||
+      putchar (c);
|
||
+      return;
|
||
+    }
|
||
+  mbc[mbc_pos++] = c;
|
||
+  while (mbc_pos > 0)
|
||
+    {
|
||
+      /* Decode on every pass: bytes left after an invalid one need it.  */
|
||
+      state_bak = state;
|
||
+      mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
|
||
+      switch (mblength)
|
||
+        {
|
||
+        case (size_t)-2:        /* Incomplete: wait for more bytes.  */
|
||
+          state = state_bak;
|
||
+          return;
|
||
+        case (size_t)-1:        /* Invalid: print the first byte as is.  */
|
||
+          state = state_bak;
|
||
+          ++output_position;
|
||
+          putchar (mbc[0]);
|
||
+          memmove (mbc, mbc + 1, mbc_pos - 1);
|
||
+          --mbc_pos;
|
||
+          break;
|
||
+        case 0:
|
||
+          mblength = 1;
|
||
+          /* Fall through.  */
|
||
+        default:
|
||
+          if (wc == L' ')
|
||
+            {
|
||
+              memmove (mbc, mbc + mblength, mbc_pos - mblength);
|
||
+              mbc_pos -= mblength;
|
||
+              ++spaces_not_printed;
|
||
+              break;
|
||
+            }
|
||
+          if (spaces_not_printed > 0)
|
||
+            print_white_space ();
|
||
+          /* Nonprintables are assumed to have width 0, except L'\b'. */
|
||
+          if ((width = wcwidth (wc)) < 1)
|
||
+            {
|
||
+              if (wc == L'\b')
|
||
+                --output_position;
|
||
+            }
|
||
+          else
|
||
+            output_position += width;
|
||
+
|
||
+          fwrite (mbc, sizeof(char), mblength, stdout);
|
||
+          memmove (mbc, mbc + mblength, mbc_pos - mblength);
|
||
+          mbc_pos -= mblength;
|
||
+        }
|
||
+    }
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Skip to page PAGE before printing.
|
||
PAGE may be larger than total number of pages. */
|
||
|
||
@@ -2507,9 +2708,9 @@ read_line (COLUMN *p)
|
||
align_empty_cols = false;
|
||
}
|
||
|
||
- if (padding_not_printed - col_sep_length > 0)
|
||
+ if (padding_not_printed - col_sep_width > 0)
|
||
{
|
||
- pad_across_to (padding_not_printed - col_sep_length);
|
||
+ pad_across_to (padding_not_printed - col_sep_width);
|
||
padding_not_printed = ANYWHERE;
|
||
}
|
||
|
||
@@ -2610,9 +2811,9 @@ print_stored (COLUMN *p)
|
||
}
|
||
}
|
||
|
||
- if (padding_not_printed - col_sep_length > 0)
|
||
+ if (padding_not_printed - col_sep_width > 0)
|
||
{
|
||
- pad_across_to (padding_not_printed - col_sep_length);
|
||
+ pad_across_to (padding_not_printed - col_sep_width);
|
||
padding_not_printed = ANYWHERE;
|
||
}
|
||
|
||
@@ -2625,8 +2826,8 @@ print_stored (COLUMN *p)
|
||
if (spaces_not_printed == 0)
|
||
{
|
||
output_position = p->start_position + end_vector[line];
|
||
- if (p->start_position - col_sep_length == chars_per_margin)
|
||
- output_position -= col_sep_length;
|
||
+ if (p->start_position - col_sep_width == chars_per_margin)
|
||
+ output_position -= col_sep_width;
|
||
}
|
||
|
||
return true;
|
||
@@ -2645,7 +2846,7 @@ print_stored (COLUMN *p)
|
||
number of characters is 1.) */
|
||
|
||
static int
|
||
-char_to_clump (char c)
|
||
+char_to_clump_single (char c)
|
||
{
|
||
unsigned char uc = c;
|
||
char *s = clump_buff;
|
||
@@ -2655,10 +2856,10 @@ char_to_clump (char c)
|
||
int chars;
|
||
int chars_per_c = 8;
|
||
|
||
- if (c == input_tab_char)
|
||
+ if (c == input_tab_char[0])
|
||
chars_per_c = chars_per_input_tab;
|
||
|
||
- if (c == input_tab_char || c == '\t')
|
||
+ if (c == input_tab_char[0] || c == '\t')
|
||
{
|
||
width = TAB_WIDTH (chars_per_c, input_position);
|
||
|
||
@@ -2739,6 +2940,154 @@ char_to_clump (char c)
|
||
return chars;
|
||
}
|
||
|
||
+#ifdef HAVE_MBRTOWC
|
||
+/* Multibyte counterpart of char_to_clump_single.  Buffer byte C until
|
||
+   mbrtowc decodes a character, then store that character's printed form
|
||
+   in CLUMP_BUFF and add its screen width to INPUT_POSITION.  Return the
|
||
+   number of bytes stored in CLUMP_BUFF; this is 0 while the character
|
||
+   is still incomplete.  */
|
||
+static int
|
||
+char_to_clump_multi (char c)
|
||
+{
|
||
+  static size_t mbc_pos = 0;
|
||
+  static char mbc[MB_LEN_MAX] = {'\0'};
|
||
+  static mbstate_t state = {'\0'};
|
||
+  mbstate_t state_bak;
|
||
+  wchar_t wc;
|
||
+  size_t mblength;
|
||
+  int wc_width;
|
||
+  register char *s = clump_buff;
|
||
+  register int i, j;
|
||
+  char esc_buff[4];
|
||
+  int width = 0;
|
||
+  int chars = 0;
|
||
+  int chars_per_c = 8;
|
||
+
|
||
+  mbc[mbc_pos++] = c;
|
||
+
|
||
+  while (mbc_pos > 0)
|
||
+    {
|
||
+      /* Decode on every pass so that the bytes left after an invalid
|
||
+         one are examined themselves instead of reusing a stale WC.  */
|
||
+      state_bak = state;
|
||
+      mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
|
||
+      if (mblength == (size_t)-2)
|
||
+        {
|
||
+          /* Incomplete character: keep the bytes for the next call.  */
|
||
+          state = state_bak;
|
||
+          break;
|
||
+        }
|
||
+
|
||
+      switch (mblength)
|
||
+        {
|
||
+        case (size_t)-1:
|
||
+          /* Invalid byte: emit it alone, escaped if requested.  */
|
||
+          state = state_bak;
|
||
+          mblength = 1;
|
||
+
|
||
+          if (use_esc_sequence || use_cntrl_prefix)
|
||
+            {
|
||
+              width += 4;
|
||
+              chars += 4;
|
||
+              *s++ = '\\';
|
||
+              /* Cast: a negative char would overflow ESC_BUFF.  */
|
||
+              sprintf (esc_buff, "%03o", (unsigned char) mbc[0]);
|
||
+              for (i = 0; i <= 2; ++i)
|
||
+                *s++ = (int) esc_buff[i];
|
||
+            }
|
||
+          else
|
||
+            {
|
||
+              width += 1;
|
||
+              chars += 1;
|
||
+              *s++ = mbc[0];
|
||
+            }
|
||
+          break;
|
||
+
|
||
+        case 0:
|
||
+          mblength = 1;
|
||
+          /* Fall through */
|
||
+
|
||
+        default:
|
||
+          /* The input tab character and TAB advance to a tab stop.  */
|
||
+          if (memcmp (mbc, input_tab_char, mblength) == 0)
|
||
+            chars_per_c = chars_per_input_tab;
|
||
+
|
||
+          if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
|
||
+            {
|
||
+              int width_inc;
|
||
+
|
||
+              width_inc = TAB_WIDTH (chars_per_c, input_position);
|
||
+              width += width_inc;
|
||
+
|
||
+              if (untabify_input)
|
||
+                {
|
||
+                  for (i = width_inc; i; --i)
|
||
+                    *s++ = ' ';
|
||
+                  chars += width_inc;
|
||
+                }
|
||
+              else
|
||
+                {
|
||
+                  for (i = 0; i < mblength; i++)
|
||
+                    *s++ = mbc[i];
|
||
+                  chars += mblength;
|
||
+                }
|
||
+            }
|
||
+          else if ((wc_width = wcwidth (wc)) < 1)
|
||
+            {
|
||
+              /* Nonprintable or zero-width character.  */
|
||
+              if (use_esc_sequence || (use_cntrl_prefix && wc >= 0200))
|
||
+                {
|
||
+                  /* Escape each byte of the character in octal.  */
|
||
+                  for (i = 0; i < mblength; i++)
|
||
+                    {
|
||
+                      width += 4;
|
||
+                      chars += 4;
|
||
+                      *s++ = '\\';
|
||
+                      sprintf (esc_buff, "%03o", (unsigned char) mbc[i]);
|
||
+                      for (j = 0; j <= 2; ++j)
|
||
+                        *s++ = (int) esc_buff[j];
|
||
+                    }
|
||
+                }
|
||
+              else if (use_cntrl_prefix)
|
||
+                {
|
||
+                  width += 2;
|
||
+                  chars += 2;
|
||
+                  *s++ = '^';
|
||
+                  *s++ = wc ^ 0100;
|
||
+                }
|
||
+              else if (wc == L'\b')
|
||
+                {
|
||
+                  width += -1;
|
||
+                  chars += 1;
|
||
+                  *s++ = c;
|
||
+                }
|
||
+              else
|
||
+                {
|
||
+                  width += 0;
|
||
+                  chars += mblength;
|
||
+                  for (i = 0; i < mblength; i++)
|
||
+                    *s++ = mbc[i];
|
||
+                }
|
||
+            }
|
||
+          else
|
||
+            {
|
||
+              width += wc_width;
|
||
+              chars += mblength;
|
||
+              for (i = 0; i < mblength; i++)
|
||
+                *s++ = mbc[i];
|
||
+            }
|
||
+        }
|
||
+
|
||
+      /* Discard the bytes just handled.  */
|
||
+      memmove (mbc, mbc + mblength, mbc_pos - mblength);
|
||
+      mbc_pos -= mblength;
|
||
+    }
|
||
+
|
||
+  input_position += width;
|
||
+  return chars;
|
||
+}
|
||
+#endif
|
||
+
|
||
/* We've just printed some files and need to clean up things before
|
||
looking for more options and printing the next batch of files.
|
||
|
||
diff -urNp coreutils-8.0-orig/src/pr.c.orig coreutils-8.0/src/pr.c.orig
|
||
--- coreutils-8.0-orig/src/pr.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/pr.c.orig 2009-09-29 15:27:54.000000000 +0200
|
||
@@ -0,0 +1,2877 @@
|
||
+/* pr -- convert text files for printing.
|
||
+ Copyright (C) 88, 91, 1995-2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* By Pete TerMaat, with considerable refinement by Roland Huebner. */
|
||
+
|
||
+/* Things to watch: Sys V screws up on ...
|
||
+ pr -n -3 -s: /usr/dict/words
|
||
+ pr -m -o10 -n /usr/dict/words{,,,}
|
||
+ pr -6 -a -n -o5 /usr/dict/words
|
||
+
|
||
+ Ideas:
|
||
+
|
||
+ Keep a things_to_do list of functions to call when we know we have
|
||
+ something to print. Cleaner than current series of checks.
|
||
+
|
||
+ Improve the printing of control prefixes.
|
||
+
|
||
+ Expand the file name in the centered header line to a full file name.
|
||
+
|
||
+
|
||
+ Concept:
|
||
+
|
||
+ If the input_tab_char differs from the default value TAB
|
||
+ (`-e[CHAR[...]]' is used), any input text tab is expanded to the
|
||
+ default width of 8 spaces (compare char_to_clump). - Same as SunOS
|
||
+ does.
|
||
+
|
||
+ The treatment of the number_separator (compare add_line_number):
|
||
+ The default value TAB of the number_separator (`-n[SEP[...]]') is not
|
||
+ treated as an input character. An optional `-e'-input has no
|
||
+ effect.
|
||
+ - With single column output
|
||
+ only one POSIX requirement has to be met:
|
||
+ The default n-separator should be a TAB. The consequence is a
|
||
+ different width between the number and the text if the output position
|
||
+ of the separator changes, i.e. it depends upon the left margin used.
|
||
+ That's not nice but easy-to-use together with the defaults of other
|
||
+ utilities, e.g. sort or cut. - Same as SunOS does.
|
||
+ - With multicolumn output
|
||
+ two conflicting POSIX requirements exist:
|
||
+ First `default n-separator is TAB', second `output text columns shall
|
||
+ be of equal width'. Moreover POSIX specifies the number+separator a
|
||
+ part of the column, together with `-COLUMN' and `-a -COLUMN'.
|
||
+ (With -m output the number shall occupy each line only once. Exactly
|
||
+ the same situation as single column output exists.)
|
||
+ GNU pr gives priority to the 2nd requirement and observes POSIX
|
||
+ column definition. The n-separator TAB is expanded to the same number
|
||
+ of spaces in each column using the default value 8. Tabification is
|
||
+ only performed if it is compatible with the output position.
|
||
+ Consequence: The output text columns are of equal width. The layout
|
||
+ of a page does not change if the left margin varies. - Looks better
|
||
+ than the SunOS approach.
|
||
+ SunOS pr gives priority to the 1st requirement. n-separator TAB
|
||
+ width varies with each column. Only the width of text part of the
|
||
+ column is fixed.
|
||
+ Consequence: The output text columns don't have equal width. The
|
||
+ widths and the layout of the whole page varies with the left margin.
|
||
+ An overflow of the line length (without margin) over the input value
|
||
+ PAGE_WIDTH may occur.
|
||
+
|
||
+ The interference of the POSIX-compliant small letter options -w and -s:
|
||
+ (`interference' means `setting a _separator_ with -s switches off the
|
||
+ column structure and the default - not generally - page_width,
|
||
+ acts on -w option')
|
||
+ options: text form / separator: equivalent new options:
|
||
+ -w l -s[x]
|
||
+ --------------------------------------------------------------------
|
||
+ 1. -- -- columns / space --
|
||
+ trunc. to page_width = 72
|
||
+ 2. -- -s[:] full lines / TAB[:] -J --sep-string[="<TAB>"|:]
|
||
+ no truncation
|
||
+ 3. -w l -- columns / space -W l
|
||
+ trunc. to page_width = l
|
||
+ 4. -w l -s[:] columns / no sep.[:] -W l --sep-string[=:]
|
||
+ trunc. to page_width = l
|
||
+ --------------------------------------------------------------------
|
||
+
|
||
+
|
||
+ Options:
|
||
+
|
||
+ Including version 1.22i:
|
||
+ Some SMALL LETTER options have been redefined with the object of a
|
||
+ better POSIX compliance. The output of some further cases has been
|
||
+ adapted to other UNIXes. A violation of downward compatibility has to
|
||
+ be accepted.
|
||
+ Some NEW CAPITAL LETTER options (-J, -S, -W) have been introduced to
|
||
+ turn off unexpected interferences of small letter options (-s and -w
|
||
+ together with the three column options).
|
||
+ -N option and the second argument LAST_PAGE of +FIRST_PAGE offer more
|
||
+ flexibility; The detailed handling of form feeds set in the input
|
||
+ files requires -T option.
|
||
+
|
||
+ Capital letter options dominate small letter ones.
|
||
+
|
||
+ Some of the option-arguments cannot be specified as separate arguments
|
||
+ from the preceding option letter (already stated in POSIX specification).
|
||
+
|
||
+ Form feeds in the input cause page breaks in the output. Multiple
|
||
+ form feeds produce empty pages.
|
||
+
|
||
+ +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE]
|
||
+ begin [stop] printing with page FIRST_[LAST_]PAGE
|
||
+
|
||
+ -COLUMN, --columns=COLUMN
|
||
+ Produce output that is COLUMN columns wide and
|
||
+ print columns down, unless -a is used. Balance number of
|
||
+ lines in the columns on each page.
|
||
+
|
||
+ -a, --across Print columns across rather than down, used
|
||
+ together with -COLUMN. The input
|
||
+ one
|
||
+ two
|
||
+ three
|
||
+ four
|
||
+ will be printed with `-a -3' as
|
||
+ one two three
|
||
+ four
|
||
+
|
||
+ -b Balance columns on the last page.
|
||
+ -b is no longer an independent option. It's always used
|
||
+ together with -COLUMN (unless -a is used) to get a
|
||
+ consistent formulation with "FF set by hand" in input
|
||
+ files. Each formfeed found terminates the number of lines
|
||
+ to be read with the actual page. The situation for
|
||
+ printing columns down is equivalent to that on the last
|
||
+ page. So we need a balancing.
|
||
+
|
||
+ Keeping -b as an underground option guarantees some
|
||
+ downward compatibility. Utilities using pr with -b
|
||
+ (a most frequently used form) still work as usual.
|
||
+
|
||
+ -c, --show-control-chars
|
||
+ Print unprintable characters as control prefixes.
|
||
+ Control-g is printed as ^G (use hat notation) and
|
||
+ octal backslash notation.
|
||
+
|
||
+ -d, --double-space Double space the output.
|
||
+
|
||
+ -D FORMAT, --date-format=FORMAT Use FORMAT for the header date.
|
||
+
|
||
+ -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]]
|
||
+ Expand tabs to spaces on input. Optional argument CHAR
|
||
+ is the input TAB character. (Default is TAB). Optional
|
||
+ argument WIDTH is the input TAB character's width.
|
||
+ (Default is 8.)
|
||
+
|
||
+ -F, -f, --form-feed Use formfeeds instead of newlines to separate
|
||
+ pages. A three line HEADER is used, no TRAILER with -F,
|
||
+ without -F both HEADER and TRAILER are made of five lines.
|
||
+
|
||
+ -h HEADER, --header=HEADER
|
||
+ Replace the filename in the header with the string HEADER.
|
||
+ A centered header is used.
|
||
+
|
||
+ -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]]
|
||
+ Replace spaces with tabs on output. Optional argument
|
||
+ CHAR is the output TAB character. (Default is TAB).
|
||
+ Optional argument WIDTH is the output TAB character's
|
||
+ width. (Default is 8)
|
||
+
|
||
+ -J, --join-lines Merge lines of full length, turns off -W/-w
|
||
+ line truncation, no column alignment, --sep-string[=STRING]
|
||
+ sets separators, works with all column options
|
||
+ (-COLUMN | -a -COLUMN | -m).
|
||
+ -J has been introduced (together with -W and --sep-string) to
|
||
+ disentangle the old (POSIX compliant) options -w, -s
|
||
+ along with the 3 column options.
|
||
+
|
||
+ -l PAGE_LENGTH, --length=PAGE_LENGTH
|
||
+ Set the page length to PAGE_LENGTH lines. Default is 66,
|
||
+ including 5 lines of HEADER and 5 lines of TRAILER
|
||
+ without -F, but only 3 lines of HEADER and no TRAILER
|
||
+ with -F (i.e. the number of text lines defaults to 56 or
|
||
+ 63 respectively).
|
||
+
|
||
+ -m, --merge Print files in parallel; pad_across_to align
|
||
+ columns; truncate lines and print separator strings;
|
||
+ Do it also with empty columns to get a continuous line
|
||
+ numbering and column marking by separators throughout
|
||
+ the whole merged file.
|
||
+
|
||
+ Empty pages in some input files produce empty columns
|
||
+ [marked by separators] in the merged pages. Completely
|
||
+ empty merged pages show no column separators at all.
|
||
+
|
||
+ The layout of a merged page is ruled by the largest form
|
||
+ feed distance of the single pages at that page. Shorter
|
||
+ columns will be filled up with empty lines.
|
||
+
|
||
+ Together with -J option join lines of full length and
|
||
+ set separators when -S option is used.
|
||
+
|
||
+ -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]]
|
||
+ Provide DIGITS digit line numbering (default for DIGITS
|
||
+ is 5). With multicolumn output the number occupies the
|
||
+ first DIGITS column positions of each text column or only
|
||
+ each line of -m output.
|
||
+ With single column output the number precedes each line
|
||
+ just as -m output.
|
||
+ Optional argument SEP is the character appended to the
|
||
+ line number to separate it from the text followed.
|
||
+ The default separator is a TAB. In a strict sense a TAB
|
||
+ is always printed with single column output only. The
|
||
+ TAB-width varies with the TAB-position, e.g. with the
|
||
+ left margin specified by -o option.
|
||
+ With multicolumn output priority is given to `equal width
|
||
+ of output columns' (a POSIX specification). The TAB-width
|
||
+ is fixed to the value of the 1st column and does not
|
||
+ change with different values of left margin. That means a
|
||
+ fixed number of spaces is always printed in the place of
|
||
+ a TAB. The tabification depends upon the output
|
||
+ position.
|
||
+
|
||
+ Default counting of the line numbers starts with 1st
|
||
+ line of the input file (not the 1st line printed,
|
||
+ compare the --page option and -N option).
|
||
+
|
||
+ -N NUMBER, --first-line-number=NUMBER
|
||
+ Start line counting with the number NUMBER at the 1st
|
||
+ line of first page printed (mostly not the 1st line of
|
||
+ the input file).
|
||
+
|
||
+ -o MARGIN, --indent=MARGIN
|
||
+ Offset each line with a margin MARGIN spaces wide.
|
||
+ Total page width is the size of the margin plus the
|
||
+ PAGE_WIDTH set with -W/-w option.
|
||
+
|
||
+ -r, --no-file-warnings
|
||
+ Omit warning when a file cannot be opened.
|
||
+
|
||
+ -s[CHAR], --separator[=CHAR]
|
||
+ Separate columns by a single character CHAR, default for
|
||
+ CHAR is the TAB character without -w and 'no char' with -w.
|
||
+ Without `-s' default separator `space' is set.
|
||
+ -s[CHAR] turns off line truncation of all 3 column options
|
||
+ (-COLUMN|-a -COLUMN|-m) except -w is set. That is a POSIX
|
||
+ compliant formulation. The source code translates -s into
|
||
+ the new options -S and -J, also -W if required.
|
||
+
|
||
+ -S STRING, --sep-string[=STRING]
|
||
+ Separate columns by any string STRING. The -S option
|
||
+ doesn't react upon the -W/-w option (unlike -s option
|
||
+ does). It defines a separator, nothing else.
|
||
+ Without -S: Default separator TAB is used with -J and
|
||
+ `space' otherwise (same as -S" ").
|
||
+ With -S "": No separator is used.
|
||
+ Quotes should be used with blanks and some shell active
|
||
+ characters.
|
||
+ -S is problematic because in its obsolete form you
|
||
+ cannot use -S "STRING", but in its standard form you
|
||
+ must use -S "STRING" if STRING is empty. Use
|
||
+ --sep-string to avoid the ambiguity.
|
||
+
|
||
+ -t, --omit-header Do not print headers or footers but retain form
|
||
+ feeds set in the input files.
|
||
+
|
||
+ -T, --omit-pagination
|
||
+ Do not print headers or footers, eliminate any pagination
|
||
+ by form feeds set in the input files.
|
||
+
|
||
+ -v, --show-nonprinting
|
||
+ Print unprintable characters as escape sequences. Use
|
||
+ octal backslash notation. Control-G becomes \007.
|
||
+
|
||
+ -w PAGE_WIDTH, --width=PAGE_WIDTH
|
||
+ Set page width to PAGE_WIDTH characters for multiple
|
||
+ text-column output only (default for PAGE_WIDTH is 72).
|
||
+ -s[CHAR] turns off the default page width and any line
|
||
+ truncation. Lines of full length will be merged,
|
||
+ regardless of the column options set. A POSIX compliant
|
||
+ formulation.
|
||
+
|
||
+ -W PAGE_WIDTH, --page-width=PAGE_WIDTH
|
||
+ Set the page width to PAGE_WIDTH characters. That's valid
|
||
+ with and without a column option. Text lines will be
|
||
+ truncated, unless -J is used. Together with one of the
|
||
+ column options (-COLUMN| -a -COLUMN| -m) column alignment
|
||
+ is always used.
|
||
+ Default is 72 characters.
|
||
+ Without -W PAGE_WIDTH
|
||
+ - but with one of the column options default truncation of
|
||
+ 72 characters is used (to keep downward compatibility
|
||
+ and to simplify most frequently met column tasks).
|
||
+ Column alignment and column separators are used.
|
||
+ - and without any of the column options NO line truncation
|
||
+ is used (to keep downward compatibility and to meet most
|
||
+ frequent tasks). That's equivalent to -W 72 -J .
|
||
+
|
||
+ With/without -W PAGE_WIDTH the header line is always
|
||
+ truncated to avoid line overflow.
|
||
+
|
||
+ (In pr versions newer than 1.14 the -S option no longer
|
||
+ affects the -W option.)
|
||
+
|
||
+*/
|
||
+
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <getopt.h>
|
||
+#include <sys/types.h>
|
||
+#include "system.h"
|
||
+#include "error.h"
|
||
+#include "hard-locale.h"
|
||
+#include "mbswidth.h"
|
||
+#include "quote.h"
|
||
+#include "stat-time.h"
|
||
+#include "stdio--.h"
|
||
+#include "strftime.h"
|
||
+#include "xstrtol.h"
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "pr"
|
||
+
|
||
+#define AUTHORS \
|
||
+ proper_name ("Pete TerMaat"), \
|
||
+ proper_name ("Roland Huebner")
|
||
+
|
||
+/* Used with start_position in the struct COLUMN described below.
|
||
+ If start_position == ANYWHERE, we aren't truncating columns and
|
||
+ can begin printing a column anywhere. Otherwise we must pad to
|
||
+ the horizontal position start_position. */
|
||
+#define ANYWHERE 0
|
||
+
|
||
+/* Each column has one of these structures allocated for it.
|
||
+ If we're only dealing with one file, fp is the same for all
|
||
+ columns.
|
||
+
|
||
+ The general strategy is to spend time setting up these column
|
||
+ structures (storing columns if necessary), after which printing
|
||
+ is a matter of flitting from column to column and calling
|
||
+ print_func.
|
||
+
|
||
+ Parallel files, single files printing across in multiple
|
||
+ columns, and single files printing down in multiple columns all
|
||
+ fit the same printing loop.
|
||
+
|
||
+ print_func Function used to print lines in this column.
|
||
+ If we're storing this column it will be
|
||
+ print_stored(), otherwise it will be read_line().
|
||
+
|
||
+ char_func Function used to process characters in this column.
|
||
+ If we're storing this column it will be store_char(),
|
||
+ otherwise it will be print_char().
|
||
+
|
||
+ current_line Index of the current entry in line_vector, which
|
||
+ contains the index of the first character of the
|
||
+ current line in buff[].
|
||
+
|
||
+ lines_stored Number of lines in this column which are stored in
|
||
+ buff.
|
||
+
|
||
+ lines_to_print If we're storing this column, lines_to_print is
|
||
+ the number of stored_lines which remain to be
|
||
+ printed. Otherwise it is the number of lines
|
||
+ we can print without exceeding lines_per_body.
|
||
+
|
||
+ start_position The horizontal position we want to be in before we
|
||
+ print the first character in this column.
|
||
+
|
||
+ numbered True means precede this column with a line number. */
|
||
+
|
||
+/* FIXME: There are many unchecked integer overflows in this file,
|
||
+ that will cause this command to misbehave given large inputs or
|
||
+ options. Many of the "int" values below should be "size_t" or
|
||
+ something else like that. */
|
||
+
|
||
+struct COLUMN;
|
||
+struct COLUMN
|
||
+ {
|
||
+ FILE *fp; /* Input stream for this column. */
|
||
+ char const *name; /* File name. */
|
||
+ enum
|
||
+ {
|
||
+ OPEN,
|
||
+ FF_FOUND, /* used with -b option, set with \f, changed
|
||
+ to ON_HOLD after print_header */
|
||
+ ON_HOLD, /* Hit a form feed. */
|
||
+ CLOSED
|
||
+ }
|
||
+ status; /* Status of the file pointer. */
|
||
+
|
||
+ /* Func to print lines in this col. */
|
||
+ bool (*print_func) (struct COLUMN *);
|
||
+
|
||
+ /* Func to print/store chars in this col. */
|
||
+ void (*char_func) (char);
|
||
+
|
||
+ int current_line; /* Index of current place in line_vector. */
|
||
+ int lines_stored; /* Number of lines stored in buff. */
|
||
+ int lines_to_print; /* No. lines stored or space left on page. */
|
||
+ int start_position; /* Horizontal position of first char. */
|
||
+ bool numbered; /* True means precede this column with a line number. */
|
||
+ bool full_page_printed; /* True means printed without a FF found. */
|
||
+
|
||
+ /* p->full_page_printed controls a special case of "FF set by hand":
|
||
+ True means a full page has been printed without FF found. To avoid an
|
||
+ additional empty page we have to ignore a FF immediately following in
|
||
+ the next line. */
|
||
+ };
|
||
+
|
||
+typedef struct COLUMN COLUMN;
|
||
+
|
||
+static int char_to_clump (char c);
|
||
+static bool read_line (COLUMN *p);
|
||
+static bool print_page (void);
|
||
+static bool print_stored (COLUMN *p);
|
||
+static bool open_file (char *name, COLUMN *p);
|
||
+static bool skip_to_page (uintmax_t page);
|
||
+static void print_header (void);
|
||
+static void pad_across_to (int position);
|
||
+static void add_line_number (COLUMN *p);
|
||
+static void getoptarg (char *arg, char switch_char, char *character,
|
||
+ int *number);
|
||
+void usage (int status);
|
||
+static void print_files (int number_of_files, char **av);
|
||
+static void init_parameters (int number_of_files);
|
||
+static void init_header (char const *filename, int desc);
|
||
+static bool init_fps (int number_of_files, char **av);
|
||
+static void init_funcs (void);
|
||
+static void init_store_cols (void);
|
||
+static void store_columns (void);
|
||
+static void balance (int total_stored);
|
||
+static void store_char (char c);
|
||
+static void pad_down (int lines);
|
||
+static void read_rest_of_line (COLUMN *p);
|
||
+static void skip_read (COLUMN *p, int column_number);
|
||
+static void print_char (char c);
|
||
+static void cleanup (void);
|
||
+static void print_sep_string (void);
|
||
+static void separator_string (const char *optarg_S);
|
||
+
|
||
+/* All of the columns to print. */
|
||
+static COLUMN *column_vector;
|
||
+
|
||
+/* When printing a single file in multiple downward columns,
|
||
+ we store the leftmost columns contiguously in buff.
|
||
+ To print a line from buff, get the index of the first character
|
||
+ from line_vector[i], and print up to line_vector[i + 1]. */
|
||
+static char *buff;
|
||
+
|
||
+/* Index of the position in buff where the next character
|
||
+ will be stored. */
|
||
+static unsigned int buff_current;
|
||
+
|
||
+/* The number of characters in buff.
|
||
+ Used for allocation of buff and to detect overflow of buff. */
|
||
+static size_t buff_allocated;
|
||
+
|
||
+/* Array of indices into buff.
|
||
+ Each entry is an index of the first character of a line.
|
||
+ This is used when storing lines to facilitate shuffling when
|
||
+ we do column balancing on the last page. */
|
||
+static int *line_vector;
|
||
+
|
||
+/* Array of horizontal positions.
|
||
+ For each line in line_vector, end_vector[line] is the horizontal
|
||
+ position we are in after printing that line. We keep track of this
|
||
+ so that we know how much we need to pad to prepare for the next
|
||
+ column. */
|
||
+static int *end_vector;
|
||
+
|
||
+/* (-m) True means we're printing multiple files in parallel. */
|
||
+static bool parallel_files = false;
|
||
+
|
||
+/* (-m) True means a line starts with some empty columns (some files
|
||
+ already CLOSED or ON_HOLD) which we have to align. */
|
||
+static bool align_empty_cols;
|
||
+
|
||
+/* (-m) True means we have not yet found any printable column in a line.
|
||
+ align_empty_cols = true has to be maintained. */
|
||
+static bool empty_line;
|
||
+
|
||
+/* (-m) False means printable column output precedes a form feed found.
|
||
+ Column alignment is done only once. No additional action with that form
|
||
+ feed.
|
||
+ True means we found only a form feed in a column. Maybe we have to do
|
||
+ some column alignment with that form feed. */
|
||
+static bool FF_only;
|
||
+
|
||
+/* (-[0-9]+) True means we're given an option explicitly specifying
|
||
+ number of columns. Used to detect when this option is used with -m
|
||
+ and when translating old options to new/long options. */
|
||
+static bool explicit_columns = false;
|
||
+
|
||
+/* (-t|-T) False means we aren't printing headers and footers. */
|
||
+static bool extremities = true;
|
||
+
|
||
+/* (-t) True means we retain all FF set by hand in input files.
|
||
+ False is set with -T option. */
|
||
+static bool keep_FF = false;
|
||
+static bool print_a_FF = false;
|
||
+
|
||
+/* True means we need to print a header as soon as we know we've got input
|
||
+ to print after it. */
|
||
+static bool print_a_header;
|
||
+
|
||
+/* (-f) True means use formfeeds instead of newlines to separate pages. */
|
||
+static bool use_form_feed = false;
|
||
+
|
||
+/* True means we have read the standard input. */
|
||
+static bool have_read_stdin = false;
|
||
+
|
||
+/* True means the -a flag has been given. */
|
||
+static bool print_across_flag = false;
|
||
+
|
||
+/* True means we're printing one file in multiple (>1) downward columns. */
|
||
+static bool storing_columns = true;
|
||
+
|
||
+/* (-b) True means balance columns on the last page as Sys V does. */
|
||
+/* That's no longer an independent option. With storing_columns = true
|
||
+ balance_columns = true is used too (s. function init_parameters).
|
||
+ We get a consistent formulation with "FF set by hand" in input files. */
|
||
+static bool balance_columns = false;
|
||
+
|
||
+/* (-l) Number of lines on a page, including header and footer lines. */
|
||
+static int lines_per_page = 66;
|
||
+
|
||
+/* Number of lines in the header and footer can be reset to 0 using
|
||
+ the -t flag. */
|
||
+enum { lines_per_header = 5 };
|
||
+static int lines_per_body;
|
||
+enum { lines_per_footer = 5 };
|
||
+
|
||
+/* (-w|-W) Width in characters of the page. Does not include the width of
|
||
+ the margin. */
|
||
+static int chars_per_line = 72;
|
||
+
|
||
+/* (-w|-W) True means we truncate lines longer than chars_per_column. */
|
||
+static bool truncate_lines = false;
|
||
+
|
||
+/* (-J) True means we join lines without any line truncation. -J
|
||
+ dominates -w option. */
|
||
+static bool join_lines = false;
|
||
+
|
||
+/* Number of characters in a column. Based on col_sep_length and
|
||
+ page width. */
|
||
+static int chars_per_column;
|
||
+
|
||
+/* (-e) True means convert tabs to spaces on input. */
|
||
+static bool untabify_input = false;
|
||
+
|
||
+/* (-e) The input tab character. */
|
||
+static char input_tab_char = '\t';
|
||
+
|
||
+/* (-e) Tabstops are at chars_per_input_tab, 2*chars_per_input_tab,
|
||
+ 3*chars_per_input_tab, ..., where the leftmost column is 1. */
|
||
+static int chars_per_input_tab = 8;
|
||
+
|
||
+/* (-i) True means convert spaces to tabs on output. */
|
||
+static bool tabify_output = false;
|
||
+
|
||
+/* (-i) The output tab character. */
|
||
+static char output_tab_char = '\t';
|
||
+
|
||
+/* (-i) The width of the output tab. */
|
||
+static int chars_per_output_tab = 8;
|
||
+
|
||
+/* Keeps track of pending white space. When we hit a nonspace
|
||
+ character after some whitespace, we print whitespace, tabbing
|
||
+ if necessary to get to output_position + spaces_not_printed. */
|
||
+static int spaces_not_printed;
|
||
+
|
||
+/* (-o) Number of spaces in the left margin (tabs used when possible). */
|
||
+static int chars_per_margin = 0;
|
||
+
|
||
+/* Position where the next character will fall.
|
||
+ Leftmost position is 0 + chars_per_margin.
|
||
+ Rightmost position is chars_per_margin + chars_per_line - 1.
|
||
+ This is important for converting spaces to tabs on output. */
|
||
+static int output_position;
|
||
+
|
||
+/* Horizontal position relative to the current file.
|
||
+ (output_position depends on where we are on the page;
|
||
+ input_position depends on where we are in the file.)
|
||
+ Important for converting tabs to spaces on input. */
|
||
+static int input_position;
|
||
+
|
||
+/* True if there were any failed opens so we can exit with nonzero
|
||
+ status. */
|
||
+static bool failed_opens = false;
|
||
+
|
||
+/* The number of spaces taken up if we print a tab character with width
|
||
+ c_ from position h_. */
|
||
+#define TAB_WIDTH(c_, h_) ((c_) - ((h_) % (c_)))
|
||
+
|
||
+/* The horizontal position we'll be at after printing a tab character
|
||
+ of width c_ from the position h_. */
|
||
+#define POS_AFTER_TAB(c_, h_) ((h_) + TAB_WIDTH (c_, h_))
|
||
+
|
||
+/* (-NNN) Number of columns of text to print. */
|
||
+static int columns = 1;
|
||
+
|
||
+/* (+NNN:MMM) Page numbers on which to begin and stop printing.
|
||
+ first_page_number = 0 will be used to check input only. */
|
||
+static uintmax_t first_page_number = 0;
|
||
+static uintmax_t last_page_number = UINTMAX_MAX;
|
||
+
|
||
+/* Number of files open (not closed, not on hold). */
|
||
+static int files_ready_to_read = 0;
|
||
+
|
||
+/* Current page number. Displayed in header. */
|
||
+static uintmax_t page_number;
|
||
+
|
||
+/* Current line number. Displayed when -n flag is specified.
|
||
+
|
||
+ When printing files in parallel (-m flag), line numbering is as follows:
|
||
+ 1 foo goo moo
|
||
+ 2 hoo too zoo
|
||
+
|
||
+ When printing files across (-a flag), ...
|
||
+ 1 foo 2 moo 3 goo
|
||
+ 4 hoo 5 too 6 zoo
|
||
+
|
||
+ Otherwise, line numbering is as follows:
|
||
+ 1 foo 3 goo 5 too
|
||
+ 2 moo 4 hoo 6 zoo */
|
||
+static int line_number;
|
||
+
|
||
+/* With line_number overflow, we use power_10 to cut off the higher-order
|
||
+ digits of the line_number */
|
||
+static int power_10;
|
||
+
|
||
+/* (-n) True means lines should be preceded by numbers. */
|
||
+static bool numbered_lines = false;
|
||
+
|
||
+/* (-n) Character which follows each line number. */
|
||
+static char number_separator = '\t';
|
||
+
|
||
+/* (-n) line counting starts with 1st line of input file (not with 1st
|
||
+ line of 1st page printed). */
|
||
+static int line_count = 1;
|
||
+
|
||
+/* (-n) True means counting of skipped lines starts with 1st line of
|
||
+ input file. False means -N option is used in addition, counting of
|
||
+ skipped lines not required. */
|
||
+static bool skip_count = true;
|
||
+
|
||
+/* (-N) Counting starts with start_line_num = NUMBER at 1st line of
|
||
+ first page printed, usually not 1st page of input file. */
|
||
+static int start_line_num = 1;
|
||
+
|
||
+/* (-n) Width in characters of a line number. */
|
||
+static int chars_per_number = 5;
|
||
+
|
||
+/* Used when widening the first column to accommodate numbers -- only
|
||
+ needed when printing files in parallel. Includes width of both the
|
||
+ number and the number_separator. */
|
||
+static int number_width;
|
||
+
|
||
+/* Buffer sprintf uses to format a line number. */
|
||
+static char *number_buff;
|
||
+
|
||
+/* (-v) True means unprintable characters are printed as escape sequences.
|
||
+ control-g becomes \007. */
|
||
+static bool use_esc_sequence = false;
|
||
+
|
||
+/* (-c) True means unprintable characters are printed as control prefixes.
|
||
+ control-g becomes ^G. */
|
||
+static bool use_cntrl_prefix = false;
|
||
+
|
||
+/* (-d) True means output is double spaced. */
|
||
+static bool double_space = false;
|
||
+
|
||
+/* Number of files opened initially in init_fps. Should be 1
|
||
+ unless we're printing multiple files in parallel. */
|
||
+static int total_files = 0;
|
||
+
|
||
+/* (-r) True means don't complain if we can't open a file. */
|
||
+static bool ignore_failed_opens = false;
|
||
+
|
||
+/* (-S) True means we separate columns with a specified string.
|
||
+ -S option does not affect line truncation nor column alignment. */
|
||
+static bool use_col_separator = false;
|
||
+
|
||
+/* String used to separate columns if the -S option has been specified.
|
||
+ Default without -S but together with one of the column options
|
||
+ -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
|
||
+static char *col_sep_string = (char *) "";
|
||
+static int col_sep_length = 0;
|
||
+static char *column_separator = (char *) " ";
|
||
+static char *line_separator = (char *) "\t";
|
||
+
|
||
+/* Number of separator characters waiting to be printed as soon as we
|
||
+ know that we have any input remaining to be printed. */
|
||
+static int separators_not_printed;
|
||
+
|
||
+/* Position we need to pad to, as soon as we know that we have input
|
||
+ remaining to be printed. */
|
||
+static int padding_not_printed;
|
||
+
|
||
+/* True means we should pad the end of the page. Remains false until we
|
||
+ know we have a page to print. */
|
||
+static bool pad_vertically;
|
||
+
|
||
+/* (-h) String of characters used in place of the filename in the header. */
|
||
+static char *custom_header;
|
||
+
|
||
+/* (-D) Date format for the header. */
|
||
+static char const *date_format;
|
||
+
|
||
+/* Date and file name for the header. */
|
||
+static char *date_text;
|
||
+static char const *file_text;
|
||
+
|
||
+/* Output columns available, not counting the date and file name. */
|
||
+static int header_width_available;
|
||
+
|
||
+static char *clump_buff;
|
||
+
|
||
+/* True means we read the line no. lines_per_body in skip_read
|
||
+ called by skip_to_page. That variable controls the coincidence of a
|
||
+ "FF set by hand" and "full_page_printed", see above the definition of
|
||
+ structure COLUMN. */
|
||
+static bool last_line = false;
|
||
+
|
||
+/* For long options that have no equivalent short option, use a
|
||
+ non-character as a pseudo short option, starting with CHAR_MAX + 1. */
|
||
+enum
|
||
+{
|
||
+ COLUMNS_OPTION = CHAR_MAX + 1,
|
||
+ PAGES_OPTION
|
||
+};
|
||
+
|
||
+static char const short_options[] =
|
||
+ "-0123456789D:FJN:S::TW:abcde::fh:i::l:mn::o:rs::tvw:";
|
||
+
|
||
+static struct option const long_options[] =
|
||
+{
|
||
+ {"pages", required_argument, NULL, PAGES_OPTION},
|
||
+ {"columns", required_argument, NULL, COLUMNS_OPTION},
|
||
+ {"across", no_argument, NULL, 'a'},
|
||
+ {"show-control-chars", no_argument, NULL, 'c'},
|
||
+ {"double-space", no_argument, NULL, 'd'},
|
||
+ {"date-format", required_argument, NULL, 'D'},
|
||
+ {"expand-tabs", optional_argument, NULL, 'e'},
|
||
+ {"form-feed", no_argument, NULL, 'f'},
|
||
+ {"header", required_argument, NULL, 'h'},
|
||
+ {"output-tabs", optional_argument, NULL, 'i'},
|
||
+ {"join-lines", no_argument, NULL, 'J'},
|
||
+ {"length", required_argument, NULL, 'l'},
|
||
+ {"merge", no_argument, NULL, 'm'},
|
||
+ {"number-lines", optional_argument, NULL, 'n'},
|
||
+ {"first-line-number", required_argument, NULL, 'N'},
|
||
+ {"indent", required_argument, NULL, 'o'},
|
||
+ {"no-file-warnings", no_argument, NULL, 'r'},
|
||
+ {"separator", optional_argument, NULL, 's'},
|
||
+ {"sep-string", optional_argument, NULL, 'S'},
|
||
+ {"omit-header", no_argument, NULL, 't'},
|
||
+ {"omit-pagination", no_argument, NULL, 'T'},
|
||
+ {"show-nonprinting", no_argument, NULL, 'v'},
|
||
+ {"width", required_argument, NULL, 'w'},
|
||
+ {"page-width", required_argument, NULL, 'W'},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0}
|
||
+};
|
||
+
|
||
+/* Return the number of columns that have either an open file or
|
||
+ stored lines. */
|
||
+
|
||
+static int
|
||
+cols_ready_to_print (void)
|
||
+{
|
||
+ COLUMN *q;
|
||
+ int i;
|
||
+ int n;
|
||
+
|
||
+ n = 0;
|
||
+ for (q = column_vector, i = 0; i < columns; ++q, ++i)
|
||
+ if (q->status == OPEN ||
|
||
+ q->status == FF_FOUND || /* With -b: To print a header only */
|
||
+ (storing_columns && q->lines_stored > 0 && q->lines_to_print > 0))
|
||
+ ++n;
|
||
+ return n;
|
||
+}
|
||
+
|
||
+/* Estimate first_ / last_page_number
|
||
+ using option +FIRST_PAGE:LAST_PAGE */
|
||
+
|
||
+static bool
|
||
+first_last_page (int oi, char c, char const *pages)
|
||
+{
|
||
+ char *p;
|
||
+ uintmax_t first;
|
||
+ uintmax_t last = UINTMAX_MAX;
|
||
+ strtol_error err = xstrtoumax (pages, &p, 10, &first, "");
|
||
+ if (err != LONGINT_OK && err != LONGINT_INVALID_SUFFIX_CHAR)
|
||
+ xstrtol_fatal (err, oi, c, long_options, pages);
|
||
+
|
||
+ if (p == pages || !first)
|
||
+ return false;
|
||
+
|
||
+ if (*p == ':')
|
||
+ {
|
||
+ char const *p1 = p + 1;
|
||
+ err = xstrtoumax (p1, &p, 10, &last, "");
|
||
+ if (err != LONGINT_OK)
|
||
+ xstrtol_fatal (err, oi, c, long_options, pages);
|
||
+ if (p1 == p || last < first)
|
||
+ return false;
|
||
+ }
|
||
+
|
||
+ if (*p)
|
||
+ return false;
|
||
+
|
||
+ first_page_number = first;
|
||
+ last_page_number = last;
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* Parse column count string S, and if it's valid (1 or larger and
|
||
+ within range of the type of `columns') set the global variables
|
||
+   columns and explicit_columns.
|
||
+ Otherwise, exit with a diagnostic. */
|
||
+static void
|
||
+parse_column_count (char const *s)
|
||
+{
|
||
+ long int tmp_long;
|
||
+ if (xstrtol (s, NULL, 10, &tmp_long, "") != LONGINT_OK
|
||
+ || !(1 <= tmp_long && tmp_long <= INT_MAX))
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("invalid number of columns: %s"), quote (s));
|
||
+
|
||
+ columns = tmp_long;
|
||
+ explicit_columns = true;
|
||
+}
|
||
+
|
||
+/* Estimate length of col_sep_string with option -S. */
|
||
+
|
||
+static void
|
||
+separator_string (const char *optarg_S)
|
||
+{
|
||
+ col_sep_length = (int) strlen (optarg_S);
|
||
+ col_sep_string = xmalloc (col_sep_length + 1);
|
||
+ strcpy (col_sep_string, optarg_S);
|
||
+}
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ int n_files;
|
||
+ bool old_options = false;
|
||
+ bool old_w = false;
|
||
+ bool old_s = false;
|
||
+ char **file_names;
|
||
+
|
||
+ /* Accumulate the digits of old-style options like -99. */
|
||
+ char *column_count_string = NULL;
|
||
+ size_t n_digits = 0;
|
||
+ size_t n_alloc = 0;
|
||
+
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+
|
||
+ atexit (close_stdout);
|
||
+
|
||
+ n_files = 0;
|
||
+ file_names = (argc > 1
|
||
+ ? xmalloc ((argc - 1) * sizeof (char *))
|
||
+ : NULL);
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ int oi = -1;
|
||
+ int c = getopt_long (argc, argv, short_options, long_options, &oi);
|
||
+ if (c == -1)
|
||
+ break;
|
||
+
|
||
+ if (ISDIGIT (c))
|
||
+ {
|
||
+ /* Accumulate column-count digits specified via old-style options. */
|
||
+ if (n_digits + 1 >= n_alloc)
|
||
+ column_count_string
|
||
+ = X2REALLOC (column_count_string, &n_alloc);
|
||
+ column_count_string[n_digits++] = c;
|
||
+ column_count_string[n_digits] = '\0';
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ n_digits = 0;
|
||
+
|
||
+ switch (c)
|
||
+ {
|
||
+ case 1: /* Non-option argument. */
|
||
+	  /* long option --pages dominates old `+FIRST_PAGE ...'.  */
|
||
+ if (! (first_page_number == 0
|
||
+ && *optarg == '+' && first_last_page (-2, '+', optarg + 1)))
|
||
+ file_names[n_files++] = optarg;
|
||
+ break;
|
||
+
|
||
+ case PAGES_OPTION: /* --pages=FIRST_PAGE[:LAST_PAGE] */
|
||
+ { /* dominates old opt +... */
|
||
+ if (! optarg)
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("`--pages=FIRST_PAGE[:LAST_PAGE]' missing argument"));
|
||
+ else if (! first_last_page (oi, 0, optarg))
|
||
+ error (EXIT_FAILURE, 0, _("invalid page range %s"),
|
||
+ quote (optarg));
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ case COLUMNS_OPTION: /* --columns=COLUMN */
|
||
+ {
|
||
+ parse_column_count (optarg);
|
||
+
|
||
+ /* If there was a prior column count specified via the
|
||
+ short-named option syntax, e.g., -9, ensure that this
|
||
+ long-name-specified value overrides it. */
|
||
+ free (column_count_string);
|
||
+ column_count_string = NULL;
|
||
+ n_alloc = 0;
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ case 'a':
|
||
+ print_across_flag = true;
|
||
+ storing_columns = false;
|
||
+ break;
|
||
+ case 'b':
|
||
+ balance_columns = true;
|
||
+ break;
|
||
+ case 'c':
|
||
+ use_cntrl_prefix = true;
|
||
+ break;
|
||
+ case 'd':
|
||
+ double_space = true;
|
||
+ break;
|
||
+ case 'D':
|
||
+ date_format = optarg;
|
||
+ break;
|
||
+ case 'e':
|
||
+ if (optarg)
|
||
+ getoptarg (optarg, 'e', &input_tab_char,
|
||
+ &chars_per_input_tab);
|
||
+ /* Could check tab width > 0. */
|
||
+ untabify_input = true;
|
||
+ break;
|
||
+ case 'f':
|
||
+ case 'F':
|
||
+ use_form_feed = true;
|
||
+ break;
|
||
+ case 'h':
|
||
+ custom_header = optarg;
|
||
+ break;
|
||
+ case 'i':
|
||
+ if (optarg)
|
||
+ getoptarg (optarg, 'i', &output_tab_char,
|
||
+ &chars_per_output_tab);
|
||
+ /* Could check tab width > 0. */
|
||
+ tabify_output = true;
|
||
+ break;
|
||
+ case 'J':
|
||
+ join_lines = true;
|
||
+ break;
|
||
+ case 'l':
|
||
+ {
|
||
+ long int tmp_long;
|
||
+ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
|
||
+ || tmp_long <= 0 || tmp_long > INT_MAX)
|
||
+ {
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("`-l PAGE_LENGTH' invalid number of lines: %s"),
|
||
+ quote (optarg));
|
||
+ }
|
||
+ lines_per_page = tmp_long;
|
||
+ break;
|
||
+ }
|
||
+ case 'm':
|
||
+ parallel_files = true;
|
||
+ storing_columns = false;
|
||
+ break;
|
||
+ case 'n':
|
||
+ numbered_lines = true;
|
||
+ if (optarg)
|
||
+ getoptarg (optarg, 'n', &number_separator,
|
||
+ &chars_per_number);
|
||
+ break;
|
||
+ case 'N':
|
||
+ skip_count = false;
|
||
+ {
|
||
+ long int tmp_long;
|
||
+ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
|
||
+ || tmp_long > INT_MAX)
|
||
+ {
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("`-N NUMBER' invalid starting line number: %s"),
|
||
+ quote (optarg));
|
||
+ }
|
||
+ start_line_num = tmp_long;
|
||
+ break;
|
||
+ }
|
||
+ case 'o':
|
||
+ {
|
||
+ long int tmp_long;
|
||
+ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
|
||
+ || tmp_long < 0 || tmp_long > INT_MAX)
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("`-o MARGIN' invalid line offset: %s"), quote (optarg));
|
||
+ chars_per_margin = tmp_long;
|
||
+ break;
|
||
+ }
|
||
+ case 'r':
|
||
+ ignore_failed_opens = true;
|
||
+ break;
|
||
+ case 's':
|
||
+ old_options = true;
|
||
+ old_s = true;
|
||
+ if (!use_col_separator && optarg)
|
||
+ separator_string (optarg);
|
||
+ break;
|
||
+ case 'S':
|
||
+ old_s = false;
|
||
+ /* Reset an additional input of -s, -S dominates -s */
|
||
+ col_sep_string = bad_cast ("");
|
||
+ col_sep_length = 0;
|
||
+ use_col_separator = true;
|
||
+ if (optarg)
|
||
+ separator_string (optarg);
|
||
+ break;
|
||
+ case 't':
|
||
+ extremities = false;
|
||
+ keep_FF = true;
|
||
+ break;
|
||
+ case 'T':
|
||
+ extremities = false;
|
||
+ keep_FF = false;
|
||
+ break;
|
||
+ case 'v':
|
||
+ use_esc_sequence = true;
|
||
+ break;
|
||
+ case 'w':
|
||
+ old_options = true;
|
||
+ old_w = true;
|
||
+ {
|
||
+ long int tmp_long;
|
||
+ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
|
||
+ || tmp_long <= 0 || tmp_long > INT_MAX)
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("`-w PAGE_WIDTH' invalid number of characters: %s"),
|
||
+ quote (optarg));
|
||
+ if (!truncate_lines)
|
||
+ chars_per_line = tmp_long;
|
||
+ break;
|
||
+ }
|
||
+ case 'W':
|
||
+ old_w = false; /* dominates -w */
|
||
+ truncate_lines = true;
|
||
+ {
|
||
+ long int tmp_long;
|
||
+ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
|
||
+ || tmp_long <= 0 || tmp_long > INT_MAX)
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("`-W PAGE_WIDTH' invalid number of characters: %s"),
|
||
+ quote (optarg));
|
||
+ chars_per_line = tmp_long;
|
||
+ break;
|
||
+ }
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+ default:
|
||
+ usage (EXIT_FAILURE);
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (column_count_string)
|
||
+ {
|
||
+ parse_column_count (column_count_string);
|
||
+ free (column_count_string);
|
||
+ }
|
||
+
|
||
+ if (! date_format)
|
||
+ date_format = (getenv ("POSIXLY_CORRECT") && !hard_locale (LC_TIME)
|
||
+ ? "%b %e %H:%M %Y"
|
||
+ : "%Y-%m-%d %H:%M");
|
||
+
|
||
+ /* Now we can set a reasonable initial value: */
|
||
+ if (first_page_number == 0)
|
||
+ first_page_number = 1;
|
||
+
|
||
+ if (parallel_files && explicit_columns)
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("cannot specify number of columns when printing in parallel"));
|
||
+
|
||
+ if (parallel_files && print_across_flag)
|
||
+ error (EXIT_FAILURE, 0,
|
||
+ _("cannot specify both printing across and printing in parallel"));
|
||
+
|
||
+/* Translate some old short options to new/long options.
|
||
+ To meet downward compatibility with other UNIX pr utilities
|
||
+ and some POSIX specifications. */
|
||
+
|
||
+ if (old_options)
|
||
+ {
|
||
+ if (old_w)
|
||
+ {
|
||
+ if (parallel_files || explicit_columns)
|
||
+ {
|
||
+ /* activate -W */
|
||
+ truncate_lines = true;
|
||
+ if (old_s)
|
||
+ /* adapt HP-UX and SunOS: -s = no separator;
|
||
+ activate -S */
|
||
+ use_col_separator = true;
|
||
+ }
|
||
+ else
|
||
+ /* old -w sets width with columns only
|
||
+ activate -J */
|
||
+ join_lines = true;
|
||
+ }
|
||
+ else if (!use_col_separator)
|
||
+ {
|
||
+ /* No -S option read */
|
||
+ if (old_s && (parallel_files || explicit_columns))
|
||
+ {
|
||
+ if (!truncate_lines)
|
||
+ {
|
||
+ /* old -s (without -w and -W) annuls column alignment,
|
||
+ uses fields, activate -J */
|
||
+ join_lines = true;
|
||
+ if (col_sep_length > 0)
|
||
+ /* activate -S */
|
||
+ use_col_separator = true;
|
||
+ }
|
||
+ else
|
||
+ /* with -W */
|
||
+ /* adapt HP-UX and SunOS: -s = no separator;
|
||
+ activate -S */
|
||
+ use_col_separator = true;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ for (; optind < argc; optind++)
|
||
+ {
|
||
+ file_names[n_files++] = argv[optind];
|
||
+ }
|
||
+
|
||
+ if (n_files == 0)
|
||
+ {
|
||
+ /* No file arguments specified; read from standard input. */
|
||
+ print_files (0, NULL);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (parallel_files)
|
||
+ print_files (n_files, file_names);
|
||
+ else
|
||
+ {
|
||
+ int i;
|
||
+ for (i = 0; i < n_files; i++)
|
||
+ print_files (1, &file_names[i]);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ cleanup ();
|
||
+
|
||
+ if (have_read_stdin && fclose (stdin) == EOF)
|
||
+ error (EXIT_FAILURE, errno, _("standard input"));
|
||
+ if (failed_opens)
|
||
+ exit (EXIT_FAILURE);
|
||
+ exit (EXIT_SUCCESS);
|
||
+}
|
||
+
|
||
+/* Parse options of the form -scNNN.
|
||
+
|
||
+ Example: -nck, where 'n' is the option, c is the optional number
|
||
+ separator, and k is the optional width of the field used when printing
|
||
+ a number. */
|
||
+
|
||
+static void
|
||
+getoptarg (char *arg, char switch_char, char *character, int *number)
|
||
+{
|
||
+ if (!ISDIGIT (*arg))
|
||
+ *character = *arg++;
|
||
+ if (*arg)
|
||
+ {
|
||
+ long int tmp_long;
|
||
+ if (xstrtol (arg, NULL, 10, &tmp_long, "") != LONGINT_OK
|
||
+ || tmp_long <= 0 || tmp_long > INT_MAX)
|
||
+ {
|
||
+ error (0, 0,
|
||
+ _("`-%c' extra characters or invalid number in the argument: %s"),
|
||
+ switch_char, quote (arg));
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ *number = tmp_long;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Set parameters related to formatting. */
|
||
+
|
||
+static void
|
||
+init_parameters (int number_of_files)
|
||
+{
|
||
+ int chars_used_by_number = 0;
|
||
+
|
||
+ lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
|
||
+ if (lines_per_body <= 0)
|
||
+ {
|
||
+ extremities = false;
|
||
+ keep_FF = true;
|
||
+ }
|
||
+ if (extremities == false)
|
||
+ lines_per_body = lines_per_page;
|
||
+
|
||
+ if (double_space)
|
||
+ lines_per_body = lines_per_body / 2;
|
||
+
|
||
+ /* If input is stdin, cannot print parallel files. BSD dumps core
|
||
+ on this. */
|
||
+ if (number_of_files == 0)
|
||
+ parallel_files = false;
|
||
+
|
||
+ if (parallel_files)
|
||
+ columns = number_of_files;
|
||
+
|
||
+ /* One file, multi columns down: -b option is set to get a consistent
|
||
+ formulation with "FF set by hand" in input files. */
|
||
+ if (storing_columns)
|
||
+ balance_columns = true;
|
||
+
|
||
+ /* Tabification is assumed for multiple columns. */
|
||
+ if (columns > 1)
|
||
+ {
|
||
+ if (!use_col_separator)
|
||
+ {
|
||
+ /* Use default separator */
|
||
+ if (join_lines)
|
||
+ col_sep_string = line_separator;
|
||
+ else
|
||
+ col_sep_string = column_separator;
|
||
+
|
||
+ col_sep_length = 1;
|
||
+ use_col_separator = true;
|
||
+ }
|
||
+ /* It's rather pointless to define a TAB separator with column
|
||
+ alignment */
|
||
+ else if (!join_lines && *col_sep_string == '\t')
|
||
+ col_sep_string = column_separator;
|
||
+
|
||
+ truncate_lines = true;
|
||
+ tabify_output = true;
|
||
+ }
|
||
+ else
|
||
+ storing_columns = false;
|
||
+
|
||
+ /* -J dominates -w in any case */
|
||
+ if (join_lines)
|
||
+ truncate_lines = false;
|
||
+
|
||
+ if (numbered_lines)
|
||
+ {
|
||
+ int tmp_i;
|
||
+ int chars_per_default_tab = 8;
|
||
+
|
||
+ line_count = start_line_num;
|
||
+
|
||
+ /* To allow input tab-expansion (-e sensitive) use:
|
||
+ if (number_separator == input_tab_char)
|
||
+ number_width = chars_per_number +
|
||
+ TAB_WIDTH (chars_per_input_tab, chars_per_number); */
|
||
+
|
||
+ /* Estimate chars_per_text without any margin and keep it constant. */
|
||
+ if (number_separator == '\t')
|
||
+ number_width = chars_per_number +
|
||
+ TAB_WIDTH (chars_per_default_tab, chars_per_number);
|
||
+ else
|
||
+ number_width = chars_per_number + 1;
|
||
+
|
||
+ /* The number is part of the column width unless we are
|
||
+ printing files in parallel. */
|
||
+ if (parallel_files)
|
||
+ chars_used_by_number = number_width;
|
||
+
|
||
+ /* We use power_10 to cut off the higher-order digits of the
|
||
+ line_number in function add_line_number */
|
||
+ tmp_i = chars_per_number;
|
||
+ for (power_10 = 1; tmp_i > 0; --tmp_i)
|
||
+ power_10 = 10 * power_10;
|
||
+ }
|
||
+
|
||
+ chars_per_column = (chars_per_line - chars_used_by_number -
|
||
+ (columns - 1) * col_sep_length) / columns;
|
||
+
|
||
+ if (chars_per_column < 1)
|
||
+ error (EXIT_FAILURE, 0, _("page width too narrow"));
|
||
+
|
||
+ if (numbered_lines)
|
||
+ {
|
||
+ free (number_buff);
|
||
+ number_buff = xmalloc (2 * chars_per_number);
|
||
+ }
|
||
+
|
||
+ /* Pick the maximum between the tab width and the width of an
|
||
+ escape sequence.
|
||
+ The width of an escape sequence (4) isn't the lower limit any longer.
|
||
+ We've to use 8 as the lower limit, if we use chars_per_default_tab = 8
|
||
+ to expand a tab which is not an input_tab-char. */
|
||
+ free (clump_buff);
|
||
+ clump_buff = xmalloc (MAX (8, chars_per_input_tab));
|
||
+}
|
||
+
|
||
+/* Open the necessary files,
|
||
+ maintaining a COLUMN structure for each column.
|
||
+
|
||
+ With multiple files, each column p has a different p->fp.
|
||
+ With single files, each column p has the same p->fp.
|
||
+ Return false if (number_of_files > 0) and no files can be opened,
|
||
+ true otherwise.
|
||
+
|
||
+ With each column/file p, p->full_page_printed is initialized,
|
||
+ see also open_file. */
|
||
+
|
||
+static bool
|
||
+init_fps (int number_of_files, char **av)
|
||
+{
|
||
+ int i, files_left;
|
||
+ COLUMN *p;
|
||
+ FILE *firstfp;
|
||
+ char const *firstname;
|
||
+
|
||
+ total_files = 0;
|
||
+
|
||
+ free (column_vector);
|
||
+ column_vector = xnmalloc (columns, sizeof (COLUMN));
|
||
+
|
||
+ if (parallel_files)
|
||
+ {
|
||
+ files_left = number_of_files;
|
||
+ for (p = column_vector; files_left--; ++p, ++av)
|
||
+ {
|
||
+ if (! open_file (*av, p))
|
||
+ {
|
||
+ --p;
|
||
+ --columns;
|
||
+ }
|
||
+ }
|
||
+ if (columns == 0)
|
||
+ return false;
|
||
+ init_header ("", -1);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ p = column_vector;
|
||
+ if (number_of_files > 0)
|
||
+ {
|
||
+ if (! open_file (*av, p))
|
||
+ return false;
|
||
+ init_header (*av, fileno (p->fp));
|
||
+ p->lines_stored = 0;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ p->name = _("standard input");
|
||
+ p->fp = stdin;
|
||
+ have_read_stdin = true;
|
||
+ p->status = OPEN;
|
||
+ p->full_page_printed = false;
|
||
+ ++total_files;
|
||
+ init_header ("", -1);
|
||
+ p->lines_stored = 0;
|
||
+ }
|
||
+
|
||
+ firstname = p->name;
|
||
+ firstfp = p->fp;
|
||
+ for (i = columns - 1, ++p; i; --i, ++p)
|
||
+ {
|
||
+ p->name = firstname;
|
||
+ p->fp = firstfp;
|
||
+ p->status = OPEN;
|
||
+ p->full_page_printed = false;
|
||
+ p->lines_stored = 0;
|
||
+ }
|
||
+ }
|
||
+ files_ready_to_read = total_files;
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* Determine print_func and char_func, the functions
|
||
+ used by each column for printing and/or storing.
|
||
+
|
||
+ Determine the horizontal position desired when we begin
|
||
+ printing a column (p->start_position). */
|
||
+
|
||
+static void
|
||
+init_funcs (void)
|
||
+{
|
||
+ int i, h, h_next;
|
||
+ COLUMN *p;
|
||
+
|
||
+ h = chars_per_margin;
|
||
+
|
||
+ if (!truncate_lines)
|
||
+ h_next = ANYWHERE;
|
||
+ else
|
||
+ {
|
||
+ /* When numbering lines of parallel files, we enlarge the
|
||
+	 first column to accommodate the number.  Looks better than
|
||
+ the Sys V approach. */
|
||
+ if (parallel_files && numbered_lines)
|
||
+ h_next = h + chars_per_column + number_width;
|
||
+ else
|
||
+ h_next = h + chars_per_column;
|
||
+ }
|
||
+
|
||
+ /* Enlarge p->start_position of first column to use the same form of
|
||
+ padding_not_printed with all columns. */
|
||
+ h = h + col_sep_length;
|
||
+
|
||
+ /* This loop takes care of all but the rightmost column. */
|
||
+
|
||
+ for (p = column_vector, i = 1; i < columns; ++p, ++i)
|
||
+ {
|
||
+ if (storing_columns) /* One file, multi columns down. */
|
||
+ {
|
||
+ p->char_func = store_char;
|
||
+ p->print_func = print_stored;
|
||
+ }
|
||
+ else
|
||
+ /* One file, multi columns across; or parallel files. */
|
||
+ {
|
||
+ p->char_func = print_char;
|
||
+ p->print_func = read_line;
|
||
+ }
|
||
+
|
||
+ /* Number only the first column when printing files in
|
||
+ parallel. */
|
||
+ p->numbered = numbered_lines && (!parallel_files || i == 1);
|
||
+ p->start_position = h;
|
||
+
|
||
+ /* If we don't truncate lines, all start_positions are
|
||
+ ANYWHERE, except the first column's start_position when
|
||
+ using a margin. */
|
||
+
|
||
+ if (!truncate_lines)
|
||
+ {
|
||
+ h = ANYWHERE;
|
||
+ h_next = ANYWHERE;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ h = h_next + col_sep_length;
|
||
+ h_next = h + chars_per_column;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* The rightmost column.
|
||
+
|
||
+ Doesn't need to be stored unless we intend to balance
|
||
+ columns on the last page. */
|
||
+ if (storing_columns && balance_columns)
|
||
+ {
|
||
+ p->char_func = store_char;
|
||
+ p->print_func = print_stored;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ p->char_func = print_char;
|
||
+ p->print_func = read_line;
|
||
+ }
|
||
+
|
||
+ p->numbered = numbered_lines && (!parallel_files || i == 1);
|
||
+ p->start_position = h;
|
||
+}
|
||
+
|
||
+/* Open a file. Return true if successful.
|
||
+
|
||
+ With each file p, p->full_page_printed is initialized,
|
||
+ see also init_fps. */
|
||
+
|
||
+static bool
|
||
+open_file (char *name, COLUMN *p)
|
||
+{
|
||
+ if (STREQ (name, "-"))
|
||
+ {
|
||
+ p->name = _("standard input");
|
||
+ p->fp = stdin;
|
||
+ have_read_stdin = true;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ p->name = name;
|
||
+ p->fp = fopen (name, "r");
|
||
+ }
|
||
+ if (p->fp == NULL)
|
||
+ {
|
||
+ failed_opens = true;
|
||
+ if (!ignore_failed_opens)
|
||
+ error (0, errno, "%s", name);
|
||
+ return false;
|
||
+ }
|
||
+ p->status = OPEN;
|
||
+ p->full_page_printed = false;
|
||
+ ++total_files;
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* Close the file in P.
|
||
+
|
||
+ If we aren't dealing with multiple files in parallel, we change
|
||
+ the status of all columns in the column list to reflect the close. */
|
||
+
|
||
+static void
|
||
+close_file (COLUMN *p)
|
||
+{
|
||
+ COLUMN *q;
|
||
+ int i;
|
||
+
|
||
+ if (p->status == CLOSED)
|
||
+ return;
|
||
+ if (ferror (p->fp))
|
||
+ error (EXIT_FAILURE, errno, "%s", p->name);
|
||
+ if (fileno (p->fp) != STDIN_FILENO && fclose (p->fp) != 0)
|
||
+ error (EXIT_FAILURE, errno, "%s", p->name);
|
||
+
|
||
+ if (!parallel_files)
|
||
+ {
|
||
+ for (q = column_vector, i = columns; i; ++q, --i)
|
||
+ {
|
||
+ q->status = CLOSED;
|
||
+ if (q->lines_stored == 0)
|
||
+ {
|
||
+ q->lines_to_print = 0;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ p->status = CLOSED;
|
||
+ p->lines_to_print = 0;
|
||
+ }
|
||
+
|
||
+ --files_ready_to_read;
|
||
+}
|
||
+
|
||
+/* Put a file on hold until we start a new page,
|
||
+ since we've hit a form feed.
|
||
+
|
||
+ If we aren't dealing with parallel files, we must change the
|
||
+ status of all columns in the column list. */
|
||
+
|
||
+static void
|
||
+hold_file (COLUMN *p)
|
||
+{
|
||
+ COLUMN *q;
|
||
+ int i;
|
||
+
|
||
+ if (!parallel_files)
|
||
+ for (q = column_vector, i = columns; i; ++q, --i)
|
||
+ {
|
||
+ if (storing_columns)
|
||
+ q->status = FF_FOUND;
|
||
+ else
|
||
+ q->status = ON_HOLD;
|
||
+ }
|
||
+ else
|
||
+ p->status = ON_HOLD;
|
||
+
|
||
+ p->lines_to_print = 0;
|
||
+ --files_ready_to_read;
|
||
+}
|
||
+
|
||
+/* Undo hold_file -- go through the column list and change any
|
||
+ ON_HOLD columns to OPEN. Used at the end of each page. */
|
||
+
|
||
+static void
|
||
+reset_status (void)
|
||
+{
|
||
+ int i = columns;
|
||
+ COLUMN *p;
|
||
+
|
||
+ for (p = column_vector; i; --i, ++p)
|
||
+ if (p->status == ON_HOLD)
|
||
+ {
|
||
+ p->status = OPEN;
|
||
+ files_ready_to_read++;
|
||
+ }
|
||
+
|
||
+ if (storing_columns)
|
||
+ {
|
||
+ if (column_vector->status == CLOSED)
|
||
+ /* We use the info to output an error message in skip_to_page. */
|
||
+ files_ready_to_read = 0;
|
||
+ else
|
||
+ files_ready_to_read = 1;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Print a single file, or multiple files in parallel.
|
||
+
|
||
+ Set up the list of columns, opening the necessary files.
|
||
+ Allocate space for storing columns, if necessary.
|
||
+ Skip to first_page_number, if user has asked to skip leading pages.
|
||
+ Determine which functions are appropriate to store/print lines
|
||
+ in each column.
|
||
+ Print the file(s). */
|
||
+
|
||
+static void
|
||
+print_files (int number_of_files, char **av)
|
||
+{
|
||
+ init_parameters (number_of_files);
|
||
+ if (! init_fps (number_of_files, av))
|
||
+ return;
|
||
+ if (storing_columns)
|
||
+ init_store_cols ();
|
||
+
|
||
+ if (first_page_number > 1)
|
||
+ {
|
||
+ if (!skip_to_page (first_page_number))
|
||
+ return;
|
||
+ else
|
||
+ page_number = first_page_number;
|
||
+ }
|
||
+ else
|
||
+ page_number = 1;
|
||
+
|
||
+ init_funcs ();
|
||
+
|
||
+ line_number = line_count;
|
||
+ while (print_page ())
|
||
+ ;
|
||
+}
|
||
+
|
||
+/* Initialize header information.
|
||
+ If DESC is non-negative, it is a file descriptor open to
|
||
+ FILENAME for reading. */
|
||
+
|
||
+static void
|
||
+init_header (char const *filename, int desc)
|
||
+{
|
||
+ char *buf = NULL;
|
||
+ struct stat st;
|
||
+ struct timespec t;
|
||
+ int ns;
|
||
+ struct tm *tm;
|
||
+
|
||
+ /* If parallel files or standard input, use current date. */
|
||
+ if (STREQ (filename, "-"))
|
||
+ desc = -1;
|
||
+ if (0 <= desc && fstat (desc, &st) == 0)
|
||
+ t = get_stat_mtime (&st);
|
||
+ else
|
||
+ {
|
||
+ static struct timespec timespec;
|
||
+ if (! timespec.tv_sec)
|
||
+	gettime (&timespec);
|
||
+ t = timespec;
|
||
+ }
|
||
+
|
||
+ ns = t.tv_nsec;
|
||
+ tm = localtime (&t.tv_sec);
|
||
+ if (tm == NULL)
|
||
+ {
|
||
+ buf = xmalloc (INT_BUFSIZE_BOUND (long int)
|
||
+ + MAX (10, INT_BUFSIZE_BOUND (int)));
|
||
+ sprintf (buf, "%ld.%09d", (long int) t.tv_sec, ns);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ size_t bufsize = nstrftime (NULL, SIZE_MAX, date_format, tm, 0, ns) + 1;
|
||
+ buf = xmalloc (bufsize);
|
||
+ nstrftime (buf, bufsize, date_format, tm, 0, ns);
|
||
+ }
|
||
+
|
||
+ free (date_text);
|
||
+ date_text = buf;
|
||
+ file_text = custom_header ? custom_header : desc < 0 ? "" : filename;
|
||
+ header_width_available = (chars_per_line
|
||
+ - mbswidth (date_text, 0)
|
||
+ - mbswidth (file_text, 0));
|
||
+}
|
||
+
|
||
+/* Set things up for printing a page
|
||
+
|
||
+ Scan through the columns ...
|
||
+ Determine which are ready to print
|
||
+ (i.e., which have lines stored or open files)
|
||
+ Set p->lines_to_print appropriately
|
||
+ (to p->lines_stored if we're storing, or lines_per_body
|
||
+ if we're reading straight from the file)
|
||
+ Keep track of this total so we know when to stop printing */
|
||
+
|
||
+static void
|
||
+init_page (void)
|
||
+{
|
||
+ int j;
|
||
+ COLUMN *p;
|
||
+
|
||
+ if (storing_columns)
|
||
+ {
|
||
+ store_columns ();
|
||
+ for (j = columns - 1, p = column_vector; j; --j, ++p)
|
||
+ {
|
||
+ p->lines_to_print = p->lines_stored;
|
||
+ }
|
||
+
|
||
+ /* Last column. */
|
||
+ if (balance_columns)
|
||
+ {
|
||
+ p->lines_to_print = p->lines_stored;
|
||
+ }
|
||
+ /* Since we're not balancing columns, we don't need to store
|
||
+ the rightmost column. Read it straight from the file. */
|
||
+ else
|
||
+ {
|
||
+ if (p->status == OPEN)
|
||
+ {
|
||
+ p->lines_to_print = lines_per_body;
|
||
+ }
|
||
+ else
|
||
+ p->lines_to_print = 0;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ for (j = columns, p = column_vector; j; --j, ++p)
|
||
+ if (p->status == OPEN)
|
||
+ {
|
||
+ p->lines_to_print = lines_per_body;
|
||
+ }
|
||
+ else
|
||
+ p->lines_to_print = 0;
|
||
+}
|
||
+
|
||
+/* Align empty columns and print separators.
|
||
+ Empty columns will be formed by files with status ON_HOLD or CLOSED
|
||
+ when printing multiple files in parallel. */
|
||
+
|
||
+static void
|
||
+align_column (COLUMN *p)
|
||
+{
|
||
+ padding_not_printed = p->start_position;
|
||
+ if (padding_not_printed - col_sep_length > 0)
|
||
+ {
|
||
+ pad_across_to (padding_not_printed - col_sep_length);
|
||
+ padding_not_printed = ANYWHERE;
|
||
+ }
|
||
+
|
||
+ if (use_col_separator)
|
||
+ print_sep_string ();
|
||
+
|
||
+ if (p->numbered)
|
||
+ add_line_number (p);
|
||
+}
|
||
+
|
||
+/* Print one page.
|
||
+
|
||
+ As long as there are lines left on the page and columns ready to print,
|
||
+ Scan across the column list
|
||
+ if the column has stored lines or the file is open
|
||
+ pad to the appropriate spot
|
||
+ print the column
|
||
+ pad the remainder of the page with \n or \f as requested
|
||
+   reset the status of all files -- any files which were on hold because
|
||
+ of formfeeds are now put back into the lineup. */
|
||
+
|
||
+static bool
|
||
+print_page (void)
|
||
+{
|
||
+ int j;
|
||
+ int lines_left_on_page;
|
||
+ COLUMN *p;
|
||
+
|
||
+ /* Used as an accumulator (with | operator) of successive values of
|
||
+ pad_vertically. The trick is to set pad_vertically
|
||
+ to false before each run through the inner loop, then after that
|
||
+ loop, it tells us whether a line was actually printed (whether a
|
||
+ newline needs to be output -- or two for double spacing). But those
|
||
+ values have to be accumulated (in pv) so we can invoke pad_down
|
||
+ properly after the outer loop completes. */
|
||
+ bool pv;
|
||
+
|
||
+ init_page ();
|
||
+
|
||
+ if (cols_ready_to_print () == 0)
|
||
+ return false;
|
||
+
|
||
+ if (extremities)
|
||
+ print_a_header = true;
|
||
+
|
||
+ /* Don't pad unless we know a page was printed. */
|
||
+ pad_vertically = false;
|
||
+ pv = false;
|
||
+
|
||
+ lines_left_on_page = lines_per_body;
|
||
+ if (double_space)
|
||
+ lines_left_on_page *= 2;
|
||
+
|
||
+ while (lines_left_on_page > 0 && cols_ready_to_print () > 0)
|
||
+ {
|
||
+ output_position = 0;
|
||
+ spaces_not_printed = 0;
|
||
+ separators_not_printed = 0;
|
||
+ pad_vertically = false;
|
||
+ align_empty_cols = false;
|
||
+ empty_line = true;
|
||
+
|
||
+ for (j = 1, p = column_vector; j <= columns; ++j, ++p)
|
||
+ {
|
||
+ input_position = 0;
|
||
+ if (p->lines_to_print > 0 || p->status == FF_FOUND)
|
||
+ {
|
||
+ FF_only = false;
|
||
+ padding_not_printed = p->start_position;
|
||
+ if (!(p->print_func) (p))
|
||
+ read_rest_of_line (p);
|
||
+ pv |= pad_vertically;
|
||
+
|
||
+ --p->lines_to_print;
|
||
+ if (p->lines_to_print <= 0)
|
||
+ {
|
||
+ if (cols_ready_to_print () <= 0)
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ /* File p changed its status to ON_HOLD or CLOSED */
|
||
+ if (parallel_files && p->status != OPEN)
|
||
+ {
|
||
+ if (empty_line)
|
||
+ align_empty_cols = true;
|
||
+ else if (p->status == CLOSED ||
|
||
+ (p->status == ON_HOLD && FF_only))
|
||
+ align_column (p);
|
||
+ }
|
||
+ }
|
||
+ else if (parallel_files)
|
||
+ {
|
||
+ /* File status ON_HOLD or CLOSED */
|
||
+ if (empty_line)
|
||
+ align_empty_cols = true;
|
||
+ else
|
||
+ align_column (p);
|
||
+ }
|
||
+
|
||
+ /* We need it also with an empty column */
|
||
+ if (use_col_separator)
|
||
+ ++separators_not_printed;
|
||
+ }
|
||
+
|
||
+ if (pad_vertically)
|
||
+ {
|
||
+ putchar ('\n');
|
||
+ --lines_left_on_page;
|
||
+ }
|
||
+
|
||
+ if (cols_ready_to_print () <= 0 && !extremities)
|
||
+ break;
|
||
+
|
||
+ if (double_space && pv)
|
||
+ {
|
||
+ putchar ('\n');
|
||
+ --lines_left_on_page;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (lines_left_on_page == 0)
|
||
+ for (j = 1, p = column_vector; j <= columns; ++j, ++p)
|
||
+ if (p->status == OPEN)
|
||
+ p->full_page_printed = true;
|
||
+
|
||
+ pad_vertically = pv;
|
||
+
|
||
+ if (pad_vertically && extremities)
|
||
+ pad_down (lines_left_on_page + lines_per_footer);
|
||
+ else if (keep_FF && print_a_FF)
|
||
+ {
|
||
+ putchar ('\f');
|
||
+ print_a_FF = false;
|
||
+ }
|
||
+
|
||
+ if (last_page_number < page_number)
|
||
+ return false; /* Stop printing with LAST_PAGE */
|
||
+
|
||
+ reset_status (); /* Change ON_HOLD to OPEN. */
|
||
+
|
||
+ return true; /* More pages to go. */
|
||
+}
|
||
+
|
||
+/* Allocate space for storing columns.
|
||
+
|
||
+ This is necessary when printing multiple columns from a single file.
|
||
+ Lines are stored consecutively in buff, separated by '\0'.
|
||
+
|
||
+ The following doesn't apply any longer - any tuning possible?
|
||
+ (We can't use a fixed offset since with the '-s' flag lines aren't
|
||
+ truncated.)
|
||
+
|
||
+ We maintain a list (line_vector) of pointers to the beginnings
|
||
+ of lines in buff. We allocate one more than the number of lines
|
||
+ because the last entry tells us the index of the last character,
|
||
+ which we need to know in order to print the last line in buff. */
|
||
+
|
||
+static void
|
||
+init_store_cols (void)
|
||
+{
|
||
+ int total_lines = lines_per_body * columns;
|
||
+ int chars_if_truncate = total_lines * (chars_per_column + 1);
|
||
+
|
||
+ free (line_vector);
|
||
+ /* FIXME: here's where it was allocated. */
|
||
+ line_vector = xmalloc ((total_lines + 1) * sizeof *line_vector); /* +1: the last entry marks the end of the last stored line. */
|
||
+
|
||
+ free (end_vector);
|
||
+ end_vector = xmalloc (total_lines * sizeof *end_vector); /* Size by element type, not by pointer size. */
|
||
+
|
||
+ free (buff);
|
||
+ buff_allocated = (use_col_separator
|
||
+ ? 2 * chars_if_truncate
|
||
+ : chars_if_truncate); /* Tune this. */
|
||
+ buff = xmalloc (buff_allocated);
|
||
+}
|
||
+
|
||
+/* Store all but the rightmost column.
|
||
+ (Used when printing a single file in multiple downward columns)
|
||
+
|
||
+ For each column
|
||
+ set p->current_line to be the index in line_vector of the
|
||
+ first line in the column
|
||
+ For each line in the column
|
||
+ store the line in buff
|
||
+ add to line_vector the index of the line's first char
|
||
+ buff_start is the index in buff of the first character in the
|
||
+ current line. */
|
||
+
|
||
+static void
|
||
+store_columns (void)
|
||
+{
|
||
+ int i, j;
|
||
+ unsigned int line = 0;
|
||
+ unsigned int buff_start;
|
||
+ int last_col; /* The rightmost column which will be saved in buff */
|
||
+ COLUMN *p;
|
||
+
|
||
+ buff_current = 0;
|
||
+ buff_start = 0;
|
||
+
|
||
+ if (balance_columns)
|
||
+ last_col = columns;
|
||
+ else
|
||
+ last_col = columns - 1;
|
||
+
|
||
+ for (i = 1, p = column_vector; i <= last_col; ++i, ++p)
|
||
+ p->lines_stored = 0;
|
||
+
|
||
+ for (i = 1, p = column_vector; i <= last_col && files_ready_to_read;
|
||
+ ++i, ++p)
|
||
+ {
|
||
+ p->current_line = line;
|
||
+ for (j = lines_per_body; j && files_ready_to_read; --j)
|
||
+
|
||
+ if (p->status == OPEN) /* Redundant. Clean up. */
|
||
+ {
|
||
+ input_position = 0;
|
||
+
|
||
+ if (!read_line (p))
|
||
+ read_rest_of_line (p);
|
||
+
|
||
+ if (p->status == OPEN
|
||
+ || buff_start != buff_current)
|
||
+ {
|
||
+ ++p->lines_stored;
|
||
+ line_vector[line] = buff_start;
|
||
+ end_vector[line++] = input_position;
|
||
+ buff_start = buff_current;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Keep track of the location of the last char in buff. */
|
||
+ line_vector[line] = buff_start;
|
||
+
|
||
+ if (balance_columns)
|
||
+ balance (line);
|
||
+}
|
||
+
|
||
+static void
|
||
+balance (int total_stored)
|
||
+{
|
||
+ COLUMN *p;
|
||
+ int i, lines;
|
||
+ int first_line = 0;
|
||
+
|
||
+ for (i = 1, p = column_vector; i <= columns; ++i, ++p)
|
||
+ {
|
||
+ lines = total_stored / columns;
|
||
+ if (i <= total_stored % columns)
|
||
+ ++lines;
|
||
+
|
||
+ p->lines_stored = lines;
|
||
+ p->current_line = first_line;
|
||
+
|
||
+ first_line += lines;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Store a character in the buffer. */
|
||
+
|
||
+static void
|
||
+store_char (char c)
|
||
+{
|
||
+ if (buff_current >= buff_allocated)
|
||
+ {
|
||
+ /* May be too generous. */
|
||
+ buff = X2REALLOC (buff, &buff_allocated);
|
||
+ }
|
||
+ buff[buff_current++] = c;
|
||
+}
|
||
+
|
||
+static void
|
||
+add_line_number (COLUMN *p)
|
||
+{
|
||
+ int i;
|
||
+ char *s;
|
||
+ int left_cut;
|
||
+
|
||
+ /* Cutting off the higher-order digits is more informative than
|
||
+ lower-order cut off*/
|
||
+ if (line_number < power_10)
|
||
+ sprintf (number_buff, "%*d", chars_per_number, line_number);
|
||
+ else
|
||
+ {
|
||
+ left_cut = line_number % power_10;
|
||
+ sprintf (number_buff, "%0*d", chars_per_number, left_cut);
|
||
+ }
|
||
+ line_number++;
|
||
+ s = number_buff;
|
||
+ for (i = chars_per_number; i > 0; i--)
|
||
+ (p->char_func) (*s++);
|
||
+
|
||
+ if (columns > 1)
|
||
+ {
|
||
+ /* Tabification is assumed for multiple columns, also for n-separators,
|
||
+ but `default n-separator = TAB' hasn't been given priority over
|
||
+ equal column_width also specified by POSIX. */
|
||
+ if (number_separator == '\t')
|
||
+ {
|
||
+ i = number_width - chars_per_number;
|
||
+ while (i-- > 0)
|
||
+ (p->char_func) (' ');
|
||
+ }
|
||
+ else
|
||
+ (p->char_func) (number_separator);
|
||
+ }
|
||
+ else
|
||
+ /* To comply with POSIX, we avoid any expansion of default TAB
|
||
+ separator with a single column output. No column_width requirement
|
||
+ has to be considered. */
|
||
+ {
|
||
+ (p->char_func) (number_separator);
|
||
+ if (number_separator == '\t')
|
||
+ output_position = POS_AFTER_TAB (chars_per_output_tab,
|
||
+ output_position);
|
||
+ }
|
||
+
|
||
+ if (truncate_lines && !parallel_files)
|
||
+ input_position += number_width;
|
||
+}
|
||
+
|
||
+/* Print (or store) padding until the current horizontal position
|
||
+ is position. */
|
||
+
|
||
+static void
|
||
+pad_across_to (int position)
|
||
+{
|
||
+ int h = output_position;
|
||
+
|
||
+ if (tabify_output)
|
||
+ spaces_not_printed = position - output_position;
|
||
+ else
|
||
+ {
|
||
+ while (++h <= position)
|
||
+ putchar (' ');
|
||
+ output_position = position;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Pad to the bottom of the page.
|
||
+
|
||
+ If the user has requested a formfeed, use one.
|
||
+ Otherwise, use newlines. */
|
||
+
|
||
+static void
|
||
+pad_down (int lines)
|
||
+{
|
||
+ int i;
|
||
+
|
||
+ if (use_form_feed)
|
||
+ putchar ('\f');
|
||
+ else
|
||
+ for (i = lines; i; --i)
|
||
+ putchar ('\n');
|
||
+}
|
||
+
|
||
+/* Read the rest of the line.
|
||
+
|
||
+ Read from the current column's file until an end of line is
|
||
+ hit. Used when we've truncated a line and we no longer need
|
||
+ to print or store its characters. */
|
||
+
|
||
+static void
|
||
+read_rest_of_line (COLUMN *p)
|
||
+{
|
||
+ int c;
|
||
+ FILE *f = p->fp;
|
||
+
|
||
+ while ((c = getc (f)) != '\n')
|
||
+ {
|
||
+ if (c == '\f')
|
||
+ {
|
||
+ if ((c = getc (f)) != '\n')
|
||
+ ungetc (c, f);
|
||
+ if (keep_FF)
|
||
+ print_a_FF = true;
|
||
+ hold_file (p);
|
||
+ break;
|
||
+ }
|
||
+ else if (c == EOF)
|
||
+ {
|
||
+ close_file (p);
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Read a line with skip_to_page.
|
||
+
|
||
+ Read from the current column's file until an end of line is
|
||
+ hit. Used when we read full lines to skip pages.
|
||
+ With skip_to_page we have to check for FF-coincidence which is done
|
||
+ in function read_line otherwise.
|
||
+ Count lines of skipped pages to find the line number of 1st page
|
||
+ printed relative to 1st line of input file (start_line_num). */
|
||
+
|
||
+static void
|
||
+skip_read (COLUMN *p, int column_number)
|
||
+{
|
||
+ int c;
|
||
+ FILE *f = p->fp;
|
||
+ int i;
|
||
+ bool single_ff = false;
|
||
+ COLUMN *q;
|
||
+
|
||
+ /* Read 1st character in a line or any character succeeding a FF */
|
||
+ if ((c = getc (f)) == '\f' && p->full_page_printed)
|
||
+ /* A FF-coincidence with a previous full_page_printed.
|
||
+ To avoid an additional empty page, eliminate the FF */
|
||
+ if ((c = getc (f)) == '\n')
|
||
+ c = getc (f);
|
||
+
|
||
+ p->full_page_printed = false;
|
||
+
|
||
+ /* 1st character a FF means a single FF without any printable
|
||
+ characters. Don't count it as a line with -n option. */
|
||
+ if (c == '\f')
|
||
+ single_ff = true;
|
||
+
|
||
+ /* Preparing for a FF-coincidence: Maybe we finish that page
|
||
+ without a FF found */
|
||
+ if (last_line)
|
||
+ p->full_page_printed = true;
|
||
+
|
||
+ while (c != '\n')
|
||
+ {
|
||
+ if (c == '\f')
|
||
+ {
|
||
+ /* No FF-coincidence possible,
|
||
+ no catching up of a FF-coincidence with next page */
|
||
+ if (last_line)
|
||
+ {
|
||
+ if (!parallel_files)
|
||
+ for (q = column_vector, i = columns; i; ++q, --i)
|
||
+ q->full_page_printed = false;
|
||
+ else
|
||
+ p->full_page_printed = false;
|
||
+ }
|
||
+
|
||
+ if ((c = getc (f)) != '\n')
|
||
+ ungetc (c, f);
|
||
+ hold_file (p);
|
||
+ break;
|
||
+ }
|
||
+ else if (c == EOF)
|
||
+ {
|
||
+ close_file (p);
|
||
+ break;
|
||
+ }
|
||
+ c = getc (f);
|
||
+ }
|
||
+
|
||
+ if (skip_count)
|
||
+ if ((!parallel_files || column_number == 1) && !single_ff)
|
||
+ ++line_count;
|
||
+}
|
||
+
|
||
+/* If we're tabifying output,
|
||
+
|
||
+ When print_char encounters white space it keeps track
|
||
+ of our desired horizontal position and delays printing
|
||
+ until this function is called. */
|
||
+
|
||
+static void
|
||
+print_white_space (void)
|
||
+{
|
||
+ int h_new;
|
||
+ int h_old = output_position;
|
||
+ int goal = h_old + spaces_not_printed;
|
||
+
|
||
+ while (goal - h_old > 1
|
||
+ && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
|
||
+ {
|
||
+ putchar (output_tab_char);
|
||
+ h_old = h_new;
|
||
+ }
|
||
+ while (++h_old <= goal)
|
||
+ putchar (' ');
|
||
+
|
||
+ output_position = goal;
|
||
+ spaces_not_printed = 0;
|
||
+}
|
||
+
|
||
+/* Print column separators.
|
||
+
|
||
+ We keep a count until we know that we'll be printing a line,
|
||
+ then print_sep_string() is called. */
|
||
+
|
||
+static void
|
||
+print_sep_string (void)
|
||
+{
|
||
+ char *s;
|
||
+ int l = col_sep_length;
|
||
+
|
||
+ s = col_sep_string;
|
||
+
|
||
+ if (separators_not_printed <= 0)
|
||
+ {
|
||
+ /* We'll be starting a line with chars_per_margin, anything else? */
|
||
+ if (spaces_not_printed > 0)
|
||
+ print_white_space ();
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ for (; separators_not_printed > 0; --separators_not_printed)
|
||
+ {
|
||
+ while (l-- > 0)
|
||
+ {
|
||
+ /* 3 types of sep_strings: spaces only, spaces and chars,
|
||
+ chars only */
|
||
+ if (*s == ' ')
|
||
+ {
|
||
+ /* We're tabifying output; consecutive spaces in
|
||
+ sep_string may have to be converted to tabs */
|
||
+ s++;
|
||
+ ++spaces_not_printed;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (spaces_not_printed > 0)
|
||
+ print_white_space ();
|
||
+ putchar (*s++);
|
||
+ ++output_position;
|
||
+ }
|
||
+ }
|
||
+ /* sep_string ends with some spaces */
|
||
+ if (spaces_not_printed > 0)
|
||
+ print_white_space ();
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Print (or store, depending on p->char_func) a clump of N
|
||
+ characters. */
|
||
+
|
||
+static void
|
||
+print_clump (COLUMN *p, int n, char *clump)
|
||
+{
|
||
+ while (n--)
|
||
+ (p->char_func) (*clump++);
|
||
+}
|
||
+
|
||
+/* Print a character.
|
||
+
|
||
+ FIXME: the following comment is outdated; process_char is no longer
|
||
+ used.
|
||
+ If we're tabifying, all tabs have been converted to spaces by
|
||
+ process_char(). Keep a count of consecutive spaces, and when
|
||
+ a nonspace is encountered, call print_white_space() to print the
|
||
+ required number of tabs and spaces. */
|
||
+
|
||
+static void
|
||
+print_char (char c)
|
||
+{
|
||
+ if (tabify_output)
|
||
+ {
|
||
+ if (c == ' ')
|
||
+ {
|
||
+ ++spaces_not_printed;
|
||
+ return;
|
||
+ }
|
||
+ else if (spaces_not_printed > 0)
|
||
+ print_white_space ();
|
||
+
|
||
+ /* Nonprintables are assumed to have width 0, except '\b'. */
|
||
+ if (! isprint (to_uchar (c)))
|
||
+ {
|
||
+ if (c == '\b')
|
||
+ --output_position;
|
||
+ }
|
||
+ else
|
||
+ ++output_position;
|
||
+ }
|
||
+ putchar (c);
|
||
+}
|
||
+
|
||
+/* Skip to page PAGE before printing.
|
||
+ PAGE may be larger than total number of pages. */
|
||
+
|
||
+static bool
|
||
+skip_to_page (uintmax_t page)
|
||
+{
|
||
+ uintmax_t n;
|
||
+ int i;
|
||
+ int j;
|
||
+ COLUMN *p;
|
||
+
|
||
+ for (n = 1; n < page; ++n)
|
||
+ {
|
||
+ for (i = 1; i < lines_per_body; ++i)
|
||
+ {
|
||
+ for (j = 1, p = column_vector; j <= columns; ++j, ++p)
|
||
+ if (p->status == OPEN)
|
||
+ skip_read (p, j);
|
||
+ }
|
||
+ last_line = true;
|
||
+ for (j = 1, p = column_vector; j <= columns; ++j, ++p)
|
||
+ if (p->status == OPEN)
|
||
+ skip_read (p, j);
|
||
+
|
||
+ if (storing_columns) /* change FF_FOUND to ON_HOLD */
|
||
+ for (j = 1, p = column_vector; j <= columns; ++j, ++p)
|
||
+ if (p->status != CLOSED)
|
||
+ p->status = ON_HOLD;
|
||
+
|
||
+ reset_status ();
|
||
+ last_line = false;
|
||
+
|
||
+ if (files_ready_to_read < 1)
|
||
+ {
|
||
+ /* It's very helpful, normally the total number of pages is
|
||
+ not known in advance. */
|
||
+ error (0, 0,
|
||
+ _("starting page number %"PRIuMAX
|
||
+ " exceeds page count %"PRIuMAX),
|
||
+ page, n);
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ return files_ready_to_read > 0;
|
||
+}
|
||
+
|
||
+/* Print a header.
|
||
+
|
||
+ Formfeeds are assumed to use up two lines at the beginning of
|
||
+ the page. */
|
||
+
|
||
+static void
|
||
+print_header (void)
|
||
+{
|
||
+ char page_text[256 + INT_STRLEN_BOUND (page_number)];
|
||
+ int available_width;
|
||
+ int lhs_spaces;
|
||
+ int rhs_spaces;
|
||
+
|
||
+ output_position = 0;
|
||
+ pad_across_to (chars_per_margin);
|
||
+ print_white_space ();
|
||
+
|
||
+ if (page_number == 0)
|
||
+ error (EXIT_FAILURE, 0, _("page number overflow"));
|
||
+
|
||
+ /* The translator must ensure that formatting the translation of
|
||
+ "Page %"PRIuMAX does not generate more than (sizeof page_text - 1)
|
||
+ bytes. */
|
||
+ sprintf (page_text, _("Page %"PRIuMAX), page_number++);
|
||
+ available_width = header_width_available - mbswidth (page_text, 0);
|
||
+ available_width = MAX (0, available_width);
|
||
+ lhs_spaces = available_width >> 1;
|
||
+ rhs_spaces = available_width - lhs_spaces;
|
||
+
|
||
+ printf ("\n\n%*.*s%s%*.*s%s%*.*s%s\n\n\n",
|
||
+ chars_per_margin, chars_per_margin, " ",
|
||
+ date_text, lhs_spaces, lhs_spaces, " ",
|
||
+ file_text, rhs_spaces, rhs_spaces, " ", page_text);
|
||
+
|
||
+ print_a_header = false;
|
||
+ output_position = 0;
|
||
+}
|
||
+
|
||
+/* Print (or store, if p->char_func is store_char()) a line.
|
||
+
|
||
+ Read a character to determine whether we have a line or not.
|
||
+ (We may hit EOF, \n, or \f)
|
||
+
|
||
+ Once we know we have a line,
|
||
+ set pad_vertically = true, meaning it's safe
|
||
+ to pad down at the end of the page, since we do have a page.
|
||
+ print a header if needed.
|
||
+ pad across to padding_not_printed if needed.
|
||
+ print any separators which need to be printed.
|
||
+ print a line number if it needs to be printed.
|
||
+
|
||
+ Print the clump which corresponds to the first character.
|
||
+
|
||
+ Enter a loop and keep printing until an end of line condition
|
||
+ exists, or until we exceed chars_per_column.
|
||
+
|
||
+ Return false if we exceed chars_per_column before reading
|
||
+ an end of line character, true otherwise. */
|
||
+
|
||
+static bool
|
||
+read_line (COLUMN *p)
|
||
+{
|
||
+ int c;
|
||
+ int chars IF_LINT (= 0);
|
||
+ int last_input_position;
|
||
+ int j, k;
|
||
+ COLUMN *q;
|
||
+
|
||
+ /* read 1st character in each line or any character succeeding a FF: */
|
||
+ c = getc (p->fp);
|
||
+
|
||
+ last_input_position = input_position;
|
||
+
|
||
+ if (c == '\f' && p->full_page_printed)
|
||
+ if ((c = getc (p->fp)) == '\n')
|
||
+ c = getc (p->fp);
|
||
+ p->full_page_printed = false;
|
||
+
|
||
+ switch (c)
|
||
+ {
|
||
+ case '\f':
|
||
+ if ((c = getc (p->fp)) != '\n')
|
||
+ ungetc (c, p->fp);
|
||
+ FF_only = true;
|
||
+ if (print_a_header && !storing_columns)
|
||
+ {
|
||
+ pad_vertically = true;
|
||
+ print_header ();
|
||
+ }
|
||
+ else if (keep_FF)
|
||
+ print_a_FF = true;
|
||
+ hold_file (p);
|
||
+ return true;
|
||
+ case EOF:
|
||
+ close_file (p);
|
||
+ return true;
|
||
+ case '\n':
|
||
+ break;
|
||
+ default:
|
||
+ chars = char_to_clump (c);
|
||
+ }
|
||
+
|
||
+ if (truncate_lines && input_position > chars_per_column)
|
||
+ {
|
||
+ input_position = last_input_position;
|
||
+ return false;
|
||
+ }
|
||
+
|
||
+ if (p->char_func != store_char)
|
||
+ {
|
||
+ pad_vertically = true;
|
||
+
|
||
+ if (print_a_header && !storing_columns)
|
||
+ print_header ();
|
||
+
|
||
+ if (parallel_files && align_empty_cols)
|
||
+ {
|
||
+ /* We have to align empty columns at the beginning of a line. */
|
||
+ k = separators_not_printed;
|
||
+ separators_not_printed = 0;
|
||
+ for (j = 1, q = column_vector; j <= k; ++j, ++q)
|
||
+ {
|
||
+ align_column (q);
|
||
+ separators_not_printed += 1;
|
||
+ }
|
||
+ padding_not_printed = p->start_position;
|
||
+ if (truncate_lines)
|
||
+ spaces_not_printed = chars_per_column;
|
||
+ else
|
||
+ spaces_not_printed = 0;
|
||
+ align_empty_cols = false;
|
||
+ }
|
||
+
|
||
+ if (padding_not_printed - col_sep_length > 0)
|
||
+ {
|
||
+ pad_across_to (padding_not_printed - col_sep_length);
|
||
+ padding_not_printed = ANYWHERE;
|
||
+ }
|
||
+
|
||
+ if (use_col_separator)
|
||
+ print_sep_string ();
|
||
+ }
|
||
+
|
||
+ if (p->numbered)
|
||
+ add_line_number (p);
|
||
+
|
||
+ empty_line = false;
|
||
+ if (c == '\n')
|
||
+ return true;
|
||
+
|
||
+ print_clump (p, chars, clump_buff);
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ c = getc (p->fp);
|
||
+
|
||
+ switch (c)
|
||
+ {
|
||
+ case '\n':
|
||
+ return true;
|
||
+ case '\f':
|
||
+ if ((c = getc (p->fp)) != '\n')
|
||
+ ungetc (c, p->fp);
|
||
+ if (keep_FF)
|
||
+ print_a_FF = true;
|
||
+ hold_file (p);
|
||
+ return true;
|
||
+ case EOF:
|
||
+ close_file (p);
|
||
+ return true;
|
||
+ }
|
||
+
|
||
+ last_input_position = input_position;
|
||
+ chars = char_to_clump (c);
|
||
+ if (truncate_lines && input_position > chars_per_column)
|
||
+ {
|
||
+ input_position = last_input_position;
|
||
+ return false;
|
||
+ }
|
||
+
|
||
+ print_clump (p, chars, clump_buff);
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Print a line from buff.
|
||
+
|
||
+ If this function has been called, we know we have "something to
|
||
+ print". But it remains to be seen whether we have a real text page
|
||
+ or an empty page (a single form feed) with/without a header only.
|
||
+ Therefore first we set pad_vertically to true and print a header
|
||
+ if necessary.
|
||
+ If FF_FOUND and we are using -t|-T option we omit any newline by
|
||
+ setting pad_vertically to false (see print_page).
|
||
+ Otherwise we pad across if necessary, print separators if necessary
|
||
+ and text of COLUMN *p.
|
||
+
|
||
+ Return true, meaning there is no need to call read_rest_of_line. */
|
||
+
|
||
+static bool
|
||
+print_stored (COLUMN *p)
|
||
+{
|
||
+ COLUMN *q;
|
||
+ int i;
|
||
+
|
||
+ int line = p->current_line++;
|
||
+ char *first = &buff[line_vector[line]];
|
||
+ /* FIXME
|
||
+ UMR: Uninitialized memory read:
|
||
+ * This is occurring while in:
|
||
+ print_stored [pr.c:2239]
|
||
+ * Reading 4 bytes from 0x5148c in the heap.
|
||
+ * Address 0x5148c is 4 bytes into a malloc'd block at 0x51488 of 676 bytes
|
||
+ * This block was allocated from:
|
||
+ malloc [rtlib.o]
|
||
+ xmalloc [xmalloc.c:94]
|
||
+ init_store_cols [pr.c:1648]
|
||
+ */
|
||
+ char *last = &buff[line_vector[line + 1]];
|
||
+
|
||
+ pad_vertically = true;
|
||
+
|
||
+ if (print_a_header)
|
||
+ print_header ();
|
||
+
|
||
+ if (p->status == FF_FOUND)
|
||
+ {
|
||
+ for (i = 1, q = column_vector; i <= columns; ++i, ++q)
|
||
+ q->status = ON_HOLD;
|
||
+ if (column_vector->lines_to_print <= 0)
|
||
+ {
|
||
+ if (!extremities)
|
||
+ pad_vertically = false;
|
||
+ return true; /* print a header only */
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (padding_not_printed - col_sep_length > 0)
|
||
+ {
|
||
+ pad_across_to (padding_not_printed - col_sep_length);
|
||
+ padding_not_printed = ANYWHERE;
|
||
+ }
|
||
+
|
||
+ if (use_col_separator)
|
||
+ print_sep_string ();
|
||
+
|
||
+ while (first != last)
|
||
+ print_char (*first++);
|
||
+
|
||
+ if (spaces_not_printed == 0)
|
||
+ {
|
||
+ output_position = p->start_position + end_vector[line];
|
||
+ if (p->start_position - col_sep_length == chars_per_margin)
|
||
+ output_position -= col_sep_length;
|
||
+ }
|
||
+
|
||
+ return true;
|
||
+}
|
||
+
|
||
+/* Convert a character to the proper format and return the number of
|
||
+ characters in the resulting clump. Increment input_position by
|
||
+ the width of the clump.
|
||
+
|
||
+ Tabs are converted to clumps of spaces.
|
||
+ Nonprintable characters may be converted to clumps of escape
|
||
+ sequences or control prefixes.
|
||
+
|
||
+ Note: the width of a clump is not necessarily equal to the number of
|
||
+ characters in clump_buff. (e.g, the width of '\b' is -1, while the
|
||
+ number of characters is 1.) */
|
||
+
|
||
+static int
|
||
+char_to_clump (char c)
|
||
+{
|
||
+ unsigned char uc = c;
|
||
+ char *s = clump_buff;
|
||
+ int i;
|
||
+ char esc_buff[4];
|
||
+ int width;
|
||
+ int chars;
|
||
+ int chars_per_c = 8;
|
||
+
|
||
+ if (c == input_tab_char)
|
||
+ chars_per_c = chars_per_input_tab;
|
||
+
|
||
+ if (c == input_tab_char || c == '\t')
|
||
+ {
|
||
+ width = TAB_WIDTH (chars_per_c, input_position);
|
||
+
|
||
+ if (untabify_input)
|
||
+ {
|
||
+ for (i = width; i; --i)
|
||
+ *s++ = ' ';
|
||
+ chars = width;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ *s = c;
|
||
+ chars = 1;
|
||
+ }
|
||
+
|
||
+ }
|
||
+ else if (! isprint (uc))
|
||
+ {
|
||
+ if (use_esc_sequence)
|
||
+ {
|
||
+ width = 4;
|
||
+ chars = 4;
|
||
+ *s++ = '\\';
|
||
+ sprintf (esc_buff, "%03o", uc);
|
||
+ for (i = 0; i <= 2; ++i)
|
||
+ *s++ = esc_buff[i];
|
||
+ }
|
||
+ else if (use_cntrl_prefix)
|
||
+ {
|
||
+ if (uc < 0200)
|
||
+ {
|
||
+ width = 2;
|
||
+ chars = 2;
|
||
+ *s++ = '^';
|
||
+ *s++ = c ^ 0100;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ width = 4;
|
||
+ chars = 4;
|
||
+ *s++ = '\\';
|
||
+ sprintf (esc_buff, "%03o", uc);
|
||
+ for (i = 0; i <= 2; ++i)
|
||
+ *s++ = esc_buff[i];
|
||
+ }
|
||
+ }
|
||
+ else if (c == '\b')
|
||
+ {
|
||
+ width = -1;
|
||
+ chars = 1;
|
||
+ *s = c;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ width = 0;
|
||
+ chars = 1;
|
||
+ *s = c;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ width = 1;
|
||
+ chars = 1;
|
||
+ *s = c;
|
||
+ }
|
||
+
|
||
+ /* Too many backspaces must put us in position 0 -- never negative. */
|
||
+ if (width < 0 && input_position == 0)
|
||
+ {
|
||
+ chars = 0;
|
||
+ input_position = 0;
|
||
+ }
|
||
+ else if (width < 0 && input_position <= -width)
|
||
+ input_position = 0;
|
||
+ else
|
||
+ input_position += width;
|
||
+
|
||
+ return chars;
|
||
+}
|
||
+
|
||
+/* We've just printed some files and need to clean up things before
|
||
+ looking for more options and printing the next batch of files.
|
||
+
|
||
+ Free everything we've xmalloc'ed, except `header'. */
|
||
+
|
||
+static void
|
||
+cleanup (void)
|
||
+{
|
||
+ free (number_buff);
|
||
+ free (clump_buff);
|
||
+ free (column_vector);
|
||
+ free (line_vector);
|
||
+ free (end_vector);
|
||
+ free (buff);
|
||
+}
|
||
+
|
||
+/* Complain, print a usage message, and die. */
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s [OPTION]... [FILE]...\n\
|
||
+"),
|
||
+ program_name);
|
||
+
|
||
+ fputs (_("\
|
||
+Paginate or columnate FILE(s) for printing.\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Mandatory arguments to long options are mandatory for short options too.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE]\n\
|
||
+ begin [stop] printing with page FIRST_[LAST_]PAGE\n\
|
||
+ -COLUMN, --columns=COLUMN\n\
|
||
+ output COLUMN columns and print columns down,\n\
|
||
+ unless -a is used. Balance number of lines in the\n\
|
||
+ columns on each page.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -a, --across print columns across rather than down, used together\n\
|
||
+ with -COLUMN\n\
|
||
+ -c, --show-control-chars\n\
|
||
+ use hat notation (^G) and octal backslash notation\n\
|
||
+ -d, --double-space\n\
|
||
+ double space the output\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -D, --date-format=FORMAT\n\
|
||
+ use FORMAT for the header date\n\
|
||
+ -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]]\n\
|
||
+ expand input CHARs (TABs) to tab WIDTH (8)\n\
|
||
+ -F, -f, --form-feed\n\
|
||
+ use form feeds instead of newlines to separate pages\n\
|
||
+ (by a 3-line page header with -F or a 5-line header\n\
|
||
+ and trailer without -F)\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -h, --header=HEADER\n\
|
||
+ use a centered HEADER instead of filename in page header,\n\
|
||
+ -h \"\" prints a blank line, don't use -h\"\"\n\
|
||
+ -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]]\n\
|
||
+ replace spaces with CHARs (TABs) to tab WIDTH (8)\n\
|
||
+ -J, --join-lines merge full lines, turns off -W line truncation, no column\n\
|
||
+ alignment, --sep-string[=STRING] sets separators\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -l, --length=PAGE_LENGTH\n\
|
||
+ set the page length to PAGE_LENGTH (66) lines\n\
|
||
+ (default number of lines of text 56, and with -F 63)\n\
|
||
+ -m, --merge print all files in parallel, one in each column,\n\
|
||
+ truncate lines, but join lines of full length with -J\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]]\n\
|
||
+ number lines, use DIGITS (5) digits, then SEP (TAB),\n\
|
||
+ default counting starts with 1st line of input file\n\
|
||
+ -N, --first-line-number=NUMBER\n\
|
||
+ start counting with NUMBER at 1st line of first\n\
|
||
+ page printed (see +FIRST_PAGE)\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -o, --indent=MARGIN\n\
|
||
+ offset each line with MARGIN (zero) spaces, do not\n\
|
||
+ affect -w or -W, MARGIN will be added to PAGE_WIDTH\n\
|
||
+ -r, --no-file-warnings\n\
|
||
+ omit warning when a file cannot be opened\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -s[CHAR],--separator[=CHAR]\n\
|
||
+ separate columns by a single character, default for CHAR\n\
|
||
+ is the <TAB> character without -w and \'no char\' with -w\n\
|
||
+ -s[CHAR] turns off line truncation of all 3 column\n\
|
||
+ options (-COLUMN|-a -COLUMN|-m) except -w is set\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -SSTRING, --sep-string[=STRING]\n\
|
||
+ separate columns by STRING,\n\
|
||
+ without -S: Default separator <TAB> with -J and <space>\n\
|
||
+ otherwise (same as -S\" \"), no effect on column options\n\
|
||
+ -t, --omit-header omit page headers and trailers\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -T, --omit-pagination\n\
|
||
+ omit page headers and trailers, eliminate any pagination\n\
|
||
+ by form feeds set in input files\n\
|
||
+ -v, --show-nonprinting\n\
|
||
+ use octal backslash notation\n\
|
||
+ -w, --width=PAGE_WIDTH\n\
|
||
+ set page width to PAGE_WIDTH (72) characters for\n\
|
||
+ multiple text-column output only, -s[char] turns off (72)\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -W, --page-width=PAGE_WIDTH\n\
|
||
+ set page width to PAGE_WIDTH (72) characters always,\n\
|
||
+ truncate lines, except -J option is set, no interference\n\
|
||
+ with -S or -s\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (_("\
|
||
+\n\
|
||
+-t is implied if PAGE_LENGTH <= 10. With no FILE, or when\n\
|
||
+FILE is -, read standard input.\n\
|
||
+"), stdout);
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+ exit (status);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/src/sort.c coreutils-8.0/src/sort.c
|
||
--- coreutils-8.0-orig/src/sort.c 2009-09-29 15:27:54.000000000 +0200
|
||
+++ coreutils-8.0/src/sort.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -22,10 +22,19 @@
|
||
|
||
#include <config.h>
|
||
|
||
+#include <assert.h>
|
||
#include <getopt.h>
|
||
#include <sys/types.h>
|
||
#include <sys/wait.h>
|
||
#include <signal.h>
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+/* Get isw* functions. */
|
||
+#if HAVE_WCTYPE_H
|
||
+# include <wctype.h>
|
||
+#endif
|
||
+
|
||
#include "system.h"
|
||
#include "argmatch.h"
|
||
#include "error.h"
|
||
@@ -122,14 +131,38 @@ static int decimal_point;
|
||
/* Thousands separator; if -1, then there isn't one. */
|
||
static int thousands_sep;
|
||
|
||
+static int force_general_numcompare = 0;
|
||
+
|
||
/* Nonzero if the corresponding locales are hard. */
|
||
static bool hard_LC_COLLATE;
|
||
-#if HAVE_NL_LANGINFO
|
||
+#if HAVE_LANGINFO_CODESET
|
||
static bool hard_LC_TIME;
|
||
#endif
|
||
|
||
#define NONZERO(x) ((x) != 0)
|
||
|
||
+/* Set MBLENGTH to the byte length of the multibyte character at PTR (data ends at LIM); an invalid, incomplete or NUL character counts as 1 byte, and STATE is restored when decoding fails. */
|
||
+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
|
||
+ do \
|
||
+ { \
|
||
+ wchar_t wc; \
|
||
+ mbstate_t state_bak; \
|
||
+ \
|
||
+ state_bak = STATE; \
|
||
+ MBLENGTH = mbrtowc (&wc, PTR, (LIM) - (PTR), &STATE); \
|
||
+ \
|
||
+ switch (MBLENGTH) \
|
||
+ { \
|
||
+ case (size_t)-1: \
|
||
+ case (size_t)-2: \
|
||
+ STATE = state_bak; \
|
||
+ /* Fall through. */ \
|
||
+ case 0: \
|
||
+ MBLENGTH = 1; \
|
||
+ } \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
/* The kind of blanks for '-b' to skip in various options. */
|
||
enum blanktype { bl_start, bl_end, bl_both };
|
||
|
||
@@ -268,13 +301,11 @@ static bool reverse;
|
||
they were read if all keys compare equal. */
|
||
static bool stable;
|
||
|
||
-/* If TAB has this value, blanks separate fields. */
|
||
-enum { TAB_DEFAULT = CHAR_MAX + 1 };
|
||
-
|
||
-/* Tab character separating fields. If TAB_DEFAULT, then fields are
|
||
+/* Tab character separating fields. If tab_length is 0, then fields are
|
||
separated by the empty string between a non-blank character and a blank
|
||
character. */
|
||
-static int tab = TAB_DEFAULT;
|
||
+static char tab[MB_LEN_MAX + 1];
|
||
+static size_t tab_length = 0;
|
||
|
||
/* Flag to remove consecutive duplicate lines from the output.
|
||
Only the last of a sequence of equal lines will be output. */
|
||
@@ -712,6 +743,44 @@ reap_some (void)
|
||
update_proc (pid);
|
||
}
|
||
|
||
+/* Function pointers. */
|
||
+static void
|
||
+(*inittables) (void);
|
||
+static char *
|
||
+(*begfield) (const struct line*, const struct keyfield *);
|
||
+static char *
|
||
+(*limfield) (const struct line*, const struct keyfield *);
|
||
+static int
|
||
+(*getmonth) (char const *, size_t);
|
||
+static int
|
||
+(*keycompare) (const struct line *, const struct line *);
|
||
+static int
|
||
+(*numcompare) (const char *, const char *);
|
||
+
|
||
+/* Test for white space multibyte character.
|
||
+ Set LENGTH the byte length of investigated multibyte character. */
|
||
+#if HAVE_MBRTOWC
|
||
+static int
|
||
+ismbblank (const char *str, size_t len, size_t *length)
|
||
+{
|
||
+ size_t mblength;
|
||
+ wchar_t wc;
|
||
+ mbstate_t state;
|
||
+
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+ mblength = mbrtowc (&wc, str, len, &state);
|
||
+
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ *length = 1;
|
||
+ return 0;
|
||
+ }
|
||
+
|
||
+ *length = (mblength < 1) ? 1 : mblength;
|
||
+ return iswblank (wc);
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Clean up any remaining temporary files. */
|
||
|
||
static void
|
||
@@ -1093,7 +1162,7 @@ zaptemp (const char *name)
|
||
free (node);
|
||
}
|
||
|
||
-#if HAVE_NL_LANGINFO
|
||
+#if HAVE_LANGINFO_CODESET
|
||
|
||
static int
|
||
struct_month_cmp (const void *m1, const void *m2)
|
||
@@ -1108,7 +1177,7 @@ struct_month_cmp (const void *m1, const
|
||
/* Initialize the character class tables. */
|
||
|
||
static void
|
||
-inittables (void)
|
||
+inittables_uni (void)
|
||
{
|
||
size_t i;
|
||
|
||
@@ -1120,7 +1189,7 @@ inittables (void)
|
||
fold_toupper[i] = toupper (i);
|
||
}
|
||
|
||
-#if HAVE_NL_LANGINFO
|
||
+#if HAVE_LANGINFO_CODESET
|
||
/* If we're not in the "C" locale, read different names for months. */
|
||
if (hard_LC_TIME)
|
||
{
|
||
@@ -1202,6 +1271,64 @@ specify_nmerge (int oi, char c, char con
|
||
xstrtol_fatal (e, oi, c, long_options, s);
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static void
|
||
+inittables_mb (void) /* Multibyte variant of inittables: rebuild monthtab from the locale's abbreviated month names (leading blanks dropped, characters upper-cased) and sort it for getmonth's binary search. */
|
||
+{
|
||
+ int i, j, k, l;
|
||
+ char *name, *s;
|
||
+ size_t s_len, mblength;
|
||
+ char mbc[MB_LEN_MAX];
|
||
+ wchar_t wc, pwc;
|
||
+ mbstate_t state_mb, state_wc;
|
||
+
|
||
+ for (i = 0; i < MONTHS_PER_YEAR; i++)
|
||
+ {
|
||
+ s = (char *) nl_langinfo (ABMON_1 + i);
|
||
+ s_len = strlen (s);
|
||
+ monthtab[i].name = name = (char *) xmalloc (s_len * MB_LEN_MAX + 1); /* an upper-cased character may need more bytes than the original, so allow MB_LEN_MAX per input byte */
|
||
+ monthtab[i].val = i + 1;
|
||
+
|
||
+ memset (&state_mb, '\0', sizeof (mbstate_t));
|
||
+ memset (&state_wc, '\0', sizeof (mbstate_t));
|
||
+
|
||
+ for (j = 0; j < s_len;) /* skip leading blanks of the locale's name */
|
||
+ {
|
||
+ if (!ismbblank (s + j, s_len - j, &mblength))
|
||
+ break;
|
||
+ j += mblength;
|
||
+ }
|
||
+
|
||
+ for (k = 0; j < s_len;) /* copy the rest, upper-casing one character at a time */
|
||
+ {
|
||
+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
|
||
+ assert (mblength != (size_t)-1 && mblength != (size_t)-2);
|
||
+ if (mblength == 0)
|
||
+ break;
|
||
+
|
||
+ pwc = towupper (wc);
|
||
+ if (pwc == wc) /* already upper case (or caseless): keep the original bytes */
|
||
+ {
|
||
+ memcpy (mbc, s + j, mblength);
|
||
+ j += mblength;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ j += mblength;
|
||
+ mblength = wcrtomb (mbc, pwc, &state_wc); /* from here on mblength is the length of the upper-case encoding */
|
||
+ assert (mblength != (size_t)0 && mblength != (size_t)-1);
|
||
+ }
|
||
+
|
||
+ for (l = 0; l < mblength; l++)
|
||
+ name[k++] = mbc[l];
|
||
+ }
|
||
+ name[k] = '\0';
|
||
+ }
|
||
+ qsort ((void *) monthtab, MONTHS_PER_YEAR,
|
||
+ sizeof (struct month), struct_month_cmp);
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Specify the amount of main memory to use when sorting. */
|
||
static void
|
||
specify_sort_size (int oi, char c, char const *s)
|
||
@@ -1412,7 +1539,7 @@ buffer_linelim (struct buffer const *buf
|
||
by KEY in LINE. */
|
||
|
||
static char *
|
||
-begfield (const struct line *line, const struct keyfield *key)
|
||
+begfield_uni (const struct line *line, const struct keyfield *key)
|
||
{
|
||
char *ptr = line->text, *lim = ptr + line->length - 1;
|
||
size_t sword = key->sword;
|
||
@@ -1421,10 +1548,10 @@ begfield (const struct line *line, const
|
||
/* The leading field separator itself is included in a field when -t
|
||
is absent. */
|
||
|
||
- if (tab != TAB_DEFAULT)
|
||
+ if (tab_length)
|
||
while (ptr < lim && sword--)
|
||
{
|
||
- while (ptr < lim && *ptr != tab)
|
||
+ while (ptr < lim && *ptr != tab[0])
|
||
++ptr;
|
||
if (ptr < lim)
|
||
++ptr;
|
||
@@ -1450,11 +1577,70 @@ begfield (const struct line *line, const
|
||
return ptr;
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static char *
|
||
+begfield_mb (const struct line *line, const struct keyfield *key) /* Multibyte variant of begfield: return a pointer into LINE where the key described by KEY begins. */
|
||
+{
|
||
+ int i;
|
||
+ char *ptr = line->text, *lim = ptr + line->length - 1; /* lim is the last byte of the line; struct line documents length as including the final newline */
|
||
+ size_t sword = key->sword;
|
||
+ size_t schar = key->schar;
|
||
+ size_t mblength;
|
||
+ mbstate_t state;
|
||
+
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ if (tab_length) /* an explicit separator is in use: skip SWORD separator-delimited fields */
|
||
+ while (ptr < lim && sword--)
|
||
+ {
|
||
+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state); /* macro defined outside this view; presumably stores the byte length of the character at PTR in MBLENGTH -- confirm */
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ if (ptr < lim) /* step over the separator that ended the field */
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ }
|
||
+ else /* no separator: a field is a run of blanks followed by a run of non-blanks */
|
||
+ while (ptr < lim && sword--)
|
||
+ {
|
||
+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
|
||
+ ptr += mblength;
|
||
+ if (ptr < lim)
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
|
||
+ ptr += mblength;
|
||
+ }
|
||
+
|
||
+ if (key->skipsblanks) /* skip leading blanks of the key before counting characters */
|
||
+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
|
||
+ ptr += mblength;
|
||
+
|
||
+ for (i = 0; i < schar; i++) /* advance by SCHAR characters (not bytes), never past LIM */
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
|
||
+
|
||
+ if (ptr + mblength > lim)
|
||
+ break;
|
||
+ else
|
||
+ ptr += mblength;
|
||
+ }
|
||
+
|
||
+ return ptr;
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Return the limit of (a pointer to the first character after) the field
|
||
in LINE specified by KEY. */
|
||
|
||
static char *
|
||
-limfield (const struct line *line, const struct keyfield *key)
|
||
+limfield_uni (const struct line *line, const struct keyfield *key)
|
||
{
|
||
char *ptr = line->text, *lim = ptr + line->length - 1;
|
||
size_t eword = key->eword, echar = key->echar;
|
||
@@ -1469,10 +1655,10 @@ limfield (const struct line *line, const
|
||
`beginning' is the first character following the delimiting TAB.
|
||
Otherwise, leave PTR pointing at the first `blank' character after
|
||
the preceding field. */
|
||
- if (tab != TAB_DEFAULT)
|
||
+ if (tab_length)
|
||
while (ptr < lim && eword--)
|
||
{
|
||
- while (ptr < lim && *ptr != tab)
|
||
+ while (ptr < lim && *ptr != tab[0])
|
||
++ptr;
|
||
if (ptr < lim && (eword || echar))
|
||
++ptr;
|
||
@@ -1518,10 +1704,10 @@ limfield (const struct line *line, const
|
||
*/
|
||
|
||
/* Make LIM point to the end of (one byte past) the current field. */
|
||
- if (tab != TAB_DEFAULT)
|
||
+ if (tab_length)
|
||
{
|
||
char *newlim;
|
||
- newlim = memchr (ptr, tab, lim - ptr);
|
||
+ newlim = memchr (ptr, tab[0], lim - ptr);
|
||
if (newlim)
|
||
lim = newlim;
|
||
}
|
||
@@ -1552,6 +1738,113 @@ limfield (const struct line *line, const
|
||
return ptr;
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static char *
|
||
+limfield_mb (const struct line *line, const struct keyfield *key) /* Multibyte variant of limfield: return the limit of (a pointer to the first character after) the field in LINE specified by KEY. */
|
||
+{
|
||
+ char *ptr = line->text, *lim = ptr + line->length - 1;
|
||
+ size_t eword = key->eword, echar = key->echar;
|
||
+ int i;
|
||
+ size_t mblength;
|
||
+ mbstate_t state;
|
||
+
|
||
+ if (echar == 0)
|
||
+ eword++; /* skip all of end field. */
|
||
+
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ if (tab_length) /* an explicit separator is in use: skip EWORD separator-delimited fields */
|
||
+ while (ptr < lim && eword--)
|
||
+ {
|
||
+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ if (ptr < lim && (eword | echar)) /* step over the separator unless it is the one that ends the key */
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ }
|
||
+ else /* no separator: a field is a run of blanks followed by a run of non-blanks */
|
||
+ while (ptr < lim && eword--)
|
||
+ {
|
||
+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
|
||
+ ptr += mblength;
|
||
+ if (ptr < lim)
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
|
||
+ ptr += mblength;
|
||
+ }
|
||
+
|
||
+
|
||
+# ifdef POSIX_UNSPECIFIED
|
||
+ /* Make LIM point to the end of (one byte past) the current field. */
|
||
+ if (tab_length)
|
||
+ {
|
||
+ char *newlim, *p;
|
||
+
|
||
+ newlim = NULL;
|
||
+ for (p = ptr; p < lim;)
|
||
+ {
|
||
+ if (memcmp (p, tab, tab_length) == 0)
|
||
+ {
|
||
+ lim = newlim = p; /* the separator ends the field; record it in LIM, not only in NEWLIM */
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ GET_BYTELEN_OF_CHAR (lim, p, mblength, state); /* measure the character at P, the pointer being stepped */
|
||
+ p += mblength;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ char *newlim;
|
||
+ newlim = ptr;
|
||
+
|
||
+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
|
||
+ newlim += mblength;
|
||
+ if (newlim < lim) /* only NEWLIM scans ahead here; PTR must stay at the start of the field */
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state);
|
||
+ newlim += mblength;
|
||
+ }
|
||
+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
|
||
+ newlim += mblength;
|
||
+ lim = newlim;
|
||
+ }
|
||
+# endif
|
||
+
|
||
+ if (echar != 0)
|
||
+ {
|
||
+ /* If we're skipping leading blanks, don't start counting characters
|
||
+ * until after skipping past any leading blanks. */
|
||
+ if (key->skipeblanks) /* the end position has its own flag; skipsblanks belongs to the start position */
|
||
+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
|
||
+ ptr += mblength;
|
||
+
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */
|
||
+ for (i = 0; i < echar; i++)
|
||
+ {
|
||
+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
|
||
+
|
||
+ if (ptr + mblength > lim)
|
||
+ break;
|
||
+ else
|
||
+ ptr += mblength;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return ptr;
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Fill BUF reading from FP, moving buf->left bytes from the end
|
||
of buf->buf to the beginning first. If EOF is reached and the
|
||
file wasn't terminated by a newline, supply one. Set up BUF's line
|
||
@@ -1634,8 +1927,24 @@ fillbuf (struct buffer *buf, FILE *fp, c
|
||
else
|
||
{
|
||
if (key->skipsblanks)
|
||
- while (blanks[to_uchar (*line_start)])
|
||
- line_start++;
|
||
+ {
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ size_t mblength;
|
||
+ mbstate_t state;
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+ while (line_start < line->keylim &&
|
||
+ ismbblank (line_start,
|
||
+ line->keylim - line_start,
|
||
+ &mblength))
|
||
+ line_start += mblength;
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
+ while (blanks[to_uchar (*line_start)])
|
||
+ line_start++;
|
||
+ }
|
||
line->keybeg = line_start;
|
||
}
|
||
}
|
||
@@ -1673,7 +1982,7 @@ fillbuf (struct buffer *buf, FILE *fp, c
|
||
hideously fast. */
|
||
|
||
static int
|
||
-numcompare (const char *a, const char *b)
|
||
+numcompare_uni (const char *a, const char *b)
|
||
{
|
||
while (blanks[to_uchar (*a)])
|
||
a++;
|
||
@@ -1782,6 +2091,25 @@ human_numcompare (const char *a, const c
|
||
: strnumcmp (a, b, decimal_point, thousands_sep));
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static int
|
||
+numcompare_mb (const char *a, const char *b) /* Multibyte variant of numcompare: skip leading blanks of the NUL-terminated strings A and B, then compare them with strnumcmp. */
|
||
+{
|
||
+ size_t mblength, len;
|
||
+ len = strlen (a); /* okay for UTF-8 */
|
||
+ while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
|
||
+ {
|
||
+ a += mblength;
|
||
+ len -= mblength;
|
||
+ }
|
||
+ len = strlen (b); /* okay for UTF-8 */
|
||
+ while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
|
||
+ b += mblength, len -= mblength; /* keep LEN equal to the bytes left in B, as the loop over A does */
|
||
+
|
||
+ return strnumcmp (a, b, decimal_point, thousands_sep);
|
||
+}
|
||
+#endif /* HAVE_MBRTOWC */
|
||
+
|
||
static int
|
||
general_numcompare (const char *sa, const char *sb)
|
||
{
|
||
@@ -1815,7 +2143,7 @@ general_numcompare (const char *sa, cons
|
||
Return 0 if the name in S is not recognized. */
|
||
|
||
static int
|
||
-getmonth (char const *month, size_t len)
|
||
+getmonth_uni (char const *month, size_t len)
|
||
{
|
||
size_t lo = 0;
|
||
size_t hi = MONTHS_PER_YEAR;
|
||
@@ -1996,11 +2324,79 @@ compare_version (char *restrict texta, s
|
||
return diff;
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static int
|
||
+getmonth_mb (const char *s, size_t len) /* Multibyte variant of getmonth: return the month value (monthtab[].val) of the name at the start of the LEN bytes at S, ignoring leading blanks and case; return 0 if it is not recognized. */
|
||
+{
|
||
+ char *month;
|
||
+ register size_t i;
|
||
+ register int lo = 0, hi = MONTHS_PER_YEAR, result;
|
||
+ char *tmp;
|
||
+ size_t wclength, mblength;
|
||
+ const char **pp;
|
||
+ const wchar_t **wpp;
|
||
+ wchar_t *month_wcs;
|
||
+ mbstate_t state;
|
||
+
|
||
+ while (len > 0 && ismbblank (s, len, &mblength)) /* skip leading blanks */
|
||
+ {
|
||
+ s += mblength;
|
||
+ len -= mblength;
|
||
+ }
|
||
+
|
||
+ if (len == 0)
|
||
+ return 0;
|
||
+
|
||
+ month = (char *) alloca (len + 1);
|
||
+
|
||
+ tmp = (char *) alloca (len + 1); /* NUL-terminated copy of the key, as mbsrtowcs needs a terminator */
|
||
+ memcpy (tmp, s, len);
|
||
+ tmp[len] = '\0';
|
||
+ pp = (const char **)&tmp;
|
||
+ month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
|
||
+ memset (&state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
|
||
+ if (wclength == (size_t)-1 || *pp != NULL) return 0; /* invalid multibyte input is data, not a program error: treat it as an unrecognized month instead of aborting */
|
||
+
|
||
+ for (i = 0; i < wclength; i++) /* upper-case the name and cut it at the first blank */
|
||
+ {
|
||
+ month_wcs[i] = towupper(month_wcs[i]);
|
||
+ if (iswblank (month_wcs[i]))
|
||
+ {
|
||
+ month_wcs[i] = L'\0';
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ wpp = (const wchar_t **)&month_wcs;
|
||
+
|
||
+ mblength = wcsrtombs (month, wpp, len + 1, &state);
|
||
+ if (mblength == (size_t)-1 || *wpp != NULL) return 0; /* conversion failed or the upper-cased name did not fit (MONTH would be unterminated): unrecognized */
|
||
+
|
||
+ do /* binary search; monthtab is sorted by name */
|
||
+ {
|
||
+ int ix = (lo + hi) / 2;
|
||
+
|
||
+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
|
||
+ hi = ix;
|
||
+ else
|
||
+ lo = ix;
|
||
+ }
|
||
+ while (hi - lo > 1);
|
||
+
|
||
+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
|
||
+ ? monthtab[lo].val : 0);
|
||
+
|
||
+ return result;
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Compare two lines A and B trying every key in sequence until there
|
||
are no more keys or a difference is found. */
|
||
|
||
static int
|
||
-keycompare (const struct line *a, const struct line *b)
|
||
+keycompare_uni (const struct line *a, const struct line *b)
|
||
{
|
||
struct keyfield *key = keylist;
|
||
|
||
@@ -2180,6 +2576,179 @@ keycompare (const struct line *a, const
|
||
return key->reverse ? -diff : diff;
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+static int
|
||
+keycompare_mb (const struct line *a, const struct line *b) /* Multibyte variant of keycompare: compare lines A and B trying every key in keylist in sequence until a difference is found; return the (possibly reversed) difference, or 0 if all keys compare equal. */
|
||
+{
|
||
+ struct keyfield *key = keylist;
|
||
+
|
||
+ /* For the first iteration only, the key positions have been
|
||
+ precomputed for us. */
|
||
+ char *texta = a->keybeg;
|
||
+ char *textb = b->keybeg;
|
||
+ char *lima = a->keylim;
|
||
+ char *limb = b->keylim;
|
||
+
|
||
+ size_t mblength_a, mblength_b;
|
||
+ wchar_t wc_a, wc_b;
|
||
+ mbstate_t state_a, state_b;
|
||
+
|
||
+ int diff;
|
||
+
|
||
+ memset (&state_a, '\0', sizeof(mbstate_t));
|
||
+ memset (&state_b, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ char const *translate = key->translate;
|
||
+ bool const *ignore = key->ignore;
|
||
+
|
||
+ /* Find the lengths. */
|
||
+ size_t lena = lima <= texta ? 0 : lima - texta;
|
||
+ size_t lenb = limb <= textb ? 0 : limb - textb;
|
||
+
|
||
+ /* Actually compare the fields. */
|
||
+ if (key->random)
|
||
+ diff = compare_random (texta, lena, textb, lenb);
|
||
+ else if (key->numeric | key->general_numeric | key->human_numeric)
|
||
+ {
|
||
+ char savea = *lima, saveb = *limb;
|
||
+
|
||
+ *lima = *limb = '\0'; /* the numeric comparers take NUL-terminated strings; the bytes are restored below */
|
||
+ diff = (key->numeric ? numcompare (texta, textb)
|
||
+ : key->general_numeric ? general_numcompare (texta, textb)
|
||
+ : human_numcompare (texta, textb, key));
|
||
+ *lima = savea, *limb = saveb;
|
||
+ }
|
||
+ else if (key->version)
|
||
+ diff = compare_version (texta, lena, textb, lenb);
|
||
+ else if (key->month)
|
||
+ diff = getmonth (texta, lena) - getmonth (textb, lenb);
|
||
+ else
|
||
+ {
|
||
+ if (ignore || translate)
|
||
+ {
|
||
+ char *copy_a = (char *) alloca (lena + 1 + lenb + 1); /* NOTE(review): upper-casing can lengthen a character's encoding, so this size may be too small for some inputs -- confirm */
|
||
+ char *copy_b = copy_a + lena + 1;
|
||
+ size_t new_len_a, new_len_b;
|
||
+ size_t i, j;
|
||
+
|
||
+ /* Ignore and/or translate chars before comparing. */
|
||
+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
|
||
+ do \
|
||
+ { \
|
||
+ wchar_t uwc; \
|
||
+ char mbc[MB_LEN_MAX]; \
|
||
+ mbstate_t state_wc; \
|
||
+ \
|
||
+ for (NEW_LEN = i = 0; i < LEN;) \
|
||
+ { \
|
||
+ mbstate_t state_bak; \
|
||
+ \
|
||
+ state_bak = STATE; \
|
||
+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
|
||
+ \
|
||
+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
|
||
+ || MBLENGTH == 0) \
|
||
+ { \
|
||
+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
|
||
+ STATE = state_bak; \
|
||
+ if (!ignore) \
|
||
+ COPY[NEW_LEN++] = TEXT[i]; \
|
||
+ i++; continue; /* always consume the byte, even when it is dropped; otherwise the loop never terminates */ \
|
||
+ } \
|
||
+ \
|
||
+ if (ignore) \
|
||
+ { \
|
||
+ if ((ignore == nonprinting && !iswprint (WC)) \
|
||
+ || (ignore == nondictionary \
|
||
+ && !iswalnum (WC) && !iswblank (WC))) \
|
||
+ { \
|
||
+ i += MBLENGTH; \
|
||
+ continue; \
|
||
+ } \
|
||
+ } \
|
||
+ \
|
||
+ if (translate) \
|
||
+ { \
|
||
+ \
|
||
+ uwc = towupper(WC); \
|
||
+ if (WC == uwc) \
|
||
+ { \
|
||
+ memcpy (mbc, TEXT + i, MBLENGTH); \
|
||
+ i += MBLENGTH; \
|
||
+ } \
|
||
+ else \
|
||
+ { \
|
||
+ i += MBLENGTH; \
|
||
+ WC = uwc; \
|
||
+ memset (&state_wc, '\0', sizeof (mbstate_t)); \
|
||
+ \
|
||
+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
|
||
+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
|
||
+ } \
|
||
+ \
|
||
+ for (j = 0; j < MBLENGTH; j++) \
|
||
+ COPY[NEW_LEN++] = mbc[j]; \
|
||
+ } \
|
||
+ else \
|
||
+ for (j = 0; j < MBLENGTH; j++) \
|
||
+ COPY[NEW_LEN++] = TEXT[i++]; \
|
||
+ } \
|
||
+ COPY[NEW_LEN] = '\0'; \
|
||
+ } \
|
||
+ while (0)
|
||
+ IGNORE_CHARS (new_len_a, lena, texta, copy_a,
|
||
+ wc_a, mblength_a, state_a);
|
||
+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
|
||
+ wc_b, mblength_b, state_b);
|
||
+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
|
||
+ }
|
||
+ else if (lena == 0)
|
||
+ diff = - NONZERO (lenb);
|
||
+ else if (lenb == 0)
|
||
+ goto greater;
|
||
+ else
|
||
+ diff = xmemcoll (texta, lena, textb, lenb);
|
||
+ }
|
||
+
|
||
+ if (diff)
|
||
+ goto not_equal;
|
||
+
|
||
+ key = key->next;
|
||
+ if (! key)
|
||
+ break;
|
||
+
|
||
+ /* Find the beginning and limit of the next field. */
|
||
+ if (key->eword != -1)
|
||
+ lima = limfield (a, key), limb = limfield (b, key);
|
||
+ else
|
||
+ lima = a->text + a->length - 1, limb = b->text + b->length - 1;
|
||
+
|
||
+ if (key->sword != -1)
|
||
+ texta = begfield (a, key), textb = begfield (b, key);
|
||
+ else
|
||
+ {
|
||
+ texta = a->text, textb = b->text;
|
||
+ if (key->skipsblanks)
|
||
+ {
|
||
+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
|
||
+ texta += mblength_a;
|
||
+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
|
||
+ textb += mblength_b;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return 0;
|
||
+
|
||
+greater:
|
||
+ diff = 1;
|
||
+not_equal:
|
||
+ return key->reverse ? -diff : diff;
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Compare two lines A and B, returning negative, zero, or positive
|
||
depending on whether A compares less than, equal to, or greater than B. */
|
||
|
||
@@ -3178,7 +3747,7 @@ main (int argc, char **argv)
|
||
initialize_exit_failure (SORT_FAILURE);
|
||
|
||
hard_LC_COLLATE = hard_locale (LC_COLLATE);
|
||
-#if HAVE_NL_LANGINFO
|
||
+#if HAVE_LANGINFO_CODESET
|
||
hard_LC_TIME = hard_locale (LC_TIME);
|
||
#endif
|
||
|
||
@@ -3199,6 +3768,27 @@ main (int argc, char **argv)
|
||
thousands_sep = -1;
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ inittables = inittables_mb;
|
||
+ begfield = begfield_mb;
|
||
+ limfield = limfield_mb;
|
||
+ getmonth = getmonth_mb;
|
||
+ keycompare = keycompare_mb;
|
||
+ numcompare = numcompare_mb;
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
+ {
|
||
+ inittables = inittables_uni;
|
||
+ begfield = begfield_uni;
|
||
+ limfield = limfield_uni;
|
||
+ getmonth = getmonth_uni;
|
||
+ keycompare = keycompare_uni;
|
||
+ numcompare = numcompare_uni;
|
||
+ }
|
||
+
|
||
have_read_stdin = false;
|
||
inittables ();
|
||
|
||
@@ -3459,13 +4049,35 @@ main (int argc, char **argv)
|
||
|
||
case 't':
|
||
{
|
||
- char newtab = optarg[0];
|
||
- if (! newtab)
|
||
+ char newtab[MB_LEN_MAX + 1];
|
||
+ size_t newtab_length = 1;
|
||
+ strncpy (newtab, optarg, MB_LEN_MAX);
|
||
+ if (! newtab[0])
|
||
error (SORT_FAILURE, 0, _("empty tab"));
|
||
- if (optarg[1])
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ wchar_t wc;
|
||
+ mbstate_t state;
|
||
+ size_t i;
|
||
+
|
||
+ memset (&state, '\0', sizeof (mbstate_t));
|
||
+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
|
||
+ MB_LEN_MAX),
|
||
+ &state);
|
||
+ switch (newtab_length)
|
||
+ {
|
||
+ case (size_t) -1:
|
||
+ case (size_t) -2:
|
||
+ case 0:
|
||
+ newtab_length = 1;
|
||
+ }
|
||
+ }
|
||
+#endif
|
||
+ if (newtab_length == 1 && optarg[1])
|
||
{
|
||
if (STREQ (optarg, "\\0"))
|
||
- newtab = '\0';
|
||
+ newtab[0] = '\0';
|
||
else
|
||
{
|
||
/* Provoke with `sort -txx'. Complain about
|
||
@@ -3476,9 +4088,12 @@ main (int argc, char **argv)
|
||
quote (optarg));
|
||
}
|
||
}
|
||
- if (tab != TAB_DEFAULT && tab != newtab)
|
||
+ if (tab_length
|
||
+ && (tab_length != newtab_length
|
||
+ || memcmp (tab, newtab, tab_length) != 0))
|
||
error (SORT_FAILURE, 0, _("incompatible tabs"));
|
||
- tab = newtab;
|
||
+ memcpy (tab, newtab, newtab_length);
|
||
+ tab_length = newtab_length;
|
||
}
|
||
break;
|
||
|
||
diff -urNp coreutils-8.0-orig/src/sort.c.orig coreutils-8.0/src/sort.c.orig
|
||
--- coreutils-8.0-orig/src/sort.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/sort.c.orig 2009-09-29 15:27:54.000000000 +0200
|
||
@@ -0,0 +1,3697 @@
|
||
+/* sort - sort lines of text (with all kinds of options).
|
||
+ Copyright (C) 1988, 1991-2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
+
|
||
+ Written December 1988 by Mike Haertel.
|
||
+ The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
|
||
+ or (US mail) as Mike Haertel c/o Free Software Foundation.
|
||
+
|
||
+ Ørn E. Hansen added NLS support in 1997. */
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <getopt.h>
|
||
+#include <sys/types.h>
|
||
+#include <sys/wait.h>
|
||
+#include <signal.h>
|
||
+#include "system.h"
|
||
+#include "argmatch.h"
|
||
+#include "error.h"
|
||
+#include "filevercmp.h"
|
||
+#include "hard-locale.h"
|
||
+#include "hash.h"
|
||
+#include "md5.h"
|
||
+#include "physmem.h"
|
||
+#include "posixver.h"
|
||
+#include "quote.h"
|
||
+#include "quotearg.h"
|
||
+#include "randread.h"
|
||
+#include "readtokens0.h"
|
||
+#include "stdio--.h"
|
||
+#include "stdlib--.h"
|
||
+#include "strnumcmp.h"
|
||
+#include "xmemcoll.h"
|
||
+#include "xmemxfrm.h"
|
||
+#include "xstrtol.h"
|
||
+
|
||
+#if HAVE_SYS_RESOURCE_H
|
||
+# include <sys/resource.h>
|
||
+#endif
|
||
+#ifndef RLIMIT_DATA
|
||
+struct rlimit { size_t rlim_cur; };
|
||
+# define getrlimit(Resource, Rlp) (-1)
|
||
+#endif
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "sort"
|
||
+
|
||
+#define AUTHORS \
|
||
+ proper_name ("Mike Haertel"), \
|
||
+ proper_name ("Paul Eggert")
|
||
+
|
||
+#if HAVE_LANGINFO_CODESET
|
||
+# include <langinfo.h>
|
||
+#endif
|
||
+
|
||
+/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
|
||
+ present. */
|
||
+#ifndef SA_NOCLDSTOP
|
||
+# define SA_NOCLDSTOP 0
|
||
+/* No sigprocmask. Always 'return' zero. */
|
||
+# define sigprocmask(How, Set, Oset) (0)
|
||
+# define sigset_t int
|
||
+# if ! HAVE_SIGINTERRUPT
|
||
+# define siginterrupt(sig, flag) /* empty */
|
||
+# endif
|
||
+#endif
|
||
+
|
||
+#if !defined OPEN_MAX && defined NR_OPEN
|
||
+# define OPEN_MAX NR_OPEN
|
||
+#endif
|
||
+#if !defined OPEN_MAX
|
||
+# define OPEN_MAX 20
|
||
+#endif
|
||
+
|
||
+#define UCHAR_LIM (UCHAR_MAX + 1)
|
||
+
|
||
+#ifndef DEFAULT_TMPDIR
|
||
+# define DEFAULT_TMPDIR "/tmp"
|
||
+#endif
|
||
+
|
||
+/* Exit statuses. */
|
||
+enum
|
||
+ {
|
||
+ /* POSIX says to exit with status 1 if invoked with -c and the
|
||
+ input is not properly sorted. */
|
||
+ SORT_OUT_OF_ORDER = 1,
|
||
+
|
||
+ /* POSIX says any other irregular exit must exit with a status
|
||
+ code greater than 1. */
|
||
+ SORT_FAILURE = 2
|
||
+ };
|
||
+
|
||
+enum
|
||
+ {
|
||
+ /* The number of times we should try to fork a compression process
|
||
+ (we retry if the fork call fails). We don't _need_ to compress
|
||
+ temp files, this is just to reduce disk access, so this number
|
||
+ can be small. */
|
||
+ MAX_FORK_TRIES_COMPRESS = 2,
|
||
+
|
||
+ /* The number of times we should try to fork a decompression process.
|
||
+ If we can't fork a decompression process, we can't sort, so this
|
||
+ number should be big. */
|
||
+ MAX_FORK_TRIES_DECOMPRESS = 8
|
||
+ };
|
||
+
|
||
+/* The representation of the decimal point in the current locale. */
|
||
+static int decimal_point;
|
||
+
|
||
+/* Thousands separator; if -1, then there isn't one. */
|
||
+static int thousands_sep;
|
||
+
|
||
+/* Nonzero if the corresponding locales are hard. */
|
||
+static bool hard_LC_COLLATE;
|
||
+#if HAVE_NL_LANGINFO
|
||
+static bool hard_LC_TIME;
|
||
+#endif
|
||
+
|
||
+#define NONZERO(x) ((x) != 0)
|
||
+
|
||
+/* The kind of blanks for '-b' to skip in various options. */
|
||
+enum blanktype { bl_start, bl_end, bl_both };
|
||
+
|
||
+/* The character marking end of line. Default to \n. */
|
||
+static char eolchar = '\n';
|
||
+
|
||
+/* Lines are held in core as counted strings. */
|
||
+struct line
|
||
+{
|
||
+ char *text; /* Text of the line. */
|
||
+ size_t length; /* Length including final newline. */
|
||
+ char *keybeg; /* Start of first key. */
|
||
+ char *keylim; /* Limit of first key. */
|
||
+};
|
||
+
|
||
+/* Input buffers. */
|
||
+struct buffer
|
||
+{
|
||
+ char *buf; /* Dynamically allocated buffer,
|
||
+ partitioned into 3 regions:
|
||
+ - input data;
|
||
+ - unused area;
|
||
+ - an array of lines, in reverse order. */
|
||
+ size_t used; /* Number of bytes used for input data. */
|
||
+ size_t nlines; /* Number of lines in the line array. */
|
||
+ size_t alloc; /* Number of bytes allocated. */
|
||
+ size_t left; /* Number of bytes left from previous reads. */
|
||
+ size_t line_bytes; /* Number of bytes to reserve for each line. */
|
||
+ bool eof; /* An EOF has been read. */
|
||
+};
|
||
+
|
||
+struct keyfield
|
||
+{
|
||
+ size_t sword; /* Zero-origin 'word' to start at. */
|
||
+ size_t schar; /* Additional characters to skip. */
|
||
+ size_t eword; /* Zero-origin first word after field. */
|
||
+ size_t echar; /* Additional characters in field. */
|
||
+ bool const *ignore; /* Boolean array of characters to ignore. */
|
||
+ char const *translate; /* Translation applied to characters. */
|
||
+ bool skipsblanks; /* Skip leading blanks when finding start. */
|
||
+ bool skipeblanks; /* Skip leading blanks when finding end. */
|
||
+ bool numeric; /* Flag for numeric comparison. Handle
|
||
+ strings of digits with optional decimal
|
||
+ point, but no exponential notation. */
|
||
+ bool random; /* Sort by random hash of key. */
|
||
+ bool general_numeric; /* Flag for general, numeric comparison.
|
||
+ Handle numbers in exponential notation. */
|
||
+ bool human_numeric; /* Flag for sorting by human readable
|
||
+ units with either SI xor IEC prefixes. */
|
||
+ int si_present; /* Flag for checking for mixed SI and IEC. */
|
||
+ bool month; /* Flag for comparison by month name. */
|
||
+ bool reverse; /* Reverse the sense of comparison. */
|
||
+ bool version; /* sort by version number */
|
||
+ struct keyfield *next; /* Next keyfield to try. */
|
||
+};
|
||
+
|
||
+struct month
|
||
+{
|
||
+ char const *name;
|
||
+ int val;
|
||
+};
|
||
+
|
||
+/* FIXME: None of these tables work with multibyte character sets.
|
||
+ Also, there are many other bugs when handling multibyte characters.
|
||
+ One way to fix this is to rewrite `sort' to use wide characters
|
||
+ internally, but doing this with good performance is a bit
|
||
+ tricky. */
|
||
+
|
||
+/* Table of blanks. */
|
||
+static bool blanks[UCHAR_LIM];
|
||
+
|
||
+/* Table of non-printing characters. */
|
||
+static bool nonprinting[UCHAR_LIM];
|
||
+
|
||
+/* Table of non-dictionary characters (not letters, digits, or blanks). */
|
||
+static bool nondictionary[UCHAR_LIM];
|
||
+
|
||
+/* Translation table folding lower case to upper. */
|
||
+static char fold_toupper[UCHAR_LIM];
|
||
+
|
||
+#define MONTHS_PER_YEAR 12
|
||
+
|
||
+/* Table mapping month names to integers.
|
||
+ Alphabetic order allows binary search. */
|
||
+static struct month monthtab[] =
|
||
+{
|
||
+ {"APR", 4},
|
||
+ {"AUG", 8},
|
||
+ {"DEC", 12},
|
||
+ {"FEB", 2},
|
||
+ {"JAN", 1},
|
||
+ {"JUL", 7},
|
||
+ {"JUN", 6},
|
||
+ {"MAR", 3},
|
||
+ {"MAY", 5},
|
||
+ {"NOV", 11},
|
||
+ {"OCT", 10},
|
||
+ {"SEP", 9}
|
||
+};
|
||
+
|
||
+/* During the merge phase, the number of files to merge at once. */
|
||
+#define NMERGE_DEFAULT 16
|
||
+
|
||
+/* Minimum size for a merge or check buffer. */
|
||
+#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
|
||
+
|
||
+/* Minimum sort size; the code might not work with smaller sizes. */
|
||
+#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
|
||
+
|
||
+/* The number of bytes needed for a merge or check buffer, which can
|
||
+ function relatively efficiently even if it holds only one line. If
|
||
+ a longer line is seen, this value is increased. */
|
||
+static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
|
||
+
|
||
+/* The approximate maximum number of bytes of main memory to use, as
|
||
+ specified by the user. Zero if the user has not specified a size. */
|
||
+static size_t sort_size;
|
||
+
|
||
+/* The guessed size for non-regular files. */
|
||
+#define INPUT_FILE_SIZE_GUESS (1024 * 1024)
|
||
+
|
||
+/* Array of directory names in which any temporary files are to be created. */
|
||
+static char const **temp_dirs;
|
||
+
|
||
+/* Number of temporary directory names used. */
|
||
+static size_t temp_dir_count;
|
||
+
|
||
+/* Number of allocated slots in temp_dirs. */
|
||
+static size_t temp_dir_alloc;
|
||
+
|
||
+/* Flag to reverse the order of all comparisons. */
|
||
+static bool reverse;
|
||
+
|
||
+/* Flag for stable sort. This turns off the last ditch bytewise
|
||
+ comparison of lines, and instead leaves lines in the same order
|
||
+ they were read if all keys compare equal. */
|
||
+static bool stable;
|
||
+
|
||
+/* If TAB has this value, blanks separate fields. */
|
||
+enum { TAB_DEFAULT = CHAR_MAX + 1 };
|
||
+
|
||
+/* Tab character separating fields. If TAB_DEFAULT, then fields are
|
||
+ separated by the empty string between a non-blank character and a blank
|
||
+ character. */
|
||
+static int tab = TAB_DEFAULT;
|
||
+
|
||
+/* Flag to remove consecutive duplicate lines from the output.
|
||
+ Only the last of a sequence of equal lines will be output. */
|
||
+static bool unique;
|
||
+
|
||
+/* Nonzero if any of the input files are the standard input. */
|
||
+static bool have_read_stdin;
|
||
+
|
||
+/* List of key field comparisons to be tried. */
|
||
+static struct keyfield *keylist;
|
||
+
|
||
+/* Program used to (de)compress temp files. Must accept -d. */
|
||
+static char const *compress_program;
|
||
+
|
||
+/* Maximum number of files to merge in one go. If more than this
|
||
+ number are present, temp files will be used. */
|
||
+static unsigned int nmerge = NMERGE_DEFAULT;
|
||
+
|
||
+static void sortlines_temp (struct line *, size_t, struct line *);
|
||
+
|
||
+/* Report MESSAGE for FILE, then clean up and exit.
|
||
+ If FILE is null, it represents standard output. */
|
||
+
|
||
+static void die (char const *, char const *) ATTRIBUTE_NORETURN;
|
||
+static void
|
||
+die (char const *message, char const *file)
|
||
+{
|
||
+ error (0, errno, "%s: %s", message, file ? file : _("standard output"));
|
||
+ exit (SORT_FAILURE);
|
||
+}
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s [OPTION]... [FILE]...\n\
|
||
+ or: %s [OPTION]... --files0-from=F\n\
|
||
+"),
|
||
+ program_name, program_name);
|
||
+ fputs (_("\
|
||
+Write sorted concatenation of all FILE(s) to standard output.\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Mandatory arguments to long options are mandatory for short options too.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Ordering options:\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -b, --ignore-leading-blanks ignore leading blanks\n\
|
||
+ -d, --dictionary-order consider only blanks and alphanumeric characters\n\
|
||
+ -f, --ignore-case fold lower case to upper case characters\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -g, --general-numeric-sort compare according to general numerical value\n\
|
||
+ -i, --ignore-nonprinting consider only printable characters\n\
|
||
+ -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -n, --numeric-sort compare according to string numerical value\n\
|
||
+ -R, --random-sort sort by random hash of keys\n\
|
||
+ --random-source=FILE get random bytes from FILE\n\
|
||
+ -r, --reverse reverse the result of comparisons\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ --sort=WORD sort according to WORD:\n\
|
||
+ general-numeric -g, human-numeric -h, month -M,\n\
|
||
+ numeric -n, random -R, version -V\n\
|
||
+ -V, --version-sort natural sort of (version) numbers within text\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Other options:\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ --batch-size=NMERGE merge at most NMERGE inputs at once;\n\
|
||
+ for more use temp files\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -c, --check, --check=diagnose-first check for sorted input; do not sort\n\
|
||
+ -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\
|
||
+ --compress-program=PROG compress temporaries with PROG;\n\
|
||
+ decompress them with PROG -d\n\
|
||
+ --files0-from=F read input from the files specified by\n\
|
||
+ NUL-terminated names in file F;\n\
|
||
+ If F is - then read names from standard input\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\
|
||
+ (default end of line)\n\
|
||
+ -m, --merge merge already sorted files; do not sort\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -o, --output=FILE write result to FILE instead of standard output\n\
|
||
+ -s, --stable stabilize sort by disabling last-resort comparison\n\
|
||
+ -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
|
||
+"), stdout);
|
||
+ printf (_("\
|
||
+ -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
|
||
+ -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
|
||
+ multiple options specify multiple directories\n\
|
||
+ -u, --unique with -c, check for strict ordering;\n\
|
||
+ without -c, output only the first of an equal run\n\
|
||
+"), DEFAULT_TMPDIR);
|
||
+ fputs (_("\
|
||
+ -z, --zero-terminated end lines with 0 byte, not newline\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (_("\
|
||
+\n\
|
||
+POS is F[.C][OPTS], where F is the field number and C the character position\n\
|
||
+in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\
|
||
+in a field are counted from the beginning of the preceding whitespace. OPTS is\n\
|
||
+one or more single-letter ordering options, which override global ordering\n\
|
||
+options for that key. If no key is given, use the entire line as the key.\n\
|
||
+\n\
|
||
+SIZE may be followed by the following multiplicative suffixes:\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
|
||
+\n\
|
||
+With no FILE, or when FILE is -, read standard input.\n\
|
||
+\n\
|
||
+*** WARNING ***\n\
|
||
+The locale specified by the environment affects sort order.\n\
|
||
+Set LC_ALL=C to get the traditional sort order that uses\n\
|
||
+native byte values.\n\
|
||
+"), stdout );
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+
|
||
+ exit (status);
|
||
+}
|
||
+
|
||
+/* For long options that have no equivalent short option, use a
|
||
+ non-character as a pseudo short option, starting with CHAR_MAX + 1. */
|
||
+enum
|
||
+{
|
||
+ CHECK_OPTION = CHAR_MAX + 1,
|
||
+ COMPRESS_PROGRAM_OPTION,
|
||
+ FILES0_FROM_OPTION,
|
||
+ NMERGE_OPTION,
|
||
+ RANDOM_SOURCE_OPTION,
|
||
+ SORT_OPTION
|
||
+};
|
||
+
|
||
+static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
|
||
+
|
||
+static struct option const long_options[] =
|
||
+{
|
||
+ {"ignore-leading-blanks", no_argument, NULL, 'b'},
|
||
+ {"check", optional_argument, NULL, CHECK_OPTION},
|
||
+ {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
|
||
+ {"dictionary-order", no_argument, NULL, 'd'},
|
||
+ {"ignore-case", no_argument, NULL, 'f'},
|
||
+ {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
|
||
+ {"general-numeric-sort", no_argument, NULL, 'g'},
|
||
+ {"ignore-nonprinting", no_argument, NULL, 'i'},
|
||
+ {"key", required_argument, NULL, 'k'},
|
||
+ {"merge", no_argument, NULL, 'm'},
|
||
+ {"month-sort", no_argument, NULL, 'M'},
|
||
+ {"numeric-sort", no_argument, NULL, 'n'},
|
||
+ {"human-numeric-sort", no_argument, NULL, 'h'},
|
||
+ {"version-sort", no_argument, NULL, 'V'},
|
||
+ {"random-sort", no_argument, NULL, 'R'},
|
||
+ {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
|
||
+ {"sort", required_argument, NULL, SORT_OPTION},
|
||
+ {"output", required_argument, NULL, 'o'},
|
||
+ {"reverse", no_argument, NULL, 'r'},
|
||
+ {"stable", no_argument, NULL, 's'},
|
||
+ {"batch-size", required_argument, NULL, NMERGE_OPTION},
|
||
+ {"buffer-size", required_argument, NULL, 'S'},
|
||
+ {"field-separator", required_argument, NULL, 't'},
|
||
+ {"temporary-directory", required_argument, NULL, 'T'},
|
||
+ {"unique", no_argument, NULL, 'u'},
|
||
+ {"zero-terminated", no_argument, NULL, 'z'},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0},
|
||
+};
|
||
+
|
||
+#define CHECK_TABLE \
|
||
+ _ct_("quiet", 'C') \
|
||
+ _ct_("silent", 'C') \
|
||
+ _ct_("diagnose-first", 'c')
|
||
+
|
||
+static char const *const check_args[] =
|
||
+{
|
||
+#define _ct_(_s, _c) _s,
|
||
+ CHECK_TABLE NULL
|
||
+#undef _ct_
|
||
+};
|
||
+static char const check_types[] =
|
||
+{
|
||
+#define _ct_(_s, _c) _c,
|
||
+ CHECK_TABLE
|
||
+#undef _ct_
|
||
+};
|
||
+
|
||
+#define SORT_TABLE \
|
||
+ _st_("general-numeric", 'g') \
|
||
+ _st_("human-numeric", 'h') \
|
||
+ _st_("month", 'M') \
|
||
+ _st_("numeric", 'n') \
|
||
+ _st_("random", 'R') \
|
||
+ _st_("version", 'V')
|
||
+
|
||
+static char const *const sort_args[] =
|
||
+{
|
||
+#define _st_(_s, _c) _s,
|
||
+ SORT_TABLE NULL
|
||
+#undef _st_
|
||
+};
|
||
+static char const sort_types[] =
|
||
+{
|
||
+#define _st_(_s, _c) _c,
|
||
+ SORT_TABLE
|
||
+#undef _st_
|
||
+};
|
||
+
|
||
+/* The set of signals that are caught. */
|
||
+static sigset_t caught_signals;
|
||
+
|
||
+/* Critical section status. */
|
||
+struct cs_status
|
||
+{
|
||
+ bool valid;
|
||
+ sigset_t sigs;
|
||
+};
|
||
+
|
||
+/* Enter a critical section. */
|
||
+static struct cs_status
|
||
+cs_enter (void)
|
||
+{
|
||
+ struct cs_status status;
|
||
+ status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
|
||
+ return status;
|
||
+}
|
||
+
|
||
+/* Leave a critical section. */
|
||
+static void
|
||
+cs_leave (struct cs_status status)
|
||
+{
|
||
+ if (status.valid)
|
||
+ {
|
||
+ /* Ignore failure when restoring the signal mask. */
|
||
+ sigprocmask (SIG_SETMASK, &status.sigs, NULL);
|
||
+ }
|
||
+}
|
||
+
|
||
+/* The list of temporary files. */
|
||
+struct tempnode
|
||
+{
|
||
+ struct tempnode *volatile next;
|
||
+ pid_t pid; /* If compressed, the pid of compressor, else zero */
|
||
+ char name[1]; /* Actual size is 1 + file name length. */
|
||
+};
|
||
+static struct tempnode *volatile temphead;
|
||
+static struct tempnode *volatile *temptail = &temphead;
|
||
+
|
||
+struct sortfile
|
||
+{
|
||
+ char const *name;
|
||
+ pid_t pid; /* If compressed, the pid of compressor, else zero */
|
||
+};
|
||
+
|
||
+/* A table where we store compression process states. We clean up all
|
||
+ processes in a timely manner so as not to exhaust system resources,
|
||
+ so we store here whether each process is still running or has already
|
||
+ been reaped. */
|
||
+static Hash_table *proctab;
|
||
+
|
||
+enum { INIT_PROCTAB_SIZE = 47 };
|
||
+
|
||
+enum procstate { ALIVE, ZOMBIE };
|
||
+
|
||
+/* A proctab entry. The COUNT field is there in case we fork a new
|
||
+ compression process that has the same PID as an old zombie process
|
||
+ that is still in the table (because the process to decompress the
|
||
+ temp file it was associated with hasn't started yet). */
|
||
+struct procnode
|
||
+{
|
||
+ pid_t pid;
|
||
+ enum procstate state;
|
||
+ size_t count;
|
||
+};
|
||
+
|
||
+static size_t
|
||
+proctab_hasher (const void *entry, size_t tabsize)
|
||
+{
|
||
+ const struct procnode *node = entry;
|
||
+ return node->pid % tabsize;
|
||
+}
|
||
+
|
||
+static bool
|
||
+proctab_comparator (const void *e1, const void *e2)
|
||
+{
|
||
+ const struct procnode *n1 = e1, *n2 = e2;
|
||
+ return n1->pid == n2->pid;
|
||
+}
|
||
+
|
||
+/* The total number of forked processes (compressors and decompressors)
|
||
+ that have not been reaped yet. */
|
||
+static size_t nprocs;
|
||
+
|
||
+/* The number of child processes we'll allow before we try to reap some. */
|
||
+enum { MAX_PROCS_BEFORE_REAP = 2 };
|
||
+
|
||
+/* If 0 < PID, wait for the child process with that PID to exit.
|
||
+ If PID is -1, clean up an arbitrary child process that has finished and
|
||
+ return the process ID of that child. If PID is -1 and no processes
|
||
+ have quit yet, return 0 without waiting. */
|
||
+
|
||
+static pid_t
|
||
+reap (pid_t pid)
|
||
+{
|
||
+ int status;
|
||
+ pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
|
||
+
|
||
+ if (cpid < 0)
|
||
+ error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
|
||
+ compress_program);
|
||
+ else if (0 < cpid)
|
||
+ {
|
||
+ if (! WIFEXITED (status) || WEXITSTATUS (status))
|
||
+ error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
|
||
+ compress_program);
|
||
+ --nprocs;
|
||
+ }
|
||
+
|
||
+ return cpid;
|
||
+}
|
||
+
|
||
+/* Add the PID of a running compression process to proctab, or update
|
||
+ the entry COUNT and STATE fields if it's already there. This also
|
||
+ creates the table for us the first time it's called. */
|
||
+
|
||
+static void
|
||
+register_proc (pid_t pid)
|
||
+{
|
||
+ struct procnode test, *node;
|
||
+
|
||
+ if (! proctab)
|
||
+ {
|
||
+ proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
|
||
+ proctab_hasher,
|
||
+ proctab_comparator,
|
||
+ free);
|
||
+ if (! proctab)
|
||
+ xalloc_die ();
|
||
+ }
|
||
+
|
||
+ test.pid = pid;
|
||
+ node = hash_lookup (proctab, &test);
|
||
+ if (node)
|
||
+ {
|
||
+ node->state = ALIVE;
|
||
+ ++node->count;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ node = xmalloc (sizeof *node);
|
||
+ node->pid = pid;
|
||
+ node->state = ALIVE;
|
||
+ node->count = 1;
|
||
+ if (hash_insert (proctab, node) == NULL)
|
||
+ xalloc_die ();
|
||
+ }
|
||
+}
|
||
+
|
||
+/* This is called when we reap an arbitrary process. We don't know
|
||
+ whether we have reaped a compression process or a decompression
|
||
+ process until we look in the table. If there's an ALIVE entry for
|
||
+ it, then we have reaped a compression process, so change the state
|
||
+ to ZOMBIE. Otherwise, it's a decompression process, so ignore it. */
|
||
+
|
||
+static void
|
||
+update_proc (pid_t pid)
|
||
+{
|
||
+ struct procnode test, *node;
|
||
+
|
||
+ test.pid = pid;
|
||
+ node = hash_lookup (proctab, &test);
|
||
+ if (node)
|
||
+ node->state = ZOMBIE;
|
||
+}
|
||
+
|
||
+/* This is for when we need to wait for a compression process to exit.
|
||
+ If it has a ZOMBIE entry in the table then it's already dead and has
|
||
+ been reaped. Note that if there's an ALIVE entry for it, it still may
|
||
+ already have died and been reaped if a second process was created with
|
||
+ the same PID. This is probably exceedingly rare, but to be on the safe
|
||
+ side we will have to wait for any compression process with this PID. */
|
||
+
|
||
+static void
|
||
+wait_proc (pid_t pid)
|
||
+{
|
||
+ struct procnode test, *node;
|
||
+
|
||
+ test.pid = pid;
|
||
+ node = hash_lookup (proctab, &test);
|
||
+ if (node->state == ALIVE)
|
||
+ reap (pid);
|
||
+
|
||
+ node->state = ZOMBIE;
|
||
+ if (! --node->count)
|
||
+ {
|
||
+ hash_delete (proctab, node);
|
||
+ free (node);
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Keep reaping finished children as long as there are more to reap.
|
||
+ This doesn't block waiting for any of them, it only reaps those
|
||
+ that are already dead. */
|
||
+
|
||
+static void
|
||
+reap_some (void)
|
||
+{
|
||
+ pid_t pid;
|
||
+
|
||
+ while (0 < nprocs && (pid = reap (-1)))
|
||
+ update_proc (pid);
|
||
+}
|
||
+
|
||
+/* Clean up any remaining temporary files. */
|
||
+
|
||
+static void
|
||
+cleanup (void)
|
||
+{
|
||
+ struct tempnode const *node;
|
||
+
|
||
+ for (node = temphead; node; node = node->next)
|
||
+ unlink (node->name);
|
||
+ temphead = NULL;
|
||
+}
|
||
+
|
||
+/* Cleanup actions to take when exiting. */
|
||
+
|
||
+static void
|
||
+exit_cleanup (void)
|
||
+{
|
||
+ if (temphead)
|
||
+ {
|
||
+ /* Clean up any remaining temporary files in a critical section so
|
||
+ that a signal handler does not try to clean them too. */
|
||
+ struct cs_status cs = cs_enter ();
|
||
+ cleanup ();
|
||
+ cs_leave (cs);
|
||
+ }
|
||
+
|
||
+ close_stdout ();
|
||
+}
|
||
+
|
||
+/* Create a new temporary file, returning its newly allocated tempnode.
|
||
+ Store into *PFD the file descriptor open for writing.
|
||
+ If the creation fails, return NULL and store -1 into *PFD if the
|
||
+ failure is due to file descriptor exhaustion and
|
||
+ SURVIVE_FD_EXHAUSTION; otherwise, die. */
|
||
+
|
||
+static struct tempnode *
|
||
+create_temp_file (int *pfd, bool survive_fd_exhaustion)
|
||
+{
|
||
+ static char const slashbase[] = "/sortXXXXXX";
|
||
+ static size_t temp_dir_index;
|
||
+ int fd;
|
||
+ int saved_errno;
|
||
+ char const *temp_dir = temp_dirs[temp_dir_index];
|
||
+ size_t len = strlen (temp_dir);
|
||
+ struct tempnode *node =
|
||
+ xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
|
||
+ char *file = node->name;
|
||
+ struct cs_status cs;
|
||
+
|
||
+ memcpy (file, temp_dir, len);
|
||
+ memcpy (file + len, slashbase, sizeof slashbase);
|
||
+ node->next = NULL;
|
||
+ node->pid = 0;
|
||
+ if (++temp_dir_index == temp_dir_count)
|
||
+ temp_dir_index = 0;
|
||
+
|
||
+ /* Create the temporary file in a critical section, to avoid races. */
|
||
+ cs = cs_enter ();
|
||
+ fd = mkstemp (file);
|
||
+ if (0 <= fd)
|
||
+ {
|
||
+ *temptail = node;
|
||
+ temptail = &node->next;
|
||
+ }
|
||
+ saved_errno = errno;
|
||
+ cs_leave (cs);
|
||
+ errno = saved_errno;
|
||
+
|
||
+ if (fd < 0)
|
||
+ {
|
||
+ if (! (survive_fd_exhaustion && errno == EMFILE))
|
||
+ error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
|
||
+ quote (temp_dir));
|
||
+ free (node);
|
||
+ node = NULL;
|
||
+ }
|
||
+
|
||
+ *pfd = fd;
|
||
+ return node;
|
||
+}
|
||
+
|
||
+/* Return a stream for FILE, opened with mode HOW. A null FILE means
|
||
+ standard output; HOW should be "w". When opening for input, "-"
|
||
+ means standard input. To avoid confusion, do not return file
|
||
+ descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
|
||
+ opening an ordinary FILE. Return NULL if unsuccessful. */
|
||
+
|
||
+static FILE *
|
||
+stream_open (const char *file, const char *how)
|
||
+{
|
||
+ if (!file)
|
||
+ return stdout;
|
||
+ if (STREQ (file, "-") && *how == 'r')
|
||
+ {
|
||
+ have_read_stdin = true;
|
||
+ return stdin;
|
||
+ }
|
||
+ return fopen (file, how);
|
||
+}
|
||
+
|
||
+/* Same as stream_open, except always return a non-null value; die on
|
||
+ failure. */
|
||
+
|
||
+static FILE *
|
||
+xfopen (const char *file, const char *how)
|
||
+ {
|
||
+ FILE *fp = stream_open (file, how);
|
||
+ if (!fp)
|
||
+ die (_("open failed"), file);
|
||
+ return fp;
|
||
+}
|
||
+
|
||
+/* Close FP, whose name is FILE, and report any errors. */
|
||
+
|
||
+static void
|
||
+xfclose (FILE *fp, char const *file)
|
||
+{
|
||
+ switch (fileno (fp))
|
||
+ {
|
||
+ case STDIN_FILENO:
|
||
+ /* Allow reading stdin from tty more than once. */
|
||
+ if (feof (fp))
|
||
+ clearerr (fp);
|
||
+ break;
|
||
+
|
||
+ case STDOUT_FILENO:
|
||
+ /* Don't close stdout just yet. close_stdout does that. */
|
||
+ if (fflush (fp) != 0)
|
||
+ die (_("fflush failed"), file);
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ if (fclose (fp) != 0)
|
||
+ die (_("close failed"), file);
|
||
+ break;
|
||
+ }
|
||
+}
|
||
+
|
||
+static void
|
||
+dup2_or_die (int oldfd, int newfd)
|
||
+{
|
||
+ if (dup2 (oldfd, newfd) < 0)
|
||
+ error (SORT_FAILURE, errno, _("dup2 failed"));
|
||
+}
|
||
+
|
||
+/* Fork a child process for piping to and do common cleanup. The
|
||
+ TRIES parameter tells us how many times to try to fork before
|
||
+ giving up. Return the PID of the child, or -1 (setting errno)
|
||
+ on failure. */
|
||
+
|
||
+static pid_t
|
||
+pipe_fork (int pipefds[2], size_t tries)
|
||
+{
|
||
+#if HAVE_WORKING_FORK
|
||
+ struct tempnode *saved_temphead;
|
||
+ int saved_errno;
|
||
+ unsigned int wait_retry = 1;
|
||
+ pid_t pid IF_LINT (= -1);
|
||
+ struct cs_status cs;
|
||
+
|
||
+ if (pipe (pipefds) < 0)
|
||
+ return -1;
|
||
+
|
||
+ while (tries--)
|
||
+ {
|
||
+ /* This is so the child process won't delete our temp files
|
||
+ if it receives a signal before exec-ing. */
|
||
+ cs = cs_enter ();
|
||
+ saved_temphead = temphead;
|
||
+ temphead = NULL;
|
||
+
|
||
+ pid = fork ();
|
||
+ saved_errno = errno;
|
||
+ if (pid)
|
||
+ temphead = saved_temphead;
|
||
+
|
||
+ cs_leave (cs);
|
||
+ errno = saved_errno;
|
||
+
|
||
+ if (0 <= pid || errno != EAGAIN)
|
||
+ break;
|
||
+ else
|
||
+ {
|
||
+ sleep (wait_retry);
|
||
+ wait_retry *= 2;
|
||
+ reap_some ();
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (pid < 0)
|
||
+ {
|
||
+ saved_errno = errno;
|
||
+ close (pipefds[0]);
|
||
+ close (pipefds[1]);
|
||
+ errno = saved_errno;
|
||
+ }
|
||
+ else if (pid == 0)
|
||
+ {
|
||
+ close (STDIN_FILENO);
|
||
+ close (STDOUT_FILENO);
|
||
+ }
|
||
+ else
|
||
+ ++nprocs;
|
||
+
|
||
+ return pid;
|
||
+
|
||
+#else /* ! HAVE_WORKING_FORK */
|
||
+ return -1;
|
||
+#endif
|
||
+}
|
||
+
|
||
+/* Create a temporary file and start a compression program to filter output
|
||
+ to that file. Set *PFP to the file handle and if PPID is non-NULL,
|
||
+ set *PPID to the PID of the newly-created process. If the creation
|
||
+ fails, return NULL if the failure is due to file descriptor
|
||
+ exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */
|
||
+
|
||
+static char *
|
||
+maybe_create_temp (FILE **pfp, pid_t *ppid, bool survive_fd_exhaustion)
|
||
+{
|
||
+ int tempfd;
|
||
+ struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion);
|
||
+ char *name;
|
||
+
|
||
+ if (! node)
|
||
+ return NULL;
|
||
+
|
||
+ name = node->name;
|
||
+
|
||
+ if (compress_program)
|
||
+ {
|
||
+ int pipefds[2];
|
||
+
|
||
+ node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
|
||
+ if (0 < node->pid)
|
||
+ {
|
||
+ close (tempfd);
|
||
+ close (pipefds[0]);
|
||
+ tempfd = pipefds[1];
|
||
+
|
||
+ register_proc (node->pid);
|
||
+ }
|
||
+ else if (node->pid == 0)
|
||
+ {
|
||
+ close (pipefds[1]);
|
||
+ dup2_or_die (tempfd, STDOUT_FILENO);
|
||
+ close (tempfd);
|
||
+ dup2_or_die (pipefds[0], STDIN_FILENO);
|
||
+ close (pipefds[0]);
|
||
+
|
||
+ if (execlp (compress_program, compress_program, (char *) NULL) < 0)
|
||
+ error (SORT_FAILURE, errno, _("couldn't execute %s"),
|
||
+ compress_program);
|
||
+ }
|
||
+ else
|
||
+ node->pid = 0;
|
||
+ }
|
||
+
|
||
+ *pfp = fdopen (tempfd, "w");
|
||
+ if (! *pfp)
|
||
+ die (_("couldn't create temporary file"), name);
|
||
+
|
||
+ if (ppid)
|
||
+ *ppid = node->pid;
|
||
+
|
||
+ return name;
|
||
+}
|
||
+
|
||
+/* Create a temporary file and start a compression program to filter output
|
||
+ to that file. Set *PFP to the file handle and if PPID is non-NULL,
|
||
+ set *PPID to the PID of the newly-created process. Die on failure. */
|
||
+
|
||
+static char *
|
||
+create_temp (FILE **pfp, pid_t *ppid)
|
||
+{
|
||
+ return maybe_create_temp (pfp, ppid, false);
|
||
+}
|
||
+
|
||
+/* Open a compressed temp file and start a decompression process through
|
||
+ which to filter the input. PID must be the valid process ID of the
|
||
+ process used to compress the file. Return NULL (setting errno to
|
||
+ EMFILE) if we ran out of file descriptors, and die on any other
|
||
+ kind of failure. */
|
||
+
|
||
+static FILE *
|
||
+open_temp (const char *name, pid_t pid)
|
||
+{
|
||
+ int tempfd, pipefds[2];
|
||
+ FILE *fp = NULL;
|
||
+
|
||
+ wait_proc (pid);
|
||
+
|
||
+ tempfd = open (name, O_RDONLY);
|
||
+ if (tempfd < 0)
|
||
+ return NULL;
|
||
+
|
||
+ switch (pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS))
|
||
+ {
|
||
+ case -1:
|
||
+ if (errno != EMFILE)
|
||
+ error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
|
||
+ compress_program);
|
||
+ close (tempfd);
|
||
+ errno = EMFILE;
|
||
+ break;
|
||
+
|
||
+ case 0:
|
||
+ close (pipefds[0]);
|
||
+ dup2_or_die (tempfd, STDIN_FILENO);
|
||
+ close (tempfd);
|
||
+ dup2_or_die (pipefds[1], STDOUT_FILENO);
|
||
+ close (pipefds[1]);
|
||
+
|
||
+ execlp (compress_program, compress_program, "-d", (char *) NULL);
|
||
+ error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
|
||
+ compress_program);
|
||
+
|
||
+ default:
|
||
+ close (tempfd);
|
||
+ close (pipefds[1]);
|
||
+
|
||
+ fp = fdopen (pipefds[0], "r");
|
||
+ if (! fp)
|
||
+ {
|
||
+ int saved_errno = errno;
|
||
+ close (pipefds[0]);
|
||
+ errno = saved_errno;
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ return fp;
|
||
+}
|
||
+
|
||
+static void
|
||
+write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
|
||
+{
|
||
+ if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
|
||
+ die (_("write failed"), output_file);
|
||
+}
|
||
+
|
||
+/* Append DIR to the array of temporary directory names. */
|
||
+static void
|
||
+add_temp_dir (char const *dir)
|
||
+{
|
||
+ if (temp_dir_count == temp_dir_alloc)
|
||
+ temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
|
||
+
|
||
+ temp_dirs[temp_dir_count++] = dir;
|
||
+}
|
||
+
|
||
+/* Remove NAME from the list of temporary files. */
|
||
+
|
||
+static void
|
||
+zaptemp (const char *name)
|
||
+{
|
||
+ struct tempnode *volatile *pnode;
|
||
+ struct tempnode *node;
|
||
+ struct tempnode *next;
|
||
+ int unlink_status;
|
||
+ int unlink_errno = 0;
|
||
+ struct cs_status cs;
|
||
+
|
||
+ for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
|
||
+ continue;
|
||
+
|
||
+ /* Unlink the temporary file in a critical section to avoid races. */
|
||
+ next = node->next;
|
||
+ cs = cs_enter ();
|
||
+ unlink_status = unlink (name);
|
||
+ unlink_errno = errno;
|
||
+ *pnode = next;
|
||
+ cs_leave (cs);
|
||
+
|
||
+ if (unlink_status != 0)
|
||
+ error (0, unlink_errno, _("warning: cannot remove: %s"), name);
|
||
+ if (! next)
|
||
+ temptail = pnode;
|
||
+ free (node);
|
||
+}
|
||
+
|
||
+#if HAVE_NL_LANGINFO
|
||
+
|
||
+static int
|
||
+struct_month_cmp (const void *m1, const void *m2)
|
||
+{
|
||
+ struct month const *month1 = m1;
|
||
+ struct month const *month2 = m2;
|
||
+ return strcmp (month1->name, month2->name);
|
||
+}
|
||
+
|
||
+#endif
|
||
+
|
||
+/* Initialize the character class tables. */
|
||
+
|
||
+static void
|
||
+inittables (void)
|
||
+{
|
||
+ size_t i;
|
||
+
|
||
+ for (i = 0; i < UCHAR_LIM; ++i)
|
||
+ {
|
||
+ blanks[i] = !! isblank (i);
|
||
+ nonprinting[i] = ! isprint (i);
|
||
+ nondictionary[i] = ! isalnum (i) && ! isblank (i);
|
||
+ fold_toupper[i] = toupper (i);
|
||
+ }
|
||
+
|
||
+#if HAVE_NL_LANGINFO
|
||
+ /* If we're not in the "C" locale, read different names for months. */
|
||
+ if (hard_LC_TIME)
|
||
+ {
|
||
+ for (i = 0; i < MONTHS_PER_YEAR; i++)
|
||
+ {
|
||
+ char const *s;
|
||
+ size_t s_len;
|
||
+ size_t j;
|
||
+ char *name;
|
||
+
|
||
+ s = (char *) nl_langinfo (ABMON_1 + i);
|
||
+ s_len = strlen (s);
|
||
+ monthtab[i].name = name = xmalloc (s_len + 1);
|
||
+ monthtab[i].val = i + 1;
|
||
+
|
||
+ for (j = 0; j < s_len; j++)
|
||
+ name[j] = fold_toupper[to_uchar (s[j])];
|
||
+ name[j] = '\0';
|
||
+ }
|
||
+ qsort ((void *) monthtab, MONTHS_PER_YEAR,
|
||
+ sizeof *monthtab, struct_month_cmp);
|
||
+ }
|
||
+#endif
|
||
+}
|
||
+
|
||
+/* Specify how many inputs may be merged at once.
|
||
+ This may be set on the command-line with the
|
||
+ --batch-size option. */
|
||
+static void
|
||
+specify_nmerge (int oi, char c, char const *s)
|
||
+{
|
||
+ uintmax_t n;
|
||
+ struct rlimit rlimit;
|
||
+ enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
|
||
+
|
||
+ /* Try to find out how many file descriptors we'll be able
|
||
+ to open. We need at least nmerge + 3 (STDIN_FILENO,
|
||
+ STDOUT_FILENO and STDERR_FILENO). */
|
||
+ unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
|
||
+ ? rlimit.rlim_cur
|
||
+ : OPEN_MAX)
|
||
+ - 3);
|
||
+
|
||
+ if (e == LONGINT_OK)
|
||
+ {
|
||
+ nmerge = n;
|
||
+ if (nmerge != n)
|
||
+ e = LONGINT_OVERFLOW;
|
||
+ else
|
||
+ {
|
||
+ if (nmerge < 2)
|
||
+ {
|
||
+ error (0, 0, _("invalid --%s argument %s"),
|
||
+ long_options[oi].name, quote(s));
|
||
+ error (SORT_FAILURE, 0,
|
||
+ _("minimum --%s argument is %s"),
|
||
+ long_options[oi].name, quote("2"));
|
||
+ }
|
||
+ else if (max_nmerge < nmerge)
|
||
+ {
|
||
+ e = LONGINT_OVERFLOW;
|
||
+ }
|
||
+ else
|
||
+ return;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (e == LONGINT_OVERFLOW)
|
||
+ {
|
||
+ char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
|
||
+ error (0, 0, _("--%s argument %s too large"),
|
||
+ long_options[oi].name, quote(s));
|
||
+ error (SORT_FAILURE, 0,
|
||
+ _("maximum --%s argument with current rlimit is %s"),
|
||
+ long_options[oi].name,
|
||
+ uinttostr (max_nmerge, max_nmerge_buf));
|
||
+ }
|
||
+ else
|
||
+ xstrtol_fatal (e, oi, c, long_options, s);
|
||
+}
|
||
+
|
||
+/* Specify the amount of main memory to use when sorting. */
|
||
+static void
|
||
+specify_sort_size (int oi, char c, char const *s)
|
||
+{
|
||
+ uintmax_t n;
|
||
+ char *suffix;
|
||
+ enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
|
||
+
|
||
+ /* The default unit is KiB. */
|
||
+ if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
|
||
+ {
|
||
+ if (n <= UINTMAX_MAX / 1024)
|
||
+ n *= 1024;
|
||
+ else
|
||
+ e = LONGINT_OVERFLOW;
|
||
+ }
|
||
+
|
||
+ /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
|
||
+ if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
|
||
+ switch (suffix[0])
|
||
+ {
|
||
+ case 'b':
|
||
+ e = LONGINT_OK;
|
||
+ break;
|
||
+
|
||
+ case '%':
|
||
+ {
|
||
+ double mem = physmem_total () * n / 100;
|
||
+
|
||
+ /* Use "<", not "<=", to avoid problems with rounding. */
|
||
+ if (mem < UINTMAX_MAX)
|
||
+ {
|
||
+ n = mem;
|
||
+ e = LONGINT_OK;
|
||
+ }
|
||
+ else
|
||
+ e = LONGINT_OVERFLOW;
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ if (e == LONGINT_OK)
|
||
+ {
|
||
+ /* If multiple sort sizes are specified, take the maximum, so
|
||
+ that option order does not matter. */
|
||
+ if (n < sort_size)
|
||
+ return;
|
||
+
|
||
+ sort_size = n;
|
||
+ if (sort_size == n)
|
||
+ {
|
||
+ sort_size = MAX (sort_size, MIN_SORT_SIZE);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ e = LONGINT_OVERFLOW;
|
||
+ }
|
||
+
|
||
+ xstrtol_fatal (e, oi, c, long_options, s);
|
||
+}
|
||
+
|
||
+/* Return the default sort size. */
|
||
+static size_t
|
||
+default_sort_size (void)
|
||
+{
|
||
+ /* Let MEM be available memory or 1/8 of total memory, whichever
|
||
+ is greater. */
|
||
+ double avail = physmem_available ();
|
||
+ double total = physmem_total ();
|
||
+ double mem = MAX (avail, total / 8);
|
||
+ struct rlimit rlimit;
|
||
+
|
||
+ /* Let SIZE be MEM, but no more than the maximum object size or
|
||
+ system resource limits. Avoid the MIN macro here, as it is not
|
||
+ quite right when only one argument is floating point. Don't
|
||
+ bother to check for values like RLIM_INFINITY since in practice
|
||
+ they are not much less than SIZE_MAX. */
|
||
+ size_t size = SIZE_MAX;
|
||
+ if (mem < size)
|
||
+ size = mem;
|
||
+ if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
|
||
+ size = rlimit.rlim_cur;
|
||
+#ifdef RLIMIT_AS
|
||
+ if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
|
||
+ size = rlimit.rlim_cur;
|
||
+#endif
|
||
+
|
||
+ /* Leave a large safety margin for the above limits, as failure can
|
||
+ occur when they are exceeded. */
|
||
+ size /= 2;
|
||
+
|
||
+#ifdef RLIMIT_RSS
|
||
+ /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
|
||
+ Exceeding RSS is not fatal, but can be quite slow. */
|
||
+ if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
|
||
+ size = rlimit.rlim_cur / 16 * 15;
|
||
+#endif
|
||
+
|
||
+ /* Use no less than the minimum. */
|
||
+ return MAX (size, MIN_SORT_SIZE);
|
||
+}
|
||
+
|
||
+/* Return the sort buffer size to use with the input files identified
|
||
+ by FPS and FILES, which are alternate names of the same files.
|
||
+ NFILES gives the number of input files; NFPS may be less. Assume
|
||
+ that each input line requires LINE_BYTES extra bytes' worth of line
|
||
+ information. Do not exceed the size bound specified by the user
|
||
+ (or a default size bound, if the user does not specify one). */
|
||
+
|
||
+static size_t
|
||
+sort_buffer_size (FILE *const *fps, size_t nfps,
|
||
+ char *const *files, size_t nfiles,
|
||
+ size_t line_bytes)
|
||
+{
|
||
+ /* A bound on the input size. If zero, the bound hasn't been
|
||
+ determined yet. */
|
||
+ static size_t size_bound;
|
||
+
|
||
+ /* In the worst case, each input byte is a newline. */
|
||
+ size_t worst_case_per_input_byte = line_bytes + 1;
|
||
+
|
||
+ /* Keep enough room for one extra input line and an extra byte.
|
||
+ This extra room might be needed when preparing to read EOF. */
|
||
+ size_t size = worst_case_per_input_byte + 1;
|
||
+
|
||
+ size_t i;
|
||
+
|
||
+ for (i = 0; i < nfiles; i++)
|
||
+ {
|
||
+ struct stat st;
|
||
+ off_t file_size;
|
||
+ size_t worst_case;
|
||
+
|
||
+ if ((i < nfps ? fstat (fileno (fps[i]), &st)
|
||
+ : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
|
||
+ : stat (files[i], &st))
|
||
+ != 0)
|
||
+ die (_("stat failed"), files[i]);
|
||
+
|
||
+ if (S_ISREG (st.st_mode))
|
||
+ file_size = st.st_size;
|
||
+ else
|
||
+ {
|
||
+ /* The file has unknown size. If the user specified a sort
|
||
+ buffer size, use that; otherwise, guess the size. */
|
||
+ if (sort_size)
|
||
+ return sort_size;
|
||
+ file_size = INPUT_FILE_SIZE_GUESS;
|
||
+ }
|
||
+
|
||
+ if (! size_bound)
|
||
+ {
|
||
+ size_bound = sort_size;
|
||
+ if (! size_bound)
|
||
+ size_bound = default_sort_size ();
|
||
+ }
|
||
+
|
||
+ /* Add the amount of memory needed to represent the worst case
|
||
+ where the input consists entirely of newlines followed by a
|
||
+ single non-newline. Check for overflow. */
|
||
+ worst_case = file_size * worst_case_per_input_byte + 1;
|
||
+ if (file_size != worst_case / worst_case_per_input_byte
|
||
+ || size_bound - size <= worst_case)
|
||
+ return size_bound;
|
||
+ size += worst_case;
|
||
+ }
|
||
+
|
||
+ return size;
|
||
+}
|
||
+
|
||
+/* Prepare BUF for use.  Every line will consume LINE_BYTES bytes of
+   line-table space (LINE_BYTES must be at least sizeof (struct line)),
+   and ALLOC is the number of bytes to try to allocate initially.  */
+
+static void
+initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
+{
+  /* Round the request up so the line array at the end of the buffer
+     is properly aligned.  Each time malloc fails, retry with half the
+     size; a smaller buffer may cost performance but beats failing.
+     Give up once the size can no longer hold even one line.  */
+  do
+    {
+      alloc += sizeof (struct line) - alloc % sizeof (struct line);
+      buf->buf = malloc (alloc);
+      if (! buf->buf)
+        {
+          alloc /= 2;
+          if (alloc <= line_bytes + 1)
+            xalloc_die ();
+        }
+    }
+  while (! buf->buf);
+
+  buf->eof = false;
+  buf->nlines = 0;
+  buf->left = 0;
+  buf->used = 0;
+  buf->alloc = alloc;
+  buf->line_bytes = line_bytes;
+}
|
||
+
|
||
+/* Return the address just past the end of BUF's line array, i.e. the
+   end of BUF's allocated storage viewed as struct line.  */
+
+static inline struct line *
+buffer_linelim (struct buffer const *buf)
+{
+  char *storage_end = buf->buf + buf->alloc;
+  return (struct line *) storage_end;
+}
|
||
+
|
||
+/* Return a pointer to the first character of the field specified
+   by KEY in LINE.  The result never goes past LINE's final byte
+   (LINE->text + LINE->length - 1).  */
+
+static char *
+begfield (const struct line *line, const struct keyfield *key)
+{
+  char *ptr = line->text, *lim = ptr + line->length - 1;
+  size_t sword = key->sword;
+  size_t schar = key->schar;
+
+  /* The leading field separator itself is included in a field when -t
+     is absent.  */
+
+  if (tab != TAB_DEFAULT)
+    while (ptr < lim && sword--)
+      {
+        while (ptr < lim && *ptr != tab)
+          ++ptr;
+        if (ptr < lim)
+          ++ptr;
+      }
+  else
+    while (ptr < lim && sword--)
+      {
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+        while (ptr < lim && !blanks[to_uchar (*ptr)])
+          ++ptr;
+      }
+
+  /* If we're ignoring leading blanks when computing the start
+     of the field, skip past them here.  */
+  if (key->skipsblanks)
+    while (ptr < lim && blanks[to_uchar (*ptr)])
+      ++ptr;
+
+  /* Advance PTR by SCHAR (if possible), but no further than LIM.
+     Compare SCHAR with the number of bytes remaining instead of
+     forming PTR + SCHAR: with a huge SCHAR that sum would point far
+     outside the buffer, which is undefined and may wrap around so
+     that the clamp to LIM is silently skipped.  */
+  size_t remaining_bytes = lim - ptr;
+  if (schar < remaining_bytes)
+    ptr += schar;
+  else
+    ptr = lim;
+
+  return ptr;
+}
|
||
+
|
||
+/* Return the limit of (a pointer to the first character after) the field
+   in LINE specified by KEY.  The result never goes past LINE's final
+   byte (LINE->text + LINE->length - 1).  */
+
+static char *
+limfield (const struct line *line, const struct keyfield *key)
+{
+  char *ptr = line->text, *lim = ptr + line->length - 1;
+  size_t eword = key->eword, echar = key->echar;
+
+  if (echar == 0)
+    eword++; /* Skip all of end field.  */
+
+  /* Move PTR past EWORD fields or to one past the last byte on LINE,
+     whichever comes first.  If there are more than EWORD fields, leave
+     PTR pointing at the beginning of the field having zero-based index,
+     EWORD.  If a delimiter character was specified (via -t), then that
+     `beginning' is the first character following the delimiting TAB.
+     Otherwise, leave PTR pointing at the first `blank' character after
+     the preceding field.  */
+  if (tab != TAB_DEFAULT)
+    while (ptr < lim && eword--)
+      {
+        while (ptr < lim && *ptr != tab)
+          ++ptr;
+        if (ptr < lim && (eword || echar))
+          ++ptr;
+      }
+  else
+    while (ptr < lim && eword--)
+      {
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+        while (ptr < lim && !blanks[to_uchar (*ptr)])
+          ++ptr;
+      }
+
+#ifdef POSIX_UNSPECIFIED
+  /* The following block of code makes GNU sort incompatible with
+     standard Unix sort, so it's ifdef'd out for now.
+     The POSIX spec isn't clear on how to interpret this.
+     FIXME: request clarification.
+
+     From: kwzh@gnu.ai.mit.edu (Karl Heuer)
+     Date: Thu, 30 May 96 12:20:41 -0400
+     [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
+
+     [...]I believe I've found another bug in `sort'.
+
+     $ cat /tmp/sort.in
+     a b c 2 d
+     pq rs 1 t
+     $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
+     a b c 2 d
+     pq rs 1 t
+     $ /bin/sort -k1.7,1.7 </tmp/sort.in
+     pq rs 1 t
+     a b c 2 d
+
+     Unix sort produced the answer I expected: sort on the single character
+     in column 7.  GNU sort produced different results, because it disagrees
+     on the interpretation of the key-end spec "M.N".  Unix sort reads this
+     as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
+     "skip M-1 fields, then either N-1 characters or the rest of the current
+     field, whichever comes first".  This extra clause applies only to
+     key-ends, not key-starts.
+     */
+
+  /* Make LIM point to the end of (one byte past) the current field.  */
+  if (tab != TAB_DEFAULT)
+    {
+      char *newlim;
+      newlim = memchr (ptr, tab, lim - ptr);
+      if (newlim)
+        lim = newlim;
+    }
+  else
+    {
+      char *newlim;
+      newlim = ptr;
+      while (newlim < lim && blanks[to_uchar (*newlim)])
+        ++newlim;
+      while (newlim < lim && !blanks[to_uchar (*newlim)])
+        ++newlim;
+      lim = newlim;
+    }
+#endif
+
+  if (echar != 0) /* We need to skip over a portion of the end field.  */
+    {
+      /* If we're ignoring leading blanks when computing the end
+         of the field, skip past them here.  */
+      if (key->skipeblanks)
+        while (ptr < lim && blanks[to_uchar (*ptr)])
+          ++ptr;
+
+      /* Advance PTR by ECHAR (if possible), but no further than LIM.
+         Compare ECHAR with the number of bytes remaining instead of
+         forming PTR + ECHAR: with a huge ECHAR that sum would point
+         far outside the buffer, which is undefined and may wrap
+         around so that the clamp to LIM is silently skipped.  */
+      size_t remaining_bytes = lim - ptr;
+      if (echar < remaining_bytes)
+        ptr += echar;
+      else
+        ptr = lim;
+    }
+
+  return ptr;
+}
|
||
+
|
||
+/* Fill BUF reading from FP, first moving the buf->left leftover bytes from the end
|
||
+ of the used part of buf->buf to the beginning. If EOF is reached and the
|
||
+ input did not end with the end-of-line byte (eolchar), supply one. Set up BUF's line
|
||
+ table too, precomputing the first key's bounds when a key list exists. FILE is the name of the file corresponding to FP; it is used in the read-failure diagnostic.
|
||
+ Return true once at least one complete line is in BUF; return false if BUF was already at EOF or EOF is hit with nothing buffered. When not even one line fits, the buffer is doubled and reading continues. */
|
||
+
|
||
+static bool
|
||
+fillbuf (struct buffer *buf, FILE *fp, char const *file)
|
||
+{
|
||
+ struct keyfield const *key = keylist;
|
||
+ char eol = eolchar;
|
||
+ size_t line_bytes = buf->line_bytes;
|
||
+ size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; /* Running maximum line length; written back to merge_buffer_size on success. */
|
||
+
|
||
+ if (buf->eof)
|
||
+ return false;
|
||
+
|
||
+ if (buf->used != buf->left) /* The leftover bytes are not already at the front. */
|
||
+ {
|
||
+ memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
|
||
+ buf->used = buf->left;
|
||
+ buf->nlines = 0;
|
||
+ }
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ char *ptr = buf->buf + buf->used;
|
||
+ struct line *linelim = buffer_linelim (buf);
|
||
+ struct line *line = linelim - buf->nlines; /* Line entries are filled downward from LINELIM. */
|
||
+ size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; /* Free bytes between the text and the line table. */
|
||
+ char *line_start = buf->nlines ? line->text + line->length : buf->buf;
|
||
+
|
||
+ while (line_bytes + 1 < avail)
|
||
+ {
|
||
+ /* Read as many bytes as possible, but do not read so many
|
||
+ bytes that there might not be enough room for the
|
||
+ corresponding line array. The worst case is when the
|
||
+ rest of the input file consists entirely of newlines,
|
||
+ except that the last byte is not a newline. */
|
||
+ size_t readsize = (avail - 1) / (line_bytes + 1);
|
||
+ size_t bytes_read = fread (ptr, 1, readsize, fp);
|
||
+ char *ptrlim = ptr + bytes_read;
|
||
+ char *p;
|
||
+ avail -= bytes_read;
|
||
+
|
||
+ if (bytes_read != readsize) /* Short read: either an error or end of file. */
|
||
+ {
|
||
+ if (ferror (fp))
|
||
+ die (_("read failed"), file);
|
||
+ if (feof (fp))
|
||
+ {
|
||
+ buf->eof = true;
|
||
+ if (buf->buf == ptrlim) /* Nothing at all is buffered. */
|
||
+ return false;
|
||
+ if (ptrlim[-1] != eol)
|
||
+ *ptrlim++ = eol; /* Terminate the final, unterminated line. */
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Find and record each line in the just-read input. */
|
||
+ while ((p = memchr (ptr, eol, ptrlim - ptr)))
|
||
+ {
|
||
+ ptr = p + 1;
|
||
+ line--; /* Claim the next lower line-table slot. */
|
||
+ line->text = line_start;
|
||
+ line->length = ptr - line_start; /* Length includes the end-of-line byte. */
|
||
+ mergesize = MAX (mergesize, line->length);
|
||
+ avail -= line_bytes; /* Account for the slot just used. */
|
||
+
|
||
+ if (key)
|
||
+ {
|
||
+ /* Precompute the position of the first key for
|
||
+ efficiency. */
|
||
+ line->keylim = (key->eword == SIZE_MAX
|
||
+ ? p
|
||
+ : limfield (line, key));
|
||
+
|
||
+ if (key->sword != SIZE_MAX)
|
||
+ line->keybeg = begfield (line, key);
|
||
+ else
|
||
+ {
|
||
+ if (key->skipsblanks)
|
||
+ while (blanks[to_uchar (*line_start)])
|
||
+ line_start++;
|
||
+ line->keybeg = line_start;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ line_start = ptr;
|
||
+ }
|
||
+
|
||
+ ptr = ptrlim;
|
||
+ if (buf->eof)
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ buf->used = ptr - buf->buf;
|
||
+ buf->nlines = buffer_linelim (buf) - line;
|
||
+ if (buf->nlines != 0)
|
||
+ {
|
||
+ buf->left = ptr - line_start; /* Bytes of the trailing incomplete line, kept for the next call. */
|
||
+ merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
|
||
+ return true;
|
||
+ }
|
||
+
|
||
+ {
|
||
+ /* The current input line is too long to fit in the buffer.
|
||
+ Double the buffer size and try again, keeping it properly
|
||
+ aligned. */
|
||
+ size_t line_alloc = buf->alloc / sizeof (struct line);
|
||
+ buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
|
||
+ buf->alloc = line_alloc * sizeof (struct line);
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Compare the numeric strings A and B textually, without converting
+   them to machine numbers.  Leading blanks in either string are
+   skipped before the comparison is handed to strnumcmp.  */
+
+static int
+numcompare (const char *a, const char *b)
+{
+  for (; blanks[to_uchar (*a)]; a++)
+    continue;
+  for (; blanks[to_uchar (*b)]; b++)
+    continue;
+
+  return strnumcmp (a, b, decimal_point, thousands_sep);
+}
|
||
+
|
||
+/* PREFIX is the character following a unit letter.  Record in KEY
+   whether it is 'i', and exit with an error if that disagrees with
+   what an earlier call recorded (KEY->si_present other than -1).  */
+
+static void
+check_mixed_SI_IEC (char prefix, struct keyfield *key)
+{
+  int has_i = (prefix == 'i');
+
+  if (key->si_present != -1 && key->si_present != has_i)
+    error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
+
+  key->si_present = has_i;
+}
|
||
+
|
||
+/* Return an integer which represents the order of magnitude of
+   the unit following the number.  NUMBER can contain thousands separators
+   or a decimal point, but must not have preceding blanks.
+   Negative numbers return a negative unit order.  When a unit is
+   found, the byte after it is passed to check_mixed_SI_IEC with KEY.  */
+
+static int
+find_unit_order (const char *number, struct keyfield *key)
+{
+  static const char orders [UCHAR_LIM] =
+    {
+#if SOME_DAY_WE_WILL_REQUIRE_C99
+      ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
+      ['k']=1,
+#else
+      /* Generate the following table with this command:
+         perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
+         foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
+         |fmt  */
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
+      0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif
+    };
+
+  /* Scan through unsigned bytes so that *P can index ORDERS directly.
+     The cast is required: `char' and `unsigned char' pointers are not
+     assignment-compatible, so the conversion must be explicit.  */
+  const unsigned char *p = (const unsigned char *) number;
+
+  int sign = 1;
+
+  if (*p == '-')
+    {
+      sign = -1;
+      p++;
+    }
+
+  /* Scan to end of number.
+     Decimals or separators not followed by digits stop the scan.
+     Numbers ending in decimals or separators are thus considered
+     to be lacking in units.
+     FIXME: add support for multibyte thousands_sep and decimal_point.  */
+
+  while (ISDIGIT (*p))
+    {
+      p++;
+
+      if (*p == decimal_point && ISDIGIT (*(p + 1)))
+        p += 2;
+      else if (*p == thousands_sep && ISDIGIT (*(p + 1)))
+        p += 2;
+    }
+
+  int order = orders[*p];
+
+  /* For valid units check for MiB vs MB etc.  */
+  if (order)
+    check_mixed_SI_IEC (*(p + 1), key);
+
+  return sign * order;
+}
|
||
+
|
||
+/* Compare A and B as numbers that may end in a unit letter, using
+   SI xor IEC prefixes:
+     <none/unknown> < K/k < M < G < T < P < E < Z < Y
+   The unit orders are compared first; only when they are equal are
+   the numbers themselves compared.  This assumes the input is
+   properly abbreviated, i.e. it never has both 6000K and 5M.  */
+
+static int
+human_numcompare (const char *a, const char *b, struct keyfield *key)
+{
+  int unit_a;
+  int unit_b;
+
+  for (; blanks[to_uchar (*a)]; a++)
+    continue;
+  for (; blanks[to_uchar (*b)]; b++)
+    continue;
+
+  /* A is examined before B; each call may update KEY or exit.  */
+  unit_a = find_unit_order (a, key);
+  unit_b = find_unit_order (b, key);
+
+  if (unit_a != unit_b)
+    return unit_a > unit_b ? 1 : -1;
+
+  return strnumcmp (a, b, decimal_point, thousands_sep);
+}
|
||
+
|
||
+/* Compare SA and SB as floating-point numbers parsed by strtod.
+   Return -1, 0 or 1, except that two NaNs are ordered by memcmp of
+   their representations.  */
+
+static int
+general_numcompare (const char *sa, const char *sb)
+{
+  /* FIXME: add option to warn about failed conversions.  */
+  /* FIXME: maybe add option to try expensive FP conversion
+     only if A and B can't be compared more cheaply/accurately.  */
+
+  char *end_a;
+  char *end_b;
+  double x = strtod (sa, &end_a);
+  double y = strtod (sb, &end_b);
+  int a_unparsed = (sa == end_a);
+  int b_unparsed = (sb == end_b);
+
+  /* Strings that do not parse at all sort before everything else.  */
+  if (a_unparsed)
+    return b_unparsed ? 0 : -1;
+  if (b_unparsed)
+    return 1;
+
+  /* Ordinary numbers, where -0 == +0.  */
+  if (x < y)
+    return -1;
+  if (x > y)
+    return 1;
+  if (x == y)
+    return 0;
+
+  /* At least one operand is a NaN.  NaNs sort after conversion
+     errors but before numbers.  */
+  if (y == y)
+    return -1;
+  if (x == x)
+    return 1;
+
+  /* Both are NaNs: order them by internal bit-pattern, for lack of
+     a more portable alternative.  */
+  return memcmp ((char *) &x, (char *) &y, sizeof x);
+}
|
||
+
|
||
+/* Look up the month name found in the LEN bytes starting at MONTH,
+   after skipping leading blanks.  The input is folded through
+   fold_toupper and matched against the names in monthtab by binary
+   search; a table name need only match the start of the input.
+   Return the matching entry's value (an integer in 1..12), or 0 if
+   the name is not recognized.  */
+
+static int
+getmonth (char const *month, size_t len)
+{
+  char const *end = month + len;
+  size_t lo = 0;
+  size_t hi = MONTHS_PER_YEAR;
+
+  /* An empty or all-blank key names no month.  */
+  while (month != end && blanks[to_uchar (*month)])
+    month++;
+  if (month == end)
+    return 0;
+
+  do
+    {
+      size_t mid = (lo + hi) / 2;
+      char const *name = monthtab[mid].name;
+      char const *p = month;
+
+      for (;;)
+        {
+          /* The whole table name matched.  */
+          if (*name == '\0')
+            return monthtab[mid].val;
+
+          /* Input exhausted or smaller: search the lower half.  */
+          if (p == end || fold_toupper[to_uchar (*p)] < to_uchar (*name))
+            {
+              hi = mid;
+              break;
+            }
+
+          /* Input larger: search the upper half.  */
+          if (fold_toupper[to_uchar (*p)] > to_uchar (*name))
+            {
+              lo = mid + 1;
+              break;
+            }
+
+          p++;
+          name++;
+        }
+    }
+  while (lo < hi);
+
+  return 0;
+}
|
||
+
|
||
+/* The source from which random bytes are read.  */
+static struct randread_source *randread_source;
+
+/* Return a copy of the Ith randomly-seeded MD5 state.  States are
+   created lazily and in order, so the caller must have invoked
+   random_state (H) for every H less than I before asking for I.  */
+
+static struct md5_ctx
+random_state (size_t i)
+{
+  /* Table of states created so far, with the number of entries in
+     use and the number allocated.  */
+  static struct md5_ctx *state;
+  static size_t used;
+  static size_t allocated;
+
+  if (i >= used)
+    {
+      unsigned char seed[MD5_DIGEST_SIZE];
+
+      used++;
+
+      if (i >= allocated)
+        state = X2NREALLOC (state, &allocated);
+
+      /* Seed a fresh context with random bytes.  */
+      randread (randread_source, seed, sizeof seed);
+      md5_init_ctx (&state[i]);
+      md5_process_bytes (seed, sizeof seed, &state[i]);
+    }
+
+  return state[i];
+}
|
||
+
|
||
+/* Compare the randomized MD5 digests of TEXTA (length LENA) and TEXTB
+   (length LENB).  Return negative if A's digest is less, zero if the
+   texts are identical, positive if A's digest is greater.  */
+
+static int
+cmp_hashes (char const *texta, size_t lena,
+            char const *textb, size_t lenb)
+{
+  /* Keep trying successive random states until the two digests
+     differ.  If the very first pair agrees, check whether the keys
+     are byte-for-byte identical and, if so, report no difference.  */
+  size_t round = 0;
+
+  for (;;)
+    {
+      uint32_t digest_a[MD5_DIGEST_SIZE / sizeof (uint32_t)];
+      uint32_t digest_b[MD5_DIGEST_SIZE / sizeof (uint32_t)];
+      struct md5_ctx ctx_a = random_state (round);
+      struct md5_ctx ctx_b = ctx_a;
+      int diff;
+
+      md5_process_bytes (texta, lena, &ctx_a);
+      md5_finish_ctx (&ctx_a, digest_a);
+      md5_process_bytes (textb, lenb, &ctx_b);
+      md5_finish_ctx (&ctx_b, digest_b);
+
+      diff = memcmp (digest_a, digest_b, sizeof digest_a);
+      if (diff != 0)
+        return diff;
+      if (round == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
+        return 0;
+
+      round++;
+    }
+}
|
||
+
|
||
+/* Order the keys TEXTA (of length LENA) and TEXTB (of length LENB)
+   by one or more random hash functions.  When hard_LC_COLLATE is set
+   the keys are first transformed with xmemxfrm, so that byte strings
+   which would otherwise be considered equal hash equal too.  */
+
+static int
+compare_random (char *restrict texta, size_t lena,
+                char *restrict textb, size_t lenb)
+{
+  if (! hard_LC_COLLATE)
+    return cmp_hashes (texta, lena, textb, lenb);
+
+  char stackbuf[4000];
+  char *xfrm;
+  int diff;
+
+  /* Try to transform both keys back to back in the stack buffer.  B
+     is only measured (null buffer) if A already failed to fit.  */
+  size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
+  bool a_fits = tlena <= sizeof stackbuf;
+  size_t tlenb;
+
+  if (a_fits)
+    tlenb = xmemxfrm (stackbuf + tlena, sizeof stackbuf - tlena,
+                      textb, lenb);
+  else
+    tlenb = xmemxfrm (NULL, 0, textb, lenb);
+
+  if (a_fits && tlena + tlenb <= sizeof stackbuf)
+    xfrm = stackbuf;
+  else
+    {
+      /* Adding 1 to the buffer size lets xmemxfrm run a bit
+         faster by avoiding the need for an extra buffer copy.  */
+      xfrm = xmalloc (tlena + tlenb + 1);
+      xmemxfrm (xfrm, tlena + 1, texta, lena);
+      xmemxfrm (xfrm + tlena, tlenb + 1, textb, lenb);
+    }
+
+  diff = cmp_hashes (xfrm, tlena, xfrm + tlena, tlenb);
+
+  if (xfrm != stackbuf)
+    free (xfrm);
+
+  return diff;
+}
|
||
+
|
||
+/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
+   with filevercmp; see lib/filevercmp.h for what that function does.
+   The byte just past each key is overwritten with NUL for the
+   duration of the call and restored afterwards.  */
+
+static int
+compare_version (char *restrict texta, size_t lena,
+                 char *restrict textb, size_t lenb)
+{
+  /* filevercmp needs NUL-terminated strings, but our blocks of text
+     are not necessarily terminated that way, so remember the byte
+     after each field before clobbering it.  */
+  char *enda = texta + lena;
+  char *endb = textb + lenb;
+  char saved_a = *enda;
+  char saved_b = *endb;
+  int result;
+
+  *enda = '\0';
+  *endb = '\0';
+
+  result = filevercmp (texta, textb);
+
+  *enda = saved_a;
+  *endb = saved_b;
+
+  return result;
+}
|
||
+
|
||
+/* Compare two lines A and B trying every key in KEYLIST in sequence until there
|
||
+ are no more keys or a difference is found. Return 0 if every key compares equal; otherwise return the difference found, negated when the deciding key has its reverse flag set. */
|
||
+
|
||
+static int
|
||
+keycompare (const struct line *a, const struct line *b)
|
||
+{
|
||
+ struct keyfield *key = keylist;
|
||
+
|
||
+ /* For the first iteration only, the key positions have been
|
||
+ precomputed for us. */
|
||
+ char *texta = a->keybeg;
|
||
+ char *textb = b->keybeg;
|
||
+ char *lima = a->keylim;
|
||
+ char *limb = b->keylim;
|
||
+
|
||
+ int diff;
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ char const *translate = key->translate;
|
||
+ bool const *ignore = key->ignore;
|
||
+
|
||
+ /* Treat field ends before field starts as empty fields. */
|
||
+ lima = MAX (texta, lima);
|
||
+ limb = MAX (textb, limb);
|
||
+
|
||
+ /* Find the lengths. */
|
||
+ size_t lena = lima - texta;
|
||
+ size_t lenb = limb - textb;
|
||
+
|
||
+ /* Actually compare the fields. */
|
||
+
|
||
+ if (key->random)
|
||
+ diff = compare_random (texta, lena, textb, lenb);
|
||
+ else if (key->numeric || key->general_numeric || key->human_numeric)
|
||
+ {
|
||
+ char savea = *lima, saveb = *limb; /* Bytes just past each field, saved so the fields can be NUL-terminated in place. */
|
||
+
|
||
+ *lima = *limb = '\0';
|
||
+ diff = (key->numeric ? numcompare (texta, textb)
|
||
+ : key->general_numeric ? general_numcompare (texta, textb)
|
||
+ : human_numcompare (texta, textb, key));
|
||
+ *lima = savea, *limb = saveb; /* Restore the bytes overwritten above. */
|
||
+ }
|
||
+ else if (key->version)
|
||
+ diff = compare_version (texta, lena, textb, lenb);
|
||
+ else if (key->month)
|
||
+ diff = getmonth (texta, lena) - getmonth (textb, lenb);
|
||
+ /* Sorting like this may become slow, so in a simple locale the user
|
||
+ can select a faster sort that is similar to ascii sort. */
|
||
+ else if (hard_LC_COLLATE)
|
||
+ {
|
||
+ if (ignore || translate)
|
||
+ {
|
||
+ char buf[4000];
|
||
+ size_t size = lena + 1 + lenb + 1;
|
||
+ char *copy_a = (size <= sizeof buf ? buf : xmalloc (size)); /* Stack buffer when both copies fit, heap otherwise. */
|
||
+ char *copy_b = copy_a + lena + 1;
|
||
+ size_t new_len_a, new_len_b, i;
|
||
+
|
||
+ /* Ignore and/or translate chars before comparing. */
|
||
+ for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
|
||
+ {
|
||
+ if (i < lena)
|
||
+ {
|
||
+ copy_a[new_len_a] = (translate
|
||
+ ? translate[to_uchar (texta[i])]
|
||
+ : texta[i]);
|
||
+ if (!ignore || !ignore[to_uchar (texta[i])])
|
||
+ ++new_len_a; /* Keep the byte; an ignored byte is overwritten by the next one. */
|
||
+ }
|
||
+ if (i < lenb)
|
||
+ {
|
||
+ copy_b[new_len_b] = (translate
|
||
+ ? translate[to_uchar (textb[i])]
|
||
+ : textb [i]);
|
||
+ if (!ignore || !ignore[to_uchar (textb[i])])
|
||
+ ++new_len_b;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
|
||
+
|
||
+ if (sizeof buf < size) /* The copies were heap-allocated. */
|
||
+ free (copy_a);
|
||
+ }
|
||
+ else if (lena == 0)
|
||
+ diff = - NONZERO (lenb);
|
||
+ else if (lenb == 0)
|
||
+ goto greater;
|
||
+ else
|
||
+ diff = xmemcoll (texta, lena, textb, lenb);
|
||
+ }
|
||
+ else if (ignore)
|
||
+ {
|
||
+#define CMP_WITH_IGNORE(A, B) \
|
||
+ do \
|
||
+ { \
|
||
+ for (;;) \
|
||
+ { \
|
||
+ while (texta < lima && ignore[to_uchar (*texta)]) \
|
||
+ ++texta; \
|
||
+ while (textb < limb && ignore[to_uchar (*textb)]) \
|
||
+ ++textb; \
|
||
+ if (! (texta < lima && textb < limb)) \
|
||
+ break; \
|
||
+ diff = to_uchar (A) - to_uchar (B); \
|
||
+ if (diff) \
|
||
+ goto not_equal; \
|
||
+ ++texta; \
|
||
+ ++textb; \
|
||
+ } \
|
||
+ \
|
||
+ diff = (texta < lima) - (textb < limb); \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+ if (translate)
|
||
+ CMP_WITH_IGNORE (translate[to_uchar (*texta)],
|
||
+ translate[to_uchar (*textb)]);
|
||
+ else
|
||
+ CMP_WITH_IGNORE (*texta, *textb);
|
||
+ }
|
||
+ else if (lena == 0)
|
||
+ diff = - NONZERO (lenb);
|
||
+ else if (lenb == 0)
|
||
+ goto greater;
|
||
+ else
|
||
+ {
|
||
+ if (translate)
|
||
+ {
|
||
+ while (texta < lima && textb < limb)
|
||
+ {
|
||
+ diff = (to_uchar (translate[to_uchar (*texta++)])
|
||
+ - to_uchar (translate[to_uchar (*textb++)]));
|
||
+ if (diff)
|
||
+ goto not_equal;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ diff = memcmp (texta, textb, MIN (lena, lenb));
|
||
+ if (diff)
|
||
+ goto not_equal;
|
||
+ }
|
||
+ diff = lena < lenb ? -1 : lena != lenb; /* Common prefix is equal: the shorter field sorts first. */
|
||
+ }
|
||
+
|
||
+ if (diff)
|
||
+ goto not_equal;
|
||
+
|
||
+ key = key->next;
|
||
+ if (! key)
|
||
+ break;
|
||
+
|
||
+ /* Find the beginning and limit of the next field. */
|
||
+ if (key->eword != SIZE_MAX)
|
||
+ lima = limfield (a, key), limb = limfield (b, key);
|
||
+ else
|
||
+ lima = a->text + a->length - 1, limb = b->text + b->length - 1;
|
||
+
|
||
+ if (key->sword != SIZE_MAX)
|
||
+ texta = begfield (a, key), textb = begfield (b, key);
|
||
+ else
|
||
+ {
|
||
+ texta = a->text, textb = b->text;
|
||
+ if (key->skipsblanks)
|
||
+ {
|
||
+ while (texta < lima && blanks[to_uchar (*texta)])
|
||
+ ++texta;
|
||
+ while (textb < limb && blanks[to_uchar (*textb)])
|
||
+ ++textb;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return 0; /* Every key compared equal. */
|
||
+
|
||
+ greater:
|
||
+ diff = 1;
|
||
+ not_equal:
|
||
+ return key->reverse ? -diff : diff; /* KEY is the key on which the lines differed. */
|
||
+}
|
||
+
|
||
+/* Compare lines A and B.  Return a negative, zero or positive value
+   according to whether A sorts before, the same as, or after B.  */
+
+static int
+compare (const struct line *a, const struct line *b)
+{
+  int result;
+  size_t len_a;
+  size_t len_b;
+
+  /* Try the specified keys first, if there are any.  The only two
+     cases with no key at all are unadorned sort and unadorned
+     sort -r.  A key result is final when it is nonzero, or when
+     UNIQUE or STABLE is set.  */
+  if (keylist)
+    {
+      result = keycompare (a, b);
+      if (result != 0 || unique || stable)
+        return result;
+    }
+
+  /* The keys all compared equal (or none were given): fall back to
+     comparing the whole lines, excluding their final byte.  */
+  len_a = a->length - 1;
+  len_b = b->length - 1;
+
+  if (len_a == 0)
+    result = - NONZERO (len_b);
+  else if (len_b == 0)
+    result = 1;
+  else if (hard_LC_COLLATE)
+    result = xmemcoll (a->text, len_a, b->text, len_b);
+  else
+    {
+      result = memcmp (a->text, b->text, MIN (len_a, len_b));
+      if (result == 0)
+        result = len_a < len_b ? -1 : len_a != len_b;
+    }
+
+  return reverse ? -result : result;
+}
|
||
+
|
||
+/* Check that the lines read from FILE_NAME come in order. Return
|
||
+ true if they are in order. If CHECKONLY == 'c', also print a
|
||
+ diagnostic (FILE_NAME, line number, contents of line) to stderr if
|
||
+ they are not in order. When UNIQUE is set, two adjacent lines that compare equal also count as disorder. Checking stops at the first disorder found. */
|
||
+
|
||
+static bool
|
||
+check (char const *file_name, char checkonly)
|
||
+{
|
||
+ FILE *fp = xfopen (file_name, "r");
|
||
+ struct buffer buf; /* Input buffer. */
|
||
+ struct line temp; /* Copy of previous line. */
|
||
+ size_t alloc = 0; /* Bytes allocated for temp.text; zero until a line has been saved. */
|
||
+ uintmax_t line_number = 0; /* Number of lines in the buffers fully checked so far. */
|
||
+ struct keyfield const *key = keylist;
|
||
+ bool nonunique = ! unique; /* Disorder threshold: a compare result >= NONUNIQUE means out of order. */
|
||
+ bool ordered = true;
|
||
+
|
||
+ initbuf (&buf, sizeof (struct line),
|
||
+ MAX (merge_buffer_size, sort_size));
|
||
+ temp.text = NULL;
|
||
+
|
||
+ while (fillbuf (&buf, fp, file_name))
|
||
+ {
|
||
+ struct line const *line = buffer_linelim (&buf);
|
||
+ struct line const *linebase = line - buf.nlines;
|
||
+
|
||
+ /* Make sure the line saved from the old buffer contents is
|
||
+ less than or equal to the first line of the new buffer. */
|
||
+ if (alloc && nonunique <= compare (&temp, line - 1))
|
||
+ {
|
||
+ found_disorder: /* Also reached by goto from the in-buffer comparison loop below. */
|
||
+ {
|
||
+ if (checkonly == 'c')
|
||
+ {
|
||
+ struct line const *disorder_line = line - 1;
|
||
+ uintmax_t disorder_line_number =
|
||
+ buffer_linelim (&buf) - disorder_line + line_number;
|
||
+ char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
|
||
+ fprintf (stderr, _("%s: %s:%s: disorder: "),
|
||
+ program_name, file_name,
|
||
+ umaxtostr (disorder_line_number, hr_buf));
|
||
+ write_bytes (disorder_line->text, disorder_line->length,
|
||
+ stderr, _("standard error"));
|
||
+ }
|
||
+
|
||
+ ordered = false;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Compare each line in the buffer with its successor. */
|
||
+ while (linebase < --line)
|
||
+ if (nonunique <= compare (line, line - 1))
|
||
+ goto found_disorder;
|
||
+
|
||
+ line_number += buf.nlines;
|
||
+
|
||
+ /* Save the last line of the buffer. */
|
||
+ if (alloc < line->length)
|
||
+ {
|
||
+ do
|
||
+ {
|
||
+ alloc *= 2;
|
||
+ if (! alloc) /* First allocation, or the doubling wrapped around to zero. */
|
||
+ {
|
||
+ alloc = line->length;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ while (alloc < line->length);
|
||
+
|
||
+ temp.text = xrealloc (temp.text, alloc);
|
||
+ }
|
||
+ memcpy (temp.text, line->text, line->length);
|
||
+ temp.length = line->length;
|
||
+ if (key)
|
||
+ {
|
||
+ temp.keybeg = temp.text + (line->keybeg - line->text); /* Rebase the key pointers onto the copy. */
|
||
+ temp.keylim = temp.text + (line->keylim - line->text);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ xfclose (fp, file_name);
|
||
+ free (buf.buf);
|
||
+ free (temp.text);
|
||
+ return ordered;
|
||
+}
|
||
+
|
||
+/* Open FILES (there are NFILES of them) and store the resulting array
+   of stream pointers into (*PFPS).  Allocate the array.  Return the
+   number of successfully opened files, setting errno if this value is
+   less than NFILES.  Opening stops at the first failure.  */
+
+static size_t
+open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps)
+{
+  FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps);
+
+  /* The index has the same type as NFILES and the return value, so
+     the loop test does not mix signed and unsigned operands and the
+     count cannot be truncated for very large NFILES.  */
+  size_t i;
+
+  /* Open as many input files as we can.  Entries with a nonzero pid
+     are opened with open_temp, the others with stream_open.  */
+  for (i = 0; i < nfiles; i++)
+    {
+      fps[i] = (files[i].pid
+                ? open_temp (files[i].name, files[i].pid)
+                : stream_open (files[i].name, "r"));
+      if (!fps[i])
+        break;
+    }
+
+  return i;
+}
|
||
+
|
||
+/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary
|
||
+ files (all of which are at the start of the FILES array), and
|
||
+ NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
|
||
+ FPS is the vector of open stream corresponding to the files.
|
||
+ Close input and output streams before returning.
|
||
+ OUTPUT_FILE gives the name of the output file. If it is NULL,
|
||
+ the output file is standard output. */
|
||
+
|
||
+static void
|
||
+mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
|
||
+ FILE *ofp, char const *output_file, FILE **fps)
|
||
+{
|
||
+ struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer);
|
||
+ /* Input buffers for each file. */
|
||
+ struct line saved; /* Saved line storage for unique check. */
|
||
+ struct line const *savedline = NULL;
|
||
+ /* &saved if there is a saved line. */
|
||
+ size_t savealloc = 0; /* Size allocated for the saved line. */
|
||
+ struct line const **cur = xnmalloc (nfiles, sizeof *cur);
|
||
+ /* Current line in each line table. */
|
||
+ struct line const **base = xnmalloc (nfiles, sizeof *base);
|
||
+ /* Base of each line table. */
|
||
+ size_t *ord = xnmalloc (nfiles, sizeof *ord);
|
||
+ /* Table representing a permutation of fps,
|
||
+ such that cur[ord[0]] is the smallest line
|
||
+ and will be next output. */
|
||
+ size_t i;
|
||
+ size_t j;
|
||
+ size_t t;
|
||
+ struct keyfield const *key = keylist;
|
||
+ saved.text = NULL;
|
||
+
|
||
+ /* Read initial lines from each input file. */
|
||
+ for (i = 0; i < nfiles; )
|
||
+ {
|
||
+ initbuf (&buffer[i], sizeof (struct line),
|
||
+ MAX (merge_buffer_size, sort_size / nfiles));
|
||
+ if (fillbuf (&buffer[i], fps[i], files[i].name))
|
||
+ {
|
||
+ struct line const *linelim = buffer_linelim (&buffer[i]);
|
||
+ cur[i] = linelim - 1;
|
||
+ base[i] = linelim - buffer[i].nlines;
|
||
+ i++;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* fps[i] is empty; eliminate it from future consideration. */
|
||
+ xfclose (fps[i], files[i].name);
|
||
+ if (i < ntemps)
|
||
+ {
|
||
+ ntemps--;
|
||
+ zaptemp (files[i].name);
|
||
+ }
|
||
+ free (buffer[i].buf);
|
||
+ --nfiles;
|
||
+ for (j = i; j < nfiles; ++j)
|
||
+ {
|
||
+ files[j] = files[j + 1];
|
||
+ fps[j] = fps[j + 1];
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Set up the ord table according to comparisons among input lines.
|
||
+ Since this only reorders two items if one is strictly greater than
|
||
+ the other, it is stable. */
|
||
+ for (i = 0; i < nfiles; ++i)
|
||
+ ord[i] = i;
|
||
+ for (i = 1; i < nfiles; ++i)
|
||
+ if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
|
||
+ t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
|
||
+
|
||
+ /* Repeatedly output the smallest line until no input remains. */
|
||
+ while (nfiles)
|
||
+ {
|
||
+ struct line const *smallest = cur[ord[0]];
|
||
+
|
||
+ /* If uniquified output is turned on, output only the first of
|
||
+ an identical series of lines. */
|
||
+ if (unique)
|
||
+ {
|
||
+ if (savedline && compare (savedline, smallest))
|
||
+ {
|
||
+ savedline = NULL;
|
||
+ write_bytes (saved.text, saved.length, ofp, output_file);
|
||
+ }
|
||
+ if (!savedline)
|
||
+ {
|
||
+ savedline = &saved;
|
||
+ if (savealloc < smallest->length)
|
||
+ {
|
||
+ do
|
||
+ if (! savealloc)
|
||
+ {
|
||
+ savealloc = smallest->length;
|
||
+ break;
|
||
+ }
|
||
+ while ((savealloc *= 2) < smallest->length);
|
||
+
|
||
+ saved.text = xrealloc (saved.text, savealloc);
|
||
+ }
|
||
+ saved.length = smallest->length;
|
||
+ memcpy (saved.text, smallest->text, saved.length);
|
||
+ if (key)
|
||
+ {
|
||
+ saved.keybeg =
|
||
+ saved.text + (smallest->keybeg - smallest->text);
|
||
+ saved.keylim =
|
||
+ saved.text + (smallest->keylim - smallest->text);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ write_bytes (smallest->text, smallest->length, ofp, output_file);
|
||
+
|
||
+ /* Check if we need to read more lines into core. */
|
||
+ if (base[ord[0]] < smallest)
|
||
+ cur[ord[0]] = smallest - 1;
|
||
+ else
|
||
+ {
|
||
+ if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
|
||
+ {
|
||
+ struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
|
||
+ cur[ord[0]] = linelim - 1;
|
||
+ base[ord[0]] = linelim - buffer[ord[0]].nlines;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* We reached EOF on fps[ord[0]]. */
|
||
+ for (i = 1; i < nfiles; ++i)
|
||
+ if (ord[i] > ord[0])
|
||
+ --ord[i];
|
||
+ --nfiles;
|
||
+ xfclose (fps[ord[0]], files[ord[0]].name);
|
||
+ if (ord[0] < ntemps)
|
||
+ {
|
||
+ ntemps--;
|
||
+ zaptemp (files[ord[0]].name);
|
||
+ }
|
||
+ free (buffer[ord[0]].buf);
|
||
+ for (i = ord[0]; i < nfiles; ++i)
|
||
+ {
|
||
+ fps[i] = fps[i + 1];
|
||
+ files[i] = files[i + 1];
|
||
+ buffer[i] = buffer[i + 1];
|
||
+ cur[i] = cur[i + 1];
|
||
+ base[i] = base[i + 1];
|
||
+ }
|
||
+ for (i = 0; i < nfiles; ++i)
|
||
+ ord[i] = ord[i + 1];
|
||
+ continue;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* The new line just read in may be larger than other lines
|
||
+ already in main memory; push it back in the queue until we
|
||
+ encounter a line larger than it. Optimize for the common
|
||
+ case where the new line is smallest. */
|
||
+ {
|
||
+ size_t lo = 1;
|
||
+ size_t hi = nfiles;
|
||
+ size_t probe = lo;
|
||
+ size_t ord0 = ord[0];
|
||
+ size_t count_of_smaller_lines;
|
||
+
|
||
+ while (lo < hi)
|
||
+ {
|
||
+ int cmp = compare (cur[ord0], cur[ord[probe]]);
|
||
+ if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
|
||
+ hi = probe;
|
||
+ else
|
||
+ lo = probe + 1;
|
||
+ probe = (lo + hi) / 2;
|
||
+ }
|
||
+
|
||
+ count_of_smaller_lines = lo - 1;
|
||
+ for (j = 0; j < count_of_smaller_lines; j++)
|
||
+ ord[j] = ord[j + 1];
|
||
+ ord[count_of_smaller_lines] = ord0;
|
||
+ }
|
||
+
|
||
+ /* Free up some resources every once in a while. */
|
||
+ if (MAX_PROCS_BEFORE_REAP < nprocs)
|
||
+ reap_some ();
|
||
+ }
|
||
+
|
||
+ if (unique && savedline)
|
||
+ {
|
||
+ write_bytes (saved.text, saved.length, ofp, output_file);
|
||
+ free (saved.text);
|
||
+ }
|
||
+
|
||
+ xfclose (ofp, output_file);
|
||
+ free(fps);
|
||
+ free(buffer);
|
||
+ free(ord);
|
||
+ free(base);
|
||
+ free(cur);
|
||
+}
|
||
+
|
||
+/* Merge lines from FILES onto OFP.  FILES holds NFILES entries, of
+   which the first NTEMPS are temporary files;
+   0 <= NTEMPS <= NFILES <= NMERGE.  OUTPUT_FILE gives the name of
+   the output file.  Input and output files are closed before
+   returning.
+
+   Return the number of files successfully merged.  This number can be
+   less than NFILES if we ran low on file descriptors, but in that
+   case it is never less than 2: failing to open at least two of
+   several inputs is fatal.  */
+
+static size_t
+mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles,
+            FILE *ofp, char const *output_file)
+{
+  FILE **streams;
+  size_t const opened = open_input_files (files, nfiles, &streams);
+  bool const came_up_short = opened < nfiles;
+
+  /* FILES[OPENED] is the first input that could not be opened.  */
+  if (came_up_short && opened < 2)
+    die (_("open failed"), files[opened].name);
+
+  mergefps (files, ntemps, opened, ofp, output_file, streams);
+  return opened;
+}
|
||
+
|
||
+/* Merge into T the two sorted arrays of lines LO (with NLO members)
+   and HI (with NHI members).  T, LO, and HI point just past their
+   respective arrays, and the arrays are in reverse order.  NLO and
+   NHI must be positive, and HI - NHI must equal T - (NLO + NHI).
+   On a tie the LO element is taken first.  */
+
+static inline void
+mergelines (struct line *t,
+            struct line const *lo, size_t nlo,
+            struct line const *hi, size_t nhi)
+{
+  while (true)
+    {
+      bool const take_lo = compare (lo - 1, hi - 1) <= 0;
+
+      if (take_lo)
+        {
+          *--t = *--lo;
+          --nlo;
+          /* HI - NHI equalled T - (NLO + NHI) on entry, so once LO is
+             used up HI equals T and what is left of HI is already in
+             place.  */
+          if (nlo == 0)
+            return;
+        }
+      else
+        {
+          *--t = *--hi;
+          --nhi;
+          if (nhi == 0)
+            {
+              /* HI is used up; NLO is still positive here, so move
+                 the rest of LO across.  */
+              for (; nlo != 0; nlo--)
+                *--t = *--lo;
+              return;
+            }
+        }
+    }
+}
|
||
+
|
||
+/* Sort the array LINES with NLINES members, using TEMP for temporary space.
+   NLINES must be at least 2.
+   The input and output arrays are in reverse order, and LINES and
+   TEMP point just past the end of their respective arrays.
+
+   Use a recursive divide-and-conquer algorithm, in the style
+   suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
+   the optimization suggested by exercise 5.2.4-10; this requires room
+   for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
+   writes that this memory optimization was originally published by
+   D. A. Bell, Comp J. 1 (1958), 75.  */
+
+static void
+sortlines (struct line *lines, size_t nlines, struct line *temp)
+{
+  if (nlines == 2)
+    {
+      /* Base case: exchange the pair if it is out of order.  */
+      struct line *first = lines - 1;
+      struct line *second = lines - 2;
+
+      if (compare (first, second) > 0)
+        {
+          struct line held = *first;
+          *first = *second;
+          *second = held;
+        }
+      return;
+    }
+
+  size_t const half = nlines / 2;
+  size_t const rest = nlines - half;
+  struct line *upper = lines - half;
+
+  /* Sort the larger part in place, and the smaller part into TEMP.  */
+  sortlines (upper, rest, temp);
+  if (half > 1)
+    sortlines_temp (lines, half, temp);
+  else
+    temp[-1] = lines[-1];
+
+  /* Merge the sorted copy in TEMP with the in-place part.  */
+  mergelines (lines, temp, half, upper, rest);
+}
|
||
+
|
||
+/* Like sortlines (LINES, NLINES, TEMP), except that the sorted result
+   is written into TEMP rather than left in LINES.  */
+
+static void
+sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
+{
+  if (nlines == 2)
+    {
+      /* Keep this flag an int, not a bool, to work around a bug
+         <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
+         in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
+      int out_of_order = compare (lines - 1, lines - 2) > 0;
+
+      temp[-1] = lines[-1 - out_of_order];
+      temp[-2] = lines[-2 + out_of_order];
+      return;
+    }
+
+  size_t const half = nlines / 2;
+  size_t const rest = nlines - half;
+  struct line *upper = lines - half;
+  struct line *upper_sorted = temp - half;
+
+  /* Sort the larger part straight into its final region of TEMP, and
+     the smaller part in place.  */
+  sortlines_temp (upper, rest, upper_sorted);
+  if (half > 1)
+    sortlines (lines, half, temp);
+
+  mergelines (temp, lines, half, upper_sorted, rest);
+}
|
||
+
|
||
+/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are
+   the same as OUTFILE.  Replace each one found with a temporary copy
+   of itself, so that the copy can in turn be merged into OUTFILE
+   without destroying OUTFILE before it is completely read.
+
+   Return NFILES.  The count is never changed here; the return value
+   is kept so that callers written as "nfiles = avoid_trashing_input
+   (...)" continue to work.
+
+   This test ensures that an otherwise-erroneous use like
+   "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
+   It's not clear that POSIX requires this nicety.
+   Detect common error cases, but don't try to catch obscure cases like
+   "cat ... FILE ... | sort -m -o FILE"
+   where traditional "sort" doesn't copy the input and where
+   people should know that they're getting into trouble anyway.
+   Catching these obscure cases would slow down performance in
+   common cases.  */
+
+static size_t
+avoid_trashing_input (struct sortfile *files, size_t ntemps,
+                      size_t nfiles, char const *outfile)
+{
+  size_t i;
+  bool got_outstat = false;
+  struct stat outstat;
+
+  for (i = ntemps; i < nfiles; i++)
+    {
+      bool is_stdin = STREQ (files[i].name, "-");
+      bool same;
+      struct stat instat;
+
+      if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
+        same = true;
+      else
+        {
+          /* Stat the output lazily, and only once.  If that fails
+             there is nothing to compare against, so stop looking.  */
+          if (! got_outstat)
+            {
+              if ((outfile
+                   ? stat (outfile, &outstat)
+                   : fstat (STDOUT_FILENO, &outstat))
+                  != 0)
+                break;
+              got_outstat = true;
+            }
+
+          same = (((is_stdin
+                    ? fstat (STDIN_FILENO, &instat)
+                    : stat (files[i].name, &instat))
+                   == 0)
+                  && SAME_INODE (instat, outstat));
+        }
+
+      if (same)
+        {
+          /* Copy just this one input.  mergefiles closes TFTP before
+             it returns, so the stream must not be handed to a second
+             mergefiles call, and because exactly one entry is replaced
+             by one entry, nothing in FILES has to be shifted.
+             NOTE(review): the copy sits at index I >= NTEMPS, so it is
+             not removed as early as the leading temporaries are;
+             presumably the exit-time temp cleanup removes it --
+             confirm.  */
+          FILE *tftp;
+          pid_t pid;
+          char *temp = create_temp (&tftp, &pid);
+          mergefiles (&files[i], 0, 1, tftp, temp);
+          files[i].name = temp;
+          files[i].pid = pid;
+        }
+    }
+
+  return nfiles;
+}
|
||
+
|
||
+/* Merge the input FILES.  NTEMPS is the number of files at the
+   start of FILES that are temporary; it is zero at the top level.
+   NFILES is the total number of files.  Put the output in
+   OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
+
+static void
+merge (struct sortfile *files, size_t ntemps, size_t nfiles,
+       char const *output_file)
+{
+  /* While there are too many inputs for one merge, run a pass that
+     combines groups of them into temporaries.  */
+  while (nmerge < nfiles)
+    {
+      /* Number of input files processed so far in this pass.  */
+      size_t consumed = 0;
+
+      /* Number of output files generated so far in this pass.  */
+      size_t produced = 0;
+
+      /* Input files left over after all full-sized merges.  */
+      size_t leftover;
+
+      /* Number of easily-available slots at the next pass.  */
+      size_t free_slots;
+
+      /* Do as many NMERGE-size merges as possible.  mergefiles may
+         merge fewer files than asked, so advance by what it reports.  */
+      while (nmerge <= nfiles - consumed)
+        {
+          FILE *tfp;
+          pid_t pid;
+          char *temp = create_temp (&tfp, &pid);
+          size_t num_merged = mergefiles (&files[consumed],
+                                          MIN (ntemps, nmerge),
+                                          nmerge, tfp, temp);
+          ntemps -= MIN (ntemps, num_merged);
+          files[produced].name = temp;
+          files[produced].pid = pid;
+          consumed += num_merged;
+          produced++;
+        }
+
+      leftover = nfiles - consumed;
+      free_slots = nmerge - produced % nmerge;
+
+      if (free_slots < leftover)
+        {
+          /* So many files remain that they can't all be put into the last
+             NMERGE-sized output window.  Do one more merge.  Merge as few
+             files as possible, to avoid needless I/O.  */
+          size_t nshortmerge = leftover - free_slots + 1;
+          FILE *tfp;
+          pid_t pid;
+          char *temp = create_temp (&tfp, &pid);
+          size_t num_merged = mergefiles (&files[consumed],
+                                          MIN (ntemps, nshortmerge),
+                                          nshortmerge, tfp, temp);
+          ntemps -= MIN (ntemps, num_merged);
+          files[produced].name = temp;
+          files[produced].pid = pid;
+          produced++;
+          consumed += num_merged;
+        }
+
+      /* Put the remaining input files into the last NMERGE-sized output
+         window, so they will be merged in the next pass.  */
+      memmove (&files[produced], &files[consumed],
+               (nfiles - consumed) * sizeof *files);
+      ntemps += produced;
+      nfiles -= consumed - produced;
+    }
+
+  nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
+
+  /* We aren't guaranteed that this final mergefiles will work, therefore we
+     try to merge into the output, and then merge as much as we can into a
+     temp file if we can't.  Repeat.  */
+
+  while (true)
+    {
+      /* Merge directly into the output file if possible.  */
+      FILE **fps;
+      size_t opened = open_input_files (files, nfiles, &fps);
+
+      if (opened == nfiles)
+        {
+          FILE *ofp = stream_open (output_file, "w");
+          if (ofp)
+            {
+              mergefps (files, ntemps, nfiles, ofp, output_file, fps);
+              break;
+            }
+          if (errno != EMFILE || opened <= 2)
+            die (_("open failed"), output_file);
+        }
+      else if (opened <= 2)
+        die (_("open failed"), files[opened].name);
+
+      /* We ran out of file descriptors.  Close one of the input
+         files, to gain a file descriptor.  Then create a temporary
+         file with our spare file descriptor.  Retry if that failed
+         (e.g., some other process could open a file between the time
+         we closed and tried to create).  */
+      FILE *tfp;
+      pid_t pid;
+      char *temp;
+      do
+        {
+          opened--;
+          xfclose (fps[opened], files[opened].name);
+          temp = maybe_create_temp (&tfp, &pid, 2 < opened);
+        }
+      while (!temp);
+
+      /* Merge into the newly allocated temporary, which then takes the
+         place of the OPENED files it was built from.  */
+      mergefps (&files[0], MIN (ntemps, opened), opened, tfp, temp, fps);
+      ntemps -= MIN (ntemps, opened);
+      files[0].name = temp;
+      files[0].pid = pid;
+
+      memmove (&files[1], &files[opened],
+               (nfiles - opened) * sizeof *files);
+      ntemps++;
+      nfiles -= opened - 1;
+    }
+}
|
||
+
|
||
+/* Sort NFILES FILES onto OUTPUT_FILE.  Each bufferful of input is
+   sorted in memory and written to a temporary file; the temporaries
+   are then merged.  When all of the input fits in a single bufferful
+   and no temporary has been written, the sorted lines go straight to
+   OUTPUT_FILE instead.  */
+
+static void
+sort (char * const *files, size_t nfiles, char const *output_file)
+{
+  size_t const bytes_per_line = (2 * sizeof (struct line)
+                                 - sizeof (struct line) / 2);
+  struct buffer buf;
+  size_t ntemps = 0;
+  bool output_file_created = false;
+
+  buf.alloc = 0;
+
+  while (nfiles)
+    {
+      char const *in_name = *files;
+      FILE *in = xfopen (in_name, "r");
+
+      /* The buffer is set up once, sized when the first file is seen.  */
+      if (! buf.alloc)
+        initbuf (&buf, bytes_per_line,
+                 sort_buffer_size (&in, 1, files, nfiles, bytes_per_line));
+      buf.eof = false;
+      files++;
+      nfiles--;
+
+      while (fillbuf (&buf, in, in_name))
+        {
+          char const *dest_name;
+          FILE *dest;
+          struct line *cursor;
+          struct line *bottom;
+
+          if (buf.eof && nfiles
+              && (bytes_per_line + 1
+                  < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
+            {
+              /* End of file, but there is more input and buffer room.
+                 Concatenate the next input file; this is faster in
+                 the usual case.  */
+              buf.left = buf.used;
+              break;
+            }
+
+          cursor = buffer_linelim (&buf);
+          bottom = cursor - buf.nlines;
+          if (1 < buf.nlines)
+            sortlines (cursor, buf.nlines, bottom);
+
+          if (buf.eof && !nfiles && !ntemps && !buf.left)
+            {
+              /* Everything fit in one bufferful: write the result
+                 directly.  The input is closed first.  */
+              xfclose (in, in_name);
+              dest = xfopen (output_file, "w");
+              dest_name = output_file;
+              output_file_created = true;
+            }
+          else
+            {
+              ++ntemps;
+              dest_name = create_temp (&dest, NULL);
+            }
+
+          /* The line table is in reverse order, so walk it downward.
+             With -u, step over lines that compare equal to the one
+             just written.  */
+          do
+            {
+              cursor--;
+              write_bytes (cursor->text, cursor->length, dest, dest_name);
+              if (unique)
+                while (bottom < cursor && compare (cursor, cursor - 1) == 0)
+                  cursor--;
+            }
+          while (bottom < cursor);
+
+          xfclose (dest, dest_name);
+
+          /* Free up some resources every once in a while.  */
+          if (MAX_PROCS_BEFORE_REAP < nprocs)
+            reap_some ();
+
+          if (output_file_created)
+            break;
+        }
+
+      /* Once the output has been written the input is already closed
+         and there is nothing left to read.  */
+      if (output_file_created)
+        break;
+
+      xfclose (in, in_name);
+    }
+
+  free (buf.buf);
+
+  if (! output_file_created)
+    {
+      /* Collect the temporaries from the TEMPHEAD list and merge them.  */
+      size_t i = 0;
+      struct tempnode *node;
+      struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
+
+      for (node = temphead; node; node = node->next)
+        {
+          tempfiles[i].name = node->name;
+          tempfiles[i].pid = node->pid;
+          i++;
+        }
+      merge (tempfiles, ntemps, ntemps, output_file);
+      free (tempfiles);
+    }
+}
|
||
+
|
||
+/* Append a malloc'd copy of key KEY_ARG to the end of the key list
+   KEYLIST.  KEY_ARG itself is not linked in, so the caller may reuse
+   it.  */
+
+static void
+insertkey (struct keyfield *key_arg)
+{
+  struct keyfield *copy = xmemdup (key_arg, sizeof *copy);
+  struct keyfield **tail = &keylist;
+
+  /* Walk to the null link that ends the list.  */
+  while (*tail != NULL)
+    tail = &(*tail)->next;
+
+  copy->next = NULL;
+  *tail = copy;
+}
|
||
+
|
||
+/* Report a bad field specification SPEC, with extra info MSGID, and
+   do not return.  MSGID is translated here, so callers pass it
+   untranslated.  */
+
+static void badfieldspec (char const *, char const *)
+     ATTRIBUTE_NORETURN;
+static void
+badfieldspec (char const *spec, char const *msgid)
+{
+  char const *detail = _(msgid);
+  char const *shown = quote (spec);
+
+  error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
+         detail, shown);
+  abort ();
+}
|
||
+
|
||
+/* Report that the option letters in OPTS are incompatible with each
+   other, and do not return.  */
+
+static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
+static void
+incompatible_options (char const *opts)
+{
+  char const *format = _("options `-%s' are incompatible");
+
+  error (SORT_FAILURE, 0, format, opts);
+  abort ();
+}
|
||
+
|
||
+/* Check compatibility of ordering options.  A key is rejected when it
+   selects more than one of the random, numeric, general-numeric,
+   month, version, ignore (-d or -i) and human-numeric orderings, or
+   when it combines random ordering with a translation table.  The
+   offending option letters are passed to incompatible_options.  */
+
+static void
+check_ordering_compatibility (void)
+{
+  struct keyfield const *key;
+
+  for (key = keylist; key; key = key->next)
+    {
+      int exclusive = (key->random + key->numeric + key->general_numeric
+                       + key->month + key->version + !!key->ignore
+                       + key->human_numeric);
+      bool conflict = 1 < exclusive || (key->random && key->translate);
+
+      /* The following is too big, but guaranteed to be "big enough".  */
+      char opts[sizeof short_options];
+      char *end = opts;
+
+      if (! conflict)
+        continue;
+
+      /* Collect the letters of the options set on this key.  */
+      if (key->ignore == nondictionary)
+        *end++ = 'd';
+      if (key->translate)
+        *end++ = 'f';
+      if (key->general_numeric)
+        *end++ = 'g';
+      if (key->human_numeric)
+        *end++ = 'h';
+      if (key->ignore == nonprinting)
+        *end++ = 'i';
+      if (key->month)
+        *end++ = 'M';
+      if (key->numeric)
+        *end++ = 'n';
+      if (key->version)
+        *end++ = 'V';
+      if (key->random)
+        *end++ = 'R';
+      *end = '\0';
+
+      incompatible_options (opts);
+    }
+}
|
||
+
|
||
+/* Parse the leading integer in STRING and store the resulting value
+   (which must fit into size_t) into *VAL.  Return the address of the
+   suffix after the integer.  If the value is too large, silently
+   substitute SIZE_MAX.  If MSGID is NULL, return NULL after
+   failure; otherwise, report MSGID and exit on failure.  */
+
+static char const *
+parse_field_count (char const *string, size_t *val, char const *msgid)
+{
+  char *suffix;
+  uintmax_t n;
+
+  switch (xstrtoumax (string, &suffix, 10, &n, ""))
+    {
+    case LONGINT_INVALID:
+      if (msgid)
+        error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
+               _(msgid), quote (string));
+      return NULL;
+
+    case LONGINT_OVERFLOW:
+    case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
+      *val = SIZE_MAX;
+      break;
+
+    case LONGINT_OK:
+    case LONGINT_INVALID_SUFFIX_CHAR:
+      /* N fit in uintmax_t; clamp it if the store into size_t did not
+         preserve the value.  */
+      *val = n;
+      if (*val != n)
+        *val = SIZE_MAX;
+      break;
+    }
+
+  return suffix;
+}
|
||
+
|
||
+/* Handle interrupts and hangups. SIG is the signal being delivered: run cleanup, then restore SIG's default action and raise SIG again. */
|
||
+
|
||
+static void
|
||
+sighandler (int sig)
|
||
+{
|
||
+ if (! SA_NOCLDSTOP) /* Zero here means main installed this handler with signal (), not sigaction (). */
|
||
+ signal (sig, SIG_IGN); /* Ignore further deliveries of SIG from here on. */
|
||
+
|
||
+ cleanup (); /* NOTE(review): defined elsewhere; presumably removes the temporary files -- confirm. */
|
||
+
|
||
+ signal (sig, SIG_DFL); /* Put the default action back... */
|
||
+ raise (sig); /* ...and send SIG again, this time without this handler in the way. */
|
||
+}
|
||
+
|
||
+/* Set the ordering options for KEY specified in S.
+   Return the address of the first character in S that
+   is not a valid ordering option; this is the terminating null byte
+   when every character was accepted.
+   BLANKTYPE is the kind of blanks that 'b' should skip.  */
+
+static char *
+set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
+{
+  for (; *s; s++)
+    switch (*s)
+      {
+      case 'b':
+        {
+          bool at_start = blanktype == bl_start || blanktype == bl_both;
+          bool at_end = blanktype == bl_end || blanktype == bl_both;
+
+          if (at_start)
+            key->skipsblanks = true;
+          if (at_end)
+            key->skipeblanks = true;
+        }
+        break;
+
+      case 'd':
+        key->ignore = nondictionary;
+        break;
+
+      case 'f':
+        key->translate = fold_toupper;
+        break;
+
+      case 'g':
+        key->general_numeric = true;
+        break;
+
+      case 'h':
+        key->human_numeric = true;
+        break;
+
+      case 'i':
+        /* Option order should not matter, so don't let -i override
+           -d.  -d implies -i, but -i does not imply -d.  */
+        if (key->ignore == NULL)
+          key->ignore = nonprinting;
+        break;
+
+      case 'M':
+        key->month = true;
+        break;
+
+      case 'n':
+        key->numeric = true;
+        break;
+
+      case 'R':
+        key->random = true;
+        break;
+
+      case 'r':
+        key->reverse = true;
+        break;
+
+      case 'V':
+        key->version = true;
+        break;
+
+      default:
+        /* Not an ordering letter: stop here and let the caller decide
+           what it means.  */
+        return (char *) s;
+      }
+
+  return (char *) s;
+}
|
||
+
|
||
+/* Reset *KEY: zero every byte of it, then set EWORD to SIZE_MAX and
+   SI_PRESENT to -1.  Return KEY, so the call can be used directly as
+   an initializer.  */
+
+static struct keyfield *
+key_init (struct keyfield *key)
+{
+  struct keyfield *const fresh = key;
+
+  memset (fresh, 0, sizeof *fresh);
+  fresh->si_present = -1;
+  fresh->eword = SIZE_MAX;
+
+  return fresh;
+}
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ struct keyfield *key;
|
||
+ struct keyfield key_buf;
|
||
+ struct keyfield gkey;
|
||
+ char const *s;
|
||
+ int c = 0;
|
||
+ char checkonly = 0;
|
||
+ bool mergeonly = false;
|
||
+ char *random_source = NULL;
|
||
+ bool need_random = false;
|
||
+ size_t nfiles = 0;
|
||
+ bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
|
||
+ bool obsolete_usage = (posix2_version () < 200112);
|
||
+ char **files;
|
||
+ char *files_from = NULL;
|
||
+ struct Tokens tok;
|
||
+ char const *outfile = NULL;
|
||
+
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+
|
||
+ initialize_exit_failure (SORT_FAILURE);
|
||
+
|
||
+ hard_LC_COLLATE = hard_locale (LC_COLLATE);
|
||
+#if HAVE_NL_LANGINFO
|
||
+ hard_LC_TIME = hard_locale (LC_TIME);
|
||
+#endif
|
||
+
|
||
+ /* Get locale's representation of the decimal point. */
|
||
+ {
|
||
+ struct lconv const *locale = localeconv ();
|
||
+
|
||
+ /* If the locale doesn't define a decimal point, or if the decimal
|
||
+ point is multibyte, use the C locale's decimal point. FIXME:
|
||
+ add support for multibyte decimal points. */
|
||
+ decimal_point = to_uchar (locale->decimal_point[0]);
|
||
+ if (! decimal_point || locale->decimal_point[1])
|
||
+ decimal_point = '.';
|
||
+
|
||
+ /* FIXME: add support for multibyte thousands separators. */
|
||
+ thousands_sep = to_uchar (*locale->thousands_sep);
|
||
+ if (! thousands_sep || locale->thousands_sep[1])
|
||
+ thousands_sep = -1;
|
||
+ }
|
||
+
|
||
+ have_read_stdin = false;
|
||
+ inittables ();
|
||
+
|
||
+ {
|
||
+ size_t i;
|
||
+ static int const sig[] =
|
||
+ {
|
||
+ /* The usual suspects. */
|
||
+ SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
|
||
+#ifdef SIGPOLL
|
||
+ SIGPOLL,
|
||
+#endif
|
||
+#ifdef SIGPROF
|
||
+ SIGPROF,
|
||
+#endif
|
||
+#ifdef SIGVTALRM
|
||
+ SIGVTALRM,
|
||
+#endif
|
||
+#ifdef SIGXCPU
|
||
+ SIGXCPU,
|
||
+#endif
|
||
+#ifdef SIGXFSZ
|
||
+ SIGXFSZ,
|
||
+#endif
|
||
+ };
|
||
+ enum { nsigs = ARRAY_CARDINALITY (sig) };
|
||
+
|
||
+#if SA_NOCLDSTOP
|
||
+ struct sigaction act;
|
||
+
|
||
+ sigemptyset (&caught_signals);
|
||
+ for (i = 0; i < nsigs; i++)
|
||
+ {
|
||
+ sigaction (sig[i], NULL, &act);
|
||
+ if (act.sa_handler != SIG_IGN)
|
||
+ sigaddset (&caught_signals, sig[i]);
|
||
+ }
|
||
+
|
||
+ act.sa_handler = sighandler;
|
||
+ act.sa_mask = caught_signals;
|
||
+ act.sa_flags = 0;
|
||
+
|
||
+ for (i = 0; i < nsigs; i++)
|
||
+ if (sigismember (&caught_signals, sig[i]))
|
||
+ sigaction (sig[i], &act, NULL);
|
||
+#else
|
||
+ for (i = 0; i < nsigs; i++)
|
||
+ if (signal (sig[i], SIG_IGN) != SIG_IGN)
|
||
+ {
|
||
+ signal (sig[i], sighandler);
|
||
+ siginterrupt (sig[i], 1);
|
||
+ }
|
||
+#endif
|
||
+ }
|
||
+
|
||
+ /* The signal mask is known, so it is safe to invoke exit_cleanup. */
|
||
+ atexit (exit_cleanup);
|
||
+
|
||
+ gkey.sword = gkey.eword = SIZE_MAX;
|
||
+ gkey.ignore = NULL;
|
||
+ gkey.translate = NULL;
|
||
+ gkey.numeric = gkey.general_numeric = gkey.human_numeric = false;
|
||
+ gkey.si_present = -1;
|
||
+ gkey.random = gkey.version = false;
|
||
+ gkey.month = gkey.reverse = false;
|
||
+ gkey.skipsblanks = gkey.skipeblanks = false;
|
||
+
|
||
+ files = xnmalloc (argc, sizeof *files);
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ /* Parse an operand as a file after "--" was seen; or if
|
||
+ pedantic and a file was seen, unless the POSIX version
|
||
+ predates 1003.1-2001 and -c was not seen and the operand is
|
||
+ "-o FILE" or "-oFILE". */
|
||
+ int oi = -1;
|
||
+
|
||
+ if (c == -1
|
||
+ || (posixly_correct && nfiles != 0
|
||
+ && ! (obsolete_usage
|
||
+ && ! checkonly
|
||
+ && optind != argc
|
||
+ && argv[optind][0] == '-' && argv[optind][1] == 'o'
|
||
+ && (argv[optind][2] || optind + 1 != argc)))
|
||
+ || ((c = getopt_long (argc, argv, short_options,
|
||
+ long_options, &oi))
|
||
+ == -1))
|
||
+ {
|
||
+ if (argc <= optind)
|
||
+ break;
|
||
+ files[nfiles++] = argv[optind++];
|
||
+ }
|
||
+ else switch (c)
|
||
+ {
|
||
+ case 1:
|
||
+ key = NULL;
|
||
+ if (optarg[0] == '+')
|
||
+ {
|
||
+ bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
|
||
+ && ISDIGIT (argv[optind][1]));
|
||
+ obsolete_usage |= minus_pos_usage && !posixly_correct;
|
||
+ if (obsolete_usage)
|
||
+ {
|
||
+ /* Treat +POS1 [-POS2] as a key if possible; but silently
|
||
+ treat an operand as a file if it is not a valid +POS1. */
|
||
+ key = key_init (&key_buf);
|
||
+ s = parse_field_count (optarg + 1, &key->sword, NULL);
|
||
+ if (s && *s == '.')
|
||
+ s = parse_field_count (s + 1, &key->schar, NULL);
|
||
+ if (! (key->sword || key->schar))
|
||
+ key->sword = SIZE_MAX;
|
||
+ if (! s || *set_ordering (s, key, bl_start))
|
||
+ key = NULL;
|
||
+ else
|
||
+ {
|
||
+ if (minus_pos_usage)
|
||
+ {
|
||
+ char const *optarg1 = argv[optind++];
|
||
+ s = parse_field_count (optarg1 + 1, &key->eword,
|
||
+ N_("invalid number after `-'"));
|
||
+ if (*s == '.')
|
||
+ s = parse_field_count (s + 1, &key->echar,
|
||
+ N_("invalid number after `.'"));
|
||
+ if (*set_ordering (s, key, bl_end))
|
||
+ badfieldspec (optarg1,
|
||
+ N_("stray character in field spec"));
|
||
+ }
|
||
+ insertkey (key);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ if (! key)
|
||
+ files[nfiles++] = optarg;
|
||
+ break;
|
||
+
|
||
+ case SORT_OPTION:
|
||
+ c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
|
||
+ /* Fall through. */
|
||
+ case 'b':
|
||
+ case 'd':
|
||
+ case 'f':
|
||
+ case 'g':
|
||
+ case 'h':
|
||
+ case 'i':
|
||
+ case 'M':
|
||
+ case 'n':
|
||
+ case 'r':
|
||
+ case 'R':
|
||
+ case 'V':
|
||
+ {
|
||
+ char str[2];
|
||
+ str[0] = c;
|
||
+ str[1] = '\0';
|
||
+ set_ordering (str, &gkey, bl_both);
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case CHECK_OPTION:
|
||
+ c = (optarg
|
||
+ ? XARGMATCH ("--check", optarg, check_args, check_types)
|
||
+ : 'c');
|
||
+ /* Fall through. */
|
||
+ case 'c':
|
||
+ case 'C':
|
||
+ if (checkonly && checkonly != c)
|
||
+ incompatible_options ("cC");
|
||
+ checkonly = c;
|
||
+ break;
|
||
+
|
||
+ case COMPRESS_PROGRAM_OPTION:
|
||
+ if (compress_program && !STREQ (compress_program, optarg))
|
||
+ error (SORT_FAILURE, 0, _("multiple compress programs specified"));
|
||
+ compress_program = optarg;
|
||
+ break;
|
||
+
|
||
+ case FILES0_FROM_OPTION:
|
||
+ files_from = optarg;
|
||
+ break;
|
||
+
|
||
+ case 'k':
|
||
+ key = key_init (&key_buf);
|
||
+
|
||
+ /* Get POS1. */
|
||
+ s = parse_field_count (optarg, &key->sword,
|
||
+ N_("invalid number at field start"));
|
||
+ if (! key->sword--)
|
||
+ {
|
||
+ /* Provoke with `sort -k0' */
|
||
+ badfieldspec (optarg, N_("field number is zero"));
|
||
+ }
|
||
+ if (*s == '.')
|
||
+ {
|
||
+ s = parse_field_count (s + 1, &key->schar,
|
||
+ N_("invalid number after `.'"));
|
||
+ if (! key->schar--)
|
||
+ {
|
||
+ /* Provoke with `sort -k1.0' */
|
||
+ badfieldspec (optarg, N_("character offset is zero"));
|
||
+ }
|
||
+ }
|
||
+ if (! (key->sword || key->schar))
|
||
+ key->sword = SIZE_MAX;
|
||
+ s = set_ordering (s, key, bl_start);
|
||
+ if (*s != ',')
|
||
+ {
|
||
+ key->eword = SIZE_MAX;
|
||
+ key->echar = 0;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* Get POS2. */
|
||
+ s = parse_field_count (s + 1, &key->eword,
|
||
+ N_("invalid number after `,'"));
|
||
+ if (! key->eword--)
|
||
+ {
|
||
+ /* Provoke with `sort -k1,0' */
|
||
+ badfieldspec (optarg, N_("field number is zero"));
|
||
+ }
|
||
+ if (*s == '.')
|
||
+ {
|
||
+ s = parse_field_count (s + 1, &key->echar,
|
||
+ N_("invalid number after `.'"));
|
||
+ }
|
||
+ s = set_ordering (s, key, bl_end);
|
||
+ }
|
||
+ if (*s)
|
||
+ badfieldspec (optarg, N_("stray character in field spec"));
|
||
+ insertkey (key);
|
||
+ break;
|
||
+
|
||
+ case 'm':
|
||
+ mergeonly = true;
|
||
+ break;
|
||
+
|
||
+ case NMERGE_OPTION:
|
||
+ specify_nmerge (oi, c, optarg);
|
||
+ break;
|
||
+
|
||
+ case 'o':
|
||
+ if (outfile && !STREQ (outfile, optarg))
|
||
+ error (SORT_FAILURE, 0, _("multiple output files specified"));
|
||
+ outfile = optarg;
|
||
+ break;
|
||
+
|
||
+ case RANDOM_SOURCE_OPTION:
|
||
+ if (random_source && !STREQ (random_source, optarg))
|
||
+ error (SORT_FAILURE, 0, _("multiple random sources specified"));
|
||
+ random_source = optarg;
|
||
+ break;
|
||
+
|
||
+ case 's':
|
||
+ stable = true;
|
||
+ break;
|
||
+
|
||
+ case 'S':
|
||
+ specify_sort_size (oi, c, optarg);
|
||
+ break;
|
||
+
|
||
+ case 't':
|
||
+ {
|
||
+ char newtab = optarg[0];
|
||
+ if (! newtab)
|
||
+ error (SORT_FAILURE, 0, _("empty tab"));
|
||
+ if (optarg[1])
|
||
+ {
|
||
+ if (STREQ (optarg, "\\0"))
|
||
+ newtab = '\0';
|
||
+ else
|
||
+ {
|
||
+ /* Provoke with `sort -txx'. Complain about
|
||
+ "multi-character tab" instead of "multibyte tab", so
|
||
+ that the diagnostic's wording does not need to be
|
||
+ changed once multibyte characters are supported. */
|
||
+ error (SORT_FAILURE, 0, _("multi-character tab %s"),
|
||
+ quote (optarg));
|
||
+ }
|
||
+ }
|
||
+ if (tab != TAB_DEFAULT && tab != newtab)
|
||
+ error (SORT_FAILURE, 0, _("incompatible tabs"));
|
||
+ tab = newtab;
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case 'T':
|
||
+ add_temp_dir (optarg);
|
||
+ break;
|
||
+
|
||
+ case 'u':
|
||
+ unique = true;
|
||
+ break;
|
||
+
|
||
+ case 'y':
|
||
+ /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
|
||
+ through Solaris 7. It is also accepted by many non-Solaris
|
||
+ "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
|
||
+ -y is marked as obsolete starting with Solaris 8 (1999), but is
|
||
+ still accepted as of Solaris 10 prerelease (2004).
|
||
+
|
||
+ Solaris 2.5.1 "sort -y 100" reads the input file "100", but
|
||
+ emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
|
||
+ and which in general ignores the argument after "-y" if it
|
||
+ consists entirely of digits (it can even be empty). */
|
||
+ if (optarg == argv[optind - 1])
|
||
+ {
|
||
+ char const *p;
|
||
+ for (p = optarg; ISDIGIT (*p); p++)
|
||
+ continue;
|
||
+ optind -= (*p != '\0');
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case 'z':
|
||
+ eolchar = 0;
|
||
+ break;
|
||
+
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+
|
||
+ default:
|
||
+ usage (SORT_FAILURE);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (files_from)
|
||
+ {
|
||
+ FILE *stream;
|
||
+
|
||
+ /* When using --files0-from=F, you may not specify any files
|
||
+ on the command-line. */
|
||
+ if (nfiles)
|
||
+ {
|
||
+ error (0, 0, _("extra operand %s"), quote (files[0]));
|
||
+ fprintf (stderr, "%s\n",
|
||
+ _("file operands cannot be combined with --files0-from"));
|
||
+ usage (SORT_FAILURE);
|
||
+ }
|
||
+
|
||
+ if (STREQ (files_from, "-"))
|
||
+ stream = stdin;
|
||
+ else
|
||
+ {
|
||
+ stream = fopen (files_from, "r");
|
||
+ if (stream == NULL)
|
||
+ error (SORT_FAILURE, errno, _("cannot open %s for reading"),
|
||
+ quote (files_from));
|
||
+ }
|
||
+
|
||
+ readtokens0_init (&tok);
|
||
+
|
||
+ if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
|
||
+ error (SORT_FAILURE, 0, _("cannot read file names from %s"),
|
||
+ quote (files_from));
|
||
+
|
||
+ if (tok.n_tok)
|
||
+ {
|
||
+ size_t i;
|
||
+ free (files);
|
||
+ files = tok.tok;
|
||
+ nfiles = tok.n_tok;
|
||
+ for (i = 0; i < nfiles; i++)
|
||
+ {
|
||
+ if (STREQ (files[i], "-"))
|
||
+ error (SORT_FAILURE, 0, _("when reading file names from stdin, "
|
||
+ "no file name of %s allowed"),
|
||
+ quote (files[i]));
|
||
+ else if (files[i][0] == '\0')
|
||
+ {
|
||
+ /* Using the standard `filename:line-number:' prefix here is
|
||
+ not totally appropriate, since NUL is the separator, not NL,
|
||
+ but it might be better than nothing. */
|
||
+ unsigned long int file_number = i + 1;
|
||
+ error (SORT_FAILURE, 0,
|
||
+ _("%s:%lu: invalid zero-length file name"),
|
||
+ quotearg_colon (files_from), file_number);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ error (SORT_FAILURE, 0, _("no input from %s"),
|
||
+ quote (files_from));
|
||
+ }
|
||
+
|
||
+ /* Inheritance of global options to individual keys. */
|
||
+ for (key = keylist; key; key = key->next)
|
||
+ {
|
||
+ if (! (key->ignore
|
||
+ || key->translate
|
||
+ || (key->skipsblanks
|
||
+ || key->reverse
|
||
+ || key->skipeblanks
|
||
+ || key->month
|
||
+ || key->numeric
|
||
+ || key->version
|
||
+ || key->general_numeric
|
||
+ || key->human_numeric
|
||
+ || key->random)))
|
||
+ {
|
||
+ key->ignore = gkey.ignore;
|
||
+ key->translate = gkey.translate;
|
||
+ key->skipsblanks = gkey.skipsblanks;
|
||
+ key->skipeblanks = gkey.skipeblanks;
|
||
+ key->month = gkey.month;
|
||
+ key->numeric = gkey.numeric;
|
||
+ key->general_numeric = gkey.general_numeric;
|
||
+ key->human_numeric = gkey.human_numeric;
|
||
+ key->random = gkey.random;
|
||
+ key->reverse = gkey.reverse;
|
||
+ key->version = gkey.version;
|
||
+ }
|
||
+
|
||
+ need_random |= key->random;
|
||
+ }
|
||
+
|
||
+ if (!keylist && (gkey.ignore
|
||
+ || gkey.translate
|
||
+ || (gkey.skipsblanks
|
||
+ || gkey.skipeblanks
|
||
+ || gkey.month
|
||
+ || gkey.numeric
|
||
+ || gkey.general_numeric
|
||
+ || gkey.human_numeric
|
||
+ || gkey.random
|
||
+ || gkey.version)))
|
||
+ {
|
||
+ insertkey (&gkey);
|
||
+ need_random |= gkey.random;
|
||
+ }
|
||
+
|
||
+ check_ordering_compatibility ();
|
||
+
|
||
+ reverse = gkey.reverse;
|
||
+
|
||
+ if (need_random)
|
||
+ {
|
||
+ randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
|
||
+ if (! randread_source)
|
||
+ die (_("open failed"), random_source);
|
||
+ }
|
||
+
|
||
+ if (temp_dir_count == 0)
|
||
+ {
|
||
+ char const *tmp_dir = getenv ("TMPDIR");
|
||
+ add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
|
||
+ }
|
||
+
|
||
+ if (nfiles == 0)
|
||
+ {
|
||
+ static char *minus = (char *) "-";
|
||
+ nfiles = 1;
|
||
+ free (files);
|
||
+ files = −
|
||
+ }
|
||
+
|
||
+ /* Need to re-check that we meet the minimum requirement for memory
|
||
+ usage with the final value for NMERGE. */
|
||
+ if (0 < sort_size)
|
||
+ sort_size = MAX (sort_size, MIN_SORT_SIZE);
|
||
+
|
||
+ if (checkonly)
|
||
+ {
|
||
+ if (nfiles > 1)
|
||
+ error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
|
||
+ quote (files[1]), checkonly);
|
||
+
|
||
+ if (outfile)
|
||
+ {
|
||
+ static char opts[] = {0, 'o', 0};
|
||
+ opts[0] = checkonly;
|
||
+ incompatible_options (opts);
|
||
+ }
|
||
+
|
||
+ /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
|
||
+ input is not properly sorted. */
|
||
+ exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
|
||
+ }
|
||
+
|
||
+ if (mergeonly)
|
||
+ {
|
||
+ struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
|
||
+ size_t i;
|
||
+
|
||
+ for (i = 0; i < nfiles; ++i)
|
||
+ sortfiles[i].name = files[i];
|
||
+
|
||
+ merge (sortfiles, 0, nfiles, outfile);
|
||
+ IF_LINT (free (sortfiles));
|
||
+ }
|
||
+ else
|
||
+ sort (files, nfiles, outfile);
|
||
+
|
||
+ if (have_read_stdin && fclose (stdin) == EOF)
|
||
+ die (_("close failed"), "-");
|
||
+
|
||
+ exit (EXIT_SUCCESS);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/src/unexpand.c coreutils-8.0/src/unexpand.c
|
||
--- coreutils-8.0-orig/src/unexpand.c 2009-09-29 15:27:54.000000000 +0200
|
||
+++ coreutils-8.0/src/unexpand.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -38,11 +38,28 @@
|
||
#include <stdio.h>
|
||
#include <getopt.h>
|
||
#include <sys/types.h>
|
||
+
|
||
+/* Get mbstate_t, mbrtowc(), wcwidth(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+
|
||
#include "system.h"
|
||
#include "error.h"
|
||
#include "quote.h"
|
||
#include "xstrndup.h"
|
||
|
||
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
|
||
+ installation; work around this configuration error. */
|
||
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
|
||
+# define MB_LEN_MAX 16
|
||
+#endif
|
||
+
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
/* The official name of this program (e.g., no `g' prefix). */
|
||
#define PROGRAM_NAME "unexpand"
|
||
|
||
@@ -102,6 +119,208 @@ static struct option const longopts[] =
|
||
{NULL, 0, NULL, 0}
|
||
};
|
||
|
||
+static FILE *next_file (FILE *fp);
|
||
+
|
||
+#if HAVE_MBRTOWC
|
||
+static void
|
||
+unexpand_multibyte (void)
|
||
+{
|
||
+ FILE *fp; /* Input stream. */
|
||
+ mbstate_t i_state; /* Current shift state of the input stream. */
|
||
+ mbstate_t i_state_bak; /* Back up the I_STATE. */
|
||
+ mbstate_t o_state; /* Current shift state of the output stream. */
|
||
+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
|
||
+ char *bufpos = buf; /* Next read position of BUF. */
|
||
+ size_t buflen = 0; /* The length of the byte sequence in buf. */
|
||
+ wint_t wc; /* A gotten wide character. */
|
||
+ size_t mblength; /* The byte size of a multibyte character
|
||
+ which shows as same character as WC. */
|
||
+
|
||
+ /* Index in `tab_list' of next tabstop: */
|
||
+ int tab_index = 0; /* For calculating width of pending tabs. */
|
||
+ int print_tab_index = 0; /* For printing as many tabs as possible. */
|
||
+ unsigned int column = 0; /* Column on screen of next char. */
|
||
+ int next_tab_column; /* Column the next tab stop is on. */
|
||
+ int convert = 1; /* If nonzero, perform translations. */
|
||
+ unsigned int pending = 0; /* Pending columns of blanks. */
|
||
+
|
||
+ fp = next_file ((FILE *) NULL);
|
||
+ if (fp == NULL)
|
||
+ return;
|
||
+
|
||
+ memset (&o_state, '\0', sizeof(mbstate_t));
|
||
+ memset (&i_state, '\0', sizeof(mbstate_t));
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
|
||
+ {
|
||
+ memmove (buf, bufpos, buflen);
|
||
+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
|
||
+ bufpos = buf;
|
||
+ }
|
||
+
|
||
+ /* Get a wide character. */
|
||
+ if (buflen < 1)
|
||
+ {
|
||
+ mblength = 1;
|
||
+ wc = WEOF;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ i_state_bak = i_state;
|
||
+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
|
||
+ }
|
||
+
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ i_state = i_state_bak;
|
||
+ wc = L'\0';
|
||
+ }
|
||
+
|
||
+ if (wc == L' ' && convert && column < INT_MAX)
|
||
+ {
|
||
+ ++pending;
|
||
+ ++column;
|
||
+ }
|
||
+ else if (wc == L'\t' && convert)
|
||
+ {
|
||
+ if (tab_size == 0)
|
||
+ {
|
||
+ /* Do not let tab_index == first_free_tab;
|
||
+ stop when it is 1 less. */
|
||
+ while (tab_index < first_free_tab - 1
|
||
+ && column >= tab_list[tab_index])
|
||
+ tab_index++;
|
||
+ next_tab_column = tab_list[tab_index];
|
||
+ if (tab_index < first_free_tab - 1)
|
||
+ tab_index++;
|
||
+ if (column >= next_tab_column)
|
||
+ {
|
||
+ convert = 0; /* Ran out of tab stops. */
|
||
+ goto flush_pend_mb;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ next_tab_column = column + tab_size - column % tab_size;
|
||
+ }
|
||
+ pending += next_tab_column - column;
|
||
+ column = next_tab_column;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+flush_pend_mb:
|
||
+ /* Flush pending spaces. Print as many tabs as possible,
|
||
+ then print the rest as spaces. */
|
||
+ if (pending == 1)
|
||
+ {
|
||
+ putchar (' ');
|
||
+ pending = 0;
|
||
+ }
|
||
+ column -= pending;
|
||
+ while (pending > 0)
|
||
+ {
|
||
+ if (tab_size == 0)
|
||
+ {
|
||
+ /* Do not let print_tab_index == first_free_tab;
|
||
+ stop when it is 1 less. */
|
||
+ while (print_tab_index < first_free_tab - 1
|
||
+ && column >= tab_list[print_tab_index])
|
||
+ print_tab_index++;
|
||
+ next_tab_column = tab_list[print_tab_index];
|
||
+ if (print_tab_index < first_free_tab - 1)
|
||
+ print_tab_index++;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ next_tab_column =
|
||
+ column + tab_size - column % tab_size;
|
||
+ }
|
||
+ if (next_tab_column - column <= pending)
|
||
+ {
|
||
+ putchar ('\t');
|
||
+ pending -= next_tab_column - column;
|
||
+ column = next_tab_column;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ --print_tab_index;
|
||
+ column += pending;
|
||
+ while (pending != 0)
|
||
+ {
|
||
+ putchar (' ');
|
||
+ pending--;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (wc == WEOF)
|
||
+ {
|
||
+ fp = next_file (fp);
|
||
+ if (fp == NULL)
|
||
+ break; /* No more files. */
|
||
+ else
|
||
+ {
|
||
+ memset (&i_state, '\0', sizeof(mbstate_t));
|
||
+ continue;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
|
||
+ {
|
||
+ if (convert)
|
||
+ {
|
||
+ ++column;
|
||
+ if (convert_entire_line == 0)
|
||
+ convert = 0;
|
||
+ }
|
||
+ mblength = 1;
|
||
+ putchar (*bufpos); /* Emit the undecodable byte at the read position. */
|
||
+ }
|
||
+ else if (mblength == 0)
|
||
+ {
|
||
+ if (convert && convert_entire_line == 0)
|
||
+ convert = 0;
|
||
+ mblength = 1;
|
||
+ putchar ('\0');
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (convert)
|
||
+ {
|
||
+ if (wc == L'\b')
|
||
+ {
|
||
+ if (column > 0)
|
||
+ --column;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ int width; /* The width of WC. */
|
||
+
|
||
+ width = wcwidth (wc);
|
||
+ column += (width > 0) ? width : 0;
|
||
+ if (convert_entire_line == 0)
|
||
+ convert = 0;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (wc == L'\n')
|
||
+ {
|
||
+ tab_index = print_tab_index = 0;
|
||
+ column = pending = 0;
|
||
+ convert = 1;
|
||
+ }
|
||
+ fwrite (bufpos, sizeof(char), mblength, stdout);
|
||
+ }
|
||
+ }
|
||
+ buflen -= mblength;
|
||
+ bufpos += mblength;
|
||
+ }
|
||
+}
|
||
+#endif
|
||
+
|
||
+
|
||
void
|
||
usage (int status)
|
||
{
|
||
@@ -523,7 +742,12 @@ main (int argc, char **argv)
|
||
|
||
file_list = (optind < argc ? &argv[optind] : stdin_argv);
|
||
|
||
- unexpand ();
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ unexpand_multibyte ();
|
||
+ else
|
||
+#endif
|
||
+ unexpand ();
|
||
|
||
if (have_read_stdin && fclose (stdin) != 0)
|
||
error (EXIT_FAILURE, errno, "-");
|
||
diff -urNp coreutils-8.0-orig/src/unexpand.c.orig coreutils-8.0/src/unexpand.c.orig
|
||
--- coreutils-8.0-orig/src/unexpand.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/unexpand.c.orig 2009-09-29 15:27:54.000000000 +0200
|
||
@@ -0,0 +1,532 @@
|
||
+/* unexpand - convert blanks to tabs
|
||
+ Copyright (C) 89, 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* By default, convert only maximal strings of initial blanks and tabs
|
||
+ into tabs.
|
||
+ Preserves backspace characters in the output; they decrement the
|
||
+ column count for tab calculations.
|
||
+ The default action is equivalent to -8.
|
||
+
|
||
+ Options:
|
||
+ --tabs=tab1[,tab2[,...]]
|
||
+ -t tab1[,tab2[,...]]
|
||
+ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
|
||
+ columns apart instead of the default 8. Otherwise,
|
||
+ set the tabs at columns tab1, tab2, etc. (numbered from
|
||
+ 0); preserve any blanks beyond the tab stops given.
|
||
+ --all
|
||
+ -a Use tabs wherever they would replace 2 or more blanks,
|
||
+ not just at the beginnings of lines.
|
||
+
|
||
+ David MacKenzie <djm@gnu.ai.mit.edu> */
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <stdio.h>
|
||
+#include <getopt.h>
|
||
+#include <sys/types.h>
|
||
+#include "system.h"
|
||
+#include "error.h"
|
||
+#include "quote.h"
|
||
+#include "xstrndup.h"
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "unexpand"
|
||
+
|
||
+#define AUTHORS proper_name ("David MacKenzie")
|
||
+
|
||
+/* If true, convert blanks even after nonblank characters have been
|
||
+ read on the line. */
|
||
+static bool convert_entire_line;
|
||
+
|
||
+/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
|
||
+static size_t tab_size;
|
||
+
|
||
+/* The maximum distance between tab stops. */
|
||
+static size_t max_column_width;
|
||
+
|
||
+/* Array of the explicit column numbers of the tab stops;
|
||
+ after `tab_list' is exhausted, the rest of the line is printed
|
||
+ unchanged. The first column is column 0. */
|
||
+static uintmax_t *tab_list;
|
||
+
|
||
+/* The number of allocated entries in `tab_list'. */
|
||
+static size_t n_tabs_allocated;
|
||
+
|
||
+/* The index of the first invalid element of `tab_list',
|
||
+ where the next element can be added. */
|
||
+static size_t first_free_tab;
|
||
+
|
||
+/* Null-terminated array of input filenames. */
|
||
+static char **file_list;
|
||
+
|
||
+/* Default for `file_list' if no files are given on the command line. */
|
||
+static char *stdin_argv[] =
|
||
+{
|
||
+ (char *) "-", NULL
|
||
+};
|
||
+
|
||
+/* True if we have ever read standard input. */
|
||
+static bool have_read_stdin;
|
||
+
|
||
+/* The desired exit status. */
|
||
+static int exit_status;
|
||
+
|
||
+/* For long options that have no equivalent short option, use a
|
||
+ non-character as a pseudo short option, starting with CHAR_MAX + 1. */
|
||
+enum
|
||
+{
|
||
+ CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
|
||
+};
|
||
+
|
||
+static struct option const longopts[] =
|
||
+{
|
||
+ {"tabs", required_argument, NULL, 't'},
|
||
+ {"all", no_argument, NULL, 'a'},
|
||
+ {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0}
|
||
+};
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s [OPTION]... [FILE]...\n\
|
||
+"),
|
||
+ program_name);
|
||
+ fputs (_("\
|
||
+Convert blanks in each FILE to tabs, writing to standard output.\n\
|
||
+With no FILE, or when FILE is -, read standard input.\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Mandatory arguments to long options are mandatory for short options too.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -a, --all convert all blanks, instead of just initial blanks\n\
|
||
+ --first-only convert only leading sequences of blanks (overrides -a)\n\
|
||
+ -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
|
||
+ -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+ exit (status);
|
||
+}
|
||
+
|
||
+/* Add tab stop TABVAL to the end of `tab_list'. */
|
||
+
|
||
+static void
|
||
+add_tab_stop (uintmax_t tabval)
|
||
+{
|
||
+ uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
|
||
+ uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
|
||
+
|
||
+ if (first_free_tab == n_tabs_allocated)
|
||
+ tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
|
||
+ tab_list[first_free_tab++] = tabval;
|
||
+
|
||
+ if (max_column_width < column_width)
|
||
+ {
|
||
+ if (SIZE_MAX < column_width)
|
||
+ error (EXIT_FAILURE, 0, _("tabs are too far apart"));
|
||
+ max_column_width = column_width;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Add the comma or blank separated list of tab stops STOPS
|
||
+ to the list of tab stops. */
|
||
+
|
||
+static void
|
||
+parse_tab_stops (char const *stops)
|
||
+{
|
||
+ bool have_tabval = false;
|
||
+ uintmax_t tabval IF_LINT (= 0);
|
||
+ char const *num_start IF_LINT (= NULL);
|
||
+ bool ok = true;
|
||
+
|
||
+ for (; *stops; stops++)
|
||
+ {
|
||
+ if (*stops == ',' || isblank (to_uchar (*stops)))
|
||
+ {
|
||
+ if (have_tabval)
|
||
+ add_tab_stop (tabval);
|
||
+ have_tabval = false;
|
||
+ }
|
||
+ else if (ISDIGIT (*stops))
|
||
+ {
|
||
+ if (!have_tabval)
|
||
+ {
|
||
+ tabval = 0;
|
||
+ have_tabval = true;
|
||
+ num_start = stops;
|
||
+ }
|
||
+
|
||
+ /* Detect overflow. */
|
||
+ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
|
||
+ {
|
||
+ size_t len = strspn (num_start, "0123456789");
|
||
+ char *bad_num = xstrndup (num_start, len);
|
||
+ error (0, 0, _("tab stop is too large %s"), quote (bad_num));
|
||
+ free (bad_num);
|
||
+ ok = false;
|
||
+ stops = num_start + len - 1;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ error (0, 0, _("tab size contains invalid character(s): %s"),
|
||
+ quote (stops));
|
||
+ ok = false;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (!ok)
|
||
+ exit (EXIT_FAILURE);
|
||
+
|
||
+ if (have_tabval)
|
||
+ add_tab_stop (tabval);
|
||
+}
|
||
+
|
||
+/* Check that the list of tab stops TABS, with ENTRIES entries,
|
||
+ contains only nonzero, ascending values. */
|
||
+
|
||
+static void
|
||
+validate_tab_stops (uintmax_t const *tabs, size_t entries)
|
||
+{
|
||
+ uintmax_t prev_tab = 0;
|
||
+ size_t i;
|
||
+
|
||
+ for (i = 0; i < entries; i++)
|
||
+ {
|
||
+ if (tabs[i] == 0)
|
||
+ error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
|
||
+ if (tabs[i] <= prev_tab)
|
||
+ error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
|
||
+ prev_tab = tabs[i];
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Close the old stream pointer FP if it is non-NULL,
|
||
+ and return a new one opened to read the next input file.
|
||
+ Open a filename of `-' as the standard input.
|
||
+ Return NULL if there are no more input files. */
|
||
+
|
||
+static FILE *
|
||
+next_file (FILE *fp)
|
||
+{
|
||
+ static char *prev_file;
|
||
+ char *file;
|
||
+
|
||
+ if (fp)
|
||
+ {
|
||
+ if (ferror (fp))
|
||
+ {
|
||
+ error (0, errno, "%s", prev_file);
|
||
+ exit_status = EXIT_FAILURE;
|
||
+ }
|
||
+ if (STREQ (prev_file, "-"))
|
||
+ clearerr (fp); /* Also clear EOF. */
|
||
+ else if (fclose (fp) != 0)
|
||
+ {
|
||
+ error (0, errno, "%s", prev_file);
|
||
+ exit_status = EXIT_FAILURE;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ while ((file = *file_list++) != NULL)
|
||
+ {
|
||
+ if (STREQ (file, "-"))
|
||
+ {
|
||
+ have_read_stdin = true;
|
||
+ prev_file = file;
|
||
+ return stdin;
|
||
+ }
|
||
+ fp = fopen (file, "r");
|
||
+ if (fp)
|
||
+ {
|
||
+ prev_file = file;
|
||
+ return fp;
|
||
+ }
|
||
+ error (0, errno, "%s", file);
|
||
+ exit_status = EXIT_FAILURE;
|
||
+ }
|
||
+ return NULL;
|
||
+}
|
||
+
|
||
+/* Change blanks to tabs, writing to stdout.
|
||
+ Read each file in `file_list', in order. */
|
||
+
|
||
+static void
|
||
+unexpand (void)
|
||
+{
|
||
+ /* Input stream. */
|
||
+ FILE *fp = next_file (NULL);
|
||
+
|
||
+ /* The array of pending blanks. In non-POSIX locales, blanks can
|
||
+ include characters other than spaces, so the blanks must be
|
||
+ stored, not merely counted. */
|
||
+ char *pending_blank;
|
||
+
|
||
+ if (!fp)
|
||
+ return;
|
||
+
|
||
+ /* The worst case is a non-blank character, then one blank, then a
|
||
+ tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
|
||
+ allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
|
||
+ pending_blank = xmalloc (max_column_width);
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ /* Input character, or EOF. */
|
||
+ int c;
|
||
+
|
||
+ /* If true, perform translations. */
|
||
+ bool convert = true;
|
||
+
|
||
+
|
||
+ /* The following variables have valid values only when CONVERT
|
||
+ is true: */
|
||
+
|
||
+ /* Column of next input character. */
|
||
+ uintmax_t column = 0;
|
||
+
|
||
+ /* Column the next input tab stop is on. */
|
||
+ uintmax_t next_tab_column = 0;
|
||
+
|
||
+ /* Index in TAB_LIST of next tab stop to examine. */
|
||
+ size_t tab_index = 0;
|
||
+
|
||
+ /* If true, the first pending blank came just before a tab stop. */
|
||
+ bool one_blank_before_tab_stop = false;
|
||
+
|
||
+ /* If true, the previous input character was a blank. This is
|
||
+ initially true, since initial strings of blanks are treated
|
||
+ as if the line was preceded by a blank. */
|
||
+ bool prev_blank = true;
|
||
+
|
||
+ /* Number of pending columns of blanks. */
|
||
+ size_t pending = 0;
|
||
+
|
||
+
|
||
+ /* Convert a line of text. */
|
||
+
|
||
+ do
|
||
+ {
|
||
+ while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
|
||
+ continue;
|
||
+
|
||
+ if (convert)
|
||
+ {
|
||
+ bool blank = !! isblank (c);
|
||
+
|
||
+ if (blank)
|
||
+ {
|
||
+ if (next_tab_column <= column)
|
||
+ {
|
||
+ if (tab_size)
|
||
+ next_tab_column =
|
||
+ column + (tab_size - column % tab_size);
|
||
+ else
|
||
+ for (;;)
|
||
+ if (tab_index == first_free_tab)
|
||
+ {
|
||
+ convert = false;
|
||
+ break;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ uintmax_t tab = tab_list[tab_index++];
|
||
+ if (column < tab)
|
||
+ {
|
||
+ next_tab_column = tab;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (convert)
|
||
+ {
|
||
+ if (next_tab_column < column)
|
||
+ error (EXIT_FAILURE, 0, _("input line is too long"));
|
||
+
|
||
+ if (c == '\t')
|
||
+ {
|
||
+ column = next_tab_column;
|
||
+
|
||
+ /* Discard pending blanks, unless it was a single
|
||
+ blank just before the previous tab stop. */
|
||
+ if (! (pending == 1 && one_blank_before_tab_stop))
|
||
+ {
|
||
+ pending = 0;
|
||
+ one_blank_before_tab_stop = false;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ column++;
|
||
+
|
||
+ if (! (prev_blank && column == next_tab_column))
|
||
+ {
|
||
+ /* It is not yet known whether the pending blanks
|
||
+ will be replaced by tabs. */
|
||
+ if (column == next_tab_column)
|
||
+ one_blank_before_tab_stop = true;
|
||
+ pending_blank[pending++] = c;
|
||
+ prev_blank = true;
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ /* Replace the pending blanks by a tab or two. */
|
||
+ pending_blank[0] = c = '\t';
|
||
+ pending = one_blank_before_tab_stop;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ else if (c == '\b')
|
||
+ {
|
||
+ /* Go back one column, and force recalculation of the
|
||
+ next tab stop. */
|
||
+ column -= !!column;
|
||
+ next_tab_column = column;
|
||
+ tab_index -= !!tab_index;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ column++;
|
||
+ if (!column)
|
||
+ error (EXIT_FAILURE, 0, _("input line is too long"));
|
||
+ }
|
||
+
|
||
+ if (pending)
|
||
+ {
|
||
+ if (fwrite (pending_blank, 1, pending, stdout) != pending)
|
||
+ error (EXIT_FAILURE, errno, _("write error"));
|
||
+ pending = 0;
|
||
+ one_blank_before_tab_stop = false;
|
||
+ }
|
||
+
|
||
+ prev_blank = blank;
|
||
+ convert &= convert_entire_line || blank;
|
||
+ }
|
||
+
|
||
+ if (c < 0)
|
||
+ {
|
||
+ free (pending_blank);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ if (putchar (c) < 0)
|
||
+ error (EXIT_FAILURE, errno, _("write error"));
|
||
+ }
|
||
+ while (c != '\n');
|
||
+ }
|
||
+}
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ bool have_tabval = false;
|
||
+ uintmax_t tabval IF_LINT (= 0);
|
||
+ int c;
|
||
+
|
||
+ /* If true, cancel the effect of any -a (explicit or implicit in -t),
|
||
+ so that only leading blanks will be considered. */
|
||
+ bool convert_first_only = false;
|
||
+
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+
|
||
+ atexit (close_stdout);
|
||
+
|
||
+ have_read_stdin = false;
|
||
+ exit_status = EXIT_SUCCESS;
|
||
+ convert_entire_line = false;
|
||
+ tab_list = NULL;
|
||
+ first_free_tab = 0;
|
||
+
|
||
+ while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
|
||
+ != -1)
|
||
+ {
|
||
+ switch (c)
|
||
+ {
|
||
+ case '?':
|
||
+ usage (EXIT_FAILURE);
|
||
+ case 'a':
|
||
+ convert_entire_line = true;
|
||
+ break;
|
||
+ case 't':
|
||
+ convert_entire_line = true;
|
||
+ parse_tab_stops (optarg);
|
||
+ break;
|
||
+ case CONVERT_FIRST_ONLY_OPTION:
|
||
+ convert_first_only = true;
|
||
+ break;
|
||
+ case ',':
|
||
+ if (have_tabval)
|
||
+ add_tab_stop (tabval);
|
||
+ have_tabval = false;
|
||
+ break;
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+ default:
|
||
+ if (!have_tabval)
|
||
+ {
|
||
+ tabval = 0;
|
||
+ have_tabval = true;
|
||
+ }
|
||
+ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
|
||
+ error (EXIT_FAILURE, 0, _("tab stop value is too large"));
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (convert_first_only)
|
||
+ convert_entire_line = false;
|
||
+
|
||
+ if (have_tabval)
|
||
+ add_tab_stop (tabval);
|
||
+
|
||
+ validate_tab_stops (tab_list, first_free_tab);
|
||
+
|
||
+ if (first_free_tab == 0)
|
||
+ tab_size = max_column_width = 8;
|
||
+ else if (first_free_tab == 1)
|
||
+ tab_size = tab_list[0];
|
||
+ else
|
||
+ tab_size = 0;
|
||
+
|
||
+ file_list = (optind < argc ? &argv[optind] : stdin_argv);
|
||
+
|
||
+ unexpand ();
|
||
+
|
||
+ if (have_read_stdin && fclose (stdin) != 0)
|
||
+ error (EXIT_FAILURE, errno, "-");
|
||
+
|
||
+ exit (exit_status);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/src/uniq.c coreutils-8.0/src/uniq.c
|
||
--- coreutils-8.0-orig/src/uniq.c 2009-09-23 10:25:44.000000000 +0200
|
||
+++ coreutils-8.0/src/uniq.c 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -22,6 +22,16 @@
|
||
#include <getopt.h>
|
||
#include <sys/types.h>
|
||
|
||
+/* Get mbstate_t, mbrtowc(). */
|
||
+#if HAVE_WCHAR_H
|
||
+# include <wchar.h>
|
||
+#endif
|
||
+
|
||
+/* Get isw* functions. */
|
||
+#if HAVE_WCTYPE_H
|
||
+# include <wctype.h>
|
||
+#endif
|
||
+
|
||
#include "system.h"
|
||
#include "argmatch.h"
|
||
#include "linebuffer.h"
|
||
@@ -31,7 +41,19 @@
|
||
#include "quote.h"
|
||
#include "xmemcoll.h"
|
||
#include "xstrtol.h"
|
||
-#include "memcasecmp.h"
|
||
+#include "xmemcoll.h"
|
||
+
|
||
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
|
||
+ installation; work around this configuration error. */
|
||
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
|
||
+# define MB_LEN_MAX 16
|
||
+#endif
|
||
+
|
||
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
|
||
+#if HAVE_MBRTOWC && defined mbstate_t
|
||
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
|
||
+#endif
|
||
+
|
||
|
||
/* The official name of this program (e.g., no `g' prefix). */
|
||
#define PROGRAM_NAME "uniq"
|
||
@@ -107,6 +129,10 @@ static enum delimit_method const delimit
|
||
/* Select whether/how to delimit groups of duplicate lines. */
|
||
static enum delimit_method delimit_groups;
|
||
|
||
+/* Function pointers. */
|
||
+static char *
|
||
+(*find_field) (struct linebuffer *line);
|
||
+
|
||
static struct option const longopts[] =
|
||
{
|
||
{"count", no_argument, NULL, 'c'},
|
||
@@ -206,7 +232,7 @@ size_opt (char const *opt, char const *m
|
||
return a pointer to the beginning of the line's field to be compared. */
|
||
|
||
static char *
|
||
-find_field (struct linebuffer const *line)
|
||
+find_field_uni (struct linebuffer *line)
|
||
{
|
||
size_t count;
|
||
char const *lp = line->buffer;
|
||
@@ -227,6 +253,83 @@ find_field (struct linebuffer const *lin
|
||
return line->buffer + i;
|
||
}
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+
|
||
+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
|
||
+ do \
|
||
+ { \
|
||
+ mbstate_t state_bak; \
|
||
+ \
|
||
+ CONVFAIL = 0; \
|
||
+ state_bak = *STATEP; \
|
||
+ \
|
||
+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
|
||
+ \
|
||
+ switch (MBLENGTH) \
|
||
+ { \
|
||
+ case (size_t)-2: \
|
||
+ case (size_t)-1: \
|
||
+ *STATEP = state_bak; \
|
||
+ CONVFAIL++; \
|
||
+ /* Fall through */ \
|
||
+ case 0: \
|
||
+ MBLENGTH = 1; \
|
||
+ } \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+static char *
|
||
+find_field_multi (struct linebuffer *line)
|
||
+{
|
||
+ size_t count;
|
||
+ char *lp = line->buffer;
|
||
+ size_t size = line->length - 1;
|
||
+ size_t pos;
|
||
+ size_t mblength;
|
||
+ wchar_t wc;
|
||
+ mbstate_t *statep;
|
||
+ int convfail;
|
||
+
|
||
+ pos = 0;
|
||
+ statep = &(line->state);
|
||
+
|
||
+ /* skip fields. */
|
||
+ for (count = 0; count < skip_fields && pos < size; count++)
|
||
+ {
|
||
+ while (pos < size)
|
||
+ {
|
||
+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
|
||
+
|
||
+ if (convfail || !iswblank (wc))
|
||
+ {
|
||
+ pos += mblength;
|
||
+ break;
|
||
+ }
|
||
+ pos += mblength;
|
||
+ }
|
||
+
|
||
+ while (pos < size)
|
||
+ {
|
||
+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
|
||
+
|
||
+ if (!convfail && iswblank (wc))
|
||
+ break;
|
||
+
|
||
+ pos += mblength;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* skip chars. */
|
||
+ for (count = 0; count < skip_chars && pos < size; count++)
|
||
+ {
|
||
+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
|
||
+ pos += mblength;
|
||
+ }
|
||
+
|
||
+ return lp + pos;
|
||
+}
|
||
+#endif
|
||
+
|
||
/* Return false if two strings OLD and NEW match, true if not.
|
||
OLD and NEW point not to the beginnings of the lines
|
||
but rather to the beginnings of the fields to compare.
|
||
@@ -235,6 +338,8 @@ find_field (struct linebuffer const *lin
|
||
static bool
|
||
different (char *old, char *new, size_t oldlen, size_t newlen)
|
||
{
|
||
+ char *copy_old, *copy_new;
|
||
+
|
||
if (check_chars < oldlen)
|
||
oldlen = check_chars;
|
||
if (check_chars < newlen)
|
||
@@ -242,14 +347,92 @@ different (char *old, char *new, size_t
|
||
|
||
if (ignore_case)
|
||
{
|
||
- /* FIXME: This should invoke strcoll somehow. */
|
||
- return oldlen != newlen || memcasecmp (old, new, oldlen);
|
||
+ size_t i;
|
||
+
|
||
+ copy_old = alloca (oldlen + 1);
|
||
+ copy_new = alloca (oldlen + 1);
|
||
+
|
||
+ for (i = 0; i < oldlen; i++)
|
||
+ {
|
||
+ copy_old[i] = toupper (old[i]);
|
||
+ copy_new[i] = toupper (new[i]);
|
||
+ }
|
||
}
|
||
- else if (hard_LC_COLLATE)
|
||
- return xmemcoll (old, oldlen, new, newlen) != 0;
|
||
else
|
||
- return oldlen != newlen || memcmp (old, new, oldlen);
|
||
+ {
|
||
+ copy_old = (char *)old;
|
||
+ copy_new = (char *)new;
|
||
+ }
|
||
+
|
||
+ return xmemcoll (copy_old, oldlen, copy_new, newlen);
|
||
+}
|
||
+
|
||
+#if HAVE_MBRTOWC
|
||
+static int
|
||
+different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
|
||
+{
|
||
+ size_t i, j, chars;
|
||
+ const char *str[2];
|
||
+ char *copy[2];
|
||
+ size_t len[2];
|
||
+ mbstate_t state[2];
|
||
+ size_t mblength;
|
||
+ wchar_t wc, uwc;
|
||
+ mbstate_t state_bak;
|
||
+
|
||
+ str[0] = old;
|
||
+ str[1] = new;
|
||
+ len[0] = oldlen;
|
||
+ len[1] = newlen;
|
||
+ state[0] = oldstate;
|
||
+ state[1] = newstate;
|
||
+
|
||
+ for (i = 0; i < 2; i++)
|
||
+ {
|
||
+ copy[i] = alloca (len[i] + 1);
|
||
+
|
||
+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
|
||
+ {
|
||
+ state_bak = state[i];
|
||
+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
|
||
+
|
||
+ switch (mblength)
|
||
+ {
|
||
+ case (size_t)-1:
|
||
+ case (size_t)-2:
|
||
+ state[i] = state_bak;
|
||
+ /* Fall through */
|
||
+ case 0:
|
||
+ mblength = 1;
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ if (ignore_case)
|
||
+ {
|
||
+ uwc = towupper (wc);
|
||
+
|
||
+ if (uwc != wc)
|
||
+ {
|
||
+ mbstate_t state_wc;
|
||
+
|
||
+ memset (&state_wc, '\0', sizeof(mbstate_t));
|
||
+ wcrtomb (copy[i] + j, uwc, &state_wc);
|
||
+ }
|
||
+ else
|
||
+ memcpy (copy[i] + j, str[i] + j, mblength);
|
||
+ }
|
||
+ else
|
||
+ memcpy (copy[i] + j, str[i] + j, mblength);
|
||
+ }
|
||
+ j += mblength;
|
||
+ }
|
||
+ copy[i][j] = '\0';
|
||
+ len[i] = j;
|
||
+ }
|
||
+
|
||
+ return xmemcoll (copy[0], len[0], copy[1], len[1]);
|
||
}
|
||
+#endif
|
||
|
||
/* Output the line in linebuffer LINE to standard output
|
||
provided that the switches say it should be output.
|
||
@@ -303,15 +486,43 @@ check_file (const char *infile, const ch
|
||
{
|
||
char *prevfield IF_LINT (= NULL);
|
||
size_t prevlen IF_LINT (= 0);
|
||
+#if HAVE_MBRTOWC
|
||
+ mbstate_t prevstate;
|
||
+
|
||
+ memset (&prevstate, '\0', sizeof (mbstate_t));
|
||
+#endif
|
||
|
||
while (!feof (stdin))
|
||
{
|
||
char *thisfield;
|
||
size_t thislen;
|
||
+#if HAVE_MBRTOWC
|
||
+ mbstate_t thisstate;
|
||
+#endif
|
||
+
|
||
if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
|
||
break;
|
||
thisfield = find_field (thisline);
|
||
thislen = thisline->length - 1 - (thisfield - thisline->buffer);
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ thisstate = thisline->state;
|
||
+
|
||
+ if (prevline->length == 0 || different_multi
|
||
+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
|
||
+ {
|
||
+ fwrite (thisline->buffer, sizeof (char),
|
||
+ thisline->length, stdout);
|
||
+
|
||
+ SWAP_LINES (prevline, thisline);
|
||
+ prevfield = thisfield;
|
||
+ prevlen = thislen;
|
||
+ prevstate = thisstate;
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
if (prevline->length == 0
|
||
|| different (thisfield, prevfield, thislen, prevlen))
|
||
{
|
||
@@ -330,17 +541,26 @@ check_file (const char *infile, const ch
|
||
size_t prevlen;
|
||
uintmax_t match_count = 0;
|
||
bool first_delimiter = true;
|
||
+#if HAVE_MBRTOWC
|
||
+ mbstate_t prevstate;
|
||
+#endif
|
||
|
||
if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
|
||
goto closefiles;
|
||
prevfield = find_field (prevline);
|
||
prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
|
||
+#if HAVE_MBRTOWC
|
||
+ prevstate = prevline->state;
|
||
+#endif
|
||
|
||
while (!feof (stdin))
|
||
{
|
||
bool match;
|
||
char *thisfield;
|
||
size_t thislen;
|
||
+#if HAVE_MBRTOWC
|
||
+ mbstate_t thisstate;
|
||
+#endif
|
||
if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
|
||
{
|
||
if (ferror (stdin))
|
||
@@ -349,6 +569,15 @@ check_file (const char *infile, const ch
|
||
}
|
||
thisfield = find_field (thisline);
|
||
thislen = thisline->length - 1 - (thisfield - thisline->buffer);
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ thisstate = thisline->state;
|
||
+ match = !different_multi (thisfield, prevfield,
|
||
+ thislen, prevlen, thisstate, prevstate);
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
match = !different (thisfield, prevfield, thislen, prevlen);
|
||
match_count += match;
|
||
|
||
@@ -381,6 +610,9 @@ check_file (const char *infile, const ch
|
||
SWAP_LINES (prevline, thisline);
|
||
prevfield = thisfield;
|
||
prevlen = thislen;
|
||
+#if HAVE_MBRTOWC
|
||
+ prevstate = thisstate;
|
||
+#endif
|
||
if (!match)
|
||
match_count = 0;
|
||
}
|
||
@@ -426,6 +658,19 @@ main (int argc, char **argv)
|
||
|
||
atexit (close_stdout);
|
||
|
||
+#if HAVE_MBRTOWC
|
||
+ if (MB_CUR_MAX > 1)
|
||
+ {
|
||
+ find_field = find_field_multi;
|
||
+ }
|
||
+ else
|
||
+#endif
|
||
+ {
|
||
+ find_field = find_field_uni;
|
||
+ }
|
||
+
|
||
+
|
||
+
|
||
skip_chars = 0;
|
||
skip_fields = 0;
|
||
check_chars = SIZE_MAX;
|
||
diff -urNp coreutils-8.0-orig/src/uniq.c.orig coreutils-8.0/src/uniq.c.orig
|
||
--- coreutils-8.0-orig/src/uniq.c.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/src/uniq.c.orig 2009-09-23 10:25:44.000000000 +0200
|
||
@@ -0,0 +1,565 @@
|
||
+/* uniq -- remove duplicate lines from a sorted file
|
||
+ Copyright (C) 86, 91, 1995-2009 Free Software Foundation, Inc.
|
||
+
|
||
+ This program is free software: you can redistribute it and/or modify
|
||
+ it under the terms of the GNU General Public License as published by
|
||
+ the Free Software Foundation, either version 3 of the License, or
|
||
+ (at your option) any later version.
|
||
+
|
||
+ This program is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
+ GNU General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU General Public License
|
||
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* Written by Richard M. Stallman and David MacKenzie. */
|
||
+
|
||
+#include <config.h>
|
||
+
|
||
+#include <stdio.h>
|
||
+#include <getopt.h>
|
||
+#include <sys/types.h>
|
||
+
|
||
+#include "system.h"
|
||
+#include "argmatch.h"
|
||
+#include "linebuffer.h"
|
||
+#include "error.h"
|
||
+#include "hard-locale.h"
|
||
+#include "posixver.h"
|
||
+#include "quote.h"
|
||
+#include "xmemcoll.h"
|
||
+#include "xstrtol.h"
|
||
+#include "memcasecmp.h"
|
||
+
|
||
+/* The official name of this program (e.g., no `g' prefix). */
|
||
+#define PROGRAM_NAME "uniq"
|
||
+
|
||
+#define AUTHORS \
|
||
+ proper_name ("Richard M. Stallman"), \
|
||
+ proper_name ("David MacKenzie")
|
||
+
|
||
+#define SWAP_LINES(A, B) \
|
||
+ do \
|
||
+ { \
|
||
+ struct linebuffer *_tmp; \
|
||
+ _tmp = (A); \
|
||
+ (A) = (B); \
|
||
+ (B) = _tmp; \
|
||
+ } \
|
||
+ while (0)
|
||
+
|
||
+/* True if the LC_COLLATE locale is hard. */
|
||
+static bool hard_LC_COLLATE;
|
||
+
|
||
+/* Number of fields to skip on each line when doing comparisons. */
|
||
+static size_t skip_fields;
|
||
+
|
||
+/* Number of chars to skip after skipping any fields. */
|
||
+static size_t skip_chars;
|
||
+
|
||
+/* Number of chars to compare. */
|
||
+static size_t check_chars;
|
||
+
|
||
+enum countmode
|
||
+{
|
||
+ count_occurrences, /* -c Print count before output lines. */
|
||
+ count_none /* Default. Do not print counts. */
|
||
+};
|
||
+
|
||
+/* Whether and how to precede the output lines with a count of the number of
|
||
+ times they occurred in the input. */
|
||
+static enum countmode countmode;
|
||
+
|
||
+/* Which lines to output: unique lines, the first of a group of
|
||
+ repeated lines, and the second and subsequent lines of a group of
|
||
+ repeated lines. */
|
||
+static bool output_unique;
|
||
+static bool output_first_repeated;
|
||
+static bool output_later_repeated;
|
||
+
|
||
+/* If true, ignore case when comparing. */
|
||
+static bool ignore_case;
|
||
+
|
||
+enum delimit_method
|
||
+{
|
||
+ /* No delimiters output. --all-repeated[=none] */
|
||
+ DM_NONE,
|
||
+
|
||
+ /* Delimiter precedes all groups. --all-repeated=prepend */
|
||
+ DM_PREPEND,
|
||
+
|
||
+ /* Delimit all groups. --all-repeated=separate */
|
||
+ DM_SEPARATE
|
||
+};
|
||
+
|
||
+static char const *const delimit_method_string[] =
|
||
+{
|
||
+ "none", "prepend", "separate", NULL
|
||
+};
|
||
+
|
||
+static enum delimit_method const delimit_method_map[] =
|
||
+{
|
||
+ DM_NONE, DM_PREPEND, DM_SEPARATE
|
||
+};
|
||
+
|
||
+/* Select whether/how to delimit groups of duplicate lines. */
|
||
+static enum delimit_method delimit_groups;
|
||
+
|
||
+static struct option const longopts[] =
|
||
+{
|
||
+ {"count", no_argument, NULL, 'c'},
|
||
+ {"repeated", no_argument, NULL, 'd'},
|
||
+ {"all-repeated", optional_argument, NULL, 'D'},
|
||
+ {"ignore-case", no_argument, NULL, 'i'},
|
||
+ {"unique", no_argument, NULL, 'u'},
|
||
+ {"skip-fields", required_argument, NULL, 'f'},
|
||
+ {"skip-chars", required_argument, NULL, 's'},
|
||
+ {"check-chars", required_argument, NULL, 'w'},
|
||
+ {"zero-terminated", no_argument, NULL, 'z'},
|
||
+ {GETOPT_HELP_OPTION_DECL},
|
||
+ {GETOPT_VERSION_OPTION_DECL},
|
||
+ {NULL, 0, NULL, 0}
|
||
+};
|
||
+
|
||
+void
|
||
+usage (int status)
|
||
+{
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ fprintf (stderr, _("Try `%s --help' for more information.\n"),
|
||
+ program_name);
|
||
+ else
|
||
+ {
|
||
+ printf (_("\
|
||
+Usage: %s [OPTION]... [INPUT [OUTPUT]]\n\
|
||
+"),
|
||
+ program_name);
|
||
+ fputs (_("\
|
||
+Filter adjacent matching lines from INPUT (or standard input),\n\
|
||
+writing to OUTPUT (or standard output).\n\
|
||
+\n\
|
||
+With no options, matching lines are merged to the first occurrence.\n\
|
||
+\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+Mandatory arguments to long options are mandatory for short options too.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -c, --count prefix lines by the number of occurrences\n\
|
||
+ -d, --repeated only print duplicate lines\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -D, --all-repeated[=delimit-method] print all duplicate lines\n\
|
||
+ delimit-method={none(default),prepend,separate}\n\
|
||
+ Delimiting is done with blank lines.\n\
|
||
+ -f, --skip-fields=N avoid comparing the first N fields\n\
|
||
+ -i, --ignore-case ignore differences in case when comparing\n\
|
||
+ -s, --skip-chars=N avoid comparing the first N characters\n\
|
||
+ -u, --unique only print unique lines\n\
|
||
+ -z, --zero-terminated end lines with 0 byte, not newline\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+ -w, --check-chars=N compare no more than N characters in lines\n\
|
||
+"), stdout);
|
||
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||
+ fputs (_("\
|
||
+\n\
|
||
+A field is a run of blanks (usually spaces and/or TABs), then non-blank\n\
|
||
+characters. Fields are skipped before chars.\n\
|
||
+"), stdout);
|
||
+ fputs (_("\
|
||
+\n\
|
||
+Note: 'uniq' does not detect repeated lines unless they are adjacent.\n\
|
||
+You may want to sort the input first, or use `sort -u' without `uniq'.\n\
|
||
+Also, comparisons honor the rules specified by `LC_COLLATE'.\n\
|
||
+"), stdout);
|
||
+ emit_ancillary_info ();
|
||
+ }
|
||
+ exit (status);
|
||
+}
|
||
+
|
||
+/* Convert OPT to size_t, reporting an error using MSGID if OPT is
|
||
+ invalid. Silently convert too-large values to SIZE_MAX. */
|
||
+
|
||
+static size_t
|
||
+size_opt (char const *opt, char const *msgid)
|
||
+{
|
||
+ unsigned long int size;
|
||
+ verify (SIZE_MAX <= ULONG_MAX);
|
||
+
|
||
+ switch (xstrtoul (opt, NULL, 10, &size, ""))
|
||
+ {
|
||
+ case LONGINT_OK:
|
||
+ case LONGINT_OVERFLOW:
|
||
+ break;
|
||
+
|
||
+ default:
|
||
+ error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid));
|
||
+ }
|
||
+
|
||
+ return MIN (size, SIZE_MAX);
|
||
+}
|
||
+
|
||
+/* Given a linebuffer LINE,
|
||
+ return a pointer to the beginning of the line's field to be compared. */
|
||
+
|
||
+static char *
|
||
+find_field (struct linebuffer const *line)
|
||
+{
|
||
+ size_t count;
|
||
+ char const *lp = line->buffer;
|
||
+ size_t size = line->length - 1;
|
||
+ size_t i = 0;
|
||
+
|
||
+ for (count = 0; count < skip_fields; count++)
|
||
+ {
|
||
+ while (i < size && isblank (to_uchar (lp[i])))
|
||
+ i++;
|
||
+ while (i < size && !isblank (to_uchar (lp[i])))
|
||
+ i++;
|
||
+ }
|
||
+
|
||
+ for (count = 0; count < skip_chars && i < size; count++)
|
||
+ i++;
|
||
+
|
||
+ return line->buffer + i;
|
||
+}
|
||
+
|
||
+/* Return false if two strings OLD and NEW match, true if not.
|
||
+ OLD and NEW point not to the beginnings of the lines
|
||
+ but rather to the beginnings of the fields to compare.
|
||
+ OLDLEN and NEWLEN are their lengths. */
|
||
+
|
||
+static bool
|
||
+different (char *old, char *new, size_t oldlen, size_t newlen)
|
||
+{
|
||
+ if (check_chars < oldlen)
|
||
+ oldlen = check_chars;
|
||
+ if (check_chars < newlen)
|
||
+ newlen = check_chars;
|
||
+
|
||
+ if (ignore_case)
|
||
+ {
|
||
+ /* FIXME: This should invoke strcoll somehow. */
|
||
+ return oldlen != newlen || memcasecmp (old, new, oldlen);
|
||
+ }
|
||
+ else if (hard_LC_COLLATE)
|
||
+ return xmemcoll (old, oldlen, new, newlen) != 0;
|
||
+ else
|
||
+ return oldlen != newlen || memcmp (old, new, oldlen);
|
||
+}
|
||
+
|
||
+/* Output the line in linebuffer LINE to standard output
|
||
+ provided that the switches say it should be output.
|
||
+ MATCH is true if the line matches the previous line.
|
||
+ If requested, print the number of times it occurred, as well;
|
||
+ LINECOUNT + 1 is the number of times that the line occurred. */
|
||
+
|
||
+static void
|
||
+writeline (struct linebuffer const *line,
|
||
+ bool match, uintmax_t linecount)
|
||
+{
|
||
+ if (! (linecount == 0 ? output_unique
|
||
+ : !match ? output_first_repeated
|
||
+ : output_later_repeated))
|
||
+ return;
|
||
+
|
||
+ if (countmode == count_occurrences)
|
||
+ printf ("%7" PRIuMAX " ", linecount + 1);
|
||
+
|
||
+ fwrite (line->buffer, sizeof (char), line->length, stdout);
|
||
+}
|
||
+
|
||
+/* Process input file INFILE with output to OUTFILE.
|
||
+ If either is "-", use the standard I/O stream for it instead. */
|
||
+
|
||
+static void
|
||
+check_file (const char *infile, const char *outfile, char delimiter)
|
||
+{
|
||
+ struct linebuffer lb1, lb2;
|
||
+ struct linebuffer *thisline, *prevline;
|
||
+
|
||
+ if (! (STREQ (infile, "-") || freopen (infile, "r", stdin)))
|
||
+ error (EXIT_FAILURE, errno, "%s", infile);
|
||
+ if (! (STREQ (outfile, "-") || freopen (outfile, "w", stdout)))
|
||
+ error (EXIT_FAILURE, errno, "%s", outfile);
|
||
+
|
||
+ thisline = &lb1;
|
||
+ prevline = &lb2;
|
||
+
|
||
+ initbuffer (thisline);
|
||
+ initbuffer (prevline);
|
||
+
|
||
+ /* The duplication in the following `if' and `else' blocks is an
|
||
+ optimization to distinguish the common case (in which none of
|
||
+ the following options has been specified: --count, --repeated,
|
||
+ --all-repeated, --unique) from the others. In the common case,
|
||
+ this optimization lets uniq output each different line right away,
|
||
+ without waiting to see if the next one is different. */
|
||
+
|
||
+ if (output_unique && output_first_repeated && countmode == count_none)
|
||
+ {
|
||
+ char *prevfield IF_LINT (= NULL);
|
||
+ size_t prevlen IF_LINT (= 0);
|
||
+
|
||
+ while (!feof (stdin))
|
||
+ {
|
||
+ char *thisfield;
|
||
+ size_t thislen;
|
||
+ if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
|
||
+ break;
|
||
+ thisfield = find_field (thisline);
|
||
+ thislen = thisline->length - 1 - (thisfield - thisline->buffer);
|
||
+ if (prevline->length == 0
|
||
+ || different (thisfield, prevfield, thislen, prevlen))
|
||
+ {
|
||
+ fwrite (thisline->buffer, sizeof (char),
|
||
+ thisline->length, stdout);
|
||
+
|
||
+ SWAP_LINES (prevline, thisline);
|
||
+ prevfield = thisfield;
|
||
+ prevlen = thislen;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ char *prevfield;
|
||
+ size_t prevlen;
|
||
+ uintmax_t match_count = 0;
|
||
+ bool first_delimiter = true;
|
||
+
|
||
+ if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
|
||
+ goto closefiles;
|
||
+ prevfield = find_field (prevline);
|
||
+ prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
|
||
+
|
||
+ while (!feof (stdin))
|
||
+ {
|
||
+ bool match;
|
||
+ char *thisfield;
|
||
+ size_t thislen;
|
||
+ if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
|
||
+ {
|
||
+ if (ferror (stdin))
|
||
+ goto closefiles;
|
||
+ break;
|
||
+ }
|
||
+ thisfield = find_field (thisline);
|
||
+ thislen = thisline->length - 1 - (thisfield - thisline->buffer);
|
||
+ match = !different (thisfield, prevfield, thislen, prevlen);
|
||
+ match_count += match;
|
||
+
|
||
+ if (match_count == UINTMAX_MAX)
|
||
+ {
|
||
+ if (count_occurrences)
|
||
+ error (EXIT_FAILURE, 0, _("too many repeated lines"));
|
||
+ match_count--;
|
||
+ }
|
||
+
|
||
+ if (delimit_groups != DM_NONE)
|
||
+ {
|
||
+ if (!match)
|
||
+ {
|
||
+ if (match_count) /* a previous match */
|
||
+ first_delimiter = false; /* Only used when DM_SEPARATE */
|
||
+ }
|
||
+ else if (match_count == 1)
|
||
+ {
|
||
+ if ((delimit_groups == DM_PREPEND)
|
||
+ || (delimit_groups == DM_SEPARATE
|
||
+ && !first_delimiter))
|
||
+ putchar (delimiter);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (!match || output_later_repeated)
|
||
+ {
|
||
+ writeline (prevline, match, match_count);
|
||
+ SWAP_LINES (prevline, thisline);
|
||
+ prevfield = thisfield;
|
||
+ prevlen = thislen;
|
||
+ if (!match)
|
||
+ match_count = 0;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ writeline (prevline, false, match_count);
|
||
+ }
|
||
+
|
||
+ closefiles:
|
||
+ if (ferror (stdin) || fclose (stdin) != 0)
|
||
+ error (EXIT_FAILURE, 0, _("error reading %s"), infile);
|
||
+
|
||
+ /* stdout is handled via the atexit-invoked close_stdout function. */
|
||
+
|
||
+ free (lb1.buffer);
|
||
+ free (lb2.buffer);
|
||
+}
|
||
+
|
||
+enum Skip_field_option_type
|
||
+ {
|
||
+ SFO_NONE,
|
||
+ SFO_OBSOLETE,
|
||
+ SFO_NEW
|
||
+ };
|
||
+
|
||
+int
|
||
+main (int argc, char **argv)
|
||
+{
|
||
+ int optc = 0;
|
||
+ bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
|
||
+ enum Skip_field_option_type skip_field_option_type = SFO_NONE;
|
||
+ int nfiles = 0;
|
||
+ char const *file[2];
|
||
+ char delimiter = '\n'; /* change with --zero-terminated, -z */
|
||
+
|
||
+ file[0] = file[1] = "-";
|
||
+ initialize_main (&argc, &argv);
|
||
+ set_program_name (argv[0]);
|
||
+ setlocale (LC_ALL, "");
|
||
+ bindtextdomain (PACKAGE, LOCALEDIR);
|
||
+ textdomain (PACKAGE);
|
||
+ hard_LC_COLLATE = hard_locale (LC_COLLATE);
|
||
+
|
||
+ atexit (close_stdout);
|
||
+
|
||
+ skip_chars = 0;
|
||
+ skip_fields = 0;
|
||
+ check_chars = SIZE_MAX;
|
||
+ output_unique = output_first_repeated = true;
|
||
+ output_later_repeated = false;
|
||
+ countmode = count_none;
|
||
+ delimit_groups = DM_NONE;
|
||
+
|
||
+ for (;;)
|
||
+ {
|
||
+ /* Parse an operand with leading "+" as a file after "--" was
|
||
+ seen; or if pedantic and a file was seen; or if not
|
||
+ obsolete. */
|
||
+
|
||
+ if (optc == -1
|
||
+ || (posixly_correct && nfiles != 0)
|
||
+ || ((optc = getopt_long (argc, argv,
|
||
+ "-0123456789Dcdf:is:uw:z", longopts, NULL))
|
||
+ == -1))
|
||
+ {
|
||
+ if (argc <= optind)
|
||
+ break;
|
||
+ if (nfiles == 2)
|
||
+ {
|
||
+ error (0, 0, _("extra operand %s"), quote (argv[optind]));
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ file[nfiles++] = argv[optind++];
|
||
+ }
|
||
+ else switch (optc)
|
||
+ {
|
||
+ case 1:
|
||
+ {
|
||
+ unsigned long int size;
|
||
+ if (optarg[0] == '+'
|
||
+ && posix2_version () < 200112
|
||
+ && xstrtoul (optarg, NULL, 10, &size, "") == LONGINT_OK
|
||
+ && size <= SIZE_MAX)
|
||
+ skip_chars = size;
|
||
+ else if (nfiles == 2)
|
||
+ {
|
||
+ error (0, 0, _("extra operand %s"), quote (optarg));
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ else
|
||
+ file[nfiles++] = optarg;
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case '0':
|
||
+ case '1':
|
||
+ case '2':
|
||
+ case '3':
|
||
+ case '4':
|
||
+ case '5':
|
||
+ case '6':
|
||
+ case '7':
|
||
+ case '8':
|
||
+ case '9':
|
||
+ {
|
||
+ if (skip_field_option_type == SFO_NEW)
|
||
+ skip_fields = 0;
|
||
+
|
||
+ if (!DECIMAL_DIGIT_ACCUMULATE (skip_fields, optc - '0', size_t))
|
||
+ skip_fields = SIZE_MAX;
|
||
+
|
||
+ skip_field_option_type = SFO_OBSOLETE;
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case 'c':
|
||
+ countmode = count_occurrences;
|
||
+ break;
|
||
+
|
||
+ case 'd':
|
||
+ output_unique = false;
|
||
+ break;
|
||
+
|
||
+ case 'D':
|
||
+ output_unique = false;
|
||
+ output_later_repeated = true;
|
||
+ if (optarg == NULL)
|
||
+ delimit_groups = DM_NONE;
|
||
+ else
|
||
+ delimit_groups = XARGMATCH ("--all-repeated", optarg,
|
||
+ delimit_method_string,
|
||
+ delimit_method_map);
|
||
+ break;
|
||
+
|
||
+ case 'f':
|
||
+ skip_field_option_type = SFO_NEW;
|
||
+ skip_fields = size_opt (optarg,
|
||
+ N_("invalid number of fields to skip"));
|
||
+ break;
|
||
+
|
||
+ case 'i':
|
||
+ ignore_case = true;
|
||
+ break;
|
||
+
|
||
+ case 's':
|
||
+ skip_chars = size_opt (optarg,
|
||
+ N_("invalid number of bytes to skip"));
|
||
+ break;
|
||
+
|
||
+ case 'u':
|
||
+ output_first_repeated = false;
|
||
+ break;
|
||
+
|
||
+ case 'w':
|
||
+ check_chars = size_opt (optarg,
|
||
+ N_("invalid number of bytes to compare"));
|
||
+ break;
|
||
+
|
||
+ case 'z':
|
||
+ delimiter = '\0';
|
||
+ break;
|
||
+
|
||
+ case_GETOPT_HELP_CHAR;
|
||
+
|
||
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||
+
|
||
+ default:
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (countmode == count_occurrences && output_later_repeated)
|
||
+ {
|
||
+ error (0, 0,
|
||
+ _("printing all duplicated lines and repeat counts is meaningless"));
|
||
+ usage (EXIT_FAILURE);
|
||
+ }
|
||
+
|
||
+ check_file (file[0], file[1], delimiter);
|
||
+
|
||
+ exit (EXIT_SUCCESS);
|
||
+}
|
||
diff -urNp coreutils-8.0-orig/tests/Makefile.am coreutils-8.0/tests/Makefile.am
|
||
--- coreutils-8.0-orig/tests/Makefile.am 2009-09-29 16:25:44.000000000 +0200
|
||
+++ coreutils-8.0/tests/Makefile.am 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -208,6 +208,7 @@ TESTS = \
|
||
misc/sort-compress \
|
||
misc/sort-continue \
|
||
misc/sort-files0-from \
|
||
+ misc/sort-mb-tests \
|
||
misc/sort-merge \
|
||
misc/sort-merge-fdlimit \
|
||
misc/sort-rand \
|
||
@@ -452,6 +453,10 @@ TESTS = \
|
||
$(root_tests)
|
||
|
||
pr_data = \
|
||
+ misc/mb1.X \
|
||
+ misc/mb1.I \
|
||
+ misc/mb2.X \
|
||
+ misc/mb2.I \
|
||
pr/0F \
|
||
pr/0FF \
|
||
pr/0FFnt \
|
||
diff -urNp coreutils-8.0-orig/tests/Makefile.am.orig coreutils-8.0/tests/Makefile.am.orig
|
||
--- coreutils-8.0-orig/tests/Makefile.am.orig 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/tests/Makefile.am.orig 2009-09-29 16:25:44.000000000 +0200
|
||
@@ -0,0 +1,616 @@
|
||
+## Process this file with automake to produce Makefile.in -*-Makefile-*-.
|
||
+
|
||
+# Sort in traditional ASCII order, regardless of the current locale;
|
||
+# otherwise we may get into trouble with distinct strings that the
|
||
+# current locale considers to be equal.
|
||
+ASSORT = LC_ALL=C sort
|
||
+
|
||
+EXTRA_DIST = \
|
||
+ Coreutils.pm \
|
||
+ CuTmpdir.pm \
|
||
+ README \
|
||
+ check.mk \
|
||
+ envvar-check \
|
||
+ lang-default \
|
||
+ other-fs-tmpdir \
|
||
+ require-perl \
|
||
+ sample-test \
|
||
+ test-lib.sh \
|
||
+ $(pr_data)
|
||
+
|
||
+root_tests = \
|
||
+ chown/basic \
|
||
+ cp/cp-a-selinux \
|
||
+ cp/preserve-gid \
|
||
+ cp/special-bits \
|
||
+ cp/cp-mv-enotsup-xattr \
|
||
+ chroot/credentials \
|
||
+ dd/skip-seek-past-dev \
|
||
+ install/install-C-root \
|
||
+ ls/capability \
|
||
+ ls/nameless-uid \
|
||
+ misc/chcon \
|
||
+ misc/selinux \
|
||
+ misc/truncate-owned-by-other \
|
||
+ mkdir/writable-under-readonly \
|
||
+ mv/sticky-to-xpart \
|
||
+ rm/fail-2eperm \
|
||
+ rm/no-give-up \
|
||
+ rm/one-file-system \
|
||
+ tail-2/append-only \
|
||
+ touch/now-owned-by-other
|
||
+
|
||
+.PHONY: check-root
|
||
+check-root:
|
||
+ $(MAKE) check TESTS='$(root_tests)'
|
||
+
|
||
+check-recursive: root-hint
|
||
+
|
||
+# Advertise `check-root' target.
|
||
+.PHONY: root-hint
|
||
+root-hint:
|
||
+ @echo '***********************************************************'
|
||
+ @echo "NOTICE: Some tests may be run only as root."
|
||
+ @echo " See the 'Running tests as root' section in README."
|
||
+ @echo '***********************************************************'
|
||
+
|
||
+EXTRA_DIST += $(TESTS)
|
||
+
|
||
+# Do not choose a name that is a shell keyword like 'if', or a
|
||
+# commonly-used utility like 'cat' or 'test', as the name of a test.
|
||
+# Otherwise, VPATH builds will fail on hosts like Solaris, since they
|
||
+# will expand 'if test ...' to 'if .../test ...', and the '.../test'
|
||
+# will execute the test script rather than the standard utility.
|
||
+
|
||
+# Notes on the ordering of these tests:
|
||
+# Place early in the list tests of the tools that
|
||
+# are most commonly used in test scripts themselves.
|
||
+# E.g., nearly every test script uses rm and chmod.
|
||
+# help-version comes early because it's a basic sanity test.
|
||
+# Put seq early, since lots of other tests use it.
|
||
+# Put tests that sleep early, but not all together, so in parallel builds
|
||
+# they share time with tests that burn CPU, not with others that sleep.
|
||
+# Put head-elide-tail early, because it's long-running.
|
||
+
|
||
+TESTS = \
|
||
+ misc/help-version \
|
||
+ misc/invalid-opt \
|
||
+ rm/ext3-perf \
|
||
+ rm/cycle \
|
||
+ cp/link-heap \
|
||
+ chmod/no-x \
|
||
+ chgrp/basic \
|
||
+ rm/dangling-symlink \
|
||
+ misc/ls-time \
|
||
+ rm/deep-1 \
|
||
+ rm/deep-2 \
|
||
+ rm/dir-no-w \
|
||
+ rm/dir-nonrecur \
|
||
+ rm/dot-rel \
|
||
+ rm/isatty \
|
||
+ rm/empty-inacc \
|
||
+ rm/empty-name \
|
||
+ rm/f-1 \
|
||
+ rm/fail-eacces \
|
||
+ rm/fail-eperm \
|
||
+ tail-2/assert \
|
||
+ rm/hash \
|
||
+ rm/i-1 \
|
||
+ rm/i-never \
|
||
+ rm/i-no-r \
|
||
+ tail-2/infloop-1 \
|
||
+ rm/ignorable \
|
||
+ rm/inaccessible \
|
||
+ rm/interactive-always \
|
||
+ rm/interactive-once \
|
||
+ rm/ir-1 \
|
||
+ rm/r-1 \
|
||
+ rm/r-2 \
|
||
+ rm/r-3 \
|
||
+ rm/r-4 \
|
||
+ rm/readdir-bug \
|
||
+ rm/rm1 \
|
||
+ touch/empty-file \
|
||
+ rm/rm2 \
|
||
+ rm/rm3 \
|
||
+ rm/rm4 \
|
||
+ rm/rm5 \
|
||
+ rm/sunos-1 \
|
||
+ rm/unread2 \
|
||
+ rm/unread3 \
|
||
+ rm/unreadable \
|
||
+ rm/v-slash \
|
||
+ chgrp/default-no-deref \
|
||
+ chgrp/deref \
|
||
+ chgrp/no-x \
|
||
+ chgrp/posix-H \
|
||
+ chgrp/recurse \
|
||
+ misc/ptx \
|
||
+ misc/test \
|
||
+ misc/seq \
|
||
+ misc/seq-long-double \
|
||
+ misc/head \
|
||
+ misc/head-elide-tail \
|
||
+ tail-2/tail-n0f \
|
||
+ misc/ls-misc \
|
||
+ misc/date \
|
||
+ misc/date-next-dow \
|
||
+ misc/ptx-overrun \
|
||
+ misc/xstrtol \
|
||
+ tail-2/pid \
|
||
+ misc/od \
|
||
+ misc/mktemp \
|
||
+ misc/arch \
|
||
+ misc/pr \
|
||
+ misc/join \
|
||
+ pr/pr-tests \
|
||
+ misc/df-P \
|
||
+ misc/pwd-option \
|
||
+ misc/pwd-unreadable-parent \
|
||
+ misc/chcon-fail \
|
||
+ misc/cut \
|
||
+ misc/wc \
|
||
+ misc/wc-files0-from \
|
||
+ misc/wc-files0 \
|
||
+ misc/cat-proc \
|
||
+ misc/cat-buf \
|
||
+ misc/base64 \
|
||
+ misc/basename \
|
||
+ misc/close-stdout \
|
||
+ misc/comm \
|
||
+ misc/csplit \
|
||
+ misc/date-sec \
|
||
+ misc/dircolors \
|
||
+ misc/df \
|
||
+ misc/dirname \
|
||
+ misc/expand \
|
||
+ misc/expr \
|
||
+ misc/factor \
|
||
+ misc/false-status \
|
||
+ misc/fmt \
|
||
+ misc/fmt-long-line \
|
||
+ misc/fold \
|
||
+ misc/groups-dash \
|
||
+ misc/groups-version \
|
||
+ misc/head-c \
|
||
+ misc/head-pos \
|
||
+ misc/id-context \
|
||
+ misc/id-groups \
|
||
+ misc/md5sum \
|
||
+ misc/md5sum-newline \
|
||
+ misc/mknod \
|
||
+ misc/nice \
|
||
+ misc/nl \
|
||
+ misc/nohup \
|
||
+ misc/od-N \
|
||
+ misc/od-multiple-t \
|
||
+ misc/od-x8 \
|
||
+ misc/paste \
|
||
+ misc/pathchk1 \
|
||
+ misc/printf \
|
||
+ misc/printf-cov \
|
||
+ misc/printf-hex \
|
||
+ misc/printf-surprise \
|
||
+ misc/pwd-long \
|
||
+ misc/readlink-fp-loop \
|
||
+ misc/runcon-no-reorder \
|
||
+ misc/sha1sum \
|
||
+ misc/sha1sum-vec \
|
||
+ misc/sha224sum \
|
||
+ misc/sha256sum \
|
||
+ misc/sha384sum \
|
||
+ misc/sha512sum \
|
||
+ misc/shred-exact \
|
||
+ misc/shred-passes \
|
||
+ misc/shred-remove \
|
||
+ misc/shuf \
|
||
+ misc/sort \
|
||
+ misc/sort-compress \
|
||
+ misc/sort-continue \
|
||
+ misc/sort-files0-from \
|
||
+ misc/sort-merge \
|
||
+ misc/sort-merge-fdlimit \
|
||
+ misc/sort-rand \
|
||
+ misc/sort-version \
|
||
+ misc/split-a \
|
||
+ misc/split-fail \
|
||
+ misc/split-l \
|
||
+ misc/stat-fmt \
|
||
+ misc/stat-hyphen \
|
||
+ misc/stat-printf \
|
||
+ misc/stdbuf \
|
||
+ misc/stty \
|
||
+ misc/stty-invalid \
|
||
+ misc/stty-row-col \
|
||
+ misc/sum \
|
||
+ misc/sum-sysv \
|
||
+ misc/tac \
|
||
+ misc/tac-continue \
|
||
+ misc/tail \
|
||
+ misc/tee \
|
||
+ misc/tee-dash \
|
||
+ misc/test-diag \
|
||
+ misc/timeout \
|
||
+ misc/timeout-parameters \
|
||
+ misc/tr \
|
||
+ misc/truncate-dangling-symlink \
|
||
+ misc/truncate-dir-fail \
|
||
+ misc/truncate-fail-diag \
|
||
+ misc/truncate-fifo \
|
||
+ misc/truncate-no-create-missing \
|
||
+ misc/truncate-overflow \
|
||
+ misc/truncate-parameters \
|
||
+ misc/truncate-relative \
|
||
+ misc/tsort \
|
||
+ misc/tty-eof \
|
||
+ misc/unexpand \
|
||
+ misc/uniq \
|
||
+ misc/xattr \
|
||
+ tail-2/wait \
|
||
+ chmod/c-option \
|
||
+ chmod/equal-x \
|
||
+ chmod/equals \
|
||
+ chmod/inaccessible \
|
||
+ chmod/octal \
|
||
+ chmod/setgid \
|
||
+ chmod/silent \
|
||
+ chmod/thru-dangling \
|
||
+ chmod/umask-x \
|
||
+ chmod/usage \
|
||
+ chown/deref \
|
||
+ chown/preserve-root \
|
||
+ chown/separator \
|
||
+ cp/abuse \
|
||
+ cp/acl \
|
||
+ cp/backup-1 \
|
||
+ cp/backup-dir \
|
||
+ cp/backup-is-src \
|
||
+ cp/cp-HL \
|
||
+ cp/cp-deref \
|
||
+ cp/cp-i \
|
||
+ cp/cp-mv-backup \
|
||
+ cp/cp-parents \
|
||
+ cp/deref-slink \
|
||
+ cp/dir-rm-dest \
|
||
+ cp/dir-slash \
|
||
+ cp/dir-vs-file \
|
||
+ cp/existing-perm-race \
|
||
+ cp/fail-perm \
|
||
+ cp/file-perm-race \
|
||
+ cp/into-self \
|
||
+ cp/link \
|
||
+ cp/link-no-deref \
|
||
+ cp/link-preserve \
|
||
+ cp/no-deref-link1 \
|
||
+ cp/no-deref-link2 \
|
||
+ cp/no-deref-link3 \
|
||
+ cp/parent-perm \
|
||
+ cp/parent-perm-race \
|
||
+ cp/perm \
|
||
+ cp/preserve-2 \
|
||
+ cp/preserve-slink-time \
|
||
+ cp/proc-short-read \
|
||
+ cp/proc-zero-len \
|
||
+ cp/r-vs-symlink \
|
||
+ cp/reflink-auto \
|
||
+ cp/reflink-perm \
|
||
+ cp/same-file \
|
||
+ cp/slink-2-slink \
|
||
+ cp/sparse \
|
||
+ cp/special-f \
|
||
+ cp/src-base-dot \
|
||
+ cp/symlink-slash \
|
||
+ cp/thru-dangling \
|
||
+ df/unreadable \
|
||
+ dd/direct \
|
||
+ dd/misc \
|
||
+ dd/not-rewound \
|
||
+ dd/reblock \
|
||
+ dd/skip-seek \
|
||
+ dd/skip-seek2 \
|
||
+ dd/skip-seek-past-file \
|
||
+ dd/stderr \
|
||
+ dd/unblock \
|
||
+ dd/unblock-sync \
|
||
+ df/total-verify \
|
||
+ du/2g \
|
||
+ du/8gb \
|
||
+ du/basic \
|
||
+ du/deref \
|
||
+ du/deref-args \
|
||
+ du/exclude \
|
||
+ du/fd-leak \
|
||
+ du/files0-from \
|
||
+ du/hard-link \
|
||
+ du/inacc-dest \
|
||
+ du/inacc-dir \
|
||
+ du/inaccessible-cwd \
|
||
+ du/long-from-unreadable \
|
||
+ du/long-sloop \
|
||
+ du/no-deref \
|
||
+ du/no-x \
|
||
+ du/one-file-system \
|
||
+ du/restore-wd \
|
||
+ du/slash \
|
||
+ du/slink \
|
||
+ du/trailing-slash \
|
||
+ du/two-args \
|
||
+ id/no-context \
|
||
+ install/basic-1 \
|
||
+ install/create-leading \
|
||
+ install/d-slashdot \
|
||
+ install/install-C \
|
||
+ install/install-C-selinux \
|
||
+ install/strip-program \
|
||
+ install/trap \
|
||
+ ln/backup-1 \
|
||
+ ln/hard-backup \
|
||
+ ln/hard-to-sym \
|
||
+ ln/misc \
|
||
+ ln/sf-1 \
|
||
+ ln/slash-decorated-nonexistent-dest \
|
||
+ ln/target-1 \
|
||
+ ls/abmon-align \
|
||
+ ls/color-clear-to-eol \
|
||
+ ls/color-dtype-dir \
|
||
+ ls/dangle \
|
||
+ ls/dired \
|
||
+ ls/file-type \
|
||
+ ls/follow-slink \
|
||
+ ls/infloop \
|
||
+ ls/inode \
|
||
+ ls/m-option \
|
||
+ ls/multihardlink \
|
||
+ ls/no-arg \
|
||
+ ls/no-cap \
|
||
+ ls/proc-selinux-segfault \
|
||
+ ls/readdir-mountpoint-inode \
|
||
+ ls/recursive \
|
||
+ ls/rt-1 \
|
||
+ ls/stat-dtype \
|
||
+ ls/stat-failed \
|
||
+ ls/stat-free-symlinks \
|
||
+ ls/stat-vs-dirent \
|
||
+ ls/symlink-slash \
|
||
+ ls/x-option \
|
||
+ mkdir/p-1 \
|
||
+ mkdir/p-2 \
|
||
+ mkdir/p-3 \
|
||
+ mkdir/p-slashdot \
|
||
+ mkdir/p-thru-slink \
|
||
+ mkdir/p-v \
|
||
+ mkdir/parents \
|
||
+ mkdir/perm \
|
||
+ mkdir/selinux \
|
||
+ mkdir/special-1 \
|
||
+ mkdir/t-slash \
|
||
+ mv/acl \
|
||
+ mv/atomic \
|
||
+ mv/atomic2 \
|
||
+ mv/backup-dir \
|
||
+ mv/backup-is-src \
|
||
+ mv/childproof \
|
||
+ mv/diag \
|
||
+ mv/dir-file \
|
||
+ mv/dir2dir \
|
||
+ mv/dup-source \
|
||
+ mv/force \
|
||
+ mv/hard-2 \
|
||
+ mv/hard-3 \
|
||
+ mv/hard-4 \
|
||
+ mv/hard-link-1 \
|
||
+ mv/hard-verbose \
|
||
+ mv/i-1 \
|
||
+ mv/i-2 \
|
||
+ mv/i-3 \
|
||
+ mv/i-4 \
|
||
+ mv/i-5 \
|
||
+ mv/i-link-no \
|
||
+ mv/into-self \
|
||
+ mv/into-self-2 \
|
||
+ mv/into-self-3 \
|
||
+ mv/into-self-4 \
|
||
+ mv/leak-fd \
|
||
+ mv/mv-n \
|
||
+ mv/mv-special-1 \
|
||
+ mv/no-target-dir \
|
||
+ mv/part-fail \
|
||
+ mv/part-hardlink \
|
||
+ mv/part-rename \
|
||
+ mv/part-symlink \
|
||
+ mv/partition-perm \
|
||
+ mv/perm-1 \
|
||
+ mv/to-symlink \
|
||
+ mv/trailing-slash \
|
||
+ mv/update \
|
||
+ readlink/can-e \
|
||
+ readlink/can-f \
|
||
+ readlink/can-m \
|
||
+ readlink/rl-1 \
|
||
+ rmdir/fail-perm \
|
||
+ rmdir/ignore \
|
||
+ rmdir/t-slash \
|
||
+ tail-2/assert-2 \
|
||
+ tail-2/big-4gb \
|
||
+ tail-2/flush-initial \
|
||
+ tail-2/follow-stdin \
|
||
+ tail-2/pipe-f \
|
||
+ tail-2/pipe-f2 \
|
||
+ tail-2/proc-ksyms \
|
||
+ tail-2/start-middle \
|
||
+ touch/60-seconds \
|
||
+ touch/dangling-symlink \
|
||
+ touch/dir-1 \
|
||
+ touch/fail-diag \
|
||
+ touch/fifo \
|
||
+ touch/no-create-missing \
|
||
+ touch/no-rights \
|
||
+ touch/not-owner \
|
||
+ touch/obsolescent \
|
||
+ touch/read-only \
|
||
+ touch/relative \
|
||
+ $(root_tests)
|
||
+
|
||
+pr_data = \
|
||
+ pr/0F \
|
||
+ pr/0FF \
|
||
+ pr/0FFnt \
|
||
+ pr/0FFt \
|
||
+ pr/0FnFnt \
|
||
+ pr/0FnFt \
|
||
+ pr/0Fnt \
|
||
+ pr/0Ft \
|
||
+ pr/2-S_f-t_notab \
|
||
+ pr/2-Sf-t_notab \
|
||
+ pr/2f-t_notab \
|
||
+ pr/2s_f-t_notab \
|
||
+ pr/2s_w60f-t_nota \
|
||
+ pr/2sf-t_notab \
|
||
+ pr/2sw60f-t_notab \
|
||
+ pr/2w60f-t_notab \
|
||
+ pr/3-0F \
|
||
+ pr/3-5l24f-t \
|
||
+ pr/3-FF \
|
||
+ pr/3a2l17-FF \
|
||
+ pr/3a3f-0F \
|
||
+ pr/3a3l15-t \
|
||
+ pr/3a3l15f-t \
|
||
+ pr/3b2l17-FF \
|
||
+ pr/3b3f-0F \
|
||
+ pr/3b3f-0FF \
|
||
+ pr/3b3f-FF \
|
||
+ pr/3b3l15-t \
|
||
+ pr/3b3l15f-t \
|
||
+ pr/3f-0F \
|
||
+ pr/3f-FF \
|
||
+ pr/3l24-t \
|
||
+ pr/3l24f-t \
|
||
+ pr/3ml24-FF \
|
||
+ pr/3ml24-t \
|
||
+ pr/3ml24-t-FF \
|
||
+ pr/3ml24f-t \
|
||
+ pr/4-7l24-FF \
|
||
+ pr/4l24-FF \
|
||
+ pr/FF \
|
||
+ pr/FFn \
|
||
+ pr/FFtn \
|
||
+ pr/FnFn \
|
||
+ pr/Ja3l24f-lm \
|
||
+ pr/Jb3l24f-lm \
|
||
+ pr/Jml24f-lm-lo \
|
||
+ pr/W-72l24f-ll \
|
||
+ pr/W20l24f-ll \
|
||
+ pr/W26l24f-ll \
|
||
+ pr/W27l24f-ll \
|
||
+ pr/W28l24f-ll \
|
||
+ pr/W35Ja3l24f-lm \
|
||
+ pr/W35Jb3l24f-lm \
|
||
+ pr/W35Jml24f-lmlo \
|
||
+ pr/W35a3l24f-lm \
|
||
+ pr/W35b3l24f-lm \
|
||
+ pr/W35ml24f-lm-lo \
|
||
+ pr/W72Jl24f-ll \
|
||
+ pr/a2l15-FF \
|
||
+ pr/a2l17-FF \
|
||
+ pr/a3-0F \
|
||
+ pr/a3f-0F \
|
||
+ pr/a3f-0FF \
|
||
+ pr/a3f-FF \
|
||
+ pr/a3l15-t \
|
||
+ pr/a3l15f-t \
|
||
+ pr/a3l24f-lm \
|
||
+ pr/b2l15-FF \
|
||
+ pr/b2l17-FF \
|
||
+ pr/b3-0F \
|
||
+ pr/b3f-0F \
|
||
+ pr/b3f-0FF \
|
||
+ pr/b3f-FF \
|
||
+ pr/b3l15-t \
|
||
+ pr/b3l15f-t \
|
||
+ pr/b3l24f-lm \
|
||
+ pr/l24-FF \
|
||
+ pr/l24-t \
|
||
+ pr/l24f-t \
|
||
+ pr/loli \
|
||
+ pr/ml20-FF-t \
|
||
+ pr/ml24-FF \
|
||
+ pr/ml24-t \
|
||
+ pr/ml24-t-FF \
|
||
+ pr/ml24f-0F \
|
||
+ pr/ml24f-lm-lo \
|
||
+ pr/ml24f-t \
|
||
+ pr/ml24f-t-0F \
|
||
+ pr/n+2-5l24f-0FF \
|
||
+ pr/n+2l24f-0FF \
|
||
+ pr/n+2l24f-bl \
|
||
+ pr/n+3-7l24-FF \
|
||
+ pr/n+3l24f-0FF \
|
||
+ pr/n+3l24f-bl \
|
||
+ pr/n+3ml20f-bl-FF \
|
||
+ pr/n+3ml24f-bl-tn \
|
||
+ pr/n+3ml24f-tn-bl \
|
||
+ pr/n+4-8a2l17-FF \
|
||
+ pr/n+4b2l17f-0FF \
|
||
+ pr/n+5-8b3l17f-FF \
|
||
+ pr/n+5a3l13f-0FF \
|
||
+ pr/n+6a2l17-FF \
|
||
+ pr/n+6b3l13f-FF \
|
||
+ pr/n+7l24-FF \
|
||
+ pr/n+8l20-FF \
|
||
+ pr/nJml24f-lmlmlo \
|
||
+ pr/nJml24f-lmlolm \
|
||
+ pr/nN1+3l24f-bl \
|
||
+ pr/nN15l24f-bl \
|
||
+ pr/nSml20-bl-FF \
|
||
+ pr/nSml20-t-t-FF \
|
||
+ pr/nSml20-t-tFFFF \
|
||
+ pr/nSml24-bl-FF \
|
||
+ pr/nSml24-t-t-FF \
|
||
+ pr/nSml24-t-tFFFF \
|
||
+ pr/nl24f-bl \
|
||
+ pr/o3Jml24f-lm-lo \
|
||
+ pr/o3a3Sl24f-tn \
|
||
+ pr/o3a3Snl24f-tn \
|
||
+ pr/o3a3l24f-tn \
|
||
+ pr/o3b3Sl24f-tn \
|
||
+ pr/o3b3Snl24f-tn \
|
||
+ pr/o3b3l24f-tn \
|
||
+ pr/o3mSl24f-bl-tn \
|
||
+ pr/o3mSnl24fbltn \
|
||
+ pr/o3ml24f-bl-tn \
|
||
+ pr/t-0FF \
|
||
+ pr/t-FF \
|
||
+ pr/t-bl \
|
||
+ pr/t-t \
|
||
+ pr/tFFn \
|
||
+ pr/tFFt \
|
||
+ pr/tFFt-bl \
|
||
+ pr/tFFt-ll \
|
||
+ pr/tFFt-lm \
|
||
+ pr/tFnFt \
|
||
+ pr/t_notab \
|
||
+ pr/t_tab \
|
||
+ pr/t_tab_ \
|
||
+ pr/ta3-0FF \
|
||
+ pr/ta3-FF \
|
||
+ pr/tb3-0FF \
|
||
+ pr/tb3-FF \
|
||
+ pr/tn \
|
||
+ pr/tn2e5o3-t_tab \
|
||
+ pr/tn2e8-t_tab \
|
||
+ pr/tn2e8o3-t_tab \
|
||
+ pr/tn_2e8-t_tab \
|
||
+ pr/tn_2e8S-t_tab \
|
||
+ pr/tne8-t_tab \
|
||
+ pr/tne8o3-t_tab \
|
||
+ pr/tt-0FF \
|
||
+ pr/tt-FF \
|
||
+ pr/tt-bl \
|
||
+ pr/tt-t \
|
||
+ pr/tta3-0FF \
|
||
+ pr/tta3-FF \
|
||
+ pr/ttb3-0FF \
|
||
+ pr/ttb3-FF \
|
||
+ pr/w72l24f-ll
|
||
+
|
||
+include $(srcdir)/check.mk
|
||
diff -urNp coreutils-8.0-orig/tests/misc/cut coreutils-8.0/tests/misc/cut
|
||
--- coreutils-8.0-orig/tests/misc/cut 2009-09-21 14:29:33.000000000 +0200
|
||
+++ coreutils-8.0/tests/misc/cut 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -26,7 +26,7 @@ use strict;
|
||
my $prog = 'cut';
|
||
my $try = "Try \`$prog --help' for more information.\n";
|
||
my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
|
||
-my $inval = "$prog: invalid byte or field list\n$try";
|
||
+my $inval = "$prog: invalid byte, character or field list\n$try";
|
||
my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
|
||
|
||
my @Tests =
|
||
@@ -141,7 +141,7 @@ my @Tests =
|
||
|
||
# None of the following invalid ranges provoked an error up to coreutils-6.9.
|
||
['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
|
||
- {ERR=>"$prog: invalid decreasing range\n$try"}],
|
||
+ {ERR=>$inval}],
|
||
['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
|
||
['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
|
||
['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
|
||
diff -urNp coreutils-8.0-orig/tests/misc/mb1.I coreutils-8.0/tests/misc/mb1.I
|
||
--- coreutils-8.0-orig/tests/misc/mb1.I 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/tests/misc/mb1.I 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -0,0 +1,4 @@
|
||
+Apple@10
|
||
+Banana@5
|
||
+Citrus@20
|
||
+Cherry@30
|
||
diff -urNp coreutils-8.0-orig/tests/misc/mb1.X coreutils-8.0/tests/misc/mb1.X
|
||
--- coreutils-8.0-orig/tests/misc/mb1.X 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/tests/misc/mb1.X 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -0,0 +1,4 @@
|
||
+Banana@5
|
||
+Apple@10
|
||
+Citrus@20
|
||
+Cherry@30
|
||
diff -urNp coreutils-8.0-orig/tests/misc/mb2.I coreutils-8.0/tests/misc/mb2.I
|
||
--- coreutils-8.0-orig/tests/misc/mb2.I 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/tests/misc/mb2.I 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -0,0 +1,4 @@
|
||
+Apple@AA10@@20
|
||
+Banana@AA5@@30
|
||
+Citrus@AA20@@5
|
||
+Cherry@AA30@@10
|
||
diff -urNp coreutils-8.0-orig/tests/misc/mb2.X coreutils-8.0/tests/misc/mb2.X
|
||
--- coreutils-8.0-orig/tests/misc/mb2.X 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/tests/misc/mb2.X 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -0,0 +1,4 @@
|
||
+Citrus@AA20@@5
|
||
+Cherry@AA30@@10
|
||
+Apple@AA10@@20
|
||
+Banana@AA5@@30
|
||
diff -urNp coreutils-8.0-orig/tests/misc/sort-mb-tests coreutils-8.0/tests/misc/sort-mb-tests
|
||
--- coreutils-8.0-orig/tests/misc/sort-mb-tests 1970-01-01 01:00:00.000000000 +0100
|
||
+++ coreutils-8.0/tests/misc/sort-mb-tests 2009-10-07 10:07:16.000000000 +0200
|
||
@@ -0,0 +1,63 @@
|
||
+#! /bin/sh
|
||
+# Run sort with -t @ and a numeric key under en_US.UTF-8 and compare the
|
||
+# output with the expected misc/mb*.X files. Usage: sort-mb-tests [SORT]
|
||
+case $# in
|
||
+ 0) xx='../src/sort';;
|
||
+ *) xx="$1";;
|
||
+esac
|
||
+test "$VERBOSE" && echo=echo || echo=:
|
||
+$echo testing program: $xx
|
||
+errors=0
|
||
+test "$srcdir" || srcdir=.
|
||
+test "$VERBOSE" && $xx --version 2> /dev/null
|
||
+
|
||
+# Exit 77 unless the en_US.UTF-8 locale really reports a UTF-8 charmap.
|
||
+LC_ALL=en_US.UTF-8; export LC_ALL
|
||
+locale -k LC_CTYPE 2>&1 | grep -q 'charmap.*UTF-8' || exit 77
|
||
+
|
||
+# mb1: numeric sort on field 2; input is read from $srcdir like the .X file.
|
||
+$xx -t @ -k2 -n $srcdir/misc/mb1.I > misc/mb1.O
|
||
+code=$?
|
||
+if test $code != 0; then
|
||
+ $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
|
||
+ errors=`expr $errors + 1`
|
||
+else
|
||
+ cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
|
||
+ case $? in
|
||
+ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
|
||
+ 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
|
||
+ (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
|
||
+ errors=`expr $errors + 1`;;
|
||
+ 2) $echo "Test mb1 may have failed." 1>&2
|
||
+ $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
|
||
+ errors=`expr $errors + 1`;;
|
||
+ esac
|
||
+fi
|
||
+
|
||
+# mb2: numeric sort on field 4; input is read from $srcdir like the .X file.
|
||
+$xx -t @ -k4 -n $srcdir/misc/mb2.I > misc/mb2.O
|
||
+code=$?
|
||
+if test $code != 0; then
|
||
+ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
|
||
+ errors=`expr $errors + 1`
|
||
+else
|
||
+ cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
|
||
+ case $? in
|
||
+ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
|
||
+ 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
|
||
+ (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
|
||
+ errors=`expr $errors + 1`;;
|
||
+ 2) $echo "Test mb2 may have failed." 1>&2
|
||
+ $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
|
||
+ errors=`expr $errors + 1`;;
|
||
+ esac
|
||
+fi
|
||
+
|
||
+if test $errors = 0; then
|
||
+ $echo Passed all 2 tests. 1>&2
|
||
+else
|
||
+ $echo Failed $errors tests. 1>&2
|
||
+fi
|
||
+# Collapse the failure count into an exit status of 0 or 1.
|
||
+test $errors = 0 || errors=1
|
||
+exit $errors
|