diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index f419eaa..2d56736 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -1,140 +1,7 @@ -diff -urN coreutils-6.12-orig/tests/misc/cut coreutils-6.12/tests/misc/cut ---- coreutils-6.12-orig/tests/misc/cut 2008-05-17 08:41:11.000000000 +0200 -+++ coreutils-6.12/tests/misc/cut 2008-06-02 11:13:08.000000000 +0200 -@@ -26,7 +26,7 @@ - my $prog = 'cut'; - my $try = "Try \`$prog --help' for more information.\n"; - my $from_1 = "$prog: fields and positions are numbered from 1\n$try"; --my $inval = "$prog: invalid byte or field list\n$try"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; - my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try"; - - my @Tests = -@@ -140,8 +140,8 @@ - ['od-overlap5', '-b1-3,1-4', '--output-d=:', {IN=>"abcde\n"}, {OUT=>"abcd\n"}], - - # None of the following invalid ranges provoked an error up to coreutils-6.9. -- ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, -- {ERR=>"$prog: invalid decreasing range\n$try"}], -+ ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, -+ {ERR=>"$prog: invalid byte, character or field list\n$try"}], - ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], - ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], - ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,58 @@ -+#! /bin/sh -+case $# in -+ 0) xx='../src/sort';; -+ *) xx="$1";; -+esac -+test "$VERBOSE" && echo=echo || echo=: -+$echo testing program: $xx -+errors=0 -+test "$srcdir" || srcdir=. -+test "$VERBOSE" && $xx --version 2> /dev/null -+ -+export LC_ALL=en_US.UTF-8 -+locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77 -+errors=0 -+ -+$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O -+code=$? 
-+if test $code != 0; then -+ $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2 -+ errors=`expr $errors + 1` -+else -+ cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1 -+ case $? in -+ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;; -+ 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2 -+ (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null -+ errors=`expr $errors + 1`;; -+ 2) $echo "Test mb1 may have failed." 1>&2 -+ $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2 -+ errors=`expr $errors + 1`;; -+ esac -+fi -+ -+$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O -+code=$? -+if test $code != 0; then -+ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2 -+ errors=`expr $errors + 1` -+else -+ cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1 -+ case $? in -+ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;; -+ 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2 -+ (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null -+ errors=`expr $errors + 1`;; -+ 2) $echo "Test mb2 may have failed." 1>&2 -+ $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2 -+ errors=`expr $errors + 1`;; -+ esac -+fi -+ -+if test $errors = 0; then -+ $echo Passed all 113 tests. 1>&2 -+else -+ $echo Failed $errors tests. 
1>&2 -+fi -+test $errors = 0 || errors=1 -+exit $errors ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb2.I 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Apple@AA10@@20 -+Banana@AA5@@30 -+Citrus@AA20@@5 -+Cherry@AA30@@10 ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb2.X 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Citrus@AA20@@5 -+Cherry@AA30@@10 -+Apple@AA10@@20 -+Banana@AA5@@30 ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb1.I 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Apple@10 -+Banana@5 -+Citrus@20 -+Cherry@30 ---- /dev/null 2007-03-01 09:16:39.219409909 +0000 -+++ coreutils-6.8+/tests/misc/mb1.X 2007-03-01 15:08:24.000000000 +0000 -@@ -0,0 +1,4 @@ -+Banana@5 -+Apple@10 -+Citrus@20 -+Cherry@30 -diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am ---- coreutils-6.12-orig/tests/Makefile.am 2008-05-27 13:47:53.000000000 +0200 -+++ coreutils-6.12/tests/Makefile.am 2008-06-02 10:06:03.000000000 +0200 -@@ -192,6 +192,7 @@ - misc/sort-compress \ - misc/sort-continue \ - misc/sort-files0-from \ -+ misc/sort-mb-tests \ - misc/sort-merge \ - misc/sort-merge-fdlimit \ - misc/sort-rand \ -@@ -391,6 +392,10 @@ - $(root_tests) - - pr_data = \ -+ misc/mb1.X \ -+ misc/mb1.I \ -+ misc/mb2.X \ -+ misc/mb2.I \ - pr/0F \ - pr/0FF \ - pr/0FFnt \ ---- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100 -+++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000 -@@ -22,6 +22,11 @@ +diff -urNp coreutils-8.0-orig/lib/linebuffer.h coreutils-8.0/lib/linebuffer.h +--- coreutils-8.0-orig/lib/linebuffer.h 2009-10-06 10:59:48.000000000 +0200 ++++ coreutils-8.0/lib/linebuffer.h 2009-10-07 10:07:16.000000000 +0200 +@@ -21,6 +21,11 @@ # include @@ -146,7 +13,7 @@ diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am /* A `struct linebuffer' holds a line of text. 
*/ struct linebuffer -@@ -29,6 +34,9 @@ +@@ -28,6 +33,9 @@ struct linebuffer size_t size; /* Allocated. */ size_t length; /* Used. */ char *buffer; @@ -156,9 +23,1557 @@ diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am }; /* Initialize linebuffer LINEBUFFER for use. */ ---- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000 -@@ -38,11 +38,28 @@ +diff -urNp coreutils-8.0-orig/lib/linebuffer.h.orig coreutils-8.0/lib/linebuffer.h.orig +--- coreutils-8.0-orig/lib/linebuffer.h.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/lib/linebuffer.h.orig 2009-10-06 10:59:48.000000000 +0200 +@@ -0,0 +1,53 @@ ++/* linebuffer.h -- declarations for reading arbitrarily long lines ++ ++ Copyright (C) 1986, 1991, 1998, 1999, 2002, 2003, 2007 Free Software ++ Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++#if !defined LINEBUFFER_H ++# define LINEBUFFER_H ++ ++# include ++ ++/* A `struct linebuffer' holds a line of text. */ ++ ++struct linebuffer ++{ ++ size_t size; /* Allocated. */ ++ size_t length; /* Used. */ ++ char *buffer; ++}; ++ ++/* Initialize linebuffer LINEBUFFER for use. */ ++void initbuffer (struct linebuffer *linebuffer); ++ ++/* Read an arbitrarily long line of text from STREAM into LINEBUFFER. ++ Consider lines to be terminated by DELIMITER. 
++ Keep the delimiter; append DELIMITER if we reach EOF and it wasn't ++ the last character in the file. Do not NUL-terminate. ++ Return LINEBUFFER, except at end of file return NULL. */ ++struct linebuffer *readlinebuffer_delim (struct linebuffer *linebuffer, ++ FILE *stream, char delimiter); ++ ++/* Read an arbitrarily long line of text from STREAM into LINEBUFFER. ++ Keep the newline; append a newline if it's the last line of a file ++ that ends in a non-newline character. Do not NUL-terminate. ++ Return LINEBUFFER, except at end of file return NULL. */ ++struct linebuffer *readlinebuffer (struct linebuffer *linebuffer, FILE *stream); ++ ++/* Free linebuffer LINEBUFFER and its data, all allocated with malloc. */ ++void freebuffer (struct linebuffer *); ++ ++#endif /* LINEBUFFER_H */ +diff -urNp coreutils-8.0-orig/src/cut.c coreutils-8.0/src/cut.c +--- coreutils-8.0-orig/src/cut.c 2009-09-23 10:25:44.000000000 +0200 ++++ coreutils-8.0/src/cut.c 2009-10-07 10:07:16.000000000 +0200 +@@ -28,6 +28,11 @@ + #include + #include + #include ++ ++/* Get mbstate_t, mbrtowc(). */ ++#if HAVE_WCHAR_H ++# include ++#endif + #include "system.h" + + #include "error.h" +@@ -36,6 +41,18 @@ + #include "quote.h" + #include "xstrndup.h" + ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC ++ installation; work around this configuration error. */ ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 ++# undef MB_LEN_MAX ++# define MB_LEN_MAX 16 ++#endif ++ ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ + /* The official name of this program (e.g., no `g' prefix). */ + #define PROGRAM_NAME "cut" + +@@ -71,6 +88,52 @@ + } \ + while (0) + ++/* Refill the buffer BUF to get a multibyte character. 
*/ ++#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ ++ do \ ++ { \ ++ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ ++ { \ ++ memmove (BUF, BUFPOS, BUFLEN); \ ++ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ ++ BUFPOS = BUF; \ ++ } \ ++ } \ ++ while (0) ++ ++/* Get wide character on BUFPOS. BUFPOS is not included after that. ++ If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */ ++#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ ++ do \ ++ { \ ++ mbstate_t state_bak; \ ++ \ ++ if (BUFLEN < 1) \ ++ { \ ++ WC = WEOF; \ ++ break; \ ++ } \ ++ \ ++ /* Get a wide character. */ \ ++ CONVFAIL = 0; \ ++ state_bak = STATE; \ ++ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ ++ \ ++ switch (MBLENGTH) \ ++ { \ ++ case (size_t)-1: \ ++ case (size_t)-2: \ ++ CONVFAIL++; \ ++ STATE = state_bak; \ ++ /* Fall througn. */ \ ++ \ ++ case 0: \ ++ MBLENGTH = 1; \ ++ break; \ ++ } \ ++ } \ ++ while (0) ++ + struct range_pair + { + size_t lo; +@@ -89,7 +152,7 @@ static char *field_1_buffer; + /* The number of bytes allocated for FIELD_1_BUFFER. */ + static size_t field_1_bufsize; + +-/* The largest field or byte index used as an endpoint of a closed ++/* The largest byte, character or field index used as an endpoint of a closed + or degenerate range specification; this doesn't include the starting + index of right-open-ended ranges. For example, with either range spec + `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ +@@ -101,10 +164,11 @@ static size_t eol_range_start; + + /* This is a bit vector. + In byte mode, which bytes to output. ++ In character mode, which characters to output. + In field mode, which DELIM-separated fields to output. +- Both bytes and fields are numbered starting with 1, ++ Bytes, characters and fields are numbered starting with 1, + so the zeroth bit of this array is unused. 
+- A field or byte K has been selected if ++ A byte, character or field K has been selected if + (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) + || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ + static unsigned char *printable_field; +@@ -113,15 +177,25 @@ enum operating_mode + { + undefined_mode, + +- /* Output characters that are in the given bytes. */ ++ /* Output bytes that are at the given positions. */ + byte_mode, + ++ /* Output characters that are at the given positions. */ ++ character_mode, ++ + /* Output the given delimeter-separated fields. */ + field_mode + }; + + static enum operating_mode operating_mode; + ++/* If nonzero, when in byte mode, don't split multibyte characters. */ ++static int byte_mode_character_aware; ++ ++/* If nonzero, the function for single byte locale is work ++ if this program runs on multibyte locale. */ ++static int force_singlebyte_mode; ++ + /* If true do not output lines containing no delimeter characters. + Otherwise, all such lines are printed. This option is valid only + with field mode. */ +@@ -133,6 +207,9 @@ static bool complement; + + /* The delimeter character for field mode. */ + static unsigned char delim; ++#if HAVE_WCHAR_H ++static wchar_t wcdelim; ++#endif + + /* True if the --output-delimiter=STRING option was specified. */ + static bool output_delimiter_specified; +@@ -206,7 +283,7 @@ Mandatory arguments to long options are + -f, --fields=LIST select only these fields; also print any line\n\ + that contains no delimiter character, unless\n\ + the -s option is specified\n\ +- -n (ignored)\n\ ++ -n with -b: don't split multibyte characters\n\ + "), stdout); + fputs (_("\ + --complement complement the set of selected bytes, characters\n\ +@@ -365,7 +442,7 @@ set_fields (const char *fieldstr) + in_digits = false; + /* Starting a range. 
*/ + if (dash_found) +- FATAL_ERROR (_("invalid byte or field list")); ++ FATAL_ERROR (_("invalid byte, character or field list")); + dash_found = true; + fieldstr++; + +@@ -389,14 +466,16 @@ set_fields (const char *fieldstr) + if (!rhs_specified) + { + /* `n-'. From `initial' to end of line. */ +- eol_range_start = initial; ++ if (eol_range_start == 0 || ++ (eol_range_start != 0 && eol_range_start > initial)) ++ eol_range_start = initial; + field_found = true; + } + else + { + /* `m-n' or `-n' (1-n). */ + if (value < initial) +- FATAL_ERROR (_("invalid decreasing range")); ++ FATAL_ERROR (_("invalid byte, character or field list")); + + /* Is there already a range going to end of line? */ + if (eol_range_start != 0) +@@ -476,6 +555,9 @@ set_fields (const char *fieldstr) + if (operating_mode == byte_mode) + error (0, 0, + _("byte offset %s is too large"), quote (bad_num)); ++ else if (operating_mode == character_mode) ++ error (0, 0, ++ _("character offset %s is too large"), quote (bad_num)); + else + error (0, 0, + _("field number %s is too large"), quote (bad_num)); +@@ -486,7 +568,7 @@ set_fields (const char *fieldstr) + fieldstr++; + } + else +- FATAL_ERROR (_("invalid byte or field list")); ++ FATAL_ERROR (_("invalid byte, character or field list")); + } + + max_range_endpoint = 0; +@@ -579,6 +661,63 @@ cut_bytes (FILE *stream) + } + } + ++#if HAVE_MBRTOWC ++/* This function is in use for the following case. ++ ++ 1. Read from the stream STREAM, printing to standard output any selected ++ characters. ++ ++ 2. Read from stream STREAM, printing to standard output any selected bytes, ++ without splitting multibyte characters. */ ++ ++static void ++cut_characters_or_cut_bytes_no_split (FILE *stream) ++{ ++ int idx; /* number of bytes or characters in the line so far. */ ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ ++ char *bufpos; /* Next read position of BUF. */ ++ size_t buflen; /* The length of the byte sequence in buf. 
*/ ++ wint_t wc; /* A gotten wide character. */ ++ size_t mblength; /* The byte size of a multibyte character which shows ++ as same character as WC. */ ++ mbstate_t state; /* State of the stream. */ ++ int convfail; /* 1, when conversion is failed. Otherwise 0. */ ++ ++ idx = 0; ++ buflen = 0; ++ bufpos = buf; ++ memset (&state, '\0', sizeof(mbstate_t)); ++ ++ while (1) ++ { ++ REFILL_BUFFER (buf, bufpos, buflen, stream); ++ ++ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); ++ ++ if (wc == WEOF) ++ { ++ if (idx > 0) ++ putchar ('\n'); ++ break; ++ } ++ else if (wc == L'\n') ++ { ++ putchar ('\n'); ++ idx = 0; ++ } ++ else ++ { ++ idx += (operating_mode == byte_mode) ? mblength : 1; ++ if (print_kth (idx, NULL)) ++ fwrite (bufpos, mblength, sizeof(char), stdout); ++ } ++ ++ buflen -= mblength; ++ bufpos += mblength; ++ } ++} ++#endif ++ + /* Read from stream STREAM, printing to standard output any selected fields. */ + + static void +@@ -701,13 +840,192 @@ cut_fields (FILE *stream) + } + } + ++#if HAVE_MBRTOWC ++static void ++cut_fields_mb (FILE *stream) ++{ ++ int c; ++ unsigned int field_idx; ++ int found_any_selected_field; ++ int buffer_first_field; ++ int empty_input; ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ ++ char *bufpos; /* Next read position of BUF. */ ++ size_t buflen; /* The length of the byte sequence in buf. */ ++ wint_t wc = 0; /* A gotten wide character. */ ++ size_t mblength; /* The byte size of a multibyte character which shows ++ as same character as WC. */ ++ mbstate_t state; /* State of the stream. */ ++ int convfail; /* 1, when conversion is failed. Otherwise 0. 
*/ ++ ++ found_any_selected_field = 0; ++ field_idx = 1; ++ bufpos = buf; ++ buflen = 0; ++ memset (&state, '\0', sizeof(mbstate_t)); ++ ++ c = getc (stream); ++ empty_input = (c == EOF); ++ if (c != EOF) ++ ungetc (c, stream); ++ else ++ wc = WEOF; ++ ++ /* To support the semantics of the -s flag, we may have to buffer ++ all of the first field to determine whether it is `delimited.' ++ But that is unnecessary if all non-delimited lines must be printed ++ and the first field has been selected, or if non-delimited lines ++ must be suppressed and the first field has *not* been selected. ++ That is because a non-delimited line has exactly one field. */ ++ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); ++ ++ while (1) ++ { ++ if (field_idx == 1 && buffer_first_field) ++ { ++ int len = 0; ++ ++ while (1) ++ { ++ REFILL_BUFFER (buf, bufpos, buflen, stream); ++ ++ GET_NEXT_WC_FROM_BUFFER ++ (wc, bufpos, buflen, mblength, state, convfail); ++ ++ if (wc == WEOF) ++ break; ++ ++ field_1_buffer = xrealloc (field_1_buffer, len + mblength); ++ memcpy (field_1_buffer + len, bufpos, mblength); ++ len += mblength; ++ buflen -= mblength; ++ bufpos += mblength; ++ ++ if (!convfail && (wc == L'\n' || wc == wcdelim)) ++ break; ++ } ++ ++ if (wc == WEOF) ++ break; ++ ++ /* If the first field extends to the end of line (it is not ++ delimited) and we are printing all non-delimited lines, ++ print this one. */ ++ if (convfail || (!convfail && wc != wcdelim)) ++ { ++ if (suppress_non_delimited) ++ { ++ /* Empty. */ ++ } ++ else ++ { ++ fwrite (field_1_buffer, sizeof (char), len, stdout); ++ /* Make sure the output line is newline terminated. */ ++ if (convfail || (!convfail && wc != L'\n')) ++ putchar ('\n'); ++ } ++ continue; ++ } ++ ++ if (print_kth (1, NULL)) ++ { ++ /* Print the field, but not the trailing delimiter. 
*/ ++ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); ++ found_any_selected_field = 1; ++ } ++ ++field_idx; ++ } ++ ++ if (wc != WEOF) ++ { ++ if (print_kth (field_idx, NULL)) ++ { ++ if (found_any_selected_field) ++ { ++ fwrite (output_delimiter_string, sizeof (char), ++ output_delimiter_length, stdout); ++ } ++ found_any_selected_field = 1; ++ } ++ ++ while (1) ++ { ++ REFILL_BUFFER (buf, bufpos, buflen, stream); ++ ++ GET_NEXT_WC_FROM_BUFFER ++ (wc, bufpos, buflen, mblength, state, convfail); ++ ++ if (wc == WEOF) ++ break; ++ else if (!convfail && (wc == wcdelim || wc == L'\n')) ++ { ++ buflen -= mblength; ++ bufpos += mblength; ++ break; ++ } ++ ++ if (print_kth (field_idx, NULL)) ++ fwrite (bufpos, mblength, sizeof(char), stdout); ++ ++ buflen -= mblength; ++ bufpos += mblength; ++ } ++ } ++ ++ if ((!convfail || wc == L'\n') && buflen < 1) ++ wc = WEOF; ++ ++ if (!convfail && wc == wcdelim) ++ ++field_idx; ++ else if (wc == WEOF || (!convfail && wc == L'\n')) ++ { ++ if (found_any_selected_field ++ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) ++ putchar ('\n'); ++ if (wc == WEOF) ++ break; ++ field_idx = 1; ++ found_any_selected_field = 0; ++ } ++ } ++} ++#endif ++ + static void + cut_stream (FILE *stream) + { +- if (operating_mode == byte_mode) +- cut_bytes (stream); ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) ++ { ++ switch (operating_mode) ++ { ++ case byte_mode: ++ if (byte_mode_character_aware) ++ cut_characters_or_cut_bytes_no_split (stream); ++ else ++ cut_bytes (stream); ++ break; ++ ++ case character_mode: ++ cut_characters_or_cut_bytes_no_split (stream); ++ break; ++ ++ case field_mode: ++ cut_fields_mb (stream); ++ break; ++ ++ default: ++ abort (); ++ } ++ } + else +- cut_fields (stream); ++#endif ++ { ++ if (operating_mode == field_mode) ++ cut_fields (stream); ++ else ++ cut_bytes (stream); ++ } + } + + /* Process file FILE to standard output. 
+@@ -757,6 +1075,8 @@ main (int argc, char **argv) + bool ok; + bool delim_specified = false; + char *spec_list_string IF_LINT(= NULL); ++ char mbdelim[MB_LEN_MAX + 1]; ++ size_t delimlen = 0; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); +@@ -779,7 +1099,6 @@ main (int argc, char **argv) + switch (optc) + { + case 'b': +- case 'c': + /* Build the byte list. */ + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); +@@ -787,6 +1106,14 @@ main (int argc, char **argv) + spec_list_string = optarg; + break; + ++ case 'c': ++ /* Build the character list. */ ++ if (operating_mode != undefined_mode) ++ FATAL_ERROR (_("only one type of list may be specified")); ++ operating_mode = character_mode; ++ spec_list_string = optarg; ++ break; ++ + case 'f': + /* Build the field list. */ + if (operating_mode != undefined_mode) +@@ -798,10 +1125,35 @@ main (int argc, char **argv) + case 'd': + /* New delimiter. */ + /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ +- if (optarg[0] != '\0' && optarg[1] != '\0') +- FATAL_ERROR (_("the delimiter must be a single character")); +- delim = optarg[0]; +- delim_specified = true; ++ { ++#if HAVE_MBRTOWC ++ if(MB_CUR_MAX > 1) ++ { ++ mbstate_t state; ++ ++ memset (&state, '\0', sizeof(mbstate_t)); ++ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); ++ ++ if (delimlen == (size_t)-1 || delimlen == (size_t)-2) ++ ++force_singlebyte_mode; ++ else ++ { ++ delimlen = (delimlen < 1) ? 
1 : delimlen; ++ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') ++ FATAL_ERROR (_("the delimiter must be a single character")); ++ memcpy (mbdelim, optarg, delimlen); ++ } ++ } ++ ++ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) ++#endif ++ { ++ if (optarg[0] != '\0' && optarg[1] != '\0') ++ FATAL_ERROR (_("the delimiter must be a single character")); ++ delim = (unsigned char) optarg[0]; ++ } ++ delim_specified = true; ++ } + break; + + case OUTPUT_DELIMITER_OPTION: +@@ -814,6 +1166,7 @@ main (int argc, char **argv) + break; + + case 'n': ++ byte_mode_character_aware = 1; + break; + + case 's': +@@ -836,7 +1189,7 @@ main (int argc, char **argv) + if (operating_mode == undefined_mode) + FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); + +- if (delim != '\0' && operating_mode != field_mode) ++ if (delim_specified && operating_mode != field_mode) + FATAL_ERROR (_("an input delimiter may be specified only\ + when operating on fields")); + +@@ -863,15 +1216,34 @@ main (int argc, char **argv) + } + + if (!delim_specified) +- delim = '\t'; ++ { ++ delim = '\t'; ++#ifdef HAVE_MBRTOWC ++ wcdelim = L'\t'; ++ mbdelim[0] = '\t'; ++ mbdelim[1] = '\0'; ++ delimlen = 1; ++#endif ++ } + + if (output_delimiter_string == NULL) + { +- static char dummy[2]; +- dummy[0] = delim; +- dummy[1] = '\0'; +- output_delimiter_string = dummy; +- output_delimiter_length = 1; ++#ifdef HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) ++ { ++ output_delimiter_string = xstrdup(mbdelim); ++ output_delimiter_length = delimlen; ++ } ++ ++ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) ++#endif ++ { ++ static char dummy[2]; ++ dummy[0] = delim; ++ dummy[1] = '\0'; ++ output_delimiter_string = dummy; ++ output_delimiter_length = 1; ++ } + } + + if (optind == argc) +diff -urNp coreutils-8.0-orig/src/cut.c.orig coreutils-8.0/src/cut.c.orig +--- coreutils-8.0-orig/src/cut.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/cut.c.orig 2009-09-23 
10:25:44.000000000 +0200 +@@ -0,0 +1,893 @@ ++/* cut - remove parts of lines of files ++ Copyright (C) 1997-2009 Free Software Foundation, Inc. ++ Copyright (C) 1984 David M. Ihnat ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* Written by David Ihnat. */ ++ ++/* POSIX changes, bug fixes, long-named options, and cleanup ++ by David MacKenzie . ++ ++ Rewrite cut_fields and cut_bytes -- Jim Meyering. */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include "system.h" ++ ++#include "error.h" ++#include "getndelim2.h" ++#include "hash.h" ++#include "quote.h" ++#include "xstrndup.h" ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "cut" ++ ++#define AUTHORS \ ++ proper_name ("David M. Ihnat"), \ ++ proper_name ("David MacKenzie"), \ ++ proper_name ("Jim Meyering") ++ ++#define FATAL_ERROR(Message) \ ++ do \ ++ { \ ++ error (0, 0, (Message)); \ ++ usage (EXIT_FAILURE); \ ++ } \ ++ while (0) ++ ++/* Append LOW, HIGH to the list RP of range pairs, allocating additional ++ space if necessary. Update local variable N_RP. When allocating, ++ update global variable N_RP_ALLOCATED. 
*/ ++ ++#define ADD_RANGE_PAIR(rp, low, high) \ ++ do \ ++ { \ ++ if (low == 0 || high == 0) \ ++ FATAL_ERROR (_("fields and positions are numbered from 1")); \ ++ if (n_rp >= n_rp_allocated) \ ++ { \ ++ (rp) = X2NREALLOC (rp, &n_rp_allocated); \ ++ } \ ++ rp[n_rp].lo = (low); \ ++ rp[n_rp].hi = (high); \ ++ ++n_rp; \ ++ } \ ++ while (0) ++ ++struct range_pair ++ { ++ size_t lo; ++ size_t hi; ++ }; ++ ++/* This buffer is used to support the semantics of the -s option ++ (or lack of same) when the specified field list includes (does ++ not include) the first field. In both of those cases, the entire ++ first field must be read into this buffer to determine whether it ++ is followed by a delimiter or a newline before any of it may be ++ output. Otherwise, cut_fields can do the job without using this ++ buffer. */ ++static char *field_1_buffer; ++ ++/* The number of bytes allocated for FIELD_1_BUFFER. */ ++static size_t field_1_bufsize; ++ ++/* The largest field or byte index used as an endpoint of a closed ++ or degenerate range specification; this doesn't include the starting ++ index of right-open-ended ranges. For example, with either range spec ++ `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ ++static size_t max_range_endpoint; ++ ++/* If nonzero, this is the index of the first field in a range that goes ++ to end of line. */ ++static size_t eol_range_start; ++ ++/* This is a bit vector. ++ In byte mode, which bytes to output. ++ In field mode, which DELIM-separated fields to output. ++ Both bytes and fields are numbered starting with 1, ++ so the zeroth bit of this array is unused. ++ A field or byte K has been selected if ++ (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) ++ || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ ++static unsigned char *printable_field; ++ ++enum operating_mode ++ { ++ undefined_mode, ++ ++ /* Output characters that are in the given bytes. */ ++ byte_mode, ++ ++ /* Output the given delimeter-separated fields. 
*/ ++ field_mode ++ }; ++ ++static enum operating_mode operating_mode; ++ ++/* If true do not output lines containing no delimeter characters. ++ Otherwise, all such lines are printed. This option is valid only ++ with field mode. */ ++static bool suppress_non_delimited; ++ ++/* If nonzero, print all bytes, characters, or fields _except_ ++ those that were specified. */ ++static bool complement; ++ ++/* The delimeter character for field mode. */ ++static unsigned char delim; ++ ++/* True if the --output-delimiter=STRING option was specified. */ ++static bool output_delimiter_specified; ++ ++/* The length of output_delimiter_string. */ ++static size_t output_delimiter_length; ++ ++/* The output field separator string. Defaults to the 1-character ++ string consisting of the input delimiter. */ ++static char *output_delimiter_string; ++ ++/* True if we have ever read standard input. */ ++static bool have_read_stdin; ++ ++#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31 ++ ++/* The set of range-start indices. For example, given a range-spec list like ++ `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15. ++ Note that although `4' looks like a range-start index, it is in the middle ++ of the `3-5' range, so it doesn't count. ++ This table is created/used IFF output_delimiter_specified is set. */ ++static Hash_table *range_start_ht; ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ ++enum ++{ ++ OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1, ++ COMPLEMENT_OPTION ++}; ++ ++static struct option const longopts[] = ++{ ++ {"bytes", required_argument, NULL, 'b'}, ++ {"characters", required_argument, NULL, 'c'}, ++ {"fields", required_argument, NULL, 'f'}, ++ {"delimiter", required_argument, NULL, 'd'}, ++ {"only-delimited", no_argument, NULL, 's'}, ++ {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, ++ {"complement", no_argument, NULL, COMPLEMENT_OPTION}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s OPTION... [FILE]...\n\ ++"), ++ program_name); ++ fputs (_("\ ++Print selected parts of lines from each FILE to standard output.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ -b, --bytes=LIST select only these bytes\n\ ++ -c, --characters=LIST select only these characters\n\ ++ -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ ++"), stdout); ++ fputs (_("\ ++ -f, --fields=LIST select only these fields; also print any line\n\ ++ that contains no delimiter character, unless\n\ ++ the -s option is specified\n\ ++ -n (ignored)\n\ ++"), stdout); ++ fputs (_("\ ++ --complement complement the set of selected bytes, characters\n\ ++ or fields\n\ ++"), stdout); ++ fputs (_("\ ++ -s, --only-delimited do not print lines not containing delimiters\n\ ++ --output-delimiter=STRING use STRING as the output delimiter\n\ ++ the default is to use the input delimiter\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++Use one, and only one of -b, -c or -f. 
Each LIST is made up of one\n\ ++range, or many ranges separated by commas. Selected input is written\n\ ++in the same order that it is read, and is written exactly once.\n\ ++"), stdout); ++ fputs (_("\ ++Each range is one of:\n\ ++\n\ ++ N N'th byte, character or field, counted from 1\n\ ++ N- from N'th byte, character or field, to end of line\n\ ++ N-M from N'th to M'th (included) byte, character or field\n\ ++ -M from first to M'th (included) byte, character or field\n\ ++\n\ ++With no FILE, or when FILE is -, read standard input.\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++static inline void ++mark_range_start (size_t i) ++{ ++ /* Record the fact that `i' is a range-start index. */ ++ void *ent_from_table = hash_insert (range_start_ht, (void*) i); ++ if (ent_from_table == NULL) ++ { ++ /* Insertion failed due to lack of memory. */ ++ xalloc_die (); ++ } ++ assert ((size_t) ent_from_table == i); ++} ++ ++static inline void ++mark_printable_field (size_t i) ++{ ++ size_t n = i / CHAR_BIT; ++ printable_field[n] |= (1 << (i % CHAR_BIT)); ++} ++ ++static inline bool ++is_printable_field (size_t i) ++{ ++ size_t n = i / CHAR_BIT; ++ return (printable_field[n] >> (i % CHAR_BIT)) & 1; ++} ++ ++static size_t ++hash_int (const void *x, size_t tablesize) ++{ ++#ifdef UINTPTR_MAX ++ uintptr_t y = (uintptr_t) x; ++#else ++ size_t y = (size_t) x; ++#endif ++ return y % tablesize; ++} ++ ++static bool ++hash_compare_ints (void const *x, void const *y) ++{ ++ return (x == y) ? true : false; ++} ++ ++static bool ++is_range_start_index (size_t i) ++{ ++ return hash_lookup (range_start_ht, (void *) i) ? true : false; ++} ++ ++/* Return nonzero if the K'th field or byte is printable. ++ When returning nonzero, if RANGE_START is non-NULL, ++ set *RANGE_START to true if K is the beginning of a range, and to ++ false otherwise. 
*/ ++ ++static bool ++print_kth (size_t k, bool *range_start) ++{ ++ bool k_selected ++ = ((0 < eol_range_start && eol_range_start <= k) ++ || (k <= max_range_endpoint && is_printable_field (k))); ++ ++ bool is_selected = k_selected ^ complement; ++ if (range_start && is_selected) ++ *range_start = is_range_start_index (k); ++ ++ return is_selected; ++} ++ ++/* Comparison function for qsort to order the list of ++ struct range_pairs. */ ++static int ++compare_ranges (const void *a, const void *b) ++{ ++ int a_start = ((const struct range_pair *) a)->lo; ++ int b_start = ((const struct range_pair *) b)->lo; ++ return a_start < b_start ? -1 : a_start > b_start; ++} ++ ++/* Given the list of field or byte range specifications FIELDSTR, set ++ MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD ++ array. If there is a right-open-ended range, set EOL_RANGE_START ++ to its starting index. FIELDSTR should be composed of one or more ++ numbers or ranges of numbers, separated by blanks or commas. ++ Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n' ++ through end of line. Return true if FIELDSTR contains at least ++ one field specification, false otherwise. */ ++ ++/* FIXME-someday: What if the user wants to cut out the 1,000,000-th ++ field of some huge input file? This function shouldn't have to ++ allocate a table of a million bits just so we can test every ++ field < 10^6 with an array dereference. Instead, consider using ++ an adaptive approach: if the range of selected fields is too large, ++ but only a few fields/byte-offsets are actually selected, use a ++ hash table. If the range of selected fields is too large, and ++ too many are selected, then resort to using the range-pairs (the ++ `rp' array) directly. */ ++ ++static bool ++set_fields (const char *fieldstr) ++{ ++ size_t initial = 1; /* Value of first number in a range. */ ++ size_t value = 0; /* If nonzero, a number being accumulated. 
*/ ++ bool lhs_specified = false; ++ bool rhs_specified = false; ++ bool dash_found = false; /* True if a '-' is found in this field. */ ++ bool field_found = false; /* True if at least one field spec ++ has been processed. */ ++ ++ struct range_pair *rp = NULL; ++ size_t n_rp = 0; ++ size_t n_rp_allocated = 0; ++ size_t i; ++ bool in_digits = false; ++ ++ /* Collect and store in RP the range end points. ++ It also sets EOL_RANGE_START if appropriate. */ ++ ++ for (;;) ++ { ++ if (*fieldstr == '-') ++ { ++ in_digits = false; ++ /* Starting a range. */ ++ if (dash_found) ++ FATAL_ERROR (_("invalid byte or field list")); ++ dash_found = true; ++ fieldstr++; ++ ++ initial = (lhs_specified ? value : 1); ++ value = 0; ++ } ++ else if (*fieldstr == ',' || ++ isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') ++ { ++ in_digits = false; ++ /* Ending the string, or this field/byte sublist. */ ++ if (dash_found) ++ { ++ dash_found = false; ++ ++ if (!lhs_specified && !rhs_specified) ++ FATAL_ERROR (_("invalid range with no endpoint: -")); ++ ++ /* A range. Possibilities: -n, m-n, n-. ++ In any case, `initial' contains the start of the range. */ ++ if (!rhs_specified) ++ { ++ /* `n-'. From `initial' to end of line. */ ++ eol_range_start = initial; ++ field_found = true; ++ } ++ else ++ { ++ /* `m-n' or `-n' (1-n). */ ++ if (value < initial) ++ FATAL_ERROR (_("invalid decreasing range")); ++ ++ /* Is there already a range going to end of line? */ ++ if (eol_range_start != 0) ++ { ++ /* Yes. Is the new sequence already contained ++ in the old one? If so, no processing is ++ necessary. */ ++ if (initial < eol_range_start) ++ { ++ /* No, the new sequence starts before the ++ old. Does the old range going to end of line ++ extend into the new range? */ ++ if (eol_range_start <= value) ++ { ++ /* Yes. Simply move the end of line marker. */ ++ eol_range_start = initial; ++ } ++ else ++ { ++ /* No. A simple range, before and disjoint from ++ the range going to end of line. Fill it. 
*/ ++ ADD_RANGE_PAIR (rp, initial, value); ++ } ++ ++ /* In any case, some fields were selected. */ ++ field_found = true; ++ } ++ } ++ else ++ { ++ /* There is no range going to end of line. */ ++ ADD_RANGE_PAIR (rp, initial, value); ++ field_found = true; ++ } ++ value = 0; ++ } ++ } ++ else ++ { ++ /* A simple field number, not a range. */ ++ ADD_RANGE_PAIR (rp, value, value); ++ value = 0; ++ field_found = true; ++ } ++ ++ if (*fieldstr == '\0') ++ { ++ break; ++ } ++ ++ fieldstr++; ++ lhs_specified = false; ++ rhs_specified = false; ++ } ++ else if (ISDIGIT (*fieldstr)) ++ { ++ /* Record beginning of digit string, in case we have to ++ complain about it. */ ++ static char const *num_start; ++ if (!in_digits || !num_start) ++ num_start = fieldstr; ++ in_digits = true; ++ ++ if (dash_found) ++ rhs_specified = 1; ++ else ++ lhs_specified = 1; ++ ++ /* Detect overflow. */ ++ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)) ++ { ++ /* In case the user specified -c$(echo 2^64|bc),22, ++ complain only about the first number. */ ++ /* Determine the length of the offending number. */ ++ size_t len = strspn (num_start, "0123456789"); ++ char *bad_num = xstrndup (num_start, len); ++ if (operating_mode == byte_mode) ++ error (0, 0, ++ _("byte offset %s is too large"), quote (bad_num)); ++ else ++ error (0, 0, ++ _("field number %s is too large"), quote (bad_num)); ++ free (bad_num); ++ exit (EXIT_FAILURE); ++ } ++ ++ fieldstr++; ++ } ++ else ++ FATAL_ERROR (_("invalid byte or field list")); ++ } ++ ++ max_range_endpoint = 0; ++ for (i = 0; i < n_rp; i++) ++ { ++ if (rp[i].hi > max_range_endpoint) ++ max_range_endpoint = rp[i].hi; ++ } ++ ++ /* Allocate an array large enough so that it may be indexed by ++ the field numbers corresponding to all finite ranges ++ (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. 
*/ ++ ++ printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1); ++ ++ qsort (rp, n_rp, sizeof (rp[0]), compare_ranges); ++ ++ /* Set the array entries corresponding to integers in the ranges of RP. */ ++ for (i = 0; i < n_rp; i++) ++ { ++ size_t j; ++ size_t rsi_candidate; ++ ++ /* Record the range-start indices, i.e., record each start ++ index that is not part of any other (lo..hi] range. */ ++ rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo; ++ if (output_delimiter_specified ++ && !is_printable_field (rsi_candidate)) ++ mark_range_start (rsi_candidate); ++ ++ for (j = rp[i].lo; j <= rp[i].hi; j++) ++ mark_printable_field (j); ++ } ++ ++ if (output_delimiter_specified ++ && !complement ++ && eol_range_start && !is_printable_field (eol_range_start)) ++ mark_range_start (eol_range_start); ++ ++ free (rp); ++ ++ return field_found; ++} ++ ++/* Read from stream STREAM, printing to standard output any selected bytes. */ ++ ++static void ++cut_bytes (FILE *stream) ++{ ++ size_t byte_idx; /* Number of bytes in the line so far. */ ++ /* Whether to begin printing delimiters between ranges for the current line. ++ Set after we've begun printing data corresponding to the first range. */ ++ bool print_delimiter; ++ ++ byte_idx = 0; ++ print_delimiter = false; ++ while (1) ++ { ++ int c; /* Each character from the file. */ ++ ++ c = getc (stream); ++ ++ if (c == '\n') ++ { ++ putchar ('\n'); ++ byte_idx = 0; ++ print_delimiter = false; ++ } ++ else if (c == EOF) ++ { ++ if (byte_idx > 0) ++ putchar ('\n'); ++ break; ++ } ++ else ++ { ++ bool range_start; ++ bool *rs = output_delimiter_specified ? &range_start : NULL; ++ if (print_kth (++byte_idx, rs)) ++ { ++ if (rs && *rs && print_delimiter) ++ { ++ fwrite (output_delimiter_string, sizeof (char), ++ output_delimiter_length, stdout); ++ } ++ print_delimiter = true; ++ putchar (c); ++ } ++ } ++ } ++} ++ ++/* Read from stream STREAM, printing to standard output any selected fields. 
*/ ++ ++static void ++cut_fields (FILE *stream) ++{ ++ int c; ++ size_t field_idx = 1; ++ bool found_any_selected_field = false; ++ bool buffer_first_field; ++ ++ c = getc (stream); ++ if (c == EOF) ++ return; ++ ++ ungetc (c, stream); ++ ++ /* To support the semantics of the -s flag, we may have to buffer ++ all of the first field to determine whether it is `delimited.' ++ But that is unnecessary if all non-delimited lines must be printed ++ and the first field has been selected, or if non-delimited lines ++ must be suppressed and the first field has *not* been selected. ++ That is because a non-delimited line has exactly one field. */ ++ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); ++ ++ while (1) ++ { ++ if (field_idx == 1 && buffer_first_field) ++ { ++ ssize_t len; ++ size_t n_bytes; ++ ++ len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, ++ GETNLINE_NO_LIMIT, delim, '\n', stream); ++ if (len < 0) ++ { ++ free (field_1_buffer); ++ field_1_buffer = NULL; ++ if (ferror (stream) || feof (stream)) ++ break; ++ xalloc_die (); ++ } ++ ++ n_bytes = len; ++ assert (n_bytes != 0); ++ ++ /* If the first field extends to the end of line (it is not ++ delimited) and we are printing all non-delimited lines, ++ print this one. */ ++ if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) ++ { ++ if (suppress_non_delimited) ++ { ++ /* Empty. */ ++ } ++ else ++ { ++ fwrite (field_1_buffer, sizeof (char), n_bytes, stdout); ++ /* Make sure the output line is newline terminated. */ ++ if (field_1_buffer[n_bytes - 1] != '\n') ++ putchar ('\n'); ++ } ++ continue; ++ } ++ if (print_kth (1, NULL)) ++ { ++ /* Print the field, but not the trailing delimiter. 
*/ ++ fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout); ++ found_any_selected_field = true; ++ } ++ ++field_idx; ++ } ++ ++ if (c != EOF) ++ { ++ if (print_kth (field_idx, NULL)) ++ { ++ if (found_any_selected_field) ++ { ++ fwrite (output_delimiter_string, sizeof (char), ++ output_delimiter_length, stdout); ++ } ++ found_any_selected_field = true; ++ ++ while ((c = getc (stream)) != delim && c != '\n' && c != EOF) ++ { ++ putchar (c); ++ } ++ } ++ else ++ { ++ while ((c = getc (stream)) != delim && c != '\n' && c != EOF) ++ { ++ /* Empty. */ ++ } ++ } ++ } ++ ++ if (c == '\n') ++ { ++ c = getc (stream); ++ if (c != EOF) ++ { ++ ungetc (c, stream); ++ c = '\n'; ++ } ++ } ++ ++ if (c == delim) ++ ++field_idx; ++ else if (c == '\n' || c == EOF) ++ { ++ if (found_any_selected_field ++ || !(suppress_non_delimited && field_idx == 1)) ++ putchar ('\n'); ++ if (c == EOF) ++ break; ++ field_idx = 1; ++ found_any_selected_field = false; ++ } ++ } ++} ++ ++static void ++cut_stream (FILE *stream) ++{ ++ if (operating_mode == byte_mode) ++ cut_bytes (stream); ++ else ++ cut_fields (stream); ++} ++ ++/* Process file FILE to standard output. ++ Return true if successful. */ ++ ++static bool ++cut_file (char const *file) ++{ ++ FILE *stream; ++ ++ if (STREQ (file, "-")) ++ { ++ have_read_stdin = true; ++ stream = stdin; ++ } ++ else ++ { ++ stream = fopen (file, "r"); ++ if (stream == NULL) ++ { ++ error (0, errno, "%s", file); ++ return false; ++ } ++ } ++ ++ cut_stream (stream); ++ ++ if (ferror (stream)) ++ { ++ error (0, errno, "%s", file); ++ return false; ++ } ++ if (STREQ (file, "-")) ++ clearerr (stream); /* Also clear EOF. 
*/ ++ else if (fclose (stream) == EOF) ++ { ++ error (0, errno, "%s", file); ++ return false; ++ } ++ return true; ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int optc; ++ bool ok; ++ bool delim_specified = false; ++ char *spec_list_string IF_LINT(= NULL); ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdout); ++ ++ operating_mode = undefined_mode; ++ ++ /* By default, all non-delimited lines are printed. */ ++ suppress_non_delimited = false; ++ ++ delim = '\0'; ++ have_read_stdin = false; ++ ++ while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1) ++ { ++ switch (optc) ++ { ++ case 'b': ++ case 'c': ++ /* Build the byte list. */ ++ if (operating_mode != undefined_mode) ++ FATAL_ERROR (_("only one type of list may be specified")); ++ operating_mode = byte_mode; ++ spec_list_string = optarg; ++ break; ++ ++ case 'f': ++ /* Build the field list. */ ++ if (operating_mode != undefined_mode) ++ FATAL_ERROR (_("only one type of list may be specified")); ++ operating_mode = field_mode; ++ spec_list_string = optarg; ++ break; ++ ++ case 'd': ++ /* New delimiter. */ ++ /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ ++ if (optarg[0] != '\0' && optarg[1] != '\0') ++ FATAL_ERROR (_("the delimiter must be a single character")); ++ delim = optarg[0]; ++ delim_specified = true; ++ break; ++ ++ case OUTPUT_DELIMITER_OPTION: ++ output_delimiter_specified = true; ++ /* Interpret --output-delimiter='' to mean ++ `use the NUL byte as the delimiter.' */ ++ output_delimiter_length = (optarg[0] == '\0' ++ ? 
1 : strlen (optarg)); ++ output_delimiter_string = xstrdup (optarg); ++ break; ++ ++ case 'n': ++ break; ++ ++ case 's': ++ suppress_non_delimited = true; ++ break; ++ ++ case COMPLEMENT_OPTION: ++ complement = true; ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ if (operating_mode == undefined_mode) ++ FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); ++ ++ if (delim != '\0' && operating_mode != field_mode) ++ FATAL_ERROR (_("an input delimiter may be specified only\ ++ when operating on fields")); ++ ++ if (suppress_non_delimited && operating_mode != field_mode) ++ FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ ++\tonly when operating on fields")); ++ ++ if (output_delimiter_specified) ++ { ++ range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY, ++ NULL, hash_int, ++ hash_compare_ints, NULL); ++ if (range_start_ht == NULL) ++ xalloc_die (); ++ ++ } ++ ++ if (! set_fields (spec_list_string)) ++ { ++ if (operating_mode == field_mode) ++ FATAL_ERROR (_("missing list of fields")); ++ else ++ FATAL_ERROR (_("missing list of positions")); ++ } ++ ++ if (!delim_specified) ++ delim = '\t'; ++ ++ if (output_delimiter_string == NULL) ++ { ++ static char dummy[2]; ++ dummy[0] = delim; ++ dummy[1] = '\0'; ++ output_delimiter_string = dummy; ++ output_delimiter_length = 1; ++ } ++ ++ if (optind == argc) ++ ok = cut_file ("-"); ++ else ++ for (ok = true; optind < argc; optind++) ++ ok &= cut_file (argv[optind]); ++ ++ if (range_start_ht) ++ hash_free (range_start_ht); ++ ++ if (have_read_stdin && fclose (stdin) == EOF) ++ { ++ error (0, errno, "-"); ++ ok = false; ++ } ++ ++ exit (ok ? 
EXIT_SUCCESS : EXIT_FAILURE); ++} +diff -urNp coreutils-8.0-orig/src/expand.c coreutils-8.0/src/expand.c +--- coreutils-8.0-orig/src/expand.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/expand.c 2009-10-07 10:07:16.000000000 +0200 +@@ -37,11 +37,28 @@ #include #include #include @@ -187,7 +1602,7 @@ diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "expand" -@@ -365,6 +383,142 @@ +@@ -357,6 +374,142 @@ expand (void) } } @@ -330,7 +1745,7 @@ diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am int main (int argc, char **argv) { -@@ -429,7 +583,12 @@ +@@ -421,7 +574,12 @@ main (int argc, char **argv) file_list = (optind < argc ? &argv[optind] : stdin_argv); @@ -344,843 +1759,444 @@ diff -urN coreutils-6.12-orig/tests/Makefile.am coreutils-6.12/tests/Makefile.am if (have_read_stdin && fclose (stdin) != 0) error (EXIT_FAILURE, errno, "-"); ---- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000 -@@ -23,17 +23,31 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include -+#endif +diff -urNp coreutils-8.0-orig/src/expand.c.orig coreutils-8.0/src/expand.c.orig +--- coreutils-8.0-orig/src/expand.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/expand.c.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,430 @@ ++/* expand - convert tabs to spaces ++ Copyright (C) 89, 91, 1995-2006, 2008-2009 Free Software Foundation, Inc. + -+/* Get iswblank(), towupper. */ -+#if HAVE_WCTYPE_H -+# include -+#endif ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. 
+ - #include "system.h" - #include "error.h" - #include "hard-locale.h" - #include "linebuffer.h" --#include "memcasecmp.h" - #include "quote.h" - #include "stdio--.h" - #include "xmemcoll.h" - #include "xstrtol.h" - #include "argmatch.h" - -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. + - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "join" - -@@ -104,10 +118,12 @@ - /* Last element in `outlist', where a new element can be added. */ - static struct outlist *outlist_end = &outlist_head; - --/* Tab character separating fields. If negative, fields are separated -- by any nonempty string of blanks, otherwise by exactly one -- tab character whose value (when cast to unsigned char) equals TAB. */ --static int tab = -1; -+/* Tab character separating fields. If NULL, fields are separated -+ by any nonempty string of blanks. */ -+static char *tab = NULL; ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ + -+/* The number of bytes used for tab. */ -+static size_t tablen = 0; - - /* If nonzero, check that the input is correctly ordered. 
*/ - static enum -@@ -199,10 +217,11 @@ - if (ptr == lim) - return; - -- if (0 <= tab) -+ if (tab != NULL) - { -+ unsigned char t = tab[0]; - char *sep; -- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) -+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) - extract_field (line, ptr, sep - ptr); - } - else -@@ -229,6 +248,148 @@ - extract_field (line, ptr, lim - ptr); - } - -+#if HAVE_MBRTOWC -+static void -+xfields_multibyte (struct line *line) ++/* By default, convert all tabs to spaces. ++ Preserves backspace characters in the output; they decrement the ++ column count for tab calculations. ++ The default action is equivalent to -8. ++ ++ Options: ++ --tabs=tab1[,tab2[,...]] ++ -t tab1[,tab2[,...]] ++ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1 ++ columns apart instead of the default 8. Otherwise, ++ set the tabs at columns tab1, tab2, etc. (numbered from ++ 0); replace any tabs beyond the tab stops given with ++ single spaces. ++ --initial ++ -i Only convert initial tabs on each line to spaces. ++ ++ David MacKenzie */ ++ ++#include ++ ++#include ++#include ++#include ++#include "system.h" ++#include "error.h" ++#include "quote.h" ++#include "xstrndup.h" ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "expand" ++ ++#define AUTHORS proper_name ("David MacKenzie") ++ ++/* If true, convert blanks even after nonblank characters have been ++ read on the line. */ ++static bool convert_entire_line; ++ ++/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */ ++static uintmax_t tab_size; ++ ++/* Array of the explicit column numbers of the tab stops; ++ after `tab_list' is exhausted, each additional tab is replaced ++ by a space. The first column is column 0. */ ++static uintmax_t *tab_list; ++ ++/* The number of allocated entries in `tab_list'. 
*/ ++static size_t n_tabs_allocated; ++ ++/* The index of the first invalid element of `tab_list', ++ where the next element can be added. */ ++static size_t first_free_tab; ++ ++/* Null-terminated array of input filenames. */ ++static char **file_list; ++ ++/* Default for `file_list' if no files are given on the command line. */ ++static char *stdin_argv[] = +{ -+ char *ptr = line->buf.buffer; -+ char const *lim = ptr + line->buf.length - 1; -+ wchar_t wc = 0; -+ size_t mblength = 1; -+ mbstate_t state, state_bak; ++ (char *) "-", NULL ++}; + -+ memset (&state, 0, sizeof (mbstate_t)); ++/* True if we have ever read standard input. */ ++static bool have_read_stdin; + -+ if (ptr >= lim) ++/* The desired exit status. */ ++static int exit_status; ++ ++static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::"; ++ ++static struct option const longopts[] = ++{ ++ {"tabs", required_argument, NULL, 't'}, ++ {"initial", no_argument, NULL, 'i'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... 
[FILE]...\n\ ++"), ++ program_name); ++ fputs (_("\ ++Convert tabs in each FILE to spaces, writing to standard output.\n\ ++With no FILE, or when FILE is -, read standard input.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ -i, --initial do not convert tabs after non blanks\n\ ++ -t, --tabs=NUMBER have tabs NUMBER characters apart, not 8\n\ ++"), stdout); ++ fputs (_("\ ++ -t, --tabs=LIST use comma separated list of explicit tab positions\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++/* Add tab stop TABVAL to the end of `tab_list'. */ ++ ++static void ++add_tab_stop (uintmax_t tabval) ++{ ++ if (first_free_tab == n_tabs_allocated) ++ tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); ++ tab_list[first_free_tab++] = tabval; ++} ++ ++/* Add the comma or blank separated list of tab stops STOPS ++ to the list of tab stops. */ ++ ++static void ++parse_tab_stops (char const *stops) ++{ ++ bool have_tabval = false; ++ uintmax_t tabval IF_LINT (= 0); ++ char const *num_start IF_LINT (= NULL); ++ bool ok = true; ++ ++ for (; *stops; stops++) ++ { ++ if (*stops == ',' || isblank (to_uchar (*stops))) ++ { ++ if (have_tabval) ++ add_tab_stop (tabval); ++ have_tabval = false; ++ } ++ else if (ISDIGIT (*stops)) ++ { ++ if (!have_tabval) ++ { ++ tabval = 0; ++ have_tabval = true; ++ num_start = stops; ++ } ++ ++ /* Detect overflow. 
*/ ++ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) ++ { ++ size_t len = strspn (num_start, "0123456789"); ++ char *bad_num = xstrndup (num_start, len); ++ error (0, 0, _("tab stop is too large %s"), quote (bad_num)); ++ free (bad_num); ++ ok = false; ++ stops = num_start + len - 1; ++ } ++ } ++ else ++ { ++ error (0, 0, _("tab size contains invalid character(s): %s"), ++ quote (stops)); ++ ok = false; ++ break; ++ } ++ } ++ ++ if (!ok) ++ exit (EXIT_FAILURE); ++ ++ if (have_tabval) ++ add_tab_stop (tabval); ++} ++ ++/* Check that the list of tab stops TABS, with ENTRIES entries, ++ contains only nonzero, ascending values. */ ++ ++static void ++validate_tab_stops (uintmax_t const *tabs, size_t entries) ++{ ++ uintmax_t prev_tab = 0; ++ size_t i; ++ ++ for (i = 0; i < entries; i++) ++ { ++ if (tabs[i] == 0) ++ error (EXIT_FAILURE, 0, _("tab size cannot be 0")); ++ if (tabs[i] <= prev_tab) ++ error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); ++ prev_tab = tabs[i]; ++ } ++} ++ ++/* Close the old stream pointer FP if it is non-NULL, ++ and return a new one opened to read the next input file. ++ Open a filename of `-' as the standard input. ++ Return NULL if there are no more input files. */ ++ ++static FILE * ++next_file (FILE *fp) ++{ ++ static char *prev_file; ++ char *file; ++ ++ if (fp) ++ { ++ if (ferror (fp)) ++ { ++ error (0, errno, "%s", prev_file); ++ exit_status = EXIT_FAILURE; ++ } ++ if (STREQ (prev_file, "-")) ++ clearerr (fp); /* Also clear EOF. */ ++ else if (fclose (fp) != 0) ++ { ++ error (0, errno, "%s", prev_file); ++ exit_status = EXIT_FAILURE; ++ } ++ } ++ ++ while ((file = *file_list++) != NULL) ++ { ++ if (STREQ (file, "-")) ++ { ++ have_read_stdin = true; ++ prev_file = file; ++ return stdin; ++ } ++ fp = fopen (file, "r"); ++ if (fp) ++ { ++ prev_file = file; ++ return fp; ++ } ++ error (0, errno, "%s", file); ++ exit_status = EXIT_FAILURE; ++ } ++ return NULL; ++} ++ ++/* Change tabs to spaces, writing to stdout. 
++ Read each file in `file_list', in order. */ ++ ++static void ++expand (void) ++{ ++ /* Input stream. */ ++ FILE *fp = next_file (NULL); ++ ++ if (!fp) + return; + -+ if (tab != NULL) ++ for (;;) + { -+ unsigned char t = tab[0]; -+ char *sep = ptr; -+ for (; ptr < lim; ptr = sep + mblength) -+ { -+ sep = ptr; -+ while (sep < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ /* Input character, or EOF. */ ++ int c; + -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; ++ /* If true, perform translations. */ ++ bool convert = true; + -+ if (mblength == tablen && !memcmp (sep, tab, mblength)) -+ break; -+ else -+ { -+ sep += mblength; -+ continue; -+ } -+ } + -+ if (sep >= lim) -+ break; ++ /* The following variables have valid values only when CONVERT ++ is true: */ + -+ extract_field (line, ptr, sep - ptr); -+ } -+ } -+ else -+ { -+ /* Skip leading blanks before the first field. */ -+ while(ptr < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ /* Column of next input character. */ ++ uintmax_t column = 0; + -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; ++ /* Index in TAB_LIST of next tab stop to examine. */ ++ size_t tab_index = 0; + -+ if (!iswblank(wc)) -+ break; -+ ptr += mblength; -+ } ++ ++ /* Convert a line of text. */ + + do -+ { -+ char *sep; -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 
1 : mblength; -+ -+ sep = ptr + mblength; -+ while (sep < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (iswblank (wc)) -+ break; -+ -+ sep += mblength; -+ } -+ -+ extract_field (line, ptr, sep - ptr); -+ if (sep >= lim) -+ return; -+ -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ ptr = sep + mblength; -+ while (ptr < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (!iswblank (wc)) -+ break; -+ -+ ptr += mblength; -+ } -+ } -+ while (ptr < lim); -+ } -+ -+ extract_field (line, ptr, lim - ptr); -+} -+#endif -+ - static void - freeline (struct line *line) - { -@@ -377,11 +601,18 @@ - - /* Print the join of LINE1 and LINE2. */ - -+#define PUT_TAB_CHAR \ -+ do \ -+ { \ -+ (tab != NULL) ? \ -+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ -+ } \ -+ while (0) -+ - static void - prjoin (struct line const *line1, struct line const *line2) - { - const struct outlist *outlist; -- char output_separator = tab < 0 ? 
' ' : tab; - - outlist = outlist_head.next; - if (outlist) -@@ -416,7 +647,7 @@ - o = o->next; - if (o == NULL) - break; -- putchar (output_separator); -+ PUT_TAB_CHAR; - } - putchar ('\n'); - } -@@ -434,23 +665,23 @@ - prfield (join_field_1, line1); - for (i = 0; i < join_field_1 && i < line1->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line1); - } - for (i = join_field_1 + 1; i < line1->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line1); - } - - for (i = 0; i < join_field_2 && i < line2->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line2); - } - for (i = join_field_2 + 1; i < line2->nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line2); - } - putchar ('\n'); -@@ -859,20 +1090,41 @@ - - case 't': - { -- unsigned char newtab = optarg[0]; -- if (! newtab) -+ char *newtab; -+ size_t newtablen; -+ if (! optarg[0]) - error (EXIT_FAILURE, 0, _("empty tab")); -- if (optarg[1]) -+ newtab = xstrdup (optarg); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, 0, sizeof (mbstate_t)); -+ newtablen = mbrtowc (NULL, newtab, -+ strnlen (newtab, MB_LEN_MAX), -+ &state); -+ if (newtablen == (size_t) 0 -+ || newtablen == (size_t) -1 -+ || newtablen == (size_t) -2) -+ newtablen = 1; -+ } -+ else -+#endif -+ newtablen = 1; -+ -+ if (newtablen == 1 && newtab[1]) -+ { -+ if (STREQ (newtab, "\\0")) -+ newtab[0] = '\0'; -+ } -+ if (tab != NULL && strcmp (tab, newtab)) - { -- if (STREQ (optarg, "\\0")) -- newtab = '\0'; -- else -- error (EXIT_FAILURE, 0, _("multi-character tab %s"), -- quote (optarg)); -+ free (newtab); -+ error (EXIT_FAILURE, 0, _("incompatible tabs")); - } -- if (0 <= tab && tab != newtab) -- error (EXIT_FAILURE, 0, _("incompatible tabs")); - tab = newtab; -+ tablen = newtablen; - } - break; - -diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c ---- 
coreutils-6.11-orig/src/join.c 2008-04-21 13:44:32.000000000 +0200 -+++ coreutils-6.11/src/join.c 2008-04-21 14:03:22.000000000 +0200 -@@ -324,56 +324,115 @@ keycmp (struct line const *line1, struct - size_t jf_1, size_t jf_2) - { - /* Start of field to compare in each file. */ -- char *beg1; -- char *beg2; -- -- size_t len1; -- size_t len2; /* Length of fields to compare. */ -+ char *beg[2]; -+ char *copy[2]; -+ size_t len[2]; /* Length of fields to compare. */ - int diff; -+ int i, j; - - if (jf_1 < line1->nfields) - { -- beg1 = line1->fields[jf_1].beg; -- len1 = line1->fields[jf_1].len; -+ beg[0] = line1->fields[jf_1].beg; -+ len[0] = line1->fields[jf_1].len; - } - else - { -- beg1 = NULL; -- len1 = 0; -+ beg[0] = NULL; -+ len[0] = 0; - } - - if (jf_2 < line2->nfields) - { -- beg2 = line2->fields[jf_2].beg; -- len2 = line2->fields[jf_2].len; -+ beg[1] = line2->fields[jf_2].beg; -+ len[1] = line2->fields[jf_2].len; - } - else - { -- beg2 = NULL; -- len2 = 0; -+ beg[1] = NULL; -+ len[1] = 0; - } - -- if (len1 == 0) -- return len2 == 0 ? 0 : -1; -- if (len2 == 0) -+ if (len[0] == 0) -+ return len[1] == 0 ? 0 : -1; -+ if (len[1] == 0) - return 1; - - if (ignore_case) - { -- /* FIXME: ignore_case does not work with NLS (in particular, -- with multibyte chars). 
*/ -- diff = memcasecmp (beg1, beg2, MIN (len1, len2)); -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ size_t mblength; -+ wchar_t wc, uwc; -+ mbstate_t state, state_bak; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+ -+ for (i = 0; i < 2; i++) -+ { -+ copy[i] = alloca (len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]);) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); -+ -+ switch (mblength) -+ { -+ case (size_t) -1: -+ case (size_t) -2: -+ state = state_bak; -+ /* Fall through */ -+ case 0: -+ mblength = 1; -+ break; -+ -+ default: -+ uwc = towupper (wc); -+ -+ if (uwc != wc) -+ { -+ mbstate_t state_wc; -+ -+ memset (&state_wc, '\0', sizeof (mbstate_t)); -+ wcrtomb (copy[i] + j, uwc, &state_wc); -+ } -+ else -+ memcpy (copy[i] + j, beg[i] + j, mblength); -+ } -+ j += mblength; -+ } -+ copy[i][j] = '\0'; -+ } -+ } -+ else -+#endif -+ { -+ for (i = 0; i < 2; i++) -+ { -+ copy[i] = alloca (len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]); j++) -+ copy[i][j] = toupper (beg[i][j]); -+ -+ copy[i][j] = '\0'; -+ } -+ } - } - else - { -- if (hard_LC_COLLATE) -- return xmemcoll (beg1, len1, beg2, len2); -- diff = memcmp (beg1, beg2, MIN (len1, len2)); -+ copy[0] = (unsigned char *) beg[0]; -+ copy[1] = (unsigned char *) beg[1]; - } - -+ if (hard_LC_COLLATE) -+ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); -+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); -+ -+ - if (diff) - return diff; -- return len1 < len2 ? -1 : len1 != len2; -+ return len[0] - len[1]; - } - - /* Check that successive input lines PREV and CURRENT from input file ---- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000 -@@ -23,6 +23,16 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ -+/* Get isw* functions. 
*/ -+#if HAVE_WCTYPE_H -+# include -+#endif -+ - #include "system.h" - #include "argmatch.h" - #include "linebuffer.h" -@@ -32,7 +42,19 @@ - #include "quote.h" - #include "xmemcoll.h" - #include "xstrtol.h" --#include "memcasecmp.h" -+#include "xmemcoll.h" -+ -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "uniq" -@@ -109,6 +131,10 @@ - /* Select whether/how to delimit groups of duplicate lines. */ - static enum delimit_method delimit_groups; - -+/* Function pointers. */ -+static char * -+(*find_field) (struct linebuffer *line); -+ - static struct option const longopts[] = - { - {"count", no_argument, NULL, 'c'}, -@@ -198,7 +224,7 @@ - return a pointer to the beginning of the line's field to be compared. 
*/ - - static char * --find_field (struct linebuffer const *line) -+find_field_uni (struct linebuffer *line) - { - size_t count; - char const *lp = line->buffer; -@@ -219,6 +245,83 @@ - return line->buffer + i; - } - -+#if HAVE_MBRTOWC -+ -+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ -+ do \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ CONVFAIL = 0; \ -+ state_bak = *STATEP; \ -+ \ -+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-2: \ -+ case (size_t)-1: \ -+ *STATEP = state_bak; \ -+ CONVFAIL++; \ -+ /* Fall through */ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+ } \ -+ while (0) -+ -+static char * -+find_field_multi (struct linebuffer *line) -+{ -+ size_t count; -+ char *lp = line->buffer; -+ size_t size = line->length - 1; -+ size_t pos; -+ size_t mblength; -+ wchar_t wc; -+ mbstate_t *statep; -+ int convfail; -+ -+ pos = 0; -+ statep = &(line->state); -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_fields && pos < size; count++) -+ { -+ while (pos < size) + { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (convfail || !iswblank (wc)) ++ while ((c = getc (fp)) < 0 && (fp = next_file (fp))) ++ continue; ++ ++ if (convert) + { -+ pos += mblength; -+ break; -+ } -+ pos += mblength; -+ } -+ -+ while (pos < size) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (!convfail && iswblank (wc)) -+ break; -+ -+ pos += mblength; -+ } -+ } -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_chars && pos < size; count++) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ pos += mblength; -+ } -+ -+ return lp + pos; -+} -+#endif -+ - /* Return false if two strings OLD and NEW match, true if not. - OLD and NEW point not to the beginnings of the lines - but rather to the beginnings of the fields to compare. 
-@@ -227,6 +330,8 @@ - static bool - different (char *old, char *new, size_t oldlen, size_t newlen) - { -+ char *copy_old, *copy_new; -+ - if (check_chars < oldlen) - oldlen = check_chars; - if (check_chars < newlen) -@@ -234,14 +339,92 @@ - - if (ignore_case) - { -- /* FIXME: This should invoke strcoll somehow. */ -- return oldlen != newlen || memcasecmp (old, new, oldlen); -+ size_t i; -+ -+ copy_old = alloca (oldlen + 1); -+ copy_new = alloca (oldlen + 1); -+ -+ for (i = 0; i < oldlen; i++) -+ { -+ copy_old[i] = toupper (old[i]); -+ copy_new[i] = toupper (new[i]); -+ } - } -- else if (hard_LC_COLLATE) -- return xmemcoll (old, oldlen, new, newlen) != 0; - else -- return oldlen != newlen || memcmp (old, new, oldlen); -+ { -+ copy_old = (char *)old; -+ copy_new = (char *)new; -+ } -+ -+ return xmemcoll (copy_old, oldlen, copy_new, newlen); -+} -+ -+#if HAVE_MBRTOWC -+static int -+different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate) -+{ -+ size_t i, j, chars; -+ const char *str[2]; -+ char *copy[2]; -+ size_t len[2]; -+ mbstate_t state[2]; -+ size_t mblength; -+ wchar_t wc, uwc; -+ mbstate_t state_bak; -+ -+ str[0] = old; -+ str[1] = new; -+ len[0] = oldlen; -+ len[1] = newlen; -+ state[0] = oldstate; -+ state[1] = newstate; -+ -+ for (i = 0; i < 2; i++) -+ { -+ copy[i] = alloca (len[i] + 1); -+ -+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) -+ { -+ state_bak = state[i]; -+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); -+ -+ switch (mblength) -+ { -+ case (size_t)-1: -+ case (size_t)-2: -+ state[i] = state_bak; -+ /* Fall through */ -+ case 0: -+ mblength = 1; -+ break; -+ -+ default: -+ if (ignore_case) ++ if (c == '\t') + { -+ uwc = towupper (wc); ++ /* Column the next input tab stop is on. 
*/ ++ uintmax_t next_tab_column; + -+ if (uwc != wc) -+ { -+ mbstate_t state_wc; -+ -+ memset (&state_wc, '\0', sizeof(mbstate_t)); -+ wcrtomb (copy[i] + j, uwc, &state_wc); -+ } ++ if (tab_size) ++ next_tab_column = column + (tab_size - column % tab_size); + else -+ memcpy (copy[i] + j, str[i] + j, mblength); ++ for (;;) ++ if (tab_index == first_free_tab) ++ { ++ next_tab_column = column + 1; ++ break; ++ } ++ else ++ { ++ uintmax_t tab = tab_list[tab_index++]; ++ if (column < tab) ++ { ++ next_tab_column = tab; ++ break; ++ } ++ } ++ ++ if (next_tab_column < column) ++ error (EXIT_FAILURE, 0, _("input line is too long")); ++ ++ while (++column < next_tab_column) ++ if (putchar (' ') < 0) ++ error (EXIT_FAILURE, errno, _("write error")); ++ ++ c = ' '; ++ } ++ else if (c == '\b') ++ { ++ /* Go back one column, and force recalculation of the ++ next tab stop. */ ++ column -= !!column; ++ tab_index -= !!tab_index; + } + else -+ memcpy (copy[i] + j, str[i] + j, mblength); ++ { ++ column++; ++ if (!column) ++ error (EXIT_FAILURE, 0, _("input line is too long")); ++ } ++ ++ convert &= convert_entire_line || !! isblank (c); + } -+ j += mblength; ++ ++ if (c < 0) ++ return; ++ ++ if (putchar (c) < 0) ++ error (EXIT_FAILURE, errno, _("write error")); + } -+ copy[i][j] = '\0'; -+ len[i] = j; ++ while (c != '\n'); + } ++} + -+ return xmemcoll (copy[0], len[0], copy[1], len[1]); - } -+#endif - - /* Output the line in linebuffer LINE to standard output - provided that the switches say it should be output. 
-@@ -295,15 +478,43 @@ - { - char *prevfield IF_LINT (= NULL); - size_t prevlen IF_LINT (= 0); -+#if HAVE_MBRTOWC -+ mbstate_t prevstate; ++int ++main (int argc, char **argv) ++{ ++ int c; + -+ memset (&prevstate, '\0', sizeof (mbstate_t)); -+#endif - - while (!feof (stdin)) - { - char *thisfield; - size_t thislen; -+#if HAVE_MBRTOWC -+ mbstate_t thisstate; -+#endif ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); + - if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) - break; - thisfield = find_field (thisline); - thislen = thisline->length - 1 - (thisfield - thisline->buffer); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ thisstate = thisline->state; ++ atexit (close_stdout); + -+ if (prevline->length == 0 || different_multi -+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) -+ { -+ fwrite (thisline->buffer, sizeof (char), -+ thisline->length, stdout); ++ have_read_stdin = false; ++ exit_status = EXIT_SUCCESS; ++ convert_entire_line = true; ++ tab_list = NULL; ++ first_free_tab = 0; + -+ SWAP_LINES (prevline, thisline); -+ prevfield = thisfield; -+ prevlen = thislen; -+ prevstate = thisstate; -+ } -+ } -+ else -+#endif - if (prevline->length == 0 - || different (thisfield, prevfield, thislen, prevlen)) - { -@@ -322,17 +533,26 @@ - size_t prevlen; - uintmax_t match_count = 0; - bool first_delimiter = true; -+#if HAVE_MBRTOWC -+ mbstate_t prevstate; -+#endif - - if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) - goto closefiles; - prevfield = find_field (prevline); - prevlen = prevline->length - 1 - (prevfield - prevline->buffer); -+#if HAVE_MBRTOWC -+ prevstate = prevline->state; -+#endif - - while (!feof (stdin)) - { - bool match; - char *thisfield; - size_t thislen; -+#if HAVE_MBRTOWC -+ mbstate_t thisstate; -+#endif - if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) - { - if (ferror (stdin)) -@@ -341,6 
+561,15 @@ - } - thisfield = find_field (thisline); - thislen = thisline->length - 1 - (thisfield - thisline->buffer); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ thisstate = thisline->state; -+ match = !different_multi (thisfield, prevfield, -+ thislen, prevlen, thisstate, prevstate); -+ } ++ while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) ++ { ++ switch (c) ++ { ++ case 'i': ++ convert_entire_line = false; ++ break; ++ ++ case 't': ++ parse_tab_stops (optarg); ++ break; ++ ++ case '0': case '1': case '2': case '3': case '4': ++ case '5': case '6': case '7': case '8': case '9': ++ if (optarg) ++ parse_tab_stops (optarg - 1); + else -+#endif - match = !different (thisfield, prevfield, thislen, prevlen); - match_count += match; - -@@ -373,6 +602,9 @@ - SWAP_LINES (prevline, thisline); - prevfield = thisfield; - prevlen = thislen; -+#if HAVE_MBRTOWC -+ prevstate = thisstate; -+#endif - if (!match) - match_count = 0; - } -@@ -417,6 +649,19 @@ - - atexit (close_stdout); - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ find_field = find_field_multi; ++ { ++ char tab_stop[2]; ++ tab_stop[0] = c; ++ tab_stop[1] = '\0'; ++ parse_tab_stops (tab_stop); ++ } ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } + } ++ ++ validate_tab_stops (tab_list, first_free_tab); ++ ++ if (first_free_tab == 0) ++ tab_size = 8; ++ else if (first_free_tab == 1) ++ tab_size = tab_list[0]; + else -+#endif -+ { -+ find_field = find_field_uni; -+ } ++ tab_size = 0; + ++ file_list = (optind < argc ? 
&argv[optind] : stdin_argv); + ++ expand (); + - skip_chars = 0; - skip_fields = 0; - check_chars = SIZE_MAX; ---- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000 -+++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000 -@@ -23,11 +23,33 @@ ++ if (have_read_stdin && fclose (stdin) != 0) ++ error (EXIT_FAILURE, errno, "-"); ++ ++ exit (exit_status); ++} +diff -urNp coreutils-8.0-orig/src/fold.c coreutils-8.0/src/fold.c +--- coreutils-8.0-orig/src/fold.c 2009-09-23 10:25:44.000000000 +0200 ++++ coreutils-8.0/src/fold.c 2009-10-07 10:07:16.000000000 +0200 +@@ -22,11 +22,33 @@ #include #include @@ -1214,7 +2230,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c #define TAB_WIDTH 8 /* The official name of this program (e.g., no `g' prefix). */ -@@ -35,20 +57,41 @@ +@@ -34,20 +56,41 @@ #define AUTHORS proper_name ("David MacKenzie") @@ -1260,7 +2276,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c {"spaces", no_argument, NULL, 's'}, {"width", required_argument, NULL, 'w'}, {GETOPT_HELP_OPTION_DECL}, -@@ -81,6 +124,7 @@ +@@ -77,6 +120,7 @@ Mandatory arguments to long options are "), stdout); fputs (_("\ -b, --bytes count bytes rather than columns\n\ @@ -1268,7 +2284,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c -s, --spaces break at spaces\n\ -w, --width=WIDTH use WIDTH columns instead of 80\n\ "), stdout); -@@ -98,7 +142,7 @@ +@@ -94,7 +138,7 @@ Mandatory arguments to long options are static size_t adjust_column (size_t column, char c) { @@ -1277,7 +2293,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c { if (c == '\b') { -@@ -121,30 +165,14 @@ +@@ -117,30 +161,14 @@ adjust_column (size_t column, char c) to stdout, with maximum line length WIDTH. Return true if successful. 
*/ @@ -1310,7 +2326,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c while ((c = getc (istream)) != EOF) { -@@ -172,6 +200,15 @@ +@@ -168,6 +196,15 @@ fold_file (char const *filename, size_t bool found_blank = false; size_t logical_end = offset_out; @@ -1326,16 +2342,16 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* Look for the last blank. */ while (logical_end) { -@@ -218,11 +255,222 @@ +@@ -214,11 +251,222 @@ fold_file (char const *filename, size_t line_out[offset_out++] = c; } - saved_errno = errno; + *saved_errno = errno; -+ -+ if (offset_out) -+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); -+ + + if (offset_out) + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); + +} + +#if HAVE_MBRTOWC @@ -1508,10 +2524,10 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c + } + + *saved_errno = errno; - - if (offset_out) - fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); - ++ ++ if (offset_out) ++ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); ++ +} +#endif + @@ -1550,7 +2566,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c if (ferror (istream)) { error (0, saved_errno, "%s", filename); -@@ -255,7 +506,8 @@ +@@ -251,7 +499,8 @@ main (int argc, char **argv) atexit (close_stdout); @@ -1560,7 +2576,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) { -@@ -264,7 +516,15 @@ +@@ -260,7 +509,15 @@ main (int argc, char **argv) switch (optc) { case 'b': /* Count bytes rather than columns. */ @@ -1577,9 +2593,5774 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c break; case 's': /* Break at word boundaries. 
*/ ---- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000 -+++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000 -@@ -23,10 +23,19 @@ +diff -urNp coreutils-8.0-orig/src/fold.c.orig coreutils-8.0/src/fold.c.orig +--- coreutils-8.0-orig/src/fold.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/fold.c.orig 2009-09-23 10:25:44.000000000 +0200 +@@ -0,0 +1,314 @@ ++/* fold -- wrap each input line to fit in specified width. ++ Copyright (C) 91, 1995-2006, 2008-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* Written by David MacKenzie, djm@gnu.ai.mit.edu. */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#include "system.h" ++#include "error.h" ++#include "quote.h" ++#include "xstrtol.h" ++ ++#define TAB_WIDTH 8 ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "fold" ++ ++#define AUTHORS proper_name ("David MacKenzie") ++ ++/* If nonzero, try to break on whitespace. */ ++static bool break_spaces; ++ ++/* If nonzero, count bytes, not column positions. */ ++static bool count_bytes; ++ ++/* If nonzero, at least one of the files we read was standard input. 
*/ ++static bool have_read_stdin; ++ ++static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; ++ ++static struct option const longopts[] = ++{ ++ {"bytes", no_argument, NULL, 'b'}, ++ {"spaces", no_argument, NULL, 's'}, ++ {"width", required_argument, NULL, 'w'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... [FILE]...\n\ ++"), ++ program_name); ++ fputs (_("\ ++Wrap input lines in each FILE (standard input by default), writing to\n\ ++standard output.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ -b, --bytes count bytes rather than columns\n\ ++ -s, --spaces break at spaces\n\ ++ -w, --width=WIDTH use WIDTH columns instead of 80\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++/* Assuming the current column is COLUMN, return the column that ++ printing C will move the cursor to. ++ The first column is 0. */ ++ ++static size_t ++adjust_column (size_t column, char c) ++{ ++ if (!count_bytes) ++ { ++ if (c == '\b') ++ { ++ if (column > 0) ++ column--; ++ } ++ else if (c == '\r') ++ column = 0; ++ else if (c == '\t') ++ column += TAB_WIDTH - column % TAB_WIDTH; ++ else /* if (isprint (c)) */ ++ column++; ++ } ++ else ++ column++; ++ return column; ++} ++ ++/* Fold file FILENAME, or standard input if FILENAME is "-", ++ to stdout, with maximum line length WIDTH. ++ Return true if successful. */ ++ ++static bool ++fold_file (char const *filename, size_t width) ++{ ++ FILE *istream; ++ int c; ++ size_t column = 0; /* Screen column where next char will go. 
*/ ++ size_t offset_out = 0; /* Index in `line_out' for next char. */ ++ static char *line_out = NULL; ++ static size_t allocated_out = 0; ++ int saved_errno; ++ ++ if (STREQ (filename, "-")) ++ { ++ istream = stdin; ++ have_read_stdin = true; ++ } ++ else ++ istream = fopen (filename, "r"); ++ ++ if (istream == NULL) ++ { ++ error (0, errno, "%s", filename); ++ return false; ++ } ++ ++ while ((c = getc (istream)) != EOF) ++ { ++ if (offset_out + 1 >= allocated_out) ++ line_out = X2REALLOC (line_out, &allocated_out); ++ ++ if (c == '\n') ++ { ++ line_out[offset_out++] = c; ++ fwrite (line_out, sizeof (char), offset_out, stdout); ++ column = offset_out = 0; ++ continue; ++ } ++ ++ rescan: ++ column = adjust_column (column, c); ++ ++ if (column > width) ++ { ++ /* This character would make the line too long. ++ Print the line plus a newline, and make this character ++ start the next line. */ ++ if (break_spaces) ++ { ++ bool found_blank = false; ++ size_t logical_end = offset_out; ++ ++ /* Look for the last blank. */ ++ while (logical_end) ++ { ++ --logical_end; ++ if (isblank (to_uchar (line_out[logical_end]))) ++ { ++ found_blank = true; ++ break; ++ } ++ } ++ ++ if (found_blank) ++ { ++ size_t i; ++ ++ /* Found a blank. Don't output the part after it. */ ++ logical_end++; ++ fwrite (line_out, sizeof (char), (size_t) logical_end, ++ stdout); ++ putchar ('\n'); ++ /* Move the remainder to the beginning of the next line. ++ The areas being copied here might overlap. 
*/ ++ memmove (line_out, line_out + logical_end, ++ offset_out - logical_end); ++ offset_out -= logical_end; ++ for (column = i = 0; i < offset_out; i++) ++ column = adjust_column (column, line_out[i]); ++ goto rescan; ++ } ++ } ++ ++ if (offset_out == 0) ++ { ++ line_out[offset_out++] = c; ++ continue; ++ } ++ ++ line_out[offset_out++] = '\n'; ++ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); ++ column = offset_out = 0; ++ goto rescan; ++ } ++ ++ line_out[offset_out++] = c; ++ } ++ ++ saved_errno = errno; ++ ++ if (offset_out) ++ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); ++ ++ if (ferror (istream)) ++ { ++ error (0, saved_errno, "%s", filename); ++ if (!STREQ (filename, "-")) ++ fclose (istream); ++ return false; ++ } ++ if (!STREQ (filename, "-") && fclose (istream) == EOF) ++ { ++ error (0, errno, "%s", filename); ++ return false; ++ } ++ ++ return true; ++} ++ ++int ++main (int argc, char **argv) ++{ ++ size_t width = 80; ++ int i; ++ int optc; ++ bool ok; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdout); ++ ++ break_spaces = count_bytes = have_read_stdin = false; ++ ++ while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) ++ { ++ char optargbuf[2]; ++ ++ switch (optc) ++ { ++ case 'b': /* Count bytes rather than columns. */ ++ count_bytes = true; ++ break; ++ ++ case 's': /* Break at word boundaries. */ ++ break_spaces = true; ++ break; ++ ++ case '0': case '1': case '2': case '3': case '4': ++ case '5': case '6': case '7': case '8': case '9': ++ if (optarg) ++ optarg--; ++ else ++ { ++ optargbuf[0] = optc; ++ optargbuf[1] = '\0'; ++ optarg = optargbuf; ++ } ++ /* Fall through. */ ++ case 'w': /* Line width. */ ++ { ++ unsigned long int tmp_ulong; ++ if (! 
(xstrtoul (optarg, NULL, 10, &tmp_ulong, "") == LONGINT_OK ++ && 0 < tmp_ulong && tmp_ulong < SIZE_MAX - TAB_WIDTH)) ++ error (EXIT_FAILURE, 0, ++ _("invalid number of columns: %s"), quote (optarg)); ++ width = tmp_ulong; ++ } ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ if (argc == optind) ++ ok = fold_file ("-", width); ++ else ++ { ++ ok = true; ++ for (i = optind; i < argc; i++) ++ ok &= fold_file (argv[i], width); ++ } ++ ++ if (have_read_stdin && fclose (stdin) == EOF) ++ error (EXIT_FAILURE, errno, "-"); ++ ++ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); ++} +diff -urNp coreutils-8.0-orig/src/join.c coreutils-8.0/src/join.c +--- coreutils-8.0-orig/src/join.c 2009-09-23 10:25:44.000000000 +0200 ++++ coreutils-8.0/src/join.c 2009-10-07 10:07:16.000000000 +0200 +@@ -22,17 +22,31 @@ + #include + #include + ++/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ ++#if HAVE_WCHAR_H ++# include ++#endif ++ ++/* Get iswblank(), towupper. */ ++#if HAVE_WCTYPE_H ++# include ++#endif ++ + #include "system.h" + #include "error.h" + #include "hard-locale.h" + #include "linebuffer.h" +-#include "memcasecmp.h" + #include "quote.h" + #include "stdio--.h" + #include "xmemcoll.h" + #include "xstrtol.h" + #include "argmatch.h" + ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ + /* The official name of this program (e.g., no `g' prefix). */ + #define PROGRAM_NAME "join" + +@@ -121,10 +135,12 @@ static struct outlist outlist_head; + /* Last element in `outlist', where a new element can be added. */ + static struct outlist *outlist_end = &outlist_head; + +-/* Tab character separating fields. 
If negative, fields are separated +- by any nonempty string of blanks, otherwise by exactly one +- tab character whose value (when cast to unsigned char) equals TAB. */ +-static int tab = -1; ++/* Tab character separating fields. If NULL, fields are separated ++ by any nonempty string of blanks. */ ++static char *tab = NULL; ++ ++/* The number of bytes used for tab. */ ++static size_t tablen = 0; + + /* If nonzero, check that the input is correctly ordered. */ + static enum +@@ -239,10 +255,11 @@ xfields (struct line *line) + if (ptr == lim) + return; + +- if (0 <= tab) ++ if (tab != NULL) + { ++ unsigned char t = tab[0]; + char *sep; +- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) ++ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) + extract_field (line, ptr, sep - ptr); + } + else +@@ -269,6 +286,148 @@ xfields (struct line *line) + extract_field (line, ptr, lim - ptr); + } + ++#if HAVE_MBRTOWC ++static void ++xfields_multibyte (struct line *line) ++{ ++ char *ptr = line->buf.buffer; ++ char const *lim = ptr + line->buf.length - 1; ++ wchar_t wc = 0; ++ size_t mblength = 1; ++ mbstate_t state, state_bak; ++ ++ memset (&state, 0, sizeof (mbstate_t)); ++ ++ if (ptr >= lim) ++ return; ++ ++ if (tab != NULL) ++ { ++ unsigned char t = tab[0]; ++ char *sep = ptr; ++ for (; ptr < lim; ptr = sep + mblength) ++ { ++ sep = ptr; ++ while (sep < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (mblength == tablen && !memcmp (sep, tab, mblength)) ++ break; ++ else ++ { ++ sep += mblength; ++ continue; ++ } ++ } ++ ++ if (sep >= lim) ++ break; ++ ++ extract_field (line, ptr, sep - ptr); ++ } ++ } ++ else ++ { ++ /* Skip leading blanks before the first field. 
*/ ++ while(ptr < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (!iswblank(wc)) ++ break; ++ ptr += mblength; ++ } ++ ++ do ++ { ++ char *sep; ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ sep = ptr + mblength; ++ while (sep < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (iswblank (wc)) ++ break; ++ ++ sep += mblength; ++ } ++ ++ extract_field (line, ptr, sep - ptr); ++ if (sep >= lim) ++ return; ++ ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ ptr = sep + mblength; ++ while (ptr < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (!iswblank (wc)) ++ break; ++ ++ ptr += mblength; ++ } ++ } ++ while (ptr < lim); ++ } ++ ++ extract_field (line, ptr, lim - ptr); ++} ++#endif ++ + static void + freeline (struct line *line) + { +@@ -287,56 +446,115 @@ keycmp (struct line const *line1, struct + size_t jf_1, size_t jf_2) + { + /* Start of field to compare in each file. 
*/ +- char *beg1; +- char *beg2; +- +- size_t len1; +- size_t len2; /* Length of fields to compare. */ ++ char *beg[2]; ++ char *copy[2]; ++ size_t len[2]; /* Length of fields to compare. */ + int diff; ++ int i, j; + + if (jf_1 < line1->nfields) + { +- beg1 = line1->fields[jf_1].beg; +- len1 = line1->fields[jf_1].len; ++ beg[0] = line1->fields[jf_1].beg; ++ len[0] = line1->fields[jf_1].len; + } + else + { +- beg1 = NULL; +- len1 = 0; ++ beg[0] = NULL; ++ len[0] = 0; + } + + if (jf_2 < line2->nfields) + { +- beg2 = line2->fields[jf_2].beg; +- len2 = line2->fields[jf_2].len; ++ beg[1] = line2->fields[jf_2].beg; ++ len[1] = line2->fields[jf_2].len; + } + else + { +- beg2 = NULL; +- len2 = 0; ++ beg[1] = NULL; ++ len[1] = 0; + } + +- if (len1 == 0) +- return len2 == 0 ? 0 : -1; +- if (len2 == 0) ++ if (len[0] == 0) ++ return len[1] == 0 ? 0 : -1; ++ if (len[1] == 0) + return 1; + + if (ignore_case) + { +- /* FIXME: ignore_case does not work with NLS (in particular, +- with multibyte chars). 
*/ +- diff = memcasecmp (beg1, beg2, MIN (len1, len2)); ++#ifdef HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ size_t mblength; ++ wchar_t wc, uwc; ++ mbstate_t state, state_bak; ++ ++ memset (&state, '\0', sizeof (mbstate_t)); ++ ++ for (i = 0; i < 2; i++) ++ { ++ copy[i] = alloca (len[i] + 1); ++ ++ for (j = 0; j < MIN (len[0], len[1]);) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); ++ ++ switch (mblength) ++ { ++ case (size_t) -1: ++ case (size_t) -2: ++ state = state_bak; ++ /* Fall through */ ++ case 0: ++ mblength = 1; ++ break; ++ ++ default: ++ uwc = towupper (wc); ++ ++ if (uwc != wc) ++ { ++ mbstate_t state_wc; ++ ++ memset (&state_wc, '\0', sizeof (mbstate_t)); ++ wcrtomb (copy[i] + j, uwc, &state_wc); ++ } ++ else ++ memcpy (copy[i] + j, beg[i] + j, mblength); ++ } ++ j += mblength; ++ } ++ copy[i][j] = '\0'; ++ } ++ } ++ else ++#endif ++ { ++ for (i = 0; i < 2; i++) ++ { ++ copy[i] = alloca (len[i] + 1); ++ ++ for (j = 0; j < MIN (len[0], len[1]); j++) ++ copy[i][j] = toupper (beg[i][j]); ++ ++ copy[i][j] = '\0'; ++ } ++ } + } + else + { +- if (hard_LC_COLLATE) +- return xmemcoll (beg1, len1, beg2, len2); +- diff = memcmp (beg1, beg2, MIN (len1, len2)); ++ copy[0] = (unsigned char *) beg[0]; ++ copy[1] = (unsigned char *) beg[1]; + } + ++ if (hard_LC_COLLATE) ++ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); ++ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); ++ ++ + if (diff) + return diff; +- return len1 < len2 ? -1 : len1 != len2; ++ return len[0] - len[1]; + } + + /* Check that successive input lines PREV and CURRENT from input file +@@ -417,6 +635,11 @@ get_line (FILE *fp, struct line **linep, + return false; + } + ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ xfields_multibyte (line); ++ else ++#endif + xfields (line); + + if (prevline[which - 1]) +@@ -518,11 +741,18 @@ prfield (size_t n, struct line const *li + + /* Print the join of LINE1 and LINE2. 
*/ + ++#define PUT_TAB_CHAR \ ++ do \ ++ { \ ++ (tab != NULL) ? \ ++ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ ++ } \ ++ while (0) ++ + static void + prjoin (struct line const *line1, struct line const *line2) + { + const struct outlist *outlist; +- char output_separator = tab < 0 ? ' ' : tab; + + outlist = outlist_head.next; + if (outlist) +@@ -557,7 +787,7 @@ prjoin (struct line const *line1, struct + o = o->next; + if (o == NULL) + break; +- putchar (output_separator); ++ PUT_TAB_CHAR; + } + putchar ('\n'); + } +@@ -575,23 +805,23 @@ prjoin (struct line const *line1, struct + prfield (join_field_1, line1); + for (i = 0; i < join_field_1 && i < line1->nfields; ++i) + { +- putchar (output_separator); ++ PUT_TAB_CHAR; + prfield (i, line1); + } + for (i = join_field_1 + 1; i < line1->nfields; ++i) + { +- putchar (output_separator); ++ PUT_TAB_CHAR; + prfield (i, line1); + } + + for (i = 0; i < join_field_2 && i < line2->nfields; ++i) + { +- putchar (output_separator); ++ PUT_TAB_CHAR; + prfield (i, line2); + } + for (i = join_field_2 + 1; i < line2->nfields; ++i) + { +- putchar (output_separator); ++ PUT_TAB_CHAR; + prfield (i, line2); + } + putchar ('\n'); +@@ -1022,20 +1252,41 @@ main (int argc, char **argv) + + case 't': + { +- unsigned char newtab = optarg[0]; +- if (! newtab) ++ char *newtab; ++ size_t newtablen; ++ if (! 
optarg[0]) + error (EXIT_FAILURE, 0, _("empty tab")); +- if (optarg[1]) ++ newtab = xstrdup (optarg); ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ mbstate_t state; ++ ++ memset (&state, 0, sizeof (mbstate_t)); ++ newtablen = mbrtowc (NULL, newtab, ++ strnlen (newtab, MB_LEN_MAX), ++ &state); ++ if (newtablen == (size_t) 0 ++ || newtablen == (size_t) -1 ++ || newtablen == (size_t) -2) ++ newtablen = 1; ++ } ++ else ++#endif ++ newtablen = 1; ++ ++ if (newtablen == 1 && newtab[1]) ++ { ++ if (STREQ (newtab, "\\0")) ++ newtab[0] = '\0'; ++ } ++ if (tab != NULL && strcmp (tab, newtab)) + { +- if (STREQ (optarg, "\\0")) +- newtab = '\0'; +- else +- error (EXIT_FAILURE, 0, _("multi-character tab %s"), +- quote (optarg)); ++ free (newtab); ++ error (EXIT_FAILURE, 0, _("incompatible tabs")); + } +- if (0 <= tab && tab != newtab) +- error (EXIT_FAILURE, 0, _("incompatible tabs")); + tab = newtab; ++ tablen = newtablen; + } + break; + +diff -urNp coreutils-8.0-orig/src/join.c.orig coreutils-8.0/src/join.c.orig +--- coreutils-8.0-orig/src/join.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/join.c.orig 2009-10-07 10:07:16.000000000 +0200 +@@ -0,0 +1,1360 @@ ++/* join - join lines of two files on a common field ++ Copyright (C) 91, 1995-2006, 2008-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . ++ ++ Written by Mike Haertel, mike@gnu.ai.mit.edu. 
*/ ++ ++#include <config.h> ++ ++#include <assert.h> ++#include <sys/types.h> ++#include <getopt.h> ++ ++/* Get mbstate_t, mbrtowc(), wcrtomb(). */ ++#if HAVE_WCHAR_H ++# include <wchar.h> ++#endif ++ ++/* Get iswblank(), towupper(). */ ++#if HAVE_WCTYPE_H ++# include <wctype.h> ++#endif ++ ++#include "system.h" ++#include "error.h" ++#include "hard-locale.h" ++#include "linebuffer.h" ++#include "quote.h" ++#include "stdio--.h" ++#include "xmemcoll.h" ++#include "xstrtol.h" ++#include "argmatch.h" ++ ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "join" ++ ++#define AUTHORS proper_name ("Mike Haertel") ++ ++#define join system_join ++ ++#define SWAPLINES(a, b) do { \ ++ struct line *tmp = a; \ ++ a = b; \ ++ b = tmp; \ ++} while (0); ++ ++/* An element of the list identifying which fields to print for each ++ output line. */ ++struct outlist ++ { ++ /* File number: 0, 1, or 2. 0 means use the join field. ++ 1 means use the first file argument, 2 the second. */ ++ int file; ++ ++ /* Field index (zero-based), specified only when FILE is 1 or 2. */ ++ size_t field; ++ ++ struct outlist *next; ++ }; ++ ++/* A field of a line. */ ++struct field ++ { ++ char *beg; /* First character in field. */ ++ size_t len; /* The length of the field. */ ++ }; ++ ++/* A line read from an input file. */ ++struct line ++ { ++ struct linebuffer buf; /* The line itself. */ ++ size_t nfields; /* Number of elements in `fields'. */ ++ size_t nfields_allocated; /* Number of elements allocated for `fields'. */ ++ struct field *fields; ++ }; ++ ++/* One or more consecutive lines read from a file that all have the ++ same join field value. */ ++struct seq ++ { ++ size_t count; /* Elements used in `lines'. */ ++ size_t alloc; /* Elements allocated in `lines'.
*/ ++ struct line **lines; ++ }; ++ ++/* The previous line read from each file. */ ++static struct line *prevline[2] = {NULL, NULL}; ++ ++/* This provides an extra line buffer for each file. We need these if we ++ try to read two consecutive lines into the same buffer, since we don't ++ want to overwrite the previous buffer before we check order. */ ++static struct line *spareline[2] = {NULL, NULL}; ++ ++/* True if the LC_COLLATE locale is hard. */ ++static bool hard_LC_COLLATE; ++ ++/* If nonzero, print unpairable lines in file 1 or 2. */ ++static bool print_unpairables_1, print_unpairables_2; ++ ++/* If nonzero, print pairable lines. */ ++static bool print_pairables; ++ ++/* If nonzero, we have seen at least one unpairable line. */ ++static bool seen_unpairable; ++ ++/* If nonzero, we have warned about disorder in that file. */ ++static bool issued_disorder_warning[2]; ++ ++/* Empty output field filler. */ ++static char const *empty_filler; ++ ++/* Field to join on; SIZE_MAX means they haven't been determined yet. */ ++static size_t join_field_1 = SIZE_MAX; ++static size_t join_field_2 = SIZE_MAX; ++ ++/* List of fields to print. */ ++static struct outlist outlist_head; ++ ++/* Last element in `outlist', where a new element can be added. */ ++static struct outlist *outlist_end = &outlist_head; ++ ++/* Tab character separating fields. If NULL, fields are separated ++ by any nonempty string of blanks. */ ++static char *tab = NULL; ++ ++/* The number of bytes used for tab. */ ++static size_t tablen = 0; ++ ++/* If nonzero, check that the input is correctly ordered. 
*/ ++static enum ++ { ++ CHECK_ORDER_DEFAULT, ++ CHECK_ORDER_ENABLED, ++ CHECK_ORDER_DISABLED ++ } check_input_order; ++ ++enum ++{ ++ CHECK_ORDER_OPTION = CHAR_MAX + 1, ++ NOCHECK_ORDER_OPTION ++}; ++ ++ ++static struct option const longopts[] = ++{ ++ {"ignore-case", no_argument, NULL, 'i'}, ++ {"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, ++ {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++/* Used to print non-joining lines */ ++static struct line uni_blank; ++ ++/* If nonzero, ignore case when comparing join fields. */ ++static bool ignore_case; ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... FILE1 FILE2\n\ ++"), ++ program_name); ++ fputs (_("\ ++For each pair of input lines with identical join fields, write a line to\n\ ++standard output. The default join field is the first, delimited\n\ ++by whitespace. 
When FILE1 or FILE2 (not both) is -, read standard input.\n\ ++\n\ ++ -a FILENUM print unpairable lines coming from file FILENUM, where\n\ ++ FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\ ++ -e EMPTY replace missing input fields with EMPTY\n\ ++"), stdout); ++ fputs (_("\ ++ -i, --ignore-case ignore differences in case when comparing fields\n\ ++ -j FIELD equivalent to `-1 FIELD -2 FIELD'\n\ ++ -o FORMAT obey FORMAT while constructing output line\n\ ++ -t CHAR use CHAR as input and output field separator\n\ ++"), stdout); ++ fputs (_("\ ++ -v FILENUM like -a FILENUM, but suppress joined output lines\n\ ++ -1 FIELD join on this FIELD of file 1\n\ ++ -2 FIELD join on this FIELD of file 2\n\ ++ --check-order check that the input is correctly sorted, even\n\ ++ if all input lines are pairable\n\ ++ --nocheck-order do not check that the input is correctly sorted\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\ ++else fields are separated by CHAR. Any FIELD is a field number counted\n\ ++from 1. FORMAT is one or more comma or blank separated specifications,\n\ ++each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\ ++the remaining fields from FILE1, the remaining fields from FILE2, all\n\ ++separated by CHAR.\n\ ++\n\ ++Important: FILE1 and FILE2 must be sorted on the join fields.\n\ ++E.g., use `sort -k 1b,1' if `join' has no options.\n\ ++Note, comparisons honor the rules specified by `LC_COLLATE'.\n\ ++If the input is not sorted and some lines cannot be joined, a\n\ ++warning message will be given.\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++/* Record a field in LINE, with location FIELD and size LEN. 
*/ ++ ++static void ++extract_field (struct line *line, char *field, size_t len) ++{ ++ if (line->nfields >= line->nfields_allocated) ++ { ++ line->fields = X2NREALLOC (line->fields, &line->nfields_allocated); ++ } ++ line->fields[line->nfields].beg = field; ++ line->fields[line->nfields].len = len; ++ ++(line->nfields); ++} ++ ++/* Fill in the `fields' structure in LINE. */ ++ ++static void ++xfields (struct line *line) ++{ ++ char *ptr = line->buf.buffer; ++ char const *lim = ptr + line->buf.length - 1; ++ ++ if (ptr == lim) ++ return; ++ ++ if (tab != NULL) ++ { ++ unsigned char t = tab[0]; ++ char *sep; ++ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) ++ extract_field (line, ptr, sep - ptr); ++ } ++ else ++ { ++ /* Skip leading blanks before the first field. */ ++ while (isblank (to_uchar (*ptr))) ++ if (++ptr == lim) ++ return; ++ ++ do ++ { ++ char *sep; ++ for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++) ++ continue; ++ extract_field (line, ptr, sep - ptr); ++ if (sep == lim) ++ return; ++ for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++) ++ continue; ++ } ++ while (ptr != lim); ++ } ++ ++ extract_field (line, ptr, lim - ptr); ++} ++ ++#if HAVE_MBRTOWC ++static void ++xfields_multibyte (struct line *line) ++{ ++ char *ptr = line->buf.buffer; ++ char const *lim = ptr + line->buf.length - 1; ++ wchar_t wc = 0; ++ size_t mblength = 1; ++ mbstate_t state, state_bak; ++ ++ memset (&state, 0, sizeof (mbstate_t)); ++ ++ if (ptr >= lim) ++ return; ++ ++ if (tab != NULL) ++ { ++ unsigned char t = tab[0]; ++ char *sep = ptr; ++ for (; ptr < lim; ptr = sep + mblength) ++ { ++ sep = ptr; ++ while (sep < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ } ++ mblength = (mblength < 1) ? 
1 : mblength; ++ ++ if (mblength == tablen && !memcmp (sep, tab, mblength)) ++ break; ++ else ++ { ++ sep += mblength; ++ continue; ++ } ++ } ++ ++ if (sep >= lim) ++ break; ++ ++ extract_field (line, ptr, sep - ptr); ++ } ++ } ++ else ++ { ++ /* Skip leading blanks before the first field. */ ++ while(ptr < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (!iswblank(wc)) ++ break; ++ ptr += mblength; ++ } ++ ++ do ++ { ++ char *sep; ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ sep = ptr + mblength; ++ while (sep < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (iswblank (wc)) ++ break; ++ ++ sep += mblength; ++ } ++ ++ extract_field (line, ptr, sep - ptr); ++ if (sep >= lim) ++ return; ++ ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ ptr = sep + mblength; ++ while (ptr < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 
1 : mblength; ++ ++ if (!iswblank (wc)) ++ break; ++ ++ ptr += mblength; ++ } ++ } ++ while (ptr < lim); ++ } ++ ++ extract_field (line, ptr, lim - ptr); ++} ++#endif ++ ++static void ++freeline (struct line *line) ++{ ++ free (line->fields); ++ free (line->buf.buffer); ++ line->buf.buffer = NULL; ++} ++ ++/* Return <0 if the join field in LINE1 compares less than the one in LINE2; ++ >0 if it compares greater; 0 if it compares equal. ++ Report an error and exit if the comparison fails. ++ Use join fields JF_1 and JF_2 respectively. */ ++ ++static int ++keycmp (struct line const *line1, struct line const *line2, ++ size_t jf_1, size_t jf_2) ++{ ++ /* Start of field to compare in each file. */ ++ char *beg[2]; ++ char *copy[2]; ++ size_t len[2]; /* Length of fields to compare. */ ++ int diff; ++ int i, j; ++ ++ if (jf_1 < line1->nfields) ++ { ++ beg[0] = line1->fields[jf_1].beg; ++ len[0] = line1->fields[jf_1].len; ++ } ++ else ++ { ++ beg[0] = NULL; ++ len[0] = 0; ++ } ++ ++ if (jf_2 < line2->nfields) ++ { ++ beg[1] = line2->fields[jf_2].beg; ++ len[1] = line2->fields[jf_2].len; ++ } ++ else ++ { ++ beg[1] = NULL; ++ len[1] = 0; ++ } ++ ++ if (len[0] == 0) ++ return len[1] == 0 ? 
0 : -1; ++ if (len[1] == 0) ++ return 1; ++ ++ if (ignore_case) ++ { ++#ifdef HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ size_t mblength; ++ wchar_t wc, uwc; ++ mbstate_t state, state_bak; ++ ++ memset (&state, '\0', sizeof (mbstate_t)); ++ ++ for (i = 0; i < 2; i++) ++ { ++ copy[i] = alloca (len[i] + 1); ++ ++ for (j = 0; j < MIN (len[0], len[1]);) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); ++ ++ switch (mblength) ++ { ++ case (size_t) -1: ++ case (size_t) -2: ++ state = state_bak; ++ /* Fall through */ ++ case 0: ++ mblength = 1; ++ break; ++ ++ default: ++ uwc = towupper (wc); ++ ++ if (uwc != wc) ++ { ++ mbstate_t state_wc; ++ ++ memset (&state_wc, '\0', sizeof (mbstate_t)); ++ wcrtomb (copy[i] + j, uwc, &state_wc); ++ } ++ else ++ memcpy (copy[i] + j, beg[i] + j, mblength); ++ } ++ j += mblength; ++ } ++ copy[i][j] = '\0'; ++ } ++ } ++ else ++#endif ++ { ++ for (i = 0; i < 2; i++) ++ { ++ copy[i] = alloca (len[i] + 1); ++ ++ for (j = 0; j < MIN (len[0], len[1]); j++) ++ copy[i][j] = toupper (beg[i][j]); ++ ++ copy[i][j] = '\0'; ++ } ++ } ++ } ++ else ++ { ++ copy[0] = (unsigned char *) beg[0]; ++ copy[1] = (unsigned char *) beg[1]; ++ } ++ ++ if (hard_LC_COLLATE) ++ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); ++ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); ++ ++ ++ if (diff) ++ return diff; ++ return len[0] - len[1]; ++} ++ ++/* Check that successive input lines PREV and CURRENT from input file ++ WHATFILE are presented in order, unless the user may be relying on ++ the GNU extension that input lines may be out of order if no input ++ lines are unpairable. ++ ++ If the user specified --nocheck-order, the check is not made. ++ If the user specified --check-order, the problem is fatal. ++ Otherwise (the default), the message is simply a warning. ++ ++ A message is printed at most once per input file. 
*/ ++ ++static void ++check_order (const struct line *prev, ++ const struct line *current, ++ int whatfile) ++{ ++ if (check_input_order != CHECK_ORDER_DISABLED ++ && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable)) ++ { ++ if (!issued_disorder_warning[whatfile-1]) ++ { ++ size_t join_field = whatfile == 1 ? join_field_1 : join_field_2; ++ if (keycmp (prev, current, join_field, join_field) > 0) ++ { ++ error ((check_input_order == CHECK_ORDER_ENABLED ++ ? EXIT_FAILURE : 0), ++ 0, _("file %d is not in sorted order"), whatfile); ++ ++ /* If we get to here, the message was just a warning, but we ++ want only to issue it once. */ ++ issued_disorder_warning[whatfile-1] = true; ++ } ++ } ++ } ++} ++ ++static inline void ++reset_line (struct line *line) ++{ ++ line->nfields = 0; ++} ++ ++static struct line * ++init_linep (struct line **linep) ++{ ++ struct line *line = xmalloc (sizeof *line); ++ memset (line, '\0', sizeof *line); ++ *linep = line; ++ return line; ++} ++ ++/* Read a line from FP into LINE and split it into fields. ++ Return true if successful. */ ++ ++static bool ++get_line (FILE *fp, struct line **linep, int which) ++{ ++ struct line *line = *linep; ++ ++ if (line == prevline[which - 1]) ++ { ++ SWAPLINES (line, spareline[which - 1]); ++ *linep = line; ++ } ++ ++ if (line) ++ reset_line (line); ++ else ++ line = init_linep (linep); ++ ++ if (! 
readlinebuffer (&line->buf, fp)) ++ { ++ if (ferror (fp)) ++ error (EXIT_FAILURE, errno, _("read error")); ++ freeline (line); ++ return false; ++ } ++ ++ xfields (line); ++ ++ if (prevline[which - 1]) ++ check_order (prevline[which - 1], line, which); ++ ++ prevline[which - 1] = line; ++ return true; ++} ++ ++static void ++free_spareline (void) ++{ ++ size_t i; ++ ++ for (i = 0; i < ARRAY_CARDINALITY (spareline); i++) ++ { ++ if (spareline[i]) ++ { ++ freeline (spareline[i]); ++ free (spareline[i]); ++ } ++ } ++} ++ ++static void ++initseq (struct seq *seq) ++{ ++ seq->count = 0; ++ seq->alloc = 0; ++ seq->lines = NULL; ++} ++ ++/* Read a line from FP and add it to SEQ. Return true if successful. */ ++ ++static bool ++getseq (FILE *fp, struct seq *seq, int whichfile) ++{ ++ if (seq->count == seq->alloc) ++ { ++ size_t i; ++ seq->lines = X2NREALLOC (seq->lines, &seq->alloc); ++ for (i = seq->count; i < seq->alloc; i++) ++ seq->lines[i] = NULL; ++ } ++ ++ if (get_line (fp, &seq->lines[seq->count], whichfile)) ++ { ++ ++seq->count; ++ return true; ++ } ++ return false; ++} ++ ++/* Read a line from FP and add it to SEQ, as the first item if FIRST is ++ true, else as the next. */ ++static bool ++advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile) ++{ ++ if (first) ++ seq->count = 0; ++ ++ return getseq (fp, seq, whichfile); ++} ++ ++static void ++delseq (struct seq *seq) ++{ ++ size_t i; ++ for (i = 0; i < seq->alloc; i++) ++ if (seq->lines[i]) ++ { ++ if (seq->lines[i]->buf.buffer) ++ freeline (seq->lines[i]); ++ free (seq->lines[i]); ++ } ++ free (seq->lines); ++} ++ ++ ++/* Print field N of LINE if it exists and is nonempty, otherwise ++ `empty_filler' if it is nonempty. 
*/ ++ ++static void ++prfield (size_t n, struct line const *line) ++{ ++ size_t len; ++ ++ if (n < line->nfields) ++ { ++ len = line->fields[n].len; ++ if (len) ++ fwrite (line->fields[n].beg, 1, len, stdout); ++ else if (empty_filler) ++ fputs (empty_filler, stdout); ++ } ++ else if (empty_filler) ++ fputs (empty_filler, stdout); ++} ++ ++/* Print the join of LINE1 and LINE2. */ ++ ++#define PUT_TAB_CHAR \ ++ do \ ++ { \ ++ (tab != NULL) ? \ ++ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ ++ } \ ++ while (0) ++ ++static void ++prjoin (struct line const *line1, struct line const *line2) ++{ ++ const struct outlist *outlist; ++ ++ outlist = outlist_head.next; ++ if (outlist) ++ { ++ const struct outlist *o; ++ ++ o = outlist; ++ while (1) ++ { ++ size_t field; ++ struct line const *line; ++ ++ if (o->file == 0) ++ { ++ if (line1 == &uni_blank) ++ { ++ line = line2; ++ field = join_field_2; ++ } ++ else ++ { ++ line = line1; ++ field = join_field_1; ++ } ++ } ++ else ++ { ++ line = (o->file == 1 ? line1 : line2); ++ field = o->field; ++ } ++ prfield (field, line); ++ o = o->next; ++ if (o == NULL) ++ break; ++ PUT_TAB_CHAR; ++ } ++ putchar ('\n'); ++ } ++ else ++ { ++ size_t i; ++ ++ if (line1 == &uni_blank) ++ { ++ struct line const *t; ++ t = line1; ++ line1 = line2; ++ line2 = t; ++ } ++ prfield (join_field_1, line1); ++ for (i = 0; i < join_field_1 && i < line1->nfields; ++i) ++ { ++ PUT_TAB_CHAR; ++ prfield (i, line1); ++ } ++ for (i = join_field_1 + 1; i < line1->nfields; ++i) ++ { ++ PUT_TAB_CHAR; ++ prfield (i, line1); ++ } ++ ++ for (i = 0; i < join_field_2 && i < line2->nfields; ++i) ++ { ++ PUT_TAB_CHAR; ++ prfield (i, line2); ++ } ++ for (i = join_field_2 + 1; i < line2->nfields; ++i) ++ { ++ PUT_TAB_CHAR; ++ prfield (i, line2); ++ } ++ putchar ('\n'); ++ } ++} ++ ++/* Print the join of the files in FP1 and FP2. 
*/ ++ ++static void ++join (FILE *fp1, FILE *fp2) ++{ ++ struct seq seq1, seq2; ++ struct line **linep = xmalloc (sizeof *linep); ++ int diff; ++ bool eof1, eof2, checktail; ++ ++ *linep = NULL; ++ ++ /* Read the first line of each file. */ ++ initseq (&seq1); ++ getseq (fp1, &seq1, 1); ++ initseq (&seq2); ++ getseq (fp2, &seq2, 2); ++ ++ while (seq1.count && seq2.count) ++ { ++ size_t i; ++ diff = keycmp (seq1.lines[0], seq2.lines[0], ++ join_field_1, join_field_2); ++ if (diff < 0) ++ { ++ if (print_unpairables_1) ++ prjoin (seq1.lines[0], &uni_blank); ++ advance_seq (fp1, &seq1, true, 1); ++ seen_unpairable = true; ++ continue; ++ } ++ if (diff > 0) ++ { ++ if (print_unpairables_2) ++ prjoin (&uni_blank, seq2.lines[0]); ++ advance_seq (fp2, &seq2, true, 2); ++ seen_unpairable = true; ++ continue; ++ } ++ ++ /* Keep reading lines from file1 as long as they continue to ++ match the current line from file2. */ ++ eof1 = false; ++ do ++ if (!advance_seq (fp1, &seq1, false, 1)) ++ { ++ eof1 = true; ++ ++seq1.count; ++ break; ++ } ++ while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0], ++ join_field_1, join_field_2)); ++ ++ /* Keep reading lines from file2 as long as they continue to ++ match the current line from file1. 
*/ ++ eof2 = false; ++ do ++ if (!advance_seq (fp2, &seq2, false, 2)) ++ { ++ eof2 = true; ++ ++seq2.count; ++ break; ++ } ++ while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1], ++ join_field_1, join_field_2)); ++ ++ if (print_pairables) ++ { ++ for (i = 0; i < seq1.count - 1; ++i) ++ { ++ size_t j; ++ for (j = 0; j < seq2.count - 1; ++j) ++ prjoin (seq1.lines[i], seq2.lines[j]); ++ } ++ } ++ ++ if (!eof1) ++ { ++ SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]); ++ seq1.count = 1; ++ } ++ else ++ seq1.count = 0; ++ ++ if (!eof2) ++ { ++ SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]); ++ seq2.count = 1; ++ } ++ else ++ seq2.count = 0; ++ } ++ ++ /* If the user did not specify --nocheck-order, then we read the ++ tail ends of both inputs to verify that they are in order. We ++ skip the rest of the tail once we have issued a warning for that ++ file, unless we actually need to print the unpairable lines. */ ++ if (check_input_order != CHECK_ORDER_DISABLED ++ && !(issued_disorder_warning[0] && issued_disorder_warning[1])) ++ checktail = true; ++ else ++ checktail = false; ++ ++ if ((print_unpairables_1 || checktail) && seq1.count) ++ { ++ if (print_unpairables_1) ++ prjoin (seq1.lines[0], &uni_blank); ++ seen_unpairable = true; ++ while (get_line (fp1, linep, 1)) ++ { ++ if (print_unpairables_1) ++ prjoin (*linep, &uni_blank); ++ if (issued_disorder_warning[0] && !print_unpairables_1) ++ break; ++ } ++ } ++ ++ if ((print_unpairables_2 || checktail) && seq2.count) ++ { ++ if (print_unpairables_2) ++ prjoin (&uni_blank, seq2.lines[0]); ++ seen_unpairable = true; ++ while (get_line (fp2, linep, 2)) ++ { ++ if (print_unpairables_2) ++ prjoin (&uni_blank, *linep); ++ if (issued_disorder_warning[1] && !print_unpairables_2) ++ break; ++ } ++ } ++ ++ free (*linep); ++ ++ free (linep); ++ delseq (&seq1); ++ delseq (&seq2); ++} ++ ++/* Add a field spec for field FIELD of file FILE to `outlist'.
*/ ++ ++static void ++add_field (int file, size_t field) ++{ ++ struct outlist *o; ++ ++ assert (file == 0 || file == 1 || file == 2); ++ assert (file != 0 || field == 0); ++ ++ o = xmalloc (sizeof *o); ++ o->file = file; ++ o->field = field; ++ o->next = NULL; ++ ++ /* Add to the end of the list so the fields are in the right order. */ ++ outlist_end->next = o; ++ outlist_end = o; ++} ++ ++/* Convert a string of decimal digits, STR (the 1-based join field number), ++ to an integral value. Upon successful conversion, return one less ++ (the zero-based field number). Silently convert too-large values ++ to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a ++ diagnostic and exit. */ ++ ++static size_t ++string_to_join_field (char const *str) ++{ ++ size_t result; ++ unsigned long int val; ++ verify (SIZE_MAX <= ULONG_MAX); ++ ++ strtol_error s_err = xstrtoul (str, NULL, 10, &val, ""); ++ if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val)) ++ val = SIZE_MAX; ++ else if (s_err != LONGINT_OK || val == 0) ++ error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str)); ++ ++ result = val - 1; ++ ++ return result; ++} ++ ++/* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX ++ pair. In S, the field index string is 1-based; *FIELD_INDEX is zero-based. ++ If S is valid, store the pair and return. Otherwise, give a diagnostic and exit. */ ++ ++static void ++decode_field_spec (const char *s, int *file_index, size_t *field_index) ++{ ++ /* The first character must be 0, 1, or 2. */ ++ switch (s[0]) ++ { ++ case '0': ++ if (s[1]) ++ { ++ /* `0' must be all alone -- no `.FIELD'.
*/ ++ error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); ++ } ++ *file_index = 0; ++ *field_index = 0; ++ break; ++ ++ case '1': ++ case '2': ++ if (s[1] != '.') ++ error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); ++ *file_index = s[0] - '0'; ++ *field_index = string_to_join_field (s + 2); ++ break; ++ ++ default: ++ error (EXIT_FAILURE, 0, ++ _("invalid file number in field spec: %s"), quote (s)); ++ ++ /* Tell gcc -W -Wall that we can't get beyond this point. ++ This avoids a warning (otherwise legit) that the caller's copies ++ of *file_index and *field_index might be used uninitialized. */ ++ abort (); ++ ++ break; ++ } ++} ++ ++/* Add the comma or blank separated field spec(s) in STR to `outlist'. */ ++ ++static void ++add_field_list (char *str) ++{ ++ char *p = str; ++ ++ do ++ { ++ int file_index; ++ size_t field_index; ++ char const *spec_item = p; ++ ++ p = strpbrk (p, ", \t"); ++ if (p) ++ *p++ = '\0'; ++ decode_field_spec (spec_item, &file_index, &field_index); ++ add_field (file_index, field_index); ++ } ++ while (p); ++} ++ ++/* Set the join field *VAR to VAL, but report an error if *VAR is set ++ more than once to incompatible values. */ ++ ++static void ++set_join_field (size_t *var, size_t val) ++{ ++ if (*var != SIZE_MAX && *var != val) ++ { ++ unsigned long int var1 = *var + 1; ++ unsigned long int val1 = val + 1; ++ error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"), ++ var1, val1); ++ } ++ *var = val; ++} ++ ++/* Status of command-line arguments. */ ++ ++enum operand_status ++ { ++ /* This argument must be an operand, i.e., one of the files to be ++ joined. */ ++ MUST_BE_OPERAND, ++ ++ /* This might be the argument of the preceding -j1 or -j2 option, ++ or it might be an operand. */ ++ MIGHT_BE_J1_ARG, ++ MIGHT_BE_J2_ARG, ++ ++ /* This might be the argument of the preceding -o option, or it might be ++ an operand. 
*/ ++ MIGHT_BE_O_ARG ++ }; ++ ++/* Add NAME to the array of input file NAMES with operand statuses ++ OPERAND_STATUS; currently there are NFILES names in the list. */ ++ ++static void ++add_file_name (char *name, char *names[2], ++ int operand_status[2], int joption_count[2], int *nfiles, ++ int *prev_optc_status, int *optc_status) ++{ ++ int n = *nfiles; ++ ++ if (n == 2) ++ { ++ bool op0 = (operand_status[0] == MUST_BE_OPERAND); ++ char *arg = names[op0]; ++ switch (operand_status[op0]) ++ { ++ case MUST_BE_OPERAND: ++ error (0, 0, _("extra operand %s"), quote (name)); ++ usage (EXIT_FAILURE); ++ ++ case MIGHT_BE_J1_ARG: ++ joption_count[0]--; ++ set_join_field (&join_field_1, string_to_join_field (arg)); ++ break; ++ ++ case MIGHT_BE_J2_ARG: ++ joption_count[1]--; ++ set_join_field (&join_field_2, string_to_join_field (arg)); ++ break; ++ ++ case MIGHT_BE_O_ARG: ++ add_field_list (arg); ++ break; ++ } ++ if (!op0) ++ { ++ operand_status[0] = operand_status[1]; ++ names[0] = names[1]; ++ } ++ n = 1; ++ } ++ ++ operand_status[n] = *prev_optc_status; ++ names[n] = name; ++ *nfiles = n + 1; ++ if (*prev_optc_status == MIGHT_BE_O_ARG) ++ *optc_status = MIGHT_BE_O_ARG; ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int optc_status; ++ int prev_optc_status = MUST_BE_OPERAND; ++ int operand_status[2]; ++ int joption_count[2] = { 0, 0 }; ++ char *names[2]; ++ FILE *fp1, *fp2; ++ int optc; ++ int nfiles = 0; ++ int i; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ hard_LC_COLLATE = hard_locale (LC_COLLATE); ++ ++ atexit (close_stdout); ++ atexit (free_spareline); ++ ++ print_pairables = true; ++ seen_unpairable = false; ++ issued_disorder_warning[0] = issued_disorder_warning[1] = false; ++ check_input_order = CHECK_ORDER_DEFAULT; ++ ++ while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:", ++ longopts, NULL)) ++ != -1) ++ { ++ optc_status = 
MUST_BE_OPERAND; ++ ++ switch (optc) ++ { ++ case 'v': ++ print_pairables = false; ++ /* Fall through. */ ++ ++ case 'a': ++ { ++ unsigned long int val; ++ if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK ++ || (val != 1 && val != 2)) ++ error (EXIT_FAILURE, 0, ++ _("invalid field number: %s"), quote (optarg)); ++ if (val == 1) ++ print_unpairables_1 = true; ++ else ++ print_unpairables_2 = true; ++ } ++ break; ++ ++ case 'e': ++ if (empty_filler && ! STREQ (empty_filler, optarg)) ++ error (EXIT_FAILURE, 0, ++ _("conflicting empty-field replacement strings")); ++ empty_filler = optarg; ++ break; ++ ++ case 'i': ++ ignore_case = true; ++ break; ++ ++ case '1': ++ set_join_field (&join_field_1, string_to_join_field (optarg)); ++ break; ++ ++ case '2': ++ set_join_field (&join_field_2, string_to_join_field (optarg)); ++ break; ++ ++ case 'j': ++ if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1] ++ && optarg == argv[optind - 1] + 2) ++ { ++ /* The argument was either "-j1" or "-j2". */ ++ bool is_j2 = (optarg[0] == '2'); ++ joption_count[is_j2]++; ++ optc_status = MIGHT_BE_J1_ARG + is_j2; ++ } ++ else ++ { ++ set_join_field (&join_field_1, string_to_join_field (optarg)); ++ set_join_field (&join_field_2, join_field_1); ++ } ++ break; ++ ++ case 'o': ++ add_field_list (optarg); ++ optc_status = MIGHT_BE_O_ARG; ++ break; ++ ++ case 't': ++ { ++ char *newtab; ++ size_t newtablen; ++ if (! 
optarg[0]) ++ error (EXIT_FAILURE, 0, _("empty tab")); ++ newtab = xstrdup (optarg); ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ mbstate_t state; ++ ++ memset (&state, 0, sizeof (mbstate_t)); ++ newtablen = mbrtowc (NULL, newtab, ++ strnlen (newtab, MB_LEN_MAX), ++ &state); ++ if (newtablen == (size_t) 0 ++ || newtablen == (size_t) -1 ++ || newtablen == (size_t) -2) ++ newtablen = 1; ++ } ++ else ++#endif ++ newtablen = 1; ++ ++ if (newtablen == 1 && newtab[1]) ++ { ++ if (STREQ (newtab, "\\0")) ++ newtab[0] = '\0'; ++ } ++ if (tab != NULL && strcmp (tab, newtab)) ++ { ++ free (newtab); ++ error (EXIT_FAILURE, 0, _("incompatible tabs")); ++ } ++ tab = newtab; ++ tablen = newtablen; ++ } ++ break; ++ ++ case NOCHECK_ORDER_OPTION: ++ check_input_order = CHECK_ORDER_DISABLED; ++ break; ++ ++ case CHECK_ORDER_OPTION: ++ check_input_order = CHECK_ORDER_ENABLED; ++ break; ++ ++ case 1: /* Non-option argument. */ ++ add_file_name (optarg, names, operand_status, joption_count, ++ &nfiles, &prev_optc_status, &optc_status); ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } ++ ++ prev_optc_status = optc_status; ++ } ++ ++ /* Process any operands after "--". */ ++ prev_optc_status = MUST_BE_OPERAND; ++ while (optind < argc) ++ add_file_name (argv[optind++], names, operand_status, joption_count, ++ &nfiles, &prev_optc_status, &optc_status); ++ ++ if (nfiles != 2) ++ { ++ if (nfiles == 0) ++ error (0, 0, _("missing operand")); ++ else ++ error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); ++ usage (EXIT_FAILURE); ++ } ++ ++ /* If "-j1" was specified and it turns out not to have had an argument, ++ treat it as "-j 1". Likewise for -j2. 
*/ ++ for (i = 0; i < 2; i++) ++ if (joption_count[i] != 0) ++ { ++ set_join_field (&join_field_1, i); ++ set_join_field (&join_field_2, i); ++ } ++ ++ if (join_field_1 == SIZE_MAX) ++ join_field_1 = 0; ++ if (join_field_2 == SIZE_MAX) ++ join_field_2 = 0; ++ ++ fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r"); ++ if (!fp1) ++ error (EXIT_FAILURE, errno, "%s", names[0]); ++ fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r"); ++ if (!fp2) ++ error (EXIT_FAILURE, errno, "%s", names[1]); ++ if (fp1 == fp2) ++ error (EXIT_FAILURE, errno, _("both files cannot be standard input")); ++ join (fp1, fp2); ++ ++ if (fclose (fp1) != 0) ++ error (EXIT_FAILURE, errno, "%s", names[0]); ++ if (fclose (fp2) != 0) ++ error (EXIT_FAILURE, errno, "%s", names[1]); ++ ++ if (issued_disorder_warning[0] || issued_disorder_warning[1]) ++ exit (EXIT_FAILURE); ++ else ++ exit (EXIT_SUCCESS); ++} +diff -urNp coreutils-8.0-orig/src/pr.c coreutils-8.0/src/pr.c +--- coreutils-8.0-orig/src/pr.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/pr.c 2009-10-07 10:07:16.000000000 +0200 +@@ -312,6 +312,32 @@ + + #include <getopt.h> + #include <sys/types.h> ++ ++/* Get MB_LEN_MAX. */ ++#include <limits.h> ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC ++ installation; work around this configuration error. */ ++#if !defined MB_LEN_MAX || MB_LEN_MAX == 1 ++# define MB_LEN_MAX 16 ++#endif ++ ++/* Get MB_CUR_MAX. */ ++#include <stdlib.h> ++ ++/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ ++/* Get mbstate_t, mbrtowc(), wcwidth(). */ ++#if HAVE_WCHAR_H ++# include <wchar.h> ++#endif ++ ++/* Get iswprint(). -- for wcwidth(). */ ++#if HAVE_WCTYPE_H ++# include <wctype.h> ++#endif ++#if !defined iswprint && !HAVE_ISWPRINT ++# define iswprint(wc) 1 ++#endif ++ + #include "system.h" + #include "error.h" + #include "hard-locale.h" +@@ -322,6 +348,18 @@ + #include "strftime.h" + #include "xstrtol.h" + ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.
*/ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ ++#ifndef HAVE_DECL_WCWIDTH ++"this configure-time declaration test was not run" ++#endif ++#if !HAVE_DECL_WCWIDTH ++extern int wcwidth (); ++#endif ++ + /* The official name of this program (e.g., no `g' prefix). */ + #define PROGRAM_NAME "pr" + +@@ -414,7 +452,20 @@ struct COLUMN + + typedef struct COLUMN COLUMN; + +-static int char_to_clump (char c); ++/* Funtion pointers to switch functions for single byte locale or for ++ multibyte locale. If multibyte functions do not exist in your sysytem, ++ these pointers always point the function for single byte locale. */ ++static void (*print_char) (char c); ++static int (*char_to_clump) (char c); ++ ++/* Functions for single byte locale. */ ++static void print_char_single (char c); ++static int char_to_clump_single (char c); ++ ++/* Functions for multibyte locale. */ ++static void print_char_multi (char c); ++static int char_to_clump_multi (char c); ++ + static bool read_line (COLUMN *p); + static bool print_page (void); + static bool print_stored (COLUMN *p); +@@ -424,6 +475,7 @@ static void print_header (void); + static void pad_across_to (int position); + static void add_line_number (COLUMN *p); + static void getoptarg (char *arg, char switch_char, char *character, ++ int *character_length, int *character_width, + int *number); + void usage (int status); + static void print_files (int number_of_files, char **av); +@@ -438,7 +490,6 @@ static void store_char (char c); + static void pad_down (int lines); + static void read_rest_of_line (COLUMN *p); + static void skip_read (COLUMN *p, int column_number); +-static void print_char (char c); + static void cleanup (void); + static void print_sep_string (void); + static void separator_string (const char *optarg_S); +@@ -450,7 +501,7 @@ static COLUMN *column_vector; + we store the leftmost columns contiguously in buff. 
+ To print a line from buff, get the index of the first character + from line_vector[i], and print up to line_vector[i + 1]. */ +-static char *buff; ++static unsigned char *buff; + + /* Index of the position in buff where the next character + will be stored. */ +@@ -554,7 +605,7 @@ static int chars_per_column; + static bool untabify_input = false; + + /* (-e) The input tab character. */ +-static char input_tab_char = '\t'; ++static char input_tab_char[MB_LEN_MAX] = "\t"; + + /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... + where the leftmost column is 1. */ +@@ -564,7 +615,10 @@ static int chars_per_input_tab = 8; + static bool tabify_output = false; + + /* (-i) The output tab character. */ +-static char output_tab_char = '\t'; ++static char output_tab_char[MB_LEN_MAX] = "\t"; ++ ++/* (-i) The byte length of output tab character. */ ++static int output_tab_char_length = 1; + + /* (-i) The width of the output tab. */ + static int chars_per_output_tab = 8; +@@ -638,7 +692,13 @@ static int power_10; + static bool numbered_lines = false; + + /* (-n) Character which follows each line number. */ +-static char number_separator = '\t'; ++static char number_separator[MB_LEN_MAX] = "\t"; ++ ++/* (-n) The byte length of the character which follows each line number. */ ++static int number_separator_length = 1; ++ ++/* (-n) The character width of the character which follows each line number. */ ++static int number_separator_width = 0; + + /* (-n) line counting starts with 1st line of input file (not with 1st + line of 1st page printed). */ +@@ -691,6 +751,7 @@ static bool use_col_separator = false; + -a|COLUMN|-m is a `space' and with the -J option a `tab'. 
*/ + static char *col_sep_string = (char *) ""; + static int col_sep_length = 0; ++static int col_sep_width = 0; + static char *column_separator = (char *) " "; + static char *line_separator = (char *) "\t"; + +@@ -847,6 +908,13 @@ separator_string (const char *optarg_S) + col_sep_length = (int) strlen (optarg_S); + col_sep_string = xmalloc (col_sep_length + 1); + strcpy (col_sep_string, optarg_S); ++ ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ col_sep_width = mbswidth (col_sep_string, 0); ++ else ++#endif ++ col_sep_width = col_sep_length; + } + + int +@@ -871,6 +939,21 @@ main (int argc, char **argv) + + atexit (close_stdout); + ++/* Define which functions are used, the ones for single byte locale or the ones ++ for multibyte locale. */ ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ print_char = print_char_multi; ++ char_to_clump = char_to_clump_multi; ++ } ++ else ++#endif ++ { ++ print_char = print_char_single; ++ char_to_clump = char_to_clump_single; ++ } ++ + n_files = 0; + file_names = (argc > 1 + ? xmalloc ((argc - 1) * sizeof (char *)) +@@ -947,8 +1030,12 @@ main (int argc, char **argv) + break; + case 'e': + if (optarg) +- getoptarg (optarg, 'e', &input_tab_char, +- &chars_per_input_tab); ++ { ++ int dummy_length, dummy_width; ++ ++ getoptarg (optarg, 'e', input_tab_char, &dummy_length, ++ &dummy_width, &chars_per_input_tab); ++ } + /* Could check tab width > 0. */ + untabify_input = true; + break; +@@ -961,8 +1048,12 @@ main (int argc, char **argv) + break; + case 'i': + if (optarg) +- getoptarg (optarg, 'i', &output_tab_char, +- &chars_per_output_tab); ++ { ++ int dummy_width; ++ ++ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, ++ &dummy_width, &chars_per_output_tab); ++ } + /* Could check tab width > 0. 
*/ + tabify_output = true; + break; +@@ -989,8 +1080,8 @@ main (int argc, char **argv) + case 'n': + numbered_lines = true; + if (optarg) +- getoptarg (optarg, 'n', &number_separator, +- &chars_per_number); ++ getoptarg (optarg, 'n', number_separator, &number_separator_length, ++ &number_separator_width, &chars_per_number); + break; + case 'N': + skip_count = false; +@@ -1029,7 +1120,7 @@ main (int argc, char **argv) + old_s = false; + /* Reset an additional input of -s, -S dominates -s */ + col_sep_string = bad_cast (""); +- col_sep_length = 0; ++ col_sep_length = col_sep_width = 0; + use_col_separator = true; + if (optarg) + separator_string (optarg); +@@ -1186,10 +1277,45 @@ main (int argc, char **argv) + a number. */ + + static void +-getoptarg (char *arg, char switch_char, char *character, int *number) ++getoptarg (char *arg, char switch_char, char *character, int *character_length, ++ int *character_width, int *number) + { + if (!ISDIGIT (*arg)) +- *character = *arg++; ++ { ++#ifdef HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) /* for multibyte locale. */ ++ { ++ wchar_t wc; ++ size_t mblength; ++ int width; ++ mbstate_t state = {'\0'}; ++ ++ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); ++ ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ *character_length = 1; ++ *character_width = 1; ++ } ++ else ++ { ++ *character_length = (mblength < 1) ? 1 : mblength; ++ width = wcwidth (wc); ++ *character_width = (width < 0) ? 0 : width; ++ } ++ ++ strncpy (character, arg, *character_length); ++ arg += *character_length; ++ } ++ else /* for single byte locale. 
*/ ++#endif ++ { ++ *character = *arg++; ++ *character_length = 1; ++ *character_width = 1; ++ } ++ } ++ + if (*arg) + { + long int tmp_long; +@@ -1248,7 +1374,7 @@ init_parameters (int number_of_files) + else + col_sep_string = column_separator; + +- col_sep_length = 1; ++ col_sep_length = col_sep_width = 1; + use_col_separator = true; + } + /* It's rather pointless to define a TAB separator with column +@@ -1279,11 +1405,11 @@ init_parameters (int number_of_files) + TAB_WIDTH (chars_per_input_tab, chars_per_number); */ + + /* Estimate chars_per_text without any margin and keep it constant. */ +- if (number_separator == '\t') ++ if (number_separator[0] == '\t') + number_width = chars_per_number + + TAB_WIDTH (chars_per_default_tab, chars_per_number); + else +- number_width = chars_per_number + 1; ++ number_width = chars_per_number + number_separator_width; + + /* The number is part of the column width unless we are + printing files in parallel. */ +@@ -1298,7 +1424,7 @@ init_parameters (int number_of_files) + } + + chars_per_column = (chars_per_line - chars_used_by_number - +- (columns - 1) * col_sep_length) / columns; ++ (columns - 1) * col_sep_width) / columns; + + if (chars_per_column < 1) + error (EXIT_FAILURE, 0, _("page width too narrow")); +@@ -1423,7 +1549,7 @@ init_funcs (void) + + /* Enlarge p->start_position of first column to use the same form of + padding_not_printed with all columns. */ +- h = h + col_sep_length; ++ h = h + col_sep_width; + + /* This loop takes care of all but the rightmost column. 
*/ + +@@ -1457,7 +1583,7 @@ init_funcs (void) + } + else + { +- h = h_next + col_sep_length; ++ h = h_next + col_sep_width; + h_next = h + chars_per_column; + } + } +@@ -1747,9 +1873,9 @@ static void + align_column (COLUMN *p) + { + padding_not_printed = p->start_position; +- if (padding_not_printed - col_sep_length > 0) ++ if (padding_not_printed - col_sep_width > 0) + { +- pad_across_to (padding_not_printed - col_sep_length); ++ pad_across_to (padding_not_printed - col_sep_width); + padding_not_printed = ANYWHERE; + } + +@@ -2020,13 +2146,13 @@ store_char (char c) + /* May be too generous. */ + buff = X2REALLOC (buff, &buff_allocated); + } +- buff[buff_current++] = c; ++ buff[buff_current++] = (unsigned char) c; + } + + static void + add_line_number (COLUMN *p) + { +- int i; ++ int i, j; + char *s; + int left_cut; + +@@ -2049,22 +2175,24 @@ add_line_number (COLUMN *p) + /* Tabification is assumed for multiple columns, also for n-separators, + but `default n-separator = TAB' hasn't been given priority over + equal column_width also specified by POSIX. */ +- if (number_separator == '\t') ++ if (number_separator[0] == '\t') + { + i = number_width - chars_per_number; + while (i-- > 0) + (p->char_func) (' '); + } + else +- (p->char_func) (number_separator); ++ for (j = 0; j < number_separator_length; j++) ++ (p->char_func) (number_separator[j]); + } + else + /* To comply with POSIX, we avoid any expansion of default TAB + separator with a single column output. No column_width requirement + has to be considered. 
*/ + { +- (p->char_func) (number_separator); +- if (number_separator == '\t') ++ for (j = 0; j < number_separator_length; j++) ++ (p->char_func) (number_separator[j]); ++ if (number_separator[0] == '\t') + output_position = POS_AFTER_TAB (chars_per_output_tab, + output_position); + } +@@ -2225,7 +2353,7 @@ print_white_space (void) + while (goal - h_old > 1 + && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) + { +- putchar (output_tab_char); ++ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); + h_old = h_new; + } + while (++h_old <= goal) +@@ -2245,6 +2373,7 @@ print_sep_string (void) + { + char *s; + int l = col_sep_length; ++ int not_space_flag; + + s = col_sep_string; + +@@ -2258,6 +2387,7 @@ print_sep_string (void) + { + for (; separators_not_printed > 0; --separators_not_printed) + { ++ not_space_flag = 0; + while (l-- > 0) + { + /* 3 types of sep_strings: spaces only, spaces and chars, +@@ -2271,12 +2401,15 @@ print_sep_string (void) + } + else + { ++ not_space_flag = 1; + if (spaces_not_printed > 0) + print_white_space (); + putchar (*s++); +- ++output_position; + } + } ++ if (not_space_flag) ++ output_position += col_sep_width; ++ + /* sep_string ends with some spaces */ + if (spaces_not_printed > 0) + print_white_space (); +@@ -2304,7 +2437,7 @@ print_clump (COLUMN *p, int n, char *clu + required number of tabs and spaces. 
*/ + + static void +-print_char (char c) ++print_char_single (char c) + { + if (tabify_output) + { +@@ -2328,6 +2461,74 @@ print_char (char c) + putchar (c); + } + ++#ifdef HAVE_MBRTOWC ++static void ++print_char_multi (char c) ++{ ++ static size_t mbc_pos = 0; ++ static char mbc[MB_LEN_MAX] = {'\0'}; ++ static mbstate_t state = {'\0'}; ++ mbstate_t state_bak; ++ wchar_t wc; ++ size_t mblength; ++ int width; ++ ++ if (tabify_output) ++ { ++ state_bak = state; ++ mbc[mbc_pos++] = c; ++ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); ++ ++ while (mbc_pos > 0) ++ { ++ switch (mblength) ++ { ++ case (size_t)-2: ++ state = state_bak; ++ return; ++ ++ case (size_t)-1: ++ state = state_bak; ++ ++output_position; ++ putchar (mbc[0]); ++ memmove (mbc, mbc + 1, MB_CUR_MAX - 1); ++ --mbc_pos; ++ break; ++ ++ case 0: ++ mblength = 1; ++ ++ default: ++ if (wc == L' ') ++ { ++ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); ++ --mbc_pos; ++ ++spaces_not_printed; ++ return; ++ } ++ else if (spaces_not_printed > 0) ++ print_white_space (); ++ ++ /* Nonprintables are assumed to have width 0, except L'\b'. */ ++ if ((width = wcwidth (wc)) < 1) ++ { ++ if (wc == L'\b') ++ --output_position; ++ } ++ else ++ output_position += width; ++ ++ fwrite (mbc, sizeof(char), mblength, stdout); ++ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); ++ mbc_pos -= mblength; ++ } ++ } ++ return; ++ } ++ putchar (c); ++} ++#endif ++ + /* Skip to page PAGE before printing. + PAGE may be larger than total number of pages. 
*/ + +@@ -2507,9 +2708,9 @@ read_line (COLUMN *p) + align_empty_cols = false; + } + +- if (padding_not_printed - col_sep_length > 0) ++ if (padding_not_printed - col_sep_width > 0) + { +- pad_across_to (padding_not_printed - col_sep_length); ++ pad_across_to (padding_not_printed - col_sep_width); + padding_not_printed = ANYWHERE; + } + +@@ -2610,9 +2811,9 @@ print_stored (COLUMN *p) + } + } + +- if (padding_not_printed - col_sep_length > 0) ++ if (padding_not_printed - col_sep_width > 0) + { +- pad_across_to (padding_not_printed - col_sep_length); ++ pad_across_to (padding_not_printed - col_sep_width); + padding_not_printed = ANYWHERE; + } + +@@ -2625,8 +2826,8 @@ print_stored (COLUMN *p) + if (spaces_not_printed == 0) + { + output_position = p->start_position + end_vector[line]; +- if (p->start_position - col_sep_length == chars_per_margin) +- output_position -= col_sep_length; ++ if (p->start_position - col_sep_width == chars_per_margin) ++ output_position -= col_sep_width; + } + + return true; +@@ -2645,7 +2846,7 @@ print_stored (COLUMN *p) + number of characters is 1.) 
*/ + + static int +-char_to_clump (char c) ++char_to_clump_single (char c) + { + unsigned char uc = c; + char *s = clump_buff; +@@ -2655,10 +2856,10 @@ char_to_clump (char c) + int chars; + int chars_per_c = 8; + +- if (c == input_tab_char) ++ if (c == input_tab_char[0]) + chars_per_c = chars_per_input_tab; + +- if (c == input_tab_char || c == '\t') ++ if (c == input_tab_char[0] || c == '\t') + { + width = TAB_WIDTH (chars_per_c, input_position); + +@@ -2739,6 +2940,154 @@ char_to_clump (char c) + return chars; + } + ++#ifdef HAVE_MBRTOWC ++static int ++char_to_clump_multi (char c) ++{ ++ static size_t mbc_pos = 0; ++ static char mbc[MB_LEN_MAX] = {'\0'}; ++ static mbstate_t state = {'\0'}; ++ mbstate_t state_bak; ++ wchar_t wc; ++ size_t mblength; ++ int wc_width; ++ register char *s = clump_buff; ++ register int i, j; ++ char esc_buff[4]; ++ int width; ++ int chars; ++ int chars_per_c = 8; ++ ++ state_bak = state; ++ mbc[mbc_pos++] = c; ++ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); ++ ++ width = 0; ++ chars = 0; ++ while (mbc_pos > 0) ++ { ++ switch (mblength) ++ { ++ case (size_t)-2: ++ state = state_bak; ++ return 0; ++ ++ case (size_t)-1: ++ state = state_bak; ++ mblength = 1; ++ ++ if (use_esc_sequence || use_cntrl_prefix) ++ { ++ width = +4; ++ chars = +4; ++ *s++ = '\\'; ++ sprintf (esc_buff, "%03o", mbc[0]); ++ for (i = 0; i <= 2; ++i) ++ *s++ = (int) esc_buff[i]; ++ } ++ else ++ { ++ width += 1; ++ chars += 1; ++ *s++ = mbc[0]; ++ } ++ break; ++ ++ case 0: ++ mblength = 1; ++ /* Fall through */ ++ ++ default: ++ if (memcmp (mbc, input_tab_char, mblength) == 0) ++ chars_per_c = chars_per_input_tab; ++ ++ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') ++ { ++ int width_inc; ++ ++ width_inc = TAB_WIDTH (chars_per_c, input_position); ++ width += width_inc; ++ ++ if (untabify_input) ++ { ++ for (i = width_inc; i; --i) ++ *s++ = ' '; ++ chars += width_inc; ++ } ++ else ++ { ++ for (i = 0; i < mblength; i++) ++ *s++ = mbc[i]; ++ chars += 
mblength; ++ } ++ } ++ else if ((wc_width = wcwidth (wc)) < 1) ++ { ++ if (use_esc_sequence) ++ { ++ for (i = 0; i < mblength; i++) ++ { ++ width += 4; ++ chars += 4; ++ *s++ = '\\'; ++ sprintf (esc_buff, "%03o", c); ++ for (j = 0; j <= 2; ++j) ++ *s++ = (int) esc_buff[j]; ++ } ++ } ++ else if (use_cntrl_prefix) ++ { ++ if (wc < 0200) ++ { ++ width += 2; ++ chars += 2; ++ *s++ = '^'; ++ *s++ = wc ^ 0100; ++ } ++ else ++ { ++ for (i = 0; i < mblength; i++) ++ { ++ width += 4; ++ chars += 4; ++ *s++ = '\\'; ++ sprintf (esc_buff, "%03o", c); ++ for (j = 0; j <= 2; ++j) ++ *s++ = (int) esc_buff[j]; ++ } ++ } ++ } ++ else if (wc == L'\b') ++ { ++ width += -1; ++ chars += 1; ++ *s++ = c; ++ } ++ else ++ { ++ width += 0; ++ chars += mblength; ++ for (i = 0; i < mblength; i++) ++ *s++ = mbc[i]; ++ } ++ } ++ else ++ { ++ width += wc_width; ++ chars += mblength; ++ for (i = 0; i < mblength; i++) ++ *s++ = mbc[i]; ++ } ++ } ++ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); ++ mbc_pos -= mblength; ++ } ++ ++ input_position += width; ++ return chars; ++} ++#endif ++ + /* We've just printed some files and need to clean up things before + looking for more options and printing the next batch of files. + +diff -urNp coreutils-8.0-orig/src/pr.c.orig coreutils-8.0/src/pr.c.orig +--- coreutils-8.0-orig/src/pr.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/pr.c.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,2877 @@ ++/* pr -- convert text files for printing. ++ Copyright (C) 88, 91, 1995-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. 
++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* By Pete TerMaat, with considerable refinement by Roland Huebner. */ ++ ++/* Things to watch: Sys V screws up on ... ++ pr -n -3 -s: /usr/dict/words ++ pr -m -o10 -n /usr/dict/words{,,,} ++ pr -6 -a -n -o5 /usr/dict/words ++ ++ Ideas: ++ ++ Keep a things_to_do list of functions to call when we know we have ++ something to print. Cleaner than current series of checks. ++ ++ Improve the printing of control prefixes. ++ ++ Expand the file name in the centered header line to a full file name. ++ ++ ++ Concept: ++ ++ If the input_tab_char differs from the default value TAB ++ (`-e[CHAR[...]]' is used), any input text tab is expanded to the ++ default width of 8 spaces (compare char_to_clump). - Same as SunOS ++ does. ++ ++ The treatment of the number_separator (compare add_line_number): ++ The default value TAB of the number_separator (`-n[SEP[...]]') doesn't ++ be thought to be an input character. An optional `-e'-input has no ++ effect. ++ - With single column output ++ only one POSIX requirement has to be met: ++ The default n-separator should be a TAB. The consequence is a ++ different width between the number and the text if the output position ++ of the separator changes, i.e. it depends upon the left margin used. ++ That's not nice but easy-to-use together with the defaults of other ++ utilities, e.g. sort or cut. - Same as SunOS does. ++ - With multicolumn output ++ two conflicting POSIX requirements exist: ++ First `default n-separator is TAB', second `output text columns shall ++ be of equal width'. 
Moreover POSIX specifies the number+separator a ++ part of the column, together with `-COLUMN' and `-a -COLUMN'. ++ (With -m output the number shall occupy each line only once. Exactly ++ the same situation as single column output exists.) ++ GNU pr gives priority to the 2nd requirement and observes POSIX ++ column definition. The n-separator TAB is expanded to the same number ++ of spaces in each column using the default value 8. Tabification is ++ only performed if it is compatible with the output position. ++ Consequence: The output text columns are of equal width. The layout ++ of a page does not change if the left margin varies. - Looks better ++ than the SunOS approach. ++ SunOS pr gives priority to the 1st requirement. n-separator TAB ++ width varies with each column. Only the width of text part of the ++ column is fixed. ++ Consequence: The output text columns don't have equal width. The ++ widths and the layout of the whole page varies with the left margin. ++ An overflow of the line length (without margin) over the input value ++ PAGE_WIDTH may occur. ++ ++ The interference of the POSIX-compliant small letter options -w and -s: ++ (`interference' means `setting a _separator_ with -s switches off the ++ column structure and the default - not generally - page_width, ++ acts on -w option') ++ options: text form / separator: equivalent new options: ++ -w l -s[x] ++ -------------------------------------------------------------------- ++ 1. -- -- columns / space -- ++ trunc. to page_width = 72 ++ 2. -- -s[:] full lines / TAB[:] -J --sep-string[=""|:] ++ no truncation ++ 3. -w l -- columns / space -W l ++ trunc. to page_width = l ++ 4. -w l -s[:] columns / no sep.[:] -W l --sep-string[=:] ++ trunc. to page_width = l ++ -------------------------------------------------------------------- ++ ++ ++ Options: ++ ++ Including version 1.22i: ++ Some SMALL LETTER options have been redefined with the object of a ++ better POSIX compliance. 
The output of some further cases has been ++ adapted to other UNIXes. A violation of downward compatibility has to ++ be accepted. ++ Some NEW CAPITAL LETTER options ( -J, -S, -W) has been introduced to ++ turn off unexpected interferences of small letter options (-s and -w ++ together with the three column options). ++ -N option and the second argument LAST_PAGE of +FIRST_PAGE offer more ++ flexibility; The detailed handling of form feeds set in the input ++ files requires -T option. ++ ++ Capital letter options dominate small letter ones. ++ ++ Some of the option-arguments cannot be specified as separate arguments ++ from the preceding option letter (already stated in POSIX specification). ++ ++ Form feeds in the input cause page breaks in the output. Multiple ++ form feeds produce empty pages. ++ ++ +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE] ++ begin [stop] printing with page FIRST_[LAST_]PAGE ++ ++ -COLUMN, --columns=COLUMN ++ Produce output that is COLUMN columns wide and ++ print columns down, unless -a is used. Balance number of ++ lines in the columns on each page. ++ ++ -a, --across Print columns across rather than down, used ++ together with -COLUMN. The input ++ one ++ two ++ three ++ four ++ will be printed with `-a -3' as ++ one two three ++ four ++ ++ -b Balance columns on the last page. ++ -b is no longer an independent option. It's always used ++ together with -COLUMN (unless -a is used) to get a ++ consistent formulation with "FF set by hand" in input ++ files. Each formfeed found terminates the number of lines ++ to be read with the actual page. The situation for ++ printing columns down is equivalent to that on the last ++ page. So we need a balancing. ++ ++ Keeping -b as an underground option guarantees some ++ downward compatibility. Utilities using pr with -b ++ (a most frequently used form) still work as usual. ++ ++ -c, --show-control-chars ++ Print unprintable characters as control prefixes. 
++ Control-g is printed as ^G (use hat notation) and ++ octal backslash notation. ++ ++ -d, --double-space Double space the output. ++ ++ -D FORMAT, --date-format=FORMAT Use FORMAT for the header date. ++ ++ -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]] ++ Expand tabs to spaces on input. Optional argument CHAR ++ is the input TAB character. (Default is TAB). Optional ++ argument WIDTH is the input TAB character's width. ++ (Default is 8.) ++ ++ -F, -f, --form-feed Use formfeeds instead of newlines to separate ++ pages. A three line HEADER is used, no TRAILER with -F, ++ without -F both HEADER and TRAILER are made of five lines. ++ ++ -h HEADER, --header=HEADER ++ Replace the filename in the header with the string HEADER. ++ A centered header is used. ++ ++ -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]] ++ Replace spaces with tabs on output. Optional argument ++ CHAR is the output TAB character. (Default is TAB). ++ Optional argument WIDTH is the output TAB character's ++ width. (Default is 8) ++ ++ -J, --join-lines Merge lines of full length, turns off -W/-w ++ line truncation, no column alignment, --sep-string[=STRING] ++ sets separators, works with all column options ++ (-COLUMN | -a -COLUMN | -m). ++ -J has been introduced (together with -W and --sep-string) to ++ disentangle the old (POSIX compliant) options -w, -s ++ along with the 3 column options. ++ ++ -l PAGE_LENGTH, --length=PAGE_LENGTH ++ Set the page length to PAGE_LENGTH lines. Default is 66, ++ including 5 lines of HEADER and 5 lines of TRAILER ++ without -F, but only 3 lines of HEADER and no TRAILER ++ with -F (i.e the number of text lines defaults to 56 or ++ 63 respectively). ++ ++ -m, --merge Print files in parallel; pad_across_to align ++ columns; truncate lines and print separator strings; ++ Do it also with empty columns to get a continuous line ++ numbering and column marking by separators throughout ++ the whole merged file. 
++ ++ Empty pages in some input files produce empty columns ++ [marked by separators] in the merged pages. Completely ++ empty merged pages show no column separators at all. ++ ++ The layout of a merged page is ruled by the largest form ++ feed distance of the single pages at that page. Shorter ++ columns will be filled up with empty lines. ++ ++ Together with -J option join lines of full length and ++ set separators when -S option is used. ++ ++ -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]] ++ Provide DIGITS digit line numbering (default for DIGITS ++ is 5). With multicolumn output the number occupies the ++ first DIGITS column positions of each text column or only ++ each line of -m output. ++ With single column output the number precedes each line ++ just as -m output. ++ Optional argument SEP is the character appended to the ++ line number to separate it from the text followed. ++ The default separator is a TAB. In a strict sense a TAB ++ is always printed with single column output only. The ++ TAB-width varies with the TAB-position, e.g. with the ++ left margin specified by -o option. ++ With multicolumn output priority is given to `equal width ++ of output columns' (a POSIX specification). The TAB-width ++ is fixed to the value of the 1st column and does not ++ change with different values of left margin. That means a ++ fixed number of spaces is always printed in the place of ++ a TAB. The tabification depends upon the output ++ position. ++ ++ Default counting of the line numbers starts with 1st ++ line of the input file (not the 1st line printed, ++ compare the --page option and -N option). ++ ++ -N NUMBER, --first-line-number=NUMBER ++ Start line counting with the number NUMBER at the 1st ++ line of first page printed (mostly not the 1st line of ++ the input file). ++ ++ -o MARGIN, --indent=MARGIN ++ Offset each line with a margin MARGIN spaces wide. ++ Total page width is the size of the margin plus the ++ PAGE_WIDTH set with -W/-w option. 
++ ++ -r, --no-file-warnings ++ Omit warning when a file cannot be opened. ++ ++ -s[CHAR], --separator[=CHAR] ++ Separate columns by a single character CHAR, default for ++ CHAR is the TAB character without -w and 'no char' with -w. ++ Without `-s' default separator `space' is set. ++ -s[CHAR] turns off line truncation of all 3 column options ++ (-COLUMN|-a -COLUMN|-m) except -w is set. That is a POSIX ++ compliant formulation. The source code translates -s into ++ the new options -S and -J, also -W if required. ++ ++ -S STRING, --sep-string[=STRING] ++ Separate columns by any string STRING. The -S option ++ doesn't react upon the -W/-w option (unlike -s option ++ does). It defines a separator nothing else. ++ Without -S: Default separator TAB is used with -J and ++ `space' otherwise (same as -S" "). ++ With -S "": No separator is used. ++ Quotes should be used with blanks and some shell active ++ characters. ++ -S is problematic because in its obsolete form you ++ cannot use -S "STRING", but in its standard form you ++ must use -S "STRING" if STRING is empty. Use ++ --sep-string to avoid the ambiguity. ++ ++ -t, --omit-header Do not print headers or footers but retain form ++ feeds set in the input files. ++ ++ -T, --omit-pagination ++ Do not print headers or footers, eliminate any pagination ++ by form feeds set in the input files. ++ ++ -v, --show-nonprinting ++ Print unprintable characters as escape sequences. Use ++ octal backslash notation. Control-G becomes \007. ++ ++ -w PAGE_WIDTH, --width=PAGE_WIDTH ++ Set page width to PAGE_WIDTH characters for multiple ++ text-column output only (default for PAGE_WIDTH is 72). ++ -s[CHAR] turns off the default page width and any line ++ truncation. Lines of full length will be merged, ++ regardless of the column options set. A POSIX compliant ++ formulation. ++ ++ -W PAGE_WIDTH, --page-width=PAGE_WIDTH ++ Set the page width to PAGE_WIDTH characters. That's valid ++ with and without a column option. 
Text lines will be ++ truncated, unless -J is used. Together with one of the ++ column options (-COLUMN| -a -COLUMN| -m) column alignment ++ is always used. ++ Default is 72 characters. ++ Without -W PAGE_WIDTH ++ - but with one of the column options default truncation of ++ 72 characters is used (to keep downward compatibility ++ and to simplify most frequently met column tasks). ++ Column alignment and column separators are used. ++ - and without any of the column options NO line truncation ++ is used (to keep downward compatibility and to meet most ++ frequent tasks). That's equivalent to -W 72 -J . ++ ++ With/without -W PAGE_WIDTH the header line is always ++ truncated to avoid line overflow. ++ ++ (In pr versions newer than 1.14 -S option does no longer ++ affect -W option.) ++ ++*/ ++ ++ ++#include ++ ++#include ++#include ++#include "system.h" ++#include "error.h" ++#include "hard-locale.h" ++#include "mbswidth.h" ++#include "quote.h" ++#include "stat-time.h" ++#include "stdio--.h" ++#include "strftime.h" ++#include "xstrtol.h" ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "pr" ++ ++#define AUTHORS \ ++ proper_name ("Pete TerMaat"), \ ++ proper_name ("Roland Huebner") ++ ++/* Used with start_position in the struct COLUMN described below. ++ If start_position == ANYWHERE, we aren't truncating columns and ++ can begin printing a column anywhere. Otherwise we must pad to ++ the horizontal position start_position. */ ++#define ANYWHERE 0 ++ ++/* Each column has one of these structures allocated for it. ++ If we're only dealing with one file, fp is the same for all ++ columns. ++ ++ The general strategy is to spend time setting up these column ++ structures (storing columns if necessary), after which printing ++ is a matter of flitting from column to column and calling ++ print_func. 
++ ++ Parallel files, single files printing across in multiple ++ columns, and single files printing down in multiple columns all ++ fit the same printing loop. ++ ++ print_func Function used to print lines in this column. ++ If we're storing this column it will be ++ print_stored(), Otherwise it will be read_line(). ++ ++ char_func Function used to process characters in this column. ++ If we're storing this column it will be store_char(), ++ otherwise it will be print_char(). ++ ++ current_line Index of the current entry in line_vector, which ++ contains the index of the first character of the ++ current line in buff[]. ++ ++ lines_stored Number of lines in this column which are stored in ++ buff. ++ ++ lines_to_print If we're storing this column, lines_to_print is ++ the number of stored_lines which remain to be ++ printed. Otherwise it is the number of lines ++ we can print without exceeding lines_per_body. ++ ++ start_position The horizontal position we want to be in before we ++ print the first character in this column. ++ ++ numbered True means precede this column with a line number. */ ++ ++/* FIXME: There are many unchecked integer overflows in this file, ++ that will cause this command to misbehave given large inputs or ++ options. Many of the "int" values below should be "size_t" or ++ something else like that. */ ++ ++struct COLUMN; ++struct COLUMN ++ { ++ FILE *fp; /* Input stream for this column. */ ++ char const *name; /* File name. */ ++ enum ++ { ++ OPEN, ++ FF_FOUND, /* used with -b option, set with \f, changed ++ to ON_HOLD after print_header */ ++ ON_HOLD, /* Hit a form feed. */ ++ CLOSED ++ } ++ status; /* Status of the file pointer. */ ++ ++ /* Func to print lines in this col. */ ++ bool (*print_func) (struct COLUMN *); ++ ++ /* Func to print/store chars in this col. */ ++ void (*char_func) (char); ++ ++ int current_line; /* Index of current place in line_vector. */ ++ int lines_stored; /* Number of lines stored in buff. 
*/ ++ int lines_to_print; /* No. lines stored or space left on page. */ ++ int start_position; /* Horizontal position of first char. */ ++ bool numbered; ++ bool full_page_printed; /* True means printed without a FF found. */ ++ ++ /* p->full_page_printed controls a special case of "FF set by hand": ++ True means a full page has been printed without FF found. To avoid an ++ additional empty page we have to ignore a FF immediately following in ++ the next line. */ ++ }; ++ ++typedef struct COLUMN COLUMN; ++ ++static int char_to_clump (char c); ++static bool read_line (COLUMN *p); ++static bool print_page (void); ++static bool print_stored (COLUMN *p); ++static bool open_file (char *name, COLUMN *p); ++static bool skip_to_page (uintmax_t page); ++static void print_header (void); ++static void pad_across_to (int position); ++static void add_line_number (COLUMN *p); ++static void getoptarg (char *arg, char switch_char, char *character, ++ int *number); ++void usage (int status); ++static void print_files (int number_of_files, char **av); ++static void init_parameters (int number_of_files); ++static void init_header (char const *filename, int desc); ++static bool init_fps (int number_of_files, char **av); ++static void init_funcs (void); ++static void init_store_cols (void); ++static void store_columns (void); ++static void balance (int total_stored); ++static void store_char (char c); ++static void pad_down (int lines); ++static void read_rest_of_line (COLUMN *p); ++static void skip_read (COLUMN *p, int column_number); ++static void print_char (char c); ++static void cleanup (void); ++static void print_sep_string (void); ++static void separator_string (const char *optarg_S); ++ ++/* All of the columns to print. */ ++static COLUMN *column_vector; ++ ++/* When printing a single file in multiple downward columns, ++ we store the leftmost columns contiguously in buff. 
++ To print a line from buff, get the index of the first character ++ from line_vector[i], and print up to line_vector[i + 1]. */ ++static char *buff; ++ ++/* Index of the position in buff where the next character ++ will be stored. */ ++static unsigned int buff_current; ++ ++/* The number of characters in buff. ++ Used for allocation of buff and to detect overflow of buff. */ ++static size_t buff_allocated; ++ ++/* Array of indices into buff. ++ Each entry is an index of the first character of a line. ++ This is used when storing lines to facilitate shuffling when ++ we do column balancing on the last page. */ ++static int *line_vector; ++ ++/* Array of horizontal positions. ++ For each line in line_vector, end_vector[line] is the horizontal ++ position we are in after printing that line. We keep track of this ++ so that we know how much we need to pad to prepare for the next ++ column. */ ++static int *end_vector; ++ ++/* (-m) True means we're printing multiple files in parallel. */ ++static bool parallel_files = false; ++ ++/* (-m) True means a line starts with some empty columns (some files ++ already CLOSED or ON_HOLD) which we have to align. */ ++static bool align_empty_cols; ++ ++/* (-m) True means we have not yet found any printable column in a line. ++ align_empty_cols = true has to be maintained. */ ++static bool empty_line; ++ ++/* (-m) False means printable column output precedes a form feed found. ++ Column alignment is done only once. No additional action with that form ++ feed. ++ True means we found only a form feed in a column. Maybe we have to do ++ some column alignment with that form feed. */ ++static bool FF_only; ++ ++/* (-[0-9]+) True means we're given an option explicitly specifying ++ number of columns. Used to detect when this option is used with -m ++ and when translating old options to new/long options. */ ++static bool explicit_columns = false; ++ ++/* (-t|-T) False means we aren't printing headers and footers.
*/ ++static bool extremities = true; ++ ++/* (-t) True means we retain all FF set by hand in input files. ++ False is set with -T option. */ ++static bool keep_FF = false; ++static bool print_a_FF = false; ++ ++/* True means we need to print a header as soon as we know we've got input ++ to print after it. */ ++static bool print_a_header; ++ ++/* (-f) True means use formfeeds instead of newlines to separate pages. */ ++static bool use_form_feed = false; ++ ++/* True means we have read the standard input. */ ++static bool have_read_stdin = false; ++ ++/* True means the -a flag has been given. */ ++static bool print_across_flag = false; ++ ++/* True means we're printing one file in multiple (>1) downward columns. */ ++static bool storing_columns = true; ++ ++/* (-b) True means balance columns on the last page as Sys V does. */ ++/* That's no longer an independent option. With storing_columns = true ++ balance_columns = true is used too (s. function init_parameters). ++ We get a consistent formulation with "FF set by hand" in input files. */ ++static bool balance_columns = false; ++ ++/* (-l) Number of lines on a page, including header and footer lines. */ ++static int lines_per_page = 66; ++ ++/* Number of lines in the header and footer can be reset to 0 using ++ the -t flag. */ ++enum { lines_per_header = 5 }; ++static int lines_per_body; ++enum { lines_per_footer = 5 }; ++ ++/* (-w|-W) Width in characters of the page. Does not include the width of ++ the margin. */ ++static int chars_per_line = 72; ++ ++/* (-w|W) True means we truncate lines longer than chars_per_column. */ ++static bool truncate_lines = false; ++ ++/* (-J) True means we join lines without any line truncation. -J ++ dominates -w option. */ ++static bool join_lines = false; ++ ++/* Number of characters in a column. Based on col_sep_length and ++ page width. */ ++static int chars_per_column; ++ ++/* (-e) True means convert tabs to spaces on input. 
*/ ++static bool untabify_input = false; ++ ++/* (-e) The input tab character. */ ++static char input_tab_char = '\t'; ++ ++/* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... ++ where the leftmost column is 1. */ ++static int chars_per_input_tab = 8; ++ ++/* (-i) True means convert spaces to tabs on output. */ ++static bool tabify_output = false; ++ ++/* (-i) The output tab character. */ ++static char output_tab_char = '\t'; ++ ++/* (-i) The width of the output tab. */ ++static int chars_per_output_tab = 8; ++ ++/* Keeps track of pending white space. When we hit a nonspace ++ character after some whitespace, we print whitespace, tabbing ++ if necessary to get to output_position + spaces_not_printed. */ ++static int spaces_not_printed; ++ ++/* (-o) Number of spaces in the left margin (tabs used when possible). */ ++static int chars_per_margin = 0; ++ ++/* Position where the next character will fall. ++ Leftmost position is 0 + chars_per_margin. ++ Rightmost position is chars_per_margin + chars_per_line - 1. ++ This is important for converting spaces to tabs on output. */ ++static int output_position; ++ ++/* Horizontal position relative to the current file. ++ (output_position depends on where we are on the page; ++ input_position depends on where we are in the file.) ++ Important for converting tabs to spaces on input. */ ++static int input_position; ++ ++/* True if there were any failed opens so we can exit with nonzero ++ status. */ ++static bool failed_opens = false; ++ ++/* The number of spaces taken up if we print a tab character with width ++ c_ from position h_. */ ++#define TAB_WIDTH(c_, h_) ((c_) - ((h_) % (c_))) ++ ++/* The horizontal position we'll be at after printing a tab character ++ of width c_ from the position h_. */ ++#define POS_AFTER_TAB(c_, h_) ((h_) + TAB_WIDTH (c_, h_)) ++ ++/* (-NNN) Number of columns of text to print. */ ++static int columns = 1; ++ ++/* (+NNN:MMM) Page numbers on which to begin and stop printing. 
++ first_page_number = 0 will be used to check input only. */ ++static uintmax_t first_page_number = 0; ++static uintmax_t last_page_number = UINTMAX_MAX; ++ ++/* Number of files open (not closed, not on hold). */ ++static int files_ready_to_read = 0; ++ ++/* Current page number. Displayed in header. */ ++static uintmax_t page_number; ++ ++/* Current line number. Displayed when -n flag is specified. ++ ++ When printing files in parallel (-m flag), line numbering is as follows: ++ 1 foo goo moo ++ 2 hoo too zoo ++ ++ When printing files across (-a flag), ... ++ 1 foo 2 moo 3 goo ++ 4 hoo 5 too 6 zoo ++ ++ Otherwise, line numbering is as follows: ++ 1 foo 3 goo 5 too ++ 2 moo 4 hoo 6 zoo */ ++static int line_number; ++ ++/* With line_number overflow, we use power_10 to cut off the higher-order ++ digits of the line_number */ ++static int power_10; ++ ++/* (-n) True means lines should be preceded by numbers. */ ++static bool numbered_lines = false; ++ ++/* (-n) Character which follows each line number. */ ++static char number_separator = '\t'; ++ ++/* (-n) line counting starts with 1st line of input file (not with 1st ++ line of 1st page printed). */ ++static int line_count = 1; ++ ++/* (-n) True means counting of skipped lines starts with 1st line of ++ input file. False means -N option is used in addition, counting of ++ skipped lines not required. */ ++static bool skip_count = true; ++ ++/* (-N) Counting starts with start_line_number = NUMBER at 1st line of ++ first page printed, usually not 1st page of input file. */ ++static int start_line_num = 1; ++ ++/* (-n) Width in characters of a line number. */ ++static int chars_per_number = 5; ++ ++/* Used when widening the first column to accommodate numbers -- only ++ needed when printing files in parallel. Includes width of both the ++ number and the number_separator. */ ++static int number_width; ++ ++/* Buffer sprintf uses to format a line number. 
*/ ++static char *number_buff; ++ ++/* (-v) True means unprintable characters are printed as escape sequences. ++ control-g becomes \007. */ ++static bool use_esc_sequence = false; ++ ++/* (-c) True means unprintable characters are printed as control prefixes. ++ control-g becomes ^G. */ ++static bool use_cntrl_prefix = false; ++ ++/* (-d) True means output is double spaced. */ ++static bool double_space = false; ++ ++/* Number of files opened initially in init_files. Should be 1 ++ unless we're printing multiple files in parallel. */ ++static int total_files = 0; ++ ++/* (-r) True means don't complain if we can't open a file. */ ++static bool ignore_failed_opens = false; ++ ++/* (-S) True means we separate columns with a specified string. ++ -S option does not affect line truncation nor column alignment. */ ++static bool use_col_separator = false; ++ ++/* String used to separate columns if the -S option has been specified. ++ Default without -S but together with one of the column options ++ -a|COLUMN|-m is a `space' and with the -J option a `tab'. */ ++static char *col_sep_string = (char *) ""; ++static int col_sep_length = 0; ++static char *column_separator = (char *) " "; ++static char *line_separator = (char *) "\t"; ++ ++/* Number of separator characters waiting to be printed as soon as we ++ know that we have any input remaining to be printed. */ ++static int separators_not_printed; ++ ++/* Position we need to pad to, as soon as we know that we have input ++ remaining to be printed. */ ++static int padding_not_printed; ++ ++/* True means we should pad the end of the page. Remains false until we ++ know we have a page to print. */ ++static bool pad_vertically; ++ ++/* (-h) String of characters used in place of the filename in the header. */ ++static char *custom_header; ++ ++/* (-D) Date format for the header. */ ++static char const *date_format; ++ ++/* Date and file name for the header. 
*/ ++static char *date_text; ++static char const *file_text; ++ ++/* Output columns available, not counting the date and file name. */ ++static int header_width_available; ++ ++static char *clump_buff; ++ ++/* True means we read the line no. lines_per_body in skip_read ++ called by skip_to_page. That variable controls the coincidence of a ++ "FF set by hand" and "full_page_printed", see above the definition of ++ structure COLUMN. */ ++static bool last_line = false; ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. */ ++enum ++{ ++ COLUMNS_OPTION = CHAR_MAX + 1, ++ PAGES_OPTION ++}; ++ ++static char const short_options[] = ++ "-0123456789D:FJN:S::TW:abcde::fh:i::l:mn::o:rs::tvw:"; ++ ++static struct option const long_options[] = ++{ ++ {"pages", required_argument, NULL, PAGES_OPTION}, ++ {"columns", required_argument, NULL, COLUMNS_OPTION}, ++ {"across", no_argument, NULL, 'a'}, ++ {"show-control-chars", no_argument, NULL, 'c'}, ++ {"double-space", no_argument, NULL, 'd'}, ++ {"date-format", required_argument, NULL, 'D'}, ++ {"expand-tabs", optional_argument, NULL, 'e'}, ++ {"form-feed", no_argument, NULL, 'f'}, ++ {"header", required_argument, NULL, 'h'}, ++ {"output-tabs", optional_argument, NULL, 'i'}, ++ {"join-lines", no_argument, NULL, 'J'}, ++ {"length", required_argument, NULL, 'l'}, ++ {"merge", no_argument, NULL, 'm'}, ++ {"number-lines", optional_argument, NULL, 'n'}, ++ {"first-line-number", required_argument, NULL, 'N'}, ++ {"indent", required_argument, NULL, 'o'}, ++ {"no-file-warnings", no_argument, NULL, 'r'}, ++ {"separator", optional_argument, NULL, 's'}, ++ {"sep-string", optional_argument, NULL, 'S'}, ++ {"omit-header", no_argument, NULL, 't'}, ++ {"omit-pagination", no_argument, NULL, 'T'}, ++ {"show-nonprinting", no_argument, NULL, 'v'}, ++ {"width", required_argument, NULL, 'w'}, ++ {"page-width", required_argument, NULL, 'W'}, ++ 
{GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++/* Return the number of columns that have either an open file or ++ stored lines. */ ++ ++static int ++cols_ready_to_print (void) ++{ ++ COLUMN *q; ++ int i; ++ int n; ++ ++ n = 0; ++ for (q = column_vector, i = 0; i < columns; ++q, ++i) ++ if (q->status == OPEN || ++ q->status == FF_FOUND || /* With -b: To print a header only */ ++ (storing_columns && q->lines_stored > 0 && q->lines_to_print > 0)) ++ ++n; ++ return n; ++} ++ ++/* Estimate first_ / last_page_number ++ using option +FIRST_PAGE:LAST_PAGE */ ++ ++static bool ++first_last_page (int oi, char c, char const *pages) ++{ ++ char *p; ++ uintmax_t first; ++ uintmax_t last = UINTMAX_MAX; ++ strtol_error err = xstrtoumax (pages, &p, 10, &first, ""); ++ if (err != LONGINT_OK && err != LONGINT_INVALID_SUFFIX_CHAR) ++ xstrtol_fatal (err, oi, c, long_options, pages); ++ ++ if (p == pages || !first) ++ return false; ++ ++ if (*p == ':') ++ { ++ char const *p1 = p + 1; ++ err = xstrtoumax (p1, &p, 10, &last, ""); ++ if (err != LONGINT_OK) ++ xstrtol_fatal (err, oi, c, long_options, pages); ++ if (p1 == p || last < first) ++ return false; ++ } ++ ++ if (*p) ++ return false; ++ ++ first_page_number = first; ++ last_page_number = last; ++ return true; ++} ++ ++/* Parse column count string S, and if it's valid (1 or larger and ++ within range of the type of `columns') set the global variables ++ columns and explicit_columns and return true. ++ Otherwise, exit with a diagnostic. */ ++static void ++parse_column_count (char const *s) ++{ ++ long int tmp_long; ++ if (xstrtol (s, NULL, 10, &tmp_long, "") != LONGINT_OK ++ || !(1 <= tmp_long && tmp_long <= INT_MAX)) ++ error (EXIT_FAILURE, 0, ++ _("invalid number of columns: %s"), quote (s)); ++ ++ columns = tmp_long; ++ explicit_columns = true; ++} ++ ++/* Estimate length of col_sep_string with option -S. 
*/ ++ ++static void ++separator_string (const char *optarg_S) ++{ ++ col_sep_length = (int) strlen (optarg_S); ++ col_sep_string = xmalloc (col_sep_length + 1); ++ strcpy (col_sep_string, optarg_S); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int n_files; ++ bool old_options = false; ++ bool old_w = false; ++ bool old_s = false; ++ char **file_names; ++ ++ /* Accumulate the digits of old-style options like -99. */ ++ char *column_count_string = NULL; ++ size_t n_digits = 0; ++ size_t n_alloc = 0; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdout); ++ ++ n_files = 0; ++ file_names = (argc > 1 ++ ? xmalloc ((argc - 1) * sizeof (char *)) ++ : NULL); ++ ++ for (;;) ++ { ++ int oi = -1; ++ int c = getopt_long (argc, argv, short_options, long_options, &oi); ++ if (c == -1) ++ break; ++ ++ if (ISDIGIT (c)) ++ { ++ /* Accumulate column-count digits specified via old-style options. */ ++ if (n_digits + 1 >= n_alloc) ++ column_count_string ++ = X2REALLOC (column_count_string, &n_alloc); ++ column_count_string[n_digits++] = c; ++ column_count_string[n_digits] = '\0'; ++ continue; ++ } ++ ++ n_digits = 0; ++ ++ switch (c) ++ { ++ case 1: /* Non-option argument. */ ++ /* long option --page dominates old `+FIRST_PAGE ...'. */ ++ if (! (first_page_number == 0 ++ && *optarg == '+' && first_last_page (-2, '+', optarg + 1))) ++ file_names[n_files++] = optarg; ++ break; ++ ++ case PAGES_OPTION: /* --pages=FIRST_PAGE[:LAST_PAGE] */ ++ { /* dominates old opt +... */ ++ if (! optarg) ++ error (EXIT_FAILURE, 0, ++ _("`--pages=FIRST_PAGE[:LAST_PAGE]' missing argument")); ++ else if (! 
first_last_page (oi, 0, optarg)) ++ error (EXIT_FAILURE, 0, _("invalid page range %s"), ++ quote (optarg)); ++ break; ++ } ++ ++ case COLUMNS_OPTION: /* --columns=COLUMN */ ++ { ++ parse_column_count (optarg); ++ ++ /* If there was a prior column count specified via the ++ short-named option syntax, e.g., -9, ensure that this ++ long-name-specified value overrides it. */ ++ free (column_count_string); ++ column_count_string = NULL; ++ n_alloc = 0; ++ break; ++ } ++ ++ case 'a': ++ print_across_flag = true; ++ storing_columns = false; ++ break; ++ case 'b': ++ balance_columns = true; ++ break; ++ case 'c': ++ use_cntrl_prefix = true; ++ break; ++ case 'd': ++ double_space = true; ++ break; ++ case 'D': ++ date_format = optarg; ++ break; ++ case 'e': ++ if (optarg) ++ getoptarg (optarg, 'e', &input_tab_char, ++ &chars_per_input_tab); ++ /* Could check tab width > 0. */ ++ untabify_input = true; ++ break; ++ case 'f': ++ case 'F': ++ use_form_feed = true; ++ break; ++ case 'h': ++ custom_header = optarg; ++ break; ++ case 'i': ++ if (optarg) ++ getoptarg (optarg, 'i', &output_tab_char, ++ &chars_per_output_tab); ++ /* Could check tab width > 0. 
*/ ++ tabify_output = true; ++ break; ++ case 'J': ++ join_lines = true; ++ break; ++ case 'l': ++ { ++ long int tmp_long; ++ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK ++ || tmp_long <= 0 || tmp_long > INT_MAX) ++ { ++ error (EXIT_FAILURE, 0, ++ _("`-l PAGE_LENGTH' invalid number of lines: %s"), ++ quote (optarg)); ++ } ++ lines_per_page = tmp_long; ++ break; ++ } ++ case 'm': ++ parallel_files = true; ++ storing_columns = false; ++ break; ++ case 'n': ++ numbered_lines = true; ++ if (optarg) ++ getoptarg (optarg, 'n', &number_separator, ++ &chars_per_number); ++ break; ++ case 'N': ++ skip_count = false; ++ { ++ long int tmp_long; ++ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK ++ || tmp_long > INT_MAX) ++ { ++ error (EXIT_FAILURE, 0, ++ _("`-N NUMBER' invalid starting line number: %s"), ++ quote (optarg)); ++ } ++ start_line_num = tmp_long; ++ break; ++ } ++ case 'o': ++ { ++ long int tmp_long; ++ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK ++ || tmp_long < 0 || tmp_long > INT_MAX) ++ error (EXIT_FAILURE, 0, ++ _("`-o MARGIN' invalid line offset: %s"), quote (optarg)); ++ chars_per_margin = tmp_long; ++ break; ++ } ++ case 'r': ++ ignore_failed_opens = true; ++ break; ++ case 's': ++ old_options = true; ++ old_s = true; ++ if (!use_col_separator && optarg) ++ separator_string (optarg); ++ break; ++ case 'S': ++ old_s = false; ++ /* Reset an additional input of -s, -S dominates -s */ ++ col_sep_string = bad_cast (""); ++ col_sep_length = 0; ++ use_col_separator = true; ++ if (optarg) ++ separator_string (optarg); ++ break; ++ case 't': ++ extremities = false; ++ keep_FF = true; ++ break; ++ case 'T': ++ extremities = false; ++ keep_FF = false; ++ break; ++ case 'v': ++ use_esc_sequence = true; ++ break; ++ case 'w': ++ old_options = true; ++ old_w = true; ++ { ++ long int tmp_long; ++ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK ++ || tmp_long <= 0 || tmp_long > INT_MAX) ++ error (EXIT_FAILURE, 0, ++ 
_("`-w PAGE_WIDTH' invalid number of characters: %s"), ++ quote (optarg)); ++ if (!truncate_lines) ++ chars_per_line = tmp_long; ++ break; ++ } ++ case 'W': ++ old_w = false; /* dominates -w */ ++ truncate_lines = true; ++ { ++ long int tmp_long; ++ if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK ++ || tmp_long <= 0 || tmp_long > INT_MAX) ++ error (EXIT_FAILURE, 0, ++ _("`-W PAGE_WIDTH' invalid number of characters: %s"), ++ quote (optarg)); ++ chars_per_line = tmp_long; ++ break; ++ } ++ case_GETOPT_HELP_CHAR; ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ default: ++ usage (EXIT_FAILURE); ++ break; ++ } ++ } ++ ++ if (column_count_string) ++ { ++ parse_column_count (column_count_string); ++ free (column_count_string); ++ } ++ ++ if (! date_format) ++ date_format = (getenv ("POSIXLY_CORRECT") && !hard_locale (LC_TIME) ++ ? "%b %e %H:%M %Y" ++ : "%Y-%m-%d %H:%M"); ++ ++ /* Now we can set a reasonable initial value: */ ++ if (first_page_number == 0) ++ first_page_number = 1; ++ ++ if (parallel_files && explicit_columns) ++ error (EXIT_FAILURE, 0, ++ _("cannot specify number of columns when printing in parallel")); ++ ++ if (parallel_files && print_across_flag) ++ error (EXIT_FAILURE, 0, ++ _("cannot specify both printing across and printing in parallel")); ++ ++/* Translate some old short options to new/long options. ++ To meet downward compatibility with other UNIX pr utilities ++ and some POSIX specifications. 
*/ ++ ++ if (old_options) ++ { ++ if (old_w) ++ { ++ if (parallel_files || explicit_columns) ++ { ++ /* activate -W */ ++ truncate_lines = true; ++ if (old_s) ++ /* adapt HP-UX and SunOS: -s = no separator; ++ activate -S */ ++ use_col_separator = true; ++ } ++ else ++ /* old -w sets width with columns only ++ activate -J */ ++ join_lines = true; ++ } ++ else if (!use_col_separator) ++ { ++ /* No -S option read */ ++ if (old_s && (parallel_files || explicit_columns)) ++ { ++ if (!truncate_lines) ++ { ++ /* old -s (without -w and -W) annuls column alignment, ++ uses fields, activate -J */ ++ join_lines = true; ++ if (col_sep_length > 0) ++ /* activate -S */ ++ use_col_separator = true; ++ } ++ else ++ /* with -W */ ++ /* adapt HP-UX and SunOS: -s = no separator; ++ activate -S */ ++ use_col_separator = true; ++ } ++ } ++ } ++ ++ for (; optind < argc; optind++) ++ { ++ file_names[n_files++] = argv[optind]; ++ } ++ ++ if (n_files == 0) ++ { ++ /* No file arguments specified; read from standard input. */ ++ print_files (0, NULL); ++ } ++ else ++ { ++ if (parallel_files) ++ print_files (n_files, file_names); ++ else ++ { ++ int i; ++ for (i = 0; i < n_files; i++) ++ print_files (1, &file_names[i]); ++ } ++ } ++ ++ cleanup (); ++ ++ if (have_read_stdin && fclose (stdin) == EOF) ++ error (EXIT_FAILURE, errno, _("standard input")); ++ if (failed_opens) ++ exit (EXIT_FAILURE); ++ exit (EXIT_SUCCESS); ++} ++ ++/* Parse options of the form -scNNN. ++ ++ Example: -nck, where 'n' is the option, c is the optional number ++ separator, and k is the optional width of the field used when printing ++ a number. 
*/ ++ ++static void ++getoptarg (char *arg, char switch_char, char *character, int *number) ++{ ++ if (!ISDIGIT (*arg)) ++ *character = *arg++; ++ if (*arg) ++ { ++ long int tmp_long; ++ if (xstrtol (arg, NULL, 10, &tmp_long, "") != LONGINT_OK ++ || tmp_long <= 0 || tmp_long > INT_MAX) ++ { ++ error (0, 0, ++ _("`-%c' extra characters or invalid number in the argument: %s"), ++ switch_char, quote (arg)); ++ usage (EXIT_FAILURE); ++ } ++ *number = tmp_long; ++ } ++} ++ ++/* Set parameters related to formatting. */ ++ ++static void ++init_parameters (int number_of_files) ++{ ++ int chars_used_by_number = 0; ++ ++ lines_per_body = lines_per_page - lines_per_header - lines_per_footer; ++ if (lines_per_body <= 0) ++ { ++ extremities = false; ++ keep_FF = true; ++ } ++ if (extremities == false) ++ lines_per_body = lines_per_page; ++ ++ if (double_space) ++ lines_per_body = lines_per_body / 2; ++ ++ /* If input is stdin, cannot print parallel files. BSD dumps core ++ on this. */ ++ if (number_of_files == 0) ++ parallel_files = false; ++ ++ if (parallel_files) ++ columns = number_of_files; ++ ++ /* One file, multi columns down: -b option is set to get a consistent ++ formulation with "FF set by hand" in input files. */ ++ if (storing_columns) ++ balance_columns = true; ++ ++ /* Tabification is assumed for multiple columns.
*/ ++ if (columns > 1) ++ { ++ if (!use_col_separator) ++ { ++ /* Use default separator */ ++ if (join_lines) ++ col_sep_string = line_separator; ++ else ++ col_sep_string = column_separator; ++ ++ col_sep_length = 1; ++ use_col_separator = true; ++ } ++ /* It's rather pointless to define a TAB separator with column ++ alignment */ ++ else if (!join_lines && *col_sep_string == '\t') ++ col_sep_string = column_separator; ++ ++ truncate_lines = true; ++ tabify_output = true; ++ } ++ else ++ storing_columns = false; ++ ++ /* -J dominates -w in any case */ ++ if (join_lines) ++ truncate_lines = false; ++ ++ if (numbered_lines) ++ { ++ int tmp_i; ++ int chars_per_default_tab = 8; ++ ++ line_count = start_line_num; ++ ++ /* To allow input tab-expansion (-e sensitive) use: ++ if (number_separator == input_tab_char) ++ number_width = chars_per_number + ++ TAB_WIDTH (chars_per_input_tab, chars_per_number); */ ++ ++ /* Estimate chars_per_text without any margin and keep it constant. */ ++ if (number_separator == '\t') ++ number_width = chars_per_number + ++ TAB_WIDTH (chars_per_default_tab, chars_per_number); ++ else ++ number_width = chars_per_number + 1; ++ ++ /* The number is part of the column width unless we are ++ printing files in parallel. */ ++ if (parallel_files) ++ chars_used_by_number = number_width; ++ ++ /* We use power_10 to cut off the higher-order digits of the ++ line_number in function add_line_number */ ++ tmp_i = chars_per_number; ++ for (power_10 = 1; tmp_i > 0; --tmp_i) ++ power_10 = 10 * power_10; ++ } ++ ++ chars_per_column = (chars_per_line - chars_used_by_number - ++ (columns - 1) * col_sep_length) / columns; ++ ++ if (chars_per_column < 1) ++ error (EXIT_FAILURE, 0, _("page width too narrow")); ++ ++ if (numbered_lines) ++ { ++ free (number_buff); ++ number_buff = xmalloc (MAX ((size_t) chars_per_number + 1, INT_BUFSIZE_BOUND (int))); /* sprintf target: must hold any int (sign, digits, NUL) even when the -n width is smaller */ ++ } ++ ++ /* Pick the maximum between the tab width and the width of an ++ escape sequence.
++ The width of an escape sequence (4) isn't the lower limit any longer. ++ We've to use 8 as the lower limit, if we use chars_per_default_tab = 8 ++ to expand a tab which is not an input_tab-char. */ ++ free (clump_buff); ++ clump_buff = xmalloc (MAX (8, chars_per_input_tab)); ++} ++ ++/* Open the necessary files, ++ maintaining a COLUMN structure for each column. ++ ++ With multiple files, each column p has a different p->fp. ++ With single files, each column p has the same p->fp. ++ Return false if (number_of_files > 0) and no files can be opened, ++ true otherwise. ++ ++ With each column/file p, p->full_page_printed is initialized, ++ see also open_file. */ ++ ++static bool ++init_fps (int number_of_files, char **av) ++{ ++ int i, files_left; ++ COLUMN *p; ++ FILE *firstfp; ++ char const *firstname; ++ ++ total_files = 0; ++ ++ free (column_vector); ++ column_vector = xnmalloc (columns, sizeof (COLUMN)); ++ ++ if (parallel_files) ++ { ++ files_left = number_of_files; ++ for (p = column_vector; files_left--; ++p, ++av) ++ { ++ if (! open_file (*av, p)) ++ { ++ --p; ++ --columns; ++ } ++ } ++ if (columns == 0) ++ return false; ++ init_header ("", -1); ++ } ++ else ++ { ++ p = column_vector; ++ if (number_of_files > 0) ++ { ++ if (! open_file (*av, p)) ++ return false; ++ init_header (*av, fileno (p->fp)); ++ p->lines_stored = 0; ++ } ++ else ++ { ++ p->name = _("standard input"); ++ p->fp = stdin; ++ have_read_stdin = true; ++ p->status = OPEN; ++ p->full_page_printed = false; ++ ++total_files; ++ init_header ("", -1); ++ p->lines_stored = 0; ++ } ++ ++ firstname = p->name; ++ firstfp = p->fp; ++ for (i = columns - 1, ++p; i; --i, ++p) ++ { ++ p->name = firstname; ++ p->fp = firstfp; ++ p->status = OPEN; ++ p->full_page_printed = false; ++ p->lines_stored = 0; ++ } ++ } ++ files_ready_to_read = total_files; ++ return true; ++} ++ ++/* Determine print_func and char_func, the functions ++ used by each column for printing and/or storing.
++ ++ Determine the horizontal position desired when we begin ++ printing a column (p->start_position). */ ++ ++static void ++init_funcs (void) ++{ ++ int i, h, h_next; ++ COLUMN *p; ++ ++ h = chars_per_margin; ++ ++ if (!truncate_lines) ++ h_next = ANYWHERE; ++ else ++ { ++ /* When numbering lines of parallel files, we enlarge the ++ first column to accomodate the number. Looks better than ++ the Sys V approach. */ ++ if (parallel_files && numbered_lines) ++ h_next = h + chars_per_column + number_width; ++ else ++ h_next = h + chars_per_column; ++ } ++ ++ /* Enlarge p->start_position of first column to use the same form of ++ padding_not_printed with all columns. */ ++ h = h + col_sep_length; ++ ++ /* This loop takes care of all but the rightmost column. */ ++ ++ for (p = column_vector, i = 1; i < columns; ++p, ++i) ++ { ++ if (storing_columns) /* One file, multi columns down. */ ++ { ++ p->char_func = store_char; ++ p->print_func = print_stored; ++ } ++ else ++ /* One file, multi columns across; or parallel files. */ ++ { ++ p->char_func = print_char; ++ p->print_func = read_line; ++ } ++ ++ /* Number only the first column when printing files in ++ parallel. */ ++ p->numbered = numbered_lines && (!parallel_files || i == 1); ++ p->start_position = h; ++ ++ /* If we don't truncate lines, all start_positions are ++ ANYWHERE, except the first column's start_position when ++ using a margin. */ ++ ++ if (!truncate_lines) ++ { ++ h = ANYWHERE; ++ h_next = ANYWHERE; ++ } ++ else ++ { ++ h = h_next + col_sep_length; ++ h_next = h + chars_per_column; ++ } ++ } ++ ++ /* The rightmost column. ++ ++ Doesn't need to be stored unless we intend to balance ++ columns on the last page. 
*/ ++ if (storing_columns && balance_columns) ++ { ++ p->char_func = store_char; ++ p->print_func = print_stored; ++ } ++ else ++ { ++ p->char_func = print_char; ++ p->print_func = read_line; ++ } ++ ++ p->numbered = numbered_lines && (!parallel_files || i == 1); ++ p->start_position = h; ++} ++ ++/* Open a file. Return true if successful. ++ ++ With each file p, p->full_page_printed is initialized, ++ see also init_fps. */ ++ ++static bool ++open_file (char *name, COLUMN *p) ++{ ++ if (STREQ (name, "-")) ++ { ++ p->name = _("standard input"); ++ p->fp = stdin; ++ have_read_stdin = true; ++ } ++ else ++ { ++ p->name = name; ++ p->fp = fopen (name, "r"); ++ } ++ if (p->fp == NULL) ++ { ++ failed_opens = true; ++ if (!ignore_failed_opens) ++ error (0, errno, "%s", name); ++ return false; ++ } ++ p->status = OPEN; ++ p->full_page_printed = false; ++ ++total_files; ++ return true; ++} ++ ++/* Close the file in P. ++ ++ If we aren't dealing with multiple files in parallel, we change ++ the status of all columns in the column list to reflect the close. */ ++ ++static void ++close_file (COLUMN *p) ++{ ++ COLUMN *q; ++ int i; ++ ++ if (p->status == CLOSED) ++ return; ++ if (ferror (p->fp)) ++ error (EXIT_FAILURE, errno, "%s", p->name); ++ if (fileno (p->fp) != STDIN_FILENO && fclose (p->fp) != 0) ++ error (EXIT_FAILURE, errno, "%s", p->name); ++ ++ if (!parallel_files) ++ { ++ for (q = column_vector, i = columns; i; ++q, --i) ++ { ++ q->status = CLOSED; ++ if (q->lines_stored == 0) ++ { ++ q->lines_to_print = 0; ++ } ++ } ++ } ++ else ++ { ++ p->status = CLOSED; ++ p->lines_to_print = 0; ++ } ++ ++ --files_ready_to_read; ++} ++ ++/* Put a file on hold until we start a new page, ++ since we've hit a form feed. ++ ++ If we aren't dealing with parallel files, we must change the ++ status of all columns in the column list. 
*/ ++ ++static void ++hold_file (COLUMN *p) ++{ ++ COLUMN *q; ++ int i; ++ ++ if (!parallel_files) ++ for (q = column_vector, i = columns; i; ++q, --i) ++ { ++ if (storing_columns) ++ q->status = FF_FOUND; ++ else ++ q->status = ON_HOLD; ++ } ++ else ++ p->status = ON_HOLD; ++ ++ p->lines_to_print = 0; ++ --files_ready_to_read; ++} ++ ++/* Undo hold_file -- go through the column list and change any ++ ON_HOLD columns to OPEN. Used at the end of each page. */ ++ ++static void ++reset_status (void) ++{ ++ int i = columns; ++ COLUMN *p; ++ ++ for (p = column_vector; i; --i, ++p) ++ if (p->status == ON_HOLD) ++ { ++ p->status = OPEN; ++ files_ready_to_read++; ++ } ++ ++ if (storing_columns) ++ { ++ if (column_vector->status == CLOSED) ++ /* We use the info to output an error message in skip_to_page. */ ++ files_ready_to_read = 0; ++ else ++ files_ready_to_read = 1; ++ } ++} ++ ++/* Print a single file, or multiple files in parallel. ++ ++ Set up the list of columns, opening the necessary files. ++ Allocate space for storing columns, if necessary. ++ Skip to first_page_number, if user has asked to skip leading pages. ++ Determine which functions are appropriate to store/print lines ++ in each column. ++ Print the file(s). */ ++ ++static void ++print_files (int number_of_files, char **av) ++{ ++ init_parameters (number_of_files); ++ if (! init_fps (number_of_files, av)) ++ return; ++ if (storing_columns) ++ init_store_cols (); ++ ++ if (first_page_number > 1) ++ { ++ if (!skip_to_page (first_page_number)) ++ return; ++ else ++ page_number = first_page_number; ++ } ++ else ++ page_number = 1; ++ ++ init_funcs (); ++ ++ line_number = line_count; ++ while (print_page ()) ++ ; ++} ++ ++/* Initialize header information. ++ If DESC is non-negative, it is a file descriptor open to ++ FILENAME for reading. 
*/ ++ ++static void ++init_header (char const *filename, int desc) ++{ ++ char *buf = NULL; ++ struct stat st; ++ struct timespec t; ++ int ns; ++ struct tm *tm; ++ ++ /* If parallel files or standard input, use current date. */ ++ if (STREQ (filename, "-")) ++ desc = -1; ++ if (0 <= desc && fstat (desc, &st) == 0) ++ t = get_stat_mtime (&st); ++ else ++ { ++ static struct timespec timespec; ++ if (! timespec.tv_sec) ++ gettime (&timespec); ++ t = timespec; ++ } ++ ++ ns = t.tv_nsec; ++ tm = localtime (&t.tv_sec); ++ if (tm == NULL) ++ { ++ buf = xmalloc (INT_BUFSIZE_BOUND (long int) ++ + MAX (10, INT_BUFSIZE_BOUND (int))); ++ sprintf (buf, "%ld.%09d", (long int) t.tv_sec, ns); ++ } ++ else ++ { ++ size_t bufsize = nstrftime (NULL, SIZE_MAX, date_format, tm, 0, ns) + 1; ++ buf = xmalloc (bufsize); ++ nstrftime (buf, bufsize, date_format, tm, 0, ns); ++ } ++ ++ free (date_text); ++ date_text = buf; ++ file_text = custom_header ? custom_header : desc < 0 ? "" : filename; ++ header_width_available = (chars_per_line ++ - mbswidth (date_text, 0) ++ - mbswidth (file_text, 0)); ++} ++ ++/* Set things up for printing a page ++ ++ Scan through the columns ... ++ Determine which are ready to print ++ (i.e., which have lines stored or open files) ++ Set p->lines_to_print appropriately ++ (to p->lines_stored if we're storing, or lines_per_body ++ if we're reading straight from the file) ++ Keep track of this total so we know when to stop printing */ ++ ++static void ++init_page (void) ++{ ++ int j; ++ COLUMN *p; ++ ++ if (storing_columns) ++ { ++ store_columns (); ++ for (j = columns - 1, p = column_vector; j; --j, ++p) ++ { ++ p->lines_to_print = p->lines_stored; ++ } ++ ++ /* Last column. */ ++ if (balance_columns) ++ { ++ p->lines_to_print = p->lines_stored; ++ } ++ /* Since we're not balancing columns, we don't need to store ++ the rightmost column. Read it straight from the file. 
*/ ++ else ++ { ++ if (p->status == OPEN) ++ { ++ p->lines_to_print = lines_per_body; ++ } ++ else ++ p->lines_to_print = 0; ++ } ++ } ++ else ++ for (j = columns, p = column_vector; j; --j, ++p) ++ if (p->status == OPEN) ++ { ++ p->lines_to_print = lines_per_body; ++ } ++ else ++ p->lines_to_print = 0; ++} ++ ++/* Align empty columns and print separators. ++ Empty columns will be formed by files with status ON_HOLD or CLOSED ++ when printing multiple files in parallel. */ ++ ++static void ++align_column (COLUMN *p) ++{ ++ padding_not_printed = p->start_position; ++ if (padding_not_printed - col_sep_length > 0) ++ { ++ pad_across_to (padding_not_printed - col_sep_length); ++ padding_not_printed = ANYWHERE; ++ } ++ ++ if (use_col_separator) ++ print_sep_string (); ++ ++ if (p->numbered) ++ add_line_number (p); ++} ++ ++/* Print one page. ++ ++ As long as there are lines left on the page and columns ready to print, ++ Scan across the column list ++ if the column has stored lines or the file is open ++ pad to the appropriate spot ++ print the column ++ pad the remainder of the page with \n or \f as requested ++ reset the status of all files -- any files which were on hold because ++ of formfeeds are now put back into the lineup. */ ++ ++static bool ++print_page (void) ++{ ++ int j; ++ int lines_left_on_page; ++ COLUMN *p; ++ ++ /* Used as an accumulator (with | operator) of successive values of ++ pad_vertically. The trick is to set pad_vertically ++ to false before each run through the inner loop, then after that ++ loop, it tells us whether a line was actually printed (whether a ++ newline needs to be output -- or two for double spacing). But those ++ values have to be accumulated (in pv) so we can invoke pad_down ++ properly after the outer loop completes. */ ++ bool pv; ++ ++ init_page (); ++ ++ if (cols_ready_to_print () == 0) ++ return false; ++ ++ if (extremities) ++ print_a_header = true; ++ ++ /* Don't pad unless we know a page was printed. 
*/ ++ pad_vertically = false; ++ pv = false; ++ ++ lines_left_on_page = lines_per_body; ++ if (double_space) ++ lines_left_on_page *= 2; ++ ++ while (lines_left_on_page > 0 && cols_ready_to_print () > 0) ++ { ++ output_position = 0; ++ spaces_not_printed = 0; ++ separators_not_printed = 0; ++ pad_vertically = false; ++ align_empty_cols = false; ++ empty_line = true; ++ ++ for (j = 1, p = column_vector; j <= columns; ++j, ++p) ++ { ++ input_position = 0; ++ if (p->lines_to_print > 0 || p->status == FF_FOUND) ++ { ++ FF_only = false; ++ padding_not_printed = p->start_position; ++ if (!(p->print_func) (p)) ++ read_rest_of_line (p); ++ pv |= pad_vertically; ++ ++ --p->lines_to_print; ++ if (p->lines_to_print <= 0) ++ { ++ if (cols_ready_to_print () <= 0) ++ break; ++ } ++ ++ /* File p changed its status to ON_HOLD or CLOSED */ ++ if (parallel_files && p->status != OPEN) ++ { ++ if (empty_line) ++ align_empty_cols = true; ++ else if (p->status == CLOSED || ++ (p->status == ON_HOLD && FF_only)) ++ align_column (p); ++ } ++ } ++ else if (parallel_files) ++ { ++ /* File status ON_HOLD or CLOSED */ ++ if (empty_line) ++ align_empty_cols = true; ++ else ++ align_column (p); ++ } ++ ++ /* We need it also with an empty column */ ++ if (use_col_separator) ++ ++separators_not_printed; ++ } ++ ++ if (pad_vertically) ++ { ++ putchar ('\n'); ++ --lines_left_on_page; ++ } ++ ++ if (cols_ready_to_print () <= 0 && !extremities) ++ break; ++ ++ if (double_space && pv) ++ { ++ putchar ('\n'); ++ --lines_left_on_page; ++ } ++ } ++ ++ if (lines_left_on_page == 0) ++ for (j = 1, p = column_vector; j <= columns; ++j, ++p) ++ if (p->status == OPEN) ++ p->full_page_printed = true; ++ ++ pad_vertically = pv; ++ ++ if (pad_vertically && extremities) ++ pad_down (lines_left_on_page + lines_per_footer); ++ else if (keep_FF && print_a_FF) ++ { ++ putchar ('\f'); ++ print_a_FF = false; ++ } ++ ++ if (last_page_number < page_number) ++ return false; /* Stop printing with LAST_PAGE */ ++ ++ 
reset_status (); /* Change ON_HOLD to OPEN. */ ++ ++ return true; /* More pages to go. */ ++} ++ ++/* Allocate space for storing columns. ++ ++ This is necessary when printing multiple columns from a single file. ++ Lines are stored consecutively in buff, separated by '\0'. ++ ++ The following doesn't apply any longer - any tuning possible? ++ (We can't use a fixed offset since with the '-s' flag lines aren't ++ truncated.) ++ ++ We maintain a list (line_vector) of pointers to the beginnings ++ of lines in buff. We allocate one more than the number of lines ++ because the last entry tells us the index of the last character, ++ which we need to know in order to print the last line in buff. */ ++ ++static void ++init_store_cols (void) ++{ ++ int total_lines = lines_per_body * columns; ++ int chars_if_truncate = total_lines * (chars_per_column + 1); ++ ++ free (line_vector); ++ /* FIXME: here's where it was allocated. */ ++ line_vector = xmalloc ((total_lines + 1) * sizeof (int *)); ++ ++ free (end_vector); ++ end_vector = xmalloc (total_lines * sizeof (int *)); ++ ++ free (buff); ++ buff_allocated = (use_col_separator ++ ? 2 * chars_if_truncate ++ : chars_if_truncate); /* Tune this. */ ++ buff = xmalloc (buff_allocated); ++} ++ ++/* Store all but the rightmost column. ++ (Used when printing a single file in multiple downward columns) ++ ++ For each column ++ set p->current_line to be the index in line_vector of the ++ first line in the column ++ For each line in the column ++ store the line in buff ++ add to line_vector the index of the line's first char ++ buff_start is the index in buff of the first character in the ++ current line. 
*/ ++ ++static void ++store_columns (void) ++{ ++ int i, j; ++ unsigned int line = 0; ++ unsigned int buff_start; ++ int last_col; /* The rightmost column which will be saved in buff */ ++ COLUMN *p; ++ ++ buff_current = 0; ++ buff_start = 0; ++ ++ if (balance_columns) ++ last_col = columns; ++ else ++ last_col = columns - 1; ++ ++ for (i = 1, p = column_vector; i <= last_col; ++i, ++p) ++ p->lines_stored = 0; ++ ++ for (i = 1, p = column_vector; i <= last_col && files_ready_to_read; ++ ++i, ++p) ++ { ++ p->current_line = line; ++ for (j = lines_per_body; j && files_ready_to_read; --j) ++ ++ if (p->status == OPEN) /* Redundant. Clean up. */ ++ { ++ input_position = 0; ++ ++ if (!read_line (p)) ++ read_rest_of_line (p); ++ ++ if (p->status == OPEN ++ || buff_start != buff_current) ++ { ++ ++p->lines_stored; ++ line_vector[line] = buff_start; ++ end_vector[line++] = input_position; ++ buff_start = buff_current; ++ } ++ } ++ } ++ ++ /* Keep track of the location of the last char in buff. */ ++ line_vector[line] = buff_start; ++ ++ if (balance_columns) ++ balance (line); ++} ++ ++static void ++balance (int total_stored) ++{ ++ COLUMN *p; ++ int i, lines; ++ int first_line = 0; ++ ++ for (i = 1, p = column_vector; i <= columns; ++i, ++p) ++ { ++ lines = total_stored / columns; ++ if (i <= total_stored % columns) ++ ++lines; ++ ++ p->lines_stored = lines; ++ p->current_line = first_line; ++ ++ first_line += lines; ++ } ++} ++ ++/* Store a character in the buffer. */ ++ ++static void ++store_char (char c) ++{ ++ if (buff_current >= buff_allocated) ++ { ++ /* May be too generous. 
*/ ++ buff = X2REALLOC (buff, &buff_allocated); ++ } ++ buff[buff_current++] = c; ++} ++ ++static void ++add_line_number (COLUMN *p) ++{ ++ int i; ++ char *s; ++ int left_cut; ++ ++ /* Cutting off the higher-order digits is more informative than ++ lower-order cut off*/ ++ if (line_number < power_10) ++ sprintf (number_buff, "%*d", chars_per_number, line_number); ++ else ++ { ++ left_cut = line_number % power_10; ++ sprintf (number_buff, "%0*d", chars_per_number, left_cut); ++ } ++ line_number++; ++ s = number_buff; ++ for (i = chars_per_number; i > 0; i--) ++ (p->char_func) (*s++); ++ ++ if (columns > 1) ++ { ++ /* Tabification is assumed for multiple columns, also for n-separators, ++ but `default n-separator = TAB' hasn't been given priority over ++ equal column_width also specified by POSIX. */ ++ if (number_separator == '\t') ++ { ++ i = number_width - chars_per_number; ++ while (i-- > 0) ++ (p->char_func) (' '); ++ } ++ else ++ (p->char_func) (number_separator); ++ } ++ else ++ /* To comply with POSIX, we avoid any expansion of default TAB ++ separator with a single column output. No column_width requirement ++ has to be considered. */ ++ { ++ (p->char_func) (number_separator); ++ if (number_separator == '\t') ++ output_position = POS_AFTER_TAB (chars_per_output_tab, ++ output_position); ++ } ++ ++ if (truncate_lines && !parallel_files) ++ input_position += number_width; ++} ++ ++/* Print (or store) padding until the current horizontal position ++ is position. */ ++ ++static void ++pad_across_to (int position) ++{ ++ int h = output_position; ++ ++ if (tabify_output) ++ spaces_not_printed = position - output_position; ++ else ++ { ++ while (++h <= position) ++ putchar (' '); ++ output_position = position; ++ } ++} ++ ++/* Pad to the bottom of the page. ++ ++ If the user has requested a formfeed, use one. ++ Otherwise, use newlines. 
*/ ++ ++static void ++pad_down (int lines) ++{ ++ int i; ++ ++ if (use_form_feed) ++ putchar ('\f'); ++ else ++ for (i = lines; i; --i) ++ putchar ('\n'); ++} ++ ++/* Read the rest of the line. ++ ++ Read from the current column's file until an end of line is ++ hit. Used when we've truncated a line and we no longer need ++ to print or store its characters. */ ++ ++static void ++read_rest_of_line (COLUMN *p) ++{ ++ int c; ++ FILE *f = p->fp; ++ ++ while ((c = getc (f)) != '\n') ++ { ++ if (c == '\f') ++ { ++ if ((c = getc (f)) != '\n') ++ ungetc (c, f); ++ if (keep_FF) ++ print_a_FF = true; ++ hold_file (p); ++ break; ++ } ++ else if (c == EOF) ++ { ++ close_file (p); ++ break; ++ } ++ } ++} ++ ++/* Read a line with skip_to_page. ++ ++ Read from the current column's file until an end of line is ++ hit. Used when we read full lines to skip pages. ++ With skip_to_page we have to check for FF-coincidence which is done ++ in function read_line otherwise. ++ Count lines of skipped pages to find the line number of 1st page ++ printed relative to 1st line of input file (start_line_num). */ ++ ++static void ++skip_read (COLUMN *p, int column_number) ++{ ++ int c; ++ FILE *f = p->fp; ++ int i; ++ bool single_ff = false; ++ COLUMN *q; ++ ++ /* Read 1st character in a line or any character succeeding a FF */ ++ if ((c = getc (f)) == '\f' && p->full_page_printed) ++ /* A FF-coincidence with a previous full_page_printed. ++ To avoid an additional empty page, eliminate the FF */ ++ if ((c = getc (f)) == '\n') ++ c = getc (f); ++ ++ p->full_page_printed = false; ++ ++ /* 1st character a FF means a single FF without any printable ++ characters. Don't count it as a line with -n option. 
*/ ++ if (c == '\f') ++ single_ff = true; ++ ++ /* Preparing for a FF-coincidence: Maybe we finish that page ++ without a FF found */ ++ if (last_line) ++ p->full_page_printed = true; ++ ++ while (c != '\n') ++ { ++ if (c == '\f') ++ { ++ /* No FF-coincidence possible, ++ no catching up of a FF-coincidence with next page */ ++ if (last_line) ++ { ++ if (!parallel_files) ++ for (q = column_vector, i = columns; i; ++q, --i) ++ q->full_page_printed = false; ++ else ++ p->full_page_printed = false; ++ } ++ ++ if ((c = getc (f)) != '\n') ++ ungetc (c, f); ++ hold_file (p); ++ break; ++ } ++ else if (c == EOF) ++ { ++ close_file (p); ++ break; ++ } ++ c = getc (f); ++ } ++ ++ if (skip_count) ++ if ((!parallel_files || column_number == 1) && !single_ff) ++ ++line_count; ++} ++ ++/* If we're tabifying output, ++ ++ When print_char encounters white space it keeps track ++ of our desired horizontal position and delays printing ++ until this function is called. */ ++ ++static void ++print_white_space (void) ++{ ++ int h_new; ++ int h_old = output_position; ++ int goal = h_old + spaces_not_printed; ++ ++ while (goal - h_old > 1 ++ && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) ++ { ++ putchar (output_tab_char); ++ h_old = h_new; ++ } ++ while (++h_old <= goal) ++ putchar (' '); ++ ++ output_position = goal; ++ spaces_not_printed = 0; ++} ++ ++/* Print column separators. ++ ++ We keep a count until we know that we'll be printing a line, ++ then print_sep_string() is called. */ ++ ++static void ++print_sep_string (void) ++{ ++ char *s; ++ int l = col_sep_length; ++ ++ s = col_sep_string; ++ ++ if (separators_not_printed <= 0) ++ { ++ /* We'll be starting a line with chars_per_margin, anything else? 
*/ ++ if (spaces_not_printed > 0) ++ print_white_space (); ++ } ++ else ++ { ++ for (; separators_not_printed > 0; --separators_not_printed) ++ { ++ while (l-- > 0) ++ { ++ /* 3 types of sep_strings: spaces only, spaces and chars, ++ chars only */ ++ if (*s == ' ') ++ { ++ /* We're tabifying output; consecutive spaces in ++ sep_string may have to be converted to tabs */ ++ s++; ++ ++spaces_not_printed; ++ } ++ else ++ { ++ if (spaces_not_printed > 0) ++ print_white_space (); ++ putchar (*s++); ++ ++output_position; ++ } ++ } ++ /* sep_string ends with some spaces */ ++ if (spaces_not_printed > 0) ++ print_white_space (); ++ } ++ } ++} ++ ++/* Print (or store, depending on p->char_func) a clump of N ++ characters. */ ++ ++static void ++print_clump (COLUMN *p, int n, char *clump) ++{ ++ while (n--) ++ (p->char_func) (*clump++); ++} ++ ++/* Print a character. ++ ++ Update the following comment: process-char hasn't been used any ++ longer. ++ If we're tabifying, all tabs have been converted to spaces by ++ process_char(). Keep a count of consecutive spaces, and when ++ a nonspace is encountered, call print_white_space() to print the ++ required number of tabs and spaces. */ ++ ++static void ++print_char (char c) ++{ ++ if (tabify_output) ++ { ++ if (c == ' ') ++ { ++ ++spaces_not_printed; ++ return; ++ } ++ else if (spaces_not_printed > 0) ++ print_white_space (); ++ ++ /* Nonprintables are assumed to have width 0, except '\b'. */ ++ if (! isprint (to_uchar (c))) ++ { ++ if (c == '\b') ++ --output_position; ++ } ++ else ++ ++output_position; ++ } ++ putchar (c); ++} ++ ++/* Skip to page PAGE before printing. ++ PAGE may be larger than total number of pages. 
*/ ++ ++static bool ++skip_to_page (uintmax_t page) ++{ ++ uintmax_t n; ++ int i; ++ int j; ++ COLUMN *p; ++ ++ for (n = 1; n < page; ++n) ++ { ++ for (i = 1; i < lines_per_body; ++i) ++ { ++ for (j = 1, p = column_vector; j <= columns; ++j, ++p) ++ if (p->status == OPEN) ++ skip_read (p, j); ++ } ++ last_line = true; ++ for (j = 1, p = column_vector; j <= columns; ++j, ++p) ++ if (p->status == OPEN) ++ skip_read (p, j); ++ ++ if (storing_columns) /* change FF_FOUND to ON_HOLD */ ++ for (j = 1, p = column_vector; j <= columns; ++j, ++p) ++ if (p->status != CLOSED) ++ p->status = ON_HOLD; ++ ++ reset_status (); ++ last_line = false; ++ ++ if (files_ready_to_read < 1) ++ { ++ /* It's very helpful, normally the total number of pages is ++ not known in advance. */ ++ error (0, 0, ++ _("starting page number %"PRIuMAX ++ " exceeds page count %"PRIuMAX), ++ page, n); ++ break; ++ } ++ } ++ return files_ready_to_read > 0; ++} ++ ++/* Print a header. ++ ++ Formfeeds are assumed to use up two lines at the beginning of ++ the page. */ ++ ++static void ++print_header (void) ++{ ++ char page_text[256 + INT_STRLEN_BOUND (page_number)]; ++ int available_width; ++ int lhs_spaces; ++ int rhs_spaces; ++ ++ output_position = 0; ++ pad_across_to (chars_per_margin); ++ print_white_space (); ++ ++ if (page_number == 0) ++ error (EXIT_FAILURE, 0, _("page number overflow")); ++ ++ /* The translator must ensure that formatting the translation of ++ "Page %"PRIuMAX does not generate more than (sizeof page_text - 1) ++ bytes. 
*/ ++ sprintf (page_text, _("Page %"PRIuMAX), page_number++); ++ available_width = header_width_available - mbswidth (page_text, 0); ++ available_width = MAX (0, available_width); ++ lhs_spaces = available_width >> 1; ++ rhs_spaces = available_width - lhs_spaces; ++ ++ printf ("\n\n%*.*s%s%*.*s%s%*.*s%s\n\n\n", ++ chars_per_margin, chars_per_margin, " ", ++ date_text, lhs_spaces, lhs_spaces, " ", ++ file_text, rhs_spaces, rhs_spaces, " ", page_text); ++ ++ print_a_header = false; ++ output_position = 0; ++} ++ ++/* Print (or store, if p->char_func is store_char()) a line. ++ ++ Read a character to determine whether we have a line or not. ++ (We may hit EOF, \n, or \f) ++ ++ Once we know we have a line, ++ set pad_vertically = true, meaning it's safe ++ to pad down at the end of the page, since we do have a page. ++ print a header if needed. ++ pad across to padding_not_printed if needed. ++ print any separators which need to be printed. ++ print a line number if it needs to be printed. ++ ++ Print the clump which corresponds to the first character. ++ ++ Enter a loop and keep printing until an end of line condition ++ exists, or until we exceed chars_per_column. ++ ++ Return false if we exceed chars_per_column before reading ++ an end of line character, true otherwise. 
*/ ++ ++static bool ++read_line (COLUMN *p) ++{ ++ int c; ++ int chars IF_LINT (= 0); ++ int last_input_position; ++ int j, k; ++ COLUMN *q; ++ ++ /* read 1st character in each line or any character succeeding a FF: */ ++ c = getc (p->fp); ++ ++ last_input_position = input_position; ++ ++ if (c == '\f' && p->full_page_printed) ++ if ((c = getc (p->fp)) == '\n') ++ c = getc (p->fp); ++ p->full_page_printed = false; ++ ++ switch (c) ++ { ++ case '\f': ++ if ((c = getc (p->fp)) != '\n') ++ ungetc (c, p->fp); ++ FF_only = true; ++ if (print_a_header && !storing_columns) ++ { ++ pad_vertically = true; ++ print_header (); ++ } ++ else if (keep_FF) ++ print_a_FF = true; ++ hold_file (p); ++ return true; ++ case EOF: ++ close_file (p); ++ return true; ++ case '\n': ++ break; ++ default: ++ chars = char_to_clump (c); ++ } ++ ++ if (truncate_lines && input_position > chars_per_column) ++ { ++ input_position = last_input_position; ++ return false; ++ } ++ ++ if (p->char_func != store_char) ++ { ++ pad_vertically = true; ++ ++ if (print_a_header && !storing_columns) ++ print_header (); ++ ++ if (parallel_files && align_empty_cols) ++ { ++ /* We have to align empty columns at the beginning of a line. 
*/ ++ k = separators_not_printed; ++ separators_not_printed = 0; ++ for (j = 1, q = column_vector; j <= k; ++j, ++q) ++ { ++ align_column (q); ++ separators_not_printed += 1; ++ } ++ padding_not_printed = p->start_position; ++ if (truncate_lines) ++ spaces_not_printed = chars_per_column; ++ else ++ spaces_not_printed = 0; ++ align_empty_cols = false; ++ } ++ ++ if (padding_not_printed - col_sep_length > 0) ++ { ++ pad_across_to (padding_not_printed - col_sep_length); ++ padding_not_printed = ANYWHERE; ++ } ++ ++ if (use_col_separator) ++ print_sep_string (); ++ } ++ ++ if (p->numbered) ++ add_line_number (p); ++ ++ empty_line = false; ++ if (c == '\n') ++ return true; ++ ++ print_clump (p, chars, clump_buff); ++ ++ for (;;) ++ { ++ c = getc (p->fp); ++ ++ switch (c) ++ { ++ case '\n': ++ return true; ++ case '\f': ++ if ((c = getc (p->fp)) != '\n') ++ ungetc (c, p->fp); ++ if (keep_FF) ++ print_a_FF = true; ++ hold_file (p); ++ return true; ++ case EOF: ++ close_file (p); ++ return true; ++ } ++ ++ last_input_position = input_position; ++ chars = char_to_clump (c); ++ if (truncate_lines && input_position > chars_per_column) ++ { ++ input_position = last_input_position; ++ return false; ++ } ++ ++ print_clump (p, chars, clump_buff); ++ } ++} ++ ++/* Print a line from buff. ++ ++ If this function has been called, we know we have "something to ++ print". But it remains to be seen whether we have a real text page ++ or an empty page (a single form feed) with/without a header only. ++ Therefore first we set pad_vertically to true and print a header ++ if necessary. ++ If FF_FOUND and we are using -t|-T option we omit any newline by ++ setting pad_vertically to false (see print_page). ++ Otherwise we pad across if necessary, print separators if necessary ++ and text of COLUMN *p. ++ ++ Return true, meaning there is no need to call read_rest_of_line. 
*/ ++ ++static bool ++print_stored (COLUMN *p) ++{ ++ COLUMN *q; ++ int i; ++ ++ int line = p->current_line++; ++ char *first = &buff[line_vector[line]]; ++ /* FIXME ++ UMR: Uninitialized memory read: ++ * This is occurring while in: ++ print_stored [pr.c:2239] ++ * Reading 4 bytes from 0x5148c in the heap. ++ * Address 0x5148c is 4 bytes into a malloc'd block at 0x51488 of 676 bytes ++ * This block was allocated from: ++ malloc [rtlib.o] ++ xmalloc [xmalloc.c:94] ++ init_store_cols [pr.c:1648] ++ */ ++ char *last = &buff[line_vector[line + 1]]; ++ ++ pad_vertically = true; ++ ++ if (print_a_header) ++ print_header (); ++ ++ if (p->status == FF_FOUND) ++ { ++ for (i = 1, q = column_vector; i <= columns; ++i, ++q) ++ q->status = ON_HOLD; ++ if (column_vector->lines_to_print <= 0) ++ { ++ if (!extremities) ++ pad_vertically = false; ++ return true; /* print a header only */ ++ } ++ } ++ ++ if (padding_not_printed - col_sep_length > 0) ++ { ++ pad_across_to (padding_not_printed - col_sep_length); ++ padding_not_printed = ANYWHERE; ++ } ++ ++ if (use_col_separator) ++ print_sep_string (); ++ ++ while (first != last) ++ print_char (*first++); ++ ++ if (spaces_not_printed == 0) ++ { ++ output_position = p->start_position + end_vector[line]; ++ if (p->start_position - col_sep_length == chars_per_margin) ++ output_position -= col_sep_length; ++ } ++ ++ return true; ++} ++ ++/* Convert a character to the proper format and return the number of ++ characters in the resulting clump. Increment input_position by ++ the width of the clump. ++ ++ Tabs are converted to clumps of spaces. ++ Nonprintable characters may be converted to clumps of escape ++ sequences or control prefixes. ++ ++ Note: the width of a clump is not necessarily equal to the number of ++ characters in clump_buff. (e.g, the width of '\b' is -1, while the ++ number of characters is 1.) 
*/ ++ ++static int ++char_to_clump (char c) ++{ ++ unsigned char uc = c; ++ char *s = clump_buff; ++ int i; ++ char esc_buff[4]; ++ int width; ++ int chars; ++ int chars_per_c = 8; ++ ++ if (c == input_tab_char) ++ chars_per_c = chars_per_input_tab; ++ ++ if (c == input_tab_char || c == '\t') ++ { ++ width = TAB_WIDTH (chars_per_c, input_position); ++ ++ if (untabify_input) ++ { ++ for (i = width; i; --i) ++ *s++ = ' '; ++ chars = width; ++ } ++ else ++ { ++ *s = c; ++ chars = 1; ++ } ++ ++ } ++ else if (! isprint (uc)) ++ { ++ if (use_esc_sequence) ++ { ++ width = 4; ++ chars = 4; ++ *s++ = '\\'; ++ sprintf (esc_buff, "%03o", uc); ++ for (i = 0; i <= 2; ++i) ++ *s++ = esc_buff[i]; ++ } ++ else if (use_cntrl_prefix) ++ { ++ if (uc < 0200) ++ { ++ width = 2; ++ chars = 2; ++ *s++ = '^'; ++ *s++ = c ^ 0100; ++ } ++ else ++ { ++ width = 4; ++ chars = 4; ++ *s++ = '\\'; ++ sprintf (esc_buff, "%03o", uc); ++ for (i = 0; i <= 2; ++i) ++ *s++ = esc_buff[i]; ++ } ++ } ++ else if (c == '\b') ++ { ++ width = -1; ++ chars = 1; ++ *s = c; ++ } ++ else ++ { ++ width = 0; ++ chars = 1; ++ *s = c; ++ } ++ } ++ else ++ { ++ width = 1; ++ chars = 1; ++ *s = c; ++ } ++ ++ /* Too many backspaces must put us in position 0 -- never negative. */ ++ if (width < 0 && input_position == 0) ++ { ++ chars = 0; ++ input_position = 0; ++ } ++ else if (width < 0 && input_position <= -width) ++ input_position = 0; ++ else ++ input_position += width; ++ ++ return chars; ++} ++ ++/* We've just printed some files and need to clean up things before ++ looking for more options and printing the next batch of files. ++ ++ Free everything we've xmalloc'ed, except `header'. */ ++ ++static void ++cleanup (void) ++{ ++ free (number_buff); ++ free (clump_buff); ++ free (column_vector); ++ free (line_vector); ++ free (end_vector); ++ free (buff); ++} ++ ++/* Complain, print a usage message, and die. 
*/ ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... [FILE]...\n\ ++"), ++ program_name); ++ ++ fputs (_("\ ++Paginate or columnate FILE(s) for printing.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE]\n\ ++ begin [stop] printing with page FIRST_[LAST_]PAGE\n\ ++ -COLUMN, --columns=COLUMN\n\ ++ output COLUMN columns and print columns down,\n\ ++ unless -a is used. Balance number of lines in the\n\ ++ columns on each page.\n\ ++"), stdout); ++ fputs (_("\ ++ -a, --across print columns across rather than down, used together\n\ ++ with -COLUMN\n\ ++ -c, --show-control-chars\n\ ++ use hat notation (^G) and octal backslash notation\n\ ++ -d, --double-space\n\ ++ double space the output\n\ ++"), stdout); ++ fputs (_("\ ++ -D, --date-format=FORMAT\n\ ++ use FORMAT for the header date\n\ ++ -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]]\n\ ++ expand input CHARs (TABs) to tab WIDTH (8)\n\ ++ -F, -f, --form-feed\n\ ++ use form feeds instead of newlines to separate pages\n\ ++ (by a 3-line page header with -F or a 5-line header\n\ ++ and trailer without -F)\n\ ++"), stdout); ++ fputs (_("\ ++ -h, --header=HEADER\n\ ++ use a centered HEADER instead of filename in page header,\n\ ++ -h \"\" prints a blank line, don't use -h\"\"\n\ ++ -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]]\n\ ++ replace spaces with CHARs (TABs) to tab WIDTH (8)\n\ ++ -J, --join-lines merge full lines, turns off -W line truncation, no column\n\ ++ alignment, --sep-string[=STRING] sets separators\n\ ++"), stdout); ++ fputs (_("\ ++ -l, --length=PAGE_LENGTH\n\ ++ set the page length to PAGE_LENGTH (66) lines\n\ ++ (default number of lines of text 56, and with -F 63)\n\ ++ -m, --merge print all files in 
parallel, one in each column,\n\ ++ truncate lines, but join lines of full length with -J\n\ ++"), stdout); ++ fputs (_("\ ++ -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]]\n\ ++ number lines, use DIGITS (5) digits, then SEP (TAB),\n\ ++ default counting starts with 1st line of input file\n\ ++ -N, --first-line-number=NUMBER\n\ ++ start counting with NUMBER at 1st line of first\n\ ++ page printed (see +FIRST_PAGE)\n\ ++"), stdout); ++ fputs (_("\ ++ -o, --indent=MARGIN\n\ ++ offset each line with MARGIN (zero) spaces, do not\n\ ++ affect -w or -W, MARGIN will be added to PAGE_WIDTH\n\ ++ -r, --no-file-warnings\n\ ++ omit warning when a file cannot be opened\n\ ++"), stdout); ++ fputs (_("\ ++ -s[CHAR],--separator[=CHAR]\n\ ++ separate columns by a single character, default for CHAR\n\ ++ is the character without -w and \'no char\' with -w\n\ ++ -s[CHAR] turns off line truncation of all 3 column\n\ ++ options (-COLUMN|-a -COLUMN|-m) except -w is set\n\ ++"), stdout); ++ fputs (_("\ ++ -SSTRING, --sep-string[=STRING]\n\ ++ separate columns by STRING,\n\ ++ without -S: Default separator with -J and \n\ ++ otherwise (same as -S\" \"), no effect on column options\n\ ++ -t, --omit-header omit page headers and trailers\n\ ++"), stdout); ++ fputs (_("\ ++ -T, --omit-pagination\n\ ++ omit page headers and trailers, eliminate any pagination\n\ ++ by form feeds set in input files\n\ ++ -v, --show-nonprinting\n\ ++ use octal backslash notation\n\ ++ -w, --width=PAGE_WIDTH\n\ ++ set page width to PAGE_WIDTH (72) characters for\n\ ++ multiple text-column output only, -s[char] turns off (72)\n\ ++"), stdout); ++ fputs (_("\ ++ -W, --page-width=PAGE_WIDTH\n\ ++ set page width to PAGE_WIDTH (72) characters always,\n\ ++ truncate lines, except -J option is set, no interference\n\ ++ with -S or -s\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++-t is implied if PAGE_LENGTH <= 10. 
With no FILE, or when\n\ ++FILE is -, read standard input.\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} +diff -urNp coreutils-8.0-orig/src/sort.c coreutils-8.0/src/sort.c +--- coreutils-8.0-orig/src/sort.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/sort.c 2009-10-07 10:07:16.000000000 +0200 +@@ -22,10 +22,19 @@ #include @@ -1599,7 +8380,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c #include "system.h" #include "argmatch.h" #include "error.h" -@@ -116,14 +125,38 @@ +@@ -122,14 +131,38 @@ static int decimal_point; /* Thousands separator; if -1, then there isn't one. */ static int thousands_sep; @@ -1639,7 +8420,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* The kind of blanks for '-b' to skip in various options. */ enum blanktype { bl_start, bl_end, bl_both }; -@@ -261,13 +294,11 @@ +@@ -268,13 +301,11 @@ static bool reverse; they were read if all keys compare equal. */ static bool stable; @@ -1656,7 +8437,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */ -@@ -639,6 +670,44 @@ +@@ -712,6 +743,44 @@ reap_some (void) update_proc (pid); } @@ -1701,7 +8482,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* Clean up any remaining temporary files. */ static void -@@ -978,7 +1047,7 @@ +@@ -1093,7 +1162,7 @@ zaptemp (const char *name) free (node); } @@ -1710,7 +8491,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c static int struct_month_cmp (const void *m1, const void *m2) -@@ -993,7 +1062,7 @@ +@@ -1108,7 +1177,7 @@ struct_month_cmp (const void *m1, const /* Initialize the character class tables. 
*/ static void @@ -1719,7 +8500,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c { size_t i; -@@ -1005,7 +1074,7 @@ +@@ -1120,7 +1189,7 @@ inittables (void) fold_toupper[i] = toupper (i); } @@ -1728,7 +8509,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* If we're not in the "C" locale, read different names for months. */ if (hard_LC_TIME) { -@@ -1031,6 +1100,64 @@ +@@ -1202,6 +1271,64 @@ specify_nmerge (int oi, char c, char con xstrtol_fatal (e, oi, c, long_options, s); } @@ -1793,7 +8574,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* Specify the amount of main memory to use when sorting. */ static void specify_sort_size (int oi, char c, char const *s) -@@ -1241,7 +1368,7 @@ +@@ -1412,7 +1539,7 @@ buffer_linelim (struct buffer const *buf by KEY in LINE. */ static char * @@ -1802,7 +8583,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c { char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; -@@ -1251,10 +1378,10 @@ +@@ -1421,10 +1548,10 @@ begfield (const struct line *line, const /* The leading field separator itself is included in a field when -t is absent. */ @@ -1815,7 +8596,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c ++ptr; if (ptr < lim) ++ptr; -@@ -1282,11 +1409,70 @@ +@@ -1450,11 +1577,70 @@ begfield (const struct line *line, const return ptr; } @@ -1887,7 +8668,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; -@@ -1299,10 +1485,10 @@ +@@ -1469,10 +1655,10 @@ limfield (const struct line *line, const `beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first `blank' character after the preceding field. 
*/ @@ -1898,9 +8679,9 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c - while (ptr < lim && *ptr != tab) + while (ptr < lim && *ptr != tab[0]) ++ptr; - if (ptr < lim && (eword | echar)) + if (ptr < lim && (eword || echar)) ++ptr; -@@ -1348,10 +1534,10 @@ +@@ -1518,10 +1704,10 @@ limfield (const struct line *line, const */ /* Make LIM point to the end of (one byte past) the current field. */ @@ -1913,7 +8694,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c if (newlim) lim = newlim; } -@@ -1384,6 +1570,113 @@ +@@ -1552,6 +1738,113 @@ limfield (const struct line *line, const return ptr; } @@ -2027,7 +8808,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. Set up BUF's line -@@ -1466,8 +1753,24 @@ +@@ -1634,8 +1927,24 @@ fillbuf (struct buffer *buf, FILE *fp, c else { if (key->skipsblanks) @@ -2054,7 +8835,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c line->keybeg = line_start; } } -@@ -1500,7 +1803,7 @@ +@@ -1673,7 +1982,7 @@ fillbuf (struct buffer *buf, FILE *fp, c hideously fast. */ static int @@ -2063,7 +8844,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c { while (blanks[to_uchar (*a)]) a++; -@@ -1510,6 +1813,25 @@ +@@ -1782,6 +2091,25 @@ human_numcompare (const char *a, const c : strnumcmp (a, b, decimal_point, thousands_sep)); } @@ -2089,7 +8870,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c static int general_numcompare (const char *sa, const char *sb) { -@@ -1543,7 +1865,7 @@ +@@ -1815,7 +2143,7 @@ general_numcompare (const char *sa, cons Return 0 if the name in S is not recognized. 
*/ static int @@ -2098,7 +8879,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c { size_t lo = 0; size_t hi = MONTHS_PER_YEAR; -@@ -1698,11 +2020,79 @@ +@@ -1996,11 +2324,79 @@ compare_version (char *restrict texta, s return diff; } @@ -2179,7 +8960,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c { struct keyfield *key = keylist; -@@ -1875,6 +2265,179 @@ +@@ -2180,6 +2576,179 @@ keycompare (const struct line *a, const return key->reverse ? -diff : diff; } @@ -2358,8 +9139,8 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c + /* Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B. */ - -@@ -2744,7 +3307,7 @@ + +@@ -3178,7 +3747,7 @@ main (int argc, char **argv) initialize_exit_failure (SORT_FAILURE); hard_LC_COLLATE = hard_locale (LC_COLLATE); @@ -2368,7 +9149,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c hard_LC_TIME = hard_locale (LC_TIME); #endif -@@ -2765,6 +3328,27 @@ +@@ -3199,6 +3768,27 @@ main (int argc, char **argv) thousands_sep = -1; } @@ -2396,7 +9177,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c have_read_stdin = false; inittables (); -@@ -3015,13 +3599,35 @@ +@@ -3459,13 +4049,35 @@ main (int argc, char **argv) case 't': { @@ -2436,7 +9217,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c else { /* Provoke with `sort -txx'. 
Complain about -@@ -3032,9 +3638,12 @@ +@@ -3476,9 +4088,12 @@ main (int argc, char **argv) quote (optarg)); } } @@ -2451,9 +9232,3711 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c } break; ---- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000 -@@ -39,11 +39,28 @@ +diff -urNp coreutils-8.0-orig/src/sort.c.orig coreutils-8.0/src/sort.c.orig +--- coreutils-8.0-orig/src/sort.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/sort.c.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,3697 @@ ++/* sort - sort lines of text (with all kinds of options). ++ Copyright (C) 1988, 1991-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . ++ ++ Written December 1988 by Mike Haertel. ++ The author may be reached (Email) at the address mike@gnu.ai.mit.edu, ++ or (US mail) as Mike Haertel c/o Free Software Foundation. ++ ++ Ørn E. Hansen added NLS support in 1997. 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include "system.h" ++#include "argmatch.h" ++#include "error.h" ++#include "filevercmp.h" ++#include "hard-locale.h" ++#include "hash.h" ++#include "md5.h" ++#include "physmem.h" ++#include "posixver.h" ++#include "quote.h" ++#include "quotearg.h" ++#include "randread.h" ++#include "readtokens0.h" ++#include "stdio--.h" ++#include "stdlib--.h" ++#include "strnumcmp.h" ++#include "xmemcoll.h" ++#include "xmemxfrm.h" ++#include "xstrtol.h" ++ ++#if HAVE_SYS_RESOURCE_H ++# include ++#endif ++#ifndef RLIMIT_DATA ++struct rlimit { size_t rlim_cur; }; ++# define getrlimit(Resource, Rlp) (-1) ++#endif ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "sort" ++ ++#define AUTHORS \ ++ proper_name ("Mike Haertel"), \ ++ proper_name ("Paul Eggert") ++ ++#if HAVE_LANGINFO_CODESET ++# include ++#endif ++ ++/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is ++ present. */ ++#ifndef SA_NOCLDSTOP ++# define SA_NOCLDSTOP 0 ++/* No sigprocmask. Always 'return' zero. */ ++# define sigprocmask(How, Set, Oset) (0) ++# define sigset_t int ++# if ! HAVE_SIGINTERRUPT ++# define siginterrupt(sig, flag) /* empty */ ++# endif ++#endif ++ ++#if !defined OPEN_MAX && defined NR_OPEN ++# define OPEN_MAX NR_OPEN ++#endif ++#if !defined OPEN_MAX ++# define OPEN_MAX 20 ++#endif ++ ++#define UCHAR_LIM (UCHAR_MAX + 1) ++ ++#ifndef DEFAULT_TMPDIR ++# define DEFAULT_TMPDIR "/tmp" ++#endif ++ ++/* Exit statuses. */ ++enum ++ { ++ /* POSIX says to exit with status 1 if invoked with -c and the ++ input is not properly sorted. */ ++ SORT_OUT_OF_ORDER = 1, ++ ++ /* POSIX says any other irregular exit must exit with a status ++ code greater than 1. */ ++ SORT_FAILURE = 2 ++ }; ++ ++enum ++ { ++ /* The number of times we should try to fork a compression process ++ (we retry if the fork call fails). 
We don't _need_ to compress ++ temp files, this is just to reduce disk access, so this number ++ can be small. */ ++ MAX_FORK_TRIES_COMPRESS = 2, ++ ++ /* The number of times we should try to fork a decompression process. ++ If we can't fork a decompression process, we can't sort, so this ++ number should be big. */ ++ MAX_FORK_TRIES_DECOMPRESS = 8 ++ }; ++ ++/* The representation of the decimal point in the current locale. */ ++static int decimal_point; ++ ++/* Thousands separator; if -1, then there isn't one. */ ++static int thousands_sep; ++ ++/* Nonzero if the corresponding locales are hard. */ ++static bool hard_LC_COLLATE; ++#if HAVE_NL_LANGINFO ++static bool hard_LC_TIME; ++#endif ++ ++#define NONZERO(x) ((x) != 0) ++ ++/* The kind of blanks for '-b' to skip in various options. */ ++enum blanktype { bl_start, bl_end, bl_both }; ++ ++/* The character marking end of line. Default to \n. */ ++static char eolchar = '\n'; ++ ++/* Lines are held in core as counted strings. */ ++struct line ++{ ++ char *text; /* Text of the line. */ ++ size_t length; /* Length including final newline. */ ++ char *keybeg; /* Start of first key. */ ++ char *keylim; /* Limit of first key. */ ++}; ++ ++/* Input buffers. */ ++struct buffer ++{ ++ char *buf; /* Dynamically allocated buffer, ++ partitioned into 3 regions: ++ - input data; ++ - unused area; ++ - an array of lines, in reverse order. */ ++ size_t used; /* Number of bytes used for input data. */ ++ size_t nlines; /* Number of lines in the line array. */ ++ size_t alloc; /* Number of bytes allocated. */ ++ size_t left; /* Number of bytes left from previous reads. */ ++ size_t line_bytes; /* Number of bytes to reserve for each line. */ ++ bool eof; /* An EOF has been read. */ ++}; ++ ++struct keyfield ++{ ++ size_t sword; /* Zero-origin 'word' to start at. */ ++ size_t schar; /* Additional characters to skip. */ ++ size_t eword; /* Zero-origin first word after field. */ ++ size_t echar; /* Additional characters in field. 
*/ ++ bool const *ignore; /* Boolean array of characters to ignore. */ ++ char const *translate; /* Translation applied to characters. */ ++ bool skipsblanks; /* Skip leading blanks when finding start. */ ++ bool skipeblanks; /* Skip leading blanks when finding end. */ ++ bool numeric; /* Flag for numeric comparison. Handle ++ strings of digits with optional decimal ++ point, but no exponential notation. */ ++ bool random; /* Sort by random hash of key. */ ++ bool general_numeric; /* Flag for general, numeric comparison. ++ Handle numbers in exponential notation. */ ++ bool human_numeric; /* Flag for sorting by human readable ++ units with either SI xor IEC prefixes. */ ++ int si_present; /* Flag for checking for mixed SI and IEC. */ ++ bool month; /* Flag for comparison by month name. */ ++ bool reverse; /* Reverse the sense of comparison. */ ++ bool version; /* sort by version number */ ++ struct keyfield *next; /* Next keyfield to try. */ ++}; ++ ++struct month ++{ ++ char const *name; ++ int val; ++}; ++ ++/* FIXME: None of these tables work with multibyte character sets. ++ Also, there are many other bugs when handling multibyte characters. ++ One way to fix this is to rewrite `sort' to use wide characters ++ internally, but doing this with good performance is a bit ++ tricky. */ ++ ++/* Table of blanks. */ ++static bool blanks[UCHAR_LIM]; ++ ++/* Table of non-printing characters. */ ++static bool nonprinting[UCHAR_LIM]; ++ ++/* Table of non-dictionary characters (not letters, digits, or blanks). */ ++static bool nondictionary[UCHAR_LIM]; ++ ++/* Translation table folding lower case to upper. */ ++static char fold_toupper[UCHAR_LIM]; ++ ++#define MONTHS_PER_YEAR 12 ++ ++/* Table mapping month names to integers. ++ Alphabetic order allows binary search. 
*/ ++static struct month monthtab[] = ++{ ++ {"APR", 4}, ++ {"AUG", 8}, ++ {"DEC", 12}, ++ {"FEB", 2}, ++ {"JAN", 1}, ++ {"JUL", 7}, ++ {"JUN", 6}, ++ {"MAR", 3}, ++ {"MAY", 5}, ++ {"NOV", 11}, ++ {"OCT", 10}, ++ {"SEP", 9} ++}; ++ ++/* During the merge phase, the number of files to merge at once. */ ++#define NMERGE_DEFAULT 16 ++ ++/* Minimum size for a merge or check buffer. */ ++#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) ++ ++/* Minimum sort size; the code might not work with smaller sizes. */ ++#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE) ++ ++/* The number of bytes needed for a merge or check buffer, which can ++ function relatively efficiently even if it holds only one line. If ++ a longer line is seen, this value is increased. */ ++static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024); ++ ++/* The approximate maximum number of bytes of main memory to use, as ++ specified by the user. Zero if the user has not specified a size. */ ++static size_t sort_size; ++ ++/* The guessed size for non-regular files. */ ++#define INPUT_FILE_SIZE_GUESS (1024 * 1024) ++ ++/* Array of directory names in which any temporary files are to be created. */ ++static char const **temp_dirs; ++ ++/* Number of temporary directory names used. */ ++static size_t temp_dir_count; ++ ++/* Number of allocated slots in temp_dirs. */ ++static size_t temp_dir_alloc; ++ ++/* Flag to reverse the order of all comparisons. */ ++static bool reverse; ++ ++/* Flag for stable sort. This turns off the last ditch bytewise ++ comparison of lines, and instead leaves lines in the same order ++ they were read if all keys compare equal. */ ++static bool stable; ++ ++/* If TAB has this value, blanks separate fields. */ ++enum { TAB_DEFAULT = CHAR_MAX + 1 }; ++ ++/* Tab character separating fields. If TAB_DEFAULT, then fields are ++ separated by the empty string between a non-blank character and a blank ++ character. 
*/ ++static int tab = TAB_DEFAULT; ++ ++/* Flag to remove consecutive duplicate lines from the output. ++ Only the last of a sequence of equal lines will be output. */ ++static bool unique; ++ ++/* Nonzero if any of the input files are the standard input. */ ++static bool have_read_stdin; ++ ++/* List of key field comparisons to be tried. */ ++static struct keyfield *keylist; ++ ++/* Program used to (de)compress temp files. Must accept -d. */ ++static char const *compress_program; ++ ++/* Maximum number of files to merge in one go. If more than this ++ number are present, temp files will be used. */ ++static unsigned int nmerge = NMERGE_DEFAULT; ++ ++static void sortlines_temp (struct line *, size_t, struct line *); ++ ++/* Report MESSAGE for FILE, then clean up and exit. ++ If FILE is null, it represents standard output. */ ++ ++static void die (char const *, char const *) ATTRIBUTE_NORETURN; ++static void ++die (char const *message, char const *file) ++{ ++ error (0, errno, "%s: %s", message, file ? file : _("standard output")); ++ exit (SORT_FAILURE); ++} ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... [FILE]...\n\ ++ or: %s [OPTION]... 
--files0-from=F\n\ ++"), ++ program_name, program_name); ++ fputs (_("\ ++Write sorted concatenation of all FILE(s) to standard output.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++Ordering options:\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++ -b, --ignore-leading-blanks ignore leading blanks\n\ ++ -d, --dictionary-order consider only blanks and alphanumeric characters\n\ ++ -f, --ignore-case fold lower case to upper case characters\n\ ++"), stdout); ++ fputs (_("\ ++ -g, --general-numeric-sort compare according to general numerical value\n\ ++ -i, --ignore-nonprinting consider only printable characters\n\ ++ -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ ++"), stdout); ++ fputs (_("\ ++ -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\ ++"), stdout); ++ fputs (_("\ ++ -n, --numeric-sort compare according to string numerical value\n\ ++ -R, --random-sort sort by random hash of keys\n\ ++ --random-source=FILE get random bytes from FILE\n\ ++ -r, --reverse reverse the result of comparisons\n\ ++"), stdout); ++ fputs (_("\ ++ --sort=WORD sort according to WORD:\n\ ++ general-numeric -g, human-numeric -h, month -M,\n\ ++ numeric -n, random -R, version -V\n\ ++ -V, --version-sort natural sort of (version) numbers within text\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Other options:\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++ --batch-size=NMERGE merge at most NMERGE inputs at once;\n\ ++ for more use temp files\n\ ++"), stdout); ++ fputs (_("\ ++ -c, --check, --check=diagnose-first check for sorted input; do not sort\n\ ++ -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\ ++ --compress-program=PROG compress temporaries with PROG;\n\ ++ decompress them with PROG -d\n\ ++ --files0-from=F read input from the files specified by\n\ ++ NUL-terminated names in file F;\n\ ++ If F is - then read names from standard 
input\n\ ++"), stdout); ++ fputs (_("\ ++ -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\ ++ (default end of line)\n\ ++ -m, --merge merge already sorted files; do not sort\n\ ++"), stdout); ++ fputs (_("\ ++ -o, --output=FILE write result to FILE instead of standard output\n\ ++ -s, --stable stabilize sort by disabling last-resort comparison\n\ ++ -S, --buffer-size=SIZE use SIZE for main memory buffer\n\ ++"), stdout); ++ printf (_("\ ++ -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\ ++ -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ ++ multiple options specify multiple directories\n\ ++ -u, --unique with -c, check for strict ordering;\n\ ++ without -c, output only the first of an equal run\n\ ++"), DEFAULT_TMPDIR); ++ fputs (_("\ ++ -z, --zero-terminated end lines with 0 byte, not newline\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++POS is F[.C][OPTS], where F is the field number and C the character position\n\ ++in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\ ++in a field are counted from the beginning of the preceding whitespace. OPTS is\n\ ++one or more single-letter ordering options, which override global ordering\n\ ++options for that key. 
If no key is given, use the entire line as the key.\n\ ++\n\ ++SIZE may be followed by the following multiplicative suffixes:\n\ ++"), stdout); ++ fputs (_("\ ++% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\ ++\n\ ++With no FILE, or when FILE is -, read standard input.\n\ ++\n\ ++*** WARNING ***\n\ ++The locale specified by the environment affects sort order.\n\ ++Set LC_ALL=C to get the traditional sort order that uses\n\ ++native byte values.\n\ ++"), stdout ); ++ emit_ancillary_info (); ++ } ++ ++ exit (status); ++} ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. */ ++enum ++{ ++ CHECK_OPTION = CHAR_MAX + 1, ++ COMPRESS_PROGRAM_OPTION, ++ FILES0_FROM_OPTION, ++ NMERGE_OPTION, ++ RANDOM_SOURCE_OPTION, ++ SORT_OPTION ++}; ++ ++static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z"; ++ ++static struct option const long_options[] = ++{ ++ {"ignore-leading-blanks", no_argument, NULL, 'b'}, ++ {"check", optional_argument, NULL, CHECK_OPTION}, ++ {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION}, ++ {"dictionary-order", no_argument, NULL, 'd'}, ++ {"ignore-case", no_argument, NULL, 'f'}, ++ {"files0-from", required_argument, NULL, FILES0_FROM_OPTION}, ++ {"general-numeric-sort", no_argument, NULL, 'g'}, ++ {"ignore-nonprinting", no_argument, NULL, 'i'}, ++ {"key", required_argument, NULL, 'k'}, ++ {"merge", no_argument, NULL, 'm'}, ++ {"month-sort", no_argument, NULL, 'M'}, ++ {"numeric-sort", no_argument, NULL, 'n'}, ++ {"human-numeric-sort", no_argument, NULL, 'h'}, ++ {"version-sort", no_argument, NULL, 'V'}, ++ {"random-sort", no_argument, NULL, 'R'}, ++ {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION}, ++ {"sort", required_argument, NULL, SORT_OPTION}, ++ {"output", required_argument, NULL, 'o'}, ++ {"reverse", no_argument, NULL, 'r'}, ++ {"stable", no_argument, NULL, 's'}, ++ {"batch-size", 
required_argument, NULL, NMERGE_OPTION}, ++ {"buffer-size", required_argument, NULL, 'S'}, ++ {"field-separator", required_argument, NULL, 't'}, ++ {"temporary-directory", required_argument, NULL, 'T'}, ++ {"unique", no_argument, NULL, 'u'}, ++ {"zero-terminated", no_argument, NULL, 'z'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0}, ++}; ++ ++#define CHECK_TABLE \ ++ _ct_("quiet", 'C') \ ++ _ct_("silent", 'C') \ ++ _ct_("diagnose-first", 'c') ++ ++static char const *const check_args[] = ++{ ++#define _ct_(_s, _c) _s, ++ CHECK_TABLE NULL ++#undef _ct_ ++}; ++static char const check_types[] = ++{ ++#define _ct_(_s, _c) _c, ++ CHECK_TABLE ++#undef _ct_ ++}; ++ ++#define SORT_TABLE \ ++ _st_("general-numeric", 'g') \ ++ _st_("human-numeric", 'h') \ ++ _st_("month", 'M') \ ++ _st_("numeric", 'n') \ ++ _st_("random", 'R') \ ++ _st_("version", 'V') ++ ++static char const *const sort_args[] = ++{ ++#define _st_(_s, _c) _s, ++ SORT_TABLE NULL ++#undef _st_ ++}; ++static char const sort_types[] = ++{ ++#define _st_(_s, _c) _c, ++ SORT_TABLE ++#undef _st_ ++}; ++ ++/* The set of signals that are caught. */ ++static sigset_t caught_signals; ++ ++/* Critical section status. */ ++struct cs_status ++{ ++ bool valid; ++ sigset_t sigs; ++}; ++ ++/* Enter a critical section. */ ++static struct cs_status ++cs_enter (void) ++{ ++ struct cs_status status; ++ status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0); ++ return status; ++} ++ ++/* Leave a critical section. */ ++static void ++cs_leave (struct cs_status status) ++{ ++ if (status.valid) ++ { ++ /* Ignore failure when restoring the signal mask. */ ++ sigprocmask (SIG_SETMASK, &status.sigs, NULL); ++ } ++} ++ ++/* The list of temporary files. */ ++struct tempnode ++{ ++ struct tempnode *volatile next; ++ pid_t pid; /* If compressed, the pid of compressor, else zero */ ++ char name[1]; /* Actual size is 1 + file name length. 
*/ ++}; ++static struct tempnode *volatile temphead; ++static struct tempnode *volatile *temptail = &temphead; ++ ++struct sortfile ++{ ++ char const *name; ++ pid_t pid; /* If compressed, the pid of compressor, else zero */ ++}; ++ ++/* A table where we store compression process states. We clean up all ++ processes in a timely manner so as not to exhaust system resources, ++ so we store the info on whether the process is still running, or has ++ been reaped here. */ ++static Hash_table *proctab; ++ ++enum { INIT_PROCTAB_SIZE = 47 }; ++ ++enum procstate { ALIVE, ZOMBIE }; ++ ++/* A proctab entry. The COUNT field is there in case we fork a new ++ compression process that has the same PID as an old zombie process ++ that is still in the table (because the process to decompress the ++ temp file it was associated with hasn't started yet). */ ++struct procnode ++{ ++ pid_t pid; ++ enum procstate state; ++ size_t count; ++}; ++ ++static size_t ++proctab_hasher (const void *entry, size_t tabsize) ++{ ++ const struct procnode *node = entry; ++ return node->pid % tabsize; ++} ++ ++static bool ++proctab_comparator (const void *e1, const void *e2) ++{ ++ const struct procnode *n1 = e1, *n2 = e2; ++ return n1->pid == n2->pid; ++} ++ ++/* The total number of forked processes (compressors and decompressors) ++ that have not been reaped yet. */ ++static size_t nprocs; ++ ++/* The number of child processes we'll allow before we try to reap some. */ ++enum { MAX_PROCS_BEFORE_REAP = 2 }; ++ ++/* If 0 < PID, wait for the child process with that PID to exit. ++ If PID is -1, clean up a random child process which has finished and ++ return the process ID of that child. If PID is -1 and no processes ++ have quit yet, return 0 without waiting. */ ++ ++static pid_t ++reap (pid_t pid) ++{ ++ int status; ++ pid_t cpid = waitpid (pid, &status, pid < 0 ? 
WNOHANG : 0); ++ ++ if (cpid < 0) ++ error (SORT_FAILURE, errno, _("waiting for %s [-d]"), ++ compress_program); ++ else if (0 < cpid) ++ { ++ if (! WIFEXITED (status) || WEXITSTATUS (status)) ++ error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"), ++ compress_program); ++ --nprocs; ++ } ++ ++ return cpid; ++} ++ ++/* Add the PID of a running compression process to proctab, or update ++ the entry COUNT and STATE fields if it's already there. This also ++ creates the table for us the first time it's called. */ ++ ++static void ++register_proc (pid_t pid) ++{ ++ struct procnode test, *node; ++ ++ if (! proctab) ++ { ++ proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL, ++ proctab_hasher, ++ proctab_comparator, ++ free); ++ if (! proctab) ++ xalloc_die (); ++ } ++ ++ test.pid = pid; ++ node = hash_lookup (proctab, &test); ++ if (node) ++ { ++ node->state = ALIVE; ++ ++node->count; ++ } ++ else ++ { ++ node = xmalloc (sizeof *node); ++ node->pid = pid; ++ node->state = ALIVE; ++ node->count = 1; ++ if (hash_insert (proctab, node) == NULL) ++ xalloc_die (); ++ } ++} ++ ++/* This is called when we reap a random process. We don't know ++ whether we have reaped a compression process or a decompression ++ process until we look in the table. If there's an ALIVE entry for ++ it, then we have reaped a compression process, so change the state ++ to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */ ++ ++static void ++update_proc (pid_t pid) ++{ ++ struct procnode test, *node; ++ ++ test.pid = pid; ++ node = hash_lookup (proctab, &test); ++ if (node) ++ node->state = ZOMBIE; ++} ++ ++/* This is for when we need to wait for a compression process to exit. ++ If it has a ZOMBIE entry in the table then it's already dead and has ++ been reaped. Note that if there's an ALIVE entry for it, it still may ++ already have died and been reaped if a second process was created with ++ the same PID. 
This is probably exceedingly rare, but to be on the safe ++ side we will have to wait for any compression process with this PID. */ ++ ++static void ++wait_proc (pid_t pid) ++{ ++ struct procnode test, *node; ++ ++ test.pid = pid; ++ node = hash_lookup (proctab, &test); ++ if (node->state == ALIVE) ++ reap (pid); ++ ++ node->state = ZOMBIE; ++ if (! --node->count) ++ { ++ hash_delete (proctab, node); ++ free (node); ++ } ++} ++ ++/* Keep reaping finished children as long as there are more to reap. ++ This doesn't block waiting for any of them, it only reaps those ++ that are already dead. */ ++ ++static void ++reap_some (void) ++{ ++ pid_t pid; ++ ++ while (0 < nprocs && (pid = reap (-1))) ++ update_proc (pid); ++} ++ ++/* Clean up any remaining temporary files. */ ++ ++static void ++cleanup (void) ++{ ++ struct tempnode const *node; ++ ++ for (node = temphead; node; node = node->next) ++ unlink (node->name); ++ temphead = NULL; ++} ++ ++/* Cleanup actions to take when exiting. */ ++ ++static void ++exit_cleanup (void) ++{ ++ if (temphead) ++ { ++ /* Clean up any remaining temporary files in a critical section so ++ that a signal handler does not try to clean them too. */ ++ struct cs_status cs = cs_enter (); ++ cleanup (); ++ cs_leave (cs); ++ } ++ ++ close_stdout (); ++} ++ ++/* Create a new temporary file, returning its newly allocated tempnode. ++ Store into *PFD the file descriptor open for writing. ++ If the creation fails, return NULL and store -1 into *PFD if the ++ failure is due to file descriptor exhaustion and ++ SURVIVE_FD_EXHAUSTION; otherwise, die. 
*/ ++ ++static struct tempnode * ++create_temp_file (int *pfd, bool survive_fd_exhaustion) ++{ ++ static char const slashbase[] = "/sortXXXXXX"; ++ static size_t temp_dir_index; ++ int fd; ++ int saved_errno; ++ char const *temp_dir = temp_dirs[temp_dir_index]; ++ size_t len = strlen (temp_dir); ++ struct tempnode *node = ++ xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase); ++ char *file = node->name; ++ struct cs_status cs; ++ ++ memcpy (file, temp_dir, len); ++ memcpy (file + len, slashbase, sizeof slashbase); ++ node->next = NULL; ++ node->pid = 0; ++ if (++temp_dir_index == temp_dir_count) ++ temp_dir_index = 0; ++ ++ /* Create the temporary file in a critical section, to avoid races. */ ++ cs = cs_enter (); ++ fd = mkstemp (file); ++ if (0 <= fd) ++ { ++ *temptail = node; ++ temptail = &node->next; ++ } ++ saved_errno = errno; ++ cs_leave (cs); ++ errno = saved_errno; ++ ++ if (fd < 0) ++ { ++ if (! (survive_fd_exhaustion && errno == EMFILE)) ++ error (SORT_FAILURE, errno, _("cannot create temporary file in %s"), ++ quote (temp_dir)); ++ free (node); ++ node = NULL; ++ } ++ ++ *pfd = fd; ++ return node; ++} ++ ++/* Return a stream for FILE, opened with mode HOW. A null FILE means ++ standard output; HOW should be "w". When opening for input, "-" ++ means standard input. To avoid confusion, do not return file ++ descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when ++ opening an ordinary FILE. Return NULL if unsuccessful. */ ++ ++static FILE * ++stream_open (const char *file, const char *how) ++{ ++ if (!file) ++ return stdout; ++ if (STREQ (file, "-") && *how == 'r') ++ { ++ have_read_stdin = true; ++ return stdin; ++ } ++ return fopen (file, how); ++} ++ ++/* Same as stream_open, except always return a non-null value; die on ++ failure. 
*/ ++ ++static FILE * ++xfopen (const char *file, const char *how) ++ { ++ FILE *fp = stream_open (file, how); ++ if (!fp) ++ die (_("open failed"), file); ++ return fp; ++} ++ ++/* Close FP, whose name is FILE, and report any errors. */ ++ ++static void ++xfclose (FILE *fp, char const *file) ++{ ++ switch (fileno (fp)) ++ { ++ case STDIN_FILENO: ++ /* Allow reading stdin from tty more than once. */ ++ if (feof (fp)) ++ clearerr (fp); ++ break; ++ ++ case STDOUT_FILENO: ++ /* Don't close stdout just yet. close_stdout does that. */ ++ if (fflush (fp) != 0) ++ die (_("fflush failed"), file); ++ break; ++ ++ default: ++ if (fclose (fp) != 0) ++ die (_("close failed"), file); ++ break; ++ } ++} ++ ++static void ++dup2_or_die (int oldfd, int newfd) ++{ ++ if (dup2 (oldfd, newfd) < 0) ++ error (SORT_FAILURE, errno, _("dup2 failed")); ++} ++ ++/* Fork a child process for piping to and do common cleanup. The ++ TRIES parameter tells us how many times to try to fork before ++ giving up. Return the PID of the child, or -1 (setting errno) ++ on failure. */ ++ ++static pid_t ++pipe_fork (int pipefds[2], size_t tries) ++{ ++#if HAVE_WORKING_FORK ++ struct tempnode *saved_temphead; ++ int saved_errno; ++ unsigned int wait_retry = 1; ++ pid_t pid IF_LINT (= -1); ++ struct cs_status cs; ++ ++ if (pipe (pipefds) < 0) ++ return -1; ++ ++ while (tries--) ++ { ++ /* This is so the child process won't delete our temp files ++ if it receives a signal before exec-ing. 
*/ ++ cs = cs_enter (); ++ saved_temphead = temphead; ++ temphead = NULL; ++ ++ pid = fork (); ++ saved_errno = errno; ++ if (pid) ++ temphead = saved_temphead; ++ ++ cs_leave (cs); ++ errno = saved_errno; ++ ++ if (0 <= pid || errno != EAGAIN) ++ break; ++ else ++ { ++ sleep (wait_retry); ++ wait_retry *= 2; ++ reap_some (); ++ } ++ } ++ ++ if (pid < 0) ++ { ++ saved_errno = errno; ++ close (pipefds[0]); ++ close (pipefds[1]); ++ errno = saved_errno; ++ } ++ else if (pid == 0) ++ { ++ close (STDIN_FILENO); ++ close (STDOUT_FILENO); ++ } ++ else ++ ++nprocs; ++ ++ return pid; ++ ++#else /* ! HAVE_WORKING_FORK */ ++ return -1; ++#endif ++} ++ ++/* Create a temporary file and start a compression program to filter output ++ to that file. Set *PFP to the file handle and if PPID is non-NULL, ++ set *PPID to the PID of the newly-created process. If the creation ++ fails, return NULL if the failure is due to file descriptor ++ exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */ ++ ++static char * ++maybe_create_temp (FILE **pfp, pid_t *ppid, bool survive_fd_exhaustion) ++{ ++ int tempfd; ++ struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion); ++ char *name; ++ ++ if (! node) ++ return NULL; ++ ++ name = node->name; ++ ++ if (compress_program) ++ { ++ int pipefds[2]; ++ ++ node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS); ++ if (0 < node->pid) ++ { ++ close (tempfd); ++ close (pipefds[0]); ++ tempfd = pipefds[1]; ++ ++ register_proc (node->pid); ++ } ++ else if (node->pid == 0) ++ { ++ close (pipefds[1]); ++ dup2_or_die (tempfd, STDOUT_FILENO); ++ close (tempfd); ++ dup2_or_die (pipefds[0], STDIN_FILENO); ++ close (pipefds[0]); ++ ++ if (execlp (compress_program, compress_program, (char *) NULL) < 0) ++ error (SORT_FAILURE, errno, _("couldn't execute %s"), ++ compress_program); ++ } ++ else ++ node->pid = 0; ++ } ++ ++ *pfp = fdopen (tempfd, "w"); ++ if (! 
*pfp) ++ die (_("couldn't create temporary file"), name); ++ ++ if (ppid) ++ *ppid = node->pid; ++ ++ return name; ++} ++ ++/* Create a temporary file and start a compression program to filter output ++ to that file. Set *PFP to the file handle and if PPID is non-NULL, ++ set *PPID to the PID of the newly-created process. Die on failure. */ ++ ++static char * ++create_temp (FILE **pfp, pid_t *ppid) ++{ ++ return maybe_create_temp (pfp, ppid, false); ++} ++ ++/* Open a compressed temp file and start a decompression process through ++ which to filter the input. PID must be the valid process ID of the ++ process used to compress the file. Return NULL (setting errno to ++ EMFILE) if we ran out of file descriptors, and die on any other ++ kind of failure. */ ++ ++static FILE * ++open_temp (const char *name, pid_t pid) ++{ ++ int tempfd, pipefds[2]; ++ FILE *fp = NULL; ++ ++ wait_proc (pid); ++ ++ tempfd = open (name, O_RDONLY); ++ if (tempfd < 0) ++ return NULL; ++ ++ switch (pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS)) ++ { ++ case -1: ++ if (errno != EMFILE) ++ error (SORT_FAILURE, errno, _("couldn't create process for %s -d"), ++ compress_program); ++ close (tempfd); ++ errno = EMFILE; ++ break; ++ ++ case 0: ++ close (pipefds[0]); ++ dup2_or_die (tempfd, STDIN_FILENO); ++ close (tempfd); ++ dup2_or_die (pipefds[1], STDOUT_FILENO); ++ close (pipefds[1]); ++ ++ execlp (compress_program, compress_program, "-d", (char *) NULL); ++ error (SORT_FAILURE, errno, _("couldn't execute %s -d"), ++ compress_program); ++ ++ default: ++ close (tempfd); ++ close (pipefds[1]); ++ ++ fp = fdopen (pipefds[0], "r"); ++ if (! 
fp) ++ { ++ int saved_errno = errno; ++ close (pipefds[0]); ++ errno = saved_errno; ++ } ++ break; ++ } ++ ++ return fp; ++} ++ ++static void ++write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file) ++{ ++ if (fwrite (buf, 1, n_bytes, fp) != n_bytes) ++ die (_("write failed"), output_file); ++} ++ ++/* Append DIR to the array of temporary directory names. */ ++static void ++add_temp_dir (char const *dir) ++{ ++ if (temp_dir_count == temp_dir_alloc) ++ temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc); ++ ++ temp_dirs[temp_dir_count++] = dir; ++} ++ ++/* Remove NAME from the list of temporary files. */ ++ ++static void ++zaptemp (const char *name) ++{ ++ struct tempnode *volatile *pnode; ++ struct tempnode *node; ++ struct tempnode *next; ++ int unlink_status; ++ int unlink_errno = 0; ++ struct cs_status cs; ++ ++ for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next) ++ continue; ++ ++ /* Unlink the temporary file in a critical section to avoid races. */ ++ next = node->next; ++ cs = cs_enter (); ++ unlink_status = unlink (name); ++ unlink_errno = errno; ++ *pnode = next; ++ cs_leave (cs); ++ ++ if (unlink_status != 0) ++ error (0, unlink_errno, _("warning: cannot remove: %s"), name); ++ if (! next) ++ temptail = pnode; ++ free (node); ++} ++ ++#if HAVE_NL_LANGINFO ++ ++static int ++struct_month_cmp (const void *m1, const void *m2) ++{ ++ struct month const *month1 = m1; ++ struct month const *month2 = m2; ++ return strcmp (month1->name, month2->name); ++} ++ ++#endif ++ ++/* Initialize the character class tables. */ ++ ++static void ++inittables (void) ++{ ++ size_t i; ++ ++ for (i = 0; i < UCHAR_LIM; ++i) ++ { ++ blanks[i] = !! isblank (i); ++ nonprinting[i] = ! isprint (i); ++ nondictionary[i] = ! isalnum (i) && ! isblank (i); ++ fold_toupper[i] = toupper (i); ++ } ++ ++#if HAVE_NL_LANGINFO ++ /* If we're not in the "C" locale, read different names for months. 
*/ ++ if (hard_LC_TIME) ++ { ++ for (i = 0; i < MONTHS_PER_YEAR; i++) ++ { ++ char const *s; ++ size_t s_len; ++ size_t j; ++ char *name; ++ ++ s = (char *) nl_langinfo (ABMON_1 + i); ++ s_len = strlen (s); ++ monthtab[i].name = name = xmalloc (s_len + 1); ++ monthtab[i].val = i + 1; ++ ++ for (j = 0; j < s_len; j++) ++ name[j] = fold_toupper[to_uchar (s[j])]; ++ name[j] = '\0'; ++ } ++ qsort ((void *) monthtab, MONTHS_PER_YEAR, ++ sizeof *monthtab, struct_month_cmp); ++ } ++#endif ++} ++ ++/* Specify how many inputs may be merged at once. ++ This may be set on the command-line with the ++ --batch-size option. */ ++static void ++specify_nmerge (int oi, char c, char const *s) ++{ ++ uintmax_t n; ++ struct rlimit rlimit; ++ enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL); ++ ++ /* Try to find out how many file descriptors we'll be able ++ to open. We need at least nmerge + 3 (STDIN_FILENO, ++ STDOUT_FILENO and STDERR_FILENO). */ ++ unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0 ++ ? rlimit.rlim_cur ++ : OPEN_MAX) ++ - 3); ++ ++ if (e == LONGINT_OK) ++ { ++ nmerge = n; ++ if (nmerge != n) ++ e = LONGINT_OVERFLOW; ++ else ++ { ++ if (nmerge < 2) ++ { ++ error (0, 0, _("invalid --%s argument %s"), ++ long_options[oi].name, quote(s)); ++ error (SORT_FAILURE, 0, ++ _("minimum --%s argument is %s"), ++ long_options[oi].name, quote("2")); ++ } ++ else if (max_nmerge < nmerge) ++ { ++ e = LONGINT_OVERFLOW; ++ } ++ else ++ return; ++ } ++ } ++ ++ if (e == LONGINT_OVERFLOW) ++ { ++ char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)]; ++ error (0, 0, _("--%s argument %s too large"), ++ long_options[oi].name, quote(s)); ++ error (SORT_FAILURE, 0, ++ _("maximum --%s argument with current rlimit is %s"), ++ long_options[oi].name, ++ uinttostr (max_nmerge, max_nmerge_buf)); ++ } ++ else ++ xstrtol_fatal (e, oi, c, long_options, s); ++} ++ ++/* Specify the amount of main memory to use when sorting. 
*/ ++static void ++specify_sort_size (int oi, char c, char const *s) ++{ ++ uintmax_t n; ++ char *suffix; ++ enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ"); ++ ++ /* The default unit is KiB. */ ++ if (e == LONGINT_OK && ISDIGIT (suffix[-1])) ++ { ++ if (n <= UINTMAX_MAX / 1024) ++ n *= 1024; ++ else ++ e = LONGINT_OVERFLOW; ++ } ++ ++ /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */ ++ if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1]) ++ switch (suffix[0]) ++ { ++ case 'b': ++ e = LONGINT_OK; ++ break; ++ ++ case '%': ++ { ++ double mem = physmem_total () * n / 100; ++ ++ /* Use "<", not "<=", to avoid problems with rounding. */ ++ if (mem < UINTMAX_MAX) ++ { ++ n = mem; ++ e = LONGINT_OK; ++ } ++ else ++ e = LONGINT_OVERFLOW; ++ } ++ break; ++ } ++ ++ if (e == LONGINT_OK) ++ { ++ /* If multiple sort sizes are specified, take the maximum, so ++ that option order does not matter. */ ++ if (n < sort_size) ++ return; ++ ++ sort_size = n; ++ if (sort_size == n) ++ { ++ sort_size = MAX (sort_size, MIN_SORT_SIZE); ++ return; ++ } ++ ++ e = LONGINT_OVERFLOW; ++ } ++ ++ xstrtol_fatal (e, oi, c, long_options, s); ++} ++ ++/* Return the default sort size. */ ++static size_t ++default_sort_size (void) ++{ ++ /* Let MEM be available memory or 1/8 of total memory, whichever ++ is greater. */ ++ double avail = physmem_available (); ++ double total = physmem_total (); ++ double mem = MAX (avail, total / 8); ++ struct rlimit rlimit; ++ ++ /* Let SIZE be MEM, but no more than the maximum object size or ++ system resource limits. Avoid the MIN macro here, as it is not ++ quite right when only one argument is floating point. Don't ++ bother to check for values like RLIM_INFINITY since in practice ++ they are not much less than SIZE_MAX. 
*/ ++ size_t size = SIZE_MAX; ++ if (mem < size) ++ size = mem; ++ if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) ++ size = rlimit.rlim_cur; ++#ifdef RLIMIT_AS ++ if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) ++ size = rlimit.rlim_cur; ++#endif ++ ++ /* Leave a large safety margin for the above limits, as failure can ++ occur when they are exceeded. */ ++ size /= 2; ++ ++#ifdef RLIMIT_RSS ++ /* Leave a 1/16 margin for RSS to leave room for code, stack, etc. ++ Exceeding RSS is not fatal, but can be quite slow. */ ++ if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) ++ size = rlimit.rlim_cur / 16 * 15; ++#endif ++ ++ /* Use no less than the minimum. */ ++ return MAX (size, MIN_SORT_SIZE); ++} ++ ++/* Return the sort buffer size to use with the input files identified ++ by FPS and FILES, which are alternate names of the same files. ++ NFILES gives the number of input files; NFPS may be less. Assume ++ that each input line requires LINE_BYTES extra bytes' worth of line ++ information. Do not exceed the size bound specified by the user ++ (or a default size bound, if the user does not specify one). */ ++ ++static size_t ++sort_buffer_size (FILE *const *fps, size_t nfps, ++ char *const *files, size_t nfiles, ++ size_t line_bytes) ++{ ++ /* A bound on the input size. If zero, the bound hasn't been ++ determined yet. */ ++ static size_t size_bound; ++ ++ /* In the worst case, each input byte is a newline. */ ++ size_t worst_case_per_input_byte = line_bytes + 1; ++ ++ /* Keep enough room for one extra input line and an extra byte. ++ This extra room might be needed when preparing to read EOF. */ ++ size_t size = worst_case_per_input_byte + 1; ++ ++ size_t i; ++ ++ for (i = 0; i < nfiles; i++) ++ { ++ struct stat st; ++ off_t file_size; ++ size_t worst_case; ++ ++ if ((i < nfps ? fstat (fileno (fps[i]), &st) ++ : STREQ (files[i], "-") ? 
fstat (STDIN_FILENO, &st) ++ : stat (files[i], &st)) ++ != 0) ++ die (_("stat failed"), files[i]); ++ ++ if (S_ISREG (st.st_mode)) ++ file_size = st.st_size; ++ else ++ { ++ /* The file has unknown size. If the user specified a sort ++ buffer size, use that; otherwise, guess the size. */ ++ if (sort_size) ++ return sort_size; ++ file_size = INPUT_FILE_SIZE_GUESS; ++ } ++ ++ if (! size_bound) ++ { ++ size_bound = sort_size; ++ if (! size_bound) ++ size_bound = default_sort_size (); ++ } ++ ++ /* Add the amount of memory needed to represent the worst case ++ where the input consists entirely of newlines followed by a ++ single non-newline. Check for overflow. */ ++ worst_case = file_size * worst_case_per_input_byte + 1; ++ if (file_size != worst_case / worst_case_per_input_byte ++ || size_bound - size <= worst_case) ++ return size_bound; ++ size += worst_case; ++ } ++ ++ return size; ++} ++ ++/* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES ++ must be at least sizeof (struct line). Allocate ALLOC bytes ++ initially. */ ++ ++static void ++initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) ++{ ++ /* Ensure that the line array is properly aligned. If the desired ++ size cannot be allocated, repeatedly halve it until allocation ++ succeeds. The smaller allocation may hurt overall performance, ++ but that's better than failing. */ ++ for (;;) ++ { ++ alloc += sizeof (struct line) - alloc % sizeof (struct line); ++ buf->buf = malloc (alloc); ++ if (buf->buf) ++ break; ++ alloc /= 2; ++ if (alloc <= line_bytes + 1) ++ xalloc_die (); ++ } ++ ++ buf->line_bytes = line_bytes; ++ buf->alloc = alloc; ++ buf->used = buf->left = buf->nlines = 0; ++ buf->eof = false; ++} ++ ++/* Return one past the limit of the line array. */ ++ ++static inline struct line * ++buffer_linelim (struct buffer const *buf) ++{ ++ return (struct line *) (buf->buf + buf->alloc); ++} ++ ++/* Return a pointer to the first character of the field specified ++ by KEY in LINE. 
*/ ++ ++static char * ++begfield (const struct line *line, const struct keyfield *key) ++{ ++ char *ptr = line->text, *lim = ptr + line->length - 1; ++ size_t sword = key->sword; ++ size_t schar = key->schar; ++ ++ /* The leading field separator itself is included in a field when -t ++ is absent. */ ++ ++ if (tab != TAB_DEFAULT) ++ while (ptr < lim && sword--) ++ { ++ while (ptr < lim && *ptr != tab) ++ ++ptr; ++ if (ptr < lim) ++ ++ptr; ++ } ++ else ++ while (ptr < lim && sword--) ++ { ++ while (ptr < lim && blanks[to_uchar (*ptr)]) ++ ++ptr; ++ while (ptr < lim && !blanks[to_uchar (*ptr)]) ++ ++ptr; ++ } ++ ++ /* If we're ignoring leading blanks when computing the Start ++ of the field, skip past them here. */ ++ if (key->skipsblanks) ++ while (ptr < lim && blanks[to_uchar (*ptr)]) ++ ++ptr; ++ ++ /* Advance PTR by SCHAR (if possible), but no further than LIM. */ ++ ptr = MIN (lim, ptr + schar); ++ ++ return ptr; ++} ++ ++/* Return the limit of (a pointer to the first character after) the field ++ in LINE specified by KEY. */ ++ ++static char * ++limfield (const struct line *line, const struct keyfield *key) ++{ ++ char *ptr = line->text, *lim = ptr + line->length - 1; ++ size_t eword = key->eword, echar = key->echar; ++ ++ if (echar == 0) ++ eword++; /* Skip all of end field. */ ++ ++ /* Move PTR past EWORD fields or to one past the last byte on LINE, ++ whichever comes first. If there are more than EWORD fields, leave ++ PTR pointing at the beginning of the field having zero-based index, ++ EWORD. If a delimiter character was specified (via -t), then that ++ `beginning' is the first character following the delimiting TAB. ++ Otherwise, leave PTR pointing at the first `blank' character after ++ the preceding field. 
*/ ++ if (tab != TAB_DEFAULT) ++ while (ptr < lim && eword--) ++ { ++ while (ptr < lim && *ptr != tab) ++ ++ptr; ++ if (ptr < lim && (eword || echar)) ++ ++ptr; ++ } ++ else ++ while (ptr < lim && eword--) ++ { ++ while (ptr < lim && blanks[to_uchar (*ptr)]) ++ ++ptr; ++ while (ptr < lim && !blanks[to_uchar (*ptr)]) ++ ++ptr; ++ } ++ ++#ifdef POSIX_UNSPECIFIED ++ /* The following block of code makes GNU sort incompatible with ++ standard Unix sort, so it's ifdef'd out for now. ++ The POSIX spec isn't clear on how to interpret this. ++ FIXME: request clarification. ++ ++ From: kwzh@gnu.ai.mit.edu (Karl Heuer) ++ Date: Thu, 30 May 96 12:20:41 -0400 ++ [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.] ++ ++ [...]I believe I've found another bug in `sort'. ++ ++ $ cat /tmp/sort.in ++ a b c 2 d ++ pq rs 1 t ++ $ textutils-1.15/src/sort -k1.7,1.7 skipeblanks) ++ while (ptr < lim && blanks[to_uchar (*ptr)]) ++ ++ptr; ++ ++ /* Advance PTR by ECHAR (if possible), but no further than LIM. */ ++ ptr = MIN (lim, ptr + echar); ++ } ++ ++ return ptr; ++} ++ ++/* Fill BUF reading from FP, moving buf->left bytes from the end ++ of buf->buf to the beginning first. If EOF is reached and the ++ file wasn't terminated by a newline, supply one. Set up BUF's line ++ table too. FILE is the name of the file corresponding to FP. ++ Return true if some input was read. 
*/ ++ ++static bool ++fillbuf (struct buffer *buf, FILE *fp, char const *file) ++{ ++ struct keyfield const *key = keylist; ++ char eol = eolchar; ++ size_t line_bytes = buf->line_bytes; ++ size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; ++ ++ if (buf->eof) ++ return false; ++ ++ if (buf->used != buf->left) ++ { ++ memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); ++ buf->used = buf->left; ++ buf->nlines = 0; ++ } ++ ++ for (;;) ++ { ++ char *ptr = buf->buf + buf->used; ++ struct line *linelim = buffer_linelim (buf); ++ struct line *line = linelim - buf->nlines; ++ size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; ++ char *line_start = buf->nlines ? line->text + line->length : buf->buf; ++ ++ while (line_bytes + 1 < avail) ++ { ++ /* Read as many bytes as possible, but do not read so many ++ bytes that there might not be enough room for the ++ corresponding line array. The worst case is when the ++ rest of the input file consists entirely of newlines, ++ except that the last byte is not a newline. */ ++ size_t readsize = (avail - 1) / (line_bytes + 1); ++ size_t bytes_read = fread (ptr, 1, readsize, fp); ++ char *ptrlim = ptr + bytes_read; ++ char *p; ++ avail -= bytes_read; ++ ++ if (bytes_read != readsize) ++ { ++ if (ferror (fp)) ++ die (_("read failed"), file); ++ if (feof (fp)) ++ { ++ buf->eof = true; ++ if (buf->buf == ptrlim) ++ return false; ++ if (ptrlim[-1] != eol) ++ *ptrlim++ = eol; ++ } ++ } ++ ++ /* Find and record each line in the just-read input. */ ++ while ((p = memchr (ptr, eol, ptrlim - ptr))) ++ { ++ ptr = p + 1; ++ line--; ++ line->text = line_start; ++ line->length = ptr - line_start; ++ mergesize = MAX (mergesize, line->length); ++ avail -= line_bytes; ++ ++ if (key) ++ { ++ /* Precompute the position of the first key for ++ efficiency. */ ++ line->keylim = (key->eword == SIZE_MAX ++ ? 
p ++ : limfield (line, key)); ++ ++ if (key->sword != SIZE_MAX) ++ line->keybeg = begfield (line, key); ++ else ++ { ++ if (key->skipsblanks) ++ while (blanks[to_uchar (*line_start)]) ++ line_start++; ++ line->keybeg = line_start; ++ } ++ } ++ ++ line_start = ptr; ++ } ++ ++ ptr = ptrlim; ++ if (buf->eof) ++ break; ++ } ++ ++ buf->used = ptr - buf->buf; ++ buf->nlines = buffer_linelim (buf) - line; ++ if (buf->nlines != 0) ++ { ++ buf->left = ptr - line_start; ++ merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; ++ return true; ++ } ++ ++ { ++ /* The current input line is too long to fit in the buffer. ++ Double the buffer size and try again, keeping it properly ++ aligned. */ ++ size_t line_alloc = buf->alloc / sizeof (struct line); ++ buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line)); ++ buf->alloc = line_alloc * sizeof (struct line); ++ } ++ } ++} ++ ++/* Compare strings A and B as numbers without explicitly converting them to ++ machine numbers. Comparatively slow for short strings, but asymptotically ++ hideously fast. */ ++ ++static int ++numcompare (const char *a, const char *b) ++{ ++ while (blanks[to_uchar (*a)]) ++ a++; ++ while (blanks[to_uchar (*b)]) ++ b++; ++ ++ return strnumcmp (a, b, decimal_point, thousands_sep); ++} ++ ++/* Exit with an error if a mixture of SI and IEC units is detected. */ ++ ++static void ++check_mixed_SI_IEC (char prefix, struct keyfield *key) ++{ ++ int si_present = prefix == 'i'; ++ if (key->si_present != -1 && si_present != key->si_present) ++ error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units")); ++ key->si_present = si_present; ++} ++ ++/* Return an integer which represents the order of magnitude of ++ the unit following the number. NUMBER can contain thousands separators ++ or a decimal point, but not have preceding blanks. ++ Negative numbers return a negative unit order. 
*/ ++ ++static int ++find_unit_order (const char *number, struct keyfield *key) ++{ ++ static const char orders [UCHAR_LIM] = ++ { ++#if SOME_DAY_WE_WILL_REQUIRE_C99 ++ ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8, ++ ['k']=1, ++#else ++ /* Generate the following table with this command: ++ perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8); ++ foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\ ++ |fmt */ ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, ++ 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ++#endif ++ }; ++ ++ const unsigned char *p = number; ++ ++ int sign = 1; ++ ++ if (*p == '-') ++ { ++ sign = -1; ++ p++; ++ } ++ ++ /* Scan to end of number. ++ Decimals or separators not followed by digits stop the scan. ++ Numbers ending in decimals or separators are thus considered ++ to be lacking in units. ++ FIXME: add support for multibyte thousands_sep and decimal_point. */ ++ ++ while (ISDIGIT (*p)) ++ { ++ p++; ++ ++ if (*p == decimal_point && ISDIGIT (*(p + 1))) ++ p += 2; ++ else if (*p == thousands_sep && ISDIGIT (*(p + 1))) ++ p += 2; ++ } ++ ++ int order = orders[*p]; ++ ++ /* For valid units check for MiB vs MB etc. 
*/ ++ if (order) ++ check_mixed_SI_IEC (*(p + 1), key); ++ ++ return sign * order; ++} ++ ++/* Compare numbers ending in units with SI xor IEC prefixes ++ <none/unknown> < K/k < M < G < T < P < E < Z < Y ++ Assume that numbers are properly abbreviated. ++ i.e. input will never have both 6000K and 5M. */ ++ ++static int ++human_numcompare (const char *a, const char *b, struct keyfield *key) ++{ ++ while (blanks[to_uchar (*a)]) ++ a++; ++ while (blanks[to_uchar (*b)]) ++ b++; ++ ++ int order_a = find_unit_order (a, key); ++ int order_b = find_unit_order (b, key); ++ ++ return (order_a > order_b ? 1 ++ : order_a < order_b ? -1 ++ : strnumcmp (a, b, decimal_point, thousands_sep)); ++} ++ ++static int ++general_numcompare (const char *sa, const char *sb) ++{ ++ /* FIXME: add option to warn about failed conversions. */ ++ /* FIXME: maybe add option to try expensive FP conversion ++ only if A and B can't be compared more cheaply/accurately. */ ++ ++ char *ea; ++ char *eb; ++ double a = strtod (sa, &ea); ++ double b = strtod (sb, &eb); ++ ++ /* Put conversion errors at the start of the collating sequence. */ ++ if (sa == ea) ++ return sb == eb ? 0 : -1; ++ if (sb == eb) ++ return 1; ++ ++ /* Sort numbers in the usual way, where -0 == +0. Put NaNs after ++ conversion errors but before numbers; sort them by internal ++ bit-pattern, for lack of a more portable alternative. */ ++ return (a < b ? -1 ++ : a > b ? 1 ++ : a == b ? 0 ++ : b == b ? -1 ++ : a == a ? 1 ++ : memcmp ((char *) &a, (char *) &b, sizeof a)); ++} ++ ++/* Return an integer in 1..12 of the month name MONTH with length LEN. ++ Return 0 if the name in MONTH is not recognized. 
*/ ++ ++static int ++getmonth (char const *month, size_t len) ++{ ++ size_t lo = 0; ++ size_t hi = MONTHS_PER_YEAR; ++ char const *monthlim = month + len; ++ ++ for (;;) ++ { ++ if (month == monthlim) ++ return 0; ++ if (!blanks[to_uchar (*month)]) ++ break; ++ ++month; ++ } ++ ++ do ++ { ++ size_t ix = (lo + hi) / 2; ++ char const *m = month; ++ char const *n = monthtab[ix].name; ++ ++ for (;; m++, n++) ++ { ++ if (!*n) ++ return monthtab[ix].val; ++ if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n)) ++ { ++ hi = ix; ++ break; ++ } ++ else if (fold_toupper[to_uchar (*m)] > to_uchar (*n)) ++ { ++ lo = ix + 1; ++ break; ++ } ++ } ++ } ++ while (lo < hi); ++ ++ return 0; ++} ++ ++/* A source of random data. */ ++static struct randread_source *randread_source; ++ ++/* Return the Ith randomly-generated state. The caller must invoke ++ random_state (H) for all H less than I before invoking random_state ++ (I). */ ++ ++static struct md5_ctx ++random_state (size_t i) ++{ ++ /* An array of states resulting from the random data, and counts of ++ its used and allocated members. */ ++ static struct md5_ctx *state; ++ static size_t used; ++ static size_t allocated; ++ ++ struct md5_ctx *s = &state[i]; ++ ++ if (used <= i) ++ { ++ unsigned char buf[MD5_DIGEST_SIZE]; ++ ++ used++; ++ ++ if (allocated <= i) ++ { ++ state = X2NREALLOC (state, &allocated); ++ s = &state[i]; ++ } ++ ++ randread (randread_source, buf, sizeof buf); ++ md5_init_ctx (s); ++ md5_process_bytes (buf, sizeof buf, s); ++ } ++ ++ return *s; ++} ++ ++/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB ++ with length LENGTHB. Return negative if less, zero if equal, ++ positive if greater. */ ++ ++static int ++cmp_hashes (char const *texta, size_t lena, ++ char const *textb, size_t lenb) ++{ ++ /* Try random hashes until a pair of hashes disagree. But if the ++ first pair of random hashes agree, check whether the keys are ++ identical and if so report no difference. 
*/ ++ int diff; ++ size_t i; ++ for (i = 0; ; i++) ++ { ++ uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; ++ struct md5_ctx s[2]; ++ s[0] = s[1] = random_state (i); ++ md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); ++ md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); ++ diff = memcmp (dig[0], dig[1], sizeof dig[0]); ++ if (diff != 0) ++ break; ++ if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0) ++ break; ++ } ++ ++ return diff; ++} ++ ++/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB) ++ using one or more random hash functions. */ ++ ++static int ++compare_random (char *restrict texta, size_t lena, ++ char *restrict textb, size_t lenb) ++{ ++ int diff; ++ ++ if (! hard_LC_COLLATE) ++ diff = cmp_hashes (texta, lena, textb, lenb); ++ else ++ { ++ /* Transform the text into the basis of comparison, so that byte ++ strings that would otherwise be considered to be equal are ++ considered equal here even if their bytes differ. */ ++ ++ char *buf = NULL; ++ char stackbuf[4000]; ++ size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena); ++ bool a_fits = tlena <= sizeof stackbuf; ++ size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL), ++ (a_fits ? sizeof stackbuf - tlena : 0), ++ textb, lenb); ++ ++ if (a_fits && tlena + tlenb <= sizeof stackbuf) ++ buf = stackbuf; ++ else ++ { ++ /* Adding 1 to the buffer size lets xmemxfrm run a bit ++ faster by avoiding the need for an extra buffer copy. */ ++ buf = xmalloc (tlena + tlenb + 1); ++ xmemxfrm (buf, tlena + 1, texta, lena); ++ xmemxfrm (buf + tlena, tlenb + 1, textb, lenb); ++ } ++ ++ diff = cmp_hashes (buf, tlena, buf + tlena, tlenb); ++ ++ if (buf != stackbuf) ++ free (buf); ++ } ++ ++ return diff; ++} ++ ++/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB) ++ using filevercmp. See lib/filevercmp.h for function description. 
*/ ++ ++static int ++compare_version (char *restrict texta, size_t lena, ++ char *restrict textb, size_t lenb) ++{ ++ int diff; ++ ++ /* It is necessary to save the character after the end of the field. ++ "filevercmp" works with NUL terminated strings. Our blocks of ++ text are not necessarily terminated with a NUL byte. */ ++ char sv_a = texta[lena]; ++ char sv_b = textb[lenb]; ++ ++ texta[lena] = '\0'; ++ textb[lenb] = '\0'; ++ ++ diff = filevercmp (texta, textb); ++ ++ texta[lena] = sv_a; ++ textb[lenb] = sv_b; ++ ++ return diff; ++} ++ ++/* Compare two lines A and B trying every key in sequence until there ++ are no more keys or a difference is found. */ ++ ++static int ++keycompare (const struct line *a, const struct line *b) ++{ ++ struct keyfield *key = keylist; ++ ++ /* For the first iteration only, the key positions have been ++ precomputed for us. */ ++ char *texta = a->keybeg; ++ char *textb = b->keybeg; ++ char *lima = a->keylim; ++ char *limb = b->keylim; ++ ++ int diff; ++ ++ for (;;) ++ { ++ char const *translate = key->translate; ++ bool const *ignore = key->ignore; ++ ++ /* Treat field ends before field starts as empty fields. */ ++ lima = MAX (texta, lima); ++ limb = MAX (textb, limb); ++ ++ /* Find the lengths. */ ++ size_t lena = lima - texta; ++ size_t lenb = limb - textb; ++ ++ /* Actually compare the fields. */ ++ ++ if (key->random) ++ diff = compare_random (texta, lena, textb, lenb); ++ else if (key->numeric || key->general_numeric || key->human_numeric) ++ { ++ char savea = *lima, saveb = *limb; ++ ++ *lima = *limb = '\0'; ++ diff = (key->numeric ? numcompare (texta, textb) ++ : key->general_numeric ? 
general_numcompare (texta, textb) ++ : human_numcompare (texta, textb, key)); ++ *lima = savea, *limb = saveb; ++ } ++ else if (key->version) ++ diff = compare_version (texta, lena, textb, lenb); ++ else if (key->month) ++ diff = getmonth (texta, lena) - getmonth (textb, lenb); ++ /* Sorting like this may become slow, so in a simple locale the user ++ can select a faster sort that is similar to ascii sort. */ ++ else if (hard_LC_COLLATE) ++ { ++ if (ignore || translate) ++ { ++ char buf[4000]; ++ size_t size = lena + 1 + lenb + 1; ++ char *copy_a = (size <= sizeof buf ? buf : xmalloc (size)); ++ char *copy_b = copy_a + lena + 1; ++ size_t new_len_a, new_len_b, i; ++ ++ /* Ignore and/or translate chars before comparing. */ ++ for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++) ++ { ++ if (i < lena) ++ { ++ copy_a[new_len_a] = (translate ++ ? translate[to_uchar (texta[i])] ++ : texta[i]); ++ if (!ignore || !ignore[to_uchar (texta[i])]) ++ ++new_len_a; ++ } ++ if (i < lenb) ++ { ++ copy_b[new_len_b] = (translate ++ ? translate[to_uchar (textb[i])] ++ : textb [i]); ++ if (!ignore || !ignore[to_uchar (textb[i])]) ++ ++new_len_b; ++ } ++ } ++ ++ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); ++ ++ if (sizeof buf < size) ++ free (copy_a); ++ } ++ else if (lena == 0) ++ diff = - NONZERO (lenb); ++ else if (lenb == 0) ++ goto greater; ++ else ++ diff = xmemcoll (texta, lena, textb, lenb); ++ } ++ else if (ignore) ++ { ++#define CMP_WITH_IGNORE(A, B) \ ++ do \ ++ { \ ++ for (;;) \ ++ { \ ++ while (texta < lima && ignore[to_uchar (*texta)]) \ ++ ++texta; \ ++ while (textb < limb && ignore[to_uchar (*textb)]) \ ++ ++textb; \ ++ if (! 
(texta < lima && textb < limb)) \ ++ break; \ ++ diff = to_uchar (A) - to_uchar (B); \ ++ if (diff) \ ++ goto not_equal; \ ++ ++texta; \ ++ ++textb; \ ++ } \ ++ \ ++ diff = (texta < lima) - (textb < limb); \ ++ } \ ++ while (0) ++ ++ if (translate) ++ CMP_WITH_IGNORE (translate[to_uchar (*texta)], ++ translate[to_uchar (*textb)]); ++ else ++ CMP_WITH_IGNORE (*texta, *textb); ++ } ++ else if (lena == 0) ++ diff = - NONZERO (lenb); ++ else if (lenb == 0) ++ goto greater; ++ else ++ { ++ if (translate) ++ { ++ while (texta < lima && textb < limb) ++ { ++ diff = (to_uchar (translate[to_uchar (*texta++)]) ++ - to_uchar (translate[to_uchar (*textb++)])); ++ if (diff) ++ goto not_equal; ++ } ++ } ++ else ++ { ++ diff = memcmp (texta, textb, MIN (lena, lenb)); ++ if (diff) ++ goto not_equal; ++ } ++ diff = lena < lenb ? -1 : lena != lenb; ++ } ++ ++ if (diff) ++ goto not_equal; ++ ++ key = key->next; ++ if (! key) ++ break; ++ ++ /* Find the beginning and limit of the next field. */ ++ if (key->eword != SIZE_MAX) ++ lima = limfield (a, key), limb = limfield (b, key); ++ else ++ lima = a->text + a->length - 1, limb = b->text + b->length - 1; ++ ++ if (key->sword != SIZE_MAX) ++ texta = begfield (a, key), textb = begfield (b, key); ++ else ++ { ++ texta = a->text, textb = b->text; ++ if (key->skipsblanks) ++ { ++ while (texta < lima && blanks[to_uchar (*texta)]) ++ ++texta; ++ while (textb < limb && blanks[to_uchar (*textb)]) ++ ++textb; ++ } ++ } ++ } ++ ++ return 0; ++ ++ greater: ++ diff = 1; ++ not_equal: ++ return key->reverse ? -diff : diff; ++} ++ ++/* Compare two lines A and B, returning negative, zero, or positive ++ depending on whether A compares less than, equal to, or greater than B. */ ++ ++static int ++compare (const struct line *a, const struct line *b) ++{ ++ int diff; ++ size_t alen, blen; ++ ++ /* First try to compare on the specified keys (if any). ++ The only two cases with no key at all are unadorned sort, ++ and unadorned sort -r. 
*/ ++ if (keylist) ++ { ++ diff = keycompare (a, b); ++ if (diff || unique || stable) ++ return diff; ++ } ++ ++ /* If the keys all compare equal (or no keys were specified) ++ fall through to the default comparison. */ ++ alen = a->length - 1, blen = b->length - 1; ++ ++ if (alen == 0) ++ diff = - NONZERO (blen); ++ else if (blen == 0) ++ diff = 1; ++ else if (hard_LC_COLLATE) ++ diff = xmemcoll (a->text, alen, b->text, blen); ++ else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) ++ diff = alen < blen ? -1 : alen != blen; ++ ++ return reverse ? -diff : diff; ++} ++ ++/* Check that the lines read from FILE_NAME come in order. Return ++ true if they are in order. If CHECKONLY == 'c', also print a ++ diagnostic (FILE_NAME, line number, contents of line) to stderr if ++ they are not in order. */ ++ ++static bool ++check (char const *file_name, char checkonly) ++{ ++ FILE *fp = xfopen (file_name, "r"); ++ struct buffer buf; /* Input buffer. */ ++ struct line temp; /* Copy of previous line. */ ++ size_t alloc = 0; ++ uintmax_t line_number = 0; ++ struct keyfield const *key = keylist; ++ bool nonunique = ! unique; ++ bool ordered = true; ++ ++ initbuf (&buf, sizeof (struct line), ++ MAX (merge_buffer_size, sort_size)); ++ temp.text = NULL; ++ ++ while (fillbuf (&buf, fp, file_name)) ++ { ++ struct line const *line = buffer_linelim (&buf); ++ struct line const *linebase = line - buf.nlines; ++ ++ /* Make sure the line saved from the old buffer contents is ++ less than or equal to the first line of the new buffer. 
*/ ++ if (alloc && nonunique <= compare (&temp, line - 1)) ++ { ++ found_disorder: ++ { ++ if (checkonly == 'c') ++ { ++ struct line const *disorder_line = line - 1; ++ uintmax_t disorder_line_number = ++ buffer_linelim (&buf) - disorder_line + line_number; ++ char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)]; ++ fprintf (stderr, _("%s: %s:%s: disorder: "), ++ program_name, file_name, ++ umaxtostr (disorder_line_number, hr_buf)); ++ write_bytes (disorder_line->text, disorder_line->length, ++ stderr, _("standard error")); ++ } ++ ++ ordered = false; ++ break; ++ } ++ } ++ ++ /* Compare each line in the buffer with its successor. */ ++ while (linebase < --line) ++ if (nonunique <= compare (line, line - 1)) ++ goto found_disorder; ++ ++ line_number += buf.nlines; ++ ++ /* Save the last line of the buffer. */ ++ if (alloc < line->length) ++ { ++ do ++ { ++ alloc *= 2; ++ if (! alloc) ++ { ++ alloc = line->length; ++ break; ++ } ++ } ++ while (alloc < line->length); ++ ++ temp.text = xrealloc (temp.text, alloc); ++ } ++ memcpy (temp.text, line->text, line->length); ++ temp.length = line->length; ++ if (key) ++ { ++ temp.keybeg = temp.text + (line->keybeg - line->text); ++ temp.keylim = temp.text + (line->keylim - line->text); ++ } ++ } ++ ++ xfclose (fp, file_name); ++ free (buf.buf); ++ free (temp.text); ++ return ordered; ++} ++ ++/* Open FILES (there are NFILES of them) and store the resulting array ++ of stream pointers into (*PFPS). Allocate the array. Return the ++ number of successfully opened files, setting errno if this value is ++ less than NFILES. */ ++ ++static size_t ++open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps) ++{ ++ FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps); ++ int i; ++ ++ /* Open as many input files as we can. */ ++ for (i = 0; i < nfiles; i++) ++ { ++ fps[i] = (files[i].pid ++ ? 
open_temp (files[i].name, files[i].pid) ++ : stream_open (files[i].name, "r")); ++ if (!fps[i]) ++ break; ++ } ++ ++ return i; ++} ++ ++/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary ++ files (all of which are at the start of the FILES array), and ++ NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE. ++ FPS is the vector of open stream corresponding to the files. ++ Close input and output streams before returning. ++ OUTPUT_FILE gives the name of the output file. If it is NULL, ++ the output file is standard output. */ ++ ++static void ++mergefps (struct sortfile *files, size_t ntemps, size_t nfiles, ++ FILE *ofp, char const *output_file, FILE **fps) ++{ ++ struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer); ++ /* Input buffers for each file. */ ++ struct line saved; /* Saved line storage for unique check. */ ++ struct line const *savedline = NULL; ++ /* &saved if there is a saved line. */ ++ size_t savealloc = 0; /* Size allocated for the saved line. */ ++ struct line const **cur = xnmalloc (nfiles, sizeof *cur); ++ /* Current line in each line table. */ ++ struct line const **base = xnmalloc (nfiles, sizeof *base); ++ /* Base of each line table. */ ++ size_t *ord = xnmalloc (nfiles, sizeof *ord); ++ /* Table representing a permutation of fps, ++ such that cur[ord[0]] is the smallest line ++ and will be next output. */ ++ size_t i; ++ size_t j; ++ size_t t; ++ struct keyfield const *key = keylist; ++ saved.text = NULL; ++ ++ /* Read initial lines from each input file. */ ++ for (i = 0; i < nfiles; ) ++ { ++ initbuf (&buffer[i], sizeof (struct line), ++ MAX (merge_buffer_size, sort_size / nfiles)); ++ if (fillbuf (&buffer[i], fps[i], files[i].name)) ++ { ++ struct line const *linelim = buffer_linelim (&buffer[i]); ++ cur[i] = linelim - 1; ++ base[i] = linelim - buffer[i].nlines; ++ i++; ++ } ++ else ++ { ++ /* fps[i] is empty; eliminate it from future consideration. 
*/ ++ xfclose (fps[i], files[i].name); ++ if (i < ntemps) ++ { ++ ntemps--; ++ zaptemp (files[i].name); ++ } ++ free (buffer[i].buf); ++ --nfiles; ++ for (j = i; j < nfiles; ++j) ++ { ++ files[j] = files[j + 1]; ++ fps[j] = fps[j + 1]; ++ } ++ } ++ } ++ ++ /* Set up the ord table according to comparisons among input lines. ++ Since this only reorders two items if one is strictly greater than ++ the other, it is stable. */ ++ for (i = 0; i < nfiles; ++i) ++ ord[i] = i; ++ for (i = 1; i < nfiles; ++i) ++ if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) ++ t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; ++ ++ /* Repeatedly output the smallest line until no input remains. */ ++ while (nfiles) ++ { ++ struct line const *smallest = cur[ord[0]]; ++ ++ /* If uniquified output is turned on, output only the first of ++ an identical series of lines. */ ++ if (unique) ++ { ++ if (savedline && compare (savedline, smallest)) ++ { ++ savedline = NULL; ++ write_bytes (saved.text, saved.length, ofp, output_file); ++ } ++ if (!savedline) ++ { ++ savedline = &saved; ++ if (savealloc < smallest->length) ++ { ++ do ++ if (! savealloc) ++ { ++ savealloc = smallest->length; ++ break; ++ } ++ while ((savealloc *= 2) < smallest->length); ++ ++ saved.text = xrealloc (saved.text, savealloc); ++ } ++ saved.length = smallest->length; ++ memcpy (saved.text, smallest->text, saved.length); ++ if (key) ++ { ++ saved.keybeg = ++ saved.text + (smallest->keybeg - smallest->text); ++ saved.keylim = ++ saved.text + (smallest->keylim - smallest->text); ++ } ++ } ++ } ++ else ++ write_bytes (smallest->text, smallest->length, ofp, output_file); ++ ++ /* Check if we need to read more lines into core. 
*/ ++ if (base[ord[0]] < smallest) ++ cur[ord[0]] = smallest - 1; ++ else ++ { ++ if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name)) ++ { ++ struct line const *linelim = buffer_linelim (&buffer[ord[0]]); ++ cur[ord[0]] = linelim - 1; ++ base[ord[0]] = linelim - buffer[ord[0]].nlines; ++ } ++ else ++ { ++ /* We reached EOF on fps[ord[0]]. */ ++ for (i = 1; i < nfiles; ++i) ++ if (ord[i] > ord[0]) ++ --ord[i]; ++ --nfiles; ++ xfclose (fps[ord[0]], files[ord[0]].name); ++ if (ord[0] < ntemps) ++ { ++ ntemps--; ++ zaptemp (files[ord[0]].name); ++ } ++ free (buffer[ord[0]].buf); ++ for (i = ord[0]; i < nfiles; ++i) ++ { ++ fps[i] = fps[i + 1]; ++ files[i] = files[i + 1]; ++ buffer[i] = buffer[i + 1]; ++ cur[i] = cur[i + 1]; ++ base[i] = base[i + 1]; ++ } ++ for (i = 0; i < nfiles; ++i) ++ ord[i] = ord[i + 1]; ++ continue; ++ } ++ } ++ ++ /* The new line just read in may be larger than other lines ++ already in main memory; push it back in the queue until we ++ encounter a line larger than it. Optimize for the common ++ case where the new line is smallest. */ ++ { ++ size_t lo = 1; ++ size_t hi = nfiles; ++ size_t probe = lo; ++ size_t ord0 = ord[0]; ++ size_t count_of_smaller_lines; ++ ++ while (lo < hi) ++ { ++ int cmp = compare (cur[ord0], cur[ord[probe]]); ++ if (cmp < 0 || (cmp == 0 && ord0 < ord[probe])) ++ hi = probe; ++ else ++ lo = probe + 1; ++ probe = (lo + hi) / 2; ++ } ++ ++ count_of_smaller_lines = lo - 1; ++ for (j = 0; j < count_of_smaller_lines; j++) ++ ord[j] = ord[j + 1]; ++ ord[count_of_smaller_lines] = ord0; ++ } ++ ++ /* Free up some resources every once in a while. */ ++ if (MAX_PROCS_BEFORE_REAP < nprocs) ++ reap_some (); ++ } ++ ++ if (unique && savedline) ++ { ++ write_bytes (saved.text, saved.length, ofp, output_file); ++ free (saved.text); ++ } ++ ++ xfclose (ofp, output_file); ++ free(fps); ++ free(buffer); ++ free(ord); ++ free(base); ++ free(cur); ++} ++ ++/* Merge lines from FILES onto OFP. 
NTEMPS is the number of temporary ++ files (all of which are at the start of the FILES array), and ++ NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE. ++ Close input and output files before returning. ++ OUTPUT_FILE gives the name of the output file. ++ ++ Return the number of files successfully merged. This number can be ++ less than NFILES if we ran low on file descriptors, but in this ++ case it is never less than 2. */ ++ ++static size_t ++mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles, ++ FILE *ofp, char const *output_file) ++{ ++ FILE **fps; ++ size_t nopened = open_input_files (files, nfiles, &fps); ++ if (nopened < nfiles && nopened < 2) ++ die (_("open failed"), files[nopened].name); ++ mergefps (files, ntemps, nopened, ofp, output_file, fps); ++ return nopened; ++} ++ ++/* Merge into T the two sorted arrays of lines LO (with NLO members) ++ and HI (with NHI members). T, LO, and HI point just past their ++ respective arrays, and the arrays are in reverse order. NLO and ++ NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */ ++ ++static inline void ++mergelines (struct line *t, ++ struct line const *lo, size_t nlo, ++ struct line const *hi, size_t nhi) ++{ ++ for (;;) ++ if (compare (lo - 1, hi - 1) <= 0) ++ { ++ *--t = *--lo; ++ if (! --nlo) ++ { ++ /* HI - NHI equalled T - (NLO + NHI) when this function ++ began. Therefore HI must equal T now, and there is no ++ need to copy from HI to T. */ ++ return; ++ } ++ } ++ else ++ { ++ *--t = *--hi; ++ if (! --nhi) ++ { ++ do ++ *--t = *--lo; ++ while (--nlo); ++ ++ return; ++ } ++ } ++} ++ ++/* Sort the array LINES with NLINES members, using TEMP for temporary space. ++ NLINES must be at least 2. ++ The input and output arrays are in reverse order, and LINES and ++ TEMP point just past the end of their respective arrays. ++ ++ Use a recursive divide-and-conquer algorithm, in the style ++ suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. 
Use ++ the optimization suggested by exercise 5.2.4-10; this requires room ++ for only 1.5*N lines, rather than the usual 2*N lines. Knuth ++ writes that this memory optimization was originally published by ++ D. A. Bell, Comp J. 1 (1958), 75. */ ++ ++static void ++sortlines (struct line *lines, size_t nlines, struct line *temp) ++{ ++ if (nlines == 2) ++ { ++ if (0 < compare (&lines[-1], &lines[-2])) ++ { ++ struct line tmp = lines[-1]; ++ lines[-1] = lines[-2]; ++ lines[-2] = tmp; ++ } ++ } ++ else ++ { ++ size_t nlo = nlines / 2; ++ size_t nhi = nlines - nlo; ++ struct line *lo = lines; ++ struct line *hi = lines - nlo; ++ struct line *sorted_lo = temp; ++ ++ sortlines (hi, nhi, temp); ++ if (1 < nlo) ++ sortlines_temp (lo, nlo, sorted_lo); ++ else ++ sorted_lo[-1] = lo[-1]; ++ ++ mergelines (lines, sorted_lo, nlo, hi, nhi); ++ } ++} ++ ++/* Like sortlines (LINES, NLINES, TEMP), except output into TEMP ++ rather than sorting in place. */ ++ ++static void ++sortlines_temp (struct line *lines, size_t nlines, struct line *temp) ++{ ++ if (nlines == 2) ++ { ++ /* Declare `swap' as int, not bool, to work around a bug ++ ++ in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */ ++ int swap = (0 < compare (&lines[-1], &lines[-2])); ++ temp[-1] = lines[-1 - swap]; ++ temp[-2] = lines[-2 + swap]; ++ } ++ else ++ { ++ size_t nlo = nlines / 2; ++ size_t nhi = nlines - nlo; ++ struct line *lo = lines; ++ struct line *hi = lines - nlo; ++ struct line *sorted_hi = temp - nlo; ++ ++ sortlines_temp (hi, nhi, sorted_hi); ++ if (1 < nlo) ++ sortlines (lo, nlo, temp); ++ ++ mergelines (temp, lo, nlo, sorted_hi, nhi); ++ } ++} ++ ++/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is ++ the same as OUTFILE. If found, merge the found instances (and perhaps ++ some other files) into a temporary file so that it can in turn be ++ merged into OUTFILE without destroying OUTFILE before it is completely ++ read. 
Return the new value of NFILES, which differs from the old if ++ some merging occurred. ++ ++ This test ensures that an otherwise-erroneous use like ++ "sort -m -o FILE ... FILE ..." copies FILE before writing to it. ++ It's not clear that POSIX requires this nicety. ++ Detect common error cases, but don't try to catch obscure cases like ++ "cat ... FILE ... | sort -m -o FILE" ++ where traditional "sort" doesn't copy the input and where ++ people should know that they're getting into trouble anyway. ++ Catching these obscure cases would slow down performance in ++ common cases. */ ++ ++static size_t ++avoid_trashing_input (struct sortfile *files, size_t ntemps, ++ size_t nfiles, char const *outfile) ++{ ++ size_t i; ++ bool got_outstat = false; ++ struct stat outstat; ++ ++ for (i = ntemps; i < nfiles; i++) ++ { ++ bool is_stdin = STREQ (files[i].name, "-"); ++ bool same; ++ struct stat instat; ++ ++ if (outfile && STREQ (outfile, files[i].name) && !is_stdin) ++ same = true; ++ else ++ { ++ if (! got_outstat) ++ { ++ if ((outfile ++ ? stat (outfile, &outstat) ++ : fstat (STDOUT_FILENO, &outstat)) ++ != 0) ++ break; ++ got_outstat = true; ++ } ++ ++ same = (((is_stdin ++ ? fstat (STDIN_FILENO, &instat) ++ : stat (files[i].name, &instat)) ++ == 0) ++ && SAME_INODE (instat, outstat)); ++ } ++ ++ if (same) ++ { ++ FILE *tftp; ++ pid_t pid; ++ char *temp = create_temp (&tftp, &pid); ++ size_t num_merged = 0; ++ do ++ { ++ num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp); ++ files[i].name = temp; ++ files[i].pid = pid; ++ ++ if (i + num_merged < nfiles) ++ memmove(&files[i + 1], &files[i + num_merged], ++ num_merged * sizeof *files); ++ ntemps += 1; ++ nfiles -= num_merged - 1;; ++ i += num_merged; ++ } ++ while (i < nfiles); ++ } ++ } ++ ++ return nfiles; ++} ++ ++/* Merge the input FILES. NTEMPS is the number of files at the ++ start of FILES that are temporary; it is zero at the top level. ++ NFILES is the total number of files. 
/* Merge the input FILES.  NTEMPS is the number of files at the
   start of FILES that are temporary; it is zero at the top level.
   NFILES is the total number of files.  Put the output in
   OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.

   The work happens in three stages:
   1. While there are more than `nmerge' inputs, merge groups of them
      into temporary files and compact FILES so the results come first.
   2. Call avoid_trashing_input so that an input that is also the
      output is not overwritten while still being read.
   3. Merge what is left into the output; if streams cannot all be
      opened, merge as many as possible into a temporary and retry.  */

static void
merge (struct sortfile *files, size_t ntemps, size_t nfiles,
       char const *output_file)
{
  while (nmerge < nfiles)
    {
      /* Number of input files processed so far.  */
      size_t in;

      /* Number of output files generated so far.  */
      size_t out;

      /* nfiles % NMERGE; this counts input files that are left over
         after all full-sized merges have been done.  */
      size_t remainder;

      /* Number of easily-available slots at the next loop iteration.  */
      size_t cheap_slots;

      /* Do as many NMERGE-size merges as possible.  mergefiles may
         merge fewer than NMERGE files, so IN advances by the count it
         reports rather than by NMERGE.  */
      for (out = in = 0; nmerge <= nfiles - in; out++)
        {
          FILE *tfp;
          pid_t pid;
          char *temp = create_temp (&tfp, &pid);
          size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge),
                                          nmerge, tfp, temp);
          /* Temporaries consumed by this merge no longer count.  */
          ntemps -= MIN (ntemps, num_merged);
          /* OUT never exceeds IN, so this slot has already been read.  */
          files[out].name = temp;
          files[out].pid = pid;
          in += num_merged;
        }

      remainder = nfiles - in;
      cheap_slots = nmerge - out % nmerge;

      if (cheap_slots < remainder)
        {
          /* So many files remain that they can't all be put into the last
             NMERGE-sized output window.  Do one more merge.  Merge as few
             files as possible, to avoid needless I/O.  */
          size_t nshortmerge = remainder - cheap_slots + 1;
          FILE *tfp;
          pid_t pid;
          char *temp = create_temp (&tfp, &pid);
          size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge),
                                          nshortmerge, tfp, temp);
          ntemps -= MIN (ntemps, num_merged);
          files[out].name = temp;
          files[out++].pid = pid;
          in += num_merged;
        }

      /* Put the remaining input files into the last NMERGE-sized output
         window, so they will be merged in the next pass.  */
      memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
      /* The OUT files just created are temporaries at the front.  */
      ntemps += out;
      nfiles -= in - out;
    }

  nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);

  /* We aren't guaranteed that this final mergefiles will work, therefore we
     try to merge into the output, and then merge as much as we can into a
     temp file if we can't.  Repeat.  */

  for (;;)
    {
      /* Merge directly into the output file if possible.  */
      FILE **fps;
      size_t nopened = open_input_files (files, nfiles, &fps);

      if (nopened == nfiles)
        {
          FILE *ofp = stream_open (output_file, "w");
          if (ofp)
            {
              mergefps (files, ntemps, nfiles, ofp, output_file, fps);
              break;
            }
          /* Only descriptor exhaustion with more than two inputs open
             is recoverable; anything else is fatal.  */
          if (errno != EMFILE || nopened <= 2)
            die (_("open failed"), output_file);
        }
      else if (nopened <= 2)
        die (_("open failed"), files[nopened].name);

      /* We ran out of file descriptors.  Close one of the input
         files, to gain a file descriptor.  Then create a temporary
         file with our spare file descriptor.  Retry if that failed
         (e.g., some other process could open a file between the time
         we closed and tried to create).  */
      FILE *tfp;
      pid_t pid;
      char *temp;
      do
        {
          nopened--;
          xfclose (fps[nopened], files[nopened].name);
          /* The last argument becomes false once NOPENED has dropped
             to 2 or fewer.  NOTE(review): presumably that makes a
             failure fatal instead of returning NULL -- confirm against
             maybe_create_temp.  */
          temp = maybe_create_temp (&tfp, &pid, ! (nopened <= 2));
        }
      while (!temp);

      /* Merge into the newly allocated temporary.  */
      mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp, fps);
      ntemps -= MIN (ntemps, nopened);
      files[0].name = temp;
      files[0].pid = pid;

      /* Slide the unmerged files down next to the new temporary.  */
      memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files);
      ntemps++;
      nfiles -= nopened - 1;
    }
}
/* Sort NFILES FILES onto OUTPUT_FILE.

   Each input is read into a shared buffer.  Every full buffer is
   sorted with sortlines and written out: directly to OUTPUT_FILE when
   it holds all of the input and no temporary has been created yet,
   otherwise to a new temporary file.  If any temporaries were
   written, they are handed to merge() at the end.  */

static void
sort (char * const *files, size_t nfiles, char const *output_file)
{
  struct buffer buf;
  size_t ntemps = 0;
  bool output_file_created = false;

  /* A zero allocation marks the buffer as not yet initialized.  */
  buf.alloc = 0;

  while (nfiles)
    {
      char const *temp_output;
      char const *file = *files;
      FILE *fp = xfopen (file, "r");
      FILE *tfp;
      /* One and a half `struct line' per input line.  NOTE(review):
         presumably this matches the scratch space sortlines needs --
         confirm.  */
      size_t bytes_per_line = (2 * sizeof (struct line)
                               - sizeof (struct line) / 2);

      if (! buf.alloc)
        initbuf (&buf, bytes_per_line,
                 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
      buf.eof = false;
      files++;
      nfiles--;

      while (fillbuf (&buf, fp, file))
        {
          struct line *line;
          struct line *linebase;

          if (buf.eof && nfiles
              && (bytes_per_line + 1
                  < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
            {
              /* End of file, but there is more input and buffer room.
                 Concatenate the next input file; this is faster in
                 the usual case.  */
              buf.left = buf.used;
              break;
            }

          line = buffer_linelim (&buf);
          linebase = line - buf.nlines;
          if (1 < buf.nlines)
            sortlines (line, buf.nlines, linebase);
          if (buf.eof && !nfiles && !ntemps && !buf.left)
            {
              /* All input fit in one buffer: write the final output
                 directly, with no temporary file.  */
              xfclose (fp, file);
              tfp = xfopen (output_file, "w");
              temp_output = output_file;
              output_file_created = true;
            }
          else
            {
              ++ntemps;
              temp_output = create_temp (&tfp, NULL);
            }

          /* Walk the line table from its end down to LINEBASE,
             writing each line.  With `unique' set, lines that compare
             equal to the one just written are skipped.  */
          do
            {
              line--;
              write_bytes (line->text, line->length, tfp, temp_output);
              if (unique)
                while (linebase < line && compare (line, line - 1) == 0)
                  line--;
            }
          while (linebase < line);

          xfclose (tfp, temp_output);

          /* Free up some resources every once in a while.  */
          if (MAX_PROCS_BEFORE_REAP < nprocs)
            reap_some ();

          /* FP was already closed above in this case, so skip the
             xfclose that follows the loop.  */
          if (output_file_created)
            goto finish;
        }
      xfclose (fp, file);
    }

 finish:
  free (buf.buf);

  if (! output_file_created)
    {
      size_t i;
      struct tempnode *node = temphead;
      struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
      /* Copy the temporary-file list into an array for merge().  The
         loop is bounded by the list, not by NTEMPS.  NOTE(review):
         this assumes the list holds exactly NTEMPS nodes -- confirm.  */
      for (i = 0; node; i++)
        {
          tempfiles[i].name = node->name;
          tempfiles[i].pid = node->pid;
          node = node->next;
        }
      merge (tempfiles, ntemps, ntemps, output_file);
      free (tempfiles);
    }
}

/* Insert a malloc'd copy of key KEY_ARG at the end of the key list.
   The copy's `next' pointer is cleared, so KEY_ARG itself may be a
   reused or stack-allocated object.  */

static void
insertkey (struct keyfield *key_arg)
{
  struct keyfield **p;
  struct keyfield *key = xmemdup (key_arg, sizeof *key);

  /* Advance P to the null link that terminates the list.  */
  for (p = &keylist; *p; p = &(*p)->next)
    continue;
  *p = key;
  key->next = NULL;
}

/* Report a bad field specification SPEC, with extra info MSGID.
   MSGID is translated before it is printed.  Does not return.  */

static void badfieldspec (char const *, char const *)
     ATTRIBUTE_NORETURN;
static void
badfieldspec (char const *spec, char const *msgid)
{
  error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
         _(msgid), quote (spec));
  /* Not reached if error() exits, as a nonzero status asks it to.  */
  abort ();
}

/* Report incompatible options.  OPTS is the string of option letters
   shown in the message.  Does not return.  */

static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
static void
incompatible_options (char const *opts)
{
  error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
  /* Not reached if error() exits, as a nonzero status asks it to.  */
  abort ();
}
*/ ++ char opts[sizeof short_options]; ++ char *p = opts; ++ if (key->ignore == nondictionary) ++ *p++ = 'd'; ++ if (key->translate) ++ *p++ = 'f'; ++ if (key->general_numeric) ++ *p++ = 'g'; ++ if (key->human_numeric) ++ *p++ = 'h'; ++ if (key->ignore == nonprinting) ++ *p++ = 'i'; ++ if (key->month) ++ *p++ = 'M'; ++ if (key->numeric) ++ *p++ = 'n'; ++ if (key->version) ++ *p++ = 'V'; ++ if (key->random) ++ *p++ = 'R'; ++ *p = '\0'; ++ incompatible_options (opts); ++ } ++} ++ ++/* Parse the leading integer in STRING and store the resulting value ++ (which must fit into size_t) into *VAL. Return the address of the ++ suffix after the integer. If the value is too large, silently ++ substitute SIZE_MAX. If MSGID is NULL, return NULL after ++ failure; otherwise, report MSGID and exit on failure. */ ++ ++static char const * ++parse_field_count (char const *string, size_t *val, char const *msgid) ++{ ++ char *suffix; ++ uintmax_t n; ++ ++ switch (xstrtoumax (string, &suffix, 10, &n, "")) ++ { ++ case LONGINT_OK: ++ case LONGINT_INVALID_SUFFIX_CHAR: ++ *val = n; ++ if (*val == n) ++ break; ++ /* Fall through. */ ++ case LONGINT_OVERFLOW: ++ case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR: ++ *val = SIZE_MAX; ++ break; ++ ++ case LONGINT_INVALID: ++ if (msgid) ++ error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"), ++ _(msgid), quote (string)); ++ return NULL; ++ } ++ ++ return suffix; ++} ++ ++/* Handle interrupts and hangups. */ ++ ++static void ++sighandler (int sig) ++{ ++ if (! SA_NOCLDSTOP) ++ signal (sig, SIG_IGN); ++ ++ cleanup (); ++ ++ signal (sig, SIG_DFL); ++ raise (sig); ++} ++ ++/* Set the ordering options for KEY specified in S. ++ Return the address of the first character in S that ++ is not a valid ordering option. ++ BLANKTYPE is the kind of blanks that 'b' should skip. 
*/ ++ ++static char * ++set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype) ++{ ++ while (*s) ++ { ++ switch (*s) ++ { ++ case 'b': ++ if (blanktype == bl_start || blanktype == bl_both) ++ key->skipsblanks = true; ++ if (blanktype == bl_end || blanktype == bl_both) ++ key->skipeblanks = true; ++ break; ++ case 'd': ++ key->ignore = nondictionary; ++ break; ++ case 'f': ++ key->translate = fold_toupper; ++ break; ++ case 'g': ++ key->general_numeric = true; ++ break; ++ case 'h': ++ key->human_numeric = true; ++ break; ++ case 'i': ++ /* Option order should not matter, so don't let -i override ++ -d. -d implies -i, but -i does not imply -d. */ ++ if (! key->ignore) ++ key->ignore = nonprinting; ++ break; ++ case 'M': ++ key->month = true; ++ break; ++ case 'n': ++ key->numeric = true; ++ break; ++ case 'R': ++ key->random = true; ++ break; ++ case 'r': ++ key->reverse = true; ++ break; ++ case 'V': ++ key->version = true; ++ break; ++ default: ++ return (char *) s; ++ } ++ ++s; ++ } ++ return (char *) s; ++} ++ ++static struct keyfield * ++key_init (struct keyfield *key) ++{ ++ memset (key, 0, sizeof *key); ++ key->eword = SIZE_MAX; ++ key->si_present = -1; ++ return key; ++} ++ ++int ++main (int argc, char **argv) ++{ ++ struct keyfield *key; ++ struct keyfield key_buf; ++ struct keyfield gkey; ++ char const *s; ++ int c = 0; ++ char checkonly = 0; ++ bool mergeonly = false; ++ char *random_source = NULL; ++ bool need_random = false; ++ size_t nfiles = 0; ++ bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); ++ bool obsolete_usage = (posix2_version () < 200112); ++ char **files; ++ char *files_from = NULL; ++ struct Tokens tok; ++ char const *outfile = NULL; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ initialize_exit_failure (SORT_FAILURE); ++ ++ hard_LC_COLLATE = hard_locale (LC_COLLATE); ++#if 
HAVE_NL_LANGINFO ++ hard_LC_TIME = hard_locale (LC_TIME); ++#endif ++ ++ /* Get locale's representation of the decimal point. */ ++ { ++ struct lconv const *locale = localeconv (); ++ ++ /* If the locale doesn't define a decimal point, or if the decimal ++ point is multibyte, use the C locale's decimal point. FIXME: ++ add support for multibyte decimal points. */ ++ decimal_point = to_uchar (locale->decimal_point[0]); ++ if (! decimal_point || locale->decimal_point[1]) ++ decimal_point = '.'; ++ ++ /* FIXME: add support for multibyte thousands separators. */ ++ thousands_sep = to_uchar (*locale->thousands_sep); ++ if (! thousands_sep || locale->thousands_sep[1]) ++ thousands_sep = -1; ++ } ++ ++ have_read_stdin = false; ++ inittables (); ++ ++ { ++ size_t i; ++ static int const sig[] = ++ { ++ /* The usual suspects. */ ++ SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, ++#ifdef SIGPOLL ++ SIGPOLL, ++#endif ++#ifdef SIGPROF ++ SIGPROF, ++#endif ++#ifdef SIGVTALRM ++ SIGVTALRM, ++#endif ++#ifdef SIGXCPU ++ SIGXCPU, ++#endif ++#ifdef SIGXFSZ ++ SIGXFSZ, ++#endif ++ }; ++ enum { nsigs = ARRAY_CARDINALITY (sig) }; ++ ++#if SA_NOCLDSTOP ++ struct sigaction act; ++ ++ sigemptyset (&caught_signals); ++ for (i = 0; i < nsigs; i++) ++ { ++ sigaction (sig[i], NULL, &act); ++ if (act.sa_handler != SIG_IGN) ++ sigaddset (&caught_signals, sig[i]); ++ } ++ ++ act.sa_handler = sighandler; ++ act.sa_mask = caught_signals; ++ act.sa_flags = 0; ++ ++ for (i = 0; i < nsigs; i++) ++ if (sigismember (&caught_signals, sig[i])) ++ sigaction (sig[i], &act, NULL); ++#else ++ for (i = 0; i < nsigs; i++) ++ if (signal (sig[i], SIG_IGN) != SIG_IGN) ++ { ++ signal (sig[i], sighandler); ++ siginterrupt (sig[i], 1); ++ } ++#endif ++ } ++ ++ /* The signal mask is known, so it is safe to invoke exit_cleanup. 
*/ ++ atexit (exit_cleanup); ++ ++ gkey.sword = gkey.eword = SIZE_MAX; ++ gkey.ignore = NULL; ++ gkey.translate = NULL; ++ gkey.numeric = gkey.general_numeric = gkey.human_numeric = false; ++ gkey.si_present = -1; ++ gkey.random = gkey.version = false; ++ gkey.month = gkey.reverse = false; ++ gkey.skipsblanks = gkey.skipeblanks = false; ++ ++ files = xnmalloc (argc, sizeof *files); ++ ++ for (;;) ++ { ++ /* Parse an operand as a file after "--" was seen; or if ++ pedantic and a file was seen, unless the POSIX version ++ predates 1003.1-2001 and -c was not seen and the operand is ++ "-o FILE" or "-oFILE". */ ++ int oi = -1; ++ ++ if (c == -1 ++ || (posixly_correct && nfiles != 0 ++ && ! (obsolete_usage ++ && ! checkonly ++ && optind != argc ++ && argv[optind][0] == '-' && argv[optind][1] == 'o' ++ && (argv[optind][2] || optind + 1 != argc))) ++ || ((c = getopt_long (argc, argv, short_options, ++ long_options, &oi)) ++ == -1)) ++ { ++ if (argc <= optind) ++ break; ++ files[nfiles++] = argv[optind++]; ++ } ++ else switch (c) ++ { ++ case 1: ++ key = NULL; ++ if (optarg[0] == '+') ++ { ++ bool minus_pos_usage = (optind != argc && argv[optind][0] == '-' ++ && ISDIGIT (argv[optind][1])); ++ obsolete_usage |= minus_pos_usage && !posixly_correct; ++ if (obsolete_usage) ++ { ++ /* Treat +POS1 [-POS2] as a key if possible; but silently ++ treat an operand as a file if it is not a valid +POS1. */ ++ key = key_init (&key_buf); ++ s = parse_field_count (optarg + 1, &key->sword, NULL); ++ if (s && *s == '.') ++ s = parse_field_count (s + 1, &key->schar, NULL); ++ if (! (key->sword || key->schar)) ++ key->sword = SIZE_MAX; ++ if (! 
s || *set_ordering (s, key, bl_start)) ++ key = NULL; ++ else ++ { ++ if (minus_pos_usage) ++ { ++ char const *optarg1 = argv[optind++]; ++ s = parse_field_count (optarg1 + 1, &key->eword, ++ N_("invalid number after `-'")); ++ if (*s == '.') ++ s = parse_field_count (s + 1, &key->echar, ++ N_("invalid number after `.'")); ++ if (*set_ordering (s, key, bl_end)) ++ badfieldspec (optarg1, ++ N_("stray character in field spec")); ++ } ++ insertkey (key); ++ } ++ } ++ } ++ if (! key) ++ files[nfiles++] = optarg; ++ break; ++ ++ case SORT_OPTION: ++ c = XARGMATCH ("--sort", optarg, sort_args, sort_types); ++ /* Fall through. */ ++ case 'b': ++ case 'd': ++ case 'f': ++ case 'g': ++ case 'h': ++ case 'i': ++ case 'M': ++ case 'n': ++ case 'r': ++ case 'R': ++ case 'V': ++ { ++ char str[2]; ++ str[0] = c; ++ str[1] = '\0'; ++ set_ordering (str, &gkey, bl_both); ++ } ++ break; ++ ++ case CHECK_OPTION: ++ c = (optarg ++ ? XARGMATCH ("--check", optarg, check_args, check_types) ++ : 'c'); ++ /* Fall through. */ ++ case 'c': ++ case 'C': ++ if (checkonly && checkonly != c) ++ incompatible_options ("cC"); ++ checkonly = c; ++ break; ++ ++ case COMPRESS_PROGRAM_OPTION: ++ if (compress_program && !STREQ (compress_program, optarg)) ++ error (SORT_FAILURE, 0, _("multiple compress programs specified")); ++ compress_program = optarg; ++ break; ++ ++ case FILES0_FROM_OPTION: ++ files_from = optarg; ++ break; ++ ++ case 'k': ++ key = key_init (&key_buf); ++ ++ /* Get POS1. */ ++ s = parse_field_count (optarg, &key->sword, ++ N_("invalid number at field start")); ++ if (! key->sword--) ++ { ++ /* Provoke with `sort -k0' */ ++ badfieldspec (optarg, N_("field number is zero")); ++ } ++ if (*s == '.') ++ { ++ s = parse_field_count (s + 1, &key->schar, ++ N_("invalid number after `.'")); ++ if (! key->schar--) ++ { ++ /* Provoke with `sort -k1.0' */ ++ badfieldspec (optarg, N_("character offset is zero")); ++ } ++ } ++ if (! 
(key->sword || key->schar)) ++ key->sword = SIZE_MAX; ++ s = set_ordering (s, key, bl_start); ++ if (*s != ',') ++ { ++ key->eword = SIZE_MAX; ++ key->echar = 0; ++ } ++ else ++ { ++ /* Get POS2. */ ++ s = parse_field_count (s + 1, &key->eword, ++ N_("invalid number after `,'")); ++ if (! key->eword--) ++ { ++ /* Provoke with `sort -k1,0' */ ++ badfieldspec (optarg, N_("field number is zero")); ++ } ++ if (*s == '.') ++ { ++ s = parse_field_count (s + 1, &key->echar, ++ N_("invalid number after `.'")); ++ } ++ s = set_ordering (s, key, bl_end); ++ } ++ if (*s) ++ badfieldspec (optarg, N_("stray character in field spec")); ++ insertkey (key); ++ break; ++ ++ case 'm': ++ mergeonly = true; ++ break; ++ ++ case NMERGE_OPTION: ++ specify_nmerge (oi, c, optarg); ++ break; ++ ++ case 'o': ++ if (outfile && !STREQ (outfile, optarg)) ++ error (SORT_FAILURE, 0, _("multiple output files specified")); ++ outfile = optarg; ++ break; ++ ++ case RANDOM_SOURCE_OPTION: ++ if (random_source && !STREQ (random_source, optarg)) ++ error (SORT_FAILURE, 0, _("multiple random sources specified")); ++ random_source = optarg; ++ break; ++ ++ case 's': ++ stable = true; ++ break; ++ ++ case 'S': ++ specify_sort_size (oi, c, optarg); ++ break; ++ ++ case 't': ++ { ++ char newtab = optarg[0]; ++ if (! newtab) ++ error (SORT_FAILURE, 0, _("empty tab")); ++ if (optarg[1]) ++ { ++ if (STREQ (optarg, "\\0")) ++ newtab = '\0'; ++ else ++ { ++ /* Provoke with `sort -txx'. Complain about ++ "multi-character tab" instead of "multibyte tab", so ++ that the diagnostic's wording does not need to be ++ changed once multibyte characters are supported. */ ++ error (SORT_FAILURE, 0, _("multi-character tab %s"), ++ quote (optarg)); ++ } ++ } ++ if (tab != TAB_DEFAULT && tab != newtab) ++ error (SORT_FAILURE, 0, _("incompatible tabs")); ++ tab = newtab; ++ } ++ break; ++ ++ case 'T': ++ add_temp_dir (optarg); ++ break; ++ ++ case 'u': ++ unique = true; ++ break; ++ ++ case 'y': ++ /* Accept and ignore e.g. 
-y0 for compatibility with Solaris 2.x ++ through Solaris 7. It is also accepted by many non-Solaris ++ "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5. ++ -y is marked as obsolete starting with Solaris 8 (1999), but is ++ still accepted as of Solaris 10 prerelease (2004). ++ ++ Solaris 2.5.1 "sort -y 100" reads the input file "100", but ++ emulate Solaris 8 and 9 "sort -y 100" which ignores the "100", ++ and which in general ignores the argument after "-y" if it ++ consists entirely of digits (it can even be empty). */ ++ if (optarg == argv[optind - 1]) ++ { ++ char const *p; ++ for (p = optarg; ISDIGIT (*p); p++) ++ continue; ++ optind -= (*p != '\0'); ++ } ++ break; ++ ++ case 'z': ++ eolchar = 0; ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (SORT_FAILURE); ++ } ++ } ++ ++ if (files_from) ++ { ++ FILE *stream; ++ ++ /* When using --files0-from=F, you may not specify any files ++ on the command-line. */ ++ if (nfiles) ++ { ++ error (0, 0, _("extra operand %s"), quote (files[0])); ++ fprintf (stderr, "%s\n", ++ _("file operands cannot be combined with --files0-from")); ++ usage (SORT_FAILURE); ++ } ++ ++ if (STREQ (files_from, "-")) ++ stream = stdin; ++ else ++ { ++ stream = fopen (files_from, "r"); ++ if (stream == NULL) ++ error (SORT_FAILURE, errno, _("cannot open %s for reading"), ++ quote (files_from)); ++ } ++ ++ readtokens0_init (&tok); ++ ++ if (! 
readtokens0 (stream, &tok) || fclose (stream) != 0) ++ error (SORT_FAILURE, 0, _("cannot read file names from %s"), ++ quote (files_from)); ++ ++ if (tok.n_tok) ++ { ++ size_t i; ++ free (files); ++ files = tok.tok; ++ nfiles = tok.n_tok; ++ for (i = 0; i < nfiles; i++) ++ { ++ if (STREQ (files[i], "-")) ++ error (SORT_FAILURE, 0, _("when reading file names from stdin, " ++ "no file name of %s allowed"), ++ quote (files[i])); ++ else if (files[i][0] == '\0') ++ { ++ /* Using the standard `filename:line-number:' prefix here is ++ not totally appropriate, since NUL is the separator, not NL, ++ but it might be better than nothing. */ ++ unsigned long int file_number = i + 1; ++ error (SORT_FAILURE, 0, ++ _("%s:%lu: invalid zero-length file name"), ++ quotearg_colon (files_from), file_number); ++ } ++ } ++ } ++ else ++ error (SORT_FAILURE, 0, _("no input from %s"), ++ quote (files_from)); ++ } ++ ++ /* Inheritance of global options to individual keys. */ ++ for (key = keylist; key; key = key->next) ++ { ++ if (! 
(key->ignore ++ || key->translate ++ || (key->skipsblanks ++ || key->reverse ++ || key->skipeblanks ++ || key->month ++ || key->numeric ++ || key->version ++ || key->general_numeric ++ || key->human_numeric ++ || key->random))) ++ { ++ key->ignore = gkey.ignore; ++ key->translate = gkey.translate; ++ key->skipsblanks = gkey.skipsblanks; ++ key->skipeblanks = gkey.skipeblanks; ++ key->month = gkey.month; ++ key->numeric = gkey.numeric; ++ key->general_numeric = gkey.general_numeric; ++ key->human_numeric = gkey.human_numeric; ++ key->random = gkey.random; ++ key->reverse = gkey.reverse; ++ key->version = gkey.version; ++ } ++ ++ need_random |= key->random; ++ } ++ ++ if (!keylist && (gkey.ignore ++ || gkey.translate ++ || (gkey.skipsblanks ++ || gkey.skipeblanks ++ || gkey.month ++ || gkey.numeric ++ || gkey.general_numeric ++ || gkey.human_numeric ++ || gkey.random ++ || gkey.version))) ++ { ++ insertkey (&gkey); ++ need_random |= gkey.random; ++ } ++ ++ check_ordering_compatibility (); ++ ++ reverse = gkey.reverse; ++ ++ if (need_random) ++ { ++ randread_source = randread_new (random_source, MD5_DIGEST_SIZE); ++ if (! randread_source) ++ die (_("open failed"), random_source); ++ } ++ ++ if (temp_dir_count == 0) ++ { ++ char const *tmp_dir = getenv ("TMPDIR"); ++ add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR); ++ } ++ ++ if (nfiles == 0) ++ { ++ static char *minus = (char *) "-"; ++ nfiles = 1; ++ free (files); ++ files = − ++ } ++ ++ /* Need to re-check that we meet the minimum requirement for memory ++ usage with the final value for NMERGE. 
*/ ++ if (0 < sort_size) ++ sort_size = MAX (sort_size, MIN_SORT_SIZE); ++ ++ if (checkonly) ++ { ++ if (nfiles > 1) ++ error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"), ++ quote (files[1]), checkonly); ++ ++ if (outfile) ++ { ++ static char opts[] = {0, 'o', 0}; ++ opts[0] = checkonly; ++ incompatible_options (opts); ++ } ++ ++ /* POSIX requires that sort return 1 IFF invoked with -c or -C and the ++ input is not properly sorted. */ ++ exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); ++ } ++ ++ if (mergeonly) ++ { ++ struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles); ++ size_t i; ++ ++ for (i = 0; i < nfiles; ++i) ++ sortfiles[i].name = files[i]; ++ ++ merge (sortfiles, 0, nfiles, outfile); ++ IF_LINT (free (sortfiles)); ++ } ++ else ++ sort (files, nfiles, outfile); ++ ++ if (have_read_stdin && fclose (stdin) == EOF) ++ die (_("close failed"), "-"); ++ ++ exit (EXIT_SUCCESS); ++} +diff -urNp coreutils-8.0-orig/src/unexpand.c coreutils-8.0/src/unexpand.c +--- coreutils-8.0-orig/src/unexpand.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/unexpand.c 2009-10-07 10:07:16.000000000 +0200 +@@ -38,11 +38,28 @@ #include #include #include @@ -2482,7 +12965,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "unexpand" -@@ -110,6 +127,208 @@ +@@ -102,6 +119,208 @@ static struct option const longopts[] = {NULL, 0, NULL, 0} }; @@ -2691,7 +13174,7 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c void usage (int status) { -@@ -531,7 +750,12 @@ +@@ -523,7 +742,12 @@ main (int argc, char **argv) file_list = (optind < argc ? 
&argv[optind] : stdin_argv); @@ -2705,752 +13188,572 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c if (have_read_stdin && fclose (stdin) != 0) error (EXIT_FAILURE, errno, "-"); ---- coreutils-6.8+/src/pr.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/pr.c 2007-03-01 15:08:24.000000000 +0000 -@@ -313,6 +313,32 @@ - - #include - #include +diff -urNp coreutils-8.0-orig/src/unexpand.c.orig coreutils-8.0/src/unexpand.c.orig +--- coreutils-8.0-orig/src/unexpand.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/unexpand.c.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,532 @@ ++/* unexpand - convert blanks to tabs ++ Copyright (C) 89, 91, 1995-2006, 2008-2009 Free Software Foundation, Inc. + -+/* Get MB_LEN_MAX. */ -+#include -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1 -+# define MB_LEN_MAX 16 -+#endif ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. + -+/* Get MB_CUR_MAX. */ -+#include ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. + -+/* Solaris 2.5 has a bug: must be included before . */ -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include -+#endif ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ + -+/* Get iswprint(). -- for wcwidth(). 
*/ -+#if HAVE_WCTYPE_H -+# include -+#endif -+#if !defined iswprint && !HAVE_ISWPRINT -+# define iswprint(wc) 1 -+#endif ++/* By default, convert only maximal strings of initial blanks and tabs ++ into tabs. ++ Preserves backspace characters in the output; they decrement the ++ column count for tab calculations. ++ The default action is equivalent to -8. + - #include "system.h" - #include "error.h" - #include "mbswidth.h" -@@ -324,6 +350,18 @@ - #include "strftime.h" - #include "xstrtol.h" - -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif ++ Options: ++ --tabs=tab1[,tab2[,...]] ++ -t tab1[,tab2[,...]] ++ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1 ++ columns apart instead of the default 8. Otherwise, ++ set the tabs at columns tab1, tab2, etc. (numbered from ++ 0); preserve any blanks beyond the tab stops given. ++ --all ++ -a Use tabs wherever they would replace 2 or more blanks, ++ not just at the beginnings of lines. + -+#ifndef HAVE_DECL_WCWIDTH -+"this configure-time declaration test was not run" -+#endif -+#if !HAVE_DECL_WCWIDTH -+extern int wcwidth (); -+#endif ++ David MacKenzie */ + - /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "pr" - -@@ -416,7 +454,20 @@ - - typedef struct COLUMN COLUMN; - --static int char_to_clump (char c); -+/* Funtion pointers to switch functions for single byte locale or for -+ multibyte locale. If multibyte functions do not exist in your sysytem, -+ these pointers always point the function for single byte locale. */ -+static void (*print_char) (char c); -+static int (*char_to_clump) (char c); ++#include + -+/* Functions for single byte locale. 
*/ -+static void print_char_single (char c); -+static int char_to_clump_single (char c); ++#include ++#include ++#include ++#include "system.h" ++#include "error.h" ++#include "quote.h" ++#include "xstrndup.h" + -+/* Functions for multibyte locale. */ -+static void print_char_multi (char c); -+static int char_to_clump_multi (char c); ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "unexpand" + - static bool read_line (COLUMN *p); - static bool print_page (void); - static bool print_stored (COLUMN *p); -@@ -426,6 +477,7 @@ - static void pad_across_to (int position); - static void add_line_number (COLUMN *p); - static void getoptarg (char *arg, char switch_char, char *character, -+ int *character_length, int *character_width, - int *number); - void usage (int status); - static void print_files (int number_of_files, char **av); -@@ -440,7 +492,6 @@ - static void pad_down (int lines); - static void read_rest_of_line (COLUMN *p); - static void skip_read (COLUMN *p, int column_number); --static void print_char (char c); - static void cleanup (void); - static void print_sep_string (void); - static void separator_string (const char *optarg_S); -@@ -455,7 +506,7 @@ - we store the leftmost columns contiguously in buff. - To print a line from buff, get the index of the first character - from line_vector[i], and print up to line_vector[i + 1]. */ --static char *buff; -+static unsigned char *buff; - - /* Index of the position in buff where the next character - will be stored. */ -@@ -559,7 +610,7 @@ - static bool untabify_input = false; - - /* (-e) The input tab character. */ --static char input_tab_char = '\t'; -+static char input_tab_char[MB_LEN_MAX] = "\t"; - - /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... - where the leftmost column is 1. */ -@@ -569,7 +620,10 @@ - static bool tabify_output = false; - - /* (-i) The output tab character. 
*/ --static char output_tab_char = '\t'; -+static char output_tab_char[MB_LEN_MAX] = "\t"; ++#define AUTHORS proper_name ("David MacKenzie") + -+/* (-i) The byte length of output tab character. */ -+static int output_tab_char_length = 1; - - /* (-i) The width of the output tab. */ - static int chars_per_output_tab = 8; -@@ -643,7 +697,13 @@ - static bool numbered_lines = false; - - /* (-n) Character which follows each line number. */ --static char number_separator = '\t'; -+static char number_separator[MB_LEN_MAX] = "\t"; ++/* If true, convert blanks even after nonblank characters have been ++ read on the line. */ ++static bool convert_entire_line; + -+/* (-n) The byte length of the character which follows each line number. */ -+static int number_separator_length = 1; ++/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */ ++static size_t tab_size; + -+/* (-n) The character width of the character which follows each line number. */ -+static int number_separator_width = 0; - - /* (-n) line counting starts with 1st line of input file (not with 1st - line of 1st page printed). */ -@@ -696,6 +756,7 @@ - -a|COLUMN|-m is a `space' and with the -J option a `tab'. */ - static char *col_sep_string = (char *) ""; - static int col_sep_length = 0; -+static int col_sep_width = 0; - static char *column_separator = (char *) " "; - static char *line_separator = (char *) "\t"; - -@@ -852,6 +913,13 @@ - col_sep_length = (int) strlen (optarg_S); - col_sep_string = xmalloc (col_sep_length + 1); - strcpy (col_sep_string, optarg_S); ++/* The maximum distance between tab stops. */ ++static size_t max_column_width; + -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ col_sep_width = mbswidth (col_sep_string, 0); -+ else -+#endif -+ col_sep_width = col_sep_length; - } - - int -@@ -877,6 +945,21 @@ - - atexit (close_stdout); - -+/* Define which functions are used, the ones for single byte locale or the ones -+ for multibyte locale. 
*/ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ print_char = print_char_multi; -+ char_to_clump = char_to_clump_multi; -+ } -+ else -+#endif -+ { -+ print_char = print_char_single; -+ char_to_clump = char_to_clump_single; -+ } ++/* Array of the explicit column numbers of the tab stops; ++ after `tab_list' is exhausted, the rest of the line is printed ++ unchanged. The first column is column 0. */ ++static uintmax_t *tab_list; + - n_files = 0; - file_names = (argc > 1 - ? xmalloc ((argc - 1) * sizeof (char *)) -@@ -949,8 +1032,12 @@ - break; - case 'e': - if (optarg) -- getoptarg (optarg, 'e', &input_tab_char, -- &chars_per_input_tab); -+ { -+ int dummy_length, dummy_width; ++/* The number of allocated entries in `tab_list'. */ ++static size_t n_tabs_allocated; + -+ getoptarg (optarg, 'e', input_tab_char, &dummy_length, -+ &dummy_width, &chars_per_input_tab); -+ } - /* Could check tab width > 0. */ - untabify_input = true; - break; -@@ -963,8 +1050,12 @@ - break; - case 'i': - if (optarg) -- getoptarg (optarg, 'i', &output_tab_char, -- &chars_per_output_tab); -+ { -+ int dummy_width; ++/* The index of the first invalid element of `tab_list', ++ where the next element can be added. */ ++static size_t first_free_tab; + -+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length, -+ &dummy_width, &chars_per_output_tab); -+ } - /* Could check tab width > 0. 
*/ - tabify_output = true; - break; -@@ -991,8 +1082,8 @@ - case 'n': - numbered_lines = true; - if (optarg) -- getoptarg (optarg, 'n', &number_separator, -- &chars_per_number); -+ getoptarg (optarg, 'n', number_separator, &number_separator_length, -+ &number_separator_width, &chars_per_number); - break; - case 'N': - skip_count = false; -@@ -1031,7 +1122,7 @@ - old_s = false; - /* Reset an additional input of -s, -S dominates -s */ - col_sep_string = bad_cast (""); -- col_sep_length = 0; -+ col_sep_length = col_sep_width = 0; - use_col_separator = true; - if (optarg) - separator_string (optarg); -@@ -1188,10 +1279,45 @@ - a number. */ - - static void --getoptarg (char *arg, char switch_char, char *character, int *number) -+getoptarg (char *arg, char switch_char, char *character, int *character_length, -+ int *character_width, int *number) - { - if (!ISDIGIT (*arg)) -- *character = *arg++; -+ { -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) /* for multibyte locale. */ -+ { -+ wchar_t wc; -+ size_t mblength; -+ int width; -+ mbstate_t state = {'\0'}; ++/* Null-terminated array of input filenames. */ ++static char **file_list; + -+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ *character_length = 1; -+ *character_width = 1; -+ } -+ else -+ { -+ *character_length = (mblength < 1) ? 1 : mblength; -+ width = wcwidth (wc); -+ *character_width = (width < 0) ? 0 : width; -+ } -+ -+ strncpy (character, arg, *character_length); -+ arg += *character_length; -+ } -+ else /* for single byte locale. 
*/ -+#endif -+ { -+ *character = *arg++; -+ *character_length = 1; -+ *character_width = 1; -+ } -+ } -+ - if (*arg) - { - long int tmp_long; -@@ -1256,7 +1382,7 @@ - else - col_sep_string = column_separator; - -- col_sep_length = 1; -+ col_sep_length = col_sep_width = 1; - use_col_separator = true; - } - /* It's rather pointless to define a TAB separator with column -@@ -1288,11 +1414,11 @@ - TAB_WIDTH (chars_per_input_tab, chars_per_number); */ - - /* Estimate chars_per_text without any margin and keep it constant. */ -- if (number_separator == '\t') -+ if (number_separator[0] == '\t') - number_width = chars_per_number + - TAB_WIDTH (chars_per_default_tab, chars_per_number); - else -- number_width = chars_per_number + 1; -+ number_width = chars_per_number + number_separator_width; - - /* The number is part of the column width unless we are - printing files in parallel. */ -@@ -1307,7 +1433,7 @@ - } - - chars_per_column = (chars_per_line - chars_used_by_number - -- (columns - 1) * col_sep_length) / columns; -+ (columns - 1) * col_sep_width) / columns; - - if (chars_per_column < 1) - error (EXIT_FAILURE, 0, _("page width too narrow")); -@@ -1432,7 +1558,7 @@ - - /* Enlarge p->start_position of first column to use the same form of - padding_not_printed with all columns. */ -- h = h + col_sep_length; -+ h = h + col_sep_width; - - /* This loop takes care of all but the rightmost column. */ - -@@ -1466,7 +1592,7 @@ - } - else - { -- h = h_next + col_sep_length; -+ h = h_next + col_sep_width; - h_next = h + chars_per_column; - } - } -@@ -1756,9 +1882,9 @@ - align_column (COLUMN *p) - { - padding_not_printed = p->start_position; -- if (padding_not_printed - col_sep_length > 0) -+ if (padding_not_printed - col_sep_width > 0) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2029,13 +2155,13 @@ - /* May be too generous. 
*/ - buff = X2REALLOC (buff, &buff_allocated); - } -- buff[buff_current++] = c; -+ buff[buff_current++] = (unsigned char) c; - } - - static void - add_line_number (COLUMN *p) - { -- int i; -+ int i, j; - char *s; - int left_cut; - -@@ -2058,22 +2184,24 @@ - /* Tabification is assumed for multiple columns, also for n-separators, - but `default n-separator = TAB' hasn't been given priority over - equal column_width also specified by POSIX. */ -- if (number_separator == '\t') -+ if (number_separator[0] == '\t') - { - i = number_width - chars_per_number; - while (i-- > 0) - (p->char_func) (' '); - } - else -- (p->char_func) (number_separator); -+ for (j = 0; j < number_separator_length; j++) -+ (p->char_func) (number_separator[j]); - } - else - /* To comply with POSIX, we avoid any expansion of default TAB - separator with a single column output. No column_width requirement - has to be considered. */ - { -- (p->char_func) (number_separator); -- if (number_separator == '\t') -+ for (j = 0; j < number_separator_length; j++) -+ (p->char_func) (number_separator[j]); -+ if (number_separator[0] == '\t') - output_position = POS_AFTER_TAB (chars_per_output_tab, - output_position); - } -@@ -2234,7 +2362,7 @@ - while (goal - h_old > 1 - && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) - { -- putchar (output_tab_char); -+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout); - h_old = h_new; - } - while (++h_old <= goal) -@@ -2254,6 +2382,7 @@ - { - char *s; - int l = col_sep_length; -+ int not_space_flag; - - s = col_sep_string; - -@@ -2267,6 +2396,7 @@ - { - for (; separators_not_printed > 0; --separators_not_printed) - { -+ not_space_flag = 0; - while (l-- > 0) - { - /* 3 types of sep_strings: spaces only, spaces and chars, -@@ -2280,12 +2410,15 @@ - } - else - { -+ not_space_flag = 1; - if (spaces_not_printed > 0) - print_white_space (); - putchar (*s++); -- ++output_position; - } - } -+ if (not_space_flag) -+ output_position += 
col_sep_width; -+ - /* sep_string ends with some spaces */ - if (spaces_not_printed > 0) - print_white_space (); -@@ -2313,7 +2446,7 @@ - required number of tabs and spaces. */ - - static void --print_char (char c) -+print_char_single (char c) - { - if (tabify_output) - { -@@ -2337,6 +2470,74 @@ - putchar (c); - } - -+#ifdef HAVE_MBRTOWC -+static void -+print_char_multi (char c) ++/* Default for `file_list' if no files are given on the command line. */ ++static char *stdin_argv[] = +{ -+ static size_t mbc_pos = 0; -+ static char mbc[MB_LEN_MAX] = {'\0'}; -+ static mbstate_t state = {'\0'}; -+ mbstate_t state_bak; -+ wchar_t wc; -+ size_t mblength; -+ int width; ++ (char *) "-", NULL ++}; + -+ if (tabify_output) ++/* True if we have ever read standard input. */ ++static bool have_read_stdin; ++ ++/* The desired exit status. */ ++static int exit_status; ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ ++enum ++{ ++ CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1 ++}; ++ ++static struct option const longopts[] = ++{ ++ {"tabs", required_argument, NULL, 't'}, ++ {"all", no_argument, NULL, 'a'}, ++ {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else + { -+ state_bak = state; -+ mbc[mbc_pos++] = c; -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); -+ -+ while (mbc_pos > 0) -+ { -+ switch (mblength) -+ { -+ case (size_t)-2: -+ state = state_bak; -+ return; -+ -+ case (size_t)-1: -+ state = state_bak; -+ ++output_position; -+ putchar (mbc[0]); -+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1); -+ --mbc_pos; -+ break; -+ -+ case 0: -+ mblength = 1; -+ -+ default: -+ if (wc == L' ') -+ { -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ --mbc_pos; -+ ++spaces_not_printed; -+ return; -+ } -+ else if (spaces_not_printed > 0) -+ print_white_space (); -+ -+ /* Nonprintables are assumed to have width 0, except L'\b'. */ -+ if ((width = wcwidth (wc)) < 1) -+ { -+ if (wc == L'\b') -+ --output_position; -+ } -+ else -+ output_position += width; -+ -+ fwrite (mbc, sizeof(char), mblength, stdout); -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ mbc_pos -= mblength; -+ } -+ } -+ return; ++ printf (_("\ ++Usage: %s [OPTION]... 
[FILE]...\n\ ++"), ++ program_name); ++ fputs (_("\ ++Convert blanks in each FILE to tabs, writing to standard output.\n\ ++With no FILE, or when FILE is -, read standard input.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ -a, --all convert all blanks, instead of just initial blanks\n\ ++ --first-only convert only leading sequences of blanks (overrides -a)\n\ ++ -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\ ++ -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ emit_ancillary_info (); + } -+ putchar (c); ++ exit (status); +} -+#endif + - /* Skip to page PAGE before printing. - PAGE may be larger than total number of pages. */ - -@@ -2517,9 +2718,9 @@ - align_empty_cols = false; - } - -- if (padding_not_printed - col_sep_length > 0) -+ if (padding_not_printed - col_sep_width > 0) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2620,9 +2821,9 @@ - } - } - -- if (padding_not_printed - col_sep_length > 0) -+ if (padding_not_printed - col_sep_width > 0) - { -- pad_across_to (padding_not_printed - col_sep_length); -+ pad_across_to (padding_not_printed - col_sep_width); - padding_not_printed = ANYWHERE; - } - -@@ -2635,8 +2836,8 @@ - if (spaces_not_printed == 0) - { - output_position = p->start_position + end_vector[line]; -- if (p->start_position - col_sep_length == chars_per_margin) -- output_position -= col_sep_length; -+ if (p->start_position - col_sep_width == chars_per_margin) -+ output_position -= col_sep_width; - } - - return true; -@@ -2655,7 +2856,7 @@ - number of characters is 1.) 
*/ - - static int --char_to_clump (char c) -+char_to_clump_single (char c) - { - unsigned char uc = c; - char *s = clump_buff; -@@ -2665,10 +2866,10 @@ - int chars; - int chars_per_c = 8; - -- if (c == input_tab_char) -+ if (c == input_tab_char[0]) - chars_per_c = chars_per_input_tab; - -- if (c == input_tab_char || c == '\t') -+ if (c == input_tab_char[0] || c == '\t') - { - width = TAB_WIDTH (chars_per_c, input_position); - -@@ -2739,6 +2940,154 @@ - return chars; - } - -+#ifdef HAVE_MBRTOWC -+static int -+char_to_clump_multi (char c) ++/* Add tab stop TABVAL to the end of `tab_list'. */ ++ ++static void ++add_tab_stop (uintmax_t tabval) +{ -+ static size_t mbc_pos = 0; -+ static char mbc[MB_LEN_MAX] = {'\0'}; -+ static mbstate_t state = {'\0'}; -+ mbstate_t state_bak; -+ wchar_t wc; -+ size_t mblength; -+ int wc_width; -+ register char *s = clump_buff; -+ register int i, j; -+ char esc_buff[4]; -+ int width; -+ int chars; -+ int chars_per_c = 8; ++ uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0; ++ uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0; + -+ state_bak = state; -+ mbc[mbc_pos++] = c; -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); ++ if (first_free_tab == n_tabs_allocated) ++ tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); ++ tab_list[first_free_tab++] = tabval; + -+ width = 0; -+ chars = 0; -+ while (mbc_pos > 0) ++ if (max_column_width < column_width) + { -+ switch (mblength) ++ if (SIZE_MAX < column_width) ++ error (EXIT_FAILURE, 0, _("tabs are too far apart")); ++ max_column_width = column_width; ++ } ++} ++ ++/* Add the comma or blank separated list of tab stops STOPS ++ to the list of tab stops. 
*/ ++ ++static void ++parse_tab_stops (char const *stops) ++{ ++ bool have_tabval = false; ++ uintmax_t tabval IF_LINT (= 0); ++ char const *num_start IF_LINT (= NULL); ++ bool ok = true; ++ ++ for (; *stops; stops++) ++ { ++ if (*stops == ',' || isblank (to_uchar (*stops))) + { -+ case (size_t)-2: -+ state = state_bak; -+ return 0; -+ -+ case (size_t)-1: -+ state = state_bak; -+ mblength = 1; -+ -+ if (use_esc_sequence || use_cntrl_prefix) ++ if (have_tabval) ++ add_tab_stop (tabval); ++ have_tabval = false; ++ } ++ else if (ISDIGIT (*stops)) ++ { ++ if (!have_tabval) + { -+ width = +4; -+ chars = +4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", mbc[0]); -+ for (i = 0; i <= 2; ++i) -+ *s++ = (int) esc_buff[i]; ++ tabval = 0; ++ have_tabval = true; ++ num_start = stops; + } -+ else ++ ++ /* Detect overflow. */ ++ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) + { -+ width += 1; -+ chars += 1; -+ *s++ = mbc[0]; ++ size_t len = strspn (num_start, "0123456789"); ++ char *bad_num = xstrndup (num_start, len); ++ error (0, 0, _("tab stop is too large %s"), quote (bad_num)); ++ free (bad_num); ++ ok = false; ++ stops = num_start + len - 1; + } ++ } ++ else ++ { ++ error (0, 0, _("tab size contains invalid character(s): %s"), ++ quote (stops)); ++ ok = false; + break; ++ } ++ } + -+ case 0: -+ mblength = 1; -+ /* Fall through */ ++ if (!ok) ++ exit (EXIT_FAILURE); + -+ default: -+ if (memcmp (mbc, input_tab_char, mblength) == 0) -+ chars_per_c = chars_per_input_tab; ++ if (have_tabval) ++ add_tab_stop (tabval); ++} + -+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t') ++/* Check that the list of tab stops TABS, with ENTRIES entries, ++ contains only nonzero, ascending values. 
*/ ++ ++static void ++validate_tab_stops (uintmax_t const *tabs, size_t entries) ++{ ++ uintmax_t prev_tab = 0; ++ size_t i; ++ ++ for (i = 0; i < entries; i++) ++ { ++ if (tabs[i] == 0) ++ error (EXIT_FAILURE, 0, _("tab size cannot be 0")); ++ if (tabs[i] <= prev_tab) ++ error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); ++ prev_tab = tabs[i]; ++ } ++} ++ ++/* Close the old stream pointer FP if it is non-NULL, ++ and return a new one opened to read the next input file. ++ Open a filename of `-' as the standard input. ++ Return NULL if there are no more input files. */ ++ ++static FILE * ++next_file (FILE *fp) ++{ ++ static char *prev_file; ++ char *file; ++ ++ if (fp) ++ { ++ if (ferror (fp)) ++ { ++ error (0, errno, "%s", prev_file); ++ exit_status = EXIT_FAILURE; ++ } ++ if (STREQ (prev_file, "-")) ++ clearerr (fp); /* Also clear EOF. */ ++ else if (fclose (fp) != 0) ++ { ++ error (0, errno, "%s", prev_file); ++ exit_status = EXIT_FAILURE; ++ } ++ } ++ ++ while ((file = *file_list++) != NULL) ++ { ++ if (STREQ (file, "-")) ++ { ++ have_read_stdin = true; ++ prev_file = file; ++ return stdin; ++ } ++ fp = fopen (file, "r"); ++ if (fp) ++ { ++ prev_file = file; ++ return fp; ++ } ++ error (0, errno, "%s", file); ++ exit_status = EXIT_FAILURE; ++ } ++ return NULL; ++} ++ ++/* Change blanks to tabs, writing to stdout. ++ Read each file in `file_list', in order. */ ++ ++static void ++unexpand (void) ++{ ++ /* Input stream. */ ++ FILE *fp = next_file (NULL); ++ ++ /* The array of pending blanks. In non-POSIX locales, blanks can ++ include characters other than spaces, so the blanks must be ++ stored, not merely counted. */ ++ char *pending_blank; ++ ++ if (!fp) ++ return; ++ ++ /* The worst case is a non-blank character, then one blank, then a ++ tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so ++ allocate MAX_COLUMN_WIDTH bytes to store the blanks. 
*/ ++ pending_blank = xmalloc (max_column_width); ++ ++ for (;;) ++ { ++ /* Input character, or EOF. */ ++ int c; ++ ++ /* If true, perform translations. */ ++ bool convert = true; ++ ++ ++ /* The following variables have valid values only when CONVERT ++ is true: */ ++ ++ /* Column of next input character. */ ++ uintmax_t column = 0; ++ ++ /* Column the next input tab stop is on. */ ++ uintmax_t next_tab_column = 0; ++ ++ /* Index in TAB_LIST of next tab stop to examine. */ ++ size_t tab_index = 0; ++ ++ /* If true, the first pending blank came just before a tab stop. */ ++ bool one_blank_before_tab_stop = false; ++ ++ /* If true, the previous input character was a blank. This is ++ initially true, since initial strings of blanks are treated ++ as if the line was preceded by a blank. */ ++ bool prev_blank = true; ++ ++ /* Number of pending columns of blanks. */ ++ size_t pending = 0; ++ ++ ++ /* Convert a line of text. */ ++ ++ do ++ { ++ while ((c = getc (fp)) < 0 && (fp = next_file (fp))) ++ continue; ++ ++ if (convert) + { -+ int width_inc; ++ bool blank = !! 
isblank (c); + -+ width_inc = TAB_WIDTH (chars_per_c, input_position); -+ width += width_inc; ++ if (blank) ++ { ++ if (next_tab_column <= column) ++ { ++ if (tab_size) ++ next_tab_column = ++ column + (tab_size - column % tab_size); ++ else ++ for (;;) ++ if (tab_index == first_free_tab) ++ { ++ convert = false; ++ break; ++ } ++ else ++ { ++ uintmax_t tab = tab_list[tab_index++]; ++ if (column < tab) ++ { ++ next_tab_column = tab; ++ break; ++ } ++ } ++ } + -+ if (untabify_input) -+ { -+ for (i = width_inc; i; --i) -+ *s++ = ' '; -+ chars += width_inc; -+ } -+ else -+ { -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; -+ chars += mblength; -+ } -+ } -+ else if ((wc_width = wcwidth (wc)) < 1) -+ { -+ if (use_esc_sequence) -+ { -+ for (i = 0; i < mblength; i++) ++ if (convert) + { -+ width += 4; -+ chars += 4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", c); -+ for (j = 0; j <= 2; ++j) -+ *s++ = (int) esc_buff[j]; -+ } -+ } -+ else if (use_cntrl_prefix) -+ { -+ if (wc < 0200) -+ { -+ width += 2; -+ chars += 2; -+ *s++ = '^'; -+ *s++ = wc ^ 0100; -+ } -+ else -+ { -+ for (i = 0; i < mblength; i++) ++ if (next_tab_column < column) ++ error (EXIT_FAILURE, 0, _("input line is too long")); ++ ++ if (c == '\t') + { -+ width += 4; -+ chars += 4; -+ *s++ = '\\'; -+ sprintf (esc_buff, "%03o", c); -+ for (j = 0; j <= 2; ++j) -+ *s++ = (int) esc_buff[j]; ++ column = next_tab_column; ++ ++ /* Discard pending blanks, unless it was a single ++ blank just before the previous tab stop. */ ++ if (! (pending == 1 && one_blank_before_tab_stop)) ++ { ++ pending = 0; ++ one_blank_before_tab_stop = false; ++ } ++ } ++ else ++ { ++ column++; ++ ++ if (! (prev_blank && column == next_tab_column)) ++ { ++ /* It is not yet known whether the pending blanks ++ will be replaced by tabs. */ ++ if (column == next_tab_column) ++ one_blank_before_tab_stop = true; ++ pending_blank[pending++] = c; ++ prev_blank = true; ++ continue; ++ } ++ ++ /* Replace the pending blanks by a tab or two. 
*/ ++ pending_blank[0] = c = '\t'; ++ pending = one_blank_before_tab_stop; + } + } + } -+ else if (wc == L'\b') ++ else if (c == '\b') + { -+ width += -1; -+ chars += 1; -+ *s++ = c; ++ /* Go back one column, and force recalculation of the ++ next tab stop. */ ++ column -= !!column; ++ next_tab_column = column; ++ tab_index -= !!tab_index; + } + else + { -+ width += 0; -+ chars += mblength; -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; ++ column++; ++ if (!column) ++ error (EXIT_FAILURE, 0, _("input line is too long")); + } ++ ++ if (pending) ++ { ++ if (fwrite (pending_blank, 1, pending, stdout) != pending) ++ error (EXIT_FAILURE, errno, _("write error")); ++ pending = 0; ++ one_blank_before_tab_stop = false; ++ } ++ ++ prev_blank = blank; ++ convert &= convert_entire_line || blank; + } -+ else ++ ++ if (c < 0) + { -+ width += wc_width; -+ chars += mblength; -+ for (i = 0; i < mblength; i++) -+ *s++ = mbc[i]; ++ free (pending_blank); ++ return; + } ++ ++ if (putchar (c) < 0) ++ error (EXIT_FAILURE, errno, _("write error")); ++ } ++ while (c != '\n'); ++ } ++} ++ ++int ++main (int argc, char **argv) ++{ ++ bool have_tabval = false; ++ uintmax_t tabval IF_LINT (= 0); ++ int c; ++ ++ /* If true, cancel the effect of any -a (explicit or implicit in -t), ++ so that only leading blanks will be considered. 
*/ ++ bool convert_first_only = false; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdout); ++ ++ have_read_stdin = false; ++ exit_status = EXIT_SUCCESS; ++ convert_entire_line = false; ++ tab_list = NULL; ++ first_free_tab = 0; ++ ++ while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL)) ++ != -1) ++ { ++ switch (c) ++ { ++ case '?': ++ usage (EXIT_FAILURE); ++ case 'a': ++ convert_entire_line = true; ++ break; ++ case 't': ++ convert_entire_line = true; ++ parse_tab_stops (optarg); ++ break; ++ case CONVERT_FIRST_ONLY_OPTION: ++ convert_first_only = true; ++ break; ++ case ',': ++ if (have_tabval) ++ add_tab_stop (tabval); ++ have_tabval = false; ++ break; ++ case_GETOPT_HELP_CHAR; ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ default: ++ if (!have_tabval) ++ { ++ tabval = 0; ++ have_tabval = true; ++ } ++ if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t)) ++ error (EXIT_FAILURE, 0, _("tab stop value is too large")); ++ break; + } -+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); -+ mbc_pos -= mblength; + } + -+ input_position += width; -+ return chars; -+} -+#endif ++ if (convert_first_only) ++ convert_entire_line = false; + - /* We've just printed some files and need to clean up things before - looking for more options and printing the next batch of files. - ---- coreutils-6.8+/src/cut.c.i18n 2007-01-14 15:41:28.000000000 +0000 -+++ coreutils-6.8+/src/cut.c 2007-03-01 15:08:24.000000000 +0000 -@@ -29,6 +29,11 @@ - #include ++ if (have_tabval) ++ add_tab_stop (tabval); ++ ++ validate_tab_stops (tab_list, first_free_tab); ++ ++ if (first_free_tab == 0) ++ tab_size = max_column_width = 8; ++ else if (first_free_tab == 1) ++ tab_size = tab_list[0]; ++ else ++ tab_size = 0; ++ ++ file_list = (optind < argc ? 
&argv[optind] : stdin_argv); ++ ++ unexpand (); ++ ++ if (have_read_stdin && fclose (stdin) != 0) ++ error (EXIT_FAILURE, errno, "-"); ++ ++ exit (exit_status); ++} +diff -urNp coreutils-8.0-orig/src/uniq.c coreutils-8.0/src/uniq.c +--- coreutils-8.0-orig/src/uniq.c 2009-09-23 10:25:44.000000000 +0200 ++++ coreutils-8.0/src/uniq.c 2009-10-07 10:07:16.000000000 +0200 +@@ -22,6 +22,16 @@ #include #include -+ -+/* Get mbstate_t, mbrtowc(). */ + ++/* Get mbstate_t, mbrtowc(). */ +#if HAVE_WCHAR_H +# include +#endif ++ ++/* Get isw* functions. */ ++#if HAVE_WCTYPE_H ++# include ++#endif ++ #include "system.h" - - #include "error.h" -@@ -37,6 +42,18 @@ + #include "argmatch.h" + #include "linebuffer.h" +@@ -31,7 +41,19 @@ #include "quote.h" - #include "xstrndup.h" - + #include "xmemcoll.h" + #include "xstrtol.h" +-#include "memcasecmp.h" ++#include "xmemcoll.h" ++ +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ ++ installation; work around this configuration error. */ +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# undef MB_LEN_MAX +# define MB_LEN_MAX 16 +#endif + @@ -3459,581 +13762,1660 @@ diff -urNp coreutils-6.11-orig/src/join.c coreutils-6.11/src/join.c +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) +#endif + + /* The official name of this program (e.g., no `g' prefix). */ - #define PROGRAM_NAME "cut" + #define PROGRAM_NAME "uniq" +@@ -107,6 +129,10 @@ static enum delimit_method const delimit + /* Select whether/how to delimit groups of duplicate lines. */ + static enum delimit_method delimit_groups; -@@ -67,6 +84,52 @@ - } \ - while (0) - -+/* Refill the buffer BUF to get a multibyte character. 
*/ -+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \ -+ do \ -+ { \ -+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \ -+ { \ -+ memmove (BUF, BUFPOS, BUFLEN); \ -+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \ -+ BUFPOS = BUF; \ -+ } \ -+ } \ -+ while (0) ++/* Function pointers. */ ++static char * ++(*find_field) (struct linebuffer *line); + -+/* Get wide character on BUFPOS. BUFPOS is not included after that. -+ If byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */ -+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \ + static struct option const longopts[] = + { + {"count", no_argument, NULL, 'c'}, +@@ -206,7 +232,7 @@ size_opt (char const *opt, char const *m + return a pointer to the beginning of the line's field to be compared. */ + + static char * +-find_field (struct linebuffer const *line) ++find_field_uni (struct linebuffer *line) + { + size_t count; + char const *lp = line->buffer; +@@ -227,6 +253,83 @@ find_field (struct linebuffer const *lin + return line->buffer + i; + } + ++#if HAVE_MBRTOWC ++ ++# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ + do \ + { \ + mbstate_t state_bak; \ + \ -+ if (BUFLEN < 1) \ -+ { \ -+ WC = WEOF; \ -+ break; \ -+ } \ -+ \ -+ /* Get a wide character. */ \ + CONVFAIL = 0; \ -+ state_bak = STATE; \ -+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \ ++ state_bak = *STATEP; \ ++ \ ++ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ + \ + switch (MBLENGTH) \ + { \ -+ case (size_t)-1: \ + case (size_t)-2: \ ++ case (size_t)-1: \ ++ *STATEP = state_bak; \ + CONVFAIL++; \ -+ STATE = state_bak; \ -+ /* Fall througn. */ \ -+ \ ++ /* Fall through */ \ + case 0: \ + MBLENGTH = 1; \ -+ break; \ + } \ + } \ + while (0) + - struct range_pair - { - size_t lo; -@@ -85,7 +148,7 @@ - /* The number of bytes allocated for FIELD_1_BUFFER. 
*/ - static size_t field_1_bufsize; - --/* The largest field or byte index used as an endpoint of a closed -+/* The largest byte, character or field index used as an endpoint of a closed - or degenerate range specification; this doesn't include the starting - index of right-open-ended ranges. For example, with either range spec - `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ -@@ -97,10 +160,11 @@ - - /* This is a bit vector. - In byte mode, which bytes to output. -+ In character mode, which characters to output. - In field mode, which DELIM-separated fields to output. -- Both bytes and fields are numbered starting with 1, -+ Bytes, characters and fields are numbered starting with 1, - so the zeroth bit of this array is unused. -- A field or byte K has been selected if -+ A byte, character or field K has been selected if - (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) - || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ - static unsigned char *printable_field; -@@ -109,9 +173,12 @@ - { - undefined_mode, - -- /* Output characters that are in the given bytes. */ -+ /* Output bytes that are at the given positions. */ - byte_mode, - -+ /* Output characters that are at the given positions. */ -+ character_mode, ++static char * ++find_field_multi (struct linebuffer *line) ++{ ++ size_t count; ++ char *lp = line->buffer; ++ size_t size = line->length - 1; ++ size_t pos; ++ size_t mblength; ++ wchar_t wc; ++ mbstate_t *statep; ++ int convfail; + - /* Output the given delimeter-separated fields. */ - field_mode - }; -@@ -121,6 +188,13 @@ - - static enum operating_mode operating_mode; - -+/* If nonzero, when in byte mode, don't split multibyte characters. */ -+static int byte_mode_character_aware; ++ pos = 0; ++ statep = &(line->state); + -+/* If nonzero, the function for single byte locale is work -+ if this program runs on multibyte locale. */ -+static int force_singlebyte_mode; -+ - /* If true do not output lines containing no delimeter characters. 
- Otherwise, all such lines are printed. This option is valid only - with field mode. */ -@@ -132,6 +206,9 @@ - - /* The delimeter character for field mode. */ - static unsigned char delim; -+#if HAVE_WCHAR_H -+static wchar_t wcdelim; -+#endif - - /* True if the --output-delimiter=STRING option was specified. */ - static bool output_delimiter_specified; -@@ -205,7 +282,7 @@ - -f, --fields=LIST select only these fields; also print any line\n\ - that contains no delimiter character, unless\n\ - the -s option is specified\n\ -- -n (ignored)\n\ -+ -n with -b: don't split multibyte characters\n\ - "), stdout); - fputs (_("\ - --complement complement the set of selected bytes, characters\n\ -@@ -362,7 +439,7 @@ - in_digits = false; - /* Starting a range. */ - if (dash_found) -- FATAL_ERROR (_("invalid byte or field list")); -+ FATAL_ERROR (_("invalid byte, character or field list")); - dash_found = true; - fieldstr++; - -@@ -387,14 +464,16 @@ - if (!rhs_specified) - { - /* `n-'. From `initial' to end of line. */ -- eol_range_start = initial; -+ if (eol_range_start == 0 || -+ (eol_range_start != 0 && eol_range_start > initial)) -+ eol_range_start = initial; - field_found = true; - } - else - { - /* `m-n' or `-n' (1-n). */ - if (value < initial) -- FATAL_ERROR (_("invalid decreasing range")); -+ FATAL_ERROR (_("invalid byte, character or field list")); - - /* Is there already a range going to end of line? 
*/ - if (eol_range_start != 0) -@@ -467,6 +546,9 @@ - if (operating_mode == byte_mode) - error (0, 0, - _("byte offset %s is too large"), quote (bad_num)); -+ else if (operating_mode == character_mode) -+ error (0, 0, -+ _("character offset %s is too large"), quote (bad_num)); - else - error (0, 0, - _("field number %s is too large"), quote (bad_num)); -@@ -477,7 +559,7 @@ - fieldstr++; - } - else -- FATAL_ERROR (_("invalid byte or field list")); -+ FATAL_ERROR (_("invalid byte, character or field list")); - } - - max_range_endpoint = 0; -@@ -570,6 +652,63 @@ - } - } - -+#if HAVE_MBRTOWC -+/* This function is in use for the following case. -+ -+ 1. Read from the stream STREAM, printing to standard output any selected -+ characters. -+ -+ 2. Read from stream STREAM, printing to standard output any selected bytes, -+ without splitting multibyte characters. */ ++ /* skip fields. */ ++ for (count = 0; count < skip_fields && pos < size; count++) ++ { ++ while (pos < size) ++ { ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); + -+static void -+cut_characters_or_cut_bytes_no_split (FILE *stream) -+{ -+ int idx; /* number of bytes or characters in the line so far. */ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ int convfail; /* 1, when conversion is failed. Otherwise 0. 
*/ -+ -+ idx = 0; -+ buflen = 0; -+ bufpos = buf; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ { -+ if (idx > 0) -+ putchar ('\n'); -+ break; -+ } -+ else if (wc == L'\n') -+ { -+ putchar ('\n'); -+ idx = 0; -+ } -+ else -+ { -+ idx += (operating_mode == byte_mode) ? mblength : 1; -+ if (print_kth (idx, NULL)) -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ } -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+} -+#endif -+ - /* Read from stream STREAM, printing to standard output any selected fields. */ - - static void -@@ -692,13 +831,192 @@ - } - } - -+#if HAVE_MBRTOWC -+static void -+cut_fields_mb (FILE *stream) -+{ -+ int c; -+ unsigned int field_idx; -+ int found_any_selected_field; -+ int buffer_first_field; -+ int empty_input; -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ char *bufpos; /* Next read position of BUF. */ -+ size_t buflen; /* The length of the byte sequence in buf. */ -+ wint_t wc = 0; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state; /* State of the stream. */ -+ int convfail; /* 1, when conversion is failed. Otherwise 0. */ -+ -+ found_any_selected_field = 0; -+ field_idx = 1; -+ bufpos = buf; -+ buflen = 0; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ c = getc (stream); -+ empty_input = (c == EOF); -+ if (c != EOF) -+ ungetc (c, stream); -+ else -+ wc = WEOF; -+ -+ /* To support the semantics of the -s flag, we may have to buffer -+ all of the first field to determine whether it is `delimited.' -+ But that is unnecessary if all non-delimited lines must be printed -+ and the first field has been selected, or if non-delimited lines -+ must be suppressed and the first field has *not* been selected. 
-+ That is because a non-delimited line has exactly one field. */ -+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); -+ -+ while (1) -+ { -+ if (field_idx == 1 && buffer_first_field) -+ { -+ int len = 0; -+ -+ while (1) ++ if (convfail || !iswblank (wc)) + { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ -+ field_1_buffer = xrealloc (field_1_buffer, len + mblength); -+ memcpy (field_1_buffer + len, bufpos, mblength); -+ len += mblength; -+ buflen -= mblength; -+ bufpos += mblength; -+ -+ if (!convfail && (wc == L'\n' || wc == wcdelim)) -+ break; ++ pos += mblength; ++ break; + } ++ pos += mblength; ++ } + -+ if (wc == WEOF) ++ while (pos < size) ++ { ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); ++ ++ if (!convfail && iswblank (wc)) + break; + -+ /* If the first field extends to the end of line (it is not -+ delimited) and we are printing all non-delimited lines, -+ print this one. */ -+ if (convfail || (!convfail && wc != wcdelim)) ++ pos += mblength; ++ } ++ } ++ ++ /* skip fields. */ ++ for (count = 0; count < skip_chars && pos < size; count++) ++ { ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); ++ pos += mblength; ++ } ++ ++ return lp + pos; ++} ++#endif ++ + /* Return false if two strings OLD and NEW match, true if not. + OLD and NEW point not to the beginnings of the lines + but rather to the beginnings of the fields to compare. +@@ -235,6 +338,8 @@ find_field (struct linebuffer const *lin + static bool + different (char *old, char *new, size_t oldlen, size_t newlen) + { ++ char *copy_old, *copy_new; ++ + if (check_chars < oldlen) + oldlen = check_chars; + if (check_chars < newlen) +@@ -242,14 +347,92 @@ different (char *old, char *new, size_t + + if (ignore_case) + { +- /* FIXME: This should invoke strcoll somehow. 
*/ +- return oldlen != newlen || memcasecmp (old, new, oldlen); ++ size_t i; ++ ++ copy_old = alloca (oldlen + 1); ++ copy_new = alloca (oldlen + 1); ++ ++ for (i = 0; i < oldlen; i++) ++ { ++ copy_old[i] = toupper (old[i]); ++ copy_new[i] = toupper (new[i]); ++ } + } +- else if (hard_LC_COLLATE) +- return xmemcoll (old, oldlen, new, newlen) != 0; + else +- return oldlen != newlen || memcmp (old, new, oldlen); ++ { ++ copy_old = (char *)old; ++ copy_new = (char *)new; ++ } ++ ++ return xmemcoll (copy_old, oldlen, copy_new, newlen); ++} ++ ++#if HAVE_MBRTOWC ++static int ++different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate) ++{ ++ size_t i, j, chars; ++ const char *str[2]; ++ char *copy[2]; ++ size_t len[2]; ++ mbstate_t state[2]; ++ size_t mblength; ++ wchar_t wc, uwc; ++ mbstate_t state_bak; ++ ++ str[0] = old; ++ str[1] = new; ++ len[0] = oldlen; ++ len[1] = newlen; ++ state[0] = oldstate; ++ state[1] = newstate; ++ ++ for (i = 0; i < 2; i++) ++ { ++ copy[i] = alloca (len[i] + 1); ++ ++ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++) ++ { ++ state_bak = state[i]; ++ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i])); ++ ++ switch (mblength) + { -+ if (suppress_non_delimited) ++ case (size_t)-1: ++ case (size_t)-2: ++ state[i] = state_bak; ++ /* Fall through */ ++ case 0: ++ mblength = 1; ++ break; ++ ++ default: ++ if (ignore_case) + { -+ /* Empty. */ ++ uwc = towupper (wc); ++ ++ if (uwc != wc) ++ { ++ mbstate_t state_wc; ++ ++ memset (&state_wc, '\0', sizeof(mbstate_t)); ++ wcrtomb (copy[i] + j, uwc, &state_wc); ++ } ++ else ++ memcpy (copy[i] + j, str[i] + j, mblength); + } + else -+ { -+ fwrite (field_1_buffer, sizeof (char), len, stdout); -+ /* Make sure the output line is newline terminated. 
*/ -+ if (convfail || (!convfail && wc != L'\n')) -+ putchar ('\n'); -+ } -+ continue; ++ memcpy (copy[i] + j, str[i] + j, mblength); + } -+ -+ if (print_kth (1, NULL)) -+ { -+ /* Print the field, but not the trailing delimiter. */ -+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout); -+ found_any_selected_field = 1; -+ } -+ ++field_idx; -+ } -+ -+ if (wc != WEOF) -+ { -+ if (print_kth (field_idx, NULL)) -+ { -+ if (found_any_selected_field) -+ { -+ fwrite (output_delimiter_string, sizeof (char), -+ output_delimiter_length, stdout); -+ } -+ found_any_selected_field = 1; -+ } -+ -+ while (1) -+ { -+ REFILL_BUFFER (buf, bufpos, buflen, stream); -+ -+ GET_NEXT_WC_FROM_BUFFER -+ (wc, bufpos, buflen, mblength, state, convfail); -+ -+ if (wc == WEOF) -+ break; -+ else if (!convfail && (wc == wcdelim || wc == L'\n')) -+ { -+ buflen -= mblength; -+ bufpos += mblength; -+ break; -+ } -+ -+ if (print_kth (field_idx, NULL)) -+ fwrite (bufpos, mblength, sizeof(char), stdout); -+ -+ buflen -= mblength; -+ bufpos += mblength; -+ } -+ } -+ -+ if ((!convfail || wc == L'\n') && buflen < 1) -+ wc = WEOF; -+ -+ if (!convfail && wc == wcdelim) -+ ++field_idx; -+ else if (wc == WEOF || (!convfail && wc == L'\n')) -+ { -+ if (found_any_selected_field -+ || (!empty_input && !(suppress_non_delimited && field_idx == 1))) -+ putchar ('\n'); -+ if (wc == WEOF) -+ break; -+ field_idx = 1; -+ found_any_selected_field = 0; ++ j += mblength; + } ++ copy[i][j] = '\0'; ++ len[i] = j; + } -+} -+#endif + - static void - cut_stream (FILE *stream) - { -- if (operating_mode == byte_mode) -- cut_bytes (stream); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ switch (operating_mode) -+ { -+ case byte_mode: -+ if (byte_mode_character_aware) -+ cut_characters_or_cut_bytes_no_split (stream); -+ else -+ cut_bytes (stream); -+ break; -+ -+ case character_mode: -+ cut_characters_or_cut_bytes_no_split (stream); -+ break; -+ -+ case field_mode: -+ cut_fields_mb (stream); -+ 
break; -+ -+ default: -+ abort (); -+ } -+ } - else -- cut_fields (stream); -+#endif -+ { -+ if (operating_mode == field_mode) -+ cut_fields (stream); -+ else -+ cut_bytes (stream); -+ } ++ return xmemcoll (copy[0], len[0], copy[1], len[1]); } - - /* Process file FILE to standard output. -@@ -748,6 +1066,8 @@ - bool ok; - bool delim_specified = false; - char *spec_list_string IF_LINT(= NULL); -+ char mbdelim[MB_LEN_MAX + 1]; -+ size_t delimlen = 0; - - initialize_main (&argc, &argv); - set_program_name (argv[0]); -@@ -770,7 +1090,6 @@ - switch (optc) - { - case 'b': -- case 'c': - /* Build the byte list. */ - if (operating_mode != undefined_mode) - FATAL_ERROR (_("only one type of list may be specified")); -@@ -778,6 +1097,14 @@ - spec_list_string = optarg; - break; - -+ case 'c': -+ /* Build the character list. */ -+ if (operating_mode != undefined_mode) -+ FATAL_ERROR (_("only one type of list may be specified")); -+ operating_mode = character_mode; -+ spec_list_string = optarg; -+ break; -+ - case 'f': - /* Build the field list. */ - if (operating_mode != undefined_mode) -@@ -789,10 +1116,35 @@ - case 'd': - /* New delimiter. */ - /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ -- if (optarg[0] != '\0' && optarg[1] != '\0') -- FATAL_ERROR (_("the delimiter must be a single character")); -- delim = optarg[0]; -- delim_specified = true; -+ { -+#if HAVE_MBRTOWC -+ if(MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, '\0', sizeof(mbstate_t)); -+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state); -+ -+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2) -+ ++force_singlebyte_mode; -+ else -+ { -+ delimlen = (delimlen < 1) ? 
1 : delimlen; -+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ memcpy (mbdelim, optarg, delimlen); -+ } -+ } -+ -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) +#endif -+ { -+ if (optarg[0] != '\0' && optarg[1] != '\0') -+ FATAL_ERROR (_("the delimiter must be a single character")); -+ delim = (unsigned char) optarg[0]; -+ } -+ delim_specified = true; -+ } - break; - case OUTPUT_DELIMITER_OPTION: -@@ -805,6 +1157,7 @@ - break; - - case 'n': -+ byte_mode_character_aware = 1; - break; - - case 's': -@@ -827,7 +1180,7 @@ - if (operating_mode == undefined_mode) - FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); - -- if (delim != '\0' && operating_mode != field_mode) -+ if (delim_specified && operating_mode != field_mode) - FATAL_ERROR (_("an input delimiter may be specified only\ - when operating on fields")); - -@@ -854,15 +1207,34 @@ - } - - if (!delim_specified) -- delim = '\t'; -+ { -+ delim = '\t'; -+#ifdef HAVE_MBRTOWC -+ wcdelim = L'\t'; -+ mbdelim[0] = '\t'; -+ mbdelim[1] = '\0'; -+ delimlen = 1; -+#endif -+ } - - if (output_delimiter_string == NULL) + /* Output the line in linebuffer LINE to standard output + provided that the switches say it should be output. 
+@@ -303,15 +486,43 @@ check_file (const char *infile, const ch { -- static char dummy[2]; -- dummy[0] = delim; -- dummy[1] = '\0'; -- output_delimiter_string = dummy; -- output_delimiter_length = 1; -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) -+ { -+ output_delimiter_string = xstrdup(mbdelim); -+ output_delimiter_length = delimlen; -+ } + char *prevfield IF_LINT (= NULL); + size_t prevlen IF_LINT (= 0); ++#if HAVE_MBRTOWC ++ mbstate_t prevstate; + -+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode) ++ memset (&prevstate, '\0', sizeof (mbstate_t)); +#endif -+ { -+ static char dummy[2]; -+ dummy[0] = delim; -+ dummy[1] = '\0'; -+ output_delimiter_string = dummy; -+ output_delimiter_length = 1; -+ } - } - if (optind == argc) -diff -urNp coreutils-6.12/src/join.c coreutils-6.12-orig/src/join.c ---- coreutils-6.12/src/join.c 2008-07-16 14:08:01.000000000 +0200 -+++ coreutils-6.12-orig/src/join.c 2008-07-16 14:07:02.000000000 +0200 -@@ -634,6 +634,11 @@ get_line (FILE *fp, struct line *line, i - return false; - } + while (!feof (stdin)) + { + char *thisfield; + size_t thislen; ++#if HAVE_MBRTOWC ++ mbstate_t thisstate; ++#endif ++ + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) + break; + thisfield = find_field (thisline); + thislen = thisline->length - 1 - (thisfield - thisline->buffer); ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ thisstate = thisline->state; ++ ++ if (prevline->length == 0 || different_multi ++ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate)) ++ { ++ fwrite (thisline->buffer, sizeof (char), ++ thisline->length, stdout); ++ ++ SWAP_LINES (prevline, thisline); ++ prevfield = thisfield; ++ prevlen = thislen; ++ prevstate = thisstate; ++ } ++ } ++ else ++#endif + if (prevline->length == 0 + || different (thisfield, prevfield, thislen, prevlen)) + { +@@ -330,17 +541,26 @@ check_file (const char *infile, const ch + size_t prevlen; + uintmax_t match_count = 0; + bool first_delimiter = true; ++#if 
HAVE_MBRTOWC ++ mbstate_t prevstate; ++#endif + + if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) + goto closefiles; + prevfield = find_field (prevline); + prevlen = prevline->length - 1 - (prevfield - prevline->buffer); ++#if HAVE_MBRTOWC ++ prevstate = prevline->state; ++#endif + + while (!feof (stdin)) + { + bool match; + char *thisfield; + size_t thislen; ++#if HAVE_MBRTOWC ++ mbstate_t thisstate; ++#endif + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) + { + if (ferror (stdin)) +@@ -349,6 +569,15 @@ check_file (const char *infile, const ch + } + thisfield = find_field (thisline); + thislen = thisline->length - 1 - (thisfield - thisline->buffer); ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ thisstate = thisline->state; ++ match = !different_multi (thisfield, prevfield, ++ thislen, prevlen, thisstate, prevstate); ++ } ++ else ++#endif + match = !different (thisfield, prevfield, thislen, prevlen); + match_count += match; + +@@ -381,6 +610,9 @@ check_file (const char *infile, const ch + SWAP_LINES (prevline, thisline); + prevfield = thisfield; + prevlen = thislen; ++#if HAVE_MBRTOWC ++ prevstate = thisstate; ++#endif + if (!match) + match_count = 0; + } +@@ -426,6 +658,19 @@ main (int argc, char **argv) + + atexit (close_stdout); +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) -+ xfields_multibyte (line); ++ { ++ find_field = find_field_multi; ++ } + else +#endif - xfields (line); ++ { ++ find_field = find_field_uni; ++ } ++ ++ ++ + skip_chars = 0; + skip_fields = 0; + check_chars = SIZE_MAX; +diff -urNp coreutils-8.0-orig/src/uniq.c.orig coreutils-8.0/src/uniq.c.orig +--- coreutils-8.0-orig/src/uniq.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/uniq.c.orig 2009-09-23 10:25:44.000000000 +0200 +@@ -0,0 +1,565 @@ ++/* uniq -- remove duplicate lines from a sorted file ++ Copyright (C) 86, 91, 1995-2009 Free Software Foundation, Inc. 
++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* Written by Richard M. Stallman and David MacKenzie. */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#include "system.h" ++#include "argmatch.h" ++#include "linebuffer.h" ++#include "error.h" ++#include "hard-locale.h" ++#include "posixver.h" ++#include "quote.h" ++#include "xmemcoll.h" ++#include "xstrtol.h" ++#include "memcasecmp.h" ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "uniq" ++ ++#define AUTHORS \ ++ proper_name ("Richard M. Stallman"), \ ++ proper_name ("David MacKenzie") ++ ++#define SWAP_LINES(A, B) \ ++ do \ ++ { \ ++ struct linebuffer *_tmp; \ ++ _tmp = (A); \ ++ (A) = (B); \ ++ (B) = _tmp; \ ++ } \ ++ while (0) ++ ++/* True if the LC_COLLATE locale is hard. */ ++static bool hard_LC_COLLATE; ++ ++/* Number of fields to skip on each line when doing comparisons. */ ++static size_t skip_fields; ++ ++/* Number of chars to skip after skipping any fields. */ ++static size_t skip_chars; ++ ++/* Number of chars to compare. */ ++static size_t check_chars; ++ ++enum countmode ++{ ++ count_occurrences, /* -c Print count before output lines. */ ++ count_none /* Default. Do not print counts. */ ++}; ++ ++/* Whether and how to precede the output lines with a count of the number of ++ times they occurred in the input. 
*/ ++static enum countmode countmode; ++ ++/* Which lines to output: unique lines, the first of a group of ++ repeated lines, and the second and subsequented of a group of ++ repeated lines. */ ++static bool output_unique; ++static bool output_first_repeated; ++static bool output_later_repeated; ++ ++/* If true, ignore case when comparing. */ ++static bool ignore_case; ++ ++enum delimit_method ++{ ++ /* No delimiters output. --all-repeated[=none] */ ++ DM_NONE, ++ ++ /* Delimiter precedes all groups. --all-repeated=prepend */ ++ DM_PREPEND, ++ ++ /* Delimit all groups. --all-repeated=separate */ ++ DM_SEPARATE ++}; ++ ++static char const *const delimit_method_string[] = ++{ ++ "none", "prepend", "separate", NULL ++}; ++ ++static enum delimit_method const delimit_method_map[] = ++{ ++ DM_NONE, DM_PREPEND, DM_SEPARATE ++}; ++ ++/* Select whether/how to delimit groups of duplicate lines. */ ++static enum delimit_method delimit_groups; ++ ++static struct option const longopts[] = ++{ ++ {"count", no_argument, NULL, 'c'}, ++ {"repeated", no_argument, NULL, 'd'}, ++ {"all-repeated", optional_argument, NULL, 'D'}, ++ {"ignore-case", no_argument, NULL, 'i'}, ++ {"unique", no_argument, NULL, 'u'}, ++ {"skip-fields", required_argument, NULL, 'f'}, ++ {"skip-chars", required_argument, NULL, 's'}, ++ {"check-chars", required_argument, NULL, 'w'}, ++ {"zero-terminated", no_argument, NULL, 'z'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... 
[INPUT [OUTPUT]]\n\ ++"), ++ program_name); ++ fputs (_("\ ++Filter adjacent matching lines from INPUT (or standard input),\n\ ++writing to OUTPUT (or standard output).\n\ ++\n\ ++With no options, matching lines are merged to the first occurrence.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ -c, --count prefix lines by the number of occurrences\n\ ++ -d, --repeated only print duplicate lines\n\ ++"), stdout); ++ fputs (_("\ ++ -D, --all-repeated[=delimit-method] print all duplicate lines\n\ ++ delimit-method={none(default),prepend,separate}\n\ ++ Delimiting is done with blank lines.\n\ ++ -f, --skip-fields=N avoid comparing the first N fields\n\ ++ -i, --ignore-case ignore differences in case when comparing\n\ ++ -s, --skip-chars=N avoid comparing the first N characters\n\ ++ -u, --unique only print unique lines\n\ ++ -z, --zero-terminated end lines with 0 byte, not newline\n\ ++"), stdout); ++ fputs (_("\ ++ -w, --check-chars=N compare no more than N characters in lines\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++A field is a run of blanks (usually spaces and/or TABs), then non-blank\n\ ++characters. Fields are skipped before chars.\n\ ++"), stdout); ++ fputs (_("\ ++\n\ ++Note: 'uniq' does not detect repeated lines unless they are adjacent.\n\ ++You may want to sort the input first, or use `sort -u' without `uniq'.\n\ ++Also, comparisons honor the rules specified by `LC_COLLATE'.\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++/* Convert OPT to size_t, reporting an error using MSGID if OPT is ++ invalid. Silently convert too-large values to SIZE_MAX. 
*/ ++ ++static size_t ++size_opt (char const *opt, char const *msgid) ++{ ++ unsigned long int size; ++ verify (SIZE_MAX <= ULONG_MAX); ++ ++ switch (xstrtoul (opt, NULL, 10, &size, "")) ++ { ++ case LONGINT_OK: ++ case LONGINT_OVERFLOW: ++ break; ++ ++ default: ++ error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid)); ++ } ++ ++ return MIN (size, SIZE_MAX); ++} ++ ++/* Given a linebuffer LINE, ++ return a pointer to the beginning of the line's field to be compared. */ ++ ++static char * ++find_field (struct linebuffer const *line) ++{ ++ size_t count; ++ char const *lp = line->buffer; ++ size_t size = line->length - 1; ++ size_t i = 0; ++ ++ for (count = 0; count < skip_fields; count++) ++ { ++ while (i < size && isblank (to_uchar (lp[i]))) ++ i++; ++ while (i < size && !isblank (to_uchar (lp[i]))) ++ i++; ++ } ++ ++ for (count = 0; count < skip_chars && i < size; count++) ++ i++; ++ ++ return line->buffer + i; ++} ++ ++/* Return false if two strings OLD and NEW match, true if not. ++ OLD and NEW point not to the beginnings of the lines ++ but rather to the beginnings of the fields to compare. ++ OLDLEN and NEWLEN are their lengths. */ ++ ++static bool ++different (char *old, char *new, size_t oldlen, size_t newlen) ++{ ++ if (check_chars < oldlen) ++ oldlen = check_chars; ++ if (check_chars < newlen) ++ newlen = check_chars; ++ ++ if (ignore_case) ++ { ++ /* FIXME: This should invoke strcoll somehow. */ ++ return oldlen != newlen || memcasecmp (old, new, oldlen); ++ } ++ else if (hard_LC_COLLATE) ++ return xmemcoll (old, oldlen, new, newlen) != 0; ++ else ++ return oldlen != newlen || memcmp (old, new, oldlen); ++} ++ ++/* Output the line in linebuffer LINE to standard output ++ provided that the switches say it should be output. ++ MATCH is true if the line matches the previous line. ++ If requested, print the number of times it occurred, as well; ++ LINECOUNT + 1 is the number of times that the line occurred. 
*/ ++ ++static void ++writeline (struct linebuffer const *line, ++ bool match, uintmax_t linecount) ++{ ++ if (! (linecount == 0 ? output_unique ++ : !match ? output_first_repeated ++ : output_later_repeated)) ++ return; ++ ++ if (countmode == count_occurrences) ++ printf ("%7" PRIuMAX " ", linecount + 1); ++ ++ fwrite (line->buffer, sizeof (char), line->length, stdout); ++} ++ ++/* Process input file INFILE with output to OUTFILE. ++ If either is "-", use the standard I/O stream for it instead. */ ++ ++static void ++check_file (const char *infile, const char *outfile, char delimiter) ++{ ++ struct linebuffer lb1, lb2; ++ struct linebuffer *thisline, *prevline; ++ ++ if (! (STREQ (infile, "-") || freopen (infile, "r", stdin))) ++ error (EXIT_FAILURE, errno, "%s", infile); ++ if (! (STREQ (outfile, "-") || freopen (outfile, "w", stdout))) ++ error (EXIT_FAILURE, errno, "%s", outfile); ++ ++ thisline = &lb1; ++ prevline = &lb2; ++ ++ initbuffer (thisline); ++ initbuffer (prevline); ++ ++ /* The duplication in the following `if' and `else' blocks is an ++ optimization to distinguish the common case (in which none of ++ the following options has been specified: --count, -repeated, ++ --all-repeated, --unique) from the others. In the common case, ++ this optimization lets uniq output each different line right away, ++ without waiting to see if the next one is different. 
*/ ++ ++ if (output_unique && output_first_repeated && countmode == count_none) ++ { ++ char *prevfield IF_LINT (= NULL); ++ size_t prevlen IF_LINT (= 0); ++ ++ while (!feof (stdin)) ++ { ++ char *thisfield; ++ size_t thislen; ++ if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) ++ break; ++ thisfield = find_field (thisline); ++ thislen = thisline->length - 1 - (thisfield - thisline->buffer); ++ if (prevline->length == 0 ++ || different (thisfield, prevfield, thislen, prevlen)) ++ { ++ fwrite (thisline->buffer, sizeof (char), ++ thisline->length, stdout); ++ ++ SWAP_LINES (prevline, thisline); ++ prevfield = thisfield; ++ prevlen = thislen; ++ } ++ } ++ } ++ else ++ { ++ char *prevfield; ++ size_t prevlen; ++ uintmax_t match_count = 0; ++ bool first_delimiter = true; ++ ++ if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) ++ goto closefiles; ++ prevfield = find_field (prevline); ++ prevlen = prevline->length - 1 - (prevfield - prevline->buffer); ++ ++ while (!feof (stdin)) ++ { ++ bool match; ++ char *thisfield; ++ size_t thislen; ++ if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) ++ { ++ if (ferror (stdin)) ++ goto closefiles; ++ break; ++ } ++ thisfield = find_field (thisline); ++ thislen = thisline->length - 1 - (thisfield - thisline->buffer); ++ match = !different (thisfield, prevfield, thislen, prevlen); ++ match_count += match; ++ ++ if (match_count == UINTMAX_MAX) ++ { ++ if (count_occurrences) ++ error (EXIT_FAILURE, 0, _("too many repeated lines")); ++ match_count--; ++ } ++ ++ if (delimit_groups != DM_NONE) ++ { ++ if (!match) ++ { ++ if (match_count) /* a previous match */ ++ first_delimiter = false; /* Only used when DM_SEPARATE */ ++ } ++ else if (match_count == 1) ++ { ++ if ((delimit_groups == DM_PREPEND) ++ || (delimit_groups == DM_SEPARATE ++ && !first_delimiter)) ++ putchar (delimiter); ++ } ++ } ++ ++ if (!match || output_later_repeated) ++ { ++ writeline (prevline, match, match_count); ++ SWAP_LINES (prevline, 
thisline); ++ prevfield = thisfield; ++ prevlen = thislen; ++ if (!match) ++ match_count = 0; ++ } ++ } ++ ++ writeline (prevline, false, match_count); ++ } ++ ++ closefiles: ++ if (ferror (stdin) || fclose (stdin) != 0) ++ error (EXIT_FAILURE, 0, _("error reading %s"), infile); ++ ++ /* stdout is handled via the atexit-invoked close_stdout function. */ ++ ++ free (lb1.buffer); ++ free (lb2.buffer); ++} ++ ++enum Skip_field_option_type ++ { ++ SFO_NONE, ++ SFO_OBSOLETE, ++ SFO_NEW ++ }; ++ ++int ++main (int argc, char **argv) ++{ ++ int optc = 0; ++ bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); ++ enum Skip_field_option_type skip_field_option_type = SFO_NONE; ++ int nfiles = 0; ++ char const *file[2]; ++ char delimiter = '\n'; /* change with --zero-terminated, -z */ ++ ++ file[0] = file[1] = "-"; ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ hard_LC_COLLATE = hard_locale (LC_COLLATE); ++ ++ atexit (close_stdout); ++ ++ skip_chars = 0; ++ skip_fields = 0; ++ check_chars = SIZE_MAX; ++ output_unique = output_first_repeated = true; ++ output_later_repeated = false; ++ countmode = count_none; ++ delimit_groups = DM_NONE; ++ ++ for (;;) ++ { ++ /* Parse an operand with leading "+" as a file after "--" was ++ seen; or if pedantic and a file was seen; or if not ++ obsolete. 
*/ ++ ++ if (optc == -1 ++ || (posixly_correct && nfiles != 0) ++ || ((optc = getopt_long (argc, argv, ++ "-0123456789Dcdf:is:uw:z", longopts, NULL)) ++ == -1)) ++ { ++ if (argc <= optind) ++ break; ++ if (nfiles == 2) ++ { ++ error (0, 0, _("extra operand %s"), quote (argv[optind])); ++ usage (EXIT_FAILURE); ++ } ++ file[nfiles++] = argv[optind++]; ++ } ++ else switch (optc) ++ { ++ case 1: ++ { ++ unsigned long int size; ++ if (optarg[0] == '+' ++ && posix2_version () < 200112 ++ && xstrtoul (optarg, NULL, 10, &size, "") == LONGINT_OK ++ && size <= SIZE_MAX) ++ skip_chars = size; ++ else if (nfiles == 2) ++ { ++ error (0, 0, _("extra operand %s"), quote (optarg)); ++ usage (EXIT_FAILURE); ++ } ++ else ++ file[nfiles++] = optarg; ++ } ++ break; ++ ++ case '0': ++ case '1': ++ case '2': ++ case '3': ++ case '4': ++ case '5': ++ case '6': ++ case '7': ++ case '8': ++ case '9': ++ { ++ if (skip_field_option_type == SFO_NEW) ++ skip_fields = 0; ++ ++ if (!DECIMAL_DIGIT_ACCUMULATE (skip_fields, optc - '0', size_t)) ++ skip_fields = SIZE_MAX; ++ ++ skip_field_option_type = SFO_OBSOLETE; ++ } ++ break; ++ ++ case 'c': ++ countmode = count_occurrences; ++ break; ++ ++ case 'd': ++ output_unique = false; ++ break; ++ ++ case 'D': ++ output_unique = false; ++ output_later_repeated = true; ++ if (optarg == NULL) ++ delimit_groups = DM_NONE; ++ else ++ delimit_groups = XARGMATCH ("--all-repeated", optarg, ++ delimit_method_string, ++ delimit_method_map); ++ break; ++ ++ case 'f': ++ skip_field_option_type = SFO_NEW; ++ skip_fields = size_opt (optarg, ++ N_("invalid number of fields to skip")); ++ break; ++ ++ case 'i': ++ ignore_case = true; ++ break; ++ ++ case 's': ++ skip_chars = size_opt (optarg, ++ N_("invalid number of bytes to skip")); ++ break; ++ ++ case 'u': ++ output_first_repeated = false; ++ break; ++ ++ case 'w': ++ check_chars = size_opt (optarg, ++ N_("invalid number of bytes to compare")); ++ break; ++ ++ case 'z': ++ delimiter = '\0'; ++ break; ++ ++ 
case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ if (countmode == count_occurrences && output_later_repeated) ++ { ++ error (0, 0, ++ _("printing all duplicated lines and repeat counts is meaningless")); ++ usage (EXIT_FAILURE); ++ } ++ ++ check_file (file[0], file[1], delimiter); ++ ++ exit (EXIT_SUCCESS); ++} +diff -urNp coreutils-8.0-orig/tests/Makefile.am coreutils-8.0/tests/Makefile.am +--- coreutils-8.0-orig/tests/Makefile.am 2009-09-29 16:25:44.000000000 +0200 ++++ coreutils-8.0/tests/Makefile.am 2009-10-07 10:07:16.000000000 +0200 +@@ -208,6 +208,7 @@ TESTS = \ + misc/sort-compress \ + misc/sort-continue \ + misc/sort-files0-from \ ++ misc/sort-mb-tests \ + misc/sort-merge \ + misc/sort-merge-fdlimit \ + misc/sort-rand \ +@@ -452,6 +453,10 @@ TESTS = \ + $(root_tests) - if (prevline[which - 1]) + pr_data = \ ++ misc/mb1.X \ ++ misc/mb1.I \ ++ misc/mb2.X \ ++ misc/mb2.I \ + pr/0F \ + pr/0FF \ + pr/0FFnt \ +diff -urNp coreutils-8.0-orig/tests/Makefile.am.orig coreutils-8.0/tests/Makefile.am.orig +--- coreutils-8.0-orig/tests/Makefile.am.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/tests/Makefile.am.orig 2009-09-29 16:25:44.000000000 +0200 +@@ -0,0 +1,616 @@ ++## Process this file with automake to produce Makefile.in -*-Makefile-*-. ++ ++# Sort in traditional ASCII order, regardless of the current locale; ++# otherwise we may get into trouble with distinct strings that the ++# current locale considers to be equal. 
++ASSORT = LC_ALL=C sort ++ ++EXTRA_DIST = \ ++ Coreutils.pm \ ++ CuTmpdir.pm \ ++ README \ ++ check.mk \ ++ envvar-check \ ++ lang-default \ ++ other-fs-tmpdir \ ++ require-perl \ ++ sample-test \ ++ test-lib.sh \ ++ $(pr_data) ++ ++root_tests = \ ++ chown/basic \ ++ cp/cp-a-selinux \ ++ cp/preserve-gid \ ++ cp/special-bits \ ++ cp/cp-mv-enotsup-xattr \ ++ chroot/credentials \ ++ dd/skip-seek-past-dev \ ++ install/install-C-root \ ++ ls/capability \ ++ ls/nameless-uid \ ++ misc/chcon \ ++ misc/selinux \ ++ misc/truncate-owned-by-other \ ++ mkdir/writable-under-readonly \ ++ mv/sticky-to-xpart \ ++ rm/fail-2eperm \ ++ rm/no-give-up \ ++ rm/one-file-system \ ++ tail-2/append-only \ ++ touch/now-owned-by-other ++ ++.PHONY: check-root ++check-root: ++ $(MAKE) check TESTS='$(root_tests)' ++ ++check-recursive: root-hint ++ ++# Advertise `check-root' target. ++.PHONY: root-hint ++root-hint: ++ @echo '***********************************************************' ++ @echo "NOTICE: Some tests may be run only as root." ++ @echo " See the 'Running tests as root' section in README." ++ @echo '***********************************************************' ++ ++EXTRA_DIST += $(TESTS) ++ ++# Do not choose a name that is a shell keyword like 'if', or a ++# commonly-used utility like 'cat' or 'test', as the name of a test. ++# Otherwise, VPATH builds will fail on hosts like Solaris, since they ++# will expand 'if test ...' to 'if .../test ...', and the '.../test' ++# will execute the test script rather than the standard utility. ++ ++# Notes on the ordering of these tests: ++# Place early in the list tests of the tools that ++# are most commonly used in test scripts themselves. ++# E.g., nearly every test script uses rm and chmod. ++# help-version comes early because it's a basic sanity test. ++# Put seq early, since lots of other tests use it. 
++# Put tests that sleep early, but not all together, so in parallel builds ++# they share time with tests that burn CPU, not with others that sleep. ++# Put head-elide-tail early, because it's long-running. ++ ++TESTS = \ ++ misc/help-version \ ++ misc/invalid-opt \ ++ rm/ext3-perf \ ++ rm/cycle \ ++ cp/link-heap \ ++ chmod/no-x \ ++ chgrp/basic \ ++ rm/dangling-symlink \ ++ misc/ls-time \ ++ rm/deep-1 \ ++ rm/deep-2 \ ++ rm/dir-no-w \ ++ rm/dir-nonrecur \ ++ rm/dot-rel \ ++ rm/isatty \ ++ rm/empty-inacc \ ++ rm/empty-name \ ++ rm/f-1 \ ++ rm/fail-eacces \ ++ rm/fail-eperm \ ++ tail-2/assert \ ++ rm/hash \ ++ rm/i-1 \ ++ rm/i-never \ ++ rm/i-no-r \ ++ tail-2/infloop-1 \ ++ rm/ignorable \ ++ rm/inaccessible \ ++ rm/interactive-always \ ++ rm/interactive-once \ ++ rm/ir-1 \ ++ rm/r-1 \ ++ rm/r-2 \ ++ rm/r-3 \ ++ rm/r-4 \ ++ rm/readdir-bug \ ++ rm/rm1 \ ++ touch/empty-file \ ++ rm/rm2 \ ++ rm/rm3 \ ++ rm/rm4 \ ++ rm/rm5 \ ++ rm/sunos-1 \ ++ rm/unread2 \ ++ rm/unread3 \ ++ rm/unreadable \ ++ rm/v-slash \ ++ chgrp/default-no-deref \ ++ chgrp/deref \ ++ chgrp/no-x \ ++ chgrp/posix-H \ ++ chgrp/recurse \ ++ misc/ptx \ ++ misc/test \ ++ misc/seq \ ++ misc/seq-long-double \ ++ misc/head \ ++ misc/head-elide-tail \ ++ tail-2/tail-n0f \ ++ misc/ls-misc \ ++ misc/date \ ++ misc/date-next-dow \ ++ misc/ptx-overrun \ ++ misc/xstrtol \ ++ tail-2/pid \ ++ misc/od \ ++ misc/mktemp \ ++ misc/arch \ ++ misc/pr \ ++ misc/join \ ++ pr/pr-tests \ ++ misc/df-P \ ++ misc/pwd-option \ ++ misc/pwd-unreadable-parent \ ++ misc/chcon-fail \ ++ misc/cut \ ++ misc/wc \ ++ misc/wc-files0-from \ ++ misc/wc-files0 \ ++ misc/cat-proc \ ++ misc/cat-buf \ ++ misc/base64 \ ++ misc/basename \ ++ misc/close-stdout \ ++ misc/comm \ ++ misc/csplit \ ++ misc/date-sec \ ++ misc/dircolors \ ++ misc/df \ ++ misc/dirname \ ++ misc/expand \ ++ misc/expr \ ++ misc/factor \ ++ misc/false-status \ ++ misc/fmt \ ++ misc/fmt-long-line \ ++ misc/fold \ ++ misc/groups-dash \ ++ misc/groups-version \ ++ misc/head-c \ 
++ misc/head-pos \ ++ misc/id-context \ ++ misc/id-groups \ ++ misc/md5sum \ ++ misc/md5sum-newline \ ++ misc/mknod \ ++ misc/nice \ ++ misc/nl \ ++ misc/nohup \ ++ misc/od-N \ ++ misc/od-multiple-t \ ++ misc/od-x8 \ ++ misc/paste \ ++ misc/pathchk1 \ ++ misc/printf \ ++ misc/printf-cov \ ++ misc/printf-hex \ ++ misc/printf-surprise \ ++ misc/pwd-long \ ++ misc/readlink-fp-loop \ ++ misc/runcon-no-reorder \ ++ misc/sha1sum \ ++ misc/sha1sum-vec \ ++ misc/sha224sum \ ++ misc/sha256sum \ ++ misc/sha384sum \ ++ misc/sha512sum \ ++ misc/shred-exact \ ++ misc/shred-passes \ ++ misc/shred-remove \ ++ misc/shuf \ ++ misc/sort \ ++ misc/sort-compress \ ++ misc/sort-continue \ ++ misc/sort-files0-from \ ++ misc/sort-merge \ ++ misc/sort-merge-fdlimit \ ++ misc/sort-rand \ ++ misc/sort-version \ ++ misc/split-a \ ++ misc/split-fail \ ++ misc/split-l \ ++ misc/stat-fmt \ ++ misc/stat-hyphen \ ++ misc/stat-printf \ ++ misc/stdbuf \ ++ misc/stty \ ++ misc/stty-invalid \ ++ misc/stty-row-col \ ++ misc/sum \ ++ misc/sum-sysv \ ++ misc/tac \ ++ misc/tac-continue \ ++ misc/tail \ ++ misc/tee \ ++ misc/tee-dash \ ++ misc/test-diag \ ++ misc/timeout \ ++ misc/timeout-parameters \ ++ misc/tr \ ++ misc/truncate-dangling-symlink \ ++ misc/truncate-dir-fail \ ++ misc/truncate-fail-diag \ ++ misc/truncate-fifo \ ++ misc/truncate-no-create-missing \ ++ misc/truncate-overflow \ ++ misc/truncate-parameters \ ++ misc/truncate-relative \ ++ misc/tsort \ ++ misc/tty-eof \ ++ misc/unexpand \ ++ misc/uniq \ ++ misc/xattr \ ++ tail-2/wait \ ++ chmod/c-option \ ++ chmod/equal-x \ ++ chmod/equals \ ++ chmod/inaccessible \ ++ chmod/octal \ ++ chmod/setgid \ ++ chmod/silent \ ++ chmod/thru-dangling \ ++ chmod/umask-x \ ++ chmod/usage \ ++ chown/deref \ ++ chown/preserve-root \ ++ chown/separator \ ++ cp/abuse \ ++ cp/acl \ ++ cp/backup-1 \ ++ cp/backup-dir \ ++ cp/backup-is-src \ ++ cp/cp-HL \ ++ cp/cp-deref \ ++ cp/cp-i \ ++ cp/cp-mv-backup \ ++ cp/cp-parents \ ++ cp/deref-slink \ ++ cp/dir-rm-dest \ 
++ cp/dir-slash \ ++ cp/dir-vs-file \ ++ cp/existing-perm-race \ ++ cp/fail-perm \ ++ cp/file-perm-race \ ++ cp/into-self \ ++ cp/link \ ++ cp/link-no-deref \ ++ cp/link-preserve \ ++ cp/no-deref-link1 \ ++ cp/no-deref-link2 \ ++ cp/no-deref-link3 \ ++ cp/parent-perm \ ++ cp/parent-perm-race \ ++ cp/perm \ ++ cp/preserve-2 \ ++ cp/preserve-slink-time \ ++ cp/proc-short-read \ ++ cp/proc-zero-len \ ++ cp/r-vs-symlink \ ++ cp/reflink-auto \ ++ cp/reflink-perm \ ++ cp/same-file \ ++ cp/slink-2-slink \ ++ cp/sparse \ ++ cp/special-f \ ++ cp/src-base-dot \ ++ cp/symlink-slash \ ++ cp/thru-dangling \ ++ df/unreadable \ ++ dd/direct \ ++ dd/misc \ ++ dd/not-rewound \ ++ dd/reblock \ ++ dd/skip-seek \ ++ dd/skip-seek2 \ ++ dd/skip-seek-past-file \ ++ dd/stderr \ ++ dd/unblock \ ++ dd/unblock-sync \ ++ df/total-verify \ ++ du/2g \ ++ du/8gb \ ++ du/basic \ ++ du/deref \ ++ du/deref-args \ ++ du/exclude \ ++ du/fd-leak \ ++ du/files0-from \ ++ du/hard-link \ ++ du/inacc-dest \ ++ du/inacc-dir \ ++ du/inaccessible-cwd \ ++ du/long-from-unreadable \ ++ du/long-sloop \ ++ du/no-deref \ ++ du/no-x \ ++ du/one-file-system \ ++ du/restore-wd \ ++ du/slash \ ++ du/slink \ ++ du/trailing-slash \ ++ du/two-args \ ++ id/no-context \ ++ install/basic-1 \ ++ install/create-leading \ ++ install/d-slashdot \ ++ install/install-C \ ++ install/install-C-selinux \ ++ install/strip-program \ ++ install/trap \ ++ ln/backup-1 \ ++ ln/hard-backup \ ++ ln/hard-to-sym \ ++ ln/misc \ ++ ln/sf-1 \ ++ ln/slash-decorated-nonexistent-dest \ ++ ln/target-1 \ ++ ls/abmon-align \ ++ ls/color-clear-to-eol \ ++ ls/color-dtype-dir \ ++ ls/dangle \ ++ ls/dired \ ++ ls/file-type \ ++ ls/follow-slink \ ++ ls/infloop \ ++ ls/inode \ ++ ls/m-option \ ++ ls/multihardlink \ ++ ls/no-arg \ ++ ls/no-cap \ ++ ls/proc-selinux-segfault \ ++ ls/readdir-mountpoint-inode \ ++ ls/recursive \ ++ ls/rt-1 \ ++ ls/stat-dtype \ ++ ls/stat-failed \ ++ ls/stat-free-symlinks \ ++ ls/stat-vs-dirent \ ++ ls/symlink-slash \ ++ 
ls/x-option \ ++ mkdir/p-1 \ ++ mkdir/p-2 \ ++ mkdir/p-3 \ ++ mkdir/p-slashdot \ ++ mkdir/p-thru-slink \ ++ mkdir/p-v \ ++ mkdir/parents \ ++ mkdir/perm \ ++ mkdir/selinux \ ++ mkdir/special-1 \ ++ mkdir/t-slash \ ++ mv/acl \ ++ mv/atomic \ ++ mv/atomic2 \ ++ mv/backup-dir \ ++ mv/backup-is-src \ ++ mv/childproof \ ++ mv/diag \ ++ mv/dir-file \ ++ mv/dir2dir \ ++ mv/dup-source \ ++ mv/force \ ++ mv/hard-2 \ ++ mv/hard-3 \ ++ mv/hard-4 \ ++ mv/hard-link-1 \ ++ mv/hard-verbose \ ++ mv/i-1 \ ++ mv/i-2 \ ++ mv/i-3 \ ++ mv/i-4 \ ++ mv/i-5 \ ++ mv/i-link-no \ ++ mv/into-self \ ++ mv/into-self-2 \ ++ mv/into-self-3 \ ++ mv/into-self-4 \ ++ mv/leak-fd \ ++ mv/mv-n \ ++ mv/mv-special-1 \ ++ mv/no-target-dir \ ++ mv/part-fail \ ++ mv/part-hardlink \ ++ mv/part-rename \ ++ mv/part-symlink \ ++ mv/partition-perm \ ++ mv/perm-1 \ ++ mv/to-symlink \ ++ mv/trailing-slash \ ++ mv/update \ ++ readlink/can-e \ ++ readlink/can-f \ ++ readlink/can-m \ ++ readlink/rl-1 \ ++ rmdir/fail-perm \ ++ rmdir/ignore \ ++ rmdir/t-slash \ ++ tail-2/assert-2 \ ++ tail-2/big-4gb \ ++ tail-2/flush-initial \ ++ tail-2/follow-stdin \ ++ tail-2/pipe-f \ ++ tail-2/pipe-f2 \ ++ tail-2/proc-ksyms \ ++ tail-2/start-middle \ ++ touch/60-seconds \ ++ touch/dangling-symlink \ ++ touch/dir-1 \ ++ touch/fail-diag \ ++ touch/fifo \ ++ touch/no-create-missing \ ++ touch/no-rights \ ++ touch/not-owner \ ++ touch/obsolescent \ ++ touch/read-only \ ++ touch/relative \ ++ $(root_tests) ++ ++pr_data = \ ++ pr/0F \ ++ pr/0FF \ ++ pr/0FFnt \ ++ pr/0FFt \ ++ pr/0FnFnt \ ++ pr/0FnFt \ ++ pr/0Fnt \ ++ pr/0Ft \ ++ pr/2-S_f-t_notab \ ++ pr/2-Sf-t_notab \ ++ pr/2f-t_notab \ ++ pr/2s_f-t_notab \ ++ pr/2s_w60f-t_nota \ ++ pr/2sf-t_notab \ ++ pr/2sw60f-t_notab \ ++ pr/2w60f-t_notab \ ++ pr/3-0F \ ++ pr/3-5l24f-t \ ++ pr/3-FF \ ++ pr/3a2l17-FF \ ++ pr/3a3f-0F \ ++ pr/3a3l15-t \ ++ pr/3a3l15f-t \ ++ pr/3b2l17-FF \ ++ pr/3b3f-0F \ ++ pr/3b3f-0FF \ ++ pr/3b3f-FF \ ++ pr/3b3l15-t \ ++ pr/3b3l15f-t \ ++ pr/3f-0F \ ++ pr/3f-FF \ ++ 
pr/3l24-t \ ++ pr/3l24f-t \ ++ pr/3ml24-FF \ ++ pr/3ml24-t \ ++ pr/3ml24-t-FF \ ++ pr/3ml24f-t \ ++ pr/4-7l24-FF \ ++ pr/4l24-FF \ ++ pr/FF \ ++ pr/FFn \ ++ pr/FFtn \ ++ pr/FnFn \ ++ pr/Ja3l24f-lm \ ++ pr/Jb3l24f-lm \ ++ pr/Jml24f-lm-lo \ ++ pr/W-72l24f-ll \ ++ pr/W20l24f-ll \ ++ pr/W26l24f-ll \ ++ pr/W27l24f-ll \ ++ pr/W28l24f-ll \ ++ pr/W35Ja3l24f-lm \ ++ pr/W35Jb3l24f-lm \ ++ pr/W35Jml24f-lmlo \ ++ pr/W35a3l24f-lm \ ++ pr/W35b3l24f-lm \ ++ pr/W35ml24f-lm-lo \ ++ pr/W72Jl24f-ll \ ++ pr/a2l15-FF \ ++ pr/a2l17-FF \ ++ pr/a3-0F \ ++ pr/a3f-0F \ ++ pr/a3f-0FF \ ++ pr/a3f-FF \ ++ pr/a3l15-t \ ++ pr/a3l15f-t \ ++ pr/a3l24f-lm \ ++ pr/b2l15-FF \ ++ pr/b2l17-FF \ ++ pr/b3-0F \ ++ pr/b3f-0F \ ++ pr/b3f-0FF \ ++ pr/b3f-FF \ ++ pr/b3l15-t \ ++ pr/b3l15f-t \ ++ pr/b3l24f-lm \ ++ pr/l24-FF \ ++ pr/l24-t \ ++ pr/l24f-t \ ++ pr/loli \ ++ pr/ml20-FF-t \ ++ pr/ml24-FF \ ++ pr/ml24-t \ ++ pr/ml24-t-FF \ ++ pr/ml24f-0F \ ++ pr/ml24f-lm-lo \ ++ pr/ml24f-t \ ++ pr/ml24f-t-0F \ ++ pr/n+2-5l24f-0FF \ ++ pr/n+2l24f-0FF \ ++ pr/n+2l24f-bl \ ++ pr/n+3-7l24-FF \ ++ pr/n+3l24f-0FF \ ++ pr/n+3l24f-bl \ ++ pr/n+3ml20f-bl-FF \ ++ pr/n+3ml24f-bl-tn \ ++ pr/n+3ml24f-tn-bl \ ++ pr/n+4-8a2l17-FF \ ++ pr/n+4b2l17f-0FF \ ++ pr/n+5-8b3l17f-FF \ ++ pr/n+5a3l13f-0FF \ ++ pr/n+6a2l17-FF \ ++ pr/n+6b3l13f-FF \ ++ pr/n+7l24-FF \ ++ pr/n+8l20-FF \ ++ pr/nJml24f-lmlmlo \ ++ pr/nJml24f-lmlolm \ ++ pr/nN1+3l24f-bl \ ++ pr/nN15l24f-bl \ ++ pr/nSml20-bl-FF \ ++ pr/nSml20-t-t-FF \ ++ pr/nSml20-t-tFFFF \ ++ pr/nSml24-bl-FF \ ++ pr/nSml24-t-t-FF \ ++ pr/nSml24-t-tFFFF \ ++ pr/nl24f-bl \ ++ pr/o3Jml24f-lm-lo \ ++ pr/o3a3Sl24f-tn \ ++ pr/o3a3Snl24f-tn \ ++ pr/o3a3l24f-tn \ ++ pr/o3b3Sl24f-tn \ ++ pr/o3b3Snl24f-tn \ ++ pr/o3b3l24f-tn \ ++ pr/o3mSl24f-bl-tn \ ++ pr/o3mSnl24fbltn \ ++ pr/o3ml24f-bl-tn \ ++ pr/t-0FF \ ++ pr/t-FF \ ++ pr/t-bl \ ++ pr/t-t \ ++ pr/tFFn \ ++ pr/tFFt \ ++ pr/tFFt-bl \ ++ pr/tFFt-ll \ ++ pr/tFFt-lm \ ++ pr/tFnFt \ ++ pr/t_notab \ ++ pr/t_tab \ ++ pr/t_tab_ \ ++ pr/ta3-0FF \ ++ pr/ta3-FF \ ++ 
pr/tb3-0FF \ ++ pr/tb3-FF \ ++ pr/tn \ ++ pr/tn2e5o3-t_tab \ ++ pr/tn2e8-t_tab \ ++ pr/tn2e8o3-t_tab \ ++ pr/tn_2e8-t_tab \ ++ pr/tn_2e8S-t_tab \ ++ pr/tne8-t_tab \ ++ pr/tne8o3-t_tab \ ++ pr/tt-0FF \ ++ pr/tt-FF \ ++ pr/tt-bl \ ++ pr/tt-t \ ++ pr/tta3-0FF \ ++ pr/tta3-FF \ ++ pr/ttb3-0FF \ ++ pr/ttb3-FF \ ++ pr/w72l24f-ll ++ ++include $(srcdir)/check.mk +diff -urNp coreutils-8.0-orig/tests/misc/cut coreutils-8.0/tests/misc/cut +--- coreutils-8.0-orig/tests/misc/cut 2009-09-21 14:29:33.000000000 +0200 ++++ coreutils-8.0/tests/misc/cut 2009-10-07 10:07:16.000000000 +0200 +@@ -26,7 +26,7 @@ use strict; + my $prog = 'cut'; + my $try = "Try \`$prog --help' for more information.\n"; + my $from_1 = "$prog: fields and positions are numbered from 1\n$try"; +-my $inval = "$prog: invalid byte or field list\n$try"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; + my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try"; + + my @Tests = +@@ -141,7 +141,7 @@ my @Tests = + + # None of the following invalid ranges provoked an error up to coreutils-6.9. 
+ ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1}, +- {ERR=>"$prog: invalid decreasing range\n$try"}], ++ {ERR=>"$prog: invalid byte, character or field list\n$try"}], + ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], + ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], + ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}], +diff -urNp coreutils-8.0-orig/tests/misc/mb1.I coreutils-8.0/tests/misc/mb1.I +--- coreutils-8.0-orig/tests/misc/mb1.I 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/tests/misc/mb1.I 2009-10-07 10:07:16.000000000 +0200 +@@ -0,0 +1,4 @@ ++Apple@10 ++Banana@5 ++Citrus@20 ++Cherry@30 +diff -urNp coreutils-8.0-orig/tests/misc/mb1.X coreutils-8.0/tests/misc/mb1.X +--- coreutils-8.0-orig/tests/misc/mb1.X 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/tests/misc/mb1.X 2009-10-07 10:07:16.000000000 +0200 +@@ -0,0 +1,4 @@ ++Banana@5 ++Apple@10 ++Citrus@20 ++Cherry@30 +diff -urNp coreutils-8.0-orig/tests/misc/mb2.I coreutils-8.0/tests/misc/mb2.I +--- coreutils-8.0-orig/tests/misc/mb2.I 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/tests/misc/mb2.I 2009-10-07 10:07:16.000000000 +0200 +@@ -0,0 +1,4 @@ ++Apple@AA10@@20 ++Banana@AA5@@30 ++Citrus@AA20@@5 ++Cherry@AA30@@10 +diff -urNp coreutils-8.0-orig/tests/misc/mb2.X coreutils-8.0/tests/misc/mb2.X +--- coreutils-8.0-orig/tests/misc/mb2.X 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/tests/misc/mb2.X 2009-10-07 10:07:16.000000000 +0200 +@@ -0,0 +1,4 @@ ++Citrus@AA20@@5 ++Cherry@AA30@@10 ++Apple@AA10@@20 ++Banana@AA5@@30 +diff -urNp coreutils-8.0-orig/tests/misc/sort-mb-tests coreutils-8.0/tests/misc/sort-mb-tests +--- coreutils-8.0-orig/tests/misc/sort-mb-tests 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/tests/misc/sort-mb-tests 2009-10-07 10:07:16.000000000 +0200 +@@ -0,0 +1,58 @@ ++#! 
/bin/sh ++case $# in ++ 0) xx='../src/sort';; ++ *) xx="$1";; ++esac ++test "$VERBOSE" && echo=echo || echo=: ++$echo testing program: $xx ++errors=0 ++test "$srcdir" || srcdir=. ++test "$VERBOSE" && $xx --version 2> /dev/null ++ ++export LC_ALL=en_US.UTF-8 ++locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77 ++errors=0 ++ ++$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O ++code=$? ++if test $code != 0; then ++ $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2 ++ errors=`expr $errors + 1` ++else ++ cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1 ++ case $? in ++ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;; ++ 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2 ++ (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null ++ errors=`expr $errors + 1`;; ++ 2) $echo "Test mb1 may have failed." 1>&2 ++ $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2 ++ errors=`expr $errors + 1`;; ++ esac ++fi ++ ++$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O ++code=$? ++if test $code != 0; then ++ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2 ++ errors=`expr $errors + 1` ++else ++ cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1 ++ case $? in ++ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;; ++ 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2 ++ (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null ++ errors=`expr $errors + 1`;; ++ 2) $echo "Test mb2 may have failed." 1>&2 ++ $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2 ++ errors=`expr $errors + 1`;; ++ esac ++fi ++ ++if test $errors = 0; then ++ $echo Passed all 113 tests. 1>&2 ++else ++ $echo Failed $errors tests. 
1>&2 ++fi ++test $errors = 0 || errors=1 ++exit $errors diff --git a/coreutils-pam.patch b/coreutils-pam.patch index 3bfb9d3..2a6334f 100644 --- a/coreutils-pam.patch +++ b/coreutils-pam.patch @@ -1,6 +1,16366 @@ ---- coreutils-6.7/src/Makefile.am.pam 2006-11-24 21:28:10.000000000 +0000 -+++ coreutils-6.7/src/Makefile.am 2007-01-09 17:00:01.000000000 +0000 -@@ -103,7 +103,7 @@ +diff -urNp coreutils-8.0-orig/configure.ac coreutils-8.0/configure.ac +--- coreutils-8.0-orig/configure.ac 2009-09-29 15:27:11.000000000 +0200 ++++ coreutils-8.0/configure.ac 2009-10-07 10:04:27.000000000 +0200 +@@ -115,6 +115,13 @@ if test "$gl_gcc_warnings" = yes; then + AC_DEFINE([GNULIB_PORTCHECK], [1], [enable some gnulib portability checks]) + fi + ++dnl Give the chance to enable PAM ++AC_ARG_ENABLE(pam, dnl ++[ --enable-pam Enable use of the PAM libraries], ++[AC_DEFINE(USE_PAM, 1, [Define if you want to use PAM]) ++LIB_PAM="-ldl -lpam -lpam_misc" ++AC_SUBST(LIB_PAM)]) ++ + AC_FUNC_FORK + + optional_bin_progs= +diff -urNp coreutils-8.0-orig/configure.ac.orig coreutils-8.0/configure.ac.orig +--- coreutils-8.0-orig/configure.ac.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/configure.ac.orig 2009-09-29 15:27:11.000000000 +0200 +@@ -0,0 +1,439 @@ ++# -*- autoconf -*- ++# Process this file with autoconf to produce a configure script. ++ ++# Copyright (C) 1991, 1993-2009 Free Software Foundation, Inc. ++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++dnl Written by Jim Meyering. ++ ++AC_PREREQ([2.61]) ++ ++# Make inter-release version strings look like, e.g., v6.9-219-g58ddd, which ++# indicates that it is built from the 219th delta (in _some_ repository) ++# following the v6.9 tag, and that 58ddd is a prefix of the commit SHA1. ++AC_INIT([GNU coreutils], ++ m4_esyscmd([build-aux/git-version-gen .tarball-version]), ++ [bug-coreutils@gnu.org]) ++ ++AC_CONFIG_SRCDIR([src/ls.c]) ++ ++AC_CONFIG_AUX_DIR([build-aux]) ++AC_CONFIG_HEADERS([lib/config.h:lib/config.hin]) ++ ++AM_INIT_AUTOMAKE([1.11 dist-xz color-tests parallel-tests]) ++AM_SILENT_RULES([yes]) # make --enable-silent-rules the default. ++ ++AC_PROG_CC_STDC ++AM_PROG_CC_C_O ++AC_PROG_CPP ++AC_PROG_GCC_TRADITIONAL ++AC_PROG_RANLIB ++AC_PROG_LN_S ++gl_EARLY ++gl_INIT ++coreutils_MACROS ++ ++AC_ARG_ENABLE([gcc-warnings], ++ [AS_HELP_STRING([--enable-gcc-warnings], ++ [turn on lots of GCC warnings (for developers)])], ++ [case $enableval in ++ yes|no) ;; ++ *) AC_MSG_ERROR([bad value $enableval for gcc-warnings option]) ;; ++ esac ++ gl_gcc_warnings=$enableval], ++ [gl_gcc_warnings=no] ++) ++ ++if test "$gl_gcc_warnings" = yes; then ++ gl_WARN_ADD([-Werror], [WERROR_CFLAGS]) ++ AC_SUBST([WERROR_CFLAGS]) ++ ++ nw= ++ # This, $nw, is the list of warnings we disable. 
++ nw="$nw -Wdeclaration-after-statement" # too useful to forbid ++ nw="$nw -Waggregate-return" # anachronistic ++ nw="$nw -Wlong-long" # C90 is anachronistic (lib/gethrxtime.h) ++ nw="$nw -Wc++-compat" # We don't care about C++ compilers ++ nw="$nw -Wundef" # Warns on '#if GNULIB_FOO' etc in gnulib ++ nw="$nw -Wtraditional" # Warns on #elif which we use often ++ nw="$nw -Wcast-qual" # Too many warnings for now ++ nw="$nw -Wconversion" # Too many warnings for now ++ nw="$nw -Wsystem-headers" # Don't let system headers trigger warnings ++ nw="$nw -Wsign-conversion" # Too many warnings for now ++ nw="$nw -Wtraditional-conversion" # Too many warnings for now ++ nw="$nw -Wunreachable-code" # Too many warnings for now ++ nw="$nw -Wpadded" # Our structs are not padded ++ nw="$nw -Wredundant-decls" # openat.h declares e.g., mkdirat ++ nw="$nw -Wlogical-op" # any use of fwrite provokes this ++ nw="$nw -Wformat-nonliteral" # who.c and pinky.c strftime uses ++ nw="$nw -Wvla" # warnings in gettext.h ++ nw="$nw -Wnested-externs" # use of XARGMATCH/verify_function__ ++ nw="$nw -Wswitch-enum" # Too many warnings for now ++ nw="$nw -Wswitch-default" # Too many warnings for now ++ nw="$nw -Wstack-protector" # not worth working around ++ # things I might fix soon: ++ nw="$nw -Wfloat-equal" # sort.c, seq.c ++ nw="$nw -Wmissing-format-attribute" # copy.c ++ nw="$nw -Wunsafe-loop-optimizations" # a few src/*.c ++ nw="$nw -Winline" # system.h's readdir_ignoring_dot_and_dotdot ++ nw="$nw -Wstrict-overflow" # expr.c, pr.c, tr.c, factor.c ++ # ?? 
-Wstrict-overflow ++ ++ gl_MANYWARN_ALL_GCC([ws]) ++ gl_MANYWARN_COMPLEMENT([ws], [$ws], [$nw]) ++ for w in $ws; do ++ gl_WARN_ADD([$w]) ++ done ++ gl_WARN_ADD([-Wno-missing-field-initializers]) # We need this one ++ gl_WARN_ADD([-Wno-sign-compare]) # Too many warnings for now ++ gl_WARN_ADD([-Wno-pointer-sign]) # Too many warnings for now ++ gl_WARN_ADD([-Wno-unused-parameter]) # Too many warnings for now ++ ++ # In spite of excluding -Wlogical-op above, it is enabled, as of ++ # gcc 4.5.0 20090517, and it provokes warnings in cat.c, dd.c, truncate.c ++ gl_WARN_ADD([-Wno-logical-op]) ++ ++ gl_WARN_ADD([-fdiagnostics-show-option]) ++ ++ AC_SUBST([WARN_CFLAGS]) ++ ++ AC_DEFINE([lint], [1], [Define to 1 if the compiler is checking for lint.]) ++ AC_DEFINE([_FORTIFY_SOURCE], [2], ++ [enable compile-time and run-time bounds-checking, and some warnings]) ++ AC_DEFINE([GNULIB_PORTCHECK], [1], [enable some gnulib portability checks]) ++fi ++ ++AC_FUNC_FORK ++ ++optional_bin_progs= ++AC_CHECK_FUNCS([uname], ++ gl_ADD_PROG([optional_bin_progs], [uname])) ++AC_CHECK_FUNCS([chroot], ++ gl_ADD_PROG([optional_bin_progs], [chroot])) ++AC_CHECK_FUNCS([gethostid], ++ gl_ADD_PROG([optional_bin_progs], [hostid])) ++ ++gl_WINSIZE_IN_PTEM ++ ++AC_MSG_CHECKING([whether localtime caches TZ]) ++AC_CACHE_VAL([utils_cv_localtime_cache], ++[if test x$ac_cv_func_tzset = xyes; then ++AC_RUN_IFELSE([AC_LANG_SOURCE([[#include ++#if STDC_HEADERS ++# include ++#endif ++extern char **environ; ++void unset_TZ (void) ++{ ++ char **from, **to; ++ for (to = from = environ; (*to = *from); from++) ++ if (! 
(to[0][0] == 'T' && to[0][1] == 'Z' && to[0][2] == '=')) ++ to++; ++} ++int main() ++{ ++ time_t now = time ((time_t *) 0); ++ int hour_GMT0, hour_unset; ++ if (putenv ("TZ=GMT0") != 0) ++ exit (1); ++ hour_GMT0 = localtime (&now)->tm_hour; ++ unset_TZ (); ++ hour_unset = localtime (&now)->tm_hour; ++ if (putenv ("TZ=PST8") != 0) ++ exit (1); ++ if (localtime (&now)->tm_hour == hour_GMT0) ++ exit (1); ++ unset_TZ (); ++ if (localtime (&now)->tm_hour != hour_unset) ++ exit (1); ++ exit (0); ++}]])], ++[utils_cv_localtime_cache=no], ++[utils_cv_localtime_cache=yes], ++[# If we have tzset, assume the worst when cross-compiling. ++utils_cv_localtime_cache=yes]) ++else ++ # If we lack tzset, report that localtime does not cache TZ, ++ # since we can't invalidate the cache if we don't have tzset. ++ utils_cv_localtime_cache=no ++fi])dnl ++AC_MSG_RESULT([$utils_cv_localtime_cache]) ++if test $utils_cv_localtime_cache = yes; then ++ AC_DEFINE([LOCALTIME_CACHE], [1], [FIXME]) ++fi ++ ++# SCO-ODT-3.0 is reported to need -los to link programs using initgroups ++AC_CHECK_FUNCS([initgroups]) ++if test $ac_cv_func_initgroups = no; then ++ AC_CHECK_LIB([os], [initgroups]) ++fi ++ ++AC_CHECK_FUNCS([syslog]) ++if test $ac_cv_func_syslog = no; then ++ # syslog is not in the default libraries. See if it's in some other. 
++ for lib in bsd socket inet; do ++ AC_CHECK_LIB([$lib], [syslog], [AC_DEFINE([HAVE_SYSLOG], [1], [FIXME]) ++ LIBS="$LIBS -l$lib"; break]) ++ done ++fi ++ ++AC_CACHE_CHECK([for 3-argument setpriority function], ++ [utils_cv_func_setpriority], ++ [AC_LINK_IFELSE( ++ [AC_LANG_PROGRAM( ++ [[#include ++ #include ++ ]], ++ [[setpriority (0, 0, 0);]])], ++ [utils_cv_func_setpriority=yes], ++ [utils_cv_func_setpriority=no])]) ++if test $utils_cv_func_setpriority = no; then ++ AC_CHECK_FUNCS([nice]) ++fi ++case $utils_cv_func_setpriority,$ac_cv_func_nice in ++*yes*) ++ gl_ADD_PROG([optional_bin_progs], [nice]) ++esac ++ ++AC_DEFUN([coreutils_DUMMY_1], ++[ ++ AC_REQUIRE([gl_READUTMP]) ++ if test $ac_cv_header_utmp_h = yes || test $ac_cv_header_utmpx_h = yes; then ++ gl_ADD_PROG([optional_bin_progs], [who]) ++ gl_ADD_PROG([optional_bin_progs], [users]) ++ gl_ADD_PROG([optional_bin_progs], [pinky]) ++ fi ++]) ++coreutils_DUMMY_1 ++ ++AC_MSG_CHECKING([ut_host in struct utmp]) ++AC_CACHE_VAL([su_cv_func_ut_host_in_utmp], ++[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ++#include ]], [[struct utmp ut; return !sizeof ut.ut_host;]])], ++ [su_cv_func_ut_host_in_utmp=yes], ++ [su_cv_func_ut_host_in_utmp=no])]) ++AC_MSG_RESULT([$su_cv_func_ut_host_in_utmp]) ++if test $su_cv_func_ut_host_in_utmp = yes; then ++ have_ut_host=1 ++ AC_DEFINE([HAVE_UT_HOST], [1], [FIXME]) ++fi ++ ++if test -z "$have_ut_host"; then ++ AC_MSG_CHECKING([ut_host in struct utmpx]) ++ AC_CACHE_VAL([su_cv_func_ut_host_in_utmpx], ++ [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ++#include ]], [[struct utmpx ut; return !sizeof ut.ut_host;]])], ++ [su_cv_func_ut_host_in_utmpx=yes], ++ [su_cv_func_ut_host_in_utmpx=no])]) ++ AC_MSG_RESULT([$su_cv_func_ut_host_in_utmpx]) ++ if test $su_cv_func_ut_host_in_utmpx = yes; then ++ AC_DEFINE([HAVE_UTMPX_H], [1], [FIXME]) ++ AC_DEFINE([HAVE_UT_HOST], [1], [FIXME]) ++ fi ++fi ++ ++GNULIB_BOOT_TIME([gl_ADD_PROG([optional_bin_progs], [uptime])]) ++ ++AC_SYS_POSIX_TERMIOS() 
++gl_HEADER_TIOCGWINSZ_NEEDS_SYS_IOCTL ++ ++if test $ac_cv_sys_posix_termios = yes; then ++ gl_ADD_PROG([optional_bin_progs], [stty]) ++ ++ AC_MSG_CHECKING([whether termios.h needs _XOPEN_SOURCE]) ++ AC_CACHE_VAL([su_cv_sys_termios_needs_xopen_source], ++ [AC_EGREP_CPP([yes], [#include ++#ifdef IUCLC ++yes ++#endif], su_cv_sys_termios_needs_xopen_source=no, ++ AC_EGREP_CPP([yes], [#define _XOPEN_SOURCE ++#include ++#ifdef IUCLC ++yes ++#endif], su_cv_sys_termios_needs_xopen_source=yes, ++ su_cv_sys_termios_needs_xopen_source=no))]) ++ AC_MSG_RESULT([$su_cv_sys_termios_needs_xopen_source]) ++ test $su_cv_sys_termios_needs_xopen_source = yes && ++ AC_DEFINE([TERMIOS_NEEDS_XOPEN_SOURCE], [1], [FIXME]) ++ ++ AC_MSG_CHECKING([c_line in struct termios]) ++ AC_CACHE_VAL([su_cv_sys_c_line_in_termios], ++ [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#if TERMIOS_NEEDS_XOPEN_SOURCE ++#define _XOPEN_SOURCE ++#endif ++#include ++#include ]], [[struct termios t; return !sizeof t.c_line;]])], ++ [su_cv_sys_c_line_in_termios=yes], ++ [su_cv_sys_c_line_in_termios=no])]) ++ AC_MSG_RESULT([$su_cv_sys_c_line_in_termios]) ++ test $su_cv_sys_c_line_in_termios = yes \ ++ && AC_DEFINE([HAVE_C_LINE], [1], [FIXME]) ++fi ++ ++# FIXME: note that this macro appears above, too. ++# I'm leaving it here for now. This whole thing needs to be modernized... 
++gl_WINSIZE_IN_PTEM ++ ++gl_HEADER_TIOCGWINSZ_IN_TERMIOS_H ++ ++if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \ ++ test $gl_cv_sys_tiocgwinsz_needs_sys_ioctl_h = no; then ++ AC_MSG_CHECKING([TIOCGWINSZ in sys/pty.h]) ++ AC_CACHE_VAL([su_cv_sys_tiocgwinsz_in_sys_pty_h], ++ [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ++#ifdef WINSIZE_IN_PTEM ++# include ++# include ++#endif ++#include ++#include ++#include ]], [[int x = TIOCGWINSZ;]])], ++ [su_cv_sys_tiocgwinsz_in_sys_pty_h=yes], ++ [su_cv_sys_tiocgwinsz_in_sys_pty_h=no])]) ++ AC_MSG_RESULT([$su_cv_sys_tiocgwinsz_in_sys_pty_h]) ++ ++ test $su_cv_sys_tiocgwinsz_in_sys_pty_h = yes \ ++ && AC_DEFINE([GWINSZ_IN_SYS_PTY], [1], ++ [Define if your system defines TIOCGWINSZ in sys/pty.h.]) ++fi ++ ++# For src/kill.c. ++AC_CHECK_DECLS([strsignal, sys_siglist, _sys_siglist, __sys_siglist], , , ++ [AC_INCLUDES_DEFAULT ++#include ]) ++ ++cu_LIB_CHECK ++cu_GMP ++ ++# Build df only if there's a point to it. ++if test $gl_cv_list_mounted_fs = yes && test $gl_cv_fs_space = yes; then ++ gl_ADD_PROG([optional_bin_progs], [df]) ++fi ++ ++# Limit stdbuf to ELF systems with GCC ++optional_pkglib_progs= ++AC_MSG_CHECKING([whether this is an ELF system]) ++AC_EGREP_CPP([yes], [#if __ELF__ ++yes ++#endif], [elf_sys=yes], [elf_sys=no]) ++AC_MSG_RESULT([$elf_sys]) ++if test "$elf_sys" = "yes" && \ ++ test "$GCC" = "yes"; then ++ gl_ADD_PROG([optional_bin_progs], [stdbuf]) ++ gl_ADD_PROG([optional_pkglib_progs], [libstdbuf.so]) ++fi ++ ++############################################################################ ++mk="$srcdir/src/Makefile.am" ++# Extract all literal names from the definition of $(EXTRA_PROGRAMS) ++# in $mk but don't expand the variable references. ++# Append each literal name to $optional_bin_progs. 
++v=EXTRA_PROGRAMS ++for gl_i in `sed -n '/^'$v' =/,/[[^\]]$/p' $mk \ ++ | sed 's/^ *//;/^\$.*/d;/^'$v' =/d' \ ++ | tr -s '\\015\\012\\\\' ' '`; do ++ gl_ADD_PROG([optional_bin_progs], $gl_i) ++done ++ ++# As above, extract literal names from the definition of $(no_install__progs) ++# in $mk but don't expand the variable references. ++v=no_install__progs ++t=`sed -n '/^'$v' =/,/[[^\]]$/p' $mk \ ++ | sed 's/^ *//;/^\$.*/d;/^'$v' =/d' \ ++ | tr -s '\\015\\012\\\\' ' '` ++# Remove any trailing space. ++no_install_progs_default=`echo "$t"|sed 's/ $//'` ++ ++# Unfortunately, due to the way autoconf's AS_HELP_STRING works, the list ++# of default-not-installed programs, "arch hostname su", must appear in two ++# places: in this file below, and in $mk. Using "$no_install_progs_default" ++# below cannot work. And we can't substitute the names into $mk because ++# automake needs the literals, too. ++# The compromise is to ensure that the space-separated list extracted ++# above matches the literal 2nd argument below. ++c="$srcdir/configure.ac" ++re='^g''l_INCLUDE_EXCLUDE_PROG(.* [\[\(.*\)\]])' ++t=`sed -n '/'"$re"'/{s/'"$re"'/\1/;s/,/ /gp ++}' $c` ++case $t in ++ $no_install_progs_default) ;; ++ *) AC_MSG_ERROR([[internal error: g'l_INCLUDE_EXCLUDE_PROG's 2nd arg, $t, ++ does not match the list of default-not-installed programs ++ ($no_install_progs_default) also recorded in $mk]], ++ 1) ;; ++esac ++ ++# Given the name of a variable containing a space-separated list of ++# install-by-default programs and the actual list do-not-install-by-default ++# programs, modify the former variable to reflect any "do-install" and ++# "don't-install" requests. ++# I.e., add any program name specified via --enable-install-program=..., and ++# remove any program name specified via --enable-no-install-program=... ++# Note how the second argument below is a literal, with "," separators. 
++# That is required due to the way the macro works, and since the ++# corresponding ./configure option argument is comma-separated on input. ++gl_INCLUDE_EXCLUDE_PROG([optional_bin_progs], [arch,hostname,su]) ++ ++# Set INSTALL_SU if su installation has been requested via ++# --enable-install-program=su. ++AC_SUBST([INSTALL_SU]) ++case " $optional_bin_progs " in ++ *' su '*) INSTALL_SU=yes ;; ++ *) INSTALL_SU=no ;; ++esac ++ ++MAN=`echo "$optional_bin_progs "|sed 's/ /.1 /g;s/ $//'|tr -d '\\015\\012'` ++ ++# Change ginstall.1 to "install.1" in $MAN. ++MAN=`for m in $MAN; do test $m = ginstall.1 && m=install.1; echo $m; done \ ++ | tr '\015\012' ' '; echo` ++ ++# Remove [.1, since writing a portable rule for it in man/Makefile.am ++# is not practical. The sed LHS below uses the autoconf quadrigraph ++# representing '['. ++MAN=`echo "$MAN"|sed 's/\@<:@\.1//'` ++ ++OPTIONAL_BIN_PROGS=`echo "$optional_bin_progs "|sed 's/ /\$(EXEEXT) /g;s/ $//'` ++AC_SUBST([OPTIONAL_BIN_PROGS]) ++OPTIONAL_PKGLIB_PROGS=`echo "$optional_pkglib_progs " | sed 's/ $//'` ++AC_SUBST([OPTIONAL_PKGLIB_PROGS]) ++NO_INSTALL_PROGS_DEFAULT=$no_install_progs_default ++AC_SUBST([NO_INSTALL_PROGS_DEFAULT]) ++ ++AM_CONDITIONAL([CROSS_COMPILING], [test "$cross_compiling" = yes]) ++ ++# Arrange to rerun configure whenever the file, src/Makefile.am, ++# containing the list of program names changes. ++CONFIG_STATUS_DEPENDENCIES='$(top_srcdir)/src/Makefile.am' ++AC_SUBST([CONFIG_STATUS_DEPENDENCIES]) ++############################################################################ ++ ++AM_GNU_GETTEXT([external], [need-formatstring-macros]) ++AM_GNU_GETTEXT_VERSION([0.15]) ++ ++# For a test of uniq: it uses the $LOCALE_FR envvar.
++gt_LOCALE_FR ++ ++AC_CONFIG_FILES( ++ Makefile ++ doc/Makefile ++ lib/Makefile ++ man/Makefile ++ po/Makefile.in ++ src/Makefile ++ tests/Makefile ++ gnulib-tests/Makefile ++ ) ++AC_OUTPUT +diff -urNp coreutils-8.0-orig/doc/coreutils.texi coreutils-8.0/doc/coreutils.texi +--- coreutils-8.0-orig/doc/coreutils.texi 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/doc/coreutils.texi 2009-10-07 10:04:27.000000000 +0200 +@@ -14742,8 +14742,11 @@ to certain shells, etc.). + @findex syslog + @command{su} can optionally be compiled to use @code{syslog} to report + failed, and optionally successful, @command{su} attempts. (If the system +-supports @code{syslog}.) However, GNU @command{su} does not check if the +-user is a member of the @code{wheel} group; see below. ++supports @code{syslog}.) ++ ++This version of @command{su} has support for using PAM for ++authentication. You can edit @file{/etc/pam.d/su} to customize its ++behaviour. + + The program accepts the following options. Also see @ref{Common options}. + +@@ -14785,6 +14788,8 @@ environment variables except @env{TERM}, + @env{PATH} to a compiled-in default value. Change to @var{user}'s home + directory. Prepend @samp{-} to the shell's name, intended to make it + read its login startup file(s). ++Additionally, @env{DISPLAY} and @env{XAUTHORITY} environment variables ++are preserved as well for PAM functionality. + + @item -m + @itemx -p +@@ -14824,33 +14829,6 @@ Exit status: + the exit status of the subshell otherwise + @end display + +-@cindex wheel group, not supported +-@cindex group wheel, not supported +-@cindex fascism +-@subsection Why GNU @command{su} does not support the @samp{wheel} group +- +-(This section is by Richard Stallman.) +- +-@cindex Twenex +-@cindex MIT AI lab +-Sometimes a few of the users try to hold total power over all the +-rest.
For example, in 1984, a few users at the MIT AI lab decided to +-seize power by changing the operator password on the Twenex system and +-keeping it secret from everyone else. (I was able to thwart this coup +-and give power back to the users by patching the kernel, but I +-wouldn't know how to do that in Unix.) +- +-However, occasionally the rulers do tell someone. Under the usual +-@command{su} mechanism, once someone learns the root password who +-sympathizes with the ordinary users, he or she can tell the rest. The +-``wheel group'' feature would make this impossible, and thus cement the +-power of the rulers. +- +-I'm on the side of the masses, not that of the rulers. If you are +-used to supporting the bosses and sysadmins in whatever they do, you +-might find this idea strange at first. +- +- + @node timeout invocation + @section @command{timeout}: Run a command with a time limit + +diff -urNp coreutils-8.0-orig/doc/coreutils.texi.orig coreutils-8.0/doc/coreutils.texi.orig +--- coreutils-8.0-orig/doc/coreutils.texi.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/doc/coreutils.texi.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,15835 @@ ++\input texinfo ++@c %**start of header ++@setfilename coreutils.info ++@settitle @sc{gnu} Coreutils ++ ++@c %**end of header ++ ++@include version.texi ++@include constants.texi ++ ++@c Define new indices. ++@defcodeindex op ++@defcodeindex fl ++ ++@c Put everything in one index (arbitrarily chosen to be the concept index). ++@syncodeindex fl cp ++@syncodeindex fn cp ++@syncodeindex ky cp ++@syncodeindex op cp ++@syncodeindex pg cp ++@syncodeindex vr cp ++ ++@dircategory Basics ++@direntry ++* Coreutils: (coreutils). Core GNU (file, text, shell) utilities. ++* Common options: (coreutils)Common options. Common options. ++* File permissions: (coreutils)File permissions. Access modes. ++* Date input formats: (coreutils)Date input formats. 
++@end direntry ++ ++@c FIXME: the following need documentation ++@c * [: (coreutils)[ invocation. File/string tests. ++@c * pinky: (coreutils)pinky invocation. FIXME. ++@c * mktemp: (coreutils)mktemp invocation. FIXME. ++ ++@dircategory Individual utilities ++@direntry ++* arch: (coreutils)arch invocation. Print machine hardware name. ++* base64: (coreutils)base64 invocation. Base64 encode/decode data. ++* basename: (coreutils)basename invocation. Strip directory and suffix. ++* cat: (coreutils)cat invocation. Concatenate and write files. ++* chcon: (coreutils)chcon invocation. Change SELinux CTX of files. ++* chgrp: (coreutils)chgrp invocation. Change file groups. ++* chmod: (coreutils)chmod invocation. Change file permissions. ++* chown: (coreutils)chown invocation. Change file owners/groups. ++* chroot: (coreutils)chroot invocation. Specify the root directory. ++* cksum: (coreutils)cksum invocation. Print POSIX CRC checksum. ++* comm: (coreutils)comm invocation. Compare sorted files by line. ++* cp: (coreutils)cp invocation. Copy files. ++* csplit: (coreutils)csplit invocation. Split by context. ++* cut: (coreutils)cut invocation. Print selected parts of lines. ++* date: (coreutils)date invocation. Print/set system date and time. ++* dd: (coreutils)dd invocation. Copy and convert a file. ++* df: (coreutils)df invocation. Report file system disk usage. ++* dir: (coreutils)dir invocation. List directories briefly. ++* dircolors: (coreutils)dircolors invocation. Color setup for ls. ++* dirname: (coreutils)dirname invocation. Strip non-directory suffix. ++* du: (coreutils)du invocation. Report on disk usage. ++* echo: (coreutils)echo invocation. Print a line of text. ++* env: (coreutils)env invocation. Modify the environment. ++* expand: (coreutils)expand invocation. Convert tabs to spaces. ++* expr: (coreutils)expr invocation. Evaluate expressions. ++* factor: (coreutils)factor invocation. Print prime factors ++* false: (coreutils)false invocation. 
Do nothing, unsuccessfully. ++* fmt: (coreutils)fmt invocation. Reformat paragraph text. ++* fold: (coreutils)fold invocation. Wrap long input lines. ++* groups: (coreutils)groups invocation. Print group names a user is in. ++* head: (coreutils)head invocation. Output the first part of files. ++* hostid: (coreutils)hostid invocation. Print numeric host identifier. ++* hostname: (coreutils)hostname invocation. Print or set system name. ++* id: (coreutils)id invocation. Print user identity. ++* install: (coreutils)install invocation. Copy and change attributes. ++* join: (coreutils)join invocation. Join lines on a common field. ++* kill: (coreutils)kill invocation. Send a signal to processes. ++* link: (coreutils)link invocation. Make hard links between files. ++* ln: (coreutils)ln invocation. Make links between files. ++* logname: (coreutils)logname invocation. Print current login name. ++* ls: (coreutils)ls invocation. List directory contents. ++* md5sum: (coreutils)md5sum invocation. Print or check MD5 digests. ++* mkdir: (coreutils)mkdir invocation. Create directories. ++* mkfifo: (coreutils)mkfifo invocation. Create FIFOs (named pipes). ++* mknod: (coreutils)mknod invocation. Create special files. ++* mv: (coreutils)mv invocation. Rename files. ++* nice: (coreutils)nice invocation. Modify niceness. ++* nl: (coreutils)nl invocation. Number lines and write files. ++* nohup: (coreutils)nohup invocation. Immunize to hangups. ++* od: (coreutils)od invocation. Dump files in octal, etc. ++* paste: (coreutils)paste invocation. Merge lines of files. ++* pathchk: (coreutils)pathchk invocation. Check file name portability. ++* pr: (coreutils)pr invocation. Paginate or columnate files. ++* printenv: (coreutils)printenv invocation. Print environment variables. ++* printf: (coreutils)printf invocation. Format and print data. ++* ptx: (coreutils)ptx invocation. Produce permuted indexes. ++* pwd: (coreutils)pwd invocation. Print working directory. 
++* readlink: (coreutils)readlink invocation. Print referent of a symlink. ++* rm: (coreutils)rm invocation. Remove files. ++* rmdir: (coreutils)rmdir invocation. Remove empty directories. ++* runcon: (coreutils)runcon invocation. Run in specified SELinux CTX. ++* seq: (coreutils)seq invocation. Print numeric sequences ++* sha1sum: (coreutils)sha1sum invocation. Print or check SHA-1 digests. ++* sha2: (coreutils)sha2 utilities. Print or check SHA-2 digests. ++* shred: (coreutils)shred invocation. Remove files more securely. ++* shuf: (coreutils)shuf invocation. Shuffling text files. ++* sleep: (coreutils)sleep invocation. Delay for a specified time. ++* sort: (coreutils)sort invocation. Sort text files. ++* split: (coreutils)split invocation. Split into fixed-size pieces. ++* stat: (coreutils)stat invocation. Report file(system) status. ++* stdbuf: (coreutils)stdbuf invocation. Modify stdio buffering. ++* stty: (coreutils)stty invocation. Print/change terminal settings. ++* su: (coreutils)su invocation. Modify user and group ID. ++* sum: (coreutils)sum invocation. Print traditional checksum. ++* sync: (coreutils)sync invocation. Synchronize memory and disk. ++* tac: (coreutils)tac invocation. Reverse files. ++* tail: (coreutils)tail invocation. Output the last part of files. ++* tee: (coreutils)tee invocation. Redirect to multiple files. ++* test: (coreutils)test invocation. File/string tests. ++* timeout: (coreutils)timeout invocation. Run with time limit. ++* touch: (coreutils)touch invocation. Change file timestamps. ++* tr: (coreutils)tr invocation. Translate characters. ++* true: (coreutils)true invocation. Do nothing, successfully. ++* truncate: (coreutils)truncate invocation. Shrink/extend size of a file. ++* tsort: (coreutils)tsort invocation. Topological sort. ++* tty: (coreutils)tty invocation. Print terminal name. ++* uname: (coreutils)uname invocation. Print system information. ++* unexpand: (coreutils)unexpand invocation. Convert spaces to tabs. 
++* uniq: (coreutils)uniq invocation. Uniquify files. ++* unlink: (coreutils)unlink invocation. Removal via unlink(2). ++* uptime: (coreutils)uptime invocation. Print uptime and load. ++* users: (coreutils)users invocation. Print current user names. ++* vdir: (coreutils)vdir invocation. List directories verbosely. ++* wc: (coreutils)wc invocation. Line, word, and byte counts. ++* who: (coreutils)who invocation. Print who is logged in. ++* whoami: (coreutils)whoami invocation. Print effective user ID. ++* yes: (coreutils)yes invocation. Print a string indefinitely. ++@end direntry ++ ++@copying ++This manual documents version @value{VERSION} of the @sc{gnu} core ++utilities, including the standard programs for text and file manipulation. ++ ++Copyright @copyright{} 1994-1996, 2000-2009 Free Software Foundation, Inc. ++ ++@quotation ++Permission is granted to copy, distribute and/or modify this document ++under the terms of the GNU Free Documentation License, Version 1.3 or ++any later version published by the Free Software Foundation; with no ++Invariant Sections, with no Front-Cover Texts, and with no Back-Cover ++Texts. A copy of the license is included in the section entitled ``GNU ++Free Documentation License''. ++@end quotation ++@end copying ++ ++@titlepage ++@title @sc{gnu} @code{Coreutils} ++@subtitle Core GNU utilities ++@subtitle for version @value{VERSION}, @value{UPDATED} ++@author David MacKenzie et al. 
++ ++@page ++@vskip 0pt plus 1filll ++@insertcopying ++@end titlepage ++@shortcontents ++@contents ++ ++@ifnottex ++@node Top ++@top GNU Coreutils ++ ++@insertcopying ++@end ifnottex ++ ++@cindex core utilities ++@cindex text utilities ++@cindex shell utilities ++@cindex file utilities ++ ++@menu ++* Introduction:: Caveats, overview, and authors ++* Common options:: Common options ++* Output of entire files:: cat tac nl od base64 ++* Formatting file contents:: fmt pr fold ++* Output of parts of files:: head tail split csplit ++* Summarizing files:: wc sum cksum md5sum sha1sum sha2 ++* Operating on sorted files:: sort shuf uniq comm ptx tsort ++* Operating on fields:: cut paste join ++* Operating on characters:: tr expand unexpand ++* Directory listing:: ls dir vdir dircolors ++* Basic operations:: cp dd install mv rm shred ++* Special file types:: mkdir rmdir unlink mkfifo mknod ln link readlink ++* Changing file attributes:: chgrp chmod chown touch ++* Disk usage:: df du stat sync truncate ++* Printing text:: echo printf yes ++* Conditions:: false true test expr ++* Redirection:: tee ++* File name manipulation:: dirname basename pathchk ++* Working context:: pwd stty printenv tty ++* User information:: id logname whoami groups users who ++* System context:: date arch uname hostname hostid uptime ++* SELinux context:: chcon runcon ++* Modified command invocation:: chroot env nice nohup stdbuf su timeout ++* Process control:: kill ++* Delaying:: sleep ++* Numeric operations:: factor seq ++* File permissions:: Access modes ++* Date input formats:: Specifying date strings ++* Opening the software toolbox:: The software tools philosophy ++* GNU Free Documentation License:: Copying and sharing this manual ++* Concept index:: General index ++ ++@detailmenu ++ --- The Detailed Node Listing --- ++ ++Common Options ++ ++* Exit status:: Indicating program success or failure ++* Backup options:: Backup options ++* Block size:: Block size ++* Signal specifications:: Specifying 
signals ++* Disambiguating names and IDs:: chgrp and chown owner and group syntax ++* Random sources:: Sources of random data ++* Target directory:: Target directory ++* Trailing slashes:: Trailing slashes ++* Traversing symlinks:: Traversing symlinks to directories ++* Treating / specially:: Treating / specially ++* Standards conformance:: Standards conformance ++ ++Output of entire files ++ ++* cat invocation:: Concatenate and write files ++* tac invocation:: Concatenate and write files in reverse ++* nl invocation:: Number lines and write files ++* od invocation:: Write files in octal or other formats ++* base64 invocation:: Transform data into printable data ++ ++Formatting file contents ++ ++* fmt invocation:: Reformat paragraph text ++* pr invocation:: Paginate or columnate files for printing ++* fold invocation:: Wrap input lines to fit in specified width ++ ++Output of parts of files ++ ++* head invocation:: Output the first part of files ++* tail invocation:: Output the last part of files ++* split invocation:: Split a file into fixed-size pieces ++* csplit invocation:: Split a file into context-determined pieces ++ ++Summarizing files ++ ++* wc invocation:: Print newline, word, and byte counts ++* sum invocation:: Print checksum and block counts ++* cksum invocation:: Print CRC checksum and byte counts ++* md5sum invocation:: Print or check MD5 digests ++* sha1sum invocation:: Print or check SHA-1 digests ++* sha2 utilities:: Print or check SHA-2 digests ++ ++Operating on sorted files ++ ++* sort invocation:: Sort text files ++* shuf invocation:: Shuffle text files ++* uniq invocation:: Uniquify files ++* comm invocation:: Compare two sorted files line by line ++* ptx invocation:: Produce a permuted index of file contents ++* tsort invocation:: Topological sort ++ ++@command{ptx}: Produce permuted indexes ++ ++* General options in ptx:: Options which affect general program behavior ++* Charset selection in ptx:: Underlying character set considerations ++* 
Input processing in ptx:: Input fields, contexts, and keyword selection ++* Output formatting in ptx:: Types of output format, and sizing the fields ++* Compatibility in ptx:: The @acronym{GNU} extensions to @command{ptx} ++ ++Operating on fields ++ ++* cut invocation:: Print selected parts of lines ++* paste invocation:: Merge lines of files ++* join invocation:: Join lines on a common field ++ ++Operating on characters ++ ++* tr invocation:: Translate, squeeze, and/or delete characters ++* expand invocation:: Convert tabs to spaces ++* unexpand invocation:: Convert spaces to tabs ++ ++@command{tr}: Translate, squeeze, and/or delete characters ++ ++* Character sets:: Specifying sets of characters ++* Translating:: Changing one set of characters to another ++* Squeezing:: Squeezing repeats and deleting ++ ++Directory listing ++ ++* ls invocation:: List directory contents ++* dir invocation:: Briefly list directory contents ++* vdir invocation:: Verbosely list directory contents ++* dircolors invocation:: Color setup for @command{ls} ++ ++@command{ls}: List directory contents ++ ++* Which files are listed:: Which files are listed ++* What information is listed:: What information is listed ++* Sorting the output:: Sorting the output ++* Details about version sort:: More details about version sort ++* General output formatting:: General output formatting ++* Formatting the file names:: Formatting the file names ++ ++Basic operations ++ ++* cp invocation:: Copy files and directories ++* dd invocation:: Convert and copy a file ++* install invocation:: Copy files and set attributes ++* mv invocation:: Move (rename) files ++* rm invocation:: Remove files or directories ++* shred invocation:: Remove files more securely ++ ++Special file types ++ ++* link invocation:: Make a hard link via the link syscall ++* ln invocation:: Make links between files ++* mkdir invocation:: Make directories ++* mkfifo invocation:: Make FIFOs (named pipes) ++* mknod invocation:: Make block or 
character special files ++* readlink invocation:: Print value of a symlink or canonical file name ++* rmdir invocation:: Remove empty directories ++* unlink invocation:: Remove files via unlink syscall ++ ++Changing file attributes ++ ++* chown invocation:: Change file owner and group ++* chgrp invocation:: Change group ownership ++* chmod invocation:: Change access permissions ++* touch invocation:: Change file timestamps ++ ++Disk usage ++ ++* df invocation:: Report file system disk space usage ++* du invocation:: Estimate file space usage ++* stat invocation:: Report file or file system status ++* sync invocation:: Synchronize data on disk with memory ++* truncate invocation:: Shrink or extend the size of a file ++ ++Printing text ++ ++* echo invocation:: Print a line of text ++* printf invocation:: Format and print data ++* yes invocation:: Print a string until interrupted ++ ++Conditions ++ ++* false invocation:: Do nothing, unsuccessfully ++* true invocation:: Do nothing, successfully ++* test invocation:: Check file types and compare values ++* expr invocation:: Evaluate expressions ++ ++@command{test}: Check file types and compare values ++ ++* File type tests:: File type tests ++* Access permission tests:: Access permission tests ++* File characteristic tests:: File characteristic tests ++* String tests:: String tests ++* Numeric tests:: Numeric tests ++ ++@command{expr}: Evaluate expression ++ ++* String expressions:: + : match substr index length ++* Numeric expressions:: + - * / % ++* Relations for expr:: | & < <= = == != >= > ++* Examples of expr:: Examples of using @command{expr} ++ ++Redirection ++ ++* tee invocation:: Redirect output to multiple files or processes ++ ++File name manipulation ++ ++* basename invocation:: Strip directory and suffix from a file name ++* dirname invocation:: Strip non-directory suffix from a file name ++* pathchk invocation:: Check file name validity and portability ++ ++Working context ++ ++* pwd invocation:: Print 
working directory ++* stty invocation:: Print or change terminal characteristics ++* printenv invocation:: Print all or some environment variables ++* tty invocation:: Print file name of terminal on standard input ++ ++@command{stty}: Print or change terminal characteristics ++ ++* Control:: Control settings ++* Input:: Input settings ++* Output:: Output settings ++* Local:: Local settings ++* Combination:: Combination settings ++* Characters:: Special characters ++* Special:: Special settings ++ ++User information ++ ++* id invocation:: Print user identity ++* logname invocation:: Print current login name ++* whoami invocation:: Print effective user ID ++* groups invocation:: Print group names a user is in ++* users invocation:: Print login names of users currently logged in ++* who invocation:: Print who is currently logged in ++ ++System context ++ ++* arch invocation:: Print machine hardware name ++* date invocation:: Print or set system date and time ++* uname invocation:: Print system information ++* hostname invocation:: Print or set system name ++* hostid invocation:: Print numeric host identifier ++* uptime invocation:: Print system uptime and load ++ ++@command{date}: Print or set system date and time ++ ++* Time conversion specifiers:: %[HIklMNpPrRsSTXzZ] ++* Date conversion specifiers:: %[aAbBcCdDeFgGhjmuUVwWxyY] ++* Literal conversion specifiers:: %[%nt] ++* Padding and other flags:: Pad with zeros, spaces, etc. 
++* Setting the time:: Changing the system clock ++* Options for date:: Instead of the current time ++* Date input formats:: Specifying date strings ++* Examples of date:: Examples ++ ++SELinux context ++ ++* chcon invocation:: Change SELinux context of file ++* runcon invocation:: Run a command in specified SELinux context ++ ++Modified command invocation ++ ++* chroot invocation:: Run a command with a different root directory ++* env invocation:: Run a command in a modified environment ++* nice invocation:: Run a command with modified niceness ++* nohup invocation:: Run a command immune to hangups ++* stdbuf invocation:: Run a command with modified I/O buffering ++* su invocation:: Run a command with substitute user and group ID ++* timeout invocation:: Run a command with a time limit ++ ++Process control ++ ++* kill invocation:: Sending a signal to processes. ++ ++Delaying ++ ++* sleep invocation:: Delay for a specified time ++ ++Numeric operations ++ ++* factor invocation:: Print prime factors ++* seq invocation:: Print numeric sequences ++ ++File permissions ++ ++* Mode Structure:: Structure of file mode bits ++* Symbolic Modes:: Mnemonic representation of file mode bits ++* Numeric Modes:: File mode bits as octal numbers ++* Directory Setuid and Setgid:: Set-user-ID and set-group-ID on directories ++ ++Date input formats ++ ++* General date syntax:: Common rules ++* Calendar date items:: 19 Dec 1994 ++* Time of day items:: 9:20pm ++* Time zone items:: @sc{est}, @sc{pdt}, @sc{gmt} ++* Day of week items:: Monday and others ++* Relative items in date strings:: next tuesday, 2 years ago ++* Pure numbers in date strings:: 19931219, 1440 ++* Seconds since the Epoch:: @@1078100502 ++* Specifying time zone rules:: TZ="America/New_York", TZ="UTC0" ++* Authors of get_date:: Bellovin, Eggert, Salz, Berets, et al ++ ++Opening the software toolbox ++ ++* Toolbox introduction:: Toolbox introduction ++* I/O redirection:: I/O redirection ++* The who command:: The 
@command{who} command ++* The cut command:: The @command{cut} command ++* The sort command:: The @command{sort} command ++* The uniq command:: The @command{uniq} command ++* Putting the tools together:: Putting the tools together ++ ++Copying This Manual ++ ++* GNU Free Documentation License:: Copying and sharing this manual ++ ++@end detailmenu ++@end menu ++ ++ ++@node Introduction ++@chapter Introduction ++ ++This manual is a work in progress: many sections make no attempt to explain ++basic concepts in a way suitable for novices. Thus, if you are interested, ++please get involved in improving this manual. The entire @sc{gnu} community ++will benefit. ++ ++@cindex @acronym{POSIX} ++The @sc{gnu} utilities documented here are mostly compatible with the ++@acronym{POSIX} standard. ++@cindex bugs, reporting ++Please report bugs to @email{bug-coreutils@@gnu.org}. Remember ++to include the version number, machine architecture, input files, and ++any other information needed to reproduce the bug: your input, what you ++expected, what you got, and why it is wrong. Diffs are welcome, but ++please include a description of the problem as well, since this is ++sometimes difficult to infer. @xref{Bugs, , , gcc, Using and Porting GNU CC}. ++ ++@cindex Berry, K. ++@cindex Paterson, R. ++@cindex Stallman, R. ++@cindex Pinard, F. ++@cindex MacKenzie, D. ++@cindex Meyering, J. ++@cindex Youmans, B. ++This manual was originally derived from the Unix man pages in the ++distributions, which were written by David MacKenzie and updated by Jim ++Meyering. What you are reading now is the authoritative documentation ++for these utilities; the man pages are no longer being maintained. The ++original @command{fmt} man page was written by Ross Paterson. Fran@,{c}ois ++Pinard did the initial conversion to Texinfo format. Karl Berry did the ++indexing, some reorganization, and editing of the results. 
Brian ++Youmans of the Free Software Foundation office staff combined the ++manuals for textutils, fileutils, and sh-utils to produce the present ++omnibus manual. Richard Stallman contributed his usual invaluable ++insights to the overall process. ++ ++@node Common options ++@chapter Common options ++ ++@macro optBackup ++@item -b ++@itemx @w{@kbd{--backup}[=@var{method}]} ++@opindex -b ++@opindex --backup ++@vindex VERSION_CONTROL ++@cindex backups, making ++@xref{Backup options}. ++Make a backup of each file that would otherwise be overwritten or removed. ++@end macro ++ ++@macro optBackupSuffix ++@item -S @var{suffix} ++@itemx --suffix=@var{suffix} ++@opindex -S ++@opindex --suffix ++Append @var{suffix} to each backup file made with @option{-b}. ++@xref{Backup options}. ++@end macro ++ ++@macro optTargetDirectory ++@item -t @var{directory} ++@itemx @w{@kbd{--target-directory}=@var{directory}} ++@opindex -t ++@opindex --target-directory ++@cindex target directory ++@cindex destination directory ++Specify the destination @var{directory}. ++@xref{Target directory}. ++@end macro ++ ++@macro optNoTargetDirectory ++@item -T ++@itemx --no-target-directory ++@opindex -T ++@opindex --no-target-directory ++@cindex target directory ++@cindex destination directory ++Do not treat the last operand specially when it is a directory or a ++symbolic link to a directory. @xref{Target directory}. ++@end macro ++ ++@macro optSi ++@itemx --si ++@opindex --si ++@cindex SI output ++Append an SI-style abbreviation to each size, such as @samp{M} for ++megabytes. Powers of 1000 are used, not 1024; @samp{M} stands for ++1,000,000 bytes. This option is equivalent to ++@option{--block-size=si}. Use the @option{-h} or ++@option{--human-readable} option if ++you prefer powers of 1024. 
++@end macro ++ ++@macro optHumanReadable ++@item -h ++@itemx --human-readable ++@opindex -h ++@opindex --human-readable ++@cindex human-readable output ++Append a size letter to each size, such as @samp{M} for mebibytes. ++Powers of 1024 are used, not 1000; @samp{M} stands for 1,048,576 bytes. ++This option is equivalent to @option{--block-size=human-readable}. ++Use the @option{--si} option if you prefer powers of 1000. ++@end macro ++ ++@macro optStripTrailingSlashes ++@itemx @w{@kbd{--strip-trailing-slashes}} ++@opindex --strip-trailing-slashes ++@cindex stripping trailing slashes ++Remove any trailing slashes from each @var{source} argument. ++@xref{Trailing slashes}. ++@end macro ++ ++@macro mayConflictWithShellBuiltIn{cmd} ++@cindex conflicts with shell built-ins ++@cindex built-in shell commands, conflicts with ++Due to shell aliases and built-in @command{\cmd\} command, using an ++unadorned @command{\cmd\} interactively or in a script may get you ++different functionality than that described here. Invoke it via ++@command{env} (i.e., @code{env \cmd\ @dots{}}) to avoid interference ++from the shell. ++ ++@end macro ++ ++@macro multiplierSuffixes{varName} ++@var{\varName\} may be, or may be an integer optionally followed by, ++one of the following multiplicative suffixes: ++@example ++@samp{b} => 512 ("blocks") ++@samp{KB} => 1000 (KiloBytes) ++@samp{K} => 1024 (KibiBytes) ++@samp{MB} => 1000*1000 (MegaBytes) ++@samp{M} => 1024*1024 (MebiBytes) ++@samp{GB} => 1000*1000*1000 (GigaBytes) ++@samp{G} => 1024*1024*1024 (GibiBytes) ++@end example ++and so on for @samp{T}, @samp{P}, @samp{E}, @samp{Z}, and @samp{Y}. ++@end macro ++ ++@c FIXME: same as above, but no ``blocks'' line. 
++@macro multiplierSuffixesNoBlocks{varName} ++@var{\varName\} may be, or may be an integer optionally followed by, ++one of the following multiplicative suffixes: ++@example ++@samp{KB} => 1000 (KiloBytes) ++@samp{K} => 1024 (KibiBytes) ++@samp{MB} => 1000*1000 (MegaBytes) ++@samp{M} => 1024*1024 (MebiBytes) ++@samp{GB} => 1000*1000*1000 (GigaBytes) ++@samp{G} => 1024*1024*1024 (GibiBytes) ++@end example ++and so on for @samp{T}, @samp{P}, @samp{E}, @samp{Z}, and @samp{Y}. ++@end macro ++ ++@cindex common options ++ ++Certain options are available in all of these programs. Rather than ++writing identical descriptions for each of the programs, they are ++described here. (In fact, every @sc{gnu} program accepts (or should accept) ++these options.) ++ ++@vindex POSIXLY_CORRECT ++Normally options and operands can appear in any order, and programs act ++as if all the options appear before any operands. For example, ++@samp{sort -r passwd -t :} acts like @samp{sort -r -t : passwd}, since ++@samp{:} is an option-argument of @option{-t}. However, if the ++@env{POSIXLY_CORRECT} environment variable is set, options must appear ++before operands, unless otherwise specified for a particular command. ++ ++A few programs can usefully have trailing operands with leading ++@samp{-}. With such a program, options must precede operands even if ++@env{POSIXLY_CORRECT} is not set, and this fact is noted in the ++program description. For example, the @command{env} command's options ++must appear before its operands, since in some cases the operands ++specify a command that itself contains options. ++ ++Most programs that accept long options recognize unambiguous ++abbreviations of those options. For example, @samp{rmdir ++--ignore-fail-on-non-empty} can be invoked as @samp{rmdir ++--ignore-fail} or even @samp{rmdir --i}. Ambiguous options, such as ++@samp{ls --h}, are identified as such. 
++ ++Some of these programs recognize the @option{--help} and @option{--version} ++options only when one of them is the sole command line argument. For ++these programs, abbreviations of the long options are not always recognized. ++ ++@table @samp ++ ++@item --help ++@opindex --help ++@cindex help, online ++Print a usage message listing all available options, then exit successfully. ++ ++@item --version ++@opindex --version ++@cindex version number, finding ++Print the version number, then exit successfully. ++ ++@item -- ++@opindex -- ++@cindex option delimiter ++Delimit the option list. Later arguments, if any, are treated as ++operands even if they begin with @samp{-}. For example, @samp{sort -- ++-r} reads from the file named @file{-r}. ++ ++@end table ++ ++@cindex standard input ++@cindex standard output ++A single @samp{-} operand is not really an option, though it looks like one. It ++stands for standard input, or for standard output if that is clear from ++the context. For example, @samp{sort -} reads from standard input, ++and is equivalent to plain @samp{sort}, and @samp{tee -} writes an ++extra copy of its input to standard output. Unless otherwise ++specified, @samp{-} can appear as any operand that requires a file ++name. ++ ++@menu ++* Exit status:: Indicating program success or failure. ++* Backup options:: -b -S, in some programs. ++* Block size:: BLOCK_SIZE and --block-size, in some programs. ++* Signal specifications:: Specifying signals using the --signal option. ++* Disambiguating names and IDs:: chgrp and chown owner and group syntax ++* Random sources:: --random-source, in some programs. ++* Target directory:: Specifying a target directory, in some programs. ++* Trailing slashes:: --strip-trailing-slashes, in some programs. ++* Traversing symlinks:: -H, -L, or -P, in some programs. ++* Treating / specially:: --preserve-root and --no-preserve-root. 
++* Special built-in utilities:: @command{break}, @command{:}, @command{eval}, @dots{} ++* Standards conformance:: Conformance to the @acronym{POSIX} standard. ++@end menu ++ ++ ++@node Exit status ++@section Exit status ++ ++@macro exitstatus ++An exit status of zero indicates success, ++and a nonzero value indicates failure. ++@end macro ++ ++Nearly every command invocation yields an integral @dfn{exit status} ++that can be used to change how other commands work. ++For the vast majority of commands, an exit status of zero indicates ++success. Failure is indicated by a nonzero value---typically ++@samp{1}, though it may differ on unusual platforms as @acronym{POSIX} ++requires only that it be nonzero. ++ ++However, some of the programs documented here do produce ++other exit status values and a few associate different ++meanings with the values @samp{0} and @samp{1}. ++Here are some of the exceptions: ++@command{chroot}, @command{env}, @command{expr}, @command{nice}, ++@command{nohup}, @command{printenv}, @command{sort}, @command{stdbuf}, ++@command{su}, @command{test}, @command{timeout}, @command{tty}. ++ ++ ++@node Backup options ++@section Backup options ++ ++@cindex backup options ++ ++Some @sc{gnu} programs (at least @command{cp}, @command{install}, ++@command{ln}, and @command{mv}) optionally make backups of files ++before writing new versions. ++These options control the details of these backups. The options are also ++briefly mentioned in the descriptions of the particular programs. ++ ++@table @samp ++ ++@item -b ++@itemx @w{@kbd{--backup}[=@var{method}]} ++@opindex -b ++@opindex --backup ++@vindex VERSION_CONTROL ++@cindex backups, making ++Make a backup of each file that would otherwise be overwritten or removed. ++Without this option, the original versions are destroyed. ++Use @var{method} to determine the type of backups to make. 
++When this option is used but @var{method} is not specified, ++then the value of the @env{VERSION_CONTROL} ++environment variable is used. And if @env{VERSION_CONTROL} is not set, ++the default backup type is @samp{existing}. ++ ++Note that the short form of this option, @option{-b}, does not accept any ++argument. Using @option{-b} is equivalent to using @option{--backup=existing}. ++ ++@vindex version-control @r{Emacs variable} ++This option corresponds to the Emacs variable @samp{version-control}; ++the values for @var{method} are the same as those used in Emacs. ++This option also accepts more descriptive names. ++The valid @var{method}s are (unique abbreviations are accepted): ++ ++@table @samp ++@item none ++@itemx off ++@opindex none @r{backup method} ++Never make backups. ++ ++@item numbered ++@itemx t ++@opindex numbered @r{backup method} ++Always make numbered backups. ++ ++@item existing ++@itemx nil ++@opindex existing @r{backup method} ++Make numbered backups of files that already have them, simple backups ++of the others. ++ ++@item simple ++@itemx never ++@opindex simple @r{backup method} ++Always make simple backups. Please note @samp{never} is not to be ++confused with @samp{none}. ++ ++@end table ++ ++@item -S @var{suffix} ++@itemx --suffix=@var{suffix} ++@opindex -S ++@opindex --suffix ++@cindex backup suffix ++@vindex SIMPLE_BACKUP_SUFFIX ++Append @var{suffix} to each backup file made with @option{-b}. If this ++option is not specified, the value of the @env{SIMPLE_BACKUP_SUFFIX} ++environment variable is used. And if @env{SIMPLE_BACKUP_SUFFIX} is not ++set, the default is @samp{~}, just as in Emacs. ++ ++@end table ++ ++@node Block size ++@section Block size ++ ++@cindex block size ++ ++Some @sc{gnu} programs (at least @command{df}, @command{du}, and ++@command{ls}) display sizes in ``blocks''. You can adjust the block size ++and method of display to make sizes easier to read.
The block size ++used for display is independent of any file system block size. ++Fractional block counts are rounded up to the nearest integer. ++ ++@opindex --block-size=@var{size} ++@vindex BLOCKSIZE ++@vindex BLOCK_SIZE ++@vindex DF_BLOCK_SIZE ++@vindex DU_BLOCK_SIZE ++@vindex LS_BLOCK_SIZE ++@vindex POSIXLY_CORRECT@r{, and block size} ++ ++The default block size is chosen by examining the following environment ++variables in turn; the first one that is set determines the block size. ++ ++@table @code ++ ++@item DF_BLOCK_SIZE ++This specifies the default block size for the @command{df} command. ++Similarly, @env{DU_BLOCK_SIZE} specifies the default for @command{du} and ++@env{LS_BLOCK_SIZE} for @command{ls}. ++ ++@item BLOCK_SIZE ++This specifies the default block size for all three commands, if the ++above command-specific environment variables are not set. ++ ++@item BLOCKSIZE ++This specifies the default block size for all values that are normally ++printed as blocks, if neither @env{BLOCK_SIZE} nor the above ++command-specific environment variables are set. Unlike the other ++environment variables, @env{BLOCKSIZE} does not affect values that are ++normally printed as byte counts, e.g., the file sizes contained in ++@code{ls -l} output. ++ ++@item POSIXLY_CORRECT ++If neither @env{@var{command}_BLOCK_SIZE}, nor @env{BLOCK_SIZE}, nor ++@env{BLOCKSIZE} is set, but this variable is set, the block size ++defaults to 512. ++ ++@end table ++ ++If none of the above environment variables are set, the block size ++currently defaults to 1024 bytes in most contexts, but this number may ++change in the future. For @command{ls} file sizes, the block size ++defaults to 1 byte. ++ ++@cindex human-readable output ++@cindex SI output ++ ++A block size specification can be a positive integer specifying the number ++of bytes per block, or it can be @code{human-readable} or @code{si} to ++select a human-readable format. 
Integers may be followed by suffixes ++that are upward compatible with the ++@uref{http://www.bipm.fr/enus/3_SI/si-prefixes.html, SI prefixes} ++for decimal multiples and with the ++@uref{http://physics.nist.gov/cuu/Units/binary.html, IEC 60027-2 ++prefixes for binary multiples}. ++ ++With human-readable formats, output sizes are followed by a size letter ++such as @samp{M} for megabytes. @code{BLOCK_SIZE=human-readable} uses ++powers of 1024; @samp{M} stands for 1,048,576 bytes. ++@code{BLOCK_SIZE=si} is similar, but uses powers of 1000 and appends ++@samp{B}; @samp{MB} stands for 1,000,000 bytes. ++ ++@vindex LC_NUMERIC ++A block size specification preceded by @samp{'} causes output sizes to ++be displayed with thousands separators. The @env{LC_NUMERIC} locale ++specifies the thousands separator and grouping. For example, in an ++American English locale, @samp{--block-size="'1kB"} would cause a size ++of 1234000 bytes to be displayed as @samp{1,234}. In the default C ++locale, there is no thousands separator so a leading @samp{'} has no ++effect. ++ ++An integer block size can be followed by a suffix to specify a ++multiple of that size. A bare size letter, ++or one followed by @samp{iB}, specifies ++a multiple using powers of 1024. A size letter followed by @samp{B} ++specifies powers of 1000 instead. For example, @samp{1M} and ++@samp{1MiB} are equivalent to @samp{1048576}, whereas @samp{1MB} is ++equivalent to @samp{1000000}. ++ ++A plain suffix without a preceding integer acts as if @samp{1} were ++prepended, except that it causes a size indication to be appended to ++the output. For example, @samp{--block-size="kB"} displays 3000 as ++@samp{3kB}. ++ ++The following suffixes are defined. Large sizes like @code{1Y} ++may be rejected by your computer due to limitations of its arithmetic. ++ ++@table @samp ++@item kB ++@cindex kilobyte, definition of ++kilobyte: @math{10^3 = 1000}. 
++@item k ++@itemx K ++@itemx KiB ++@cindex kibibyte, definition of ++kibibyte: @math{2^{10} = 1024}. @samp{K} is special: the SI prefix is ++@samp{k} and the IEC 60027-2 prefix is @samp{Ki}, but tradition and ++@acronym{POSIX} use @samp{k} to mean @samp{KiB}. ++@item MB ++@cindex megabyte, definition of ++megabyte: @math{10^6 = 1,000,000}. ++@item M ++@itemx MiB ++@cindex mebibyte, definition of ++mebibyte: @math{2^{20} = 1,048,576}. ++@item GB ++@cindex gigabyte, definition of ++gigabyte: @math{10^9 = 1,000,000,000}. ++@item G ++@itemx GiB ++@cindex gibibyte, definition of ++gibibyte: @math{2^{30} = 1,073,741,824}. ++@item TB ++@cindex terabyte, definition of ++terabyte: @math{10^{12} = 1,000,000,000,000}. ++@item T ++@itemx TiB ++@cindex tebibyte, definition of ++tebibyte: @math{2^{40} = 1,099,511,627,776}. ++@item PB ++@cindex petabyte, definition of ++petabyte: @math{10^{15} = 1,000,000,000,000,000}. ++@item P ++@itemx PiB ++@cindex pebibyte, definition of ++pebibyte: @math{2^{50} = 1,125,899,906,842,624}. ++@item EB ++@cindex exabyte, definition of ++exabyte: @math{10^{18} = 1,000,000,000,000,000,000}. ++@item E ++@itemx EiB ++@cindex exbibyte, definition of ++exbibyte: @math{2^{60} = 1,152,921,504,606,846,976}. ++@item ZB ++@cindex zettabyte, definition of ++zettabyte: @math{10^{21} = 1,000,000,000,000,000,000,000}. ++@item Z ++@itemx ZiB ++zebibyte: @math{2^{70} = 1,180,591,620,717,411,303,424}. ++(@samp{Zi} is a @acronym{GNU} extension to IEC 60027-2.) ++@item YB ++@cindex yottabyte, definition of ++yottabyte: @math{10^{24} = 1,000,000,000,000,000,000,000,000}. ++@item Y ++@itemx YiB ++yobibyte: @math{2^{80} = 1,208,925,819,614,629,174,706,176}. ++(@samp{Yi} is a @acronym{GNU} extension to IEC 60027-2.) ++@end table ++ ++@opindex -k ++@opindex -h ++@opindex --block-size ++@opindex --human-readable ++@opindex --si ++ ++Block size defaults can be overridden by an explicit ++@option{--block-size=@var{size}} option.
The @option{-k} ++option is equivalent to @option{--block-size=1K}, which ++is the default unless the @env{POSIXLY_CORRECT} environment variable is ++set. The @option{-h} or @option{--human-readable} option is equivalent to ++@option{--block-size=human-readable}. The @option{--si} option is ++equivalent to @option{--block-size=si}. ++ ++@node Signal specifications ++@section Signal specifications ++@cindex signals, specifying ++ ++A @var{signal} may be a signal name like @samp{HUP}, or a signal ++number like @samp{1}, or an exit status of a process terminated by the ++signal. A signal name can be given in canonical form or prefixed by ++@samp{SIG}. The case of the letters is ignored. The following signal names ++and numbers are supported on all @acronym{POSIX} compliant systems: ++ ++@table @samp ++@item HUP ++1. Hangup. ++@item INT ++2. Terminal interrupt. ++@item QUIT ++3. Terminal quit. ++@item ABRT ++6. Process abort. ++@item KILL ++9. Kill (cannot be caught or ignored). ++@item ALRM ++14. Alarm Clock. ++@item TERM ++15. Termination. ++@end table ++ ++@noindent ++Other supported signal names have system-dependent corresponding ++numbers. All systems conforming to @acronym{POSIX} 1003.1-2001 also ++support the following signals: ++ ++@table @samp ++@item BUS ++Access to an undefined portion of a memory object. ++@item CHLD ++Child process terminated, stopped, or continued. ++@item CONT ++Continue executing, if stopped. ++@item FPE ++Erroneous arithmetic operation. ++@item ILL ++Illegal Instruction. ++@item PIPE ++Write on a pipe with no one to read it. ++@item SEGV ++Invalid memory reference. ++@item STOP ++Stop executing (cannot be caught or ignored). ++@item TSTP ++Terminal stop. ++@item TTIN ++Background process attempting read. ++@item TTOU ++Background process attempting write. ++@item URG ++High bandwidth data is available at a socket. ++@item USR1 ++User-defined signal 1. ++@item USR2 ++User-defined signal 2. 
++@end table ++ ++@noindent ++@acronym{POSIX} 1003.1-2001 systems that support the @acronym{XSI} extension ++also support the following signals: ++ ++@table @samp ++@item POLL ++Pollable event. ++@item PROF ++Profiling timer expired. ++@item SYS ++Bad system call. ++@item TRAP ++Trace/breakpoint trap. ++@item VTALRM ++Virtual timer expired. ++@item XCPU ++CPU time limit exceeded. ++@item XFSZ ++File size limit exceeded. ++@end table ++ ++@noindent ++@acronym{POSIX} 1003.1-2001 systems that support the @acronym{XRT} extension ++also support at least eight real-time signals called @samp{RTMIN}, ++@samp{RTMIN+1}, @dots{}, @samp{RTMAX-1}, @samp{RTMAX}. ++ ++@node Disambiguating names and IDs ++@section chown and chgrp: Disambiguating user names and IDs ++@cindex user names, disambiguating ++@cindex user IDs, disambiguating ++@cindex group names, disambiguating ++@cindex group IDs, disambiguating ++@cindex disambiguating group names and IDs ++ ++Since the @var{owner} and @var{group} arguments to @command{chown} and ++@command{chgrp} may be specified as names or numeric IDs, there is an ++apparent ambiguity. ++What if a user or group @emph{name} is a string of digits? ++@footnote{Using a number as a user name is common in some environments.} ++Should the command interpret it as a user name or as an ID? ++@acronym{POSIX} requires that @command{chown} and @command{chgrp} ++first attempt to resolve the specified string as a name, and ++only once that fails, then try to interpret it as an ID. ++This is troublesome when you want to specify a numeric ID, say 42, ++and it must work even in a pathological situation where ++@samp{42} is a user name that maps to some other user ID, say 1000. ++Simply invoking @code{chown 42 F} will set @file{F}'s owner ID to ++1000---not what you intended. ++ ++GNU @command{chown} and @command{chgrp} provide a way to work around this, ++that at the same time may result in a significant performance improvement ++by eliminating a database look-up.
++Simply precede each numeric user ID and/or group ID with a @samp{+}, ++in order to force its interpretation as an integer: ++ ++@example ++chown +42 F ++chgrp +$numeric_group_id another-file ++chown +0:+0 / ++@end example ++ ++GNU @command{chown} and @command{chgrp} ++skip the name look-up process for each @samp{+}-prefixed string, ++because a string containing @samp{+} is never a valid user or group name. ++This syntax is accepted on most common Unix systems, but not on Solaris 10. ++ ++@node Random sources ++@section Sources of random data ++ ++@cindex random sources ++ ++The @command{shuf}, @command{shred}, and @command{sort} commands ++sometimes need random data to do their work. For example, @samp{sort ++-R} must choose a hash function at random, and it needs random data to ++make this selection. ++ ++By default these commands use an internal pseudorandom generator ++initialized by a small amount of entropy, but can be directed to use ++an external source with the @option{--random-source=@var{file}} option. ++An error is reported if @var{file} does not contain enough bytes. ++ ++For example, the device file @file{/dev/urandom} could be used as the ++source of random data. Typically, this device gathers environmental ++noise from device drivers and other sources into an entropy pool, and ++uses the pool to generate random bits. If the pool is short of data, ++the device reuses the internal pool to produce more bits, using a ++cryptographically secure pseudorandom number generator. But be aware ++that this device is not designed for bulk random data generation ++and is relatively slow. ++ ++@file{/dev/urandom} suffices for most practical uses, but applications ++requiring high-value or long-term protection of private data may ++require an alternate data source like @file{/dev/random} or ++@file{/dev/arandom}. The set of available sources depends on your ++operating system. 
++ ++To reproduce the results of an earlier invocation of a command, you ++can save some random data into a file and then use that file as the ++random source in earlier and later invocations of the command. ++ ++@node Target directory ++@section Target directory ++ ++@cindex target directory ++ ++The @command{cp}, @command{install}, @command{ln}, and @command{mv} ++commands normally treat the last operand specially when it is a ++directory or a symbolic link to a directory. For example, @samp{cp ++source dest} is equivalent to @samp{cp source dest/source} if ++@file{dest} is a directory. Sometimes this behavior is not exactly ++what is wanted, so these commands support the following options to ++allow more fine-grained control: ++ ++@table @samp ++ ++@item -T ++@itemx --no-target-directory ++@opindex --no-target-directory ++@cindex target directory ++@cindex destination directory ++Do not treat the last operand specially when it is a directory or a ++symbolic link to a directory. This can help avoid race conditions in ++programs that operate in a shared area. For example, when the command ++@samp{mv /tmp/source /tmp/dest} succeeds, there is no guarantee that ++@file{/tmp/source} was renamed to @file{/tmp/dest}: it could have been ++renamed to @file{/tmp/dest/source} instead, if some other process ++created @file{/tmp/dest} as a directory. However, if @file{mv ++-T /tmp/source /tmp/dest} succeeds, there is no ++question that @file{/tmp/source} was renamed to @file{/tmp/dest}. ++ ++In the opposite situation, where you want the last operand to be ++treated as a directory and want a diagnostic otherwise, you can use ++the @option{--target-directory} (@option{-t}) option. ++ ++@item -t @var{directory} ++@itemx @w{@kbd{--target-directory}=@var{directory}} ++@opindex --target-directory ++@cindex target directory ++@cindex destination directory ++Use @var{directory} as the directory component of each destination ++file name. 
++ ++The interface for most programs is that after processing options and a ++finite (possibly zero) number of fixed-position arguments, the remaining ++argument list is either expected to be empty, or is a list of items ++(usually files) that will all be handled identically. The @command{xargs} ++program is designed to work well with this convention. ++ ++The commands in the @command{mv}-family are unusual in that they take ++a variable number of arguments with a special case at the @emph{end} ++(namely, the target directory). This makes it nontrivial to perform some ++operations, e.g., ``move all files from here to ../d/'', because ++@code{mv * ../d/} might exhaust the argument space, and @code{ls | xargs ...} ++doesn't have a clean way to specify an extra final argument for each ++invocation of the subject command. (It can be done by going through a ++shell command, but that requires more human labor and brain power than ++it should.) ++ ++The @w{@kbd{--target-directory}} (@option{-t}) option allows the @command{cp}, ++@command{install}, @command{ln}, and @command{mv} programs to be used ++conveniently with @command{xargs}. For example, you can move the files ++from the current directory to a sibling directory, @code{d}, like this: ++ ++@smallexample ++ls | xargs mv -t ../d -- ++@end smallexample ++ ++However, this doesn't move files whose names begin with @samp{.}. ++If you use the @sc{gnu} @command{find} program, you can move those ++files too, with this command: ++ ++@example ++find . -mindepth 1 -maxdepth 1 \ ++ | xargs mv -t ../d ++@end example ++ ++But both of the above approaches fail if there are no files in the ++current directory, or if any file has a name containing a blank or ++some other special characters. ++The following example removes those limitations and requires both ++@sc{gnu} @command{find} and @sc{gnu} @command{xargs}: ++ ++@example ++find .
-mindepth 1 -maxdepth 1 -print0 \ ++ | xargs --null --no-run-if-empty \ ++ mv -t ../d ++@end example ++ ++@end table ++ ++@noindent ++The @option{--target-directory} (@option{-t}) and ++@option{--no-target-directory} (@option{-T}) ++options cannot be combined. ++ ++@node Trailing slashes ++@section Trailing slashes ++ ++@cindex trailing slashes ++ ++Some @sc{gnu} programs (at least @command{cp} and @command{mv}) allow you to ++remove any trailing slashes from each @var{source} argument before ++operating on it. The @w{@kbd{--strip-trailing-slashes}} option enables ++this behavior. ++ ++This is useful when a @var{source} argument may have a trailing slash and ++@c FIXME: mv's behavior in this case is system-dependent ++specify a symbolic link to a directory. This scenario is in fact rather ++common because some shells can automatically append a trailing slash when ++performing file name completion on such symbolic links. Without this ++option, @command{mv}, for example, (via the system's rename function) must ++interpret a trailing slash as a request to dereference the symbolic link ++and so must rename the indirectly referenced @emph{directory} and not ++the symbolic link. Although it may seem surprising that such behavior ++be the default, it is required by @acronym{POSIX} and is consistent with ++other parts of that standard. ++ ++@node Traversing symlinks ++@section Traversing symlinks ++ ++@cindex symbolic link to directory, controlling traversal of ++ ++The following options modify how @command{chown} and @command{chgrp} ++@c FIXME: note that `du' has these options, too, but they have slightly ++@c different meaning. ++traverse a hierarchy when the @option{--recursive} (@option{-R}) ++option is also specified. ++If more than one of the following options is specified, only the final ++one takes effect. 
++These options specify whether processing a symbolic link to a directory ++entails operating on just the symbolic link or on all files in the ++hierarchy rooted at that directory. ++ ++These options are independent of @option{--dereference} and ++@option{--no-dereference} (@option{-h}), which control whether to modify ++a symlink or its referent. ++ ++@table @samp ++ ++@macro choptH ++@item -H ++@opindex -H ++@cindex symbolic link to directory, traverse each that is specified on the command line ++If @option{--recursive} (@option{-R}) is specified and ++a command line argument is a symbolic link to a directory, traverse it. ++@end macro ++@choptH ++ ++@macro choptL ++@item -L ++@opindex -L ++@cindex symbolic link to directory, traverse each that is encountered ++In a recursive traversal, traverse every symbolic link to a directory ++that is encountered. ++@end macro ++@choptL ++ ++@macro choptP ++@item -P ++@opindex -P ++@cindex symbolic link to directory, never traverse ++Do not traverse any symbolic links. ++This is the default if none of @option{-H}, @option{-L}, ++or @option{-P} is specified. ++@end macro ++@choptP ++ ++@end table ++ ++ ++@node Treating / specially ++@section Treating @file{/} specially ++ ++Certain commands can operate destructively on entire hierarchies. ++For example, if a user with appropriate privileges mistakenly runs ++@samp{rm -rf / tmp/junk}, that may remove ++all files on the entire system. Since there are so few ++legitimate uses for such a command, ++@sc{gnu} @command{rm} normally declines to operate on any directory ++that resolves to @file{/}. If you really want to try to remove all ++the files on your system, you can use the @option{--no-preserve-root} ++option, but the default behavior, specified by the ++@option{--preserve-root} option, is safer for most purposes. ++ ++The commands @command{chgrp}, @command{chmod} and @command{chown} ++can also operate destructively on entire hierarchies, so they too ++support these options.
Although, unlike @command{rm}, they don't ++actually unlink files, these commands are arguably more dangerous ++when operating recursively on @file{/}, since they often work much ++more quickly, and hence damage more files before an alert user can ++interrupt them. Tradition and @acronym{POSIX} require these commands ++to operate recursively on @file{/}, so they default to ++@option{--no-preserve-root}, but using the @option{--preserve-root} ++option makes them safer for most purposes. For convenience you can ++specify @option{--preserve-root} in an alias or in a shell function. ++ ++Note that the @option{--preserve-root} option also ensures ++that @command{chgrp} and @command{chown} do not modify @file{/} ++even when dereferencing a symlink pointing to @file{/}. ++ ++@node Special built-in utilities ++@section Special built-in utilities ++ ++Some programs like @command{nice} can invoke other programs; for ++example, the command @samp{nice cat file} invokes the program ++@command{cat} by executing the command @samp{cat file}. However, ++@dfn{special built-in utilities} like @command{exit} cannot be invoked ++this way. For example, the command @samp{nice exit} does not have a ++well-defined behavior: it may generate an error message instead of ++exiting. ++ ++Here is a list of the special built-in utilities that are standardized ++by @acronym{POSIX} 1003.1-2004. ++ ++@quotation ++@t{.@: : break continue eval exec exit export readonly ++return set shift times trap unset} ++@end quotation ++ ++For example, because @samp{.}, @samp{:}, and @samp{exec} are special, ++the commands @samp{nice . foo.sh}, @samp{nice :}, and @samp{nice exec ++pwd} do not work as you might expect. ++ ++Many shells extend this list. For example, Bash has several extra ++special built-in utilities like @command{history}, and ++@command{suspend}, and with Bash the command @samp{nice suspend} ++generates an error message instead of suspending. 
++ ++@node Standards conformance ++@section Standards conformance ++ ++@vindex POSIXLY_CORRECT ++In a few cases, the @sc{gnu} utilities' default behavior is ++incompatible with the @acronym{POSIX} standard. To suppress these ++incompatibilities, define the @env{POSIXLY_CORRECT} environment ++variable. Unless you are checking for @acronym{POSIX} conformance, you ++probably do not need to define @env{POSIXLY_CORRECT}. ++ ++Newer versions of @acronym{POSIX} are occasionally incompatible with older ++versions. For example, older versions of @acronym{POSIX} required the ++command @samp{sort +1} to sort based on the second and succeeding ++fields in each input line, but starting with @acronym{POSIX} 1003.1-2001 ++the same command is required to sort the file named @file{+1}, and you ++must instead use the command @samp{sort -k 2} to get the field-based ++sort. ++ ++@vindex _POSIX2_VERSION ++The @sc{gnu} utilities normally conform to the version of @acronym{POSIX} ++that is standard for your system. To cause them to conform to a ++different version of @acronym{POSIX}, define the @env{_POSIX2_VERSION} ++environment variable to a value of the form @var{yyyymm} specifying ++the year and month the standard was adopted. Two values are currently ++supported for @env{_POSIX2_VERSION}: @samp{199209} stands for ++@acronym{POSIX} 1003.2-1992, and @samp{200112} stands for @acronym{POSIX} ++1003.1-2001. For example, if you have a newer system but are running software ++that assumes an older version of @acronym{POSIX} and uses @samp{sort +1} ++or @samp{tail +10}, you can work around any compatibility problems by setting ++@samp{_POSIX2_VERSION=199209} in your environment. ++ ++@node Output of entire files ++@chapter Output of entire files ++ ++@cindex output of entire files ++@cindex entire files, output of ++ ++These commands read and write entire files, possibly transforming them ++in some way. ++ ++@menu ++* cat invocation:: Concatenate and write files. 
++* tac invocation:: Concatenate and write files in reverse. ++* nl invocation:: Number lines and write files. ++* od invocation:: Write files in octal or other formats. ++* base64 invocation:: Transform data into printable data. ++@end menu ++ ++@node cat invocation ++@section @command{cat}: Concatenate and write files ++ ++@pindex cat ++@cindex concatenate and write files ++@cindex copying files ++ ++@command{cat} copies each @var{file} (@samp{-} means standard input), or ++standard input if none are given, to standard output. Synopsis: ++ ++@example ++cat [@var{option}] [@var{file}]@dots{} ++@end example ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -A ++@itemx --show-all ++@opindex -A ++@opindex --show-all ++Equivalent to @option{-vET}. ++ ++@item -b ++@itemx --number-nonblank ++@opindex -b ++@opindex --number-nonblank ++Number all nonempty output lines, starting with 1. ++ ++@item -e ++@opindex -e ++Equivalent to @option{-vE}. ++ ++@item -E ++@itemx --show-ends ++@opindex -E ++@opindex --show-ends ++Display a @samp{$} after the end of each line. ++ ++@item -n ++@itemx --number ++@opindex -n ++@opindex --number ++Number all output lines, starting with 1. ++ ++@item -s ++@itemx --squeeze-blank ++@opindex -s ++@opindex --squeeze-blank ++@cindex squeezing empty lines ++Suppress repeated adjacent empty lines; output just one empty line ++instead of several. ++ ++@item -t ++@opindex -t ++Equivalent to @option{-vT}. ++ ++@item -T ++@itemx --show-tabs ++@opindex -T ++@opindex --show-tabs ++Display TAB characters as @samp{^I}. ++ ++@item -u ++@opindex -u ++Ignored; for @acronym{POSIX} compatibility. ++ ++@item -v ++@itemx --show-nonprinting ++@opindex -v ++@opindex --show-nonprinting ++Display control characters except for LFD and TAB using ++@samp{^} notation and precede characters that have the high bit set with ++@samp{M-}. 
++ ++@end table ++ ++On systems like MS-DOS that distinguish between text and binary files, ++@command{cat} normally reads and writes in binary mode. However, ++@command{cat} reads in text mode if one of the options ++@option{-bensAE} is used or if @command{cat} is reading from standard ++input and standard input is a terminal. Similarly, @command{cat} ++writes in text mode if one of the options @option{-bensAE} is used or ++if standard output is a terminal. ++ ++@exitstatus ++ ++Examples: ++ ++@smallexample ++# Output f's contents, then standard input, then g's contents. ++cat f - g ++ ++# Copy standard input to standard output. ++cat ++@end smallexample ++ ++ ++@node tac invocation ++@section @command{tac}: Concatenate and write files in reverse ++ ++@pindex tac ++@cindex reversing files ++ ++@command{tac} copies each @var{file} (@samp{-} means standard input), or ++standard input if none are given, to standard output, reversing the ++records (lines by default) in each separately. Synopsis: ++ ++@example ++tac [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@dfn{Records} are separated by instances of a string (newline by ++default). By default, this separator string is attached to the end of ++the record that it follows in the file. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -b ++@itemx --before ++@opindex -b ++@opindex --before ++The separator is attached to the beginning of the record that it ++precedes in the file. ++ ++@item -r ++@itemx --regex ++@opindex -r ++@opindex --regex ++Treat the separator string as a regular expression. Users of @command{tac} ++on MS-DOS/MS-Windows should note that, since @command{tac} reads files in ++binary mode, each line of a text file might end with a CR/LF pair ++instead of the Unix-style LF. 
++ ++@item -s @var{separator} ++@itemx --separator=@var{separator} ++@opindex -s ++@opindex --separator ++Use @var{separator} as the record separator, instead of newline. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node nl invocation ++@section @command{nl}: Number lines and write files ++ ++@pindex nl ++@cindex numbering lines ++@cindex line numbering ++ ++@command{nl} writes each @var{file} (@samp{-} means standard input), or ++standard input if none are given, to standard output, with line numbers ++added to some or all of the lines. Synopsis: ++ ++@example ++nl [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@cindex logical pages, numbering on ++@command{nl} decomposes its input into (logical) pages; by default, the ++line number is reset to 1 at the top of each logical page. @command{nl} ++treats all of the input files as a single document; it does not reset ++line numbers or logical pages between files. ++ ++@cindex headers, numbering ++@cindex body, numbering ++@cindex footers, numbering ++A logical page consists of three sections: header, body, and footer. ++Any of the sections can be empty. Each can be numbered in a different ++style from the others. ++ ++The beginnings of the sections of logical pages are indicated in the ++input file by a line containing exactly one of these delimiter strings: ++ ++@table @samp ++@item \:\:\: ++start of header; ++@item \:\: ++start of body; ++@item \: ++start of footer. ++@end table ++ ++The two characters from which these strings are made can be changed from ++@samp{\} and @samp{:} via options (see below), but the pattern and ++length of each string cannot be changed. ++ ++A section delimiter is replaced by an empty line on output. Any text ++that comes before the first section delimiter string in the input file ++is considered to be part of a body section, so @command{nl} treats a ++file that contains no section delimiters as a single body section. ++ ++The program accepts the following options. 
Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -b @var{style} ++@itemx --body-numbering=@var{style} ++@opindex -b ++@opindex --body-numbering ++Select the numbering style for lines in the body section of each ++logical page. When a line is not numbered, the current line number ++is not incremented, but the line number separator character is still ++prepended to the line. The styles are: ++ ++@table @samp ++@item a ++number all lines, ++@item t ++number only nonempty lines (default for body), ++@item n ++do not number lines (default for header and footer), ++@item p@var{bre} ++number only lines that contain a match for the basic regular ++expression @var{bre}. ++@xref{Regular Expressions, , Regular Expressions, grep, The GNU Grep Manual}. ++@end table ++ ++@item -d @var{cd} ++@itemx --section-delimiter=@var{cd} ++@opindex -d ++@opindex --section-delimiter ++@cindex section delimiters of pages ++Set the section delimiter characters to @var{cd}; default is ++@samp{\:}. If only @var{c} is given, the second remains @samp{:}. ++(Remember to protect @samp{\} or other metacharacters from shell ++expansion with quotes or extra backslashes.) ++ ++@item -f @var{style} ++@itemx --footer-numbering=@var{style} ++@opindex -f ++@opindex --footer-numbering ++Analogous to @option{--body-numbering}. ++ ++@item -h @var{style} ++@itemx --header-numbering=@var{style} ++@opindex -h ++@opindex --header-numbering ++Analogous to @option{--body-numbering}. ++ ++@item -i @var{number} ++@itemx --line-increment=@var{number} ++@opindex -i ++@opindex --line-increment ++Increment line numbers by @var{number} (default 1). ++ ++@item -l @var{number} ++@itemx --join-blank-lines=@var{number} ++@opindex -l ++@opindex --join-blank-lines ++@cindex empty lines, numbering ++@cindex blank lines, numbering ++Consider @var{number} (default 1) consecutive empty lines to be one ++logical line for numbering, and only number the last one. 
Where fewer ++than @var{number} consecutive empty lines occur, do not number them. ++An empty line is one that contains no characters, not even spaces ++or tabs. ++ ++@item -n @var{format} ++@itemx --number-format=@var{format} ++@opindex -n ++@opindex --number-format ++Select the line numbering format (default is @code{rn}): ++ ++@table @samp ++@item ln ++@opindex ln @r{format for @command{nl}} ++left justified, no leading zeros; ++@item rn ++@opindex rn @r{format for @command{nl}} ++right justified, no leading zeros; ++@item rz ++@opindex rz @r{format for @command{nl}} ++right justified, leading zeros. ++@end table ++ ++@item -p ++@itemx --no-renumber ++@opindex -p ++@opindex --no-renumber ++Do not reset the line number at the start of a logical page. ++ ++@item -s @var{string} ++@itemx --number-separator=@var{string} ++@opindex -s ++@opindex --number-separator ++Separate the line number from the text line in the output with ++@var{string} (default is the TAB character). ++ ++@item -v @var{number} ++@itemx --starting-line-number=@var{number} ++@opindex -v ++@opindex --starting-line-number ++Set the initial line number on each logical page to @var{number} (default 1). ++ ++@item -w @var{number} ++@itemx --number-width=@var{number} ++@opindex -w ++@opindex --number-width ++Use @var{number} characters for line numbers (default 6). ++ ++@end table ++ ++@exitstatus ++ ++ ++@node od invocation ++@section @command{od}: Write files in octal or other formats ++ ++@pindex od ++@cindex octal dump of files ++@cindex hex dump of files ++@cindex ASCII dump of files ++@cindex file contents, dumping unambiguously ++ ++@command{od} writes an unambiguous representation of each @var{file} ++(@samp{-} means standard input), or standard input if none are given. 
++Synopses: ++ ++@smallexample ++od [@var{option}]@dots{} [@var{file}]@dots{} ++od [-abcdfilosx]@dots{} [@var{file}] [[+]@var{offset}[.][b]] ++od [@var{option}]@dots{} --traditional [@var{file}] [[+]@var{offset}[.][b] [[+]@var{label}[.][b]]] ++@end smallexample ++ ++Each line of output consists of the offset in the input, followed by ++groups of data from the file. By default, @command{od} prints the offset in ++octal, and each group of file data is a C @code{short int}'s worth of input ++printed as a single octal number. ++ ++If @var{offset} is given, it specifies how many input bytes to skip ++before formatting and writing. By default, it is interpreted as an ++octal number, but the optional trailing decimal point causes it to be ++interpreted as decimal. If no decimal is specified and the offset ++begins with @samp{0x} or @samp{0X} it is interpreted as a hexadecimal ++number. If there is a trailing @samp{b}, the number of bytes skipped ++will be @var{offset} multiplied by 512. ++ ++If a command is of both the first and second forms, the second form is ++assumed if the last operand begins with @samp{+} or (if there are two ++operands) a digit. For example, in @samp{od foo 10} and @samp{od +10} ++the @samp{10} is an offset, whereas in @samp{od 10} the @samp{10} is a ++file name. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -A @var{radix} ++@itemx --address-radix=@var{radix} ++@opindex -A ++@opindex --address-radix ++@cindex radix for file offsets ++@cindex file offset radix ++Select the base in which file offsets are printed. @var{radix} can ++be one of the following: ++ ++@table @samp ++@item d ++decimal; ++@item o ++octal; ++@item x ++hexadecimal; ++@item n ++none (do not print offsets). ++@end table ++ ++The default is octal. ++ ++@item -j @var{bytes} ++@itemx --skip-bytes=@var{bytes} ++@opindex -j ++@opindex --skip-bytes ++Skip @var{bytes} input bytes before formatting and writing. 
If ++@var{bytes} begins with @samp{0x} or @samp{0X}, it is interpreted in ++hexadecimal; otherwise, if it begins with @samp{0}, in octal; otherwise, ++in decimal. ++@multiplierSuffixes{bytes} ++ ++@item -N @var{bytes} ++@itemx --read-bytes=@var{bytes} ++@opindex -N ++@opindex --read-bytes ++Output at most @var{bytes} bytes of the input. Prefixes and suffixes on ++@var{bytes} are interpreted as for the @option{-j} option. ++ ++@item -S @var{bytes} ++@itemx --strings[=@var{bytes}] ++@opindex -S ++@opindex --strings ++@cindex string constants, outputting ++Instead of the normal output, output only @dfn{string constants}: at ++least @var{bytes} consecutive @acronym{ASCII} graphic characters, ++followed by a zero byte (@acronym{ASCII} @sc{nul}). ++Prefixes and suffixes on @var{bytes} are interpreted as for the ++@option{-j} option. ++ ++If @var{bytes} is omitted with @option{--strings}, the default is 3. ++ ++@item -t @var{type} ++@itemx --format=@var{type} ++@opindex -t ++@opindex --format ++Select the format in which to output the file data. @var{type} is a ++string of one or more of the below type indicator characters. If you ++include more than one type indicator character in a single @var{type} ++string, or use this option more than once, @command{od} writes one copy ++of each output line using each of the data types that you specified, ++in the order that you specified. ++ ++Adding a trailing ``z'' to any type specification appends a display ++of the @acronym{ASCII} character representation of the printable characters ++to the output line generated by the type specification. ++ ++@table @samp ++@item a ++named character, ignoring high-order bit ++@item c ++@acronym{ASCII} character or backslash escape, ++@item d ++signed decimal ++@item f ++floating point ++@item o ++octal ++@item u ++unsigned decimal ++@item x ++hexadecimal ++@end table ++ ++The type @code{a} outputs things like @samp{sp} for space, @samp{nl} for ++newline, and @samp{nul} for a zero byte. 
Only the least significant ++seven bits of each byte are used; the high-order bit is ignored. ++Type @code{c} outputs ++@samp{ }, @samp{\n}, and @code{\0}, respectively. ++ ++@cindex type size ++Except for types @samp{a} and @samp{c}, you can specify the number ++of bytes to use in interpreting each number in the given data type ++by following the type indicator character with a decimal integer. ++Alternatively, you can specify the size of one of the C compiler's ++built-in data types by following the type indicator character with ++one of the following characters. For integers (@samp{d}, @samp{o}, ++@samp{u}, @samp{x}): ++ ++@table @samp ++@item C ++char ++@item S ++short ++@item I ++int ++@item L ++long ++@end table ++ ++For floating point (@code{f}): ++ ++@table @asis ++@item F ++float ++@item D ++double ++@item L ++long double ++@end table ++ ++@item -v ++@itemx --output-duplicates ++@opindex -v ++@opindex --output-duplicates ++Output consecutive lines that are identical. By default, when two or ++more consecutive output lines would be identical, @command{od} outputs only ++the first line, and puts just an asterisk on the following line to ++indicate the elision. ++ ++@item -w[@var{n}] ++@itemx --width[=@var{n}] ++@opindex -w ++@opindex --width ++Dump @var{n} input bytes per output line. This must be a multiple of ++the least common multiple of the sizes associated with the specified ++output types. ++ ++If this option is not given at all, the default is 16. If @var{n} is ++omitted, the default is 32. ++ ++@end table ++ ++The next several options are shorthands for format specifications. ++@sc{gnu} @command{od} accepts any combination of shorthands and format ++specification options. These options accumulate. ++ ++@table @samp ++ ++@item -a ++@opindex -a ++Output as named characters. Equivalent to @samp{-t a}. ++ ++@item -b ++@opindex -b ++Output as octal bytes. Equivalent to @samp{-t o1}. 
++ ++@item -c ++@opindex -c ++Output as @acronym{ASCII} characters or backslash escapes. Equivalent to ++@samp{-t c}. ++ ++@item -d ++@opindex -d ++Output as unsigned decimal two-byte units. Equivalent to @samp{-t u2}. ++ ++@item -f ++@opindex -f ++Output as floats. Equivalent to @samp{-t fF}. ++ ++@item -i ++@opindex -i ++Output as decimal ints. Equivalent to @samp{-t dI}. ++ ++@item -l ++@opindex -l ++Output as decimal long ints. Equivalent to @samp{-t dL}. ++ ++@item -o ++@opindex -o ++Output as octal two-byte units. Equivalent to @option{-t o2}. ++ ++@item -s ++@opindex -s ++Output as decimal two-byte units. Equivalent to @option{-t d2}. ++ ++@item -x ++@opindex -x ++Output as hexadecimal two-byte units. Equivalent to @samp{-t x2}. ++ ++@item --traditional ++@opindex --traditional ++Recognize the non-option label argument that traditional @command{od} ++accepted. The following syntax: ++ ++@smallexample ++od --traditional [@var{file}] [[+]@var{offset}[.][b] [[+]@var{label}[.][b]]] ++@end smallexample ++ ++@noindent ++can be used to specify at most one file and optional arguments ++specifying an offset and a pseudo-start address, @var{label}. ++The @var{label} argument is interpreted ++just like @var{offset}, but it specifies an initial pseudo-address. The ++pseudo-addresses are displayed in parentheses following any normal ++address. ++ ++@end table ++ ++@exitstatus ++ ++@node base64 invocation ++@section @command{base64}: Transform data into printable data ++ ++@pindex base64 ++@cindex base64 encoding ++ ++@command{base64} transforms data read from a file, or standard input, ++into (or from) base64 encoded form. The base64 encoded form uses ++printable @acronym{ASCII} characters to represent binary data. ++Synopses: ++ ++@smallexample ++base64 [@var{option}]@dots{} [@var{file}] ++base64 --decode [@var{option}]@dots{} [@var{file}] ++@end smallexample ++ ++The base64 encoding expands data to roughly 133% of the original. 
++The format conforms to ++@uref{ftp://ftp.rfc-editor.org/in-notes/rfc4648.txt, RFC 4648}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -w @var{cols} ++@itemx --wrap=@var{cols} ++@opindex -w ++@opindex --wrap ++@cindex wrap data ++@cindex column to wrap data after ++During encoding, wrap lines after @var{cols} characters. This must be ++a non-negative number. ++ ++The default is to wrap after 76 characters. Use the value 0 to ++disable line wrapping altogether. ++ ++@item -d ++@itemx --decode ++@opindex -d ++@opindex --decode ++@cindex Decode base64 data ++@cindex Base64 decoding ++Change the mode of operation, from the default of encoding data, to ++decoding data. Input is expected to be base64 encoded data, and the ++output will be the original data. ++ ++@item -i ++@itemx --ignore-garbage ++@opindex -i ++@opindex --ignore-garbage ++@cindex Ignore garbage in base64 stream ++When decoding, newlines are always accepted. ++During decoding, ignore unrecognized bytes, ++to permit distorted data to be decoded. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node Formatting file contents ++@chapter Formatting file contents ++ ++@cindex formatting file contents ++ ++These commands reformat the contents of files. ++ ++@menu ++* fmt invocation:: Reformat paragraph text. ++* pr invocation:: Paginate or columnate files for printing. ++* fold invocation:: Wrap input lines to fit in specified width. ++@end menu ++ ++ ++@node fmt invocation ++@section @command{fmt}: Reformat paragraph text ++ ++@pindex fmt ++@cindex reformatting paragraph text ++@cindex paragraphs, reformatting ++@cindex text, reformatting ++ ++@command{fmt} fills and joins lines to produce output lines of (at most) ++a given number of characters (75 by default). 
Synopsis: ++ ++@example ++fmt [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@command{fmt} reads from the specified @var{file} arguments (or standard ++input if none are given), and writes to standard output. ++ ++By default, blank lines, spaces between words, and indentation are ++preserved in the output; successive input lines with different ++indentation are not joined; tabs are expanded on input and introduced on ++output. ++ ++@cindex line-breaking ++@cindex sentences and line-breaking ++@cindex Knuth, Donald E. ++@cindex Plass, Michael F. ++@command{fmt} prefers breaking lines at the end of a sentence, and tries to ++avoid line breaks after the first word of a sentence or before the last ++word of a sentence. A @dfn{sentence break} is defined as either the end ++of a paragraph or a word ending in any of @samp{.?!}, followed by two ++spaces or end of line, ignoring any intervening parentheses or quotes. ++Like @TeX{}, @command{fmt} reads entire ``paragraphs'' before choosing line ++breaks; the algorithm is a variant of that given by Donald E. Knuth ++and Michael F. Plass in ``Breaking Paragraphs Into Lines'', ++@cite{Software---Practice & Experience} @b{11}, 11 (November 1981), ++1119--1184. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -c ++@itemx --crown-margin ++@opindex -c ++@opindex --crown-margin ++@cindex crown margin ++@dfn{Crown margin} mode: preserve the indentation of the first two ++lines within a paragraph, and align the left margin of each subsequent ++line with that of the second line. ++ ++@item -t ++@itemx --tagged-paragraph ++@opindex -t ++@opindex --tagged-paragraph ++@cindex tagged paragraphs ++@dfn{Tagged paragraph} mode: like crown margin mode, except that if ++indentation of the first line of a paragraph is the same as the ++indentation of the second, the first line is treated as a one-line ++paragraph. 
++ ++@item -s ++@itemx --split-only ++@opindex -s ++@opindex --split-only ++Split lines only. Do not join short lines to form longer ones. This ++prevents sample lines of code, and other such ``formatted'' text from ++being unduly combined. ++ ++@item -u ++@itemx --uniform-spacing ++@opindex -u ++@opindex --uniform-spacing ++Uniform spacing. Reduce spacing between words to one space, and spacing ++between sentences to two spaces. ++ ++@item -@var{width} ++@itemx -w @var{width} ++@itemx --width=@var{width} ++@opindex -@var{width} ++@opindex -w ++@opindex --width ++Fill output lines up to @var{width} characters (default 75). @command{fmt} ++initially tries to make lines about 7% shorter than this, to give it ++room to balance line lengths. ++ ++@item -p @var{prefix} ++@itemx --prefix=@var{prefix} ++Only lines beginning with @var{prefix} (possibly preceded by whitespace) ++are subject to formatting. The prefix and any preceding whitespace are ++stripped for the formatting and then re-attached to each formatted output ++line. One use is to format certain kinds of program comments, while ++leaving the code unchanged. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node pr invocation ++@section @command{pr}: Paginate or columnate files for printing ++ ++@pindex pr ++@cindex printing, preparing files for ++@cindex multicolumn output, generating ++@cindex merging files in parallel ++ ++@command{pr} writes each @var{file} (@samp{-} means standard input), or ++standard input if none are given, to standard output, paginating and ++optionally outputting in multicolumn format; optionally merges all ++@var{file}s, printing all in parallel, one per column. Synopsis: ++ ++@example ++pr [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@vindex LC_MESSAGES ++By default, a 5-line header is printed at each page: two blank lines; ++a line with the date, the file name, and the page count; and two more ++blank lines. A footer of five blank lines is also printed. 
++The default @var{page_length} is 66 ++lines. The default number of text lines is therefore 56. ++The text line of the header takes the form ++@samp{@var{date} @var{string} @var{page}}, with spaces inserted around ++@var{string} so that the line takes up the full @var{page_width}. Here, ++@var{date} is the date (see the @option{-D} or @option{--date-format} ++option for details), @var{string} is the centered header string, and ++@var{page} identifies the page number. The @env{LC_MESSAGES} locale ++category affects the spelling of @var{page}; in the default C locale, it ++is @samp{Page @var{number}} where @var{number} is the decimal page ++number. ++ ++Form feeds in the input cause page breaks in the output. Multiple form ++feeds produce empty pages. ++ ++Columns are of equal width, separated by an optional string (default ++is @samp{space}). For multicolumn output, lines will always be truncated to ++@var{page_width} (default 72), unless you use the @option{-J} option. ++For single ++column output no line truncation occurs by default. Use the @option{-W} option to ++truncate lines in that case. ++ ++The following changes were made in version 1.22i and apply to later ++versions of @command{pr}: ++@c FIXME: this whole section here sounds very awkward to me. I ++@c made a few small changes, but really it all needs to be redone. - Brian ++@c OK, I fixed another sentence or two, but some of it I just don't understand. ++@c - Brian ++@itemize @bullet ++ ++@item ++Some small @var{letter options} (@option{-s}, @option{-w}) have been ++redefined for better @acronym{POSIX} compliance. The output of some further ++cases has been adapted to other Unix systems. These changes are not ++compatible with earlier versions of the program. ++ ++@item ++Some @var{new capital letter} options (@option{-J}, @option{-S}, @option{-W}) ++have been introduced to turn off unexpected interferences of small letter ++options. 
The @option{-N} option and the second argument @var{last_page} ++of @samp{+FIRST_PAGE} offer more flexibility. The detailed handling of ++form feeds set in the input files requires the @option{-T} option. ++ ++@item ++Capital letter options override small letter ones. ++ ++@item ++Some of the option-arguments (compare @option{-s}, @option{-e}, ++@option{-i}, @option{-n}) cannot be specified as separate arguments from the ++preceding option letter (already stated in the @acronym{POSIX} specification). ++@end itemize ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item +@var{first_page}[:@var{last_page}] ++@itemx --pages=@var{first_page}[:@var{last_page}] ++@c The two following @opindex lines evoke warnings because they contain `:' ++@c The `info' spec does not permit that. If we use those lines, we end ++@c up with truncated index entries that don't work. ++@c @opindex +@var{first_page}[:@var{last_page}] ++@c @opindex --pages=@var{first_page}[:@var{last_page}] ++@opindex +@var{page_range} ++@opindex --pages=@var{page_range} ++Begin printing with page @var{first_page} and stop with @var{last_page}. ++Missing @samp{:@var{last_page}} implies end of file. While estimating ++the number of skipped pages each form feed in the input file results ++in a new page. Page counting with and without @samp{+@var{first_page}} ++is identical. By default, counting starts with the first page of input ++file (not first page printed). Line numbering may be altered by @option{-N} ++option. ++ ++@item -@var{column} ++@itemx --columns=@var{column} ++@opindex -@var{column} ++@opindex --columns ++@cindex down columns ++With each single @var{file}, produce @var{column} columns of output ++(default is 1) and print columns down, unless @option{-a} is used. The ++column width is automatically decreased as @var{column} increases; unless ++you use the @option{-W/-w} option to increase @var{page_width} as well. 
++This option might well cause some lines to be truncated. The number of ++lines in the columns on each page are balanced. The options @option{-e} ++and @option{-i} are on for multiple text-column output. Together with ++@option{-J} option column alignment and line truncation is turned off. ++Lines of full length are joined in a free field format and @option{-S} ++option may set field separators. @option{-@var{column}} may not be used ++with @option{-m} option. ++ ++@item -a ++@itemx --across ++@opindex -a ++@opindex --across ++@cindex across columns ++With each single @var{file}, print columns across rather than down. The ++@option{-@var{column}} option must be given with @var{column} greater than one. ++If a line is too long to fit in a column, it is truncated. ++ ++@item -c ++@itemx --show-control-chars ++@opindex -c ++@opindex --show-control-chars ++Print control characters using hat notation (e.g., @samp{^G}); print ++other nonprinting characters in octal backslash notation. By default, ++nonprinting characters are not changed. ++ ++@item -d ++@itemx --double-space ++@opindex -d ++@opindex --double-space ++@cindex double spacing ++Double space the output. ++ ++@item -D @var{format} ++@itemx --date-format=@var{format} ++@cindex time formats ++@cindex formatting times ++Format header dates using @var{format}, using the same conventions as ++for the command @samp{date +@var{format}}; @xref{date invocation}. ++Except for directives, which start with ++@samp{%}, characters in @var{format} are printed unchanged. You can use ++this option to specify an arbitrary string in place of the header date, ++e.g., @option{--date-format="Monday morning"}. 
++ ++@vindex POSIXLY_CORRECT ++@vindex LC_TIME ++The default date format is @samp{%Y-%m-%d %H:%M} (for example, ++@samp{2001-12-04 23:59}); ++but if the @env{POSIXLY_CORRECT} environment variable is set ++and the @env{LC_TIME} locale category specifies the @acronym{POSIX} ++locale, the default is @samp{%b %e %H:%M %Y} (for example, ++@samp{Dec@ @ 4 23:59 2001}). ++ ++@vindex TZ ++Time stamps are listed according to the time zone rules specified by ++the @env{TZ} environment variable, or by the system default rules if ++@env{TZ} is not set. @xref{TZ Variable,, Specifying the Time Zone ++with @env{TZ}, libc, The GNU C Library Reference Manual}. ++ ++@item -e[@var{in-tabchar}[@var{in-tabwidth}]] ++@itemx --expand-tabs[=@var{in-tabchar}[@var{in-tabwidth}]] ++@opindex -e ++@opindex --expand-tabs ++@cindex input tabs ++Expand @var{tab}s to spaces on input. Optional argument @var{in-tabchar} is ++the input tab character (default is the TAB character). Second optional ++argument @var{in-tabwidth} is the input tab character's width (default ++is 8). ++ ++@item -f ++@itemx -F ++@itemx --form-feed ++@opindex -F ++@opindex -f ++@opindex --form-feed ++Use a form feed instead of newlines to separate output pages. This does ++not alter the default page length of 66 lines. ++ ++@item -h @var{header} ++@itemx --header=@var{header} ++@opindex -h ++@opindex --header ++Replace the file name in the header with the centered string @var{header}. ++When using the shell, @var{header} should be quoted and should be ++separated from @option{-h} by a space. ++ ++@item -i[@var{out-tabchar}[@var{out-tabwidth}]] ++@itemx --output-tabs[=@var{out-tabchar}[@var{out-tabwidth}]] ++@opindex -i ++@opindex --output-tabs ++@cindex output tabs ++Replace spaces with @var{tab}s on output. Optional argument @var{out-tabchar} ++is the output tab character (default is the TAB character). Second optional ++argument @var{out-tabwidth} is the output tab character's width (default ++is 8).
++ ++@item -J ++@itemx --join-lines ++@opindex -J ++@opindex --join-lines ++Merge lines of full length. Used together with the column options ++@option{-@var{column}}, @option{-a -@var{column}} or @option{-m}. Turns off ++@option{-W/-w} line truncation; ++no column alignment used; may be used with ++@option{--sep-string[=@var{string}]}. @option{-J} has been introduced ++(together with @option{-W} and @option{--sep-string}) ++to disentangle the old (@acronym{POSIX}-compliant) options @option{-w} and ++@option{-s} along with the three column options. ++ ++ ++@item -l @var{page_length} ++@itemx --length=@var{page_length} ++@opindex -l ++@opindex --length ++Set the page length to @var{page_length} (default 66) lines, including ++the lines of the header [and the footer]. If @var{page_length} is less ++than or equal to 10, the header and footer are omitted, as if the ++@option{-t} option had been given. ++ ++@item -m ++@itemx --merge ++@opindex -m ++@opindex --merge ++Merge and print all @var{file}s in parallel, one in each column. If a ++line is too long to fit in a column, it is truncated, unless the @option{-J} ++option is used. @option{--sep-string[=@var{string}]} may be used. ++Empty pages in ++some @var{file}s (form feeds set) produce empty columns, still marked ++by @var{string}. The result is a continuous line numbering and column ++marking throughout the whole merged file. Completely empty merged pages ++show no separators or line numbers. The default header becomes ++@samp{@var{date} @var{page}} with spaces inserted in the middle; this ++may be used with the @option{-h} or @option{--header} option to fill up ++the middle blank part. ++ ++@item -n[@var{number-separator}[@var{digits}]] ++@itemx --number-lines[=@var{number-separator}[@var{digits}]] ++@opindex -n ++@opindex --number-lines ++Provide @var{digits} digit line numbering (default for @var{digits} is ++5). 
With multicolumn output the number occupies the first @var{digits} ++column positions of each text column or only each line of @option{-m} ++output. With single column output the number precedes each line just as ++@option{-m} does. Default counting of the line numbers starts with the ++first line of the input file (not the first line printed, compare the ++@option{--pages} option and @option{-N} option). ++Optional argument @var{number-separator} is the character appended to ++the line number to separate it from the text that follows. The default ++separator is the TAB character. In a strict sense a TAB is always ++printed with single column output only. The TAB width varies ++with the TAB position, e.g., with the left @var{margin} specified ++by @option{-o} option. With multicolumn output priority is given to ++@samp{equal width of output columns} (a @acronym{POSIX} specification). ++The TAB width is fixed to the value of the first column and does ++not change with different values of left @var{margin}. That means a ++fixed number of spaces is always printed in the place of the ++@var{number-separator} TAB. The tabification depends upon the output ++position. ++ ++@item -N @var{line_number} ++@itemx --first-line-number=@var{line_number} ++@opindex -N ++@opindex --first-line-number ++Start line counting with the number @var{line_number} at first line of ++first page printed (in most cases not the first line of the input file). ++ ++@item -o @var{margin} ++@itemx --indent=@var{margin} ++@opindex -o ++@opindex --indent ++@cindex indenting lines ++@cindex left margin ++Indent each line with a margin @var{margin} spaces wide (default is zero). ++The total page width is the size of the margin plus the @var{page_width} ++set with the @option{-W/-w} option. A limited overflow may occur with ++numbered single column output (compare @option{-n} option).
++ ++@item -r ++@itemx --no-file-warnings ++@opindex -r ++@opindex --no-file-warnings ++Do not print a warning message when an argument @var{file} cannot be ++opened. (The exit status will still be nonzero, however.) ++ ++@item -s[@var{char}] ++@itemx --separator[=@var{char}] ++@opindex -s ++@opindex --separator ++Separate columns by a single character @var{char}. The default for ++@var{char} is the TAB character without @option{-w} and @samp{no ++character} with @option{-w}. Without @option{-s} the default separator ++@samp{space} is set. @option{-s[char]} turns off line truncation of all ++three column options (@option{-COLUMN}|@option{-a -COLUMN}|@option{-m}) unless ++@option{-w} is set. This is a @acronym{POSIX}-compliant formulation. ++ ++ ++@item -S@var{string} ++@itemx --sep-string[=@var{string}] ++@opindex -S ++@opindex --sep-string ++Use @var{string} to separate output columns. The @option{-S} option doesn't ++affect the @option{-W/-w} option, unlike the @option{-s} option which does. It ++does not affect line truncation or column alignment. ++Without @option{-S}, and with @option{-J}, @command{pr} uses the default output ++separator, TAB@. ++Without @option{-S} or @option{-J}, @command{pr} uses a @samp{space} ++(same as @option{-S"@w{ }"}). @option{--sep-string} with no ++@samp{=@var{string}} is equivalent to @option{--sep-string=""}. ++ ++@item -t ++@itemx --omit-header ++@opindex -t ++@opindex --omit-header ++Do not print the usual header [and footer] on each page, and do not fill ++out the bottom of pages (with blank lines or a form feed). No page ++structure is produced, but form feeds set in the input files are retained. ++The predefined pagination is not changed. @option{-t} or @option{-T} may be ++useful together with other options; e.g.: @option{-t -e4}, expand TAB characters ++in the input file to 4 spaces but don't make any other changes. Use of ++@option{-t} overrides @option{-h}. 
++ ++@item -T ++@itemx --omit-pagination ++@opindex -T ++@opindex --omit-pagination ++Do not print header [and footer]. In addition eliminate all form feeds ++set in the input files. ++ ++@item -v ++@itemx --show-nonprinting ++@opindex -v ++@opindex --show-nonprinting ++Print nonprinting characters in octal backslash notation. ++ ++@item -w @var{page_width} ++@itemx --width=@var{page_width} ++@opindex -w ++@opindex --width ++Set page width to @var{page_width} characters for multiple text-column ++output only (default for @var{page_width} is 72). @option{-s[CHAR]} turns ++off the default page width and any line truncation and column alignment. ++Lines of full length are merged, regardless of the column options ++set. No @var{page_width} setting is possible with single column output. ++A @acronym{POSIX}-compliant formulation. ++ ++@item -W @var{page_width} ++@itemx --page-width=@var{page_width} ++@opindex -W ++@opindex --page-width ++Set the page width to @var{page_width} characters. That's valid with and ++without a column option. Text lines are truncated, unless @option{-J} ++is used. Together with one of the three column options ++(@option{-@var{column}}, @option{-a -@var{column}} or @option{-m}) column ++alignment is always used. The separator options @option{-S} or @option{-s} ++don't affect the @option{-W} option. Default is 72 characters. Without ++@option{-W @var{page_width}} and without any of the column options NO line ++truncation is used (defined to keep downward compatibility and to meet ++most frequent tasks). That's equivalent to @option{-W 72 -J}. The header ++line is never truncated.
++ ++@end table ++ ++@exitstatus ++ ++ ++@node fold invocation ++@section @command{fold}: Wrap input lines to fit in specified width ++ ++@pindex fold ++@cindex wrapping long input lines ++@cindex folding long input lines ++ ++@command{fold} writes each @var{file} (@option{-} means standard input), or ++standard input if none are given, to standard output, breaking long ++lines. Synopsis: ++ ++@example ++fold [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++By default, @command{fold} breaks lines wider than 80 columns. The output ++is split into as many lines as necessary. ++ ++@cindex screen columns ++@command{fold} counts screen columns by default; thus, a tab may count more ++than one column, backspace decreases the column count, and carriage ++return sets the column to zero. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -b ++@itemx --bytes ++@opindex -b ++@opindex --bytes ++Count bytes rather than columns, so that tabs, backspaces, and carriage ++returns are each counted as taking up one column, just like other ++characters. ++ ++@item -s ++@itemx --spaces ++@opindex -s ++@opindex --spaces ++Break at word boundaries: the line is broken after the last blank before ++the maximum line length. If the line contains no such blanks, the line ++is broken at the maximum line length as usual. ++ ++@item -w @var{width} ++@itemx --width=@var{width} ++@opindex -w ++@opindex --width ++Use a maximum line length of @var{width} columns instead of 80. ++ ++For compatibility @command{fold} supports an obsolete option syntax ++@option{-@var{width}}. New scripts should use @option{-w @var{width}} ++instead. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node Output of parts of files ++@chapter Output of parts of files ++ ++@cindex output of parts of files ++@cindex parts of files, output of ++ ++These commands output pieces of the input. ++ ++@menu ++* head invocation:: Output the first part of files. 
++* tail invocation:: Output the last part of files. ++* split invocation:: Split a file into fixed-size pieces. ++* csplit invocation:: Split a file into context-determined pieces. ++@end menu ++ ++@node head invocation ++@section @command{head}: Output the first part of files ++ ++@pindex head ++@cindex initial part of files, outputting ++@cindex first part of files, outputting ++ ++@command{head} prints the first part (10 lines by default) of each ++@var{file}; it reads from standard input if no files are given or ++when given a @var{file} of @option{-}. Synopsis: ++ ++@example ++head [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++If more than one @var{file} is specified, @command{head} prints a ++one-line header consisting of: ++ ++@example ++==> @var{file name} <== ++@end example ++ ++@noindent ++before the output for each @var{file}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -c @var{k} ++@itemx --bytes=@var{k} ++@opindex -c ++@opindex --bytes ++Print the first @var{k} bytes, instead of initial lines. ++However, if @var{k} starts with a @samp{-}, ++print all but the last @var{k} bytes of each file. ++@multiplierSuffixes{k} ++ ++@item -n @var{k} ++@itemx --lines=@var{k} ++@opindex -n ++@opindex --lines ++Output the first @var{k} lines. ++However, if @var{k} starts with a @samp{-}, ++print all but the last @var{k} lines of each file. ++Size multiplier suffixes are the same as with the @option{-c} option. ++ ++@item -q ++@itemx --quiet ++@itemx --silent ++@opindex -q ++@opindex --quiet ++@opindex --silent ++Never print file name headers. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Always print file name headers. ++ ++@end table ++ ++For compatibility @command{head} also supports an obsolete option syntax ++@option{-@var{count}@var{options}}, which is recognized only if it is ++specified first.
@var{count} is a decimal number optionally followed ++by a size letter (@samp{b}, @samp{k}, @samp{m}) as in @option{-c}, or ++@samp{l} to mean count by lines, or other option letters (@samp{cqv}). ++Scripts intended for standard hosts should use @option{-c @var{count}} ++or @option{-n @var{count}} instead. If your script must also run on ++hosts that support only the obsolete syntax, it is usually simpler to ++avoid @command{head}, e.g., by using @samp{sed 5q} instead of ++@samp{head -5}. ++ ++@exitstatus ++ ++ ++@node tail invocation ++@section @command{tail}: Output the last part of files ++ ++@pindex tail ++@cindex last part of files, outputting ++ ++@command{tail} prints the last part (10 lines by default) of each ++@var{file}; it reads from standard input if no files are given or ++when given a @var{file} of @samp{-}. Synopsis: ++ ++@example ++tail [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++If more than one @var{file} is specified, @command{tail} prints a ++one-line header consisting of: ++ ++@example ++==> @var{file name} <== ++@end example ++ ++@noindent ++before the output for each @var{file}. ++ ++@cindex BSD @command{tail} ++@sc{gnu} @command{tail} can output any amount of data (some other versions of ++@command{tail} cannot). It also has no @option{-r} option (print in ++reverse), since reversing a file is really a different job from printing ++the end of a file; BSD @command{tail} (which is the one with @option{-r}) can ++only reverse files that are at most as large as its buffer, which is ++typically 32 KiB@. A more reliable and versatile way to reverse files is ++the @sc{gnu} @command{tac} command. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -c @var{k} ++@itemx --bytes=@var{k} ++@opindex -c ++@opindex --bytes ++Output the last @var{k} bytes, instead of final lines. 
++However, if @var{k} starts with a @samp{+}, start printing with the ++@var{k}th byte from the start of each file, instead of from the end. ++@multiplierSuffixes{k} ++ ++@item -f ++@itemx --follow[=@var{how}] ++@opindex -f ++@opindex --follow ++@cindex growing files ++@vindex name @r{follow option} ++@vindex descriptor @r{follow option} ++Loop forever trying to read more characters at the end of the file, ++presumably because the file is growing. ++If more than one file is given, @command{tail} prints a header whenever it ++gets output from a different file, to indicate which file that output is ++from. ++ ++There are two ways to specify how you'd like to track files with this option, ++but that difference is noticeable only when a followed file is removed or ++renamed. ++If you'd like to continue to track the end of a growing file even after ++it has been unlinked, use @option{--follow=descriptor}. This is the default ++behavior, but it is not useful if you're tracking a log file that may be ++rotated (removed or renamed, then reopened). In that case, use ++@option{--follow=name} to track the named file by reopening it periodically ++to see if it has been removed and recreated by some other program. ++ ++No matter which method you use, if the tracked file is determined to have ++shrunk, @command{tail} prints a message saying the file has been truncated ++and resumes tracking the end of the file from the newly-determined endpoint. ++ ++When a file is removed, @command{tail}'s behavior depends on whether it is ++following the name or the descriptor. When following by name, tail can ++detect that a file has been removed and gives a message to that effect, ++and if @option{--retry} has been specified it will continue checking ++periodically to see if the file reappears. 
++When following a descriptor, tail does not detect that the file has ++been unlinked or renamed and issues no message; even though the file ++may no longer be accessible via its original name, it may still be ++growing. ++ ++The option values @samp{descriptor} and @samp{name} may be specified only ++with the long form of the option, not with @option{-f}. ++ ++The @option{-f} option is ignored if ++no @var{file} operand is specified and standard input is a FIFO or a pipe. ++Likewise, the @option{-f} option has no effect for any ++operand specified as @samp{-}, when standard input is a FIFO or a pipe. ++ ++@item -F ++@opindex -F ++This option is the same as @option{--follow=name --retry}. That is, tail ++will attempt to reopen a file when it is removed. Should this fail, tail ++will keep trying until it becomes accessible again. ++ ++@itemx --retry ++@opindex --retry ++This option is useful mainly when following by name (i.e., with ++@option{--follow=name}). ++Without this option, when tail encounters a file that doesn't ++exist or is otherwise inaccessible, it reports that fact and ++never checks it again. ++ ++@itemx --sleep-interval=@var{number} ++@opindex --sleep-interval ++Change the number of seconds to wait between iterations (the default is 1.0). ++During one iteration, every specified file is checked to see if it has ++changed size. ++Historical implementations of @command{tail} have required that ++@var{number} be an integer. However, GNU @command{tail} accepts ++an arbitrary floating point number (using a period before any ++fractional digits). ++ ++@itemx --pid=@var{pid} ++@opindex --pid ++When following by name or by descriptor, you may specify the process ID, ++@var{pid}, of the sole writer of all @var{file} arguments. Then, shortly ++after that process terminates, tail will also terminate. This will ++work properly only if the writer and the tailing process are running on ++the same machine. 
For example, to save the output of a build in a file ++and to watch the file grow, if you invoke @command{make} and @command{tail} ++like this then the tail process will stop when your build completes. ++Without this option, you would have had to kill the @code{tail -f} ++process yourself. ++ ++@example ++$ make >& makerr & tail --pid=$! -f makerr ++@end example ++ ++If you specify a @var{pid} that is not in use or that does not correspond ++to the process that is writing to the tailed files, then @command{tail} ++may terminate long before any @var{file}s stop growing or it may not ++terminate until long after the real writer has terminated. ++Note that @option{--pid} cannot be supported on some systems; @command{tail} ++will print a warning if this is the case. ++ ++@itemx --max-unchanged-stats=@var{n} ++@opindex --max-unchanged-stats ++When tailing a file by name, if there have been @var{n} (default ++n=@value{DEFAULT_MAX_N_UNCHANGED_STATS_BETWEEN_OPENS}) consecutive ++iterations for which the file has not changed, then ++@code{open}/@code{fstat} the file to determine if that file name is ++still associated with the same device/inode-number pair as before. ++When following a log file that is rotated, this is approximately the ++number of seconds between when tail prints the last pre-rotation lines ++and when it prints the lines that have accumulated in the new log file. ++This option is meaningful only when following by name. ++ ++@itemx -n @var{k} ++@itemx --lines=@var{k} ++@opindex -n ++@opindex --lines ++Output the last @var{k} lines. ++However, if @var{k} starts with a @samp{+}, start printing with the ++@var{k}th line from the start of each file, instead of from the end. ++Size multiplier suffixes are the same as with the @option{-c} option. ++ ++@item -q ++@itemx --quiet ++@itemx --silent ++@opindex -q ++@opindex --quiet ++@opindex --silent ++Never print file name headers. 
++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Always print file name headers. ++ ++@end table ++ ++For compatibility @command{tail} also supports an obsolete usage ++@samp{tail -[@var{count}][bcl][f] [@var{file}]}, which is recognized ++only if it does not conflict with the usage described ++above. This obsolete form uses exactly one option and at most one ++file. In the option, @var{count} is an optional decimal number optionally ++followed by a size letter (@samp{b}, @samp{c}, @samp{l}) to mean count ++by 512-byte blocks, bytes, or lines, optionally followed by @samp{f} ++which has the same meaning as @option{-f}. ++ ++@vindex _POSIX2_VERSION ++On older systems, the leading @samp{-} can be replaced by @samp{+} in ++the obsolete option syntax with the same meaning as in counts, and ++obsolete usage overrides normal usage when the two conflict. ++This obsolete behavior can be enabled or disabled with the ++@env{_POSIX2_VERSION} environment variable (@pxref{Standards ++conformance}). ++ ++Scripts intended for use on standard hosts should avoid obsolete ++syntax and should use @option{-c @var{count}[b]}, @option{-n ++@var{count}}, and/or @option{-f} instead. If your script must also ++run on hosts that support only the obsolete syntax, you can often ++rewrite it to avoid problematic usages, e.g., by using @samp{sed -n ++'$p'} rather than @samp{tail -1}. If that's not possible, the script ++can use a test like @samp{if tail -c +1 /dev/null 2>&1; ++then @dots{}} to decide which syntax to use. ++ ++Even if your script assumes the standard behavior, you should still ++beware usages whose behaviors differ depending on the @acronym{POSIX} ++version. 
For example, avoid @samp{tail - main.c}, since it might be ++interpreted as either @samp{tail main.c} or as @samp{tail -- - ++main.c}; avoid @samp{tail -c 4}, since it might mean either @samp{tail ++-c4} or @samp{tail -c 10 4}; and avoid @samp{tail +4}, since it might ++mean either @samp{tail ./+4} or @samp{tail -n +4}. ++ ++@exitstatus ++ ++ ++@node split invocation ++@section @command{split}: Split a file into fixed-size pieces ++ ++@pindex split ++@cindex splitting a file into pieces ++@cindex pieces, splitting a file into ++ ++@command{split} creates output files containing consecutive sections of ++@var{input} (standard input if none is given or @var{input} is ++@samp{-}). Synopsis: ++ ++@example ++split [@var{option}] [@var{input} [@var{prefix}]] ++@end example ++ ++By default, @command{split} puts 1000 lines of @var{input} (or whatever is ++left over for the last section), into each output file. ++ ++@cindex output file name prefix ++The output files' names consist of @var{prefix} (@samp{x} by default) ++followed by a group of characters (@samp{aa}, @samp{ab}, @dots{} by ++default), such that concatenating the output files in traditional ++sorted order by file name produces ++the original input file. If the output file names are exhausted, ++@command{split} reports an error without deleting the output files ++that it did create. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -l @var{lines} ++@itemx --lines=@var{lines} ++@opindex -l ++@opindex --lines ++Put @var{lines} lines of @var{input} into each output file. ++ ++For compatibility @command{split} also supports an obsolete ++option syntax @option{-@var{lines}}. New scripts should use @option{-l ++@var{lines}} instead. ++ ++@item -b @var{size} ++@itemx --bytes=@var{size} ++@opindex -b ++@opindex --bytes ++Put @var{size} bytes of @var{input} into each output file. 
++@multiplierSuffixes{size} ++ ++@item -C @var{size} ++@itemx --line-bytes=@var{size} ++@opindex -C ++@opindex --line-bytes ++Put into each output file as many complete lines of @var{input} as ++possible without exceeding @var{size} bytes. Individual lines longer than ++@var{size} bytes are broken into multiple files. ++@var{size} has the same format as for the @option{--bytes} option. ++ ++@item -a @var{length} ++@itemx --suffix-length=@var{length} ++@opindex -a ++@opindex --suffix-length ++Use suffixes of length @var{length}. The default @var{length} is 2. ++ ++@item -d ++@itemx --numeric-suffixes ++@opindex -d ++@opindex --numeric-suffixes ++Use digits in suffixes rather than lower-case letters. ++ ++@itemx --verbose ++@opindex --verbose ++Write a diagnostic just before each output file is opened. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node csplit invocation ++@section @command{csplit}: Split a file into context-determined pieces ++ ++@pindex csplit ++@cindex context splitting ++@cindex splitting a file into pieces by context ++ ++@command{csplit} creates zero or more output files containing sections of ++@var{input} (standard input if @var{input} is @samp{-}). Synopsis: ++ ++@example ++csplit [@var{option}]@dots{} @var{input} @var{pattern}@dots{} ++@end example ++ ++The contents of the output files are determined by the @var{pattern} ++arguments, as detailed below. An error occurs if a @var{pattern} ++argument refers to a nonexistent line of the input file (e.g., if no ++remaining line matches a given regular expression). After every ++@var{pattern} has been matched, any remaining input is copied into one ++last output file. ++ ++By default, @command{csplit} prints the number of bytes written to each ++output file after it has been created. ++ ++The types of pattern arguments are: ++ ++@table @samp ++ ++@item @var{n} ++Create an output file containing the input up to but not including line ++@var{n} (a positive integer). 
If followed by a repeat count, also ++create an output file containing the next @var{n} lines of the input ++file once for each repeat. ++ ++@item /@var{regexp}/[@var{offset}] ++Create an output file containing the current line up to (but not ++including) the next line of the input file that contains a match for ++@var{regexp}. The optional @var{offset} is an integer. ++If it is given, the input up to (but not including) the ++matching line plus or minus @var{offset} is put into the output file, ++and the line after that begins the next section of input. ++ ++@item %@var{regexp}%[@var{offset}] ++Like the previous type, except that it does not create an output ++file, so that section of the input file is effectively ignored. ++ ++@item @{@var{repeat-count}@} ++Repeat the previous pattern @var{repeat-count} additional ++times. The @var{repeat-count} can either be a positive integer or an ++asterisk, meaning repeat as many times as necessary until the input is ++exhausted. ++ ++@end table ++ ++The output files' names consist of a prefix (@samp{xx} by default) ++followed by a suffix. By default, the suffix is an ascending sequence ++of two-digit decimal numbers from @samp{00} to @samp{99}. In any case, ++concatenating the output files in sorted order by file name produces the ++original input file. ++ ++By default, if @command{csplit} encounters an error or receives a hangup, ++interrupt, quit, or terminate signal, it removes any output files ++that it has created so far before it exits. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -f @var{prefix} ++@itemx --prefix=@var{prefix} ++@opindex -f ++@opindex --prefix ++@cindex output file name prefix ++Use @var{prefix} as the output file name prefix. ++ ++@item -b @var{suffix} ++@itemx --suffix=@var{suffix} ++@opindex -b ++@opindex --suffix ++@cindex output file name suffix ++Use @var{suffix} as the output file name suffix. 
When this option is ++specified, the suffix string must include exactly one ++@code{printf(3)}-style conversion specification, possibly including ++format specification flags, a field width, a precision specification, ++or all of these kinds of modifiers. The format letter must convert a ++binary integer argument to readable form; thus, only @samp{d}, @samp{i}, ++@samp{u}, @samp{o}, @samp{x}, and @samp{X} conversions are allowed. The ++entire @var{suffix} is given (with the current output file number) to ++@code{sprintf(3)} to form the file name suffixes for each of the ++individual output files in turn. If this option is used, the ++@option{--digits} option is ignored. ++ ++@item -n @var{digits} ++@itemx --digits=@var{digits} ++@opindex -n ++@opindex --digits ++Use output file names containing numbers that are @var{digits} digits ++long instead of the default 2. ++ ++@item -k ++@itemx --keep-files ++@opindex -k ++@opindex --keep-files ++Do not remove output files when errors are encountered. ++ ++@item -z ++@itemx --elide-empty-files ++@opindex -z ++@opindex --elide-empty-files ++Suppress the generation of zero-length output files. (In cases where ++the section delimiters of the input file are supposed to mark the first ++lines of each of the sections, the first output file will generally be a ++zero-length file unless you use this option.) The output file sequence ++numbers always run consecutively starting from 0, even when this option ++is specified. ++ ++@item -s ++@itemx -q ++@itemx --silent ++@itemx --quiet ++@opindex -s ++@opindex -q ++@opindex --silent ++@opindex --quiet ++Do not print counts of output file sizes. ++ ++@end table ++ ++@exitstatus ++ ++Here is an example of its usage.
++First, create an empty directory for the exercise, ++and cd into it: ++ ++@example ++$ mkdir d && cd d ++@end example ++ ++Now, split the sequence of 1..14 on lines that end with 0 or 5: ++ ++@example ++$ seq 14 | csplit - '/[05]$/' '@{*@}' ++8 ++10 ++15 ++@end example ++ ++Each number printed above is the size of an output ++file that csplit has just created. ++List the names of those output files: ++ ++@example ++$ ls ++xx00 xx01 xx02 ++@end example ++ ++Use @command{head} to show their contents: ++ ++@example ++$ head xx* ++==> xx00 <== ++1 ++2 ++3 ++4 ++ ++==> xx01 <== ++5 ++6 ++7 ++8 ++9 ++ ++==> xx02 <== ++10 ++11 ++12 ++13 ++14 ++@end example ++ ++@node Summarizing files ++@chapter Summarizing files ++ ++@cindex summarizing files ++ ++These commands generate just a few numbers representing entire ++contents of files. ++ ++@menu ++* wc invocation:: Print newline, word, and byte counts. ++* sum invocation:: Print checksum and block counts. ++* cksum invocation:: Print CRC checksum and byte counts. ++* md5sum invocation:: Print or check MD5 digests. ++* sha1sum invocation:: Print or check SHA-1 digests. ++* sha2 utilities:: Print or check SHA-2 digests. ++@end menu ++ ++ ++@node wc invocation ++@section @command{wc}: Print newline, word, and byte counts ++ ++@pindex wc ++@cindex byte count ++@cindex character count ++@cindex word count ++@cindex line count ++ ++@command{wc} counts the number of bytes, characters, whitespace-separated ++words, and newlines in each given @var{file}, or standard input if none ++are given or for a @var{file} of @samp{-}. Synopsis: ++ ++@example ++wc [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@cindex total counts ++@command{wc} prints one line of counts for each file, and if the file was ++given as an argument, it prints the file name following the counts. If ++more than one @var{file} is given, @command{wc} prints a final line ++containing the cumulative counts, with the file name @file{total}. 
The ++counts are printed in this order: newlines, words, characters, bytes, ++maximum line length. ++Each count is printed right-justified in a field with at least one ++space between fields so that the numbers and file names normally line ++up nicely in columns. The width of the count fields varies depending ++on the inputs, so you should not depend on a particular field width. ++However, as a @acronym{GNU} extension, if only one count is printed, ++it is guaranteed to be printed without leading spaces. ++ ++By default, @command{wc} prints three counts: the newline, words, and byte ++counts. Options can specify that only certain counts be printed. ++Options do not undo others previously given, so ++ ++@example ++wc --bytes --words ++@end example ++ ++@noindent ++prints both the byte counts and the word counts. ++ ++With the @option{--max-line-length} option, @command{wc} prints the length ++of the longest line per file, and if there is more than one file it ++prints the maximum (not the sum) of those lengths. The line lengths here ++are measured in screen columns, according to the current locale and ++assuming tab positions in every 8th column. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -c ++@itemx --bytes ++@opindex -c ++@opindex --bytes ++Print only the byte counts. ++ ++@item -m ++@itemx --chars ++@opindex -m ++@opindex --chars ++Print only the character counts. ++ ++@item -w ++@itemx --words ++@opindex -w ++@opindex --words ++Print only the word counts. ++ ++@item -l ++@itemx --lines ++@opindex -l ++@opindex --lines ++Print only the newline counts. ++ ++@item -L ++@itemx --max-line-length ++@opindex -L ++@opindex --max-line-length ++Print only the maximum line lengths. ++ ++@macro filesZeroFromOption{cmd,withTotalOption,subListOutput} ++@itemx --files0-from=@var{file} ++@opindex --files0-from=@var{file} ++@c This is commented out to avoid a texi2dvi failure. 
++@c texi2dvi (GNU Texinfo 4.11) 1.104 ++@c @cindex including files from @command{\cmd\} ++Disallow processing files named on the command line, and instead process ++those named in file @var{file}; each name being terminated by a zero byte ++(@acronym{ASCII} @sc{nul}). ++This is useful \withTotalOption\ ++when the list of file names is so long that it may exceed a command line ++length limitation. ++In such cases, running @command{\cmd\} via @command{xargs} is undesirable ++because it splits the list into pieces and makes @command{\cmd\} print ++\subListOutput\ for each sublist rather than for the entire list. ++One way to produce a list of @acronym{ASCII} @sc{nul} terminated file names is with @sc{gnu} ++@command{find}, using its @option{-print0} predicate. ++If @var{file} is @samp{-} then the @acronym{ASCII} @sc{nul} terminated file names ++are read from standard input. ++@end macro ++@filesZeroFromOption{wc,,a total} ++ ++For example, to find the length of the longest line in any @file{.c} or ++@file{.h} file in the current hierarchy, do this: ++ ++@example ++find . -name '*.[ch]' -print0 | ++ wc -L --files0-from=- | tail -n1 ++@end example ++ ++@end table ++ ++@exitstatus ++ ++ ++@node sum invocation ++@section @command{sum}: Print checksum and block counts ++ ++@pindex sum ++@cindex 16-bit checksum ++@cindex checksum, 16-bit ++ ++@command{sum} computes a 16-bit checksum for each given @var{file}, or ++standard input if none are given or for a @var{file} of @samp{-}. Synopsis: ++ ++@example ++sum [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@command{sum} prints the checksum for each @var{file} followed by the ++number of blocks in the file (rounded up). If more than one @var{file} ++is given, file names are also printed (by default). (With the ++@option{--sysv} option, corresponding file names are printed when there is ++at least one file argument.) 
++ ++By default, @sc{gnu} @command{sum} computes checksums using an algorithm ++compatible with BSD @command{sum} and prints file sizes in units of ++1024-byte blocks. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -r ++@opindex -r ++@cindex BSD @command{sum} ++Use the default (BSD compatible) algorithm. This option is included for ++compatibility with the System V @command{sum}. Unless @option{-s} was also ++given, it has no effect. ++ ++@item -s ++@itemx --sysv ++@opindex -s ++@opindex --sysv ++@cindex System V @command{sum} ++Compute checksums using an algorithm compatible with System V ++@command{sum}'s default, and print file sizes in units of 512-byte blocks. ++ ++@end table ++ ++@command{sum} is provided for compatibility; the @command{cksum} program (see ++next section) is preferable in new applications. ++ ++@exitstatus ++ ++ ++@node cksum invocation ++@section @command{cksum}: Print CRC checksum and byte counts ++ ++@pindex cksum ++@cindex cyclic redundancy check ++@cindex CRC checksum ++ ++@command{cksum} computes a cyclic redundancy check (CRC) checksum for each ++given @var{file}, or standard input if none are given or for a ++@var{file} of @samp{-}. Synopsis: ++ ++@example ++cksum [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@command{cksum} prints the CRC checksum for each file along with the number ++of bytes in the file, and the file name unless no arguments were given. ++ ++@command{cksum} is typically used to ensure that files ++transferred by unreliable means (e.g., netnews) have not been corrupted, ++by comparing the @command{cksum} output for the received files with the ++@command{cksum} output for the original files (typically given in the ++distribution). ++ ++The CRC algorithm is specified by the @acronym{POSIX} standard. It is not ++compatible with the BSD or System V @command{sum} algorithms (see the ++previous section); it is more robust. 
++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@exitstatus ++ ++ ++@node md5sum invocation ++@section @command{md5sum}: Print or check MD5 digests ++ ++@pindex md5sum ++@cindex MD5 ++@cindex 128-bit checksum ++@cindex checksum, 128-bit ++@cindex fingerprint, 128-bit ++@cindex message-digest, 128-bit ++ ++@command{md5sum} computes a 128-bit checksum (or @dfn{fingerprint} or ++@dfn{message-digest}) for each specified @var{file}. ++ ++Note: The MD5 digest is more reliable than a simple CRC (provided by ++the @command{cksum} command) for detecting accidental file corruption, ++as the chances of accidentally having two files with identical MD5 ++are vanishingly small. However, it should not be considered truly ++secure against malicious tampering: although finding a file with a ++given MD5 fingerprint, or modifying a file so as to retain its MD5 are ++considered infeasible at the moment, it is known how to produce ++different files with identical MD5 (a ``collision''), something which ++can be a security issue in certain contexts. For more secure hashes, ++consider using SHA-1 or SHA-2. @xref{sha1sum invocation}, and ++@ref{sha2 utilities}. ++ ++If a @var{file} is specified as @samp{-} or if no files are given ++@command{md5sum} computes the checksum for the standard input. ++@command{md5sum} can also determine whether a file and checksum are ++consistent. Synopsis: ++ ++@example ++md5sum [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++For each @var{file}, @samp{md5sum} outputs the MD5 checksum, a flag ++indicating a binary or text input file, and the file name. ++If @var{file} contains a backslash or newline, the ++line is started with a backslash, and each problematic character in ++the file name is escaped with a backslash, making the output ++unambiguous even in the presence of arbitrary file names. ++If @var{file} is omitted or specified as @samp{-}, standard input is read. 
++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -b ++@itemx --binary ++@opindex -b ++@opindex --binary ++@cindex binary input files ++Treat each input file as binary, by reading it in binary mode and ++outputting a @samp{*} flag. This is the inverse of @option{--text}. ++On systems like @acronym{GNU} that do not distinguish between binary ++and text files, this option merely flags each input file as binary: ++the MD5 checksum is unaffected. This option is the default on systems ++like MS-DOS that distinguish between binary and text files, except ++for reading standard input when standard input is a terminal. ++ ++@item -c ++@itemx --check ++Read file names and checksum information (not data) from each ++@var{file} (or from stdin if no @var{file} was specified) and report ++whether the checksums match the contents of the named files. ++The input to this mode of @command{md5sum} is usually the output of ++a prior, checksum-generating run of @samp{md5sum}. ++Each valid line of input consists of an MD5 checksum, a binary/text ++flag, and then a file name. ++Binary files are marked with @samp{*}, text with @samp{ }. ++For each such line, @command{md5sum} reads the named file and computes its ++MD5 checksum. Then, if the computed message digest does not match the ++one on the line with the file name, the file is noted as having ++failed the test. Otherwise, the file passes the test. ++By default, for each valid line, one line is written to standard ++output indicating whether the named file passed the test. ++After all checks have been performed, if there were any failures, ++a warning is issued to standard error. ++Use the @option{--status} option to inhibit that output. ++If any listed file cannot be opened or read, if any valid line has ++an MD5 checksum inconsistent with the associated file, or if no valid ++line is found, @command{md5sum} exits with nonzero status. Otherwise, ++it exits successfully. 
++ ++@itemx --quiet ++@opindex --quiet ++@cindex verifying MD5 checksums ++This option is useful only when verifying checksums. ++When verifying checksums, don't generate an 'OK' message per successfully ++checked file. Files that fail the verification are reported in the ++default one-line-per-file format. If there is any checksum mismatch, ++print a warning summarizing the failures to standard error. ++ ++@itemx --status ++@opindex --status ++@cindex verifying MD5 checksums ++This option is useful only when verifying checksums. ++When verifying checksums, don't generate the default one-line-per-file ++diagnostic and don't output the warning summarizing any failures. ++Failures to open or read a file still evoke individual diagnostics to ++standard error. ++If all listed files are readable and are consistent with the associated ++MD5 checksums, exit successfully. Otherwise exit with a status code ++indicating there was a failure. ++ ++@item -t ++@itemx --text ++@opindex -t ++@opindex --text ++@cindex text input files ++Treat each input file as text, by reading it in text mode and ++outputting a @samp{ } flag. This is the inverse of @option{--binary}. ++This option is the default on systems like @acronym{GNU} that do not ++distinguish between binary and text files. On other systems, it is ++the default for reading standard input when standard input is a ++terminal. ++ ++@item -w ++@itemx --warn ++@opindex -w ++@opindex --warn ++@cindex verifying MD5 checksums ++When verifying checksums, warn about improperly formatted MD5 checksum lines. ++This option is useful only if all but a few lines in the checked input ++are valid. 
++ ++@end table ++ ++@exitstatus ++ ++ ++@node sha1sum invocation ++@section @command{sha1sum}: Print or check SHA-1 digests ++ ++@pindex sha1sum ++@cindex SHA-1 ++@cindex 160-bit checksum ++@cindex checksum, 160-bit ++@cindex fingerprint, 160-bit ++@cindex message-digest, 160-bit ++ ++@command{sha1sum} computes a 160-bit checksum for each specified ++@var{file}. The usage and options of this command are precisely the ++same as for @command{md5sum}. @xref{md5sum invocation}. ++ ++Note: The SHA-1 digest is more secure than MD5, and no collisions of ++it are known (different files having the same fingerprint). However, ++it is known that they can be produced with considerable, but not ++unreasonable, resources. For this reason, it is generally considered ++that SHA-1 should be gradually phased out in favor of the more secure ++SHA-2 hash algorithms. @xref{sha2 utilities}. ++ ++ ++@node sha2 utilities ++@section sha2 utilities: Print or check SHA-2 digests ++ ++@pindex sha224sum ++@pindex sha256sum ++@pindex sha384sum ++@pindex sha512sum ++@cindex SHA-2 ++@cindex 224-bit checksum ++@cindex 256-bit checksum ++@cindex 384-bit checksum ++@cindex 512-bit checksum ++@cindex checksum, 224-bit ++@cindex checksum, 256-bit ++@cindex checksum, 384-bit ++@cindex checksum, 512-bit ++@cindex fingerprint, 224-bit ++@cindex fingerprint, 256-bit ++@cindex fingerprint, 384-bit ++@cindex fingerprint, 512-bit ++@cindex message-digest, 224-bit ++@cindex message-digest, 256-bit ++@cindex message-digest, 384-bit ++@cindex message-digest, 512-bit ++ ++The commands @command{sha224sum}, @command{sha256sum}, ++@command{sha384sum} and @command{sha512sum} compute checksums of ++various lengths (respectively 224, 256, 384 and 512 bits), ++collectively known as the SHA-2 hashes. The usage and options of ++these commands are precisely the same as for @command{md5sum}. ++@xref{md5sum invocation}. 
++ ++Note: The SHA384 and SHA512 digests are considerably slower to ++compute, especially on 32-bit computers, than SHA224 or SHA256. ++ ++ ++@node Operating on sorted files ++@chapter Operating on sorted files ++ ++@cindex operating on sorted files ++@cindex sorted files, operations on ++ ++These commands work with (or produce) sorted files. ++ ++@menu ++* sort invocation:: Sort text files. ++* shuf invocation:: Shuffle text files. ++* uniq invocation:: Uniquify files. ++* comm invocation:: Compare two sorted files line by line. ++* ptx invocation:: Produce a permuted index of file contents. ++* tsort invocation:: Topological sort. ++@end menu ++ ++ ++@node sort invocation ++@section @command{sort}: Sort text files ++ ++@pindex sort ++@cindex sorting files ++ ++@command{sort} sorts, merges, or compares all the lines from the given ++files, or standard input if none are given or for a @var{file} of ++@samp{-}. By default, @command{sort} writes the results to standard ++output. Synopsis: ++ ++@example ++sort [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@command{sort} has three modes of operation: sort (the default), merge, ++and check for sortedness. The following options change the operation ++mode: ++ ++@table @samp ++ ++@item -c ++@itemx --check ++@itemx --check=diagnose-first ++@opindex -c ++@opindex --check ++@cindex checking for sortedness ++Check whether the given file is already sorted: if it is not all ++sorted, print a diagnostic containing the first out-of-order line and ++exit with a status of 1. ++Otherwise, exit successfully. ++At most one input file can be given. ++ ++@item -C ++@itemx --check=quiet ++@itemx --check=silent ++@opindex -C ++@opindex --check ++@cindex checking for sortedness ++Exit successfully if the given file is already sorted, and ++exit with status 1 otherwise. ++At most one input file can be given. ++This is like @option{-c}, except it does not print a diagnostic. 
++ ++@item -m ++@itemx --merge ++@opindex -m ++@opindex --merge ++@cindex merging sorted files ++Merge the given files by sorting them as a group. Each input file must ++always be individually sorted. It always works to sort instead of ++merge; merging is provided because it is faster, in the case where it ++works. ++ ++@end table ++ ++@cindex sort stability ++@cindex sort's last-resort comparison ++A pair of lines is compared as follows: ++@command{sort} compares each pair of fields, in the ++order specified on the command line, according to the associated ++ordering options, until a difference is found or no fields are left. ++If no key fields are specified, @command{sort} uses a default key of ++the entire line. Finally, as a last resort when all keys compare ++equal, @command{sort} compares entire lines as if no ordering options ++other than @option{--reverse} (@option{-r}) were specified. The ++@option{--stable} (@option{-s}) option disables this @dfn{last-resort ++comparison} so that lines in which all fields compare equal are left ++in their original relative order. The @option{--unique} ++(@option{-u}) option also disables the last-resort comparison. ++ ++@vindex LC_ALL ++@vindex LC_COLLATE ++Unless otherwise specified, all comparisons use the character collating ++sequence specified by the @env{LC_COLLATE} locale.@footnote{If you ++use a non-@acronym{POSIX} locale (e.g., by setting @env{LC_ALL} ++to @samp{en_US}), then @command{sort} may produce output that is sorted ++differently than you're accustomed to. In that case, set the @env{LC_ALL} ++environment variable to @samp{C}. Note that setting only @env{LC_COLLATE} ++has two problems. First, it is ineffective if @env{LC_ALL} is also set. ++Second, it has undefined behavior if @env{LC_CTYPE} (or @env{LANG}, if ++@env{LC_CTYPE} is unset) is set to an incompatible value. 
For example, ++you get undefined behavior if @env{LC_CTYPE} is @code{ja_JP.PCK} but ++@env{LC_COLLATE} is @code{en_US.UTF-8}.} ++ ++@sc{gnu} @command{sort} (as specified for all @sc{gnu} utilities) has no ++limit on input line length or restrictions on bytes allowed within lines. ++In addition, if the final byte of an input file is not a newline, @sc{gnu} ++@command{sort} silently supplies one. A line's trailing newline is not ++part of the line for comparison purposes. ++ ++@cindex exit status of @command{sort} ++Exit status: ++ ++@display ++0 if no error occurred ++1 if invoked with @option{-c} or @option{-C} and the input is not sorted ++2 if an error occurred ++@end display ++ ++@vindex TMPDIR ++If the environment variable @env{TMPDIR} is set, @command{sort} uses its ++value as the directory for temporary files instead of @file{/tmp}. The ++@option{--temporary-directory} (@option{-T}) option in turn overrides ++the environment variable. ++ ++The following options affect the ordering of output lines. They may be ++specified globally or as part of a specific key field. If no key ++fields are specified, global options apply to comparison of entire ++lines; otherwise the global options are inherited by key fields that do ++not specify any special options of their own. In pre-@acronym{POSIX} ++versions of @command{sort}, global options affect only later key fields, ++so portable shell scripts should specify global options first. ++ ++@table @samp ++ ++@item -b ++@itemx --ignore-leading-blanks ++@opindex -b ++@opindex --ignore-leading-blanks ++@cindex blanks, ignoring leading ++@vindex LC_CTYPE ++Ignore leading blanks when finding sort keys in each line. ++By default a blank is a space or a tab, but the @env{LC_CTYPE} locale ++can change this. Note blanks may be ignored by your locale's collating ++rules, but without this option they will be significant for character ++positions specified in keys with the @option{-k} option. 
++ ++@item -d ++@itemx --dictionary-order ++@opindex -d ++@opindex --dictionary-order ++@cindex dictionary order ++@cindex phone directory order ++@cindex telephone directory order ++@vindex LC_CTYPE ++Sort in @dfn{phone directory} order: ignore all characters except ++letters, digits and blanks when sorting. ++By default letters and digits are those of @acronym{ASCII} and a blank ++is a space or a tab, but the @env{LC_CTYPE} locale can change this. ++ ++@item -f ++@itemx --ignore-case ++@opindex -f ++@opindex --ignore-case ++@cindex ignoring case ++@cindex case folding ++@vindex LC_CTYPE ++Fold lowercase characters into the equivalent uppercase characters when ++comparing so that, for example, @samp{b} and @samp{B} sort as equal. ++The @env{LC_CTYPE} locale determines character types. ++When used with @option{--unique} those lower case equivalent lines are ++thrown away. (There is currently no way to throw away the upper case ++equivalent instead. (Any @option{--reverse} given would only affect ++the final result, after the throwing away.)) ++ ++@item -g ++@itemx --general-numeric-sort ++@itemx --sort=general-numeric ++@opindex -g ++@opindex --general-numeric-sort ++@opindex --sort ++@cindex general numeric sort ++@vindex LC_NUMERIC ++Sort numerically, using the standard C function @code{strtod} to convert ++a prefix of each line to a double-precision floating point number. ++This allows floating point numbers to be specified in scientific notation, ++like @code{1.0e-34} and @code{10e100}. ++The @env{LC_NUMERIC} locale determines the decimal-point character. ++Do not report overflow, underflow, or conversion errors. ++Use the following collating sequence: ++ ++@itemize @bullet ++@item ++Lines that do not start with numbers (all considered to be equal). ++@item ++NaNs (``Not a Number'' values, in IEEE floating point arithmetic) ++in a consistent but machine-dependent order. ++@item ++Minus infinity. 
++@item ++Finite numbers in ascending numeric order (with @math{-0} and @math{+0} equal). ++@item ++Plus infinity. ++@end itemize ++ ++Use this option only if there is no alternative; it is much slower than ++@option{--numeric-sort} (@option{-n}) and it can lose information when ++converting to floating point. ++ ++@item -h ++@itemx --human-numeric-sort ++@itemx --sort=human-numeric ++@opindex -h ++@opindex --human-numeric-sort ++@opindex --sort ++@cindex human numeric sort ++@vindex LC_NUMERIC ++Sort numerically, as per the @option{--numeric-sort} option below, and in ++addition handle IEC or SI suffixes like MiB, MB, etc.@: (@pxref{Block size}). ++Note that a mixture of IEC and SI suffixes is not supported and will ++be flagged as an error. Also, the numbers must be abbreviated uniformly; ++i.e., values with different precisions, like 6000K and 5M, will be sorted ++incorrectly. ++ ++@item -i ++@itemx --ignore-nonprinting ++@opindex -i ++@opindex --ignore-nonprinting ++@cindex nonprinting characters, ignoring ++@cindex unprintable characters, ignoring ++@vindex LC_CTYPE ++Ignore nonprinting characters. ++The @env{LC_CTYPE} locale determines character types. ++This option has no effect if the stronger @option{--dictionary-order} ++(@option{-d}) option is also given. ++ ++@item -M ++@itemx --month-sort ++@itemx --sort=month ++@opindex -M ++@opindex --month-sort ++@opindex --sort ++@cindex months, sorting by ++@vindex LC_TIME ++An initial string, consisting of any amount of blanks, followed ++by a month name abbreviation, is folded to UPPER case and ++compared in the order @samp{JAN} < @samp{FEB} < @dots{} < @samp{DEC}. ++Invalid names compare low to valid names. The @env{LC_TIME} locale ++category determines the month spellings. ++By default a blank is a space or a tab, but the @env{LC_CTYPE} locale ++can change this. 
++ ++@item -n ++@itemx --numeric-sort ++@itemx --sort=numeric ++@opindex -n ++@opindex --numeric-sort ++@opindex --sort ++@cindex numeric sort ++@vindex LC_NUMERIC ++Sort numerically. The number begins each line and consists ++of optional blanks, an optional @samp{-} sign, and zero or more ++digits possibly separated by thousands separators, optionally followed ++by a decimal-point character and zero or more digits. An empty ++number is treated as @samp{0}. The @env{LC_NUMERIC} ++locale specifies the decimal-point character and thousands separator. ++By default a blank is a space or a tab, but the @env{LC_CTYPE} locale ++can change this. ++ ++Comparison is exact; there is no rounding error. ++ ++Neither a leading @samp{+} nor exponential notation is recognized. ++To compare such strings numerically, use the ++@option{--general-numeric-sort} (@option{-g}) option. ++ ++@item -V ++@itemx --version-sort ++@opindex -V ++@opindex --version-sort ++@cindex version number sort ++@vindex LC_NUMERIC ++Sort per @code{strverscmp(3)}. This is a normal string comparison, except ++that embedded decimal numbers are sorted by numeric value ++(see @option{--numeric-sort} above). ++ ++@item -r ++@itemx --reverse ++@opindex -r ++@opindex --reverse ++@cindex reverse sorting ++Reverse the result of comparison, so that lines with greater key values ++appear earlier in the output instead of later. ++ ++@item -R ++@itemx --random-sort ++@itemx --sort=random ++@opindex -R ++@opindex --random-sort ++@opindex --sort ++@cindex random sort ++Sort by hashing the input keys and then sorting the hash values. ++Choose the hash function at random, ensuring that it is free of ++collisions so that differing keys have differing hash values. This is ++like a random permutation of the inputs (@pxref{shuf invocation}), ++except that keys with the same value sort together. ++ ++If multiple random sort fields are specified, the same random hash ++function is used for all fields. 
To use different random hash ++functions for different fields, you can invoke @command{sort} more ++than once. ++ ++The choice of hash function is affected by the ++@option{--random-source} option. ++ ++@end table ++ ++Other options are: ++ ++@table @samp ++ ++@item --compress-program=@var{prog} ++Compress any temporary files with the program @var{prog}. ++ ++With no arguments, @var{prog} must compress standard input to standard ++output, and when given the @option{-d} option it must decompress ++standard input to standard output. ++ ++Terminate with an error if @var{prog} exits with nonzero status. ++ ++White space and the backslash character should not appear in ++@var{prog}; they are reserved for future use. ++ ++@filesZeroFromOption{sort,,sorted output} ++ ++@item -k @var{pos1}[,@var{pos2}] ++@itemx --key=@var{pos1}[,@var{pos2}] ++@opindex -k ++@opindex --key ++@cindex sort field ++Specify a sort field that consists of the part of the line between ++@var{pos1} and @var{pos2} (or the end of the line, if @var{pos2} is ++omitted), @emph{inclusive}. ++ ++Each @var{pos} has the form @samp{@var{f}[.@var{c}][@var{opts}]}, ++where @var{f} is the number of the field to use, and @var{c} is the number ++of the first character from the beginning of the field. Fields and character ++positions are numbered starting with 1; a character position of zero in ++@var{pos2} indicates the field's last character. If @samp{.@var{c}} is ++omitted from @var{pos1}, it defaults to 1 (the beginning of the field); ++if omitted from @var{pos2}, it defaults to 0 (the end of the field). ++@var{opts} are ordering options, allowing individual keys to be sorted ++according to different rules; see below for details. Keys can span ++multiple fields. ++ ++Example: To sort on the second field, use @option{--key=2,2} ++(@option{-k 2,2}). See below for more notes on keys and more examples. 
++ ++@item --batch-size=@var{nmerge} ++@opindex --batch-size ++@cindex number of inputs to merge, nmerge ++Merge at most @var{nmerge} inputs at once. ++ ++When @command{sort} has to merge more than @var{nmerge} inputs, ++it merges them in groups of @var{nmerge}, saving the result in ++a temporary file, which is then used as an input in a subsequent merge. ++ ++A large value of @var{nmerge} may improve merge performance and decrease ++temporary storage utilization at the expense of increased memory usage ++and I/O. Conversely a small value of @var{nmerge} may reduce memory ++requirements and I/O at the expense of temporary storage consumption and ++merge performance. ++ ++The value of @var{nmerge} must be at least 2. The default value is ++currently 16, but this is implementation-dependent and may change in ++the future. ++ ++The value of @var{nmerge} may be bounded by a resource limit for open ++file descriptors. The commands @samp{ulimit -n} or @samp{getconf ++OPEN_MAX} may display limits for your systems; these limits may be ++modified further if your program already has some files open, or if ++the operating system has other limits on the number of open files. If ++the value of @var{nmerge} exceeds the resource limit, @command{sort} ++silently uses a smaller value. ++ ++@item -o @var{output-file} ++@itemx --output=@var{output-file} ++@opindex -o ++@opindex --output ++@cindex overwriting of input, allowed ++Write output to @var{output-file} instead of standard output. ++Normally, @command{sort} reads all input before opening ++@var{output-file}, so you can safely sort a file in place by using ++commands like @code{sort -o F F} and @code{cat F | sort -o F}. ++However, @command{sort} with @option{--merge} (@option{-m}) can open ++the output file before reading all input, so a command like @code{cat ++F | sort -m -o F - G} is not safe as @command{sort} might start ++writing @file{F} before @command{cat} is done reading it. 
++ ++@vindex POSIXLY_CORRECT ++On newer systems, @option{-o} cannot appear after an input file if ++@env{POSIXLY_CORRECT} is set, e.g., @samp{sort F -o F}. Portable ++scripts should specify @option{-o @var{output-file}} before any input ++files. ++ ++@item --random-source=@var{file} ++@opindex --random-source ++@cindex random source for sorting ++Use @var{file} as a source of random data used to determine which ++random hash function to use with the @option{-R} option. @xref{Random ++sources}. ++ ++@item -s ++@itemx --stable ++@opindex -s ++@opindex --stable ++@cindex sort stability ++@cindex sort's last-resort comparison ++ ++Make @command{sort} stable by disabling its last-resort comparison. ++This option has no effect if no fields or global ordering options ++other than @option{--reverse} (@option{-r}) are specified. ++ ++@item -S @var{size} ++@itemx --buffer-size=@var{size} ++@opindex -S ++@opindex --buffer-size ++@cindex size for main memory sorting ++Use a main-memory sort buffer of the given @var{size}. By default, ++@var{size} is in units of 1024 bytes. Appending @samp{%} causes ++@var{size} to be interpreted as a percentage of physical memory. ++Appending @samp{K} multiplies @var{size} by 1024 (the default), ++@samp{M} by 1,048,576, @samp{G} by 1,073,741,824, and so on for ++@samp{T}, @samp{P}, @samp{E}, @samp{Z}, and @samp{Y}. Appending ++@samp{b} causes @var{size} to be interpreted as a byte count, with no ++multiplication. ++ ++This option can improve the performance of @command{sort} by causing it ++to start with a larger or smaller sort buffer than the default. ++However, this option affects only the initial buffer size. The buffer ++grows beyond @var{size} if @command{sort} encounters input lines larger ++than @var{size}. 
++ ++@item -t @var{separator} ++@itemx --field-separator=@var{separator} ++@opindex -t ++@opindex --field-separator ++@cindex field separator character ++Use character @var{separator} as the field separator when finding the ++sort keys in each line. By default, fields are separated by the empty ++string between a non-blank character and a blank character. ++By default a blank is a space or a tab, but the @env{LC_CTYPE} locale ++can change this. ++ ++That is, given the input line @w{@samp{ foo bar}}, @command{sort} breaks it ++into fields @w{@samp{ foo}} and @w{@samp{ bar}}. The field separator is ++not considered to be part of either the field preceding or the field ++following, so with @samp{sort @w{-t " "}} the same input line has ++three fields: an empty field, @samp{foo}, and @samp{bar}. ++However, fields that extend to the end of the line, ++as @option{-k 2}, or fields consisting of a range, as @option{-k 2,3}, ++retain the field separators present between the endpoints of the range. ++ ++To specify @acronym{ASCII} @sc{nul} as the field separator, ++use the two-character string @samp{\0}, e.g., @samp{sort -t '\0'}. ++ ++@item -T @var{tempdir} ++@itemx --temporary-directory=@var{tempdir} ++@opindex -T ++@opindex --temporary-directory ++@cindex temporary directory ++@vindex TMPDIR ++Use directory @var{tempdir} to store temporary files, overriding the ++@env{TMPDIR} environment variable. If this option is given more than ++once, temporary files are stored in all the directories given. If you ++have a large sort or merge that is I/O-bound, you can often improve ++performance by using this option to specify directories on different ++disks and controllers. ++ ++@item -u ++@itemx --unique ++@opindex -u ++@opindex --unique ++@cindex uniquifying output ++ ++Normally, output only the first of a sequence of lines that compare ++equal. For the @option{--check} (@option{-c} or @option{-C}) option, ++check that no pair of consecutive lines compares equal. 
++ ++This option also disables the default last-resort comparison. ++ ++The commands @code{sort -u} and @code{sort | uniq} are equivalent, but ++this equivalence does not extend to arbitrary @command{sort} options. ++For example, @code{sort -n -u} inspects only the value of the initial ++numeric string when checking for uniqueness, whereas @code{sort -n | ++uniq} inspects the entire line. @xref{uniq invocation}. ++ ++@macro zeroTerminatedOption ++@item -z ++@itemx --zero-terminated ++@opindex -z ++@opindex --zero-terminated ++@cindex process zero-terminated items ++Delimit items with a zero byte rather than a newline (@acronym{ASCII} @sc{lf}). ++I.e., treat input as items separated by @acronym{ASCII} @sc{nul} ++and terminate output items with @acronym{ASCII} @sc{nul}. ++This option can be useful in conjunction with @samp{perl -0} or ++@samp{find -print0} and @samp{xargs -0} which do the same in order to ++reliably handle arbitrary file names (even those containing blanks ++or other special characters). ++@end macro ++@zeroTerminatedOption ++ ++@end table ++ ++Historical (BSD and System V) implementations of @command{sort} have ++differed in their interpretation of some options, particularly ++@option{-b}, @option{-f}, and @option{-n}. @sc{gnu} sort follows the @acronym{POSIX} ++behavior, which is usually (but not always!) like the System V behavior. ++According to @acronym{POSIX}, @option{-n} no longer implies @option{-b}. For ++consistency, @option{-M} has been changed in the same way. This may ++affect the meaning of character positions in field specifications in ++obscure cases. The only fix is to add an explicit @option{-b}. ++ ++A position in a sort field specified with @option{-k} may have any ++of the option letters @samp{MbdfghinRrV} appended to it, in which case no ++global ordering options are inherited by that particular field.
The ++@option{-b} option may be independently attached to either or both of ++the start and end positions of a field specification, and if it is ++inherited from the global options it will be attached to both. ++If input lines can contain leading or adjacent blanks and @option{-t} ++is not used, then @option{-k} is typically combined with @option{-b} or ++an option that implicitly ignores leading blanks (@samp{MghnV}) as otherwise ++the varying numbers of leading blanks in fields can cause confusing results. ++ ++If the start position in a sort field specifier falls after the end of ++the line or after the end field, the field is empty. If the @option{-b} ++option was specified, the @samp{.@var{c}} part of a field specification ++is counted from the first nonblank character of the field. ++ ++@vindex _POSIX2_VERSION ++@vindex POSIXLY_CORRECT ++On older systems, @command{sort} supports an obsolete origin-zero ++syntax @samp{+@var{pos1} [-@var{pos2}]} for specifying sort keys. ++This obsolete behavior can be enabled or disabled with the ++@env{_POSIX2_VERSION} environment variable (@pxref{Standards ++conformance}); it can also be enabled when @env{POSIXLY_CORRECT} is ++not set by using the obsolete syntax with @samp{-@var{pos2}} present. ++ ++Scripts intended for use on standard hosts should avoid obsolete ++syntax and should use @option{-k} instead. For example, avoid ++@samp{sort +2}, since it might be interpreted as either @samp{sort ++./+2} or @samp{sort -k 3}. If your script must also run on hosts that ++support only the obsolete syntax, it can use a test like @samp{if sort ++-k 1 /dev/null 2>&1; then @dots{}} to decide which syntax ++to use. ++ ++Here are some examples to illustrate various combinations of options. ++ ++@itemize @bullet ++ ++@item ++Sort in descending (reverse) numeric order. ++ ++@example ++sort -n -r ++@end example ++ ++@item ++Sort alphabetically, omitting the first and second fields ++and the blanks at the start of the third field. 
++This uses a single key composed of the characters beginning ++at the start of the first nonblank character in field three ++and extending to the end of each line. ++ ++@example ++sort -k 3b ++@end example ++ ++@item ++Sort numerically on the second field and resolve ties by sorting ++alphabetically on the third and fourth characters of field five. ++Use @samp{:} as the field delimiter. ++ ++@example ++sort -t : -k 2,2n -k 5.3,5.4 ++@end example ++ ++Note that if you had written @option{-k 2n} instead of @option{-k 2,2n} ++@command{sort} would have used all characters beginning in the second field ++and extending to the end of the line as the primary @emph{numeric} ++key. For the large majority of applications, treating keys spanning ++more than one field as numeric will not do what you expect. ++ ++Also note that the @samp{n} modifier was applied to the field-end ++specifier for the first key. It would have been equivalent to ++specify @option{-k 2n,2} or @option{-k 2n,2n}. All modifiers except ++@samp{b} apply to the associated @emph{field}, regardless of whether ++the modifier character is attached to the field-start and/or the ++field-end part of the key specifier. ++ ++@item ++Sort the password file on the fifth field and ignore any ++leading blanks. Sort lines with equal values in field five ++on the numeric user ID in field three. Fields are separated ++by @samp{:}. ++ ++@example ++sort -t : -k 5b,5 -k 3,3n /etc/passwd ++sort -t : -n -k 5b,5 -k 3,3 /etc/passwd ++sort -t : -b -k 5,5 -k 3,3n /etc/passwd ++@end example ++ ++These three commands have equivalent effect. The first specifies that ++the first key's start position ignores leading blanks and the second ++key is sorted numerically. The other two commands rely on global ++options being inherited by sort keys that lack modifiers. 
The inheritance ++works in this case because @option{-k 5b,5b} and @option{-k 5b,5} are ++equivalent, as the location of a field-end lacking a @samp{.@var{c}} ++character position is not affected by whether initial blanks are ++skipped. ++ ++@item ++Sort a set of log files, primarily by IPv4 address and secondarily by ++time stamp. If two lines' primary and secondary keys are identical, ++output the lines in the same order that they were input. The log ++files contain lines that look like this: ++ ++@example ++4.150.156.3 - - [01/Apr/2004:06:31:51 +0000] message 1 ++211.24.3.231 - - [24/Apr/2004:20:17:39 +0000] message 2 ++@end example ++ ++Fields are separated by exactly one space. Sort IPv4 addresses ++lexicographically, e.g., 212.61.52.2 sorts before 212.129.233.201 ++because 61 is less than 129. ++ ++@example ++sort -s -t ' ' -k 4.9n -k 4.5M -k 4.2n -k 4.14,4.21 file*.log | ++sort -s -t '.' -k 1,1n -k 2,2n -k 3,3n -k 4,4n ++@end example ++ ++This example cannot be done with a single @command{sort} invocation, ++since IPv4 address components are separated by @samp{.} while dates ++come just after a space. So it is broken down into two invocations of ++@command{sort}: the first sorts by time stamp and the second by IPv4 ++address. The time stamp is sorted by year, then month, then day, and ++finally by hour-minute-second field, using @option{-k} to isolate each ++field. Except for hour-minute-second there's no need to specify the ++end of each key field, since the @samp{n} and @samp{M} modifiers sort ++based on leading prefixes that cannot cross field boundaries. The ++IPv4 addresses are sorted lexicographically. The second sort uses ++@samp{-s} so that ties in the primary key are broken by the secondary ++key; the first sort uses @samp{-s} so that the combination of the two ++sorts is stable. ++ ++@item ++Generate a tags file in case-insensitive sorted order. 
++ ++@smallexample ++find src -type f -print0 | sort -z -f | xargs -0 etags --append ++@end smallexample ++ ++The use of @option{-print0}, @option{-z}, and @option{-0} in this case means ++that file names that contain blanks or other special characters are ++not broken up ++by the sort operation. ++ ++@c This example is a bit contrived and needs more explanation. ++@c @item ++@c Sort records separated by an arbitrary string by using a pipe to convert ++@c each record delimiter string to @samp{\0}, then using sort's -z option, ++@c and converting each @samp{\0} back to the original record delimiter. ++@c ++@c @example ++@c printf 'c\n\nb\n\na\n'|perl -0pe 's/\n\n/\n\0/g'|sort -z|perl -0pe 's/\0/\n/g' ++@c @end example ++ ++@item ++Use the common @acronym{DSU, Decorate Sort Undecorate} idiom to ++sort lines according to their length. ++ ++@example ++awk '@{print length, $0@}' /etc/passwd | sort -n | cut -f2- -d' ' ++@end example ++ ++In general this technique can be used to sort data that the @command{sort} ++command does not support, or is inefficient at, sorting directly. ++ ++@item ++Shuffle a list of directories, but preserve the order of files within ++each directory. For instance, one could use this to generate a music ++playlist in which albums are shuffled but the songs of each album are ++played in order. ++ ++@example ++ls */* | sort -t / -k 1,1R -k 2,2 ++@end example ++ ++@end itemize ++ ++ ++@node shuf invocation ++@section @command{shuf}: Shuffling text ++ ++@pindex shuf ++@cindex shuffling files ++ ++@command{shuf} shuffles its input by outputting a random permutation ++of its input lines. Each output permutation is equally likely. ++Synopses: ++ ++@example ++shuf [@var{option}]@dots{} [@var{file}] ++shuf -e [@var{option}]@dots{} [@var{arg}]@dots{} ++shuf -i @var{lo}-@var{hi} [@var{option}]@dots{} ++@end example ++ ++@command{shuf} has three modes of operation that affect where it ++obtains its input lines. 
By default, it reads lines from standard ++input. The following options change the operation mode: ++ ++@table @samp ++ ++@item -e ++@itemx --echo ++@opindex -e ++@opindex --echo ++@cindex command-line operands to shuffle ++Treat each command-line operand as an input line. ++ ++@item -i @var{lo}-@var{hi} ++@itemx --input-range=@var{lo}-@var{hi} ++@opindex -i ++@opindex --input-range ++@cindex input range to shuffle ++Act as if input came from a file containing the range of unsigned ++decimal integers @var{lo}@dots{}@var{hi}, one per line. ++ ++@end table ++ ++@command{shuf}'s other options can affect its behavior in all ++operation modes: ++ ++@table @samp ++ ++@item -n @var{count} ++@itemx --head-count=@var{count} ++@opindex -n ++@opindex --head-count ++@cindex head of output ++Output at most @var{count} lines. By default, all input lines are ++output. ++ ++@item -o @var{output-file} ++@itemx --output=@var{output-file} ++@opindex -o ++@opindex --output ++@cindex overwriting of input, allowed ++Write output to @var{output-file} instead of standard output. ++@command{shuf} reads all input before opening ++@var{output-file}, so you can safely shuffle a file in place by using ++commands like @code{shuf -o F out ++$ dd bs=1 skip=222 count=6 < out 2>/dev/null; echo ++deeper ++@end example ++ ++Note that although the listing above includes a trailing slash ++for the @samp{deeper} entry, the offsets select the name without ++the trailing slash. However, if you invoke @command{ls} with @option{--dired} ++along with an option like @option{--escape} (aka @option{-b}) and operate ++on a file whose name contains special characters, notice that the backslash ++@emph{is} included: ++ ++@example ++$ touch 'a b' ++$ ls -blog --dired 'a b' ++ -rw-r--r-- 1 0 Jun 10 12:28 a\ b ++//DIRED// 30 34 ++//DIRED-OPTIONS// --quoting-style=escape ++@end example ++ ++If you use a quoting style that adds quote marks ++(e.g., @option{--quoting-style=c}), then the offsets include the quote marks.
++So beware that the user may select the quoting style via the environment ++variable @env{QUOTING_STYLE}. Hence, applications using @option{--dired} ++should either specify an explicit @option{--quoting-style=literal} option ++(aka @option{-N} or @option{--literal}) on the command line, or else be ++prepared to parse the escaped names. ++ ++@item --full-time ++@opindex --full-time ++Produce long format directory listings, and list times in full. It is ++equivalent to using @option{--format=long} with ++@option{--time-style=full-iso} (@pxref{Formatting file timestamps}). ++ ++@item -g ++@opindex -g ++Produce long format directory listings, but don't display owner information. ++ ++@item -G ++@itemx --no-group ++@opindex -G ++@opindex --no-group ++Inhibit display of group information in a long format directory listing. ++(This is the default in some non-@sc{gnu} versions of @command{ls}, so we ++provide this option for compatibility.) ++ ++@optHumanReadable ++ ++@item -i ++@itemx --inode ++@opindex -i ++@opindex --inode ++@cindex inode number, printing ++Print the inode number (also called the file serial number and index ++number) of each file to the left of the file name. (This number ++uniquely identifies each file within a particular file system.) ++ ++@item -l ++@itemx --format=long ++@itemx --format=verbose ++@opindex -l ++@opindex --format ++@opindex long ls @r{format} ++@opindex verbose ls @r{format} ++In addition to the name of each file, print the file type, file mode bits, ++number of hard links, owner name, group name, size, and ++timestamp (@pxref{Formatting file timestamps}), normally ++the modification time. Print question marks for information that ++cannot be determined. ++ ++Normally the size is printed as a byte count without punctuation, but ++this can be overridden (@pxref{Block size}). 
For example, @option{-h} ++prints an abbreviated, human-readable count, and ++@samp{--block-size="'1"} prints a byte count with the thousands ++separator of the current locale. ++ ++For each directory that is listed, preface the files with a line ++@samp{total @var{blocks}}, where @var{blocks} is the total disk allocation ++for all files in that directory. The block size currently defaults to 1024 ++bytes, but this can be overridden (@pxref{Block size}). ++The @var{blocks} computed counts each hard link separately; ++this is arguably a deficiency. ++ ++The file type is one of the following characters: ++ ++@c The commented-out entries are ones we're not sure about. ++ ++@table @samp ++@item - ++regular file ++@item b ++block special file ++@item c ++character special file ++@item C ++high performance (``contiguous data'') file ++@item d ++directory ++@item D ++door (Solaris 2.5 and up) ++@c @item F ++@c semaphore, if this is a distinct file type ++@item l ++symbolic link ++@c @item m ++@c multiplexed file (7th edition Unix; obsolete) ++@item M ++off-line (``migrated'') file (Cray DMF) ++@item n ++network special file (HP-UX) ++@item p ++FIFO (named pipe) ++@item P ++port (Solaris 10 and up) ++@c @item Q ++@c message queue, if this is a distinct file type ++@item s ++socket ++@c @item S ++@c shared memory object, if this is a distinct file type ++@c @item T ++@c typed memory object, if this is a distinct file type ++@c @item w ++@c whiteout (4.4BSD; not implemented) ++@item ? ++some other file type ++@end table ++ ++@cindex permissions, output by @command{ls} ++The file mode bits listed are similar to symbolic mode specifications ++(@pxref{Symbolic Modes}). But @command{ls} combines multiple bits into the ++third character of each set of permissions as follows: ++ ++@table @samp ++@item s ++If the set-user-ID or set-group-ID bit and the corresponding executable bit ++are both set. 
++ ++@item S ++If the set-user-ID or set-group-ID bit is set but the corresponding ++executable bit is not set. ++ ++@item t ++If the restricted deletion flag or sticky bit, and the ++other-executable bit, are both set. The restricted deletion flag is ++another name for the sticky bit. @xref{Mode Structure}. ++ ++@item T ++If the restricted deletion flag or sticky bit is set but the ++other-executable bit is not set. ++ ++@item x ++If the executable bit is set and none of the above apply. ++ ++@item - ++Otherwise. ++@end table ++ ++Following the file mode bits is a single character that specifies ++whether an alternate access method such as an access control list ++applies to the file. When the character following the file mode bits is a ++space, there is no alternate access method. When it is a printing ++character, then there is such a method. ++ ++GNU @command{ls} uses a @samp{.} character to indicate a file ++with an SELinux security context, but no other alternate access method. ++ ++A file with any other combination of alternate access methods ++is marked with a @samp{+} character. ++ ++@item -n ++@itemx --numeric-uid-gid ++@opindex -n ++@opindex --numeric-uid-gid ++@cindex numeric uid and gid ++@cindex numeric user and group IDs ++Produce long format directory listings, but ++display numeric user and group IDs instead of the owner and group names. ++ ++@item -o ++@opindex -o ++Produce long format directory listings, but don't display group information. ++It is equivalent to using @option{--format=long} with @option{--no-group}. ++ ++@item -s ++@itemx --size ++@opindex -s ++@opindex --size ++@cindex disk allocation ++@cindex size of files, reporting ++Print the disk allocation of each file to the left of the file name. ++This is the amount of disk space used by the file, which is usually a ++bit more than the file's size, but it can be less if the file has holes.
++ ++Normally the disk allocation is printed in units of ++1024 bytes, but this can be overridden (@pxref{Block size}). ++ ++@cindex NFS mounts from BSD to HP-UX ++For files that are NFS-mounted from an HP-UX system to a BSD system, ++this option reports sizes that are half the correct values. On HP-UX ++systems, it reports sizes that are twice the correct values for files ++that are NFS-mounted from BSD systems. This is due to a flaw in HP-UX; ++it also affects the HP-UX @command{ls} program. ++ ++@optSi ++ ++@item -Z ++@itemx --context ++@opindex -Z ++@opindex --context ++@cindex SELinux ++@cindex security context ++Display the SELinux security context or @samp{?} if none is found. ++When used with the @option{-l} option, print the security context ++to the left of the size column. ++ ++@end table ++ ++ ++@node Sorting the output ++@subsection Sorting the output ++ ++@cindex sorting @command{ls} output ++These options change the order in which @command{ls} sorts the information ++it outputs. By default, sorting is done by character code ++(e.g., @acronym{ASCII} order). ++ ++@table @samp ++ ++@item -c ++@itemx --time=ctime ++@itemx --time=status ++@opindex -c ++@opindex --time ++@opindex ctime@r{, printing or sorting by} ++@opindex status time@r{, printing or sorting by} ++@opindex use time@r{, printing or sorting files by} ++If the long listing format (e.g., @option{-l}, @option{-o}) is being used, ++print the status change time (the @samp{ctime} in the inode) instead of ++the modification time. ++When explicitly sorting by time (@option{--sort=time} or @option{-t}) ++or when not using a long listing format, ++sort according to the status change time. ++ ++@item -f ++@opindex -f ++@cindex unsorted directory listing ++@cindex directory order, listing by ++Primarily, like @option{-U}---do not sort; list the files in whatever ++order they are stored in the directory. 
But also enable @option{-a} (list ++all files) and disable @option{-l}, @option{--color}, and @option{-s} (if they ++were specified before the @option{-f}). ++ ++@item -r ++@itemx --reverse ++@opindex -r ++@opindex --reverse ++@cindex reverse sorting ++Reverse whatever the sorting method is---e.g., list files in reverse ++alphabetical order, youngest first, smallest first, or whatever. ++ ++@item -S ++@itemx --sort=size ++@opindex -S ++@opindex --sort ++@opindex size of files@r{, sorting files by} ++Sort by file size, largest first. ++ ++@item -t ++@itemx --sort=time ++@opindex -t ++@opindex --sort ++@opindex modification time@r{, sorting files by} ++Sort by modification time (the @samp{mtime} in the inode), newest first. ++ ++@item -u ++@itemx --time=atime ++@itemx --time=access ++@itemx --time=use ++@opindex -u ++@opindex --time ++@opindex use time@r{, printing or sorting files by} ++@opindex atime@r{, printing or sorting files by} ++@opindex access time@r{, printing or sorting files by} ++If the long listing format (e.g., @option{--format=long}) is being used, ++print the last access time (the @samp{atime} in the inode). ++When explicitly sorting by time (@option{--sort=time} or @option{-t}) ++or when not using a long listing format, sort according to the access time. ++ ++@item -U ++@itemx --sort=none ++@opindex -U ++@opindex --sort ++@opindex none@r{, sorting option for @command{ls}} ++Do not sort; list the files in whatever order they are ++stored in the directory. (Do not do any of the other unrelated things ++that @option{-f} does.) This is especially useful when listing very large ++directories, since not doing any sorting can be noticeably faster. ++ ++@item -v ++@itemx --sort=version ++@opindex -v ++@opindex --sort ++@opindex version@r{, sorting option for @command{ls}} ++Sort by version name and number, lowest first. It behaves like a default ++sort, except that each sequence of decimal digits is treated numerically ++as an index/version number. 
(@xref{Details about version sort}.) ++ ++@item -X ++@itemx --sort=extension ++@opindex -X ++@opindex --sort ++@opindex extension@r{, sorting files by} ++Sort directory contents alphabetically by file extension (characters ++after the last @samp{.}); files with no extension are sorted first. ++ ++@end table ++ ++ ++@node Details about version sort ++@subsection Details about version sort ++ ++The version sort takes into account the fact that file names frequently include ++indices or version numbers. Standard sorting functions usually do not produce ++the ordering that people expect because comparisons are made on a ++character-by-character basis. The version ++sort addresses this problem, and is especially useful when browsing ++directories that contain many files with indices/version numbers in their ++names: ++ ++@example ++$ ls -1 $ ls -1v ++foo.zml-1.gz foo.zml-1.gz ++foo.zml-100.gz foo.zml-2.gz ++foo.zml-12.gz foo.zml-6.gz ++foo.zml-13.gz foo.zml-12.gz ++foo.zml-2.gz foo.zml-13.gz ++foo.zml-25.gz foo.zml-25.gz ++foo.zml-6.gz foo.zml-100.gz ++@end example ++ ++Version-sorted strings are compared such that if @var{ver1} and @var{ver2} ++are version numbers and @var{prefix} and @var{suffix} (@var{suffix} matching ++the regular expression @samp{(\.[A-Za-z~][A-Za-z0-9~]*)*}) are strings then ++@var{ver1} < @var{ver2} implies that the name composed of ++``@var{prefix} @var{ver1} @var{suffix}'' sorts before ++``@var{prefix} @var{ver2} @var{suffix}''. ++ ++Note also that leading zeros of numeric parts are ignored: ++ ++@example ++$ ls -1 $ ls -1v ++abc-1.007.tgz abc-1.01a.tgz ++abc-1.012b.tgz abc-1.007.tgz ++abc-1.01a.tgz abc-1.012b.tgz ++@end example ++ ++This functionality is implemented using gnulib's @code{filevercmp} function. ++One result of that implementation decision is that @samp{ls -v} ++and @samp{sort -V} do not use the locale category, @env{LC_COLLATE}, ++which means non-numeric prefixes are sorted as if @env{LC_COLLATE} were set ++to @samp{C}. 
++ ++@node General output formatting ++@subsection General output formatting ++ ++These options affect the appearance of the overall output. ++ ++@table @samp ++ ++@item -1 ++@itemx --format=single-column ++@opindex -1 ++@opindex --format ++@opindex single-column @r{output of files} ++List one file per line. This is the default for @command{ls} when standard ++output is not a terminal. ++ ++@item -C ++@itemx --format=vertical ++@opindex -C ++@opindex --format ++@opindex vertical @r{sorted files in columns} ++List files in columns, sorted vertically. This is the default for ++@command{ls} if standard output is a terminal. It is always the default ++for the @command{dir} program. ++@sc{gnu} @command{ls} uses variable width columns to display as many files as ++possible in the fewest lines. ++ ++@item --color [=@var{when}] ++@opindex --color ++@cindex color, distinguishing file types with ++Specify whether to use color for distinguishing file types. @var{when} ++may be omitted, or one of: ++@itemize @bullet ++@item none ++@vindex none @r{color option} ++- Do not use color at all. This is the default. ++@item auto ++@vindex auto @r{color option} ++@cindex terminal, using color iff ++- Only use color if standard output is a terminal. ++@item always ++@vindex always @r{color option} ++- Always use color. ++@end itemize ++Specifying @option{--color} and no @var{when} is equivalent to ++@option{--color=always}. ++Piping a colorized listing through a pager like @command{more} or ++@command{less} usually produces unreadable results. However, using ++@code{more -f} does seem to work. ++ ++@item -F ++@itemx --classify ++@itemx --indicator-style=classify ++@opindex -F ++@opindex --classify ++@opindex --indicator-style ++@cindex file type and executables, marking ++@cindex executables and file type, marking ++Append a character to each file name indicating the file type. Also, ++for regular files that are executable, append @samp{*}. 
The file type ++indicators are @samp{/} for directories, @samp{@@} for symbolic links, ++@samp{|} for FIFOs, @samp{=} for sockets, @samp{>} for doors, ++and nothing for regular files. ++@c The following sentence is the same as the one for -d. ++Do not follow symbolic links listed on the ++command line unless the @option{--dereference-command-line} (@option{-H}), ++@option{--dereference} (@option{-L}), or ++@option{--dereference-command-line-symlink-to-dir} options are specified. ++ ++@item --file-type ++@itemx --indicator-style=file-type ++@opindex --file-type ++@opindex --indicator-style ++@cindex file type, marking ++Append a character to each file name indicating the file type. This is ++like @option{-F}, except that executables are not marked. ++ ++@item --indicator-style=@var{word} ++@opindex --indicator-style ++Append a character indicator with style @var{word} to entry names, ++as follows: ++ ++@table @samp ++@item none ++Do not append any character indicator; this is the default. ++@item slash ++Append @samp{/} for directories. This is the same as the @option{-p} ++option. ++@item file-type ++Append @samp{/} for directories, @samp{@@} for symbolic links, @samp{|} ++for FIFOs, @samp{=} for sockets, and nothing for regular files. This is ++the same as the @option{--file-type} option. ++@item classify ++Append @samp{*} for executable regular files, otherwise behave as for ++@samp{file-type}. This is the same as the @option{-F} or ++@option{--classify} option. ++@end table ++ ++@item -k ++@opindex -k ++Print file sizes in 1024-byte blocks, overriding the default block ++size (@pxref{Block size}). ++This option is equivalent to @option{--block-size=1K}. ++ ++@item -m ++@itemx --format=commas ++@opindex -m ++@opindex --format ++@opindex commas@r{, outputting between files} ++List files horizontally, with as many as will fit on each line, ++separated by @samp{, } (a comma and a space). 
++ ++@item -p ++@itemx --indicator-style=slash ++@opindex -p ++@opindex --indicator-style ++@cindex file type, marking ++Append a @samp{/} to directory names. ++ ++@item -x ++@itemx --format=across ++@itemx --format=horizontal ++@opindex -x ++@opindex --format ++@opindex across@r{, listing files} ++@opindex horizontal@r{, listing files} ++List the files in columns, sorted horizontally. ++ ++@item -T @var{cols} ++@itemx --tabsize=@var{cols} ++@opindex -T ++@opindex --tabsize ++Assume that each tab stop is @var{cols} columns wide. The default is 8. ++@command{ls} uses tabs where possible in the output, for efficiency. If ++@var{cols} is zero, do not use tabs at all. ++ ++@c FIXME: remove in 2009, if Apple Terminal has been fixed for long enough. ++Some terminal emulators (at least Apple Terminal 1.5 (133) from Mac OS X 10.4.8) ++do not properly align columns to the right of a TAB following a ++non-@acronym{ASCII} byte. If you use such a terminal emulator, use the ++@option{-T0} option or put @code{TABSIZE=0} in your environment to tell ++@command{ls} to align using spaces, not tabs. ++ ++@item -w ++@itemx --width=@var{cols} ++@opindex -w ++@opindex --width ++@vindex COLUMNS ++Assume the screen is @var{cols} columns wide. The default is taken ++from the terminal settings if possible; otherwise the environment ++variable @env{COLUMNS} is used if it is set; otherwise the default ++is 80. ++ ++@end table ++ ++ ++@node Formatting file timestamps ++@subsection Formatting file timestamps ++ ++By default, file timestamps are listed in abbreviated form. Most ++locales use a timestamp like @samp{2002-03-30 23:45}. However, the ++default @acronym{POSIX} locale uses a date like @samp{Mar 30@ @ 2002} ++for non-recent timestamps, and a date-without-year and time like ++@samp{Mar 30 23:45} for recent timestamps. ++ ++A timestamp is considered to be @dfn{recent} if it is less than six ++months old, and is not dated in the future. 
If a timestamp dated ++today is not listed in recent form, the timestamp is in the future, ++which means you probably have clock skew problems which may break ++programs like @command{make} that rely on file timestamps. ++ ++@vindex TZ ++Time stamps are listed according to the time zone rules specified by ++the @env{TZ} environment variable, or by the system default rules if ++@env{TZ} is not set. @xref{TZ Variable,, Specifying the Time Zone ++with @env{TZ}, libc, The GNU C Library Reference Manual}. ++ ++The following option changes how file timestamps are printed. ++ ++@table @samp ++@item --time-style=@var{style} ++@opindex --time-style ++@cindex time style ++List timestamps in style @var{style}. The @var{style} should ++be one of the following: ++ ++@table @samp ++@item +@var{format} ++@vindex LC_TIME ++List timestamps using @var{format}, where @var{format} is interpreted ++like the format argument of @command{date} (@pxref{date invocation}). ++For example, @option{--time-style="+%Y-%m-%d %H:%M:%S"} causes ++@command{ls} to list timestamps like @samp{2002-03-30 23:45:56}. As ++with @command{date}, @var{format}'s interpretation is affected by the ++@env{LC_TIME} locale category. ++ ++If @var{format} contains two format strings separated by a newline, ++the former is used for non-recent files and the latter for recent ++files; if you want output columns to line up, you may need to insert ++spaces in one of the two formats. ++ ++@item full-iso ++List timestamps in full using @acronym{ISO} 8601 date, time, and time zone ++format with nanosecond precision, e.g., @samp{2002-03-30 ++23:45:56.477817180 -0700}. This style is equivalent to ++@samp{+%Y-%m-%d %H:%M:%S.%N %z}. ++ ++This is useful because the time output includes all the information that ++is available from the operating system. For example, this can help ++explain @command{make}'s behavior, since @acronym{GNU} @command{make} ++uses the full timestamp to determine whether a file is out of date. 
++ ++@item long-iso ++List @acronym{ISO} 8601 date and time in minutes, e.g., ++@samp{2002-03-30 23:45}. These timestamps are shorter than ++@samp{full-iso} timestamps, and are usually good enough for everyday ++work. This style is equivalent to @samp{+%Y-%m-%d %H:%M}. ++ ++@item iso ++List @acronym{ISO} 8601 dates for non-recent timestamps (e.g., ++@samp{2002-03-30@ }), and @acronym{ISO} 8601 month, day, hour, and ++minute for recent timestamps (e.g., @samp{03-30 23:45}). These ++timestamps are uglier than @samp{long-iso} timestamps, but they carry ++nearly the same information in a smaller space and their brevity helps ++@command{ls} output fit within traditional 80-column output lines. ++The following two @command{ls} invocations are equivalent: ++ ++@example ++newline=' ++' ++ls -l --time-style="+%Y-%m-%d $newline%m-%d %H:%M" ++ls -l --time-style="iso" ++@end example ++ ++@item locale ++@vindex LC_TIME ++List timestamps in a locale-dependent form. For example, a Finnish ++locale might list non-recent timestamps like @samp{maalis 30@ @ 2002} ++and recent timestamps like @samp{maalis 30 23:45}. Locale-dependent ++timestamps typically consume more space than @samp{iso} timestamps and ++are harder for programs to parse because locale conventions vary so ++widely, but they are easier for many people to read. ++ ++The @env{LC_TIME} locale category specifies the timestamp format. The ++default @acronym{POSIX} locale uses timestamps like @samp{Mar 30@ ++@ 2002} and @samp{Mar 30 23:45}; in this locale, the following two ++@command{ls} invocations are equivalent: ++ ++@example ++newline=' ++' ++ls -l --time-style="+%b %e %Y$newline%b %e %H:%M" ++ls -l --time-style="locale" ++@end example ++ ++Other locales behave differently. For example, in a German locale, ++@option{--time-style="locale"} might be equivalent to ++@option{--time-style="+%e. %b %Y $newline%e. %b %H:%M"} ++and might generate timestamps like @samp{30. M@"ar 2002@ } and ++@samp{30. M@"ar 23:45}. 
++ ++@item posix-@var{style} ++@vindex LC_TIME ++List @acronym{POSIX}-locale timestamps if the @env{LC_TIME} locale ++category is @acronym{POSIX}, @var{style} timestamps otherwise. For ++example, the @samp{posix-long-iso} style lists ++timestamps like @samp{Mar 30@ @ 2002} and @samp{Mar 30 23:45} when in ++the @acronym{POSIX} locale, and like @samp{2002-03-30 23:45} otherwise. ++@end table ++@end table ++ ++@vindex TIME_STYLE ++You can specify the default value of the @option{--time-style} option ++with the environment variable @env{TIME_STYLE}; if @env{TIME_STYLE} is not set ++the default style is @samp{locale}. @acronym{GNU} Emacs 21.3 and ++later use the @option{--dired} option and therefore can parse any date ++format, but if you are using Emacs 21.1 or 21.2 and specify a ++non-@acronym{POSIX} locale you may need to set ++@samp{TIME_STYLE="posix-long-iso"}. ++ ++To avoid certain denial-of-service attacks, timestamps that would be ++longer than 1000 bytes may be treated as errors. ++ ++ ++@node Formatting the file names ++@subsection Formatting the file names ++ ++These options change how file names themselves are printed. ++ ++@table @samp ++ ++@item -b ++@itemx --escape ++@itemx --quoting-style=escape ++@opindex -b ++@opindex --escape ++@opindex --quoting-style ++@cindex backslash sequences for file names ++Quote nongraphic characters in file names using alphabetic and octal ++backslash sequences like those used in C. ++ ++@item -N ++@itemx --literal ++@itemx --quoting-style=literal ++@opindex -N ++@opindex --literal ++@opindex --quoting-style ++Do not quote file names. However, with @command{ls} nongraphic ++characters are still printed as question marks if the output is a ++terminal and you do not specify the @option{--show-control-chars} ++option. ++ ++@item -q ++@itemx --hide-control-chars ++@opindex -q ++@opindex --hide-control-chars ++Print question marks instead of nongraphic characters in file names. 
++This is the default if the output is a terminal and the program is ++@command{ls}. ++ ++@item -Q ++@itemx --quote-name ++@itemx --quoting-style=c ++@opindex -Q ++@opindex --quote-name ++@opindex --quoting-style ++Enclose file names in double quotes and quote nongraphic characters as ++in C. ++ ++@item --quoting-style=@var{word} ++@opindex --quoting-style ++@cindex quoting style ++Use style @var{word} to quote file names and other strings that may ++contain arbitrary characters. The @var{word} should ++be one of the following: ++ ++@table @samp ++@item literal ++Output strings as-is; this is the same as the @option{-N} or ++@option{--literal} option. ++@item shell ++Quote strings for the shell if they contain shell metacharacters or would ++cause ambiguous output. ++The quoting is suitable for @acronym{POSIX}-compatible shells like ++@command{bash}, but it does not always work for incompatible shells ++like @command{csh}. ++@item shell-always ++Quote strings for the shell, even if they would normally not require quoting. ++@item c ++Quote strings as for C character string literals, including the ++surrounding double-quote characters; this is the same as the ++@option{-Q} or @option{--quote-name} option. ++@item escape ++Quote strings as for C character string literals, except omit the ++surrounding double-quote ++characters; this is the same as the @option{-b} or @option{--escape} option. ++@item clocale ++Quote strings as for C character string literals, except use ++surrounding quotation marks appropriate for the ++locale. ++@item locale ++@c Use @t instead of @samp to avoid duplicate quoting in some output styles. ++Quote strings as for C character string literals, except use ++surrounding quotation marks appropriate for the locale, and quote ++@t{`like this'} instead of @t{"like ++this"} in the default C locale. This looks nicer on many displays. 
++@end table ++ ++You can specify the default value of the @option{--quoting-style} option ++with the environment variable @env{QUOTING_STYLE}. If that environment ++variable is not set, the default value is @samp{literal}, but this ++default may change to @samp{shell} in a future version of this package. ++ ++@item --show-control-chars ++@opindex --show-control-chars ++Print nongraphic characters as-is in file names. ++This is the default unless the output is a terminal and the program is ++@command{ls}. ++ ++@end table ++ ++ ++@node dir invocation ++@section @command{dir}: Briefly list directory contents ++ ++@pindex dir ++@cindex directory listing, brief ++ ++@command{dir} is equivalent to @code{ls -C ++-b}; that is, by default files are listed in columns, sorted vertically, ++and special characters are represented by backslash escape sequences. ++ ++@xref{ls invocation, @command{ls}}. ++ ++ ++@node vdir invocation ++@section @command{vdir}: Verbosely list directory contents ++ ++@pindex vdir ++@cindex directory listing, verbose ++ ++@command{vdir} is equivalent to @code{ls -l ++-b}; that is, by default files are listed in long format and special ++characters are represented by backslash escape sequences. ++ ++@node dircolors invocation ++@section @command{dircolors}: Color setup for @command{ls} ++ ++@pindex dircolors ++@cindex color setup ++@cindex setup for color ++ ++@command{dircolors} outputs a sequence of shell commands to set up the ++terminal for color output from @command{ls} (and @command{dir}, etc.). ++Typical usage: ++ ++@example ++eval "`dircolors [@var{option}]@dots{} [@var{file}]`" ++@end example ++ ++If @var{file} is specified, @command{dircolors} reads it to determine which ++colors to use for which file types and extensions. Otherwise, a ++precompiled database is used. For details on the format of these files, ++run @samp{dircolors --print-database}. 
++ ++To make @command{dircolors} read a @file{~/.dircolors} file if it ++exists, you can put the following lines in your @file{~/.bashrc} (or ++adapt them to your favorite shell): ++ ++@example ++d=.dircolors ++test -r $d && eval "$(dircolors $d)" ++@end example ++ ++@vindex LS_COLORS ++@vindex SHELL @r{environment variable, and color} ++The output is a shell command to set the @env{LS_COLORS} environment ++variable. You can specify the shell syntax to use on the command line, ++or @command{dircolors} will guess it from the value of the @env{SHELL} ++environment variable. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++@item -b ++@itemx --sh ++@itemx --bourne-shell ++@opindex -b ++@opindex --sh ++@opindex --bourne-shell ++@cindex Bourne shell syntax for color setup ++@cindex @command{sh} syntax for color setup ++Output Bourne shell commands. This is the default if the @env{SHELL} ++environment variable is set and does not end with @samp{csh} or ++@samp{tcsh}. ++ ++@item -c ++@itemx --csh ++@itemx --c-shell ++@opindex -c ++@opindex --csh ++@opindex --c-shell ++@cindex C shell syntax for color setup ++@cindex @command{csh} syntax for color setup ++Output C shell commands. This is the default if @code{SHELL} ends with ++@command{csh} or @command{tcsh}. ++ ++@item -p ++@itemx --print-database ++@opindex -p ++@opindex --print-database ++@cindex color database, printing ++@cindex database for color setup, printing ++@cindex printing color database ++Print the (compiled-in) default color configuration database. This ++output is itself a valid configuration file, and is fairly descriptive ++of the possibilities. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node Basic operations ++@chapter Basic operations ++ ++@cindex manipulating files ++ ++This chapter describes the commands for basic file manipulation: ++copying, moving (renaming), and deleting (removing). ++ ++@menu ++* cp invocation:: Copy files. 
++* dd invocation:: Convert and copy a file. ++* install invocation:: Copy files and set attributes. ++* mv invocation:: Move (rename) files. ++* rm invocation:: Remove files or directories. ++* shred invocation:: Remove files more securely. ++@end menu ++ ++ ++@node cp invocation ++@section @command{cp}: Copy files and directories ++ ++@pindex cp ++@cindex copying files and directories ++@cindex files, copying ++@cindex directories, copying ++ ++@command{cp} copies files (or, optionally, directories). The copy is ++completely independent of the original. You can either copy one file to ++another, or copy arbitrarily many files to a destination directory. ++Synopses: ++ ++@example ++cp [@var{option}]@dots{} [-T] @var{source} @var{dest} ++cp [@var{option}]@dots{} @var{source}@dots{} @var{directory} ++cp [@var{option}]@dots{} -t @var{directory} @var{source}@dots{} ++@end example ++ ++@itemize @bullet ++@item ++If two file names are given, @command{cp} copies the first file to the ++second. ++ ++@item ++If the @option{--target-directory} (@option{-t}) option is given, or ++failing that if the last file is a directory and the ++@option{--no-target-directory} (@option{-T}) option is not given, ++@command{cp} copies each @var{source} file to the specified directory, ++using the @var{source}s' names. ++@end itemize ++ ++Generally, files are written just as they are read. For exceptions, ++see the @option{--sparse} option below. ++ ++By default, @command{cp} does not copy directories. However, the ++@option{-R}, @option{-a}, and @option{-r} options cause @command{cp} to ++copy recursively by descending into source directories and copying files ++to corresponding destination directories. ++ ++When copying from a symbolic link, @command{cp} normally follows the ++link only when not copying ++recursively. 
This default can be overridden with the ++@option{--archive} (@option{-a}), @option{-d}, @option{--dereference} ++(@option{-L}), @option{--no-dereference} (@option{-P}), and ++@option{-H} options. If more than one of these options is specified, ++the last one silently overrides the others. ++ ++When copying to a symbolic link, @command{cp} follows the ++link only when it refers to an existing regular file. ++However, when copying to a dangling symbolic link, @command{cp} ++refuses by default, and fails with a diagnostic, since the operation ++is inherently dangerous. This behavior is contrary to historical ++practice and to @acronym{POSIX}. ++Set @env{POSIXLY_CORRECT} to make @command{cp} attempt to create ++the target of a dangling destination symlink, in spite of the possible risk. ++Also, when an option like ++@option{--backup} or @option{--link} acts to rename or remove the ++destination before copying, @command{cp} renames or removes the ++symbolic link rather than the file it points to. ++ ++By default, @command{cp} copies the contents of special files only ++when not copying recursively. This default can be overridden with the ++@option{--copy-contents} option. ++ ++@cindex self-backups ++@cindex backups, making only ++@command{cp} generally refuses to copy a file onto itself, with the ++following exception: if @option{--force --backup} is specified with ++@var{source} and @var{dest} identical, and referring to a regular file, ++@command{cp} will make a backup file, either regular or numbered, as ++specified in the usual ways (@pxref{Backup options}). This is useful when ++you simply want to make a backup of an existing file before changing it. ++ ++The program accepts the following options. Also see @ref{Common options}. 
++ ++@table @samp ++@item -a ++@itemx --archive ++@opindex -a ++@opindex --archive ++Preserve as much as possible of the structure and attributes of the ++original files in the copy (but do not attempt to preserve internal ++directory structure; i.e., @samp{ls -U} may list the entries in a copied ++directory in a different order). ++Try to preserve SELinux security context and extended attributes (xattr), ++but ignore any failure to do that and print no corresponding diagnostic. ++Equivalent to @option{-dR --preserve=all} with the reduced diagnostics. ++ ++@item -b ++@itemx @w{@kbd{--backup}[=@var{method}]} ++@opindex -b ++@opindex --backup ++@vindex VERSION_CONTROL ++@cindex backups, making ++@xref{Backup options}. ++Make a backup of each file that would otherwise be overwritten or removed. ++As a special case, @command{cp} makes a backup of @var{source} when the force ++and backup options are given and @var{source} and @var{dest} are the same ++name for an existing, regular file. One useful application of this ++combination of options is this tiny Bourne shell script: ++ ++@example ++#!/bin/sh ++# Usage: backup FILE... ++# Create a @sc{gnu}-style backup of each listed FILE. ++for i; do ++ cp --backup --force -- "$i" "$i" ++done ++@end example ++ ++@item --copy-contents ++@cindex directories, copying recursively ++@cindex copying directories recursively ++@cindex recursively copying directories ++@cindex non-directories, copying as special files ++If copying recursively, copy the contents of any special files (e.g., ++FIFOs and device files) as if they were regular files. This means ++trying to read the data in each source file and writing it to the ++destination. It is usually a mistake to use this option, as it ++normally has undesirable effects on special files like FIFOs and the ++ones typically found in the @file{/dev} directory. 
In most cases, ++@code{cp -R --copy-contents} will hang indefinitely trying to read ++from FIFOs and special files like @file{/dev/console}, and it will ++fill up your destination disk if you use it to copy @file{/dev/zero}. ++This option has no effect unless copying recursively, and it does not ++affect the copying of symbolic links. ++ ++@item -d ++@opindex -d ++@cindex symbolic links, copying ++@cindex hard links, preserving ++Copy symbolic links as symbolic links rather than copying the files that ++they point to, and preserve hard links between source files in the copies. ++Equivalent to @option{--no-dereference --preserve=links}. ++ ++@item -f ++@itemx --force ++@opindex -f ++@opindex --force ++When copying without this option and an existing destination file cannot ++be opened for writing, the copy fails. However, with @option{--force}, ++when a destination file cannot be opened, @command{cp} then removes it and ++tries to open it again. Contrast this behavior with that enabled by ++@option{--link} and @option{--symbolic-link}, whereby the destination file ++is never opened but rather is removed unconditionally. Also see the ++description of @option{--remove-destination}. ++ ++This option is independent of the @option{--interactive} or ++@option{-i} option: neither cancels the effect of the other. ++ ++This option is redundant if the @option{--no-clobber} or @option{-n} option is ++used. ++ ++@item -H ++@opindex -H ++If a command line argument specifies a symbolic link, then copy the ++file it points to rather than the symbolic link itself. However, ++copy (preserving its nature) any symbolic link that is encountered ++via recursive traversal. ++ ++@item -i ++@itemx --interactive ++@opindex -i ++@opindex --interactive ++When copying a file other than a directory, prompt whether to ++overwrite an existing destination file. The @option{-i} option overrides ++a previous @option{-n} option.
++ ++@item -l ++@itemx --link ++@opindex -l ++@opindex --link ++Make hard links instead of copies of non-directories. ++ ++@item -L ++@itemx --dereference ++@opindex -L ++@opindex --dereference ++Follow symbolic links when copying from them. ++With this option, @command{cp} cannot create a symbolic link. ++For example, a symlink (to a regular file) in the source tree will be copied to ++a regular file in the destination tree. ++ ++@item -n ++@itemx --no-clobber ++@opindex -n ++@opindex --no-clobber ++Do not overwrite an existing file. The @option{-n} option overrides a previous ++@option{-i} option. This option is mutually exclusive with the @option{-b} or ++@option{--backup} option. ++ ++@item -P ++@itemx --no-dereference ++@opindex -P ++@opindex --no-dereference ++@cindex symbolic links, copying ++Copy symbolic links as symbolic links rather than copying the files that ++they point to. This option affects only symbolic links in the source; ++symbolic links in the destination are always followed if possible. ++ ++@item -p ++@itemx @w{@kbd{--preserve}[=@var{attribute_list}]} ++@opindex -p ++@opindex --preserve ++@cindex file information, preserving, extended attributes, xattr ++Preserve the specified attributes of the original files. ++If specified, the @var{attribute_list} must be a comma-separated list ++of one or more of the following strings: ++ ++@table @samp ++@item mode ++Preserve the file mode bits and access control lists. ++@item ownership ++Preserve the owner and group. On most modern systems, ++only users with appropriate privileges may change the owner of a file, ++and ordinary users ++may preserve the group ownership of a file only if they happen to be ++a member of the desired group. ++@item timestamps ++Preserve the times of last access and last modification, when possible. ++On older systems, it is not possible to preserve these attributes ++when the affected file is a symbolic link.
++However, many systems now provide the @code{utimensat} function, ++which makes it possible even for symbolic links. ++@item links ++Preserve in the destination files ++any links between corresponding source files. ++Note that with @option{-L} or @option{-H}, this option can convert ++symbolic links to hard links. For example, ++@example ++$ mkdir c; : > a; ln -s a b; cp -aH a b c; ls -i1 c ++74161745 a ++74161745 b ++@end example ++@noindent ++Note the inputs: @file{b} is a symlink to the regular file @file{a}, ++yet the files in the destination directory, @file{c/}, are hard-linked. ++Since @option{-a} implies @option{--preserve=links}, and since @option{-H} ++tells @command{cp} to dereference command line arguments, it sees two files ++with the same inode number, and preserves the perceived hard link. ++ ++Here is a similar example that exercises @command{cp}'s @option{-L} option: ++@smallexample ++$ mkdir b c; (cd b; : > a; ln -s a b); cp -aL b c; ls -i1 c/b ++74163295 a ++74163295 b ++@end smallexample ++ ++@item context ++Preserve SELinux security context of the file. @command{cp} will fail ++if the preserving of SELinux security context is not successful. ++@item xattr ++Preserve extended attributes if @command{cp} is built with xattr support, ++and xattrs are supported and enabled on your file system. ++If SELinux context and/or ACLs are implemented using xattrs, ++they are preserved by this option as well. ++@item all ++Preserve all file attributes. ++Equivalent to specifying all of the above, but with the difference ++that failure to preserve SELinux security context or extended attributes ++does not change @command{cp}'s exit status. ++@command{cp} does diagnose such failures. ++@end table ++ ++Using @option{--preserve} with no @var{attribute_list} is equivalent ++to @option{--preserve=mode,ownership,timestamps}.
++ ++In the absence of this option, each destination file is created with the ++mode bits of the corresponding source file, minus the bits set in the ++umask and minus the set-user-ID and set-group-ID bits. ++@xref{File permissions}. ++ ++@item @w{@kbd{--no-preserve}=@var{attribute_list}} ++@cindex file information, preserving ++Do not preserve the specified attributes. The @var{attribute_list} ++has the same form as for @option{--preserve}. ++ ++@item --parents ++@opindex --parents ++@cindex parent directories and @command{cp} ++Form the name of each destination file by appending to the target ++directory a slash and the specified name of the source file. The last ++argument given to @command{cp} must be the name of an existing directory. ++For example, the command: ++ ++@example ++cp --parents a/b/c existing_dir ++@end example ++ ++@noindent ++copies the file @file{a/b/c} to @file{existing_dir/a/b/c}, creating ++any missing intermediate directories. ++ ++@item -R ++@itemx -r ++@itemx --recursive ++@opindex -R ++@opindex -r ++@opindex --recursive ++@cindex directories, copying recursively ++@cindex copying directories recursively ++@cindex recursively copying directories ++@cindex non-directories, copying as special files ++Copy directories recursively. By default, do not follow symbolic ++links in the source; see the @option{--archive} (@option{-a}), @option{-d}, ++@option{--dereference} (@option{-L}), @option{--no-dereference} ++(@option{-P}), and @option{-H} options. Special files are copied by ++creating a destination file of the same type as the source; see the ++@option{--copy-contents} option. It is not portable to use ++@option{-r} to copy symbolic links or special files. On some ++non-@sc{gnu} systems, @option{-r} implies the equivalent of ++@option{-L} and @option{--copy-contents} for historical reasons.
++Also, it is not portable to use @option{-R} to copy symbolic links ++unless you also specify @option{-P}, as @acronym{POSIX} allows ++implementations that dereference symbolic links by default. ++ ++@item --reflink[=@var{when}] ++@opindex --reflink[=@var{when}] ++@cindex COW ++@cindex clone ++@cindex copy on write ++Perform a lightweight, copy-on-write (COW) copy. ++Copying with this option can succeed only on some file systems. ++Once it has succeeded, beware that the source and destination files ++share the same disk data blocks as long as they remain unmodified. ++Thus, if a disk I/O error affects data blocks of one of the files, ++the other suffers the exact same fate. ++ ++The @var{when} value can be one of the following: ++ ++@table @samp ++@item always ++The default behavior: if the copy-on-write operation is not supported ++then report the failure for each file and exit with a failure status. ++ ++@item auto ++If the copy-on-write operation is not supported then fall back ++to the standard copy behavior. ++@end table ++ ++ ++@item --remove-destination ++@opindex --remove-destination ++Remove each existing destination file before attempting to open it ++(contrast with @option{-f} above). ++ ++@item --sparse=@var{when} ++@opindex --sparse=@var{when} ++@cindex sparse files, copying ++@cindex holes, copying files with ++@findex read @r{system call, and holes} ++A @dfn{sparse file} contains @dfn{holes}---a sequence of zero bytes that ++does not occupy any physical disk blocks; the @samp{read} system call ++reads these as zeros. This can both save considerable disk space and ++increase speed, since many binary files contain lots of consecutive zero ++bytes. By default, @command{cp} detects holes in input source files via a crude ++heuristic and makes the corresponding output file sparse as well. ++Only regular files may be sparse.
++ ++The @var{when} value can be one of the following: ++ ++@table @samp ++@item auto ++The default behavior: if the input file is sparse, attempt to make ++the output file sparse, too. However, if an output file exists but ++refers to a non-regular file, then do not attempt to make it sparse. ++ ++@item always ++For each sufficiently long sequence of zero bytes in the input file, ++attempt to create a corresponding hole in the output file, even if the ++input file does not appear to be sparse. ++This is useful when the input file resides on a file system ++that does not support sparse files ++(for example, @samp{efs} file systems in SGI IRIX 5.3 and earlier), ++but the output file is on a type of file system that does support them. ++Holes may be created only in regular files, so if the destination file ++is of some other type, @command{cp} does not even try to make it sparse. ++ ++@item never ++Never make the output file sparse. ++This is useful in creating a file for use with the @command{mkswap} command, ++since such a file must not have any holes. ++@end table ++ ++@optStripTrailingSlashes ++ ++@item -s ++@itemx --symbolic-link ++@opindex -s ++@opindex --symbolic-link ++@cindex symbolic links, copying with ++Make symbolic links instead of copies of non-directories. All source ++file names must be absolute (starting with @samp{/}) unless the ++destination files are in the current directory. This option merely ++results in an error message on systems that do not support symbolic links. ++ ++@optBackupSuffix ++ ++@optTargetDirectory ++ ++@optNoTargetDirectory ++ ++@item -u ++@itemx --update ++@opindex -u ++@opindex --update ++@cindex newer files, copying only ++Do not copy a non-directory that has an existing destination with the ++same or newer modification time. 
If time stamps are being preserved, ++the comparison is to the source time stamp truncated to the ++resolutions of the destination file system and of the system calls ++used to update time stamps; this avoids duplicate work if several ++@samp{cp -pu} commands are executed with the same source and ++destination. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Print the name of each file before copying it. ++ ++@item -x ++@itemx --one-file-system ++@opindex -x ++@opindex --one-file-system ++@cindex file systems, omitting copying to different ++Skip subdirectories that are on different file systems from the one that ++the copy started on. ++However, mount point directories @emph{are} copied. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node dd invocation ++@section @command{dd}: Convert and copy a file ++ ++@pindex dd ++@cindex converting while copying a file ++ ++@command{dd} copies a file (from standard input to standard output, by ++default) with a changeable I/O block size, while optionally performing ++conversions on it. Synopses: ++ ++@example ++dd [@var{operand}]@dots{} ++dd @var{option} ++@end example ++ ++The only options are @option{--help} and @option{--version}. ++@xref{Common options}. @command{dd} accepts the following operands. ++ ++@table @samp ++ ++@item if=@var{file} ++@opindex if ++Read from @var{file} instead of standard input. ++ ++@item of=@var{file} ++@opindex of ++Write to @var{file} instead of standard output. Unless ++@samp{conv=notrunc} is given, @command{dd} truncates @var{file} to zero ++bytes (or the size specified with @samp{seek=}). ++ ++@item ibs=@var{bytes} ++@opindex ibs ++@cindex block size of input ++@cindex input block size ++Set the input block size to @var{bytes}. ++This makes @command{dd} read @var{bytes} per block. ++The default is 512 bytes. ++ ++@item obs=@var{bytes} ++@opindex obs ++@cindex block size of output ++@cindex output block size ++Set the output block size to @var{bytes}. 
++This makes @command{dd} write @var{bytes} per block. ++The default is 512 bytes. ++ ++@item bs=@var{bytes} ++@opindex bs ++@cindex block size ++Set both input and output block sizes to @var{bytes}. ++This makes @command{dd} read and write @var{bytes} per block, ++overriding any @samp{ibs} and @samp{obs} settings. ++In addition, if no data-transforming @option{conv} option is specified, ++each input block is copied to the output as a single block, ++without aggregating short reads. ++ ++@item cbs=@var{bytes} ++@opindex cbs ++@cindex block size of conversion ++@cindex conversion block size ++@cindex fixed-length records, converting to variable-length ++@cindex variable-length records, converting to fixed-length ++Set the conversion block size to @var{bytes}. ++When converting variable-length records to fixed-length ones ++(@option{conv=block}) or the reverse (@option{conv=unblock}), ++use @var{bytes} as the fixed record length. ++ ++@item skip=@var{blocks} ++@opindex skip ++Skip @var{blocks} @samp{ibs}-byte blocks in the input file before copying. ++ ++@item seek=@var{blocks} ++@opindex seek ++Skip @var{blocks} @samp{obs}-byte blocks in the output file before copying. ++ ++@item count=@var{blocks} ++@opindex count ++Copy @var{blocks} @samp{ibs}-byte blocks from the input file, instead ++of everything until the end of the file. ++ ++@item status=noxfer ++@opindex status ++Do not print the overall transfer rate and volume statistics ++that normally make up the third status line when @command{dd} exits. ++ ++@item conv=@var{conversion}[,@var{conversion}]@dots{} ++@opindex conv ++Convert the file as specified by the @var{conversion} argument(s). ++(No spaces around any comma(s).) ++ ++Conversions: ++ ++@table @samp ++ ++@item ascii ++@opindex ascii@r{, converting to} ++Convert @acronym{EBCDIC} to @acronym{ASCII}, ++using the conversion table specified by @acronym{POSIX}. ++This provides a 1:1 translation for all 256 bytes. 
++ ++@item ebcdic ++@opindex ebcdic@r{, converting to} ++Convert @acronym{ASCII} to @acronym{EBCDIC}. ++This is the inverse of the @samp{ascii} conversion. ++ ++@item ibm ++@opindex alternate ebcdic@r{, converting to} ++Convert @acronym{ASCII} to alternate @acronym{EBCDIC}, ++using the alternate conversion table specified by @acronym{POSIX}. ++This is not a 1:1 translation, but reflects common historical practice ++for @samp{~}, @samp{[}, and @samp{]}. ++ ++The @samp{ascii}, @samp{ebcdic}, and @samp{ibm} conversions are ++mutually exclusive. ++ ++@item block ++@opindex block @r{(space-padding)} ++For each line in the input, output @samp{cbs} bytes, replacing the ++input newline with a space and padding with spaces as necessary. ++ ++@item unblock ++@opindex unblock ++Remove any trailing spaces in each @samp{cbs}-sized input block, ++and append a newline. ++ ++The @samp{block} and @samp{unblock} conversions are mutually exclusive. ++ ++@item lcase ++@opindex lcase@r{, converting to} ++Change uppercase letters to lowercase. ++ ++@item ucase ++@opindex ucase@r{, converting to} ++Change lowercase letters to uppercase. ++ ++The @samp{lcase} and @samp{ucase} conversions are mutually exclusive. ++ ++@item swab ++@opindex swab @r{(byte-swapping)} ++@cindex byte-swapping ++Swap every pair of input bytes. @sc{gnu} @command{dd}, unlike others, works ++when an odd number of bytes are read---the last byte is simply copied ++(since there is nothing to swap it with). ++ ++@item noerror ++@opindex noerror ++@cindex read errors, ignoring ++Continue after read errors. ++ ++@item nocreat ++@opindex nocreat ++@cindex creating output file, avoiding ++Do not create the output file; the output file must already exist. ++ ++@item excl ++@opindex excl ++@cindex creating output file, requiring ++Fail if the output file already exists; @command{dd} must create the ++output file itself. ++ ++The @samp{excl} and @samp{nocreat} conversions are mutually exclusive. 
++ ++@item notrunc ++@opindex notrunc ++@cindex truncating output file, avoiding ++Do not truncate the output file. ++ ++@item sync ++@opindex sync @r{(padding with @acronym{ASCII} @sc{nul}s)} ++Pad every input block to size of @samp{ibs} with trailing zero bytes. ++When used with @samp{block} or @samp{unblock}, pad with spaces instead of ++zero bytes. ++ ++@item fdatasync ++@opindex fdatasync ++@cindex synchronized data writes, before finishing ++Synchronize output data just before finishing. This forces a physical ++write of output data. ++ ++@item fsync ++@opindex fsync ++@cindex synchronized data and metadata writes, before finishing ++Synchronize output data and metadata just before finishing. This ++forces a physical write of output data and metadata. ++ ++@end table ++ ++@item iflag=@var{flag}[,@var{flag}]@dots{} ++@opindex iflag ++Access the input file using the flags specified by the @var{flag} ++argument(s). (No spaces around any comma(s).) ++ ++@item oflag=@var{flag}[,@var{flag}]@dots{} ++@opindex oflag ++Access the output file using the flags specified by the @var{flag} ++argument(s). (No spaces around any comma(s).) ++ ++Here are the flags. Not every flag is supported on every operating ++system. ++ ++@table @samp ++ ++@item append ++@opindex append ++@cindex appending to the output file ++Write in append mode, so that even if some other process is writing to ++this file, every @command{dd} write will append to the current ++contents of the file. This flag makes sense only for output. ++If you combine this flag with the @samp{of=@var{file}} operand, ++you should also specify @samp{conv=notrunc} unless you want the ++output file to be truncated before being appended to. ++ ++@item cio ++@opindex cio ++@cindex concurrent I/O ++Use concurrent I/O mode for data. This mode performs direct I/O ++and drops the @acronym{POSIX} requirement to serialize all I/O to the same file. ++A file cannot be opened in CIO mode and with a standard open at the ++same time. 
++ ++@item direct ++@opindex direct ++@cindex direct I/O ++Use direct I/O for data, avoiding the buffer cache. ++Note that the kernel may impose restrictions on read or write buffer sizes. ++For example, with an ext4 destination file system and a Linux-based kernel, ++using @samp{oflag=direct} will cause writes to fail with @code{EINVAL} if the ++output buffer size is not a multiple of 512. ++ ++@item directory ++@opindex directory ++@cindex directory I/O ++ ++Fail unless the file is a directory. Most operating systems do not ++allow I/O to a directory, so this flag has limited utility. ++ ++@item dsync ++@opindex dsync ++@cindex synchronized data reads ++Use synchronized I/O for data. For the output file, this forces a ++physical write of output data on each write. For the input file, ++this flag can matter when reading from a remote file that has been ++written to synchronously by some other process. Metadata (e.g., ++last-access and last-modified time) is not necessarily synchronized. ++ ++@item sync ++@opindex sync ++@cindex synchronized data and metadata I/O ++Use synchronized I/O for both data and metadata. ++ ++@item nonblock ++@opindex nonblock ++@cindex nonblocking I/O ++Use non-blocking I/O. ++ ++@item noatime ++@opindex noatime ++@cindex access time ++Do not update the file's access time. ++Some older file systems silently ignore this flag, so it is a good ++idea to test it on your files before relying on it. ++ ++@item noctty ++@opindex noctty ++@cindex controlling terminal ++Do not assign the file to be a controlling terminal for @command{dd}. ++This has no effect when the file is not a terminal. ++On many hosts (e.g., @acronym{GNU}/Linux hosts), this option has no effect ++at all. ++ ++@item nofollow ++@opindex nofollow ++@cindex symbolic links, following ++Do not follow symbolic links. ++ ++@item nolinks ++@opindex nolinks ++@cindex hard links ++Fail if the file has multiple hard links.
++ ++@item binary ++@opindex binary ++@cindex binary I/O ++Use binary I/O. This option has an effect only on nonstandard ++platforms that distinguish binary from text I/O. ++ ++@item text ++@opindex text ++@cindex text I/O ++Use text I/O. Like @samp{binary}, this option has no effect on ++standard platforms. ++ ++@item fullblock ++@opindex fullblock ++Accumulate full blocks from input. The @code{read} system call ++may return early if a full block is not available. ++When that happens, continue calling @code{read} to fill the remainder ++of the block. ++This flag can be used only with @code{iflag}. ++ ++@end table ++ ++These flags are not supported on all systems, and @samp{dd} rejects ++attempts to use them when they are not supported. When reading from ++standard input or writing to standard output, the @samp{nofollow} and ++@samp{noctty} flags should not be specified, and the other flags ++(e.g., @samp{nonblock}) can affect how other processes behave with the ++affected file descriptors, even after @command{dd} exits. ++ ++@end table ++ ++@cindex multipliers after numbers ++The numeric-valued strings above (@var{bytes} and @var{blocks}) can be ++followed by a multiplier: @samp{b}=512, @samp{c}=1, ++@samp{w}=2, @samp{x@var{m}}=@var{m}, or any of the ++standard block size suffixes like @samp{k}=1024 (@pxref{Block size}). ++ ++Use different @command{dd} invocations to use different block sizes for ++skipping and I/O@. For example, the following shell commands copy data ++in 512 KiB blocks between a disk and a tape, but do not save or restore a ++4 KiB label at the start of the disk: ++ ++@example ++disk=/dev/rdsk/c0t1d0s2 ++tape=/dev/rmt/0 ++ ++# Copy all but the label from disk to tape. ++(dd bs=4k skip=1 count=0 && dd bs=512k) <$disk >$tape ++ ++# Copy from tape back to disk, but leave the disk label alone. 
++(dd bs=4k seek=1 count=0 && dd bs=512k) <$tape >$disk ++@end example ++ ++Sending an @samp{INFO} signal to a running @command{dd} ++process makes it print I/O statistics to standard error ++and then resume copying. In the example below, ++@command{dd} is run in the background to copy 10 million blocks. ++The @command{kill} command makes it output intermediate I/O statistics, ++and when @command{dd} completes normally or is killed by the ++@code{SIGINT} signal, it outputs the final statistics. ++ ++@example ++$ dd if=/dev/zero of=/dev/null count=10MB & pid=$! ++$ kill -s INFO $pid; wait $pid ++3385223+0 records in ++3385223+0 records out ++1733234176 bytes (1.7 GB) copied, 6.42173 seconds, 270 MB/s ++10000000+0 records in ++10000000+0 records out ++5120000000 bytes (5.1 GB) copied, 18.913 seconds, 271 MB/s ++@end example ++ ++@vindex POSIXLY_CORRECT ++On systems lacking the @samp{INFO} signal @command{dd} responds to the ++@samp{USR1} signal instead, unless the @env{POSIXLY_CORRECT} ++environment variable is set. ++ ++@exitstatus ++ ++ ++@node install invocation ++@section @command{install}: Copy files and set attributes ++ ++@pindex install ++@cindex copying files and setting attributes ++ ++@command{install} copies files while setting their file mode bits and, if ++possible, their owner and group. Synopses: ++ ++@example ++install [@var{option}]@dots{} [-T] @var{source} @var{dest} ++install [@var{option}]@dots{} @var{source}@dots{} @var{directory} ++install [@var{option}]@dots{} -t @var{directory} @var{source}@dots{} ++install [@var{option}]@dots{} -d @var{directory}@dots{} ++@end example ++ ++@itemize @bullet ++@item ++If two file names are given, @command{install} copies the first file to the ++second. 
++ ++@item ++If the @option{--target-directory} (@option{-t}) option is given, or ++failing that if the last file is a directory and the ++@option{--no-target-directory} (@option{-T}) option is not given, ++@command{install} copies each @var{source} file to the specified ++directory, using the @var{source}s' names. ++ ++@item ++If the @option{--directory} (@option{-d}) option is given, ++@command{install} creates each @var{directory} and any missing parent ++directories. Parent directories are created with mode ++@samp{u=rwx,go=rx} (755), regardless of the @option{-m} option or the ++current umask. @xref{Directory Setuid and Setgid}, for how the ++set-user-ID and set-group-ID bits of parent directories are inherited. ++@end itemize ++ ++@cindex Makefiles, installing programs in ++@command{install} is similar to @command{cp}, but allows you to control the ++attributes of destination files. It is typically used in Makefiles to ++copy programs into their destination directories. It refuses to copy ++files onto themselves. ++ ++@cindex extended attributes, xattr ++@command{install} never preserves extended attributes (xattr). ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@optBackup ++ ++@item -C ++@itemx --compare ++@opindex -C ++@opindex --compare ++Compare each pair of source and destination files, and if the destination has ++identical content and any specified owner, group, permissions, and possibly ++SELinux context, then do not modify the destination at all. ++ ++@item -c ++@opindex -c ++Ignored; for compatibility with old Unix versions of @command{install}. ++ ++@item -D ++@opindex -D ++Create any missing parent directories of @var{dest}, ++then copy @var{source} to @var{dest}. ++This option is ignored if a destination directory is specified ++via @option{--target-directory=DIR}. 
++ ++@item -d ++@itemx --directory ++@opindex -d ++@opindex --directory ++@cindex directories, creating with given attributes ++@cindex parent directories, creating missing ++@cindex leading directories, creating missing ++Create any missing parent directories, giving them the default ++attributes. Then create each given directory, setting their owner, ++group and mode as given on the command line or to the defaults. ++ ++@item -g @var{group} ++@itemx --group=@var{group} ++@opindex -g ++@opindex --group ++@cindex group ownership of installed files, setting ++Set the group ownership of installed files or directories to ++@var{group}. The default is the process's current group. @var{group} ++may be either a group name or a numeric group ID. ++ ++@item -m @var{mode} ++@itemx --mode=@var{mode} ++@opindex -m ++@opindex --mode ++@cindex permissions of installed files, setting ++Set the file mode bits for the installed file or directory to @var{mode}, ++which can be either an octal number, or a symbolic mode as in ++@command{chmod}, with @samp{a=} (no access allowed to anyone) as the ++point of departure (@pxref{File permissions}). ++The default mode is @samp{u=rwx,go=rx,a-s}---read, write, and ++execute for the owner, read and execute for group and other, and with ++set-user-ID and set-group-ID disabled. ++This default is not quite the same as @samp{755}, since it disables ++instead of preserving set-user-ID and set-group-ID on directories. ++@xref{Directory Setuid and Setgid}. ++ ++@item -o @var{owner} ++@itemx --owner=@var{owner} ++@opindex -o ++@opindex --owner ++@cindex ownership of installed files, setting ++@cindex appropriate privileges ++@vindex root @r{as default owner} ++If @command{install} has appropriate privileges (is run as root), set the ++ownership of installed files or directories to @var{owner}. The default ++is @code{root}. @var{owner} may be either a user name or a numeric user ++ID. 
++ ++@item --preserve-context ++@opindex --preserve-context ++@cindex SELinux ++@cindex security context ++Preserve the SELinux security context of files and directories. ++Failure to preserve the context in all of the files or directories ++will result in an exit status of 1. If SELinux is disabled then ++print a warning and ignore the option. ++ ++@item -p ++@itemx --preserve-timestamps ++@opindex -p ++@opindex --preserve-timestamps ++@cindex timestamps of installed files, preserving ++Set the time of last access and the time of last modification of each ++installed file to match those of each corresponding original file. ++When a file is installed without this option, its last access and ++last modification times are both set to the time of installation. ++This option is useful if you want to use the last modification times ++of installed files to keep track of when they were last built as opposed ++to when they were last installed. ++ ++@item -s ++@itemx --strip ++@opindex -s ++@opindex --strip ++@cindex symbol table information, stripping ++@cindex stripping symbol table information ++Strip the symbol tables from installed binary executables. ++ ++@itemx --strip-program=@var{program} ++@opindex --strip-program ++@cindex symbol table information, stripping, program ++Program used to strip binaries. ++ ++@optBackupSuffix ++ ++@optTargetDirectory ++ ++@optNoTargetDirectory ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Print the name of each file before copying it. ++ ++@item -Z @var{context} ++@itemx --context=@var{context} ++@opindex -Z ++@opindex --context ++@cindex SELinux ++@cindex security context ++Set the default SELinux security context to be used for any ++created files and directories. If SELinux is disabled then ++print a warning and ignore the option. 
++ ++@end table ++ ++@exitstatus ++ ++ ++@node mv invocation ++@section @command{mv}: Move (rename) files ++ ++@pindex mv ++ ++@command{mv} moves or renames files (or directories). Synopses: ++ ++@example ++mv [@var{option}]@dots{} [-T] @var{source} @var{dest} ++mv [@var{option}]@dots{} @var{source}@dots{} @var{directory} ++mv [@var{option}]@dots{} -t @var{directory} @var{source}@dots{} ++@end example ++ ++@itemize @bullet ++@item ++If two file names are given, @command{mv} moves the first file to the ++second. ++ ++@item ++If the @option{--target-directory} (@option{-t}) option is given, or ++failing that if the last file is a directory and the ++@option{--no-target-directory} (@option{-T}) option is not given, ++@command{mv} moves each @var{source} file to the specified ++directory, using the @var{source}s' names. ++@end itemize ++ ++@command{mv} can move any type of file from one file system to another. ++Prior to version @code{4.0} of the fileutils, ++@command{mv} could move only regular files between file systems. ++For example, now @command{mv} can move an entire directory hierarchy ++including special device files from one partition to another. It first ++uses some of the same code that's used by @code{cp -a} to copy the ++requested directories and files, then (assuming the copy succeeded) ++it removes the originals. If the copy fails, then the part that was ++copied to the destination partition is removed. If you were to copy ++three directories from one partition to another and the copy of the first ++directory succeeded, but the second didn't, the first would be left on ++the destination partition and the second and third would be left on the ++original partition. ++ ++@cindex extended attributes, xattr ++@command{mv} always tries to copy extended attributes (xattr). 
++ ++@cindex prompting, and @command{mv} ++If a destination file exists but is normally unwritable, standard input ++is a terminal, and the @option{-f} or @option{--force} option is not given, ++@command{mv} prompts the user for whether to replace the file. (You might ++own the file, or have write permission on its directory.) If the ++response is not affirmative, the file is skipped. ++ ++@emph{Warning}: Avoid specifying a source name with a trailing slash, ++when it might be a symlink to a directory. ++Otherwise, @command{mv} may do something very surprising, since ++its behavior depends on the underlying rename system call. ++On a system with a modern Linux-based kernel, it fails with @code{errno=ENOTDIR}. ++However, on other systems (at least FreeBSD 6.1 and Solaris 10) it silently ++renames not the symlink but rather the directory referenced by the symlink. ++@xref{Trailing slashes}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@optBackup ++ ++@item -f ++@itemx --force ++@opindex -f ++@opindex --force ++@cindex prompts, omitting ++Do not prompt the user before removing a destination file. ++@macro mvOptsIfn ++If you specify more than one of the @option{-i}, @option{-f}, @option{-n} ++options, only the final one takes effect. ++@end macro ++@mvOptsIfn ++ ++@item -i ++@itemx --interactive ++@opindex -i ++@opindex --interactive ++@cindex prompts, forcing ++Prompt whether to overwrite each existing destination file, regardless ++of its permissions. ++If the response is not affirmative, the file is skipped. ++@mvOptsIfn ++ ++@item -n ++@itemx --no-clobber ++@opindex -n ++@opindex --no-clobber ++@cindex prompts, omitting ++Do not overwrite an existing file. ++@mvOptsIfn ++This option is mutually exclusive with the @option{-b} or @option{--backup} option.
++ ++@item -u ++@itemx --update ++@opindex -u ++@opindex --update ++@cindex newer files, moving only ++Do not move a non-directory that has an existing destination with the ++same or newer modification time. ++If the move is across file system boundaries, the comparison is to the ++source time stamp truncated to the resolutions of the destination file ++system and of the system calls used to update time stamps; this avoids ++duplicate work if several @samp{mv -u} commands are executed with the ++same source and destination. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Print the name of each file before moving it. ++ ++@optStripTrailingSlashes ++ ++@optBackupSuffix ++ ++@optTargetDirectory ++ ++@optNoTargetDirectory ++ ++@end table ++ ++@exitstatus ++ ++ ++@node rm invocation ++@section @command{rm}: Remove files or directories ++ ++@pindex rm ++@cindex removing files or directories ++ ++@command{rm} removes each given @var{file}. By default, it does not remove ++directories. Synopsis: ++ ++@example ++rm [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++@cindex prompting, and @command{rm} ++If the @option{-I} or @option{--interactive=once} option is given, ++and there are more than three files or the @option{-r}, @option{-R}, ++or @option{--recursive} are given, then @command{rm} prompts the user ++for whether to proceed with the entire operation. If the response is ++not affirmative, the entire command is aborted. ++ ++Otherwise, if a file is unwritable, standard input is a terminal, and ++the @option{-f} or @option{--force} option is not given, or the ++@option{-i} or @option{--interactive=always} option @emph{is} given, ++@command{rm} prompts the user for whether to remove the file. ++If the response is not affirmative, the file is skipped. ++ ++Any attempt to remove a file whose last file name component is ++@file{.} or @file{..} is rejected without any prompting. 
++ ++@emph{Warning}: If you use @command{rm} to remove a file, it is usually ++possible to recover the contents of that file. If you want more assurance ++that the contents are truly unrecoverable, consider using @command{shred}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -f ++@itemx --force ++@opindex -f ++@opindex --force ++Ignore nonexistent files and never prompt the user. ++Ignore any previous @option{--interactive} (@option{-i}) option. ++ ++@item -i ++@opindex -i ++Prompt whether to remove each file. ++If the response is not affirmative, the file is skipped. ++Ignore any previous @option{--force} (@option{-f}) option. ++Equivalent to @option{--interactive=always}. ++ ++@item -I ++@opindex -I ++Prompt once whether to proceed with the command, if more than three ++files are named or if a recursive removal is requested. Ignore any ++previous @option{--force} (@option{-f}) option. Equivalent to ++@option{--interactive=once}. ++ ++@itemx --interactive [=@var{when}] ++@opindex --interactive ++Specify when to issue an interactive prompt. @var{when} may be ++omitted, or one of: ++@itemize @bullet ++@item never ++@vindex never @r{interactive option} ++- Do not prompt at all. ++@item once ++@vindex once @r{interactive option} ++- Prompt once if more than three files are named or if a recursive ++removal is requested. Equivalent to @option{-I}. ++@item always ++@vindex always @r{interactive option} ++- Prompt for every file being removed. Equivalent to @option{-i}. ++@end itemize ++@option{--interactive} with no @var{when} is equivalent to ++@option{--interactive=always}. ++ ++@itemx --one-file-system ++@opindex --one-file-system ++@cindex one file system, restricting @command{rm} to ++When removing a hierarchy recursively, skip any directory that is on a ++file system different from that of the corresponding command line argument. 
++ ++This option is useful when removing a build ``chroot'' hierarchy, ++which normally contains no valuable data. However, it is not uncommon ++to bind-mount @file{/home} into such a hierarchy, to make it easier to ++use one's start-up file. The catch is that it's easy to forget to ++unmount @file{/home}. Then, when you use @command{rm -rf} to remove ++your normally throw-away chroot, that command will remove everything ++under @file{/home}, too. ++Use the @option{--one-file-system} option, and it will ++warn about and skip directories on other file systems. ++Of course, this will not save your @file{/home} if it and your ++chroot happen to be on the same file system. ++ ++@itemx --preserve-root ++@opindex --preserve-root ++@cindex root directory, disallow recursive destruction ++Fail upon any attempt to remove the root directory, @file{/}, ++when used with the @option{--recursive} option. ++This is the default behavior. ++@xref{Treating / specially}. ++ ++@itemx --no-preserve-root ++@opindex --no-preserve-root ++@cindex root directory, allow recursive destruction ++Do not treat @file{/} specially when removing recursively. ++This option is not recommended unless you really want to ++remove all the files on your computer. ++@xref{Treating / specially}. ++ ++@item -r ++@itemx -R ++@itemx --recursive ++@opindex -r ++@opindex -R ++@opindex --recursive ++@cindex directories, removing (recursively) ++Remove the listed directories and their contents recursively. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Print the name of each file before removing it. ++ ++@end table ++ ++@cindex files beginning with @samp{-}, removing ++@cindex @samp{-}, removing files beginning with ++One common question is how to remove files whose names begin with a ++@samp{-}. @sc{gnu} @command{rm}, like every program that uses the @code{getopt} ++function to parse its arguments, lets you use the @samp{--} option to ++indicate that all following arguments are non-options. 
To remove a file ++called @file{-f} in the current directory, you could type either: ++ ++@example ++rm -- -f ++@end example ++ ++@noindent ++or: ++ ++@example ++rm ./-f ++@end example ++ ++@opindex - @r{and Unix @command{rm}} ++The Unix @command{rm} program's use of a single @samp{-} for this purpose ++predates the development of the getopt standard syntax. ++ ++@exitstatus ++ ++ ++@node shred invocation ++@section @command{shred}: Remove files more securely ++ ++@pindex shred ++@cindex data, erasing ++@cindex erasing data ++ ++@command{shred} overwrites devices or files, to help prevent even ++very expensive hardware from recovering the data. ++ ++Ordinarily when you remove a file (@pxref{rm invocation}), the data is ++not actually destroyed. Only the index listing where the file is ++stored is destroyed, and the storage is made available for reuse. ++There are undelete utilities that will attempt to reconstruct the index ++and can bring the file back if the parts were not reused. ++ ++On a busy system with a nearly-full drive, space can get reused in a few ++seconds. But there is no way to know for sure. If you have sensitive ++data, you may want to be sure that recovery is not possible by actually ++overwriting the file with non-sensitive data. ++ ++However, even after doing that, it is possible to take the disk back ++to a laboratory and use a lot of sensitive (and expensive) equipment ++to look for the faint ``echoes'' of the original data underneath the ++overwritten data. If the data has only been overwritten once, it's not ++even that hard. ++ ++The best way to remove something irretrievably is to destroy the media ++it's on with acid, melt it down, or the like. For cheap removable media ++like floppy disks, this is the preferred method. However, hard drives ++are expensive and hard to melt, so the @command{shred} utility tries ++to achieve a similar effect non-destructively. 
++ ++This uses many overwrite passes, with the data patterns chosen to ++maximize the damage they do to the old data. While this will work on ++floppies, the patterns are designed for best effect on hard drives. ++For more details, see the source code and Peter Gutmann's paper ++@uref{http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html, ++@cite{Secure Deletion of Data from Magnetic and Solid-State Memory}}, ++from the proceedings of the Sixth @acronym{USENIX} Security Symposium (San Jose, ++California, July 22--25, 1996). ++ ++@strong{Please note} that @command{shred} relies on a very important assumption: ++that the file system overwrites data in place. This is the traditional ++way to do things, but many modern file system designs do not satisfy this ++assumption. Exceptions include: ++ ++@itemize @bullet ++ ++@item ++Log-structured or journaled file systems, such as those supplied with ++AIX and Solaris, and JFS, ReiserFS, XFS, Ext3 (in @code{data=journal} mode), ++BFS, NTFS, etc.@: when they are configured to journal @emph{data}. ++ ++@item ++File systems that write redundant data and carry on even if some writes ++fail, such as RAID-based file systems. ++ ++@item ++File systems that make snapshots, such as Network Appliance's NFS server. ++ ++@item ++File systems that cache in temporary locations, such as NFS version 3 ++clients. ++ ++@item ++Compressed file systems. ++@end itemize ++ ++In the particular case of ext3 file systems, the above disclaimer applies (and ++@command{shred} is thus of limited effectiveness) only in @code{data=journal} ++mode, which journals file data in addition to just metadata. In both ++the @code{data=ordered} (default) and @code{data=writeback} modes, ++@command{shred} works as usual. Ext3 journaling modes can be changed ++by adding the @code{data=something} option to the mount options for a ++particular file system in the @file{/etc/fstab} file, as documented in ++the mount man page (man mount). 
++ ++If you are not sure how your file system operates, then you should assume ++that it does not overwrite data in place, which means that shred cannot ++reliably operate on regular files in your file system. ++ ++Generally speaking, it is more reliable to shred a device than a file, ++since this bypasses the problem of file system design mentioned above. ++However, even shredding devices is not always completely reliable. For ++example, most disks map out bad sectors invisibly to the application; if ++the bad sectors contain sensitive data, @command{shred} won't be able to ++destroy it. ++ ++@command{shred} makes no attempt to detect or report this problem, just as ++it makes no attempt to do anything about backups. However, since it is ++more reliable to shred devices than files, @command{shred} by default does ++not truncate or remove the output file. This default is more suitable ++for devices, which typically cannot be truncated and should not be ++removed. ++ ++Finally, consider the risk of backups and mirrors. ++File system backups and remote mirrors may contain copies of the ++file that cannot be removed, and that will allow a shredded file ++to be recovered later. So if you keep any data you may later want ++to destroy using @command{shred}, be sure that it is not backed up or mirrored. ++ ++@example ++shred [@var{option}]@dots{} @var{file}[@dots{}] ++@end example ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -f ++@itemx --force ++@opindex -f ++@opindex --force ++@cindex force deletion ++Override file permissions if necessary to allow overwriting. ++ ++@item -@var{number} ++@itemx -n @var{number} ++@itemx --iterations=@var{number} ++@opindex -n @var{number} ++@opindex --iterations=@var{number} ++@cindex iterations, selecting the number of ++By default, @command{shred} uses @value{SHRED_DEFAULT_PASSES} passes of ++overwrite. 
You can reduce this to save time, or increase it if you think it's ++appropriate. After 25 passes all of the internal overwrite patterns will have ++been used at least once. ++ ++@item --random-source=@var{file} ++@opindex --random-source ++@cindex random source for shredding ++Use @var{file} as a source of random data used to overwrite and to ++choose pass ordering. @xref{Random sources}. ++ ++@item -s @var{bytes} ++@itemx --size=@var{bytes} ++@opindex -s @var{bytes} ++@opindex --size=@var{bytes} ++@cindex size of file to shred ++Shred the first @var{bytes} bytes of the file. The default is to shred ++the whole file. @var{bytes} can be followed by a size specification like ++@samp{K}, @samp{M}, or @samp{G} to specify a multiple. @xref{Block size}. ++ ++@item -u ++@itemx --remove ++@opindex -u ++@opindex --remove ++@cindex removing files after shredding ++After shredding a file, truncate it (if possible) and then remove it. ++If a file has multiple links, only the named links will be removed. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Display to standard error all status updates as sterilization proceeds. ++ ++@item -x ++@itemx --exact ++@opindex -x ++@opindex --exact ++By default, @command{shred} rounds the size of a regular file up to the next ++multiple of the file system block size to fully erase the last block of the file. ++Use @option{--exact} to suppress that behavior. ++Thus, by default if you shred a 10-byte regular file on a system with 512-byte ++blocks, the resulting file will be 512 bytes long. With this option, ++shred does not increase the apparent size of the file. ++ ++@item -z ++@itemx --zero ++@opindex -z ++@opindex --zero ++Normally, the last pass that @command{shred} writes is made up of ++random data. If this would be conspicuous on your hard drive (for ++example, because it looks like encrypted data), or you just think ++it's tidier, the @option{--zero} option adds an additional overwrite pass with ++all zero bits. 
This is in addition to the number of passes specified ++by the @option{--iterations} option. ++ ++@end table ++ ++You might use the following command to erase all trace of the ++file system you'd created on the floppy disk in your first drive. ++That command takes about 20 minutes to erase a ``1.44MB'' (actually ++1440 KiB) floppy. ++ ++@example ++shred --verbose /dev/fd0 ++@end example ++ ++Similarly, to erase all data on a selected partition of ++your hard disk, you could give a command like this: ++ ++@example ++shred --verbose /dev/sda5 ++@end example ++ ++A @var{file} of @samp{-} denotes standard output. ++The intended use of this is to shred a removed temporary file. ++For example: ++ ++@example ++i=`tempfile -m 0600` ++exec 3<>"$i" ++rm -- "$i" ++echo "Hello, world" >&3 ++shred - >&3 ++exec 3>&- ++@end example ++ ++However, the command @samp{shred - >file} does not shred the contents ++of @var{file}, since the shell truncates @var{file} before invoking ++@command{shred}. Use the command @samp{shred file} or (if using a ++Bourne-compatible shell) the command @samp{shred - 1<>file} instead. ++ ++@exitstatus ++ ++ ++@node Special file types ++@chapter Special file types ++ ++@cindex special file types ++@cindex file types, special ++ ++This chapter describes commands which create special types of files (and ++@command{rmdir}, which removes directories, one special file type). ++ ++@cindex special file types ++@cindex file types ++Although Unix-like operating systems have markedly fewer special file ++types than others, not @emph{everything} can be treated only as the ++undifferentiated byte stream of @dfn{normal files}. For example, when a ++file is created or removed, the system must record this information, ++which it does in a @dfn{directory}---a special type of file. Although ++you can read directories as normal files, if you're curious, in order ++for the system to do its job it must impose a structure, a certain ++order, on the bytes of the file.
Thus it is a ``special'' type of file. ++ ++Besides directories, other special file types include named pipes ++(FIFOs), symbolic links, sockets, and so-called @dfn{special files}. ++ ++@menu ++* link invocation:: Make a hard link via the link syscall ++* ln invocation:: Make links between files. ++* mkdir invocation:: Make directories. ++* mkfifo invocation:: Make FIFOs (named pipes). ++* mknod invocation:: Make block or character special files. ++* readlink invocation:: Print value of a symlink or canonical file name. ++* rmdir invocation:: Remove empty directories. ++* unlink invocation:: Remove files via the unlink syscall ++@end menu ++ ++ ++@node link invocation ++@section @command{link}: Make a hard link via the link syscall ++ ++@pindex link ++@cindex links, creating ++@cindex hard links, creating ++@cindex creating links (hard only) ++ ++@command{link} creates a single hard link at a time. ++It is a minimalist interface to the system-provided ++@code{link} function. @xref{Hard Links, , , libc, ++The GNU C Library Reference Manual}. ++It avoids the bells and whistles of the more commonly-used ++@command{ln} command (@pxref{ln invocation}). ++Synopsis: ++ ++@example ++link @var{filename} @var{linkname} ++@end example ++ ++@var{filename} must specify an existing file, and @var{linkname} ++must specify a nonexistent entry in an existing directory. ++@command{link} simply calls @code{link (@var{filename}, @var{linkname})} ++to create the link. ++ ++On a @acronym{GNU} system, this command acts like @samp{ln --directory ++--no-target-directory @var{filename} @var{linkname}}. However, the ++@option{--directory} and @option{--no-target-directory} options are ++not specified by @acronym{POSIX}, and the @command{link} command is ++more portable in practice. ++ ++If @var{filename} is a symbolic link, it is unspecified whether ++@var{linkname} will be a hard link to the symbolic link or to the ++target of the symbolic link. 
Use @command{ln -P} or @command{ln -L} ++to specify which behavior is desired. ++ ++@exitstatus ++ ++ ++@node ln invocation ++@section @command{ln}: Make links between files ++ ++@pindex ln ++@cindex links, creating ++@cindex hard links, creating ++@cindex symbolic (soft) links, creating ++@cindex creating links (hard or soft) ++ ++@cindex file systems and hard links ++@command{ln} makes links between files. By default, it makes hard links; ++with the @option{-s} option, it makes symbolic (or @dfn{soft}) links. ++Synopses: ++ ++@example ++ln [@var{option}]@dots{} [-T] @var{target} @var{linkname} ++ln [@var{option}]@dots{} @var{target} ++ln [@var{option}]@dots{} @var{target}@dots{} @var{directory} ++ln [@var{option}]@dots{} -t @var{directory} @var{target}@dots{} ++@end example ++ ++@itemize @bullet ++ ++@item ++If two file names are given, @command{ln} creates a link to the first ++file from the second. ++ ++@item ++If one @var{target} is given, @command{ln} creates a link to that file ++in the current directory. ++ ++@item ++If the @option{--target-directory} (@option{-t}) option is given, or ++failing that if the last file is a directory and the ++@option{--no-target-directory} (@option{-T}) option is not given, ++@command{ln} creates a link to each @var{target} file in the specified ++directory, using the @var{target}s' names. ++ ++@end itemize ++ ++Normally @command{ln} does not remove existing files. Use the ++@option{--force} (@option{-f}) option to remove them unconditionally, ++the @option{--interactive} (@option{-i}) option to remove them ++conditionally, and the @option{--backup} (@option{-b}) option to ++rename them. ++ ++@cindex hard link, defined ++@cindex inode, and hard links ++A @dfn{hard link} is another name for an existing file; the link and the ++original are indistinguishable. 
Technically speaking, they share the ++same inode, and the inode contains all the information about a ++file---indeed, it is not incorrect to say that the inode @emph{is} the ++file. Most systems prohibit making a hard link to ++a directory; on those where it is allowed, only the super-user can do ++so (and with caution, since creating a cycle will cause problems to many ++other utilities). Hard links cannot cross file system boundaries. (These ++restrictions are not mandated by @acronym{POSIX}, however.) ++ ++@cindex dereferencing symbolic links ++@cindex symbolic link, defined ++@dfn{Symbolic links} (@dfn{symlinks} for short), on the other hand, are ++a special file type (which not all kernels support: System V release 3 ++(and older) systems lack symlinks) in which the link file actually ++refers to a different file, by name. When most operations (opening, ++reading, writing, and so on) are passed the symbolic link file, the ++kernel automatically @dfn{dereferences} the link and operates on the ++target of the link. But some operations (e.g., removing) work on the ++link file itself, rather than on its target. The owner and group of a ++symlink are not significant to file access performed through ++the link, but do have implications on deleting a symbolic link from a ++directory with the restricted deletion bit set. On the GNU system, ++the mode of a symlink has no significance and cannot be changed, but ++on some BSD systems, the mode can be changed and will affect whether ++the symlink will be traversed in file name resolution. @xref{Symbolic Links,,, ++libc, The GNU C Library Reference Manual}. ++ ++Symbolic links can contain arbitrary strings; a @dfn{dangling symlink} ++occurs when the string in the symlink does not resolve to a file. ++There are no restrictions against creating dangling symbolic links. ++There are trade-offs to using absolute or relative symlinks. 
An ++absolute symlink always points to the same file, even if the directory ++containing the link is moved. However, if the symlink is visible from ++more than one machine (such as on a networked file system), the file ++pointed to might not always be the same. A relative symbolic link is ++resolved in relation to the directory that contains the link, and is ++often useful in referring to files on the same device without regard ++to what name that device is mounted on when accessed via networked ++machines. ++ ++When creating a relative symlink in a different location than the ++current directory, the resolution of the symlink will be different ++than the resolution of the same string from the current directory. ++Therefore, many users prefer to first change directories to the ++location where the relative symlink will be created, so that ++tab-completion or other file resolution will find the same target as ++what will be placed in the symlink. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@optBackup ++ ++@item -d ++@itemx -F ++@itemx --directory ++@opindex -d ++@opindex -F ++@opindex --directory ++@cindex hard links to directories ++Allow users with appropriate privileges to attempt to make hard links ++to directories. ++However, note that this will probably fail due to ++system restrictions, even for the super-user. ++ ++@item -f ++@itemx --force ++@opindex -f ++@opindex --force ++Remove existing destination files. ++ ++@item -i ++@itemx --interactive ++@opindex -i ++@opindex --interactive ++@cindex prompting, and @command{ln} ++Prompt whether to remove existing destination files. ++ ++@item -L ++@itemx --logical ++@opindex -L ++@opindex --logical ++If @option{-s} is not in effect, and the source file is a symbolic ++link, create the hard link to the file referred to by the symbolic ++link, rather than the symbolic link itself.
++ ++@item -n ++@itemx --no-dereference ++@opindex -n ++@opindex --no-dereference ++Do not treat the last operand specially when it is a symbolic link to ++a directory. Instead, treat it as if it were a normal file. ++ ++When the destination is an actual directory (not a symlink to one), ++there is no ambiguity. The link is created in that directory. ++But when the specified destination is a symlink to a directory, ++there are two ways to treat the user's request. @command{ln} can ++treat the destination just as it would a normal directory and create ++the link in it. On the other hand, the destination can be viewed as a ++non-directory---as the symlink itself. In that case, @command{ln} ++must delete or backup that symlink before creating the new link. ++The default is to treat a destination that is a symlink to a directory ++just like a directory. ++ ++This option is weaker than the @option{--no-target-directory} ++(@option{-T}) option, so it has no effect if both options are given. ++ ++@item -P ++@itemx --physical ++@opindex -P ++@opindex --physical ++If @option{-s} is not in effect, and the source file is a symbolic ++link, create the hard link to the symbolic link itself. On platforms ++where this is not supported by the kernel, this option creates a ++symbolic link with identical contents; since symbolic link contents ++cannot be edited, any file name resolution performed through either ++link will be the same as if a hard link had been created. ++ ++@item -s ++@itemx --symbolic ++@opindex -s ++@opindex --symbolic ++Make symbolic links instead of hard links. This option merely produces ++an error message on systems that do not support symbolic links. ++ ++@optBackupSuffix ++ ++@optTargetDirectory ++ ++@optNoTargetDirectory ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Print the name of each file after linking it successfully. 
++ ++@end table ++ ++@cindex hard links to symbolic links ++@cindex symbolic links and @command{ln} ++If @option{-L} and @option{-P} are both given, the last one takes ++precedence. If @option{-s} is also given, @option{-L} and @option{-P} ++are silently ignored. If neither option is given, then this ++implementation defaults to @option{-P} if the system @code{link} supports ++hard links to symbolic links (such as the GNU system), and @option{-L} ++if @code{link} follows symbolic links (such as on BSD). ++ ++@exitstatus ++ ++Examples: ++ ++@smallexample ++Bad Example: ++ ++# Create link ../a pointing to a in that directory. ++# Not really useful because it points to itself. ++ln -s a .. ++ ++Better Example: ++ ++# Change to the target before creating symlinks to avoid being confused. ++cd .. ++ln -s adir/a . ++ ++Bad Example: ++ ++# Hard coded file names don't move well. ++ln -s $(pwd)/a /some/dir/ ++ ++Better Example: ++ ++# Relative file names survive directory moves and also ++# work across networked file systems. ++ln -s afile anotherfile ++ln -s ../adir/afile yetanotherfile ++@end smallexample ++ ++ ++@node mkdir invocation ++@section @command{mkdir}: Make directories ++ ++@pindex mkdir ++@cindex directories, creating ++@cindex creating directories ++ ++@command{mkdir} creates directories with the specified names. Synopsis: ++ ++@example ++mkdir [@var{option}]@dots{} @var{name}@dots{} ++@end example ++ ++@command{mkdir} creates each directory @var{name} in the order given. ++It reports an error if @var{name} already exists, unless the ++@option{-p} option is given and @var{name} is a directory. ++ ++The program accepts the following options. Also see @ref{Common options}. 
++ ++@table @samp ++ ++@item -m @var{mode} ++@itemx --mode=@var{mode} ++@opindex -m ++@opindex --mode ++@cindex modes of created directories, setting ++Set the file permission bits of created directories to @var{mode}, ++which uses the same syntax as ++in @command{chmod} and uses @samp{a=rwx} (read, write and execute allowed for ++everyone) for the point of departure. @xref{File permissions}. ++ ++Normally the directory has the desired file mode bits at the moment it ++is created. As a @acronym{GNU} extension, @var{mode} may also mention ++special mode bits, but in this case there may be a temporary window ++during which the directory exists but its special mode bits are ++incorrect. @xref{Directory Setuid and Setgid}, for how the ++set-user-ID and set-group-ID bits of directories are inherited unless ++overridden in this way. ++ ++@item -p ++@itemx --parents ++@opindex -p ++@opindex --parents ++@cindex parent directories, creating ++Make any missing parent directories for each argument, setting their ++file permission bits to the umask modified by @samp{u+wx}. Ignore ++existing parent directories, and do not change their file permission ++bits. ++ ++To set the file permission bits of any newly-created parent ++directories to a value that includes @samp{u+wx}, you can set the ++umask before invoking @command{mkdir}. For example, if the shell ++command @samp{(umask u=rwx,go=rx; mkdir -p P/Q)} creates the parent ++@file{P} it sets the parent's permission bits to @samp{u=rwx,go=rx}. ++To set a parent's special mode bits as well, you can invoke ++@command{chmod} after @command{mkdir}. @xref{Directory Setuid and ++Setgid}, for how the set-user-ID and set-group-ID bits of ++newly-created parent directories are inherited. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Print a message for each created directory. This is most useful with ++@option{--parents}.
++ ++@item -Z @var{context} ++@itemx --context=@var{context} ++@opindex -Z ++@opindex --context ++@cindex SELinux ++@cindex security context ++Set the default SELinux security context to be used for created directories. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node mkfifo invocation ++@section @command{mkfifo}: Make FIFOs (named pipes) ++ ++@pindex mkfifo ++@cindex FIFOs, creating ++@cindex named pipes, creating ++@cindex creating FIFOs (named pipes) ++ ++@command{mkfifo} creates FIFOs (also called @dfn{named pipes}) with the ++specified names. Synopsis: ++ ++@example ++mkfifo [@var{option}] @var{name}@dots{} ++@end example ++ ++A @dfn{FIFO} is a special file type that permits independent processes ++to communicate. One process opens the FIFO file for writing, and ++another for reading, after which data can flow as with the usual ++anonymous pipe in shells or elsewhere. ++ ++The program accepts the following option. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -m @var{mode} ++@itemx --mode=@var{mode} ++@opindex -m ++@opindex --mode ++@cindex modes of created FIFOs, setting ++Set the mode of created FIFOs to @var{mode}, which is symbolic as in ++@command{chmod} and uses @samp{a=rw} (read and write allowed for everyone) ++for the point of departure. @var{mode} should specify only file ++permission bits. @xref{File permissions}. ++ ++@item -Z @var{context} ++@itemx --context=@var{context} ++@opindex -Z ++@opindex --context ++@cindex SELinux ++@cindex security context ++Set the default SELinux security context to be used for created FIFOs. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node mknod invocation ++@section @command{mknod}: Make block or character special files ++ ++@pindex mknod ++@cindex block special files, creating ++@cindex character special files, creating ++ ++@command{mknod} creates a FIFO, character special file, or block special ++file with the specified name. 
Synopsis: ++ ++@example ++mknod [@var{option}]@dots{} @var{name} @var{type} [@var{major} @var{minor}] ++@end example ++ ++@cindex special files ++@cindex block special files ++@cindex character special files ++Unlike the phrase ``special file type'' above, the term @dfn{special ++file} has a technical meaning on Unix: something that can generate or ++receive data. Usually this corresponds to a physical piece of hardware, ++e.g., a printer or a disk. (These files are typically created at ++system-configuration time.) The @command{mknod} command is what creates ++files of this type. Such devices can be read either a character at a ++time or a ``block'' (many characters) at a time, hence we say there are ++@dfn{block special} files and @dfn{character special} files. ++ ++@c mknod is a shell built-in at least with OpenBSD's /bin/sh ++@mayConflictWithShellBuiltIn{mknod} ++ ++The arguments after @var{name} specify the type of file to make: ++ ++@table @samp ++ ++@item p ++@opindex p @r{for FIFO file} ++for a FIFO ++ ++@item b ++@opindex b @r{for block special file} ++for a block special file ++ ++@item c ++@c Don't document the `u' option -- it's just a synonym for `c'. ++@c Do *any* versions of mknod still use it? ++@c @itemx u ++@opindex c @r{for character special file} ++@c @opindex u @r{for character special file} ++for a character special file ++ ++@end table ++ ++When making a block or character special file, the major and minor ++device numbers must be given after the file type. ++If a major or minor device number begins with @samp{0x} or @samp{0X}, ++it is interpreted as hexadecimal; otherwise, if it begins with @samp{0}, ++as octal; otherwise, as decimal. ++ ++The program accepts the following option. Also see @ref{Common options}. 
++ ++@table @samp ++ ++@item -m @var{mode} ++@itemx --mode=@var{mode} ++@opindex -m ++@opindex --mode ++Set the mode of created files to @var{mode}, which is symbolic as in ++@command{chmod} and uses @samp{a=rw} as the point of departure. ++@var{mode} should specify only file permission bits. ++@xref{File permissions}. ++ ++@item -Z @var{context} ++@itemx --context=@var{context} ++@opindex -Z ++@opindex --context ++@cindex SELinux ++@cindex security context ++Set the default SELinux security context to be used for created files. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node readlink invocation ++@section @command{readlink}: Print value of a symlink or canonical file name ++ ++@pindex readlink ++@cindex displaying value of a symbolic link ++@cindex canonical file name ++@cindex canonicalize a file name ++@pindex realpath ++@findex realpath ++ ++@command{readlink} may work in one of two supported modes: ++ ++@table @samp ++ ++@item Readlink mode ++ ++@command{readlink} outputs the value of the given symbolic link. ++If @command{readlink} is invoked with an argument other than the name ++of a symbolic link, it produces no output and exits with a nonzero exit code. ++ ++@item Canonicalize mode ++ ++@command{readlink} outputs the absolute name of the given file which contains ++no @file{.}, @file{..} components nor any repeated separators ++(@file{/}) or symbolic links. ++ ++@end table ++ ++@example ++readlink [@var{option}] @var{file} ++@end example ++ ++By default, @command{readlink} operates in readlink mode. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -f ++@itemx --canonicalize ++@opindex -f ++@opindex --canonicalize ++Activate canonicalize mode. ++If any component of the file name except the last one is missing or unavailable, ++@command{readlink} produces no output and exits with a nonzero exit ++code. A trailing slash is ignored. 
++ ++@item -e ++@itemx --canonicalize-existing ++@opindex -e ++@opindex --canonicalize-existing ++Activate canonicalize mode. ++If any component is missing or unavailable, @command{readlink} produces ++no output and exits with a nonzero exit code. A trailing slash ++requires that the name resolve to a directory. ++ ++@item -m ++@itemx --canonicalize-missing ++@opindex -m ++@opindex --canonicalize-missing ++Activate canonicalize mode. ++If any component is missing or unavailable, @command{readlink} treats it ++as a directory. ++ ++@item -n ++@itemx --no-newline ++@opindex -n ++@opindex --no-newline ++Do not output the trailing newline. ++ ++@item -s ++@itemx -q ++@itemx --silent ++@itemx --quiet ++@opindex -s ++@opindex -q ++@opindex --silent ++@opindex --quiet ++Suppress most error messages. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Report error messages. ++ ++@end table ++ ++The @command{readlink} utility first appeared in OpenBSD 2.1. ++ ++There is a @command{realpath} command on some systems ++which operates like @command{readlink} in canonicalize mode. ++ ++@exitstatus ++ ++ ++@node rmdir invocation ++@section @command{rmdir}: Remove empty directories ++ ++@pindex rmdir ++@cindex removing empty directories ++@cindex directories, removing empty ++ ++@command{rmdir} removes empty directories. Synopsis: ++ ++@example ++rmdir [@var{option}]@dots{} @var{directory}@dots{} ++@end example ++ ++If any @var{directory} argument does not refer to an existing empty ++directory, it is an error. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item --ignore-fail-on-non-empty ++@opindex --ignore-fail-on-non-empty ++@cindex directory deletion, ignoring failures ++Ignore each failure to remove a directory that is solely because ++the directory is non-empty. 
++ ++@item -p ++@itemx --parents ++@opindex -p ++@opindex --parents ++@cindex parent directories, removing ++Remove @var{directory}, then try to remove each component of @var{directory}. ++So, for example, @samp{rmdir -p a/b/c} is similar to @samp{rmdir a/b/c a/b a}. ++As such, it fails if any of those directories turns out not to be empty. ++Use the @option{--ignore-fail-on-non-empty} option to make it so such ++a failure does not evoke a diagnostic and does not cause @command{rmdir} to ++exit unsuccessfully. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++@cindex directory deletion, reporting ++Give a diagnostic for each successful removal ++of @var{directory}. ++ ++@end table ++ ++@xref{rm invocation}, for how to remove non-empty directories (recursively). ++ ++@exitstatus ++ ++ ++@node unlink invocation ++@section @command{unlink}: Remove files via the unlink syscall ++ ++@pindex unlink ++@cindex removing files or directories (via the unlink syscall) ++ ++@command{unlink} deletes a single specified file name. ++It is a minimalist interface to the system-provided ++@code{unlink} function. @xref{Deleting Files, , , libc, ++The GNU C Library Reference Manual}. ++It avoids the bells and whistles of the more commonly-used ++@command{rm} command (@pxref{rm invocation}). Synopsis: ++ ++@example ++unlink @var{filename} ++@end example ++ ++On some systems @code{unlink} can be used to delete the name of a ++directory. On others, it can be used that way only by a privileged user. ++In the GNU system @code{unlink} can never delete the name of a directory. ++ ++The @command{unlink} command honors the @option{--help} and ++@option{--version} options. To remove a file whose name begins with ++@samp{-}, prefix the name with @samp{./}, e.g., @samp{unlink ./--help}.
++ ++@exitstatus ++ ++ ++@node Changing file attributes ++@chapter Changing file attributes ++ ++@cindex changing file attributes ++@cindex file attributes, changing ++@cindex attributes, file ++ ++A file is not merely its contents, a name, and a file type ++(@pxref{Special file types}). A file also has an owner (a user ID), a ++group (a group ID), permissions (what the owner can do with the file, ++what people in the group can do, and what everyone else can do), various ++timestamps, and other information. Collectively, we call these a file's ++@dfn{attributes}. ++ ++These commands change file attributes. ++ ++@menu ++* chgrp invocation:: Change file groups. ++* chmod invocation:: Change access permissions. ++* chown invocation:: Change file owners and groups. ++* touch invocation:: Change file timestamps. ++@end menu ++ ++ ++@node chown invocation ++@section @command{chown}: Change file owner and group ++ ++@pindex chown ++@cindex file ownership, changing ++@cindex group ownership, changing ++@cindex changing file ownership ++@cindex changing group ownership ++ ++@command{chown} changes the user and/or group ownership of each given @var{file} ++to @var{new-owner} or to the user and group of an existing reference file. ++Synopsis: ++ ++@example ++chown [@var{option}]@dots{} @{@var{new-owner} | --reference=@var{ref_file}@} @var{file}@dots{} ++@end example ++ ++If used, @var{new-owner} specifies the new owner and/or group as follows ++(with no embedded white space): ++ ++@example ++[@var{owner}] [ : [@var{group}] ] ++@end example ++ ++Specifically: ++ ++@table @var ++@item owner ++If only an @var{owner} (a user name or numeric user ID) is given, that ++user is made the owner of each given file, and the files' group is not ++changed. ++ ++@item owner@samp{:}group ++If the @var{owner} is followed by a colon and a @var{group} (a ++group name or numeric group ID), with no spaces between them, the group ++ownership of the files is changed as well (to @var{group}). 
++ ++@item owner@samp{:} ++If a colon but no group name follows @var{owner}, that user is ++made the owner of the files and the group of the files is changed to ++@var{owner}'s login group. ++ ++@item @samp{:}group ++If the colon and following @var{group} are given, but the owner ++is omitted, only the group of the files is changed; in this case, ++@command{chown} performs the same function as @command{chgrp}. ++ ++@item @samp{:} ++If only a colon is given, or if @var{new-owner} is empty, neither the ++owner nor the group is changed. ++ ++@end table ++ ++If @var{owner} or @var{group} is intended to represent a numeric user ++or group ID, then you may specify it with a leading @samp{+}. ++@xref{Disambiguating names and IDs}. ++ ++Some older scripts may still use @samp{.} in place of the @samp{:} separator. ++@acronym{POSIX} 1003.1-2001 (@pxref{Standards conformance}) does not ++require support for that, but for backward compatibility @acronym{GNU} ++@command{chown} supports @samp{.} so long as no ambiguity results. ++New scripts should avoid the use of @samp{.} because it is not ++portable, and because it has undesirable results if the entire ++@var{owner@samp{.}group} happens to identify a user whose name ++contains @samp{.}. ++ ++The @command{chown} command sometimes clears the set-user-ID or ++set-group-ID permission bits. This behavior depends on the policy and ++functionality of the underlying @code{chown} system call, which may ++make system-dependent file mode modifications outside the control of ++the @command{chown} command. For example, the @command{chown} command ++might not affect those bits when invoked by a user with appropriate ++privileges, or when the ++bits signify some function other than executable permission (e.g., ++mandatory locking). ++When in doubt, check the underlying system behavior. ++ ++The program accepts the following options. Also see @ref{Common options}. 
++ ++@table @samp ++ ++@item -c ++@itemx --changes ++@opindex -c ++@opindex --changes ++@cindex changed owners, verbosely describing ++Verbosely describe the action for each @var{file} whose ownership ++actually changes. ++ ++@item -f ++@itemx --silent ++@itemx --quiet ++@opindex -f ++@opindex --silent ++@opindex --quiet ++@cindex error messages, omitting ++Do not print error messages about files whose ownership cannot be ++changed. ++ ++@itemx @w{@kbd{--from}=@var{old-owner}} ++@opindex --from ++@cindex symbolic links, changing owner ++Change a @var{file}'s ownership only if it has current attributes specified ++by @var{old-owner}. @var{old-owner} has the same form as @var{new-owner} ++described above. ++This option is useful primarily from a security standpoint in that ++it narrows considerably the window of potential abuse. ++For example, to reflect a user ID numbering change for one user's files ++without an option like this, @code{root} might run ++ ++@smallexample ++find / -owner OLDUSER -print0 | xargs -0 chown -h NEWUSER ++@end smallexample ++ ++But that is dangerous because the interval between when the @command{find} ++tests the existing file's owner and when the @command{chown} is actually run ++may be quite large. ++One way to narrow the gap would be to invoke chown for each file ++as it is found: ++ ++@example ++find / -owner OLDUSER -exec chown -h NEWUSER @{@} \; ++@end example ++ ++But that is very slow if there are many affected files. ++With this option, it is safer (the gap is narrower still) ++though still not perfect: ++ ++@example ++chown -h -R --from=OLDUSER NEWUSER / ++@end example ++ ++@item --dereference ++@opindex --dereference ++@cindex symbolic links, changing owner ++@findex lchown ++Do not act on symbolic links themselves but rather on what they point to. ++This is the default. 
++ ++@item -h ++@itemx --no-dereference ++@opindex -h ++@opindex --no-dereference ++@cindex symbolic links, changing owner ++@findex lchown ++Act on symbolic links themselves instead of what they point to. ++This mode relies on the @code{lchown} system call. ++On systems that do not provide the @code{lchown} system call, ++@command{chown} fails when a file specified on the command line ++is a symbolic link. ++By default, no diagnostic is issued for symbolic links encountered ++during a recursive traversal, but see @option{--verbose}. ++ ++@itemx --preserve-root ++@opindex --preserve-root ++@cindex root directory, disallow recursive modification ++Fail upon any attempt to recursively change the root directory, @file{/}. ++Without @option{--recursive}, this option has no effect. ++@xref{Treating / specially}. ++ ++@itemx --no-preserve-root ++@opindex --no-preserve-root ++@cindex root directory, allow recursive modification ++Cancel the effect of any preceding @option{--preserve-root} option. ++@xref{Treating / specially}. ++ ++@item --reference=@var{ref_file} ++@opindex --reference ++Change the user and group of each @var{file} to be the same as those of ++@var{ref_file}. If @var{ref_file} is a symbolic link, do not use the ++user and group of the symbolic link, but rather those of the file it ++refers to. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Output a diagnostic for every file processed. ++If a symbolic link is encountered during a recursive traversal ++on a system without the @code{lchown} system call, and @option{--no-dereference} ++is in effect, then issue a diagnostic saying neither the symbolic link nor ++its referent is being changed. ++ ++@item -R ++@itemx --recursive ++@opindex -R ++@opindex --recursive ++@cindex recursively changing file ownership ++Recursively change ownership of directories and their contents. ++ ++@choptH ++@xref{Traversing symlinks}. ++ ++@choptL ++@xref{Traversing symlinks}. 
++ ++@choptP ++@xref{Traversing symlinks}. ++ ++@end table ++ ++@exitstatus ++ ++Examples: ++ ++@smallexample ++# Change the owner of /u to "root". ++chown root /u ++ ++# Likewise, but also change its group to "staff". ++chown root:staff /u ++ ++# Change the owner of /u and subfiles to "root". ++chown -hR root /u ++@end smallexample ++ ++ ++@node chgrp invocation ++@section @command{chgrp}: Change group ownership ++ ++@pindex chgrp ++@cindex group ownership, changing ++@cindex changing group ownership ++ ++@command{chgrp} changes the group ownership of each given @var{file} ++to @var{group} (which can be either a group name or a numeric group ID) ++or to the group of an existing reference file. Synopsis: ++ ++@example ++chgrp [@var{option}]@dots{} @{@var{group} | --reference=@var{ref_file}@} @var{file}@dots{} ++@end example ++ ++If @var{group} is intended to represent a ++numeric group ID, then you may specify it with a leading @samp{+}. ++@xref{Disambiguating names and IDs}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -c ++@itemx --changes ++@opindex -c ++@opindex --changes ++@cindex changed files, verbosely describing ++Verbosely describe the action for each @var{file} whose group actually ++changes. ++ ++@item -f ++@itemx --silent ++@itemx --quiet ++@opindex -f ++@opindex --silent ++@opindex --quiet ++@cindex error messages, omitting ++Do not print error messages about files whose group cannot be ++changed. ++ ++@item --dereference ++@opindex --dereference ++@cindex symbolic links, changing owner ++@findex lchown ++Do not act on symbolic links themselves but rather on what they point to. ++This is the default. ++ ++@item -h ++@itemx --no-dereference ++@opindex -h ++@opindex --no-dereference ++@cindex symbolic links, changing group ++@findex lchown ++Act on symbolic links themselves instead of what they point to. ++This mode relies on the @code{lchown} system call. 
++On systems that do not provide the @code{lchown} system call, ++@command{chgrp} fails when a file specified on the command line ++is a symbolic link. ++By default, no diagnostic is issued for symbolic links encountered ++during a recursive traversal, but see @option{--verbose}. ++ ++@itemx --preserve-root ++@opindex --preserve-root ++@cindex root directory, disallow recursive modification ++Fail upon any attempt to recursively change the root directory, @file{/}. ++Without @option{--recursive}, this option has no effect. ++@xref{Treating / specially}. ++ ++@itemx --no-preserve-root ++@opindex --no-preserve-root ++@cindex root directory, allow recursive modification ++Cancel the effect of any preceding @option{--preserve-root} option. ++@xref{Treating / specially}. ++ ++@item --reference=@var{ref_file} ++@opindex --reference ++Change the group of each @var{file} to be the same as that of ++@var{ref_file}. If @var{ref_file} is a symbolic link, do not use the ++group of the symbolic link, but rather that of the file it refers to. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Output a diagnostic for every file processed. ++If a symbolic link is encountered during a recursive traversal ++on a system without the @code{lchown} system call, and @option{--no-dereference} ++is in effect, then issue a diagnostic saying neither the symbolic link nor ++its referent is being changed. ++ ++@item -R ++@itemx --recursive ++@opindex -R ++@opindex --recursive ++@cindex recursively changing group ownership ++Recursively change the group ownership of directories and their contents. ++ ++@choptH ++@xref{Traversing symlinks}. ++ ++@choptL ++@xref{Traversing symlinks}. ++ ++@choptP ++@xref{Traversing symlinks}. ++ ++@end table ++ ++@exitstatus ++ ++Examples: ++ ++@smallexample ++# Change the group of /u to "staff". ++chgrp staff /u ++ ++# Change the group of /u and subfiles to "staff". 
++chgrp -hR staff /u ++@end smallexample ++ ++ ++@node chmod invocation ++@section @command{chmod}: Change access permissions ++ ++@pindex chmod ++@cindex changing access permissions ++@cindex access permissions, changing ++@cindex permissions, changing access ++ ++@command{chmod} changes the access permissions of the named files. Synopsis: ++ ++@example ++chmod [@var{option}]@dots{} @{@var{mode} | --reference=@var{ref_file}@} @var{file}@dots{} ++@end example ++ ++@cindex symbolic links, permissions of ++@command{chmod} never changes the permissions of symbolic links, since ++the @command{chmod} system call cannot change their permissions. ++This is not a problem since the permissions of symbolic links are ++never used. However, for each symbolic link listed on the command ++line, @command{chmod} changes the permissions of the pointed-to file. ++In contrast, @command{chmod} ignores symbolic links encountered during ++recursive directory traversals. ++ ++A successful use of @command{chmod} clears the set-group-ID bit of a ++regular file if the file's group ID does not match the user's ++effective group ID or one of the user's supplementary group IDs, ++unless the user has appropriate privileges. Additional restrictions ++may cause the set-user-ID and set-group-ID bits of @var{mode} or ++@var{ref_file} to be ignored. This behavior depends on the policy and ++functionality of the underlying @code{chmod} system call. When in ++doubt, check the underlying system behavior. ++ ++If used, @var{mode} specifies the new file mode bits. ++For details, see the section on @ref{File permissions}. ++If you really want @var{mode} to have a leading @samp{-}, you should ++use @option{--} first, e.g., @samp{chmod -- -w file}. Typically, ++though, @samp{chmod a-w file} is preferable, and @command{chmod -w ++file} (without the @option{--}) complains if it behaves differently ++from what @samp{chmod a-w file} would do. ++ ++The program accepts the following options. 
Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -c ++@itemx --changes ++@opindex -c ++@opindex --changes ++Verbosely describe the action for each @var{file} whose permissions ++actually change. ++ ++@item -f ++@itemx --silent ++@itemx --quiet ++@opindex -f ++@opindex --silent ++@opindex --quiet ++@cindex error messages, omitting ++Do not print error messages about files whose permissions cannot be ++changed. ++ ++@itemx --preserve-root ++@opindex --preserve-root ++@cindex root directory, disallow recursive modification ++Fail upon any attempt to recursively change the root directory, @file{/}. ++Without @option{--recursive}, this option has no effect. ++@xref{Treating / specially}. ++ ++@itemx --no-preserve-root ++@opindex --no-preserve-root ++@cindex root directory, allow recursive modification ++Cancel the effect of any preceding @option{--preserve-root} option. ++@xref{Treating / specially}. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++Verbosely describe the action or non-action taken for every @var{file}. ++ ++@item --reference=@var{ref_file} ++@opindex --reference ++Change the mode of each @var{file} to be the same as that of @var{ref_file}. ++@xref{File permissions}. ++If @var{ref_file} is a symbolic link, do not use the mode ++of the symbolic link, but rather that of the file it refers to. ++ ++@item -R ++@itemx --recursive ++@opindex -R ++@opindex --recursive ++@cindex recursively changing access permissions ++Recursively change permissions of directories and their contents. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node touch invocation ++@section @command{touch}: Change file timestamps ++ ++@pindex touch ++@cindex changing file timestamps ++@cindex file timestamps, changing ++@cindex timestamps, changing file ++ ++@command{touch} changes the access and/or modification times of the ++specified files.
Synopsis: ++ ++@example ++touch [@var{option}]@dots{} @var{file}@dots{} ++@end example ++ ++@cindex empty files, creating ++Any @var{file} argument that does not exist is created empty. ++ ++A @var{file} argument string of @samp{-} is handled specially and ++causes @command{touch} to change the times of the file associated with ++standard output. ++ ++@cindex permissions, for changing file timestamps ++If changing both the access and modification times to the current ++time, @command{touch} can change the timestamps for files that the user ++running it does not own but has write permission for. Otherwise, the ++user must own the files. ++ ++Although @command{touch} provides options for changing two of the times---the ++times of last access and modification---of a file, there is actually ++a third one as well: the inode change time. This is often referred to ++as a file's @code{ctime}. ++The inode change time represents the time when the file's meta-information ++last changed. One common example of this is when the permissions of a ++file change. Changing the permissions doesn't access the file, so ++the atime doesn't change, nor does it modify the file, so the mtime ++doesn't change. Yet, something about the file itself has changed, ++and this must be noted somewhere. This is the job of the ctime field. ++This is necessary, so that, for example, a backup program can make a ++fresh copy of the file, including the new permissions value. ++Another operation that modifies a file's ctime without affecting ++the others is renaming. In any case, it is not possible, in normal ++operations, for a user to change the ctime field to a user-specified value. ++ ++@vindex TZ ++Time stamps assume the time zone rules specified by the @env{TZ} ++environment variable, or by the system default rules if @env{TZ} is ++not set. @xref{TZ Variable,, Specifying the Time Zone with @env{TZ}, ++libc, The GNU C Library Reference Manual}. 
++You can avoid ambiguities during ++daylight saving transitions by using @sc{utc} time stamps. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -a ++@itemx --time=atime ++@itemx --time=access ++@itemx --time=use ++@opindex -a ++@opindex --time ++@opindex atime@r{, changing} ++@opindex access @r{time, changing} ++@opindex use @r{time, changing} ++Change the access time only. ++ ++@item -c ++@itemx --no-create ++@opindex -c ++@opindex --no-create ++Do not create files that do not exist. ++ ++@item -d ++@itemx --date=@var{time} ++@opindex -d ++@opindex --date ++@opindex time ++Use @var{time} instead of the current time. It can contain month names, ++time zones, @samp{am} and @samp{pm}, @samp{yesterday}, etc. For ++example, @option{--date="2004-02-27 14:19:13.489392193 +0530"} ++specifies the instant of time that is 489,392,193 nanoseconds after ++February 27, 2004 at 2:19:13 PM in a time zone that is 5 hours and 30 ++minutes east of @acronym{UTC}. @xref{Date input formats}. ++File systems that do not support high-resolution time stamps ++silently ignore any excess precision here. ++ ++@item -f ++@opindex -f ++@cindex BSD @command{touch} compatibility ++Ignored; for compatibility with BSD versions of @command{touch}. ++ ++@item -m ++@itemx --time=mtime ++@itemx --time=modify ++@opindex -m ++@opindex --time ++@opindex mtime@r{, changing} ++@opindex modify @r{time, changing} ++Change the modification time only. ++ ++@item -r @var{file} ++@itemx --reference=@var{file} ++@opindex -r ++@opindex --reference ++Use the times of the reference @var{file} instead of the current time. ++If this option is combined with the @option{--date=@var{time}} ++(@option{-d @var{time}}) option, the reference @var{file}'s time is ++the origin for any relative @var{time}s given, but is otherwise ignored. 
++For example, @samp{-r foo -d '-5 seconds'} specifies a time stamp ++equal to five seconds before the corresponding time stamp for @file{foo}. ++ ++@item -t [[@var{cc}]@var{yy}]@var{mmddhhmm}[.@var{ss}] ++Use the argument (optional four-digit or two-digit years, months, ++days, hours, minutes, optional seconds) instead of the current time. ++If the year is specified with only two digits, then @var{cc} ++is 20 for years in the range 0 @dots{} 68, and 19 for years in ++69 @dots{} 99. If no digits of the year are specified, ++the argument is interpreted as a date in the current year. ++Note that @var{ss} may be @samp{60}, to accommodate leap seconds. ++ ++@end table ++ ++@vindex _POSIX2_VERSION ++On older systems, @command{touch} supports an obsolete syntax, as follows. ++If no timestamp is given with any of the @option{-d}, @option{-r}, or ++@option{-t} options, and if there are two or more @var{file}s and the ++first @var{file} is of the form @samp{@var{mmddhhmm}[@var{yy}]} and this ++would be a valid argument to the @option{-t} option (if the @var{yy}, if ++any, were moved to the front), and if the represented year ++is in the range 1969--1999, that argument is interpreted as the time ++for the other files instead of as a file name. ++This obsolete behavior can be enabled or disabled with the ++@env{_POSIX2_VERSION} environment variable (@pxref{Standards ++conformance}), but portable scripts should avoid commands whose ++behavior depends on this variable. ++For example, use @samp{touch ./12312359 main.c} or @samp{touch -t ++12312359 main.c} rather than the ambiguous @samp{touch 12312359 main.c}. ++ ++@exitstatus ++ ++ ++@node Disk usage ++@chapter Disk usage ++ ++@cindex disk usage ++ ++No disk can hold an infinite amount of data. These commands report ++how much disk storage is in use or available, report other file and ++file status information, and write buffers to disk. ++ ++@menu ++* df invocation:: Report file system disk space usage. 
++* du invocation:: Estimate file space usage. ++* stat invocation:: Report file or file system status. ++* sync invocation:: Synchronize memory and disk. ++* truncate invocation:: Shrink or extend the size of a file. ++@end menu ++ ++ ++@node df invocation ++@section @command{df}: Report file system disk space usage ++ ++@pindex df ++@cindex file system disk usage ++@cindex disk usage by file system ++ ++@command{df} reports the amount of disk space used and available on ++file systems. Synopsis: ++ ++@example ++df [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++With no arguments, @command{df} reports the space used and available on all ++currently mounted file systems (of all types). Otherwise, @command{df} ++reports on the file system containing each argument @var{file}. ++ ++Normally the disk space is printed in units of ++1024 bytes, but this can be overridden (@pxref{Block size}). ++Non-integer quantities are rounded up to the next higher unit. ++ ++@cindex disk device file ++@cindex device file, disk ++If an argument @var{file} is a disk device file containing a mounted ++file system, @command{df} shows the space available on that file system ++rather than on the file system containing the device node (i.e., the root ++file system). @sc{gnu} @command{df} does not attempt to determine the disk usage ++on unmounted file systems, because on most kinds of systems doing so ++requires extremely nonportable intimate knowledge of file system ++structures. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -a ++@itemx --all ++@opindex -a ++@opindex --all ++@cindex automounter file systems ++@cindex ignore file systems ++Include in the listing dummy file systems, which ++are omitted by default. Such file systems are typically special-purpose ++pseudo-file-systems, such as automounter entries. 
++ ++@item -B @var{size} ++@itemx --block-size=@var{size} ++@opindex -B ++@opindex --block-size ++@cindex file system sizes ++Scale sizes by @var{size} before printing them (@pxref{Block size}). ++For example, @option{-BG} prints sizes in units of 1,073,741,824 bytes. ++ ++@itemx --total ++@opindex --total ++@cindex grand total of disk size, usage and available space ++Print a grand total of all arguments after all arguments have ++been processed. This can be used to find out the total disk size, usage ++and available space of all listed devices. ++ ++@optHumanReadable ++ ++@item -H ++@opindex -H ++Equivalent to @option{--si}. ++ ++@item -i ++@itemx --inodes ++@opindex -i ++@opindex --inodes ++@cindex inode usage ++List inode usage information instead of block usage. An inode (short ++for index node) contains information about a file such as its owner, ++permissions, timestamps, and location on the disk. ++ ++@item -k ++@opindex -k ++@cindex kibibytes for file system sizes ++Print sizes in 1024-byte blocks, overriding the default block size ++(@pxref{Block size}). ++This option is equivalent to @option{--block-size=1K}. ++ ++@item -l ++@itemx --local ++@opindex -l ++@opindex --local ++@cindex file system types, limiting output to certain ++Limit the listing to local file systems. By default, remote file systems ++are also listed. ++ ++@item --no-sync ++@opindex --no-sync ++@cindex file system space, retrieving old data more quickly ++Do not invoke the @code{sync} system call before getting any usage data. ++This may make @command{df} run significantly faster on systems with many ++disks, but on some systems (notably SunOS) the results may be slightly ++out of date. This is the default. ++ ++@item -P ++@itemx --portability ++@opindex -P ++@opindex --portability ++@cindex one-line output format ++@cindex @acronym{POSIX} output format ++@cindex portable output format ++@cindex output format, portable ++Use the @acronym{POSIX} output format. 
This is like the default format except ++for the following: ++ ++@enumerate ++@item ++The information about each file system is always printed on exactly ++one line; a mount device is never put on a line by itself. This means ++that if the mount device name is more than 20 characters long (e.g., for ++some network mounts), the columns are misaligned. ++ ++@item ++The labels in the header output line are changed to conform to @acronym{POSIX}. ++ ++@item ++The default block size and output format are unaffected by the ++@env{DF_BLOCK_SIZE}, @env{BLOCK_SIZE} and @env{BLOCKSIZE} environment ++variables. However, the default block size is still affected by ++@env{POSIXLY_CORRECT}: it is 512 if @env{POSIXLY_CORRECT} is set, 1024 ++otherwise. @xref{Block size}. ++@end enumerate ++ ++@optSi ++ ++@item --sync ++@opindex --sync ++@cindex file system space, retrieving current data more slowly ++Invoke the @code{sync} system call before getting any usage data. On ++some systems (notably SunOS), doing this yields more up to date results, ++but in general this option makes @command{df} much slower, especially when ++there are many or very busy file systems. ++ ++@item -t @var{fstype} ++@itemx --type=@var{fstype} ++@opindex -t ++@opindex --type ++@cindex file system types, limiting output to certain ++Limit the listing to file systems of type @var{fstype}. Multiple ++file system types can be specified by giving multiple @option{-t} options. ++By default, nothing is omitted. ++ ++@item -T ++@itemx --print-type ++@opindex -T ++@opindex --print-type ++@cindex file system types, printing ++Print each file system's type. The types printed here are the same ones ++you can include or exclude with @option{-t} and @option{-x}. The particular ++types printed are whatever is supported by the system. 
Here are some of ++the common names (this list is certainly not exhaustive): ++ ++@table @samp ++ ++@item nfs ++@cindex @acronym{NFS} file system type ++An @acronym{NFS} file system, i.e., one mounted over a network from another ++machine. This is the one type name which seems to be used uniformly by ++all systems. ++ ++@item 4.2@r{, }ufs@r{, }efs@dots{} ++@cindex Linux file system types ++@cindex local file system types ++@opindex 4.2 @r{file system type} ++@opindex ufs @r{file system type} ++@opindex efs @r{file system type} ++A file system on a locally-mounted hard disk. (The system might even ++support more than one type here; Linux does.) ++ ++@item hsfs@r{, }cdfs ++@cindex CD-ROM file system type ++@cindex High Sierra file system ++@opindex hsfs @r{file system type} ++@opindex cdfs @r{file system type} ++A file system on a CD-ROM drive. HP-UX uses @samp{cdfs}, most other ++systems use @samp{hsfs} (@samp{hs} for ``High Sierra''). ++ ++@item pcfs ++@cindex PC file system ++@cindex DOS file system ++@cindex MS-DOS file system ++@cindex diskette file system ++@opindex pcfs ++An MS-DOS file system, usually on a diskette. ++ ++@end table ++ ++@item -x @var{fstype} ++@itemx --exclude-type=@var{fstype} ++@opindex -x ++@opindex --exclude-type ++Limit the listing to file systems not of type @var{fstype}. ++Multiple file system types can be eliminated by giving multiple ++@option{-x} options. By default, no file system types are omitted. ++ ++@item -v ++Ignored; for compatibility with System V versions of @command{df}. ++ ++@end table ++ ++@exitstatus ++Failure includes the case where no output is generated, so you can ++inspect the exit status of a command like @samp{df -t ext3 -t reiserfs ++@var{dir}} to test whether @var{dir} is on a file system of type ++@samp{ext3} or @samp{reiserfs}. 
++ ++ ++@node du invocation ++@section @command{du}: Estimate file space usage ++ ++@pindex du ++@cindex file space usage ++@cindex disk usage for files ++ ++@command{du} reports the amount of disk space used by the specified files ++and for each subdirectory (of directory arguments). Synopsis: ++ ++@example ++du [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++With no arguments, @command{du} reports the disk space for the current ++directory. Normally the disk space is printed in units of ++1024 bytes, but this can be overridden (@pxref{Block size}). ++Non-integer quantities are rounded up to the next higher unit. ++ ++If two or more hard links point to the same file, only one of the hard ++links is counted. The @var{file} argument order affects which links ++are counted, and changing the argument order may change the numbers ++that @command{du} outputs. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -a ++@itemx --all ++@opindex -a ++@opindex --all ++Show counts for all files, not just directories. ++ ++@itemx --apparent-size ++@opindex --apparent-size ++Print apparent sizes, rather than disk usage. The apparent size of a ++file is the number of bytes reported by @code{wc -c} on regular files, ++or more generally, @code{ls -l --block-size=1} or @code{stat --format=%s}. ++For example, a file containing the word @samp{zoo} with no newline would, ++of course, have an apparent size of 3. Such a small file may require ++anywhere from 0 to 16 KiB or more of disk space, depending on ++the type and configuration of the file system on which the file resides. ++However, a sparse file created with this command: ++ ++@example ++dd bs=1 seek=2GiB if=/dev/null of=big ++@end example ++ ++@noindent ++has an apparent size of 2 GiB, yet on most modern ++systems, it actually uses almost no disk space. 
++ ++@item -b ++@itemx --bytes ++@opindex -b ++@opindex --bytes ++Equivalent to @code{--apparent-size --block-size=1}. ++ ++@item -B @var{size} ++@itemx --block-size=@var{size} ++@opindex -B ++@opindex --block-size ++@cindex file sizes ++Scale sizes by @var{size} before printing them (@pxref{Block size}). ++For example, @option{-BG} prints sizes in units of 1,073,741,824 bytes. ++ ++@item -c ++@itemx --total ++@opindex -c ++@opindex --total ++@cindex grand total of disk space ++Print a grand total of all arguments after all arguments have ++been processed. This can be used to find out the total disk usage of ++a given set of files or directories. ++ ++@item -D ++@itemx --dereference-args ++@opindex -D ++@opindex --dereference-args ++Dereference symbolic links that are command line arguments. ++Does not affect other symbolic links. This is helpful for finding ++out the disk usage of directories, such as @file{/usr/tmp}, which ++are often symbolic links. ++ ++@c --files0-from=FILE ++@filesZeroFromOption{du,, with the @option{--total} (@option{-c}) option} ++ ++@optHumanReadable ++ ++@item -H ++@opindex -H ++Equivalent to @option{--dereference-args} (@option{-D}). ++ ++@item -k ++@opindex -k ++@cindex kibibytes for file sizes ++Print sizes in 1024-byte blocks, overriding the default block size ++(@pxref{Block size}). ++This option is equivalent to @option{--block-size=1K}. ++ ++@item -l ++@itemx --count-links ++@opindex -l ++@opindex --count-links ++@cindex hard links, counting in @command{du} ++Count the size of all files, even if they have appeared already (as a ++hard link). ++ ++@item -L ++@itemx --dereference ++@opindex -L ++@opindex --dereference ++@cindex symbolic links, dereferencing in @command{du} ++Dereference symbolic links (show the disk space used by the file ++or directory that the link points to instead of the space used by ++the link). 
++ ++@item -m ++@opindex -m ++@cindex mebibytes for file sizes ++Print sizes in 1,048,576-byte blocks, overriding the default block size ++(@pxref{Block size}). ++This option is equivalent to @option{--block-size=1M}. ++ ++@item -P ++@itemx --no-dereference ++@opindex -P ++@opindex --no-dereference ++@cindex symbolic links, dereferencing in @command{du} ++For each symbolic link encountered by @command{du}, ++consider the disk space used by the symbolic link. ++ ++@item --max-depth=@var{depth} ++@opindex --max-depth=@var{depth} ++@cindex limiting output of @command{du} ++Show the total for each directory (and file if @option{--all}) that is at ++most @var{depth} levels down from the root of the hierarchy. The root ++is at level 0, so @code{du --max-depth=0} is equivalent to @code{du -s}. ++ ++@item -0 ++@opindex -0 ++@itemx --null ++@opindex --null ++@cindex output null-byte-terminated lines ++Output a zero byte (@acronym{ASCII} @sc{nul}) at the end of each line, ++rather than a newline. This option enables other programs to parse the ++output of @command{du} even when that output would contain file names ++with embedded newlines. ++ ++@optSi ++ ++@item -s ++@itemx --summarize ++@opindex -s ++@opindex --summarize ++Display only a total for each argument. ++ ++@item -S ++@itemx --separate-dirs ++@opindex -S ++@opindex --separate-dirs ++Normally, in the output of @command{du} (when not using @option{--summarize}), ++the size listed next to a directory name, @var{d}, represents the sum ++of sizes of all entries beneath @var{d} as well as the size of @var{d} itself. ++With @option{--separate-dirs}, the size reported for a directory name, ++@var{d}, is merely the @code{stat.st_size}-derived size of the directory ++entry, @var{d}. ++ ++@itemx --time ++@opindex --time ++@cindex last modified dates, displaying in @command{du} ++Show time of the most recent modification of any file in the directory, ++or any of its subdirectories.
++ ++@itemx --time=ctime ++@itemx --time=status ++@itemx --time=use ++@opindex --time ++@opindex ctime@r{, show the most recent} ++@opindex status time@r{, show the most recent} ++@opindex use time@r{, show the most recent} ++Show the most recent status change time (the @samp{ctime} in the inode) of ++any file in the directory, instead of the modification time. ++ ++@itemx --time=atime ++@itemx --time=access ++@opindex --time ++@opindex atime@r{, show the most recent} ++@opindex access time@r{, show the most recent} ++Show the most recent access time (the @samp{atime} in the inode) of ++any file in the directory, instead of the modification time. ++ ++@item --time-style=@var{style} ++@opindex --time-style ++@cindex time style ++List timestamps in style @var{style}. This option has an effect only if ++the @option{--time} option is also specified. The @var{style} should ++be one of the following: ++ ++@table @samp ++@item +@var{format} ++@vindex LC_TIME ++List timestamps using @var{format}, where @var{format} is interpreted ++like the format argument of @command{date} (@pxref{date invocation}). ++For example, @option{--time-style="+%Y-%m-%d %H:%M:%S"} causes ++@command{du} to list timestamps like @samp{2002-03-30 23:45:56}. As ++with @command{date}, @var{format}'s interpretation is affected by the ++@env{LC_TIME} locale category. ++ ++@item full-iso ++List timestamps in full using @acronym{ISO} 8601 date, time, and time zone ++format with nanosecond precision, e.g., @samp{2002-03-30 ++23:45:56.477817180 -0700}. This style is equivalent to ++@samp{+%Y-%m-%d %H:%M:%S.%N %z}. ++ ++@item long-iso ++List @acronym{ISO} 8601 date and time in minutes, e.g., ++@samp{2002-03-30 23:45}. These timestamps are shorter than ++@samp{full-iso} timestamps, and are usually good enough for everyday ++work. This style is equivalent to @samp{+%Y-%m-%d %H:%M}. ++ ++@item iso ++List @acronym{ISO} 8601 dates for timestamps, e.g., @samp{2002-03-30}. 
++This style is equivalent to @samp{+%Y-%m-%d}. ++@end table ++ ++@vindex TIME_STYLE ++You can specify the default value of the @option{--time-style} option ++with the environment variable @env{TIME_STYLE}; if @env{TIME_STYLE} is not set ++the default style is @samp{long-iso}. For compatibility with @command{ls}, ++if @env{TIME_STYLE} begins with @samp{+} and contains a newline, ++the newline and any later characters are ignored; if @env{TIME_STYLE} ++begins with @samp{posix-} the @samp{posix-} is ignored; and if ++@env{TIME_STYLE} is @samp{locale} it is ignored. ++ ++@item -x ++@itemx --one-file-system ++@opindex -x ++@opindex --one-file-system ++@cindex one file system, restricting @command{du} to ++Skip directories that are on different file systems from the one that ++the argument being processed is on. ++ ++@item --exclude=@var{pattern} ++@opindex --exclude=@var{pattern} ++@cindex excluding files from @command{du} ++When recursing, skip subdirectories or files matching @var{pattern}. ++For example, @code{du --exclude='*.o'} excludes files whose names ++end in @samp{.o}. ++ ++@item -X @var{file} ++@itemx --exclude-from=@var{file} ++@opindex -X @var{file} ++@opindex --exclude-from=@var{file} ++@cindex excluding files from @command{du} ++Like @option{--exclude}, except take the patterns to exclude from @var{file}, ++one per line. If @var{file} is @samp{-}, take the patterns from standard ++input. ++ ++@end table ++ ++@cindex NFS mounts from BSD to HP-UX ++On BSD systems, @command{du} reports sizes that are half the correct ++values for files that are NFS-mounted from HP-UX systems. On HP-UX ++systems, it reports sizes that are twice the correct values for ++files that are NFS-mounted from BSD systems. This is due to a flaw ++in HP-UX; it also affects the HP-UX @command{du} program. 
++ ++@exitstatus ++ ++ ++@node stat invocation ++@section @command{stat}: Report file or file system status ++ ++@pindex stat ++@cindex file status ++@cindex file system status ++ ++@command{stat} displays information about the specified file(s). Synopsis: ++ ++@example ++stat [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++With no option, @command{stat} reports all information about the given files. ++But it also can be used to report the information of the file systems the ++given files are located on. If the files are links, @command{stat} can ++also give information about the files the links point to. ++ ++@mayConflictWithShellBuiltIn{stat} ++ ++@table @samp ++ ++@item -L ++@itemx --dereference ++@opindex -L ++@opindex --dereference ++@cindex symbolic links, dereferencing in @command{stat} ++Change how @command{stat} treats symbolic links. ++With this option, @command{stat} acts on the file referenced ++by each symbolic link argument. ++Without it, @command{stat} acts on any symbolic link argument directly. ++ ++@item -f ++@itemx --file-system ++@opindex -f ++@opindex --file-system ++@cindex file systems ++Report information about the file systems where the given files are located ++instead of information about the files themselves. ++ ++@item -c ++@itemx --format=@var{format} ++@opindex -c ++@opindex --format=@var{format} ++@cindex output format ++Use @var{format} rather than the default format. ++@var{format} is automatically newline-terminated, so ++running a command like the following with two or more @var{file} ++operands produces a line of output for each operand: ++@example ++$ stat --format=%d:%i / /usr ++2050:2 ++2057:2 ++@end example ++ ++@itemx --printf=@var{format} ++@opindex --printf=@var{format} ++@cindex output format ++Use @var{format} rather than the default format. ++Like @option{--format}, but interpret backslash escapes, ++and do not output a mandatory trailing newline. 
++If you want a newline, include @samp{\n} in the @var{format}. ++Here's how you would use @option{--printf} to print the device ++and inode numbers of @file{/} and @file{/usr}: ++@example ++$ stat --printf='%d:%i\n' / /usr ++2050:2 ++2057:2 ++@end example ++ ++@item -t ++@itemx --terse ++@opindex -t ++@opindex --terse ++@cindex terse output ++Print the information in terse form, suitable for parsing by other programs. ++ ++@end table ++ ++The valid @var{format} directives for files with @option{--format} and ++@option{--printf} are: ++ ++@itemize @bullet ++@item %a - Access rights in octal ++@item %A - Access rights in human readable form ++@item %b - Number of blocks allocated (see @samp{%B}) ++@item %B - The size in bytes of each block reported by @samp{%b} ++@item %d - Device number in decimal ++@item %D - Device number in hex ++@item %f - Raw mode in hex ++@item %F - File type ++@item %g - Group ID of owner ++@item %G - Group name of owner ++@item %h - Number of hard links ++@item %i - Inode number ++@item %n - File name ++@item %N - Quoted file name with dereference if symbolic link ++@item %o - I/O block size ++@item %s - Total size, in bytes ++@item %t - Major device type in hex ++@item %T - Minor device type in hex ++@item %u - User ID of owner ++@item %U - User name of owner ++@item %x - Time of last access ++@item %X - Time of last access as seconds since Epoch ++@item %y - Time of last modification ++@item %Y - Time of last modification as seconds since Epoch ++@item %z - Time of last change ++@item %Z - Time of last change as seconds since Epoch ++@end itemize ++ ++When listing file system information (@option{--file-system} (@option{-f})), ++you must use a different set of @var{format} directives: ++ ++@itemize @bullet ++@item %a - Free blocks available to non-super-user ++@item %b - Total data blocks in file system ++@item %c - Total file nodes in file system ++@item %d - Free file nodes in file system ++@item %f - Free blocks in file system ++@item 
%i - File System ID in hex ++@item %l - Maximum length of file names ++@item %n - File name ++@item %s - Block size (for faster transfers) ++@item %S - Fundamental block size (for block counts) ++@item %t - Type in hex ++@item %T - Type in human readable form ++@end itemize ++ ++@vindex TZ ++Time stamps are listed according to the time zone rules specified by ++the @env{TZ} environment variable, or by the system default rules if ++@env{TZ} is not set. @xref{TZ Variable,, Specifying the Time Zone ++with @env{TZ}, libc, The GNU C Library Reference Manual}. ++ ++@exitstatus ++ ++ ++@node sync invocation ++@section @command{sync}: Synchronize data on disk with memory ++ ++@pindex sync ++@cindex synchronize disk and memory ++ ++@cindex superblock, writing ++@cindex inodes, written buffered ++@command{sync} writes any data buffered in memory out to disk. This can ++include (but is not limited to) modified superblocks, modified inodes, ++and delayed reads and writes. This must be implemented by the kernel; ++the @command{sync} program does nothing but exercise the @code{sync} system ++call. ++ ++@cindex crashes and corruption ++The kernel keeps data in memory to avoid doing (relatively slow) disk ++reads and writes. This improves performance, but if the computer ++crashes, data may be lost or the file system corrupted as a ++result. The @command{sync} command ensures everything in memory ++is written to disk. ++ ++Any arguments are ignored, except for a lone @option{--help} or ++@option{--version} (@pxref{Common options}). ++ ++@exitstatus ++ ++ ++@node truncate invocation ++@section @command{truncate}: Shrink or extend the size of a file ++ ++@pindex truncate ++@cindex truncating, file sizes ++ ++@command{truncate} shrinks or extends the size of each @var{file} to the ++specified size. Synopsis: ++ ++@example ++truncate @var{option}@dots{} @var{file}@dots{} ++@end example ++ ++@cindex files, creating ++Any @var{file} that does not exist is created.
++ ++@cindex sparse files, creating ++@cindex holes, creating files with ++If a @var{file} is larger than the specified size, the extra data is lost. ++If a @var{file} is shorter, it is extended and the extended part (or hole) ++reads as zero bytes. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -c ++@itemx --no-create ++@opindex -c ++@opindex --no-create ++Do not create files that do not exist. ++ ++@item -o ++@itemx --io-blocks ++@opindex -o ++@opindex --io-blocks ++Treat @var{size} as number of I/O blocks of the @var{file} rather than bytes. ++ ++@item -r @var{rfile} ++@itemx --reference=@var{rfile} ++@opindex -r ++@opindex --reference ++Set the size of each @var{file} to the same size as @var{rfile}. ++ ++@item -s @var{size} ++@itemx --size=@var{size} ++@opindex -s ++@opindex --size ++Set the size of each @var{file} to this @var{size}. ++@multiplierSuffixesNoBlocks{size} ++ ++@var{size} may also be prefixed by one of the following to adjust ++the size of each @var{file} based on their current size: ++@example ++@samp{+} => extend by ++@samp{-} => reduce by ++@samp{<} => at most ++@samp{>} => at least ++@samp{/} => round down to multiple of ++@samp{%} => round up to multiple of ++@end example ++ ++@end table ++ ++@exitstatus ++ ++ ++@node Printing text ++@chapter Printing text ++ ++@cindex printing text, commands for ++@cindex commands for printing text ++ ++This section describes commands that display text strings. ++ ++@menu ++* echo invocation:: Print a line of text. ++* printf invocation:: Format and print data. ++* yes invocation:: Print a string until interrupted. 
++@end menu ++ ++ ++@node echo invocation ++@section @command{echo}: Print a line of text ++ ++@pindex echo ++@cindex displaying text ++@cindex printing text ++@cindex text, displaying ++@cindex arbitrary text, displaying ++ ++@command{echo} writes each given @var{string} to standard output, with a ++space between each and a newline after the last one. Synopsis: ++ ++@example ++echo [@var{option}]@dots{} [@var{string}]@dots{} ++@end example ++ ++@mayConflictWithShellBuiltIn{echo} ++ ++The program accepts the following options. Also see @ref{Common options}. ++Options must precede operands, and the normally-special argument ++@samp{--} has no special meaning and is treated like any other ++@var{string}. ++ ++@table @samp ++@item -n ++@opindex -n ++Do not output the trailing newline. ++ ++@item -e ++@opindex -e ++@cindex backslash escapes ++Enable interpretation of the following backslash-escaped characters in ++each @var{string}: ++ ++@table @samp ++@item \a ++alert (bell) ++@item \b ++backspace ++@item \c ++produce no further output ++@item \f ++form feed ++@item \n ++newline ++@item \r ++carriage return ++@item \t ++horizontal tab ++@item \v ++vertical tab ++@item \\ ++backslash ++@item \0@var{nnn} ++the eight-bit value that is the octal number @var{nnn} ++(zero to three octal digits) ++@item \@var{nnn} ++the eight-bit value that is the octal number @var{nnn} ++(one to three octal digits) ++@item \x@var{hh} ++the eight-bit value that is the hexadecimal number @var{hh} ++(one or two hexadecimal digits) ++@end table ++ ++@item -E ++@opindex -E ++@cindex backslash escapes ++Disable interpretation of backslash escapes in each @var{string}. ++This is the default. If @option{-e} and @option{-E} are both ++specified, the last one given takes effect. 
++ ++@end table ++ ++@vindex POSIXLY_CORRECT ++If the @env{POSIXLY_CORRECT} environment variable is set, then when ++@command{echo}'s first argument is not @option{-n} it outputs ++option-like arguments instead of treating them as options. For ++example, @code{echo -ne hello} outputs @samp{-ne hello} instead of ++plain @samp{hello}. ++ ++@acronym{POSIX} does not require support for any options, and says ++that the behavior of @command{echo} is implementation-defined if any ++@var{string} contains a backslash or if the first argument is ++@option{-n}. Portable programs can use the @command{printf} command ++if they need to omit trailing newlines or output control characters or ++backslashes. @xref{printf invocation}. ++ ++@exitstatus ++ ++ ++@node printf invocation ++@section @command{printf}: Format and print data ++ ++@pindex printf ++@command{printf} does formatted printing of text. Synopsis: ++ ++@example ++printf @var{format} [@var{argument}]@dots{} ++@end example ++ ++@command{printf} prints the @var{format} string, interpreting @samp{%} ++directives and @samp{\} escapes to format numeric and string arguments ++in a way that is mostly similar to the C @samp{printf} function. ++@xref{Output Conversion Syntax,, @command{printf} format directives, ++libc, The GNU C Library Reference Manual}, for details. ++The differences are listed below. ++ ++@mayConflictWithShellBuiltIn{printf} ++ ++@itemize @bullet ++ ++@item ++The @var{format} argument is reused as necessary to convert all the ++given @var{argument}s. For example, the command @samp{printf %s a b} ++outputs @samp{ab}. ++ ++@item ++Missing @var{argument}s are treated as null strings or as zeros, ++depending on whether the context expects a string or a number. For ++example, the command @samp{printf %sx%d} prints @samp{x0}. ++ ++@item ++@kindex \c ++An additional escape, @samp{\c}, causes @command{printf} to produce no ++further output. 
For example, the command @samp{printf 'A%sC\cD%sF' B ++E} prints @samp{ABC}. ++ ++@item ++The hexadecimal escape sequence @samp{\x@var{hh}} has at most two ++digits, as opposed to C where it can have an unlimited number of ++digits. For example, the command @samp{printf '\x07e'} prints two ++bytes, whereas the C statement @samp{printf ("\x07e")} prints just ++one. ++ ++@item ++@kindex %b ++@command{printf} has an additional directive, @samp{%b}, which prints its ++argument string with @samp{\} escapes interpreted in the same way as in ++the @var{format} string, except that octal escapes are of the form ++@samp{\0@var{ooo}} where @var{ooo} is 0 to 3 octal digits. ++If a precision is also given, it limits the number of bytes printed ++from the converted string. ++ ++@item ++Numeric arguments must be single C constants, possibly with leading ++@samp{+} or @samp{-}. For example, @samp{printf %.4d -3} outputs ++@samp{-0003}. ++ ++@item ++@vindex POSIXLY_CORRECT ++If the leading character of a numeric argument is @samp{"} or @samp{'} ++then its value is the numeric value of the immediately following ++character. Any remaining characters are silently ignored if the ++@env{POSIXLY_CORRECT} environment variable is set; otherwise, a ++warning is printed. For example, @samp{printf "%d" "'a"} outputs ++@samp{97} on hosts that use the @acronym{ASCII} character set, since ++@samp{a} has the numeric value 97 in @acronym{ASCII}. ++ ++@end itemize ++ ++@vindex LC_NUMERIC ++A floating-point argument must use a period before any fractional ++digits, but is printed according to the @env{LC_NUMERIC} category of the ++current locale. For example, in a locale whose radix character is a ++comma, the command @samp{printf %g 3.14} outputs @samp{3,14} whereas ++the command @samp{printf %g 3,14} is an error. 
++ ++@kindex \@var{ooo} ++@kindex \x@var{hh} ++@command{printf} interprets @samp{\@var{ooo}} in @var{format} as an octal number ++(if @var{ooo} is 1 to 3 octal digits) specifying a character to print, ++and @samp{\x@var{hh}} as a hexadecimal number (if @var{hh} is 1 to 2 hex ++digits) specifying a character to print. ++ ++@kindex \uhhhh ++@kindex \Uhhhhhhhh ++@cindex Unicode ++@cindex ISO/IEC 10646 ++@vindex LC_CTYPE ++@command{printf} interprets two character syntaxes introduced in ++@acronym{ISO} C 99: ++@samp{\u} for 16-bit Unicode (@acronym{ISO}/@acronym{IEC} 10646) ++characters, specified as ++four hexadecimal digits @var{hhhh}, and @samp{\U} for 32-bit Unicode ++characters, specified as eight hexadecimal digits @var{hhhhhhhh}. ++@command{printf} outputs the Unicode characters ++according to the @env{LC_CTYPE} locale. Unicode characters in the ranges ++U+0000...U+009F, U+D800...U+DFFF cannot be specified by this syntax, except ++for U+0024 ($), U+0040 (@@), and U+0060 (@`). ++ ++The processing of @samp{\u} and @samp{\U} requires a full-featured ++@code{iconv} facility. It is activated on systems with glibc 2.2 (or newer), ++or when @code{libiconv} is installed prior to this package. Otherwise ++@samp{\u} and @samp{\U} will print as-is. ++ ++The only options are a lone @option{--help} or ++@option{--version}. @xref{Common options}. ++Options must precede operands. ++ ++The Unicode character syntaxes are useful for writing strings in a locale ++independent way. For example, a string containing the Euro currency symbol ++ ++@example ++$ env printf '\u20AC 14.95' ++@end example ++ ++@noindent ++will be output correctly in all locales supporting the Euro symbol ++(@acronym{ISO}-8859-15, UTF-8, and others). Similarly, a Chinese string ++ ++@example ++$ env printf '\u4e2d\u6587' ++@end example ++ ++@noindent ++will be output correctly in all Chinese locales (GB2312, BIG5, UTF-8, etc). 
++ ++Note that in these examples, the @command{printf} command has been ++invoked via @command{env} to ensure that we run the program found via ++your shell's search path, and not a shell alias or a built-in function. ++ ++For larger strings, you don't need to look up the hexadecimal code ++values of each character one by one. Text consisting of @acronym{ASCII} characters mixed with \u ++escape sequences is also known as the JAVA source file encoding. You can ++use GNU recode 3.5c (or newer) to convert strings to this encoding. Here ++is how to convert a piece of text into a shell script which will output ++this text in a locale-independent way: ++ ++@smallexample ++$ LC_CTYPE=zh_CN.big5 /usr/local/bin/printf \ ++ '\u4e2d\u6587\n' > sample.txt ++$ recode BIG5..JAVA < sample.txt \ ++ | sed -e "s|^|/usr/local/bin/printf '|" -e "s|$|\\\\n'|" \ ++ > sample.sh ++@end smallexample ++ ++@exitstatus ++ ++ ++@node yes invocation ++@section @command{yes}: Print a string until interrupted ++ ++@pindex yes ++@cindex repeated output of a string ++ ++@command{yes} prints the command line arguments, separated by spaces and ++followed by a newline, forever until it is killed. If no arguments are ++given, it prints @samp{y} followed by a newline forever until killed. ++ ++Upon a write error, @command{yes} exits with status @samp{1}. ++ ++The only options are a lone @option{--help} or @option{--version}. ++To output an argument that begins with ++@samp{-}, precede it with @option{--}, e.g., @samp{yes -- --help}. ++@xref{Common options}. ++ ++ ++@node Conditions ++@chapter Conditions ++ ++@cindex conditions ++@cindex commands for exit status ++@cindex exit status commands ++ ++This section describes commands that are primarily useful for their exit ++status, rather than their output. Thus, they are often used as the ++condition of shell @code{if} statements, or as the last command in a ++pipeline. ++ ++@menu ++* false invocation:: Do nothing, unsuccessfully.
++* true invocation:: Do nothing, successfully. ++* test invocation:: Check file types and compare values. ++* expr invocation:: Evaluate expressions. ++@end menu ++ ++ ++@node false invocation ++@section @command{false}: Do nothing, unsuccessfully ++ ++@pindex false ++@cindex do nothing, unsuccessfully ++@cindex failure exit status ++@cindex exit status of @command{false} ++ ++@command{false} does nothing except return an exit status of 1, meaning ++@dfn{failure}. It can be used as a place holder in shell scripts ++where an unsuccessful command is needed. ++In most modern shells, @command{false} is a built-in command, so when ++you use @samp{false} in a script, you're probably using the built-in ++command, not the one documented here. ++ ++@command{false} honors the @option{--help} and @option{--version} options. ++ ++This version of @command{false} is implemented as a C program, and is thus ++more secure and faster than a shell script implementation, and may safely ++be used as a dummy shell for the purpose of disabling accounts. ++ ++Note that @command{false} (unlike all other programs documented herein) ++exits unsuccessfully, even when invoked with ++@option{--help} or @option{--version}. ++ ++Portable programs should not assume that the exit status of ++@command{false} is 1, as it is greater than 1 on some ++non-@acronym{GNU} hosts. ++ ++ ++@node true invocation ++@section @command{true}: Do nothing, successfully ++ ++@pindex true ++@cindex do nothing, successfully ++@cindex no-op ++@cindex successful exit ++@cindex exit status of @command{true} ++ ++@command{true} does nothing except return an exit status of 0, meaning ++@dfn{success}. It can be used as a place holder in shell scripts ++where a successful command is needed, although the shell built-in ++command @code{:} (colon) may do the same thing faster. 
++In most modern shells, @command{true} is a built-in command, so when ++you use @samp{true} in a script, you're probably using the built-in ++command, not the one documented here. ++ ++@command{true} honors the @option{--help} and @option{--version} options. ++ ++Note, however, that it is possible to cause @command{true} ++to exit with nonzero status: with the @option{--help} or @option{--version} ++option, and with standard ++output already closed or redirected to a file that evokes an I/O error. ++For example, using a Bourne-compatible shell: ++ ++@example ++$ ./true --version >&- ++./true: write error: Bad file number ++$ ./true --version > /dev/full ++./true: write error: No space left on device ++@end example ++ ++This version of @command{true} is implemented as a C program, and is thus ++more secure and faster than a shell script implementation, and may safely ++be used as a dummy shell for the purpose of disabling accounts. ++ ++@node test invocation ++@section @command{test}: Check file types and compare values ++ ++@pindex test ++@cindex check file types ++@cindex compare values ++@cindex expression evaluation ++ ++@command{test} returns a status of 0 (true) or 1 (false) depending on the ++evaluation of the conditional expression @var{expr}. Each part of the ++expression must be a separate argument. ++ ++@command{test} has file status checks, string operators, and numeric ++comparison operators. ++ ++@command{test} has an alternate form that uses opening and closing ++square brackets instead of a leading @samp{test}. For example, instead ++of @samp{test -d /}, you can write @samp{[ -d / ]}. The square ++brackets must be separate arguments; for example, @samp{[-d /]} does ++not have the desired effect. Since @samp{test @var{expr}} and @samp{[ ++@var{expr} ]} have the same meaning, only the former form is discussed ++below.
++ ++Synopses: ++ ++@example ++test @var{expression} ++test ++[ @var{expression} ] ++[ ] ++[ @var{option} ++@end example ++ ++@mayConflictWithShellBuiltIn{test} ++ ++If @var{expression} is omitted, @command{test} returns false. ++If @var{expression} is a single argument, ++@command{test} returns false if the argument is null and true otherwise. The argument ++can be any string, including strings like @samp{-d}, @samp{-1}, ++@samp{--}, @samp{--help}, and @samp{--version} that most other ++programs would treat as options. To get help and version information, ++invoke the commands @samp{[ --help} and @samp{[ --version}, without ++the usual closing brackets. @xref{Common options}. ++ ++@cindex exit status of @command{test} ++Exit status: ++ ++@display ++0 if the expression is true, ++1 if the expression is false, ++2 if an error occurred. ++@end display ++ ++@menu ++* File type tests:: -[bcdfhLpSt] ++* Access permission tests:: -[gkruwxOG] ++* File characteristic tests:: -e -s -nt -ot -ef ++* String tests:: -z -n = != ++* Numeric tests:: -eq -ne -lt -le -gt -ge ++* Connectives for test:: ! -a -o ++@end menu ++ ++ ++@node File type tests ++@subsection File type tests ++ ++@cindex file type tests ++ ++These options test for particular types of files. (Everything's a file, ++but not all files are the same!) ++ ++@table @samp ++ ++@item -b @var{file} ++@opindex -b ++@cindex block special check ++True if @var{file} exists and is a block special device. ++ ++@item -c @var{file} ++@opindex -c ++@cindex character special check ++True if @var{file} exists and is a character special device. ++ ++@item -d @var{file} ++@opindex -d ++@cindex directory check ++True if @var{file} exists and is a directory. ++ ++@item -f @var{file} ++@opindex -f ++@cindex regular file check ++True if @var{file} exists and is a regular file. ++ ++@item -h @var{file} ++@itemx -L @var{file} ++@opindex -L ++@opindex -h ++@cindex symbolic link check ++True if @var{file} exists and is a symbolic link. 
++Unlike all other file-related tests, this test does not dereference ++@var{file} if it is a symbolic link. ++ ++@item -p @var{file} ++@opindex -p ++@cindex named pipe check ++True if @var{file} exists and is a named pipe. ++ ++@item -S @var{file} ++@opindex -S ++@cindex socket check ++True if @var{file} exists and is a socket. ++ ++@item -t @var{fd} ++@opindex -t ++@cindex terminal check ++True if @var{fd} is a file descriptor that is associated with a ++terminal. ++ ++@end table ++ ++ ++@node Access permission tests ++@subsection Access permission tests ++ ++@cindex access permission tests ++@cindex permission tests ++ ++These options test for particular access permissions. ++ ++@table @samp ++ ++@item -g @var{file} ++@opindex -g ++@cindex set-group-ID check ++True if @var{file} exists and has its set-group-ID bit set. ++ ++@item -k @var{file} ++@opindex -k ++@cindex sticky bit check ++True if @var{file} exists and has its @dfn{sticky} bit set. ++ ++@item -r @var{file} ++@opindex -r ++@cindex readable file check ++True if @var{file} exists and read permission is granted. ++ ++@item -u @var{file} ++@opindex -u ++@cindex set-user-ID check ++True if @var{file} exists and has its set-user-ID bit set. ++ ++@item -w @var{file} ++@opindex -w ++@cindex writable file check ++True if @var{file} exists and write permission is granted. ++ ++@item -x @var{file} ++@opindex -x ++@cindex executable file check ++True if @var{file} exists and execute permission is granted ++(or search permission, if it is a directory). ++ ++@item -O @var{file} ++@opindex -O ++@cindex owned by effective user ID check ++True if @var{file} exists and is owned by the current effective user ID. ++ ++@item -G @var{file} ++@opindex -G ++@cindex owned by effective group ID check ++True if @var{file} exists and is owned by the current effective group ID. 
++ ++@end table ++ ++@node File characteristic tests ++@subsection File characteristic tests ++ ++@cindex file characteristic tests ++ ++These options test other file characteristics. ++ ++@table @samp ++ ++@item -e @var{file} ++@opindex -e ++@cindex existence-of-file check ++True if @var{file} exists. ++ ++@item -s @var{file} ++@opindex -s ++@cindex nonempty file check ++True if @var{file} exists and has a size greater than zero. ++ ++@item @var{file1} -nt @var{file2} ++@opindex -nt ++@cindex newer-than file check ++True if @var{file1} is newer (according to modification date) than ++@var{file2}, or if @var{file1} exists and @var{file2} does not. ++ ++@item @var{file1} -ot @var{file2} ++@opindex -ot ++@cindex older-than file check ++True if @var{file1} is older (according to modification date) than ++@var{file2}, or if @var{file2} exists and @var{file1} does not. ++ ++@item @var{file1} -ef @var{file2} ++@opindex -ef ++@cindex same file check ++@cindex hard link check ++True if @var{file1} and @var{file2} have the same device and inode ++numbers, i.e., if they are hard links to each other. ++ ++@end table ++ ++ ++@node String tests ++@subsection String tests ++ ++@cindex string tests ++ ++These options test string characteristics. You may need to quote ++@var{string} arguments for the shell. For example: ++ ++@example ++test -n "$V" ++@end example ++ ++The quotes here prevent the wrong arguments from being passed to ++@command{test} if @samp{$V} is empty or contains special characters. ++ ++@table @samp ++ ++@item -z @var{string} ++@opindex -z ++@cindex zero-length string check ++True if the length of @var{string} is zero. ++ ++@item -n @var{string} ++@itemx @var{string} ++@opindex -n ++@cindex nonzero-length string check ++True if the length of @var{string} is nonzero. ++ ++@item @var{string1} = @var{string2} ++@opindex = ++@cindex equal string check ++True if the strings are equal. 
++ ++@item @var{string1} != @var{string2} ++@opindex != ++@cindex not-equal string check ++True if the strings are not equal. ++ ++@end table ++ ++ ++@node Numeric tests ++@subsection Numeric tests ++ ++@cindex numeric tests ++@cindex arithmetic tests ++ ++Numeric relational operators. The arguments must be entirely numeric ++(possibly negative), or the special expression @w{@code{-l @var{string}}}, ++which evaluates to the length of @var{string}. ++ ++@table @samp ++ ++@item @var{arg1} -eq @var{arg2} ++@itemx @var{arg1} -ne @var{arg2} ++@itemx @var{arg1} -lt @var{arg2} ++@itemx @var{arg1} -le @var{arg2} ++@itemx @var{arg1} -gt @var{arg2} ++@itemx @var{arg1} -ge @var{arg2} ++@opindex -eq ++@opindex -ne ++@opindex -lt ++@opindex -le ++@opindex -gt ++@opindex -ge ++These arithmetic binary operators return true if @var{arg1} is equal to, ++not equal to, less than, less than or equal to, greater than, or ++greater than or equal to @var{arg2}, respectively. ++ ++@end table ++ ++For example: ++ ++@example ++test -1 -gt -2 && echo yes ++@result{} yes ++test -l abc -gt 1 && echo yes ++@result{} yes ++test 0x100 -eq 1 ++@error{} test: integer expression expected before -eq ++@end example ++ ++ ++@node Connectives for test ++@subsection Connectives for @command{test} ++ ++@cindex logical connectives ++@cindex connectives, logical ++ ++The usual logical connectives. ++ ++@table @samp ++ ++@item ! @var{expr} ++@opindex ! ++True if @var{expr} is false. ++ ++@item @var{expr1} -a @var{expr2} ++@opindex -a ++@cindex logical and operator ++@cindex and operator ++True if both @var{expr1} and @var{expr2} are true. ++ ++@item @var{expr1} -o @var{expr2} ++@opindex -o ++@cindex logical or operator ++@cindex or operator ++True if either @var{expr1} or @var{expr2} is true.
++ ++@end table ++ ++ ++@node expr invocation ++@section @command{expr}: Evaluate expressions ++ ++@pindex expr ++@cindex expression evaluation ++@cindex evaluation of expressions ++ ++@command{expr} evaluates an expression and writes the result on standard ++output. Each token of the expression must be a separate argument. ++ ++Operands are either integers or strings. Integers consist of one or ++more decimal digits, with an optional leading @samp{-}. ++@command{expr} converts ++anything appearing in an operand position to an integer or a string ++depending on the operation being applied to it. ++ ++Strings are not quoted for @command{expr} itself, though you may need to ++quote them to protect characters with special meaning to the shell, ++e.g., spaces. However, regardless of whether it is quoted, a string ++operand should not be a parenthesis or any of @command{expr}'s ++operators like @code{+}, so you cannot safely pass an arbitrary string ++@code{$str} to expr merely by quoting it to the shell. One way to ++work around this is to use the @sc{gnu} extension @code{+}, ++(e.g., @code{+ "$str" = foo}); a more portable way is to use ++@code{@w{" $str"}} and to adjust the rest of the expression to take ++the leading space into account (e.g., @code{@w{" $str" = " foo"}}). ++ ++You should not pass a negative integer or a string with leading ++@samp{-} as @command{expr}'s first argument, as it might be ++misinterpreted as an option; this can be avoided by parenthesization. ++Also, portable scripts should not use a string operand that happens to ++take the form of an integer; this can be worked around by inserting ++leading spaces as mentioned above. ++ ++@cindex parentheses for grouping ++Operators may be given as infix symbols or prefix keywords. Parentheses ++may be used for grouping in the usual manner. You must quote ++parentheses and many operators to avoid the shell evaluating them, ++however. 
++ ++When built with support for the GNU MP library, @command{expr} uses ++arbitrary-precision arithmetic; otherwise, it uses native arithmetic ++types and may fail due to arithmetic overflow. ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. Options must precede operands. ++ ++@cindex exit status of @command{expr} ++Exit status: ++ ++@display ++0 if the expression is neither null nor 0, ++1 if the expression is null or 0, ++2 if the expression is invalid, ++3 if an internal error occurred (e.g., arithmetic overflow). ++@end display ++ ++@menu ++* String expressions:: + : match substr index length ++* Numeric expressions:: + - * / % ++* Relations for expr:: | & < <= = == != >= > ++* Examples of expr:: Examples. ++@end menu ++ ++ ++@node String expressions ++@subsection String expressions ++ ++@cindex string expressions ++@cindex expressions, string ++ ++@command{expr} supports pattern matching and other string operators. These ++have higher precedence than both the numeric and relational operators (in ++the next sections). ++ ++@table @samp ++ ++@item @var{string} : @var{regex} ++@cindex pattern matching ++@cindex regular expression matching ++@cindex matching patterns ++Perform pattern matching. The arguments are converted to strings and the ++second is considered to be a (basic, a la GNU @code{grep}) regular ++expression, with a @code{^} implicitly prepended. The first argument is ++then matched against this regular expression. ++ ++If the match succeeds and @var{regex} uses @samp{\(} and @samp{\)}, the ++@code{:} expression returns the part of @var{string} that matched the ++subexpression; otherwise, it returns the number of characters matched. ++ ++If the match fails, the @code{:} operator returns the null string if ++@samp{\(} and @samp{\)} are used in @var{regex}, otherwise 0. 
++ ++@kindex \( @r{regexp operator} ++Only the first @samp{\( @dots{} \)} pair is relevant to the return ++value; additional pairs are meaningful only for grouping the regular ++expression operators. ++ ++@kindex \+ @r{regexp operator} ++@kindex \? @r{regexp operator} ++@kindex \| @r{regexp operator} ++In the regular expression, @code{\+}, @code{\?}, and @code{\|} are ++operators which respectively match one or more, zero or one, or separate ++alternatives. SunOS and other @command{expr}'s treat these as regular ++characters. (@acronym{POSIX} allows either behavior.) ++@xref{Top, , Regular Expression Library, regex, Regex}, for details of ++regular expression syntax. Some examples are in @ref{Examples of expr}. ++ ++@item match @var{string} @var{regex} ++@findex match ++An alternative way to do pattern matching. This is the same as ++@w{@samp{@var{string} : @var{regex}}}. ++ ++@item substr @var{string} @var{position} @var{length} ++@findex substr ++Returns the substring of @var{string} beginning at @var{position} ++with length at most @var{length}. If either @var{position} or ++@var{length} is negative, zero, or non-numeric, returns the null string. ++ ++@item index @var{string} @var{charset} ++@findex index ++Returns the first position in @var{string} where the first character in ++@var{charset} was found. If no character in @var{charset} is found in ++@var{string}, return 0. ++ ++@item length @var{string} ++@findex length ++Returns the length of @var{string}. ++ ++@item + @var{token} ++@kindex + ++Interpret @var{token} as a string, even if it is a keyword like @var{match} ++or an operator like @code{/}. ++This makes it possible to test @code{expr length + "$x"} or ++@code{expr + "$x" : '.*/\(.\)'} and have it do the right thing even if ++the value of @var{$x} happens to be (for example) @code{/} or @code{index}. ++This operator is a @acronym{GNU} extension. Portable shell scripts should use ++@code{@w{" $token"} : @w{' \(.*\)'}} instead of @code{+ "$token"}. 
++ ++@end table ++ ++To make @command{expr} interpret keywords as strings, you must use the ++@code{+} operator. ++ ++ ++@node Numeric expressions ++@subsection Numeric expressions ++ ++@cindex numeric expressions ++@cindex expressions, numeric ++ ++@command{expr} supports the usual numeric operators, in order of increasing ++precedence. These numeric operators have lower precedence than the ++string operators described in the previous section, and higher precedence ++than the connectives (next section). ++ ++@table @samp ++ ++@item + - ++@kindex + ++@kindex - ++@cindex addition ++@cindex subtraction ++Addition and subtraction. Both arguments are converted to integers; ++an error occurs if this cannot be done. ++ ++@item * / % ++@kindex * ++@kindex / ++@kindex % ++@cindex multiplication ++@cindex division ++@cindex remainder ++Multiplication, division, remainder. Both arguments are converted to ++integers; an error occurs if this cannot be done. ++ ++@end table ++ ++ ++@node Relations for expr ++@subsection Relations for @command{expr} ++ ++@cindex connectives, logical ++@cindex logical connectives ++@cindex relations, numeric or string ++ ++@command{expr} supports the usual logical connectives and relations. These ++have lower precedence than the string and numeric operators ++(previous sections). Here is the list, lowest-precedence operator first. ++ ++@table @samp ++ ++@item | ++@kindex | ++@cindex logical or operator ++@cindex or operator ++Returns its first argument if that is neither null nor zero, otherwise ++its second argument if it is neither null nor zero, otherwise 0. It ++does not evaluate its second argument if its first argument is neither ++null nor zero. ++ ++@item & ++@kindex & ++@cindex logical and operator ++@cindex and operator ++Return its first argument if neither argument is null or zero, otherwise ++0. It does not evaluate its second argument if its first argument is ++null or zero.
++ ++@item < <= = == != >= > ++@kindex < ++@kindex <= ++@kindex = ++@kindex == ++@kindex > ++@kindex >= ++@cindex comparison operators ++@vindex LC_COLLATE ++Compare the arguments and return 1 if the relation is true, 0 otherwise. ++@code{==} is a synonym for @code{=}. @command{expr} first tries to convert ++both arguments to integers and do a numeric comparison; if either ++conversion fails, it does a lexicographic comparison using the character ++collating sequence specified by the @env{LC_COLLATE} locale. ++ ++@end table ++ ++ ++@node Examples of expr ++@subsection Examples of using @command{expr} ++ ++@cindex examples of @command{expr} ++Here are a few examples, including quoting for shell metacharacters. ++ ++To add 1 to the shell variable @code{foo}, in Bourne-compatible shells: ++ ++@example ++foo=`expr $foo + 1` ++@end example ++ ++To print the non-directory part of the file name stored in ++@code{$fname}, which need not contain a @code{/}: ++ ++@example ++expr $fname : '.*/\(.*\)' '|' $fname ++@end example ++ ++An example showing that @code{\+} is an operator: ++ ++@example ++expr aaa : 'a\+' ++@result{} 3 ++@end example ++ ++@example ++expr abc : 'a\(.\)c' ++@result{} b ++expr index abcdef cz ++@result{} 3 ++expr index index a ++@error{} expr: syntax error ++expr index + index a ++@result{} 0 ++@end example ++ ++ ++@node Redirection ++@chapter Redirection ++ ++@cindex redirection ++@cindex commands for redirection ++ ++Unix shells commonly provide several forms of @dfn{redirection}---ways ++to change the input source or output destination of a command. But one ++useful redirection is performed by a separate command, not by the shell; ++it's described here. ++ ++@menu ++* tee invocation:: Redirect output to multiple files or processes. 
++@end menu ++ ++ ++@node tee invocation ++@section @command{tee}: Redirect output to multiple files or processes ++ ++@pindex tee ++@cindex pipe fitting ++@cindex destinations, multiple output ++@cindex read from stdin and write to stdout and files ++ ++The @command{tee} command copies standard input to standard output and also ++to any files given as arguments. This is useful when you want not only ++to send some data down a pipe, but also to save a copy. Synopsis: ++ ++@example ++tee [@var{option}]@dots{} [@var{file}]@dots{} ++@end example ++ ++If a file being written to does not already exist, it is created. If a ++file being written to already exists, the data it previously contained ++is overwritten unless the @option{-a} option is used. ++ ++A @var{file} of @samp{-} causes @command{tee} to send another copy of ++input to standard output, but this is typically not that useful as the ++copies are interleaved. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++@item -a ++@itemx --append ++@opindex -a ++@opindex --append ++Append standard input to the given files rather than overwriting ++them. ++ ++@item -i ++@itemx --ignore-interrupts ++@opindex -i ++@opindex --ignore-interrupts ++Ignore interrupt signals. ++ ++@end table ++ ++The @command{tee} command is useful when you happen to be transferring a large ++amount of data and also want to summarize that data without reading ++it a second time. For example, when you are downloading a DVD image, ++you often want to verify its signature or checksum right away. ++The inefficient way to do it is simply: ++ ++@example ++wget http://example.com/some.iso && sha1sum some.iso ++@end example ++ ++One problem with the above is that it makes you wait for the ++download to complete before starting the time-consuming SHA1 computation. ++Perhaps even more importantly, the above requires reading ++the DVD image a second time (the first was from the network). 
++ ++The efficient way to do it is to interleave the download ++and SHA1 computation. Then, you'll get the checksum for ++free, because the entire process parallelizes so well: ++ ++@example ++# slightly contrived, to demonstrate process substitution ++wget -O - http://example.com/dvd.iso \ ++ | tee >(sha1sum > dvd.sha1) > dvd.iso ++@end example ++ ++That makes @command{tee} write not just to the expected output file, ++but also to a pipe running @command{sha1sum} and saving the final ++checksum in a file named @file{dvd.sha1}. ++ ++Note, however, that this example relies on a feature of modern shells ++called @dfn{process substitution} ++(the @samp{>(command)} syntax, above; ++@xref{Process Substitution,,Process Substitution, bashref, ++The Bash Reference Manual}.), ++so it works with @command{zsh}, @command{bash}, and @command{ksh}, ++but not with @command{/bin/sh}. So if you write code like this ++in a shell script, be sure to start the script with @samp{#!/bin/bash}. ++ ++Since the above example writes to one file and one process, ++a more conventional and portable use of @command{tee} is even better: ++ ++@example ++wget -O - http://example.com/dvd.iso \ ++ | tee dvd.iso | sha1sum > dvd.sha1 ++@end example ++ ++You can extend this example to make @command{tee} write to two processes, ++computing MD5 and SHA1 checksums in parallel. In this case, ++process substitution is required: ++ ++@example ++wget -O - http://example.com/dvd.iso \ ++ | tee >(sha1sum > dvd.sha1) \ ++ >(md5sum > dvd.md5) \ ++ > dvd.iso ++@end example ++ ++This technique is also useful when you want to make a @emph{compressed} ++copy of the contents of a pipe. ++Consider a tool to graphically summarize disk usage data from @samp{du -ak}. ++For a large hierarchy, @samp{du -ak} can run for a long time, ++and can easily produce terabytes of data, so you won't want to ++rerun the command unnecessarily. Nor will you want to save ++the uncompressed output. 
++ ++Doing it the inefficient way, you can't even start the GUI ++until after you've compressed all of the @command{du} output: ++ ++@example ++du -ak | gzip -9 > /tmp/du.gz ++gzip -dc /tmp/du.gz | xdiskusage -a ++@end example ++ ++With @command{tee} and process substitution, you start the GUI ++right away and eliminate the decompression completely: ++ ++@example ++du -ak | tee >(gzip -9 > /tmp/du.gz) | xdiskusage -a ++@end example ++ ++Finally, if you regularly create more than one type of ++compressed tarball at once, for example when @code{make dist} creates ++both @command{gzip}-compressed and @command{bzip2}-compressed tarballs, ++there may be a better way. ++Typical @command{automake}-generated @file{Makefile} rules create ++the two compressed tar archives with commands in sequence, like this ++(slightly simplified): ++ ++@example ++tardir=your-pkg-M.N ++tar chof - "$tardir" | gzip -9 -c > your-pkg-M.N.tar.gz ++tar chof - "$tardir" | bzip2 -9 -c > your-pkg-M.N.tar.bz2 ++@end example ++ ++However, if the hierarchy you are archiving and compressing is larger ++than a couple megabytes, and especially if you are using a multi-processor ++system with plenty of memory, then you can do much better by reading the ++directory contents only once and running the compression programs in parallel: ++ ++@example ++tardir=your-pkg-M.N ++tar chof - "$tardir" \ ++ | tee >(gzip -9 -c > your-pkg-M.N.tar.gz) \ ++ | bzip2 -9 -c > your-pkg-M.N.tar.bz2 ++@end example ++ ++@exitstatus ++ ++ ++@node File name manipulation ++@chapter File name manipulation ++ ++@cindex file name manipulation ++@cindex manipulation of file names ++@cindex commands for file name manipulation ++ ++This section describes commands that manipulate file names. ++ ++@menu ++* basename invocation:: Strip directory and suffix from a file name. ++* dirname invocation:: Strip non-directory suffix from a file name. ++* pathchk invocation:: Check file name validity and portability.
++@end menu ++ ++ ++@node basename invocation ++@section @command{basename}: Strip directory and suffix from a file name ++ ++@pindex basename ++@cindex strip directory and suffix from file names ++@cindex directory, stripping from file names ++@cindex suffix, stripping from file names ++@cindex file names, stripping directory and suffix ++@cindex leading directory components, stripping ++ ++@command{basename} removes any leading directory components from ++@var{name}. Synopsis: ++ ++@example ++basename @var{name} [@var{suffix}] ++@end example ++ ++If @var{suffix} is specified and is identical to the end of @var{name}, ++it is removed from @var{name} as well. Note that since trailing slashes ++are removed prior to suffix matching, @var{suffix} will do nothing if it ++contains slashes. @command{basename} prints the result on standard ++output. ++ ++@c This test is used both here and in the section on dirname. ++@macro basenameAndDirname ++Together, @command{basename} and @command{dirname} are designed such ++that if @samp{ls "$name"} succeeds, then the command sequence @samp{cd ++"$(dirname "$name")"; ls "$(basename "$name")"} will, too. This works ++for everything except file names containing a trailing newline. ++@end macro ++@basenameAndDirname ++ ++@acronym{POSIX} allows the implementation to define the results if ++@var{name} is empty or @samp{//}. In the former case, @acronym{GNU} ++@command{basename} returns the empty string. In the latter case, the ++result is @samp{//} on platforms where @var{//} is distinct from ++@var{/}, and @samp{/} on platforms where there is no difference. ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. Options must precede operands. ++ ++@exitstatus ++ ++Examples: ++ ++@smallexample ++# Output "sort". ++basename /usr/bin/sort ++ ++# Output "stdio". 
++basename include/stdio.h .h ++@end smallexample ++ ++ ++@node dirname invocation ++@section @command{dirname}: Strip non-directory suffix from a file name ++ ++@pindex dirname ++@cindex directory components, printing ++@cindex stripping non-directory suffix ++@cindex non-directory suffix, stripping ++ ++@command{dirname} prints all but the final slash-delimited component of ++a string (presumably a file name). Synopsis: ++ ++@example ++dirname @var{name} ++@end example ++ ++If @var{name} is a single component, @command{dirname} prints @samp{.} ++(meaning the current directory). ++ ++@basenameAndDirname ++ ++@acronym{POSIX} allows the implementation to define the results if ++@var{name} is @samp{//}. With @acronym{GNU} @command{dirname}, the ++result is @samp{//} on platforms where @var{//} is distinct from ++@var{/}, and @samp{/} on platforms where there is no difference. ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@exitstatus ++ ++Examples: ++ ++@smallexample ++# Output "/usr/bin". ++dirname /usr/bin/sort ++ ++# Output ".". ++dirname stdio.h ++@end smallexample ++ ++ ++@node pathchk invocation ++@section @command{pathchk}: Check file name validity and portability ++ ++@pindex pathchk ++@cindex file names, checking validity and portability ++@cindex valid file names, checking for ++@cindex portable file names, checking for ++ ++@command{pathchk} checks validity and portability of file names. Synopsis: ++ ++@example ++pathchk [@var{option}]@dots{} @var{name}@dots{} ++@end example ++ ++For each @var{name}, @command{pathchk} prints an error message if any of ++these conditions is true: ++ ++@enumerate ++@item ++One of the existing directories in @var{name} does not have search ++(execute) permission, ++@item ++The length of @var{name} is larger than the maximum supported by the ++operating system. ++@item ++The length of one component of @var{name} is longer than ++its file system's maximum. 
++@end enumerate ++ ++A nonexistent @var{name} is not an error, so long as a file with that ++name could be created under the above conditions. ++ ++The program accepts the following options. Also see @ref{Common options}. ++Options must precede operands. ++ ++@table @samp ++ ++@item -p ++@opindex -p ++Instead of performing checks based on the underlying file system, ++print an error message if any of these conditions is true: ++ ++@enumerate ++@item ++A file name is empty. ++ ++@item ++A file name contains a character outside the @acronym{POSIX} portable file ++name character set, namely, the ASCII letters and digits, @samp{.}, ++@samp{_}, @samp{-}, and @samp{/}. ++ ++@item ++The length of a file name or one of its components exceeds the ++@acronym{POSIX} minimum limits for portability. ++@end enumerate ++ ++@item -P ++@opindex -P ++Print an error message if a file name is empty, or if it contains a component ++that begins with @samp{-}. ++ ++@item --portability ++@opindex --portability ++Print an error message if a file name is not portable to all @acronym{POSIX} ++hosts. This option is equivalent to @samp{-p -P}. ++ ++@end table ++ ++@cindex exit status of @command{pathchk} ++Exit status: ++ ++@display ++0 if all specified file names passed all checks, ++1 otherwise. ++@end display ++ ++ ++@node Working context ++@chapter Working context ++ ++@cindex working context ++@cindex commands for printing the working context ++ ++This section describes commands that display or alter the context in ++which you are working: the current directory, the terminal settings, and ++so forth. See also the user-related commands in the next section. ++ ++@menu ++* pwd invocation:: Print working directory. ++* stty invocation:: Print or change terminal characteristics. ++* printenv invocation:: Print environment variables. ++* tty invocation:: Print file name of terminal on standard input.
++@end menu ++ ++ ++@node pwd invocation ++@section @command{pwd}: Print working directory ++ ++@pindex pwd ++@cindex print name of current directory ++@cindex current working directory, printing ++@cindex working directory, printing ++ ++ ++@command{pwd} prints the name of the current directory. Synopsis: ++ ++@example ++pwd [@var{option}]@dots{} ++@end example ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++@item -L ++@itemx --logical ++@opindex -L ++@opindex --logical ++If the contents of the environment variable @env{PWD} provide an ++absolute name of the current directory with no @samp{.} or @samp{..} ++components, but possibly with symbolic links, then output those ++contents. Otherwise, fall back to default @option{-P} handling. ++ ++@item -P ++@itemx --physical ++@opindex -P ++@opindex --physical ++Print a fully resolved name for the current directory. That is, all ++components of the printed name will be actual directory names---none ++will be symbolic links. ++@end table ++ ++@cindex symbolic links and @command{pwd} ++If @option{-L} and @option{-P} are both given, the last one takes ++precedence. If neither option is given, then this implementation uses ++@option{-P} as the default unless the @env{POSIXLY_CORRECT} ++environment variable is set. ++ ++@mayConflictWithShellBuiltIn{pwd} ++ ++@exitstatus ++ ++ ++@node stty invocation ++@section @command{stty}: Print or change terminal characteristics ++ ++@pindex stty ++@cindex change or print terminal settings ++@cindex terminal settings ++@cindex line settings of terminal ++ ++@command{stty} prints or changes terminal characteristics, such as baud rate. 
++Synopses: ++ ++@example ++stty [@var{option}] [@var{setting}]@dots{} ++stty [@var{option}] ++@end example ++ ++If given no line settings, @command{stty} prints the baud rate, line ++discipline number (on systems that support it), and line settings ++that have been changed from the values set by @samp{stty sane}. ++By default, mode reading and setting are performed on the tty line ++connected to standard input, although this can be modified by the ++@option{--file} option. ++ ++@command{stty} accepts many non-option arguments that change aspects of ++the terminal line operation, as described below. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++@item -a ++@itemx --all ++@opindex -a ++@opindex --all ++Print all current settings in human-readable form. This option may not ++be used in combination with any line settings. ++ ++@item -F @var{device} ++@itemx --file=@var{device} ++@opindex -F ++@opindex --file ++Set the line opened by the file name specified in @var{device} instead of ++the tty line connected to standard input. This option is necessary ++because opening a @acronym{POSIX} tty requires use of the @code{O_NONDELAY} flag to ++prevent a @acronym{POSIX} tty from blocking until the carrier detect line is high if ++the @code{clocal} flag is not set. Hence, it is not always possible ++to allow the shell to open the device in the traditional manner. ++ ++@item -g ++@itemx --save ++@opindex -g ++@opindex --save ++@cindex machine-readable @command{stty} output ++Print all current settings in a form that can be used as an argument to ++another @command{stty} command to restore the current settings. This option ++may not be used in combination with any line settings. ++ ++@end table ++ ++Many settings can be turned off by preceding them with a @samp{-}. ++Such arguments are marked below with ``May be negated'' in their ++description. 
The descriptions themselves refer to the positive ++case, that is, when @emph{not} negated (unless stated otherwise, ++of course). ++ ++Some settings are not available on all @acronym{POSIX} systems, since they use ++extensions. Such arguments are marked below with ``Non-@acronym{POSIX}'' in their ++description. On non-@acronym{POSIX} systems, those or other settings also may not ++be available, but it's not feasible to document all the variations: just ++try it and see. ++ ++@exitstatus ++ ++@menu ++* Control:: Control settings ++* Input:: Input settings ++* Output:: Output settings ++* Local:: Local settings ++* Combination:: Combination settings ++* Characters:: Special characters ++* Special:: Special settings ++@end menu ++ ++ ++@node Control ++@subsection Control settings ++ ++@cindex control settings ++Control settings: ++ ++@table @samp ++@item parenb ++@opindex parenb ++@cindex two-way parity ++Generate parity bit in output and expect parity bit in input. ++May be negated. ++ ++@item parodd ++@opindex parodd ++@cindex odd parity ++@cindex even parity ++Set odd parity (even parity if negated). May be negated. ++ ++@item cs5 ++@itemx cs6 ++@itemx cs7 ++@itemx cs8 ++@opindex cs@var{n} ++@cindex character size ++@cindex eight-bit characters ++Set character size to 5, 6, 7, or 8 bits. ++ ++@item hup ++@itemx hupcl ++@opindex hup[cl] ++Send a hangup signal when the last process closes the tty. May be ++negated. ++ ++@item cstopb ++@opindex cstopb ++@cindex stop bits ++Use two stop bits per character (one if negated). May be negated. ++ ++@item cread ++@opindex cread ++Allow input to be received. May be negated. ++ ++@item clocal ++@opindex clocal ++@cindex modem control ++Disable modem control signals. May be negated. ++ ++@item crtscts ++@opindex crtscts ++@cindex hardware flow control ++@cindex flow control, hardware ++@cindex RTS/CTS flow control ++Enable RTS/CTS flow control. Non-@acronym{POSIX}. May be negated.
++@end table ++ ++ ++@node Input ++@subsection Input settings ++ ++@cindex input settings ++These settings control operations on data received from the terminal. ++ ++@table @samp ++@item ignbrk ++@opindex ignbrk ++@cindex breaks, ignoring ++Ignore break characters. May be negated. ++ ++@item brkint ++@opindex brkint ++@cindex breaks, cause interrupts ++Make breaks cause an interrupt signal. May be negated. ++ ++@item ignpar ++@opindex ignpar ++@cindex parity, ignoring ++Ignore characters with parity errors. May be negated. ++ ++@item parmrk ++@opindex parmrk ++@cindex parity errors, marking ++Mark parity errors (with a 255-0-character sequence). May be negated. ++ ++@item inpck ++@opindex inpck ++Enable input parity checking. May be negated. ++ ++@item istrip ++@opindex istrip ++@cindex eight-bit input ++Clear high (8th) bit of input characters. May be negated. ++ ++@item inlcr ++@opindex inlcr ++@cindex newline, translating to return ++Translate newline to carriage return. May be negated. ++ ++@item igncr ++@opindex igncr ++@cindex return, ignoring ++Ignore carriage return. May be negated. ++ ++@item icrnl ++@opindex icrnl ++@cindex return, translating to newline ++Translate carriage return to newline. May be negated. ++ ++@item iutf8 ++@opindex iutf8 ++@cindex input encoding, UTF-8 ++Assume input characters are UTF-8 encoded. May be negated. ++ ++@item ixon ++@opindex ixon ++@kindex C-s/C-q flow control ++@cindex XON/XOFF flow control ++Enable XON/XOFF flow control (that is, @kbd{CTRL-S}/@kbd{CTRL-Q}). May ++be negated. ++ ++@item ixoff ++@itemx tandem ++@opindex ixoff ++@opindex tandem ++@cindex software flow control ++@cindex flow control, software ++Enable sending of @code{stop} character when the system input buffer ++is almost full, and @code{start} character when it becomes almost ++empty again. May be negated. ++ ++@item iuclc ++@opindex iuclc ++@cindex uppercase, translating to lowercase ++Translate uppercase characters to lowercase. Non-@acronym{POSIX}. 
May be ++negated. Note that @code{ilcuc} is not implemented, because after invoking it one ++could issue hardly any (lowercase) Unix command. ++ ++@item ixany ++@opindex ixany ++Allow any character to restart output (only the start character ++if negated). Non-@acronym{POSIX}. May be negated. ++ ++@item imaxbel ++@opindex imaxbel ++@cindex beeping at input buffer full ++Enable beeping and not flushing input buffer if a character arrives ++when the input buffer is full. Non-@acronym{POSIX}. May be negated. ++@end table ++ ++ ++@node Output ++@subsection Output settings ++ ++@cindex output settings ++These settings control operations on data sent to the terminal. ++ ++@table @samp ++@item opost ++@opindex opost ++Postprocess output. May be negated. ++ ++@item olcuc ++@opindex olcuc ++@cindex lowercase, translating to output ++Translate lowercase characters to uppercase. Non-@acronym{POSIX}. May be ++negated. (Note ouclc is not currently implemented.) ++ ++@item ocrnl ++@opindex ocrnl ++@cindex return, translating to newline ++Translate carriage return to newline. Non-@acronym{POSIX}. May be negated. ++ ++@item onlcr ++@opindex onlcr ++@cindex newline, translating to crlf ++Translate newline to carriage return-newline. Non-@acronym{POSIX}. May be ++negated. ++ ++@item onocr ++@opindex onocr ++Do not print carriage returns in the first column. Non-@acronym{POSIX}. ++May be negated. ++ ++@item onlret ++@opindex onlret ++Newline performs a carriage return. Non-@acronym{POSIX}. May be negated. ++ ++@item ofill ++@opindex ofill ++@cindex pad instead of timing for delaying ++Use fill (padding) characters instead of timing for delays. Non-@acronym{POSIX}. ++May be negated. ++ ++@item ofdel ++@opindex ofdel ++@cindex pad character ++Use @acronym{ASCII} @sc{del} characters for fill instead of ++@acronym{ASCII} @sc{nul} characters. Non-@acronym{POSIX}. ++May be negated. ++ ++@item nl1 ++@itemx nl0 ++@opindex nl@var{n} ++Newline delay style. Non-@acronym{POSIX}.
++ ++@item cr3 ++@itemx cr2 ++@itemx cr1 ++@itemx cr0 ++@opindex cr@var{n} ++Carriage return delay style. Non-@acronym{POSIX}. ++ ++@item tab3 ++@itemx tab2 ++@itemx tab1 ++@itemx tab0 ++@opindex tab@var{n} ++Horizontal tab delay style. Non-@acronym{POSIX}. ++ ++@item bs1 ++@itemx bs0 ++@opindex bs@var{n} ++Backspace delay style. Non-@acronym{POSIX}. ++ ++@item vt1 ++@itemx vt0 ++@opindex vt@var{n} ++Vertical tab delay style. Non-@acronym{POSIX}. ++ ++@item ff1 ++@itemx ff0 ++@opindex ff@var{n} ++Form feed delay style. Non-@acronym{POSIX}. ++@end table ++ ++ ++@node Local ++@subsection Local settings ++ ++@cindex local settings ++ ++@table @samp ++@item isig ++@opindex isig ++Enable @code{interrupt}, @code{quit}, and @code{suspend} special ++characters. May be negated. ++ ++@item icanon ++@opindex icanon ++Enable @code{erase}, @code{kill}, @code{werase}, and @code{rprnt} ++special characters. May be negated. ++ ++@item iexten ++@opindex iexten ++Enable non-@acronym{POSIX} special characters. May be negated. ++ ++@item echo ++@opindex echo ++Echo input characters. May be negated. ++ ++@item echoe ++@itemx crterase ++@opindex echoe ++@opindex crterase ++Echo @code{erase} characters as backspace-space-backspace. May be ++negated. ++ ++@item echok ++@opindex echok ++@cindex newline echoing after @code{kill} ++Echo a newline after a @code{kill} character. May be negated. ++ ++@item echonl ++@opindex echonl ++@cindex newline, echoing ++Echo newline even if not echoing other characters. May be negated. ++ ++@item noflsh ++@opindex noflsh ++@cindex flushing, disabling ++Disable flushing after @code{interrupt} and @code{quit} special ++characters. May be negated. ++ ++@item xcase ++@opindex xcase ++@cindex case translation ++Enable input and output of uppercase characters by preceding their ++lowercase equivalents with @samp{\}, when @code{icanon} is set. ++Non-@acronym{POSIX}. May be negated. 
++ ++@item tostop ++@opindex tostop ++@cindex background jobs, stopping at terminal write ++Stop background jobs that try to write to the terminal. Non-@acronym{POSIX}. ++May be negated. ++ ++@item echoprt ++@itemx prterase ++@opindex echoprt ++@opindex prterase ++Echo erased characters backward, between @samp{\} and @samp{/}. ++Non-@acronym{POSIX}. May be negated. ++ ++@item echoctl ++@itemx ctlecho ++@opindex echoctl ++@opindex ctlecho ++@cindex control characters, using @samp{^@var{c}} ++@cindex hat notation for control characters ++Echo control characters in hat notation (@samp{^@var{c}}) instead ++of literally. Non-@acronym{POSIX}. May be negated. ++ ++@item echoke ++@itemx crtkill ++@opindex echoke ++@opindex crtkill ++Echo the @code{kill} special character by erasing each character on ++the line as indicated by the @code{echoprt} and @code{echoe} settings, ++instead of by the @code{echoctl} and @code{echok} settings. Non-@acronym{POSIX}. ++May be negated. ++@end table ++ ++ ++@node Combination ++@subsection Combination settings ++ ++@cindex combination settings ++Combination settings: ++ ++@table @samp ++@item evenp ++@opindex evenp ++@itemx parity ++@opindex parity ++Same as @code{parenb -parodd cs7}. May be negated. If negated, same ++as @code{-parenb cs8}. ++ ++@item oddp ++@opindex oddp ++Same as @code{parenb parodd cs7}. May be negated. If negated, same ++as @code{-parenb cs8}. ++ ++@item nl ++@opindex nl ++Same as @code{-icrnl -onlcr}. May be negated. If negated, same as ++@code{icrnl -inlcr -igncr onlcr -ocrnl -onlret}. ++ ++@item ek ++@opindex ek ++Reset the @code{erase} and @code{kill} special characters to their default ++values. ++ ++@item sane ++@opindex sane ++Same as: ++ ++@c This is too long to write inline. 
++@example ++cread -ignbrk brkint -inlcr -igncr icrnl -ixoff ++-iuclc -ixany imaxbel opost -olcuc -ocrnl onlcr ++-onocr -onlret -ofill -ofdel nl0 cr0 tab0 bs0 vt0 ++ff0 isig icanon iexten echo echoe echok -echonl ++-noflsh -xcase -tostop -echoprt echoctl echoke ++@end example ++ ++@noindent ++and also sets all special characters to their default values. ++ ++@item cooked ++@opindex cooked ++Same as @code{brkint ignpar istrip icrnl ixon opost isig icanon}, plus ++sets the @code{eof} and @code{eol} characters to their default values ++if they are the same as the @code{min} and @code{time} characters. ++May be negated. If negated, same as @code{raw}. ++ ++@item raw ++@opindex raw ++Same as: ++ ++@example ++-ignbrk -brkint -ignpar -parmrk -inpck -istrip ++-inlcr -igncr -icrnl -ixon -ixoff -iuclc -ixany ++-imaxbel -opost -isig -icanon -xcase min 1 time 0 ++@end example ++ ++@noindent ++May be negated. If negated, same as @code{cooked}. ++ ++@item cbreak ++@opindex cbreak ++Same as @option{-icanon}. May be negated. If negated, same as ++@code{icanon}. ++ ++@item pass8 ++@opindex pass8 ++@cindex eight-bit characters ++Same as @code{-parenb -istrip cs8}. May be negated. If negated, ++same as @code{parenb istrip cs7}. ++ ++@item litout ++@opindex litout ++Same as @option{-parenb -istrip -opost cs8}. May be negated. ++If negated, same as @code{parenb istrip opost cs7}. ++ ++@item decctlq ++@opindex decctlq ++Same as @option{-ixany}. Non-@acronym{POSIX}. May be negated. ++ ++@item tabs ++@opindex tabs ++Same as @code{tab0}. Non-@acronym{POSIX}. May be negated. If negated, same ++as @code{tab3}. ++ ++@item lcase ++@itemx LCASE ++@opindex lcase ++@opindex LCASE ++Same as @code{xcase iuclc olcuc}. Non-@acronym{POSIX}. May be negated. ++(Used for terminals with uppercase characters only.) ++ ++@item crt ++@opindex crt ++Same as @code{echoe echoctl echoke}. ++ ++@item dec ++@opindex dec ++Same as @code{echoe echoctl echoke -ixany intr ^C erase ^? kill ^U}.
++@end table ++ ++ ++@node Characters ++@subsection Special characters ++ ++@cindex special characters ++@cindex characters, special ++ ++The special characters' default values vary from system to system. ++They are set with the syntax @samp{name value}, where the names are ++listed below and the value can be given either literally, in hat ++notation (@samp{^@var{c}}), or as an integer which may start with ++@samp{0x} to indicate hexadecimal, @samp{0} to indicate octal, or ++any other digit to indicate decimal. ++ ++@cindex disabling special characters ++@kindex u@r{, and disabling special characters} ++For GNU stty, giving a value of @code{^-} or @code{undef} disables that ++special character. (This is incompatible with Ultrix @command{stty}, ++which uses a value of @samp{u} to disable a special character. GNU ++@command{stty} treats a value @samp{u} like any other, namely to set that ++special character to @key{U}.) ++ ++@table @samp ++ ++@item intr ++@opindex intr ++Send an interrupt signal. ++ ++@item quit ++@opindex quit ++Send a quit signal. ++ ++@item erase ++@opindex erase ++Erase the last character typed. ++ ++@item kill ++@opindex kill ++Erase the current line. ++ ++@item eof ++@opindex eof ++Send an end of file (terminate the input). ++ ++@item eol ++@opindex eol ++End the line. ++ ++@item eol2 ++@opindex eol2 ++Alternate character to end the line. Non-@acronym{POSIX}. ++ ++@item swtch ++@opindex swtch ++Switch to a different shell layer. Non-@acronym{POSIX}. ++ ++@item start ++@opindex start ++Restart the output after stopping it. ++ ++@item stop ++@opindex stop ++Stop the output. ++ ++@item susp ++@opindex susp ++Send a terminal stop signal. ++ ++@item dsusp ++@opindex dsusp ++Send a terminal stop signal after flushing the input. Non-@acronym{POSIX}. ++ ++@item rprnt ++@opindex rprnt ++Redraw the current line. Non-@acronym{POSIX}. ++ ++@item werase ++@opindex werase ++Erase the last word typed. Non-@acronym{POSIX}. 
++ ++@item lnext ++@opindex lnext ++Enter the next character typed literally, even if it is a special ++character. Non-@acronym{POSIX}. ++@end table ++ ++ ++@node Special ++@subsection Special settings ++ ++@cindex special settings ++ ++@table @samp ++@item min @var{n} ++@opindex min ++Set the minimum number of characters that will satisfy a read until ++the time value has expired, when @option{-icanon} is set. ++ ++@item time @var{n} ++@opindex time ++Set the number of tenths of a second before reads time out if the minimum ++number of characters have not been read, when @option{-icanon} is set. ++ ++@item ispeed @var{n} ++@opindex ispeed ++Set the input speed to @var{n}. ++ ++@item ospeed @var{n} ++@opindex ospeed ++Set the output speed to @var{n}. ++ ++@item rows @var{n} ++@opindex rows ++Tell the tty kernel driver that the terminal has @var{n} rows. Non-@acronym{POSIX}. ++ ++@item cols @var{n} ++@itemx columns @var{n} ++@opindex cols ++@opindex columns ++Tell the kernel that the terminal has @var{n} columns. Non-@acronym{POSIX}. ++ ++@item size ++@opindex size ++@vindex LINES ++@vindex COLUMNS ++Print the number of rows and columns that the kernel thinks the ++terminal has. (Systems that don't support rows and columns in the kernel ++typically use the environment variables @env{LINES} and @env{COLUMNS} ++instead; however, GNU @command{stty} does not know anything about them.) ++Non-@acronym{POSIX}. ++ ++@item line @var{n} ++@opindex line ++Use line discipline @var{n}. Non-@acronym{POSIX}. ++ ++@item speed ++@opindex speed ++Print the terminal speed. ++ ++@item @var{n} ++@cindex baud rate, setting ++Set the input and output speeds to @var{n}. @var{n} can be one of: 0 ++50 75 110 134 134.5 150 200 300 600 1200 1800 2400 4800 9600 19200 ++38400 @code{exta} @code{extb}. @code{exta} is the same as 19200; ++@code{extb} is the same as 38400. Many systems, including GNU/Linux, ++support higher speeds. 
The @command{stty} command includes support ++for speeds of ++57600, ++115200, ++230400, ++460800, ++500000, ++576000, ++921600, ++1000000, ++1152000, ++1500000, ++2000000, ++2500000, ++3000000, ++3500000, ++or ++4000000 where the system supports these. ++0 hangs up the line if @option{-clocal} is set. ++@end table ++ ++ ++@node printenv invocation ++@section @command{printenv}: Print all or some environment variables ++ ++@pindex printenv ++@cindex printing all or some environment variables ++@cindex environment variables, printing ++ ++@command{printenv} prints environment variable values. Synopsis: ++ ++@example ++printenv [@var{option}] [@var{variable}]@dots{} ++@end example ++ ++If no @var{variable}s are specified, @command{printenv} prints the value of ++every environment variable. Otherwise, it prints the value of each ++@var{variable} that is set, and nothing for those that are not set. ++ ++The only options are a lone @option{--help} or @option{--version}. ++@xref{Common options}. ++ ++@cindex exit status of @command{printenv} ++Exit status: ++ ++@display ++0 if all variables specified were found ++1 if at least one specified variable was not found ++2 if a write error occurred ++@end display ++ ++ ++@node tty invocation ++@section @command{tty}: Print file name of terminal on standard input ++ ++@pindex tty ++@cindex print terminal file name ++@cindex terminal file name, printing ++ ++@command{tty} prints the file name of the terminal connected to its standard ++input. It prints @samp{not a tty} if standard input is not a terminal. ++Synopsis: ++ ++@example ++tty [@var{option}]@dots{} ++@end example ++ ++The program accepts the following option. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -s ++@itemx --silent ++@itemx --quiet ++@opindex -s ++@opindex --silent ++@opindex --quiet ++Print nothing; only return an exit status. 
++ ++@end table ++ ++@cindex exit status of @command{tty} ++Exit status: ++ ++@display ++0 if standard input is a terminal ++1 if standard input is not a terminal ++2 if given incorrect arguments ++3 if a write error occurs ++@end display ++ ++ ++@node User information ++@chapter User information ++ ++@cindex user information, commands for ++@cindex commands for printing user information ++ ++This section describes commands that print user-related information: ++logins, groups, and so forth. ++ ++@menu ++* id invocation:: Print user identity. ++* logname invocation:: Print current login name. ++* whoami invocation:: Print effective user ID. ++* groups invocation:: Print group names a user is in. ++* users invocation:: Print login names of users currently logged in. ++* who invocation:: Print who is currently logged in. ++@end menu ++ ++ ++@node id invocation ++@section @command{id}: Print user identity ++ ++@pindex id ++@cindex real user and group IDs, printing ++@cindex effective user and group IDs, printing ++@cindex printing real and effective user and group IDs ++ ++@command{id} prints information about the given user, or the process ++running it if no user is specified. Synopsis: ++ ++@example ++id [@var{option}]@dots{} [@var{username}] ++@end example ++ ++@vindex POSIXLY_CORRECT ++By default, it prints the real user ID, real group ID, effective user ID ++if different from the real user ID, effective group ID if different from ++the real group ID, and supplemental group IDs. ++In addition, if SELinux ++is enabled and the @env{POSIXLY_CORRECT} environment variable is not set, ++then print @samp{context=@var{c}}, where @var{c} is the security context. ++ ++Each of these numeric values is preceded by an identifying string and ++followed by the corresponding user or group name in parentheses. ++ ++The options cause @command{id} to print only part of the above information. ++Also see @ref{Common options}. 
++ ++@table @samp ++@item -g ++@itemx --group ++@opindex -g ++@opindex --group ++Print only the group ID. ++ ++@item -G ++@itemx --groups ++@opindex -G ++@opindex --groups ++Print only the group ID and the supplementary groups. ++ ++@item -n ++@itemx --name ++@opindex -n ++@opindex --name ++Print the user or group name instead of the ID number. Requires ++@option{-u}, @option{-g}, or @option{-G}. ++ ++@item -r ++@itemx --real ++@opindex -r ++@opindex --real ++Print the real, instead of effective, user or group ID. Requires ++@option{-u}, @option{-g}, or @option{-G}. ++ ++@item -u ++@itemx --user ++@opindex -u ++@opindex --user ++Print only the user ID. ++ ++@item -Z ++@itemx --context ++@opindex -Z ++@opindex --context ++@cindex SELinux ++@cindex security context ++Print only the security context of the current user. ++If SELinux is disabled then print a warning and ++set the exit status to 1. ++ ++@end table ++ ++@exitstatus ++ ++@macro primaryAndSupplementaryGroups{cmd,arg} ++Primary and supplementary groups for a process are normally inherited ++from its parent and are usually unchanged since login. This means ++that if you change the group database after logging in, @command{\cmd\} ++will not reflect your changes within your existing login session. ++Running @command{\cmd\} with a \arg\ causes the user and group ++database to be consulted afresh, and so will give a different result. ++@end macro ++@primaryAndSupplementaryGroups{id,user argument} ++ ++@node logname invocation ++@section @command{logname}: Print current login name ++ ++@pindex logname ++@cindex printing user's login name ++@cindex login name, printing ++@cindex user name, printing ++ ++@flindex utmp ++@command{logname} prints the calling user's name, as found in a ++system-maintained file (often @file{/var/run/utmp} or ++@file{/etc/utmp}), and exits with a status of 0. If there is no entry ++for the calling process, @command{logname} prints ++an error message and exits with a status of 1. 
++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@exitstatus ++ ++ ++@node whoami invocation ++@section @command{whoami}: Print effective user ID ++ ++@pindex whoami ++@cindex effective user ID, printing ++@cindex printing the effective user ID ++ ++@command{whoami} prints the user name associated with the current ++effective user ID. It is equivalent to the command @samp{id -un}. ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@exitstatus ++ ++ ++@node groups invocation ++@section @command{groups}: Print group names a user is in ++ ++@pindex groups ++@cindex printing groups a user is in ++@cindex supplementary groups, printing ++ ++@command{groups} prints the names of the primary and any supplementary ++groups for each given @var{username}, or the current process if no names ++are given. If more than one name is given, the name of each user is ++printed before ++the list of that user's groups and the user name is separated from the ++group list by a colon. Synopsis: ++ ++@example ++groups [@var{username}]@dots{} ++@end example ++ ++The group lists are equivalent to the output of the command @samp{id -Gn}. ++ ++@primaryAndSupplementaryGroups{groups,list of users} ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@exitstatus ++ ++ ++@node users invocation ++@section @command{users}: Print login names of users currently logged in ++ ++@pindex users ++@cindex printing current usernames ++@cindex usernames, printing current ++ ++@cindex login sessions, printing users with ++@command{users} prints on a single line a blank-separated list of user ++names of users currently logged in to the current host. Each user name ++corresponds to a login session, so if a user has more than one login ++session, that user's name will appear the same number of times in the ++output. 
Synopsis: ++ ++@example ++users [@var{file}] ++@end example ++ ++@flindex utmp ++@flindex wtmp ++With no @var{file} argument, @command{users} extracts its information from ++a system-maintained file (often @file{/var/run/utmp} or ++@file{/etc/utmp}). If a file argument is given, @command{users} uses ++that file instead. A common choice is @file{/var/log/wtmp}. ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@exitstatus ++ ++ ++@node who invocation ++@section @command{who}: Print who is currently logged in ++ ++@pindex who ++@cindex printing current user information ++@cindex information, about current users ++ ++@command{who} prints information about users who are currently logged on. ++Synopsis: ++ ++@example ++@command{who} [@var{option}] [@var{file}] [am i] ++@end example ++ ++@cindex terminal lines, currently used ++@cindex login time ++@cindex remote hostname ++If given no non-option arguments, @command{who} prints the following ++information for each user currently logged on: login name, terminal ++line, login time, and remote hostname or X display. ++ ++@flindex utmp ++@flindex wtmp ++If given one non-option argument, @command{who} uses that instead of ++a default system-maintained file (often @file{/var/run/utmp} or ++@file{/etc/utmp}) as the name of the file containing the record of ++users logged on. @file{/var/log/wtmp} is commonly given as an argument ++to @command{who} to look at who has previously logged on. ++ ++@opindex am i ++@opindex who am i ++If given two non-option arguments, @command{who} prints only the entry ++for the user running it (determined from its standard input), preceded ++by the hostname. Traditionally, the two arguments given are @samp{am ++i}, as in @samp{who am i}. ++ ++@vindex TZ ++Time stamps are listed according to the time zone rules specified by ++the @env{TZ} environment variable, or by the system default rules if ++@env{TZ} is not set. 
@xref{TZ Variable,, Specifying the Time Zone ++with @env{TZ}, libc, The GNU C Library Reference Manual}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -a ++@itemx --all ++@opindex -a ++@opindex --all ++Same as @samp{-b -d --login -p -r -t -T -u}. ++ ++@item -b ++@itemx --boot ++@opindex -b ++@opindex --boot ++Print the date and time of last system boot. ++ ++@item -d ++@itemx --dead ++@opindex -d ++@opindex --dead ++Print information corresponding to dead processes. ++ ++@item -H ++@itemx --heading ++@opindex -H ++@opindex --heading ++Print a line of column headings. ++ ++@item -l ++@itemx --login ++@opindex -l ++@opindex --login ++List only the entries that correspond to processes via which the ++system is waiting for a user to login. The user name is always @samp{LOGIN}. ++ ++@item --lookup ++@opindex --lookup ++Attempt to canonicalize hostnames found in utmp through a DNS lookup. This ++is not the default because it can cause significant delays on systems with ++automatic dial-up internet access. ++ ++@item -m ++@opindex -m ++Same as @samp{who am i}. ++ ++@item -p ++@itemx --process ++@opindex -p ++@opindex --process ++List active processes spawned by init. ++ ++@item -q ++@itemx --count ++@opindex -q ++@opindex --count ++Print only the login names and the number of users logged on. ++Overrides all other options. ++ ++@item -r ++@itemx --runlevel ++@opindex -r ++@opindex --runlevel ++Print the current (and maybe previous) run-level of the init process. ++ ++@item -s ++@opindex -s ++Ignored; for compatibility with other versions of @command{who}. ++ ++@item -t ++@itemx --time ++@opindex -t ++@opindex --time ++Print last system clock change. ++ ++@item -u ++@opindex -u ++@cindex idle time ++After the login time, print the number of hours and minutes that the ++user has been idle. @samp{.} means the user was active in the last minute. ++@samp{old} means the user has been idle for more than 24 hours.
++ ++@item -w ++@itemx -T ++@itemx --mesg ++@itemx --message ++@itemx --writable ++@opindex -w ++@opindex -T ++@opindex --mesg ++@opindex --message ++@opindex --writable ++@cindex message status ++@pindex write@r{, allowed} ++After each login name print a character indicating the user's message status: ++ ++@display ++@samp{+} allowing @code{write} messages ++@samp{-} disallowing @code{write} messages ++@samp{?} cannot find terminal device ++@end display ++ ++@end table ++ ++@exitstatus ++ ++ ++@node System context ++@chapter System context ++ ++@cindex system context ++@cindex context, system ++@cindex commands for system context ++ ++This section describes commands that print or change system-wide ++information. ++ ++@menu ++* date invocation:: Print or set system date and time. ++* arch invocation:: Print machine hardware name. ++* uname invocation:: Print system information. ++* hostname invocation:: Print or set system name. ++* hostid invocation:: Print numeric host identifier. ++* uptime invocation:: Print system uptime and load. ++@end menu ++ ++@node date invocation ++@section @command{date}: Print or set system date and time ++ ++@pindex date ++@cindex time, printing or setting ++@cindex printing the current time ++ ++Synopses: ++ ++@example ++date [@var{option}]@dots{} [+@var{format}] ++date [-u|--utc|--universal] @c this avoids a newline in the output ++[ MMDDhhmm[[CC]YY][.ss] ] ++@end example ++ ++@vindex LC_TIME ++Invoking @command{date} with no @var{format} argument is equivalent to invoking ++it with a default format that depends on the @env{LC_TIME} locale category. ++In the default C locale, this format is @samp{'+%a %b %e %H:%M:%S %Z %Y'}, ++so the output looks like @samp{Thu Mar @ 3 13:47:51 PST 2005}. ++ ++@vindex TZ ++Normally, @command{date} uses the time zone rules indicated by the ++@env{TZ} environment variable, or the system default rules if @env{TZ} ++is not set. 
@xref{TZ Variable,, Specifying the Time Zone with ++@env{TZ}, libc, The GNU C Library Reference Manual}. ++ ++@findex strftime @r{and @command{date}} ++@cindex time formats ++@cindex formatting times ++If given an argument that starts with a @samp{+}, @command{date} prints the ++current date and time (or the date and time specified by the ++@option{--date} option, see below) in the format defined by that argument, ++which is similar to that of the @code{strftime} function. Except for ++conversion specifiers, which start with @samp{%}, characters in the ++format string are printed unchanged. The conversion specifiers are ++described below. ++ ++@exitstatus ++ ++@menu ++* Time conversion specifiers:: %[HIklMNpPrRsSTXzZ] ++* Date conversion specifiers:: %[aAbBcCdDeFgGhjmuUVwWxyY] ++* Literal conversion specifiers:: %[%nt] ++* Padding and other flags:: Pad with zeros, spaces, etc. ++* Setting the time:: Changing the system clock. ++* Options for date:: Instead of the current time. ++@detailmenu ++* Date input formats:: Specifying date strings. ++@end detailmenu ++* Examples of date:: Examples. ++@end menu ++ ++@node Time conversion specifiers ++@subsection Time conversion specifiers ++ ++@cindex time conversion specifiers ++@cindex conversion specifiers, time ++ ++@command{date} conversion specifiers related to times. ++ ++@table @samp ++@item %H ++hour (@samp{00}@dots{}@samp{23}) ++@item %I ++hour (@samp{01}@dots{}@samp{12}) ++@item %k ++hour (@samp{ 0}@dots{}@samp{23}). ++This is a @acronym{GNU} extension. ++@item %l ++hour (@samp{ 1}@dots{}@samp{12}). ++This is a @acronym{GNU} extension. ++@item %M ++minute (@samp{00}@dots{}@samp{59}) ++@item %N ++nanoseconds (@samp{000000000}@dots{}@samp{999999999}). ++This is a @acronym{GNU} extension. ++@item %p ++locale's equivalent of either @samp{AM} or @samp{PM}; ++blank in many locales. ++Noon is treated as @samp{PM} and midnight as @samp{AM}. ++@item %P ++like @samp{%p}, except lower case. 
++This is a @acronym{GNU} extension. ++@item %r ++locale's 12-hour clock time (e.g., @samp{11:11:04 PM}) ++@item %R ++24-hour hour and minute. Same as @samp{%H:%M}. ++This is a @acronym{GNU} extension. ++@item %s ++@cindex epoch, seconds since ++@cindex seconds since the epoch ++@cindex beginning of time ++seconds since the epoch, i.e., since 1970-01-01 00:00:00 UTC. ++Leap seconds are not counted unless leap second support is available. ++@xref{%s-examples}, for examples. ++This is a @acronym{GNU} extension. ++@item %S ++second (@samp{00}@dots{}@samp{60}). ++This may be @samp{60} if leap seconds are supported. ++@item %T ++24-hour hour, minute, and second. Same as @samp{%H:%M:%S}. ++@item %X ++locale's time representation (e.g., @samp{23:13:48}) ++@item %z ++@w{@acronym{RFC} 2822/@acronym{ISO} 8601} style numeric time zone ++(e.g., @samp{-0600} or @samp{+0530}), or nothing if no ++time zone is determinable. This value reflects the numeric time zone ++appropriate for the current time, using the time zone rules specified ++by the @env{TZ} environment variable. ++The time (and optionally, the time zone rules) can be overridden ++by the @option{--date} option. ++This is a @acronym{GNU} extension. ++@item %:z ++@w{@acronym{RFC} 3339/@acronym{ISO} 8601} style numeric time zone with ++@samp{:} (e.g., @samp{-06:00} or @samp{+05:30}), or nothing if no time ++zone is determinable. ++This is a @acronym{GNU} extension. ++@item %::z ++Numeric time zone to the nearest second with @samp{:} (e.g., ++@samp{-06:00:00} or @samp{+05:30:00}), or nothing if no time zone is ++determinable. ++This is a @acronym{GNU} extension. ++@item %:::z ++Numeric time zone with @samp{:} using the minimum necessary precision ++(e.g., @samp{-06}, @samp{+05:30}, or @samp{-04:56:02}), or nothing if ++no time zone is determinable. ++This is a @acronym{GNU} extension. ++@item %Z ++alphabetic time zone abbreviation (e.g., @samp{EDT}), or nothing if no ++time zone is determinable. 
See @samp{%z} for how it is determined. ++@end table ++ ++ ++@node Date conversion specifiers ++@subsection Date conversion specifiers ++ ++@cindex date conversion specifiers ++@cindex conversion specifiers, date ++ ++@command{date} conversion specifiers related to dates. ++ ++@table @samp ++@item %a ++locale's abbreviated weekday name (e.g., @samp{Sun}) ++@item %A ++locale's full weekday name, variable length (e.g., @samp{Sunday}) ++@item %b ++locale's abbreviated month name (e.g., @samp{Jan}) ++@item %B ++locale's full month name, variable length (e.g., @samp{January}) ++@item %c ++locale's date and time (e.g., @samp{Thu Mar @ 3 23:05:25 2005}) ++@item %C ++century. This is like @samp{%Y}, except the last two digits are omitted. ++For example, it is @samp{20} if @samp{%Y} is @samp{2000}, ++and is @samp{-0} if @samp{%Y} is @samp{-001}. ++It is normally at least two characters, but it may be more. ++@item %d ++day of month (e.g., @samp{01}) ++@item %D ++date; same as @samp{%m/%d/%y} ++@item %e ++day of month, space padded; same as @samp{%_d} ++@item %F ++full date in @acronym{ISO} 8601 format; same as @samp{%Y-%m-%d}. ++This is a good choice for a date format, as it is standard and ++is easy to sort in the usual case where years are in the range ++0000@dots{}9999. ++This is a @acronym{GNU} extension. ++@item %g ++year corresponding to the @acronym{ISO} week number, but without the century ++(range @samp{00} through @samp{99}). This has the same format and value ++as @samp{%y}, except that if the @acronym{ISO} week number (see ++@samp{%V}) belongs ++to the previous or next year, that year is used instead. ++This is a @acronym{GNU} extension. ++@item %G ++year corresponding to the @acronym{ISO} week number. This has the ++same format and value as @samp{%Y}, except that if the @acronym{ISO} ++week number (see ++@samp{%V}) belongs to the previous or next year, that year is used ++instead. 
++It is normally useful only if @samp{%V} is also used; ++for example, the format @samp{%G-%m-%d} is probably a mistake, ++since it combines the ISO week number year with the conventional month and day. ++This is a @acronym{GNU} extension. ++@item %h ++same as @samp{%b} ++@item %j ++day of year (@samp{001}@dots{}@samp{366}) ++@item %m ++month (@samp{01}@dots{}@samp{12}) ++@item %u ++day of week (@samp{1}@dots{}@samp{7}) with @samp{1} corresponding to Monday ++@item %U ++week number of year, with Sunday as the first day of the week ++(@samp{00}@dots{}@samp{53}). ++Days in a new year preceding the first Sunday are in week zero. ++@item %V ++@acronym{ISO} week number, that is, the ++week number of year, with Monday as the first day of the week ++(@samp{01}@dots{}@samp{53}). ++If the week containing January 1 has four or more days in ++the new year, then it is considered week 1; otherwise, it is week 53 of ++the previous year, and the next week is week 1. (See the @acronym{ISO} 8601 ++standard.) ++@item %w ++day of week (@samp{0}@dots{}@samp{6}) with 0 corresponding to Sunday ++@item %W ++week number of year, with Monday as first day of week ++(@samp{00}@dots{}@samp{53}). ++Days in a new year preceding the first Monday are in week zero. ++@item %x ++locale's date representation (e.g., @samp{12/31/99}) ++@item %y ++last two digits of year (@samp{00}@dots{}@samp{99}) ++@item %Y ++year. This is normally at least four characters, but it may be more. ++Year @samp{0000} precedes year @samp{0001}, and year @samp{-001} ++precedes year @samp{0000}. ++@end table ++ ++ ++@node Literal conversion specifiers ++@subsection Literal conversion specifiers ++ ++@cindex literal conversion specifiers ++@cindex conversion specifiers, literal ++ ++@command{date} conversion specifiers that produce literal strings. 
++ ++@table @samp ++@item %% ++a literal % ++@item %n ++a newline ++@item %t ++a horizontal tab ++@end table ++ ++ ++@node Padding and other flags ++@subsection Padding and other flags ++ ++@cindex numeric field padding ++@cindex padding of numeric fields ++@cindex fields, padding numeric ++ ++Unless otherwise specified, @command{date} normally pads numeric fields ++with zeros, so that, for ++example, numeric months are always output as two digits. ++Seconds since the epoch are not padded, though, ++since there is no natural width for them. ++ ++As a @acronym{GNU} extension, @command{date} recognizes any of the ++following optional flags after the @samp{%}: ++ ++@table @samp ++@item - ++(hyphen) Do not pad the field; useful if the output is intended for ++human consumption. ++@item _ ++(underscore) Pad with spaces; useful if you need a fixed ++number of characters in the output, but zeros are too distracting. ++@item 0 ++(zero) Pad with zeros even if the conversion specifier ++would normally pad with spaces. ++@item ^ ++Use upper case characters if possible. ++@item # ++Use opposite case characters if possible. ++A field that is normally upper case becomes lower case, and vice versa. ++@end table ++ ++@noindent ++Here are some examples of padding: ++ ++@example ++date +%d/%m -d "Feb 1" ++@result{} 01/02 ++date +%-d/%-m -d "Feb 1" ++@result{} 1/2 ++date +%_d/%_m -d "Feb 1" ++@result{} 1/ 2 ++@end example ++ ++As a @acronym{GNU} extension, you can specify the field width ++(after any flag, if present) as a decimal number. If the natural size of the ++output of the field has less than the specified number of characters, ++the result is written right adjusted and padded to the given ++size. For example, @samp{%9B} prints the right adjusted month name in ++a field of width 9. ++ ++An optional modifier can follow the optional flag and width ++specification. The modifiers are: ++ ++@table @samp ++@item E ++Use the locale's alternate representation for date and time. 
This ++modifier applies to the @samp{%c}, @samp{%C}, @samp{%x}, @samp{%X}, ++@samp{%y} and @samp{%Y} conversion specifiers. In a Japanese locale, for ++example, @samp{%Ex} might yield a date format based on the Japanese ++Emperors' reigns. ++ ++@item O ++Use the locale's alternate numeric symbols for numbers. This modifier ++applies only to numeric conversion specifiers. ++@end table ++ ++If the format supports the modifier but no alternate representation ++is available, it is ignored. ++ ++ ++@node Setting the time ++@subsection Setting the time ++ ++@cindex setting the time ++@cindex time setting ++@cindex appropriate privileges ++ ++If given an argument that does not start with @samp{+}, @command{date} sets ++the system clock to the date and time specified by that argument (as ++described below). You must have appropriate privileges to set the ++system clock. The @option{--date} and @option{--set} options may not be ++used with such an argument. The @option{--universal} option may be used ++with such an argument to indicate that the specified date and time are ++relative to Coordinated Universal Time rather than to the local time ++zone. ++ ++The argument must consist entirely of digits, which have the following ++meaning: ++ ++@table @samp ++@item MM ++month ++@item DD ++day within month ++@item hh ++hour ++@item mm ++minute ++@item CC ++first two digits of year (optional) ++@item YY ++last two digits of year (optional) ++@item ss ++second (optional) ++@end table ++ ++The @option{--set} option also sets the system clock; see the next section. ++ ++ ++@node Options for date ++@subsection Options for @command{date} ++ ++@cindex @command{date} options ++@cindex options for @command{date} ++ ++The program accepts the following options. Also see @ref{Common options}. 
++ ++@table @samp ++ ++@item -d @var{datestr} ++@itemx --date=@var{datestr} ++@opindex -d ++@opindex --date ++@cindex parsing date strings ++@cindex date strings, parsing ++@cindex arbitrary date strings, parsing ++@opindex yesterday ++@opindex tomorrow ++@opindex next @var{day} ++@opindex last @var{day} ++Display the date and time specified in @var{datestr} instead of the ++current date and time. @var{datestr} can be in almost any common ++format. It can contain month names, time zones, @samp{am} and @samp{pm}, ++@samp{yesterday}, etc. For example, @option{--date="2004-02-27 ++14:19:13.489392193 +0530"} specifies the instant of time that is ++489,392,193 nanoseconds after February 27, 2004 at 2:19:13 PM in a ++time zone that is 5 hours and 30 minutes east of @acronym{UTC}.@* ++Note: input currently must be in locale independent format. E.g., the ++LC_TIME=C below is needed to print back the correct date in many locales: ++@example ++date -d "$(LC_TIME=C date)" ++@end example ++@xref{Date input formats}. ++ ++@item -f @var{datefile} ++@itemx --file=@var{datefile} ++@opindex -f ++@opindex --file ++Parse each line in @var{datefile} as with @option{-d} and display the ++resulting date and time. If @var{datefile} is @samp{-}, use standard ++input. This is useful when you have many dates to process, because the ++system overhead of starting up the @command{date} executable many times can ++be considerable. ++ ++@item -r @var{file} ++@itemx --reference=@var{file} ++@opindex -r ++@opindex --reference ++Display the date and time of the last modification of @var{file}, ++instead of the current date and time. ++ ++@item -R ++@itemx --rfc-822 ++@itemx --rfc-2822 ++@opindex -R ++@opindex --rfc-822 ++@opindex --rfc-2822 ++Display the date and time using the format @samp{%a, %d %b %Y %H:%M:%S ++%z}, evaluated in the C locale so abbreviations are always in English. 
++For example: ++ ++@example ++Fri, 09 Sep 2005 13:51:39 -0700 ++@end example ++ ++This format conforms to ++@uref{ftp://ftp.rfc-editor.org/in-notes/rfc2822.txt, Internet ++@acronym{RFCs} 2822} and ++@uref{ftp://ftp.rfc-editor.org/in-notes/rfc822.txt, 822}, the ++current and previous standards for Internet email. ++ ++@item --rfc-3339=@var{timespec} ++@opindex --rfc-3339=@var{timespec} ++Display the date using a format specified by ++@uref{ftp://ftp.rfc-editor.org/in-notes/rfc3339.txt, Internet ++@acronym{RFC} 3339}. This is a subset of the @acronym{ISO} 8601 ++format, except that it also permits applications to use a space rather ++than a @samp{T} to separate dates from times. Unlike the other ++standard formats, @acronym{RFC} 3339 format is always suitable as ++input for the @option{--date} (@option{-d}) and @option{--file} ++(@option{-f}) options, regardless of the current locale. ++ ++The argument @var{timespec} specifies how much of the time to include. ++It can be one of the following: ++ ++@table @samp ++@item date ++Print just the full-date, e.g., @samp{2005-09-14}. ++This is equivalent to the format @samp{%Y-%m-%d}. ++ ++@item seconds ++Print the full-date and full-time separated by a space, e.g., ++@samp{2005-09-14 00:56:06+05:30}. The output ends with a numeric ++time-offset; here the @samp{+05:30} means that local time is five ++hours and thirty minutes east of @acronym{UTC}. This is equivalent to ++the format @samp{%Y-%m-%d %H:%M:%S%:z}. ++ ++@item ns ++Like @samp{seconds}, but also print nanoseconds, e.g., ++@samp{2005-09-14 00:56:06.998458565+05:30}. ++This is equivalent to the format @samp{%Y-%m-%d %H:%M:%S.%N%:z}. ++ ++@end table ++ ++@item -s @var{datestr} ++@itemx --set=@var{datestr} ++@opindex -s ++@opindex --set ++Set the date and time to @var{datestr}. See @option{-d} above. 
++ ++@item -u ++@itemx --utc ++@itemx --universal ++@opindex -u ++@opindex --utc ++@opindex --universal ++@cindex Coordinated Universal Time ++@cindex UTC ++@cindex Greenwich Mean Time ++@cindex GMT ++@vindex TZ ++Use Coordinated Universal Time (@acronym{UTC}) by operating as if the ++@env{TZ} environment variable were set to the string @samp{UTC0}. ++Coordinated ++Universal Time is often called ``Greenwich Mean Time'' (@sc{gmt}) for ++historical reasons. ++@end table ++ ++ ++@node Examples of date ++@subsection Examples of @command{date} ++ ++@cindex examples of @command{date} ++ ++Here are a few examples. Also see the documentation for the @option{-d} ++option in the previous section. ++ ++@itemize @bullet ++ ++@item ++To print the date of the day before yesterday: ++ ++@example ++date --date='2 days ago' ++@end example ++ ++@item ++To print the date of the day three months and one day hence: ++ ++@example ++date --date='3 months 1 day' ++@end example ++ ++@item ++To print the day of year of Christmas in the current year: ++ ++@example ++date --date='25 Dec' +%j ++@end example ++ ++@item ++To print the current full month name and the day of the month: ++ ++@example ++date '+%B %d' ++@end example ++ ++But this may not be what you want because for the first nine days of ++the month, the @samp{%d} expands to a zero-padded two-digit field, ++for example @samp{date -d 1may '+%B %d'} will print @samp{May 01}. 
++ ++@item ++To print a date without the leading zero for one-digit days ++of the month, you can use the (@acronym{GNU} extension) ++@samp{-} flag to suppress ++the padding altogether: ++ ++@example ++date -d 1may '+%B %-d' ++@end example ++ ++@item ++To print the current date and time in the format required by many ++non-@acronym{GNU} versions of @command{date} when setting the system clock: ++ ++@example ++date +%m%d%H%M%Y.%S ++@end example ++ ++@item ++To set the system clock forward by two minutes: ++ ++@example ++date --set='+2 minutes' ++@end example ++ ++@item ++To print the date in @acronym{RFC} 2822 format, ++use @samp{date --rfc-2822}. Here is some example output: ++ ++@example ++Fri, 09 Sep 2005 13:51:39 -0700 ++@end example ++ ++@anchor{%s-examples} ++@item ++To convert a date string to the number of seconds since the epoch ++(which is 1970-01-01 00:00:00 UTC), use the @option{--date} option with ++the @samp{%s} format. That can be useful in sorting and/or graphing ++and/or comparing data by date. The following command outputs the ++number of seconds since the epoch for the time two minutes after the ++epoch: ++ ++@example ++date --date='1970-01-01 00:02:00 +0000' +%s ++120 ++@end example ++ ++If you do not specify time zone information in the date string, ++@command{date} uses your computer's idea of the time zone when ++interpreting the string. For example, if your computer's time zone is ++that of Cambridge, Massachusetts, which was then 5 hours (i.e., 18,000 ++seconds) behind UTC: ++ ++@example ++# local time zone used ++date --date='1970-01-01 00:02:00' +%s ++18120 ++@end example ++ ++@item ++If you're sorting or graphing dated data, your raw date values may be ++represented as seconds since the epoch.
But few people can look at ++the date @samp{946684800} and casually note ``Oh, that's the first second ++of the year 2000 in Greenwich, England.'' ++ ++@example ++date --date='2000-01-01 UTC' +%s ++946684800 ++@end example ++ ++An alternative is to use the @option{--utc} (@option{-u}) option. ++Then you may omit @samp{UTC} from the date string. Although this ++produces the same result for @samp{%s} and many other format sequences, ++with a time zone offset different from zero, it would give a different ++result for zone-dependent formats like @samp{%z}. ++ ++@example ++date -u --date=2000-01-01 +%s ++946684800 ++@end example ++ ++To convert such an unwieldy number of seconds back to ++a more readable form, use a command like this: ++ ++@smallexample ++# local time zone used ++date -d '1970-01-01 UTC 946684800 seconds' +"%Y-%m-%d %T %z" ++1999-12-31 19:00:00 -0500 ++@end smallexample ++ ++Or if you do not mind depending on the @samp{@@} feature present since ++coreutils 5.3.0, you could shorten this to: ++ ++@smallexample ++date -d @@946684800 +"%F %T %z" ++1999-12-31 19:00:00 -0500 ++@end smallexample ++ ++Often it is better to output UTC-relative date and time: ++ ++@smallexample ++date -u -d '1970-01-01 946684800 seconds' +"%Y-%m-%d %T %z" ++2000-01-01 00:00:00 +0000 ++@end smallexample ++ ++@end itemize ++ ++ ++@node arch invocation ++@section @command{arch}: Print machine hardware name ++ ++@pindex arch ++@cindex print machine hardware name ++@cindex system information, printing ++ ++@command{arch} prints the machine hardware name, ++and is equivalent to @samp{uname -m}. ++Synopsis: ++ ++@example ++arch [@var{option}] ++@end example ++ ++The program accepts the @ref{Common options} only. 
++ ++@exitstatus ++ ++ ++@node uname invocation ++@section @command{uname}: Print system information ++ ++@pindex uname ++@cindex print system information ++@cindex system information, printing ++ ++@command{uname} prints information about the machine and operating system ++it is run on. If no options are given, @command{uname} acts as if the ++@option{-s} option were given. Synopsis: ++ ++@example ++uname [@var{option}]@dots{} ++@end example ++ ++If multiple options or @option{-a} are given, the selected information is ++printed in this order: ++ ++@example ++@var{kernel-name} @var{nodename} @var{kernel-release} @var{kernel-version} ++@var{machine} @var{processor} @var{hardware-platform} @var{operating-system} ++@end example ++ ++The information may contain internal spaces, so such output cannot be ++parsed reliably. In the following example, @var{kernel-version} is ++@samp{#4 SMP Tue Jun 5 11:24:08 PDT 2001}: ++ ++@smallexample ++uname -a ++@result{} Linux dum 2.2.18 #4 SMP Tue Jun 5 11:24:08 PDT 2001 i686 unknown unknown GNU/Linux ++@end smallexample ++ ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -a ++@itemx --all ++@opindex -a ++@opindex --all ++Print all of the below information, except omit the processor type ++and the hardware platform name if they are unknown. ++ ++@item -i ++@itemx --hardware-platform ++@opindex -i ++@opindex --hardware-platform ++@cindex implementation, hardware ++@cindex hardware platform ++@cindex platform, hardware ++Print the hardware platform name ++(sometimes called the hardware implementation). ++Print @samp{unknown} if the kernel does not make this information ++easily available, as is the case with Linux kernels. ++ ++@item -m ++@itemx --machine ++@opindex -m ++@opindex --machine ++@cindex machine type ++@cindex hardware class ++@cindex hardware type ++Print the machine hardware name (sometimes called the hardware class ++or hardware type).
++ ++@item -n ++@itemx --nodename ++@opindex -n ++@opindex --nodename ++@cindex hostname ++@cindex node name ++@cindex network node name ++Print the network node hostname. ++ ++@item -p ++@itemx --processor ++@opindex -p ++@opindex --processor ++@cindex host processor type ++Print the processor type (sometimes called the instruction set ++architecture or ISA). ++Print @samp{unknown} if the kernel does not make this information ++easily available, as is the case with Linux kernels. ++ ++@item -o ++@itemx --operating-system ++@opindex -o ++@opindex --operating-system ++@cindex operating system name ++Print the name of the operating system. ++ ++@item -r ++@itemx --kernel-release ++@opindex -r ++@opindex --kernel-release ++@cindex kernel release ++@cindex release of kernel ++Print the kernel release. ++ ++@item -s ++@itemx --kernel-name ++@opindex -s ++@opindex --kernel-name ++@cindex kernel name ++@cindex name of kernel ++Print the kernel name. ++@acronym{POSIX} 1003.1-2001 (@pxref{Standards conformance}) calls this ++``the implementation of the operating system'', because the ++@acronym{POSIX} specification itself has no notion of ``kernel''. ++The kernel name might be the same as the operating system name printed ++by the @option{-o} or @option{--operating-system} option, but it might ++differ. Some operating systems (e.g., FreeBSD, HP-UX) have the same ++name as their underlying kernels; others (e.g., GNU/Linux, Solaris) ++do not. ++ ++@item -v ++@itemx --kernel-version ++@opindex -v ++@opindex --kernel-version ++@cindex kernel version ++@cindex version of kernel ++Print the kernel version. ++ ++@end table ++ ++@exitstatus ++ ++ ++@node hostname invocation ++@section @command{hostname}: Print or set system name ++ ++@pindex hostname ++@cindex setting the hostname ++@cindex printing the hostname ++@cindex system name, printing ++@cindex appropriate privileges ++ ++With no arguments, @command{hostname} prints the name of the current host ++system. 
With one argument, it sets the current host name to the ++specified string. You must have appropriate privileges to set the host ++name. Synopsis: ++ ++@example ++hostname [@var{name}] ++@end example ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@exitstatus ++ ++ ++@node hostid invocation ++@section @command{hostid}: Print numeric host identifier ++ ++@pindex hostid ++@cindex printing the host identifier ++ ++@command{hostid} prints the numeric identifier of the current host ++in hexadecimal. This command accepts no arguments. ++The only options are @option{--help} and @option{--version}. ++@xref{Common options}. ++ ++For example, here's what it prints on one system I use: ++ ++@example ++$ hostid ++1bac013d ++@end example ++ ++On that system, the 32-bit quantity happens to be closely ++related to the system's Internet address, but that isn't always ++the case. ++ ++@exitstatus ++ ++@node uptime invocation ++@section @command{uptime}: Print system uptime and load ++ ++@pindex uptime ++@cindex printing the system uptime and load ++ ++@command{uptime} prints the current time, the system's uptime, the ++number of logged-in users and the current load average. ++ ++If an argument is specified, it is used as the file to be read ++to discover how many users are logged in. If no argument is ++specified, a system default is used (@command{uptime --help} indicates ++the default setting). ++ ++The only options are @option{--help} and @option{--version}. ++@xref{Common options}. ++ ++For example, here's what it prints right now on one system I use: ++ ++@example ++$ uptime ++ 14:07 up 3:35, 3 users, load average: 1.39, 1.15, 1.04 ++@end example ++ ++The precise method of calculation of load average varies somewhat ++between systems. 
Some systems calculate it as the average number of ++runnable processes over the last 1, 5 and 15 minutes, but some systems ++also include processes in the uninterruptible sleep state (that is, ++those processes which are waiting for disk I/O). The Linux kernel ++includes uninterruptible processes. ++ ++@node SELinux context ++@chapter SELinux context ++ ++@cindex SELinux context ++@cindex SELinux, context ++@cindex commands for SELinux context ++ ++This section describes commands for operations with SELinux ++contexts. ++ ++@menu ++* chcon invocation:: Change SELinux context of file ++* runcon invocation:: Run a command in specified SELinux context ++@end menu ++ ++@node chcon invocation ++@section @command{chcon}: Change SELinux context of file ++ ++@pindex chcon ++@cindex changing security context ++@cindex change SELinux context ++ ++@command{chcon} changes the SELinux security context of the selected files. ++Synopses: ++ ++@smallexample ++chcon [@var{option}]@dots{} @var{context} @var{file}@dots{} ++chcon [@var{option}]@dots{} [-u @var{user}] [-r @var{role}] [-l @var{range}] [-t @var{type}] @var{file}@dots{} ++chcon [@var{option}]@dots{} --reference=@var{rfile} @var{file}@dots{} ++@end smallexample ++ ++Change the SELinux security context of each @var{file} to @var{context}. ++With @option{--reference}, change the security context of each @var{file} ++to that of @var{rfile}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -h ++@itemx --no-dereference ++@opindex -h ++@opindex --no-dereference ++@cindex no dereference ++Affect symbolic links instead of any referenced file. ++ ++@item --reference=@var{rfile} ++@opindex --reference ++@cindex reference file ++Use @var{rfile}'s security context rather than specifying a @var{context} value. ++ ++@item -R ++@itemx --recursive ++@opindex -R ++@opindex --recursive ++Operate on files and directories recursively. ++ ++@choptH ++@xref{Traversing symlinks}. 
++ ++@choptL ++@xref{Traversing symlinks}. ++ ++@choptP ++@xref{Traversing symlinks}. ++ ++@item -v ++@itemx --verbose ++@opindex -v ++@opindex --verbose ++@cindex diagnostic ++Output a diagnostic for every file processed. ++ ++@item -u @var{user} ++@itemx --user=@var{user} ++@opindex -u ++@opindex --user ++Set user @var{user} in the target security context. ++ ++@item -r @var{role} ++@itemx --role=@var{role} ++@opindex -r ++@opindex --role ++Set role @var{role} in the target security context. ++ ++@item -t @var{type} ++@itemx --type=@var{type} ++@opindex -t ++@opindex --type ++Set type @var{type} in the target security context. ++ ++@item -l @var{range} ++@itemx --range=@var{range} ++@opindex -l ++@opindex --range ++Set range @var{range} in the target security context. ++ ++@end table ++ ++@exitstatus ++ ++@node runcon invocation ++@section @command{runcon}: Run a command in specified SELinux context ++ ++@pindex runcon ++@cindex run with security context ++ ++ ++@command{runcon} runs a command in a specified SELinux security context. ++ ++Synopses: ++@smallexample ++runcon @var{context} @var{command} [@var{args}] ++runcon [ -c ] [-u @var{user}] [-r @var{role}] [-t @var{type}] [-l @var{range}] @var{command} [@var{args}] ++@end smallexample ++ ++Run @var{command} with completely-specified @var{context}, or with ++current or transitioned security context modified by one or more of @var{range}, ++@var{role}, @var{type} and @var{user}. ++ ++If none of @option{-c}, @option{-t}, @option{-u}, @option{-r}, or @option{-l} ++is specified, the first argument is used as the complete context. ++Any additional arguments after @var{command} ++are interpreted as arguments to the command. ++ ++With neither @var{context} nor @var{command}, print the current security context. ++ ++The program accepts the following options. Also see @ref{Common options}. 
++ ++@table @samp ++ ++@item -c ++@itemx --compute ++@opindex -c ++@opindex --compute ++Compute process transition context before modifying. ++ ++@item -u @var{user} ++@itemx --user=@var{user} ++@opindex -u ++@opindex --user ++Set user @var{user} in the target security context. ++ ++@item -r @var{role} ++@itemx --role=@var{role} ++@opindex -r ++@opindex --role ++Set role @var{role} in the target security context. ++ ++@item -t @var{type} ++@itemx --type=@var{type} ++@opindex -t ++@opindex --type ++Set type @var{type} in the target security context. ++ ++@item -l @var{range} ++@itemx --range=@var{range} ++@opindex -l ++@opindex --range ++Set range @var{range} in the target security context. ++ ++@end table ++ ++@cindex exit status of @command{runcon} ++Exit status: ++ ++@display ++126 if @var{command} is found but cannot be invoked ++127 if @command{runcon} itself fails or if @var{command} cannot be found ++the exit status of @var{command} otherwise ++@end display ++ ++@node Modified command invocation ++@chapter Modified command invocation ++ ++@cindex modified command invocation ++@cindex invocation of commands, modified ++@cindex commands for invoking other commands ++ ++This section describes commands that run other commands in some context ++different than the current one: a modified environment, as a different ++user, etc. ++ ++@menu ++* chroot invocation:: Modify the root directory. ++* env invocation:: Modify environment variables. ++* nice invocation:: Modify niceness. ++* nohup invocation:: Immunize to hangups. ++* stdbuf invocation:: Modify buffering of standard streams. ++* su invocation:: Modify user and group ID. ++* timeout invocation:: Run with time limit. 
++@end menu ++ ++ ++@node chroot invocation ++@section @command{chroot}: Run a command with a different root directory ++ ++@pindex chroot ++@cindex running a program in a specified root directory ++@cindex root directory, running a program in a specified ++ ++@command{chroot} runs a command with a specified root directory. ++On many systems, only the super-user can do this.@footnote{However, ++some systems (e.g., FreeBSD) can be configured to allow certain regular ++users to use the @code{chroot} system call, and hence to run this program. ++Also, on Cygwin, anyone can run the @command{chroot} command, because the ++underlying function is non-privileged due to lack of support in MS-Windows.} ++Synopses: ++ ++@example ++chroot @var{option} @var{newroot} [@var{command} [@var{args}]@dots{}] ++chroot @var{option} ++@end example ++ ++Ordinarily, file names are looked up starting at the root of the ++directory structure, i.e., @file{/}. @command{chroot} changes the root to ++the directory @var{newroot} (which must exist) and then runs ++@var{command} with optional @var{args}. If @var{command} is not ++specified, the default is the value of the @env{SHELL} environment ++variable or @command{/bin/sh} if not set, invoked with the @option{-i} option. ++@var{command} must not be a special built-in utility ++(@pxref{Special built-in utilities}). ++ ++The program accepts the following options. Also see @ref{Common options}. ++Options must precede operands. ++ ++@table @samp ++ ++@item --userspec=@var{user}[:@var{group}] ++@opindex --userspec ++By default, @var{command} is run with the same credentials ++as the invoking process. ++Use this option to run it as a different @var{user} and/or with a ++different primary @var{group}. ++ ++@item --groups=@var{groups} ++@opindex --groups ++Use this option to specify the supplementary @var{groups} to be ++used by the new process. ++The items in the list (names or numeric IDs) must be separated by commas. 
++ ++@end table ++ ++Here are a few tips to help avoid common problems in using chroot. ++To start with a simple example, make @var{command} refer to a statically ++linked binary. If you were to use a dynamically linked executable, then ++you'd have to arrange to have the shared libraries in the right place under ++your new root directory. ++ ++For example, if you create a statically linked @command{ls} executable, ++and put it in @file{/tmp/empty}, you can run this command as root: ++ ++@example ++$ chroot /tmp/empty /ls -Rl / ++@end example ++ ++Then you'll see output like this: ++ ++@example ++/: ++total 1023 ++-rwxr-xr-x 1 0 0 1041745 Aug 16 11:17 ls ++@end example ++ ++If you want to use a dynamically linked executable, say @command{bash}, ++then first run @samp{ldd bash} to see what shared objects it needs. ++Then, in addition to copying the actual binary, also copy the listed ++files to the required positions under your intended new root directory. ++Finally, if the executable requires any other files (e.g., data, state, ++device files), copy them into place, too. ++ ++@cindex exit status of @command{chroot} ++Exit status: ++ ++@display ++1 if @command{chroot} itself fails ++126 if @var{command} is found but cannot be invoked ++127 if @var{command} cannot be found ++the exit status of @var{command} otherwise ++@end display ++ ++ ++@node env invocation ++@section @command{env}: Run a command in a modified environment ++ ++@pindex env ++@cindex environment, running a program in a modified ++@cindex modified environment, running a program in a ++@cindex running a program in a modified environment ++ ++@command{env} runs a command with a modified environment. Synopses: ++ ++@example ++env [@var{option}]@dots{} [@var{name}=@var{value}]@dots{} @c ++[@var{command} [@var{args}]@dots{}] ++env ++@end example ++ ++Operands of the form @samp{@var{variable}=@var{value}} set ++the environment variable @var{variable} to value @var{value}. 
++@var{value} may be empty (@samp{@var{variable}=}). Setting a variable ++to an empty value is different from unsetting it. ++These operands are evaluated left-to-right, so if two operands ++mention the same variable the earlier is ignored. ++ ++Environment variable names can be empty, and can contain any ++characters other than @samp{=} and @acronym{ASCII} @sc{nul}. ++However, it is wise to limit yourself to names that ++consist solely of underscores, digits, and @acronym{ASCII} letters, ++and that begin with a non-digit, as applications like the shell do not ++work well with other names. ++ ++@vindex PATH ++The first operand that does not contain the character @samp{=} ++specifies the program to invoke; it is ++searched for according to the @env{PATH} environment variable. Any ++remaining arguments are passed as arguments to that program. ++The program should not be a special built-in utility ++(@pxref{Special built-in utilities}). ++ ++@cindex environment, printing ++ ++If no command name is specified following the environment ++specifications, the resulting environment is printed. This is like ++specifying the @command{printenv} program. ++ ++The program accepts the following options. Also see @ref{Common options}. ++Options must precede operands. ++ ++@table @samp ++ ++@item -u @var{name} ++@itemx --unset=@var{name} ++@opindex -u ++@opindex --unset ++Remove variable @var{name} from the environment, if it was in the ++environment. ++ ++@item - ++@itemx -i ++@itemx --ignore-environment ++@opindex - ++@opindex -i ++@opindex --ignore-environment ++Start with an empty environment, ignoring the inherited environment. 
++ ++@end table ++ ++@cindex exit status of @command{env} ++Exit status: ++ ++@display ++0 if no @var{command} is specified and the environment is output ++1 if @command{env} itself fails ++126 if @var{command} is found but cannot be invoked ++127 if @var{command} cannot be found ++the exit status of @var{command} otherwise ++@end display ++ ++ ++@node nice invocation ++@section @command{nice}: Run a command with modified niceness ++ ++@pindex nice ++@cindex niceness ++@cindex scheduling, affecting ++@cindex appropriate privileges ++ ++@command{nice} prints or modifies a process's @dfn{niceness}, ++a parameter that affects whether the process is scheduled favorably. ++Synopsis: ++ ++@example ++nice [@var{option}]@dots{} [@var{command} [@var{arg}]@dots{}] ++@end example ++ ++If no arguments are given, @command{nice} prints the current niceness. ++Otherwise, @command{nice} runs the given @var{command} with its ++niceness adjusted. By default, its niceness is incremented by 10. ++ ++Niceness values range at least from @minus{}20 (process has high priority ++and gets more resources, thus slowing down other processes) through 19 ++(process has lower priority and runs slowly itself, but has less impact ++on the speed of other running processes). Some systems ++may have a wider range of nicenesses; conversely, other systems may ++enforce more restrictive limits. An attempt to set the niceness ++outside the supported range is treated as an attempt to use the ++minimum or maximum supported value. ++ ++A niceness should not be confused with a scheduling priority, which ++lets applications determine the order in which threads are scheduled ++to run. Unlike a priority, a niceness is merely advice to the ++scheduler, which the scheduler is free to ignore. Also, as a point of ++terminology, @acronym{POSIX} defines the behavior of @command{nice} in ++terms of a @dfn{nice value}, which is the nonnegative difference ++between a niceness and the minimum niceness. 
Though @command{nice} ++conforms to @acronym{POSIX}, its documentation and diagnostics use the ++term ``niceness'' for compatibility with historical practice. ++ ++@var{command} must not be a special built-in utility (@pxref{Special ++built-in utilities}). ++ ++@mayConflictWithShellBuiltIn{nice} ++ ++The program accepts the following option. Also see @ref{Common options}. ++Options must precede operands. ++ ++@table @samp ++@item -n @var{adjustment} ++@itemx --adjustment=@var{adjustment} ++@opindex -n ++@opindex --adjustment ++Add @var{adjustment} instead of 10 to the command's niceness. If ++@var{adjustment} is negative and you lack appropriate privileges, ++@command{nice} issues a warning but otherwise acts as if you specified ++a zero adjustment. ++ ++For compatibility @command{nice} also supports an obsolete ++option syntax @option{-@var{adjustment}}. New scripts should use ++@option{-n @var{adjustment}} instead. ++ ++@end table ++ ++@cindex exit status of @command{nice} ++Exit status: ++ ++@display ++0 if no @var{command} is specified and the niceness is output ++1 if @command{nice} itself fails ++126 if @var{command} is found but cannot be invoked ++127 if @var{command} cannot be found ++the exit status of @var{command} otherwise ++@end display ++ ++It is sometimes useful to run a non-interactive program with reduced niceness. ++ ++@example ++$ nice factor 4611686018427387903 ++@end example ++ ++Since @command{nice} prints the current niceness, ++you can invoke it through itself to demonstrate how it works. ++ ++The default behavior is to increase the niceness by @samp{10}: ++ ++@example ++$ nice ++0 ++$ nice nice ++10 ++$ nice -n 10 nice ++10 ++@end example ++ ++The @var{adjustment} is relative to the current niceness. 
In the ++next example, the first @command{nice} invocation runs the second one ++with niceness 10, and it in turn runs the final one with a niceness ++that is 3 more: ++ ++@example ++$ nice nice -n 3 nice ++13 ++@end example ++ ++Specifying a niceness larger than the supported range ++is the same as specifying the maximum supported value: ++ ++@example ++$ nice -n 10000000000 nice ++19 ++@end example ++ ++Only a privileged user may run a process with lower niceness: ++ ++@example ++$ nice -n -1 nice ++nice: cannot set niceness: Permission denied ++0 ++$ sudo nice -n -1 nice ++-1 ++@end example ++ ++ ++@node nohup invocation ++@section @command{nohup}: Run a command immune to hangups ++ ++@pindex nohup ++@cindex hangups, immunity to ++@cindex immunity to hangups ++@cindex logging out and continuing to run ++ ++@flindex nohup.out ++@command{nohup} runs the given @var{command} with hangup signals ignored, ++so that the command can continue running in the background after you log ++out. Synopsis: ++ ++@example ++nohup @var{command} [@var{arg}]@dots{} ++@end example ++ ++If standard input is a terminal, it is redirected from ++@file{/dev/null} so that terminal sessions do not mistakenly consider ++the terminal to be used by the command. This is a @acronym{GNU} ++extension; programs intended to be portable to non-@acronym{GNU} hosts ++should use @samp{nohup @var{command} [@var{arg}]@dots{} < /dev/null} ++instead. ++ ++If standard output is a terminal, the command's standard output is appended ++to the file @file{nohup.out}; if that cannot be written to, it is appended ++to the file @file{$HOME/nohup.out}; and if that cannot be written to, ++the command is not run. ++Any @file{nohup.out} or @file{$HOME/nohup.out} file created by ++@command{nohup} is made readable and writable only to the user, ++regardless of the current umask settings. ++ ++If standard error is a terminal, it is normally redirected to the same file ++descriptor as the (possibly-redirected) standard output. ++However, if standard output is closed, standard error terminal output ++is instead appended to the file @file{nohup.out} or ++@file{$HOME/nohup.out} as above. ++ ++To capture the command's output to a file other than @file{nohup.out} ++you can redirect it. For example, to capture the output of ++@command{make}: ++ ++@example ++nohup make > make.log ++@end example ++ ++@command{nohup} does not automatically put the command it runs in the ++background; you must do that explicitly, by ending the command line ++with an @samp{&}. Also, @command{nohup} does not alter the ++niceness of @var{command}; use @command{nice} for that, ++e.g., @samp{nohup nice @var{command}}. ++ ++@var{command} must not be a special built-in utility (@pxref{Special ++built-in utilities}). ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. Options must precede operands. 
++ ++@cindex exit status of @command{nohup} ++Exit status: ++ ++@display ++126 if @var{command} is found but cannot be invoked ++127 if @command{nohup} itself fails or if @var{command} cannot be found ++the exit status of @var{command} otherwise ++@end display ++ ++ ++@node stdbuf invocation ++@section @command{stdbuf}: Run a command with modified I/O stream buffering ++ ++@pindex stdbuf ++@cindex standard streams, buffering ++@cindex line buffered ++ ++@command{stdbuf} allows one to modify the buffering operations of the ++three standard I/O streams associated with a program. Synopsis: ++ ++@example ++stdbuf @var{option}@dots{} @var{command} ++@end example ++ ++Any additional @var{arg}s are passed as additional arguments to the ++@var{command}. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++ ++@item -i @var{mode} ++@itemx --input=@var{mode} ++@opindex -i ++@opindex --input ++Adjust the standard input stream buffering. ++ ++@item -o @var{mode} ++@itemx --output=@var{mode} ++@opindex -o ++@opindex --output ++Adjust the standard output stream buffering. ++ ++@item -e @var{mode} ++@itemx --error=@var{mode} ++@opindex -e ++@opindex --error ++Adjust the standard error stream buffering. ++ ++@end table ++ ++The @var{mode} can be specified as follows: ++ ++@table @samp ++ ++@item L ++Set the stream to line buffered mode. ++In this mode data is coalesced until a newline is output or ++input is read from any stream attached to a terminal device. ++This option is invalid with standard input. ++ ++@item 0 ++Disable buffering of the selected stream. ++In this mode data is output immediately and only the ++amount of data requested is read from input. ++ ++@item @var{size} ++Specify the size of the buffer to use in fully buffered mode. ++@multiplierSuffixesNoBlocks{size} ++ ++@end table ++ ++NOTE: If @var{command} adjusts the buffering of its standard streams ++(@command{tee} does for e.g.) 
then that will override corresponding settings ++changed by @command{stdbuf}. Also some filters (like @command{dd} and ++@command{cat} etc.) don't use streams for I/O, and are thus unaffected ++by @command{stdbuf} settings. ++ ++@cindex exit status of @command{stdbuf} ++Exit status: ++ ++@display ++125 if @command{stdbuf} itself fails ++126 if @var{command} is found but cannot be invoked ++127 if @var{command} cannot be found ++the exit status of @var{command} otherwise ++@end display ++ ++ ++@node su invocation ++@section @command{su}: Run a command with substitute user and group ID ++ ++@pindex su ++@cindex substitute user and group IDs ++@cindex user ID, switching ++@cindex super-user, becoming ++@cindex root, becoming ++ ++@command{su} allows one user to temporarily become another user. It runs a ++command (often an interactive shell) with the real and effective user ++ID, group ID, and supplemental groups of a given @var{user}. Synopsis: ++ ++@example ++su [@var{option}]@dots{} [@var{user} [@var{arg}]@dots{}] ++@end example ++ ++@cindex passwd entry, and @command{su} shell ++@flindex /bin/sh ++@flindex /etc/passwd ++If no @var{user} is given, the default is @code{root}, the super-user. ++The shell to use is taken from @var{user}'s @code{passwd} entry, or ++@file{/bin/sh} if none is specified there. If @var{user} has a ++password, @command{su} prompts for the password unless run by a user with ++effective user ID of zero (the super-user). ++ ++@vindex HOME ++@vindex SHELL ++@vindex USER ++@vindex LOGNAME ++@cindex login shell ++By default, @command{su} does not change the current directory. ++It sets the environment variables @env{HOME} and @env{SHELL} ++from the password entry for @var{user}, and if @var{user} is not ++the super-user, sets @env{USER} and @env{LOGNAME} to @var{user}. ++By default, the shell is not a login shell. ++ ++Any additional @var{arg}s are passed as additional arguments to the ++shell. 
++ ++@cindex @option{-su} ++GNU @command{su} does not treat @file{/bin/sh} or any other shells specially ++(e.g., by setting @code{argv[0]} to @option{-su}, passing @option{-c} only ++to certain shells, etc.). ++ ++@findex syslog ++@command{su} can optionally be compiled to use @code{syslog} to report ++failed, and optionally successful, @command{su} attempts. (If the system ++supports @code{syslog}.) However, GNU @command{su} does not check if the ++user is a member of the @code{wheel} group; see below. ++ ++The program accepts the following options. Also see @ref{Common options}. ++ ++@table @samp ++@item -c @var{command} ++@itemx --command=@var{command} ++@opindex -c ++@opindex --command ++Pass @var{command}, a single command line to run, to the shell with ++a @option{-c} option instead of starting an interactive shell. ++ ++@item -f ++@itemx --fast ++@opindex -f ++@opindex --fast ++@flindex .cshrc ++@cindex file name pattern expansion, disabled ++@cindex globbing, disabled ++Pass the @option{-f} option to the shell. This probably only makes sense ++if the shell run is @command{csh} or @command{tcsh}, for which the @option{-f} ++option prevents reading the startup file (@file{.cshrc}). With ++Bourne-like shells, the @option{-f} option disables file name pattern ++expansion (globbing), which is not likely to be useful. ++ ++@item - ++@itemx -l ++@itemx --login ++@opindex - ++@opindex -l ++@opindex --login ++@c other variables already indexed above ++@vindex TERM ++@vindex PATH ++@cindex login shell, creating ++Make the shell a login shell. This means the following. Unset all ++environment variables except @env{TERM}, @env{HOME}, and @env{SHELL} ++(which are set as described above), and @env{USER} and @env{LOGNAME} ++(which are set, even for the super-user, as described above), and set ++@env{PATH} to a compiled-in default value. Change to @var{user}'s home ++directory. Prepend @samp{-} to the shell's name, intended to make it ++read its login startup file(s). 
++ ++@item -m ++@itemx -p ++@itemx --preserve-environment ++@opindex -m ++@opindex -p ++@opindex --preserve-environment ++@cindex environment, preserving ++@flindex /etc/shells ++@cindex restricted shell ++Do not change the environment variables @env{HOME}, @env{USER}, ++@env{LOGNAME}, or @env{SHELL}. Run the shell given in the environment ++variable @env{SHELL} instead of the shell from @var{user}'s passwd ++entry, unless the user running @command{su} is not the super-user and ++@var{user}'s shell is restricted. A @dfn{restricted shell} is one that ++is not listed in the file @file{/etc/shells}, or in a compiled-in list ++if that file does not exist. Parts of what this option does can be ++overridden by @option{--login} and @option{--shell}. ++ ++@item -s @var{shell} ++@itemx --shell=@var{shell} ++@opindex -s ++@opindex --shell ++Run @var{shell} instead of the shell from @var{user}'s passwd entry, ++unless the user running @command{su} is not the super-user and @var{user}'s ++shell is restricted (see @option{-m} just above). ++ ++@end table ++ ++@cindex exit status of @command{su} ++Exit status: ++ ++@display ++1 if @command{su} itself fails ++126 if subshell is found but cannot be invoked ++127 if subshell cannot be found ++the exit status of the subshell otherwise ++@end display ++ ++@cindex wheel group, not supported ++@cindex group wheel, not supported ++@cindex fascism ++@subsection Why GNU @command{su} does not support the @samp{wheel} group ++ ++(This section is by Richard Stallman.) ++ ++@cindex Twenex ++@cindex MIT AI lab ++Sometimes a few of the users try to hold total power over all the ++rest. For example, in 1984, a few users at the MIT AI lab decided to ++seize power by changing the operator password on the Twenex system and ++keeping it secret from everyone else. (I was able to thwart this coup ++and give power back to the users by patching the kernel, but I ++wouldn't know how to do that in Unix.) 
++ ++However, occasionally the rulers do tell someone. Under the usual ++@command{su} mechanism, once someone learns the root password who ++sympathizes with the ordinary users, he or she can tell the rest. The ++``wheel group'' feature would make this impossible, and thus cement the ++power of the rulers. ++ ++I'm on the side of the masses, not that of the rulers. If you are ++used to supporting the bosses and sysadmins in whatever they do, you ++might find this idea strange at first. ++ ++ ++@node timeout invocation ++@section @command{timeout}: Run a command with a time limit ++ ++@pindex timeout ++@cindex time limit ++@cindex run commands with bounded time ++ ++@command{timeout} runs the given @var{command} and kills it if it is ++still running after the specified time interval. Synopsis: ++ ++@example ++timeout [@var{option}] @var{number}[smhd] @var{command} [@var{arg}]@dots{} ++@end example ++ ++@cindex time units ++@var{number} is an integer followed by an optional unit; the default ++is seconds. The units are: ++ ++@table @samp ++@item s ++seconds ++@item m ++minutes ++@item h ++hours ++@item d ++days ++@end table ++ ++@var{command} must not be a special built-in utility (@pxref{Special ++built-in utilities}). ++ ++The program accepts the following option. Also see @ref{Common options}. ++Options must precede operands. ++ ++@table @samp ++@item -s @var{signal} ++@itemx --signal=@var{signal} ++@opindex -s ++@opindex --signal ++Send this @var{signal} to @var{command} on timeout, rather than the ++default @samp{TERM} signal. @var{signal} may be a name like @samp{HUP} ++or a number. Also see @ref{Signal specifications}. 
++ ++@end table ++ ++@cindex exit status of @command{timeout} ++Exit status: ++ ++@display ++124 if @var{command} times out ++125 if @command{timeout} itself fails ++126 if @var{command} is found but cannot be invoked ++127 if @var{command} cannot be found ++the exit status of @var{command} otherwise ++@end display ++ ++ ++@node Process control ++@chapter Process control ++ ++@cindex processes, commands for controlling ++@cindex commands for controlling processes ++ ++@menu ++* kill invocation:: Sending a signal to processes. ++@end menu ++ ++ ++@node kill invocation ++@section @command{kill}: Send a signal to processes ++ ++@pindex kill ++@cindex send a signal to processes ++ ++The @command{kill} command sends a signal to processes, causing them ++to terminate or otherwise act upon receiving the signal in some way. ++Alternatively, it lists information about signals. Synopses: ++ ++@example ++kill [-s @var{signal} | --signal @var{signal} | -@var{signal}] @var{pid}@dots{} ++kill [-l | --list | -t | --table] [@var{signal}]@dots{} ++@end example ++ ++@mayConflictWithShellBuiltIn{kill} ++ ++The first form of the @command{kill} command sends a signal to all ++@var{pid} arguments. The default signal to send if none is specified ++is @samp{TERM}. The special signal number @samp{0} does not denote a ++valid signal, but can be used to test whether the @var{pid} arguments ++specify processes to which a signal could be sent. ++ ++If @var{pid} is positive, the signal is sent to the process with the ++process ID @var{pid}. If @var{pid} is zero, the signal is sent to all ++processes in the process group of the current process. If @var{pid} ++is @minus{}1, the signal is sent to all processes for which the user has ++permission to send a signal. If @var{pid} is less than @minus{}1, the signal ++is sent to all processes in the process group that equals the absolute ++value of @var{pid}. 
++ ++If @var{pid} is not positive, a system-dependent set of system ++processes is excluded from the list of processes to which the signal ++is sent. ++ ++If a negative @var{pid} argument is desired as the first one, it ++should be preceded by @option{--}. However, as a common extension to ++@acronym{POSIX}, @option{--} is not required with @samp{kill ++-@var{signal} -@var{pid}}. The following commands are equivalent: ++ ++@example ++kill -15 -1 ++kill -TERM -1 ++kill -s TERM -- -1 ++kill -- -1 ++@end example ++ ++The first form of the @command{kill} command succeeds if every @var{pid} ++argument specifies at least one process that the signal was sent to. ++ ++The second form of the @command{kill} command lists signal information. ++Either the @option{-l} or @option{--list} option, or the @option{-t} ++or @option{--table} option must be specified. Without any ++@var{signal} argument, all supported signals are listed. The output ++of @option{-l} or @option{--list} is a list of the signal names, one ++per line; if @var{signal} is already a name, the signal number is ++printed instead. The output of @option{-t} or @option{--table} is a ++table of signal numbers, names, and descriptions. This form of the ++@command{kill} command succeeds if all @var{signal} arguments are valid ++and if there is no output error. ++ ++The @command{kill} command also supports the @option{--help} and ++@option{--version} options. @xref{Common options}. ++ ++A @var{signal} may be a signal name like @samp{HUP}, or a signal ++number like @samp{1}, or an exit status of a process terminated by the ++signal. A signal name can be given in canonical form or prefixed by ++@samp{SIG}. The case of the letters is ignored, except for the ++@option{-@var{signal}} option which must use upper case to avoid ++ambiguity with lower case option letters. For a list of supported ++signal names and numbers see @ref{Signal specifications}. 
++ ++@node Delaying ++@chapter Delaying ++ ++@cindex delaying commands ++@cindex commands for delaying ++ ++@c Perhaps @command{wait} or other commands should be described here also? ++ ++@menu ++* sleep invocation:: Delay for a specified time. ++@end menu ++ ++ ++@node sleep invocation ++@section @command{sleep}: Delay for a specified time ++ ++@pindex sleep ++@cindex delay for a specified time ++ ++@command{sleep} pauses for an amount of time specified by the sum of ++the values of the command line arguments. ++Synopsis: ++ ++@example ++sleep @var{number}[smhd]@dots{} ++@end example ++ ++@cindex time units ++Each argument is a number followed by an optional unit; the default ++is seconds. The units are: ++ ++@table @samp ++@item s ++seconds ++@item m ++minutes ++@item h ++hours ++@item d ++days ++@end table ++ ++Historical implementations of @command{sleep} have required that ++@var{number} be an integer, and only accepted a single argument ++without a suffix. However, GNU @command{sleep} accepts ++arbitrary floating point numbers (using a period before any fractional ++digits). ++ ++The only options are @option{--help} and @option{--version}. @xref{Common ++options}. ++ ++@c sleep is a shell built-in at least with Solaris 11's /bin/sh ++@mayConflictWithShellBuiltIn{sleep} ++ ++@exitstatus ++ ++ ++@node Numeric operations ++@chapter Numeric operations ++ ++@cindex numeric operations ++These programs do numerically-related operations. ++ ++@menu ++* factor invocation:: Show factors of numbers. ++* seq invocation:: Print sequences of numbers. ++@end menu ++ ++ ++@node factor invocation ++@section @command{factor}: Print prime factors ++ ++@pindex factor ++@cindex prime factors ++ ++@command{factor} prints prime factors. Synopses: ++ ++@example ++factor [@var{number}]@dots{} ++factor @var{option} ++@end example ++ ++If no @var{number} is specified on the command line, @command{factor} reads ++numbers from standard input, delimited by newlines, tabs, or spaces. 
++ ++The @command{factor} command supports only a small number of options: ++ ++@table @samp ++@item --help ++Print a short help on standard output, then exit without further ++processing. ++ ++@item --version ++Print the program version on standard output, then exit without further ++processing. ++@end table ++ ++Factoring the product of the eighth and ninth Mersenne primes ++takes about 30 milliseconds of CPU time on a 2.2 GHz Athlon. ++ ++@example ++M8=`echo 2^31-1|bc` ; M9=`echo 2^61-1|bc` ++/usr/bin/time -f '%U' factor $(echo "$M8 * $M9" | bc) ++4951760154835678088235319297: 2147483647 2305843009213693951 ++0.03 ++@end example ++ ++Similarly, factoring the eighth Fermat number @math{2^{256}+1} takes ++about 20 seconds on the same machine. ++ ++Factoring large prime numbers is, in general, hard. The Pollard Rho ++algorithm used by @command{factor} is particularly effective for ++numbers with relatively small factors. If you wish to factor large ++numbers which do not have small factors (for example, numbers which ++are the product of two large primes), other methods are far better. ++ ++If @command{factor} is built without using GNU MP, only ++single-precision arithmetic is available, and so large numbers ++(typically @math{2^{64}} and above) will not be supported. The single-precision ++code uses an algorithm which is designed for factoring smaller ++numbers. ++ ++@exitstatus ++ ++ ++@node seq invocation ++@section @command{seq}: Print numeric sequences ++ ++@pindex seq ++@cindex numeric sequences ++@cindex sequence of numbers ++ ++@command{seq} prints a sequence of numbers to standard output. Synopses: ++ ++@example ++seq [@var{option}]@dots{} @var{last} ++seq [@var{option}]@dots{} @var{first} @var{last} ++seq [@var{option}]@dots{} @var{first} @var{increment} @var{last} ++@end example ++ ++@command{seq} prints the numbers from @var{first} to @var{last} by ++@var{increment}. By default, each number is printed on a separate line. 
++When @var{increment} is not specified, it defaults to @samp{1}, ++even when @var{first} is larger than @var{last}. ++@var{first} also defaults to @samp{1}. So @code{seq 1} prints ++@samp{1}, but @code{seq 0} and @code{seq 10 5} produce no output. ++Floating-point numbers ++may be specified (using a period before any fractional digits). ++ ++The program accepts the following options. Also see @ref{Common options}. ++Options must precede operands. ++ ++@table @samp ++@item -f @var{format} ++@itemx --format=@var{format} ++@opindex -f @var{format} ++@opindex --format=@var{format} ++@cindex formatting of numbers in @command{seq} ++Print all numbers using @var{format}. ++@var{format} must contain exactly one of the @samp{printf}-style ++floating point conversion specifications @samp{%a}, @samp{%e}, ++@samp{%f}, @samp{%g}, @samp{%A}, @samp{%E}, @samp{%F}, @samp{%G}. ++The @samp{%} may be followed by zero or more flags taken from the set ++@samp{-+#0 '}, then an optional width containing one or more digits, ++then an optional precision consisting of a @samp{.} followed by zero ++or more digits. @var{format} may also contain any number of @samp{%%} ++conversion specifications. All conversion specifications have the ++same meaning as with @samp{printf}. ++ ++The default format is derived from @var{first}, @var{step}, and ++@var{last}. If these all use a fixed point decimal representation, ++the default format is @samp{%.@var{p}f}, where @var{p} is the minimum ++precision that can represent the output numbers exactly. Otherwise, ++the default format is @samp{%g}. ++ ++@item -s @var{string} ++@itemx --separator=@var{string} ++@cindex separator for numbers in @command{seq} ++Separate numbers with @var{string}; default is a newline. ++The output always terminates with a newline. ++ ++@item -w ++@itemx --equal-width ++Print all numbers with the same width, by padding with leading zeros. 
++@var{first}, @var{step}, and @var{last} should all use a fixed point ++decimal representation. ++(To have other kinds of padding, use @option{--format}). ++ ++@end table ++ ++You can get finer-grained control over output with @option{-f}: ++ ++@example ++$ seq -f '(%9.2E)' -9e5 1.1e6 1.3e6 ++(-9.00E+05) ++( 2.00E+05) ++( 1.30E+06) ++@end example ++ ++If you want hexadecimal integer output, you can use @command{printf} ++to perform the conversion: ++ ++@example ++$ printf '%x\n' `seq 1048575 1024 1050623` ++fffff ++1003ff ++1007ff ++@end example ++ ++For very long lists of numbers, use xargs to avoid ++system limitations on the length of an argument list: ++ ++@example ++$ seq 1000000 | xargs printf '%x\n' | tail -n 3 ++f423e ++f423f ++f4240 ++@end example ++ ++To generate octal output, use the printf @code{%o} format instead ++of @code{%x}. ++ ++On most systems, seq can produce whole-number output for values up to ++at least @math{2^{53}}. Larger integers are approximated. The details ++differ depending on your floating-point implementation, but a common ++case is that @command{seq} works with integers through @math{2^{64}}, ++and larger integers may not be numerically correct: ++ ++@example ++$ seq 18446744073709551616 1 18446744073709551618 ++18446744073709551616 ++18446744073709551616 ++18446744073709551618 ++@end example ++ ++Be careful when using @command{seq} with outlandish values: otherwise ++you may see surprising results, as @command{seq} uses floating point ++internally. For example, on the x86 platform, where the internal ++representation uses a 64-bit fraction, the command: ++ ++@example ++seq 1 0.0000000000000000001 1.0000000000000000009 ++@end example ++ ++outputs 1.0000000000000000007 twice and skips 1.0000000000000000008. ++ ++@exitstatus ++ ++ ++@node File permissions ++@chapter File permissions ++@include perm.texi ++ ++@include getdate.texi ++ ++@c What's GNU? 
++@c Arnold Robbins ++@node Opening the software toolbox ++@chapter Opening the Software Toolbox ++ ++An earlier version of this chapter appeared in ++@uref{http://www.linuxjournal.com/article.php?sid=2762, the ++@cite{What's GNU?} column of @cite{Linux Journal}, 2 (June, 1994)}. ++It was written by Arnold Robbins. ++ ++@menu ++* Toolbox introduction:: Toolbox introduction ++* I/O redirection:: I/O redirection ++* The who command:: The @command{who} command ++* The cut command:: The @command{cut} command ++* The sort command:: The @command{sort} command ++* The uniq command:: The @command{uniq} command ++* Putting the tools together:: Putting the tools together ++@end menu ++ ++ ++@node Toolbox introduction ++@unnumberedsec Toolbox Introduction ++ ++This month's column is only peripherally related to the GNU Project, in ++that it describes a number of the GNU tools on your GNU/Linux system and how they ++might be used. What it's really about is the ``Software Tools'' philosophy ++of program development and usage. ++ ++The software tools philosophy was an important and integral concept ++in the initial design and development of Unix (of which Linux and GNU are ++essentially clones). Unfortunately, in the modern day press of ++Internetworking and flashy GUIs, it seems to have fallen by the ++wayside. This is a shame, since it provides a powerful mental model ++for solving many kinds of problems. ++ ++Many people carry a Swiss Army knife around in their pants pockets (or ++purse). A Swiss Army knife is a handy tool to have: it has several knife ++blades, a screwdriver, tweezers, toothpick, nail file, corkscrew, and perhaps ++a number of other things on it. For the everyday, small miscellaneous jobs ++where you need a simple, general purpose tool, it's just the thing. ++ ++On the other hand, an experienced carpenter doesn't build a house using ++a Swiss Army knife. 
Instead, he has a toolbox chock full of specialized ++tools---a saw, a hammer, a screwdriver, a plane, and so on. And he knows ++exactly when and where to use each tool; you won't catch him hammering nails ++with the handle of his screwdriver. ++ ++The Unix developers at Bell Labs were all professional programmers and trained ++computer scientists. They had found that while a one-size-fits-all program ++might appeal to a user because there's only one program to use, in practice ++such programs are ++ ++@enumerate a ++@item ++difficult to write, ++ ++@item ++difficult to maintain and ++debug, and ++ ++@item ++difficult to extend to meet new situations. ++@end enumerate ++ ++Instead, they felt that programs should be specialized tools. In short, each ++program ``should do one thing well.'' No more and no less. Such programs are ++simpler to design, write, and get right---they only do one thing. ++ ++Furthermore, they found that with the right machinery for hooking programs ++together, the whole was greater than the sum of the parts. By combining ++several special purpose programs, you could accomplish a specific task ++that none of the programs was designed for, and accomplish it much more ++quickly and easily than if you had to write a special purpose program. ++We will see some (classic) examples of this further on in the column. ++(An important additional point was that, if necessary, you should take a detour ++and build any software tools you may need first, if you don't already ++have something appropriate in the toolbox.) ++ ++@node I/O redirection ++@unnumberedsec I/O Redirection ++ ++Hopefully, you are familiar with the basics of I/O redirection in the ++shell, in particular the concepts of ``standard input,'' ``standard output,'' ++and ``standard error''. Briefly, ``standard input'' is a data source, where ++data comes from.
A program should not need to either know or care if the ++data source is a disk file, a keyboard, a magnetic tape, or even a punched ++card reader. Similarly, ``standard output'' is a data sink, where data goes ++to. The program should neither know nor care where this might be. ++Programs that only read their standard input, do something to the data, ++and then send it on, are called @dfn{filters}, by analogy to filters in a ++water pipeline. ++ ++With the Unix shell, it's very easy to set up data pipelines: ++ ++@smallexample ++program_to_create_data | filter1 | ... | filterN > final.pretty.data ++@end smallexample ++ ++We start out by creating the raw data; each filter applies some successive ++transformation to the data, until by the time it comes out of the pipeline, ++it is in the desired form. ++ ++This is fine and good for standard input and standard output. Where does the ++standard error come in to play? Well, think about @command{filter1} in ++the pipeline above. What happens if it encounters an error in the data it ++sees? If it writes an error message to standard output, it will just ++disappear down the pipeline into @command{filter2}'s input, and the ++user will probably never see it. So programs need a place where they can send ++error messages so that the user will notice them. This is standard error, ++and it is usually connected to your console or window, even if you have ++redirected standard output of your program away from your screen. ++ ++For filter programs to work together, the format of the data has to be ++agreed upon. The most straightforward and easiest format to use is simply ++lines of text. Unix data files are generally just streams of bytes, with ++lines delimited by the @acronym{ASCII} @sc{lf} (Line Feed) character, ++conventionally called a ``newline'' in the Unix literature. (This is ++@code{'\n'} if you're a C programmer.) This is the format used by all ++the traditional filtering programs. 
(Many earlier operating systems ++had elaborate facilities and special purpose programs for managing ++binary data. Unix has always shied away from such things, under the ++philosophy that it's easiest to simply be able to view and edit your ++data with a text editor.) ++ ++OK, enough introduction. Let's take a look at some of the tools, and then ++we'll see how to hook them together in interesting ways. In the following ++discussion, we will only present those command line options that interest ++us. As you should always do, double check your system documentation ++for the full story. ++ ++@node The who command ++@unnumberedsec The @command{who} Command ++ ++The first program is the @command{who} command. By itself, it generates a ++list of the users who are currently logged in. Although I'm writing ++this on a single-user system, we'll pretend that several people are ++logged in: ++ ++@example ++$ who ++@print{} arnold console Jan 22 19:57 ++@print{} miriam ttyp0 Jan 23 14:19(:0.0) ++@print{} bill ttyp1 Jan 21 09:32(:0.0) ++@print{} arnold ttyp2 Jan 23 20:48(:0.0) ++@end example ++ ++Here, the @samp{$} is the usual shell prompt, at which I typed @samp{who}. ++There are three people logged in, and I am logged in twice. On traditional ++Unix systems, user names are never more than eight characters long. This ++little bit of trivia will be useful later. The output of @command{who} is nice, ++but the data is not all that exciting. ++ ++@node The cut command ++@unnumberedsec The @command{cut} Command ++ ++The next program we'll look at is the @command{cut} command. This program ++cuts out columns or fields of input data. For example, we can tell it ++to print just the login name and full name from the @file{/etc/passwd} ++file. The @file{/etc/passwd} file has seven fields, separated by ++colons: ++ ++@example ++arnold:xyzzy:2076:10:Arnold D. 
Robbins:/home/arnold:/bin/bash ++@end example ++ ++To get the first and fifth fields, we would use @command{cut} like this: ++ ++@example ++$ cut -d: -f1,5 /etc/passwd ++@print{} root:Operator ++@dots{} ++@print{} arnold:Arnold D. Robbins ++@print{} miriam:Miriam A. Robbins ++@dots{} ++@end example ++ ++With the @option{-c} option, @command{cut} will cut out specific characters ++(i.e., columns) in the input lines. This is useful for input data ++that has fixed width fields, and does not have a field separator. For ++example, list the Monday dates for the current month: ++ ++@c Is using cal ok? Looked at gcal, but I don't like it. ++@example ++$ cal | cut -c 3-5 ++@print{}Mo ++@print{} ++@print{} 6 ++@print{} 13 ++@print{} 20 ++@print{} 27 ++@end example ++ ++@node The sort command ++@unnumberedsec The @command{sort} Command ++ ++Next we'll look at the @command{sort} command. This is one of the most ++powerful commands on a Unix-style system; one that you will often find ++yourself using when setting up fancy data plumbing. ++ ++The @command{sort} ++command reads and sorts each file named on the command line. It then ++merges the sorted data and writes it to standard output. It will read ++standard input if no files are given on the command line (thus ++making it into a filter). The sort is based on the character collating ++sequence or based on user-supplied ordering criteria. ++ ++ ++@node The uniq command ++@unnumberedsec The @command{uniq} Command ++ ++Finally (at least for now), we'll look at the @command{uniq} program. When ++sorting data, you will often end up with duplicate lines, lines that ++are identical. Usually, all you need is one instance of each line. ++This is where @command{uniq} comes in. The @command{uniq} program reads its ++standard input. It prints only one ++copy of each repeated line. It does have several options. 
Later on, ++we'll use the @option{-c} option, which prints each unique line, preceded ++by a count of the number of times that line occurred in the input. ++ ++ ++@node Putting the tools together ++@unnumberedsec Putting the Tools Together ++ ++Now, let's suppose this is a large ISP server system with dozens of users ++logged in. The management wants the system administrator to write a program that will ++generate a sorted list of logged in users. Furthermore, even if a user ++is logged in multiple times, his or her name should only show up in the ++output once. ++ ++The administrator could sit down with the system documentation and write a C ++program that did this. It would take perhaps a couple of hundred lines ++of code and about two hours to write it, test it, and debug it. ++However, knowing the software toolbox, the administrator can instead start out ++by generating just a list of logged on users: ++ ++@example ++$ who | cut -c1-8 ++@print{} arnold ++@print{} miriam ++@print{} bill ++@print{} arnold ++@end example ++ ++Next, sort the list: ++ ++@example ++$ who | cut -c1-8 | sort ++@print{} arnold ++@print{} arnold ++@print{} bill ++@print{} miriam ++@end example ++ ++Finally, run the sorted list through @command{uniq}, to weed out duplicates: ++ ++@example ++$ who | cut -c1-8 | sort | uniq ++@print{} arnold ++@print{} bill ++@print{} miriam ++@end example ++ ++The @command{sort} command actually has a @option{-u} option that does what ++@command{uniq} does. However, @command{uniq} has other uses for which one ++cannot substitute @samp{sort -u}. ++ ++The administrator puts this pipeline into a shell script, and makes it available for ++all the users on the system (@samp{#} is the system administrator, ++or @code{root}, prompt): ++ ++@example ++# cat > /usr/local/bin/listusers ++who | cut -c1-8 | sort | uniq ++^D ++# chmod +x /usr/local/bin/listusers ++@end example ++ ++There are four major points to note here. 
First, with just four ++programs, on one command line, the administrator was able to save about two ++hours worth of work. Furthermore, the shell pipeline is just about as ++efficient as the C program would be, and it is much more efficient in ++terms of programmer time. People time is much more expensive than ++computer time, and in our modern ``there's never enough time to do ++everything'' society, saving two hours of programmer time is no mean ++feat. ++ ++Second, it is also important to emphasize that with the ++@emph{combination} of the tools, it is possible to do a special ++purpose job never imagined by the authors of the individual programs. ++ ++Third, it is also valuable to build up your pipeline in stages, as we did here. ++This allows you to view the data at each stage in the pipeline, which helps ++you acquire the confidence that you are indeed using these tools correctly. ++ ++Finally, by bundling the pipeline in a shell script, other users can use ++your command, without having to remember the fancy plumbing you set up for ++them. In terms of how you run them, shell scripts and compiled programs are ++indistinguishable. ++ ++After the previous warm-up exercise, we'll look at two additional, more ++complicated pipelines. For them, we need to introduce two more tools. ++ ++The first is the @command{tr} command, which stands for ``transliterate.'' ++The @command{tr} command works on a character-by-character basis, changing ++characters. Normally it is used for things like mapping upper case to ++lower case: ++ ++@example ++$ echo ThIs ExAmPlE HaS MIXED case! | tr '[:upper:]' '[:lower:]' ++@print{} this example has mixed case! 
++@end example ++ ++There are several options of interest: ++ ++@table @code ++@item -c ++work on the complement of the listed characters, i.e., ++operations apply to characters not in the given set ++ ++@item -d ++delete characters in the first set from the output ++ ++@item -s ++squeeze repeated characters in the output into just one character. ++@end table ++ ++We will be using all three options in a moment. ++ ++The other command we'll look at is @command{comm}. The @command{comm} ++command takes two sorted input files as input data, and prints out the ++files' lines in three columns. The output columns are the data lines ++unique to the first file, the data lines unique to the second file, and ++the data lines that are common to both. The @option{-1}, @option{-2}, and ++@option{-3} command line options @emph{omit} the respective columns. (This is ++non-intuitive and takes a little getting used to.) For example: ++ ++@example ++$ cat f1 ++@print{} 11111 ++@print{} 22222 ++@print{} 33333 ++@print{} 44444 ++$ cat f2 ++@print{} 00000 ++@print{} 22222 ++@print{} 33333 ++@print{} 55555 ++$ comm f1 f2 ++@print{} 00000 ++@print{} 11111 ++@print{} 22222 ++@print{} 33333 ++@print{} 44444 ++@print{} 55555 ++@end example ++ ++The file name @file{-} tells @command{comm} to read standard input ++instead of a regular file. ++ ++Now we're ready to build a fancy pipeline. The first application is a word ++frequency counter. This helps an author determine if he or she is over-using ++certain words. ++ ++The first step is to change the case of all the letters in our input file ++to one case. ``The'' and ``the'' are the same word when doing counting. ++ ++@example ++$ tr '[:upper:]' '[:lower:]' < whats.gnu | ... ++@end example ++ ++The next step is to get rid of punctuation. Quoted words and unquoted words ++should be treated identically; it's easiest to just get the punctuation out of ++the way. 
++ ++@smallexample ++$ tr '[:upper:]' '[:lower:]' < whats.gnu | tr -cd '[:alnum:]_ \n' | ... ++@end smallexample ++ ++The second @command{tr} command operates on the complement of the listed ++characters, which are all the letters, the digits, the underscore, and ++the blank. The @samp{\n} represents the newline character; it has to ++be left alone. (The @acronym{ASCII} tab character should also be included for ++good measure in a production script.) ++ ++At this point, we have data consisting of words separated by blank space. ++The words only contain alphanumeric characters (and the underscore). The ++next step is to break the data apart so that we have one word per line. This ++makes the counting operation much easier, as we will see shortly. ++ ++@smallexample ++$ tr '[:upper:]' '[:lower:]' < whats.gnu | tr -cd '[:alnum:]_ \n' | ++> tr -s ' ' '\n' | ... ++@end smallexample ++ ++This command turns blanks into newlines. The @option{-s} option squeezes ++multiple newline characters in the output into just one. This helps us ++avoid blank lines. (The @samp{>} is the shell's ``secondary prompt.'' ++This is what the shell prints when it notices you haven't finished ++typing in all of a command.) ++ ++We now have data consisting of one word per line, no punctuation, all one ++case. We're ready to count each word: ++ ++@smallexample ++$ tr '[:upper:]' '[:lower:]' < whats.gnu | tr -cd '[:alnum:]_ \n' | ++> tr -s ' ' '\n' | sort | uniq -c | ... ++@end smallexample ++ ++At this point, the data might look something like this: ++ ++@example ++ 60 a ++ 2 able ++ 6 about ++ 1 above ++ 2 accomplish ++ 1 acquire ++ 1 actually ++ 2 additional ++@end example ++ ++The output is sorted by word, not by count! What we want is the most ++frequently used words first.
Fortunately, this is easy to accomplish, ++with the help of two more @command{sort} options: ++ ++@table @code ++@item -n ++do a numeric sort, not a textual one ++ ++@item -r ++reverse the order of the sort ++@end table ++ ++The final pipeline looks like this: ++ ++@smallexample ++$ tr '[:upper:]' '[:lower:]' < whats.gnu | tr -cd '[:alnum:]_ \n' | ++> tr -s ' ' '\n' | sort | uniq -c | sort -n -r ++@print{} 156 the ++@print{} 60 a ++@print{} 58 to ++@print{} 51 of ++@print{} 51 and ++@dots{} ++@end smallexample ++ ++Whew! That's a lot to digest. Yet, the same principles apply. With six ++commands, on two lines (really one long one split for convenience), we've ++created a program that does something interesting and useful, in much ++less time than we could have written a C program to do the same thing. ++ ++A minor modification to the above pipeline can give us a simple spelling ++checker! To determine if you've spelled a word correctly, all you have to ++do is look it up in a dictionary. If it is not there, then chances are ++that your spelling is incorrect. So, we need a dictionary. ++The conventional location for a dictionary is @file{/usr/dict/words}. ++On my GNU/Linux system,@footnote{Redhat Linux 6.1, for the November 2000 ++revision of this article.} ++this is a sorted, 45,402 word dictionary. ++ ++Now, how to compare our file with the dictionary? As before, we generate ++a sorted list of words, one per line: ++ ++@smallexample ++$ tr '[:upper:]' '[:lower:]' < whats.gnu | tr -cd '[:alnum:]_ \n' | ++> tr -s ' ' '\n' | sort -u | ... ++@end smallexample ++ ++Now, all we need is a list of words that are @emph{not} in the ++dictionary. Here is where the @command{comm} command comes in. 
++ ++@smallexample ++$ tr '[:upper:]' '[:lower:]' < whats.gnu | tr -cd '[:alnum:]_ \n' | ++> tr -s ' ' '\n' | sort -u | ++> comm -23 - /usr/dict/words ++@end smallexample ++ ++The @option{-2} and @option{-3} options eliminate lines that are only in the ++dictionary (the second file), and lines that are in both files. Lines ++only in the first file (standard input, our stream of words), are ++words that are not in the dictionary. These are likely candidates for ++spelling errors. This pipeline was the first cut at a production ++spelling checker on Unix. ++ ++There are some other tools that deserve brief mention. ++ ++@table @command ++@item grep ++search files for text that matches a regular expression ++ ++@item wc ++count lines, words, characters ++ ++@item tee ++a T-fitting for data pipes, copies data to files and to standard output ++ ++@item sed ++the stream editor, an advanced tool ++ ++@item awk ++a data manipulation language, another advanced tool ++@end table ++ ++The software tools philosophy also espoused the following bit of ++advice: ``Let someone else do the hard part.'' This means, take ++something that gives you most of what you need, and then massage it the ++rest of the way until it's in the form that you want. ++ ++To summarize: ++ ++@enumerate 1 ++@item ++Each program should do one thing well. No more, no less. ++ ++@item ++Combining programs with appropriate plumbing leads to results where ++the whole is greater than the sum of the parts. It also leads to novel ++uses of programs that the authors might never have imagined. ++ ++@item ++Programs should never print extraneous header or trailer data, since these ++could get sent on down a pipeline. (A point we didn't mention earlier.) ++ ++@item ++Let someone else do the hard part. ++ ++@item ++Know your toolbox! Use each program appropriately. If you don't have an ++appropriate tool, build one. 
++@end enumerate ++ ++As of this writing, all the programs we've discussed are available via ++anonymous @command{ftp} from: @* ++@uref{ftp://gnudist.gnu.org/textutils/textutils-1.22.tar.gz}. (There may ++be more recent versions available now.) ++ ++None of what I have presented in this column is new. The Software Tools ++philosophy was first introduced in the book @cite{Software Tools}, by ++Brian Kernighan and P.J. Plauger (Addison-Wesley, ISBN 0-201-03669-X). ++This book showed how to write and use software tools. It was written in ++1976, using a preprocessor for FORTRAN named @command{ratfor} (RATional ++FORtran). At the time, C was not as ubiquitous as it is now; FORTRAN ++was. The last chapter presented a @command{ratfor} to FORTRAN ++processor, written in @command{ratfor}. @command{ratfor} looks an awful ++lot like C; if you know C, you won't have any problem following the ++code. ++ ++In 1981, the book was updated and made available as @cite{Software Tools ++in Pascal} (Addison-Wesley, ISBN 0-201-10342-7). Both books are ++still in print and are well worth ++reading if you're a programmer. They certainly made a major change in ++how I view programming. ++ ++The programs in both books are available from ++@uref{http://cm.bell-labs.com/who/bwk, Brian Kernighan's home page}. ++For a number of years, there was an active ++Software Tools Users Group, whose members had ported the original ++@command{ratfor} programs to essentially every computer system with a ++FORTRAN compiler. The popularity of the group waned in the middle 1980s ++as Unix began to spread beyond universities. ++ ++With the current proliferation of GNU code and other clones of Unix programs, ++these programs now receive little attention; modern C versions are ++much more efficient and do more than these programs do. Nevertheless, as ++exposition of good programming style, and evangelism for a still-valuable ++philosophy, these books are unparalleled, and I recommend them highly. 
++ ++Acknowledgment: I would like to express my gratitude to Brian Kernighan ++of Bell Labs, the original Software Toolsmith, for reviewing this column. ++ ++@node GNU Free Documentation License ++@appendix GNU Free Documentation License ++ ++@include fdl.texi ++ ++@node Concept index ++@unnumbered Index ++ ++@printindex cp ++ ++@bye ++ ++@c Local variables: ++@c texinfo-column-for-description: 32 ++@c End: +diff -urNp coreutils-8.0-orig/src/Makefile.am coreutils-8.0/src/Makefile.am +--- coreutils-8.0-orig/src/Makefile.am 2009-09-21 14:29:33.000000000 +0200 ++++ coreutils-8.0/src/Makefile.am 2009-10-07 10:04:27.000000000 +0200 +@@ -154,7 +154,7 @@ tail_LDADD = $(nanosec_libs) # If necessary, add -lm to resolve use of pow in lib/strtod.c. uptime_LDADD = $(LDADD) $(POW_LIB) $(GETLOADAVG_LIBS) @@ -9,9 +16369,494 @@ dir_LDADD += $(LIB_ACL) ls_LDADD += $(LIB_ACL) ---- coreutils-6.7/src/su.c.pam 2007-01-09 17:00:01.000000000 +0000 -+++ coreutils-6.7/src/su.c 2007-01-09 17:16:43.000000000 +0000 -@@ -38,6 +38,16 @@ +diff -urNp coreutils-8.0-orig/src/Makefile.am.orig coreutils-8.0/src/Makefile.am.orig +--- coreutils-8.0-orig/src/Makefile.am.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/Makefile.am.orig 2009-09-21 14:29:33.000000000 +0200 +@@ -0,0 +1,480 @@ ++## Process this file with automake to produce Makefile.in -*-Makefile-*- ++ ++## Copyright (C) 1990, 1991, 1993-2009 Free Software Foundation, Inc. ++ ++## This program is free software: you can redistribute it and/or modify ++## it under the terms of the GNU General Public License as published by ++## the Free Software Foundation, either version 3 of the License, or ++## (at your option) any later version. ++## ++## This program is distributed in the hope that it will be useful, ++## but WITHOUT ANY WARRANTY; without even the implied warranty of ++## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++## GNU General Public License for more details. 
++## ++## You should have received a copy of the GNU General Public License ++## along with this program. If not, see <http://www.gnu.org/licenses/>. ++ ++# These are the names of programs that are not installed by default. ++# This list is *not* intended for programs like who, nice, chroot, etc., ++# that are built only when certain requisite system features are detected. ++# Hence, if you want to install programs from this list anyway, say A and B, ++# use --enable-install-program=A,B ++no_install__progs = \ ++ arch hostname su ++ ++build_if_possible__progs = \ ++ chroot df hostid nice pinky stdbuf libstdbuf.so stty su uname uptime users who ++ ++AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) ++ ++EXTRA_PROGRAMS = \ ++ $(no_install__progs) \ ++ $(build_if_possible__progs) \ ++ [ chcon chgrp chown chmod cp dd dircolors du \ ++ ginstall link ln dir vdir ls mkdir \ ++ mkfifo mknod mktemp \ ++ mv nohup readlink rm rmdir shred stat sync touch unlink \ ++ cat cksum comm csplit cut expand fmt fold head join groups md5sum \ ++ nl od paste pr ptx sha1sum sha224sum sha256sum sha384sum sha512sum \ ++ shuf sort split sum tac tail tr tsort unexpand uniq wc \ ++ basename date dirname echo env expr factor false \ ++ id kill logname pathchk printenv printf pwd \ ++ runcon seq sleep tee \ ++ test timeout true truncate tty whoami yes \ ++ base64 ++ ++bin_PROGRAMS = $(OPTIONAL_BIN_PROGS) ++ ++noinst_PROGRAMS = setuidgid getlimits ++ ++pkglib_PROGRAMS = $(OPTIONAL_PKGLIB_PROGS) ++ ++noinst_HEADERS = \ ++ chown-core.h \ ++ copy.h \ ++ cp-hash.h \ ++ dircolors.h \ ++ fs.h \ ++ group-list.h \ ++ ls.h \ ++ operand2sig.h \ ++ prog-fprintf.h \ ++ remove.h \ ++ system.h \ ++ wheel-size.h \ ++ wheel.h \ ++ uname.h ++ ++EXTRA_DIST = dcgen dircolors.hin tac-pipe.c \ ++ wheel-gen.pl extract-magic c99-to-c89.diff ++BUILT_SOURCES = ++CLEANFILES = $(SCRIPTS) su ++ ++# Also remove these sometimes-built programs. ++# For example, even when excluded, they're built via sc_check-AUTHORS.
++CLEANFILES += $(no_install__progs) ++ ++AM_CPPFLAGS = -I$(top_srcdir)/lib ++ ++noinst_LIBRARIES = libver.a ++nodist_libver_a_SOURCES = version.c version.h ++ ++# Sometimes, the expansion of $(LIBINTL) includes -lc which may ++# include modules defining variables like `optind', so libcoreutils.a ++# must precede $(LIBINTL) in order to ensure we use GNU getopt. ++# But libcoreutils.a must also follow $(LIBINTL), since libintl uses ++# replacement functions defined in libcoreutils.a. ++LDADD = libver.a ../lib/libcoreutils.a $(LIBINTL) ../lib/libcoreutils.a ++ ++cat_LDADD = $(LDADD) ++df_LDADD = $(LDADD) ++du_LDADD = $(LDADD) ++getlimits_LDADD = $(LDADD) ++ptx_LDADD = $(LDADD) ++split_LDADD = $(LDADD) ++stdbuf_LDADD = $(LDADD) ++timeout_LDADD = $(LDADD) ++truncate_LDADD = $(LDADD) ++ ++# for eaccess in lib/euidaccess.c. ++chcon_LDADD = $(LDADD) $(LIB_SELINUX) ++cp_LDADD = $(LDADD) $(LIB_EACCESS) $(LIB_SELINUX) ++ginstall_LDADD = $(LDADD) $(LIB_EACCESS) $(LIB_SELINUX) ++mkdir_LDADD = $(LDADD) $(LIB_SELINUX) ++mkfifo_LDADD = $(LDADD) $(LIB_SELINUX) ++mknod_LDADD = $(LDADD) $(LIB_SELINUX) ++mv_LDADD = $(LDADD) $(LIB_EACCESS) $(LIB_SELINUX) ++runcon_LDADD = $(LDADD) $(LIB_SELINUX) ++pathchk_LDADD = $(LDADD) $(LIB_EACCESS) ++rm_LDADD = $(LDADD) $(LIB_EACCESS) ++test_LDADD = $(LDADD) $(LIB_EACCESS) ++# This is for the '[' program. Automake transliterates '[' to '_'. 
++__LDADD = $(LDADD) $(LIB_EACCESS) ++ ++# for clock_gettime and fdatasync ++dd_LDADD = $(LDADD) $(LIB_GETHRXTIME) $(LIB_FDATASYNC) ++dir_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) $(LIB_SELINUX) $(LIB_CAP) ++id_LDADD = $(LDADD) $(LIB_SELINUX) ++ls_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) $(LIB_SELINUX) $(LIB_CAP) ++mktemp_LDADD = $(LDADD) $(LIB_GETHRXTIME) ++pr_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) ++shred_LDADD = $(LDADD) $(LIB_GETHRXTIME) $(LIB_FDATASYNC) ++shuf_LDADD = $(LDADD) $(LIB_GETHRXTIME) ++tac_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) ++vdir_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) $(LIB_SELINUX) $(LIB_CAP) ++ ++## If necessary, add -lm to resolve use of pow in lib/strtod.c. ++sort_LDADD = $(LDADD) $(POW_LIB) $(LIB_GETHRXTIME) ++ ++# for get_date and gettime ++date_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) ++touch_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) ++ ++# If necessary, add -lm to resolve use of pow in lib/strtod.c. ++# If necessary, add -liconv to resolve use of iconv in lib/unicodeio.c. ++printf_LDADD = $(LDADD) $(POW_LIB) $(LIBICONV) ++ ++# If necessary, add -lm to resolve use of pow in lib/strtod.c. ++seq_LDADD = $(LDADD) $(POW_LIB) ++ ++# If necessary, add libraries to resolve the `pow' reference in lib/strtod.c ++# and the `nanosleep' reference in lib/xnanosleep.c. ++nanosec_libs = $(LDADD) $(POW_LIB) $(LIB_NANOSLEEP) ++ ++# for various GMP functions ++expr_LDADD = $(LDADD) $(LIB_GMP) ++ ++# for various GMP functions ++factor_LDADD = $(LDADD) $(LIB_GMP) ++ ++sleep_LDADD = $(nanosec_libs) ++tail_LDADD = $(nanosec_libs) ++ ++# If necessary, add -lm to resolve use of pow in lib/strtod.c. 
++uptime_LDADD = $(LDADD) $(POW_LIB) $(GETLOADAVG_LIBS) ++ ++su_LDADD = $(LDADD) $(LIB_CRYPT) ++ ++dir_LDADD += $(LIB_ACL) ++ls_LDADD += $(LIB_ACL) ++vdir_LDADD += $(LIB_ACL) ++cp_LDADD += $(LIB_ACL) $(LIB_XATTR) ++mv_LDADD += $(LIB_ACL) $(LIB_XATTR) ++ginstall_LDADD += $(LIB_ACL) $(LIB_XATTR) ++ ++stat_LDADD = $(LDADD) $(LIB_SELINUX) ++ ++# Append $(LIBICONV) to each program that uses proper_name_utf8. ++cat_LDADD += $(LIBICONV) ++cp_LDADD += $(LIBICONV) ++df_LDADD += $(LIBICONV) ++du_LDADD += $(LIBICONV) ++getlimits_LDADD += $(LIBICONV) ++ptx_LDADD += $(LIBICONV) ++split_LDADD += $(LIBICONV) ++stdbuf_LDADD += $(LIBICONV) ++timeout_LDADD += $(LIBICONV) ++truncate_LDADD += $(LIBICONV) ++ ++# programs that use getaddrinfo (e.g., via canon_host) ++pinky_LDADD = $(LDADD) $(GETADDRINFO_LIB) ++who_LDADD = $(LDADD) $(GETADDRINFO_LIB) ++ ++$(PROGRAMS): ../lib/libcoreutils.a ++ ++# Get the release year from ../lib/version-etc.c. ++RELEASE_YEAR = \ ++ `sed -n '/.*COPYRIGHT_YEAR = \([0-9][0-9][0-9][0-9]\) };/s//\1/p' \ ++ $(top_srcdir)/lib/version-etc.c` ++ ++all-local: su$(EXEEXT) ++ ++installed_su = $(DESTDIR)$(bindir)/`echo su|sed '$(transform)'` ++ ++setuid_root_mode = a=rx,u+s ++ ++install_su = \ ++ if test "$(INSTALL_SU)" = yes; then \ ++ p=su; \ ++ echo " $(INSTALL_PROGRAM) $$p $(installed_su)"; \ ++ $(INSTALL_PROGRAM) $$p $(installed_su); \ ++ echo " chown root $(installed_su)"; \ ++ chown root $(installed_su); \ ++ echo " chmod $(setuid_root_mode) $(installed_su)"; \ ++ chmod $(setuid_root_mode) $(installed_su); \ ++ else \ ++ :; \ ++ fi ++ ++install-root: su$(EXEEXT) ++ @$(install_su) ++ ++install-exec-hook: su$(EXEEXT) ++ @if test "$(INSTALL_SU)" = yes; then \ ++ TMPFILE=$(DESTDIR)$(bindir)/.su-$$$$; \ ++ rm -f $$TMPFILE; \ ++ echo > $$TMPFILE; \ ++## See if we can create a setuid root executable in $(bindir). ++## If not, then don't even try to install su. 
++ can_create_suid_root_executable=no; \ ++ chown root $$TMPFILE > /dev/null 2>&1 \ ++ && chmod $(setuid_root_mode) $$TMPFILE > /dev/null 2>&1 \ ++ && can_create_suid_root_executable=yes; \ ++ rm -f $$TMPFILE; \ ++ if test $$can_create_suid_root_executable = yes; then \ ++ $(install_su); \ ++ else \ ++ echo "WARNING: insufficient access; not installing su"; \ ++ echo "NOTE: to install su, run 'make install-root' as root"; \ ++ rm -f $(installed_su); \ ++ fi; \ ++ else :; \ ++ fi ++ ++uninstall-local: ++# Remove su only if it's one we installed. ++ @if test "$(INSTALL_SU)" = yes; then \ ++ if grep '$(PACKAGE_NAME)' $(installed_su) > /dev/null 2>&1; then \ ++ echo " rm -f $(installed_su)"; \ ++ rm -f $(installed_su); \ ++ else :; \ ++ fi; \ ++ fi ++ ++copy_sources = copy.c cp-hash.c ++ ++# Use `ginstall' in the definition of PROGRAMS and in dependencies to avoid ++# confusion with the `install' target. The install rule transforms `ginstall' ++# to install before applying any user-specified name transformations. ++ ++transform = s/ginstall/install/; $(program_transform_name) ++ginstall_SOURCES = install.c prog-fprintf.c $(copy_sources) ++ ++# This is for the '[' program. Automake transliterates '[' to '_'. 
++__SOURCES = lbracket.c ++ ++cp_SOURCES = cp.c $(copy_sources) ++dir_SOURCES = ls.c ls-dir.c ++vdir_SOURCES = ls.c ls-vdir.c ++id_SOURCES = id.c group-list.c ++groups_SOURCES = groups.c group-list.c ++ln_SOURCES = ln.c ++ls_SOURCES = ls.c ls-ls.c ++chown_SOURCES = chown.c chown-core.c ++chgrp_SOURCES = chgrp.c chown-core.c ++kill_SOURCES = kill.c operand2sig.c ++timeout_SOURCES = timeout.c operand2sig.c ++ ++mv_SOURCES = mv.c remove.c $(copy_sources) ++rm_SOURCES = rm.c remove.c ++ ++mkdir_SOURCES = mkdir.c prog-fprintf.c ++rmdir_SOURCES = rmdir.c prog-fprintf.c ++ ++uname_SOURCES = uname.c uname-uname.c ++arch_SOURCES = uname.c uname-arch.c ++ ++md5sum_SOURCES = md5sum.c ++md5sum_CPPFLAGS = -DHASH_ALGO_MD5=1 $(AM_CPPFLAGS) ++sha1sum_SOURCES = md5sum.c ++sha1sum_CPPFLAGS = -DHASH_ALGO_SHA1=1 $(AM_CPPFLAGS) ++sha224sum_SOURCES = md5sum.c ++sha224sum_CPPFLAGS = -DHASH_ALGO_SHA224=1 $(AM_CPPFLAGS) ++sha256sum_SOURCES = md5sum.c ++sha256sum_CPPFLAGS = -DHASH_ALGO_SHA256=1 $(AM_CPPFLAGS) ++sha384sum_SOURCES = md5sum.c ++sha384sum_CPPFLAGS = -DHASH_ALGO_SHA384=1 $(AM_CPPFLAGS) ++sha512sum_SOURCES = md5sum.c ++sha512sum_CPPFLAGS = -DHASH_ALGO_SHA512=1 $(AM_CPPFLAGS) ++ ++ginstall_CPPFLAGS = -DENABLE_MATCHPATHCON=1 $(AM_CPPFLAGS) ++ ++# Ensure we don't link against libcoreutils.a as that lib is ++# not compiled with -fPIC which causes issues on 64 bit at least ++libstdbuf_so_LDADD = ++ ++# Note libstdbuf is only compiled if GCC is available ++# (as per the check in configure.ac), so these flags should be available. ++# libtool is probably required to relax this dependency. 
++libstdbuf_so_LDFLAGS = -shared ++libstdbuf_so_CFLAGS = -fPIC $(AM_CFLAGS) ++ ++editpl = sed -e 's,@''PERL''@,$(PERL),g' ++ ++BUILT_SOURCES += dircolors.h ++dircolors.h: dcgen dircolors.hin ++ $(AM_V_GEN)rm -f $@ $@-t ++ $(AM_V_at)$(PERL) -w -- $(srcdir)/dcgen $(srcdir)/dircolors.hin > $@-t ++ $(AM_V_at)chmod a-w $@-t ++ $(AM_V_at)mv $@-t $@ ++ ++wheel_size = 5 ++ ++BUILT_SOURCES += wheel-size.h ++wheel-size.h: Makefile.am ++ $(AM_V_GEN)rm -f $@ $@-t ++ $(AM_V_at)echo '#define WHEEL_SIZE $(wheel_size)' > $@-t ++ $(AM_V_at)chmod a-w $@-t ++ $(AM_V_at)mv $@-t $@ ++ ++BUILT_SOURCES += wheel.h ++wheel.h: wheel-gen.pl Makefile.am ++ $(AM_V_GEN)rm -f $@ $@-t ++ $(AM_V_at)$(srcdir)/wheel-gen.pl $(wheel_size) > $@-t ++ $(AM_V_at)chmod a-w $@-t ++ $(AM_V_at)mv $@-t $@ ++ ++# false exits nonzero even with --help or --version. ++# test doesn't support --help or --version. ++# Tell automake to exempt then from that installcheck test. ++AM_INSTALLCHECK_STD_OPTIONS_EXEMPT = false test ++ ++BUILT_SOURCES += fs.h ++fs.h: stat.c extract-magic ++ $(AM_V_GEN)rm -f $@ ++ $(AM_V_at)$(PERL) $(srcdir)/extract-magic $(srcdir)/stat.c > $@t ++ $(AM_V_at)chmod a-w $@t ++ $(AM_V_at)mv $@t $@ ++ ++BUILT_SOURCES += version.c ++version.c: Makefile ++ $(AM_V_GEN)rm -f $@ ++ $(AM_V_at)printf '#include \n' > $@t ++ $(AM_V_at)printf 'char const *Version = "$(PACKAGE_VERSION)";\n' >> $@t ++ $(AM_V_at)chmod a-w $@t ++ $(AM_V_at)mv $@t $@ ++ ++BUILT_SOURCES += version.h ++version.h: Makefile ++ $(AM_V_GEN)rm -f $@ ++ $(AM_V_at)printf 'extern char const *Version;\n' > $@t ++ $(AM_V_at)chmod a-w $@t ++ $(AM_V_at)mv $@t $@ ++ ++DISTCLEANFILES = version.c version.h ++MAINTAINERCLEANFILES = $(BUILT_SOURCES) ++ ++# Sort in traditional ASCII order, regardless of the current locale; ++# otherwise we may get into trouble with distinct strings that the ++# current locale considers to be equal. 
++ASSORT = LC_ALL=C sort ++ ++all_programs = \ ++ $(bin_PROGRAMS) \ ++ $(bin_SCRIPTS) \ ++ $(EXTRA_PROGRAMS) ++ ++built_programs.list: ++ @echo $(bin_PROGRAMS) $(bin_SCRIPTS) | tr ' ' '\n' \ ++ | sed -e 's,$(EXEEXT)$$,,' | $(ASSORT) -u | tr '\n' ' ' ++ ++all_programs.list: ++ @echo $(all_programs) | tr ' ' '\n' | sed -e 's,$(EXEEXT)$$,,' \ ++ | $(ASSORT) -u ++ ++# This is required because we have broken inter-directory dependencies: ++# in order to generate all man pages, even those for which we don't ++# install a binary, require that all programs be built at distribution time. ++dist-hook: $(all_programs) ++ ++pm = progs-makefile ++pr = progs-readme ++# Ensure that the list of programs in README matches the list ++# of programs we can build. ++check: check-README check-duplicate-no-install ++.PHONY: check-README ++check-README: ++ $(AM_V_GEN)rm -rf $(pr) $(pm) ++ $(AM_V_at)echo $(all_programs) \ ++ | tr -s ' ' '\n' | sed -e 's,$(EXEEXT)$$,,;s/ginstall/install/' \ ++ | sed /libstdbuf/d \ ++ | $(ASSORT) -u > $(pm) && \ ++ sed -n '/^The programs .* are:/,/^[a-zA-Z]/p' $(top_srcdir)/README \ ++ | sed -n '/^ */s///p' | tr -s ' ' '\n' > $(pr) ++ $(AM_V_at)diff $(pm) $(pr) && rm -rf $(pr) $(pm) ++ ++# Ensure that a by-default-not-installed program (listed in ++# $(no_install__progs) is not also listed in $(EXTRA_PROGRAMS), because ++# if that were to happen, it *would* be installed by default. ++.PHONY: check-duplicate-no-install ++check-duplicate-no-install: tr ++ $(AM_V_GEN)test -z "`echo '$(EXTRA_PROGRAMS)'| ./tr ' ' '\n' | uniq -d`" ++ ++# Ensure that the list of programs and author names is accurate. ++# We need a UTF8 locale. If a lack of locale support or a missing ++# translation inhibits printing of UTF-8 names, just skip this test. 
++au_dotdot = authors-dotdot ++au_actual = authors-actual ++.PHONY: sc_check-AUTHORS ++sc_check-AUTHORS: $(all_programs) ++ $(AM_V_GEN)locale=en_US.UTF-8; \ ++ LC_ALL="$$locale" ./cat --version \ ++ | grep ' Torbjorn ' > /dev/null \ ++ && { echo "$@: skipping this check"; exit 0; }; \ ++ rm -f $(au_actual) $(au_dotdot); \ ++ for i in `ls $(all_programs) | sed -e 's,$(EXEEXT)$$,,' \ ++ | sed /libstdbuf/d \ ++ | $(ASSORT) -u`; do \ ++ test "$$i" = '[' && continue; \ ++ exe=$$i; \ ++ if test "$$i" = install; then \ ++ exe=ginstall; \ ++ elif test "$$i" = test; then \ ++ exe='['; \ ++ fi; \ ++ LC_ALL="$$locale" ./$$exe --version \ ++ | perl -0 -pi -e 's/,\n/, /gm' \ ++ | sed -n -e '/Written by /{ s//'"$$i"': /;' \ ++ -e 's/,* and /, /; s/\.$$//; p; }'; \ ++ done > $(au_actual) && \ ++ sed -n '/^[^ ][^ ]*:/p' $(top_srcdir)/AUTHORS > $(au_dotdot) && \ ++ diff $(au_actual) $(au_dotdot) && rm -f $(au_actual) $(au_dotdot) ++ ++# The following rule is not designed to be portable, ++# and relies on tools that not everyone has. ++ ++# Most functions in src/*.c should have static scope. ++# Any that don't must be marked with `extern', but `main' ++# and `usage' are exceptions. They're always extern, but ++# don't need to be marked. Also functions starting with __ ++# are exempted due to possibly being added by the compiler ++# (when compiled as a shared library for example). ++# ++# The second nm|grep checks for file-scope variables with `extern' scope. 
++.PHONY: sc_tight_scope ++sc_tight_scope: $(bin_PROGRAMS) ++ $(AM_V_GEN)t=exceptions-$$$$; \ ++ trap "s=$$?; rm -f $$t; exit $$s" 0 1 2 13 15; \ ++ src=`for f in $(SOURCES); do \ ++ test -f $$f && d= || d=$(srcdir)/; echo $$d$$f; done`; \ ++ hdr=`for f in $(noinst_HEADERS); do \ ++ test -f $$f && d= || d=$(srcdir)/; echo $$d$$f; done`; \ ++ ( printf 'main\nusage\n_.*\n'; \ ++ grep -h -A1 '^extern .*[^;]$$' $$src \ ++ | grep -vE '^(extern |--)' | sed 's/ .*//'; \ ++ perl -ne '/^extern (?:enum )?\S+ (\S*) \(/ and print "$$1\n"' $$hdr; \ ++ ) | $(ASSORT) -u | sed 's/^/^/;s/$$/$$/' > $$t; \ ++ nm -e *.$(OBJEXT) \ ++ | sed -n 's/.* T //p' \ ++ | sed 's/^_//' \ ++ | grep -Ev -f $$t && \ ++ { echo 'the above functions should have static scope' 1>&2; \ ++ exit 1; } || : ; \ ++ ( printf '^program_name$$\n'; \ ++ perl -ne '/^extern .*?\**(\w+);/ and print "^$$1\$$\n"' \ ++ $$hdr *.h ) | $(ASSORT) -u > $$t; \ ++ nm -e *.$(OBJEXT) \ ++ | sed -n 's/.* [BD] //p' \ ++ | sed 's/^_//' \ ++ | grep -Ev -f $$t && \ ++ { echo 'the above variables should have static scope' 1>&2; \ ++ exit 1; } || : ++ ++# Use the just-built ./ginstall, when not cross-compiling. ++if CROSS_COMPILING ++cu_install_program = @INSTALL_PROGRAM@ ++else ++cu_install_program = ./ginstall ++endif ++INSTALL_PROGRAM = $(cu_install_program) +diff -urNp coreutils-8.0-orig/src/su.c coreutils-8.0/src/su.c +--- coreutils-8.0-orig/src/su.c 2009-10-07 10:03:29.000000000 +0200 ++++ coreutils-8.0/src/su.c 2009-10-07 10:04:27.000000000 +0200 +@@ -37,6 +37,16 @@ restricts who can su to UID 0 accounts. RMS considers that to be fascist. @@ -28,9 +16873,9 @@ Compile-time options: -DSYSLOG_SUCCESS Log successful su's (by default, to root) with syslog. -DSYSLOG_FAILURE Log failed su's (by default, to root) with syslog. -@@ -59,6 +69,15 @@ - prototype (returning `int') in . 
*/ - #define getusershell _getusershell_sys_proto_ +@@ -53,6 +63,15 @@ + #include + #include +#ifdef USE_PAM +# include @@ -44,7 +16889,7 @@ #include "system.h" #include "getpass.h" -@@ -128,12 +147,19 @@ +@@ -120,12 +139,19 @@ /* The user to become if none is specified. */ #define DEFAULT_USER "root" @@ -65,7 +16910,7 @@ /* If true, pass the `-f' option to the subshell. */ static bool fast_startup; -@@ -225,7 +251,26 @@ +@@ -211,7 +237,26 @@ log_su (struct passwd const *pw, bool su } #endif @@ -92,7 +16937,7 @@ Return true if the user gives the correct password for entry PW, false if not. Return true without asking for a password if run by UID 0 or if PW has an empty password. */ -@@ -233,6 +278,44 @@ +@@ -219,6 +264,44 @@ log_su (struct passwd const *pw, bool su static bool correct_password (const struct passwd *pw) { @@ -137,7 +16982,7 @@ char *unencrypted, *encrypted, *correct; #if HAVE_GETSPNAM && HAVE_STRUCT_SPWD_SP_PWDP /* Shadow passwd stuff for SVR3 and maybe other systems. */ -@@ -257,6 +340,7 @@ +@@ -243,6 +326,7 @@ correct_password (const struct passwd *p encrypted = crypt (unencrypted, correct); memset (unencrypted, 0, strlen (unencrypted)); return STREQ (encrypted, correct); @@ -145,7 +16990,7 @@ } /* Update `environ' for the new shell based on PW, with SHELL being -@@ -270,12 +354,18 @@ +@@ -256,12 +340,18 @@ modify_environment (const struct passwd /* Leave TERM unchanged. Set HOME, SHELL, USER, LOGNAME, PATH. Unset all other environment variables. 
*/ char const *term = getenv ("TERM"); @@ -164,7 +17009,7 @@ xsetenv ("HOME", pw->pw_dir); xsetenv ("SHELL", shell); xsetenv ("USER", pw->pw_name); -@@ -308,8 +398,13 @@ +@@ -294,8 +384,13 @@ change_identity (const struct passwd *pw { #ifdef HAVE_INITGROUPS errno = 0; @@ -179,7 +17024,7 @@ endgrent (); #endif if (setgid (pw->pw_gid)) -@@ -318,6 +413,31 @@ +@@ -304,6 +399,31 @@ change_identity (const struct passwd *pw error (EXIT_FAILURE, errno, _("cannot set user id")); } @@ -211,7 +17056,7 @@ /* Run SHELL, or DEFAULT_SHELL if SHELL is empty. If COMMAND is nonzero, pass it to the shell with the -c option. Pass ADDITIONAL_ARGS to the shell as more arguments; there -@@ -325,17 +445,49 @@ +@@ -311,17 +431,49 @@ change_identity (const struct passwd *pw static void run_shell (char const *shell, char const *command, char **additional_args, @@ -262,7 +17107,7 @@ shell_basename = last_component (shell); arg0 = xmalloc (strlen (shell_basename) + 2); arg0[0] = '-'; -@@ -360,6 +512,66 @@ +@@ -346,6 +498,66 @@ run_shell (char const *shell, char const error (0, errno, "%s", shell); exit (exit_status); } @@ -329,7 +17174,7 @@ } /* Return true if SHELL is a restricted shell (one not returned by -@@ -527,9 +739,9 @@ +@@ -513,9 +725,9 @@ main (int argc, char **argv) shell = xstrdup (shell ? shell : pw->pw_shell); modify_environment (pw, shell); @@ -342,78 +17187,528 @@ - run_shell (shell, command, argv + optind, MAX (0, argc - optind)); + run_shell (shell, command, argv + optind, MAX (0, argc - optind), pw); } ---- coreutils-6.7/doc/coreutils.texi.pam 2006-10-27 15:30:48.000000000 +0100 -+++ coreutils-6.7/doc/coreutils.texi 2007-01-09 17:00:01.000000000 +0000 -@@ -13395,8 +13395,11 @@ - @findex syslog - @command{su} can optionally be compiled to use @code{syslog} to report - failed, and optionally successful, @command{su} attempts. (If the system --supports @code{syslog}.) However, GNU @command{su} does not check if the --user is a member of the @code{wheel} group; see below. 
-+supports @code{syslog}.) +diff -urNp coreutils-8.0-orig/src/su.c.orig coreutils-8.0/src/su.c.orig +--- coreutils-8.0-orig/src/su.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/su.c.orig 2009-10-07 10:03:29.000000000 +0200 +@@ -0,0 +1,521 @@ ++/* su for GNU. Run a shell with substitute user and group IDs. ++ Copyright (C) 1992-2006, 2008-2009 Free Software Foundation, Inc. + -+This version of @command{su} has support for using PAM for -+authentication. You can edit @file{/etc/pam.d/su} to customize its -+behaviour. - - The program accepts the following options. Also see @ref{Common options}. - -@@ -12815,6 +12815,8 @@ - @env{PATH} to a compiled-in default value. Change to @var{user}'s home - directory. Prepend @samp{-} to the shell's name, intended to make it - read its login startup file(s). -+Additionaly @env{DISPLAY} and @env{XAUTHORITY} environment variables -+are preserved as well for PAM functionality. - - @item -m - @itemx -p -@@ -13477,33 +13480,6 @@ - the exit status of the subshell otherwise - @end display - --@cindex wheel group, not supported --@cindex group wheel, not supported --@cindex fascism --@subsection Why GNU @command{su} does not support the @samp{wheel} group -- --(This section is by Richard Stallman.) -- --@cindex Twenex --@cindex MIT AI lab --Sometimes a few of the users try to hold total power over all the --rest. For example, in 1984, a few users at the MIT AI lab decided to --seize power by changing the operator password on the Twenex system and --keeping it secret from everyone else. (I was able to thwart this coup --and give power back to the users by patching the kernel, but I --wouldn't know how to do that in Unix.) -- --However, occasionally the rulers do tell someone. Under the usual --@command{su} mechanism, once someone learns the root password who --sympathizes with the ordinary users, he or she can tell the rest. 
The --``wheel group'' feature would make this impossible, and thus cement the --power of the rulers. -- --I'm on the side of the masses, not that of the rulers. If you are --used to supporting the bosses and sysadmins in whatever they do, you --might find this idea strange at first. -- -- - @node timeout invocation - @section @command{timeout}: Run a command with a time limit - ---- coreutils-7.1/configure.ac.pam -+++ coreutils-7.1/configure.ac -@@ -44,6 +44,13 @@ - AC_DEFINE([GNULIB_PORTCHECK], [1], [enable some gnulib portability checks]) - fi - -+dnl Give the chance to enable PAM -+AC_ARG_ENABLE(pam, dnl -+[ --enable-pam Enable use of the PAM libraries], -+[AC_DEFINE(USE_PAM, 1, [Define if you want to use PAM]) -+LIB_PAM="-ldl -lpam -lpam_misc" -+AC_SUBST(LIB_PAM)]) ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. + - AC_FUNC_FORK - - optional_bin_progs= ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* Run a shell with the real and effective UID and GID and groups ++ of USER, default `root'. ++ ++ The shell run is taken from USER's password entry, /bin/sh if ++ none is specified there. If the account has a password, su ++ prompts for a password unless run by a user with real UID 0. ++ ++ Does not change the current directory. ++ Sets `HOME' and `SHELL' from the password entry for USER, and if ++ USER is not root, sets `USER' and `LOGNAME' to USER. ++ The subshell is not a login shell. 
++ ++ If one or more ARGs are given, they are passed as additional ++ arguments to the subshell. ++ ++ Does not handle /bin/sh or other shells specially ++ (setting argv[0] to "-su", passing -c only to certain shells, etc.). ++ I don't see the point in doing that, and it's ugly. ++ ++ This program intentionally does not support a "wheel group" that ++ restricts who can su to UID 0 accounts. RMS considers that to ++ be fascist. ++ ++ Compile-time options: ++ -DSYSLOG_SUCCESS Log successful su's (by default, to root) with syslog. ++ -DSYSLOG_FAILURE Log failed su's (by default, to root) with syslog. ++ ++ -DSYSLOG_NON_ROOT Log all su's, not just those to root (UID 0). ++ Never logs attempted su's to nonexistent accounts. ++ ++ Written by David MacKenzie . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "system.h" ++#include "getpass.h" ++ ++#if HAVE_SYSLOG_H && HAVE_SYSLOG ++# include ++#else ++# undef SYSLOG_SUCCESS ++# undef SYSLOG_FAILURE ++# undef SYSLOG_NON_ROOT ++#endif ++ ++#if HAVE_SYS_PARAM_H ++# include ++#endif ++ ++#ifndef HAVE_ENDGRENT ++# define endgrent() ((void) 0) ++#endif ++ ++#ifndef HAVE_ENDPWENT ++# define endpwent() ((void) 0) ++#endif ++ ++#if HAVE_SHADOW_H ++# include ++#endif ++ ++#include "error.h" ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "su" ++ ++#define AUTHORS proper_name ("David MacKenzie") ++ ++#if HAVE_PATHS_H ++# include ++#endif ++ ++/* The default PATH for simulated logins to non-superuser accounts. */ ++#ifdef _PATH_DEFPATH ++# define DEFAULT_LOGIN_PATH _PATH_DEFPATH ++#else ++# define DEFAULT_LOGIN_PATH ":/usr/ucb:/bin:/usr/bin" ++#endif ++ ++/* The default PATH for simulated logins to superuser accounts. 
*/ ++#ifdef _PATH_DEFPATH_ROOT ++# define DEFAULT_ROOT_LOGIN_PATH _PATH_DEFPATH_ROOT ++#else ++# define DEFAULT_ROOT_LOGIN_PATH "/usr/ucb:/bin:/usr/bin:/etc" ++#endif ++ ++/* The default paths which get set are both bogus and oddly influenced ++ by and -D on the commands line. Just to be clear, we'll set ++ these explicitly. -ewt */ ++#undef DEFAULT_LOGIN_PATH ++#undef DEFAULT_ROOT_LOGIN_PATH ++#define DEFAULT_LOGIN_PATH "/usr/local/bin:/bin:/usr/bin" ++#define DEFAULT_ROOT_LOGIN_PATH \ ++ "/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin" ++ ++/* The shell to run if none is given in the user's passwd entry. */ ++#define DEFAULT_SHELL "/bin/sh" ++ ++/* The user to become if none is specified. */ ++#define DEFAULT_USER "root" ++ ++char *crypt (char const *key, char const *salt); ++ ++extern char **environ; ++ ++static void run_shell (char const *, char const *, char **, size_t) ++ ATTRIBUTE_NORETURN; ++ ++/* If true, pass the `-f' option to the subshell. */ ++static bool fast_startup; ++ ++/* If true, simulate a login instead of just starting a shell. */ ++static bool simulate_login; ++ ++/* If true, change some environment vars to indicate the user su'd to. */ ++static bool change_environment; ++ ++static struct option const longopts[] = ++{ ++ {"command", required_argument, NULL, 'c'}, ++ {"fast", no_argument, NULL, 'f'}, ++ {"login", no_argument, NULL, 'l'}, ++ {"preserve-environment", no_argument, NULL, 'p'}, ++ {"shell", required_argument, NULL, 's'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++/* Add NAME=VAL to the environment, checking for out of memory errors. 
*/ ++ ++static void ++xsetenv (char const *name, char const *val) ++{ ++ size_t namelen = strlen (name); ++ size_t vallen = strlen (val); ++ char *string = xmalloc (namelen + 1 + vallen + 1); ++ strcpy (string, name); ++ string[namelen] = '='; ++ strcpy (string + namelen + 1, val); ++ if (putenv (string) != 0) ++ xalloc_die (); ++} ++ ++#if defined SYSLOG_SUCCESS || defined SYSLOG_FAILURE ++/* Log the fact that someone has run su to the user given by PW; ++ if SUCCESSFUL is true, they gave the correct password, etc. */ ++ ++static void ++log_su (struct passwd const *pw, bool successful) ++{ ++ const char *new_user, *old_user, *tty; ++ ++# ifndef SYSLOG_NON_ROOT ++ if (pw->pw_uid) ++ return; ++# endif ++ new_user = pw->pw_name; ++ /* The utmp entry (via getlogin) is probably the best way to identify ++ the user, especially if someone su's from a su-shell. */ ++ old_user = getlogin (); ++ if (!old_user) ++ { ++ /* getlogin can fail -- usually due to lack of utmp entry. ++ Resort to getpwuid. */ ++ struct passwd *pwd = getpwuid (getuid ()); ++ old_user = (pwd ? pwd->pw_name : ""); ++ } ++ tty = ttyname (STDERR_FILENO); ++ if (!tty) ++ tty = "none"; ++ /* 4.2BSD openlog doesn't have the third parameter. */ ++ openlog (last_component (program_name), 0 ++# ifdef LOG_AUTH ++ , LOG_AUTH ++# endif ++ ); ++ syslog (LOG_NOTICE, ++# ifdef SYSLOG_NON_ROOT ++ "%s(to %s) %s on %s", ++# else ++ "%s%s on %s", ++# endif ++ successful ? "" : "FAILED SU ", ++# ifdef SYSLOG_NON_ROOT ++ new_user, ++# endif ++ old_user, tty); ++ closelog (); ++} ++#endif ++ ++/* Ask the user for a password. ++ Return true if the user gives the correct password for entry PW, ++ false if not. Return true without asking for a password if run by UID 0 ++ or if PW has an empty password. */ ++ ++static bool ++correct_password (const struct passwd *pw) ++{ ++ char *unencrypted, *encrypted, *correct; ++#if HAVE_GETSPNAM && HAVE_STRUCT_SPWD_SP_PWDP ++ /* Shadow passwd stuff for SVR3 and maybe other systems. 
*/ ++ struct spwd *sp = getspnam (pw->pw_name); ++ ++ endspent (); ++ if (sp) ++ correct = sp->sp_pwdp; ++ else ++#endif ++ correct = pw->pw_passwd; ++ ++ if (getuid () == 0 || !correct || correct[0] == '\0') ++ return true; ++ ++ unencrypted = getpass (_("Password:")); ++ if (!unencrypted) ++ { ++ error (0, 0, _("getpass: cannot open /dev/tty")); ++ return false; ++ } ++ encrypted = crypt (unencrypted, correct); ++ memset (unencrypted, 0, strlen (unencrypted)); ++ return STREQ (encrypted, correct); ++} ++ ++/* Update `environ' for the new shell based on PW, with SHELL being ++ the value for the SHELL environment variable. */ ++ ++static void ++modify_environment (const struct passwd *pw, const char *shell) ++{ ++ if (simulate_login) ++ { ++ /* Leave TERM unchanged. Set HOME, SHELL, USER, LOGNAME, PATH. ++ Unset all other environment variables. */ ++ char const *term = getenv ("TERM"); ++ if (term) ++ term = xstrdup (term); ++ environ = xmalloc ((6 + !!term) * sizeof (char *)); ++ environ[0] = NULL; ++ if (term) ++ xsetenv ("TERM", term); ++ xsetenv ("HOME", pw->pw_dir); ++ xsetenv ("SHELL", shell); ++ xsetenv ("USER", pw->pw_name); ++ xsetenv ("LOGNAME", pw->pw_name); ++ xsetenv ("PATH", (pw->pw_uid ++ ? DEFAULT_LOGIN_PATH ++ : DEFAULT_ROOT_LOGIN_PATH)); ++ } ++ else ++ { ++ /* Set HOME, SHELL, and if not becoming a super-user, ++ USER and LOGNAME. */ ++ if (change_environment) ++ { ++ xsetenv ("HOME", pw->pw_dir); ++ xsetenv ("SHELL", shell); ++ if (pw->pw_uid) ++ { ++ xsetenv ("USER", pw->pw_name); ++ xsetenv ("LOGNAME", pw->pw_name); ++ } ++ } ++ } ++} ++ ++/* Become the user and group(s) specified by PW. 
*/ ++ ++static void ++change_identity (const struct passwd *pw) ++{ ++#ifdef HAVE_INITGROUPS ++ errno = 0; ++ if (initgroups (pw->pw_name, pw->pw_gid) == -1) ++ error (EXIT_FAILURE, errno, _("cannot set groups")); ++ endgrent (); ++#endif ++ if (setgid (pw->pw_gid)) ++ error (EXIT_FAILURE, errno, _("cannot set group id")); ++ if (setuid (pw->pw_uid)) ++ error (EXIT_FAILURE, errno, _("cannot set user id")); ++} ++ ++/* Run SHELL, or DEFAULT_SHELL if SHELL is empty. ++ If COMMAND is nonzero, pass it to the shell with the -c option. ++ Pass ADDITIONAL_ARGS to the shell as more arguments; there ++ are N_ADDITIONAL_ARGS extra arguments. */ ++ ++static void ++run_shell (char const *shell, char const *command, char **additional_args, ++ size_t n_additional_args) ++{ ++ size_t n_args = 1 + fast_startup + 2 * !!command + n_additional_args + 1; ++ char const **args = xnmalloc (n_args, sizeof *args); ++ size_t argno = 1; ++ ++ if (simulate_login) ++ { ++ char *arg0; ++ char *shell_basename; ++ ++ shell_basename = last_component (shell); ++ arg0 = xmalloc (strlen (shell_basename) + 2); ++ arg0[0] = '-'; ++ strcpy (arg0 + 1, shell_basename); ++ args[0] = arg0; ++ } ++ else ++ args[0] = last_component (shell); ++ if (fast_startup) ++ args[argno++] = "-f"; ++ if (command) ++ { ++ args[argno++] = "-c"; ++ args[argno++] = command; ++ } ++ memcpy (args + argno, additional_args, n_additional_args * sizeof *args); ++ args[argno + n_additional_args] = NULL; ++ execv (shell, (char **) args); ++ ++ { ++ int exit_status = (errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE); ++ error (0, errno, "%s", shell); ++ exit (exit_status); ++ } ++} ++ ++/* Return true if SHELL is a restricted shell (one not returned by ++ getusershell), else false, meaning it is a standard shell. 
*/ ++ ++static bool ++restricted_shell (const char *shell) ++{ ++ char *line; ++ ++ setusershell (); ++ while ((line = getusershell ()) != NULL) ++ { ++ if (*line != '#' && STREQ (line, shell)) ++ { ++ endusershell (); ++ return false; ++ } ++ } ++ endusershell (); ++ return true; ++} ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("Usage: %s [OPTION]... [-] [USER [ARG]...]\n"), program_name); ++ fputs (_("\ ++Change the effective user id and group id to that of USER.\n\ ++\n\ ++ -, -l, --login make the shell a login shell\n\ ++ -c, --command=COMMAND pass a single COMMAND to the shell with -c\n\ ++ -f, --fast pass -f to the shell (for csh or tcsh)\n\ ++ -m, --preserve-environment do not reset environment variables\n\ ++ -p same as -m\n\ ++ -s, --shell=SHELL run SHELL if /etc/shells allows it\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++A mere - implies -l. 
If USER not given, assume root.\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int optc; ++ const char *new_user = DEFAULT_USER; ++ char *command = NULL; ++ char *shell = NULL; ++ struct passwd *pw; ++ struct passwd pw_copy; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ initialize_exit_failure (EXIT_FAILURE); ++ atexit (close_stdout); ++ ++ fast_startup = false; ++ simulate_login = false; ++ change_environment = true; ++ ++ while ((optc = getopt_long (argc, argv, "c:flmps:", longopts, NULL)) != -1) ++ { ++ switch (optc) ++ { ++ case 'c': ++ command = optarg; ++ break; ++ ++ case 'f': ++ fast_startup = true; ++ break; ++ ++ case 'l': ++ simulate_login = true; ++ break; ++ ++ case 'm': ++ case 'p': ++ change_environment = false; ++ break; ++ ++ case 's': ++ shell = optarg; ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ if (optind < argc && STREQ (argv[optind], "-")) ++ { ++ simulate_login = true; ++ ++optind; ++ } ++ if (optind < argc) ++ new_user = argv[optind++]; ++ ++ pw = getpwnam (new_user); ++ if (! (pw && pw->pw_name && pw->pw_name[0] && pw->pw_dir && pw->pw_dir[0] ++ && pw->pw_passwd)) ++ error (EXIT_FAILURE, 0, _("user %s does not exist"), new_user); ++ ++ /* Make a copy of the password information and point pw at the local ++ copy instead. Otherwise, some systems (e.g. GNU/Linux) would clobber ++ the static data through the getlogin call from log_su. ++ Also, make sure pw->pw_shell is a nonempty string. ++ It may be NULL when NEW_USER is a username that is retrieved via NIS (YP), ++ but that doesn't have a default shell listed. 
*/ ++ pw_copy = *pw; ++ pw = &pw_copy; ++ pw->pw_name = xstrdup (pw->pw_name); ++ pw->pw_passwd = xstrdup (pw->pw_passwd); ++ pw->pw_dir = xstrdup (pw->pw_dir); ++ pw->pw_shell = xstrdup (pw->pw_shell && pw->pw_shell[0] ++ ? pw->pw_shell ++ : DEFAULT_SHELL); ++ endpwent (); ++ ++ if (!correct_password (pw)) ++ { ++#ifdef SYSLOG_FAILURE ++ log_su (pw, false); ++#endif ++ error (EXIT_FAILURE, 0, _("incorrect password")); ++ } ++#ifdef SYSLOG_SUCCESS ++ else ++ { ++ log_su (pw, true); ++ } ++#endif ++ ++ if (!shell && !change_environment) ++ shell = getenv ("SHELL"); ++ if (shell && getuid () != 0 && restricted_shell (pw->pw_shell)) ++ { ++ /* The user being su'd to has a nonstandard shell, and so is ++ probably a uucp account or has restricted access. Don't ++ compromise the account by allowing access with a standard ++ shell. */ ++ error (0, 0, _("using restricted shell %s"), pw->pw_shell); ++ shell = NULL; ++ } ++ shell = xstrdup (shell ? shell : pw->pw_shell); ++ modify_environment (pw, shell); ++ ++ change_identity (pw); ++ if (simulate_login && chdir (pw->pw_dir) != 0) ++ error (0, errno, _("warning: cannot change directory to %s"), pw->pw_dir); ++ ++ run_shell (shell, command, argv + optind, MAX (0, argc - optind)); ++} diff --git a/coreutils-selinux.patch b/coreutils-selinux.patch index c646812..12995d2 100644 --- a/coreutils-selinux.patch +++ b/coreutils-selinux.patch @@ -1,7 +1,7 @@ -diff -urNp coreutils-7.1-orig/configure.ac coreutils-7.1/configure.ac ---- coreutils-7.1-orig/configure.ac 2009-02-24 13:47:15.000000000 +0100 -+++ coreutils-7.1/configure.ac 2009-02-24 13:47:15.000000000 +0100 -@@ -84,6 +84,13 @@ AC_ARG_ENABLE(pam, dnl +diff -urNp coreutils-8.0-orig/configure.ac coreutils-8.0/configure.ac +--- coreutils-8.0-orig/configure.ac 2009-10-07 10:09:43.000000000 +0200 ++++ coreutils-8.0/configure.ac 2009-10-07 10:10:11.000000000 +0200 +@@ -122,6 +122,13 @@ AC_ARG_ENABLE(pam, dnl LIB_PAM="-ldl -lpam -lpam_misc" AC_SUBST(LIB_PAM)]) @@ -15,18 +15,35 @@ 
diff -urNp coreutils-7.1-orig/configure.ac coreutils-7.1/configure.ac AC_FUNC_FORK optional_bin_progs= -diff -urNp coreutils-7.1-orig/man/chcon.x coreutils-7.1/man/chcon.x ---- coreutils-7.1-orig/man/chcon.x 2008-09-18 09:06:57.000000000 +0200 -+++ coreutils-7.1/man/chcon.x 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/configure.ac.orig coreutils-8.0/configure.ac.orig +--- coreutils-8.0-orig/configure.ac.orig 2009-10-07 10:09:43.000000000 +0200 ++++ coreutils-8.0/configure.ac.orig 2009-10-07 10:09:43.000000000 +0200 +@@ -115,6 +115,13 @@ if test "$gl_gcc_warnings" = yes; then + AC_DEFINE([GNULIB_PORTCHECK], [1], [enable some gnulib portability checks]) + fi + ++dnl Give the chance to enable PAM ++AC_ARG_ENABLE(pam, dnl ++[ --enable-pam Enable use of the PAM libraries], ++[AC_DEFINE(USE_PAM, 1, [Define if you want to use PAM]) ++LIB_PAM="-ldl -lpam -lpam_misc" ++AC_SUBST(LIB_PAM)]) ++ + AC_FUNC_FORK + + optional_bin_progs= +diff -urNp coreutils-8.0-orig/man/chcon.x coreutils-8.0/man/chcon.x +--- coreutils-8.0-orig/man/chcon.x 2009-09-01 13:01:16.000000000 +0200 ++++ coreutils-8.0/man/chcon.x 2009-10-07 10:10:11.000000000 +0200 @@ -1,4 +1,4 @@ [NAME] -chcon \- change file security context +chcon \- change file SELinux security context [DESCRIPTION] .\" Add any additional description here -diff -urNp coreutils-7.1-orig/man/runcon.x coreutils-7.1/man/runcon.x ---- coreutils-7.1-orig/man/runcon.x 2008-09-18 09:06:57.000000000 +0200 -+++ coreutils-7.1/man/runcon.x 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/man/runcon.x coreutils-8.0/man/runcon.x +--- coreutils-8.0-orig/man/runcon.x 2009-09-01 13:01:16.000000000 +0200 ++++ coreutils-8.0/man/runcon.x 2009-10-07 10:10:11.000000000 +0200 @@ -1,5 +1,5 @@ [NAME] -runcon \- run command with specified security context @@ -34,10 +51,10 @@ diff -urNp coreutils-7.1-orig/man/runcon.x coreutils-7.1/man/runcon.x [DESCRIPTION] Run COMMAND with completely-specified CONTEXT, or with current or 
transitioned security context modified by one or more of LEVEL, -diff -urNp coreutils-7.1-orig/src/copy.c coreutils-7.1/src/copy.c ---- coreutils-7.1-orig/src/copy.c 2009-02-18 15:32:52.000000000 +0100 -+++ coreutils-7.1/src/copy.c 2009-02-24 13:47:15.000000000 +0100 -@@ -1830,6 +1830,8 @@ copy_internal (char const *src_name, cha +diff -urNp coreutils-8.0-orig/src/copy.c coreutils-8.0/src/copy.c +--- coreutils-8.0-orig/src/copy.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/copy.c 2009-10-07 10:10:11.000000000 +0200 +@@ -1943,6 +1943,8 @@ copy_internal (char const *src_name, cha { /* Here, we are crossing a file system boundary and cp's -x option is in effect: so don't copy the contents of this directory. */ @@ -46,10 +63,2383 @@ diff -urNp coreutils-7.1-orig/src/copy.c coreutils-7.1/src/copy.c } else { -diff -urNp coreutils-7.1-orig/src/copy.h coreutils-7.1/src/copy.h ---- coreutils-7.1-orig/src/copy.h 2009-02-18 15:32:52.000000000 +0100 -+++ coreutils-7.1/src/copy.h 2009-02-24 13:47:15.000000000 +0100 -@@ -140,6 +140,9 @@ struct cp_options +diff -urNp coreutils-8.0-orig/src/copy.c.orig coreutils-8.0/src/copy.c.orig +--- coreutils-8.0-orig/src/copy.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/copy.c.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,2369 @@ ++/* copy.c -- core functions for copying files and directories ++ Copyright (C) 89, 90, 91, 1995-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see <http://www.gnu.org/licenses/>. */ ++ ++/* Extracted from cp.c and librarified by Jim Meyering. */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#if HAVE_HURD_H ++# include ++#endif ++#if HAVE_PRIV_H ++# include ++#endif ++ ++#include "system.h" ++#include "acl.h" ++#include "backupfile.h" ++#include "buffer-lcm.h" ++#include "copy.h" ++#include "cp-hash.h" ++#include "error.h" ++#include "fcntl--.h" ++#include "file-set.h" ++#include "filemode.h" ++#include "filenamecat.h" ++#include "full-write.h" ++#include "hash.h" ++#include "hash-triple.h" ++#include "ignore-value.h" ++#include "quote.h" ++#include "same.h" ++#include "savedir.h" ++#include "stat-time.h" ++#include "utimecmp.h" ++#include "utimens.h" ++#include "write-any-file.h" ++#include "areadlink.h" ++#include "yesno.h" ++ ++#if USE_XATTR ++# include ++# include ++# include ++# include "verror.h" ++#endif ++ ++#if HAVE_SYS_IOCTL_H ++# include ++#endif ++ ++#ifndef HAVE_FCHOWN ++# define HAVE_FCHOWN false ++# define fchown(fd, uid, gid) (-1) ++#endif ++ ++#ifndef HAVE_LCHOWN ++# define HAVE_LCHOWN false ++# define lchown(name, uid, gid) chown (name, uid, gid) ++#endif ++ ++#ifndef HAVE_MKFIFO ++static int ++rpl_mkfifo (char const *file, mode_t mode) ++{ ++ errno = ENOTSUP; ++ return -1; ++} ++# define mkfifo rpl_mkfifo ++#endif ++ ++#ifndef USE_ACL ++# define USE_ACL 0 ++#endif ++ ++#define SAME_OWNER(A, B) ((A).st_uid == (B).st_uid) ++#define SAME_GROUP(A, B) ((A).st_gid == (B).st_gid) ++#define SAME_OWNER_AND_GROUP(A, B) (SAME_OWNER (A, B) && SAME_GROUP (A, B)) ++ ++struct dir_list ++{ ++ struct dir_list *parent; ++ ino_t ino; ++ dev_t dev; ++}; ++ ++/* Initial size of the cp.dest_info hash table. 
*/ ++#define DEST_INFO_INITIAL_CAPACITY 61 ++ ++static bool copy_internal (char const *src_name, char const *dst_name, ++ bool new_dst, dev_t device, ++ struct dir_list *ancestors, ++ const struct cp_options *x, ++ bool command_line_arg, ++ bool *first_dir_created_per_command_line_arg, ++ bool *copy_into_self, ++ bool *rename_succeeded); ++static bool owner_failure_ok (struct cp_options const *x); ++ ++/* Pointers to the file names: they're used in the diagnostic that is issued ++ when we detect the user is trying to copy a directory into itself. */ ++static char const *top_level_src_name; ++static char const *top_level_dst_name; ++ ++/* Set the timestamp of symlink, FILE, to TIMESPEC. ++ If this system lacks support for that, simply return 0. */ ++static inline int ++utimens_symlink (char const *file, struct timespec const *timespec) ++{ ++ int err = 0; ++ ++#if HAVE_UTIMENSAT ++ err = utimensat (AT_FDCWD, file, timespec, AT_SYMLINK_NOFOLLOW); ++ /* When configuring on a system with new headers and libraries, and ++ running on one with a kernel that is old enough to lack the syscall, ++ utimensat fails with ENOSYS. Ignore that. */ ++ if (err && errno == ENOSYS) ++ err = 0; ++#else ++ (void) file; ++ (void) timespec; ++#endif ++ ++ return err; ++} ++ ++/* Perform the O(1) btrfs clone operation, if possible. ++ Upon success, return 0. Otherwise, return -1 and set errno. */ ++static inline int ++clone_file (int dest_fd, int src_fd) ++{ ++#ifdef __linux__ ++# undef BTRFS_IOCTL_MAGIC ++# define BTRFS_IOCTL_MAGIC 0x94 ++# undef BTRFS_IOC_CLONE ++# define BTRFS_IOC_CLONE _IOW (BTRFS_IOCTL_MAGIC, 9, int) ++ return ioctl (dest_fd, BTRFS_IOC_CLONE, src_fd); ++#else ++ (void) dest_fd; ++ (void) src_fd; ++ errno = ENOTSUP; ++ return -1; ++#endif ++} ++ ++/* FIXME: describe */ ++/* FIXME: rewrite this to use a hash table so we avoid the quadratic ++ performance hit that's probably noticeable only on trees deeper ++ than a few hundred levels. 
See use of active_dir_map in remove.c */ ++ ++static bool ++is_ancestor (const struct stat *sb, const struct dir_list *ancestors) ++{ ++ while (ancestors != 0) ++ { ++ if (ancestors->ino == sb->st_ino && ancestors->dev == sb->st_dev) ++ return true; ++ ancestors = ancestors->parent; ++ } ++ return false; ++} ++ ++static bool ++errno_unsupported (int err) ++{ ++ return err == ENOTSUP || err == ENODATA; ++} ++ ++#if USE_XATTR ++static void ++copy_attr_error (struct error_context *ctx ATTRIBUTE_UNUSED, ++ char const *fmt, ...) ++{ ++ int err = errno; ++ va_list ap; ++ ++ if (!errno_unsupported (errno)) ++ { ++ /* use verror module to print error message */ ++ va_start (ap, fmt); ++ verror (0, err, fmt, ap); ++ va_end (ap); ++ } ++} ++ ++static void ++copy_attr_allerror (struct error_context *ctx ATTRIBUTE_UNUSED, ++ char const *fmt, ...) ++{ ++ int err = errno; ++ va_list ap; ++ ++ /* use verror module to print error message */ ++ va_start (ap, fmt); ++ verror (0, err, fmt, ap); ++ va_end (ap); ++} ++ ++static char const * ++copy_attr_quote (struct error_context *ctx ATTRIBUTE_UNUSED, char const *str) ++{ ++ return quote (str); ++} ++ ++static void ++copy_attr_free (struct error_context *ctx ATTRIBUTE_UNUSED, ++ char const *str ATTRIBUTE_UNUSED) ++{ ++} ++ ++static bool ++copy_attr_by_fd (char const *src_path, int src_fd, ++ char const *dst_path, int dst_fd, const struct cp_options *x) ++{ ++ struct error_context ctx = ++ { ++ .error = x->require_preserve_xattr ? copy_attr_allerror : copy_attr_error, ++ .quote = copy_attr_quote, ++ .quote_free = copy_attr_free ++ }; ++ return 0 == attr_copy_fd (src_path, src_fd, dst_path, dst_fd, 0, ++ (x->reduce_diagnostics ++ && !x->require_preserve_xattr)? NULL : &ctx); ++} ++ ++static bool ++copy_attr_by_name (char const *src_path, char const *dst_path, ++ const struct cp_options *x) ++{ ++ struct error_context ctx = ++ { ++ .error = x->require_preserve_xattr ? 
copy_attr_allerror : copy_attr_error, ++ .quote = copy_attr_quote, ++ .quote_free = copy_attr_free ++ }; ++ return 0 == attr_copy_file (src_path, dst_path, 0, ++ (x-> reduce_diagnostics ++ && !x->require_preserve_xattr) ? NULL : &ctx); ++} ++#else /* USE_XATTR */ ++ ++static bool ++copy_attr_by_fd (char const *src_path ATTRIBUTE_UNUSED, ++ int src_fd ATTRIBUTE_UNUSED, ++ char const *dst_path ATTRIBUTE_UNUSED, ++ int dst_fd ATTRIBUTE_UNUSED, ++ const struct cp_options *x ATTRIBUTE_UNUSED) ++{ ++ return true; ++} ++ ++static bool ++copy_attr_by_name (char const *src_path ATTRIBUTE_UNUSED, ++ char const *dst_path ATTRIBUTE_UNUSED, ++ const struct cp_options *x ATTRIBUTE_UNUSED) ++{ ++ return true; ++} ++#endif /* USE_XATTR */ ++ ++/* Read the contents of the directory SRC_NAME_IN, and recursively ++ copy the contents to DST_NAME_IN. NEW_DST is true if ++ DST_NAME_IN is a directory that was created previously in the ++ recursion. SRC_SB and ANCESTORS describe SRC_NAME_IN. ++ Set *COPY_INTO_SELF if SRC_NAME_IN is a parent of ++ FIRST_DIR_CREATED_PER_COMMAND_LINE_ARG FIXME ++ (or the same as) DST_NAME_IN; otherwise, clear it. ++ Return true if successful. */ ++ ++static bool ++copy_dir (char const *src_name_in, char const *dst_name_in, bool new_dst, ++ const struct stat *src_sb, struct dir_list *ancestors, ++ const struct cp_options *x, ++ bool *first_dir_created_per_command_line_arg, ++ bool *copy_into_self) ++{ ++ char *name_space; ++ char *namep; ++ struct cp_options non_command_line_options = *x; ++ bool ok = true; ++ ++ name_space = savedir (src_name_in); ++ if (name_space == NULL) ++ { ++ /* This diagnostic is a bit vague because savedir can fail in ++ several different ways. */ ++ error (0, errno, _("cannot access %s"), quote (src_name_in)); ++ return false; ++ } ++ ++ /* For cp's -H option, dereference command line arguments, but do not ++ dereference symlinks that are found via recursive traversal. 
*/ ++ if (x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) ++ non_command_line_options.dereference = DEREF_NEVER; ++ ++ namep = name_space; ++ while (*namep != '\0') ++ { ++ bool local_copy_into_self; ++ char *src_name = file_name_concat (src_name_in, namep, NULL); ++ char *dst_name = file_name_concat (dst_name_in, namep, NULL); ++ ++ ok &= copy_internal (src_name, dst_name, new_dst, src_sb->st_dev, ++ ancestors, &non_command_line_options, false, ++ first_dir_created_per_command_line_arg, ++ &local_copy_into_self, NULL); ++ *copy_into_self |= local_copy_into_self; ++ ++ free (dst_name); ++ free (src_name); ++ ++ /* If we're copying into self, there's no point in continuing, ++ and in fact, that would even infloop, now that we record only ++ the first created directory per command line argument. */ ++ if (local_copy_into_self) ++ break; ++ ++ namep += strlen (namep) + 1; ++ } ++ free (name_space); ++ return ok; ++} ++ ++/* Set the owner and owning group of DEST_DESC to the st_uid and ++ st_gid fields of SRC_SB. If DEST_DESC is undefined (-1), set ++ the owner and owning group of DST_NAME instead; for ++ safety prefer lchown if the system supports it since no ++ symbolic links should be involved. DEST_DESC must ++ refer to the same file as DEST_NAME if defined. ++ Upon failure to set both UID and GID, try to set only the GID. ++ NEW_DST is true if the file was newly created; otherwise, ++ DST_SB is the status of the destination. ++ Return 1 if the initial syscall succeeds, 0 if it fails but it's OK ++ not to preserve ownership, -1 otherwise. 
*/ ++ ++static int ++set_owner (const struct cp_options *x, char const *dst_name, int dest_desc, ++ struct stat const *src_sb, bool new_dst, ++ struct stat const *dst_sb) ++{ ++ uid_t uid = src_sb->st_uid; ++ gid_t gid = src_sb->st_gid; ++ ++ /* Naively changing the ownership of an already-existing file before ++ changing its permissions would create a window of vulnerability if ++ the file's old permissions are too generous for the new owner and ++ group. Avoid the window by first changing to a restrictive ++ temporary mode if necessary. */ ++ ++ if (!new_dst && (x->preserve_mode || x->move_mode || x->set_mode)) ++ { ++ mode_t old_mode = dst_sb->st_mode; ++ mode_t new_mode = ++ (x->preserve_mode || x->move_mode ? src_sb->st_mode : x->mode); ++ mode_t restrictive_temp_mode = old_mode & new_mode & S_IRWXU; ++ ++ if ((USE_ACL ++ || (old_mode & CHMOD_MODE_BITS ++ & (~new_mode | S_ISUID | S_ISGID | S_ISVTX))) ++ && qset_acl (dst_name, dest_desc, restrictive_temp_mode) != 0) ++ { ++ if (! owner_failure_ok (x)) ++ error (0, errno, _("clearing permissions for %s"), quote (dst_name)); ++ return -x->require_preserve; ++ } ++ } ++ ++ if (HAVE_FCHOWN && dest_desc != -1) ++ { ++ if (fchown (dest_desc, uid, gid) == 0) ++ return 1; ++ if (errno == EPERM || errno == EINVAL) ++ { ++ /* We've failed to set *both*. Now, try to set just the group ++ ID, but ignore any failure here, and don't change errno. */ ++ int saved_errno = errno; ++ ignore_value (fchown (dest_desc, -1, gid)); ++ errno = saved_errno; ++ } ++ } ++ else ++ { ++ if (lchown (dst_name, uid, gid) == 0) ++ return 1; ++ if (errno == EPERM || errno == EINVAL) ++ { ++ /* We've failed to set *both*. Now, try to set just the group ++ ID, but ignore any failure here, and don't change errno. */ ++ int saved_errno = errno; ++ ignore_value (lchown (dst_name, -1, gid)); ++ errno = saved_errno; ++ } ++ } ++ ++ if (! 
chown_failure_ok (x)) ++ { ++ error (0, errno, _("failed to preserve ownership for %s"), ++ quote (dst_name)); ++ if (x->require_preserve) ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* Set the st_author field of DEST_DESC to the st_author field of ++ SRC_SB. If DEST_DESC is undefined (-1), set the st_author field ++ of DST_NAME instead. DEST_DESC must refer to the same file as ++ DEST_NAME if defined. */ ++ ++static void ++set_author (const char *dst_name, int dest_desc, const struct stat *src_sb) ++{ ++#if HAVE_STRUCT_STAT_ST_AUTHOR ++ /* FIXME: Modify the following code so that it does not ++ follow symbolic links. */ ++ ++ /* Preserve the st_author field. */ ++ file_t file = (dest_desc < 0 ++ ? file_name_lookup (dst_name, 0, 0) ++ : getdport (dest_desc)); ++ if (file == MACH_PORT_NULL) ++ error (0, errno, _("failed to lookup file %s"), quote (dst_name)); ++ else ++ { ++ error_t err = file_chauthor (file, src_sb->st_author); ++ if (err) ++ error (0, err, _("failed to preserve authorship for %s"), ++ quote (dst_name)); ++ mach_port_deallocate (mach_task_self (), file); ++ } ++#else ++ (void) dst_name; ++ (void) dest_desc; ++ (void) src_sb; ++#endif ++} ++ ++/* Change the file mode bits of the file identified by DESC or NAME to MODE. ++ Use DESC if DESC is valid and fchmod is available, NAME otherwise. */ ++ ++static int ++fchmod_or_lchmod (int desc, char const *name, mode_t mode) ++{ ++#if HAVE_FCHMOD ++ if (0 <= desc) ++ return fchmod (desc, mode); ++#endif ++ return lchmod (name, mode); ++} ++ ++/* Copy a regular file from SRC_NAME to DST_NAME. ++ If the source file contains holes, copies holes and blocks of zeros ++ in the source file as holes in the destination file. ++ (Holes are read as zeroes by the `read' system call.) ++ When creating the destination, use DST_MODE & ~OMITTED_PERMISSIONS ++ as the third argument in the call to open, adding ++ OMITTED_PERMISSIONS after copying as needed. ++ X provides many option settings. ++ Return true if successful. 
++ *NEW_DST is as in copy_internal. ++ SRC_SB is the result of calling XSTAT (aka stat) on SRC_NAME. */ ++ ++static bool ++copy_reg (char const *src_name, char const *dst_name, ++ const struct cp_options *x, ++ mode_t dst_mode, mode_t omitted_permissions, bool *new_dst, ++ struct stat const *src_sb) ++{ ++ char *buf; ++ char *buf_alloc = NULL; ++ char *name_alloc = NULL; ++ int dest_desc; ++ int dest_errno; ++ int source_desc; ++ mode_t src_mode = src_sb->st_mode; ++ struct stat sb; ++ struct stat src_open_sb; ++ bool return_val = true; ++ bool data_copy_required = true; ++ ++ source_desc = open (src_name, ++ (O_RDONLY | O_BINARY ++ | (x->dereference == DEREF_NEVER ? O_NOFOLLOW : 0))); ++ if (source_desc < 0) ++ { ++ error (0, errno, _("cannot open %s for reading"), quote (src_name)); ++ return false; ++ } ++ ++ if (fstat (source_desc, &src_open_sb) != 0) ++ { ++ error (0, errno, _("cannot fstat %s"), quote (src_name)); ++ return_val = false; ++ goto close_src_desc; ++ } ++ ++ /* Compare the source dev/ino from the open file to the incoming, ++ saved ones obtained via a previous call to stat. */ ++ if (! SAME_INODE (*src_sb, src_open_sb)) ++ { ++ error (0, 0, ++ _("skipping file %s, as it was replaced while being copied"), ++ quote (src_name)); ++ return_val = false; ++ goto close_src_desc; ++ } ++ ++ /* The semantics of the following open calls are mandated ++ by the specs for both cp and mv. */ ++ if (! *new_dst) ++ { ++ dest_desc = open (dst_name, O_WRONLY | O_TRUNC | O_BINARY); ++ dest_errno = errno; ++ ++ /* When using cp --preserve=context to copy to an existing destination, ++ use the default context rather than that of the source. Why? ++ 1) the src context may prohibit writing, and ++ 2) because it's more consistent to use the same context ++ that is used when the destination file doesn't already exist. 
*/ ++ if (x->preserve_security_context && 0 <= dest_desc) ++ { ++ security_context_t con = NULL; ++ if (getfscreatecon (&con) < 0) ++ { ++ if (!x->reduce_diagnostics || x->require_preserve_context) ++ error (0, errno, _("failed to get file system create context")); ++ if (x->require_preserve_context) ++ { ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ } ++ ++ if (con) ++ { ++ if (fsetfilecon (dest_desc, con) < 0) ++ { ++ if (!x->reduce_diagnostics || x->require_preserve_context) ++ error (0, errno, ++ _("failed to set the security context of %s to %s"), ++ quote_n (0, dst_name), quote_n (1, con)); ++ if (x->require_preserve_context) ++ { ++ return_val = false; ++ freecon (con); ++ goto close_src_and_dst_desc; ++ } ++ } ++ freecon (con); ++ } ++ } ++ ++ if (dest_desc < 0 && x->unlink_dest_after_failed_open) ++ { ++ if (unlink (dst_name) != 0) ++ { ++ error (0, errno, _("cannot remove %s"), quote (dst_name)); ++ return_val = false; ++ goto close_src_desc; ++ } ++ if (x->verbose) ++ printf (_("removed %s\n"), quote (dst_name)); ++ ++ /* Tell caller that the destination file was unlinked. */ ++ *new_dst = true; ++ } ++ } ++ ++ if (*new_dst) ++ { ++ int open_flags = O_WRONLY | O_CREAT | O_BINARY; ++ dest_desc = open (dst_name, open_flags | O_EXCL, ++ dst_mode & ~omitted_permissions); ++ dest_errno = errno; ++ ++ /* When trying to copy through a dangling destination symlink, ++ the above open fails with EEXIST. If that happens, and ++ lstat'ing the DST_NAME shows that it is a symlink, then we ++ have a problem: trying to resolve this dangling symlink to ++ a directory/destination-entry pair is fundamentally racy, ++ so punt. If POSIXLY_CORRECT is set, simply call open again, ++ but without O_EXCL (potentially dangerous). If not, fail ++ with a diagnostic. These shenanigans are necessary only ++ when copying, i.e., not in move_mode. */ ++ if (dest_desc < 0 && dest_errno == EEXIST && ! 
x->move_mode) ++ { ++ struct stat dangling_link_sb; ++ if (lstat (dst_name, &dangling_link_sb) == 0 ++ && S_ISLNK (dangling_link_sb.st_mode)) ++ { ++ if (x->open_dangling_dest_symlink) ++ { ++ dest_desc = open (dst_name, open_flags, ++ dst_mode & ~omitted_permissions); ++ dest_errno = errno; ++ } ++ else ++ { ++ error (0, 0, _("not writing through dangling symlink %s"), ++ quote (dst_name)); ++ return_val = false; ++ goto close_src_desc; ++ } ++ } ++ } ++ } ++ else ++ omitted_permissions = 0; ++ ++ if (dest_desc < 0) ++ { ++ error (0, dest_errno, _("cannot create regular file %s"), ++ quote (dst_name)); ++ return_val = false; ++ goto close_src_desc; ++ } ++ ++ if (fstat (dest_desc, &sb) != 0) ++ { ++ error (0, errno, _("cannot fstat %s"), quote (dst_name)); ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ ++ if (x->reflink_mode) ++ { ++ bool clone_ok = clone_file (dest_desc, source_desc) == 0; ++ if (clone_ok || x->reflink_mode == REFLINK_ALWAYS) ++ { ++ if (!clone_ok) ++ { ++ error (0, errno, _("failed to clone %s"), quote (dst_name)); ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ data_copy_required = false; ++ } ++ } ++ ++ if (data_copy_required) ++ { ++ typedef uintptr_t word; ++ off_t n_read_total = 0; ++ ++ /* Choose a suitable buffer size; it may be adjusted later. */ ++ size_t buf_alignment = lcm (getpagesize (), sizeof (word)); ++ size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1; ++ size_t buf_size = io_blksize (sb); ++ ++ /* Deal with sparse files. */ ++ bool last_write_made_hole = false; ++ bool make_holes = false; ++ ++ if (S_ISREG (sb.st_mode)) ++ { ++ /* Even with --sparse=always, try to create holes only ++ if the destination is a regular file. */ ++ if (x->sparse_mode == SPARSE_ALWAYS) ++ make_holes = true; ++ ++#if HAVE_STRUCT_STAT_ST_BLOCKS ++ /* Use a heuristic to determine whether SRC_NAME contains any sparse ++ blocks. 
If the file has fewer blocks than would normally be ++ needed for a file of its size, then at least one of the blocks in ++ the file is a hole. */ ++ if (x->sparse_mode == SPARSE_AUTO && S_ISREG (src_open_sb.st_mode) ++ && ST_NBLOCKS (src_open_sb) < src_open_sb.st_size / ST_NBLOCKSIZE) ++ make_holes = true; ++#endif ++ } ++ ++ /* If not making a sparse file, try to use a more-efficient ++ buffer size. */ ++ if (! make_holes) ++ { ++ /* Compute the least common multiple of the input and output ++ buffer sizes, adjusting for outlandish values. */ ++ size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop; ++ size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size, ++ blcm_max); ++ ++ /* Do not bother with a buffer larger than the input file, plus one ++ byte to make sure the file has not grown while reading it. */ ++ if (S_ISREG (src_open_sb.st_mode) && src_open_sb.st_size < buf_size) ++ buf_size = src_open_sb.st_size + 1; ++ ++ /* However, stick with a block size that is a positive multiple of ++ blcm, overriding the above adjustments. Watch out for ++ overflow. */ ++ buf_size += blcm - 1; ++ buf_size -= buf_size % blcm; ++ if (buf_size == 0 || blcm_max < buf_size) ++ buf_size = blcm; ++ } ++ ++ /* Make a buffer with space for a sentinel at the end. */ ++ buf_alloc = xmalloc (buf_size + buf_alignment_slop); ++ buf = ptr_align (buf_alloc, buf_alignment); ++ ++ for (;;) ++ { ++ word *wp = NULL; ++ ++ ssize_t n_read = read (source_desc, buf, buf_size); ++ if (n_read < 0) ++ { ++#ifdef EINTR ++ if (errno == EINTR) ++ continue; ++#endif ++ error (0, errno, _("reading %s"), quote (src_name)); ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ if (n_read == 0) ++ break; ++ ++ n_read_total += n_read; ++ ++ if (make_holes) ++ { ++ char *cp; ++ ++ /* Sentinel to stop loop. */ ++ buf[n_read] = '\1'; ++#ifdef lint ++ /* Usually, buf[n_read] is not the byte just before a "word" ++ (aka uintptr_t) boundary. 
In that case, the word-oriented ++ test below (*wp++ == 0) would read some uninitialized bytes ++ after the sentinel. To avoid false-positive reports about ++ this condition (e.g., from a tool like valgrind), set the ++ remaining bytes -- to any value. */ ++ memset (buf + n_read + 1, 0, sizeof (word) - 1); ++#endif ++ ++ /* Find first nonzero *word*, or the word with the sentinel. */ ++ ++ wp = (word *) buf; ++ while (*wp++ == 0) ++ continue; ++ ++ /* Find the first nonzero *byte*, or the sentinel. */ ++ ++ cp = (char *) (wp - 1); ++ while (*cp++ == 0) ++ continue; ++ ++ if (cp <= buf + n_read) ++ /* Clear to indicate that a normal write is needed. */ ++ wp = NULL; ++ else ++ { ++ /* We found the sentinel, so the whole input block was zero. ++ Make a hole. */ ++ if (lseek (dest_desc, n_read, SEEK_CUR) < 0) ++ { ++ error (0, errno, _("cannot lseek %s"), quote (dst_name)); ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ last_write_made_hole = true; ++ } ++ } ++ ++ if (!wp) ++ { ++ size_t n = n_read; ++ if (full_write (dest_desc, buf, n) != n) ++ { ++ error (0, errno, _("writing %s"), quote (dst_name)); ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ last_write_made_hole = false; ++ ++ /* It is tempting to return early here upon a short read from a ++ regular file. That would save the final read syscall for each ++ file. Unfortunately that doesn't work for certain files in ++ /proc with linux kernels from at least 2.6.9 .. 2.6.29. */ ++ } ++ } ++ ++ /* If the file ends with a `hole', we need to do something to record ++ the length of the file. On modern systems, calling ftruncate does ++ the job. On systems without native ftruncate support, we have to ++ write a byte at the ending position. Otherwise the kernel would ++ truncate the file at the end of the last write operation. */ ++ ++ if (last_write_made_hole) ++ { ++ if (HAVE_FTRUNCATE ++ ? /* ftruncate sets the file size, ++ so there is no need for a write. 
*/ ++ ftruncate (dest_desc, n_read_total) < 0 ++ : /* Seek backwards one character and write a null. */ ++ (lseek (dest_desc, (off_t) -1, SEEK_CUR) < 0L ++ || full_write (dest_desc, "", 1) != 1)) ++ { ++ error (0, errno, _("writing %s"), quote (dst_name)); ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ } ++ } ++ ++ if (x->preserve_timestamps) ++ { ++ struct timespec timespec[2]; ++ timespec[0] = get_stat_atime (src_sb); ++ timespec[1] = get_stat_mtime (src_sb); ++ ++ if (gl_futimens (dest_desc, dst_name, timespec) != 0) ++ { ++ error (0, errno, _("preserving times for %s"), quote (dst_name)); ++ if (x->require_preserve) ++ { ++ return_val = false; ++ goto close_src_and_dst_desc; ++ } ++ } ++ } ++ ++ /* To allow copying xattrs on read-only files, temporarily chmod u+rw. ++ This workaround is required as an inode permission check is done ++ by xattr_permission() in fs/xattr.c of the GNU/Linux kernel tree. */ ++ if (x->preserve_xattr) ++ { ++ bool access_changed = false; ++ ++ if (!(sb.st_mode & S_IWUSR) && geteuid() != 0) ++ access_changed = fchmod_or_lchmod (dest_desc, dst_name, 0600) == 0; ++ ++ if (!copy_attr_by_fd (src_name, source_desc, dst_name, dest_desc, x) ++ && x->require_preserve_xattr) ++ return_val = false; ++ ++ if (access_changed) ++ fchmod_or_lchmod (dest_desc, dst_name, dst_mode & ~omitted_permissions); ++ } ++ ++ if (x->preserve_ownership && ! 
SAME_OWNER_AND_GROUP (*src_sb, sb)) ++ { ++ switch (set_owner (x, dst_name, dest_desc, src_sb, *new_dst, &sb)) ++ { ++ case -1: ++ return_val = false; ++ goto close_src_and_dst_desc; ++ ++ case 0: ++ src_mode &= ~ (S_ISUID | S_ISGID | S_ISVTX); ++ break; ++ } ++ } ++ ++ set_author (dst_name, dest_desc, src_sb); ++ ++ if (x->preserve_mode || x->move_mode) ++ { ++ if (copy_acl (src_name, source_desc, dst_name, dest_desc, src_mode) != 0 ++ && x->require_preserve) ++ return_val = false; ++ } ++ else if (x->set_mode) ++ { ++ if (set_acl (dst_name, dest_desc, x->mode) != 0) ++ return_val = false; ++ } ++ else if (omitted_permissions) ++ { ++ omitted_permissions &= ~ cached_umask (); ++ if (omitted_permissions ++ && fchmod_or_lchmod (dest_desc, dst_name, dst_mode) != 0) ++ { ++ error (0, errno, _("preserving permissions for %s"), ++ quote (dst_name)); ++ if (x->require_preserve) ++ return_val = false; ++ } ++ } ++ ++close_src_and_dst_desc: ++ if (close (dest_desc) < 0) ++ { ++ error (0, errno, _("closing %s"), quote (dst_name)); ++ return_val = false; ++ } ++close_src_desc: ++ if (close (source_desc) < 0) ++ { ++ error (0, errno, _("closing %s"), quote (src_name)); ++ return_val = false; ++ } ++ ++ free (buf_alloc); ++ free (name_alloc); ++ return return_val; ++} ++ ++/* Return true if it's ok that the source and destination ++ files are the `same' by some measure. The goal is to avoid ++ making the `copy' operation remove both copies of the file ++ in that case, while still allowing the user to e.g., move or ++ copy a regular file onto a symlink that points to it. ++ Try to minimize the cost of this function in the common case. ++ Set *RETURN_NOW if we've determined that the caller has no more ++ work to do and should return successfully, right away. ++ ++ Set *UNLINK_SRC if we've determined that the caller wants to do ++ `rename (a, b)' where `a' and `b' are distinct hard links to the same ++ file. 
In that case, the caller should try to unlink `a' and then return ++ successfully. Ideally, we wouldn't have to do that, and we'd be ++ able to rely on rename to remove the source file. However, POSIX ++ mistakenly requires that such a rename call do *nothing* and return ++ successfully. A false return tells the caller that the two names must be treated as the same file and the operation refused. */ ++ ++static bool ++same_file_ok (char const *src_name, struct stat const *src_sb, ++ char const *dst_name, struct stat const *dst_sb, ++ const struct cp_options *x, bool *return_now, bool *unlink_src) ++{ ++ const struct stat *src_sb_link; ++ const struct stat *dst_sb_link; ++ struct stat tmp_dst_sb; ++ struct stat tmp_src_sb; ++ ++ bool same_link; ++ bool same = SAME_INODE (*src_sb, *dst_sb); ++ ++ *return_now = false; ++ *unlink_src = false; ++ ++ /* FIXME: this should (at the very least) be moved into the following ++ if-block. More likely, it should be removed, because it inhibits ++ making backups. But removing it will result in a change in behavior ++ that will probably have to be documented -- and tests will have to ++ be updated. */ ++ if (same && x->hard_link) ++ { ++ *return_now = true; ++ return true; ++ } ++ ++ if (x->dereference == DEREF_NEVER) ++ { ++ same_link = same; ++ ++ /* If both the source and destination files are symlinks (and we'll ++ know this here IFF preserving symlinks), then it's ok -- as long ++ as they are distinct. */ ++ if (S_ISLNK (src_sb->st_mode) && S_ISLNK (dst_sb->st_mode)) ++ return ! same_name (src_name, dst_name); ++ ++ src_sb_link = src_sb; ++ dst_sb_link = dst_sb; ++ } ++ else ++ { ++ if (!same) /* The supplied stat buffers differ: nothing more to check. */ ++ return true; ++ ++ if (lstat (dst_name, &tmp_dst_sb) != 0 ++ || lstat (src_name, &tmp_src_sb) != 0) /* An lstat failure is reported as ok; no diagnostic is issued here. */ ++ return true; ++ ++ src_sb_link = &tmp_src_sb; ++ dst_sb_link = &tmp_dst_sb; ++ ++ same_link = SAME_INODE (*src_sb_link, *dst_sb_link); ++ ++ /* If both are symlinks, then it's ok, but only if the destination ++ will be unlinked before being opened. 
This is like the test ++ above, but with the addition of the unlink_dest_before_opening ++ conjunct because otherwise, with two symlinks to the same target, ++ we'd end up truncating the source file. */ ++ if (S_ISLNK (src_sb_link->st_mode) && S_ISLNK (dst_sb_link->st_mode) ++ && x->unlink_dest_before_opening) ++ return true; ++ } ++ ++ /* The backup code ensures there's a copy, so it's usually ok to ++ remove any destination file. One exception is when both ++ source and destination are the same directory entry. In that ++ case, moving the destination file aside (in making the backup) ++ would also rename the source file and result in an error. */ ++ if (x->backup_type != no_backups) ++ { ++ if (!same_link) ++ { ++ /* In copy mode when dereferencing symlinks, if the source is a ++ symlink and the dest is not, then backing up the destination ++ (moving it aside) would make the source a dangling symlink, and the ++ subsequent attempt to open it in copy_reg would fail with ++ a misleading diagnostic. Avoid that by returning false in ++ that case so the caller can make cp (or mv when it has to ++ resort to reading the source file) fail now. */ ++ ++ /* FIXME-note: even with the following kludge, we can still provoke ++ the offending diagnostic. It's just a little harder to do :-) ++ $ rm -f a b c; touch c; ln -s c b; ln -s b a; cp -b a b ++ cp: cannot open `a' for reading: No such file or directory ++ That's misleading, since a subsequent `ls' shows that `a' ++ is still there. ++ One solution would be to open the source file *before* moving ++ aside the destination, but that'd involve a big rewrite. */ ++ if ( ! x->move_mode ++ && x->dereference != DEREF_NEVER ++ && S_ISLNK (src_sb_link->st_mode) ++ && ! S_ISLNK (dst_sb_link->st_mode)) ++ return false; ++ ++ return true; ++ } ++ ++ return ! 
same_name (src_name, dst_name); ++ } ++ ++#if 0 ++ /* FIXME: use or remove */ ++ ++ /* If we're making a backup, we'll detect the problem case in ++ copy_reg because SRC_NAME will no longer exist. Allowing ++ the test to be deferred lets cp do some useful things. ++ But when creating hardlinks and SRC_NAME is a symlink ++ but DST_NAME is not we must test anyway. */ ++ if (x->hard_link ++ || !S_ISLNK (src_sb_link->st_mode) ++ || S_ISLNK (dst_sb_link->st_mode)) ++ return true; ++ ++ if (x->dereference != DEREF_NEVER) ++ return true; ++#endif ++ ++ /* They may refer to the same file if we're in move mode and the ++ target is a symlink. That is ok, since we remove any existing ++ destination file before opening it -- via `rename' if they're on ++ the same file system, via `unlink (DST_NAME)' otherwise. ++ It's also ok if they're distinct hard links to the same file. */ ++ if (x->move_mode || x->unlink_dest_before_opening) ++ { ++ if (S_ISLNK (dst_sb_link->st_mode)) ++ return true; ++ ++ if (same_link ++ && 1 < dst_sb_link->st_nlink ++ && ! same_name (src_name, dst_name)) ++ { ++ if (x->move_mode) /* Only in move mode is the caller asked to unlink the source name and return at once. */ ++ { ++ *unlink_src = true; ++ *return_now = true; ++ } ++ return true; ++ } ++ } ++ ++ /* If neither is a symlink, then it's ok as long as they aren't ++ hard links to the same file. */ ++ if (!S_ISLNK (src_sb_link->st_mode) && !S_ISLNK (dst_sb_link->st_mode)) ++ { ++ if (!SAME_INODE (*src_sb_link, *dst_sb_link)) ++ return true; ++ ++ /* If they are the same file, it's ok if we're making hard links. */ ++ if (x->hard_link) ++ { ++ *return_now = true; ++ return true; ++ } ++ } ++ ++ /* It's ok to remove a destination symlink. But that works only when we ++ unlink before opening the destination and when the source and destination ++ files are on the same partition. */ ++ if (x->unlink_dest_before_opening ++ && S_ISLNK (dst_sb_link->st_mode)) ++ return dst_sb_link->st_dev == src_sb_link->st_dev; ++ ++ if (x->dereference == DEREF_NEVER) ++ { ++ if ( ! 
S_ISLNK (src_sb_link->st_mode)) ++ tmp_src_sb = *src_sb_link; /* Not a symlink: reuse the stat data already in hand. */ ++ else if (stat (src_name, &tmp_src_sb) != 0) /* Symlink: follow it; a stat failure is reported as ok. */ ++ return true; ++ ++ if ( ! S_ISLNK (dst_sb_link->st_mode)) ++ tmp_dst_sb = *dst_sb_link; /* Same treatment for the destination. */ ++ else if (stat (dst_name, &tmp_dst_sb) != 0) ++ return true; ++ ++ if ( ! SAME_INODE (tmp_src_sb, tmp_dst_sb)) ++ return true; ++ ++ /* FIXME: shouldn't this be testing whether we're making symlinks? */ ++ if (x->hard_link) ++ { ++ *return_now = true; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/* Return true if FILE, with mode MODE, is writable in the sense of 'mv'. ++ Always consider a symbolic link to be writable. */ ++static bool ++writable_destination (char const *file, mode_t mode) ++{ ++ return (S_ISLNK (mode) ++ || can_write_any_file () ++ || euidaccess (file, W_OK) == 0); ++} ++/* Print on stderr a question asking whether to overwrite DST_NAME. If DST_NAME is not writable according to writable_destination, also show the CHMOD_MODE_BITS of DST_SB in octal and in ls-style notation. The reply is not read here. */ ++static void ++overwrite_prompt (char const *dst_name, struct stat const *dst_sb) ++{ ++ if (! writable_destination (dst_name, dst_sb->st_mode)) ++ { ++ char perms[12]; /* "-rwxrwxrwx " ls-style modes. */ ++ strmode (dst_sb->st_mode, perms); ++ perms[10] = '\0'; /* Cut the string after the ten mode characters; &perms[1] below skips the leading file-type character. */ ++ fprintf (stderr, ++ _("%s: try to overwrite %s, overriding mode %04lo (%s)? "), ++ program_name, quote (dst_name), ++ (unsigned long int) (dst_sb->st_mode & CHMOD_MODE_BITS), ++ &perms[1]); ++ } ++ else ++ { ++ fprintf (stderr, _("%s: overwrite %s? "), ++ program_name, quote (dst_name)); ++ } ++} ++ ++/* Initialize the hash table implementing a set of F_triple entries ++ corresponding to destination files. */ ++extern void ++dest_info_init (struct cp_options *x) ++{ ++ x->dest_info ++ = hash_initialize (DEST_INFO_INITIAL_CAPACITY, ++ NULL, ++ triple_hash, ++ triple_compare, ++ triple_free); ++} ++ ++/* Initialize the hash table implementing a set of F_triple entries ++ corresponding to source files listed on the command line. */ ++extern void ++src_info_init (struct cp_options *x) ++{ ++ ++ /* Note that we use triple_hash_no_name here. ++ Contrast with the use of triple_hash above. 
++ That is necessary because a source file may be specified ++ in many different ways. We want to warn about this ++ cp a a d/ ++ as well as this: ++ cp a ./a d/ ++ */ ++ x->src_info ++ = hash_initialize (DEST_INFO_INITIAL_CAPACITY, /* Same initial-capacity constant that dest_info_init passes. */ ++ NULL, ++ triple_hash_no_name, ++ triple_compare, ++ triple_free); ++} ++ ++/* When effecting a move (e.g., for mv(1)), and given the name DST_NAME ++ of the destination and a corresponding stat buffer, DST_SB, return ++ true if the logical `move' operation should _not_ proceed. ++ Otherwise, return false. ++ Depending on options specified in X, this code may issue an ++ interactive prompt asking whether it's ok to overwrite DST_NAME. */ ++static bool ++abandon_move (const struct cp_options *x, ++ char const *dst_name, ++ struct stat const *dst_sb) ++{ ++ assert (x->move_mode); ++ return (x->interactive == I_ALWAYS_NO ++ || ((x->interactive == I_ASK_USER ++ || (x->interactive == I_UNSPECIFIED ++ && x->stdin_tty ++ && ! writable_destination (dst_name, dst_sb->st_mode))) ++ && (overwrite_prompt (dst_name, dst_sb), 1) /* Comma expression: print the prompt, then yield 1 so that yesno is consulted next. */ ++ && ! yesno ())); ++} ++ ++/* Print --verbose output on standard output, e.g. `new' -> `old'. ++ If BACKUP_DST_NAME is non-NULL, then also indicate that it is ++ the name of a backup file. */ ++static void ++emit_verbose (char const *src, char const *dst, char const *backup_dst_name) ++{ ++ printf ("%s -> %s", quote_n (0, src), quote_n (1, dst)); ++ if (backup_dst_name) ++ printf (_(" (backup: %s)"), quote (backup_dst_name)); ++ putchar ('\n'); ++} ++ ++/* A wrapper around "setfscreatecon (NULL)" that exits upon failure. */ ++static void ++restore_default_fscreatecon_or_die (void) ++{ ++ if (setfscreatecon (NULL) != 0) ++ error (EXIT_FAILURE, errno, ++ _("failed to restore the default file creation context")); ++} ++ ++/* Copy the file SRC_NAME to the file DST_NAME. The files may be of ++ any type. 
NEW_DST should be true if the file DST_NAME cannot ++ exist because its parent directory was just created; NEW_DST should ++ be false if DST_NAME might already exist. DEVICE is the device ++ number of the parent directory, or 0 if the parent of this file is ++ not known. ANCESTORS points to a linked, null terminated list of ++ devices and inodes of parent directories of SRC_NAME. COMMAND_LINE_ARG ++ is true iff SRC_NAME was specified on the command line. ++ FIRST_DIR_CREATED_PER_COMMAND_LINE_ARG is both input and output. ++ Set *COPY_INTO_SELF if SRC_NAME is a parent of (or the ++ same as) DST_NAME; otherwise, clear it. ++ Return true if successful. */ ++static bool ++copy_internal (char const *src_name, char const *dst_name, ++ bool new_dst, ++ dev_t device, ++ struct dir_list *ancestors, ++ const struct cp_options *x, ++ bool command_line_arg, ++ bool *first_dir_created_per_command_line_arg, ++ bool *copy_into_self, ++ bool *rename_succeeded) ++{ ++ struct stat src_sb; ++ struct stat dst_sb; ++ mode_t src_mode; ++ mode_t dst_mode IF_LINT (= 0); ++ mode_t dst_mode_bits; ++ mode_t omitted_permissions; ++ bool restore_dst_mode = false; ++ char *earlier_file = NULL; ++ char *dst_backup = NULL; ++ bool backup_succeeded = false; ++ bool delayed_ok; ++ bool copied_as_regular = false; ++ bool dest_is_symlink = false; ++ bool have_dst_lstat = false; ++ ++ if (x->move_mode && rename_succeeded) ++ *rename_succeeded = false; ++ ++ *copy_into_self = false; ++ ++ if (XSTAT (x, src_name, &src_sb) != 0) ++ { ++ error (0, errno, _("cannot stat %s"), quote (src_name)); ++ return false; ++ } ++ ++ src_mode = src_sb.st_mode; ++ ++ if (S_ISDIR (src_mode) && !x->recursive) ++ { ++ error (0, 0, _("omitting directory %s"), quote (src_name)); ++ return false; ++ } ++ ++ /* Detect the case in which the same source file appears more than ++ once on the command line and no backup option has been selected. ++ If so, simply warn and don't copy it the second time. 
++ This check is enabled only if x->src_info is non-NULL. */ ++ if (command_line_arg) ++ { ++ if ( ! S_ISDIR (src_sb.st_mode) ++ && x->backup_type == no_backups ++ && seen_file (x->src_info, src_name, &src_sb)) ++ { ++ error (0, 0, _("warning: source file %s specified more than once"), ++ quote (src_name)); ++ return true; ++ } ++ ++ record_file (x->src_info, src_name, &src_sb); ++ } ++ ++ if (!new_dst) ++ { ++ /* Regular files can be created by writing through symbolic ++ links, but other files cannot. So use stat on the ++ destination when copying a regular file, and lstat otherwise. ++ However, if we intend to unlink or remove the destination ++ first, use lstat, since a copy won't actually be made to the ++ destination in that case. */ ++ bool use_stat = ++ ((S_ISREG (src_mode) ++ || (x->copy_as_regular ++ && ! (S_ISDIR (src_mode) || S_ISLNK (src_mode)))) ++ && ! (x->move_mode || x->symbolic_link || x->hard_link ++ || x->backup_type != no_backups ++ || x->unlink_dest_before_opening)); ++ if ((use_stat ++ ? stat (dst_name, &dst_sb) ++ : lstat (dst_name, &dst_sb)) ++ != 0) ++ { ++ if (errno != ENOENT) ++ { ++ error (0, errno, _("cannot stat %s"), quote (dst_name)); ++ return false; ++ } ++ else ++ { ++ new_dst = true; ++ } ++ } ++ else ++ { /* Here, we know that dst_name exists, at least to the point ++ that it is stat'able or lstat'able. */ ++ bool return_now; ++ bool unlink_src; ++ ++ have_dst_lstat = !use_stat; ++ if (! same_file_ok (src_name, &src_sb, dst_name, &dst_sb, ++ x, &return_now, &unlink_src)) ++ { ++ error (0, 0, _("%s and %s are the same file"), ++ quote_n (0, src_name), quote_n (1, dst_name)); ++ return false; ++ } ++ ++ if (!S_ISDIR (src_mode) && x->update) ++ { ++ /* When preserving time stamps (but not moving within a file ++ system), don't worry if the destination time stamp is ++ less than the source merely because of time stamp ++ truncation. */ ++ int options = ((x->preserve_timestamps ++ && ! 
(x->move_mode ++ && dst_sb.st_dev == src_sb.st_dev)) ++ ? UTIMECMP_TRUNCATE_SOURCE ++ : 0); ++ ++ if (0 <= utimecmp (dst_name, &dst_sb, &src_sb, options)) ++ { ++ /* We're using --update and the destination is not older ++ than the source, so do not copy or move. Pretend the ++ rename succeeded, so the caller (if it's mv) doesn't ++ end up removing the source file. */ ++ if (rename_succeeded) ++ *rename_succeeded = true; ++ return true; ++ } ++ } ++ ++ /* When there is an existing destination file, we may end up ++ returning early, and hence not copying/moving the file. ++ This may be due to an interactive `negative' reply to the ++ prompt about the existing file. It may also be due to the ++ use of the --reply=no option. ++ ++ cp and mv treat -i and -f differently. */ ++ if (x->move_mode) ++ { ++ if (abandon_move (x, dst_name, &dst_sb) ++ || (unlink_src && unlink (src_name) == 0)) ++ { ++ /* Pretend the rename succeeded, so the caller (mv) ++ doesn't end up removing the source file. */ ++ if (rename_succeeded) ++ *rename_succeeded = true; ++ if (unlink_src && x->verbose) ++ printf (_("removed %s\n"), quote (src_name)); ++ return true; ++ } ++ if (unlink_src) ++ { ++ error (0, errno, _("cannot remove %s"), quote (src_name)); ++ return false; ++ } ++ } ++ else ++ { ++ if (! S_ISDIR (src_mode) ++ && (x->interactive == I_ALWAYS_NO ++ || (x->interactive == I_ASK_USER ++ && (overwrite_prompt (dst_name, &dst_sb), 1) ++ && ! yesno ()))) ++ return true; ++ } ++ ++ if (return_now) ++ return true; ++ ++ if (!S_ISDIR (dst_sb.st_mode)) ++ { ++ if (S_ISDIR (src_mode)) ++ { ++ if (x->move_mode && x->backup_type != no_backups) ++ { ++ /* Moving a directory onto an existing ++ non-directory is ok only with --backup. 
*/ ++ } ++ else ++ { ++ error (0, 0, ++ _("cannot overwrite non-directory %s with directory %s"), ++ quote_n (0, dst_name), quote_n (1, src_name)); ++ return false; ++ } ++ } ++ ++ /* Don't let the user destroy their data, even if they try hard: ++ This mv command must fail (likewise for cp): ++ rm -rf a b c; mkdir a b c; touch a/f b/f; mv a/f b/f c ++ Otherwise, the contents of b/f would be lost. ++ In the case of `cp', b/f would be lost if the user simulated ++ a move using cp and rm. ++ Note that it works fine if you use --backup=numbered. */ ++ if (command_line_arg ++ && x->backup_type != numbered_backups ++ && seen_file (x->dest_info, dst_name, &dst_sb)) ++ { ++ error (0, 0, ++ _("will not overwrite just-created %s with %s"), ++ quote_n (0, dst_name), quote_n (1, src_name)); ++ return false; ++ } ++ } ++ ++ if (!S_ISDIR (src_mode)) ++ { ++ if (S_ISDIR (dst_sb.st_mode)) ++ { ++ if (x->move_mode && x->backup_type != no_backups) ++ { ++ /* Moving a non-directory onto an existing ++ directory is ok only with --backup. */ ++ } ++ else ++ { ++ error (0, 0, ++ _("cannot overwrite directory %s with non-directory"), ++ quote (dst_name)); ++ return false; ++ } ++ } ++ } ++ ++ if (x->move_mode) ++ { ++ /* Don't allow user to move a directory onto a non-directory. */ ++ if (S_ISDIR (src_sb.st_mode) && !S_ISDIR (dst_sb.st_mode) ++ && x->backup_type == no_backups) ++ { ++ error (0, 0, ++ _("cannot move directory onto non-directory: %s -> %s"), ++ quote_n (0, src_name), quote_n (0, dst_name)); ++ return false; ++ } ++ } ++ ++ if (x->backup_type != no_backups ++ /* Don't try to back up a destination if the last ++ component of src_name is "." or "..". */ ++ && ! dot_or_dotdot (last_component (src_name)) ++ /* Create a backup of each destination directory in move mode, ++ but not in copy mode. FIXME: it might make sense to add an ++ option to suppress backup creation also for move mode. ++ That would let one use mv to merge new content into an ++ existing hierarchy. 
*/ ++ && (x->move_mode || ! S_ISDIR (dst_sb.st_mode))) ++ { ++ char *tmp_backup = find_backup_file_name (dst_name, ++ x->backup_type); ++ ++ /* Detect (and fail) when creating the backup file would ++ destroy the source file. Before, running the commands ++ cd /tmp; rm -f a a~; : > a; echo A > a~; cp --b=simple a~ a ++ would leave two zero-length files: a and a~. */ ++ /* FIXME: but simply change e.g., the final a~ to `./a~' ++ and the source will still be destroyed. */ ++ if (STREQ (tmp_backup, src_name)) ++ { ++ const char *fmt; ++ fmt = (x->move_mode ++ ? _("backing up %s would destroy source; %s not moved") ++ : _("backing up %s would destroy source; %s not copied")); ++ error (0, 0, fmt, ++ quote_n (0, dst_name), ++ quote_n (1, src_name)); ++ free (tmp_backup); ++ return false; ++ } ++ ++ /* FIXME: use fts: ++ Using alloca for a file name that may be arbitrarily ++ long is not recommended. In fact, even forming such a name ++ should be discouraged. Eventually, this code will be rewritten ++ to use fts, so using alloca here will be less of a problem. */ ++ ASSIGN_STRDUPA (dst_backup, tmp_backup); ++ free (tmp_backup); ++ if (rename (dst_name, dst_backup) != 0) ++ { ++ if (errno != ENOENT) ++ { ++ error (0, errno, _("cannot backup %s"), quote (dst_name)); ++ return false; ++ } ++ else ++ { ++ dst_backup = NULL; ++ } ++ } ++ else ++ { ++ backup_succeeded = true; ++ } ++ new_dst = true; ++ } ++ else if (! S_ISDIR (dst_sb.st_mode) ++ /* Never unlink dst_name when in move mode. */ ++ && ! x->move_mode ++ && (x->unlink_dest_before_opening ++ || (x->preserve_links && 1 < dst_sb.st_nlink) ++ || (x->dereference == DEREF_NEVER ++ && ! 
S_ISREG (src_sb.st_mode)) ++ )) ++ { ++ if (unlink (dst_name) != 0 && errno != ENOENT) ++ { ++ error (0, errno, _("cannot remove %s"), quote (dst_name)); ++ return false; ++ } ++ new_dst = true; ++ if (x->verbose) ++ printf (_("removed %s\n"), quote (dst_name)); ++ } ++ } ++ } ++ ++ /* Ensure we don't try to copy through a symlink that was ++ created by a prior call to this function. */ ++ if (command_line_arg ++ && x->dest_info ++ && ! x->move_mode ++ && x->backup_type == no_backups) ++ { ++ bool lstat_ok = true; ++ struct stat tmp_buf; ++ struct stat *dst_lstat_sb; ++ ++ /* If we called lstat above, good: use that data. ++ Otherwise, call lstat here, in case dst_name is a symlink. */ ++ if (have_dst_lstat) ++ dst_lstat_sb = &dst_sb; ++ else ++ { ++ if (lstat (dst_name, &tmp_buf) == 0) ++ dst_lstat_sb = &tmp_buf; ++ else ++ lstat_ok = false; ++ } ++ ++ /* Never copy through a symlink we've just created. */ ++ if (lstat_ok ++ && S_ISLNK (dst_lstat_sb->st_mode) ++ && seen_file (x->dest_info, dst_name, dst_lstat_sb)) ++ { ++ error (0, 0, ++ _("will not copy %s through just-created symlink %s"), ++ quote_n (0, src_name), quote_n (1, dst_name)); ++ return false; ++ } ++ } ++ ++ /* If the source is a directory, we don't always create the destination ++ directory. So --verbose should not announce anything until we're ++ sure we'll create a directory. */ ++ if (x->verbose && !S_ISDIR (src_mode)) ++ emit_verbose (src_name, dst_name, backup_succeeded ? dst_backup : NULL); ++ ++ /* Associate the destination file name with the source device and inode ++ so that if we encounter a matching dev/ino pair in the source tree ++ we can arrange to create a hard link between the corresponding names ++ in the destination tree. ++ ++ When using the --link (-l) option, there is no need to take special ++ measures, because (barring race conditions) files that are hard-linked ++ in the source tree will also be hard-linked in the destination tree. 
++ ++ Sometimes, when preserving links, we have to record dev/ino even ++ though st_nlink == 1: ++ - when in move_mode, since we may be moving a group of N hard-linked ++ files (via two or more command line arguments) to a different ++ partition; the links may be distributed among the command line ++ arguments (possibly hierarchies) so that the link count of ++ the final, once-linked source file is reduced to 1 when it is ++ considered below. But in this case (for mv) we don't need to ++ incur the expense of recording the dev/ino => name mapping; all we ++ really need is a lookup, to see if the dev/ino pair has already ++ been copied. ++ - when using -H and processing a command line argument; ++ that command line argument could be a symlink pointing to another ++ command line argument. With `cp -H --preserve=link', we hard-link ++ those two destination files. ++ - likewise for -L except that it applies to all files, not just ++ command line arguments. ++ ++ Also, with --recursive, record dev/ino of each command-line directory. ++ We'll use that info to detect this problem: cp -R dir dir. */ ++ ++ if (x->move_mode && src_sb.st_nlink == 1) ++ { ++ earlier_file = src_to_dest_lookup (src_sb.st_ino, src_sb.st_dev); ++ } ++ else if (x->preserve_links ++ && !x->hard_link ++ && (1 < src_sb.st_nlink ++ || (command_line_arg ++ && x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) ++ || x->dereference == DEREF_ALWAYS)) ++ { ++ earlier_file = remember_copied (dst_name, src_sb.st_ino, src_sb.st_dev); ++ } ++ else if (x->recursive && S_ISDIR (src_mode)) ++ { ++ if (command_line_arg) ++ earlier_file = remember_copied (dst_name, src_sb.st_ino, src_sb.st_dev); ++ else ++ earlier_file = src_to_dest_lookup (src_sb.st_ino, src_sb.st_dev); ++ } ++ ++ /* Did we copy this inode somewhere else (in this command line argument) ++ and therefore this is a second hard link to the inode? 
*/ ++ ++ if (earlier_file) ++ { ++ /* Avoid damaging the destination file system by refusing to preserve ++ hard-linked directories (which are found at least in Netapp snapshot ++ directories). */ ++ if (S_ISDIR (src_mode)) ++ { ++ /* If src_name and earlier_file refer to the same directory entry, ++ then warn about copying a directory into itself. */ ++ if (same_name (src_name, earlier_file)) ++ { ++ error (0, 0, _("cannot copy a directory, %s, into itself, %s"), ++ quote_n (0, top_level_src_name), ++ quote_n (1, top_level_dst_name)); ++ *copy_into_self = true; ++ goto un_backup; ++ } ++ else if (x->dereference == DEREF_ALWAYS) ++ { ++ /* This happens when e.g., encountering a directory for the ++ second or subsequent time via symlinks when cp is invoked ++ with -R and -L. E.g., ++ rm -rf a b c d; mkdir a b c d; ln -s ../c a; ln -s ../c b; ++ cp -RL a b d ++ */ ++ } ++ else ++ { ++ error (0, 0, _("will not create hard link %s to directory %s"), ++ quote_n (0, dst_name), quote_n (1, earlier_file)); ++ goto un_backup; ++ } ++ } ++ else ++ { ++ /* We want to guarantee that symlinks are not followed. */ ++ bool link_failed = (linkat (AT_FDCWD, earlier_file, AT_FDCWD, ++ dst_name, 0) != 0); ++ ++ /* If the link failed because of an existing destination, ++ remove that file and then call link again. 
*/ ++ if (link_failed && errno == EEXIST) ++ { ++ if (unlink (dst_name) != 0) ++ { ++ error (0, errno, _("cannot remove %s"), quote (dst_name)); ++ goto un_backup; ++ } ++ if (x->verbose) ++ printf (_("removed %s\n"), quote (dst_name)); ++ link_failed = (linkat (AT_FDCWD, earlier_file, AT_FDCWD, ++ dst_name, 0) != 0); ++ } ++ ++ if (link_failed) ++ { ++ error (0, errno, _("cannot create hard link %s to %s"), ++ quote_n (0, dst_name), quote_n (1, earlier_file)); ++ goto un_backup; ++ } ++ ++ return true; ++ } ++ } ++ ++ if (x->move_mode) ++ { ++ if (rename (src_name, dst_name) == 0) ++ { ++ if (x->verbose && S_ISDIR (src_mode)) ++ emit_verbose (src_name, dst_name, ++ backup_succeeded ? dst_backup : NULL); ++ ++ if (rename_succeeded) ++ *rename_succeeded = true; ++ ++ if (command_line_arg) ++ { ++ /* Record destination dev/ino/name, so that if we are asked ++ to overwrite that file again, we can detect it and fail. */ ++ /* It's fine to use the _source_ stat buffer (src_sb) to get the ++ _destination_ dev/ino, since the rename above can't have ++ changed those, and `mv' always uses lstat. ++ We could limit it further by operating ++ only on non-directories. */ ++ record_file (x->dest_info, dst_name, &src_sb); ++ } ++ ++ return true; ++ } ++ ++ /* FIXME: someday, consider what to do when moving a directory into ++ itself but when source and destination are on different devices. */ ++ ++ /* This happens when attempting to rename a directory to a ++ subdirectory of itself. */ ++ if (errno == EINVAL) ++ { ++ /* FIXME: this is a little fragile in that it relies on rename(2) ++ failing with a specific errno value. Expect problems on ++ non-POSIX systems. */ ++ error (0, 0, _("cannot move %s to a subdirectory of itself, %s"), ++ quote_n (0, top_level_src_name), ++ quote_n (1, top_level_dst_name)); ++ ++ /* Note that there is no need to call forget_created here, ++ (compare with the other calls in this file) since the ++ destination directory didn't exist before. 
*/ ++ ++ *copy_into_self = true; ++ /* FIXME-cleanup: Don't return true here; adjust mv.c accordingly. ++ The only caller that uses this code (mv.c) ends up setting its ++ exit status to nonzero when copy_into_self is nonzero. */ ++ return true; ++ } ++ ++ /* WARNING: there probably exist systems for which an inter-device ++ rename fails with a value of errno not handled here. ++ If/as those are reported, add them to the condition below. ++ If this happens to you, please do the following and send the output ++ to the bug-reporting address (e.g., in the output of cp --help): ++ touch k; perl -e 'rename "k","/tmp/k" or print "$!(",$!+0,")\n"' ++ where your current directory is on one partition and /tmp is on another. ++ Also, please try to find the E* errno macro name corresponding to ++ the diagnostic and parenthesized integer, and include that in your ++ e-mail. One way to do that is to run a command like this ++ find /usr/include/. -type f \ ++ | xargs grep 'define.*\.*\<18\>' /dev/null ++ where you'd replace `18' with the integer in parentheses that ++ was output from the perl one-liner above. ++ If necessary, of course, change `/tmp' to some other directory. */ ++ if (errno != EXDEV) ++ { ++ /* There are many ways this can happen due to a race condition. ++ When something happens between the initial XSTAT and the ++ subsequent rename, we can get many different types of errors. ++ For example, if the destination is initially a non-directory ++ or non-existent, but it is created as a directory, the rename ++ fails. If two `mv' commands try to rename the same file at ++ about the same time, one will succeed and the other will fail. ++ If the permissions on the directory containing the source or ++ destination file are made too restrictive, the rename will ++ fail. Etc. 
*/ ++ error (0, errno, ++ _("cannot move %s to %s"), ++ quote_n (0, src_name), quote_n (1, dst_name)); ++ forget_created (src_sb.st_ino, src_sb.st_dev); ++ return false; ++ } ++ ++ /* The rename attempt has failed. Remove any existing destination ++ file so that a cross-device `mv' acts as if it were really using ++ the rename syscall. */ ++ if (unlink (dst_name) != 0 && errno != ENOENT) ++ { ++ error (0, errno, ++ _("inter-device move failed: %s to %s; unable to remove target"), ++ quote_n (0, src_name), quote_n (1, dst_name)); ++ forget_created (src_sb.st_ino, src_sb.st_dev); ++ return false; ++ } ++ ++ new_dst = true; ++ } ++ ++ /* If the ownership might change, or if it is a directory (whose ++ special mode bits may change after the directory is created), ++ omit some permissions at first, so unauthorized users cannot nip ++ in before the file is ready. */ ++ dst_mode_bits = (x->set_mode ? x->mode : src_mode) & CHMOD_MODE_BITS; ++ omitted_permissions = ++ (dst_mode_bits ++ & (x->preserve_ownership ? S_IRWXG | S_IRWXO ++ : S_ISDIR (src_mode) ? 
S_IWGRP | S_IWOTH ++ : 0)); ++ ++ delayed_ok = true; ++ ++ if (x->preserve_security_context) ++ { ++ security_context_t con; ++ ++ if (0 <= lgetfilecon (src_name, &con)) ++ { ++ if (setfscreatecon (con) < 0) ++ { ++ if (!x->reduce_diagnostics || x->require_preserve_context) ++ error (0, errno, ++ _("failed to set default file creation context to %s"), ++ quote (con)); ++ if (x->require_preserve_context) ++ { ++ freecon (con); ++ return false; ++ } ++ } ++ freecon (con); ++ } ++ else ++ { ++ if (!errno_unsupported (errno) || x->require_preserve_context) ++ { ++ if (!x->reduce_diagnostics || x->require_preserve_context) ++ error (0, errno, ++ _("failed to get security context of %s"), ++ quote (src_name)); ++ if (x->require_preserve_context) ++ return false; ++ } ++ } ++ } ++ ++ if (S_ISDIR (src_mode)) ++ { ++ struct dir_list *dir; ++ ++ /* If this directory has been copied before during the ++ recursion, there is a symbolic link to an ancestor ++ directory of the symbolic link. It is impossible to ++ continue to copy this, unless we've got an infinite disk. */ ++ ++ if (is_ancestor (&src_sb, ancestors)) ++ { ++ error (0, 0, _("cannot copy cyclic symbolic link %s"), ++ quote (src_name)); ++ goto un_backup; ++ } ++ ++ /* Insert the current directory in the list of parents. */ ++ ++ dir = alloca (sizeof *dir); ++ dir->parent = ancestors; ++ dir->ino = src_sb.st_ino; ++ dir->dev = src_sb.st_dev; ++ ++ if (new_dst || !S_ISDIR (dst_sb.st_mode)) ++ { ++ /* POSIX says mkdir's behavior is implementation-defined when ++ (src_mode & ~S_IRWXUGO) != 0. However, common practice is ++ to ask mkdir to copy all the CHMOD_MODE_BITS, letting mkdir ++ decide what to do with S_ISUID | S_ISGID | S_ISVTX. */ ++ if (mkdir (dst_name, dst_mode_bits & ~omitted_permissions) != 0) ++ { ++ error (0, errno, _("cannot create directory %s"), ++ quote (dst_name)); ++ goto un_backup; ++ } ++ ++ /* We need search and write permissions to the new directory ++ for writing the directory's contents. 
Check if these ++ permissions are there. */ ++ ++ if (lstat (dst_name, &dst_sb) != 0) ++ { ++ error (0, errno, _("cannot stat %s"), quote (dst_name)); ++ goto un_backup; ++ } ++ else if ((dst_sb.st_mode & S_IRWXU) != S_IRWXU) ++ { ++ /* Make the new directory searchable and writable. */ ++ ++ dst_mode = dst_sb.st_mode; ++ restore_dst_mode = true; ++ ++ if (lchmod (dst_name, dst_mode | S_IRWXU) != 0) ++ { ++ error (0, errno, _("setting permissions for %s"), ++ quote (dst_name)); ++ goto un_backup; ++ } ++ } ++ ++ /* Record the created directory's inode and device numbers into ++ the search structure, so that we can avoid copying it again. ++ Do this only for the first directory that is created for each ++ source command line argument. */ ++ if (!*first_dir_created_per_command_line_arg) ++ { ++ remember_copied (dst_name, dst_sb.st_ino, dst_sb.st_dev); ++ *first_dir_created_per_command_line_arg = true; ++ } ++ ++ if (x->verbose) ++ emit_verbose (src_name, dst_name, NULL); ++ } ++ ++ /* Decide whether to copy the contents of the directory. */ ++ if (x->one_file_system && device != 0 && device != src_sb.st_dev) ++ { ++ /* Here, we are crossing a file system boundary and cp's -x option ++ is in effect: so don't copy the contents of this directory. */ ++ } ++ else ++ { ++ /* Copy the contents of the directory. Don't just return if ++ this fails -- otherwise, the failure to read a single file ++ in a source directory would cause the containing destination ++ directory not to have owner/perms set properly. */ ++ delayed_ok = copy_dir (src_name, dst_name, new_dst, &src_sb, dir, x, ++ first_dir_created_per_command_line_arg, ++ copy_into_self); ++ } ++ } ++ else if (x->symbolic_link) ++ { ++ dest_is_symlink = true; ++ if (*src_name != '/') ++ { ++ /* Check that DST_NAME denotes a file in the current directory. 
*/ ++ struct stat dot_sb; ++ struct stat dst_parent_sb; ++ char *dst_parent; ++ bool in_current_dir; ++ ++ dst_parent = dir_name (dst_name); ++ ++ in_current_dir = (STREQ (".", dst_parent) ++ /* If either stat call fails, it's ok not to report ++ the failure and say dst_name is in the current ++ directory. Other things will fail later. */ ++ || stat (".", &dot_sb) != 0 ++ || stat (dst_parent, &dst_parent_sb) != 0 ++ || SAME_INODE (dot_sb, dst_parent_sb)); ++ free (dst_parent); ++ ++ if (! in_current_dir) ++ { ++ error (0, 0, ++ _("%s: can make relative symbolic links only in current directory"), ++ quote (dst_name)); ++ goto un_backup; ++ } ++ } ++ if (symlink (src_name, dst_name) != 0) ++ { ++ error (0, errno, _("cannot create symbolic link %s to %s"), ++ quote_n (0, dst_name), quote_n (1, src_name)); ++ goto un_backup; ++ } ++ } ++ ++ /* cp, invoked with `--link --no-dereference', should not follow the ++ link; we guarantee this with gnulib's linkat module (on systems ++ where link(2) follows the link, gnulib creates a symlink with ++ identical contents, which is good enough for our purposes). */ ++ else if (x->hard_link ++ && (!S_ISLNK (src_mode) ++ || x->dereference != DEREF_NEVER)) ++ { ++ if (linkat (AT_FDCWD, src_name, AT_FDCWD, dst_name, 0)) ++ { ++ error (0, errno, _("cannot create link %s"), quote (dst_name)); ++ goto un_backup; ++ } ++ } ++ else if (S_ISREG (src_mode) ++ || (x->copy_as_regular && !S_ISLNK (src_mode))) ++ { ++ copied_as_regular = true; ++ /* POSIX says the permission bits of the source file must be ++ used as the 3rd argument in the open call. Historical ++ practice passed all the source mode bits to 'open', but the extra ++ bits were ignored, so it should be the same either way. */ ++ if (! 
copy_reg (src_name, dst_name, x, src_mode & S_IRWXUGO, ++ omitted_permissions, &new_dst, &src_sb)) ++ goto un_backup; ++ } ++ else if (S_ISFIFO (src_mode)) ++ { ++ /* Use mknod, rather than mkfifo, because the former preserves ++ the special mode bits of a fifo on Solaris 10, while mkfifo ++ does not. But fall back on mkfifo, because on some BSD systems, ++ mknod always fails when asked to create a FIFO. */ ++ if (mknod (dst_name, src_mode & ~omitted_permissions, 0) != 0) ++ if (mkfifo (dst_name, src_mode & ~S_IFIFO & ~omitted_permissions) != 0) ++ { ++ error (0, errno, _("cannot create fifo %s"), quote (dst_name)); ++ goto un_backup; ++ } ++ } ++ else if (S_ISBLK (src_mode) || S_ISCHR (src_mode) || S_ISSOCK (src_mode)) ++ { ++ if (mknod (dst_name, src_mode & ~omitted_permissions, src_sb.st_rdev) ++ != 0) ++ { ++ error (0, errno, _("cannot create special file %s"), ++ quote (dst_name)); ++ goto un_backup; ++ } ++ } ++ else if (S_ISLNK (src_mode)) ++ { ++ char *src_link_val = areadlink_with_size (src_name, src_sb.st_size); ++ dest_is_symlink = true; ++ if (src_link_val == NULL) ++ { ++ error (0, errno, _("cannot read symbolic link %s"), quote (src_name)); ++ goto un_backup; ++ } ++ ++ if (symlink (src_link_val, dst_name) == 0) ++ free (src_link_val); ++ else ++ { ++ int saved_errno = errno; ++ bool same_link = false; ++ if (x->update && !new_dst && S_ISLNK (dst_sb.st_mode) ++ && dst_sb.st_size == strlen (src_link_val)) ++ { ++ /* See if the destination is already the desired symlink. ++ FIXME: This behavior isn't documented, and seems wrong ++ in some cases, e.g., if the destination symlink has the ++ wrong ownership, permissions, or time stamps. */ ++ char *dest_link_val = ++ areadlink_with_size (dst_name, dst_sb.st_size); ++ if (dest_link_val && STREQ (dest_link_val, src_link_val)) ++ same_link = true; ++ free (dest_link_val); ++ } ++ free (src_link_val); ++ ++ if (! 
same_link) ++ { ++ error (0, saved_errno, _("cannot create symbolic link %s"), ++ quote (dst_name)); ++ goto un_backup; ++ } ++ } ++ ++ if (x->preserve_security_context) ++ restore_default_fscreatecon_or_die (); ++ ++ if (x->preserve_ownership) ++ { ++ /* Preserve the owner and group of the just-`copied' ++ symbolic link, if possible. */ ++ if (HAVE_LCHOWN ++ && lchown (dst_name, src_sb.st_uid, src_sb.st_gid) != 0 ++ && ! chown_failure_ok (x)) ++ { ++ error (0, errno, _("failed to preserve ownership for %s"), ++ dst_name); ++ goto un_backup; ++ } ++ else ++ { ++ /* Can't preserve ownership of symlinks. ++ FIXME: maybe give a warning or even error for symlinks ++ in directories with the sticky bit set -- there, not ++ preserving owner/group is a potential security problem. */ ++ } ++ } ++ } ++ else ++ { ++ error (0, 0, _("%s has unknown file type"), quote (src_name)); ++ goto un_backup; ++ } ++ ++ if (command_line_arg && x->dest_info) ++ { ++ /* Now that the destination file is very likely to exist, ++ add its info to the set. */ ++ struct stat sb; ++ if (lstat (dst_name, &sb) == 0) ++ record_file (x->dest_info, dst_name, &sb); ++ } ++ ++ /* If we've just created a hard-link due to cp's --link option, ++ we're done. */ ++ if (x->hard_link && ! S_ISDIR (src_mode)) ++ return delayed_ok; ++ ++ if (copied_as_regular) ++ return delayed_ok; ++ ++ /* POSIX says that `cp -p' must restore the following: ++ - permission bits ++ - setuid, setgid bits ++ - owner and group ++ If it fails to restore any of those, we may give a warning but ++ the destination must not be removed. ++ FIXME: implement the above. */ ++ ++ /* Adjust the times (and if possible, ownership) for the copy. ++ chown turns off set[ug]id bits for non-root, ++ so do the chmod last. */ ++ ++ if (x->preserve_timestamps) ++ { ++ struct timespec timespec[2]; ++ timespec[0] = get_stat_atime (&src_sb); ++ timespec[1] = get_stat_mtime (&src_sb); ++ ++ if ((dest_is_symlink ++ ? 
utimens_symlink (dst_name, timespec) ++ : utimens (dst_name, timespec)) ++ != 0) ++ { ++ error (0, errno, _("preserving times for %s"), quote (dst_name)); ++ if (x->require_preserve) ++ return false; ++ } ++ } ++ ++ /* The operations beyond this point may dereference a symlink. */ ++ if (dest_is_symlink) ++ return delayed_ok; ++ ++ /* Avoid calling chown if we know it's not necessary. */ ++ if (x->preserve_ownership ++ && (new_dst || !SAME_OWNER_AND_GROUP (src_sb, dst_sb))) ++ { ++ switch (set_owner (x, dst_name, -1, &src_sb, new_dst, &dst_sb)) ++ { ++ case -1: ++ return false; ++ ++ case 0: ++ src_mode &= ~ (S_ISUID | S_ISGID | S_ISVTX); ++ break; ++ } ++ } ++ ++ set_author (dst_name, -1, &src_sb); ++ ++ if (x->preserve_xattr && ! copy_attr_by_name (src_name, dst_name, x) ++ && x->require_preserve_xattr) ++ return false; ++ ++ if (x->preserve_mode || x->move_mode) ++ { ++ if (copy_acl (src_name, -1, dst_name, -1, src_mode) != 0 ++ && x->require_preserve) ++ return false; ++ } ++ else if (x->set_mode) ++ { ++ if (set_acl (dst_name, -1, x->mode) != 0) ++ return false; ++ } ++ else ++ { ++ if (omitted_permissions) ++ { ++ omitted_permissions &= ~ cached_umask (); ++ ++ if (omitted_permissions && !restore_dst_mode) ++ { ++ /* Permissions were deliberately omitted when the file ++ was created due to security concerns. See whether ++ they need to be re-added now. It'd be faster to omit ++ the lstat, but deducing the current destination mode ++ is tricky in the presence of implementation-defined ++ rules for special mode bits. 
*/ ++ if (new_dst && lstat (dst_name, &dst_sb) != 0) ++ { ++ error (0, errno, _("cannot stat %s"), quote (dst_name)); ++ return false; ++ } ++ dst_mode = dst_sb.st_mode; ++ if (omitted_permissions & ~dst_mode) ++ restore_dst_mode = true; ++ } ++ } ++ ++ if (restore_dst_mode) ++ { ++ if (lchmod (dst_name, dst_mode | omitted_permissions) != 0) ++ { ++ error (0, errno, _("preserving permissions for %s"), ++ quote (dst_name)); ++ if (x->require_preserve) ++ return false; ++ } ++ } ++ } ++ ++ return delayed_ok; ++ ++un_backup: ++ ++ if (x->preserve_security_context) ++ restore_default_fscreatecon_or_die (); ++ ++ /* We have failed to create the destination file. ++ If we've just added a dev/ino entry via the remember_copied ++ call above (i.e., unless we've just failed to create a hard link), ++ remove the entry associating the source dev/ino with the ++ destination file name, so we don't try to `preserve' a link ++ to a file we didn't create. */ ++ if (earlier_file == NULL) ++ forget_created (src_sb.st_ino, src_sb.st_dev); ++ ++ if (dst_backup) ++ { ++ if (rename (dst_backup, dst_name) != 0) ++ error (0, errno, _("cannot un-backup %s"), quote (dst_name)); ++ else ++ { ++ if (x->verbose) ++ printf (_("%s -> %s (unbackup)\n"), ++ quote_n (0, dst_backup), quote_n (1, dst_name)); ++ } ++ } ++ return false; ++} ++ ++static bool ++valid_options (const struct cp_options *co) ++{ ++ assert (co != NULL); ++ assert (VALID_BACKUP_TYPE (co->backup_type)); ++ assert (VALID_SPARSE_MODE (co->sparse_mode)); ++ assert (VALID_REFLINK_MODE (co->reflink_mode)); ++ assert (!(co->hard_link && co->symbolic_link)); ++ assert (! ++ (co->reflink_mode == REFLINK_ALWAYS ++ && co->sparse_mode != SPARSE_AUTO)); ++ return true; ++} ++ ++/* Copy the file SRC_NAME to the file DST_NAME. The files may be of ++ any type. 
NONEXISTENT_DST should be true if the file DST_NAME ++ is known not to exist (e.g., because its parent directory was just ++ created); NONEXISTENT_DST should be false if DST_NAME might already ++ exist. OPTIONS is ... FIXME-describe ++ Set *COPY_INTO_SELF if SRC_NAME is a parent of (or the ++ same as) DST_NAME; otherwise, set clear it. ++ Return true if successful. */ ++ ++extern bool ++copy (char const *src_name, char const *dst_name, ++ bool nonexistent_dst, const struct cp_options *options, ++ bool *copy_into_self, bool *rename_succeeded) ++{ ++ assert (valid_options (options)); ++ ++ /* Record the file names: they're used in case of error, when copying ++ a directory into itself. I don't like to make these tools do *any* ++ extra work in the common case when that work is solely to handle ++ exceptional cases, but in this case, I don't see a way to derive the ++ top level source and destination directory names where they're used. ++ An alternative is to use COPY_INTO_SELF and print the diagnostic ++ from every caller -- but I don't want to do that. */ ++ top_level_src_name = src_name; ++ top_level_dst_name = dst_name; ++ ++ bool first_dir_created_per_command_line_arg = false; ++ return copy_internal (src_name, dst_name, nonexistent_dst, 0, NULL, ++ options, true, ++ &first_dir_created_per_command_line_arg, ++ copy_into_self, rename_succeeded); ++} ++ ++/* Set *X to the default options for a value of type struct cp_options. 
*/ ++ ++extern void ++cp_options_default (struct cp_options *x) ++{ ++ memset (x, 0, sizeof *x); ++#ifdef PRIV_FILE_CHOWN ++ { ++ priv_set_t *pset = priv_allocset (); ++ if (!pset) ++ xalloc_die (); ++ if (getppriv (PRIV_EFFECTIVE, pset) == 0) ++ { ++ x->chown_privileges = priv_ismember (pset, PRIV_FILE_CHOWN); ++ x->owner_privileges = priv_ismember (pset, PRIV_FILE_OWNER); ++ } ++ priv_freeset (pset); ++ } ++#else ++ x->chown_privileges = x->owner_privileges = (geteuid () == 0); ++#endif ++} ++ ++/* Return true if it's OK for chown to fail, where errno is ++ the error number that chown failed with and X is the copying ++ option set. */ ++ ++extern bool ++chown_failure_ok (struct cp_options const *x) ++{ ++ /* If non-root uses -p, it's ok if we can't preserve ownership. ++ But root probably wants to know, e.g. if NFS disallows it, ++ or if the target system doesn't support file ownership. */ ++ ++ return ((errno == EPERM || errno == EINVAL) && !x->chown_privileges); ++} ++ ++/* Similarly, return true if it's OK for chmod and similar operations ++ to fail, where errno is the error number that chmod failed with and ++ X is the copying option set. */ ++ ++static bool ++owner_failure_ok (struct cp_options const *x) ++{ ++ return ((errno == EPERM || errno == EINVAL) && !x->owner_privileges); ++} ++ ++/* Return the user's umask, caching the result. */ ++ ++extern mode_t ++cached_umask (void) ++{ ++ static mode_t mask = (mode_t) -1; ++ if (mask == (mode_t) -1) ++ { ++ mask = umask (0); ++ umask (mask); ++ } ++ return mask; ++} +diff -urNp coreutils-8.0-orig/src/copy.h coreutils-8.0/src/copy.h +--- coreutils-8.0-orig/src/copy.h 2009-09-21 14:29:33.000000000 +0200 ++++ coreutils-8.0/src/copy.h 2009-10-07 10:10:11.000000000 +0200 +@@ -158,6 +158,9 @@ struct cp_options bool preserve_mode; bool preserve_timestamps; @@ -59,10 +2449,297 @@ diff -urNp coreutils-7.1-orig/src/copy.h coreutils-7.1/src/copy.h /* Enabled for mv, and for cp by the --preserve=links option. 
If true, attempt to preserve in the destination files any logical hard links between the source files. If used with cp's -diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c ---- coreutils-7.1-orig/src/cp.c 2009-02-18 15:32:52.000000000 +0100 -+++ coreutils-7.1/src/cp.c 2009-02-24 13:47:15.000000000 +0100 -@@ -133,6 +133,7 @@ static struct option const long_opts[] = +diff -urNp coreutils-8.0-orig/src/copy.h.orig coreutils-8.0/src/copy.h.orig +--- coreutils-8.0-orig/src/copy.h.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/copy.h.orig 2009-09-21 14:29:33.000000000 +0200 +@@ -0,0 +1,283 @@ ++/* core functions for copying files and directories ++ Copyright (C) 89, 90, 91, 1995-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* Extracted from cp.c and librarified by Jim Meyering. */ ++ ++#ifndef COPY_H ++# define COPY_H ++ ++# include ++# include "hash.h" ++ ++/* Control creation of sparse files (files with holes). */ ++enum Sparse_type ++{ ++ SPARSE_UNUSED, ++ ++ /* Never create holes in DEST. */ ++ SPARSE_NEVER, ++ ++ /* This is the default. Use a crude (and sometimes inaccurate) ++ heuristic to determine if SOURCE has holes. If so, try to create ++ holes in DEST. */ ++ SPARSE_AUTO, ++ ++ /* For every sufficiently long sequence of bytes in SOURCE, try to ++ create a corresponding hole in DEST. 
There is a performance penalty ++ here because CP has to search for holes in SRC. But if the holes are ++ big enough, that penalty can be offset by the decrease in the amount ++ of data written to disk. */ ++ SPARSE_ALWAYS ++}; ++ ++/* Control creation of COW files. */ ++enum Reflink_type ++{ ++ /* Default to a standard copy. */ ++ REFLINK_NEVER, ++ ++ /* Try a COW copy and fall back to a standard copy. */ ++ REFLINK_AUTO, ++ ++ /* Require a COW copy and fail if not available. */ ++ REFLINK_ALWAYS ++}; ++ ++/* This type is used to help mv (via copy.c) distinguish these cases. */ ++enum Interactive ++{ ++ I_ALWAYS_YES = 1, ++ I_ALWAYS_NO, ++ I_ASK_USER, ++ I_UNSPECIFIED ++}; ++ ++/* How to handle symbolic links. */ ++enum Dereference_symlink ++{ ++ DEREF_UNDEFINED = 1, ++ ++ /* Copy the symbolic link itself. -P */ ++ DEREF_NEVER, ++ ++ /* If the symbolic is a command line argument, then copy ++ its referent. Otherwise, copy the symbolic link itself. -H */ ++ DEREF_COMMAND_LINE_ARGUMENTS, ++ ++ /* Copy the referent of the symbolic link. -L */ ++ DEREF_ALWAYS ++}; ++ ++# define VALID_SPARSE_MODE(Mode) \ ++ ((Mode) == SPARSE_NEVER \ ++ || (Mode) == SPARSE_AUTO \ ++ || (Mode) == SPARSE_ALWAYS) ++ ++# define VALID_REFLINK_MODE(Mode) \ ++ ((Mode) == REFLINK_NEVER \ ++ || (Mode) == REFLINK_AUTO \ ++ || (Mode) == REFLINK_ALWAYS) ++ ++/* These options control how files are copied by at least the ++ following programs: mv (when rename doesn't work), cp, install. ++ So, if you add a new member, be sure to initialize it in ++ mv.c, cp.c, and install.c. */ ++struct cp_options ++{ ++ enum backup_type backup_type; ++ ++ /* How to handle symlinks in the source. */ ++ enum Dereference_symlink dereference; ++ ++ /* This value is used to determine whether to prompt before removing ++ each existing destination file. It works differently depending on ++ whether move_mode is set. See code/comments in copy.c. */ ++ enum Interactive interactive; ++ ++ /* Control creation of sparse files. 
*/ ++ enum Sparse_type sparse_mode; ++ ++ /* Set the mode of the destination file to exactly this value ++ if SET_MODE is nonzero. */ ++ mode_t mode; ++ ++ /* If true, copy all files except (directories and, if not dereferencing ++ them, symbolic links,) as if they were regular files. */ ++ bool copy_as_regular; ++ ++ /* If true, remove each existing destination nondirectory before ++ trying to open it. */ ++ bool unlink_dest_before_opening; ++ ++ /* If true, first try to open each existing destination nondirectory, ++ then, if the open fails, unlink and try again. ++ This option must be set for `cp -f', in case the destination file ++ exists when the open is attempted. It is irrelevant to `mv' since ++ any destination is sure to be removed before the open. */ ++ bool unlink_dest_after_failed_open; ++ ++ /* If true, create hard links instead of copying files. ++ Create destination directories as usual. */ ++ bool hard_link; ++ ++ /* If true, rather than copying, first attempt to use rename. ++ If that fails, then resort to copying. */ ++ bool move_mode; ++ ++ /* Whether this process has appropriate privileges to chown a file ++ whose owner is not the effective user ID. */ ++ bool chown_privileges; ++ ++ /* Whether this process has appropriate privileges to do the ++ following operations on a file even when it is owned by some ++ other user: set the file's atime, mtime, mode, or ACL; remove or ++ rename an entry in the file even though it is a sticky directory, ++ or to mount on the file. */ ++ bool owner_privileges; ++ ++ /* If true, when copying recursively, skip any subdirectories that are ++ on different file systems from the one we started on. */ ++ bool one_file_system; ++ ++ /* If true, attempt to give the copies the original files' permissions, ++ owner, group, and timestamps. */ ++ bool preserve_ownership; ++ bool preserve_mode; ++ bool preserve_timestamps; ++ ++ /* Enabled for mv, and for cp by the --preserve=links option. 
++ If true, attempt to preserve in the destination files any ++ logical hard links between the source files. If used with cp's ++ --no-dereference option, and copying two hard-linked files, ++ the two corresponding destination files will also be hard linked. ++ ++ If used with cp's --dereference (-L) option, then, as that option implies, ++ hard links are *not* preserved. However, when copying a file F and ++ a symlink S to F, the resulting S and F in the destination directory ++ will be hard links to the same file (a copy of F). */ ++ bool preserve_links; ++ ++ /* If true and any of the above (for preserve) file attributes cannot ++ be applied to a destination file, treat it as a failure and return ++ nonzero immediately. E.g. for cp -p this must be true, for mv it ++ must be false. */ ++ bool require_preserve; ++ ++ /* If true, attempt to preserve the SELinux security context, too. ++ Set this only if the kernel is SELinux enabled. */ ++ bool preserve_security_context; ++ ++ /* Useful only when preserve_security_context is true. ++ If true, a failed attempt to preserve a file's security context ++ propagates failure "out" to the caller. If false, a failure to ++ preserve a file's security context does not change the invoking ++ application's exit status. Give diagnostics for failed syscalls ++ regardless of this setting. For example, with "cp --preserve=context" ++ this flag is "true", while with "cp -a", it is false. That means ++ "cp -a" attempts to preserve any security context, but does not ++ fail if it is unable to do so. */ ++ bool require_preserve_context; ++ ++ /* If true, attempt to preserve extended attributes using libattr. ++ Ignored if coreutils are compiled without xattr support. */ ++ bool preserve_xattr; ++ ++ /* Useful only when preserve_xattr is true. ++ If true, a failed attempt to preserve file's extended attributes ++ propagates failure "out" to the caller. 
If false, a failure to ++ preserve file's extended attributes does not change the invoking ++ application's exit status. Give diagnostics for failed syscalls ++ regardless of this setting. For example, with "cp --preserve=xattr" ++ this flag is "true", while with "cp --preserve=all", it is false. */ ++ bool require_preserve_xattr; ++ ++ /* Used as difference boolean between cp -a and cp -dR --preserve=all. ++ If true, non-mandatory failure diagnostics are not displayed. This ++ should prevent poluting cp -a output. ++ */ ++ bool reduce_diagnostics; ++ ++ /* If true, copy directories recursively and copy special files ++ as themselves rather than copying their contents. */ ++ bool recursive; ++ ++ /* If true, set file mode to value of MODE. Otherwise, ++ set it based on current umask modified by UMASK_KILL. */ ++ bool set_mode; ++ ++ /* If true, create symbolic links instead of copying files. ++ Create destination directories as usual. */ ++ bool symbolic_link; ++ ++ /* If true, do not copy a nondirectory that has an existing destination ++ with the same or newer modification time. */ ++ bool update; ++ ++ /* If true, display the names of the files before copying them. */ ++ bool verbose; ++ ++ /* If true, stdin is a tty. */ ++ bool stdin_tty; ++ ++ /* If true, open a dangling destination symlink when not in move_mode. ++ Otherwise, copy_reg gives a diagnostic (it refuses to write through ++ such a symlink) and returns false. */ ++ bool open_dangling_dest_symlink; ++ ++ /* Control creation of COW files. */ ++ enum Reflink_type reflink_mode; ++ ++ /* This is a set of destination name/inode/dev triples. Each such triple ++ represents a file we have created corresponding to a source file name ++ that was specified on the command line. Use it to avoid clobbering ++ source files in commands like this: ++ rm -rf a b c; mkdir a b c; touch a/f b/f; mv a/f b/f c ++ For now, it protects only regular files when copying (i.e. not renaming). 
++ When renaming, it protects all non-directories. ++ Use dest_info_init to initialize it, or set it to NULL to disable ++ this feature. */ ++ Hash_table *dest_info; ++ ++ /* FIXME */ ++ Hash_table *src_info; ++}; ++ ++# define XSTAT(X, Src_name, Src_sb) \ ++ ((X)->dereference == DEREF_NEVER \ ++ ? lstat (Src_name, Src_sb) \ ++ : stat (Src_name, Src_sb)) ++ ++/* Arrange to make rename calls go through the wrapper function ++ on systems with a rename function that fails for a source file name ++ specified with a trailing slash. */ ++# if RENAME_TRAILING_SLASH_BUG ++int rpl_rename (const char *, const char *); ++# undef rename ++# define rename rpl_rename ++# endif ++ ++bool copy (char const *src_name, char const *dst_name, ++ bool nonexistent_dst, const struct cp_options *options, ++ bool *copy_into_self, bool *rename_succeeded); ++ ++void dest_info_init (struct cp_options *); ++void src_info_init (struct cp_options *); ++ ++void cp_options_default (struct cp_options *); ++bool chown_failure_ok (struct cp_options const *); ++mode_t cached_umask (void); ++ ++#endif +diff -urNp coreutils-8.0-orig/src/cp.c coreutils-8.0/src/cp.c +--- coreutils-8.0-orig/src/cp.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/cp.c 2009-10-07 10:10:11.000000000 +0200 +@@ -139,6 +139,7 @@ static struct option const long_opts[] = {"target-directory", required_argument, NULL, 't'}, {"update", no_argument, NULL, 'u'}, {"verbose", no_argument, NULL, 'v'}, @@ -70,7 +2747,7 @@ diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} -@@ -191,6 +192,9 @@ Mandatory arguments to long options are +@@ -197,6 +198,9 @@ Mandatory arguments to long options are all\n\ "), stdout); fputs (_("\ @@ -80,7 +2757,7 @@ diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c --no-preserve=ATTR_LIST don't preserve the specified attributes\n\ --parents use full source file name under DIRECTORY\n\ "), stdout); -@@ 
-216,6 +220,7 @@ Mandatory arguments to long options are +@@ -223,6 +227,7 @@ Mandatory arguments to long options are destination file is missing\n\ -v, --verbose explain what is being done\n\ -x, --one-file-system stay on this file system\n\ @@ -88,7 +2765,7 @@ diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); -@@ -765,6 +770,7 @@ cp_option_init (struct cp_options *x) +@@ -777,6 +782,7 @@ cp_option_init (struct cp_options *x) x->preserve_timestamps = false; x->preserve_security_context = false; x->require_preserve_context = false; @@ -96,7 +2773,7 @@ diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c x->preserve_xattr = false; x->reduce_diagnostics = false; x->require_preserve_xattr = false; -@@ -911,7 +917,7 @@ main (int argc, char **argv) +@@ -923,7 +929,7 @@ main (int argc, char **argv) we'll actually use backup_suffix_string. */ backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX"); @@ -105,7 +2782,7 @@ diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c long_opts, NULL)) != -1) { -@@ -945,6 +951,16 @@ main (int argc, char **argv) +@@ -966,6 +972,16 @@ main (int argc, char **argv) copy_contents = true; break; @@ -122,7 +2799,7 @@ diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c case 'd': x.preserve_links = true; x.dereference = DEREF_NEVER; -@@ -1054,6 +1070,27 @@ main (int argc, char **argv) +@@ -1075,6 +1091,27 @@ main (int argc, char **argv) x.one_file_system = true; break; @@ -150,10 +2827,1174 @@ diff -urNp coreutils-7.1-orig/src/cp.c coreutils-7.1/src/cp.c case 'S': make_backups = true; backup_suffix_string = optarg; -diff -urNp coreutils-7.1-orig/src/chcon.c coreutils-7.1/src/chcon.c ---- coreutils-7.1-orig/src/chcon.c 2008-10-12 16:12:56.000000000 +0200 -+++ coreutils-7.1/src/chcon.c 2009-02-24 13:47:15.000000000 +0100 -@@ -346,7 +346,7 @@ Usage: %s [OPTION]... 
CONTEXT FILE...\n\ +diff -urNp coreutils-8.0-orig/src/cp.c.orig coreutils-8.0/src/cp.c.orig +--- coreutils-8.0-orig/src/cp.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/cp.c.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,1160 @@ ++/* cp.c -- file copying (main routines) ++ Copyright (C) 89, 90, 91, 1995-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . ++ ++ Written by Torbjorn Granlund, David MacKenzie, and Jim Meyering. */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "system.h" ++#include "argmatch.h" ++#include "backupfile.h" ++#include "copy.h" ++#include "cp-hash.h" ++#include "error.h" ++#include "filenamecat.h" ++#include "ignore-value.h" ++#include "quote.h" ++#include "stat-time.h" ++#include "utimens.h" ++#include "acl.h" ++ ++#if ! HAVE_LCHOWN ++# define lchown(name, uid, gid) chown (name, uid, gid) ++#endif ++ ++#define ASSIGN_BASENAME_STRDUPA(Dest, File_name) \ ++ do \ ++ { \ ++ char *tmp_abns_; \ ++ ASSIGN_STRDUPA (tmp_abns_, (File_name)); \ ++ Dest = last_component (tmp_abns_); \ ++ strip_trailing_slashes (Dest); \ ++ } \ ++ while (0) ++ ++/* The official name of this program (e.g., no `g' prefix). 
*/ ++#define PROGRAM_NAME "cp" ++ ++#define AUTHORS \ ++ proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ ++ proper_name ("David MacKenzie"), \ ++ proper_name ("Jim Meyering") ++ ++/* Used by do_copy, make_dir_parents_private, and re_protect ++ to keep a list of leading directories whose protections ++ need to be fixed after copying. */ ++struct dir_attr ++{ ++ struct stat st; ++ bool restore_mode; ++ size_t slash_offset; ++ struct dir_attr *next; ++}; ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. */ ++enum ++{ ++ COPY_CONTENTS_OPTION = CHAR_MAX + 1, ++ NO_PRESERVE_ATTRIBUTES_OPTION, ++ PARENTS_OPTION, ++ PRESERVE_ATTRIBUTES_OPTION, ++ REFLINK_OPTION, ++ SPARSE_OPTION, ++ STRIP_TRAILING_SLASHES_OPTION, ++ UNLINK_DEST_BEFORE_OPENING ++}; ++ ++/* True if the kernel is SELinux enabled. */ ++static bool selinux_enabled; ++ ++/* If true, the command "cp x/e_file e_dir" uses "e_dir/x/e_file" ++ as its destination instead of the usual "e_dir/e_file." */ ++static bool parents_option = false; ++ ++/* Remove any trailing slashes from each SOURCE argument. 
*/ ++static bool remove_trailing_slashes; ++ ++static char const *const sparse_type_string[] = ++{ ++ "never", "auto", "always", NULL ++}; ++static enum Sparse_type const sparse_type[] = ++{ ++ SPARSE_NEVER, SPARSE_AUTO, SPARSE_ALWAYS ++}; ++ARGMATCH_VERIFY (sparse_type_string, sparse_type); ++ ++static char const *const reflink_type_string[] = ++{ ++ "auto", "always", NULL ++}; ++static enum Reflink_type const reflink_type[] = ++{ ++ REFLINK_AUTO, REFLINK_ALWAYS ++}; ++ARGMATCH_VERIFY (reflink_type_string, reflink_type); ++ ++static struct option const long_opts[] = ++{ ++ {"archive", no_argument, NULL, 'a'}, ++ {"backup", optional_argument, NULL, 'b'}, ++ {"copy-contents", no_argument, NULL, COPY_CONTENTS_OPTION}, ++ {"dereference", no_argument, NULL, 'L'}, ++ {"force", no_argument, NULL, 'f'}, ++ {"interactive", no_argument, NULL, 'i'}, ++ {"link", no_argument, NULL, 'l'}, ++ {"no-clobber", no_argument, NULL, 'n'}, ++ {"no-dereference", no_argument, NULL, 'P'}, ++ {"no-preserve", required_argument, NULL, NO_PRESERVE_ATTRIBUTES_OPTION}, ++ {"no-target-directory", no_argument, NULL, 'T'}, ++ {"one-file-system", no_argument, NULL, 'x'}, ++ {"parents", no_argument, NULL, PARENTS_OPTION}, ++ {"path", no_argument, NULL, PARENTS_OPTION}, /* Deprecated. 
*/ ++ {"preserve", optional_argument, NULL, PRESERVE_ATTRIBUTES_OPTION}, ++ {"recursive", no_argument, NULL, 'R'}, ++ {"remove-destination", no_argument, NULL, UNLINK_DEST_BEFORE_OPENING}, ++ {"sparse", required_argument, NULL, SPARSE_OPTION}, ++ {"reflink", optional_argument, NULL, REFLINK_OPTION}, ++ {"strip-trailing-slashes", no_argument, NULL, STRIP_TRAILING_SLASHES_OPTION}, ++ {"suffix", required_argument, NULL, 'S'}, ++ {"symbolic-link", no_argument, NULL, 's'}, ++ {"target-directory", required_argument, NULL, 't'}, ++ {"update", no_argument, NULL, 'u'}, ++ {"verbose", no_argument, NULL, 'v'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... [-T] SOURCE DEST\n\ ++ or: %s [OPTION]... SOURCE... DIRECTORY\n\ ++ or: %s [OPTION]... -t DIRECTORY SOURCE...\n\ ++"), ++ program_name, program_name, program_name); ++ fputs (_("\ ++Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ -a, --archive same as -dR --preserve=all\n\ ++ --backup[=CONTROL] make a backup of each existing destination file\n\ ++ -b like --backup but does not accept an argument\n\ ++ --copy-contents copy contents of special files when recursive\n\ ++ -d same as --no-dereference --preserve=links\n\ ++"), stdout); ++ fputs (_("\ ++ -f, --force if an existing destination file cannot be\n\ ++ opened, remove it and try again (redundant if\n\ ++ the -n option is used)\n\ ++ -i, --interactive prompt before overwrite (overrides a previous -n\n\ ++ option)\n\ ++ -H follow command-line symbolic links in SOURCE\n\ ++"), stdout); ++ fputs (_("\ ++ -l, --link link files instead of copying\n\ ++ -L, --dereference always 
follow symbolic links in SOURCE\n\ ++"), stdout); ++ fputs (_("\ ++ -n, --no-clobber do not overwrite an existing file (overrides\n\ ++ a previous -i option)\n\ ++ -P, --no-dereference never follow symbolic links in SOURCE\n\ ++"), stdout); ++ fputs (_("\ ++ -p same as --preserve=mode,ownership,timestamps\n\ ++ --preserve[=ATTR_LIST] preserve the specified attributes (default:\n\ ++ mode,ownership,timestamps), if possible\n\ ++ additional attributes: context, links, xattr,\n\ ++ all\n\ ++"), stdout); ++ fputs (_("\ ++ --no-preserve=ATTR_LIST don't preserve the specified attributes\n\ ++ --parents use full source file name under DIRECTORY\n\ ++"), stdout); ++ fputs (_("\ ++ -R, -r, --recursive copy directories recursively\n\ ++ --reflink[=WHEN] control clone/CoW copies. See below.\n\ ++ --remove-destination remove each existing destination file before\n\ ++ attempting to open it (contrast with --force)\n\ ++"), stdout); ++ fputs (_("\ ++ --sparse=WHEN control creation of sparse files. See below.\n\ ++ --strip-trailing-slashes remove any trailing slashes from each SOURCE\n\ ++ argument\n\ ++"), stdout); ++ fputs (_("\ ++ -s, --symbolic-link make symbolic links instead of copying\n\ ++ -S, --suffix=SUFFIX override the usual backup suffix\n\ ++ -t, --target-directory=DIRECTORY copy all SOURCE arguments into DIRECTORY\n\ ++ -T, --no-target-directory treat DEST as a normal file\n\ ++"), stdout); ++ fputs (_("\ ++ -u, --update copy only when the SOURCE file is newer\n\ ++ than the destination file or when the\n\ ++ destination file is missing\n\ ++ -v, --verbose explain what is being done\n\ ++ -x, --one-file-system stay on this file system\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++By default, sparse SOURCE files are detected by a crude heuristic and the\n\ ++corresponding DEST file is made sparse as well. That is the behavior\n\ ++selected by --sparse=auto. 
Specify --sparse=always to create a sparse DEST\n\ ++file whenever the SOURCE file contains a long enough sequence of zero bytes.\n\ ++Use --sparse=never to inhibit creation of sparse files.\n\ ++\n\ ++When --reflink[=always] is specified, perform a lightweight copy, where the\n\ ++data blocks are copied only when modified. If this is not possible the copy\n\ ++fails, or if --reflink=auto is specified, fall back to a standard copy.\n\ ++"), stdout); ++ fputs (_("\ ++\n\ ++The backup suffix is `~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX.\n\ ++The version control method may be selected via the --backup option or through\n\ ++the VERSION_CONTROL environment variable. Here are the values:\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++ none, off never make backups (even if --backup is given)\n\ ++ numbered, t make numbered backups\n\ ++ existing, nil numbered if numbered backups exist, simple otherwise\n\ ++ simple, never always make simple backups\n\ ++"), stdout); ++ fputs (_("\ ++\n\ ++As a special case, cp makes a backup of SOURCE when the force and backup\n\ ++options are given and SOURCE and DEST are the same name for an existing,\n\ ++regular file.\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++/* Ensure that the parent directories of CONST_DST_NAME have the ++ correct protections, for the --parents option. This is done ++ after all copying has been completed, to allow permissions ++ that don't include user write/execute. ++ ++ SRC_OFFSET is the index in CONST_DST_NAME of the beginning of the ++ source directory name. ++ ++ ATTR_LIST is a null-terminated linked list of structures that ++ indicates the end of the filename of each intermediate directory ++ in CONST_DST_NAME that may need to have its attributes changed. 
++ The command `cp --parents --preserve a/b/c d/e_dir' changes the ++ attributes of the directories d/e_dir/a and d/e_dir/a/b to match ++ the corresponding source directories regardless of whether they ++ existed before the `cp' command was given. ++ ++ Return true if the parent of CONST_DST_NAME and any intermediate ++ directories specified by ATTR_LIST have the proper permissions ++ when done. */ ++ ++static bool ++re_protect (char const *const_dst_name, size_t src_offset, ++ struct dir_attr *attr_list, const struct cp_options *x) ++{ ++ struct dir_attr *p; ++ char *dst_name; /* A copy of CONST_DST_NAME we can change. */ ++ char *src_name; /* The source name in `dst_name'. */ ++ ++ ASSIGN_STRDUPA (dst_name, const_dst_name); ++ src_name = dst_name + src_offset; ++ ++ for (p = attr_list; p; p = p->next) ++ { ++ dst_name[p->slash_offset] = '\0'; ++ ++ /* Adjust the times (and if possible, ownership) for the copy. ++ chown turns off set[ug]id bits for non-root, ++ so do the chmod last. */ ++ ++ if (x->preserve_timestamps) ++ { ++ struct timespec timespec[2]; ++ ++ timespec[0] = get_stat_atime (&p->st); ++ timespec[1] = get_stat_mtime (&p->st); ++ ++ if (utimens (dst_name, timespec)) ++ { ++ error (0, errno, _("failed to preserve times for %s"), ++ quote (dst_name)); ++ return false; ++ } ++ } ++ ++ if (x->preserve_ownership) ++ { ++ if (lchown (dst_name, p->st.st_uid, p->st.st_gid) != 0) ++ { ++ if (! chown_failure_ok (x)) ++ { ++ error (0, errno, _("failed to preserve ownership for %s"), ++ quote (dst_name)); ++ return false; ++ } ++ /* Failing to preserve ownership is OK. Still, try to preserve ++ the group, but ignore the possible error. 
*/ ++ ignore_value (lchown (dst_name, -1, p->st.st_gid)); ++ } ++ } ++ ++ if (x->preserve_mode) ++ { ++ if (copy_acl (src_name, -1, dst_name, -1, p->st.st_mode) != 0) ++ return false; ++ } ++ else if (p->restore_mode) ++ { ++ if (lchmod (dst_name, p->st.st_mode) != 0) ++ { ++ error (0, errno, _("failed to preserve permissions for %s"), ++ quote (dst_name)); ++ return false; ++ } ++ } ++ ++ dst_name[p->slash_offset] = '/'; ++ } ++ return true; ++} ++ ++/* Ensure that the parent directory of CONST_DIR exists, for ++ the --parents option. ++ ++ SRC_OFFSET is the index in CONST_DIR (which is a destination ++ directory) of the beginning of the source directory name. ++ Create any leading directories that don't already exist. ++ If VERBOSE_FMT_STRING is nonzero, use it as a printf format ++ string for printing a message after successfully making a directory. ++ The format should take two string arguments: the names of the ++ source and destination directories. ++ Creates a linked list of attributes of intermediate directories, ++ *ATTR_LIST, for re_protect to use after calling copy. ++ Sets *NEW_DST if this function creates parent of CONST_DIR. ++ ++ Return true if parent of CONST_DIR exists as a directory with the proper ++ permissions when done. */ ++ ++/* FIXME: Synch this function with the one in ../lib/mkdir-p.c. */ ++ ++static bool ++make_dir_parents_private (char const *const_dir, size_t src_offset, ++ char const *verbose_fmt_string, ++ struct dir_attr **attr_list, bool *new_dst, ++ const struct cp_options *x) ++{ ++ struct stat stats; ++ char *dir; /* A copy of CONST_DIR we can change. */ ++ char *src; /* Source name in DIR. */ ++ char *dst_dir; /* Leading directory of DIR. */ ++ size_t dirlen; /* Length of DIR. 
*/ ++ ++ ASSIGN_STRDUPA (dir, const_dir); ++ ++ src = dir + src_offset; ++ ++ dirlen = dir_len (dir); ++ dst_dir = alloca (dirlen + 1); ++ memcpy (dst_dir, dir, dirlen); ++ dst_dir[dirlen] = '\0'; ++ ++ *attr_list = NULL; ++ ++ if (stat (dst_dir, &stats) != 0) ++ { ++ /* A parent of CONST_DIR does not exist. ++ Make all missing intermediate directories. */ ++ char *slash; ++ ++ slash = src; ++ while (*slash == '/') ++ slash++; ++ while ((slash = strchr (slash, '/'))) ++ { ++ struct dir_attr *new IF_LINT (= NULL); ++ bool missing_dir; ++ ++ *slash = '\0'; ++ missing_dir = (stat (dir, &stats) != 0); ++ ++ if (missing_dir | x->preserve_ownership | x->preserve_mode ++ | x->preserve_timestamps) ++ { ++ /* Add this directory to the list of directories whose ++ modes might need fixing later. */ ++ struct stat src_st; ++ int src_errno = (stat (src, &src_st) != 0 ++ ? errno ++ : S_ISDIR (src_st.st_mode) ++ ? 0 ++ : ENOTDIR); ++ if (src_errno) ++ { ++ error (0, src_errno, _("failed to get attributes of %s"), ++ quote (src)); ++ return false; ++ } ++ ++ new = xmalloc (sizeof *new); ++ new->st = src_st; ++ new->slash_offset = slash - dir; ++ new->restore_mode = false; ++ new->next = *attr_list; ++ *attr_list = new; ++ } ++ ++ if (missing_dir) ++ { ++ mode_t src_mode; ++ mode_t omitted_permissions; ++ mode_t mkdir_mode; ++ ++ /* This component does not exist. We must set ++ *new_dst and new->st.st_mode inside this loop because, ++ for example, in the command `cp --parents ../a/../b/c e_dir', ++ make_dir_parents_private creates only e_dir/../a if ++ ./b already exists. */ ++ *new_dst = true; ++ src_mode = new->st.st_mode; ++ ++ /* If the ownership or special mode bits might change, ++ omit some permissions at first, so unauthorized users ++ cannot nip in before the file is ready. */ ++ omitted_permissions = (src_mode ++ & (x->preserve_ownership ++ ? S_IRWXG | S_IRWXO ++ : x->preserve_mode ++ ? 
S_IWGRP | S_IWOTH ++ : 0)); ++ ++ /* POSIX says mkdir's behavior is implementation-defined when ++ (src_mode & ~S_IRWXUGO) != 0. However, common practice is ++ to ask mkdir to copy all the CHMOD_MODE_BITS, letting mkdir ++ decide what to do with S_ISUID | S_ISGID | S_ISVTX. */ ++ mkdir_mode = src_mode & CHMOD_MODE_BITS & ~omitted_permissions; ++ if (mkdir (dir, mkdir_mode) != 0) ++ { ++ error (0, errno, _("cannot make directory %s"), ++ quote (dir)); ++ return false; ++ } ++ else ++ { ++ if (verbose_fmt_string != NULL) ++ printf (verbose_fmt_string, src, dir); ++ } ++ ++ /* We need search and write permissions to the new directory ++ for writing the directory's contents. Check if these ++ permissions are there. */ ++ ++ if (lstat (dir, &stats)) ++ { ++ error (0, errno, _("failed to get attributes of %s"), ++ quote (dir)); ++ return false; ++ } ++ ++ ++ if (! x->preserve_mode) ++ { ++ if (omitted_permissions & ~stats.st_mode) ++ omitted_permissions &= ~ cached_umask (); ++ if (omitted_permissions & ~stats.st_mode ++ || (stats.st_mode & S_IRWXU) != S_IRWXU) ++ { ++ new->st.st_mode = stats.st_mode | omitted_permissions; ++ new->restore_mode = true; ++ } ++ } ++ ++ if ((stats.st_mode & S_IRWXU) != S_IRWXU) ++ { ++ /* Make the new directory searchable and writable. ++ The original permissions will be restored later. */ ++ ++ if (lchmod (dir, stats.st_mode | S_IRWXU) != 0) ++ { ++ error (0, errno, _("setting permissions for %s"), ++ quote (dir)); ++ return false; ++ } ++ } ++ } ++ else if (!S_ISDIR (stats.st_mode)) ++ { ++ error (0, 0, _("%s exists but is not a directory"), ++ quote (dir)); ++ return false; ++ } ++ else ++ *new_dst = false; ++ *slash++ = '/'; ++ ++ /* Avoid unnecessary calls to `stat' when given ++ file names containing multiple adjacent slashes. */ ++ while (*slash == '/') ++ slash++; ++ } ++ } ++ ++ /* We get here if the parent of DIR already exists. 
*/ ++ ++ else if (!S_ISDIR (stats.st_mode)) ++ { ++ error (0, 0, _("%s exists but is not a directory"), quote (dst_dir)); ++ return false; ++ } ++ else ++ { ++ *new_dst = false; ++ } ++ return true; ++} ++ ++/* FILE is the last operand of this command. ++ Return true if FILE is a directory. ++ But report an error and exit if there is a problem accessing FILE, ++ or if FILE does not exist but would have to refer to an existing ++ directory if it referred to anything at all. ++ ++ If the file exists, store the file's status into *ST. ++ Otherwise, set *NEW_DST. */ ++ ++static bool ++target_directory_operand (char const *file, struct stat *st, bool *new_dst) ++{ ++ int err = (stat (file, st) == 0 ? 0 : errno); ++ bool is_a_dir = !err && S_ISDIR (st->st_mode); ++ if (err) ++ { ++ if (err != ENOENT) ++ error (EXIT_FAILURE, err, _("accessing %s"), quote (file)); ++ *new_dst = true; ++ } ++ return is_a_dir; ++} ++ ++/* Scan the arguments, and copy each by calling copy. ++ Return true if successful. 
*/ ++ ++static bool ++do_copy (int n_files, char **file, const char *target_directory, ++ bool no_target_directory, struct cp_options *x) ++{ ++ struct stat sb; ++ bool new_dst = false; ++ bool ok = true; ++ ++ if (n_files <= !target_directory) ++ { ++ if (n_files <= 0) ++ error (0, 0, _("missing file operand")); ++ else ++ error (0, 0, _("missing destination file operand after %s"), ++ quote (file[0])); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (no_target_directory) ++ { ++ if (target_directory) ++ error (EXIT_FAILURE, 0, ++ _("cannot combine --target-directory (-t) " ++ "and --no-target-directory (-T)")); ++ if (2 < n_files) ++ { ++ error (0, 0, _("extra operand %s"), quote (file[2])); ++ usage (EXIT_FAILURE); ++ } ++ } ++ else if (!target_directory) ++ { ++ if (2 <= n_files ++ && target_directory_operand (file[n_files - 1], &sb, &new_dst)) ++ target_directory = file[--n_files]; ++ else if (2 < n_files) ++ error (EXIT_FAILURE, 0, _("target %s is not a directory"), ++ quote (file[n_files - 1])); ++ } ++ ++ if (target_directory) ++ { ++ /* cp file1...filen edir ++ Copy the files `file1' through `filen' ++ to the existing directory `edir'. */ ++ int i; ++ ++ /* Initialize these hash tables only if we'll need them. ++ The problems they're used to detect can arise only if ++ there are two or more files to copy. */ ++ if (2 <= n_files) ++ { ++ dest_info_init (x); ++ src_info_init (x); ++ } ++ ++ for (i = 0; i < n_files; i++) ++ { ++ char *dst_name; ++ bool parent_exists = true; /* True if dir_name (dst_name) exists. */ ++ struct dir_attr *attr_list; ++ char *arg_in_concat = NULL; ++ char *arg = file[i]; ++ ++ /* Trailing slashes are meaningful (i.e., maybe worth preserving) ++ only in the source file names. */ ++ if (remove_trailing_slashes) ++ strip_trailing_slashes (arg); ++ ++ if (parents_option) ++ { ++ char *arg_no_trailing_slash; ++ ++ /* Use `arg' without trailing slashes in constructing destination ++ file names. 
Otherwise, we can end up trying to create a ++ directory via `mkdir ("dst/foo/"...', which is not portable. ++ It fails, due to the trailing slash, on at least ++ NetBSD 1.[34] systems. */ ++ ASSIGN_STRDUPA (arg_no_trailing_slash, arg); ++ strip_trailing_slashes (arg_no_trailing_slash); ++ ++ /* Append all of `arg' (minus any trailing slash) to `dest'. */ ++ dst_name = file_name_concat (target_directory, ++ arg_no_trailing_slash, ++ &arg_in_concat); ++ ++ /* For --parents, we have to make sure that the directory ++ dir_name (dst_name) exists. We may have to create a few ++ leading directories. */ ++ parent_exists = ++ (make_dir_parents_private ++ (dst_name, arg_in_concat - dst_name, ++ (x->verbose ? "%s -> %s\n" : NULL), ++ &attr_list, &new_dst, x)); ++ } ++ else ++ { ++ char *arg_base; ++ /* Append the last component of `arg' to `target_directory'. */ ++ ++ ASSIGN_BASENAME_STRDUPA (arg_base, arg); ++ /* For `cp -R source/.. dest', don't copy into `dest/..'. */ ++ dst_name = (STREQ (arg_base, "..") ++ ? xstrdup (target_directory) ++ : file_name_concat (target_directory, arg_base, ++ NULL)); ++ } ++ ++ if (!parent_exists) ++ { ++ /* make_dir_parents_private failed, so don't even ++ attempt the copy. 
*/ ++ ok = false; ++ } ++ else ++ { ++ bool copy_into_self; ++ ok &= copy (arg, dst_name, new_dst, x, ©_into_self, NULL); ++ ++ if (parents_option) ++ ok &= re_protect (dst_name, arg_in_concat - dst_name, ++ attr_list, x); ++ } ++ ++ if (parents_option) ++ { ++ while (attr_list) ++ { ++ struct dir_attr *p = attr_list; ++ attr_list = attr_list->next; ++ free (p); ++ } ++ } ++ ++ free (dst_name); ++ } ++ } ++ else /* !target_directory */ ++ { ++ char const *new_dest; ++ char const *source = file[0]; ++ char const *dest = file[1]; ++ bool unused; ++ ++ if (parents_option) ++ { ++ error (0, 0, ++ _("with --parents, the destination must be a directory")); ++ usage (EXIT_FAILURE); ++ } ++ ++ /* When the force and backup options have been specified and ++ the source and destination are the same name for an existing ++ regular file, convert the user's command, e.g., ++ `cp --force --backup foo foo' to `cp --force foo fooSUFFIX' ++ where SUFFIX is determined by any version control options used. */ ++ ++ if (x->unlink_dest_after_failed_open ++ && x->backup_type != no_backups ++ && STREQ (source, dest) ++ && !new_dst && S_ISREG (sb.st_mode)) ++ { ++ static struct cp_options x_tmp; ++ ++ new_dest = find_backup_file_name (dest, x->backup_type); ++ /* Set x->backup_type to `no_backups' so that the normal backup ++ mechanism is not used when performing the actual copy. ++ backup_type must be set to `no_backups' only *after* the above ++ call to find_backup_file_name -- that function uses ++ backup_type to determine the suffix it applies. 
*/ ++ x_tmp = *x; ++ x_tmp.backup_type = no_backups; ++ x = &x_tmp; ++ } ++ else ++ { ++ new_dest = dest; ++ } ++ ++ ok = copy (source, new_dest, 0, x, &unused, NULL); ++ } ++ ++ return ok; ++} ++ ++static void ++cp_option_init (struct cp_options *x) ++{ ++ cp_options_default (x); ++ x->copy_as_regular = true; ++ x->dereference = DEREF_UNDEFINED; ++ x->unlink_dest_before_opening = false; ++ x->unlink_dest_after_failed_open = false; ++ x->hard_link = false; ++ x->interactive = I_UNSPECIFIED; ++ x->move_mode = false; ++ x->one_file_system = false; ++ x->reflink_mode = REFLINK_NEVER; ++ ++ x->preserve_ownership = false; ++ x->preserve_links = false; ++ x->preserve_mode = false; ++ x->preserve_timestamps = false; ++ x->preserve_security_context = false; ++ x->require_preserve_context = false; ++ x->preserve_xattr = false; ++ x->reduce_diagnostics = false; ++ x->require_preserve_xattr = false; ++ ++ x->require_preserve = false; ++ x->recursive = false; ++ x->sparse_mode = SPARSE_AUTO; ++ x->symbolic_link = false; ++ x->set_mode = false; ++ x->mode = 0; ++ ++ /* Not used. */ ++ x->stdin_tty = false; ++ ++ x->update = false; ++ x->verbose = false; ++ ++ /* By default, refuse to open a dangling destination symlink, because ++ in general one cannot do that safely, give the current semantics of ++ open's O_EXCL flag, (which POSIX doesn't even allow cp to use, btw). ++ But POSIX requires it. */ ++ x->open_dangling_dest_symlink = getenv ("POSIXLY_CORRECT") != NULL; ++ ++ x->dest_info = NULL; ++ x->src_info = NULL; ++} ++ ++/* Given a string, ARG, containing a comma-separated list of arguments ++ to the --preserve option, set the appropriate fields of X to ON_OFF. 
*/ ++static void ++decode_preserve_arg (char const *arg, struct cp_options *x, bool on_off) ++{ ++ enum File_attribute ++ { ++ PRESERVE_MODE, ++ PRESERVE_TIMESTAMPS, ++ PRESERVE_OWNERSHIP, ++ PRESERVE_LINK, ++ PRESERVE_CONTEXT, ++ PRESERVE_XATTR, ++ PRESERVE_ALL ++ }; ++ static enum File_attribute const preserve_vals[] = ++ { ++ PRESERVE_MODE, PRESERVE_TIMESTAMPS, ++ PRESERVE_OWNERSHIP, PRESERVE_LINK, PRESERVE_CONTEXT, PRESERVE_XATTR, ++ PRESERVE_ALL ++ }; ++ /* Valid arguments to the `--preserve' option. */ ++ static char const* const preserve_args[] = ++ { ++ "mode", "timestamps", ++ "ownership", "links", "context", "xattr", "all", NULL ++ }; ++ ARGMATCH_VERIFY (preserve_args, preserve_vals); ++ ++ char *arg_writable = xstrdup (arg); ++ char *s = arg_writable; ++ do ++ { ++ /* find next comma */ ++ char *comma = strchr (s, ','); ++ enum File_attribute val; ++ ++ /* If we found a comma, put a NUL in its place and advance. */ ++ if (comma) ++ *comma++ = 0; ++ ++ /* process S. */ ++ val = XARGMATCH ("--preserve", s, preserve_args, preserve_vals); ++ switch (val) ++ { ++ case PRESERVE_MODE: ++ x->preserve_mode = on_off; ++ break; ++ ++ case PRESERVE_TIMESTAMPS: ++ x->preserve_timestamps = on_off; ++ break; ++ ++ case PRESERVE_OWNERSHIP: ++ x->preserve_ownership = on_off; ++ break; ++ ++ case PRESERVE_LINK: ++ x->preserve_links = on_off; ++ break; ++ ++ case PRESERVE_CONTEXT: ++ x->preserve_security_context = on_off; ++ x->require_preserve_context = on_off; ++ break; ++ ++ case PRESERVE_XATTR: ++ x->preserve_xattr = on_off; ++ x->require_preserve_xattr = on_off; ++ break; ++ ++ case PRESERVE_ALL: ++ x->preserve_mode = on_off; ++ x->preserve_timestamps = on_off; ++ x->preserve_ownership = on_off; ++ x->preserve_links = on_off; ++ if (selinux_enabled) ++ x->preserve_security_context = on_off; ++ x->preserve_xattr = on_off; ++ break; ++ ++ default: ++ abort (); ++ } ++ s = comma; ++ } ++ while (s); ++ ++ free (arg_writable); ++} ++ ++int ++main (int argc, char **argv) 
++{ ++ int c; ++ bool ok; ++ bool make_backups = false; ++ char *backup_suffix_string; ++ char *version_control_string = NULL; ++ struct cp_options x; ++ bool copy_contents = false; ++ char *target_directory = NULL; ++ bool no_target_directory = false; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdin); ++ ++ selinux_enabled = (0 < is_selinux_enabled ()); ++ cp_option_init (&x); ++ ++ /* FIXME: consider not calling getenv for SIMPLE_BACKUP_SUFFIX unless ++ we'll actually use backup_suffix_string. */ ++ backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX"); ++ ++ while ((c = getopt_long (argc, argv, "abdfHilLnprst:uvxPRS:T", ++ long_opts, NULL)) ++ != -1) ++ { ++ switch (c) ++ { ++ case SPARSE_OPTION: ++ x.sparse_mode = XARGMATCH ("--sparse", optarg, ++ sparse_type_string, sparse_type); ++ break; ++ ++ case REFLINK_OPTION: ++ if (optarg == NULL) ++ x.reflink_mode = REFLINK_ALWAYS; ++ else ++ x.reflink_mode = XARGMATCH ("--reflink", optarg, ++ reflink_type_string, reflink_type); ++ break; ++ ++ case 'a': /* Like -dR --preserve=all with reduced failure diagnostics. 
*/ ++ x.dereference = DEREF_NEVER; ++ x.preserve_links = true; ++ x.preserve_ownership = true; ++ x.preserve_mode = true; ++ x.preserve_timestamps = true; ++ x.require_preserve = true; ++ if (selinux_enabled) ++ x.preserve_security_context = true; ++ x.preserve_xattr = true; ++ x.reduce_diagnostics = true; ++ x.recursive = true; ++ break; ++ ++ case 'b': ++ make_backups = true; ++ if (optarg) ++ version_control_string = optarg; ++ break; ++ ++ case COPY_CONTENTS_OPTION: ++ copy_contents = true; ++ break; ++ ++ case 'd': ++ x.preserve_links = true; ++ x.dereference = DEREF_NEVER; ++ break; ++ ++ case 'f': ++ x.unlink_dest_after_failed_open = true; ++ break; ++ ++ case 'H': ++ x.dereference = DEREF_COMMAND_LINE_ARGUMENTS; ++ break; ++ ++ case 'i': ++ x.interactive = I_ASK_USER; ++ break; ++ ++ case 'l': ++ x.hard_link = true; ++ break; ++ ++ case 'L': ++ x.dereference = DEREF_ALWAYS; ++ break; ++ ++ case 'n': ++ x.interactive = I_ALWAYS_NO; ++ break; ++ ++ case 'P': ++ x.dereference = DEREF_NEVER; ++ break; ++ ++ case NO_PRESERVE_ATTRIBUTES_OPTION: ++ decode_preserve_arg (optarg, &x, false); ++ break; ++ ++ case PRESERVE_ATTRIBUTES_OPTION: ++ if (optarg == NULL) ++ { ++ /* Fall through to the case for `p' below. 
*/ ++ } ++ else ++ { ++ decode_preserve_arg (optarg, &x, true); ++ x.require_preserve = true; ++ break; ++ } ++ ++ case 'p': ++ x.preserve_ownership = true; ++ x.preserve_mode = true; ++ x.preserve_timestamps = true; ++ x.require_preserve = true; ++ break; ++ ++ case PARENTS_OPTION: ++ parents_option = true; ++ break; ++ ++ case 'r': ++ case 'R': ++ x.recursive = true; ++ break; ++ ++ case UNLINK_DEST_BEFORE_OPENING: ++ x.unlink_dest_before_opening = true; ++ break; ++ ++ case STRIP_TRAILING_SLASHES_OPTION: ++ remove_trailing_slashes = true; ++ break; ++ ++ case 's': ++ x.symbolic_link = true; ++ break; ++ ++ case 't': ++ if (target_directory) ++ error (EXIT_FAILURE, 0, ++ _("multiple target directories specified")); ++ else ++ { ++ struct stat st; ++ if (stat (optarg, &st) != 0) ++ error (EXIT_FAILURE, errno, _("accessing %s"), quote (optarg)); ++ if (! S_ISDIR (st.st_mode)) ++ error (EXIT_FAILURE, 0, _("target %s is not a directory"), ++ quote (optarg)); ++ } ++ target_directory = optarg; ++ break; ++ ++ case 'T': ++ no_target_directory = true; ++ break; ++ ++ case 'u': ++ x.update = true; ++ break; ++ ++ case 'v': ++ x.verbose = true; ++ break; ++ ++ case 'x': ++ x.one_file_system = true; ++ break; ++ ++ case 'S': ++ make_backups = true; ++ backup_suffix_string = optarg; ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ if (x.hard_link && x.symbolic_link) ++ { ++ error (0, 0, _("cannot make both hard and symbolic links")); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (make_backups && x.interactive == I_ALWAYS_NO) ++ { ++ error (0, 0, ++ _("options --backup and --no-clobber are mutually exclusive")); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (x.reflink_mode == REFLINK_ALWAYS && x.sparse_mode != SPARSE_AUTO) ++ { ++ error (0, 0, _("--reflink can be used only with --sparse=auto")); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (backup_suffix_string) ++ simple_backup_suffix = 
xstrdup (backup_suffix_string); ++ ++ x.backup_type = (make_backups ++ ? xget_version (_("backup type"), ++ version_control_string) ++ : no_backups); ++ ++ if (x.dereference == DEREF_UNDEFINED) ++ { ++ if (x.recursive) ++ /* This is compatible with FreeBSD. */ ++ x.dereference = DEREF_NEVER; ++ else ++ x.dereference = DEREF_ALWAYS; ++ } ++ ++ if (x.recursive) ++ x.copy_as_regular = copy_contents; ++ ++ /* If --force (-f) was specified and we're in link-creation mode, ++ first remove any existing destination file. */ ++ if (x.unlink_dest_after_failed_open && (x.hard_link || x.symbolic_link)) ++ x.unlink_dest_before_opening = true; ++ ++ if (x.preserve_security_context) ++ { ++ if (!selinux_enabled) ++ error (EXIT_FAILURE, 0, ++ _("cannot preserve security context " ++ "without an SELinux-enabled kernel")); ++ } ++ ++#if !USE_XATTR ++ if (x.require_preserve_xattr) ++ error (EXIT_FAILURE, 0, _("cannot preserve extended attributes, cp is " ++ "built without xattr support")); ++#endif ++ ++ /* Allocate space for remembering copied and created files. */ ++ ++ hash_init (); ++ ++ ok = do_copy (argc - optind, argv + optind, ++ target_directory, no_target_directory, &x); ++ ++ forget_all (); ++ ++ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); ++} +diff -urNp coreutils-8.0-orig/src/chcon.c coreutils-8.0/src/chcon.c +--- coreutils-8.0-orig/src/chcon.c 2009-10-06 10:55:34.000000000 +0200 ++++ coreutils-8.0/src/chcon.c 2009-10-07 10:10:11.000000000 +0200 +@@ -348,7 +348,7 @@ Usage: %s [OPTION]... 
CONTEXT FILE...\n\ "), program_name, program_name, program_name); fputs (_("\ @@ -162,9 +4003,9 @@ diff -urNp coreutils-7.1-orig/src/chcon.c coreutils-7.1/src/chcon.c With --reference, change the security context of each FILE to that of RFILE.\n\ \n\ -h, --no-dereference affect symbolic links instead of any referenced file\n\ -@@ -519,6 +519,10 @@ main (int argc, char **argv) - usage (EXIT_FAILURE); - } +@@ -523,6 +523,10 @@ main (int argc, char **argv) + error (EXIT_FAILURE, 0, + _("%s may be used only on a SELinux kernel"), program_name); + if (is_selinux_enabled () != 1) + error (EXIT_FAILURE, 0, @@ -173,9 +4014,585 @@ diff -urNp coreutils-7.1-orig/src/chcon.c coreutils-7.1/src/chcon.c if (reference_file) { if (getfilecon (reference_file, &ref_context) < 0) -diff -urNp coreutils-7.1-orig/src/id.c coreutils-7.1/src/id.c ---- coreutils-7.1-orig/src/id.c 2009-02-16 15:57:44.000000000 +0100 -+++ coreutils-7.1/src/id.c 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/src/chcon.c.orig coreutils-8.0/src/chcon.c.orig +--- coreutils-8.0-orig/src/chcon.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/chcon.c.orig 2009-10-06 10:55:34.000000000 +0200 +@@ -0,0 +1,572 @@ ++/* chcon -- change security context of files ++ Copyright (C) 2005-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . 
*/ ++ ++#include ++#include ++#include ++#include ++ ++#include "system.h" ++#include "dev-ino.h" ++#include "error.h" ++#include "ignore-value.h" ++#include "quote.h" ++#include "quotearg.h" ++#include "root-dev-ino.h" ++#include "selinux-at.h" ++#include "xfts.h" ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "chcon" ++ ++#define AUTHORS \ ++ proper_name ("Russell Coker"), \ ++ proper_name ("Jim Meyering") ++ ++/* If nonzero, and the systems has support for it, change the context ++ of symbolic links rather than any files they point to. */ ++static bool affect_symlink_referent; ++ ++/* If true, change the modes of directories recursively. */ ++static bool recurse; ++ ++/* Level of verbosity. */ ++static bool verbose; ++ ++/* Pointer to the device and inode numbers of `/', when --recursive. ++ Otherwise NULL. */ ++static struct dev_ino *root_dev_ino; ++ ++/* The name of the context file is being given. */ ++static char const *specified_context; ++ ++/* Specific components of the context */ ++static char const *specified_user; ++static char const *specified_role; ++static char const *specified_range; ++static char const *specified_type; ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ ++enum ++{ ++ DEREFERENCE_OPTION = CHAR_MAX + 1, ++ NO_PRESERVE_ROOT, ++ PRESERVE_ROOT, ++ REFERENCE_FILE_OPTION ++}; ++ ++static struct option const long_options[] = ++{ ++ {"recursive", no_argument, NULL, 'R'}, ++ {"dereference", no_argument, NULL, DEREFERENCE_OPTION}, ++ {"no-dereference", no_argument, NULL, 'h'}, ++ {"no-preserve-root", no_argument, NULL, NO_PRESERVE_ROOT}, ++ {"preserve-root", no_argument, NULL, PRESERVE_ROOT}, ++ {"reference", required_argument, NULL, REFERENCE_FILE_OPTION}, ++ {"user", required_argument, NULL, 'u'}, ++ {"role", required_argument, NULL, 'r'}, ++ {"type", required_argument, NULL, 't'}, ++ {"range", required_argument, NULL, 'l'}, ++ {"verbose", no_argument, NULL, 'v'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++/* Given a security context, CONTEXT, derive a context_t (*RET), ++ setting any portions selected via the global variables, specified_user, ++ specified_role, etc. */ ++static int ++compute_context_from_mask (security_context_t context, context_t *ret) ++{ ++ bool ok = true; ++ context_t new_context = context_new (context); ++ if (!new_context) ++ { ++ error (0, errno, _("failed to create security context: %s"), ++ quotearg_colon (context)); ++ return 1; ++ } ++ ++#define SET_COMPONENT(C, comp) \ ++ do \ ++ { \ ++ if (specified_ ## comp \ ++ && context_ ## comp ## _set ((C), specified_ ## comp)) \ ++ { \ ++ error (0, errno, \ ++ _("failed to set %s security context component to %s"), \ ++ #comp, quote (specified_ ## comp)); \ ++ ok = false; \ ++ } \ ++ } \ ++ while (0) ++ ++ SET_COMPONENT (new_context, user); ++ SET_COMPONENT (new_context, range); ++ SET_COMPONENT (new_context, role); ++ SET_COMPONENT (new_context, type); ++ ++ if (!ok) ++ { ++ int saved_errno = errno; ++ context_free (new_context); ++ errno = saved_errno; ++ return 1; ++ } ++ ++ *ret = new_context; ++ return 0; ++} ++ ++/* Change the context of FILE, using specified components. 
++ If it is a directory and -R is given, recurse. ++ Return 0 if successful, 1 if errors occurred. */ ++ ++static int ++change_file_context (int fd, char const *file) ++{ ++ security_context_t file_context = NULL; ++ context_t context; ++ security_context_t context_string; ++ int errors = 0; ++ ++ if (specified_context == NULL) ++ { ++ int status = (affect_symlink_referent ++ ? getfileconat (fd, file, &file_context) ++ : lgetfileconat (fd, file, &file_context)); ++ ++ if (status < 0 && errno != ENODATA) ++ { ++ error (0, errno, _("failed to get security context of %s"), ++ quote (file)); ++ return 1; ++ } ++ ++ /* If the file doesn't have a context, and we're not setting all of ++ the context components, there isn't really an obvious default. ++ Thus, we just give up. */ ++ if (file_context == NULL) ++ { ++ error (0, 0, _("can't apply partial context to unlabeled file %s"), ++ quote (file)); ++ return 1; ++ } ++ ++ if (compute_context_from_mask (file_context, &context)) ++ return 1; ++ } ++ else ++ { ++ /* FIXME: this should be done exactly once, in main. */ ++ context = context_new (specified_context); ++ if (!context) ++ abort (); ++ } ++ ++ context_string = context_str (context); ++ ++ if (file_context == NULL || ! STREQ (context_string, file_context)) ++ { ++ int fail = (affect_symlink_referent ++ ? setfileconat (fd, file, context_string) ++ : lsetfileconat (fd, file, context_string)); ++ ++ if (fail) ++ { ++ errors = 1; ++ error (0, errno, _("failed to change context of %s to %s"), ++ quote_n (0, file), quote_n (1, context_string)); ++ } ++ } ++ ++ context_free (context); ++ freecon (file_context); ++ ++ return errors; ++} ++ ++/* Change the context of FILE. ++ Return true if successful. This function is called ++ once for every file system object that fts encounters. 
*/ ++ ++static bool ++process_file (FTS *fts, FTSENT *ent) ++{ ++ char const *file_full_name = ent->fts_path; ++ char const *file = ent->fts_accpath; ++ const struct stat *file_stats = ent->fts_statp; ++ bool ok = true; ++ ++ switch (ent->fts_info) ++ { ++ case FTS_D: ++ if (recurse) ++ { ++ if (ROOT_DEV_INO_CHECK (root_dev_ino, ent->fts_statp)) ++ { ++ /* This happens e.g., with "chcon -R --preserve-root ... /" ++ and with "chcon -RH --preserve-root ... symlink-to-root". */ ++ ROOT_DEV_INO_WARN (file_full_name); ++ /* Tell fts not to traverse into this hierarchy. */ ++ fts_set (fts, ent, FTS_SKIP); ++ /* Ensure that we do not process "/" on the second visit. */ ++ ignore_ptr (fts_read (fts)); ++ return false; ++ } ++ return true; ++ } ++ break; ++ ++ case FTS_DP: ++ if (! recurse) ++ return true; ++ break; ++ ++ case FTS_NS: ++ /* For a top-level file or directory, this FTS_NS (stat failed) ++ indicator is determined at the time of the initial fts_open call. ++ With programs like chmod, chown, and chgrp, that modify ++ permissions, it is possible that the file in question is ++ accessible when control reaches this point. So, if this is ++ the first time we've seen the FTS_NS for this file, tell ++ fts_read to stat it "again". 
*/ ++ if (ent->fts_level == 0 && ent->fts_number == 0) ++ { ++ ent->fts_number = 1; ++ fts_set (fts, ent, FTS_AGAIN); ++ return true; ++ } ++ error (0, ent->fts_errno, _("cannot access %s"), quote (file_full_name)); ++ ok = false; ++ break; ++ ++ case FTS_ERR: ++ error (0, ent->fts_errno, _("%s"), quote (file_full_name)); ++ ok = false; ++ break; ++ ++ case FTS_DNR: ++ error (0, ent->fts_errno, _("cannot read directory %s"), ++ quote (file_full_name)); ++ ok = false; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (ent->fts_info == FTS_DP ++ && ok && ROOT_DEV_INO_CHECK (root_dev_ino, file_stats)) ++ { ++ ROOT_DEV_INO_WARN (file_full_name); ++ ok = false; ++ } ++ ++ if (ok) ++ { ++ if (verbose) ++ printf (_("changing security context of %s\n"), ++ quote (file_full_name)); ++ ++ if (change_file_context (fts->fts_cwd_fd, file) != 0) ++ ok = false; ++ } ++ ++ if ( ! recurse) ++ fts_set (fts, ent, FTS_SKIP); ++ ++ return ok; ++} ++ ++/* Recursively operate on the specified FILES (the last entry ++ of which is NULL). BIT_FLAGS controls how fts works. ++ Return true if successful. */ ++ ++static bool ++process_files (char **files, int bit_flags) ++{ ++ bool ok = true; ++ ++ FTS *fts = xfts_open (files, bit_flags, NULL); ++ ++ while (1) ++ { ++ FTSENT *ent; ++ ++ ent = fts_read (fts); ++ if (ent == NULL) ++ { ++ if (errno != 0) ++ { ++ /* FIXME: try to give a better message */ ++ error (0, errno, _("fts_read failed")); ++ ok = false; ++ } ++ break; ++ } ++ ++ ok &= process_file (fts, ent); ++ } ++ ++ if (fts_close (fts) != 0) ++ { ++ error (0, errno, _("fts_close failed")); ++ ok = false; ++ } ++ ++ return ok; ++} ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... CONTEXT FILE...\n\ ++ or: %s [OPTION]... [-u USER] [-r ROLE] [-l RANGE] [-t TYPE] FILE...\n\ ++ or: %s [OPTION]... 
--reference=RFILE FILE...\n\ ++"), ++ program_name, program_name, program_name); ++ fputs (_("\ ++Change the security context of each FILE to CONTEXT.\n\ ++With --reference, change the security context of each FILE to that of RFILE.\n\ ++\n\ ++ -h, --no-dereference affect symbolic links instead of any referenced file\n\ ++"), stdout); ++ fputs (_("\ ++ --reference=RFILE use RFILE's security context rather than specifying\n\ ++ a CONTEXT value\n\ ++ -R, --recursive operate on files and directories recursively\n\ ++ -v, --verbose output a diagnostic for every file processed\n\ ++"), stdout); ++ fputs (_("\ ++ -u, --user=USER set user USER in the target security context\n\ ++ -r, --role=ROLE set role ROLE in the target security context\n\ ++ -t, --type=TYPE set type TYPE in the target security context\n\ ++ -l, --range=RANGE set range RANGE in the target security context\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++The following options modify how a hierarchy is traversed when the -R\n\ ++option is also specified. If more than one is specified, only the final\n\ ++one takes effect.\n\ ++\n\ ++ -H if a command line argument is a symbolic link\n\ ++ to a directory, traverse it\n\ ++ -L traverse every symbolic link to a directory\n\ ++ encountered\n\ ++ -P do not traverse any symbolic links (default)\n\ ++\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ security_context_t ref_context = NULL; ++ ++ /* Bit flags that control how fts works. */ ++ int bit_flags = FTS_PHYSICAL; ++ ++ /* 1 if --dereference, 0 if --no-dereference, -1 if neither has been ++ specified. 
*/ ++ int dereference = -1; ++ ++ bool ok; ++ bool preserve_root = false; ++ bool component_specified = false; ++ char *reference_file = NULL; ++ int optc; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdout); ++ ++ while ((optc = getopt_long (argc, argv, "HLPRhvu:r:t:l:", long_options, NULL)) ++ != -1) ++ { ++ switch (optc) ++ { ++ case 'H': /* Traverse command-line symlinks-to-directories. */ ++ bit_flags = FTS_COMFOLLOW | FTS_PHYSICAL; ++ break; ++ ++ case 'L': /* Traverse all symlinks-to-directories. */ ++ bit_flags = FTS_LOGICAL; ++ break; ++ ++ case 'P': /* Traverse no symlinks-to-directories. */ ++ bit_flags = FTS_PHYSICAL; ++ break; ++ ++ case 'h': /* --no-dereference: affect symlinks */ ++ dereference = 0; ++ break; ++ ++ case DEREFERENCE_OPTION: /* --dereference: affect the referent ++ of each symlink */ ++ dereference = 1; ++ break; ++ ++ case NO_PRESERVE_ROOT: ++ preserve_root = false; ++ break; ++ ++ case PRESERVE_ROOT: ++ preserve_root = true; ++ break; ++ ++ case REFERENCE_FILE_OPTION: ++ reference_file = optarg; ++ break; ++ ++ case 'R': ++ recurse = true; ++ break; ++ ++ case 'f': ++ /* ignore */ ++ break; ++ ++ case 'v': ++ verbose = true; ++ break; ++ ++ case 'u': ++ specified_user = optarg; ++ component_specified = true; ++ break; ++ ++ case 'r': ++ specified_role = optarg; ++ component_specified = true; ++ break; ++ ++ case 't': ++ specified_type = optarg; ++ component_specified = true; ++ break; ++ ++ case 'l': ++ specified_range = optarg; ++ component_specified = true; ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ if (recurse) ++ { ++ if (bit_flags == FTS_PHYSICAL) ++ { ++ if (dereference == 1) ++ error (EXIT_FAILURE, 0, ++ _("-R --dereference requires either -H or -L")); ++ affect_symlink_referent = false; ++ } 
++ else ++ { ++ if (dereference == 0) ++ error (EXIT_FAILURE, 0, _("-R -h requires -P")); ++ affect_symlink_referent = true; ++ } ++ } ++ else ++ { ++ bit_flags = FTS_PHYSICAL; ++ affect_symlink_referent = (dereference != 0); ++ } ++ ++ if (argc - optind < (reference_file || component_specified ? 1 : 2)) ++ { ++ if (argc <= optind) ++ error (0, 0, _("missing operand")); ++ else ++ error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (is_selinux_enabled () != 1) ++ error (EXIT_FAILURE, 0, ++ _("%s may be used only on a SELinux kernel"), program_name); ++ ++ if (reference_file) ++ { ++ if (getfilecon (reference_file, &ref_context) < 0) ++ error (EXIT_FAILURE, errno, _("failed to get security context of %s"), ++ quote (reference_file)); ++ ++ specified_context = ref_context; ++ } ++ else if (component_specified) ++ { ++ /* FIXME: it's already null, so this is a no-op. */ ++ specified_context = NULL; ++ } ++ else ++ { ++ context_t context; ++ specified_context = argv[optind++]; ++ context = context_new (specified_context); ++ if (!context) ++ error (EXIT_FAILURE, 0, _("invalid context: %s"), ++ quotearg_colon (specified_context)); ++ context_free (context); ++ } ++ ++ if (reference_file && component_specified) ++ { ++ error (0, 0, _("conflicting security context specifiers given")); ++ usage (1); ++ } ++ ++ if (recurse && preserve_root) ++ { ++ static struct dev_ino dev_ino_buf; ++ root_dev_ino = get_root_dev_ino (&dev_ino_buf); ++ if (root_dev_ino == NULL) ++ error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), ++ quote ("/")); ++ } ++ else ++ { ++ root_dev_ino = NULL; ++ } ++ ++ ok = process_files (argv + optind, bit_flags | FTS_NOSTAT); ++ ++ exit (ok ? 
EXIT_SUCCESS : EXIT_FAILURE); ++} +diff -urNp coreutils-8.0-orig/src/id.c coreutils-8.0/src/id.c +--- coreutils-8.0-orig/src/id.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/id.c 2009-10-07 10:10:11.000000000 +0200 @@ -107,7 +107,7 @@ int main (int argc, char **argv) { @@ -185,10 +4602,10 @@ diff -urNp coreutils-7.1-orig/src/id.c coreutils-7.1/src/id.c /* If true, output the list of all group IDs. -G */ bool just_group_list = false; -diff -urNp coreutils-7.1-orig/src/install.c coreutils-7.1/src/install.c ---- coreutils-7.1-orig/src/install.c 2009-02-18 15:32:52.000000000 +0100 -+++ coreutils-7.1/src/install.c 2009-02-24 13:47:15.000000000 +0100 -@@ -292,6 +292,7 @@ cp_option_init (struct cp_options *x) +diff -urNp coreutils-8.0-orig/src/install.c coreutils-8.0/src/install.c +--- coreutils-8.0-orig/src/install.c 2009-09-29 15:27:54.000000000 +0200 ++++ coreutils-8.0/src/install.c 2009-10-07 10:10:11.000000000 +0200 +@@ -284,6 +284,7 @@ cp_option_init (struct cp_options *x) x->reduce_diagnostics=false; x->require_preserve = false; x->require_preserve_context = false; @@ -196,7 +4613,7 @@ diff -urNp coreutils-7.1-orig/src/install.c coreutils-7.1/src/install.c x->require_preserve_xattr = false; x->recursive = false; x->sparse_mode = SPARSE_AUTO; -@@ -469,7 +470,7 @@ main (int argc, char **argv) +@@ -461,7 +462,7 @@ main (int argc, char **argv) we'll actually use backup_suffix_string. */ backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX"); @@ -205,7 +4622,7 @@ diff -urNp coreutils-7.1-orig/src/install.c coreutils-7.1/src/install.c NULL)) != -1) { switch (optc) -@@ -539,6 +540,7 @@ main (int argc, char **argv) +@@ -535,6 +536,7 @@ main (int argc, char **argv) error (0, 0, _("WARNING: --preserve_context is deprecated; " "use --preserve-context instead")); /* fall through */ @@ -213,7 +4630,7 @@ diff -urNp coreutils-7.1-orig/src/install.c coreutils-7.1/src/install.c case PRESERVE_CONTEXT_OPTION: if ( ! 
selinux_enabled) { -@@ -546,6 +548,10 @@ main (int argc, char **argv) +@@ -542,6 +544,10 @@ main (int argc, char **argv) "this kernel is not SELinux-enabled")); break; } @@ -224,7 +4641,7 @@ diff -urNp coreutils-7.1-orig/src/install.c coreutils-7.1/src/install.c x.preserve_security_context = true; use_default_selinux_context = false; break; -@@ -557,6 +563,7 @@ main (int argc, char **argv) +@@ -553,6 +559,7 @@ main (int argc, char **argv) break; } scontext = optarg; @@ -232,7 +4649,7 @@ diff -urNp coreutils-7.1-orig/src/install.c coreutils-7.1/src/install.c use_default_selinux_context = false; break; case_GETOPT_HELP_CHAR; -@@ -990,8 +997,8 @@ Mandatory arguments to long options are +@@ -986,8 +993,8 @@ Mandatory arguments to long options are -v, --verbose print the name of each directory as it is created\n\ "), stdout); fputs (_("\ @@ -243,10 +4660,1025 @@ diff -urNp coreutils-7.1-orig/src/install.c coreutils-7.1/src/install.c "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); -diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c ---- coreutils-7.1-orig/src/ls.c 2009-02-20 19:34:02.000000000 +0100 -+++ coreutils-7.1/src/ls.c 2009-02-24 13:47:15.000000000 +0100 -@@ -136,7 +136,8 @@ enum filetype +diff -urNp coreutils-8.0-orig/src/install.c.orig coreutils-8.0/src/install.c.orig +--- coreutils-8.0-orig/src/install.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/install.c.orig 2009-09-29 15:27:54.000000000 +0200 +@@ -0,0 +1,1011 @@ ++/* install - copy files and set attributes ++ Copyright (C) 89, 90, 91, 1995-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. 
++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see <http://www.gnu.org/licenses/>. */ ++ ++/* Written by David MacKenzie <djm@gnu.ai.mit.edu> */ ++ ++#include <config.h> ++#include <stdio.h> ++#include <getopt.h> ++#include <sys/types.h> ++#include <signal.h> ++#include <pwd.h> ++#include <grp.h> ++#include <selinux/selinux.h> ++ ++#include "system.h" ++#include "backupfile.h" ++#include "error.h" ++#include "cp-hash.h" ++#include "copy.h" ++#include "filenamecat.h" ++#include "full-read.h" ++#include "mkancesdirs.h" ++#include "mkdir-p.h" ++#include "modechange.h" ++#include "prog-fprintf.h" ++#include "quote.h" ++#include "quotearg.h" ++#include "savewd.h" ++#include "stat-time.h" ++#include "utimens.h" ++#include "xstrtol.h" ++ ++/* The official name of this program (e.g., no `g' prefix). */ ++#define PROGRAM_NAME "install" ++ ++#define AUTHORS proper_name ("David MacKenzie") ++ ++#if HAVE_SYS_WAIT_H ++# include <sys/wait.h> ++#endif ++ ++static int selinux_enabled = 0; ++static bool use_default_selinux_context = true; ++ ++#if ! HAVE_ENDGRENT ++# define endgrent() ((void) 0) ++#endif ++ ++#if ! HAVE_ENDPWENT ++# define endpwent() ((void) 0) ++#endif ++ ++#if ! HAVE_LCHOWN ++# define lchown(name, uid, gid) chown (name, uid, gid) ++#endif ++ ++#if ! 
HAVE_MATCHPATHCON_INIT_PREFIX ++# define matchpathcon_init_prefix(a, p) /* empty */ ++#endif ++ ++static bool change_timestamps (struct stat const *from_sb, char const *to); ++static bool change_attributes (char const *name); ++static bool copy_file (const char *from, const char *to, ++ const struct cp_options *x); ++static bool install_file_in_file_parents (char const *from, char *to, ++ struct cp_options *x); ++static bool install_file_in_dir (const char *from, const char *to_dir, ++ const struct cp_options *x); ++static bool install_file_in_file (const char *from, const char *to, ++ const struct cp_options *x); ++static void get_ids (void); ++static void strip (char const *name); ++static void announce_mkdir (char const *dir, void *options); ++static int make_ancestor (char const *dir, char const *component, ++ void *options); ++void usage (int status); ++ ++/* The user name that will own the files, or NULL to make the owner ++ the current user ID. */ ++static char *owner_name; ++ ++/* The user ID corresponding to `owner_name'. */ ++static uid_t owner_id; ++ ++/* The group name that will own the files, or NULL to make the group ++ the current group ID. */ ++static char *group_name; ++ ++/* The group ID corresponding to `group_name'. */ ++static gid_t group_id; ++ ++#define DEFAULT_MODE (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) ++ ++/* The file mode bits to which non-directory files will be set. The umask has ++ no effect. */ ++static mode_t mode = DEFAULT_MODE; ++ ++/* Similar, but for directories. */ ++static mode_t dir_mode = DEFAULT_MODE; ++ ++/* The file mode bits that the user cares about. This should be a ++ superset of DIR_MODE and a subset of CHMOD_MODE_BITS. This matters ++ for directories, since otherwise directories may keep their S_ISUID ++ or S_ISGID bits. 
*/ ++static mode_t dir_mode_bits = CHMOD_MODE_BITS; ++ ++/* Compare files before installing (-C) */ ++static bool copy_only_if_needed; ++ ++/* If true, strip executable files after copying them. */ ++static bool strip_files; ++ ++/* If true, install a directory instead of a regular file. */ ++static bool dir_arg; ++ ++/* Program used to strip binaries, "strip" is default */ ++static char const *strip_program = "strip"; ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. */ ++enum ++{ ++ PRESERVE_CONTEXT_OPTION = CHAR_MAX + 1, ++ PRESERVE_CONTEXT_OPTION_DEPRECATED, ++ STRIP_PROGRAM_OPTION ++}; ++ ++static struct option const long_options[] = ++{ ++ {"backup", optional_argument, NULL, 'b'}, ++ {"compare", no_argument, NULL, 'C'}, ++ {GETOPT_SELINUX_CONTEXT_OPTION_DECL}, ++ {"directory", no_argument, NULL, 'd'}, ++ {"group", required_argument, NULL, 'g'}, ++ {"mode", required_argument, NULL, 'm'}, ++ {"no-target-directory", no_argument, NULL, 'T'}, ++ {"owner", required_argument, NULL, 'o'}, ++ {"preserve-timestamps", no_argument, NULL, 'p'}, ++ {"preserve-context", no_argument, NULL, PRESERVE_CONTEXT_OPTION}, ++ /* --preserve_context was silently supported until Apr 2009. ++ FIXME: disable altogether in a year or so. */ ++ {"preserve_context", no_argument, NULL, PRESERVE_CONTEXT_OPTION_DEPRECATED}, ++ {"strip", no_argument, NULL, 's'}, ++ {"strip-program", required_argument, NULL, STRIP_PROGRAM_OPTION}, ++ {"suffix", required_argument, NULL, 'S'}, ++ {"target-directory", required_argument, NULL, 't'}, ++ {"verbose", no_argument, NULL, 'v'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++/* Compare content of opened files using file descriptors A_FD and B_FD. Return ++ true if files are equal. 
*/ ++static bool ++have_same_content (int a_fd, int b_fd) ++{ ++ enum { CMP_BLOCK_SIZE = 4096 }; ++ static char a_buff[CMP_BLOCK_SIZE]; ++ static char b_buff[CMP_BLOCK_SIZE]; ++ ++ size_t size; ++ while (0 < (size = full_read (a_fd, a_buff, sizeof a_buff))) { ++ if (size != full_read (b_fd, b_buff, sizeof b_buff)) ++ return false; ++ ++ if (memcmp (a_buff, b_buff, size) != 0) ++ return false; ++ } ++ ++ return size == 0; ++} ++ ++/* Return true for mode with non-permission bits. */ ++static bool ++extra_mode (mode_t input) ++{ ++ const mode_t mask = ~S_IRWXUGO & ~S_IFMT; ++ return !! (input & mask); ++} ++ ++/* Return true if copy of file SRC_NAME to file DEST_NAME is necessary. */ ++static bool ++need_copy (const char *src_name, const char *dest_name, ++ const struct cp_options *x) ++{ ++ struct stat src_sb, dest_sb; ++ int src_fd, dest_fd; ++ bool content_match; ++ ++ if (extra_mode (mode)) ++ return true; ++ ++ /* compare files using stat */ ++ if (lstat (src_name, &src_sb) != 0) ++ return true; ++ ++ if (lstat (dest_name, &dest_sb) != 0) ++ return true; ++ ++ if (!S_ISREG (src_sb.st_mode) || !S_ISREG (dest_sb.st_mode) ++ || extra_mode (src_sb.st_mode) || extra_mode (dest_sb.st_mode)) ++ return true; ++ ++ if (src_sb.st_size != dest_sb.st_size ++ || (dest_sb.st_mode & CHMOD_MODE_BITS) != mode ++ || dest_sb.st_uid != (owner_id == (uid_t) -1 ? getuid () : owner_id) ++ || dest_sb.st_gid != (group_id == (gid_t) -1 ? 
getgid () : group_id)) ++ return true; ++ ++ /* compare SELinux context if preserving */ ++ if (selinux_enabled && x->preserve_security_context) ++ { ++ security_context_t file_scontext = NULL; ++ security_context_t to_scontext = NULL; ++ bool scontext_match; ++ ++ if (getfilecon (src_name, &file_scontext) == -1) ++ return true; ++ ++ if (getfilecon (dest_name, &to_scontext) == -1) ++ { ++ freecon (file_scontext); ++ return true; ++ } ++ ++ scontext_match = STREQ (file_scontext, to_scontext); ++ ++ freecon (file_scontext); ++ freecon (to_scontext); ++ if (!scontext_match) ++ return true; ++ } ++ ++ /* compare files content */ ++ src_fd = open (src_name, O_RDONLY | O_BINARY); ++ if (src_fd < 0) ++ return true; ++ ++ dest_fd = open (dest_name, O_RDONLY | O_BINARY); ++ if (dest_fd < 0) ++ { ++ close (src_fd); ++ return true; ++ } ++ ++ content_match = have_same_content (src_fd, dest_fd); ++ ++ close (src_fd); ++ close (dest_fd); ++ return !content_match; ++} ++ ++static void ++cp_option_init (struct cp_options *x) ++{ ++ cp_options_default (x); ++ x->copy_as_regular = true; ++ x->reflink_mode = REFLINK_NEVER; ++ x->dereference = DEREF_ALWAYS; ++ x->unlink_dest_before_opening = true; ++ x->unlink_dest_after_failed_open = false; ++ x->hard_link = false; ++ x->interactive = I_UNSPECIFIED; ++ x->move_mode = false; ++ x->one_file_system = false; ++ x->preserve_ownership = false; ++ x->preserve_links = false; ++ x->preserve_mode = false; ++ x->preserve_timestamps = false; ++ x->reduce_diagnostics=false; ++ x->require_preserve = false; ++ x->require_preserve_context = false; ++ x->require_preserve_xattr = false; ++ x->recursive = false; ++ x->sparse_mode = SPARSE_AUTO; ++ x->symbolic_link = false; ++ x->backup_type = no_backups; ++ ++ /* Create destination files initially writable so we can run strip on them. ++ Although GNU strip works fine on read-only files, some others ++ would fail. 
*/ ++ x->set_mode = true; ++ x->mode = S_IRUSR | S_IWUSR; ++ x->stdin_tty = false; ++ ++ x->open_dangling_dest_symlink = false; ++ x->update = false; ++ x->preserve_security_context = false; ++ x->preserve_xattr = false; ++ x->verbose = false; ++ x->dest_info = NULL; ++ x->src_info = NULL; ++} ++ ++#ifdef ENABLE_MATCHPATHCON ++/* Modify file context to match the specified policy. ++ If an error occurs the file will remain with the default directory ++ context. */ ++static void ++setdefaultfilecon (char const *file) ++{ ++ struct stat st; ++ security_context_t scontext = NULL; ++ static bool first_call = true; ++ ++ if (selinux_enabled != 1) ++ { ++ /* Indicate no context found. */ ++ return; ++ } ++ if (lstat (file, &st) != 0) ++ return; ++ ++ if (first_call && IS_ABSOLUTE_FILE_NAME (file)) ++ { ++ /* Calling matchpathcon_init_prefix (NULL, "/first_component/") ++ is an optimization to minimize the expense of the following ++ matchpathcon call. Do it only once, just before the first ++ matchpathcon call. We *could* call matchpathcon_fini after ++ the final matchpathcon call, but that's not necessary, since ++ by then we're about to exit, and besides, the buffers it ++ would free are still reachable. */ ++ char const *p0; ++ char const *p = file + 1; ++ while (ISSLASH (*p)) ++ ++p; ++ ++ /* Record final leading slash, for when FILE starts with two or more. 
*/ ++ p0 = p - 1; ++ ++ if (*p) ++ { ++ char *prefix; ++ do ++ { ++ ++p; ++ } ++ while (*p && !ISSLASH (*p)); ++ ++ prefix = malloc (p - p0 + 2); ++ if (prefix) ++ { ++ stpcpy (stpncpy (prefix, p0, p - p0), "/"); ++ matchpathcon_init_prefix (NULL, prefix); ++ free (prefix); ++ } ++ } ++ } ++ first_call = false; ++ ++ /* If there's an error determining the context, or it has none, ++ return to allow default context */ ++ if ((matchpathcon (file, st.st_mode, &scontext) != 0) || ++ STREQ (scontext, "<<none>>")) ++ { ++ if (scontext != NULL) ++ freecon (scontext); ++ return; ++ } ++ ++ if (lsetfilecon (file, scontext) < 0 && errno != ENOTSUP) ++ error (0, errno, ++ _("warning: %s: failed to change context to %s"), ++ quotearg_colon (file), scontext); ++ ++ freecon (scontext); ++ return; ++} ++#else ++static void ++setdefaultfilecon (char const *file) ++{ ++ (void) file; ++} ++#endif ++ ++/* FILE is the last operand of this command. Return true if FILE is a ++ directory. But report an error there is a problem accessing FILE, ++ or if FILE does not exist but would have to refer to an existing ++ directory if it referred to anything at all. */ ++ ++static bool ++target_directory_operand (char const *file) ++{ ++ char const *b = last_component (file); ++ size_t blen = strlen (b); ++ bool looks_like_a_dir = (blen == 0 || ISSLASH (b[blen - 1])); ++ struct stat st; ++ int err = (stat (file, &st) == 0 ? 0 : errno); ++ bool is_a_dir = !err && S_ISDIR (st.st_mode); ++ if (err && err != ENOENT) ++ error (EXIT_FAILURE, err, _("accessing %s"), quote (file)); ++ if (is_a_dir < looks_like_a_dir) ++ error (EXIT_FAILURE, err, _("target %s is not a directory"), quote (file)); ++ return is_a_dir; ++} ++ ++/* Process a command-line file name, for the -d option. */ ++static int ++process_dir (char *dir, struct savewd *wd, void *options) ++{ ++ return (make_dir_parents (dir, wd, ++ make_ancestor, options, ++ dir_mode, announce_mkdir, ++ dir_mode_bits, owner_id, group_id, false) ++ ? 
EXIT_SUCCESS ++ : EXIT_FAILURE); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int optc; ++ int exit_status = EXIT_SUCCESS; ++ const char *specified_mode = NULL; ++ bool make_backups = false; ++ char *backup_suffix_string; ++ char *version_control_string = NULL; ++ bool mkdir_and_install = false; ++ struct cp_options x; ++ char const *target_directory = NULL; ++ bool no_target_directory = false; ++ int n_files; ++ char **file; ++ bool strip_program_specified = false; ++ security_context_t scontext = NULL; ++ /* set iff kernel has extra selinux system calls */ ++ selinux_enabled = (0 < is_selinux_enabled ()); ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdin); ++ ++ cp_option_init (&x); ++ ++ owner_name = NULL; ++ group_name = NULL; ++ strip_files = false; ++ dir_arg = false; ++ umask (0); ++ ++ /* FIXME: consider not calling getenv for SIMPLE_BACKUP_SUFFIX unless ++ we'll actually use backup_suffix_string. */ ++ backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX"); ++ ++ while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pt:TvS:Z:", long_options, ++ NULL)) != -1) ++ { ++ switch (optc) ++ { ++ case 'b': ++ make_backups = true; ++ if (optarg) ++ version_control_string = optarg; ++ break; ++ case 'c': ++ break; ++ case 'C': ++ copy_only_if_needed = true; ++ break; ++ case 's': ++ strip_files = true; ++#ifdef SIGCHLD ++ /* System V fork+wait does not work if SIGCHLD is ignored. 
*/ ++ signal (SIGCHLD, SIG_DFL); ++#endif ++ break; ++ case STRIP_PROGRAM_OPTION: ++ strip_program = xstrdup (optarg); ++ strip_program_specified = true; ++ break; ++ case 'd': ++ dir_arg = true; ++ break; ++ case 'D': ++ mkdir_and_install = true; ++ break; ++ case 'v': ++ x.verbose = true; ++ break; ++ case 'g': ++ group_name = optarg; ++ break; ++ case 'm': ++ specified_mode = optarg; ++ break; ++ case 'o': ++ owner_name = optarg; ++ break; ++ case 'p': ++ x.preserve_timestamps = true; ++ break; ++ case 'S': ++ make_backups = true; ++ backup_suffix_string = optarg; ++ break; ++ case 't': ++ if (target_directory) ++ error (EXIT_FAILURE, 0, ++ _("multiple target directories specified")); ++ else ++ { ++ struct stat st; ++ if (stat (optarg, &st) != 0) ++ error (EXIT_FAILURE, errno, _("accessing %s"), quote (optarg)); ++ if (! S_ISDIR (st.st_mode)) ++ error (EXIT_FAILURE, 0, _("target %s is not a directory"), ++ quote (optarg)); ++ } ++ target_directory = optarg; ++ break; ++ case 'T': ++ no_target_directory = true; ++ break; ++ ++ case PRESERVE_CONTEXT_OPTION_DEPRECATED: ++ error (0, 0, _("WARNING: --preserve_context is deprecated; " ++ "use --preserve-context instead")); ++ /* fall through */ ++ case PRESERVE_CONTEXT_OPTION: ++ if ( ! selinux_enabled) ++ { ++ error (0, 0, _("WARNING: ignoring --preserve-context; " ++ "this kernel is not SELinux-enabled")); ++ break; ++ } ++ x.preserve_security_context = true; ++ use_default_selinux_context = false; ++ break; ++ case 'Z': ++ if ( ! selinux_enabled) ++ { ++ error (0, 0, _("WARNING: ignoring --context (-Z); " ++ "this kernel is not SELinux-enabled")); ++ break; ++ } ++ scontext = optarg; ++ use_default_selinux_context = false; ++ break; ++ case_GETOPT_HELP_CHAR; ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ /* Check for invalid combinations of arguments. 
*/ ++ if (dir_arg && strip_files) ++ error (EXIT_FAILURE, 0, ++ _("the strip option may not be used when installing a directory")); ++ if (dir_arg && target_directory) ++ error (EXIT_FAILURE, 0, ++ _("target directory not allowed when installing a directory")); ++ ++ if (x.preserve_security_context && scontext != NULL) ++ error (EXIT_FAILURE, 0, ++ _("cannot force target context to %s and preserve it"), ++ quote (scontext)); ++ ++ if (backup_suffix_string) ++ simple_backup_suffix = xstrdup (backup_suffix_string); ++ ++ x.backup_type = (make_backups ++ ? xget_version (_("backup type"), ++ version_control_string) ++ : no_backups); ++ ++ if (scontext && setfscreatecon (scontext) < 0) ++ error (EXIT_FAILURE, errno, ++ _("failed to set default file creation context to %s"), ++ quote (scontext)); ++ ++ n_files = argc - optind; ++ file = argv + optind; ++ ++ if (n_files <= ! (dir_arg || target_directory)) ++ { ++ if (n_files <= 0) ++ error (0, 0, _("missing file operand")); ++ else ++ error (0, 0, _("missing destination file operand after %s"), ++ quote (file[0])); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (no_target_directory) ++ { ++ if (target_directory) ++ error (EXIT_FAILURE, 0, ++ _("cannot combine --target-directory (-t) " ++ "and --no-target-directory (-T)")); ++ if (2 < n_files) ++ { ++ error (0, 0, _("extra operand %s"), quote (file[2])); ++ usage (EXIT_FAILURE); ++ } ++ } ++ else if (! 
(dir_arg || target_directory)) ++ { ++ if (2 <= n_files && target_directory_operand (file[n_files - 1])) ++ target_directory = file[--n_files]; ++ else if (2 < n_files) ++ error (EXIT_FAILURE, 0, _("target %s is not a directory"), ++ quote (file[n_files - 1])); ++ } ++ ++ if (specified_mode) ++ { ++ struct mode_change *change = mode_compile (specified_mode); ++ if (!change) ++ error (EXIT_FAILURE, 0, _("invalid mode %s"), quote (specified_mode)); ++ mode = mode_adjust (0, false, 0, change, NULL); ++ dir_mode = mode_adjust (0, true, 0, change, &dir_mode_bits); ++ free (change); ++ } ++ ++ if (strip_program_specified && !strip_files) ++ error (0, 0, _("WARNING: ignoring --strip-program option as -s option was " ++ "not specified")); ++ ++ if (copy_only_if_needed && x.preserve_timestamps) ++ { ++ error (0, 0, _("options --compare (-C) and --preserve-timestamps are " ++ "mutually exclusive")); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (copy_only_if_needed && strip_files) ++ { ++ error (0, 0, _("options --compare (-C) and --strip are mutually " ++ "exclusive")); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (copy_only_if_needed && extra_mode (mode)) ++ error (0, 0, _("the --compare (-C) option is ignored when you" ++ " specify a mode with non-permission bits")); ++ ++ get_ids (); ++ ++ if (dir_arg) ++ exit_status = savewd_process_files (n_files, file, process_dir, &x); ++ else ++ { ++ /* FIXME: it's a little gross that this initialization is ++ required by copy.c::copy. */ ++ hash_init (); ++ ++ if (!target_directory) ++ { ++ if (! (mkdir_and_install ++ ? install_file_in_file_parents (file[0], file[1], &x) ++ : install_file_in_file (file[0], file[1], &x))) ++ exit_status = EXIT_FAILURE; ++ } ++ else ++ { ++ int i; ++ dest_info_init (&x); ++ for (i = 0; i < n_files; i++) ++ if (! 
install_file_in_dir (file[i], target_directory, &x)) ++ exit_status = EXIT_FAILURE; ++ } ++ } ++ ++ exit (exit_status); ++} ++ ++/* Copy file FROM onto file TO, creating any missing parent directories of TO. ++ Return true if successful. */ ++ ++static bool ++install_file_in_file_parents (char const *from, char *to, ++ struct cp_options *x) ++{ ++ bool save_working_directory = ++ ! (IS_ABSOLUTE_FILE_NAME (from) && IS_ABSOLUTE_FILE_NAME (to)); ++ int status = EXIT_SUCCESS; ++ ++ struct savewd wd; ++ savewd_init (&wd); ++ if (! save_working_directory) ++ savewd_finish (&wd); ++ ++ if (mkancesdirs (to, &wd, make_ancestor, x) == -1) ++ { ++ error (0, errno, _("cannot create directory %s"), to); ++ status = EXIT_FAILURE; ++ } ++ ++ if (save_working_directory) ++ { ++ int restore_result = savewd_restore (&wd, status); ++ int restore_errno = errno; ++ savewd_finish (&wd); ++ if (EXIT_SUCCESS < restore_result) ++ return false; ++ if (restore_result < 0 && status == EXIT_SUCCESS) ++ { ++ error (0, restore_errno, _("cannot create directory %s"), to); ++ return false; ++ } ++ } ++ ++ return (status == EXIT_SUCCESS && install_file_in_file (from, to, x)); ++} ++ ++/* Copy file FROM onto file TO and give TO the appropriate ++ attributes. ++ Return true if successful. */ ++ ++static bool ++install_file_in_file (const char *from, const char *to, ++ const struct cp_options *x) ++{ ++ struct stat from_sb; ++ if (x->preserve_timestamps && stat (from, &from_sb) != 0) ++ { ++ error (0, errno, _("cannot stat %s"), quote (from)); ++ return false; ++ } ++ if (! copy_file (from, to, x)) ++ return false; ++ if (strip_files) ++ strip (to); ++ if (x->preserve_timestamps && (strip_files || ! S_ISREG (from_sb.st_mode)) ++ && ! change_timestamps (&from_sb, to)) ++ return false; ++ return change_attributes (to); ++} ++ ++/* Copy file FROM into directory TO_DIR, keeping its same name, ++ and give the copy the appropriate attributes. ++ Return true if successful. 
*/ ++ ++static bool ++install_file_in_dir (const char *from, const char *to_dir, ++ const struct cp_options *x) ++{ ++ const char *from_base = last_component (from); ++ char *to = file_name_concat (to_dir, from_base, NULL); ++ bool ret = install_file_in_file (from, to, x); ++ free (to); ++ return ret; ++} ++ ++/* Copy file FROM onto file TO, creating TO if necessary. ++ Return true if successful. */ ++ ++static bool ++copy_file (const char *from, const char *to, const struct cp_options *x) ++{ ++ bool copy_into_self; ++ ++ if (copy_only_if_needed && !need_copy (from, to, x)) ++ return true; ++ ++ /* Allow installing from non-regular files like /dev/null. ++ Charles Karney reported that some Sun version of install allows that ++ and that sendmail's installation process relies on the behavior. ++ However, since !x->recursive, the call to "copy" will fail if FROM ++ is a directory. */ ++ ++ return copy (from, to, false, x, &copy_into_self, NULL); ++} ++ ++/* Set the attributes of file or directory NAME. ++ Return true if successful. */ ++ ++static bool ++change_attributes (char const *name) ++{ ++ bool ok = false; ++ /* chown must precede chmod because on some systems, ++ chown clears the set[ug]id bits for non-superusers, ++ resulting in incorrect permissions. ++ On System V, users can give away files with chown and then not ++ be able to chmod them. So don't give files away. ++ ++ We don't normally ignore errors from chown because the idea of ++ the install command is that the file is supposed to end up with ++ precisely the attributes that the user specified (or defaulted). ++ If the file doesn't end up with the group they asked for, they'll ++ want to know. */ ++ ++ if (! 
(owner_id == (uid_t) -1 && group_id == (gid_t) -1) ++ && lchown (name, owner_id, group_id) != 0) ++ error (0, errno, _("cannot change ownership of %s"), quote (name)); ++ else if (chmod (name, mode) != 0) ++ error (0, errno, _("cannot change permissions of %s"), quote (name)); ++ else ++ ok = true; ++ ++ if (use_default_selinux_context) ++ setdefaultfilecon (name); ++ ++ return ok; ++} ++ ++/* Set the timestamps of file TO to match those of file FROM. ++ Return true if successful. */ ++ ++static bool ++change_timestamps (struct stat const *from_sb, char const *to) ++{ ++ struct timespec timespec[2]; ++ timespec[0] = get_stat_atime (from_sb); ++ timespec[1] = get_stat_mtime (from_sb); ++ ++ if (utimens (to, timespec)) ++ { ++ error (0, errno, _("cannot set time stamps for %s"), quote (to)); ++ return false; ++ } ++ return true; ++} ++ ++/* Strip the symbol table from the file NAME. ++ We could dig the magic number out of the file first to ++ determine whether to strip it, but the header files and ++ magic numbers vary so much from system to system that making ++ it portable would be very difficult. Not worth the effort. */ ++ ++static void ++strip (char const *name) ++{ ++ int status; ++ pid_t pid = fork (); ++ ++ switch (pid) ++ { ++ case -1: ++ error (EXIT_FAILURE, errno, _("fork system call failed")); ++ break; ++ case 0: /* Child. */ ++ execlp (strip_program, strip_program, name, NULL); ++ error (EXIT_FAILURE, errno, _("cannot run %s"), strip_program); ++ break; ++ default: /* Parent. */ ++ if (waitpid (pid, &status, 0) < 0) ++ error (EXIT_FAILURE, errno, _("waiting for strip")); ++ else if (! WIFEXITED (status) || WEXITSTATUS (status)) ++ error (EXIT_FAILURE, 0, _("strip process terminated abnormally")); ++ break; ++ } ++} ++ ++/* Initialize the user and group ownership of the files to install. 
*/ ++ ++static void ++get_ids (void) ++{ ++ struct passwd *pw; ++ struct group *gr; ++ ++ if (owner_name) ++ { ++ pw = getpwnam (owner_name); ++ if (pw == NULL) ++ { ++ unsigned long int tmp; ++ if (xstrtoul (owner_name, NULL, 0, &tmp, NULL) != LONGINT_OK ++ || UID_T_MAX < tmp) ++ error (EXIT_FAILURE, 0, _("invalid user %s"), quote (owner_name)); ++ owner_id = tmp; ++ } ++ else ++ owner_id = pw->pw_uid; ++ endpwent (); ++ } ++ else ++ owner_id = (uid_t) -1; ++ ++ if (group_name) ++ { ++ gr = getgrnam (group_name); ++ if (gr == NULL) ++ { ++ unsigned long int tmp; ++ if (xstrtoul (group_name, NULL, 0, &tmp, NULL) != LONGINT_OK ++ || GID_T_MAX < tmp) ++ error (EXIT_FAILURE, 0, _("invalid group %s"), quote (group_name)); ++ group_id = tmp; ++ } ++ else ++ group_id = gr->gr_gid; ++ endgrent (); ++ } ++ else ++ group_id = (gid_t) -1; ++} ++ ++/* Report that directory DIR was made, if OPTIONS requests this. */ ++static void ++announce_mkdir (char const *dir, void *options) ++{ ++ struct cp_options const *x = options; ++ if (x->verbose) ++ prog_fprintf (stdout, _("creating directory %s"), quote (dir)); ++} ++ ++/* Make ancestor directory DIR, whose last file name component is ++ COMPONENT, with options OPTIONS. Assume the working directory is ++ COMPONENT's parent. */ ++static int ++make_ancestor (char const *dir, char const *component, void *options) ++{ ++ int r = mkdir (component, DEFAULT_MODE); ++ if (r == 0) ++ announce_mkdir (dir, options); ++ return r; ++} ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... [-T] SOURCE DEST\n\ ++ or: %s [OPTION]... SOURCE... DIRECTORY\n\ ++ or: %s [OPTION]... -t DIRECTORY SOURCE...\n\ ++ or: %s [OPTION]... 
-d DIRECTORY...\n\ ++"), ++ program_name, program_name, program_name, program_name); ++ fputs (_("\ ++\n\ ++This install program copies files (often just compiled) into destination\n\ ++locations you choose. If you want to download and install a ready-to-use\n\ ++package on a GNU/Linux system, you should instead be using a package manager\n\ ++like yum(1) or apt-get(1).\n\ ++\n\ ++In the first three forms, copy SOURCE to DEST or multiple SOURCE(s) to\n\ ++the existing DIRECTORY, while setting permission modes and owner/group.\n\ ++In the 4th form, create all components of the given DIRECTORY(ies).\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ --backup[=CONTROL] make a backup of each existing destination file\n\ ++ -b like --backup but does not accept an argument\n\ ++ -c (ignored)\n\ ++ -C, --compare compare each pair of source and destination files, and\n\ ++ in some cases, do not modify the destination at all\n\ ++ -d, --directory treat all arguments as directory names; create all\n\ ++ components of the specified directories\n\ ++"), stdout); ++ fputs (_("\ ++ -D create all leading components of DEST except the last,\n\ ++ then copy SOURCE to DEST\n\ ++ -g, --group=GROUP set group ownership, instead of process' current group\n\ ++ -m, --mode=MODE set permission mode (as in chmod), instead of rwxr-xr-x\n\ ++ -o, --owner=OWNER set ownership (super-user only)\n\ ++"), stdout); ++ fputs (_("\ ++ -p, --preserve-timestamps apply access/modification times of SOURCE files\n\ ++ to corresponding destination files\n\ ++ -s, --strip strip symbol tables\n\ ++ --strip-program=PROGRAM program used to strip binaries\n\ ++ -S, --suffix=SUFFIX override the usual backup suffix\n\ ++ -t, --target-directory=DIRECTORY copy all SOURCE arguments into DIRECTORY\n\ ++ -T, --no-target-directory treat DEST as a normal file\n\ ++ -v, --verbose print the name of each directory as it is 
created\n\ ++"), stdout); ++ fputs (_("\ ++ --preserve-context preserve SELinux security context\n\ ++ -Z, --context=CONTEXT set SELinux security context of files and directories\n\ ++"), stdout); ++ ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++The backup suffix is `~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX.\n\ ++The version control method may be selected via the --backup option or through\n\ ++the VERSION_CONTROL environment variable. Here are the values:\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++ none, off never make backups (even if --backup is given)\n\ ++ numbered, t make numbered backups\n\ ++ existing, nil numbered if numbered backups exist, simple otherwise\n\ ++ simple, never always make simple backups\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} +diff -urNp coreutils-8.0-orig/src/ls.c coreutils-8.0/src/ls.c +--- coreutils-8.0-orig/src/ls.c 2009-10-07 10:09:43.000000000 +0200 ++++ coreutils-8.0/src/ls.c 2009-10-07 10:10:11.000000000 +0200 +@@ -162,7 +162,8 @@ enum filetype symbolic_link, sock, whiteout, @@ -256,7 +5688,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c }; /* Display letters and indicators for each filetype. -@@ -253,6 +254,7 @@ static void queue_directory (char const +@@ -279,6 +280,7 @@ static void queue_directory (char const static void sort_files (void); static void parse_ls_color (void); void usage (int status); @@ -264,7 +5696,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c /* Initial size of hash table. Most hierarchies are likely to be shallower than this. */ -@@ -322,7 +324,7 @@ static struct pending *pending_dirs; +@@ -348,7 +350,7 @@ static struct pending *pending_dirs; static struct timespec current_time; @@ -273,7 +5705,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c static char UNKNOWN_SECURITY_CONTEXT[] = "?"; /* Whether any of the files has an ACL. 
This affects the width of the -@@ -362,7 +364,9 @@ enum format +@@ -388,7 +390,9 @@ enum format one_per_line, /* -1 */ many_per_line, /* -C */ horizontal, /* -x */ @@ -284,7 +5716,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c }; static enum format format; -@@ -754,6 +758,9 @@ enum +@@ -790,6 +794,9 @@ enum SHOW_CONTROL_CHARS_OPTION, SI_OPTION, SORT_OPTION, @@ -294,7 +5726,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c TIME_OPTION, TIME_STYLE_OPTION }; -@@ -799,7 +806,9 @@ static struct option const long_options[ +@@ -835,7 +842,9 @@ static struct option const long_options[ {"time-style", required_argument, NULL, TIME_STYLE_OPTION}, {"color", optional_argument, NULL, COLOR_OPTION}, {"block-size", required_argument, NULL, BLOCK_SIZE_OPTION}, @@ -305,7 +5737,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c {"author", no_argument, NULL, AUTHOR_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, -@@ -809,12 +818,12 @@ static struct option const long_options[ +@@ -845,12 +854,12 @@ static struct option const long_options[ static char const *const format_args[] = { "verbose", "long", "commas", "horizontal", "across", @@ -320,7 +5752,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c }; ARGMATCH_VERIFY (format_args, format_types); -@@ -1194,7 +1203,8 @@ main (int argc, char **argv) +@@ -1281,7 +1290,8 @@ main (int argc, char **argv) /* Avoid following symbolic links when possible. 
*/ if (is_colored (C_ORPHAN) || (is_colored (C_EXEC) && color_symlink_as_referent) @@ -330,7 +5762,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c check_symlink_color = true; /* If the standard output is a controlling terminal, watch out -@@ -1241,7 +1251,7 @@ main (int argc, char **argv) +@@ -1328,7 +1338,7 @@ main (int argc, char **argv) if (dereference == DEREF_UNDEFINED) dereference = ((immediate_dirs || indicator_style == classify @@ -339,7 +5771,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c ? DEREF_NEVER : DEREF_COMMAND_LINE_SYMLINK_TO_DIR); -@@ -1261,7 +1271,7 @@ main (int argc, char **argv) +@@ -1348,7 +1358,7 @@ main (int argc, char **argv) format_needs_stat = sort_type == sort_time || sort_type == sort_size || format == long_format @@ -348,7 +5780,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c || print_block_size; format_needs_type = (! format_needs_stat && (recursive -@@ -1292,7 +1302,7 @@ main (int argc, char **argv) +@@ -1379,7 +1389,7 @@ main (int argc, char **argv) } else do @@ -357,7 +5789,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c while (i < argc); if (cwd_n_used) -@@ -1455,7 +1465,7 @@ decode_switches (int argc, char **argv) +@@ -1542,7 +1552,7 @@ decode_switches (int argc, char **argv) ignore_mode = IGNORE_DEFAULT; ignore_patterns = NULL; hide_patterns = NULL; @@ -366,7 +5798,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c /* FIXME: put this in a function. 
*/ { -@@ -1837,13 +1847,27 @@ decode_switches (int argc, char **argv) +@@ -1924,13 +1934,27 @@ decode_switches (int argc, char **argv) break; case 'Z': @@ -395,7 +5827,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c default: usage (LS_FAILURE); } -@@ -2557,8 +2581,10 @@ clear_files (void) +@@ -2651,8 +2675,10 @@ clear_files (void) struct fileinfo *f = sorted_file[i]; free (f->name); free (f->linkname); @@ -408,7 +5840,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c } cwd_n_used = 0; -@@ -2600,6 +2626,7 @@ gobble_file (char const *name, enum file +@@ -2694,6 +2720,7 @@ gobble_file (char const *name, enum file memset (f, '\0', sizeof *f); f->stat.st_ino = inode; f->filetype = type; @@ -416,7 +5848,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c if (command_line_arg || format_needs_stat -@@ -2699,7 +2726,7 @@ gobble_file (char const *name, enum file +@@ -2793,7 +2820,7 @@ gobble_file (char const *name, enum file f->stat_ok = true; @@ -425,7 +5857,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c { bool have_selinux = false; bool have_acl = false; -@@ -2732,7 +2760,7 @@ gobble_file (char const *name, enum file +@@ -2827,7 +2854,7 @@ gobble_file (char const *name, enum file err = 0; } @@ -434,7 +5866,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c { int n = file_has_acl (absolute_name, &f->stat); err = (n < 0); -@@ -2751,7 +2779,8 @@ gobble_file (char const *name, enum file +@@ -2846,7 +2873,8 @@ gobble_file (char const *name, enum file } if (S_ISLNK (f->stat.st_mode) @@ -444,7 +5876,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c { char *linkname; struct stat linkstats; -@@ -2771,6 +2800,7 @@ gobble_file (char const *name, enum file +@@ -2866,6 +2894,7 @@ gobble_file (char const *name, enum file command line are automatically traced if not being listed as files. 
*/ if (!command_line_arg || format == long_format @@ -452,7 +5884,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c || !S_ISDIR (linkstats.st_mode)) { /* Get the linked-to file's mode for the filetype indicator -@@ -2810,7 +2840,7 @@ gobble_file (char const *name, enum file +@@ -2905,7 +2934,7 @@ gobble_file (char const *name, enum file block_size_width = len; } @@ -461,7 +5893,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c { if (print_owner) { -@@ -3312,6 +3341,13 @@ print_current_files (void) +@@ -3406,6 +3435,13 @@ print_current_files (void) print_long_format (sorted_file[i]); DIRED_PUTCHAR ('\n'); } @@ -475,10 +5907,10 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c break; } } -@@ -3434,6 +3470,69 @@ format_group_width (gid_t g) +@@ -3568,6 +3604,69 @@ format_inode (char *buf, size_t buflen, + : (char *) "?"); } - +/* Print info about f in scontext format */ +static void +print_scontext_format (const struct fileinfo *f) @@ -543,9 +5975,9 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c +} + /* Print information about F in long format. */ - static void -@@ -3528,9 +3627,15 @@ print_long_format (const struct fileinfo + print_long_format (const struct fileinfo *f) +@@ -3659,9 +3758,15 @@ print_long_format (const struct fileinfo The latter is wrong when nlink_width is zero. 
*/ p += strlen (p); @@ -562,7 +5994,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c { DIRED_FPUTS (buf, stdout, p - buf); -@@ -3543,9 +3648,6 @@ print_long_format (const struct fileinfo +@@ -3674,9 +3779,6 @@ print_long_format (const struct fileinfo if (print_author) format_user (f->stat.st_author, author_width, f->stat_ok); @@ -572,9 +6004,9 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c p = buf; } -@@ -3888,9 +3990,6 @@ print_file_name_and_frills (const struct - human_readable (ST_NBLOCKS (f->stat), buf, human_output_opts, - ST_NBLOCKSIZE, output_block_size)); +@@ -4020,9 +4122,6 @@ print_file_name_and_frills (const struct + : human_readable (ST_NBLOCKS (f->stat), buf, human_output_opts, + ST_NBLOCKSIZE, output_block_size)); - if (print_scontext) - printf ("%*s ", format == with_commas ? 0 : scontext_width, f->scontext); @@ -582,8 +6014,8 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c size_t width = print_name_with_quoting (f->name, FILE_OR_LINK_MODE (f), f->linkok, f->stat_ok, f->filetype, NULL, f->stat.st_nlink, start_col); -@@ -4105,9 +4204,6 @@ length_of_file_name_and_frills (const st - output_block_size)) +@@ -4241,9 +4340,6 @@ length_of_file_name_and_frills (const st + output_block_size)) : block_size_width); - if (print_scontext) @@ -592,7 +6024,7 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c quote_name (NULL, f->name, filename_quoting_options, &name_width); len += name_width; -@@ -4538,9 +4634,16 @@ Mandatory arguments to long options are +@@ -4674,9 +4770,16 @@ Mandatory arguments to long options are -w, --width=COLS assume screen width instead of current value\n\ -x list entries by lines instead of by columns\n\ -X sort alphabetically by entry extension\n\ @@ -610,9 +6042,4713 @@ diff -urNp coreutils-7.1-orig/src/ls.c coreutils-7.1/src/ls.c fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); emit_size_note (); -diff -urNp 
coreutils-7.1-orig/src/mkdir.c coreutils-7.1/src/mkdir.c ---- coreutils-7.1-orig/src/mkdir.c 2008-10-19 21:47:57.000000000 +0200 -+++ coreutils-7.1/src/mkdir.c 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/src/ls.c.orig coreutils-8.0/src/ls.c.orig +--- coreutils-8.0-orig/src/ls.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/ls.c.orig 2009-10-07 10:09:43.000000000 +0200 +@@ -0,0 +1,4700 @@ ++/* `dir', `vdir' and `ls' directory listing programs for GNU. ++ Copyright (C) 85, 88, 90, 91, 1995-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* If ls_mode is LS_MULTI_COL, ++ the multi-column format is the default regardless ++ of the type of output device. ++ This is for the `dir' program. ++ ++ If ls_mode is LS_LONG_FORMAT, ++ the long format is the default regardless of the ++ type of output device. ++ This is for the `vdir' program. ++ ++ If ls_mode is LS_LS, ++ the output format depends on whether the output ++ device is a terminal. ++ This is for the `ls' program. */ ++ ++/* Written by Richard Stallman and David MacKenzie. */ ++ ++/* Color support by Peter Anvin and Dennis ++ Flaherty based on original patches by ++ Greg Lee . 
*/ ++ ++#include ++#include ++ ++#if HAVE_TERMIOS_H ++# include ++#endif ++#if HAVE_STROPTS_H ++# include ++#endif ++#if HAVE_SYS_IOCTL_H ++# include ++#endif ++ ++#ifdef WINSIZE_IN_PTEM ++# include ++# include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if HAVE_LANGINFO_CODESET ++# include ++#endif ++ ++/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is ++ present. */ ++#ifndef SA_NOCLDSTOP ++# define SA_NOCLDSTOP 0 ++# define sigprocmask(How, Set, Oset) /* empty */ ++# define sigset_t int ++# if ! HAVE_SIGINTERRUPT ++# define siginterrupt(sig, flag) /* empty */ ++# endif ++#endif ++#ifndef SA_RESTART ++# define SA_RESTART 0 ++#endif ++ ++#include "system.h" ++#include ++ ++#ifdef HAVE_CAP ++# include ++#endif ++ ++#include "acl.h" ++#include "argmatch.h" ++#include "dev-ino.h" ++#include "error.h" ++#include "filenamecat.h" ++#include "hard-locale.h" ++#include "hash.h" ++#include "human.h" ++#include "filemode.h" ++#include "filevercmp.h" ++#include "idcache.h" ++#include "ls.h" ++#include "mbswidth.h" ++#include "mpsort.h" ++#include "obstack.h" ++#include "quote.h" ++#include "quotearg.h" ++#include "same.h" ++#include "stat-time.h" ++#include "strftime.h" ++#include "xstrtol.h" ++#include "areadlink.h" ++#include "mbsalign.h" ++ ++#define PROGRAM_NAME (ls_mode == LS_LS ? "ls" \ ++ : (ls_mode == LS_MULTI_COL \ ++ ? "dir" : "vdir")) ++ ++#define AUTHORS \ ++ proper_name ("Richard M. Stallman"), \ ++ proper_name ("David MacKenzie") ++ ++#define obstack_chunk_alloc malloc ++#define obstack_chunk_free free ++ ++/* Return an int indicating the result of comparing two integers. ++ Subtracting doesn't always work, due to overflow. */ ++#define longdiff(a, b) ((a) < (b) ? -1 : (a) > (b)) ++ ++/* Unix-based readdir implementations have historically returned a dirent.d_ino ++ value that is sometimes not equal to the stat-obtained st_ino value for ++ that same entry. 
This error occurs for a readdir entry that refers ++ to a mount point. readdir's error is to return the inode number of ++ the underlying directory -- one that typically cannot be stat'ed, as ++ long as a file system is mounted on that directory. RELIABLE_D_INO ++ encapsulates whether we can use the more efficient approach of relying ++ on readdir-supplied d_ino values, or whether we must incur the cost of ++ calling stat or lstat to obtain each guaranteed-valid inode number. */ ++ ++#ifndef READDIR_LIES_ABOUT_MOUNTPOINT_D_INO ++# define READDIR_LIES_ABOUT_MOUNTPOINT_D_INO 1 ++#endif ++ ++#if READDIR_LIES_ABOUT_MOUNTPOINT_D_INO ++# define RELIABLE_D_INO(dp) NOT_AN_INODE_NUMBER ++#else ++# define RELIABLE_D_INO(dp) D_INO (dp) ++#endif ++ ++#if ! HAVE_STRUCT_STAT_ST_AUTHOR ++# define st_author st_uid ++#endif ++ ++enum filetype ++ { ++ unknown, ++ fifo, ++ chardev, ++ directory, ++ blockdev, ++ normal, ++ symbolic_link, ++ sock, ++ whiteout, ++ arg_directory ++ }; ++ ++/* Display letters and indicators for each filetype. ++ Keep these in sync with enum filetype. */ ++static char const filetype_letter[] = "?pcdb-lswd"; ++ ++/* Ensure that filetype and filetype_letter have the same ++ number of elements. */ ++verify (sizeof filetype_letter - 1 == arg_directory + 1); ++ ++#define FILETYPE_INDICATORS \ ++ { \ ++ C_ORPHAN, C_FIFO, C_CHR, C_DIR, C_BLK, C_FILE, \ ++ C_LINK, C_SOCK, C_FILE, C_DIR \ ++ } ++ ++enum acl_type ++ { ++ ACL_T_NONE, ++ ACL_T_SELINUX_ONLY, ++ ACL_T_YES ++ }; ++ ++struct fileinfo ++ { ++ /* The file name. */ ++ char *name; ++ ++ /* For symbolic link, name of the file linked to, otherwise zero. */ ++ char *linkname; ++ ++ struct stat stat; ++ ++ enum filetype filetype; ++ ++ /* For symbolic link and long listing, st_mode of file linked to, otherwise ++ zero. */ ++ mode_t linkmode; ++ ++ /* SELinux security context. 
*/ ++ security_context_t scontext; ++ ++ bool stat_ok; ++ ++ /* For symbolic link and color printing, true if linked-to file ++ exists, otherwise false. */ ++ bool linkok; ++ ++ /* For long listings, true if the file has an access control list, ++ or an SELinux security context. */ ++ enum acl_type acl_type; ++ }; ++ ++#define LEN_STR_PAIR(s) sizeof (s) - 1, s ++ ++/* Null is a valid character in a color indicator (think about Epson ++ printers, for example) so we have to use a length/buffer string ++ type. */ ++ ++struct bin_str ++ { ++ size_t len; /* Number of bytes */ ++ const char *string; /* Pointer to the same */ ++ }; ++ ++#if ! HAVE_TCGETPGRP ++# define tcgetpgrp(Fd) 0 ++#endif ++ ++static size_t quote_name (FILE *out, const char *name, ++ struct quoting_options const *options, ++ size_t *width); ++static char *make_link_name (char const *name, char const *linkname); ++static int decode_switches (int argc, char **argv); ++static bool file_ignored (char const *name); ++static uintmax_t gobble_file (char const *name, enum filetype type, ++ ino_t inode, bool command_line_arg, ++ char const *dirname); ++static bool print_color_indicator (const char *name, mode_t mode, int linkok, ++ bool stat_ok, enum filetype type, ++ nlink_t nlink); ++static void put_indicator (const struct bin_str *ind); ++static void add_ignore_pattern (const char *pattern); ++static void attach (char *dest, const char *dirname, const char *name); ++static void clear_files (void); ++static void extract_dirs_from_files (char const *dirname, ++ bool command_line_arg); ++static void get_link_name (char const *filename, struct fileinfo *f, ++ bool command_line_arg); ++static void indent (size_t from, size_t to); ++static size_t calculate_columns (bool by_columns); ++static void print_current_files (void); ++static void print_dir (char const *name, char const *realname, ++ bool command_line_arg); ++static size_t print_file_name_and_frills (const struct fileinfo *f, ++ size_t start_col); ++static 
void print_horizontal (void); ++static int format_user_width (uid_t u); ++static int format_group_width (gid_t g); ++static void print_long_format (const struct fileinfo *f); ++static void print_many_per_line (void); ++static size_t print_name_with_quoting (const char *p, mode_t mode, ++ int linkok, bool stat_ok, ++ enum filetype type, ++ struct obstack *stack, ++ nlink_t nlink, ++ size_t start_col); ++static void prep_non_filename_text (void); ++static bool print_type_indicator (bool stat_ok, mode_t mode, ++ enum filetype type); ++static void print_with_commas (void); ++static void queue_directory (char const *name, char const *realname, ++ bool command_line_arg); ++static void sort_files (void); ++static void parse_ls_color (void); ++void usage (int status); ++ ++/* Initial size of hash table. ++ Most hierarchies are likely to be shallower than this. */ ++#define INITIAL_TABLE_SIZE 30 ++ ++/* The set of `active' directories, from the current command-line argument ++ to the level in the hierarchy at which files are being listed. ++ A directory is represented by its device and inode numbers (struct dev_ino). ++ A directory is added to this set when ls begins listing it or its ++ entries, and it is removed from the set just after ls has finished ++ processing it. This set is used solely to detect loops, e.g., with ++ mkdir loop; cd loop; ln -s ../loop sub; ls -RL */ ++static Hash_table *active_dir_set; ++ ++#define LOOP_DETECT (!!active_dir_set) ++ ++/* The table of files in the current directory: ++ ++ `cwd_file' points to a vector of `struct fileinfo', one per file. ++ `cwd_n_alloc' is the number of elements space has been allocated for. ++ `cwd_n_used' is the number actually in use. */ ++ ++/* Address of block containing the files that are described. */ ++static struct fileinfo *cwd_file; ++ ++/* Length of block that `cwd_file' points to, measured in files. */ ++static size_t cwd_n_alloc; ++ ++/* Index of first unused slot in `cwd_file'. 
*/ ++static size_t cwd_n_used; ++ ++/* Vector of pointers to files, in proper sorted order, and the number ++ of entries allocated for it. */ ++static void **sorted_file; ++static size_t sorted_file_alloc; ++ ++/* When true, in a color listing, color each symlink name according to the ++ type of file it points to. Otherwise, color them according to the `ln' ++ directive in LS_COLORS. Dangling (orphan) symlinks are treated specially, ++ regardless. This is set when `ln=target' appears in LS_COLORS. */ ++ ++static bool color_symlink_as_referent; ++ ++/* mode of appropriate file for colorization */ ++#define FILE_OR_LINK_MODE(File) \ ++ ((color_symlink_as_referent && (File)->linkok) \ ++ ? (File)->linkmode : (File)->stat.st_mode) ++ ++ ++/* Record of one pending directory waiting to be listed. */ ++ ++struct pending ++ { ++ char *name; ++ /* If the directory is actually the file pointed to by a symbolic link we ++ were told to list, `realname' will contain the name of the symbolic ++ link, otherwise zero. */ ++ char *realname; ++ bool command_line_arg; ++ struct pending *next; ++ }; ++ ++static struct pending *pending_dirs; ++ ++/* Current time in seconds and nanoseconds since 1970, updated as ++ needed when deciding whether a file is recent. */ ++ ++static struct timespec current_time; ++ ++static bool print_scontext; ++static char UNKNOWN_SECURITY_CONTEXT[] = "?"; ++ ++/* Whether any of the files has an ACL. This affects the width of the ++ mode column. */ ++ ++static bool any_has_acl; ++ ++/* The number of columns to use for columns containing inode numbers, ++ block sizes, link counts, owners, groups, authors, major device ++ numbers, minor device numbers, and file sizes, respectively. 
*/ ++ ++static int inode_number_width; ++static int block_size_width; ++static int nlink_width; ++static int scontext_width; ++static int owner_width; ++static int group_width; ++static int author_width; ++static int major_device_number_width; ++static int minor_device_number_width; ++static int file_size_width; ++ ++/* Option flags */ ++ ++/* long_format for lots of info, one per line. ++ one_per_line for just names, one per line. ++ many_per_line for just names, many per line, sorted vertically. ++ horizontal for just names, many per line, sorted horizontally. ++ with_commas for just names, many per line, separated by commas. ++ ++ -l (and other options that imply -l), -1, -C, -x and -m control ++ this parameter. */ ++ ++enum format ++ { ++ long_format, /* -l and other options that imply -l */ ++ one_per_line, /* -1 */ ++ many_per_line, /* -C */ ++ horizontal, /* -x */ ++ with_commas /* -m */ ++ }; ++ ++static enum format format; ++ ++/* `full-iso' uses full ISO-style dates and times. `long-iso' uses longer ++ ISO-style time stamps, though shorter than `full-iso'. `iso' uses shorter ++ ISO-style time stamps. `locale' uses locale-dependent time stamps. */ ++enum time_style ++ { ++ full_iso_time_style, /* --time-style=full-iso */ ++ long_iso_time_style, /* --time-style=long-iso */ ++ iso_time_style, /* --time-style=iso */ ++ locale_time_style /* --time-style=locale */ ++ }; ++ ++static char const *const time_style_args[] = ++{ ++ "full-iso", "long-iso", "iso", "locale", NULL ++}; ++static enum time_style const time_style_types[] = ++{ ++ full_iso_time_style, long_iso_time_style, iso_time_style, ++ locale_time_style ++}; ++ARGMATCH_VERIFY (time_style_args, time_style_types); ++ ++/* Type of time to print or sort by. Controlled by -c and -u. ++ The values of each item of this enum are important since they are ++ used as indices in the sort functions array (see sort_files()). 
*/ ++ ++enum time_type ++ { ++ time_mtime, /* default */ ++ time_ctime, /* -c */ ++ time_atime, /* -u */ ++ time_numtypes /* the number of elements of this enum */ ++ }; ++ ++static enum time_type time_type; ++ ++/* The file characteristic to sort by. Controlled by -t, -S, -U, -X, -v. ++ The values of each item of this enum are important since they are ++ used as indices in the sort functions array (see sort_files()). */ ++ ++enum sort_type ++ { ++ sort_none = -1, /* -U */ ++ sort_name, /* default */ ++ sort_extension, /* -X */ ++ sort_size, /* -S */ ++ sort_version, /* -v */ ++ sort_time, /* -t */ ++ sort_numtypes /* the number of elements of this enum */ ++ }; ++ ++static enum sort_type sort_type; ++ ++/* Direction of sort. ++ false means highest first if numeric, ++ lowest first if alphabetic; ++ these are the defaults. ++ true means the opposite order in each case. -r */ ++ ++static bool sort_reverse; ++ ++/* True means to display owner information. -g turns this off. */ ++ ++static bool print_owner = true; ++ ++/* True means to display author information. */ ++ ++static bool print_author; ++ ++/* True means to display group information. -G and -o turn this off. */ ++ ++static bool print_group = true; ++ ++/* True means print the user and group id's as numbers rather ++ than as names. -n */ ++ ++static bool numeric_ids; ++ ++/* True means mention the size in blocks of each file. -s */ ++ ++static bool print_block_size; ++ ++/* Human-readable options for output. */ ++static int human_output_opts; ++ ++/* The units to use when printing sizes other than file sizes. */ ++static uintmax_t output_block_size; ++ ++/* Likewise, but for file sizes. */ ++static uintmax_t file_output_block_size = 1; ++ ++/* Follow the output with a special string. Using this format, ++ Emacs' dired mode starts up twice as fast, and can handle all ++ strange characters in file names. */ ++static bool dired; ++ ++/* `none' means don't mention the type of files. 
++ `slash' means mention directories only, with a '/'. ++ `file_type' means mention file types. ++ `classify' means mention file types and mark executables. ++ ++ Controlled by -F, -p, and --indicator-style. */ ++ ++enum indicator_style ++ { ++ none, /* --indicator-style=none */ ++ slash, /* -p, --indicator-style=slash */ ++ file_type, /* --indicator-style=file-type */ ++ classify /* -F, --indicator-style=classify */ ++ }; ++ ++static enum indicator_style indicator_style; ++ ++/* Names of indicator styles. */ ++static char const *const indicator_style_args[] = ++{ ++ "none", "slash", "file-type", "classify", NULL ++}; ++static enum indicator_style const indicator_style_types[] = ++{ ++ none, slash, file_type, classify ++}; ++ARGMATCH_VERIFY (indicator_style_args, indicator_style_types); ++ ++/* True means use colors to mark types. Also define the different ++ colors as well as the stuff for the LS_COLORS environment variable. ++ The LS_COLORS variable is now in a termcap-like format. */ ++ ++static bool print_with_color; ++ ++/* Whether we used any colors in the output so far. If so, we will ++ need to restore the default color later. If not, we will need to ++ call prep_non_filename_text before using color for the first time. 
*/ ++ ++static bool used_color = false; ++ ++enum color_type ++ { ++ color_never, /* 0: default or --color=never */ ++ color_always, /* 1: --color=always */ ++ color_if_tty /* 2: --color=tty */ ++ }; ++ ++enum Dereference_symlink ++ { ++ DEREF_UNDEFINED = 1, ++ DEREF_NEVER, ++ DEREF_COMMAND_LINE_ARGUMENTS, /* -H */ ++ DEREF_COMMAND_LINE_SYMLINK_TO_DIR, /* the default, in certain cases */ ++ DEREF_ALWAYS /* -L */ ++ }; ++ ++enum indicator_no ++ { ++ C_LEFT, C_RIGHT, C_END, C_RESET, C_NORM, C_FILE, C_DIR, C_LINK, ++ C_FIFO, C_SOCK, ++ C_BLK, C_CHR, C_MISSING, C_ORPHAN, C_EXEC, C_DOOR, C_SETUID, C_SETGID, ++ C_STICKY, C_OTHER_WRITABLE, C_STICKY_OTHER_WRITABLE, C_CAP, C_MULTIHARDLINK, ++ C_CLR_TO_EOL ++ }; ++ ++static const char *const indicator_name[]= ++ { ++ "lc", "rc", "ec", "rs", "no", "fi", "di", "ln", "pi", "so", ++ "bd", "cd", "mi", "or", "ex", "do", "su", "sg", "st", ++ "ow", "tw", "ca", "mh", "cl", NULL ++ }; ++ ++struct color_ext_type ++ { ++ struct bin_str ext; /* The extension we're looking for */ ++ struct bin_str seq; /* The sequence to output when we do */ ++ struct color_ext_type *next; /* Next in list */ ++ }; ++ ++static struct bin_str color_indicator[] = ++ { ++ { LEN_STR_PAIR ("\033[") }, /* lc: Left of color sequence */ ++ { LEN_STR_PAIR ("m") }, /* rc: Right of color sequence */ ++ { 0, NULL }, /* ec: End color (replaces lc+no+rc) */ ++ { LEN_STR_PAIR ("0") }, /* rs: Reset to ordinary colors */ ++ { 0, NULL }, /* no: Normal */ ++ { 0, NULL }, /* fi: File: default */ ++ { LEN_STR_PAIR ("01;34") }, /* di: Directory: bright blue */ ++ { LEN_STR_PAIR ("01;36") }, /* ln: Symlink: bright cyan */ ++ { LEN_STR_PAIR ("33") }, /* pi: Pipe: yellow/brown */ ++ { LEN_STR_PAIR ("01;35") }, /* so: Socket: bright magenta */ ++ { LEN_STR_PAIR ("01;33") }, /* bd: Block device: bright yellow */ ++ { LEN_STR_PAIR ("01;33") }, /* cd: Char device: bright yellow */ ++ { 0, NULL }, /* mi: Missing file: undefined */ ++ { 0, NULL }, /* or: Orphaned symlink: undefined */ 
++ { LEN_STR_PAIR ("01;32") }, /* ex: Executable: bright green */ ++ { LEN_STR_PAIR ("01;35") }, /* do: Door: bright magenta */ ++ { LEN_STR_PAIR ("37;41") }, /* su: setuid: white on red */ ++ { LEN_STR_PAIR ("30;43") }, /* sg: setgid: black on yellow */ ++ { LEN_STR_PAIR ("37;44") }, /* st: sticky: white on blue */ ++ { LEN_STR_PAIR ("34;42") }, /* ow: other-writable: blue on green */ ++ { LEN_STR_PAIR ("30;42") }, /* tw: ow w/ sticky: black on green */ ++ { LEN_STR_PAIR ("30;41") }, /* ca: black on red */ ++ { 0, NULL }, /* mh: disabled by default */ ++ { LEN_STR_PAIR ("\033[K") }, /* cl: clear to end of line */ ++ }; ++ ++/* Head of the list of color_ext_type entries (extension -> color sequence). */ ++static struct color_ext_type *color_ext_list = NULL; ++ ++/* Buffer for color sequences */ ++static char *color_buf; ++ ++/* True means to check for orphaned symbolic link, for displaying ++ colors. */ ++ ++static bool check_symlink_color; ++ ++/* True means mention the inode number of each file. -i */ ++ ++static bool print_inode; ++ ++/* What to do with symbolic links. Affected by -d, -F, -H, -l (and ++ other options that imply -l), and -L. */ ++ ++static enum Dereference_symlink dereference; ++ ++/* True means when a directory is found, display info on its ++ contents. -R */ ++ ++static bool recursive; ++ ++/* True means when an argument is a directory name, display info ++ on it itself. -d */ ++ ++static bool immediate_dirs; ++ ++/* True means that directories are grouped before files. */ ++ ++static bool directories_first; ++ ++/* Which files to ignore. */ ++ ++static enum ++{ ++ /* Ignore files whose names start with `.', and files specified by ++ --hide and --ignore. */ ++ IGNORE_DEFAULT, ++ ++ /* Ignore `.', `..', and files specified by --ignore. */ ++ IGNORE_DOT_AND_DOTDOT, ++ ++ /* Ignore only files specified by --ignore. */ ++ IGNORE_MINIMAL ++} ignore_mode; ++ ++/* A linked list of shell-style globbing patterns. If a non-argument ++ file name matches any of these patterns, it is ignored. ++ Controlled by -I. 
Multiple -I options accumulate. ++ The -B option adds `*~' and `.*~' to this list. */ ++ ++struct ignore_pattern ++ { ++ const char *pattern; ++ struct ignore_pattern *next; ++ }; ++ ++static struct ignore_pattern *ignore_patterns; ++ ++/* Similar to IGNORE_PATTERNS, except that -a or -A causes this ++ variable itself to be ignored. */ ++static struct ignore_pattern *hide_patterns; ++ ++/* True means output nongraphic chars in file names as `?'. ++ (-q, --hide-control-chars) ++ qmark_funny_chars and the quoting style (-Q, --quoting-style=WORD) are ++ independent. The algorithm is: first, obey the quoting style to get a ++ string representing the file name; then, if qmark_funny_chars is set, ++ replace all nonprintable chars in that string with `?'. It's necessary ++ to replace nonprintable chars even in quoted strings, because we don't ++ want to mess up the terminal if control chars get sent to it, and some ++ quoting methods pass through control chars as-is. */ ++static bool qmark_funny_chars; ++ ++/* Quoting options for file and dir name output. */ ++ ++static struct quoting_options *filename_quoting_options; ++static struct quoting_options *dirname_quoting_options; ++ ++/* The number of chars per hardware tab stop. Setting this to zero ++ inhibits the use of TAB characters for separating columns. -T */ ++static size_t tabsize; ++ ++/* True means print each directory name before listing it. */ ++ ++static bool print_dir_name; ++ ++/* The line length to use for breaking lines in many-per-line format. ++ Can be set with -w. */ ++ ++static size_t line_length; ++ ++/* If true, the file listing format requires that stat be called on ++ each file. */ ++ ++static bool format_needs_stat; ++ ++/* Similar to `format_needs_stat', but set if only the file type is ++ needed. */ ++ ++static bool format_needs_type; ++ ++/* An arbitrary limit on the number of bytes in a printed time stamp. 
++ This is set to a relatively small value to avoid the need to worry ++ about denial-of-service attacks on servers that run "ls" on behalf ++ of remote clients. 1000 bytes should be enough for any practical ++ time stamp format. */ ++ ++enum { TIME_STAMP_LEN_MAXIMUM = MAX (1000, INT_STRLEN_BOUND (time_t)) }; ++ ++/* strftime formats for non-recent and recent files, respectively, in ++ -l output. */ ++ ++static char const *long_time_format[2] = ++ { ++ /* strftime format for non-recent files (older than 6 months), in ++ -l output. This should contain the year, month and day (at ++ least), in an order that is understood by people in your ++ locale's territory. Please try to keep the number of used ++ screen columns small, because many people work in windows with ++ only 80 columns. But make this as wide as the other string ++ below, for recent files. */ ++ /* TRANSLATORS: ls output needs to be aligned for ease of reading, ++ so be wary of using variable width fields from the locale. ++ Note %b is handled specially by ls and aligned correctly. ++ Note also that specifying a width as in %5b is erroneous as strftime ++ will count bytes rather than characters in multibyte locales. */ ++ N_("%b %e %Y"), ++ /* strftime format for recent files (younger than 6 months), in -l ++ output. This should contain the month, day and time (at ++ least), in an order that is understood by people in your ++ locale's territory. Please try to keep the number of used ++ screen columns small, because many people work in windows with ++ only 80 columns. But make this as wide as the other string ++ above, for non-recent files. */ ++ /* TRANSLATORS: ls output needs to be aligned for ease of reading, ++ so be wary of using variable width fields from the locale. ++ Note %b is handled specially by ls and aligned correctly. ++ Note also that specifying a width as in %5b is erroneous as strftime ++ will count bytes rather than characters in multibyte locales. 
*/ ++ N_("%b %e %H:%M") ++ }; ++ ++/* The set of signals that are caught. */ ++ ++static sigset_t caught_signals; ++ ++/* If nonzero, the value of the pending fatal signal. */ ++ ++static sig_atomic_t volatile interrupt_signal; ++ ++/* A count of the number of pending stop signals that have been received. */ ++ ++static sig_atomic_t volatile stop_signal_count; ++ ++/* Desired exit status. */ ++ ++static int exit_status; ++ ++/* Exit statuses. */ ++enum ++ { ++ /* "ls" had a minor problem. E.g., while processing a directory, ++ ls obtained the name of an entry via readdir, yet was later ++ unable to stat that name. This happens when listing a directory ++ in which entries are actively being removed or renamed. */ ++ LS_MINOR_PROBLEM = 1, ++ ++ /* "ls" had more serious trouble (e.g., memory exhausted, invalid ++ option, or failure to stat a command line argument). */ ++ LS_FAILURE = 2 ++ }; ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ ++enum ++{ ++ AUTHOR_OPTION = CHAR_MAX + 1, ++ BLOCK_SIZE_OPTION, ++ COLOR_OPTION, ++ DEREFERENCE_COMMAND_LINE_SYMLINK_TO_DIR_OPTION, ++ FILE_TYPE_INDICATOR_OPTION, ++ FORMAT_OPTION, ++ FULL_TIME_OPTION, ++ GROUP_DIRECTORIES_FIRST_OPTION, ++ HIDE_OPTION, ++ INDICATOR_STYLE_OPTION, ++ QUOTING_STYLE_OPTION, ++ SHOW_CONTROL_CHARS_OPTION, ++ SI_OPTION, ++ SORT_OPTION, ++ TIME_OPTION, ++ TIME_STYLE_OPTION ++}; ++ ++static struct option const long_options[] = ++{ ++ {"all", no_argument, NULL, 'a'}, ++ {"escape", no_argument, NULL, 'b'}, ++ {"directory", no_argument, NULL, 'd'}, ++ {"dired", no_argument, NULL, 'D'}, ++ {"full-time", no_argument, NULL, FULL_TIME_OPTION}, ++ {"group-directories-first", no_argument, NULL, ++ GROUP_DIRECTORIES_FIRST_OPTION}, ++ {"human-readable", no_argument, NULL, 'h'}, ++ {"inode", no_argument, NULL, 'i'}, ++ {"numeric-uid-gid", no_argument, NULL, 'n'}, ++ {"no-group", no_argument, NULL, 'G'}, ++ {"hide-control-chars", no_argument, NULL, 'q'}, ++ {"reverse", no_argument, NULL, 'r'}, ++ {"size", no_argument, NULL, 's'}, ++ {"width", required_argument, NULL, 'w'}, ++ {"almost-all", no_argument, NULL, 'A'}, ++ {"ignore-backups", no_argument, NULL, 'B'}, ++ {"classify", no_argument, NULL, 'F'}, ++ {"file-type", no_argument, NULL, FILE_TYPE_INDICATOR_OPTION}, ++ {"si", no_argument, NULL, SI_OPTION}, ++ {"dereference-command-line", no_argument, NULL, 'H'}, ++ {"dereference-command-line-symlink-to-dir", no_argument, NULL, ++ DEREFERENCE_COMMAND_LINE_SYMLINK_TO_DIR_OPTION}, ++ {"hide", required_argument, NULL, HIDE_OPTION}, ++ {"ignore", required_argument, NULL, 'I'}, ++ {"indicator-style", required_argument, NULL, INDICATOR_STYLE_OPTION}, ++ {"dereference", no_argument, NULL, 'L'}, ++ {"literal", no_argument, NULL, 'N'}, ++ {"quote-name", no_argument, NULL, 'Q'}, ++ {"quoting-style", required_argument, NULL, QUOTING_STYLE_OPTION}, ++ {"recursive", no_argument, NULL, 'R'}, ++ {"format", required_argument, NULL, FORMAT_OPTION}, ++ 
{"show-control-chars", no_argument, NULL, SHOW_CONTROL_CHARS_OPTION}, ++ {"sort", required_argument, NULL, SORT_OPTION}, ++ {"tabsize", required_argument, NULL, 'T'}, ++ {"time", required_argument, NULL, TIME_OPTION}, ++ {"time-style", required_argument, NULL, TIME_STYLE_OPTION}, ++ {"color", optional_argument, NULL, COLOR_OPTION}, ++ {"block-size", required_argument, NULL, BLOCK_SIZE_OPTION}, ++ {"context", no_argument, 0, 'Z'}, ++ {"author", no_argument, NULL, AUTHOR_OPTION}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++static char const *const format_args[] = ++{ ++ "verbose", "long", "commas", "horizontal", "across", ++ "vertical", "single-column", NULL ++}; ++static enum format const format_types[] = ++{ ++ long_format, long_format, with_commas, horizontal, horizontal, ++ many_per_line, one_per_line ++}; ++ARGMATCH_VERIFY (format_args, format_types); ++ ++static char const *const sort_args[] = ++{ ++ "none", "time", "size", "extension", "version", NULL ++}; ++static enum sort_type const sort_types[] = ++{ ++ sort_none, sort_time, sort_size, sort_extension, sort_version ++}; ++ARGMATCH_VERIFY (sort_args, sort_types); ++ ++static char const *const time_args[] = ++{ ++ "atime", "access", "use", "ctime", "status", NULL ++}; ++static enum time_type const time_types[] = ++{ ++ time_atime, time_atime, time_atime, time_ctime, time_ctime ++}; ++ARGMATCH_VERIFY (time_args, time_types); ++ ++static char const *const color_args[] = ++{ ++ /* force and none are for compatibility with another color-ls version */ ++ "always", "yes", "force", ++ "never", "no", "none", ++ "auto", "tty", "if-tty", NULL ++}; ++static enum color_type const color_types[] = ++{ ++ color_always, color_always, color_always, ++ color_never, color_never, color_never, ++ color_if_tty, color_if_tty, color_if_tty ++}; ++ARGMATCH_VERIFY (color_args, color_types); ++ ++/* Information about filling a column. 
*/ ++struct column_info ++{ ++ bool valid_len; ++ size_t line_len; ++ size_t *col_arr; ++}; ++ ++/* Array with information about column filledness. */ ++static struct column_info *column_info; ++ ++/* Maximum number of columns ever possible for this display. */ ++static size_t max_idx; ++ ++/* The minimum width of a column is 3: 1 character for the name and 2 ++ for the separating white space. */ ++#define MIN_COLUMN_WIDTH 3 ++ ++ ++/* This zero-based index is used solely with the --dired option. ++ When that option is in effect, this counter is incremented for each ++ byte of output generated by this program so that the beginning ++ and ending indices (in that output) of every file name can be recorded ++ and later output themselves. */ ++static size_t dired_pos; ++ ++#define DIRED_PUTCHAR(c) do {putchar ((c)); ++dired_pos;} while (0) ++ ++/* Write S to STREAM and increment DIRED_POS by S_LEN. */ ++#define DIRED_FPUTS(s, stream, s_len) \ ++ do {fputs (s, stream); dired_pos += s_len;} while (0) ++ ++/* Like DIRED_FPUTS, but for use when S is a literal string. */ ++#define DIRED_FPUTS_LITERAL(s, stream) \ ++ do {fputs (s, stream); dired_pos += sizeof (s) - 1;} while (0) ++ ++#define DIRED_INDENT() \ ++ do \ ++ { \ ++ if (dired) \ ++ DIRED_FPUTS_LITERAL (" ", stdout); \ ++ } \ ++ while (0) ++ ++/* With --dired, store pairs of beginning and ending indices of filenames. */ ++static struct obstack dired_obstack; ++ ++/* With --dired, store pairs of beginning and ending indices of any ++ directory names that appear as headers (just before `total' line) ++ for lists of directory entries. Such directory names are seen when ++ listing hierarchies using -R and when a directory is listed with at ++ least one other command line argument. */ ++static struct obstack subdired_obstack; ++ ++/* Save the current index on the specified obstack, OBS. 
*/ ++#define PUSH_CURRENT_DIRED_POS(obs) \ ++ do \ ++ { \ ++ if (dired) \ ++ obstack_grow (obs, &dired_pos, sizeof (dired_pos)); \ ++ } \ ++ while (0) ++ ++/* With -R, this stack is used to help detect directory cycles. ++ The device/inode pairs on this stack mirror the pairs in the ++ active_dir_set hash table. */ ++static struct obstack dev_ino_obstack; ++ ++/* Push a pair onto the device/inode stack. */ ++#define DEV_INO_PUSH(Dev, Ino) \ ++ do \ ++ { \ ++ struct dev_ino *di; \ ++ obstack_blank (&dev_ino_obstack, sizeof (struct dev_ino)); \ ++ di = -1 + (struct dev_ino *) obstack_next_free (&dev_ino_obstack); \ ++ di->st_dev = (Dev); \ ++ di->st_ino = (Ino); \ ++ } \ ++ while (0) ++ ++/* Pop a dev/ino struct off the global dev_ino_obstack ++ and return that struct. */ ++static struct dev_ino ++dev_ino_pop (void) ++{ ++ assert (sizeof (struct dev_ino) <= obstack_object_size (&dev_ino_obstack)); ++ obstack_blank (&dev_ino_obstack, -(int) (sizeof (struct dev_ino))); ++ return *(struct dev_ino *) obstack_next_free (&dev_ino_obstack); ++} ++ ++/* Note the use commented out below: ++#define ASSERT_MATCHING_DEV_INO(Name, Di) \ ++ do \ ++ { \ ++ struct stat sb; \ ++ assert (Name); \ ++ assert (0 <= stat (Name, &sb)); \ ++ assert (sb.st_dev == Di.st_dev); \ ++ assert (sb.st_ino == Di.st_ino); \ ++ } \ ++ while (0) ++*/ ++ ++/* Write to standard output PREFIX, followed by a space-separated list of ++ the integers stored in OS, all on one line; write nothing if OS is empty. 
*/ ++ ++static void ++dired_dump_obstack (const char *prefix, struct obstack *os) ++{ ++ size_t n_pos; ++ ++ n_pos = obstack_object_size (os) / sizeof (dired_pos); /* number of stored indices */ ++ if (n_pos > 0) ++ { ++ size_t i; ++ size_t *pos; ++ ++ pos = (size_t *) obstack_finish (os); ++ fputs (prefix, stdout); ++ for (i = 0; i < n_pos; i++) ++ printf (" %lu", (unsigned long int) pos[i]); ++ putchar ('\n'); ++ } ++} ++ ++/* Read the abbreviated month names from the locale, to align them ++ and to determine the max width of the field and to truncate names ++ greater than our max allowed. ++ Note even though this handles multibyte locales correctly ++ it's not restricted to them as single byte locales can have ++ variable width abbreviated months and also precomputing/caching ++ the names was seen to increase the performance of ls significantly. */ ++ ++/* max number of display cells to use */ ++enum { MAX_MON_WIDTH = 5 }; ++/* In the unlikely event that the abmon[] storage is not big enough ++ an error message will be displayed, and we revert to using ++ unmodified abbreviated month names from the locale database. */ ++static char abmon[12][MAX_MON_WIDTH * 2 * MB_LEN_MAX + 1]; ++/* minimum width needed to align %b, 0 => don't use precomputed values. */ ++static size_t required_mon_width; ++ ++static size_t ++abmon_init (void) ++{ ++#ifdef HAVE_NL_LANGINFO ++ required_mon_width = MAX_MON_WIDTH; ++ size_t curr_max_width; ++ do ++ { ++ curr_max_width = required_mon_width; ++ required_mon_width = 0; ++ for (int i = 0; i < 12; i++) ++ { ++ size_t width = curr_max_width; ++ ++ size_t req = mbsalign (nl_langinfo (ABMON_1 + i), ++ abmon[i], sizeof (abmon[i]), ++ &width, MBS_ALIGN_LEFT, 0); ++ ++ if (req == (size_t) -1 || req >= sizeof (abmon[i])) ++ { ++ required_mon_width = 0; /* ignore precomputed strings. 
*/ ++ return required_mon_width; ++ } ++ ++ required_mon_width = MAX (required_mon_width, width); ++ } ++ } ++ while (curr_max_width > required_mon_width); ++#endif ++ ++ return required_mon_width; ++} ++ ++static size_t ++dev_ino_hash (void const *x, size_t table_size) ++{ ++ struct dev_ino const *p = x; ++ return (uintmax_t) p->st_ino % table_size; /* hash on the inode number only; st_dev is not used */ ++} ++ ++static bool ++dev_ino_compare (void const *x, void const *y) ++{ ++ struct dev_ino const *a = x; ++ struct dev_ino const *b = y; ++ return SAME_INODE (*a, *b) ? true : false; ++} ++ ++static void ++dev_ino_free (void *x) ++{ ++ free (x); ++} ++ ++/* Add the device/inode pair (DEV/INO) to the set of ++ active directories. Return true if there is already a matching ++ entry in the table. */ ++ ++static bool ++visit_dir (dev_t dev, ino_t ino) ++{ ++ struct dev_ino *ent; ++ struct dev_ino *ent_from_table; ++ bool found_match; ++ ++ ent = xmalloc (sizeof *ent); ++ ent->st_ino = ino; ++ ent->st_dev = dev; ++ ++ /* Attempt to insert this entry into the table. */ ++ ent_from_table = hash_insert (active_dir_set, ent); ++ ++ if (ent_from_table == NULL) ++ { ++ /* Insertion failed due to lack of memory. */ ++ xalloc_die (); ++ } ++ ++ found_match = (ent_from_table != ent); ++ ++ if (found_match) ++ { ++ /* ent was not inserted, so free it. */ ++ free (ent); ++ } ++ ++ return found_match; ++} ++ ++static void ++free_pending_ent (struct pending *p) ++{ ++ free (p->name); ++ free (p->realname); ++ free (p); ++} ++ ++static bool ++is_colored (enum indicator_no type) ++{ ++ size_t len = color_indicator[type].len; ++ char const *s = color_indicator[type].string; ++ return ! (len == 0 /* no sequence set for TYPE */ ++ || (len == 1 && strncmp (s, "0", 1) == 0) /* "0" counts as uncolored */ ++ || (len == 2 && strncmp (s, "00", 2) == 0)); /* likewise "00" */ ++} ++ ++static void ++restore_default_color (void) ++{ ++ put_indicator (&color_indicator[C_LEFT]); ++ put_indicator (&color_indicator[C_RIGHT]); ++} ++ ++/* An ordinary signal was received; arrange for the program to exit. 
*/ ++ ++static void ++sighandler (int sig) ++{ ++ if (! SA_NOCLDSTOP) ++ signal (sig, SIG_IGN); ++ if (! interrupt_signal) ++ interrupt_signal = sig; ++} ++ ++/* A SIGTSTP was received; arrange for the program to suspend itself. */ ++ ++static void ++stophandler (int sig) ++{ ++ if (! SA_NOCLDSTOP) ++ signal (sig, stophandler); ++ if (! interrupt_signal) ++ stop_signal_count++; ++} ++ ++/* Process any pending signals. If signals are caught, this function ++ should be called periodically. Ideally there should never be an ++ unbounded amount of time when signals are not being processed. ++ Signal handling can restore the default colors, so callers must ++ immediately change colors after invoking this function. */ ++ ++static void ++process_signals (void) ++{ ++ while (interrupt_signal || stop_signal_count) ++ { ++ int sig; ++ int stops; ++ sigset_t oldset; ++ ++ if (used_color) ++ restore_default_color (); ++ fflush (stdout); ++ ++ sigprocmask (SIG_BLOCK, &caught_signals, &oldset); ++ ++ /* Reload interrupt_signal and stop_signal_count, in case a new ++ signal was handled before sigprocmask took effect. */ ++ sig = interrupt_signal; ++ stops = stop_signal_count; ++ ++ /* SIGTSTP is special, since the application can receive that signal ++ more than once. In this case, don't set the signal handler to the ++ default. Instead, just raise the uncatchable SIGSTOP. */ ++ if (stops) ++ { ++ stop_signal_count = stops - 1; ++ sig = SIGSTOP; ++ } ++ else ++ signal (sig, SIG_DFL); ++ ++ /* Exit or suspend the program. */ ++ raise (sig); ++ sigprocmask (SIG_SETMASK, &oldset, NULL); ++ ++ /* If execution reaches here, then the program has been ++ continued (after being suspended). */ ++ } ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ struct pending *thispend; ++ int n_files; ++ ++ /* The signals that are trapped, and the number of such signals. */ ++ static int const sig[] = ++ { ++ /* This one is handled specially. */ ++ SIGTSTP, ++ ++ /* The usual suspects. 
*/ ++ SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, ++#ifdef SIGPOLL ++ SIGPOLL, ++#endif ++#ifdef SIGPROF ++ SIGPROF, ++#endif ++#ifdef SIGVTALRM ++ SIGVTALRM, ++#endif ++#ifdef SIGXCPU ++ SIGXCPU, ++#endif ++#ifdef SIGXFSZ ++ SIGXFSZ, ++#endif ++ }; ++ enum { nsigs = ARRAY_CARDINALITY (sig) }; ++ ++#if ! SA_NOCLDSTOP ++ bool caught_sig[nsigs]; ++#endif ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ initialize_exit_failure (LS_FAILURE); ++ atexit (close_stdout); ++ ++ assert (ARRAY_CARDINALITY (color_indicator) + 1 ++ == ARRAY_CARDINALITY (indicator_name)); ++ ++ exit_status = EXIT_SUCCESS; ++ print_dir_name = true; ++ pending_dirs = NULL; ++ ++ current_time.tv_sec = TYPE_MINIMUM (time_t); ++ current_time.tv_nsec = -1; ++ ++ i = decode_switches (argc, argv); ++ ++ if (print_with_color) ++ parse_ls_color (); ++ ++ /* Test print_with_color again, because the call to parse_ls_color ++ may have just reset it -- e.g., if LS_COLORS is invalid. */ ++ if (print_with_color) ++ { ++ /* Avoid following symbolic links when possible. */ ++ if (is_colored (C_ORPHAN) ++ || (is_colored (C_EXEC) && color_symlink_as_referent) ++ || (is_colored (C_MISSING) && format == long_format)) ++ check_symlink_color = true; ++ ++ /* If the standard output is a controlling terminal, watch out ++ for signals, so that the colors can be restored to the ++ default state if "ls" is suspended or interrupted. 
*/ ++ ++ if (0 <= tcgetpgrp (STDOUT_FILENO)) ++ { ++ int j; ++#if SA_NOCLDSTOP ++ struct sigaction act; ++ ++ sigemptyset (&caught_signals); ++ for (j = 0; j < nsigs; j++) ++ { ++ sigaction (sig[j], NULL, &act); ++ if (act.sa_handler != SIG_IGN) ++ sigaddset (&caught_signals, sig[j]); ++ } ++ ++ act.sa_mask = caught_signals; ++ act.sa_flags = SA_RESTART; ++ ++ for (j = 0; j < nsigs; j++) ++ if (sigismember (&caught_signals, sig[j])) ++ { ++ act.sa_handler = sig[j] == SIGTSTP ? stophandler : sighandler; ++ sigaction (sig[j], &act, NULL); ++ } ++#else ++ for (j = 0; j < nsigs; j++) ++ { ++ caught_sig[j] = (signal (sig[j], SIG_IGN) != SIG_IGN); ++ if (caught_sig[j]) ++ { ++ signal (sig[j], sig[j] == SIGTSTP ? stophandler : sighandler); ++ siginterrupt (sig[j], 0); ++ } ++ } ++#endif ++ } ++ } ++ ++ if (dereference == DEREF_UNDEFINED) ++ dereference = ((immediate_dirs ++ || indicator_style == classify ++ || format == long_format) ++ ? DEREF_NEVER ++ : DEREF_COMMAND_LINE_SYMLINK_TO_DIR); ++ ++ /* When using -R, initialize a data structure we'll use to ++ detect any directory cycles. */ ++ if (recursive) ++ { ++ active_dir_set = hash_initialize (INITIAL_TABLE_SIZE, NULL, ++ dev_ino_hash, ++ dev_ino_compare, ++ dev_ino_free); ++ if (active_dir_set == NULL) ++ xalloc_die (); ++ ++ obstack_init (&dev_ino_obstack); ++ } ++ ++ format_needs_stat = sort_type == sort_time || sort_type == sort_size ++ || format == long_format ++ || print_scontext ++ || print_block_size; ++ format_needs_type = (! 
format_needs_stat ++ && (recursive ++ || print_with_color ++ || indicator_style != none ++ || directories_first)); ++ ++ if (dired) ++ { ++ obstack_init (&dired_obstack); ++ obstack_init (&subdired_obstack); ++ } ++ ++ cwd_n_alloc = 100; ++ cwd_file = xnmalloc (cwd_n_alloc, sizeof *cwd_file); ++ cwd_n_used = 0; ++ ++ clear_files (); ++ ++ n_files = argc - i; ++ ++ if (n_files <= 0) ++ { ++ if (immediate_dirs) ++ gobble_file (".", directory, NOT_AN_INODE_NUMBER, true, ""); ++ else ++ queue_directory (".", NULL, true); ++ } ++ else ++ do ++ gobble_file (argv[i++], unknown, NOT_AN_INODE_NUMBER, true, ""); ++ while (i < argc); ++ ++ if (cwd_n_used) ++ { ++ sort_files (); ++ if (!immediate_dirs) ++ extract_dirs_from_files (NULL, true); ++ /* `cwd_n_used' might be zero now. */ ++ } ++ ++ /* In the following if/else blocks, it is sufficient to test `pending_dirs' ++ (and not pending_dirs->name) because there may be no markers in the queue ++ at this point. A marker may be enqueued when extract_dirs_from_files is ++ called with a non-empty string or via print_dir. */ ++ if (cwd_n_used) ++ { ++ print_current_files (); ++ if (pending_dirs) ++ DIRED_PUTCHAR ('\n'); ++ } ++ else if (n_files <= 1 && pending_dirs && pending_dirs->next == 0) ++ print_dir_name = false; ++ ++ while (pending_dirs) ++ { ++ thispend = pending_dirs; ++ pending_dirs = pending_dirs->next; ++ ++ if (LOOP_DETECT) ++ { ++ if (thispend->name == NULL) ++ { ++ /* thispend->name == NULL means this is a marker entry ++ indicating we've finished processing the directory. ++ Use its dev/ino numbers to remove the corresponding ++ entry from the active_dir_set hash table. 
*/ ++ struct dev_ino di = dev_ino_pop (); ++ struct dev_ino *found = hash_delete (active_dir_set, &di); ++ /* ASSERT_MATCHING_DEV_INO (thispend->realname, di); */ ++ assert (found); ++ dev_ino_free (found); ++ free_pending_ent (thispend); ++ continue; ++ } ++ } ++ ++ print_dir (thispend->name, thispend->realname, ++ thispend->command_line_arg); ++ ++ free_pending_ent (thispend); ++ print_dir_name = true; ++ } ++ ++ if (print_with_color) ++ { ++ int j; ++ ++ if (used_color) ++ restore_default_color (); ++ fflush (stdout); ++ ++ /* Restore the default signal handling. */ ++#if SA_NOCLDSTOP ++ for (j = 0; j < nsigs; j++) ++ if (sigismember (&caught_signals, sig[j])) ++ signal (sig[j], SIG_DFL); ++#else ++ for (j = 0; j < nsigs; j++) ++ if (caught_sig[j]) ++ signal (sig[j], SIG_DFL); ++#endif ++ ++ /* Act on any signals that arrived before the default was restored. ++ This can process signals out of order, but there doesn't seem to ++ be an easy way to do them in order, and the order isn't that ++ important anyway. */ ++ for (j = stop_signal_count; j; j--) ++ raise (SIGSTOP); ++ j = interrupt_signal; ++ if (j) ++ raise (j); ++ } ++ ++ if (dired) ++ { ++ /* No need to free these since we're about to exit. */ ++ dired_dump_obstack ("//DIRED//", &dired_obstack); ++ dired_dump_obstack ("//SUBDIRED//", &subdired_obstack); ++ printf ("//DIRED-OPTIONS// --quoting-style=%s\n", ++ quoting_style_args[get_quoting_style (filename_quoting_options)]); ++ } ++ ++ if (LOOP_DETECT) ++ { ++ assert (hash_get_n_entries (active_dir_set) == 0); ++ hash_free (active_dir_set); ++ } ++ ++ exit (exit_status); ++} ++ ++/* Set all the option flags according to the switches specified. ++ Return the index of the first non-option argument. */ ++ ++static int ++decode_switches (int argc, char **argv) ++{ ++ char *time_style_option = NULL; ++ ++ /* Record whether there is an option specifying sort type. 
*/ ++ bool sort_type_specified = false; ++ ++ qmark_funny_chars = false; ++ ++ /* initialize all switches to default settings */ ++ ++ switch (ls_mode) ++ { ++ case LS_MULTI_COL: ++ /* This is for the `dir' program. */ ++ format = many_per_line; ++ set_quoting_style (NULL, escape_quoting_style); ++ break; ++ ++ case LS_LONG_FORMAT: ++ /* This is for the `vdir' program. */ ++ format = long_format; ++ set_quoting_style (NULL, escape_quoting_style); ++ break; ++ ++ case LS_LS: ++ /* This is for the `ls' program. */ ++ if (isatty (STDOUT_FILENO)) ++ { ++ format = many_per_line; ++ /* See description of qmark_funny_chars, above. */ ++ qmark_funny_chars = true; ++ } ++ else ++ { ++ format = one_per_line; ++ qmark_funny_chars = false; ++ } ++ break; ++ ++ default: ++ abort (); ++ } ++ ++ time_type = time_mtime; ++ sort_type = sort_name; ++ sort_reverse = false; ++ numeric_ids = false; ++ print_block_size = false; ++ indicator_style = none; ++ print_inode = false; ++ dereference = DEREF_UNDEFINED; ++ recursive = false; ++ immediate_dirs = false; ++ ignore_mode = IGNORE_DEFAULT; ++ ignore_patterns = NULL; ++ hide_patterns = NULL; ++ print_scontext = false; ++ ++ /* FIXME: put this in a function. 
*/ ++ { ++ char const *q_style = getenv ("QUOTING_STYLE"); ++ if (q_style) ++ { ++ int i = ARGMATCH (q_style, quoting_style_args, quoting_style_vals); ++ if (0 <= i) ++ set_quoting_style (NULL, quoting_style_vals[i]); ++ else ++ error (0, 0, ++ _("ignoring invalid value of environment variable QUOTING_STYLE: %s"), ++ quotearg (q_style)); ++ } ++ } ++ ++ { ++ char const *ls_block_size = getenv ("LS_BLOCK_SIZE"); ++ human_options (ls_block_size, ++ &human_output_opts, &output_block_size); ++ if (ls_block_size || getenv ("BLOCK_SIZE")) ++ file_output_block_size = output_block_size; ++ } ++ ++ line_length = 80; ++ { ++ char const *p = getenv ("COLUMNS"); ++ if (p && *p) ++ { ++ unsigned long int tmp_ulong; ++ if (xstrtoul (p, NULL, 0, &tmp_ulong, NULL) == LONGINT_OK ++ && 0 < tmp_ulong && tmp_ulong <= SIZE_MAX) ++ { ++ line_length = tmp_ulong; ++ } ++ else ++ { ++ error (0, 0, ++ _("ignoring invalid width in environment variable COLUMNS: %s"), ++ quotearg (p)); ++ } ++ } ++ } ++ ++#ifdef TIOCGWINSZ ++ { ++ struct winsize ws; ++ ++ if (ioctl (STDOUT_FILENO, TIOCGWINSZ, &ws) != -1 ++ && 0 < ws.ws_col && ws.ws_col == (size_t) ws.ws_col) ++ line_length = ws.ws_col; ++ } ++#endif ++ ++ { ++ char const *p = getenv ("TABSIZE"); ++ tabsize = 8; ++ if (p) ++ { ++ unsigned long int tmp_ulong; ++ if (xstrtoul (p, NULL, 0, &tmp_ulong, NULL) == LONGINT_OK ++ && tmp_ulong <= SIZE_MAX) ++ { ++ tabsize = tmp_ulong; ++ } ++ else ++ { ++ error (0, 0, ++ _("ignoring invalid tab size in environment variable TABSIZE: %s"), ++ quotearg (p)); ++ } ++ } ++ } ++ ++ for (;;) ++ { ++ int oi = -1; ++ int c = getopt_long (argc, argv, ++ "abcdfghiklmnopqrstuvw:xABCDFGHI:LNQRST:UXZ1", ++ long_options, &oi); ++ if (c == -1) ++ break; ++ ++ switch (c) ++ { ++ case 'a': ++ ignore_mode = IGNORE_MINIMAL; ++ break; ++ ++ case 'b': ++ set_quoting_style (NULL, escape_quoting_style); ++ break; ++ ++ case 'c': ++ time_type = time_ctime; ++ break; ++ ++ case 'd': ++ immediate_dirs = true; ++ break; ++ ++ case 
'f': ++ /* Same as enabling -a -U and disabling -l -s. */ ++ ignore_mode = IGNORE_MINIMAL; ++ sort_type = sort_none; ++ sort_type_specified = true; ++ /* disable -l */ ++ if (format == long_format) ++ format = (isatty (STDOUT_FILENO) ? many_per_line : one_per_line); ++ print_block_size = false; /* disable -s */ ++ print_with_color = false; /* disable --color */ ++ break; ++ ++ case FILE_TYPE_INDICATOR_OPTION: /* --file-type */ ++ indicator_style = file_type; ++ break; ++ ++ case 'g': ++ format = long_format; ++ print_owner = false; ++ break; ++ ++ case 'h': ++ human_output_opts = human_autoscale | human_SI | human_base_1024; ++ file_output_block_size = output_block_size = 1; ++ break; ++ ++ case 'i': ++ print_inode = true; ++ break; ++ ++ case 'k': ++ human_output_opts = 0; ++ file_output_block_size = output_block_size = 1024; ++ break; ++ ++ case 'l': ++ format = long_format; ++ break; ++ ++ case 'm': ++ format = with_commas; ++ break; ++ ++ case 'n': ++ numeric_ids = true; ++ format = long_format; ++ break; ++ ++ case 'o': /* Just like -l, but don't display group info. */ ++ format = long_format; ++ print_group = false; ++ break; ++ ++ case 'p': ++ indicator_style = slash; ++ break; ++ ++ case 'q': ++ qmark_funny_chars = true; ++ break; ++ ++ case 'r': ++ sort_reverse = true; ++ break; ++ ++ case 's': ++ print_block_size = true; ++ break; ++ ++ case 't': ++ sort_type = sort_time; ++ sort_type_specified = true; ++ break; ++ ++ case 'u': ++ time_type = time_atime; ++ break; ++ ++ case 'v': ++ sort_type = sort_version; ++ sort_type_specified = true; ++ break; ++ ++ case 'w': ++ { ++ unsigned long int tmp_ulong; ++ if (xstrtoul (optarg, NULL, 0, &tmp_ulong, NULL) != LONGINT_OK ++ || ! 
(0 < tmp_ulong && tmp_ulong <= SIZE_MAX)) ++ error (LS_FAILURE, 0, _("invalid line width: %s"), ++ quotearg (optarg)); ++ line_length = tmp_ulong; ++ break; ++ } ++ ++ case 'x': ++ format = horizontal; ++ break; ++ ++ case 'A': ++ if (ignore_mode == IGNORE_DEFAULT) ++ ignore_mode = IGNORE_DOT_AND_DOTDOT; ++ break; ++ ++ case 'B': ++ add_ignore_pattern ("*~"); ++ add_ignore_pattern (".*~"); ++ break; ++ ++ case 'C': ++ format = many_per_line; ++ break; ++ ++ case 'D': ++ dired = true; ++ break; ++ ++ case 'F': ++ indicator_style = classify; ++ break; ++ ++ case 'G': /* inhibit display of group info */ ++ print_group = false; ++ break; ++ ++ case 'H': ++ dereference = DEREF_COMMAND_LINE_ARGUMENTS; ++ break; ++ ++ case DEREFERENCE_COMMAND_LINE_SYMLINK_TO_DIR_OPTION: ++ dereference = DEREF_COMMAND_LINE_SYMLINK_TO_DIR; ++ break; ++ ++ case 'I': ++ add_ignore_pattern (optarg); ++ break; ++ ++ case 'L': ++ dereference = DEREF_ALWAYS; ++ break; ++ ++ case 'N': ++ set_quoting_style (NULL, literal_quoting_style); ++ break; ++ ++ case 'Q': ++ set_quoting_style (NULL, c_quoting_style); ++ break; ++ ++ case 'R': ++ recursive = true; ++ break; ++ ++ case 'S': ++ sort_type = sort_size; ++ sort_type_specified = true; ++ break; ++ ++ case 'T': ++ { ++ unsigned long int tmp_ulong; ++ if (xstrtoul (optarg, NULL, 0, &tmp_ulong, NULL) != LONGINT_OK ++ || SIZE_MAX < tmp_ulong) ++ error (LS_FAILURE, 0, _("invalid tab size: %s"), ++ quotearg (optarg)); ++ tabsize = tmp_ulong; ++ break; ++ } ++ ++ case 'U': ++ sort_type = sort_none; ++ sort_type_specified = true; ++ break; ++ ++ case 'X': ++ sort_type = sort_extension; ++ sort_type_specified = true; ++ break; ++ ++ case '1': ++ /* -1 has no effect after -l. 
*/ ++ if (format != long_format) ++ format = one_per_line; ++ break; ++ ++ case AUTHOR_OPTION: ++ print_author = true; ++ break; ++ ++ case HIDE_OPTION: ++ { ++ struct ignore_pattern *hide = xmalloc (sizeof *hide); ++ hide->pattern = optarg; ++ hide->next = hide_patterns; ++ hide_patterns = hide; ++ } ++ break; ++ ++ case SORT_OPTION: ++ sort_type = XARGMATCH ("--sort", optarg, sort_args, sort_types); ++ sort_type_specified = true; ++ break; ++ ++ case GROUP_DIRECTORIES_FIRST_OPTION: ++ directories_first = true; ++ break; ++ ++ case TIME_OPTION: ++ time_type = XARGMATCH ("--time", optarg, time_args, time_types); ++ break; ++ ++ case FORMAT_OPTION: ++ format = XARGMATCH ("--format", optarg, format_args, format_types); ++ break; ++ ++ case FULL_TIME_OPTION: ++ format = long_format; ++ time_style_option = bad_cast ("full-iso"); ++ break; ++ ++ case COLOR_OPTION: ++ { ++ int i; ++ if (optarg) ++ i = XARGMATCH ("--color", optarg, color_args, color_types); ++ else ++ /* Using --color with no argument is equivalent to using ++ --color=always. */ ++ i = color_always; ++ ++ print_with_color = (i == color_always ++ || (i == color_if_tty ++ && isatty (STDOUT_FILENO))); ++ ++ if (print_with_color) ++ { ++ /* Don't use TAB characters in output. Some terminal ++ emulators can't handle the combination of tabs and ++ color codes on the same line. 
*/ ++ tabsize = 0; ++ } ++ break; ++ } ++ ++ case INDICATOR_STYLE_OPTION: ++ indicator_style = XARGMATCH ("--indicator-style", optarg, ++ indicator_style_args, ++ indicator_style_types); ++ break; ++ ++ case QUOTING_STYLE_OPTION: ++ set_quoting_style (NULL, ++ XARGMATCH ("--quoting-style", optarg, ++ quoting_style_args, ++ quoting_style_vals)); ++ break; ++ ++ case TIME_STYLE_OPTION: ++ time_style_option = optarg; ++ break; ++ ++ case SHOW_CONTROL_CHARS_OPTION: ++ qmark_funny_chars = false; ++ break; ++ ++ case BLOCK_SIZE_OPTION: ++ { ++ enum strtol_error e = human_options (optarg, &human_output_opts, ++ &output_block_size); ++ if (e != LONGINT_OK) ++ xstrtol_fatal (e, oi, 0, long_options, optarg); ++ file_output_block_size = output_block_size; ++ } ++ break; ++ ++ case SI_OPTION: ++ human_output_opts = human_autoscale | human_SI; ++ file_output_block_size = output_block_size = 1; ++ break; ++ ++ case 'Z': ++ print_scontext = true; ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (LS_FAILURE); ++ } ++ } ++ ++ max_idx = MAX (1, line_length / MIN_COLUMN_WIDTH); ++ ++ filename_quoting_options = clone_quoting_options (NULL); ++ if (get_quoting_style (filename_quoting_options) == escape_quoting_style) ++ set_char_quoting (filename_quoting_options, ' ', 1); ++ if (file_type <= indicator_style) ++ { ++ char const *p; ++ for (p = "*=>@|" + indicator_style - file_type; *p; p++) ++ set_char_quoting (filename_quoting_options, *p, 1); ++ } ++ ++ dirname_quoting_options = clone_quoting_options (NULL); ++ set_char_quoting (dirname_quoting_options, ':', 1); ++ ++ /* --dired is meaningful only with --format=long (-l). ++ Otherwise, ignore it. FIXME: warn about this? ++ Alternatively, make --dired imply --format=long? 
*/ ++ if (dired && format != long_format) ++ dired = false; ++ ++ /* If -c or -u is specified and not -l (or any other option that implies -l), ++ and no sort-type was specified, then sort by the ctime (-c) or atime (-u). ++ The behavior of ls when using either -c or -u but with neither -l nor -t ++ appears to be unspecified by POSIX. So, with GNU ls, `-u' alone means ++ sort by atime (this is the one that's not specified by the POSIX spec), ++ -lu means show atime and sort by name, -lut means show atime and sort ++ by atime. */ ++ ++ if ((time_type == time_ctime || time_type == time_atime) ++ && !sort_type_specified && format != long_format) ++ { ++ sort_type = sort_time; ++ } ++ ++ if (format == long_format) ++ { ++ char *style = time_style_option; ++ static char const posix_prefix[] = "posix-"; ++ ++ if (! style) ++ if (! (style = getenv ("TIME_STYLE"))) ++ style = bad_cast ("locale"); ++ ++ while (strncmp (style, posix_prefix, sizeof posix_prefix - 1) == 0) ++ { ++ if (! hard_locale (LC_TIME)) ++ return optind; ++ style += sizeof posix_prefix - 1; ++ } ++ ++ if (*style == '+') ++ { ++ char *p0 = style + 1; ++ char *p1 = strchr (p0, '\n'); ++ if (! p1) ++ p1 = p0; ++ else ++ { ++ if (strchr (p1 + 1, '\n')) ++ error (LS_FAILURE, 0, _("invalid time style format %s"), ++ quote (p0)); ++ *p1++ = '\0'; ++ } ++ long_time_format[0] = p0; ++ long_time_format[1] = p1; ++ } ++ else ++ switch (XARGMATCH ("time style", style, ++ time_style_args, ++ time_style_types)) ++ { ++ case full_iso_time_style: ++ long_time_format[0] = long_time_format[1] = ++ "%Y-%m-%d %H:%M:%S.%N %z"; ++ break; ++ ++ case long_iso_time_style: ++ case_long_iso_time_style: ++ long_time_format[0] = long_time_format[1] = "%Y-%m-%d %H:%M"; ++ break; ++ ++ case iso_time_style: ++ long_time_format[0] = "%Y-%m-%d "; ++ long_time_format[1] = "%m-%d %H:%M"; ++ break; ++ ++ case locale_time_style: ++ if (hard_locale (LC_TIME)) ++ { ++ /* Ensure that the locale has translations for both ++ formats. 
If not, fall back on long-iso format. */ ++ int i; ++ for (i = 0; i < 2; i++) ++ { ++ char const *locale_format = ++ dcgettext (NULL, long_time_format[i], LC_TIME); ++ if (locale_format == long_time_format[i]) ++ goto case_long_iso_time_style; ++ long_time_format[i] = locale_format; ++ } ++ } ++ } ++ /* Note we leave %5b etc. alone so user widths/flags are honored. */ ++ if (strstr (long_time_format[0],"%b") || strstr (long_time_format[1],"%b")) ++ if (!abmon_init ()) ++ error (0, 0, _("error initializing month strings")); ++ } ++ ++ return optind; ++} ++ ++/* Parse a string as part of the LS_COLORS variable; this may involve ++ decoding all kinds of escape characters. If equals_end is set an ++ unescaped equal sign ends the string, otherwise only a : or \0 ++ does. Set *OUTPUT_COUNT to the number of bytes output. Return ++ true if successful. ++ ++ The resulting string is *not* null-terminated, but may contain ++ embedded nulls. ++ ++ Note that both dest and src are char **; on return they point to ++ the first free byte after the array and the character that ended ++ the input string, respectively. */ ++ ++static bool ++get_funky_string (char **dest, const char **src, bool equals_end, ++ size_t *output_count) ++{ ++ char num; /* For numerical codes */ ++ size_t count; /* Something to count with */ ++ enum { ++ ST_GND, ST_BACKSLASH, ST_OCTAL, ST_HEX, ST_CARET, ST_END, ST_ERROR ++ } state; ++ const char *p; ++ char *q; ++ ++ p = *src; /* We don't want to double-indirect */ ++ q = *dest; /* the whole darn time. */ ++ ++ count = 0; /* No characters counted in yet. */ ++ num = 0; ++ ++ state = ST_GND; /* Start in ground state. 
*/ /* Run the escape-decoding state machine until it reaches ST_END or ST_ERROR. */ ++ while (state < ST_END) ++ { ++ switch (state) ++ { ++ case ST_GND: /* Ground state (no escapes) */ ++ switch (*p) ++ { ++ case ':': ++ case '\0': ++ state = ST_END; /* End of string */ ++ break; ++ case '\\': ++ state = ST_BACKSLASH; /* Backslash escape sequence */ ++ ++p; ++ break; ++ case '^': ++ state = ST_CARET; /* Caret escape */ ++ ++p; ++ break; ++ case '=': ++ if (equals_end) ++ { ++ state = ST_END; /* End */ ++ break; ++ } ++ /* else fall through */ ++ default: /* Ordinary byte: copy it through unchanged */ ++ *(q++) = *(p++); ++ ++count; ++ break; ++ } ++ break; ++ ++ case ST_BACKSLASH: /* Backslash escaped character */ ++ switch (*p) ++ { ++ case '0': ++ case '1': ++ case '2': ++ case '3': ++ case '4': ++ case '5': ++ case '6': ++ case '7': ++ state = ST_OCTAL; /* Octal sequence */ ++ num = *p - '0'; /* First digit; ST_OCTAL accumulates the rest */ ++ break; ++ case 'x': ++ case 'X': ++ state = ST_HEX; /* Hex sequence */ ++ num = 0; /* Digits follow the 'x'; ST_HEX accumulates them */ ++ break; ++ case 'a': /* Bell */ ++ num = '\a'; ++ break; ++ case 'b': /* Backspace */ ++ num = '\b'; ++ break; ++ case 'e': /* Escape */ ++ num = 27; ++ break; ++ case 'f': /* Form feed */ ++ num = '\f'; ++ break; ++ case 'n': /* Newline */ ++ num = '\n'; ++ break; ++ case 'r': /* Carriage return */ ++ num = '\r'; ++ break; ++ case 't': /* Tab */ ++ num = '\t'; ++ break; ++ case 'v': /* Vtab */ ++ num = '\v'; ++ break; ++ case '?': /* Delete */ ++ num = 127; ++ break; ++ case '_': /* Space */ ++ num = ' '; ++ break; ++ case '\0': /* End of string */ ++ state = ST_ERROR; /* Error!
*/ ++ break; ++ default: /* Escaped character like \ ^ : = */ ++ num = *p; ++ break; ++ } ++ if (state == ST_BACKSLASH) ++ { ++ *(q++) = num; ++ ++count; ++ state = ST_GND; ++ } ++ ++p; ++ break; ++ ++ case ST_OCTAL: /* Octal sequence */ ++ if (*p < '0' || *p > '7') ++ { ++ *(q++) = num; ++ ++count; ++ state = ST_GND; ++ } ++ else ++ num = (num << 3) + (*(p++) - '0'); ++ break; ++ ++ case ST_HEX: /* Hex sequence */ ++ switch (*p) ++ { ++ case '0': ++ case '1': ++ case '2': ++ case '3': ++ case '4': ++ case '5': ++ case '6': ++ case '7': ++ case '8': ++ case '9': ++ num = (num << 4) + (*(p++) - '0'); ++ break; ++ case 'a': ++ case 'b': ++ case 'c': ++ case 'd': ++ case 'e': ++ case 'f': ++ num = (num << 4) + (*(p++) - 'a') + 10; ++ break; ++ case 'A': ++ case 'B': ++ case 'C': ++ case 'D': ++ case 'E': ++ case 'F': ++ num = (num << 4) + (*(p++) - 'A') + 10; ++ break; ++ default: ++ *(q++) = num; ++ ++count; ++ state = ST_GND; ++ break; ++ } ++ break; ++ ++ case ST_CARET: /* Caret escape */ ++ state = ST_GND; /* Should be the next state... 
*/ ++ if (*p >= '@' && *p <= '~') ++ { ++ *(q++) = *(p++) & 037; ++ ++count; ++ } ++ else if (*p == '?') ++ { ++ *(q++) = 127; ++ ++count; ++ } ++ else ++ state = ST_ERROR; ++ break; ++ ++ default: ++ abort (); ++ } ++ } ++ ++ *dest = q; ++ *src = p; ++ *output_count = count; ++ ++ return state != ST_ERROR; ++} ++ ++static void ++parse_ls_color (void) ++{ ++ const char *p; /* Pointer to character being parsed */ ++ char *buf; /* color_buf buffer pointer */ ++ int state; /* State of parser */ ++ int ind_no; /* Indicator number */ ++ char label[3]; /* Indicator label */ ++ struct color_ext_type *ext; /* Extension we are working on */ ++ ++ if ((p = getenv ("LS_COLORS")) == NULL || *p == '\0') ++ return; ++ ++ ext = NULL; ++ strcpy (label, "??"); ++ ++ /* This is an overly conservative estimate, but any possible ++ LS_COLORS string will *not* generate a color_buf longer than ++ itself, so it is a safe way of allocating a buffer in ++ advance. */ ++ buf = color_buf = xstrdup (p); ++ ++ state = 1; ++ while (state > 0) ++ { ++ switch (state) ++ { ++ case 1: /* First label character */ ++ switch (*p) ++ { ++ case ':': ++ ++p; ++ break; ++ ++ case '*': ++ /* Allocate new extension block and add to head of ++ linked list (this way a later definition will ++ override an earlier one, which can be useful for ++ having terminal-specific defs override global). */ ++ ++ ext = xmalloc (sizeof *ext); ++ ext->next = color_ext_list; ++ color_ext_list = ext; ++ ++ ++p; ++ ext->ext.string = buf; ++ ++ state = (get_funky_string (&buf, &p, true, &ext->ext.len) ++ ? 4 : -1); ++ break; ++ ++ case '\0': ++ state = 0; /* Done! */ ++ break; ++ ++ default: /* Assume it is file type label */ ++ label[0] = *(p++); ++ state = 2; ++ break; ++ } ++ break; ++ ++ case 2: /* Second label character */ ++ if (*p) ++ { ++ label[1] = *(p++); ++ state = 3; ++ } ++ else ++ state = -1; /* Error */ ++ break; ++ ++ case 3: /* Equal sign after indicator label */ ++ state = -1; /* Assume failure... 
*/ ++ if (*(p++) == '=')/* It *should* be... */ ++ { ++ for (ind_no = 0; indicator_name[ind_no] != NULL; ++ind_no) ++ { ++ if (STREQ (label, indicator_name[ind_no])) ++ { ++ color_indicator[ind_no].string = buf; ++ state = (get_funky_string (&buf, &p, false, ++ &color_indicator[ind_no].len) ++ ? 1 : -1); ++ break; ++ } ++ } ++ if (state == -1) ++ error (0, 0, _("unrecognized prefix: %s"), quotearg (label)); ++ } ++ break; ++ ++ case 4: /* Equal sign after *.ext */ ++ if (*(p++) == '=') ++ { ++ ext->seq.string = buf; ++ state = (get_funky_string (&buf, &p, false, &ext->seq.len) ++ ? 1 : -1); ++ } ++ else ++ state = -1; ++ break; ++ } ++ } ++ ++ if (state < 0) ++ { ++ struct color_ext_type *e; ++ struct color_ext_type *e2; ++ ++ error (0, 0, ++ _("unparsable value for LS_COLORS environment variable")); ++ free (color_buf); ++ for (e = color_ext_list; e != NULL; /* empty */) ++ { ++ e2 = e; ++ e = e->next; ++ free (e2); ++ } ++ print_with_color = false; ++ } ++ ++ if (color_indicator[C_LINK].len == 6 ++ && !strncmp (color_indicator[C_LINK].string, "target", 6)) ++ color_symlink_as_referent = true; ++} ++ ++/* Set the exit status to report a failure. If SERIOUS, it is a ++ serious failure; otherwise, it is merely a minor problem. */ ++ ++static void ++set_exit_status (bool serious) ++{ ++ if (serious) ++ exit_status = LS_FAILURE; ++ else if (exit_status == EXIT_SUCCESS) ++ exit_status = LS_MINOR_PROBLEM; ++} ++ ++/* Assuming a failure is serious if SERIOUS, use the printf-style ++ MESSAGE to report the failure to access a file named FILE. Assume ++ errno is set appropriately for the failure. */ ++ ++static void ++file_failure (bool serious, char const *message, char const *file) ++{ ++ error (0, errno, message, quotearg_colon (file)); ++ set_exit_status (serious); ++} ++ ++/* Request that the directory named NAME have its contents listed later. ++ If REALNAME is nonzero, it will be used instead of NAME when the ++ directory name is printed. 
This allows symbolic links to directories ++ to be treated as regular directories but still be listed under their ++ real names. NAME == NULL is used to insert a marker entry for the ++ directory named in REALNAME. ++ If NAME is non-NULL, we use its dev/ino information to save ++ a call to stat -- when doing a recursive (-R) traversal. ++ COMMAND_LINE_ARG means this directory was mentioned on the command line. */ ++ ++static void ++queue_directory (char const *name, char const *realname, bool command_line_arg) ++{ ++ struct pending *new = xmalloc (sizeof *new); ++ new->realname = realname ? xstrdup (realname) : NULL; ++ new->name = name ? xstrdup (name) : NULL; ++ new->command_line_arg = command_line_arg; ++ new->next = pending_dirs; ++ pending_dirs = new; ++} ++ ++/* Read directory NAME, and list the files in it. ++ If REALNAME is nonzero, print its name instead of NAME; ++ this is used for symbolic links to directories. ++ COMMAND_LINE_ARG means this directory was mentioned on the command line. */ ++ ++static void ++print_dir (char const *name, char const *realname, bool command_line_arg) ++{ ++ DIR *dirp; ++ struct dirent *next; ++ uintmax_t total_blocks = 0; ++ static bool first = true; ++ ++ errno = 0; ++ dirp = opendir (name); ++ if (!dirp) ++ { ++ file_failure (command_line_arg, _("cannot open directory %s"), name); ++ return; ++ } ++ ++ if (LOOP_DETECT) ++ { ++ struct stat dir_stat; ++ int fd = dirfd (dirp); ++ ++ /* If dirfd failed, endure the overhead of using stat. */ ++ if ((0 <= fd ++ ? fstat (fd, &dir_stat) ++ : stat (name, &dir_stat)) < 0) ++ { ++ file_failure (command_line_arg, ++ _("cannot determine device and inode of %s"), name); ++ closedir (dirp); ++ return; ++ } ++ ++ /* If we've already visited this dev/inode pair, warn that ++ we've found a loop, and do not process this directory. 
*/ ++ if (visit_dir (dir_stat.st_dev, dir_stat.st_ino)) ++ { ++ error (0, 0, _("%s: not listing already-listed directory"), ++ quotearg_colon (name)); ++ closedir (dirp); ++ set_exit_status (true); ++ return; ++ } ++ ++ DEV_INO_PUSH (dir_stat.st_dev, dir_stat.st_ino); ++ } ++ ++ if (recursive || print_dir_name) ++ { ++ if (!first) ++ DIRED_PUTCHAR ('\n'); ++ first = false; ++ DIRED_INDENT (); ++ PUSH_CURRENT_DIRED_POS (&subdired_obstack); ++ dired_pos += quote_name (stdout, realname ? realname : name, ++ dirname_quoting_options, NULL); ++ PUSH_CURRENT_DIRED_POS (&subdired_obstack); ++ DIRED_FPUTS_LITERAL (":\n", stdout); ++ } ++ ++ /* Read the directory entries, and insert the subfiles into the `cwd_file' ++ table. */ ++ ++ clear_files (); ++ ++ while (1) ++ { ++ /* Set errno to zero so we can distinguish between a readdir failure ++ and when readdir simply finds that there are no more entries. */ ++ errno = 0; ++ next = readdir (dirp); ++ if (next) ++ { ++ if (! file_ignored (next->d_name)) ++ { ++ enum filetype type = unknown; ++ ++#if HAVE_STRUCT_DIRENT_D_TYPE ++ switch (next->d_type) ++ { ++ case DT_BLK: type = blockdev; break; ++ case DT_CHR: type = chardev; break; ++ case DT_DIR: type = directory; break; ++ case DT_FIFO: type = fifo; break; ++ case DT_LNK: type = symbolic_link; break; ++ case DT_REG: type = normal; break; ++ case DT_SOCK: type = sock; break; ++# ifdef DT_WHT ++ case DT_WHT: type = whiteout; break; ++# endif ++ } ++#endif ++ total_blocks += gobble_file (next->d_name, type, ++ RELIABLE_D_INO (next), ++ false, name); ++ ++ /* In this narrow case, print out each name right away, so ++ ls uses constant memory while processing the entries of ++ this directory. Useful when there are many (millions) ++ of entries in a directory. */ ++ if (format == one_per_line && sort_type == sort_none ++ && !print_block_size && !recursive) ++ { ++ /* We must call sort_files in spite of ++ "sort_type == sort_none" for its initialization ++ of the sorted_file vector. 
*/ ++ sort_files (); ++ print_current_files (); ++ clear_files (); ++ } ++ } ++ } ++ else if (errno != 0) ++ { ++ file_failure (command_line_arg, _("reading directory %s"), name); ++ if (errno != EOVERFLOW) ++ break; ++ } ++ else ++ break; ++ } ++ ++ if (closedir (dirp) != 0) ++ { ++ file_failure (command_line_arg, _("closing directory %s"), name); ++ /* Don't return; print whatever we got. */ ++ } ++ ++ /* Sort the directory contents. */ ++ sort_files (); ++ ++ /* If any member files are subdirectories, perhaps they should have their ++ contents listed rather than being mentioned here as files. */ ++ ++ if (recursive) ++ extract_dirs_from_files (name, command_line_arg); ++ ++ if (format == long_format || print_block_size) ++ { ++ const char *p; ++ char buf[LONGEST_HUMAN_READABLE + 1]; ++ ++ DIRED_INDENT (); ++ p = _("total"); ++ DIRED_FPUTS (p, stdout, strlen (p)); ++ DIRED_PUTCHAR (' '); ++ p = human_readable (total_blocks, buf, human_output_opts, ++ ST_NBLOCKSIZE, output_block_size); ++ DIRED_FPUTS (p, stdout, strlen (p)); ++ DIRED_PUTCHAR ('\n'); ++ } ++ ++ if (cwd_n_used) ++ print_current_files (); ++} ++ ++/* Add `pattern' to the list of patterns for which files that match are ++ not listed. */ ++ ++static void ++add_ignore_pattern (const char *pattern) ++{ ++ struct ignore_pattern *ignore; ++ ++ ignore = xmalloc (sizeof *ignore); ++ ignore->pattern = pattern; ++ /* Add it to the head of the linked list. */ ++ ignore->next = ignore_patterns; ++ ignore_patterns = ignore; ++} ++ ++/* Return true if one of the PATTERNS matches FILE. */ ++ ++static bool ++patterns_match (struct ignore_pattern const *patterns, char const *file) ++{ ++ struct ignore_pattern const *p; ++ for (p = patterns; p; p = p->next) ++ if (fnmatch (p->pattern, file, FNM_PERIOD) == 0) ++ return true; ++ return false; ++} ++ ++/* Return true if FILE should be ignored. */ ++ ++static bool ++file_ignored (char const *name) ++{ ++ return ((ignore_mode != IGNORE_MINIMAL ++ && name[0] == '.' 
++ && (ignore_mode == IGNORE_DEFAULT || ! name[1 + (name[1] == '.')])) ++ || (ignore_mode == IGNORE_DEFAULT ++ && patterns_match (hide_patterns, name)) ++ || patterns_match (ignore_patterns, name)); ++} ++ ++/* POSIX requires that a file size be printed without a sign, even ++ when negative. Assume the typical case where negative sizes are ++ actually positive values that have wrapped around. */ ++ ++static uintmax_t ++unsigned_file_size (off_t size) ++{ ++ return size + (size < 0) * ((uintmax_t) OFF_T_MAX - OFF_T_MIN + 1); ++} ++ ++/* Enter and remove entries in the table `cwd_file'. */ ++ ++/* Empty the table of files. */ ++ ++static void ++clear_files (void) ++{ ++ size_t i; ++ ++ for (i = 0; i < cwd_n_used; i++) ++ { ++ struct fileinfo *f = sorted_file[i]; ++ free (f->name); ++ free (f->linkname); ++ if (f->scontext != UNKNOWN_SECURITY_CONTEXT) ++ freecon (f->scontext); ++ } ++ ++ cwd_n_used = 0; ++ any_has_acl = false; ++ inode_number_width = 0; ++ block_size_width = 0; ++ nlink_width = 0; ++ owner_width = 0; ++ group_width = 0; ++ author_width = 0; ++ scontext_width = 0; ++ major_device_number_width = 0; ++ minor_device_number_width = 0; ++ file_size_width = 0; ++} ++ ++/* Add a file to the current table of files. ++ Verify that the file exists, and print an error message if it does not. ++ Return the number of blocks that the file occupies. */ ++ ++static uintmax_t ++gobble_file (char const *name, enum filetype type, ino_t inode, ++ bool command_line_arg, char const *dirname) ++{ ++ uintmax_t blocks = 0; ++ struct fileinfo *f; ++ ++ /* An inode value prior to gobble_file necessarily came from readdir, ++ which is not used for command line arguments. */ ++ assert (! 
command_line_arg || inode == NOT_AN_INODE_NUMBER); ++ ++ if (cwd_n_used == cwd_n_alloc) ++ { ++ cwd_file = xnrealloc (cwd_file, cwd_n_alloc, 2 * sizeof *cwd_file); ++ cwd_n_alloc *= 2; ++ } ++ ++ f = &cwd_file[cwd_n_used]; ++ memset (f, '\0', sizeof *f); ++ f->stat.st_ino = inode; ++ f->filetype = type; ++ ++ if (command_line_arg ++ || format_needs_stat ++ /* When coloring a directory (we may know the type from ++ direct.d_type), we have to stat it in order to indicate ++ sticky and/or other-writable attributes. */ ++ || (type == directory && print_with_color) ++ /* When dereferencing symlinks, the inode and type must come from ++ stat, but readdir provides the inode and type of lstat. */ ++ || ((print_inode || format_needs_type) ++ && (type == symbolic_link || type == unknown) ++ && (dereference == DEREF_ALWAYS ++ || (command_line_arg && dereference != DEREF_NEVER) ++ || color_symlink_as_referent || check_symlink_color)) ++ /* Command line dereferences are already taken care of by the above ++ assertion that the inode number is not yet known. */ ++ || (print_inode && inode == NOT_AN_INODE_NUMBER) ++ || (format_needs_type ++ && (type == unknown || command_line_arg ++ /* --indicator-style=classify (aka -F) ++ requires that we stat each regular file ++ to see if it's executable. */ ++ || (type == normal && (indicator_style == classify ++ /* This is so that --color ends up ++ highlighting files with the executable ++ bit set even when options like -F are ++ not specified. */ ++ || (print_with_color ++ && is_colored (C_EXEC)) ++ ))))) ++ ++ { ++ /* Absolute name of this file. 
*/ ++ char *absolute_name; ++ bool do_deref; ++ int err; ++ ++ if (name[0] == '/' || dirname[0] == 0) ++ absolute_name = (char *) name; ++ else ++ { ++ absolute_name = alloca (strlen (name) + strlen (dirname) + 2); ++ attach (absolute_name, dirname, name); ++ } ++ ++ switch (dereference) ++ { ++ case DEREF_ALWAYS: ++ err = stat (absolute_name, &f->stat); ++ do_deref = true; ++ break; ++ ++ case DEREF_COMMAND_LINE_ARGUMENTS: ++ case DEREF_COMMAND_LINE_SYMLINK_TO_DIR: ++ if (command_line_arg) ++ { ++ bool need_lstat; ++ err = stat (absolute_name, &f->stat); ++ do_deref = true; ++ ++ if (dereference == DEREF_COMMAND_LINE_ARGUMENTS) ++ break; ++ ++ need_lstat = (err < 0 ++ ? errno == ENOENT ++ : ! S_ISDIR (f->stat.st_mode)); ++ if (!need_lstat) ++ break; ++ ++ /* stat failed because of ENOENT, maybe indicating a dangling ++ symlink. Or stat succeeded, ABSOLUTE_NAME does not refer to a ++ directory, and --dereference-command-line-symlink-to-dir is ++ in effect. Fall through so that we call lstat instead. */ ++ } ++ ++ default: /* DEREF_NEVER */ ++ err = lstat (absolute_name, &f->stat); ++ do_deref = false; ++ break; ++ } ++ ++ if (err != 0) ++ { ++ /* Failure to stat a command line argument leads to ++ an exit status of 2. For other files, stat failure ++ provokes an exit status of 1. */ ++ file_failure (command_line_arg, ++ _("cannot access %s"), absolute_name); ++ if (command_line_arg) ++ return 0; ++ ++ f->name = xstrdup (name); ++ cwd_n_used++; ++ ++ return 0; ++ } ++ ++ f->stat_ok = true; ++ ++ if (format == long_format || print_scontext) ++ { ++ bool have_selinux = false; ++ bool have_acl = false; ++ int attr_len = (do_deref ++ ? getfilecon (absolute_name, &f->scontext) ++ : lgetfilecon (absolute_name, &f->scontext)); ++ err = (attr_len < 0); ++ ++ /* Contrary to its documented API, getfilecon may return 0, ++ yet set f->scontext to NULL (on at least Debian's libselinux1 ++ 2.0.15-2+b1), so work around that bug. 
++ FIXME: remove this work-around in 2011, or whenever affected ++ versions of libselinux are long gone. */ ++ if (attr_len == 0) ++ { ++ err = 0; ++ f->scontext = xstrdup ("unlabeled"); ++ } ++ ++ if (err == 0) ++ have_selinux = ! STREQ ("unlabeled", f->scontext); ++ else ++ { ++ f->scontext = UNKNOWN_SECURITY_CONTEXT; ++ ++ /* When requesting security context information, don't make ++ ls fail just because the file (even a command line argument) ++ isn't on the right type of file system. I.e., a getfilecon ++ failure isn't in the same class as a stat failure. */ ++ if (errno == ENOTSUP || errno == EOPNOTSUPP || errno == ENODATA) ++ err = 0; ++ } ++ ++ if (err == 0 && format == long_format) ++ { ++ int n = file_has_acl (absolute_name, &f->stat); ++ err = (n < 0); ++ have_acl = (0 < n); ++ } ++ ++ f->acl_type = (!have_selinux && !have_acl ++ ? ACL_T_NONE ++ : (have_selinux && !have_acl ++ ? ACL_T_SELINUX_ONLY ++ : ACL_T_YES)); ++ any_has_acl |= f->acl_type != ACL_T_NONE; ++ ++ if (err) ++ error (0, errno, "%s", quotearg_colon (absolute_name)); ++ } ++ ++ if (S_ISLNK (f->stat.st_mode) ++ && (format == long_format || check_symlink_color)) ++ { ++ char *linkname; ++ struct stat linkstats; ++ ++ get_link_name (absolute_name, f, command_line_arg); ++ linkname = make_link_name (absolute_name, f->linkname); ++ ++ /* Avoid following symbolic links when possible, ie, when ++ they won't be traced and when no indicator is needed. */ ++ if (linkname ++ && (file_type <= indicator_style || check_symlink_color) ++ && stat (linkname, &linkstats) == 0) ++ { ++ f->linkok = true; ++ ++ /* Symbolic links to directories that are mentioned on the ++ command line are automatically traced if not being ++ listed as files. */ ++ if (!command_line_arg || format == long_format ++ || !S_ISDIR (linkstats.st_mode)) ++ { ++ /* Get the linked-to file's mode for the filetype indicator ++ in long listings. 
*/ ++ f->linkmode = linkstats.st_mode; ++ } ++ } ++ free (linkname); ++ } ++ ++ /* When not distinguishing types of symlinks, pretend we know that ++ it is stat'able, so that it will be colored as a regular symlink, ++ and not as an orphan. */ ++ if (S_ISLNK (f->stat.st_mode) && !check_symlink_color) ++ f->linkok = true; ++ ++ if (S_ISLNK (f->stat.st_mode)) ++ f->filetype = symbolic_link; ++ else if (S_ISDIR (f->stat.st_mode)) ++ { ++ if (command_line_arg && !immediate_dirs) ++ f->filetype = arg_directory; ++ else ++ f->filetype = directory; ++ } ++ else ++ f->filetype = normal; ++ ++ blocks = ST_NBLOCKS (f->stat); ++ if (format == long_format || print_block_size) ++ { ++ char buf[LONGEST_HUMAN_READABLE + 1]; ++ int len = mbswidth (human_readable (blocks, buf, human_output_opts, ++ ST_NBLOCKSIZE, output_block_size), ++ 0); ++ if (block_size_width < len) ++ block_size_width = len; ++ } ++ ++ if (format == long_format) ++ { ++ if (print_owner) ++ { ++ int len = format_user_width (f->stat.st_uid); ++ if (owner_width < len) ++ owner_width = len; ++ } ++ ++ if (print_group) ++ { ++ int len = format_group_width (f->stat.st_gid); ++ if (group_width < len) ++ group_width = len; ++ } ++ ++ if (print_author) ++ { ++ int len = format_user_width (f->stat.st_author); ++ if (author_width < len) ++ author_width = len; ++ } ++ } ++ ++ if (print_scontext) ++ { ++ int len = strlen (f->scontext); ++ if (scontext_width < len) ++ scontext_width = len; ++ } ++ ++ if (format == long_format) ++ { ++ char b[INT_BUFSIZE_BOUND (uintmax_t)]; ++ int b_len = strlen (umaxtostr (f->stat.st_nlink, b)); ++ if (nlink_width < b_len) ++ nlink_width = b_len; ++ ++ if (S_ISCHR (f->stat.st_mode) || S_ISBLK (f->stat.st_mode)) ++ { ++ char buf[INT_BUFSIZE_BOUND (uintmax_t)]; ++ int len = strlen (umaxtostr (major (f->stat.st_rdev), buf)); ++ if (major_device_number_width < len) ++ major_device_number_width = len; ++ len = strlen (umaxtostr (minor (f->stat.st_rdev), buf)); ++ if (minor_device_number_width < 
len) ++ minor_device_number_width = len; ++ len = major_device_number_width + 2 + minor_device_number_width; ++ if (file_size_width < len) ++ file_size_width = len; ++ } ++ else ++ { ++ char buf[LONGEST_HUMAN_READABLE + 1]; ++ uintmax_t size = unsigned_file_size (f->stat.st_size); ++ int len = mbswidth (human_readable (size, buf, human_output_opts, ++ 1, file_output_block_size), ++ 0); ++ if (file_size_width < len) ++ file_size_width = len; ++ } ++ } ++ } ++ ++ if (print_inode) ++ { ++ char buf[INT_BUFSIZE_BOUND (uintmax_t)]; ++ int len = strlen (umaxtostr (f->stat.st_ino, buf)); ++ if (inode_number_width < len) ++ inode_number_width = len; ++ } ++ ++ f->name = xstrdup (name); ++ cwd_n_used++; ++ ++ return blocks; ++} ++ ++/* Return true if F refers to a directory. */ ++static bool ++is_directory (const struct fileinfo *f) ++{ ++ return f->filetype == directory || f->filetype == arg_directory; ++} ++ ++/* Put the name of the file that FILENAME is a symbolic link to ++ into the LINKNAME field of `f'. COMMAND_LINE_ARG indicates whether ++ FILENAME is a command-line argument. */ ++ ++static void ++get_link_name (char const *filename, struct fileinfo *f, bool command_line_arg) ++{ ++ f->linkname = areadlink_with_size (filename, f->stat.st_size); ++ if (f->linkname == NULL) ++ file_failure (command_line_arg, _("cannot read symbolic link %s"), ++ filename); ++} ++ ++/* If `linkname' is a relative name and `name' contains one or more ++ leading directories, return `linkname' with those directories ++ prepended; otherwise, return a copy of `linkname'. ++ If `linkname' is zero, return zero. */ ++ ++static char * ++make_link_name (char const *name, char const *linkname) ++{ ++ char *linkbuf; ++ size_t bufsiz; ++ ++ if (!linkname) ++ return NULL; ++ ++ if (*linkname == '/') ++ return xstrdup (linkname); ++ ++ /* The link is to a relative name. Prepend any leading directory ++ in `name' to the link name. 
*/ ++ linkbuf = strrchr (name, '/'); ++ if (linkbuf == 0) ++ return xstrdup (linkname); ++ ++ bufsiz = linkbuf - name + 1; ++ linkbuf = xmalloc (bufsiz + strlen (linkname) + 1); ++ strncpy (linkbuf, name, bufsiz); ++ strcpy (linkbuf + bufsiz, linkname); ++ return linkbuf; ++} ++ ++/* Return true if the last component of NAME is `.' or `..' ++ This is so we don't try to recurse on `././././. ...' */ ++ ++static bool ++basename_is_dot_or_dotdot (const char *name) ++{ ++ char const *base = last_component (name); ++ return dot_or_dotdot (base); ++} ++ ++/* Remove any entries from CWD_FILE that are for directories, ++ and queue them to be listed as directories instead. ++ DIRNAME is the prefix to prepend to each dirname ++ to make it correct relative to ls's working dir; ++ if it is null, no prefix is needed and "." and ".." should not be ignored. ++ If COMMAND_LINE_ARG is true, this directory was mentioned at the top level, ++ This is desirable when processing directories recursively. */ ++ ++static void ++extract_dirs_from_files (char const *dirname, bool command_line_arg) ++{ ++ size_t i; ++ size_t j; ++ bool ignore_dot_and_dot_dot = (dirname != NULL); ++ ++ if (dirname && LOOP_DETECT) ++ { ++ /* Insert a marker entry first. When we dequeue this marker entry, ++ we'll know that DIRNAME has been processed and may be removed ++ from the set of active directories. */ ++ queue_directory (NULL, dirname, false); ++ } ++ ++ /* Queue the directories last one first, because queueing reverses the ++ order. */ ++ for (i = cwd_n_used; i-- != 0; ) ++ { ++ struct fileinfo *f = sorted_file[i]; ++ ++ if (is_directory (f) ++ && (! ignore_dot_and_dot_dot ++ || ! 
basename_is_dot_or_dotdot (f->name))) ++ { ++ if (!dirname || f->name[0] == '/') ++ queue_directory (f->name, f->linkname, command_line_arg); ++ else ++ { ++ char *name = file_name_concat (dirname, f->name, NULL); ++ queue_directory (name, f->linkname, command_line_arg); ++ free (name); ++ } ++ if (f->filetype == arg_directory) ++ free (f->name); ++ } ++ } ++ ++ /* Now delete the directories from the table, compacting all the remaining ++ entries. */ ++ ++ for (i = 0, j = 0; i < cwd_n_used; i++) ++ { ++ struct fileinfo *f = sorted_file[i]; ++ sorted_file[j] = f; ++ j += (f->filetype != arg_directory); ++ } ++ cwd_n_used = j; ++} ++ ++/* Use strcoll to compare strings in this locale. If an error occurs, ++ report an error and longjmp to failed_strcoll. */ ++ ++static jmp_buf failed_strcoll; ++ ++static int ++xstrcoll (char const *a, char const *b) ++{ ++ int diff; ++ errno = 0; ++ diff = strcoll (a, b); ++ if (errno) ++ { ++ error (0, errno, _("cannot compare file names %s and %s"), ++ quote_n (0, a), quote_n (1, b)); ++ set_exit_status (false); ++ longjmp (failed_strcoll, 1); ++ } ++ return diff; ++} ++ ++/* Comparison routines for sorting the files. */ ++ ++typedef void const *V; ++typedef int (*qsortFunc)(V a, V b); ++ ++/* Used below in DEFINE_SORT_FUNCTIONS for _df_ sort function variants. ++ The do { ... } while(0) makes it possible to use the macro more like ++ a statement, without violating C89 rules: */ ++#define DIRFIRST_CHECK(a, b) \ ++ do \ ++ { \ ++ bool a_is_dir = is_directory ((struct fileinfo const *) a); \ ++ bool b_is_dir = is_directory ((struct fileinfo const *) b); \ ++ if (a_is_dir && !b_is_dir) \ ++ return -1; /* a goes before b */ \ ++ if (!a_is_dir && b_is_dir) \ ++ return 1; /* b goes before a */ \ ++ } \ ++ while (0) ++ ++/* Define the 8 different sort function variants required for each sortkey. ++ KEY_NAME is a token describing the sort key, e.g., ctime, atime, size. 
++ KEY_CMP_FUNC is a function to compare records based on that key, e.g., ++ ctime_cmp, atime_cmp, size_cmp. Append KEY_NAME to the string, ++ '[rev_][x]str{cmp|coll}[_df]_', to create each function name. */ ++#define DEFINE_SORT_FUNCTIONS(key_name, key_cmp_func) \ ++ /* direct, non-dirfirst versions */ \ ++ static int xstrcoll_##key_name (V a, V b) \ ++ { return key_cmp_func (a, b, xstrcoll); } \ ++ static int strcmp_##key_name (V a, V b) \ ++ { return key_cmp_func (a, b, strcmp); } \ ++ \ ++ /* reverse, non-dirfirst versions */ \ ++ static int rev_xstrcoll_##key_name (V a, V b) \ ++ { return key_cmp_func (b, a, xstrcoll); } \ ++ static int rev_strcmp_##key_name (V a, V b) \ ++ { return key_cmp_func (b, a, strcmp); } \ ++ \ ++ /* direct, dirfirst versions */ \ ++ static int xstrcoll_df_##key_name (V a, V b) \ ++ { DIRFIRST_CHECK (a, b); return key_cmp_func (a, b, xstrcoll); } \ ++ static int strcmp_df_##key_name (V a, V b) \ ++ { DIRFIRST_CHECK (a, b); return key_cmp_func (a, b, strcmp); } \ ++ \ ++ /* reverse, dirfirst versions */ \ ++ static int rev_xstrcoll_df_##key_name (V a, V b) \ ++ { DIRFIRST_CHECK (a, b); return key_cmp_func (b, a, xstrcoll); } \ ++ static int rev_strcmp_df_##key_name (V a, V b) \ ++ { DIRFIRST_CHECK (a, b); return key_cmp_func (b, a, strcmp); } ++ ++static inline int ++cmp_ctime (struct fileinfo const *a, struct fileinfo const *b, ++ int (*cmp) (char const *, char const *)) ++{ ++ int diff = timespec_cmp (get_stat_ctime (&b->stat), ++ get_stat_ctime (&a->stat)); ++ return diff ? diff : cmp (a->name, b->name); ++} ++ ++static inline int ++cmp_mtime (struct fileinfo const *a, struct fileinfo const *b, ++ int (*cmp) (char const *, char const *)) ++{ ++ int diff = timespec_cmp (get_stat_mtime (&b->stat), ++ get_stat_mtime (&a->stat)); ++ return diff ? 
diff : cmp (a->name, b->name); ++} ++ ++static inline int ++cmp_atime (struct fileinfo const *a, struct fileinfo const *b, ++ int (*cmp) (char const *, char const *)) ++{ ++ int diff = timespec_cmp (get_stat_atime (&b->stat), ++ get_stat_atime (&a->stat)); ++ return diff ? diff : cmp (a->name, b->name); ++} ++ ++static inline int ++cmp_size (struct fileinfo const *a, struct fileinfo const *b, ++ int (*cmp) (char const *, char const *)) ++{ ++ int diff = longdiff (b->stat.st_size, a->stat.st_size); ++ return diff ? diff : cmp (a->name, b->name); ++} ++ ++static inline int ++cmp_name (struct fileinfo const *a, struct fileinfo const *b, ++ int (*cmp) (char const *, char const *)) ++{ ++ return cmp (a->name, b->name); ++} ++ ++/* Compare file extensions. Files with no extension are `smallest'. ++ If extensions are the same, compare by filenames instead. */ ++ ++static inline int ++cmp_extension (struct fileinfo const *a, struct fileinfo const *b, ++ int (*cmp) (char const *, char const *)) ++{ ++ char const *base1 = strrchr (a->name, '.'); ++ char const *base2 = strrchr (b->name, '.'); ++ int diff = cmp (base1 ? base1 : "", base2 ? base2 : ""); ++ return diff ? diff : cmp (a->name, b->name); ++} ++ ++DEFINE_SORT_FUNCTIONS (ctime, cmp_ctime) ++DEFINE_SORT_FUNCTIONS (mtime, cmp_mtime) ++DEFINE_SORT_FUNCTIONS (atime, cmp_atime) ++DEFINE_SORT_FUNCTIONS (size, cmp_size) ++DEFINE_SORT_FUNCTIONS (name, cmp_name) ++DEFINE_SORT_FUNCTIONS (extension, cmp_extension) ++ ++/* Compare file versions. ++ Unlike all other compare functions above, cmp_version depends only ++ on filevercmp, which does not fail (even for locale reasons), and does not ++ need a secondary sort key. See lib/filevercmp.h for function description. ++ ++ All the other sort options, in fact, need xstrcoll and strcmp variants, ++ because they all use a string comparison (either as the primary or secondary ++ sort key), and xstrcoll has the ability to do a longjmp if strcoll fails for ++ locale reasons. 
Last, strverscmp is ALWAYS available in coreutils, ++ thanks to the gnulib library. */ ++static inline int ++cmp_version (struct fileinfo const *a, struct fileinfo const *b) ++{ ++ return filevercmp (a->name, b->name); ++} ++ ++static int xstrcoll_version (V a, V b) ++{ return cmp_version (a, b); } ++static int rev_xstrcoll_version (V a, V b) ++{ return cmp_version (b, a); } ++static int xstrcoll_df_version (V a, V b) ++{ DIRFIRST_CHECK (a, b); return cmp_version (a, b); } ++static int rev_xstrcoll_df_version (V a, V b) ++{ DIRFIRST_CHECK (a, b); return cmp_version (b, a); } ++ ++ ++/* We have 2^3 different variants for each sortkey function ++ (for 3 independent sort modes). ++ The function pointers stored in this array must be dereferenced as: ++ ++ sort_variants[sort_key][use_strcmp][reverse][dirs_first] ++ ++ Note that the order in which sortkeys are listed in the function pointer ++ array below is defined by the order of the elements in the time_type and ++ sort_type enums! */ ++ ++#define LIST_SORTFUNCTION_VARIANTS(key_name) \ ++ { \ ++ { \ ++ { xstrcoll_##key_name, xstrcoll_df_##key_name }, \ ++ { rev_xstrcoll_##key_name, rev_xstrcoll_df_##key_name }, \ ++ }, \ ++ { \ ++ { strcmp_##key_name, strcmp_df_##key_name }, \ ++ { rev_strcmp_##key_name, rev_strcmp_df_##key_name }, \ ++ } \ ++ } ++ ++static qsortFunc const sort_functions[][2][2][2] = ++ { ++ LIST_SORTFUNCTION_VARIANTS (name), ++ LIST_SORTFUNCTION_VARIANTS (extension), ++ LIST_SORTFUNCTION_VARIANTS (size), ++ ++ { ++ { ++ { xstrcoll_version, xstrcoll_df_version }, ++ { rev_xstrcoll_version, rev_xstrcoll_df_version }, ++ }, ++ ++ /* We use NULL for the strcmp variants of version comparison ++ since as explained in cmp_version definition, version comparison ++ does not rely on xstrcoll, so it will never longjmp, and never ++ need to try the strcmp fallback. 
*/ ++ { ++ { NULL, NULL }, ++ { NULL, NULL }, ++ } ++ }, ++ ++ /* last are time sort functions */ ++ LIST_SORTFUNCTION_VARIANTS (mtime), ++ LIST_SORTFUNCTION_VARIANTS (ctime), ++ LIST_SORTFUNCTION_VARIANTS (atime) ++ }; ++ ++/* The number of sortkeys is calculated as ++ the number of elements in the sort_type enum (i.e. sort_numtypes) + ++ the number of elements in the time_type enum (i.e. time_numtypes) - 1 ++ This is because when sort_type==sort_time, we have up to ++ time_numtypes possible sortkeys. ++ ++ This line verifies at compile-time that the array of sort functions has been ++ initialized for all possible sortkeys. */ ++verify (ARRAY_CARDINALITY (sort_functions) ++ == sort_numtypes + time_numtypes - 1 ); ++ ++/* Set up SORTED_FILE to point to the in-use entries in CWD_FILE, in order. */ ++ ++static void ++initialize_ordering_vector (void) ++{ ++ size_t i; ++ for (i = 0; i < cwd_n_used; i++) ++ sorted_file[i] = &cwd_file[i]; ++} ++ ++/* Sort the files now in the table. */ ++ ++static void ++sort_files (void) ++{ ++ bool use_strcmp; ++ ++ if (sorted_file_alloc < cwd_n_used + cwd_n_used / 2) ++ { ++ free (sorted_file); ++ sorted_file = xnmalloc (cwd_n_used, 3 * sizeof *sorted_file); ++ sorted_file_alloc = 3 * cwd_n_used; ++ } ++ ++ initialize_ordering_vector (); ++ ++ if (sort_type == sort_none) ++ return; ++ ++ /* Try strcoll. If it fails, fall back on strcmp. We can't safely ++ ignore strcoll failures, as a failing strcoll might be a ++ comparison function that is not a total order, and if we ignored ++ the failure this might cause qsort to dump core. */ ++ ++ if (! setjmp (failed_strcoll)) ++ use_strcmp = false; /* strcoll() succeeded */ ++ else ++ { ++ use_strcmp = true; ++ assert (sort_type != sort_version); ++ initialize_ordering_vector (); ++ } ++ ++ /* When sort_type == sort_time, use time_type as subindex. */ ++ mpsort ((void const **) sorted_file, cwd_n_used, ++ sort_functions[sort_type + (sort_type == sort_time ? 
time_type : 0)] ++ [use_strcmp][sort_reverse] ++ [directories_first]); ++} ++ ++/* List all the files now in the table. */ ++ ++static void ++print_current_files (void) ++{ ++ size_t i; ++ ++ switch (format) ++ { ++ case one_per_line: ++ for (i = 0; i < cwd_n_used; i++) ++ { ++ print_file_name_and_frills (sorted_file[i], 0); ++ putchar ('\n'); ++ } ++ break; ++ ++ case many_per_line: ++ print_many_per_line (); ++ break; ++ ++ case horizontal: ++ print_horizontal (); ++ break; ++ ++ case with_commas: ++ print_with_commas (); ++ break; ++ ++ case long_format: ++ for (i = 0; i < cwd_n_used; i++) ++ { ++ print_long_format (sorted_file[i]); ++ DIRED_PUTCHAR ('\n'); ++ } ++ break; ++ } ++} ++ ++/* Replace the first %b with precomputed aligned month names. ++ Note on glibc-2.7 at least, this speeds up the whole `ls -lU` ++ process by around 17%, compared to letting strftime() handle the %b. */ ++ ++static size_t ++align_nstrftime (char *buf, size_t size, char const *fmt, struct tm const *tm, ++ int __utc, int __ns) ++{ ++ const char *nfmt = fmt; ++ /* In the unlikely event that rpl_fmt below is not large enough, ++ the replacement is not done. A malloc here slows ls down by 2% */ ++ char rpl_fmt[sizeof (abmon[0]) + 100]; ++ const char *pb; ++ if (required_mon_width && (pb = strstr (fmt, "%b"))) ++ { ++ if (strlen (fmt) < (sizeof (rpl_fmt) - sizeof (abmon[0]) + 2)) ++ { ++ char *pfmt = rpl_fmt; ++ nfmt = rpl_fmt; ++ ++ pfmt = mempcpy (pfmt, fmt, pb - fmt); ++ pfmt = stpcpy (pfmt, abmon[tm->tm_mon]); ++ strcpy (pfmt, pb + 2); ++ } ++ } ++ size_t ret = nstrftime (buf, size, nfmt, tm, __utc, __ns); ++ return ret; ++} ++ ++/* Return the expected number of columns in a long-format time stamp, ++ or zero if it cannot be calculated. 
*/ ++ ++static int ++long_time_expected_width (void) ++{ ++ static int width = -1; ++ ++ if (width < 0) ++ { ++ time_t epoch = 0; ++ struct tm const *tm = localtime (&epoch); ++ char buf[TIME_STAMP_LEN_MAXIMUM + 1]; ++ ++ /* In case you're wondering if localtime can fail with an input time_t ++ value of 0, let's just say it's very unlikely, but not inconceivable. ++ The TZ environment variable would have to specify a time zone that ++ is 2**31-1900 years or more ahead of UTC. This could happen only on ++ a 64-bit system that blindly accepts e.g., TZ=UTC+20000000000000. ++ However, this is not possible with Solaris 10 or glibc-2.3.5, since ++ their implementations limit the offset to 167:59 and 24:00, resp. */ ++ if (tm) ++ { ++ size_t len = ++ align_nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0); ++ if (len != 0) ++ width = mbsnwidth (buf, len, 0); ++ } ++ ++ if (width < 0) ++ width = 0; ++ } ++ ++ return width; ++} ++ ++/* Print the user or group name NAME, with numeric id ID, using a ++ print width of WIDTH columns. */ ++ ++static void ++format_user_or_group (char const *name, unsigned long int id, int width) ++{ ++ size_t len; ++ ++ if (name) ++ { ++ int width_gap = width - mbswidth (name, 0); ++ int pad = MAX (0, width_gap); ++ fputs (name, stdout); ++ len = strlen (name) + pad; ++ ++ do ++ putchar (' '); ++ while (pad--); ++ } ++ else ++ { ++ printf ("%*lu ", width, id); ++ len = width; ++ } ++ ++ dired_pos += len + 1; ++} ++ ++/* Print the name or id of the user with id U, using a print width of ++ WIDTH. */ ++ ++static void ++format_user (uid_t u, int width, bool stat_ok) ++{ ++ format_user_or_group (! stat_ok ? "?" : ++ (numeric_ids ? NULL : getuser (u)), u, width); ++} ++ ++/* Likewise, for groups. */ ++ ++static void ++format_group (gid_t g, int width, bool stat_ok) ++{ ++ format_user_or_group (! stat_ok ? "?" : ++ (numeric_ids ? NULL : getgroup (g)), g, width); ++} ++ ++/* Return the number of columns that format_user_or_group will print. 
*/ ++ ++static int ++format_user_or_group_width (char const *name, unsigned long int id) ++{ ++ if (name) ++ { ++ int len = mbswidth (name, 0); ++ return MAX (0, len); ++ } ++ else ++ { ++ char buf[INT_BUFSIZE_BOUND (unsigned long int)]; ++ sprintf (buf, "%lu", id); ++ return strlen (buf); ++ } ++} ++ ++/* Return the number of columns that format_user will print. */ ++ ++static int ++format_user_width (uid_t u) ++{ ++ return format_user_or_group_width (numeric_ids ? NULL : getuser (u), u); ++} ++ ++/* Likewise, for groups. */ ++ ++static int ++format_group_width (gid_t g) ++{ ++ return format_user_or_group_width (numeric_ids ? NULL : getgroup (g), g); ++} ++ ++/* Return a pointer to a formatted version of F->stat.st_ino, ++ possibly using buffer, BUF, of length BUFLEN, which must be at least ++ INT_BUFSIZE_BOUND (uintmax_t) bytes. */ ++static char * ++format_inode (char *buf, size_t buflen, const struct fileinfo *f) ++{ ++ assert (INT_BUFSIZE_BOUND (uintmax_t) <= buflen); ++ return (f->stat_ok && f->stat.st_ino != NOT_AN_INODE_NUMBER ++ ? umaxtostr (f->stat.st_ino, buf) ++ : (char *) "?"); ++} ++ ++/* Print information about F in long format. */ ++static void ++print_long_format (const struct fileinfo *f) ++{ ++ char modebuf[12]; ++ char buf ++ [LONGEST_HUMAN_READABLE + 1 /* inode */ ++ + LONGEST_HUMAN_READABLE + 1 /* size in blocks */ ++ + sizeof (modebuf) - 1 + 1 /* mode string */ ++ + INT_BUFSIZE_BOUND (uintmax_t) /* st_nlink */ ++ + LONGEST_HUMAN_READABLE + 2 /* major device number */ ++ + LONGEST_HUMAN_READABLE + 1 /* minor device number */ ++ + TIME_STAMP_LEN_MAXIMUM + 1 /* max length of time/date */ ++ ]; ++ size_t s; ++ char *p; ++ struct timespec when_timespec; ++ struct tm *when_local; ++ ++ /* Compute the mode string, except remove the trailing space if no ++ file in this directory has an ACL or SELinux security context. 
*/ ++ if (f->stat_ok) ++ filemodestring (&f->stat, modebuf); ++ else ++ { ++ modebuf[0] = filetype_letter[f->filetype]; ++ memset (modebuf + 1, '?', 10); ++ modebuf[11] = '\0'; ++ } ++ if (! any_has_acl) ++ modebuf[10] = '\0'; ++ else if (f->acl_type == ACL_T_SELINUX_ONLY) ++ modebuf[10] = '.'; ++ else if (f->acl_type == ACL_T_YES) ++ modebuf[10] = '+'; ++ ++ switch (time_type) ++ { ++ case time_ctime: ++ when_timespec = get_stat_ctime (&f->stat); ++ break; ++ case time_mtime: ++ when_timespec = get_stat_mtime (&f->stat); ++ break; ++ case time_atime: ++ when_timespec = get_stat_atime (&f->stat); ++ break; ++ default: ++ abort (); ++ } ++ ++ p = buf; ++ ++ if (print_inode) ++ { ++ char hbuf[INT_BUFSIZE_BOUND (uintmax_t)]; ++ sprintf (p, "%*s ", inode_number_width, ++ format_inode (hbuf, sizeof hbuf, f)); ++ /* Increment by strlen (p) here, rather than by inode_number_width + 1. ++ The latter is wrong when inode_number_width is zero. */ ++ p += strlen (p); ++ } ++ ++ if (print_block_size) ++ { ++ char hbuf[LONGEST_HUMAN_READABLE + 1]; ++ char const *blocks = ++ (! f->stat_ok ++ ? "?" ++ : human_readable (ST_NBLOCKS (f->stat), hbuf, human_output_opts, ++ ST_NBLOCKSIZE, output_block_size)); ++ int pad; ++ for (pad = block_size_width - mbswidth (blocks, 0); 0 < pad; pad--) ++ *p++ = ' '; ++ while ((*p++ = *blocks++)) ++ continue; ++ p[-1] = ' '; ++ } ++ ++ /* The last byte of the mode string is the POSIX ++ "optional alternate access method flag". */ ++ { ++ char hbuf[INT_BUFSIZE_BOUND (uintmax_t)]; ++ sprintf (p, "%s %*s ", modebuf, nlink_width, ++ ! f->stat_ok ? "?" : umaxtostr (f->stat.st_nlink, hbuf)); ++ } ++ /* Increment by strlen (p) here, rather than by, e.g., ++ sizeof modebuf - 2 + any_has_acl + 1 + nlink_width + 1. ++ The latter is wrong when nlink_width is zero. 
*/ ++ p += strlen (p); ++ ++ DIRED_INDENT (); ++ ++ if (print_owner || print_group || print_author || print_scontext) ++ { ++ DIRED_FPUTS (buf, stdout, p - buf); ++ ++ if (print_owner) ++ format_user (f->stat.st_uid, owner_width, f->stat_ok); ++ ++ if (print_group) ++ format_group (f->stat.st_gid, group_width, f->stat_ok); ++ ++ if (print_author) ++ format_user (f->stat.st_author, author_width, f->stat_ok); ++ ++ if (print_scontext) ++ format_user_or_group (f->scontext, 0, scontext_width); ++ ++ p = buf; ++ } ++ ++ if (f->stat_ok ++ && (S_ISCHR (f->stat.st_mode) || S_ISBLK (f->stat.st_mode))) ++ { ++ char majorbuf[INT_BUFSIZE_BOUND (uintmax_t)]; ++ char minorbuf[INT_BUFSIZE_BOUND (uintmax_t)]; ++ int blanks_width = (file_size_width ++ - (major_device_number_width + 2 ++ + minor_device_number_width)); ++ sprintf (p, "%*s, %*s ", ++ major_device_number_width + MAX (0, blanks_width), ++ umaxtostr (major (f->stat.st_rdev), majorbuf), ++ minor_device_number_width, ++ umaxtostr (minor (f->stat.st_rdev), minorbuf)); ++ p += file_size_width + 1; ++ } ++ else ++ { ++ char hbuf[LONGEST_HUMAN_READABLE + 1]; ++ char const *size = ++ (! f->stat_ok ++ ? "?" ++ : human_readable (unsigned_file_size (f->stat.st_size), ++ hbuf, human_output_opts, 1, file_output_block_size)); ++ int pad; ++ for (pad = file_size_width - mbswidth (size, 0); 0 < pad; pad--) ++ *p++ = ' '; ++ while ((*p++ = *size++)) ++ continue; ++ p[-1] = ' '; ++ } ++ ++ when_local = localtime (&when_timespec.tv_sec); ++ s = 0; ++ *p = '\1'; ++ ++ if (f->stat_ok && when_local) ++ { ++ struct timespec six_months_ago; ++ bool recent; ++ char const *fmt; ++ ++ /* If the file appears to be in the future, update the current ++ time, in case the file happens to have been modified since ++ the last time we checked the clock. */ ++ if (timespec_cmp (current_time, when_timespec) < 0) ++ { ++ /* Note that gettime may call gettimeofday which, on some non- ++ compliant systems, clobbers the buffer used for localtime's result. 
++ But it's ok here, because we use a gettimeofday wrapper that ++ saves and restores the buffer around the gettimeofday call. */ ++ gettime (&current_time); ++ } ++ ++ /* Consider a time to be recent if it is within the past six ++ months. A Gregorian year has 365.2425 * 24 * 60 * 60 == ++ 31556952 seconds on the average. Write this value as an ++ integer constant to avoid floating point hassles. */ ++ six_months_ago.tv_sec = current_time.tv_sec - 31556952 / 2; ++ six_months_ago.tv_nsec = current_time.tv_nsec; ++ ++ recent = (timespec_cmp (six_months_ago, when_timespec) < 0 ++ && (timespec_cmp (when_timespec, current_time) < 0)); ++ fmt = long_time_format[recent]; ++ ++ /* We assume here that all time zones are offset from UTC by a ++ whole number of seconds. */ ++ s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt, ++ when_local, 0, when_timespec.tv_nsec); ++ } ++ ++ if (s || !*p) ++ { ++ p += s; ++ *p++ = ' '; ++ ++ /* NUL-terminate the string -- fputs (via DIRED_FPUTS) requires it. */ ++ *p = '\0'; ++ } ++ else ++ { ++ /* The time cannot be converted using the desired format, so ++ print it as a huge integer number of seconds. */ ++ char hbuf[INT_BUFSIZE_BOUND (intmax_t)]; ++ sprintf (p, "%*s ", long_time_expected_width (), ++ (! f->stat_ok ++ ? "?" ++ : timetostr (when_timespec.tv_sec, hbuf))); ++ /* FIXME: (maybe) We discarded when_timespec.tv_nsec. 
*/ ++ p += strlen (p); ++ } ++ ++ DIRED_FPUTS (buf, stdout, p - buf); ++ size_t w = print_name_with_quoting (f->name, FILE_OR_LINK_MODE (f), f->linkok, ++ f->stat_ok, f->filetype, &dired_obstack, ++ f->stat.st_nlink, p - buf); ++ ++ if (f->filetype == symbolic_link) ++ { ++ if (f->linkname) ++ { ++ DIRED_FPUTS_LITERAL (" -> ", stdout); ++ print_name_with_quoting (f->linkname, f->linkmode, f->linkok - 1, ++ f->stat_ok, f->filetype, NULL, ++ f->stat.st_nlink, (p - buf) + w + 4); ++ if (indicator_style != none) ++ print_type_indicator (true, f->linkmode, unknown); ++ } ++ } ++ else if (indicator_style != none) ++ print_type_indicator (f->stat_ok, f->stat.st_mode, f->filetype); ++} ++ ++/* Output to OUT a quoted representation of the file name NAME, ++ using OPTIONS to control quoting. Produce no output if OUT is NULL. ++ Store the number of screen columns occupied by NAME's quoted ++ representation into WIDTH, if non-NULL. Return the number of bytes ++ produced. */ ++ ++static size_t ++quote_name (FILE *out, const char *name, struct quoting_options const *options, ++ size_t *width) ++{ ++ char smallbuf[BUFSIZ]; ++ size_t len = quotearg_buffer (smallbuf, sizeof smallbuf, name, -1, options); ++ char *buf; ++ size_t displayed_width IF_LINT (= 0); ++ ++ if (len < sizeof smallbuf) ++ buf = smallbuf; ++ else ++ { ++ buf = alloca (len + 1); ++ quotearg_buffer (buf, len + 1, name, -1, options); ++ } ++ ++ if (qmark_funny_chars) ++ { ++ if (MB_CUR_MAX > 1) ++ { ++ char const *p = buf; ++ char const *plimit = buf + len; ++ char *q = buf; ++ displayed_width = 0; ++ ++ while (p < plimit) ++ switch (*p) ++ { ++ case ' ': case '!': case '"': case '#': case '%': ++ case '&': case '\'': case '(': case ')': case '*': ++ case '+': case ',': case '-': case '.': case '/': ++ case '0': case '1': case '2': case '3': case '4': ++ case '5': case '6': case '7': case '8': case '9': ++ case ':': case ';': case '<': case '=': case '>': ++ case '?': ++ case 'A': case 'B': case 'C': case 'D': case 
'E': ++ case 'F': case 'G': case 'H': case 'I': case 'J': ++ case 'K': case 'L': case 'M': case 'N': case 'O': ++ case 'P': case 'Q': case 'R': case 'S': case 'T': ++ case 'U': case 'V': case 'W': case 'X': case 'Y': ++ case 'Z': ++ case '[': case '\\': case ']': case '^': case '_': ++ case 'a': case 'b': case 'c': case 'd': case 'e': ++ case 'f': case 'g': case 'h': case 'i': case 'j': ++ case 'k': case 'l': case 'm': case 'n': case 'o': ++ case 'p': case 'q': case 'r': case 's': case 't': ++ case 'u': case 'v': case 'w': case 'x': case 'y': ++ case 'z': case '{': case '|': case '}': case '~': ++ /* These characters are printable ASCII characters. */ ++ *q++ = *p++; ++ displayed_width += 1; ++ break; ++ default: ++ /* If we have a multibyte sequence, copy it until we ++ reach its end, replacing each non-printable multibyte ++ character with a single question mark. */ ++ { ++ DECLARE_ZEROED_AGGREGATE (mbstate_t, mbstate); ++ do ++ { ++ wchar_t wc; ++ size_t bytes; ++ int w; ++ ++ bytes = mbrtowc (&wc, p, plimit - p, &mbstate); ++ ++ if (bytes == (size_t) -1) ++ { ++ /* An invalid multibyte sequence was ++ encountered. Skip one input byte, and ++ put a question mark. */ ++ p++; ++ *q++ = '?'; ++ displayed_width += 1; ++ break; ++ } ++ ++ if (bytes == (size_t) -2) ++ { ++ /* An incomplete multibyte character ++ at the end. Replace it entirely with ++ a question mark. */ ++ p = plimit; ++ *q++ = '?'; ++ displayed_width += 1; ++ break; ++ } ++ ++ if (bytes == 0) ++ /* A null wide character was encountered. */ ++ bytes = 1; ++ ++ w = wcwidth (wc); ++ if (w >= 0) ++ { ++ /* A printable multibyte character. ++ Keep it. */ ++ for (; bytes > 0; --bytes) ++ *q++ = *p++; ++ displayed_width += w; ++ } ++ else ++ { ++ /* An unprintable multibyte character. ++ Replace it entirely with a question ++ mark. */ ++ p += bytes; ++ *q++ = '?'; ++ displayed_width += 1; ++ } ++ } ++ while (! mbsinit (&mbstate)); ++ } ++ break; ++ } ++ ++ /* The buffer may have shrunk. 
*/ ++ len = q - buf; ++ } ++ else ++ { ++ char *p = buf; ++ char const *plimit = buf + len; ++ ++ while (p < plimit) ++ { ++ if (! isprint (to_uchar (*p))) ++ *p = '?'; ++ p++; ++ } ++ displayed_width = len; ++ } ++ } ++ else if (width != NULL) ++ { ++ if (MB_CUR_MAX > 1) ++ displayed_width = mbsnwidth (buf, len, 0); ++ else ++ { ++ char const *p = buf; ++ char const *plimit = buf + len; ++ ++ displayed_width = 0; ++ while (p < plimit) ++ { ++ if (isprint (to_uchar (*p))) ++ displayed_width++; ++ p++; ++ } ++ } ++ } ++ ++ if (out != NULL) ++ fwrite (buf, 1, len, out); ++ if (width != NULL) ++ *width = displayed_width; ++ return len; ++} ++ ++static size_t ++print_name_with_quoting (const char *p, mode_t mode, int linkok, ++ bool stat_ok, enum filetype type, ++ struct obstack *stack, nlink_t nlink, ++ size_t start_col) ++{ ++ bool used_color_this_time ++ = (print_with_color ++ && print_color_indicator (p, mode, linkok, stat_ok, type, nlink)); ++ ++ if (stack) ++ PUSH_CURRENT_DIRED_POS (stack); ++ ++ size_t width = quote_name (stdout, p, filename_quoting_options, NULL); ++ dired_pos += width; ++ ++ if (stack) ++ PUSH_CURRENT_DIRED_POS (stack); ++ ++ if (used_color_this_time) ++ { ++ process_signals (); ++ prep_non_filename_text (); ++ if (start_col / line_length != (start_col + width - 1) / line_length) ++ put_indicator (&color_indicator[C_CLR_TO_EOL]); ++ } ++ ++ return width; ++} ++ ++static void ++prep_non_filename_text (void) ++{ ++ if (color_indicator[C_END].string != NULL) ++ put_indicator (&color_indicator[C_END]); ++ else ++ { ++ put_indicator (&color_indicator[C_LEFT]); ++ put_indicator (&color_indicator[C_RESET]); ++ put_indicator (&color_indicator[C_RIGHT]); ++ } ++} ++ ++/* Print the file name of `f' with appropriate quoting. ++ Also print file size, inode number, and filetype indicator character, ++ as requested by switches. 
*/ ++ ++static size_t ++print_file_name_and_frills (const struct fileinfo *f, size_t start_col) ++{ ++ char buf[MAX (LONGEST_HUMAN_READABLE + 1, INT_BUFSIZE_BOUND (uintmax_t))]; ++ ++ if (print_inode) ++ printf ("%*s ", format == with_commas ? 0 : inode_number_width, ++ format_inode (buf, sizeof buf, f)); ++ ++ if (print_block_size) ++ printf ("%*s ", format == with_commas ? 0 : block_size_width, ++ ! f->stat_ok ? "?" ++ : human_readable (ST_NBLOCKS (f->stat), buf, human_output_opts, ++ ST_NBLOCKSIZE, output_block_size)); ++ ++ if (print_scontext) ++ printf ("%*s ", format == with_commas ? 0 : scontext_width, f->scontext); ++ ++ size_t width = print_name_with_quoting (f->name, FILE_OR_LINK_MODE (f), ++ f->linkok, f->stat_ok, f->filetype, ++ NULL, f->stat.st_nlink, start_col); ++ ++ if (indicator_style != none) ++ width += print_type_indicator (f->stat_ok, f->stat.st_mode, f->filetype); ++ ++ return width; ++} ++ ++/* Given these arguments describing a file, return the single-byte ++ type indicator, or 0. */ ++static char ++get_type_indicator (bool stat_ok, mode_t mode, enum filetype type) ++{ ++ char c; ++ ++ if (stat_ok ? S_ISREG (mode) : type == normal) ++ { ++ if (stat_ok && indicator_style == classify && (mode & S_IXUGO)) ++ c = '*'; ++ else ++ c = 0; ++ } ++ else ++ { ++ if (stat_ok ? S_ISDIR (mode) : type == directory || type == arg_directory) ++ c = '/'; ++ else if (indicator_style == slash) ++ c = 0; ++ else if (stat_ok ? S_ISLNK (mode) : type == symbolic_link) ++ c = '@'; ++ else if (stat_ok ? S_ISFIFO (mode) : type == fifo) ++ c = '|'; ++ else if (stat_ok ? 
S_ISSOCK (mode) : type == sock) ++ c = '='; ++ else if (stat_ok && S_ISDOOR (mode)) ++ c = '>'; ++ else ++ c = 0; ++ } ++ return c; ++} ++ ++static bool ++print_type_indicator (bool stat_ok, mode_t mode, enum filetype type) ++{ ++ char c = get_type_indicator (stat_ok, mode, type); ++ if (c) ++ DIRED_PUTCHAR (c); ++ return !!c; ++} ++ ++#ifdef HAVE_CAP ++/* Return true if NAME has a capability (see linux/capability.h) */ ++static bool ++has_capability (char const *name) ++{ ++ char *result; ++ bool has_cap; ++ ++ cap_t cap_d = cap_get_file (name); ++ if (cap_d == NULL) ++ return false; ++ ++ result = cap_to_text (cap_d, NULL); ++ cap_free (cap_d); ++ if (!result) ++ return false; ++ ++ /* check if human-readable capability string is empty */ ++ has_cap = !!*result; ++ ++ cap_free (result); ++ return has_cap; ++} ++#else ++static bool ++has_capability (char const *name ATTRIBUTE_UNUSED) ++{ ++ return false; ++} ++#endif ++ ++/* Returns whether any color sequence was printed. */ ++static bool ++print_color_indicator (const char *name, mode_t mode, int linkok, ++ bool stat_ok, enum filetype filetype, ++ nlink_t nlink) ++{ ++ enum indicator_no type; ++ struct color_ext_type *ext; /* Color extension */ ++ size_t len; /* Length of name */ ++ ++ /* Is this a nonexistent file? If so, linkok == -1. */ ++ ++ if (linkok == -1 && color_indicator[C_MISSING].string != NULL) ++ type = C_MISSING; ++ else if (! stat_ok) ++ { ++ static enum indicator_no filetype_indicator[] = FILETYPE_INDICATORS; ++ type = filetype_indicator[filetype]; ++ } ++ else ++ { ++ if (S_ISREG (mode)) ++ { ++ type = C_FILE; ++ ++ if ((mode & S_ISUID) != 0 && is_colored (C_SETUID)) ++ type = C_SETUID; ++ else if ((mode & S_ISGID) != 0 && is_colored (C_SETGID)) ++ type = C_SETGID; ++ /* has_capability() called second for performance. 
*/ ++ else if (is_colored (C_CAP) && has_capability (name)) ++ type = C_CAP; ++ else if ((mode & S_IXUGO) != 0 && is_colored (C_EXEC)) ++ type = C_EXEC; ++ else if ((1 < nlink) && is_colored (C_MULTIHARDLINK)) ++ type = C_MULTIHARDLINK; ++ } ++ else if (S_ISDIR (mode)) ++ { ++ type = C_DIR; ++ ++ if ((mode & S_ISVTX) && (mode & S_IWOTH) ++ && is_colored (C_STICKY_OTHER_WRITABLE)) ++ type = C_STICKY_OTHER_WRITABLE; ++ else if ((mode & S_IWOTH) != 0 && is_colored (C_OTHER_WRITABLE)) ++ type = C_OTHER_WRITABLE; ++ else if ((mode & S_ISVTX) != 0 && is_colored (C_STICKY)) ++ type = C_STICKY; ++ } ++ else if (S_ISLNK (mode)) ++ type = ((!linkok && color_indicator[C_ORPHAN].string) ++ ? C_ORPHAN : C_LINK); ++ else if (S_ISFIFO (mode)) ++ type = C_FIFO; ++ else if (S_ISSOCK (mode)) ++ type = C_SOCK; ++ else if (S_ISBLK (mode)) ++ type = C_BLK; ++ else if (S_ISCHR (mode)) ++ type = C_CHR; ++ else if (S_ISDOOR (mode)) ++ type = C_DOOR; ++ else ++ { ++ /* Classify a file of some other type as C_ORPHAN. */ ++ type = C_ORPHAN; ++ } ++ } ++ ++ /* Check the file's suffix only if still classified as C_FILE. */ ++ ext = NULL; ++ if (type == C_FILE) ++ { ++ /* Test if NAME has a recognized suffix. */ ++ ++ len = strlen (name); ++ name += len; /* Pointer to final \0. */ ++ for (ext = color_ext_list; ext != NULL; ext = ext->next) ++ { ++ if (ext->ext.len <= len ++ && strncmp (name - ext->ext.len, ext->ext.string, ++ ext->ext.len) == 0) ++ break; ++ } ++ } ++ ++ { ++ const struct bin_str *const s ++ = ext ? &(ext->seq) : &color_indicator[type]; ++ if (s->string != NULL) ++ { ++ put_indicator (&color_indicator[C_LEFT]); ++ put_indicator (s); ++ put_indicator (&color_indicator[C_RIGHT]); ++ return true; ++ } ++ else ++ return false; ++ } ++} ++ ++/* Output a color indicator (which may contain nulls). */ ++static void ++put_indicator (const struct bin_str *ind) ++{ ++ if (! 
used_color) ++ { ++ used_color = true; ++ prep_non_filename_text (); ++ } ++ ++ fwrite (ind->string, ind->len, 1, stdout); ++} ++ ++static size_t ++length_of_file_name_and_frills (const struct fileinfo *f) ++{ ++ size_t len = 0; ++ size_t name_width; ++ char buf[MAX (LONGEST_HUMAN_READABLE + 1, INT_BUFSIZE_BOUND (uintmax_t))]; ++ ++ if (print_inode) ++ len += 1 + (format == with_commas ++ ? strlen (umaxtostr (f->stat.st_ino, buf)) ++ : inode_number_width); ++ ++ if (print_block_size) ++ len += 1 + (format == with_commas ++ ? strlen (! f->stat_ok ? "?" ++ : human_readable (ST_NBLOCKS (f->stat), buf, ++ human_output_opts, ST_NBLOCKSIZE, ++ output_block_size)) ++ : block_size_width); ++ ++ if (print_scontext) ++ len += 1 + (format == with_commas ? strlen (f->scontext) : scontext_width); ++ ++ quote_name (NULL, f->name, filename_quoting_options, &name_width); ++ len += name_width; ++ ++ if (indicator_style != none) ++ { ++ char c = get_type_indicator (f->stat_ok, f->stat.st_mode, f->filetype); ++ len += (c != 0); ++ } ++ ++ return len; ++} ++ ++static void ++print_many_per_line (void) ++{ ++ size_t row; /* Current row. */ ++ size_t cols = calculate_columns (true); ++ struct column_info const *line_fmt = &column_info[cols - 1]; ++ ++ /* Calculate the number of rows that will be in each column except possibly ++ for a short column on the right. */ ++ size_t rows = cwd_n_used / cols + (cwd_n_used % cols != 0); ++ ++ for (row = 0; row < rows; row++) ++ { ++ size_t col = 0; ++ size_t filesno = row; ++ size_t pos = 0; ++ ++ /* Print the next row. 
*/ ++ while (1) ++ { ++ struct fileinfo const *f = sorted_file[filesno]; ++ size_t name_length = length_of_file_name_and_frills (f); ++ size_t max_name_length = line_fmt->col_arr[col++]; ++ print_file_name_and_frills (f, pos); ++ ++ filesno += rows; ++ if (filesno >= cwd_n_used) ++ break; ++ ++ indent (pos + name_length, pos + max_name_length); ++ pos += max_name_length; ++ } ++ putchar ('\n'); ++ } ++} ++ ++static void ++print_horizontal (void) ++{ ++ size_t filesno; ++ size_t pos = 0; ++ size_t cols = calculate_columns (false); ++ struct column_info const *line_fmt = &column_info[cols - 1]; ++ struct fileinfo const *f = sorted_file[0]; ++ size_t name_length = length_of_file_name_and_frills (f); ++ size_t max_name_length = line_fmt->col_arr[0]; ++ ++ /* Print first entry. */ ++ print_file_name_and_frills (f, 0); ++ ++ /* Now the rest. */ ++ for (filesno = 1; filesno < cwd_n_used; ++filesno) ++ { ++ size_t col = filesno % cols; ++ ++ if (col == 0) ++ { ++ putchar ('\n'); ++ pos = 0; ++ } ++ else ++ { ++ indent (pos + name_length, pos + max_name_length); ++ pos += max_name_length; ++ } ++ ++ f = sorted_file[filesno]; ++ print_file_name_and_frills (f, pos); ++ ++ name_length = length_of_file_name_and_frills (f); ++ max_name_length = line_fmt->col_arr[col]; ++ } ++ putchar ('\n'); ++} ++ ++static void ++print_with_commas (void) ++{ ++ size_t filesno; ++ size_t pos = 0; ++ ++ for (filesno = 0; filesno < cwd_n_used; filesno++) ++ { ++ struct fileinfo const *f = sorted_file[filesno]; ++ size_t len = length_of_file_name_and_frills (f); ++ ++ if (filesno != 0) ++ { ++ char separator; ++ ++ if (pos + len + 2 < line_length) ++ { ++ pos += 2; ++ separator = ' '; ++ } ++ else ++ { ++ pos = 0; ++ separator = '\n'; ++ } ++ ++ putchar (','); ++ putchar (separator); ++ } ++ ++ print_file_name_and_frills (f, pos); ++ pos += len; ++ } ++ putchar ('\n'); ++} ++ ++/* Assuming cursor is at position FROM, indent up to position TO. 
++ Use a TAB character instead of two or more spaces whenever possible. */ ++ ++static void ++indent (size_t from, size_t to) ++{ ++ while (from < to) ++ { ++ if (tabsize != 0 && to / tabsize > (from + 1) / tabsize) ++ { ++ putchar ('\t'); ++ from += tabsize - from % tabsize; ++ } ++ else ++ { ++ putchar (' '); ++ from++; ++ } ++ } ++} ++ ++/* Put DIRNAME/NAME into DEST, handling `.' and `/' properly. */ ++/* FIXME: maybe remove this function someday. See about using a ++ non-malloc'ing version of file_name_concat. */ ++ ++static void ++attach (char *dest, const char *dirname, const char *name) ++{ ++ const char *dirnamep = dirname; ++ ++ /* Copy dirname if it is not ".". */ ++ if (dirname[0] != '.' || dirname[1] != 0) ++ { ++ while (*dirnamep) ++ *dest++ = *dirnamep++; ++ /* Add '/' if `dirname' doesn't already end with it. */ ++ if (dirnamep > dirname && dirnamep[-1] != '/') ++ *dest++ = '/'; ++ } ++ while (*name) ++ *dest++ = *name++; ++ *dest = 0; ++} ++ ++/* Allocate enough column info suitable for the current number of ++ files and display columns, and initialize the info to represent the ++ narrowest possible columns. */ ++ ++static void ++init_column_info (void) ++{ ++ size_t i; ++ size_t max_cols = MIN (max_idx, cwd_n_used); ++ ++ /* Currently allocated columns in column_info. */ ++ static size_t column_info_alloc; ++ ++ if (column_info_alloc < max_cols) ++ { ++ size_t new_column_info_alloc; ++ size_t *p; ++ ++ if (max_cols < max_idx / 2) ++ { ++ /* The number of columns is far less than the display width ++ allows. Grow the allocation, but only so that it's ++ double the current requirements. If the display is ++ extremely wide, this avoids allocating a lot of memory ++ that is never needed. 
*/ ++ column_info = xnrealloc (column_info, max_cols, ++ 2 * sizeof *column_info); ++ new_column_info_alloc = 2 * max_cols; ++ } ++ else ++ { ++ column_info = xnrealloc (column_info, max_idx, sizeof *column_info); ++ new_column_info_alloc = max_idx; ++ } ++ ++ /* Allocate the new size_t objects by computing the triangle ++ formula n * (n + 1) / 2, except that we don't need to ++ allocate the part of the triangle that we've already ++ allocated. Check for address arithmetic overflow. */ ++ { ++ size_t column_info_growth = new_column_info_alloc - column_info_alloc; ++ size_t s = column_info_alloc + 1 + new_column_info_alloc; ++ size_t t = s * column_info_growth; ++ if (s < new_column_info_alloc || t / column_info_growth != s) ++ xalloc_die (); ++ p = xnmalloc (t / 2, sizeof *p); ++ } ++ ++ /* Grow the triangle by parceling out the cells just allocated. */ ++ for (i = column_info_alloc; i < new_column_info_alloc; i++) ++ { ++ column_info[i].col_arr = p; ++ p += i + 1; ++ } ++ ++ column_info_alloc = new_column_info_alloc; ++ } ++ ++ for (i = 0; i < max_cols; ++i) ++ { ++ size_t j; ++ ++ column_info[i].valid_len = true; ++ column_info[i].line_len = (i + 1) * MIN_COLUMN_WIDTH; ++ for (j = 0; j <= i; ++j) ++ column_info[i].col_arr[j] = MIN_COLUMN_WIDTH; ++ } ++} ++ ++/* Calculate the number of columns needed to represent the current set ++ of files in the current display width. */ ++ ++static size_t ++calculate_columns (bool by_columns) ++{ ++ size_t filesno; /* Index into cwd_file. */ ++ size_t cols; /* Number of files across. */ ++ ++ /* Normally the maximum number of columns is determined by the ++ screen width. But if few files are available this might limit it ++ as well. */ ++ size_t max_cols = MIN (max_idx, cwd_n_used); ++ ++ init_column_info (); ++ ++ /* Compute the maximum number of possible columns. 
*/ ++ for (filesno = 0; filesno < cwd_n_used; ++filesno) ++ { ++ struct fileinfo const *f = sorted_file[filesno]; ++ size_t name_length = length_of_file_name_and_frills (f); ++ size_t i; ++ ++ for (i = 0; i < max_cols; ++i) ++ { ++ if (column_info[i].valid_len) ++ { ++ size_t idx = (by_columns ++ ? filesno / ((cwd_n_used + i) / (i + 1)) ++ : filesno % (i + 1)); ++ size_t real_length = name_length + (idx == i ? 0 : 2); ++ ++ if (column_info[i].col_arr[idx] < real_length) ++ { ++ column_info[i].line_len += (real_length ++ - column_info[i].col_arr[idx]); ++ column_info[i].col_arr[idx] = real_length; ++ column_info[i].valid_len = (column_info[i].line_len ++ < line_length); ++ } ++ } ++ } ++ } ++ ++ /* Find maximum allowed columns. */ ++ for (cols = max_cols; 1 < cols; --cols) ++ { ++ if (column_info[cols - 1].valid_len) ++ break; ++ } ++ ++ return cols; ++} ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name); ++ fputs (_("\ ++List information about the FILEs (the current directory by default).\n\ ++Sort entries alphabetically if none of -cftuvSUX nor --sort.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ -a, --all do not ignore entries starting with .\n\ ++ -A, --almost-all do not list implied . and ..\n\ ++ --author with -l, print the author of each file\n\ ++ -b, --escape print octal escapes for nongraphic characters\n\ ++"), stdout); ++ fputs (_("\ ++ --block-size=SIZE use SIZE-byte blocks. 
See SIZE format below\n\ ++ -B, --ignore-backups do not list implied entries ending with ~\n\ ++ -c with -lt: sort by, and show, ctime (time of last\n\ ++ modification of file status information)\n\ ++ with -l: show ctime and sort by name\n\ ++ otherwise: sort by ctime\n\ ++"), stdout); ++ fputs (_("\ ++ -C list entries by columns\n\ ++ --color[=WHEN] colorize the output. WHEN defaults to `always'\n\ ++ or can be `never' or `auto'. More info below\n\ ++ -d, --directory list directory entries instead of contents,\n\ ++ and do not dereference symbolic links\n\ ++ -D, --dired generate output designed for Emacs' dired mode\n\ ++"), stdout); ++ fputs (_("\ ++ -f do not sort, enable -aU, disable -ls --color\n\ ++ -F, --classify append indicator (one of */=>@|) to entries\n\ ++ --file-type likewise, except do not append `*'\n\ ++ --format=WORD across -x, commas -m, horizontal -x, long -l,\n\ ++ single-column -1, verbose -l, vertical -C\n\ ++ --full-time like -l --time-style=full-iso\n\ ++"), stdout); ++ fputs (_("\ ++ -g like -l, but do not list owner\n\ ++"), stdout); ++ fputs (_("\ ++ --group-directories-first\n\ ++ group directories before files.\n\ ++ augment with a --sort option, but any\n\ ++ use of --sort=none (-U) disables grouping\n\ ++"), stdout); ++ fputs (_("\ ++ -G, --no-group in a long listing, don't print group names\n\ ++ -h, --human-readable with -l, print sizes in human readable format\n\ ++ (e.g., 1K 234M 2G)\n\ ++ --si likewise, but use powers of 1000 not 1024\n\ ++"), stdout); ++ fputs (_("\ ++ -H, --dereference-command-line\n\ ++ follow symbolic links listed on the command line\n\ ++ --dereference-command-line-symlink-to-dir\n\ ++ follow each command line symbolic link\n\ ++ that points to a directory\n\ ++ --hide=PATTERN do not list implied entries matching shell PATTERN\n\ ++ (overridden by -a or -A)\n\ ++"), stdout); ++ fputs (_("\ ++ --indicator-style=WORD append indicator with style WORD to entry names:\n\ ++ none (default), slash (-p),\n\ ++ 
file-type (--file-type), classify (-F)\n\ ++ -i, --inode print the index number of each file\n\ ++ -I, --ignore=PATTERN do not list implied entries matching shell PATTERN\n\ ++ -k like --block-size=1K\n\ ++"), stdout); ++ fputs (_("\ ++ -l use a long listing format\n\ ++ -L, --dereference when showing file information for a symbolic\n\ ++ link, show information for the file the link\n\ ++ references rather than for the link itself\n\ ++ -m fill width with a comma separated list of entries\n\ ++"), stdout); ++ fputs (_("\ ++ -n, --numeric-uid-gid like -l, but list numeric user and group IDs\n\ ++ -N, --literal print raw entry names (don't treat e.g. control\n\ ++ characters specially)\n\ ++ -o like -l, but do not list group information\n\ ++ -p, --indicator-style=slash\n\ ++ append / indicator to directories\n\ ++"), stdout); ++ fputs (_("\ ++ -q, --hide-control-chars print ? instead of non graphic characters\n\ ++ --show-control-chars show non graphic characters as-is (default\n\ ++ unless program is `ls' and output is a terminal)\n\ ++ -Q, --quote-name enclose entry names in double quotes\n\ ++ --quoting-style=WORD use quoting style WORD for entry names:\n\ ++ literal, locale, shell, shell-always, c, escape\n\ ++"), stdout); ++ fputs (_("\ ++ -r, --reverse reverse order while sorting\n\ ++ -R, --recursive list subdirectories recursively\n\ ++ -s, --size print the allocated size of each file, in blocks\n\ ++"), stdout); ++ fputs (_("\ ++ -S sort by file size\n\ ++ --sort=WORD sort by WORD instead of name: none -U,\n\ ++ extension -X, size -S, time -t, version -v\n\ ++ --time=WORD with -l, show time as WORD instead of modification\n\ ++ time: atime -u, access -u, use -u, ctime -c,\n\ ++ or status -c; use specified time as sort key\n\ ++ if --sort=time\n\ ++"), stdout); ++ fputs (_("\ ++ --time-style=STYLE with -l, show times using style STYLE:\n\ ++ full-iso, long-iso, iso, locale, +FORMAT.\n\ ++ FORMAT is interpreted like `date'; if FORMAT is\n\ ++ FORMAT1FORMAT2, 
FORMAT1 applies to\n\ ++ non-recent files and FORMAT2 to recent files;\n\ ++ if STYLE is prefixed with `posix-', STYLE\n\ ++ takes effect only outside the POSIX locale\n\ ++"), stdout); ++ fputs (_("\ ++ -t sort by modification time\n\ ++ -T, --tabsize=COLS assume tab stops at each COLS instead of 8\n\ ++"), stdout); ++ fputs (_("\ ++ -u with -lt: sort by, and show, access time\n\ ++ with -l: show access time and sort by name\n\ ++ otherwise: sort by access time\n\ ++ -U do not sort; list entries in directory order\n\ ++ -v natural sort of (version) numbers within text\n\ ++"), stdout); ++ fputs (_("\ ++ -w, --width=COLS assume screen width instead of current value\n\ ++ -x list entries by lines instead of by columns\n\ ++ -X sort alphabetically by entry extension\n\ ++ -Z, --context print any SELinux security context of each file\n\ ++ -1 list one file per line\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ emit_size_note (); ++ fputs (_("\ ++\n\ ++Using color to distinguish file types is disabled both by default and\n\ ++with --color=never. With --color=auto, ls emits color codes only when\n\ ++standard output is connected to a terminal. The LS_COLORS environment\n\ ++variable can change the settings. 
Use the dircolors command to set it.\n\ ++"), stdout); ++ fputs (_("\ ++\n\ ++Exit status:\n\ ++ 0 if OK,\n\ ++ 1 if minor problems (e.g., cannot access subdirectory),\n\ ++ 2 if serious trouble (e.g., cannot access command-line argument).\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} +diff -urNp coreutils-8.0-orig/src/mkdir.c coreutils-8.0/src/mkdir.c +--- coreutils-8.0-orig/src/mkdir.c 2009-09-23 10:25:44.000000000 +0200 ++++ coreutils-8.0/src/mkdir.c 2009-10-07 10:10:11.000000000 +0200 @@ -38,6 +38,7 @@ static struct option const longopts[] = { @@ -621,9 +10757,9 @@ diff -urNp coreutils-7.1-orig/src/mkdir.c coreutils-7.1/src/mkdir.c {"mode", required_argument, NULL, 'm'}, {"parents", no_argument, NULL, 'p'}, {"verbose", no_argument, NULL, 'v'}, -diff -urNp coreutils-7.1-orig/src/mknod.c coreutils-7.1/src/mknod.c ---- coreutils-7.1-orig/src/mknod.c 2008-09-22 16:01:21.000000000 +0200 -+++ coreutils-7.1/src/mknod.c 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/src/mknod.c coreutils-8.0/src/mknod.c +--- coreutils-8.0-orig/src/mknod.c 2009-09-23 10:25:44.000000000 +0200 ++++ coreutils-8.0/src/mknod.c 2009-10-07 10:10:11.000000000 +0200 @@ -35,7 +35,7 @@ static struct option const longopts[] = @@ -633,10 +10769,10 @@ diff -urNp coreutils-7.1-orig/src/mknod.c coreutils-7.1/src/mknod.c {"mode", required_argument, NULL, 'm'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, -diff -urNp coreutils-7.1-orig/src/mv.c coreutils-7.1/src/mv.c ---- coreutils-7.1-orig/src/mv.c 2009-02-18 15:32:52.000000000 +0100 -+++ coreutils-7.1/src/mv.c 2009-02-24 13:47:15.000000000 +0100 -@@ -122,6 +122,7 @@ cp_option_init (struct cp_options *x) +diff -urNp coreutils-8.0-orig/src/mv.c coreutils-8.0/src/mv.c +--- coreutils-8.0-orig/src/mv.c 2009-09-23 10:25:44.000000000 +0200 ++++ coreutils-8.0/src/mv.c 2009-10-07 10:10:11.000000000 +0200 +@@ -118,6 +118,7 @@ cp_option_init (struct cp_options *x) x->preserve_mode = true; 
x->preserve_timestamps = true; x->preserve_security_context = selinux_enabled; @@ -644,9 +10780,508 @@ diff -urNp coreutils-7.1-orig/src/mv.c coreutils-7.1/src/mv.c x->reduce_diagnostics = false; x->require_preserve = false; /* FIXME: maybe make this an option */ x->require_preserve_context = false; -diff -urNp coreutils-7.1-orig/src/runcon.c coreutils-7.1/src/runcon.c ---- coreutils-7.1-orig/src/runcon.c 2008-09-18 09:06:57.000000000 +0200 -+++ coreutils-7.1/src/runcon.c 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/src/mv.c.orig coreutils-8.0/src/mv.c.orig +--- coreutils-8.0-orig/src/mv.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/mv.c.orig 2009-09-23 10:25:44.000000000 +0200 +@@ -0,0 +1,495 @@ ++/* mv -- move or rename files ++ Copyright (C) 86, 89, 90, 91, 1995-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* Written by Mike Parker, David MacKenzie, and Jim Meyering */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "system.h" ++#include "backupfile.h" ++#include "copy.h" ++#include "cp-hash.h" ++#include "error.h" ++#include "filenamecat.h" ++#include "quote.h" ++#include "remove.h" ++#include "root-dev-ino.h" ++#include "priv-set.h" ++ ++/* The official name of this program (e.g., no `g' prefix). 
*/ ++#define PROGRAM_NAME "mv" ++ ++#define AUTHORS \ ++ proper_name ("Mike Parker"), \ ++ proper_name ("David MacKenzie"), \ ++ proper_name ("Jim Meyering") ++ ++/* For long options that have no equivalent short option, use a ++ non-character as a pseudo short option, starting with CHAR_MAX + 1. */ ++enum ++{ ++ STRIP_TRAILING_SLASHES_OPTION = CHAR_MAX + 1 ++}; ++ ++/* Remove any trailing slashes from each SOURCE argument. */ ++static bool remove_trailing_slashes; ++ ++static struct option const long_options[] = ++{ ++ {"backup", optional_argument, NULL, 'b'}, ++ {"force", no_argument, NULL, 'f'}, ++ {"interactive", no_argument, NULL, 'i'}, ++ {"no-clobber", no_argument, NULL, 'n'}, ++ {"no-target-directory", no_argument, NULL, 'T'}, ++ {"strip-trailing-slashes", no_argument, NULL, STRIP_TRAILING_SLASHES_OPTION}, ++ {"suffix", required_argument, NULL, 'S'}, ++ {"target-directory", required_argument, NULL, 't'}, ++ {"update", no_argument, NULL, 'u'}, ++ {"verbose", no_argument, NULL, 'v'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++static void ++rm_option_init (struct rm_options *x) ++{ ++ x->ignore_missing_files = false; ++ x->recursive = true; ++ x->one_file_system = false; ++ ++ /* Should we prompt for removal, too? No. Prompting for the `move' ++ part is enough. It implies removal. */ ++ x->interactive = RMI_NEVER; ++ x->stdin_tty = false; ++ ++ x->verbose = false; ++ ++ /* Since this program may well have to process additional command ++ line arguments after any call to `rm', that function must preserve ++ the initial working directory, in case one of those is a ++ `.'-relative name. 
*/ ++ x->require_restore_cwd = true; ++ ++ { ++ static struct dev_ino dev_ino_buf; ++ x->root_dev_ino = get_root_dev_ino (&dev_ino_buf); ++ if (x->root_dev_ino == NULL) ++ error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), ++ quote ("/")); ++ } ++} ++ ++static void ++cp_option_init (struct cp_options *x) ++{ ++ bool selinux_enabled = (0 < is_selinux_enabled ()); ++ ++ cp_options_default (x); ++ x->copy_as_regular = false; /* FIXME: maybe make this an option */ ++ x->reflink_mode = REFLINK_NEVER; ++ x->dereference = DEREF_NEVER; ++ x->unlink_dest_before_opening = false; ++ x->unlink_dest_after_failed_open = false; ++ x->hard_link = false; ++ x->interactive = I_UNSPECIFIED; ++ x->move_mode = true; ++ x->one_file_system = false; ++ x->preserve_ownership = true; ++ x->preserve_links = true; ++ x->preserve_mode = true; ++ x->preserve_timestamps = true; ++ x->preserve_security_context = selinux_enabled; ++ x->reduce_diagnostics = false; ++ x->require_preserve = false; /* FIXME: maybe make this an option */ ++ x->require_preserve_context = false; ++ x->preserve_xattr = true; ++ x->require_preserve_xattr = false; ++ x->recursive = true; ++ x->sparse_mode = SPARSE_AUTO; /* FIXME: maybe make this an option */ ++ x->symbolic_link = false; ++ x->set_mode = false; ++ x->mode = 0; ++ x->stdin_tty = isatty (STDIN_FILENO); ++ ++ x->open_dangling_dest_symlink = false; ++ x->update = false; ++ x->verbose = false; ++ x->dest_info = NULL; ++ x->src_info = NULL; ++} ++ ++/* FILE is the last operand of this command. Return true if FILE is a ++ directory. But report an error if there is a problem accessing FILE, other ++ than nonexistence (errno == ENOENT). */ ++ ++static bool ++target_directory_operand (char const *file) ++{ ++ struct stat st; ++ int err = (stat (file, &st) == 0 ? 
0 : errno); ++ bool is_a_dir = !err && S_ISDIR (st.st_mode); ++ if (err && err != ENOENT) ++ error (EXIT_FAILURE, err, _("accessing %s"), quote (file)); ++ return is_a_dir; ++} ++ ++/* Move SOURCE onto DEST. Handles cross-file-system moves. ++ If SOURCE is a directory, DEST must not exist. ++ Return true if successful. */ ++ ++static bool ++do_move (const char *source, const char *dest, const struct cp_options *x) ++{ ++ bool copy_into_self; ++ bool rename_succeeded; ++ bool ok = copy (source, dest, false, x, ©_into_self, &rename_succeeded); ++ ++ if (ok) ++ { ++ char const *dir_to_remove; ++ if (copy_into_self) ++ { ++ /* In general, when copy returns with copy_into_self set, SOURCE is ++ the same as, or a parent of DEST. In this case we know it's a ++ parent. It doesn't make sense to move a directory into itself, and ++ besides in some situations doing so would give highly nonintuitive ++ results. Run this `mkdir b; touch a c; mv * b' in an empty ++ directory. Here's the result of running echo `find b -print`: ++ b b/a b/b b/b/a b/c. Notice that only file `a' was copied ++ into b/b. Handle this by giving a diagnostic, removing the ++ copied-into-self directory, DEST (`b/b' in the example), ++ and failing. */ ++ ++ dir_to_remove = NULL; ++ ok = false; ++ } ++ else if (rename_succeeded) ++ { ++ /* No need to remove anything. SOURCE was successfully ++ renamed to DEST. Or the user declined to rename a file. */ ++ dir_to_remove = NULL; ++ } ++ else ++ { ++ /* This may mean SOURCE and DEST referred to different devices. ++ It may also conceivably mean that even though they referred ++ to the same device, rename wasn't implemented for that device. ++ ++ E.g., (from Joel N. Weber), ++ [...] there might someday be cases where you can't rename ++ but you can copy where the device name is the same, especially ++ on Hurd. Consider an ftpfs with a primitive ftp server that ++ supports uploading, downloading and deleting, but not renaming. 
++ ++ Also, note that comparing device numbers is not a reliable ++ check for `can-rename'. Some systems can be set up so that ++ files from many different physical devices all have the same ++ st_dev field. This is a feature of some NFS mounting ++ configurations. ++ ++ We reach this point if SOURCE has been successfully copied ++ to DEST. Now we have to remove SOURCE. ++ ++ This function used to resort to copying only when rename ++ failed and set errno to EXDEV. */ ++ ++ dir_to_remove = source; ++ } ++ ++ if (dir_to_remove != NULL) ++ { ++ struct rm_options rm_options; ++ enum RM_status status; ++ char const *dir[2]; ++ ++ rm_option_init (&rm_options); ++ rm_options.verbose = x->verbose; ++ dir[0] = dir_to_remove; ++ dir[1] = NULL; ++ ++ status = rm ((void*) dir, &rm_options); ++ assert (VALID_STATUS (status)); ++ if (status == RM_ERROR) ++ ok = false; ++ } ++ } ++ ++ return ok; ++} ++ ++/* Move file SOURCE onto DEST. Handles the case when DEST is a directory. ++ Treat DEST as a directory if DEST_IS_DIR. ++ Return true if successful. */ ++ ++static bool ++movefile (char *source, char *dest, bool dest_is_dir, ++ const struct cp_options *x) ++{ ++ bool ok; ++ ++ /* This code was introduced to handle the ambiguity in the semantics ++ of mv that is induced by the varying semantics of the rename function. ++ Some systems (e.g., GNU/Linux) have a rename function that honors a ++ trailing slash, while others (like Solaris 5,6,7) have a rename ++ function that ignores a trailing slash. I believe the GNU/Linux ++ rename semantics are POSIX and susv2 compliant. */ ++ ++ if (remove_trailing_slashes) ++ strip_trailing_slashes (source); ++ ++ if (dest_is_dir) ++ { ++ /* Treat DEST as a directory; build the full filename. 
*/ ++ char const *src_basename = last_component (source); ++ char *new_dest = file_name_concat (dest, src_basename, NULL); ++ strip_trailing_slashes (new_dest); ++ ok = do_move (source, new_dest, x); ++ free (new_dest); ++ } ++ else ++ { ++ ok = do_move (source, dest, x); ++ } ++ ++ return ok; ++} ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("\ ++Usage: %s [OPTION]... [-T] SOURCE DEST\n\ ++ or: %s [OPTION]... SOURCE... DIRECTORY\n\ ++ or: %s [OPTION]... -t DIRECTORY SOURCE...\n\ ++"), ++ program_name, program_name, program_name); ++ fputs (_("\ ++Rename SOURCE to DEST, or move SOURCE(s) to DIRECTORY.\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++Mandatory arguments to long options are mandatory for short options too.\n\ ++"), stdout); ++ fputs (_("\ ++ --backup[=CONTROL] make a backup of each existing destination file\n\ ++ -b like --backup but does not accept an argument\n\ ++ -f, --force do not prompt before overwriting\n\ ++ -i, --interactive prompt before overwrite\n\ ++ -n, --no-clobber do not overwrite an existing file\n\ ++If you specify more than one of -i, -f, -n, only the final one takes effect.\n\ ++"), stdout); ++ fputs (_("\ ++ --strip-trailing-slashes remove any trailing slashes from each SOURCE\n\ ++ argument\n\ ++ -S, --suffix=SUFFIX override the usual backup suffix\n\ ++"), stdout); ++ fputs (_("\ ++ -t, --target-directory=DIRECTORY move all SOURCE arguments into DIRECTORY\n\ ++ -T, --no-target-directory treat DEST as a normal file\n\ ++ -u, --update move only when the SOURCE file is newer\n\ ++ than the destination file or when the\n\ ++ destination file is missing\n\ ++ -v, --verbose explain what is being done\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ fputs (_("\ ++\n\ ++The backup suffix is `~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX.\n\ ++The 
version control method may be selected via the --backup option or through\n\ ++the VERSION_CONTROL environment variable. Here are the values:\n\ ++\n\ ++"), stdout); ++ fputs (_("\ ++ none, off never make backups (even if --backup is given)\n\ ++ numbered, t make numbered backups\n\ ++ existing, nil numbered if numbered backups exist, simple otherwise\n\ ++ simple, never always make simple backups\n\ ++"), stdout); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int c; ++ bool ok; ++ bool make_backups = false; ++ char *backup_suffix_string; ++ char *version_control_string = NULL; ++ struct cp_options x; ++ char *target_directory = NULL; ++ bool no_target_directory = false; ++ int n_files; ++ char **file; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdin); ++ ++ cp_option_init (&x); ++ ++ /* Try to disable the ability to unlink a directory. */ ++ priv_set_remove_linkdir (); ++ ++ /* FIXME: consider not calling getenv for SIMPLE_BACKUP_SUFFIX unless ++ we'll actually use backup_suffix_string. */ ++ backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX"); ++ ++ while ((c = getopt_long (argc, argv, "bfint:uvS:T", long_options, NULL)) ++ != -1) ++ { ++ switch (c) ++ { ++ case 'b': ++ make_backups = true; ++ if (optarg) ++ version_control_string = optarg; ++ break; ++ case 'f': ++ x.interactive = I_ALWAYS_YES; ++ break; ++ case 'i': ++ x.interactive = I_ASK_USER; ++ break; ++ case 'n': ++ x.interactive = I_ALWAYS_NO; ++ break; ++ case STRIP_TRAILING_SLASHES_OPTION: ++ remove_trailing_slashes = true; ++ break; ++ case 't': ++ if (target_directory) ++ error (EXIT_FAILURE, 0, _("multiple target directories specified")); ++ else ++ { ++ struct stat st; ++ if (stat (optarg, &st) != 0) ++ error (EXIT_FAILURE, errno, _("accessing %s"), quote (optarg)); ++ if (! 
S_ISDIR (st.st_mode)) ++ error (EXIT_FAILURE, 0, _("target %s is not a directory"), ++ quote (optarg)); ++ } ++ target_directory = optarg; ++ break; ++ case 'T': ++ no_target_directory = true; ++ break; ++ case 'u': ++ x.update = true; ++ break; ++ case 'v': ++ x.verbose = true; ++ break; ++ case 'S': ++ make_backups = true; ++ backup_suffix_string = optarg; ++ break; ++ case_GETOPT_HELP_CHAR; ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ n_files = argc - optind; ++ file = argv + optind; ++ ++ if (n_files <= !target_directory) ++ { ++ if (n_files <= 0) ++ error (0, 0, _("missing file operand")); ++ else ++ error (0, 0, _("missing destination file operand after %s"), ++ quote (file[0])); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (no_target_directory) ++ { ++ if (target_directory) ++ error (EXIT_FAILURE, 0, ++ _("cannot combine --target-directory (-t) " ++ "and --no-target-directory (-T)")); ++ if (2 < n_files) ++ { ++ error (0, 0, _("extra operand %s"), quote (file[2])); ++ usage (EXIT_FAILURE); ++ } ++ } ++ else if (!target_directory) ++ { ++ assert (2 <= n_files); ++ if (target_directory_operand (file[n_files - 1])) ++ target_directory = file[--n_files]; ++ else if (2 < n_files) ++ error (EXIT_FAILURE, 0, _("target %s is not a directory"), ++ quote (file[n_files - 1])); ++ } ++ ++ if (make_backups && x.interactive == I_ALWAYS_NO) ++ { ++ error (0, 0, ++ _("options --backup and --no-clobber are mutually exclusive")); ++ usage (EXIT_FAILURE); ++ } ++ ++ if (backup_suffix_string) ++ simple_backup_suffix = xstrdup (backup_suffix_string); ++ ++ x.backup_type = (make_backups ++ ? xget_version (_("backup type"), ++ version_control_string) ++ : no_backups); ++ ++ hash_init (); ++ ++ if (target_directory) ++ { ++ int i; ++ ++ /* Initialize the hash table only if we'll need it. ++ The problem it is used to detect can arise only if there are ++ two or more files to move. 
*/ ++ if (2 <= n_files) ++ dest_info_init (&x); ++ ++ ok = true; ++ for (i = 0; i < n_files; ++i) ++ ok &= movefile (file[i], target_directory, true, &x); ++ } ++ else ++ ok = movefile (file[0], file[1], false, &x); ++ ++ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); ++} +diff -urNp coreutils-8.0-orig/src/runcon.c coreutils-8.0/src/runcon.c +--- coreutils-8.0-orig/src/runcon.c 2009-10-06 10:55:34.000000000 +0200 ++++ coreutils-8.0/src/runcon.c 2009-10-07 10:10:11.000000000 +0200 @@ -86,7 +86,7 @@ Usage: %s CONTEXT COMMAND [args]\n\ or: %s [ -c ] [-u USER] [-r ROLE] [-t TYPE] [-l RANGE] COMMAND [args]\n\ "), program_name, program_name); @@ -656,9 +11291,9 @@ diff -urNp coreutils-7.1-orig/src/runcon.c coreutils-7.1/src/runcon.c With neither CONTEXT nor COMMAND, print the current security context.\n\ \n\ CONTEXT Complete security context\n\ -diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c ---- coreutils-7.1-orig/src/stat.c 2009-01-27 22:11:25.000000000 +0100 -+++ coreutils-7.1/src/stat.c 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/src/stat.c coreutils-8.0/src/stat.c +--- coreutils-8.0-orig/src/stat.c 2009-09-29 16:25:44.000000000 +0200 ++++ coreutils-8.0/src/stat.c 2009-10-07 10:10:11.000000000 +0200 @@ -825,7 +825,7 @@ print_it (char const *format, char const /* Stat the file system and print what we find. */ @@ -668,7 +11303,7 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c { STRUCT_STATVFS statfsbuf; -@@ -837,15 +837,31 @@ do_statfs (char const *filename, bool te +@@ -844,15 +844,31 @@ do_statfs (char const *filename, bool te } if (format == NULL) @@ -678,22 +11313,20 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c - format = (terse - ? 
"%n %i %l %t %s %S %b %f %a %c %d\n" - : " File: \"%n\"\n" -- " ID: %-8i Namelen: %-7l Type: %T\n" -- "Block size: %-10s Fundamental block size: %S\n" -- "Blocks: Total: %-10b Free: %-10f Available: %a\n" -- "Inodes: Total: %-10c Free: %d\n"); + if (secure) + format = "%n %i %l %t %s %S %b %f %a %c %d %C\n"; + else + format = "%n %i %l %t %s %S %b %f %a %c %d\n"; - } ++ } + else + { + if (secure) + format = " File: \"%n\"\n" -+ " ID: %-8i Namelen: %-7l Type: %T\n" -+ "Block size: %-10s Fundamental block size: %S\n" -+ "Blocks: Total: %-10b Free: %-10f Available: %a\n" + " ID: %-8i Namelen: %-7l Type: %T\n" + "Block size: %-10s Fundamental block size: %S\n" + "Blocks: Total: %-10b Free: %-10f Available: %a\n" +- "Inodes: Total: %-10c Free: %d\n"); +- } + "Inodes: Total: %-10c Free: %d\n" + " S_Context: %C\n"; + else @@ -707,7 +11340,7 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c print_it (format, filename, print_statfs, &statfsbuf); return true; -@@ -853,7 +869,7 @@ do_statfs (char const *filename, bool te +@@ -860,7 +876,7 @@ do_statfs (char const *filename, bool te /* stat the file and print what we find */ static bool @@ -716,7 +11349,7 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c { struct stat statbuf; -@@ -866,9 +882,12 @@ do_stat (char const *filename, bool ters +@@ -881,9 +897,12 @@ do_stat (char const *filename, bool ters if (format == NULL) { if (terse) @@ -732,7 +11365,7 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c else { /* Temporary hack to match original output until conditional -@@ -885,12 +904,22 @@ do_stat (char const *filename, bool ters +@@ -900,12 +919,22 @@ do_stat (char const *filename, bool ters } else { @@ -761,7 +11394,7 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c } } } -@@ -911,6 +940,7 @@ usage (int status) +@@ -926,6 +955,7 @@ usage (int status) Display file or file system status.\n\ \n\ -L, --dereference follow links\n\ @@ -769,7 +11402,7 @@ 
diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c -f, --file-system display file system status instead of file status\n\ "), stdout); fputs (_("\ -@@ -995,6 +1025,7 @@ main (int argc, char *argv[]) +@@ -1010,6 +1040,7 @@ main (int argc, char *argv[]) int i; bool fs = false; bool terse = false; @@ -777,7 +11410,7 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c char *format = NULL; bool ok = true; -@@ -1034,13 +1065,13 @@ main (int argc, char *argv[]) +@@ -1049,13 +1080,13 @@ main (int argc, char *argv[]) terse = true; break; @@ -798,7 +11431,7 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c break; case_GETOPT_HELP_CHAR; -@@ -1060,8 +1091,8 @@ main (int argc, char *argv[]) +@@ -1075,8 +1106,8 @@ main (int argc, char *argv[]) for (i = optind; i < argc; i++) ok &= (fs @@ -809,9 +11442,1095 @@ diff -urNp coreutils-7.1-orig/src/stat.c coreutils-7.1/src/stat.c exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); } -diff -urNp coreutils-7.1-orig/tests/misc/selinux coreutils-7.1/tests/misc/selinux ---- coreutils-7.1-orig/tests/misc/selinux 2008-10-25 14:20:26.000000000 +0200 -+++ coreutils-7.1/tests/misc/selinux 2009-02-24 13:47:15.000000000 +0100 +diff -urNp coreutils-8.0-orig/src/stat.c.orig coreutils-8.0/src/stat.c.orig +--- coreutils-8.0-orig/src/stat.c.orig 1970-01-01 01:00:00.000000000 +0100 ++++ coreutils-8.0/src/stat.c.orig 2009-09-29 16:25:44.000000000 +0200 +@@ -0,0 +1,1082 @@ ++/* stat.c -- display file or file system status ++ Copyright (C) 2001-2009 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. 
++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . ++ ++ Written by Michael Meskes. */ ++ ++#include ++ ++/* Keep this conditional in sync with the similar conditional in ++ ../m4/stat-prog.m4. */ ++#if (STAT_STATVFS \ ++ && (HAVE_STRUCT_STATVFS_F_BASETYPE || HAVE_STRUCT_STATVFS_F_FSTYPENAME \ ++ || (! HAVE_STRUCT_STATFS_F_FSTYPENAME && HAVE_STRUCT_STATVFS_F_TYPE))) ++# define USE_STATVFS 1 ++#else ++# define USE_STATVFS 0 ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#if USE_STATVFS ++# include ++#elif HAVE_SYS_VFS_H ++# include ++#elif HAVE_SYS_MOUNT_H && HAVE_SYS_PARAM_H ++/* NOTE: freebsd5.0 needs sys/param.h and sys/mount.h for statfs. ++ It does have statvfs.h, but shouldn't use it, since it doesn't ++ HAVE_STRUCT_STATVFS_F_BASETYPE. So find a clean way to fix it. */ ++/* NetBSD 1.5.2 needs these, for the declaration of struct statfs. */ ++# include ++# include ++# if HAVE_NETINET_IN_H && HAVE_NFS_NFS_CLNT_H && HAVE_NFS_VFS_H ++/* Ultrix 4.4 needs these for the declaration of struct statfs. 
*/ ++# include ++# include ++# include ++# endif ++#elif HAVE_OS_H /* BeOS */ ++# include ++#endif ++#include ++ ++#include "system.h" ++ ++#include "error.h" ++#include "filemode.h" ++#include "file-type.h" ++#include "fs.h" ++#include "getopt.h" ++#include "quote.h" ++#include "quotearg.h" ++#include "stat-time.h" ++#include "strftime.h" ++#include "areadlink.h" ++ ++#define alignof(type) offsetof (struct { char c; type x; }, x) ++ ++#if USE_STATVFS ++# define STRUCT_STATVFS struct statvfs ++# define STRUCT_STATXFS_F_FSID_IS_INTEGER STRUCT_STATVFS_F_FSID_IS_INTEGER ++# define HAVE_STRUCT_STATXFS_F_TYPE HAVE_STRUCT_STATVFS_F_TYPE ++# if HAVE_STRUCT_STATVFS_F_NAMEMAX ++# define SB_F_NAMEMAX(S) ((S)->f_namemax) ++# endif ++# define STATFS statvfs ++# define STATFS_FRSIZE(S) ((S)->f_frsize) ++#else ++# define HAVE_STRUCT_STATXFS_F_TYPE HAVE_STRUCT_STATFS_F_TYPE ++# if HAVE_STRUCT_STATFS_F_NAMELEN ++# define SB_F_NAMEMAX(S) ((S)->f_namelen) ++# endif ++# define STATFS statfs ++# if HAVE_OS_H /* BeOS */ ++/* BeOS has a statvfs function, but it does not return sensible values ++ for f_files, f_ffree and f_favail, and lacks f_type, f_basetype and ++ f_fstypename. Use 'struct fs_info' instead. */ ++static int ++statfs (char const *filename, struct fs_info *buf) ++{ ++ dev_t device = dev_for_path (filename); ++ if (device < 0) ++ { ++ errno = (device == B_ENTRY_NOT_FOUND ? ENOENT ++ : device == B_BAD_VALUE ? EINVAL ++ : device == B_NAME_TOO_LONG ? ENAMETOOLONG ++ : device == B_NO_MEMORY ? ENOMEM ++ : device == B_FILE_ERROR ? EIO ++ : 0); ++ return -1; ++ } ++ /* If successful, buf->dev will be == device. 
*/ ++ return fs_stat_dev (device, buf); ++} ++# define f_fsid dev ++# define f_blocks total_blocks ++# define f_bfree free_blocks ++# define f_bavail free_blocks ++# define f_bsize io_size ++# define f_files total_nodes ++# define f_ffree free_nodes ++# define STRUCT_STATVFS struct fs_info ++# define STRUCT_STATXFS_F_FSID_IS_INTEGER true ++# define STATFS_FRSIZE(S) ((S)->block_size) ++# else ++# define STRUCT_STATVFS struct statfs ++# define STRUCT_STATXFS_F_FSID_IS_INTEGER STRUCT_STATFS_F_FSID_IS_INTEGER ++# define STATFS_FRSIZE(S) 0 ++# endif ++#endif ++ ++#ifdef SB_F_NAMEMAX ++# define OUT_NAMEMAX out_uint ++#else ++/* NetBSD 1.5.2 has neither f_namemax nor f_namelen. */ ++# define SB_F_NAMEMAX(S) "*" ++# define OUT_NAMEMAX out_string ++#endif ++ ++#if HAVE_STRUCT_STATVFS_F_BASETYPE ++# define STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME f_basetype ++#else ++# if HAVE_STRUCT_STATVFS_F_FSTYPENAME || HAVE_STRUCT_STATFS_F_FSTYPENAME ++# define STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME f_fstypename ++# elif HAVE_OS_H /* BeOS */ ++# define STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME fsh_name ++# endif ++#endif ++ ++/* FIXME: these are used by printf.c, too */ ++#define isodigit(c) ('0' <= (c) && (c) <= '7') ++#define octtobin(c) ((c) - '0') ++#define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : \ ++ (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0') ++ ++#define PROGRAM_NAME "stat" ++ ++#define AUTHORS proper_name ("Michael Meskes") ++ ++enum ++{ ++ PRINTF_OPTION = CHAR_MAX + 1 ++}; ++ ++static struct option const long_options[] = ++{ ++ {"context", no_argument, 0, 'Z'}, ++ {"dereference", no_argument, NULL, 'L'}, ++ {"file-system", no_argument, NULL, 'f'}, ++ {"format", required_argument, NULL, 'c'}, ++ {"printf", required_argument, NULL, PRINTF_OPTION}, ++ {"terse", no_argument, NULL, 't'}, ++ {GETOPT_HELP_OPTION_DECL}, ++ {GETOPT_VERSION_OPTION_DECL}, ++ {NULL, 0, NULL, 0} ++}; ++ ++/* Whether to follow symbolic links; True for --dereference (-L). 
*/ ++static bool follow_links; ++ ++/* Whether to interpret backslash-escape sequences. ++ True for --printf=FMT, not for --format=FMT (-c). */ ++static bool interpret_backslash_escapes; ++ ++/* The trailing delimiter string: ++ "" for --printf=FMT, "\n" for --format=FMT (-c). */ ++static char const *trailing_delim = ""; ++ ++/* Return the type of the specified file system. ++ Some systems have statfvs.f_basetype[FSTYPSZ] (AIX, HP-UX, and Solaris). ++ Others have statvfs.f_fstypename[_VFS_NAMELEN] (NetBSD 3.0). ++ Others have statfs.f_fstypename[MFSNAMELEN] (NetBSD 1.5.2). ++ Still others have neither and have to get by with f_type (GNU/Linux). ++ But f_type may only exist in statfs (Cygwin). */ ++static char const * ++human_fstype (STRUCT_STATVFS const *statfsbuf) ++{ ++#ifdef STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME ++ return statfsbuf->STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME; ++#else ++ switch (statfsbuf->f_type) ++ { ++# if defined __linux__ ++ ++ /* Compare with what's in libc: ++ f=/a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h ++ sed -n '/ADFS_SUPER_MAGIC/,/SYSFS_MAGIC/p' $f \ ++ | perl -n -e '/#define (.*?)_(?:SUPER_)MAGIC\s+0x(\S+)/' \ ++ -e 'and print "case S_MAGIC_$1: /\* 0x" . uc($2) . " *\/\n"' \ ++ | sort > sym_libc ++ perl -ne '/^\s+(case S_MAGIC_.*?): \/\* 0x(\S+) \*\//' \ ++ -e 'and do { $v=uc$2; print "$1: /\* 0x$v *\/\n"}' stat.c \ ++ | sort > sym_stat ++ diff -u sym_stat sym_libc ++ */ ++ ++ /* Also sync from the list in "man 2 statfs". */ ++ ++ /* IMPORTANT NOTE: Each of the following `case S_MAGIC_...:' ++ statements must be followed by a hexadecimal constant in ++ a comment. The S_MAGIC_... name and constant are automatically ++ combined to produce the #define directives in fs.h. 
*/ ++ ++ case S_MAGIC_ADFS: /* 0xADF5 */ ++ return "adfs"; ++ case S_MAGIC_AFFS: /* 0xADFF */ ++ return "affs"; ++ case S_MAGIC_AUTOFS: /* 0x187 */ ++ return "autofs"; ++ case S_MAGIC_BEFS: /* 0x42465331 */ ++ return "befs"; ++ case S_MAGIC_BFS: /* 0x1BADFACE */ ++ return "bfs"; ++ case S_MAGIC_BINFMT_MISC: /* 0x42494e4d */ ++ return "binfmt_misc"; ++ case S_MAGIC_CODA: /* 0x73757245 */ ++ return "coda"; ++ case S_MAGIC_COH: /* 0x012FF7B7 */ ++ return "coh"; ++ case S_MAGIC_CRAMFS: /* 0x28CD3D45 */ ++ return "cramfs"; ++ case S_MAGIC_DEVFS: /* 0x1373 */ ++ return "devfs"; ++ case S_MAGIC_DEVPTS: /* 0x1CD1 */ ++ return "devpts"; ++ case S_MAGIC_EFS: /* 0x414A53 */ ++ return "efs"; ++ case S_MAGIC_EXT: /* 0x137D */ ++ return "ext"; ++ case S_MAGIC_EXT2: /* 0xEF53 */ ++ return "ext2/ext3"; ++ case S_MAGIC_EXT2_OLD: /* 0xEF51 */ ++ return "ext2"; ++ case S_MAGIC_FAT: /* 0x4006 */ ++ return "fat"; ++ case S_MAGIC_FUSECTL: /* 0x65735543 */ ++ return "fusectl"; ++ case S_MAGIC_HPFS: /* 0xF995E849 */ ++ return "hpfs"; ++ case S_MAGIC_HUGETLBFS: /* 0x958458f6 */ ++ return "hugetlbfs"; ++ case S_MAGIC_ISOFS: /* 0x9660 */ ++ return "isofs"; ++ case S_MAGIC_ISOFS_R_WIN: /* 0x4004 */ ++ return "isofs"; ++ case S_MAGIC_ISOFS_WIN: /* 0x4000 */ ++ return "isofs"; ++ case S_MAGIC_JFFS2: /* 0x72B6 */ ++ return "jffs2"; ++ case S_MAGIC_JFFS: /* 0x07C0 */ ++ return "jffs"; ++ case S_MAGIC_JFS: /* 0x3153464A */ ++ return "jfs"; ++ case S_MAGIC_LUSTRE: /* 0x0BD00BD0 */ ++ return "lustre"; ++ case S_MAGIC_MINIX: /* 0x137F */ ++ return "minix"; ++ case S_MAGIC_MINIX_30: /* 0x138F */ ++ return "minix (30 char.)"; ++ case S_MAGIC_MINIX_V2: /* 0x2468 */ ++ return "minix v2"; ++ case S_MAGIC_MINIX_V2_30: /* 0x2478 */ ++ return "minix v2 (30 char.)"; ++ case S_MAGIC_MSDOS: /* 0x4D44 */ ++ return "msdos"; ++ case S_MAGIC_NCP: /* 0x564C */ ++ return "novell"; ++ case S_MAGIC_NFS: /* 0x6969 */ ++ return "nfs"; ++ case S_MAGIC_NFSD: /* 0x6E667364 */ ++ return "nfsd"; ++ case S_MAGIC_NTFS: /* 
0x5346544E */ ++ return "ntfs"; ++ case S_MAGIC_OPENPROM: /* 0x9fa1 */ ++ return "openprom"; ++ case S_MAGIC_PROC: /* 0x9FA0 */ ++ return "proc"; ++ case S_MAGIC_QNX4: /* 0x002F */ ++ return "qnx4"; ++ case S_MAGIC_RAMFS: /* 0x858458F6 */ ++ return "ramfs"; ++ case S_MAGIC_REISERFS: /* 0x52654973 */ ++ return "reiserfs"; ++ case S_MAGIC_ROMFS: /* 0x7275 */ ++ return "romfs"; ++ case S_MAGIC_SMB: /* 0x517B */ ++ return "smb"; ++ case S_MAGIC_SQUASHFS: /* 0x73717368 */ ++ return "squashfs"; ++ case S_MAGIC_SYSFS: /* 0x62656572 */ ++ return "sysfs"; ++ case S_MAGIC_SYSV2: /* 0x012FF7B6 */ ++ return "sysv2"; ++ case S_MAGIC_SYSV4: /* 0x012FF7B5 */ ++ return "sysv4"; ++ case S_MAGIC_TMPFS: /* 0x1021994 */ ++ return "tmpfs"; ++ case S_MAGIC_UDF: /* 0x15013346 */ ++ return "udf"; ++ case S_MAGIC_UFS: /* 0x00011954 */ ++ return "ufs"; ++ case S_MAGIC_UFS_BYTESWAPPED: /* 0x54190100 */ ++ return "ufs"; ++ case S_MAGIC_USBDEVFS: /* 0x9FA2 */ ++ return "usbdevfs"; ++ case S_MAGIC_VXFS: /* 0xA501FCF5 */ ++ return "vxfs"; ++ case S_MAGIC_XENIX: /* 0x012FF7B4 */ ++ return "xenix"; ++ case S_MAGIC_XFS: /* 0x58465342 */ ++ return "xfs"; ++ case S_MAGIC_XIAFS: /* 0x012FD16D */ ++ return "xia"; ++ ++# elif __GNU__ ++ case FSTYPE_UFS: ++ return "ufs"; ++ case FSTYPE_NFS: ++ return "nfs"; ++ case FSTYPE_GFS: ++ return "gfs"; ++ case FSTYPE_LFS: ++ return "lfs"; ++ case FSTYPE_SYSV: ++ return "sysv"; ++ case FSTYPE_FTP: ++ return "ftp"; ++ case FSTYPE_TAR: ++ return "tar"; ++ case FSTYPE_AR: ++ return "ar"; ++ case FSTYPE_CPIO: ++ return "cpio"; ++ case FSTYPE_MSLOSS: ++ return "msloss"; ++ case FSTYPE_CPM: ++ return "cpm"; ++ case FSTYPE_HFS: ++ return "hfs"; ++ case FSTYPE_DTFS: ++ return "dtfs"; ++ case FSTYPE_GRFS: ++ return "grfs"; ++ case FSTYPE_TERM: ++ return "term"; ++ case FSTYPE_DEV: ++ return "dev"; ++ case FSTYPE_PROC: ++ return "proc"; ++ case FSTYPE_IFSOCK: ++ return "ifsock"; ++ case FSTYPE_AFS: ++ return "afs"; ++ case FSTYPE_DFS: ++ return "dfs"; ++ case FSTYPE_PROC9: 
++ return "proc9"; ++ case FSTYPE_SOCKET: ++ return "socket"; ++ case FSTYPE_MISC: ++ return "misc"; ++ case FSTYPE_EXT2FS: ++ return "ext2/ext3"; ++ case FSTYPE_HTTP: ++ return "http"; ++ case FSTYPE_MEMFS: ++ return "memfs"; ++ case FSTYPE_ISO9660: ++ return "iso9660"; ++# endif ++ default: ++ { ++ unsigned long int type = statfsbuf->f_type; ++ static char buf[sizeof "UNKNOWN (0x%lx)" - 3 ++ + (sizeof type * CHAR_BIT + 3) / 4]; ++ sprintf (buf, "UNKNOWN (0x%lx)", type); ++ return buf; ++ } ++ } ++#endif ++} ++ ++static char * ++human_access (struct stat const *statbuf) ++{ ++ static char modebuf[12]; ++ filemodestring (statbuf, modebuf); ++ modebuf[10] = 0; ++ return modebuf; ++} ++ ++static char * ++human_time (struct timespec t) ++{ ++ static char str[MAX (INT_BUFSIZE_BOUND (intmax_t), ++ (INT_STRLEN_BOUND (int) /* YYYY */ ++ + 1 /* because YYYY might equal INT_MAX + 1900 */ ++ + sizeof "-MM-DD HH:MM:SS.NNNNNNNNN +ZZZZ"))]; ++ struct tm const *tm = localtime (&t.tv_sec); ++ if (tm == NULL) ++ return timetostr (t.tv_sec, str); ++ nstrftime (str, sizeof str, "%Y-%m-%d %H:%M:%S.%N %z", tm, 0, t.tv_nsec); ++ return str; ++} ++ ++static void ++out_string (char *pformat, size_t prefix_len, char const *arg) ++{ ++ strcpy (pformat + prefix_len, "s"); ++ printf (pformat, arg); ++} ++static void ++out_int (char *pformat, size_t prefix_len, intmax_t arg) ++{ ++ strcpy (pformat + prefix_len, PRIdMAX); ++ printf (pformat, arg); ++} ++static void ++out_uint (char *pformat, size_t prefix_len, uintmax_t arg) ++{ ++ strcpy (pformat + prefix_len, PRIuMAX); ++ printf (pformat, arg); ++} ++static void ++out_uint_o (char *pformat, size_t prefix_len, uintmax_t arg) ++{ ++ strcpy (pformat + prefix_len, PRIoMAX); ++ printf (pformat, arg); ++} ++static void ++out_uint_x (char *pformat, size_t prefix_len, uintmax_t arg) ++{ ++ strcpy (pformat + prefix_len, PRIxMAX); ++ printf (pformat, arg); ++} ++ ++/* Very specialized function (modifies FORMAT), just so as to avoid ++ duplicating this 
code between both print_statfs and print_stat. */ ++static void ++out_file_context (char const *filename, char *pformat, size_t prefix_len) ++{ ++ char *scontext; ++ if ((follow_links ++ ? getfilecon (filename, &scontext) ++ : lgetfilecon (filename, &scontext)) < 0) ++ { ++ error (0, errno, _("failed to get security context of %s"), ++ quote (filename)); ++ scontext = NULL; ++ } ++ strcpy (pformat + prefix_len, "s"); ++ printf (pformat, (scontext ? scontext : "?")); ++ if (scontext) ++ freecon (scontext); ++} ++ ++/* print statfs info */ ++static void ++print_statfs (char *pformat, size_t prefix_len, char m, char const *filename, ++ void const *data) ++{ ++ STRUCT_STATVFS const *statfsbuf = data; ++ ++ switch (m) ++ { ++ case 'n': ++ out_string (pformat, prefix_len, filename); ++ break; ++ ++ case 'i': ++ { ++#if STRUCT_STATXFS_F_FSID_IS_INTEGER ++ uintmax_t fsid = statfsbuf->f_fsid; ++#else ++ typedef unsigned int fsid_word; ++ verify (alignof (STRUCT_STATVFS) % alignof (fsid_word) == 0); ++ verify (offsetof (STRUCT_STATVFS, f_fsid) % alignof (fsid_word) == 0); ++ verify (sizeof statfsbuf->f_fsid % alignof (fsid_word) == 0); ++ fsid_word const *p = (fsid_word *) &statfsbuf->f_fsid; ++ ++ /* Assume a little-endian word order, as that is compatible ++ with glibc's statvfs implementation. 
*/ ++ uintmax_t fsid = 0; ++ int words = sizeof statfsbuf->f_fsid / sizeof *p; ++ int i; ++ for (i = 0; i < words && i * sizeof *p < sizeof fsid; i++) ++ { ++ uintmax_t u = p[words - 1 - i]; ++ fsid |= u << (i * CHAR_BIT * sizeof *p); ++ } ++#endif ++ out_uint_x (pformat, prefix_len, fsid); ++ } ++ break; ++ ++ case 'l': ++ OUT_NAMEMAX (pformat, prefix_len, SB_F_NAMEMAX (statfsbuf)); ++ break; ++ case 't': ++#if HAVE_STRUCT_STATXFS_F_TYPE ++ out_uint_x (pformat, prefix_len, statfsbuf->f_type); ++#else ++ fputc ('?', stdout); ++#endif ++ break; ++ case 'T': ++ out_string (pformat, prefix_len, human_fstype (statfsbuf)); ++ break; ++ case 'b': ++ out_int (pformat, prefix_len, statfsbuf->f_blocks); ++ break; ++ case 'f': ++ out_int (pformat, prefix_len, statfsbuf->f_bfree); ++ break; ++ case 'a': ++ out_int (pformat, prefix_len, statfsbuf->f_bavail); ++ break; ++ case 's': ++ out_uint (pformat, prefix_len, statfsbuf->f_bsize); ++ break; ++ case 'S': ++ { ++ uintmax_t frsize = STATFS_FRSIZE (statfsbuf); ++ if (! 
frsize) ++ frsize = statfsbuf->f_bsize; ++ out_uint (pformat, prefix_len, frsize); ++ } ++ break; ++ case 'c': ++ out_uint (pformat, prefix_len, statfsbuf->f_files); ++ break; ++ case 'd': ++ out_int (pformat, prefix_len, statfsbuf->f_ffree); ++ break; ++ case 'C': ++ out_file_context (filename, pformat, prefix_len); ++ break; ++ default: ++ fputc ('?', stdout); ++ break; ++ } ++} ++ ++/* print stat info */ ++static void ++print_stat (char *pformat, size_t prefix_len, char m, ++ char const *filename, void const *data) ++{ ++ struct stat *statbuf = (struct stat *) data; ++ struct passwd *pw_ent; ++ struct group *gw_ent; ++ ++ switch (m) ++ { ++ case 'n': ++ out_string (pformat, prefix_len, filename); ++ break; ++ case 'N': ++ out_string (pformat, prefix_len, quote (filename)); ++ if (S_ISLNK (statbuf->st_mode)) ++ { ++ char *linkname = areadlink_with_size (filename, statbuf->st_size); ++ if (linkname == NULL) ++ { ++ error (0, errno, _("cannot read symbolic link %s"), ++ quote (filename)); ++ return; ++ } ++ printf (" -> "); ++ out_string (pformat, prefix_len, quote (linkname)); ++ } ++ break; ++ case 'd': ++ out_uint (pformat, prefix_len, statbuf->st_dev); ++ break; ++ case 'D': ++ out_uint_x (pformat, prefix_len, statbuf->st_dev); ++ break; ++ case 'i': ++ out_uint (pformat, prefix_len, statbuf->st_ino); ++ break; ++ case 'a': ++ out_uint_o (pformat, prefix_len, statbuf->st_mode & CHMOD_MODE_BITS); ++ break; ++ case 'A': ++ out_string (pformat, prefix_len, human_access (statbuf)); ++ break; ++ case 'f': ++ out_uint_x (pformat, prefix_len, statbuf->st_mode); ++ break; ++ case 'F': ++ out_string (pformat, prefix_len, file_type (statbuf)); ++ break; ++ case 'h': ++ out_uint (pformat, prefix_len, statbuf->st_nlink); ++ break; ++ case 'u': ++ out_uint (pformat, prefix_len, statbuf->st_uid); ++ break; ++ case 'U': ++ setpwent (); ++ pw_ent = getpwuid (statbuf->st_uid); ++ out_string (pformat, prefix_len, ++ pw_ent ? 
pw_ent->pw_name : "UNKNOWN"); ++ break; ++ case 'g': ++ out_uint (pformat, prefix_len, statbuf->st_gid); ++ break; ++ case 'G': ++ setgrent (); ++ gw_ent = getgrgid (statbuf->st_gid); ++ out_string (pformat, prefix_len, ++ gw_ent ? gw_ent->gr_name : "UNKNOWN"); ++ break; ++ case 't': ++ out_uint_x (pformat, prefix_len, major (statbuf->st_rdev)); ++ break; ++ case 'T': ++ out_uint_x (pformat, prefix_len, minor (statbuf->st_rdev)); ++ break; ++ case 's': ++ out_uint (pformat, prefix_len, statbuf->st_size); ++ break; ++ case 'B': ++ out_uint (pformat, prefix_len, ST_NBLOCKSIZE); ++ break; ++ case 'b': ++ out_uint (pformat, prefix_len, ST_NBLOCKS (*statbuf)); ++ break; ++ case 'o': ++ out_uint (pformat, prefix_len, statbuf->st_blksize); ++ break; ++ case 'x': ++ out_string (pformat, prefix_len, human_time (get_stat_atime (statbuf))); ++ break; ++ case 'X': ++ if (TYPE_SIGNED (time_t)) ++ out_int (pformat, prefix_len, statbuf->st_atime); ++ else ++ out_uint (pformat, prefix_len, statbuf->st_atime); ++ break; ++ case 'y': ++ out_string (pformat, prefix_len, human_time (get_stat_mtime (statbuf))); ++ break; ++ case 'Y': ++ if (TYPE_SIGNED (time_t)) ++ out_int (pformat, prefix_len, statbuf->st_mtime); ++ else ++ out_uint (pformat, prefix_len, statbuf->st_mtime); ++ break; ++ case 'z': ++ out_string (pformat, prefix_len, human_time (get_stat_ctime (statbuf))); ++ break; ++ case 'Z': ++ if (TYPE_SIGNED (time_t)) ++ out_int (pformat, prefix_len, statbuf->st_ctime); ++ else ++ out_uint (pformat, prefix_len, statbuf->st_ctime); ++ break; ++ case 'C': ++ out_file_context (filename, pformat, prefix_len); ++ break; ++ default: ++ fputc ('?', stdout); ++ break; ++ } ++} ++ ++/* Output a single-character \ escape. */ ++ ++static void ++print_esc_char (char c) ++{ ++ switch (c) ++ { ++ case 'a': /* Alert. */ ++ c ='\a'; ++ break; ++ case 'b': /* Backspace. */ ++ c ='\b'; ++ break; ++ case 'f': /* Form feed. */ ++ c ='\f'; ++ break; ++ case 'n': /* New line. 
*/ ++ c ='\n'; ++ break; ++ case 'r': /* Carriage return. */ ++ c ='\r'; ++ break; ++ case 't': /* Horizontal tab. */ ++ c ='\t'; ++ break; ++ case 'v': /* Vertical tab. */ ++ c ='\v'; ++ break; ++ case '"': ++ case '\\': ++ break; ++ default: ++ error (0, 0, _("warning: unrecognized escape `\\%c'"), c); ++ break; ++ } ++ putchar (c); ++} ++ ++static void ++print_it (char const *format, char const *filename, ++ void (*print_func) (char *, size_t, char, char const *, void const *), ++ void const *data) ++{ ++ /* Add 2 to accommodate our conversion of the stat `%s' format string ++ to the longer printf `%llu' one. */ ++ enum ++ { ++ MAX_ADDITIONAL_BYTES = ++ (MAX (sizeof PRIdMAX, ++ MAX (sizeof PRIoMAX, MAX (sizeof PRIuMAX, sizeof PRIxMAX))) ++ - 1) ++ }; ++ size_t n_alloc = strlen (format) + MAX_ADDITIONAL_BYTES + 1; ++ char *dest = xmalloc (n_alloc); ++ char const *b; ++ for (b = format; *b; b++) ++ { ++ switch (*b) ++ { ++ case '%': ++ { ++ size_t len = strspn (b + 1, "#-+.I 0123456789"); ++ char const *fmt_char = b + len + 1; ++ memcpy (dest, b, len + 1); ++ ++ b = fmt_char; ++ switch (*fmt_char) ++ { ++ case '\0': ++ --b; ++ /* fall through */ ++ case '%': ++ if (0 < len) ++ { ++ dest[len + 1] = *fmt_char; ++ dest[len + 2] = '\0'; ++ error (EXIT_FAILURE, 0, _("%s: invalid directive"), ++ quotearg_colon (dest)); ++ } ++ putchar ('%'); ++ break; ++ default: ++ print_func (dest, len + 1, *fmt_char, filename, data); ++ break; ++ } ++ break; ++ } ++ ++ case '\\': ++ if ( ! interpret_backslash_escapes) ++ { ++ putchar ('\\'); ++ break; ++ } ++ ++b; ++ if (isodigit (*b)) ++ { ++ int esc_value = octtobin (*b); ++ int esc_length = 1; /* number of octal digits */ ++ for (++b; esc_length < 3 && isodigit (*b); ++ ++esc_length, ++b) ++ { ++ esc_value = esc_value * 8 + octtobin (*b); ++ } ++ putchar (esc_value); ++ --b; ++ } ++ else if (*b == 'x' && isxdigit (to_uchar (b[1]))) ++ { ++ int esc_value = hextobin (b[1]); /* Value of \xhh escape. 
*/ ++ /* A hexadecimal \xhh escape sequence must have ++ 1 or 2 hex. digits. */ ++ ++b; ++ if (isxdigit (to_uchar (b[1]))) ++ { ++ ++b; ++ esc_value = esc_value * 16 + hextobin (*b); ++ } ++ putchar (esc_value); ++ } ++ else if (*b == '\0') ++ { ++ error (0, 0, _("warning: backslash at end of format")); ++ putchar ('\\'); ++ /* Arrange to exit the loop. */ ++ --b; ++ } ++ else ++ { ++ print_esc_char (*b); ++ } ++ break; ++ ++ default: ++ putchar (*b); ++ break; ++ } ++ } ++ free (dest); ++ ++ fputs (trailing_delim, stdout); ++} ++ ++/* Stat the file system and print what we find. */ ++static bool ++do_statfs (char const *filename, bool terse, char const *format) ++{ ++ STRUCT_STATVFS statfsbuf; ++ ++ if (STREQ (filename, "-")) ++ { ++ error (0, 0, _("using %s to denote standard input does not work" ++ " in file system mode"), quote (filename)); ++ return false; ++ } ++ ++ if (STATFS (filename, &statfsbuf) != 0) ++ { ++ error (0, errno, _("cannot read file system information for %s"), ++ quote (filename)); ++ return false; ++ } ++ ++ if (format == NULL) ++ { ++ format = (terse ++ ? "%n %i %l %t %s %S %b %f %a %c %d\n" ++ : " File: \"%n\"\n" ++ " ID: %-8i Namelen: %-7l Type: %T\n" ++ "Block size: %-10s Fundamental block size: %S\n" ++ "Blocks: Total: %-10b Free: %-10f Available: %a\n" ++ "Inodes: Total: %-10c Free: %d\n"); ++ } ++ ++ print_it (format, filename, print_statfs, &statfsbuf); ++ return true; ++} ++ ++/* stat the file and print what we find */ ++static bool ++do_stat (char const *filename, bool terse, char const *format) ++{ ++ struct stat statbuf; ++ ++ if (STREQ (filename, "-")) ++ { ++ if (fstat (STDIN_FILENO, &statbuf) != 0) ++ { ++ error (0, errno, _("cannot stat standard input")); ++ return false; ++ } ++ } ++ else if ((follow_links ? 
stat : lstat) (filename, &statbuf) != 0) ++ { ++ error (0, errno, _("cannot stat %s"), quote (filename)); ++ return false; ++ } ++ ++ if (format == NULL) ++ { ++ if (terse) ++ { ++ format = "%n %s %b %f %u %g %D %i %h %t %T %X %Y %Z %o\n"; ++ } ++ else ++ { ++ /* Temporary hack to match original output until conditional ++ implemented. */ ++ if (S_ISBLK (statbuf.st_mode) || S_ISCHR (statbuf.st_mode)) ++ { ++ format = ++ " File: %N\n" ++ " Size: %-10s\tBlocks: %-10b IO Block: %-6o %F\n" ++ "Device: %Dh/%dd\tInode: %-10i Links: %-5h" ++ " Device type: %t,%T\n" ++ "Access: (%04a/%10.10A) Uid: (%5u/%8U) Gid: (%5g/%8G)\n" ++ "Access: %x\n" "Modify: %y\n" "Change: %z\n"; ++ } ++ else ++ { ++ format = ++ " File: %N\n" ++ " Size: %-10s\tBlocks: %-10b IO Block: %-6o %F\n" ++ "Device: %Dh/%dd\tInode: %-10i Links: %h\n" ++ "Access: (%04a/%10.10A) Uid: (%5u/%8U) Gid: (%5g/%8G)\n" ++ "Access: %x\n" "Modify: %y\n" "Change: %z\n"; ++ } ++ } ++ } ++ print_it (format, filename, print_stat, &statbuf); ++ return true; ++} ++ ++void ++usage (int status) ++{ ++ if (status != EXIT_SUCCESS) ++ fprintf (stderr, _("Try `%s --help' for more information.\n"), ++ program_name); ++ else ++ { ++ printf (_("Usage: %s [OPTION]... 
FILE...\n"), program_name); ++ fputs (_("\ ++Display file or file system status.\n\ ++\n\ ++ -L, --dereference follow links\n\ ++ -f, --file-system display file system status instead of file status\n\ ++"), stdout); ++ fputs (_("\ ++ -c --format=FORMAT use the specified FORMAT instead of the default;\n\ ++ output a newline after each use of FORMAT\n\ ++ --printf=FORMAT like --format, but interpret backslash escapes,\n\ ++ and do not output a mandatory trailing newline.\n\ ++ If you want a newline, include \\n in FORMAT.\n\ ++ -t, --terse print the information in terse form\n\ ++"), stdout); ++ fputs (HELP_OPTION_DESCRIPTION, stdout); ++ fputs (VERSION_OPTION_DESCRIPTION, stdout); ++ ++ fputs (_("\n\ ++The valid format sequences for files (without --file-system):\n\ ++\n\ ++ %a Access rights in octal\n\ ++ %A Access rights in human readable form\n\ ++ %b Number of blocks allocated (see %B)\n\ ++ %B The size in bytes of each block reported by %b\n\ ++ %C SELinux security context string\n\ ++"), stdout); ++ fputs (_("\ ++ %d Device number in decimal\n\ ++ %D Device number in hex\n\ ++ %f Raw mode in hex\n\ ++ %F File type\n\ ++ %g Group ID of owner\n\ ++ %G Group name of owner\n\ ++"), stdout); ++ fputs (_("\ ++ %h Number of hard links\n\ ++ %i Inode number\n\ ++ %n File name\n\ ++ %N Quoted file name with dereference if symbolic link\n\ ++ %o I/O block size\n\ ++ %s Total size, in bytes\n\ ++ %t Major device type in hex\n\ ++ %T Minor device type in hex\n\ ++"), stdout); ++ fputs (_("\ ++ %u User ID of owner\n\ ++ %U User name of owner\n\ ++ %x Time of last access\n\ ++ %X Time of last access as seconds since Epoch\n\ ++ %y Time of last modification\n\ ++ %Y Time of last modification as seconds since Epoch\n\ ++ %z Time of last change\n\ ++ %Z Time of last change as seconds since Epoch\n\ ++\n\ ++"), stdout); ++ ++ fputs (_("\ ++Valid format sequences for file systems:\n\ ++\n\ ++ %a Free blocks available to non-superuser\n\ ++ %b Total data blocks in file system\n\ 
++ %c Total file nodes in file system\n\ ++ %d Free file nodes in file system\n\ ++ %f Free blocks in file system\n\ ++ %C SELinux security context string\n\ ++"), stdout); ++ fputs (_("\ ++ %i File System ID in hex\n\ ++ %l Maximum length of filenames\n\ ++ %n File name\n\ ++ %s Block size (for faster transfers)\n\ ++ %S Fundamental block size (for block counts)\n\ ++ %t Type in hex\n\ ++ %T Type in human readable form\n\ ++"), stdout); ++ printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); ++ emit_ancillary_info (); ++ } ++ exit (status); ++} ++ ++int ++main (int argc, char *argv[]) ++{ ++ int c; ++ int i; ++ bool fs = false; ++ bool terse = false; ++ char *format = NULL; ++ bool ok = true; ++ ++ initialize_main (&argc, &argv); ++ set_program_name (argv[0]); ++ setlocale (LC_ALL, ""); ++ bindtextdomain (PACKAGE, LOCALEDIR); ++ textdomain (PACKAGE); ++ ++ atexit (close_stdout); ++ ++ while ((c = getopt_long (argc, argv, "c:fLtZ", long_options, NULL)) != -1) ++ { ++ switch (c) ++ { ++ case PRINTF_OPTION: ++ format = optarg; ++ interpret_backslash_escapes = true; ++ trailing_delim = ""; ++ break; ++ ++ case 'c': ++ format = optarg; ++ interpret_backslash_escapes = false; ++ trailing_delim = "\n"; ++ break; ++ ++ case 'L': ++ follow_links = true; ++ break; ++ ++ case 'f': ++ fs = true; ++ break; ++ ++ case 't': ++ terse = true; ++ break; ++ ++ case 'Z': /* FIXME: remove in 2010 */ ++ /* Ignore, for compatibility with distributions ++ that implemented this before upstream. ++ But warn of impending removal. */ ++ error (0, 0, ++ _("the --context (-Z) option is obsolete and will be removed\n" ++ "in a future release")); ++ break; ++ ++ case_GETOPT_HELP_CHAR; ++ ++ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); ++ ++ default: ++ usage (EXIT_FAILURE); ++ } ++ } ++ ++ if (argc == optind) ++ { ++ error (0, 0, _("missing operand")); ++ usage (EXIT_FAILURE); ++ } ++ ++ for (i = optind; i < argc; i++) ++ ok &= (fs ++ ? 
do_statfs (argv[i], terse, format) ++ : do_stat (argv[i], terse, format)); ++ ++ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); ++} +diff -urNp coreutils-8.0-orig/tests/misc/selinux coreutils-8.0/tests/misc/selinux +--- coreutils-8.0-orig/tests/misc/selinux 2009-09-01 13:01:16.000000000 +0200 ++++ coreutils-8.0/tests/misc/selinux 2009-10-07 10:10:11.000000000 +0200 @@ -30,7 +30,7 @@ chcon $ctx f d p || # inspect that context with both ls -Z and stat. diff --git a/coreutils.spec b/coreutils.spec index 9af4d8a..c34eeb2 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -6,7 +6,7 @@ License: GPLv3+ Group: System Environment/Base Url: http://www.gnu.org/software/coreutils/ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -Source0: ftp://ftp.gnu.org/gnu/%{name}/%{name}-%{version}.tar.xz +Source0: ftp://ftp.gnu.org/gnu/%{name}/%{name}-%{version}.tar.gz Source101: coreutils-DIR_COLORS Source102: coreutils-DIR_COLORS.lightbgcolor Source103: coreutils-DIR_COLORS.256color