Ensure a newline after the final line in a file is output by pcre2grep
This commit is contained in:
		
							parent
							
								
									d7a3f4e85f
								
							
						
					
					
						commit
						ebe70d35d6
					
				| @ -0,0 +1,598 @@ | |||||||
|  | From b3f42a32920b20ae71988bc1d06a7148e0211925 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | ||||||
|  | Date: Sat, 25 Jan 2020 15:50:44 +0000 | ||||||
|  | Subject: [PATCH] Ensure a newline after the final line in a file is output by | ||||||
|  |  pcre2grep. | ||||||
|  | MIME-Version: 1.0 | ||||||
|  | Content-Type: text/plain; charset=UTF-8 | ||||||
|  | Content-Transfer-Encoding: 8bit | ||||||
|  | 
 | ||||||
|  | git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1211 6239d852-aaf2-0410-a92c-79f79f948069 | ||||||
|  | Petr Písař: Ported to 10.34. | ||||||
|  | ---
 | ||||||
|  |  RunGrepTest             |   4 +- | ||||||
|  |  doc/html/pcre2grep.html |  84 ++++++++++++++++++++------------- | ||||||
|  |  doc/pcre2grep.1         |  83 ++++++++++++++++++++------------- | ||||||
|  |  doc/pcre2grep.txt       | 100 ++++++++++++++++++++++++---------------- | ||||||
|  |  src/pcre2grep.c         |  66 ++++++++++++++++++++++++-- | ||||||
|  |  testdata/grepoutputN    |  16 ++++--- | ||||||
|  | 
 | ||||||
|  | diff --git a/RunGrepTest b/RunGrepTest
 | ||||||
|  | index 1113cd4..2ff4f7c 100755
 | ||||||
|  | --- a/RunGrepTest
 | ||||||
|  | +++ b/RunGrepTest
 | ||||||
|  | @@ -742,11 +742,11 @@ uname=`uname`
 | ||||||
|  |  case $uname in | ||||||
|  |    Linux) | ||||||
|  |      printf 'abc\0def' >testNinputgrep | ||||||
|  | -    $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
 | ||||||
|  | +    $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/g' >>testtrygrep
 | ||||||
|  |      echo "" >>testtrygrep | ||||||
|  |      ;; | ||||||
|  |    *) | ||||||
|  | -    echo '1:abcZERO2:def' >>testtrygrep
 | ||||||
|  | +    echo '1:abcZERO2:defZERO' >>testtrygrep
 | ||||||
|  |      ;; | ||||||
|  |  esac | ||||||
|  |   | ||||||
|  | diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
 | ||||||
|  | index f5b72f3..abbafa1 100644
 | ||||||
|  | --- a/doc/html/pcre2grep.html
 | ||||||
|  | +++ b/doc/html/pcre2grep.html
 | ||||||
|  | @@ -148,7 +148,7 @@ ignored.
 | ||||||
|  |  By default, a file that contains a binary zero byte within the first 1024 bytes | ||||||
|  |  is identified as a binary file, and is processed specially. (GNU grep | ||||||
|  |  identifies binary files in this manner.) However, if the newline type is | ||||||
|  | -specified as "nul", that is, the line terminator is a binary zero, the test for
 | ||||||
|  | +specified as NUL, that is, the line terminator is a binary zero, the test for
 | ||||||
|  |  a binary file is not applied. See the <b>--binary-files</b> option for a means | ||||||
|  |  of changing the way binary files are handled. | ||||||
|  |  </P> | ||||||
|  | @@ -601,25 +601,32 @@ does not work when input is read line by line (see \fP--line-buffered\fP.)
 | ||||||
|  |  </P> | ||||||
|  |  <P> | ||||||
|  |  <b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i> | ||||||
|  | -The PCRE2 library supports five different conventions for indicating
 | ||||||
|  | -the ends of lines. They are the single-character sequences CR (carriage return)
 | ||||||
|  | -and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
 | ||||||
|  | -which recognizes any of the preceding three types, and an "any" convention, in
 | ||||||
|  | -which any Unicode line ending sequence is assumed to end a line. The Unicode
 | ||||||
|  | -sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
 | ||||||
|  | -(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
 | ||||||
|  | -PS (paragraph separator, U+2029).
 | ||||||
|  | +Six different conventions for indicating the ends of lines in scanned files are
 | ||||||
|  | +supported. For example:
 | ||||||
|  | +<pre>
 | ||||||
|  | +  pcre2grep -N CRLF 'some pattern' <file>
 | ||||||
|  | +</pre>
 | ||||||
|  | +The newline type may be specified in upper, lower, or mixed case. If the
 | ||||||
|  | +newline type is NUL, lines are separated by binary zero characters. The other
 | ||||||
|  | +types are the single-character sequences CR (carriage return) and LF
 | ||||||
|  | +(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
 | ||||||
|  | +recognizes any of the preceding three types, and an "any" type, for which any
 | ||||||
|  | +Unicode line ending sequence is assumed to end a line. The Unicode sequences
 | ||||||
|  | +are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
 | ||||||
|  | +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
 | ||||||
|  | +(paragraph separator, U+2029).
 | ||||||
|  |  <br> | ||||||
|  |  <br> | ||||||
|  |  When the PCRE2 library is built, a default line-ending sequence is specified. | ||||||
|  |  This is normally the standard sequence for the operating system. Unless | ||||||
|  |  otherwise specified by this option, <b>pcre2grep</b> uses the library's default. | ||||||
|  | -The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
 | ||||||
|  | -makes it possible to use <b>pcre2grep</b> to scan files that have come from
 | ||||||
|  | -other environments without having to modify their line endings. If the data
 | ||||||
|  | -that is being scanned does not agree with the convention set by this option,
 | ||||||
|  | -<b>pcre2grep</b> may behave in strange ways. Note that this option does not
 | ||||||
|  | -apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
 | ||||||
|  | +<br>
 | ||||||
|  | +<br>
 | ||||||
|  | +This option makes it possible to use <b>pcre2grep</b> to scan files that have
 | ||||||
|  | +come from other environments without having to modify their line endings. If
 | ||||||
|  | +the data that is being scanned does not agree with the convention set by this
 | ||||||
|  | +option, <b>pcre2grep</b> may behave in strange ways. Note that this option does
 | ||||||
|  | +not apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
 | ||||||
|  |  <b>--include-from</b> options, which are expected to use the operating system's | ||||||
|  |  standard newline sequence. | ||||||
|  |  </P> | ||||||
|  | @@ -640,12 +647,14 @@ use of JIT at run time. It is provided for testing and working round problems.
 | ||||||
|  |  It should never be needed in normal use. | ||||||
|  |  </P> | ||||||
|  |  <P> | ||||||
|  | -<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
 | ||||||
|  | +<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i> 
 | ||||||
|  |  When there is a match, instead of outputting the whole line that matched, | ||||||
|  | -output just the given text. This option is mutually exclusive with
 | ||||||
|  | -<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
 | ||||||
|  | -sequences starting with a dollar character may be used to insert the contents
 | ||||||
|  | -of the matched part of the line and/or captured substrings into the text.
 | ||||||
|  | +output just the given text, followed by an operating-system standard newline.
 | ||||||
|  | +The <b>--newline</b> option has no effect on this option, which is mutually
 | ||||||
|  | +exclusive with <b>--only-matching</b>, <b>--file-offsets</b>, and
 | ||||||
|  | +<b>--line-offsets</b>. Escape sequences starting with a dollar character may be
 | ||||||
|  | +used to insert the contents of the matched part of the line and/or captured
 | ||||||
|  | +substrings into the text.
 | ||||||
|  |  <br> | ||||||
|  |  <br> | ||||||
|  |  $<digits> or ${<digits>} is replaced by the captured | ||||||
|  | @@ -807,16 +816,27 @@ by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
 | ||||||
|  |  <br><a name="SEC8" href="#TOC1">NEWLINES</a><br> | ||||||
|  |  <P> | ||||||
|  |  The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with | ||||||
|  | -different newline conventions from the default. Any parts of the input files
 | ||||||
|  | -that are written to the standard output are copied identically, with whatever
 | ||||||
|  | -newline sequences they have in the input. However, the setting of this option
 | ||||||
|  | -affects only the way scanned files are processed. It does not affect the
 | ||||||
|  | -interpretation of files specified by the <b>-f</b>, <b>--file-list</b>,
 | ||||||
|  | -<b>--exclude-from</b>, or <b>--include-from</b> options, nor does it affect the
 | ||||||
|  | -way in which <b>pcre2grep</b> writes informational messages to the standard
 | ||||||
|  | -error and output streams. For these it uses the string "\n" to indicate
 | ||||||
|  | -newlines, relying on the C I/O library to convert this to an appropriate
 | ||||||
|  | -sequence.
 | ||||||
|  | +newline conventions that differ from the default. This option affects only the
 | ||||||
|  | +way scanned files are processed. It does not affect the interpretation of files
 | ||||||
|  | +specified by the <b>-f</b>, <b>--file-list</b>, <b>--exclude-from</b>, or
 | ||||||
|  | +<b>--include-from</b> options.
 | ||||||
|  | +</P>
 | ||||||
|  | +<P>
 | ||||||
|  | +Any parts of the scanned input files that are written to the standard output
 | ||||||
|  | +are copied with whatever newline sequences they have in the input. However, if
 | ||||||
|  | +the final line of a file is output, and it does not end with a newline
 | ||||||
|  | +sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
 | ||||||
|  | +or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
 | ||||||
|  | +single NL is used.
 | ||||||
|  | +</P>
 | ||||||
|  | +<P>
 | ||||||
|  | +The newline setting does not affect the way in which <b>pcre2grep</b> writes
 | ||||||
|  | +newlines in informational messages to the standard output and error streams.
 | ||||||
|  | +Under Windows, the standard output is set to be binary, so that "\r\n" at the
 | ||||||
|  | +ends of output lines that are copied from the input is not converted to
 | ||||||
|  | +"\r\r\n" by the C I/O library. This means that any messages written to the
 | ||||||
|  | +standard output must end with "\r\n". For all other operating systems, and
 | ||||||
|  | +for all messages to the standard error stream, "\n" is used.
 | ||||||
|  |  </P> | ||||||
|  |  <br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY</a><br> | ||||||
|  |  <P> | ||||||
|  | @@ -992,9 +1012,9 @@ Cambridge, England.
 | ||||||
|  |  </P> | ||||||
|  |  <br><a name="SEC16" href="#TOC1">REVISION</a><br> | ||||||
|  |  <P> | ||||||
|  | -Last updated: 15 June 2019
 | ||||||
|  | +Last updated: 25 January 2020
 | ||||||
|  |  <br> | ||||||
|  | -Copyright © 1997-2019 University of Cambridge.
 | ||||||
|  | +Copyright © 1997-2020 University of Cambridge.
 | ||||||
|  |  <br> | ||||||
|  |  <p> | ||||||
|  |  Return to the <a href="index.html">PCRE2 index page</a>. | ||||||
|  | diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
 | ||||||
|  | index 22992b1..82f0435 100644
 | ||||||
|  | --- a/doc/pcre2grep.1
 | ||||||
|  | +++ b/doc/pcre2grep.1
 | ||||||
|  | @@ -1,4 +1,4 @@
 | ||||||
|  | -.TH PCRE2GREP 1 "15 June 2019" "PCRE2 10.34"
 | ||||||
|  | +.TH PCRE2GREP 1 "25 January 2020" "PCRE2 10.35"
 | ||||||
|  |  .SH NAME | ||||||
|  |  pcre2grep - a grep with Perl-compatible regular expressions. | ||||||
|  |  .SH SYNOPSIS | ||||||
|  | @@ -117,7 +117,7 @@ ignored.
 | ||||||
|  |  By default, a file that contains a binary zero byte within the first 1024 bytes | ||||||
|  |  is identified as a binary file, and is processed specially. (GNU grep | ||||||
|  |  identifies binary files in this manner.) However, if the newline type is | ||||||
|  | -specified as "nul", that is, the line terminator is a binary zero, the test for
 | ||||||
|  | +specified as NUL, that is, the line terminator is a binary zero, the test for
 | ||||||
|  |  a binary file is not applied. See the \fB--binary-files\fP option for a means | ||||||
|  |  of changing the way binary files are handled. | ||||||
|  |  . | ||||||
|  | @@ -523,24 +523,30 @@ large processing buffer, this should not be a problem, but the \fB-M\fP option
 | ||||||
|  |  does not work when input is read line by line (see \fP--line-buffered\fP.) | ||||||
|  |  .TP | ||||||
|  |  \fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP | ||||||
|  | -The PCRE2 library supports five different conventions for indicating
 | ||||||
|  | -the ends of lines. They are the single-character sequences CR (carriage return)
 | ||||||
|  | -and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
 | ||||||
|  | -which recognizes any of the preceding three types, and an "any" convention, in
 | ||||||
|  | -which any Unicode line ending sequence is assumed to end a line. The Unicode
 | ||||||
|  | -sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
 | ||||||
|  | -(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
 | ||||||
|  | -PS (paragraph separator, U+2029).
 | ||||||
|  | +Six different conventions for indicating the ends of lines in scanned files are
 | ||||||
|  | +supported. For example:
 | ||||||
|  | +.sp
 | ||||||
|  | +  pcre2grep -N CRLF 'some pattern' <file>
 | ||||||
|  | +.sp
 | ||||||
|  | +The newline type may be specified in upper, lower, or mixed case. If the
 | ||||||
|  | +newline type is NUL, lines are separated by binary zero characters. The other
 | ||||||
|  | +types are the single-character sequences CR (carriage return) and LF
 | ||||||
|  | +(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
 | ||||||
|  | +recognizes any of the preceding three types, and an "any" type, for which any
 | ||||||
|  | +Unicode line ending sequence is assumed to end a line. The Unicode sequences
 | ||||||
|  | +are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
 | ||||||
|  | +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
 | ||||||
|  | +(paragraph separator, U+2029).
 | ||||||
|  |  .sp | ||||||
|  |  When the PCRE2 library is built, a default line-ending sequence is specified. | ||||||
|  |  This is normally the standard sequence for the operating system. Unless | ||||||
|  |  otherwise specified by this option, \fBpcre2grep\fP uses the library's default. | ||||||
|  | -The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
 | ||||||
|  | -makes it possible to use \fBpcre2grep\fP to scan files that have come from
 | ||||||
|  | -other environments without having to modify their line endings. If the data
 | ||||||
|  | -that is being scanned does not agree with the convention set by this option,
 | ||||||
|  | -\fBpcre2grep\fP may behave in strange ways. Note that this option does not
 | ||||||
|  | -apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
 | ||||||
|  | +.sp
 | ||||||
|  | +This option makes it possible to use \fBpcre2grep\fP to scan files that have
 | ||||||
|  | +come from other environments without having to modify their line endings. If
 | ||||||
|  | +the data that is being scanned does not agree with the convention set by this
 | ||||||
|  | +option, \fBpcre2grep\fP may behave in strange ways. Note that this option does
 | ||||||
|  | +not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
 | ||||||
|  |  \fB--include-from\fP options, which are expected to use the operating system's | ||||||
|  |  standard newline sequence. | ||||||
|  |  .TP | ||||||
|  | @@ -558,12 +564,14 @@ was explicitly disabled at build time. This option can be used to disable the
 | ||||||
|  |  use of JIT at run time. It is provided for testing and working round problems. | ||||||
|  |  It should never be needed in normal use. | ||||||
|  |  .TP | ||||||
|  | -\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
 | ||||||
|  | +\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP 
 | ||||||
|  |  When there is a match, instead of outputting the whole line that matched, | ||||||
|  | -output just the given text. This option is mutually exclusive with
 | ||||||
|  | -\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
 | ||||||
|  | -sequences starting with a dollar character may be used to insert the contents
 | ||||||
|  | -of the matched part of the line and/or captured substrings into the text.
 | ||||||
|  | +output just the given text, followed by an operating-system standard newline.
 | ||||||
|  | +The \fB--newline\fP option has no effect on this option, which is mutually
 | ||||||
|  | +exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and
 | ||||||
|  | +\fB--line-offsets\fP. Escape sequences starting with a dollar character may be
 | ||||||
|  | +used to insert the contents of the matched part of the line and/or captured
 | ||||||
|  | +substrings into the text.
 | ||||||
|  |  .sp | ||||||
|  |  $<digits> or ${<digits>} is replaced by the captured | ||||||
|  |  substring of the given decimal number; zero substitutes the whole match. If | ||||||
|  | @@ -709,16 +717,25 @@ by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default
 | ||||||
|  |  .rs | ||||||
|  |  .sp | ||||||
|  |  The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with | ||||||
|  | -different newline conventions from the default. Any parts of the input files
 | ||||||
|  | -that are written to the standard output are copied identically, with whatever
 | ||||||
|  | -newline sequences they have in the input. However, the setting of this option
 | ||||||
|  | -affects only the way scanned files are processed. It does not affect the
 | ||||||
|  | -interpretation of files specified by the \fB-f\fP, \fB--file-list\fP,
 | ||||||
|  | -\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the
 | ||||||
|  | -way in which \fBpcre2grep\fP writes informational messages to the standard
 | ||||||
|  | -error and output streams. For these it uses the string "\en" to indicate
 | ||||||
|  | -newlines, relying on the C I/O library to convert this to an appropriate
 | ||||||
|  | -sequence.
 | ||||||
|  | +newline conventions that differ from the default. This option affects only the
 | ||||||
|  | +way scanned files are processed. It does not affect the interpretation of files
 | ||||||
|  | +specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or
 | ||||||
|  | +\fB--include-from\fP options.
 | ||||||
|  | +.P
 | ||||||
|  | +Any parts of the scanned input files that are written to the standard output
 | ||||||
|  | +are copied with whatever newline sequences they have in the input. However, if
 | ||||||
|  | +the final line of a file is output, and it does not end with a newline
 | ||||||
|  | +sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
 | ||||||
|  | +or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
 | ||||||
|  | +single NL is used.
 | ||||||
|  | +.P
 | ||||||
|  | +The newline setting does not affect the way in which \fBpcre2grep\fP writes
 | ||||||
|  | +newlines in informational messages to the standard output and error streams.
 | ||||||
|  | +Under Windows, the standard output is set to be binary, so that "\er\en" at the
 | ||||||
|  | +ends of output lines that are copied from the input is not converted to
 | ||||||
|  | +"\er\er\en" by the C I/O library. This means that any messages written to the
 | ||||||
|  | +standard output must end with "\er\en". For all other operating systems, and
 | ||||||
|  | +for all messages to the standard error stream, "\en" is used.
 | ||||||
|  |  . | ||||||
|  |  . | ||||||
|  |  .SH "OPTIONS COMPATIBILITY" | ||||||
|  | @@ -904,6 +921,6 @@ Cambridge, England.
 | ||||||
|  |  .rs | ||||||
|  |  .sp | ||||||
|  |  .nf | ||||||
|  | -Last updated: 15 June 2019
 | ||||||
|  | -Copyright (c) 1997-2019 University of Cambridge.
 | ||||||
|  | +Last updated: 25 January 2020
 | ||||||
|  | +Copyright (c) 1997-2020 University of Cambridge.
 | ||||||
|  |  .fi | ||||||
|  | diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
 | ||||||
|  | index b11092a..4d41f54 100644
 | ||||||
|  | --- a/doc/pcre2grep.txt
 | ||||||
|  | +++ b/doc/pcre2grep.txt
 | ||||||
|  | @@ -116,9 +116,9 @@ BINARY FILES
 | ||||||
|  |         By  default,  a  file that contains a binary zero byte within the first | ||||||
|  |         1024 bytes is identified as a binary file, and is processed  specially. | ||||||
|  |         (GNU grep identifies binary files in this manner.) However, if the new- | ||||||
|  | -       line type is specified as "nul", that is, the line terminator is a  bi-
 | ||||||
|  | -       nary zero, the test for a binary file is not applied. See the --binary-
 | ||||||
|  | -       files option for a means of changing the way binary files are handled.
 | ||||||
|  | +       line type is specified as NUL, that is, the line terminator is a binary
 | ||||||
|  | +       zero, the test for a binary file is not applied. See the --binary-files
 | ||||||
|  | +       option for a means of changing the way binary files are handled.
 | ||||||
|  |   | ||||||
|  |   | ||||||
|  |  BINARY ZEROS IN PATTERNS | ||||||
|  | @@ -578,30 +578,36 @@ OPTIONS
 | ||||||
|  |                   when input is read line by line (see --line-buffered.) | ||||||
|  |   | ||||||
|  |         -N newline-type, --newline=newline-type | ||||||
|  | -                 The PCRE2 library supports five different conventions for in-
 | ||||||
|  | -                 dicating the ends of lines. They are the single-character se-
 | ||||||
|  | -                 quences CR (carriage return) and LF (linefeed), the two-char-
 | ||||||
|  | -                 acter sequence CRLF, an "anycrlf"  convention,  which  recog-
 | ||||||
|  | -                 nizes  any of the preceding three types, and an "any" conven-
 | ||||||
|  | -                 tion, in which any Unicode line ending sequence is assumed to
 | ||||||
|  | -                 end  a  line.  The  Unicode sequences are the three just men-
 | ||||||
|  | -                 tioned, plus  VT  (vertical  tab,  U+000B),  FF  (form  feed,
 | ||||||
|  | -                 U+000C),   NEL  (next  line,  U+0085),  LS  (line  separator,
 | ||||||
|  | -                 U+2028), and PS (paragraph separator, U+2029).
 | ||||||
|  | +                 Six different conventions for indicating the ends of lines in
 | ||||||
|  | +                 scanned files are supported. For example:
 | ||||||
|  | +
 | ||||||
|  | +                   pcre2grep -N CRLF 'some pattern' <file>
 | ||||||
|  | +
 | ||||||
|  | +                 The newline type may be specified in upper, lower,  or  mixed
 | ||||||
|  | +                 case.  If the newline type is NUL, lines are separated by bi-
 | ||||||
|  | +                 nary zero characters. The other types are the  single-charac-
 | ||||||
|  | +                 ter  sequences  CR  (carriage  return) and LF (linefeed), the
 | ||||||
|  | +                 two-character sequence CRLF, an "anycrlf" type, which  recog-
 | ||||||
|  | +                 nizes  any  of  the preceding three types, and an "any" type,
 | ||||||
|  | +                 for which any Unicode line ending sequence is assumed to  end
 | ||||||
|  | +                 a  line.  The Unicode sequences are the three just mentioned,
 | ||||||
|  | +                 plus VT (vertical tab, U+000B), FF (form feed,  U+000C),  NEL
 | ||||||
|  | +                 (next  line,  U+0085),  LS  (line  separator, U+2028), and PS
 | ||||||
|  | +                 (paragraph separator, U+2029).
 | ||||||
|  |   | ||||||
|  |                   When the PCRE2 library is built, a  default  line-ending  se- | ||||||
|  |                   quence  is specified.  This is normally the standard sequence | ||||||
|  |                   for the operating system. Unless otherwise specified by  this | ||||||
|  | -                 option,  pcre2grep  uses the library's default.  The possible
 | ||||||
|  | -                 values for this option are CR, LF,  CRLF,  ANYCRLF,  or  ANY.
 | ||||||
|  | -                 This  makes  it  possible to use pcre2grep to scan files that
 | ||||||
|  | -                 have come from other environments without  having  to  modify
 | ||||||
|  | -                 their  line  endings.  If the data that is being scanned does
 | ||||||
|  | -                 not agree with the convention set by this  option,  pcre2grep
 | ||||||
|  | -                 may  behave  in  strange ways. Note that this option does not
 | ||||||
|  | -                 apply to files specified by the -f, --exclude-from, or  --in-
 | ||||||
|  | -                 clude-from  options,  which are expected to use the operating
 | ||||||
|  | -                 system's standard newline sequence.
 | ||||||
|  | +                 option, pcre2grep uses the library's default.
 | ||||||
|  | +
 | ||||||
|  | +                 This  option makes it possible to use pcre2grep to scan files
 | ||||||
|  | +                 that have come from other environments without having to mod-
 | ||||||
|  | +                 ify  their  line  endings.  If the data that is being scanned
 | ||||||
|  | +                 does not agree  with  the  convention  set  by  this  option,
 | ||||||
|  | +                 pcre2grep  may  behave in strange ways. Note that this option
 | ||||||
|  | +                 does not apply to files specified by the -f,  --exclude-from,
 | ||||||
|  | +                 or  --include-from options, which are expected to use the op-
 | ||||||
|  | +                 erating system's standard newline sequence.
 | ||||||
|  |   | ||||||
|  |         -n, --line-number | ||||||
|  |                   Precede each output line by its line number in the file, fol- | ||||||
|  | @@ -620,11 +626,13 @@ OPTIONS
 | ||||||
|  |   | ||||||
|  |         -O text, --output=text | ||||||
|  |                   When there is a match, instead of outputting the  whole  line | ||||||
|  | -                 that  matched, output just the given text. This option is mu-
 | ||||||
|  | -                 tually exclusive with  --only-matching,  --file-offsets,  and
 | ||||||
|  | -                 --line-offsets. Escape sequences starting with a dollar char-
 | ||||||
|  | -                 acter may be used to insert the contents of the matched  part
 | ||||||
|  | -                 of the line and/or captured substrings into the text.
 | ||||||
|  | +                 that  matched, output just the given text, followed by an op-
 | ||||||
|  | +                 erating-system standard newline.  The --newline option has no
 | ||||||
|  | +                 effect  on  this  option,  which  is  mutually exclusive with
 | ||||||
|  | +                 --only-matching, --file-offsets, and  --line-offsets.  Escape
 | ||||||
|  | +                 sequences starting with a dollar character may be used to in-
 | ||||||
|  | +                 sert the contents of the matched part of the line and/or cap-
 | ||||||
|  | +                 tured substrings into the text.
 | ||||||
|  |   | ||||||
|  |                   $<digits>  or  ${<digits>}  is  replaced by the captured sub- | ||||||
|  |                   string of the given  decimal  number;  zero  substitutes  the | ||||||
|  | @@ -780,17 +788,27 @@ ENVIRONMENT VARIABLES
 | ||||||
|  |   | ||||||
|  |  NEWLINES | ||||||
|  |   | ||||||
|  | -       The -N (--newline) option allows pcre2grep to scan files with different
 | ||||||
|  | -       newline conventions from the default. Any parts of the input files that
 | ||||||
|  | -       are  written  to the standard output are copied identically, with what-
 | ||||||
|  | -       ever newline sequences they have in the input. However, the setting  of
 | ||||||
|  | -       this  option  affects only the way scanned files are processed. It does
 | ||||||
|  | -       not affect the interpretation of files specified  by  the  -f,  --file-
 | ||||||
|  | -       list, --exclude-from, or --include-from options, nor does it affect the
 | ||||||
|  | -       way in which pcre2grep writes informational messages  to  the  standard
 | ||||||
|  | -       error and output streams. For these it uses the string "\n" to indicate
 | ||||||
|  | -       newlines, relying on the C I/O library to convert this to an  appropri-
 | ||||||
|  | -       ate sequence.
 | ||||||
|  | +       The  -N  (--newline) option allows pcre2grep to scan files with newline
 | ||||||
|  | +       conventions that differ from the default. This option affects only  the
 | ||||||
|  | +       way  scanned files are processed. It does not affect the interpretation
 | ||||||
|  | +       of files specified by the -f,  --file-list,  --exclude-from,  or  --in-
 | ||||||
|  | +       clude-from options.
 | ||||||
|  | +
 | ||||||
|  | +       Any  parts  of the scanned input files that are written to the standard
 | ||||||
|  | +       output are copied with whatever newline sequences they have in the  in-
 | ||||||
|  | +       put.  However,  if  the final line of a file is output, and it does not
 | ||||||
|  | +       end with a newline sequence, a newline sequence is added. If  the  new-
 | ||||||
|  | +       line  setting  is  CR, LF, CRLF or NUL, that line ending is output; for
 | ||||||
|  | +       the other settings (ANYCRLF or ANY) a single NL is used.
 | ||||||
|  | +
 | ||||||
|  | +       The newline setting does not affect the way in which  pcre2grep  writes
 | ||||||
|  | +       newlines  in  informational  messages  to the standard output and error
 | ||||||
|  | +       streams.  Under Windows, the standard output is set to  be  binary,  so
 | ||||||
|  | +       that  "\r\n" at the ends of output lines that are copied from the input
 | ||||||
|  | +       is not converted to "\r\r\n" by the C I/O library. This means that  any
 | ||||||
|  | +       messages  written  to the standard output must end with "\r\n". For all
 | ||||||
|  | +       other operating systems, and for all messages  to  the  standard  error
 | ||||||
|  | +       stream, "\n" is used.
 | ||||||
|  |   | ||||||
|  |   | ||||||
|  |  OPTIONS COMPATIBILITY | ||||||
|  | @@ -963,5 +981,5 @@ AUTHOR
 | ||||||
|  |   | ||||||
|  |  REVISION | ||||||
|  |   | ||||||
|  | -       Last updated: 15 June 2019
 | ||||||
|  | -       Copyright (c) 1997-2019 University of Cambridge.
 | ||||||
|  | +       Last updated: 25 January 2020
 | ||||||
|  | +       Copyright (c) 1997-2020 University of Cambridge.
 | ||||||
|  | diff --git a/src/pcre2grep.c b/src/pcre2grep.c
 | ||||||
|  | index 12fe95e..10314a5 100644
 | ||||||
|  | --- a/src/pcre2grep.c
 | ||||||
|  | +++ b/src/pcre2grep.c
 | ||||||
|  | @@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS.
 | ||||||
|  |  The header can be found in the special z/OS distribution, which is available | ||||||
|  |  from www.zaconsultants.net or from www.cbttape.org. | ||||||
|  |   | ||||||
|  | -           Copyright (c) 1997-2019 University of Cambridge
 | ||||||
|  | +           Copyright (c) 1997-2020 University of Cambridge
 | ||||||
|  |   | ||||||
|  |  ----------------------------------------------------------------------------- | ||||||
|  |  Redistribution and use in source and binary forms, with or without | ||||||
|  | @@ -1665,6 +1665,44 @@ switch(endlinetype)
 | ||||||
|  |   | ||||||
|  |   | ||||||
|  |   | ||||||
|  | +/*************************************************
 | ||||||
|  | +*              Output newline at end             *
 | ||||||
|  | +*************************************************/
 | ||||||
|  | +
 | ||||||
|  | +/* This function is called if the final line of a file has been written to
 | ||||||
|  | +stdout, but it does not have a terminating newline.
 | ||||||
|  | +
 | ||||||
|  | +Arguments:  none
 | ||||||
|  | +Returns:    nothing
 | ||||||
|  | +*/
 | ||||||
|  | +
 | ||||||
|  | +static void
 | ||||||
|  | +write_final_newline(void)
 | ||||||
|  | +{
 | ||||||
|  | +switch(endlinetype)
 | ||||||
|  | +  {
 | ||||||
|  | +  default:      /* Just in case */
 | ||||||
|  | +  case PCRE2_NEWLINE_LF:
 | ||||||
|  | +  case PCRE2_NEWLINE_ANY:
 | ||||||
|  | +  case PCRE2_NEWLINE_ANYCRLF:
 | ||||||
|  | +  fprintf(stdout, "\n");
 | ||||||
|  | +  break;
 | ||||||
|  | +
 | ||||||
|  | +  case PCRE2_NEWLINE_CR:
 | ||||||
|  | +  fprintf(stdout, "\r");
 | ||||||
|  | +  break;
 | ||||||
|  | +
 | ||||||
|  | +  case PCRE2_NEWLINE_CRLF:
 | ||||||
|  | +  fprintf(stdout, "\r\n");
 | ||||||
|  | +  break;
 | ||||||
|  | +
 | ||||||
|  | +  case PCRE2_NEWLINE_NUL:
 | ||||||
|  | +  fprintf(stdout, "%c", 0);
 | ||||||
|  | +  break;
 | ||||||
|  | +  }
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +
 | ||||||
|  |  /************************************************* | ||||||
|  |  *       Print the previous "after" lines         * | ||||||
|  |  *************************************************/ | ||||||
|  | @@ -1689,9 +1727,9 @@ do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
 | ||||||
|  |  if (after_context > 0 && lastmatchnumber > 0) | ||||||
|  |    { | ||||||
|  |    int count = 0; | ||||||
|  | +  int ellength = 0;
 | ||||||
|  |    while (lastmatchrestart < endptr && count < after_context) | ||||||
|  |      { | ||||||
|  | -    int ellength;
 | ||||||
|  |      char *pp = end_of_line(lastmatchrestart, endptr, &ellength); | ||||||
|  |      if (ellength == 0 && pp == main_buffer + bufsize) break; | ||||||
|  |      if (printname != NULL) fprintf(stdout, "%s-", printname); | ||||||
|  | @@ -1700,7 +1738,17 @@ if (after_context > 0 && lastmatchnumber > 0)
 | ||||||
|  |      lastmatchrestart = pp; | ||||||
|  |      count++; | ||||||
|  |      } | ||||||
|  | -  if (count > 0) hyphenpending = TRUE;
 | ||||||
|  | +
 | ||||||
|  | +  /* If we have printed any lines, arrange for a hyphen separator if anything
 | ||||||
|  | +  else follows. Also, if the last line is the final line in the file and it had
 | ||||||
|  | +  no newline, add one. */
 | ||||||
|  | +
 | ||||||
|  | +  if (count > 0)
 | ||||||
|  | +    {
 | ||||||
|  | +    hyphenpending = TRUE;
 | ||||||
|  | +    if (ellength == 0 && lastmatchrestart >= endptr)
 | ||||||
|  | +      write_final_newline();
 | ||||||
|  | +    }
 | ||||||
|  |    } | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | @@ -2437,6 +2485,7 @@ char *endptr;
 | ||||||
|  |  PCRE2_SIZE bufflength; | ||||||
|  |  BOOL binary = FALSE; | ||||||
|  |  BOOL endhyphenpending = FALSE; | ||||||
|  | +BOOL lines_printed = FALSE;
 | ||||||
|  |  BOOL input_line_buffered = line_buffered; | ||||||
|  |  FILE *in = NULL;                    /* Ensure initialized */ | ||||||
|  |   | ||||||
|  | @@ -2777,6 +2826,8 @@ while (ptr < endptr)
 | ||||||
|  |   | ||||||
|  |      else | ||||||
|  |        { | ||||||
|  | +      lines_printed = TRUE;
 | ||||||
|  | +
 | ||||||
|  |        /* See if there is a requirement to print some "after" lines from a | ||||||
|  |        previous match. We never print any overlaps. */ | ||||||
|  |   | ||||||
|  | @@ -2825,7 +2876,8 @@ while (ptr < endptr)
 | ||||||
|  |          int linecount = 0; | ||||||
|  |          char *p = ptr; | ||||||
|  |   | ||||||
|  | -        while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
 | ||||||
|  | +        while (p > main_buffer &&
 | ||||||
|  | +               (lastmatchnumber == 0 || p > lastmatchrestart) &&
 | ||||||
|  |                 linecount < before_context) | ||||||
|  |            { | ||||||
|  |            linecount++; | ||||||
|  | @@ -2981,6 +3033,12 @@ while (ptr < endptr)
 | ||||||
|  |   | ||||||
|  |      lastmatchrestart = ptr + linelength + endlinelength; | ||||||
|  |      lastmatchnumber = linenumber + 1; | ||||||
|  | +
 | ||||||
|  | +    /* If a line was printed and we are now at the end of the file and the last
 | ||||||
|  | +    line had no newline, output one. */
 | ||||||
|  | +
 | ||||||
|  | +    if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
 | ||||||
|  | +      write_final_newline();
 | ||||||
|  |      } | ||||||
|  |   | ||||||
|  |    /* For a match in multiline inverted mode (which of course did not cause | ||||||
|  | diff --git a/testdata/grepoutputN b/testdata/grepoutputN
 | ||||||
|  | index ba97e90..caaeb75 100644
 | ||||||
|  | --- a/testdata/grepoutputN
 | ||||||
|  | +++ b/testdata/grepoutputN
 | ||||||
|  | @@ -2,16 +2,20 @@
 | ||||||
|  |  1:abc
2:def
---------------------------- Test N2 ------------------------------
 | ||||||
|  |  1:abc
def | ||||||
|  |  2:ghi | ||||||
|  | -jkl---------------------------- Test N3 ------------------------------
 | ||||||
|  | +jkl
 | ||||||
|  | +---------------------------- Test N3 ------------------------------
 | ||||||
|  |  2:def
3: | ||||||
|  |  ghi | ||||||
|  | -jkl---------------------------- Test N4 ------------------------------
 | ||||||
|  | +jkl
---------------------------- Test N4 ------------------------------
 | ||||||
|  |  2:ghi | ||||||
|  | -jkl---------------------------- Test N5 ------------------------------
 | ||||||
|  | +jkl
 | ||||||
|  | +---------------------------- Test N5 ------------------------------
 | ||||||
|  |  1:abc
2:def | ||||||
|  |  3:ghi | ||||||
|  | -4:jkl---------------------------- Test N6 ------------------------------
 | ||||||
|  | +4:jkl
 | ||||||
|  | +---------------------------- Test N6 ------------------------------
 | ||||||
|  |  1:abc
2:def | ||||||
|  |  3:ghi | ||||||
|  | -4:jkl---------------------------- Test N7 ------------------------------
 | ||||||
|  | -1:abcZERO2:def
 | ||||||
|  | +4:jkl
 | ||||||
|  | +---------------------------- Test N7 ------------------------------
 | ||||||
|  | +1:abcZERO2:defZERO
 | ||||||
|  | -- 
 | ||||||
|  | 2.21.1 | ||||||
|  | 
 | ||||||
| @ -62,6 +62,9 @@ Patch2:     pcre2-10.34-Fix-the-too-early-access-of-the-fields-of-a-compiled.pat | |||||||
| Patch3:     pcre2-10.34-Fix-THEN-verbs-in-lookahead-assertions-in-JIT.patch | Patch3:     pcre2-10.34-Fix-THEN-verbs-in-lookahead-assertions-in-JIT.patch | ||||||
| # Fix a memory leak when allocating a JIT stack fails, in upstream after 10.34 | # Fix a memory leak when allocating a JIT stack fails, in upstream after 10.34 | ||||||
| Patch4:     pcre2-10.34-The-JIT-stack-should-be-freed-when-the-low-level-sta.patch | Patch4:     pcre2-10.34-The-JIT-stack-should-be-freed-when-the-low-level-sta.patch | ||||||
|  | # Ensure a newline after the final line in a file is output by pcre2grep, | ||||||
|  | # upstream bug #2513, in upstream after 10.34 | ||||||
|  | Patch5:     pcre2-10.34-Ensure-a-newline-after-the-final-line-in-a-file-is-o.patch | ||||||
| BuildRequires:  autoconf | BuildRequires:  autoconf | ||||||
| BuildRequires:  automake | BuildRequires:  automake | ||||||
| BuildRequires:  coreutils | BuildRequires:  coreutils | ||||||
| @ -144,6 +147,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. | |||||||
| %patch2 -p1 | %patch2 -p1 | ||||||
| %patch3 -p1 | %patch3 -p1 | ||||||
| %patch4 -p1 | %patch4 -p1 | ||||||
|  | %patch5 -p1 | ||||||
| # Because of multilib patch | # Because of multilib patch | ||||||
| libtoolize --copy --force | libtoolize --copy --force | ||||||
| autoreconf -vif | autoreconf -vif | ||||||
| @ -243,6 +247,8 @@ make %{?_smp_mflags} check VERBOSE=yes | |||||||
| %changelog | %changelog | ||||||
| * Mon Jan 27 2020 Petr Pisar <ppisar@redhat.com> - 10.34-5 | * Mon Jan 27 2020 Petr Pisar <ppisar@redhat.com> - 10.34-5 | ||||||
| - Fix a memory leak when allocating a JIT stack fails | - Fix a memory leak when allocating a JIT stack fails | ||||||
|  | - Ensure a newline after the final line in a file is output by pcre2grep | ||||||
|  |   (upstream bug #2513) | ||||||
| 
 | 
 | ||||||
| * Mon Jan 13 2020 Petr Pisar <ppisar@redhat.com> - 10.34-4 | * Mon Jan 13 2020 Petr Pisar <ppisar@redhat.com> - 10.34-4 | ||||||
| - Fix a crash in JITted code when a *THEN verb is used in a lookahead assertion | - Fix a crash in JITted code when a *THEN verb is used in a lookahead assertion | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user