599 lines
29 KiB
Diff
599 lines
29 KiB
Diff
|
From b3f42a32920b20ae71988bc1d06a7148e0211925 Mon Sep 17 00:00:00 2001
|
||
|
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||
|
Date: Sat, 25 Jan 2020 15:50:44 +0000
|
||
|
Subject: [PATCH] Ensure a newline after the final line in a file is output by
|
||
|
pcre2grep.
|
||
|
MIME-Version: 1.0
|
||
|
Content-Type: text/plain; charset=UTF-8
|
||
|
Content-Transfer-Encoding: 8bit
|
||
|
|
||
|
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1211 6239d852-aaf2-0410-a92c-79f79f948069
|
||
|
Petr Písař: Ported to 10.34.
|
||
|
---
|
||
|
RunGrepTest | 4 +-
|
||
|
doc/html/pcre2grep.html | 84 ++++++++++++++++++++-------------
|
||
|
doc/pcre2grep.1 | 83 ++++++++++++++++++++-------------
|
||
|
doc/pcre2grep.txt | 100 ++++++++++++++++++++++++----------------
|
||
|
src/pcre2grep.c | 66 ++++++++++++++++++++++++--
|
||
|
testdata/grepoutputN | 16 ++++---
|
||
|
|
||
|
diff --git a/RunGrepTest b/RunGrepTest
|
||
|
index 1113cd4..2ff4f7c 100755
|
||
|
--- a/RunGrepTest
|
||
|
+++ b/RunGrepTest
|
||
|
@@ -742,11 +742,11 @@ uname=`uname`
|
||
|
case $uname in
|
||
|
Linux)
|
||
|
printf 'abc\0def' >testNinputgrep
|
||
|
- $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
|
||
|
+ $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/g' >>testtrygrep
|
||
|
echo "" >>testtrygrep
|
||
|
;;
|
||
|
*)
|
||
|
- echo '1:abcZERO2:def' >>testtrygrep
|
||
|
+ echo '1:abcZERO2:defZERO' >>testtrygrep
|
||
|
;;
|
||
|
esac
|
||
|
|
||
|
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
|
||
|
index f5b72f3..abbafa1 100644
|
||
|
--- a/doc/html/pcre2grep.html
|
||
|
+++ b/doc/html/pcre2grep.html
|
||
|
@@ -148,7 +148,7 @@ ignored.
|
||
|
By default, a file that contains a binary zero byte within the first 1024 bytes
|
||
|
is identified as a binary file, and is processed specially. (GNU grep
|
||
|
identifies binary files in this manner.) However, if the newline type is
|
||
|
-specified as "nul", that is, the line terminator is a binary zero, the test for
|
||
|
+specified as NUL, that is, the line terminator is a binary zero, the test for
|
||
|
a binary file is not applied. See the <b>--binary-files</b> option for a means
|
||
|
of changing the way binary files are handled.
|
||
|
</P>
|
||
|
@@ -601,25 +601,32 @@ does not work when input is read line by line (see \fP--line-buffered\fP.)
|
||
|
</P>
|
||
|
<P>
|
||
|
<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
|
||
|
-The PCRE2 library supports five different conventions for indicating
|
||
|
-the ends of lines. They are the single-character sequences CR (carriage return)
|
||
|
-and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
|
||
|
-which recognizes any of the preceding three types, and an "any" convention, in
|
||
|
-which any Unicode line ending sequence is assumed to end a line. The Unicode
|
||
|
-sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
|
||
|
-(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
|
||
|
-PS (paragraph separator, U+2029).
|
||
|
+Six different conventions for indicating the ends of lines in scanned files are
|
||
|
+supported. For example:
|
||
|
+<pre>
|
||
|
+ pcre2grep -N CRLF 'some pattern' &#60;file&#62;
|
||
|
+</pre>
|
||
|
+The newline type may be specified in upper, lower, or mixed case. If the
|
||
|
+newline type is NUL, lines are separated by binary zero characters. The other
|
||
|
+types are the single-character sequences CR (carriage return) and LF
|
||
|
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
|
||
|
+recognizes any of the preceding three types, and an "any" type, for which any
|
||
|
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
|
||
|
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
|
||
|
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
|
||
|
+(paragraph separator, U+2029).
|
||
|
<br>
|
||
|
<br>
|
||
|
When the PCRE2 library is built, a default line-ending sequence is specified.
|
||
|
This is normally the standard sequence for the operating system. Unless
|
||
|
otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
|
||
|
-The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
|
||
|
-makes it possible to use <b>pcre2grep</b> to scan files that have come from
|
||
|
-other environments without having to modify their line endings. If the data
|
||
|
-that is being scanned does not agree with the convention set by this option,
|
||
|
-<b>pcre2grep</b> may behave in strange ways. Note that this option does not
|
||
|
-apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
|
||
|
+<br>
|
||
|
+<br>
|
||
|
+This option makes it possible to use <b>pcre2grep</b> to scan files that have
|
||
|
+come from other environments without having to modify their line endings. If
|
||
|
+the data that is being scanned does not agree with the convention set by this
|
||
|
+option, <b>pcre2grep</b> may behave in strange ways. Note that this option does
|
||
|
+not apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
|
||
|
<b>--include-from</b> options, which are expected to use the operating system's
|
||
|
standard newline sequence.
|
||
|
</P>
|
||
|
@@ -640,12 +647,14 @@ use of JIT at run time. It is provided for testing and working round problems.
|
||
|
It should never be needed in normal use.
|
||
|
</P>
|
||
|
<P>
|
||
|
-<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
|
||
|
+<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
|
||
|
When there is a match, instead of outputting the whole line that matched,
|
||
|
-output just the given text. This option is mutually exclusive with
|
||
|
-<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
|
||
|
-sequences starting with a dollar character may be used to insert the contents
|
||
|
-of the matched part of the line and/or captured substrings into the text.
|
||
|
+output just the given text, followed by an operating-system standard newline.
|
||
|
+The <b>--newline</b> option has no effect on this option, which is mutually
|
||
|
+exclusive with <b>--only-matching</b>, <b>--file-offsets</b>, and
|
||
|
+<b>--line-offsets</b>. Escape sequences starting with a dollar character may be
|
||
|
+used to insert the contents of the matched part of the line and/or captured
|
||
|
+substrings into the text.
|
||
|
<br>
|
||
|
<br>
|
||
|
$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
|
||
|
@@ -807,16 +816,27 @@ by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
|
||
|
<br><a name="SEC8" href="#TOC1">NEWLINES</a><br>
|
||
|
<P>
|
||
|
The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
|
||
|
-different newline conventions from the default. Any parts of the input files
|
||
|
-that are written to the standard output are copied identically, with whatever
|
||
|
-newline sequences they have in the input. However, the setting of this option
|
||
|
-affects only the way scanned files are processed. It does not affect the
|
||
|
-interpretation of files specified by the <b>-f</b>, <b>--file-list</b>,
|
||
|
-<b>--exclude-from</b>, or <b>--include-from</b> options, nor does it affect the
|
||
|
-way in which <b>pcre2grep</b> writes informational messages to the standard
|
||
|
-error and output streams. For these it uses the string "\n" to indicate
|
||
|
-newlines, relying on the C I/O library to convert this to an appropriate
|
||
|
-sequence.
|
||
|
+newline conventions that differ from the default. This option affects only the
|
||
|
+way scanned files are processed. It does not affect the interpretation of files
|
||
|
+specified by the <b>-f</b>, <b>--file-list</b>, <b>--exclude-from</b>, or
|
||
|
+<b>--include-from</b> options.
|
||
|
+</P>
|
||
|
+<P>
|
||
|
+Any parts of the scanned input files that are written to the standard output
|
||
|
+are copied with whatever newline sequences they have in the input. However, if
|
||
|
+the final line of a file is output, and it does not end with a newline
|
||
|
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
|
||
|
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
|
||
|
+single NL is used.
|
||
|
+</P>
|
||
|
+<P>
|
||
|
+The newline setting does not affect the way in which <b>pcre2grep</b> writes
|
||
|
+newlines in informational messages to the standard output and error streams.
|
||
|
+Under Windows, the standard output is set to be binary, so that "\r\n" at the
|
||
|
+ends of output lines that are copied from the input is not converted to
|
||
|
+"\r\r\n" by the C I/O library. This means that any messages written to the
|
||
|
+standard output must end with "\r\n". For all other operating systems, and
|
||
|
+for all messages to the standard error stream, "\n" is used.
|
||
|
</P>
|
||
|
<br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
|
||
|
<P>
|
||
|
@@ -992,9 +1012,9 @@ Cambridge, England.
|
||
|
</P>
|
||
|
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
|
||
|
<P>
|
||
|
-Last updated: 15 June 2019
|
||
|
+Last updated: 25 January 2020
|
||
|
<br>
|
||
|
-Copyright © 1997-2019 University of Cambridge.
|
||
|
+Copyright © 1997-2020 University of Cambridge.
|
||
|
<br>
|
||
|
<p>
|
||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||
|
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
|
||
|
index 22992b1..82f0435 100644
|
||
|
--- a/doc/pcre2grep.1
|
||
|
+++ b/doc/pcre2grep.1
|
||
|
@@ -1,4 +1,4 @@
|
||
|
-.TH PCRE2GREP 1 "15 June 2019" "PCRE2 10.34"
|
||
|
+.TH PCRE2GREP 1 "25 January 2020" "PCRE2 10.35"
|
||
|
.SH NAME
|
||
|
pcre2grep - a grep with Perl-compatible regular expressions.
|
||
|
.SH SYNOPSIS
|
||
|
@@ -117,7 +117,7 @@ ignored.
|
||
|
By default, a file that contains a binary zero byte within the first 1024 bytes
|
||
|
is identified as a binary file, and is processed specially. (GNU grep
|
||
|
identifies binary files in this manner.) However, if the newline type is
|
||
|
-specified as "nul", that is, the line terminator is a binary zero, the test for
|
||
|
+specified as NUL, that is, the line terminator is a binary zero, the test for
|
||
|
a binary file is not applied. See the \fB--binary-files\fP option for a means
|
||
|
of changing the way binary files are handled.
|
||
|
.
|
||
|
@@ -523,24 +523,30 @@ large processing buffer, this should not be a problem, but the \fB-M\fP option
|
||
|
does not work when input is read line by line (see \fP--line-buffered\fP.)
|
||
|
.TP
|
||
|
\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
|
||
|
-The PCRE2 library supports five different conventions for indicating
|
||
|
-the ends of lines. They are the single-character sequences CR (carriage return)
|
||
|
-and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
|
||
|
-which recognizes any of the preceding three types, and an "any" convention, in
|
||
|
-which any Unicode line ending sequence is assumed to end a line. The Unicode
|
||
|
-sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
|
||
|
-(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
|
||
|
-PS (paragraph separator, U+2029).
|
||
|
+Six different conventions for indicating the ends of lines in scanned files are
|
||
|
+supported. For example:
|
||
|
+.sp
|
||
|
+ pcre2grep -N CRLF 'some pattern' <file>
|
||
|
+.sp
|
||
|
+The newline type may be specified in upper, lower, or mixed case. If the
|
||
|
+newline type is NUL, lines are separated by binary zero characters. The other
|
||
|
+types are the single-character sequences CR (carriage return) and LF
|
||
|
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
|
||
|
+recognizes any of the preceding three types, and an "any" type, for which any
|
||
|
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
|
||
|
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
|
||
|
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
|
||
|
+(paragraph separator, U+2029).
|
||
|
.sp
|
||
|
When the PCRE2 library is built, a default line-ending sequence is specified.
|
||
|
This is normally the standard sequence for the operating system. Unless
|
||
|
otherwise specified by this option, \fBpcre2grep\fP uses the library's default.
|
||
|
-The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
|
||
|
-makes it possible to use \fBpcre2grep\fP to scan files that have come from
|
||
|
-other environments without having to modify their line endings. If the data
|
||
|
-that is being scanned does not agree with the convention set by this option,
|
||
|
-\fBpcre2grep\fP may behave in strange ways. Note that this option does not
|
||
|
-apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
|
||
|
+.sp
|
||
|
+This option makes it possible to use \fBpcre2grep\fP to scan files that have
|
||
|
+come from other environments without having to modify their line endings. If
|
||
|
+the data that is being scanned does not agree with the convention set by this
|
||
|
+option, \fBpcre2grep\fP may behave in strange ways. Note that this option does
|
||
|
+not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
|
||
|
\fB--include-from\fP options, which are expected to use the operating system's
|
||
|
standard newline sequence.
|
||
|
.TP
|
||
|
@@ -558,12 +564,14 @@ was explicitly disabled at build time. This option can be used to disable the
|
||
|
use of JIT at run time. It is provided for testing and working round problems.
|
||
|
It should never be needed in normal use.
|
||
|
.TP
|
||
|
-\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
|
||
|
+\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
|
||
|
When there is a match, instead of outputting the whole line that matched,
|
||
|
-output just the given text. This option is mutually exclusive with
|
||
|
-\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
|
||
|
-sequences starting with a dollar character may be used to insert the contents
|
||
|
-of the matched part of the line and/or captured substrings into the text.
|
||
|
+output just the given text, followed by an operating-system standard newline.
|
||
|
+The \fB--newline\fP option has no effect on this option, which is mutually
|
||
|
+exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and
|
||
|
+\fB--line-offsets\fP. Escape sequences starting with a dollar character may be
|
||
|
+used to insert the contents of the matched part of the line and/or captured
|
||
|
+substrings into the text.
|
||
|
.sp
|
||
|
$<digits> or ${<digits>} is replaced by the captured
|
||
|
substring of the given decimal number; zero substitutes the whole match. If
|
||
|
@@ -709,16 +717,25 @@ by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default
|
||
|
.rs
|
||
|
.sp
|
||
|
The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
|
||
|
-different newline conventions from the default. Any parts of the input files
|
||
|
-that are written to the standard output are copied identically, with whatever
|
||
|
-newline sequences they have in the input. However, the setting of this option
|
||
|
-affects only the way scanned files are processed. It does not affect the
|
||
|
-interpretation of files specified by the \fB-f\fP, \fB--file-list\fP,
|
||
|
-\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the
|
||
|
-way in which \fBpcre2grep\fP writes informational messages to the standard
|
||
|
-error and output streams. For these it uses the string "\en" to indicate
|
||
|
-newlines, relying on the C I/O library to convert this to an appropriate
|
||
|
-sequence.
|
||
|
+newline conventions that differ from the default. This option affects only the
|
||
|
+way scanned files are processed. It does not affect the interpretation of files
|
||
|
+specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or
|
||
|
+\fB--include-from\fP options.
|
||
|
+.P
|
||
|
+Any parts of the scanned input files that are written to the standard output
|
||
|
+are copied with whatever newline sequences they have in the input. However, if
|
||
|
+the final line of a file is output, and it does not end with a newline
|
||
|
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
|
||
|
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
|
||
|
+single NL is used.
|
||
|
+.P
|
||
|
+The newline setting does not affect the way in which \fBpcre2grep\fP writes
|
||
|
+newlines in informational messages to the standard output and error streams.
|
||
|
+Under Windows, the standard output is set to be binary, so that "\er\en" at the
|
||
|
+ends of output lines that are copied from the input is not converted to
|
||
|
+"\er\er\en" by the C I/O library. This means that any messages written to the
|
||
|
+standard output must end with "\er\en". For all other operating systems, and
|
||
|
+for all messages to the standard error stream, "\en" is used.
|
||
|
.
|
||
|
.
|
||
|
.SH "OPTIONS COMPATIBILITY"
|
||
|
@@ -904,6 +921,6 @@ Cambridge, England.
|
||
|
.rs
|
||
|
.sp
|
||
|
.nf
|
||
|
-Last updated: 15 June 2019
|
||
|
-Copyright (c) 1997-2019 University of Cambridge.
|
||
|
+Last updated: 25 January 2020
|
||
|
+Copyright (c) 1997-2020 University of Cambridge.
|
||
|
.fi
|
||
|
diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
|
||
|
index b11092a..4d41f54 100644
|
||
|
--- a/doc/pcre2grep.txt
|
||
|
+++ b/doc/pcre2grep.txt
|
||
|
@@ -116,9 +116,9 @@ BINARY FILES
|
||
|
By default, a file that contains a binary zero byte within the first
|
||
|
1024 bytes is identified as a binary file, and is processed specially.
|
||
|
(GNU grep identifies binary files in this manner.) However, if the new-
|
||
|
- line type is specified as "nul", that is, the line terminator is a bi-
|
||
|
- nary zero, the test for a binary file is not applied. See the --binary-
|
||
|
- files option for a means of changing the way binary files are handled.
|
||
|
+ line type is specified as NUL, that is, the line terminator is a binary
|
||
|
+ zero, the test for a binary file is not applied. See the --binary-files
|
||
|
+ option for a means of changing the way binary files are handled.
|
||
|
|
||
|
|
||
|
BINARY ZEROS IN PATTERNS
|
||
|
@@ -578,30 +578,36 @@ OPTIONS
|
||
|
when input is read line by line (see --line-buffered.)
|
||
|
|
||
|
-N newline-type, --newline=newline-type
|
||
|
- The PCRE2 library supports five different conventions for in-
|
||
|
- dicating the ends of lines. They are the single-character se-
|
||
|
- quences CR (carriage return) and LF (linefeed), the two-char-
|
||
|
- acter sequence CRLF, an "anycrlf" convention, which recog-
|
||
|
- nizes any of the preceding three types, and an "any" conven-
|
||
|
- tion, in which any Unicode line ending sequence is assumed to
|
||
|
- end a line. The Unicode sequences are the three just men-
|
||
|
- tioned, plus VT (vertical tab, U+000B), FF (form feed,
|
||
|
- U+000C), NEL (next line, U+0085), LS (line separator,
|
||
|
- U+2028), and PS (paragraph separator, U+2029).
|
||
|
+ Six different conventions for indicating the ends of lines in
|
||
|
+ scanned files are supported. For example:
|
||
|
+
|
||
|
+ pcre2grep -N CRLF 'some pattern' <file>
|
||
|
+
|
||
|
+ The newline type may be specified in upper, lower, or mixed
|
||
|
+ case. If the newline type is NUL, lines are separated by bi-
|
||
|
+ nary zero characters. The other types are the single-charac-
|
||
|
+ ter sequences CR (carriage return) and LF (linefeed), the
|
||
|
+ two-character sequence CRLF, an "anycrlf" type, which recog-
|
||
|
+ nizes any of the preceding three types, and an "any" type,
|
||
|
+ for which any Unicode line ending sequence is assumed to end
|
||
|
+ a line. The Unicode sequences are the three just mentioned,
|
||
|
+ plus VT (vertical tab, U+000B), FF (form feed, U+000C), NEL
|
||
|
+ (next line, U+0085), LS (line separator, U+2028), and PS
|
||
|
+ (paragraph separator, U+2029).
|
||
|
|
||
|
When the PCRE2 library is built, a default line-ending se-
|
||
|
quence is specified. This is normally the standard sequence
|
||
|
for the operating system. Unless otherwise specified by this
|
||
|
- option, pcre2grep uses the library's default. The possible
|
||
|
- values for this option are CR, LF, CRLF, ANYCRLF, or ANY.
|
||
|
- This makes it possible to use pcre2grep to scan files that
|
||
|
- have come from other environments without having to modify
|
||
|
- their line endings. If the data that is being scanned does
|
||
|
- not agree with the convention set by this option, pcre2grep
|
||
|
- may behave in strange ways. Note that this option does not
|
||
|
- apply to files specified by the -f, --exclude-from, or --in-
|
||
|
- clude-from options, which are expected to use the operating
|
||
|
- system's standard newline sequence.
|
||
|
+ option, pcre2grep uses the library's default.
|
||
|
+
|
||
|
+ This option makes it possible to use pcre2grep to scan files
|
||
|
+ that have come from other environments without having to mod-
|
||
|
+ ify their line endings. If the data that is being scanned
|
||
|
+ does not agree with the convention set by this option,
|
||
|
+ pcre2grep may behave in strange ways. Note that this option
|
||
|
+ does not apply to files specified by the -f, --exclude-from,
|
||
|
+ or --include-from options, which are expected to use the op-
|
||
|
+ erating system's standard newline sequence.
|
||
|
|
||
|
-n, --line-number
|
||
|
Precede each output line by its line number in the file, fol-
|
||
|
@@ -620,11 +626,13 @@ OPTIONS
|
||
|
|
||
|
-O text, --output=text
|
||
|
When there is a match, instead of outputting the whole line
|
||
|
- that matched, output just the given text. This option is mu-
|
||
|
- tually exclusive with --only-matching, --file-offsets, and
|
||
|
- --line-offsets. Escape sequences starting with a dollar char-
|
||
|
- acter may be used to insert the contents of the matched part
|
||
|
- of the line and/or captured substrings into the text.
|
||
|
+ that matched, output just the given text, followed by an op-
|
||
|
+ erating-system standard newline. The --newline option has no
|
||
|
+ effect on this option, which is mutually exclusive with
|
||
|
+ --only-matching, --file-offsets, and --line-offsets. Escape
|
||
|
+ sequences starting with a dollar character may be used to in-
|
||
|
+ sert the contents of the matched part of the line and/or cap-
|
||
|
+ tured substrings into the text.
|
||
|
|
||
|
$<digits> or ${<digits>} is replaced by the captured sub-
|
||
|
string of the given decimal number; zero substitutes the
|
||
|
@@ -780,17 +788,27 @@ ENVIRONMENT VARIABLES
|
||
|
|
||
|
NEWLINES
|
||
|
|
||
|
- The -N (--newline) option allows pcre2grep to scan files with different
|
||
|
- newline conventions from the default. Any parts of the input files that
|
||
|
- are written to the standard output are copied identically, with what-
|
||
|
- ever newline sequences they have in the input. However, the setting of
|
||
|
- this option affects only the way scanned files are processed. It does
|
||
|
- not affect the interpretation of files specified by the -f, --file-
|
||
|
- list, --exclude-from, or --include-from options, nor does it affect the
|
||
|
- way in which pcre2grep writes informational messages to the standard
|
||
|
- error and output streams. For these it uses the string "\n" to indicate
|
||
|
- newlines, relying on the C I/O library to convert this to an appropri-
|
||
|
- ate sequence.
|
||
|
+ The -N (--newline) option allows pcre2grep to scan files with newline
|
||
|
+ conventions that differ from the default. This option affects only the
|
||
|
+ way scanned files are processed. It does not affect the interpretation
|
||
|
+ of files specified by the -f, --file-list, --exclude-from, or --in-
|
||
|
+ clude-from options.
|
||
|
+
|
||
|
+ Any parts of the scanned input files that are written to the standard
|
||
|
+ output are copied with whatever newline sequences they have in the in-
|
||
|
+ put. However, if the final line of a file is output, and it does not
|
||
|
+ end with a newline sequence, a newline sequence is added. If the new-
|
||
|
+ line setting is CR, LF, CRLF or NUL, that line ending is output; for
|
||
|
+ the other settings (ANYCRLF or ANY) a single NL is used.
|
||
|
+
|
||
|
+ The newline setting does not affect the way in which pcre2grep writes
|
||
|
+ newlines in informational messages to the standard output and error
|
||
|
+ streams. Under Windows, the standard output is set to be binary, so
|
||
|
+ that "\r\n" at the ends of output lines that are copied from the input
|
||
|
+ is not converted to "\r\r\n" by the C I/O library. This means that any
|
||
|
+ messages written to the standard output must end with "\r\n". For all
|
||
|
+ other operating systems, and for all messages to the standard error
|
||
|
+ stream, "\n" is used.
|
||
|
|
||
|
|
||
|
OPTIONS COMPATIBILITY
|
||
|
@@ -963,5 +981,5 @@ AUTHOR
|
||
|
|
||
|
REVISION
|
||
|
|
||
|
- Last updated: 15 June 2019
|
||
|
- Copyright (c) 1997-2019 University of Cambridge.
|
||
|
+ Last updated: 25 January 2020
|
||
|
+ Copyright (c) 1997-2020 University of Cambridge.
|
||
|
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
|
||
|
index 12fe95e..10314a5 100644
|
||
|
--- a/src/pcre2grep.c
|
||
|
+++ b/src/pcre2grep.c
|
||
|
@@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS.
|
||
|
The header can be found in the special z/OS distribution, which is available
|
||
|
from www.zaconsultants.net or from www.cbttape.org.
|
||
|
|
||
|
- Copyright (c) 1997-2019 University of Cambridge
|
||
|
+ Copyright (c) 1997-2020 University of Cambridge
|
||
|
|
||
|
-----------------------------------------------------------------------------
|
||
|
Redistribution and use in source and binary forms, with or without
|
||
|
@@ -1665,6 +1665,44 @@ switch(endlinetype)
|
||
|
|
||
|
|
||
|
|
||
|
+/*************************************************
|
||
|
+* Output newline at end *
|
||
|
+*************************************************/
|
||
|
+
|
||
|
+/* Called when the final line of a file has been written to stdout without a
|
||
|
+terminating newline: writes the line ending selected by endlinetype to stdout.
|
||
|
+
|
||
|
+Arguments: none
|
||
|
+Returns: nothing
|
||
|
+*/
|
||
|
+
|
||
|
+static void
|
||
|
+write_final_newline(void)
|
||
|
+{
|
||
|
+switch(endlinetype)
|
||
|
+ {
|
||
|
+ default: /* Just in case: unknown types are treated like LF */
|
||
|
+ case PCRE2_NEWLINE_LF:
|
||
|
+ case PCRE2_NEWLINE_ANY: /* ANY and ANYCRLF accept several */
|
||
|
+ case PCRE2_NEWLINE_ANYCRLF: /* sequences; a single "\n" is written */
|
||
|
+ fprintf(stdout, "\n");
|
||
|
+ break;
|
||
|
+
|
||
|
+ case PCRE2_NEWLINE_CR:
|
||
|
+ fprintf(stdout, "\r");
|
||
|
+ break;
|
||
|
+
|
||
|
+ case PCRE2_NEWLINE_CRLF:
|
||
|
+ fprintf(stdout, "\r\n");
|
||
|
+ break;
|
||
|
+
|
||
|
+ case PCRE2_NEWLINE_NUL:
|
||
|
+ fprintf(stdout, "%c", 0); /* "%c" lets a zero byte be written */
|
||
|
+ break;
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
/*************************************************
|
||
|
* Print the previous "after" lines *
|
||
|
*************************************************/
|
||
|
@@ -1689,9 +1727,9 @@ do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
|
||
|
if (after_context > 0 && lastmatchnumber > 0)
|
||
|
{
|
||
|
int count = 0;
|
||
|
+ int ellength = 0;
|
||
|
while (lastmatchrestart < endptr && count < after_context)
|
||
|
{
|
||
|
- int ellength;
|
||
|
char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
|
||
|
if (ellength == 0 && pp == main_buffer + bufsize) break;
|
||
|
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
||
|
@@ -1700,7 +1738,17 @@ if (after_context > 0 && lastmatchnumber > 0)
|
||
|
lastmatchrestart = pp;
|
||
|
count++;
|
||
|
}
|
||
|
- if (count > 0) hyphenpending = TRUE;
|
||
|
+
|
||
|
+ /* If we have printed any lines, arrange for a hyphen separator if anything
|
||
|
+ else follows. Also, if the last line is the final line in the file and it had
|
||
|
+ no newline, add one. */
|
||
|
+
|
||
|
+ if (count > 0)
|
||
|
+ {
|
||
|
+ hyphenpending = TRUE;
|
||
|
+ if (ellength == 0 && lastmatchrestart >= endptr)
|
||
|
+ write_final_newline();
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -2437,6 +2485,7 @@ char *endptr;
|
||
|
PCRE2_SIZE bufflength;
|
||
|
BOOL binary = FALSE;
|
||
|
BOOL endhyphenpending = FALSE;
|
||
|
+BOOL lines_printed = FALSE;
|
||
|
BOOL input_line_buffered = line_buffered;
|
||
|
FILE *in = NULL; /* Ensure initialized */
|
||
|
|
||
|
@@ -2777,6 +2826,8 @@ while (ptr < endptr)
|
||
|
|
||
|
else
|
||
|
{
|
||
|
+ lines_printed = TRUE;
|
||
|
+
|
||
|
/* See if there is a requirement to print some "after" lines from a
|
||
|
previous match. We never print any overlaps. */
|
||
|
|
||
|
@@ -2825,7 +2876,8 @@ while (ptr < endptr)
|
||
|
int linecount = 0;
|
||
|
char *p = ptr;
|
||
|
|
||
|
- while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
|
||
|
+ while (p > main_buffer &&
|
||
|
+ (lastmatchnumber == 0 || p > lastmatchrestart) &&
|
||
|
linecount < before_context)
|
||
|
{
|
||
|
linecount++;
|
||
|
@@ -2981,6 +3033,12 @@ while (ptr < endptr)
|
||
|
|
||
|
lastmatchrestart = ptr + linelength + endlinelength;
|
||
|
lastmatchnumber = linenumber + 1;
|
||
|
+
|
||
|
+ /* If a line was printed and we are now at the end of the file and the last
|
||
|
+ line had no newline, output one. */
|
||
|
+
|
||
|
+ if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
|
||
|
+ write_final_newline();
|
||
|
}
|
||
|
|
||
|
/* For a match in multiline inverted mode (which of course did not cause
|
||
|
diff --git a/testdata/grepoutputN b/testdata/grepoutputN
|
||
|
index ba97e90..caaeb75 100644
|
||
|
--- a/testdata/grepoutputN
|
||
|
+++ b/testdata/grepoutputN
|
||
|
@@ -2,16 +2,20 @@
|
||
|
1:abc
2:def
---------------------------- Test N2 ------------------------------
|
||
|
1:abc
def
|
||
|
2:ghi
|
||
|
-jkl---------------------------- Test N3 ------------------------------
|
||
|
+jkl
|
||
|
+---------------------------- Test N3 ------------------------------
|
||
|
2:def
3:
|
||
|
ghi
|
||
|
-jkl---------------------------- Test N4 ------------------------------
|
||
|
+jkl
---------------------------- Test N4 ------------------------------
|
||
|
2:ghi
|
||
|
-jkl---------------------------- Test N5 ------------------------------
|
||
|
+jkl
|
||
|
+---------------------------- Test N5 ------------------------------
|
||
|
1:abc
2:def
|
||
|
3:ghi
|
||
|
-4:jkl---------------------------- Test N6 ------------------------------
|
||
|
+4:jkl
|
||
|
+---------------------------- Test N6 ------------------------------
|
||
|
1:abc
2:def
|
||
|
3:ghi
|
||
|
-4:jkl---------------------------- Test N7 ------------------------------
|
||
|
-1:abcZERO2:def
|
||
|
+4:jkl
|
||
|
+---------------------------- Test N7 ------------------------------
|
||
|
+1:abcZERO2:defZERO
|
||
|
--
|
||
|
2.21.1
|
||
|
|