archive/tar patches for performance

This commit is contained in:
Vincent Batts 2014-07-07 16:11:04 -04:00
parent 207296f435
commit 4086849026
3 changed files with 133 additions and 1 deletions

View File

@ -0,0 +1,64 @@
# HG changeset patch
# User Cristian Staretu <unclejacksons@gmail.com>
# Date 1404344479 -36000
# Thu Jul 03 09:41:19 2014 +1000
# Node ID 17404efd6b02d4b3acd17070e3f89de97a145877
# Parent 837348e418f33fc7a242f56dbe2feff829532526
archive/tar: reuse temporary buffer in readHeader
A temporary 512 bytes buffer is allocated for every call to
readHeader. This buffer isn't returned to the caller and it could
be reused to lower the number of memory allocations.
This CL improves it by using a pool and zeroing out the buffer before
putting it back into the pool.
benchmark old ns/op new ns/op delta
BenchmarkListFiles100k 545249903 538832687 -1.18%
benchmark old allocs new allocs delta
BenchmarkListFiles100k 2105167 2005692 -4.73%
benchmark old bytes new bytes delta
BenchmarkListFiles100k 105903472 54831527 -48.22%
This improvement is very important if your code has to deal with a lot
of tarballs which contain a lot of files.
LGTM=dsymonds
R=golang-codereviews, dave, dsymonds, bradfitz
CC=golang-codereviews
https://codereview.appspot.com/108240044
Committer: David Symonds <dsymonds@golang.org>
diff -r 837348e418f3 -r 17404efd6b02 src/pkg/archive/tar/reader.go
--- a/src/pkg/archive/tar/reader.go Thu Jul 03 09:40:53 2014 +1000
+++ b/src/pkg/archive/tar/reader.go Thu Jul 03 09:41:19 2014 +1000
@@ -29,10 +29,11 @@
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
- r io.Reader
- err error
- pad int64 // amount of padding (ignored) after current file entry
- curr numBytesReader // reader for current file entry
+ r io.Reader
+ err error
+ pad int64 // amount of padding (ignored) after current file entry
+ curr numBytesReader // reader for current file entry
+ hdrBuff [blockSize]byte // buffer to use in readHeader
}
// A numBytesReader is an io.Reader with a numBytes method, returning the number
@@ -426,7 +427,9 @@
}
func (tr *Reader) readHeader() *Header {
- header := make([]byte, blockSize)
+ header := tr.hdrBuff[:]
+ copy(header, zeroBlock)
+
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
}

View File

@ -0,0 +1,56 @@
# HG changeset patch
# User Cristian Staretu <unclejacksons@gmail.com>
# Date 1404344453 -36000
# Thu Jul 03 09:40:53 2014 +1000
# Node ID 837348e418f33fc7a242f56dbe2feff829532526
# Parent c5f72a685e256457a0872f6587e2bb9500eac7c4
archive/tar: reuse temporary buffer in writeHeader
A temporary 512 bytes buffer is allocated for every call to
writeHeader. This buffer could be reused the lower the number
of memory allocations.
benchmark old ns/op new ns/op delta
BenchmarkWriteFiles100k 634622051 583810847 -8.01%
benchmark old allocs new allocs delta
BenchmarkWriteFiles100k 2701920 2602621 -3.68%
benchmark old bytes new bytes delta
BenchmarkWriteFiles100k 115383884 64349922 -44.23%
This change is very important if your code has to write a lot of
tarballs with a lot of files.
LGTM=dsymonds
R=golang-codereviews, dave, dsymonds
CC=golang-codereviews
https://codereview.appspot.com/107440043
Committer: David Symonds <dsymonds@golang.org>
diff -r c5f72a685e25 -r 837348e418f3 src/pkg/archive/tar/writer.go
--- a/src/pkg/archive/tar/writer.go Wed Jul 02 15:28:57 2014 -0700
+++ b/src/pkg/archive/tar/writer.go Thu Jul 03 09:40:53 2014 +1000
@@ -37,8 +37,9 @@
nb int64 // number of unwritten bytes for current file entry
pad int64 // amount of padding to write after current file entry
closed bool
- usedBinary bool // whether the binary numeric field extension was used
- preferPax bool // use pax header instead of binary numeric header
+ usedBinary bool // whether the binary numeric field extension was used
+ preferPax bool // use pax header instead of binary numeric header
+ hdrBuff [blockSize]byte // buffer to use in writeHeader
}
// NewWriter creates a new Writer writing to w.
@@ -160,7 +161,8 @@
// subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters
- header := make([]byte, blockSize)
+ header := tw.hdrBuff[:]
+ copy(header, zeroBlock)
s := slicer(header)
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax

View File

@ -39,7 +39,7 @@
Name: golang
Version: 1.3
Release: 1%{?dist}
Release: 2%{?dist}
Summary: The Go Programming Language
License: BSD
@ -73,6 +73,11 @@ Patch1: golang-1.2-remove-ECC-p224.patch
# http://code.google.com/p/go/issues/detail?id=6522
Patch2: ./golang-1.2-skipCpuProfileTest.patch
# these patches can be dropped for go1.4
# discovered working here https://github.com/dotcloud/docker/pull/6829
Patch3: ./go1.3-tar_reuse_buffer_readHeader.patch
Patch4: ./go1.3-tar_reuse_buffer_writeHeader.patch
# Having documentation separate was broken
Obsoletes: %{name}-docs < 1.1-4
@ -357,6 +362,10 @@ end
# skip flaky test
%patch2 -p1
# performance for archive/tar
%patch3 -p1
%patch4 -p1
# create a [dirty] gcc wrapper to allow us to build with our own flags
# (dirty because it is spoofing 'gcc' since CC value is stored in the go tool)
# TODO: remove this and just set CFLAGS/LDFLAGS once upstream supports it
@ -973,6 +982,9 @@ GOROOT=%{goroot} GOOS=openbsd GOARCH=amd64 go install std
%changelog
* Mon Jul 07 2014 Vincent Batts <vbatts@fedoraproejct.org> - 1.3-2
- archive/tar memory allocation improvements
* Thu Jun 19 2014 Vincent Batts <vbatts@fedoraproejct.org> - 1.3-1
- update to go1.3