apply upstream patch to resolve git fast-import memory leak (#1900335)
A change in git-2.27.0¹ caused fast-import to leak memory and crash in some cases. Apply the upstream fix², which didn't quite make it into git-2.29.0. ¹ ddddf8d7e2 (fast-import: permit reading multiple marks files, 2020-02-22) https://github.com/git/git/commit/ddddf8d7e2 ² 3f018ec716 (fast-import: fix over-allocation of marks storage, 2020-10-15) https://github.com/git/git/commit/3f018ec716
This commit is contained in:
parent
6b303b4795
commit
1faa4b1131
221
0001-fast-import-fix-over-allocation-of-marks-storage.patch
Normal file
221
0001-fast-import-fix-over-allocation-of-marks-storage.patch
Normal file
@ -0,0 +1,221 @@
|
||||
From 3f018ec716292b4d757385686f42f57af3bca685 Mon Sep 17 00:00:00 2001
|
||||
From: Jeff King <peff@peff.net>
|
||||
Date: Thu, 15 Oct 2020 11:38:49 -0400
|
||||
Subject: [PATCH] fast-import: fix over-allocation of marks storage
|
||||
|
||||
Fast-import stores its marks in a trie-like structure made of mark_set
|
||||
structs. Each struct has a fixed size (1024). If our id number is too
|
||||
large to fit in the struct, then we allocate a new struct which shifts
|
||||
the id number by 10 bits. Our original struct becomes a child node
|
||||
of this new layer, and the new struct becomes the top level of the trie.
|
||||
|
||||
This scheme was broken by ddddf8d7e2 (fast-import: permit reading
|
||||
multiple marks files, 2020-02-22). Before then, we had a top-level
|
||||
"marks" pointer, and the push-down worked by assigning the new top-level
|
||||
struct to "marks". But after that commit, insert_mark() takes a pointer
|
||||
to the mark_set, rather than using the global "marks". It continued to
|
||||
assign to the global "marks" variable during the push down, which was
|
||||
wrong for two reasons:
|
||||
|
||||
- we added a call in option_rewrite_submodules() which uses a separate
|
||||
mark set; pushing down on "marks" is outright wrong here. We'd
|
||||
corrupt the "marks" set, and we'd fail to correctly store any
|
||||
submodule mappings with an id over 1024.
|
||||
|
||||
- the other callers passed "marks", but the push-down was still wrong.
|
||||
In read_mark_file(), we take the pointer to the mark_set as a
|
||||
parameter. So even though insert_mark() was updating the global
|
||||
"marks", the local pointer we had in read_mark_file() was not
|
||||
updated. As a result, we'd add a new level when needed, but then the
|
||||
next call to insert_mark() wouldn't see it! It would then allocate a
|
||||
new layer, which would also not be seen, and so on. Lookups for the
|
||||
lost layers obviously wouldn't work, but before we even hit any
|
||||
lookup stage, we'd generally run out of memory and die.
|
||||
|
||||
Our tests didn't notice either of these cases because they didn't have
|
||||
enough marks to trigger the push-down behavior. The new tests in t9304
|
||||
cover both cases (and fail without this patch).
|
||||
|
||||
We can solve the problem by having insert_mark() take a pointer-to-pointer
|
||||
of the top-level of the set. Then our push down can assign to it in a
|
||||
way that the caller actually sees. Note the subtle reordering in
|
||||
option_rewrite_submodules(). Our call to read_mark_file() may modify our
|
||||
top-level set pointer, so we have to wait until after it returns to
|
||||
assign its value into the string_list.
|
||||
|
||||
Reported-by: Sergey Brester <serg.brester@sebres.de>
|
||||
Signed-off-by: Jeff King <peff@peff.net>
|
||||
Signed-off-by: Junio C Hamano <gitster@pobox.com>
|
||||
---
|
||||
builtin/fast-import.c | 31 ++++++++++++----------
|
||||
t/t9304-fast-import-marks.sh | 51 ++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 68 insertions(+), 14 deletions(-)
|
||||
create mode 100755 t/t9304-fast-import-marks.sh
|
||||
|
||||
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
|
||||
index 1bf50a73dc359..70d7d25eed2e0 100644
|
||||
--- a/builtin/fast-import.c
|
||||
+++ b/builtin/fast-import.c
|
||||
@@ -150,7 +150,7 @@ struct recent_command {
|
||||
char *buf;
|
||||
};
|
||||
|
||||
-typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
|
||||
+typedef void (*mark_set_inserter_t)(struct mark_set **s, struct object_id *oid, uintmax_t mark);
|
||||
typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp);
|
||||
|
||||
/* Configured limits on output */
|
||||
@@ -526,13 +526,15 @@ static unsigned int hc_str(const char *s, size_t len)
|
||||
return r;
|
||||
}
|
||||
|
||||
-static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe)
|
||||
+static void insert_mark(struct mark_set **top, uintmax_t idnum, struct object_entry *oe)
|
||||
{
|
||||
+ struct mark_set *s = *top;
|
||||
+
|
||||
while ((idnum >> s->shift) >= 1024) {
|
||||
s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
|
||||
- s->shift = marks->shift + 10;
|
||||
- s->data.sets[0] = marks;
|
||||
- marks = s;
|
||||
+ s->shift = (*top)->shift + 10;
|
||||
+ s->data.sets[0] = *top;
|
||||
+ *top = s;
|
||||
}
|
||||
while (s->shift) {
|
||||
uintmax_t i = idnum >> s->shift;
|
||||
@@ -944,7 +946,7 @@ static int store_object(
|
||||
|
||||
e = insert_object(&oid);
|
||||
if (mark)
|
||||
- insert_mark(marks, mark, e);
|
||||
+ insert_mark(&marks, mark, e);
|
||||
if (e->idx.offset) {
|
||||
duplicate_count_by_type[type]++;
|
||||
return 1;
|
||||
@@ -1142,7 +1144,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
|
||||
e = insert_object(&oid);
|
||||
|
||||
if (mark)
|
||||
- insert_mark(marks, mark, e);
|
||||
+ insert_mark(&marks, mark, e);
|
||||
|
||||
if (e->idx.offset) {
|
||||
duplicate_count_by_type[OBJ_BLOB]++;
|
||||
@@ -1717,7 +1719,7 @@ static void dump_marks(void)
|
||||
}
|
||||
}
|
||||
|
||||
-static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
|
||||
+static void insert_object_entry(struct mark_set **s, struct object_id *oid, uintmax_t mark)
|
||||
{
|
||||
struct object_entry *e;
|
||||
e = find_object(oid);
|
||||
@@ -1734,12 +1736,12 @@ static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintm
|
||||
insert_mark(s, mark, e);
|
||||
}
|
||||
|
||||
-static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
|
||||
+static void insert_oid_entry(struct mark_set **s, struct object_id *oid, uintmax_t mark)
|
||||
{
|
||||
insert_mark(s, mark, xmemdupz(oid, sizeof(*oid)));
|
||||
}
|
||||
|
||||
-static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
|
||||
+static void read_mark_file(struct mark_set **s, FILE *f, mark_set_inserter_t inserter)
|
||||
{
|
||||
char line[512];
|
||||
while (fgets(line, sizeof(line), f)) {
|
||||
@@ -1772,7 +1774,7 @@ static void read_marks(void)
|
||||
goto done; /* Marks file does not exist */
|
||||
else
|
||||
die_errno("cannot read '%s'", import_marks_file);
|
||||
- read_mark_file(marks, f, insert_object_entry);
|
||||
+ read_mark_file(&marks, f, insert_object_entry);
|
||||
fclose(f);
|
||||
done:
|
||||
import_marks_file_done = 1;
|
||||
@@ -3228,7 +3230,7 @@ static void parse_alias(void)
|
||||
die(_("Expected 'to' command, got %s"), command_buf.buf);
|
||||
e = find_object(&b.oid);
|
||||
assert(e);
|
||||
- insert_mark(marks, next_mark, e);
|
||||
+ insert_mark(&marks, next_mark, e);
|
||||
}
|
||||
|
||||
static char* make_fast_import_path(const char *path)
|
||||
@@ -3321,13 +3323,14 @@ static void option_rewrite_submodules(const char *arg, struct string_list *list)
|
||||
*f = '\0';
|
||||
f++;
|
||||
ms = xcalloc(1, sizeof(*ms));
|
||||
- string_list_insert(list, s)->util = ms;
|
||||
|
||||
fp = fopen(f, "r");
|
||||
if (!fp)
|
||||
die_errno("cannot read '%s'", f);
|
||||
- read_mark_file(ms, fp, insert_oid_entry);
|
||||
+ read_mark_file(&ms, fp, insert_oid_entry);
|
||||
fclose(fp);
|
||||
+
|
||||
+ string_list_insert(list, s)->util = ms;
|
||||
}
|
||||
|
||||
static int parse_one_option(const char *option)
|
||||
diff --git a/t/t9304-fast-import-marks.sh b/t/t9304-fast-import-marks.sh
|
||||
new file mode 100755
|
||||
index 0000000000000..d4359dba21c9c
|
||||
--- /dev/null
|
||||
+++ b/t/t9304-fast-import-marks.sh
|
||||
@@ -0,0 +1,51 @@
|
||||
+#!/bin/sh
|
||||
+
|
||||
+test_description='test exotic situations with marks'
|
||||
+. ./test-lib.sh
|
||||
+
|
||||
+test_expect_success 'setup dump of basic history' '
|
||||
+ test_commit one &&
|
||||
+ git fast-export --export-marks=marks HEAD >dump
|
||||
+'
|
||||
+
|
||||
+test_expect_success 'setup large marks file' '
|
||||
+ # normally a marks file would have a lot of useful, unique
|
||||
+ # marks. But for our purposes, just having a lot of nonsense
|
||||
+ # ones is fine. Start at 1024 to avoid clashing with marks
|
||||
+ # legitimately used in our tiny dump.
|
||||
+ blob=$(git rev-parse HEAD:one.t) &&
|
||||
+ for i in $(test_seq 1024 16384)
|
||||
+ do
|
||||
+ echo ":$i $blob"
|
||||
+ done >>marks
|
||||
+'
|
||||
+
|
||||
+test_expect_success 'import with large marks file' '
|
||||
+ git fast-import --import-marks=marks <dump
|
||||
+'
|
||||
+
|
||||
+test_expect_success 'setup dump with submodule' '
|
||||
+ git submodule add "$PWD" sub &&
|
||||
+ git commit -m "add submodule" &&
|
||||
+ git fast-export HEAD >dump
|
||||
+'
|
||||
+
|
||||
+test_expect_success 'setup submodule mapping with large id' '
|
||||
+ old=$(git rev-parse HEAD:sub) &&
|
||||
+ new=$(echo $old | sed s/./a/g) &&
|
||||
+ echo ":12345 $old" >from &&
|
||||
+ echo ":12345 $new" >to
|
||||
+'
|
||||
+
|
||||
+test_expect_success 'import with submodule mapping' '
|
||||
+ git init dst &&
|
||||
+ git -C dst fast-import \
|
||||
+ --rewrite-submodules-from=sub:../from \
|
||||
+ --rewrite-submodules-to=sub:../to \
|
||||
+ <dump &&
|
||||
+ git -C dst rev-parse HEAD:sub >actual &&
|
||||
+ echo "$new" >expect &&
|
||||
+ test_cmp expect actual
|
||||
+'
|
||||
+
|
||||
+test_done
|
9
git.spec
9
git.spec
@ -97,7 +97,7 @@
|
||||
|
||||
Name: git
|
||||
Version: 2.29.2
|
||||
Release: 2%{?rcrev}%{?dist}
|
||||
Release: 3%{?rcrev}%{?dist}
|
||||
Summary: Fast Version Control System
|
||||
License: GPLv2
|
||||
URL: https://git-scm.com/
|
||||
@ -133,6 +133,10 @@ Patch0: git-cvsimport-Ignore-cvsps-2.2b1-Branches-output.patch
|
||||
# https://lore.kernel.org/git/xmqqy2jglv29.fsf_-_@gitster.c.googlers.com/
|
||||
Patch1: https://github.com/git/git/commit/39664cb0ac.patch#/0001-log-diagnose-L-used-with-pathspec-as-an-error.patch
|
||||
|
||||
# https://bugzilla.redhat.com/1900335
|
||||
# https://lore.kernel.org/git/20201015153849.GA551964@coredump.intra.peff.net/
|
||||
Patch2: https://github.com/git/git/commit/3f018ec716.patch#/0001-fast-import-fix-over-allocation-of-marks-storage.patch
|
||||
|
||||
%if %{with docs}
|
||||
# pod2man is needed to build Git.3pm
|
||||
BuildRequires: %{_bindir}/pod2man
|
||||
@ -1080,6 +1084,9 @@ rmdir --ignore-fail-on-non-empty "$testdir"
|
||||
%{?with_docs:%{_pkgdocdir}/git-svn.html}
|
||||
|
||||
%changelog
|
||||
* Wed Nov 25 2020 Todd Zullinger <tmz@pobox.com> - 2.29.2-3
|
||||
- apply upstream patch to resolve git fast-import memory leak (#1900335)
|
||||
|
||||
* Sat Nov 07 2020 Todd Zullinger <tmz@pobox.com> - 2.29.2-2
|
||||
- apply upstream patch to resolve git log segfault (#1791810)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user