diff --git a/0001-fast-import-fix-over-allocation-of-marks-storage.patch b/0001-fast-import-fix-over-allocation-of-marks-storage.patch new file mode 100644 index 0000000..eff5fb2 --- /dev/null +++ b/0001-fast-import-fix-over-allocation-of-marks-storage.patch @@ -0,0 +1,221 @@ +From 3f018ec716292b4d757385686f42f57af3bca685 Mon Sep 17 00:00:00 2001 +From: Jeff King +Date: Thu, 15 Oct 2020 11:38:49 -0400 +Subject: [PATCH] fast-import: fix over-allocation of marks storage + +Fast-import stores its marks in a trie-like structure made of mark_set +structs. Each struct has a fixed size (1024). If our id number is too +large to fit in the struct, then we allocate a new struct which shifts +the id number by 10 bits. Our original struct becomes a child node +of this new layer, and the new struct becomes the top level of the trie. + +This scheme was broken by ddddf8d7e2 (fast-import: permit reading +multiple marks files, 2020-02-22). Before then, we had a top-level +"marks" pointer, and the push-down worked by assigning the new top-level +struct to "marks". But after that commit, insert_mark() takes a pointer +to the mark_set, rather than using the global "marks". It continued to +assign to the global "marks" variable during the push down, which was +wrong for two reasons: + + - we added a call in option_rewrite_submodules() which uses a separate + mark set; pushing down on "marks" is outright wrong here. We'd + corrupt the "marks" set, and we'd fail to correctly store any + submodule mappings with an id over 1024. + + - the other callers passed "marks", but the push-down was still wrong. + In read_mark_file(), we take the pointer to the mark_set as a + parameter. So even though insert_mark() was updating the global + "marks", the local pointer we had in read_mark_file() was not + updated. As a result, we'd add a new level when needed, but then the + next call to insert_mark() wouldn't see it! It would then allocate a + new layer, which would also not be seen, and so on. Lookups for the + lost layers obviously wouldn't work, but before we even hit any + lookup stage, we'd generally run out of memory and die. + +Our tests didn't notice either of these cases because they didn't have +enough marks to trigger the push-down behavior. The new tests in t9304 +cover both cases (and fail without this patch). + +We can solve the problem by having insert_mark() take a pointer-to-pointer +of the top-level of the set. Then our push down can assign to it in a +way that the caller actually sees. Note the subtle reordering in +option_rewrite_submodules(). Our call to read_mark_file() may modify our +top-level set pointer, so we have to wait until after it returns to +assign its value into the string_list. + +Reported-by: Sergey Brester +Signed-off-by: Jeff King +Signed-off-by: Junio C Hamano +--- + builtin/fast-import.c | 31 ++++++++++++---------- + t/t9304-fast-import-marks.sh | 51 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 68 insertions(+), 14 deletions(-) + create mode 100755 t/t9304-fast-import-marks.sh + +diff --git a/builtin/fast-import.c b/builtin/fast-import.c +index 1bf50a73dc359..70d7d25eed2e0 100644 +--- a/builtin/fast-import.c ++++ b/builtin/fast-import.c +@@ -150,7 +150,7 @@ struct recent_command { + char *buf; + }; + +-typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark); ++typedef void (*mark_set_inserter_t)(struct mark_set **s, struct object_id *oid, uintmax_t mark); + typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp); + + /* Configured limits on output */ +@@ -526,13 +526,15 @@ static unsigned int hc_str(const char *s, size_t len) + return r; + } + +-static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe) ++static void insert_mark(struct mark_set **top, uintmax_t idnum, struct object_entry *oe) + { ++ struct mark_set *s = *top; ++ + while ((idnum >> s->shift) >= 1024) { + s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); +- s->shift = marks->shift + 10; +- s->data.sets[0] = marks; +- marks = s; ++ s->shift = (*top)->shift + 10; ++ s->data.sets[0] = *top; ++ *top = s; + } + while (s->shift) { + uintmax_t i = idnum >> s->shift; +@@ -944,7 +946,7 @@ static int store_object( + + e = insert_object(&oid); + if (mark) +- insert_mark(marks, mark, e); ++ insert_mark(&marks, mark, e); + if (e->idx.offset) { + duplicate_count_by_type[type]++; + return 1; +@@ -1142,7 +1144,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) + e = insert_object(&oid); + + if (mark) +- insert_mark(marks, mark, e); ++ insert_mark(&marks, mark, e); + + if (e->idx.offset) { + duplicate_count_by_type[OBJ_BLOB]++; +@@ -1717,7 +1719,7 @@ static void dump_marks(void) + } + } + +-static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark) ++static void insert_object_entry(struct mark_set **s, struct object_id *oid, uintmax_t mark) + { + struct object_entry *e; + e = find_object(oid); +@@ -1734,12 +1736,12 @@ static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintm + insert_mark(s, mark, e); + } + +-static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark) ++static void insert_oid_entry(struct mark_set **s, struct object_id *oid, uintmax_t mark) + { + insert_mark(s, mark, xmemdupz(oid, sizeof(*oid))); + } + +-static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter) ++static void read_mark_file(struct mark_set **s, FILE *f, mark_set_inserter_t inserter) + { + char line[512]; + while (fgets(line, sizeof(line), f)) { +@@ -1772,7 +1774,7 @@ static void read_marks(void) + goto done; /* Marks file does not exist */ + else + die_errno("cannot read '%s'", import_marks_file); +- read_mark_file(marks, f, insert_object_entry); ++ read_mark_file(&marks, f, insert_object_entry); + fclose(f); + done: + import_marks_file_done = 1; +@@ -3228,7 +3230,7 @@ static void parse_alias(void) + die(_("Expected 'to' command, got %s"), command_buf.buf); + e = find_object(&b.oid); + assert(e); +- insert_mark(marks, next_mark, e); ++ insert_mark(&marks, next_mark, e); + } + + static char* make_fast_import_path(const char *path) +@@ -3321,13 +3323,14 @@ static void option_rewrite_submodules(const char *arg, struct string_list *list) + *f = '\0'; + f++; + ms = xcalloc(1, sizeof(*ms)); +- string_list_insert(list, s)->util = ms; + + fp = fopen(f, "r"); + if (!fp) + die_errno("cannot read '%s'", f); +- read_mark_file(ms, fp, insert_oid_entry); ++ read_mark_file(&ms, fp, insert_oid_entry); + fclose(fp); ++ ++ string_list_insert(list, s)->util = ms; + } + + static int parse_one_option(const char *option) +diff --git a/t/t9304-fast-import-marks.sh b/t/t9304-fast-import-marks.sh +new file mode 100755 +index 0000000000000..d4359dba21c9c +--- /dev/null ++++ b/t/t9304-fast-import-marks.sh +@@ -0,0 +1,51 @@ ++#!/bin/sh ++ ++test_description='test exotic situations with marks' ++. ./test-lib.sh ++ ++test_expect_success 'setup dump of basic history' ' ++ test_commit one && ++ git fast-export --export-marks=marks HEAD >dump ++' ++ ++test_expect_success 'setup large marks file' ' ++ # normally a marks file would have a lot of useful, unique ++ # marks. But for our purposes, just having a lot of nonsense ++ # ones is fine. Start at 1024 to avoid clashing with marks ++ # legitimately used in our tiny dump. ++ blob=$(git rev-parse HEAD:one.t) && ++ for i in $(test_seq 1024 16384) ++ do ++ echo ":$i $blob" ++ done >>marks ++' ++ ++test_expect_success 'import with large marks file' ' ++ git fast-import --import-marks=marks dump ++' ++ ++test_expect_success 'setup submodule mapping with large id' ' ++ old=$(git rev-parse HEAD:sub) && ++ new=$(echo $old | sed s/./a/g) && ++ echo ":12345 $old" >from && ++ echo ":12345 $new" >to ++' ++ ++test_expect_success 'import with submodule mapping' ' ++ git init dst && ++ git -C dst fast-import \ ++ --rewrite-submodules-from=sub:../from \ ++ --rewrite-submodules-to=sub:../to \ ++ actual && ++ echo "$new" >expect && ++ test_cmp expect actual ++' ++ ++test_done diff --git a/git.spec b/git.spec index 5188290..ad1623e 100644 --- a/git.spec +++ b/git.spec @@ -97,7 +97,7 @@ Name: git Version: 2.29.2 -Release: 2%{?rcrev}%{?dist} +Release: 3%{?rcrev}%{?dist} Summary: Fast Version Control System License: GPLv2 URL: https://git-scm.com/ @@ -133,6 +133,10 @@ Patch0: git-cvsimport-Ignore-cvsps-2.2b1-Branches-output.patch # https://lore.kernel.org/git/xmqqy2jglv29.fsf_-_@gitster.c.googlers.com/ Patch1: https://github.com/git/git/commit/39664cb0ac.patch#/0001-log-diagnose-L-used-with-pathspec-as-an-error.patch +# https://bugzilla.redhat.com/1900335 +# https://lore.kernel.org/git/20201015153849.GA551964@coredump.intra.peff.net/ +Patch2: https://github.com/git/git/commit/3f018ec716.patch#/0001-fast-import-fix-over-allocation-of-marks-storage.patch + %if %{with docs} # pod2man is needed to build Git.3pm BuildRequires: %{_bindir}/pod2man @@ -1080,6 +1084,9 @@ rmdir --ignore-fail-on-non-empty "$testdir" %{?with_docs:%{_pkgdocdir}/git-svn.html} %changelog +* Wed Nov 25 2020 Todd Zullinger - 2.29.2-3 +- apply upstream patch to resolve git fast-import memory leak (#1900335) + * Sat Nov 07 2020 Todd Zullinger - 2.29.2-2 - apply upstream patch to resolve git log segfault (#1791810)