diff -ur b/attr.c a/attr.c --- b/attr.c 2021-03-26 23:03:34.000000000 +0100 +++ a/attr.c 2023-01-25 17:55:11.665410385 +0100 @@ -28,7 +28,7 @@ #endif struct git_attr { - int attr_nr; /* unique attribute number */ + unsigned int attr_nr; /* unique attribute number */ char name[FLEX_ARRAY]; /* attribute name */ }; @@ -210,7 +210,7 @@ * dictionary. If no entry is found, create a new attribute and store it in * the dictionary. */ -static const struct git_attr *git_attr_internal(const char *name, int namelen) +static const struct git_attr *git_attr_internal(const char *name, size_t namelen) { struct git_attr *a; @@ -226,8 +226,8 @@ a->attr_nr = hashmap_get_size(&g_attr_hashmap.map); attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a); - assert(a->attr_nr == - (hashmap_get_size(&g_attr_hashmap.map) - 1)); + if (a->attr_nr != hashmap_get_size(&g_attr_hashmap.map) - 1) + die(_("unable to add additional attribute")); } hashmap_unlock(&g_attr_hashmap); @@ -272,7 +272,7 @@ const struct git_attr *attr; } u; char is_macro; - unsigned num_attr; + size_t num_attr; struct attr_state state[FLEX_ARRAY]; }; @@ -289,7 +289,7 @@ struct attr_state *e) { const char *ep, *equals; - int len; + size_t len; ep = cp + strcspn(cp, blank); equals = strchr(cp, '='); @@ -333,8 +333,7 @@ static struct match_attr *parse_attr_line(const char *line, const char *src, int lineno, int macro_ok) { - int namelen; - int num_attr, i; + size_t namelen, num_attr, i; const char *cp, *name, *states; struct match_attr *res = NULL; int is_macro; @@ -345,6 +344,11 @@ return NULL; name = cp; + if (strlen(line) >= ATTR_MAX_LINE_LENGTH) { + warning(_("ignoring overly long attributes line %d"), lineno); + return NULL; + } + if (*cp == '"' && !unquote_c_style(&pattern, name, &states)) { name = pattern.buf; namelen = pattern.len; @@ -381,10 +385,9 @@ goto fail_return; } - res = xcalloc(1, - sizeof(*res) + - sizeof(struct attr_state) * num_attr + - (is_macro ? 0 : namelen + 1)); + res = xcalloc(1, st_add3(sizeof(*res), + st_mult(sizeof(struct attr_state), num_attr), + is_macro ? 0 : namelen + 1)); if (is_macro) { res->u.attr = git_attr_internal(name, namelen); } else { @@ -447,11 +450,12 @@ static void attr_stack_free(struct attr_stack *e) { - int i; + unsigned i; free(e->origin); for (i = 0; i < e->num_matches; i++) { struct match_attr *a = e->attrs[i]; - int j; + size_t j; + for (j = 0; j < a->num_attr; j++) { const char *setto = a->state[j].setto; if (setto == ATTR__TRUE || @@ -660,8 +664,8 @@ a = parse_attr_line(line, src, lineno, macro_ok); if (!a) return; - ALLOC_GROW(res->attrs, res->num_matches + 1, res->alloc); - res->attrs[res->num_matches++] = a; + ALLOC_GROW_BY(res->attrs, res->num_matches, 1, res->alloc); + res->attrs[res->num_matches - 1] = a; } static struct attr_stack *read_attr_from_array(const char **list) @@ -700,21 +704,37 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) { + struct strbuf buf = STRBUF_INIT; FILE *fp = fopen_or_warn(path, "r"); struct attr_stack *res; - char buf[2048]; int lineno = 0; + int fd; + struct stat st; if (!fp) return NULL; + + fd = fileno(fp); + if (fstat(fd, &st)) { + warning_errno(_("cannot fstat gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + if (st.st_size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes file '%s'"), path); + fclose(fp); + return NULL; + } + CALLOC_ARRAY(res, 1); - while (fgets(buf, sizeof(buf), fp)) { - char *bufp = buf; - if (!lineno) - skip_utf8_bom(&bufp, strlen(bufp)); - handle_attr_line(res, bufp, path, ++lineno, macro_ok); + while (strbuf_getline(&buf, fp) != EOF) { + if (!lineno && starts_with(buf.buf, utf8_bom)) + strbuf_remove(&buf, 0, strlen(utf8_bom)); + handle_attr_line(res, buf.buf, path, ++lineno, macro_ok); } + fclose(fp); + strbuf_release(&buf); return res; } @@ -725,13 +745,18 @@ struct attr_stack *res; char *buf, *sp; int lineno = 0; + size_t size; if (!istate) return NULL; - buf = read_blob_data_from_index(istate, path, NULL); + buf = read_blob_data_from_index(istate, path, &size); if (!buf) return NULL; + if (size >= ATTR_MAX_FILE_SIZE) { + warning(_("ignoring overly large gitattributes blob '%s'"), path); + return NULL; + } CALLOC_ARRAY(res, 1); for (sp = buf; *sp; ) { @@ -1001,12 +1026,12 @@ static int fill_one(const char *what, struct all_attrs_item *all_attrs, const struct match_attr *a, int rem) { - int i; + size_t i; - for (i = a->num_attr - 1; rem > 0 && i >= 0; i--) { - const struct git_attr *attr = a->state[i].attr; + for (i = a->num_attr; rem > 0 && i > 0; i--) { + const struct git_attr *attr = a->state[i - 1].attr; const char **n = &(all_attrs[attr->attr_nr].value); - const char *v = a->state[i].setto; + const char *v = a->state[i - 1].setto; if (*n == ATTR__UNKNOWN) { debug_set(what, @@ -1025,11 +1050,11 @@ struct all_attrs_item *all_attrs, int rem) { for (; rem > 0 && stack; stack = stack->prev) { - int i; + unsigned i; const char *base = stack->origin ? stack->origin : ""; - for (i = stack->num_matches - 1; 0 < rem && 0 <= i; i--) { - const struct match_attr *a = stack->attrs[i]; + for (i = stack->num_matches; 0 < rem && 0 < i; i--) { + const struct match_attr *a = stack->attrs[i - 1]; if (a->is_macro) continue; if (path_matches(path, pathlen, basename_offset, @@ -1060,11 +1085,11 @@ const struct attr_stack *stack) { for (; stack; stack = stack->prev) { - int i; - for (i = stack->num_matches - 1; i >= 0; i--) { - const struct match_attr *ma = stack->attrs[i]; + unsigned i; + for (i = stack->num_matches; i > 0; i--) { + const struct match_attr *ma = stack->attrs[i - 1]; if (ma->is_macro) { - int n = ma->u.attr->attr_nr; + unsigned int n = ma->u.attr->attr_nr; if (!all_attrs[n].macro) { all_attrs[n].macro = ma; } @@ -1116,7 +1141,7 @@ collect_some_attrs(istate, path, check); for (i = 0; i < check->nr; i++) { - size_t n = check->items[i].attr->attr_nr; + unsigned int n = check->items[i].attr->attr_nr; const char *value = check->all_attrs[n].value; if (value == ATTR__UNKNOWN) value = ATTR__UNSET; diff -ur b/attr.h a/attr.h --- b/attr.h 2021-03-26 23:03:34.000000000 +0100 +++ a/attr.h 2023-01-25 17:55:44.310607076 +0100 @@ -107,6 +107,18 @@ * - Free the `attr_check` struct by calling `attr_check_free()`. */ +/** + * The maximum line length for a gitattributes file. If the line exceeds this + * length we will ignore it. + */ +#define ATTR_MAX_LINE_LENGTH 2048 + + /** + * The maximum size of the giattributes file. If the file exceeds this size we + * will ignore it. + */ +#define ATTR_MAX_FILE_SIZE (100 * 1024 * 1024) + struct index_state; /** diff -ur b/column.c a/column.c --- b/column.c 2021-03-26 23:03:34.000000000 +0100 +++ a/column.c 2023-01-25 17:35:25.867526707 +0100 @@ -23,7 +23,7 @@ /* return length of 's' in letters, ANSI escapes stripped */ static int item_length(const char *s) { - return utf8_strnwidth(s, -1, 1); + return utf8_strnwidth(s, strlen(s), 1); } /* diff -ur b/git-compat-util.h a/git-compat-util.h --- b/git-compat-util.h 2021-03-26 23:03:34.000000000 +0100 +++ a/git-compat-util.h 2023-01-25 17:36:06.128731483 +0100 @@ -854,6 +854,14 @@ return a - b; } +static inline int cast_size_t_to_int(size_t a) +{ + if (a > INT_MAX) + die("number too large to represent as int on this platform: %"PRIuMAX, + (uintmax_t)a); + return (int)a; +} + #ifdef HAVE_ALLOCA_H # include # define xalloca(size) (alloca(size)) diff -ur b/pretty.c a/pretty.c --- b/pretty.c 2021-03-26 23:03:34.000000000 +0100 +++ a/pretty.c 2023-01-25 17:39:45.405061148 +0100 @@ -13,6 +13,13 @@ #include "gpg-interface.h" #include "trailer.h" +/* + * The limit for formatting directives, which enable the caller to append + * arbitrarily many bytes to the formatted buffer. This includes padding + * and wrapping formatters. + */ +#define FORMATTING_LIMIT (16 * 1024) + static char *user_format; static struct cmt_fmt_map { const char *name; @@ -916,7 +923,9 @@ if (pos) strbuf_add(&tmp, sb->buf, pos); strbuf_add_wrapped_text(&tmp, sb->buf + pos, - (int) indent1, (int) indent2, (int) width); + cast_size_t_to_int(indent1), + cast_size_t_to_int(indent2), + cast_size_t_to_int(width)); strbuf_swap(&tmp, sb); strbuf_release(&tmp); } @@ -1042,9 +1051,18 @@ const char *end = start + strcspn(start, ",)"); char *next; int width; - if (!end || end == start) + if (!*end || end == start) return 0; width = strtol(start, &next, 10); + + /* + * We need to limit the amount of padding, or otherwise this + * would allow the user to pad the buffer by arbitrarily many + * bytes and thus cause resource exhaustion. + */ + if (width < -FORMATTING_LIMIT || width > FORMATTING_LIMIT) + return 0; + if (next == start || width == 0) return 0; if (width < 0) { @@ -1260,6 +1278,16 @@ } if (*next != ')') return 0; + + /* + * We need to limit the format here as it allows the + * user to prepend arbitrarily many bytes to the buffer + * when rewrapping. + */ + if (width > FORMATTING_LIMIT || + indent1 > FORMATTING_LIMIT || + indent2 > FORMATTING_LIMIT) + return 0; } rewrap_message_tail(sb, c, width, indent1, indent2); return end - placeholder + 1; @@ -1507,19 +1535,21 @@ struct format_commit_context *c) { struct strbuf local_sb = STRBUF_INIT; - int total_consumed = 0, len, padding = c->padding; + size_t total_consumed = 0; + int len, padding = c->padding; + if (padding < 0) { const char *start = strrchr(sb->buf, '\n'); int occupied; if (!start) start = sb->buf; - occupied = utf8_strnwidth(start, -1, 1); + occupied = utf8_strnwidth(start, strlen(start), 1); occupied += c->pretty_ctx->graph_width; padding = (-padding) - occupied; } while (1) { int modifier = *placeholder == 'C'; - int consumed = format_commit_one(&local_sb, placeholder, c); + size_t consumed = format_commit_one(&local_sb, placeholder, c); total_consumed += consumed; if (!modifier) @@ -1531,7 +1561,7 @@ placeholder++; total_consumed++; } - len = utf8_strnwidth(local_sb.buf, -1, 1); + len = utf8_strnwidth(local_sb.buf, local_sb.len, 1); if (c->flush_type == flush_left_and_steal) { const char *ch = sb->buf + sb->len - 1; @@ -1546,7 +1576,7 @@ if (*ch != 'm') break; p = ch - 1; - while (ch - p < 10 && *p != '\033') + while (p > sb->buf && ch - p < 10 && *p != '\033') p--; if (*p != '\033' || ch + 1 - p != display_mode_esc_sequence_len(p)) @@ -1585,7 +1615,7 @@ } strbuf_addbuf(sb, &local_sb); } else { - int sb_len = sb->len, offset = 0; + size_t sb_len = sb->len, offset = 0; if (c->flush_type == flush_left) offset = padding - len; else if (c->flush_type == flush_both) @@ -1608,8 +1638,7 @@ const char *placeholder, void *context) { - int consumed; - size_t orig_len; + size_t consumed, orig_len; enum { NO_MAGIC, ADD_LF_BEFORE_NON_EMPTY, @@ -1630,9 +1659,21 @@ default: break; } - if (magic != NO_MAGIC) + if (magic != NO_MAGIC) { placeholder++; + switch (placeholder[0]) { + case 'w': + /* + * `%+w()` cannot ever expand to a non-empty string, + * and it potentially changes the layout of preceding + * contents. We're thus not able to handle the magic in + * this combination and refuse the pattern. + */ + return 0; + }; + } + orig_len = sb->len; if (((struct format_commit_context *)context)->flush_type != no_flush) consumed = format_and_pad_commit(sb, placeholder, context); diff -ur b/t/t0003-attributes.sh a/t/t0003-attributes.sh --- b/t/t0003-attributes.sh 2021-03-26 23:03:34.000000000 +0100 +++ a/t/t0003-attributes.sh 2023-01-25 17:56:18.656788243 +0100 @@ -339,4 +339,63 @@ test_cmp expect actual ' +test_expect_success 'large attributes line ignored in tree' ' + test_when_finished "rm .gitattributes" && + printf "path %02043d" 1 >.gitattributes && + git check-attr --all path >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success 'large attributes line ignores trailing content in tree' ' + test_when_finished "rm .gitattributes" && + # older versions of Git broke lines at 2048 bytes; the 2045 bytes + # of 0-padding here is accounting for the three bytes of "a 1", which + # would knock "trailing" to the "next" line, where it would be + # erroneously parsed. + printf "a %02045dtrailing attribute\n" 1 >.gitattributes && + git check-attr --all trailing >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success EXPENSIVE 'large attributes file ignored in tree' ' + test_when_finished "rm .gitattributes" && + dd if=/dev/zero of=.gitattributes bs=101M count=1 2>/dev/null && + git check-attr --all path >/dev/null 2>err && + echo "warning: ignoring overly large gitattributes file ${SQ}.gitattributes${SQ}" >expect && + test_cmp expect err +' + +test_expect_success 'large attributes line ignored in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(printf "path %02043d" 1 | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644,$blob,.gitattributes && + git check-attr --cached --all path >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success 'large attributes line ignores trailing content in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(printf "a %02045dtrailing attribute\n" 1 | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644,$blob,.gitattributes && + git check-attr --cached --all trailing >actual 2>err && + echo "warning: ignoring overly long attributes line 1" >expect && + test_cmp expect err && + test_must_be_empty actual +' + +test_expect_success EXPENSIVE 'large attributes file ignored in index' ' + test_when_finished "git update-index --remove .gitattributes" && + blob=$(dd if=/dev/zero bs=101M count=1 2>/dev/null | git hash-object -w --stdin) && + git update-index --add --cacheinfo 100644,$blob,.gitattributes && + git check-attr --cached --all path >/dev/null 2>err && + echo "warning: ignoring overly large gitattributes blob ${SQ}.gitattributes${SQ}" >expect && + test_cmp expect err +' + test_done diff -ur b/t/t4205-log-pretty-formats.sh a/t/t4205-log-pretty-formats.sh --- b/t/t4205-log-pretty-formats.sh 2021-03-26 23:03:34.000000000 +0100 +++ a/t/t4205-log-pretty-formats.sh 2023-01-25 17:40:19.966850121 +0100 @@ -962,4 +962,80 @@ test_cmp expect actual ' +test_expect_success 'log --pretty with space stealing' ' + printf mm0 >expect && + git log -1 --pretty="format:mm%>>|(1)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with invalid padding format' ' + printf "%s%%<(20" "$(git rev-parse HEAD)" >expect && + git log -1 --pretty="format:%H%<(20" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with magical wrapping directives' ' + commit_id=$(git commit-tree HEAD^{tree} -m "describe me") && + git tag describe-me $commit_id && + printf "\n(tag:\ndescribe-me)%%+w(2)" >expect && + git log -1 --pretty="format:%w(1)%+d%+w(2)" $commit_id >actual && + test_cmp expect actual +' + +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing wrapping directive' ' + printf "%%w(2147483649,1,1)0" >expect && + git log -1 --pretty="format:%w(2147483649,1,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,2147483649,1)0" >expect && + git log -1 --pretty="format:%w(1,2147483649,1)%x30" >actual && + test_cmp expect actual && + printf "%%w(1,1,2147483649)0" >expect && + git log -1 --pretty="format:%w(1,1,2147483649)%x30" >actual && + test_cmp expect actual +' + +test_expect_success SIZE_T_IS_64BIT 'log --pretty with overflowing padding directive' ' + printf "%%<(2147483649)0" >expect && + git log -1 --pretty="format:%<(2147483649)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty with padding and preceding control chars' ' + printf "\20\20 0" >expect && + git log -1 --pretty="format:%x10%x10%>|(4)%x30" >actual && + test_cmp expect actual +' + +test_expect_success 'log --pretty truncation with control chars' ' + test_commit "$(printf "\20\20\20\20xxxx")" file contents commit-with-control-chars && + printf "\20\20\20\20x.." >expect && + git log -1 --pretty="format:%<(3,trunc)%s" commit-with-control-chars >actual && + test_cmp expect actual +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + # We only assert that this command does not crash. This needs to be + # executed with the address sanitizer to demonstrate failure. + git log -1 --pretty="format:%>(2147483646)%x41%41%>(2147483646)%x41" >/dev/null +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'set up huge commit' ' + test-tool genzeros 2147483649 | tr "\000" "1" >expect && + huge_commit=$(git commit-tree -F expect HEAD^{tree}) +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message' ' + git log -1 --format="%B%<(1)%x30" $huge_commit >actual && + echo 0 >>expect && + test_cmp expect actual +' + +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT 'log --pretty with huge commit message does not cause allocation failure' ' + test_must_fail git log -1 --format="%<(1)%B" $huge_commit 2>error && + cat >expect <<-EOF && + fatal: number too large to represent as int on this platform: 2147483649 + EOF + test_cmp expect error +' + test_done diff -ur b/t/test-lib.sh a/t/test-lib.sh --- b/t/test-lib.sh 2021-03-26 23:03:34.000000000 +0100 +++ a/t/test-lib.sh 2023-01-25 17:41:35.007391936 +0100 @@ -1635,6 +1635,10 @@ sed -ne "s/^$1: //p" } +test_lazy_prereq SIZE_T_IS_64BIT ' + test 8 -eq "$(build_option sizeof-size_t)" +' + test_lazy_prereq LONG_IS_64BIT ' test 8 -le "$(build_option sizeof-long)" ' diff -ur b/utf8.c a/utf8.c --- b/utf8.c 2021-03-26 23:03:34.000000000 +0100 +++ a/utf8.c 2023-01-25 18:34:55.927040924 +0100 @@ -206,26 +206,34 @@ * string, assuming that the string is utf8. Returns strlen() instead * if the string does not look like a valid utf8 string. */ -int utf8_strnwidth(const char *string, int len, int skip_ansi) +int utf8_strnwidth(const char *string, size_t len, int skip_ansi) { - int width = 0; const char *orig = string; + size_t width = 0; - if (len == -1) - len = strlen(string); while (string && string < orig + len) { - int skip; + int glyph_width; + size_t skip; + while (skip_ansi && (skip = display_mode_esc_sequence_len(string)) != 0) string += skip; - width += utf8_width(&string, NULL); + + glyph_width = utf8_width(&string, NULL); + if (glyph_width > 0) + width += glyph_width; } - return string ? width : len; + + /* + * TODO: fix the interface of this function and `utf8_strwidth()` to + * return `size_t` instead of `int`. + */ + return cast_size_t_to_int(string ? width : len); } int utf8_strwidth(const char *string) { - return utf8_strnwidth(string, -1, 0); + return utf8_strnwidth(string, strlen(string), 0); } int is_utf8(const char *text) @@ -357,51 +365,51 @@ void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width, const char *subst) { - struct strbuf sb_dst = STRBUF_INIT; - char *src = sb_src->buf; - char *end = src + sb_src->len; - char *dst; - int w = 0, subst_len = 0; - - if (subst) - subst_len = strlen(subst); - strbuf_grow(&sb_dst, sb_src->len + subst_len); - dst = sb_dst.buf; + const char *src = sb_src->buf, *end = sb_src->buf + sb_src->len; + struct strbuf dst; + int w = 0; + + strbuf_init(&dst, sb_src->len); while (src < end) { - char *old; + const char *old; + int glyph_width; size_t n; while ((n = display_mode_esc_sequence_len(src))) { - memcpy(dst, src, n); + strbuf_add(&dst, src, n); src += n; - dst += n; } if (src >= end) break; old = src; - n = utf8_width((const char**)&src, NULL); - if (!src) /* broken utf-8, do nothing */ + glyph_width = utf8_width((const char**)&src, NULL); + if (!src) /* broken utf-8, do nothing */ goto out; - if (n && w >= pos && w < pos + width) { + + /* + * In case we see a control character we copy it into the + * buffer, but don't add it to the width. + */ + if (glyph_width < 0) + glyph_width = 0; + + if (glyph_width && w >= pos && w < pos + width) { if (subst) { - memcpy(dst, subst, subst_len); - dst += subst_len; + strbuf_addstr(&dst, subst); subst = NULL; } - w += n; - continue; + } else { + strbuf_add(&dst, old, src - old); } - memcpy(dst, old, src - old); - dst += src - old; - w += n; + + w += glyph_width; } - strbuf_setlen(&sb_dst, dst - sb_dst.buf); - strbuf_swap(sb_src, &sb_dst); + strbuf_swap(sb_src, &dst); out: - strbuf_release(&sb_dst); + strbuf_release(&dst); } /* @@ -791,7 +799,7 @@ void strbuf_utf8_align(struct strbuf *buf, align_type position, unsigned int width, const char *s) { - int slen = strlen(s); + size_t slen = strlen(s); int display_len = utf8_strnwidth(s, slen, 0); int utf8_compensation = slen - display_len; diff -ur b/utf8.h a/utf8.h --- b/utf8.h 2021-03-26 23:03:34.000000000 +0100 +++ a/utf8.h 2023-01-25 17:22:31.971590518 +0100 @@ -7,7 +7,7 @@ size_t display_mode_esc_sequence_len(const char *s); int utf8_width(const char **start, size_t *remainder_p); -int utf8_strnwidth(const char *string, int len, int skip_ansi); +int utf8_strnwidth(const char *string, size_t len, int skip_ansi); int utf8_strwidth(const char *string); int is_utf8(const char *text); int is_encoding_utf8(const char *name);