From 36833ea1e0189b0445d46d42f84ac7dc0aeed5fe Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 1 Jul 2021 18:31:01 +0200 Subject: [PATCH] iptables-1.8.7-11 - Fix performance restoring large rulesets - Review unit file --- ...nft-cache-Sort-chains-on-demand-only.patch | 211 ++++++++++++++++++ ...CH_PAGE_SIZE-to-support-huge-ruleset.patch | 56 +++++ iptables.service | 5 +- iptables.spec | 8 +- 4 files changed, 277 insertions(+), 3 deletions(-) create mode 100644 0010-nft-cache-Sort-chains-on-demand-only.patch create mode 100644 0011-nft-Increase-BATCH_PAGE_SIZE-to-support-huge-ruleset.patch diff --git a/0010-nft-cache-Sort-chains-on-demand-only.patch b/0010-nft-cache-Sort-chains-on-demand-only.patch new file mode 100644 index 0000000..2a57862 --- /dev/null +++ b/0010-nft-cache-Sort-chains-on-demand-only.patch @@ -0,0 +1,211 @@ +From b7582864a4cb71d4dcde752a3a2203c81159d6e2 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Thu, 25 Mar 2021 16:24:39 +0100 +Subject: [PATCH] nft: cache: Sort chains on demand only + +Mandatory sorted insert of chains into cache significantly slows down +restoring of large rulesets. Since the sorted list of user-defined +chains is needed for listing and verbose output only, introduce +nft_cache_sort_chains() and call it where needed. + +Signed-off-by: Phil Sutter +(cherry picked from commit fdf64dcdace989589bac441805082e3b1fe6a915) +--- + iptables/nft-cache.c | 71 +++++++++++++++++++++++++++++++++-------- + iptables/nft-cache.h | 1 + + iptables/nft.c | 12 +++++++ + iptables/nft.h | 1 + + iptables/xtables-save.c | 1 + + 5 files changed, 73 insertions(+), 13 deletions(-) + +diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c +index 6b6e6da40a826..8fbf9727826d8 100644 +--- a/iptables/nft-cache.c ++++ b/iptables/nft-cache.c +@@ -223,24 +223,67 @@ int nft_cache_add_chain(struct nft_handle *h, const struct builtin_table *t, + + h->cache->table[t->type].base_chains[hooknum] = nc; + } else { +- struct nft_chain_list *clist = h->cache->table[t->type].chains; +- struct list_head *pos = &clist->list; +- struct nft_chain *cur; +- const char *n; +- +- list_for_each_entry(cur, &clist->list, head) { +- n = nftnl_chain_get_str(cur->nftnl, NFTNL_CHAIN_NAME); +- if (strcmp(cname, n) <= 0) { +- pos = &cur->head; +- break; +- } +- } +- list_add_tail(&nc->head, pos); ++ list_add_tail(&nc->head, ++ &h->cache->table[t->type].chains->list); + } + hlist_add_head(&nc->hnode, chain_name_hlist(h, t, cname)); + return 0; + } + ++static void __nft_chain_list_sort(struct list_head *list, ++ int (*cmp)(struct nft_chain *a, ++ struct nft_chain *b)) ++{ ++ struct nft_chain *pivot, *cur, *sav; ++ LIST_HEAD(sublist); ++ ++ if (list_empty(list)) ++ return; ++ ++ /* grab first item as pivot (dividing) value */ ++ pivot = list_entry(list->next, struct nft_chain, head); ++ list_del(&pivot->head); ++ ++ /* move any smaller value into sublist */ ++ list_for_each_entry_safe(cur, sav, list, head) { ++ if (cmp(pivot, cur) > 0) { ++ list_del(&cur->head); ++ list_add_tail(&cur->head, &sublist); ++ } ++ } ++ /* conquer divided */ ++ __nft_chain_list_sort(&sublist, cmp); ++ __nft_chain_list_sort(list, cmp); ++ ++ /* merge divided and pivot again */ ++ list_add_tail(&pivot->head, &sublist); ++ list_splice(&sublist, list); ++} ++ ++static int nft_chain_cmp_byname(struct nft_chain *a, struct nft_chain *b) ++{ ++ const char *aname = nftnl_chain_get_str(a->nftnl, NFTNL_CHAIN_NAME); ++ const char *bname = nftnl_chain_get_str(b->nftnl, NFTNL_CHAIN_NAME); ++ ++ return strcmp(aname, bname); ++} ++ ++int nft_cache_sort_chains(struct nft_handle *h, const char *table) ++{ ++ const struct builtin_table *t = nft_table_builtin_find(h, table); ++ ++ if (!t) ++ return -1; ++ ++ if (h->cache->table[t->type].sorted) ++ return 0; ++ ++ __nft_chain_list_sort(&h->cache->table[t->type].chains->list, ++ nft_chain_cmp_byname); ++ h->cache->table[t->type].sorted = true; ++ return 0; ++} ++ + struct nftnl_chain_list_cb_data { + struct nft_handle *h; + const struct builtin_table *t; +@@ -663,6 +706,7 @@ static int flush_cache(struct nft_handle *h, struct nft_cache *c, + + flush_base_chain_cache(c->table[table->type].base_chains); + nft_chain_foreach(h, tablename, __flush_chain_cache, NULL); ++ c->table[table->type].sorted = false; + + if (c->table[table->type].sets) + nftnl_set_list_foreach(c->table[table->type].sets, +@@ -678,6 +722,7 @@ static int flush_cache(struct nft_handle *h, struct nft_cache *c, + if (c->table[i].chains) { + nft_chain_list_free(c->table[i].chains); + c->table[i].chains = NULL; ++ c->table[i].sorted = false; + } + + if (c->table[i].sets) { +diff --git a/iptables/nft-cache.h b/iptables/nft-cache.h +index 20d96beede876..58a015265056c 100644 +--- a/iptables/nft-cache.h ++++ b/iptables/nft-cache.h +@@ -16,6 +16,7 @@ int flush_rule_cache(struct nft_handle *h, const char *table, + void nft_cache_build(struct nft_handle *h); + int nft_cache_add_chain(struct nft_handle *h, const struct builtin_table *t, + struct nftnl_chain *c); ++int nft_cache_sort_chains(struct nft_handle *h, const char *table); + + struct nft_chain * + nft_chain_find(struct nft_handle *h, const char *table, const char *chain); +diff --git a/iptables/nft.c b/iptables/nft.c +index bde4ca72d3fcc..8b14daeaed610 100644 +--- a/iptables/nft.c ++++ b/iptables/nft.c +@@ -1754,6 +1754,8 @@ int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table, + return 1; + } + ++ nft_cache_sort_chains(h, table); ++ + ret = nft_chain_foreach(h, table, nft_rule_flush_cb, &d); + + /* the core expects 1 for success and 0 for error */ +@@ -1900,6 +1902,9 @@ int nft_chain_user_del(struct nft_handle *h, const char *chain, + goto out; + } + ++ if (verbose) ++ nft_cache_sort_chains(h, table); ++ + ret = nft_chain_foreach(h, table, __nft_chain_user_del, &d); + out: + /* the core expects 1 for success and 0 for error */ +@@ -2437,6 +2442,8 @@ int nft_rule_list(struct nft_handle *h, const char *chain, const char *table, + return 1; + } + ++ nft_cache_sort_chains(h, table); ++ + if (ops->print_table_header) + ops->print_table_header(table); + +@@ -2540,6 +2547,8 @@ int nft_rule_list_save(struct nft_handle *h, const char *chain, + return nft_rule_list_cb(c, &d); + } + ++ nft_cache_sort_chains(h, table); ++ + /* Dump policies and custom chains first */ + nft_chain_foreach(h, table, nft_rule_list_chain_save, &counters); + +@@ -3431,6 +3440,9 @@ int nft_chain_zero_counters(struct nft_handle *h, const char *chain, + goto err; + } + ++ if (verbose) ++ nft_cache_sort_chains(h, table); ++ + ret = nft_chain_foreach(h, table, __nft_chain_zero_counters, &d); + err: + /* the core expects 1 for success and 0 for error */ +diff --git a/iptables/nft.h b/iptables/nft.h +index 0910f82a2773c..4ac7e0099d567 100644 +--- a/iptables/nft.h ++++ b/iptables/nft.h +@@ -44,6 +44,7 @@ struct nft_cache { + struct nft_chain_list *chains; + struct nftnl_set_list *sets; + bool exists; ++ bool sorted; + } table[NFT_TABLE_MAX]; + }; + +diff --git a/iptables/xtables-save.c b/iptables/xtables-save.c +index d7901c650ea70..cfce0472f3ee8 100644 +--- a/iptables/xtables-save.c ++++ b/iptables/xtables-save.c +@@ -87,6 +87,7 @@ __do_output(struct nft_handle *h, const char *tablename, void *data) + printf("*%s\n", tablename); + /* Dump out chain names first, + * thereby preventing dependency conflicts */ ++ nft_cache_sort_chains(h, tablename); + nft_chain_foreach(h, tablename, nft_chain_save, h); + nft_rule_save(h, tablename, d->format); + if (d->commit) +-- +2.31.1 + diff --git a/0011-nft-Increase-BATCH_PAGE_SIZE-to-support-huge-ruleset.patch b/0011-nft-Increase-BATCH_PAGE_SIZE-to-support-huge-ruleset.patch new file mode 100644 index 0000000..863206e --- /dev/null +++ b/0011-nft-Increase-BATCH_PAGE_SIZE-to-support-huge-ruleset.patch @@ -0,0 +1,56 @@ +From 8086d05bb16e75a23b49bf1accef615193e726e6 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Tue, 6 Apr 2021 10:51:20 +0200 +Subject: [PATCH] nft: Increase BATCH_PAGE_SIZE to support huge rulesets + +In order to support the same ruleset sizes as legacy iptables, the +kernel's limit of 1024 iovecs has to be overcome. Therefore increase +each iovec's size from 128KB to 2MB. + +While being at it, add a log message for failing sendmsg() call. This is +not supposed to happen, even if the transaction fails. Yet if it does, +users are left with only a "line XXX failed" message (with line number +being the COMMIT line). + +Signed-off-by: Phil Sutter +Signed-off-by: Florian Westphal +(cherry picked from commit a3e81c62e8c5abb4158f1f66df6bbcffd1b33240) +--- + iptables/nft.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/iptables/nft.c b/iptables/nft.c +index 8b14daeaed610..f1deb82f87576 100644 +--- a/iptables/nft.c ++++ b/iptables/nft.c +@@ -88,11 +88,11 @@ int mnl_talk(struct nft_handle *h, struct nlmsghdr *nlh, + + #define NFT_NLMSG_MAXSIZE (UINT16_MAX + getpagesize()) + +-/* selected batch page is 256 Kbytes long to load ruleset of +- * half a million rules without hitting -EMSGSIZE due to large +- * iovec. ++/* Selected batch page is 2 Mbytes long to support loading a ruleset of 3.5M ++ * rules matching on source and destination address as well as input and output ++ * interfaces. This is what legacy iptables supports. + */ +-#define BATCH_PAGE_SIZE getpagesize() * 32 ++#define BATCH_PAGE_SIZE 2 * 1024 * 1024 + + static struct nftnl_batch *mnl_batch_init(void) + { +@@ -220,8 +220,10 @@ static int mnl_batch_talk(struct nft_handle *h, int numcmds) + int err = 0; + + ret = mnl_nft_socket_sendmsg(h, numcmds); +- if (ret == -1) ++ if (ret == -1) { ++ fprintf(stderr, "sendmsg() failed: %s\n", strerror(errno)); + return -1; ++ } + + FD_ZERO(&readfds); + FD_SET(fd, &readfds); +-- +2.31.1 + diff --git a/iptables.service b/iptables.service index 342ac93..6b996d1 100644 --- a/iptables.service +++ b/iptables.service @@ -1,7 +1,8 @@ [Unit] Description=IPv4 firewall with iptables -After=syslog.target AssertPathExists=/etc/sysconfig/iptables +Before=network-pre.target +Wants=network-pre.target [Service] Type=oneshot @@ -13,4 +14,4 @@ Environment=BOOTUP=serial Environment=CONSOLETYPE=serial [Install] -WantedBy=basic.target +WantedBy=multi-user.target diff --git a/iptables.spec b/iptables.spec index e02c24d..ba0d1dd 100644 --- a/iptables.spec +++ b/iptables.spec @@ -11,7 +11,7 @@ Name: iptables Summary: Tools for managing Linux kernel packet filtering capabilities URL: https://www.netfilter.org/projects/iptables Version: 1.8.7 -Release: 10%{?dist} +Release: 11%{?dist} Source: %{url}/files/%{name}-%{version}.tar.bz2 Source1: iptables.init Source2: iptables-config @@ -29,6 +29,8 @@ Patch06: 0006-extensions-libebt_ip6-Drop-unused-variables.patch Patch07: 0007-libxtables-Fix-memleak-in-xtopt_parse_hostmask.patch Patch08: 0008-nft-Avoid-memleak-in-error-path-of-nft_cmd_new.patch Patch09: 0009-iptables-apply-Drop-unused-variable.patch +Patch10: 0010-nft-cache-Sort-chains-on-demand-only.patch +Patch11: 0011-nft-Increase-BATCH_PAGE_SIZE-to-support-huge-ruleset.patch # pf.os: ISC license # iptables-apply: Artistic Licence 2.0 @@ -426,6 +428,10 @@ fi %changelog +* Thu Jul 01 2021 Phil Sutter - 1.8.7-11 +- Fix performance restoring large rulesets +- Review unit file + * Wed Jun 16 2021 Phil Sutter - 1.8.7-10 - Backport fixes from upstream