nftables-1.1.1-5.el10

* Wed Jul 16 2025 Phil Sutter <psutter@redhat.com> [1.1.1-5.el10]
- src: add conntrack information to trace monitor mode (Phil Sutter) [RHEL-102994]
- src: split monitor trace code into new trace.c (Phil Sutter) [RHEL-102994]
Resolves: RHEL-102994
This commit is contained in:
Phil Sutter 2025-07-16 11:01:29 +02:00
parent 6e830a1e31
commit 75bf1c540d
4 changed files with 1087 additions and 9 deletions

View File

@ -1,4 +1,4 @@
From b3c1312b5815b004614d79eae2ad731c6883ce6f Mon Sep 17 00:00:00 2001
From 75c95b2f59fb09c6375ca1e10277af9d0641e71d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 22 Jan 2025 10:18:04 +0100
Subject: [PATCH] evaluate: allow to re-use existing metered set
@ -41,7 +41,7 @@ Signed-off-by: Eric Garver <egarver@redhat.com>
create mode 100755 tests/shell/testcases/sets/meter_set_reuse
diff --git a/src/evaluate.c b/src/evaluate.c
index 593a0140e92a..c9cbaa6ae648 100644
index 593a014..c9cbaa6 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -3338,7 +3338,7 @@ static int stmt_evaluate_payload(struct eval_ctx *ctx, struct stmt *stmt)
@ -114,7 +114,7 @@ index 593a0140e92a..c9cbaa6ae648 100644
if (stmt_evaluate(ctx, stmt->meter.stmt) < 0)
diff --git a/tests/shell/testcases/sets/dumps/meter_set_reuse.json-nft b/tests/shell/testcases/sets/dumps/meter_set_reuse.json-nft
new file mode 100644
index 000000000000..ab4ac06184d0
index 0000000..ab4ac06
--- /dev/null
+++ b/tests/shell/testcases/sets/dumps/meter_set_reuse.json-nft
@@ -0,0 +1,105 @@
@ -225,7 +225,7 @@ index 000000000000..ab4ac06184d0
+}
diff --git a/tests/shell/testcases/sets/dumps/meter_set_reuse.nft b/tests/shell/testcases/sets/dumps/meter_set_reuse.nft
new file mode 100644
index 000000000000..f911acaffb85
index 0000000..f911aca
--- /dev/null
+++ b/tests/shell/testcases/sets/dumps/meter_set_reuse.nft
@@ -0,0 +1,11 @@
@ -242,7 +242,7 @@ index 000000000000..f911acaffb85
+}
diff --git a/tests/shell/testcases/sets/meter_set_reuse b/tests/shell/testcases/sets/meter_set_reuse
new file mode 100755
index 000000000000..94eccc1a7b82
index 0000000..94eccc1
--- /dev/null
+++ b/tests/shell/testcases/sets/meter_set_reuse
@@ -0,0 +1,20 @@
@ -266,6 +266,3 @@ index 000000000000..94eccc1a7b82
+
+# This re-add should work.
+addrule
--
2.48.1

View File

@ -0,0 +1,807 @@
From bb46381b2d378729d709480806c9522aaa32deeb Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Tue, 15 Jul 2025 22:50:32 +0200
Subject: [PATCH] src: split monitor trace code into new trace.c
JIRA: https://issues.redhat.com/browse/RHEL-102994
Upstream Status: nftables commit 8e03d59b5aa46b960454b4fd30541cee77125f77
commit 8e03d59b5aa46b960454b4fd30541cee77125f77
Author: Florian Westphal <fw@strlen.de>
Date: Mon Jul 7 11:47:13 2025 +0200
src: split monitor trace code into new trace.c
Preparation patch to avoid putting more trace functionality into
netlink.c.
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Phil Sutter <psutter@redhat.com>
---
Makefile.am | 1 +
include/netlink.h | 5 -
include/trace.h | 8 ++
src/monitor.c | 2 +-
src/netlink.c | 332 -------------------------------------------
src/trace.c | 353 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 363 insertions(+), 338 deletions(-)
create mode 100644 include/trace.h
create mode 100644 src/trace.c
diff --git a/Makefile.am b/Makefile.am
index fb64105..ba09e7f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -220,6 +220,7 @@ src_libnftables_la_SOURCES = \
src/misspell.c \
src/mnl.c \
src/monitor.c \
+ src/trace.c \
src/netlink.c \
src/netlink_delinearize.c \
src/netlink_linearize.c \
diff --git a/include/netlink.h b/include/netlink.h
index e9667a2..609f213 100644
--- a/include/netlink.h
+++ b/include/netlink.h
@@ -227,11 +227,6 @@ struct ruleset_parse {
struct cmd *cmd;
};
-struct nftnl_parse_ctx;
-
-int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
- struct netlink_mon_handler *monh);
-
enum nft_data_types dtype_map_to_kernel(const struct datatype *dtype);
void netlink_linearize_init(struct netlink_linearize_ctx *lctx,
diff --git a/include/trace.h b/include/trace.h
new file mode 100644
index 0000000..ebebb47
--- /dev/null
+++ b/include/trace.h
@@ -0,0 +1,8 @@
+#ifndef NFTABLES_TRACE_H
+#define NFTABLES_TRACE_H
+#include <linux/netlink.h>
+
+struct netlink_mon_handler;
+int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
+ struct netlink_mon_handler *monh);
+#endif /* NFTABLES_TRACE_H */
diff --git a/src/monitor.c b/src/monitor.c
index a787db8..01325c9 100644
--- a/src/monitor.c
+++ b/src/monitor.c
@@ -16,7 +16,6 @@
#include <inttypes.h>
#include <libnftnl/table.h>
-#include <libnftnl/trace.h>
#include <libnftnl/chain.h>
#include <libnftnl/expr.h>
#include <libnftnl/object.h>
@@ -32,6 +31,7 @@
#include <nftables.h>
#include <netlink.h>
#include <mnl.h>
+#include <trace.h>
#include <expression.h>
#include <statement.h>
#include <gmputil.h>
diff --git a/src/netlink.c b/src/netlink.c
index 25ee341..2ced863 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -18,7 +18,6 @@
#include <inttypes.h>
#include <libnftnl/table.h>
-#include <libnftnl/trace.h>
#include <libnftnl/chain.h>
#include <libnftnl/expr.h>
#include <libnftnl/object.h>
@@ -41,7 +40,6 @@
#include <gmputil.h>
#include <utils.h>
#include <erec.h>
-#include <iface.h>
#define nft_mon_print(monh, ...) nft_print(&monh->ctx->nft->output, __VA_ARGS__)
@@ -1859,333 +1857,3 @@ int netlink_list_flowtables(struct netlink_ctx *ctx, const struct handle *h)
nftnl_flowtable_list_free(flowtable_cache);
return err;
}
-
-static void trace_print_hdr(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- nft_print(octx, "trace id %08x %s ",
- nftnl_trace_get_u32(nlt, NFTNL_TRACE_ID),
- family2str(nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY)));
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_TABLE))
- nft_print(octx, "%s ",
- nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE));
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CHAIN))
- nft_print(octx, "%s ",
- nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN));
-}
-
-static void trace_print_expr(const struct nftnl_trace *nlt, unsigned int attr,
- struct expr *lhs, struct output_ctx *octx)
-{
- struct expr *rhs, *rel;
- const void *data;
- uint32_t len;
-
- data = nftnl_trace_get_data(nlt, attr, &len);
- rhs = constant_expr_alloc(&netlink_location,
- lhs->dtype, lhs->byteorder,
- len * BITS_PER_BYTE, data);
- rel = relational_expr_alloc(&netlink_location, OP_EQ, lhs, rhs);
-
- expr_print(rel, octx);
- nft_print(octx, " ");
- expr_free(rel);
-}
-
-static void trace_print_verdict(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- struct expr *chain_expr = NULL;
- const char *chain = NULL;
- unsigned int verdict;
- struct expr *expr;
-
- verdict = nftnl_trace_get_u32(nlt, NFTNL_TRACE_VERDICT);
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_JUMP_TARGET)) {
- chain = xstrdup(nftnl_trace_get_str(nlt, NFTNL_TRACE_JUMP_TARGET));
- chain_expr = constant_expr_alloc(&netlink_location,
- &string_type,
- BYTEORDER_HOST_ENDIAN,
- strlen(chain) * BITS_PER_BYTE,
- chain);
- }
- expr = verdict_expr_alloc(&netlink_location, verdict, chain_expr);
-
- nft_print(octx, "verdict ");
- expr_print(expr, octx);
- expr_free(expr);
-}
-
-static void trace_print_policy(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- unsigned int policy;
- struct expr *expr;
-
- policy = nftnl_trace_get_u32(nlt, NFTNL_TRACE_POLICY);
-
- expr = verdict_expr_alloc(&netlink_location, policy, NULL);
-
- nft_print(octx, "policy ");
- expr_print(expr, octx);
- expr_free(expr);
-}
-
-static struct rule *trace_lookup_rule(const struct nftnl_trace *nlt,
- uint64_t rule_handle,
- struct nft_cache *cache)
-{
- struct chain *chain;
- struct table *table;
- struct handle h;
-
- h.family = nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY);
- h.table.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE);
- h.chain.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN);
-
- if (!h.table.name)
- return NULL;
-
- table = table_cache_find(&cache->table_cache, h.table.name, h.family);
- if (!table)
- return NULL;
-
- chain = chain_cache_find(table, h.chain.name);
- if (!chain)
- return NULL;
-
- return rule_lookup(chain, rule_handle);
-}
-
-static void trace_print_rule(const struct nftnl_trace *nlt,
- struct output_ctx *octx, struct nft_cache *cache)
-{
- uint64_t rule_handle;
- struct rule *rule;
-
- rule_handle = nftnl_trace_get_u64(nlt, NFTNL_TRACE_RULE_HANDLE);
- rule = trace_lookup_rule(nlt, rule_handle, cache);
-
- trace_print_hdr(nlt, octx);
-
- if (rule) {
- nft_print(octx, "rule ");
- rule_print(rule, octx);
- } else {
- nft_print(octx, "unknown rule handle %" PRIu64, rule_handle);
- }
-
- nft_print(octx, " (");
- trace_print_verdict(nlt, octx);
- nft_print(octx, ")\n");
-}
-
-static void trace_gen_stmts(struct list_head *stmts,
- struct proto_ctx *ctx, struct payload_dep_ctx *pctx,
- const struct nftnl_trace *nlt, unsigned int attr,
- enum proto_bases base)
-{
- struct list_head unordered = LIST_HEAD_INIT(unordered);
- struct list_head list;
- struct expr *rel, *lhs, *rhs, *tmp, *nexpr;
- struct stmt *stmt;
- const struct proto_desc *desc;
- const void *hdr;
- uint32_t hlen;
- unsigned int n;
-
- if (!nftnl_trace_is_set(nlt, attr))
- return;
- hdr = nftnl_trace_get_data(nlt, attr, &hlen);
-
- lhs = payload_expr_alloc(&netlink_location, NULL, 0);
- payload_init_raw(lhs, base, 0, hlen * BITS_PER_BYTE);
- rhs = constant_expr_alloc(&netlink_location,
- &invalid_type, BYTEORDER_INVALID,
- hlen * BITS_PER_BYTE, hdr);
-
-restart:
- init_list_head(&list);
- payload_expr_expand(&list, lhs, ctx);
- expr_free(lhs);
-
- desc = NULL;
- list_for_each_entry_safe(lhs, nexpr, &list, list) {
- if (desc && desc != ctx->protocol[base].desc) {
- /* Chained protocols */
- lhs->payload.offset = 0;
- if (ctx->protocol[base].desc == NULL)
- break;
- goto restart;
- }
-
- tmp = constant_expr_splice(rhs, lhs->len);
- expr_set_type(tmp, lhs->dtype, lhs->byteorder);
- if (tmp->byteorder == BYTEORDER_HOST_ENDIAN)
- mpz_switch_byteorder(tmp->value, tmp->len / BITS_PER_BYTE);
-
- /* Skip unknown and filtered expressions */
- desc = lhs->payload.desc;
- if (lhs->dtype == &invalid_type ||
- lhs->payload.tmpl == &proto_unknown_template ||
- desc->checksum_key == payload_hdr_field(lhs) ||
- desc->format.filter & (1 << payload_hdr_field(lhs))) {
- expr_free(lhs);
- expr_free(tmp);
- continue;
- }
-
- rel = relational_expr_alloc(&lhs->location, OP_EQ, lhs, tmp);
- stmt = expr_stmt_alloc(&rel->location, rel);
- list_add_tail(&stmt->list, &unordered);
-
- desc = ctx->protocol[base].desc;
- relational_expr_pctx_update(ctx, rel);
- }
-
- expr_free(rhs);
-
- n = 0;
-next:
- list_for_each_entry(stmt, &unordered, list) {
- enum proto_bases b = base;
-
- rel = stmt->expr;
- lhs = rel->left;
-
- /* Move statements to result list in defined order */
- desc = lhs->payload.desc;
- if (desc->format.order[n] &&
- desc->format.order[n] != payload_hdr_field(lhs))
- continue;
-
- list_move_tail(&stmt->list, stmts);
- n++;
-
- if (payload_is_stacked(desc, rel))
- b--;
-
- /* Don't strip 'icmp type' from payload dump. */
- if (pctx->icmp_type == 0)
- payload_dependency_kill(pctx, lhs, ctx->family);
- if (lhs->flags & EXPR_F_PROTOCOL)
- payload_dependency_store(pctx, stmt, b);
-
- goto next;
- }
-}
-
-static void trace_print_packet(const struct nftnl_trace *nlt,
- struct output_ctx *octx)
-{
- struct list_head stmts = LIST_HEAD_INIT(stmts);
- const struct proto_desc *ll_desc;
- struct payload_dep_ctx pctx = {};
- struct proto_ctx ctx;
- uint16_t dev_type;
- uint32_t nfproto;
- struct stmt *stmt, *next;
-
- trace_print_hdr(nlt, octx);
-
- nft_print(octx, "packet: ");
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_IIF))
- trace_print_expr(nlt, NFTNL_TRACE_IIF,
- meta_expr_alloc(&netlink_location,
- NFT_META_IIF), octx);
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_OIF))
- trace_print_expr(nlt, NFTNL_TRACE_OIF,
- meta_expr_alloc(&netlink_location,
- NFT_META_OIF), octx);
-
- proto_ctx_init(&ctx, nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY), 0, false);
- ll_desc = ctx.protocol[PROTO_BASE_LL_HDR].desc;
- if ((ll_desc == &proto_inet || ll_desc == &proto_netdev) &&
- nftnl_trace_is_set(nlt, NFTNL_TRACE_NFPROTO)) {
- nfproto = nftnl_trace_get_u32(nlt, NFTNL_TRACE_NFPROTO);
-
- proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location, NULL);
- proto_ctx_update(&ctx, PROTO_BASE_NETWORK_HDR, &netlink_location,
- proto_find_upper(ll_desc, nfproto));
- }
- if (ctx.protocol[PROTO_BASE_LL_HDR].desc == NULL &&
- nftnl_trace_is_set(nlt, NFTNL_TRACE_IIFTYPE)) {
- dev_type = nftnl_trace_get_u16(nlt, NFTNL_TRACE_IIFTYPE);
- proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location,
- proto_dev_desc(dev_type));
- }
-
- trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_LL_HEADER,
- PROTO_BASE_LL_HDR);
- trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_NETWORK_HEADER,
- PROTO_BASE_NETWORK_HDR);
- trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_TRANSPORT_HEADER,
- PROTO_BASE_TRANSPORT_HDR);
-
- list_for_each_entry_safe(stmt, next, &stmts, list) {
- stmt_print(stmt, octx);
- nft_print(octx, " ");
- stmt_free(stmt);
- }
- nft_print(octx, "\n");
-}
-
-int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
- struct netlink_mon_handler *monh)
-{
- struct nftnl_trace *nlt;
-
- assert(type == NFT_MSG_TRACE);
-
- nlt = nftnl_trace_alloc();
- if (!nlt)
- memory_allocation_error();
-
- if (nftnl_trace_nlmsg_parse(nlh, nlt) < 0)
- netlink_abi_error();
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_LL_HEADER) ||
- nftnl_trace_is_set(nlt, NFTNL_TRACE_NETWORK_HEADER))
- trace_print_packet(nlt, &monh->ctx->nft->output);
-
- switch (nftnl_trace_get_u32(nlt, NFTNL_TRACE_TYPE)) {
- case NFT_TRACETYPE_RULE:
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_RULE_HANDLE))
- trace_print_rule(nlt, &monh->ctx->nft->output,
- &monh->ctx->nft->cache);
- break;
- case NFT_TRACETYPE_POLICY:
- trace_print_hdr(nlt, &monh->ctx->nft->output);
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_POLICY)) {
- trace_print_policy(nlt, &monh->ctx->nft->output);
- nft_mon_print(monh, " ");
- }
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
- trace_print_expr(nlt, NFTNL_TRACE_MARK,
- meta_expr_alloc(&netlink_location,
- NFT_META_MARK),
- &monh->ctx->nft->output);
- nft_mon_print(monh, "\n");
- break;
- case NFT_TRACETYPE_RETURN:
- trace_print_hdr(nlt, &monh->ctx->nft->output);
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_VERDICT)) {
- trace_print_verdict(nlt, &monh->ctx->nft->output);
- nft_mon_print(monh, " ");
- }
-
- if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
- trace_print_expr(nlt, NFTNL_TRACE_MARK,
- meta_expr_alloc(&netlink_location,
- NFT_META_MARK),
- &monh->ctx->nft->output);
- nft_mon_print(monh, "\n");
- break;
- }
-
- nftnl_trace_free(nlt);
- return MNL_CB_OK;
-}
diff --git a/src/trace.c b/src/trace.c
new file mode 100644
index 0000000..a7cc8ff
--- /dev/null
+++ b/src/trace.c
@@ -0,0 +1,353 @@
+#include <nft.h>
+#include <trace.h>
+
+#include <libnftnl/trace.h>
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <inttypes.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter.h>
+
+#include <nftables.h>
+#include <mnl.h>
+#include <parser.h>
+#include <netlink.h>
+#include <expression.h>
+#include <statement.h>
+#include <utils.h>
+
+#define nft_mon_print(monh, ...) nft_print(&monh->ctx->nft->output, __VA_ARGS__)
+
+static void trace_print_hdr(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ nft_print(octx, "trace id %08x %s ",
+ nftnl_trace_get_u32(nlt, NFTNL_TRACE_ID),
+ family2str(nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY)));
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_TABLE))
+ nft_print(octx, "%s ",
+ nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE));
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CHAIN))
+ nft_print(octx, "%s ",
+ nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN));
+}
+
+static void trace_print_expr(const struct nftnl_trace *nlt, unsigned int attr,
+ struct expr *lhs, struct output_ctx *octx)
+{
+ struct expr *rhs, *rel;
+ const void *data;
+ uint32_t len;
+
+ data = nftnl_trace_get_data(nlt, attr, &len);
+ rhs = constant_expr_alloc(&netlink_location,
+ lhs->dtype, lhs->byteorder,
+ len * BITS_PER_BYTE, data);
+ rel = relational_expr_alloc(&netlink_location, OP_EQ, lhs, rhs);
+
+ expr_print(rel, octx);
+ nft_print(octx, " ");
+ expr_free(rel);
+}
+
+/* Print "verdict <v>", with the jump target chain when the trace message carries one. */
+static void trace_print_verdict(const struct nftnl_trace *nlt,
+				struct output_ctx *octx)
+{
+	struct expr *chain_expr = NULL;
+	const char *chain = NULL;
+	unsigned int verdict;
+	struct expr *expr;
+
+	verdict = nftnl_trace_get_u32(nlt, NFTNL_TRACE_VERDICT);
+	if (nftnl_trace_is_set(nlt, NFTNL_TRACE_JUMP_TARGET)) {
+		/* No xstrdup(): nothing here frees a private copy, so it would leak. */
+		chain = nftnl_trace_get_str(nlt, NFTNL_TRACE_JUMP_TARGET);
+		chain_expr = constant_expr_alloc(&netlink_location,
+						 &string_type, BYTEORDER_HOST_ENDIAN,
+						 strlen(chain) * BITS_PER_BYTE, chain);
+	}
+	expr = verdict_expr_alloc(&netlink_location, verdict, chain_expr);
+
+	nft_print(octx, "verdict ");
+	expr_print(expr, octx);
+	expr_free(expr);
+}
+
+static void trace_print_policy(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ unsigned int policy;
+ struct expr *expr;
+
+ policy = nftnl_trace_get_u32(nlt, NFTNL_TRACE_POLICY);
+
+ expr = verdict_expr_alloc(&netlink_location, policy, NULL);
+
+ nft_print(octx, "policy ");
+ expr_print(expr, octx);
+ expr_free(expr);
+}
+
+static struct rule *trace_lookup_rule(const struct nftnl_trace *nlt,
+ uint64_t rule_handle,
+ struct nft_cache *cache)
+{
+ struct chain *chain;
+ struct table *table;
+ struct handle h;
+
+ h.family = nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY);
+ h.table.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_TABLE);
+ h.chain.name = nftnl_trace_get_str(nlt, NFTNL_TRACE_CHAIN);
+
+ if (!h.table.name)
+ return NULL;
+
+ table = table_cache_find(&cache->table_cache, h.table.name, h.family);
+ if (!table)
+ return NULL;
+
+ chain = chain_cache_find(table, h.chain.name);
+ if (!chain)
+ return NULL;
+
+ return rule_lookup(chain, rule_handle);
+}
+
+static void trace_print_rule(const struct nftnl_trace *nlt,
+ struct output_ctx *octx, struct nft_cache *cache)
+{
+ uint64_t rule_handle;
+ struct rule *rule;
+
+ rule_handle = nftnl_trace_get_u64(nlt, NFTNL_TRACE_RULE_HANDLE);
+ rule = trace_lookup_rule(nlt, rule_handle, cache);
+
+ trace_print_hdr(nlt, octx);
+
+ if (rule) {
+ nft_print(octx, "rule ");
+ rule_print(rule, octx);
+ } else {
+ nft_print(octx, "unknown rule handle %" PRIu64, rule_handle);
+ }
+
+ nft_print(octx, " (");
+ trace_print_verdict(nlt, octx);
+ nft_print(octx, ")\n");
+}
+
+static void trace_gen_stmts(struct list_head *stmts,
+ struct proto_ctx *ctx, struct payload_dep_ctx *pctx,
+ const struct nftnl_trace *nlt, unsigned int attr,
+ enum proto_bases base)
+{
+ struct list_head unordered = LIST_HEAD_INIT(unordered);
+ struct list_head list;
+ struct expr *rel, *lhs, *rhs, *tmp, *nexpr;
+ struct stmt *stmt;
+ const struct proto_desc *desc;
+ const void *hdr;
+ uint32_t hlen;
+ unsigned int n;
+
+ if (!nftnl_trace_is_set(nlt, attr))
+ return;
+ hdr = nftnl_trace_get_data(nlt, attr, &hlen);
+
+ lhs = payload_expr_alloc(&netlink_location, NULL, 0);
+ payload_init_raw(lhs, base, 0, hlen * BITS_PER_BYTE);
+ rhs = constant_expr_alloc(&netlink_location,
+ &invalid_type, BYTEORDER_INVALID,
+ hlen * BITS_PER_BYTE, hdr);
+
+restart:
+ init_list_head(&list);
+ payload_expr_expand(&list, lhs, ctx);
+ expr_free(lhs);
+
+ desc = NULL;
+ list_for_each_entry_safe(lhs, nexpr, &list, list) {
+ if (desc && desc != ctx->protocol[base].desc) {
+ /* Chained protocols */
+ lhs->payload.offset = 0;
+ if (ctx->protocol[base].desc == NULL)
+ break;
+ goto restart;
+ }
+
+ tmp = constant_expr_splice(rhs, lhs->len);
+ expr_set_type(tmp, lhs->dtype, lhs->byteorder);
+ if (tmp->byteorder == BYTEORDER_HOST_ENDIAN)
+ mpz_switch_byteorder(tmp->value, tmp->len / BITS_PER_BYTE);
+
+ /* Skip unknown and filtered expressions */
+ desc = lhs->payload.desc;
+ if (lhs->dtype == &invalid_type ||
+ lhs->payload.tmpl == &proto_unknown_template ||
+ desc->checksum_key == payload_hdr_field(lhs) ||
+ desc->format.filter & (1 << payload_hdr_field(lhs))) {
+ expr_free(lhs);
+ expr_free(tmp);
+ continue;
+ }
+
+ rel = relational_expr_alloc(&lhs->location, OP_EQ, lhs, tmp);
+ stmt = expr_stmt_alloc(&rel->location, rel);
+ list_add_tail(&stmt->list, &unordered);
+
+ desc = ctx->protocol[base].desc;
+ relational_expr_pctx_update(ctx, rel);
+ }
+
+ expr_free(rhs);
+
+ n = 0;
+next:
+ list_for_each_entry(stmt, &unordered, list) {
+ enum proto_bases b = base;
+
+ rel = stmt->expr;
+ lhs = rel->left;
+
+ /* Move statements to result list in defined order */
+ desc = lhs->payload.desc;
+ if (desc->format.order[n] &&
+ desc->format.order[n] != payload_hdr_field(lhs))
+ continue;
+
+ list_move_tail(&stmt->list, stmts);
+ n++;
+
+ if (payload_is_stacked(desc, rel))
+ b--;
+
+ /* Don't strip 'icmp type' from payload dump. */
+ if (pctx->icmp_type == 0)
+ payload_dependency_kill(pctx, lhs, ctx->family);
+ if (lhs->flags & EXPR_F_PROTOCOL)
+ payload_dependency_store(pctx, stmt, b);
+
+ goto next;
+ }
+}
+
+static void trace_print_packet(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ struct list_head stmts = LIST_HEAD_INIT(stmts);
+ const struct proto_desc *ll_desc;
+ struct payload_dep_ctx pctx = {};
+ struct proto_ctx ctx;
+ uint16_t dev_type;
+ uint32_t nfproto;
+ struct stmt *stmt, *next;
+
+ trace_print_hdr(nlt, octx);
+
+ nft_print(octx, "packet: ");
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_IIF))
+ trace_print_expr(nlt, NFTNL_TRACE_IIF,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_IIF), octx);
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_OIF))
+ trace_print_expr(nlt, NFTNL_TRACE_OIF,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_OIF), octx);
+
+ proto_ctx_init(&ctx, nftnl_trace_get_u32(nlt, NFTNL_TRACE_FAMILY), 0, false);
+ ll_desc = ctx.protocol[PROTO_BASE_LL_HDR].desc;
+ if ((ll_desc == &proto_inet || ll_desc == &proto_netdev) &&
+ nftnl_trace_is_set(nlt, NFTNL_TRACE_NFPROTO)) {
+ nfproto = nftnl_trace_get_u32(nlt, NFTNL_TRACE_NFPROTO);
+
+ proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location, NULL);
+ proto_ctx_update(&ctx, PROTO_BASE_NETWORK_HDR, &netlink_location,
+ proto_find_upper(ll_desc, nfproto));
+ }
+ if (ctx.protocol[PROTO_BASE_LL_HDR].desc == NULL &&
+ nftnl_trace_is_set(nlt, NFTNL_TRACE_IIFTYPE)) {
+ dev_type = nftnl_trace_get_u16(nlt, NFTNL_TRACE_IIFTYPE);
+ proto_ctx_update(&ctx, PROTO_BASE_LL_HDR, &netlink_location,
+ proto_dev_desc(dev_type));
+ }
+
+ trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_LL_HEADER,
+ PROTO_BASE_LL_HDR);
+ trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_NETWORK_HEADER,
+ PROTO_BASE_NETWORK_HDR);
+ trace_gen_stmts(&stmts, &ctx, &pctx, nlt, NFTNL_TRACE_TRANSPORT_HEADER,
+ PROTO_BASE_TRANSPORT_HDR);
+
+ list_for_each_entry_safe(stmt, next, &stmts, list) {
+ stmt_print(stmt, octx);
+ nft_print(octx, " ");
+ stmt_free(stmt);
+ }
+ nft_print(octx, "\n");
+}
+
+int netlink_events_trace_cb(const struct nlmsghdr *nlh, int type,
+ struct netlink_mon_handler *monh)
+{
+ struct nftnl_trace *nlt;
+
+ assert(type == NFT_MSG_TRACE);
+
+ nlt = nftnl_trace_alloc();
+ if (!nlt)
+ memory_allocation_error();
+
+ if (nftnl_trace_nlmsg_parse(nlh, nlt) < 0)
+ netlink_abi_error();
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_LL_HEADER) ||
+ nftnl_trace_is_set(nlt, NFTNL_TRACE_NETWORK_HEADER))
+ trace_print_packet(nlt, &monh->ctx->nft->output);
+
+ switch (nftnl_trace_get_u32(nlt, NFTNL_TRACE_TYPE)) {
+ case NFT_TRACETYPE_RULE:
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_RULE_HANDLE))
+ trace_print_rule(nlt, &monh->ctx->nft->output,
+ &monh->ctx->nft->cache);
+ break;
+ case NFT_TRACETYPE_POLICY:
+ trace_print_hdr(nlt, &monh->ctx->nft->output);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_POLICY)) {
+ trace_print_policy(nlt, &monh->ctx->nft->output);
+ nft_mon_print(monh, " ");
+ }
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
+ trace_print_expr(nlt, NFTNL_TRACE_MARK,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_MARK),
+ &monh->ctx->nft->output);
+ nft_mon_print(monh, "\n");
+ break;
+ case NFT_TRACETYPE_RETURN:
+ trace_print_hdr(nlt, &monh->ctx->nft->output);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_VERDICT)) {
+ trace_print_verdict(nlt, &monh->ctx->nft->output);
+ nft_mon_print(monh, " ");
+ }
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_MARK))
+ trace_print_expr(nlt, NFTNL_TRACE_MARK,
+ meta_expr_alloc(&netlink_location,
+ NFT_META_MARK),
+ &monh->ctx->nft->output);
+ nft_mon_print(monh, "\n");
+ break;
+ }
+
+ nftnl_trace_free(nlt);
+ return MNL_CB_OK;
+}

View File

@ -0,0 +1,268 @@
From 0d28ee52a20e8441f66dc11b690fb595f63db6a3 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Tue, 15 Jul 2025 22:50:32 +0200
Subject: [PATCH] src: add conntrack information to trace monitor mode
JIRA: https://issues.redhat.com/browse/RHEL-102994
Upstream Status: nftables commit cfd768615235bb89650f15498c70d19813502825
commit cfd768615235bb89650f15498c70d19813502825
Author: Florian Westphal <fw@strlen.de>
Date: Mon Jul 7 22:38:13 2025 +0200
src: add conntrack information to trace monitor mode
Upcoming kernel change provides the packet's conntrack state in the
trace message data.
This allows to see if packet is seen as original or reply, the conntrack
state (new, established, related) and the status bits which show if e.g.
NAT was applied. Also include conntrack ID so users can use conntrack
tool to query the kernel for more information via ctnetlink.
This improves debugging when e.g. packets do not pick up the expected
NAT mapping, which could e.g. also happen because of expectations
following the NAT binding of the owning conntrack entry.
Example output ("conntrack: " lines are new):
trace id 32 t PRE_RAW packet: iif "enp0s3" ether saddr [..]
trace id 32 t PRE_RAW rule tcp flags syn meta nftrace set 1 (verdict continue)
trace id 32 t PRE_RAW policy accept
trace id 32 t PRE_MANGLE conntrack: ct direction original ct state new ct id 2641368242
trace id 32 t PRE_MANGLE packet: iif "enp0s3" ether saddr [..]
trace id 32 t ct_new_pre rule jump rpfilter (verdict jump rpfilter)
trace id 32 t PRE_MANGLE policy accept
trace id 32 t INPUT conntrack: ct direction original ct state new ct status dnat-done ct id 2641368242
trace id 32 t INPUT packet: iif "enp0s3" [..]
trace id 32 t public_in rule tcp dport 443 accept (verdict accept)
v3: remove clash bit again, kernel won't expose it anymore.
v2: add more status bits: helper, clash, offload, hw-offload.
add flag explanation to documentation.
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Phil Sutter <psutter@redhat.com>
---
doc/data-types.txt | 30 ++---
include/linux/netfilter/nf_conntrack_common.h | 16 +++
src/ct.c | 7 ++
src/trace.c | 109 ++++++++++++++++++
4 files changed, 147 insertions(+), 15 deletions(-)
diff --git a/doc/data-types.txt b/doc/data-types.txt
index 6c0e2f9..abbb7fd 100644
--- a/doc/data-types.txt
+++ b/doc/data-types.txt
@@ -378,21 +378,21 @@ For each of the types above, keywords are available for convenience:
.conntrack status (ct_status)
[options="header"]
|==================
-|Keyword| Value
-|expected|
-1
-|seen-reply|
-2
-|assured|
-4
-|confirmed|
-8
-|snat|
-16
-|dnat|
-32
-|dying|
-512
+|Keyword| Value | Description
+|expected|1| Expected connection; conntrack helper set it up
+|seen-reply|2| Conntrack has seen packets in both directions
+|assured| 4 |Conntrack entry will not be removed if hash table is full
+|confirmed | 8 | Initial packet processed
+|snat| 16 | Original source address differs from reply destination
+|dnat| 32 | Original destination differs from reply source
+|seq-adjust| 64 | tcp sequence number rewrite due to conntrack helper or synproxy
+|snat-done| 128 | tried to find matching snat/masquerade rule
+|dnat-done| 256 | tried to find matching dnat/redirect rule
+|dying| 512 | Connection about to be deleted
+|fixed-timeout | 1024 | entry expires even if traffic is active
+|helper | 8192 | connection is monitored by conntrack helper
+|offload | 16384 | connection is offloaded to a flow table
+|hw-offload | 32768 | connection is offloaded to hardware
|================
.conntrack event bits (ct_event)
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 768ff25..22bbb6c 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -77,6 +77,22 @@ enum ip_conntrack_status {
/* Connection has fixed timeout. */
IPS_FIXED_TIMEOUT_BIT = 10,
IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
+
+ /* Conntrack is a fake untracked entry. Obsolete and not used anymore */
+ IPS_UNTRACKED_BIT = 12,
+ IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+
+ /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */
+ IPS_HELPER_BIT = 13,
+ IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+ /* Conntrack has been offloaded to flow table. */
+ IPS_OFFLOAD_BIT = 14,
+ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
+
+ /* Conntrack has been offloaded to hardware. */
+ IPS_HW_OFFLOAD_BIT = 15,
+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
};
/* Connection tracking event types */
diff --git a/src/ct.c b/src/ct.c
index 6793464..cd97d82 100644
--- a/src/ct.c
+++ b/src/ct.c
@@ -98,7 +98,14 @@ static const struct symbol_table ct_status_tbl = {
SYMBOL("confirmed", IPS_CONFIRMED),
SYMBOL("snat", IPS_SRC_NAT),
SYMBOL("dnat", IPS_DST_NAT),
+ SYMBOL("seq-adjust", IPS_SEQ_ADJUST),
+ SYMBOL("snat-done", IPS_SRC_NAT_DONE),
+ SYMBOL("dnat-done", IPS_DST_NAT_DONE),
SYMBOL("dying", IPS_DYING),
+ SYMBOL("fixed-timeout", IPS_FIXED_TIMEOUT),
+		SYMBOL("helper",	IPS_HELPER),
+		SYMBOL("offload",	IPS_OFFLOAD),
+		SYMBOL("hw-offload",	IPS_HW_OFFLOAD),
SYMBOL_LIST_END
},
};
diff --git a/src/trace.c b/src/trace.c
index a7cc8ff..b270951 100644
--- a/src/trace.c
+++ b/src/trace.c
@@ -237,6 +237,114 @@ next:
}
}
+/* Expand a 32-bit flag word into a list of single-bit constants; NULL if no bit is set. */
+static struct expr *trace_alloc_list(const struct datatype *dtype,
+				     enum byteorder byteorder,
+				     unsigned int len, const void *data)
+{
+	struct expr *list_expr;
+	unsigned int i;
+	mpz_t value;
+	uint32_t v;
+
+	/* Not a 32-bit value: hand it back as one plain constant. */
+	if (len != sizeof(v))
+		return constant_expr_alloc(&netlink_location,
+					   dtype, byteorder,
+					   len * BITS_PER_BYTE, data);
+
+	mpz_init2(value, 32);
+	mpz_import_data(value, data, byteorder, len);
+	v = mpz_get_uint32(value);
+	mpz_clear(value);
+	if (v == 0)
+		return NULL;
+
+	/* Allocate only after the zero check so the early return cannot leak the list. */
+	list_expr = list_expr_alloc(&netlink_location);
+	for (i = 0; i < 32; i++) {
+		uint32_t bitv = v & (1U << i);	/* unsigned: 1 << 31 on int is undefined */
+
+		if (bitv == 0)
+			continue;
+
+		compound_expr_add(list_expr,
+				  constant_expr_alloc(&netlink_location,
+						      dtype, byteorder,
+						      len * BITS_PER_BYTE,
+						      &bitv));
+	}
+
+	return list_expr;
+}
+
+static void trace_print_ct_expr(const struct nftnl_trace *nlt, unsigned int attr,
+ enum nft_ct_keys key, struct output_ctx *octx)
+{
+ struct expr *lhs, *rhs, *rel;
+ const void *data;
+ uint32_t len;
+
+ data = nftnl_trace_get_data(nlt, attr, &len);
+ lhs = ct_expr_alloc(&netlink_location, key, -1);
+
+ switch (key) {
+ case NFT_CT_STATUS:
+ rhs = trace_alloc_list(lhs->dtype, lhs->byteorder, len, data);
+ if (!rhs) {
+ expr_free(lhs);
+ return;
+ }
+ rel = binop_expr_alloc(&netlink_location, OP_IMPLICIT, lhs, rhs);
+ break;
+ case NFT_CT_DIRECTION:
+ case NFT_CT_STATE:
+ case NFT_CT_ID:
+ /* fallthrough */
+ default:
+ rhs = constant_expr_alloc(&netlink_location,
+ lhs->dtype, lhs->byteorder,
+ len * BITS_PER_BYTE, data);
+ rel = relational_expr_alloc(&netlink_location, OP_IMPLICIT, lhs, rhs);
+ break;
+ }
+
+ expr_print(rel, octx);
+ nft_print(octx, " ");
+ expr_free(rel);
+}
+
+static void trace_print_ct(const struct nftnl_trace *nlt,
+ struct output_ctx *octx)
+{
+ bool ct = nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_STATE);
+
+ if (!ct)
+ return;
+
+ trace_print_hdr(nlt, octx);
+
+ nft_print(octx, "conntrack: ");
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_DIRECTION))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_DIRECTION,
+ NFT_CT_DIRECTION, octx);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_STATE))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_STATE,
+ NFT_CT_STATE, octx);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_STATUS))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_STATUS,
+ NFT_CT_STATUS, octx);
+
+ if (nftnl_trace_is_set(nlt, NFTNL_TRACE_CT_ID))
+ trace_print_ct_expr(nlt, NFTNL_TRACE_CT_ID,
+ NFT_CT_ID, octx);
+
+ nft_print(octx, "\n");
+}
+
static void trace_print_packet(const struct nftnl_trace *nlt,
struct output_ctx *octx)
{
@@ -248,6 +356,7 @@ static void trace_print_packet(const struct nftnl_trace *nlt,
uint32_t nfproto;
struct stmt *stmt, *next;
+ trace_print_ct(nlt, octx);
trace_print_hdr(nlt, octx);
nft_print(octx, "packet: ");

View File

@ -1,6 +1,6 @@
Name: nftables
Version: 1.1.1
Release: 4%{?dist}
Release: 5%{?dist}
# Upstream released a 0.100 version, then 0.4. Need Epoch to get back on track.
Epoch: 1
Summary: Netfilter Tables userspace utilities
@ -30,6 +30,8 @@ Patch11: 0011-json-Support-typeof-in-set-and-map-types.patch
Patch12: 0012-tests-py-Fix-for-storing-payload-into-missing-file.patch
Patch13: 0013-monitor-Recognize-flowtable-add-del-events.patch
Patch14: 0014-evaluate-allow-to-re-use-existing-metered-set.patch
Patch15: 0015-src-split-monitor-trace-code-into-new-trace.c.patch
Patch16: 0016-src-add-conntrack-information-to-trace-monitor-mode.patch
BuildRequires: autoconf
BuildRequires: automake
@ -144,6 +146,10 @@ cd py/
%files -n python3-nftables -f %{pyproject_files}
%changelog
* Wed Jul 16 2025 Phil Sutter <psutter@redhat.com> [1.1.1-5.el10]
- src: add conntrack information to trace monitor mode (Phil Sutter) [RHEL-102994]
- src: split monitor trace code into new trace.c (Phil Sutter) [RHEL-102994]
* Mon Mar 03 2025 Eric Garver <egarver@redhat.com> [1.1.1-4.el10]
- evaluate: allow to re-use existing metered set [RHEL-75507]