304 lines
8.7 KiB
Diff
304 lines
8.7 KiB
Diff
From 003abd51eae1a575403d74eaec2fa935ca39fb1f Mon Sep 17 00:00:00 2001
|
|
From: David Lamparter <equinox@opensourcerouting.org>
|
|
Date: Wed, 22 Jan 2025 11:23:31 +0100
|
|
Subject: [PATCH] lib: clean up nexthop hashing mess
|
|
|
|
We were hashing 4 bytes of the address. Even for IPv6 addresses.
|
|
|
|
Oops.
|
|
|
|
The reason this was done was to try to make it faster, but made a
|
|
complex maze out of everything. Time for a refactor.
|
|
|
|
Signed-off-by: David Lamparter <equinox@opensourcerouting.org>
|
|
---
|
|
lib/nexthop.c | 87 ++++++-----------------------------------
|
|
lib/nexthop.h | 104 +++++++++++++++++++++++++++++++++-----------------
|
|
2 files changed, 80 insertions(+), 111 deletions(-)
|
|
|
|
diff --git a/lib/nexthop.c b/lib/nexthop.c
|
|
index 1eeed4a..fd1d1e9 100644
|
|
--- a/lib/nexthop.c
|
|
+++ b/lib/nexthop.c
|
|
@@ -693,68 +693,30 @@ unsigned int nexthop_level(const struct nexthop *nexthop)
|
|
return rv;
|
|
}
|
|
|
|
-/* Only hash word-sized things, let cmp do the rest. */
|
|
-uint32_t nexthop_hash_quick(const struct nexthop *nexthop)
|
|
+uint32_t nexthop_hash(const struct nexthop *nexthop)
|
|
{
|
|
uint32_t key = 0x45afe398;
|
|
- int i;
|
|
|
|
- key = jhash_3words(nexthop->type, nexthop->vrf_id,
|
|
- nexthop->nh_label_type, key);
|
|
-
|
|
- if (nexthop->nh_label) {
|
|
- int labels = nexthop->nh_label->num_labels;
|
|
+ /* type, vrf, ifindex, ip addresses - see nexthop.h */
|
|
+ key = _nexthop_hash_bytes(nexthop, key);
|
|
|
|
- i = 0;
|
|
+ key = jhash_1word(nexthop->flags & NEXTHOP_FLAGS_HASHED, key);
|
|
|
|
- while (labels >= 3) {
|
|
- key = jhash_3words(nexthop->nh_label->label[i],
|
|
- nexthop->nh_label->label[i + 1],
|
|
- nexthop->nh_label->label[i + 2],
|
|
- key);
|
|
- labels -= 3;
|
|
- i += 3;
|
|
- }
|
|
-
|
|
- if (labels >= 2) {
|
|
- key = jhash_2words(nexthop->nh_label->label[i],
|
|
- nexthop->nh_label->label[i + 1],
|
|
- key);
|
|
- labels -= 2;
|
|
- i += 2;
|
|
- }
|
|
+ if (nexthop->nh_label) {
|
|
+ const struct mpls_label_stack *ls = nexthop->nh_label;
|
|
|
|
- if (labels >= 1)
|
|
- key = jhash_1word(nexthop->nh_label->label[i], key);
|
|
+ /* num_labels itself isn't useful to hash, if the number of
|
|
+ * labels is different, the hash value will change just due to
|
|
+ * that already.
|
|
+ */
|
|
+ key = jhash(ls->label, sizeof(ls->label[0]) * ls->num_labels, key);
|
|
}
|
|
|
|
- key = jhash_2words(nexthop->ifindex,
|
|
- CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK),
|
|
- key);
|
|
-
|
|
/* Include backup nexthops, if present */
|
|
if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
|
|
int backups = nexthop->backup_num;
|
|
|
|
- i = 0;
|
|
-
|
|
- while (backups >= 3) {
|
|
- key = jhash_3words(nexthop->backup_idx[i],
|
|
- nexthop->backup_idx[i + 1],
|
|
- nexthop->backup_idx[i + 2], key);
|
|
- backups -= 3;
|
|
- i += 3;
|
|
- }
|
|
-
|
|
- while (backups >= 2) {
|
|
- key = jhash_2words(nexthop->backup_idx[i],
|
|
- nexthop->backup_idx[i + 1], key);
|
|
- backups -= 2;
|
|
- i += 2;
|
|
- }
|
|
-
|
|
- if (backups >= 1)
|
|
- key = jhash_1word(nexthop->backup_idx[i], key);
|
|
+ key = jhash(nexthop->backup_idx, sizeof(nexthop->backup_idx[0]) * backups, key);
|
|
}
|
|
|
|
if (nexthop->nh_srv6) {
|
|
@@ -768,31 +730,6 @@ uint32_t nexthop_hash_quick(const struct nexthop *nexthop)
|
|
return key;
|
|
}
|
|
|
|
-
|
|
-#define GATE_SIZE 4 /* Number of uint32_t words in struct g_addr */
|
|
-
|
|
-/* For a more granular hash */
|
|
-uint32_t nexthop_hash(const struct nexthop *nexthop)
|
|
-{
|
|
- uint32_t gate_src_rmap_raw[GATE_SIZE * 3] = {};
|
|
- /* Get all the quick stuff */
|
|
- uint32_t key = nexthop_hash_quick(nexthop);
|
|
-
|
|
- assert(((sizeof(nexthop->gate) + sizeof(nexthop->src)
|
|
- + sizeof(nexthop->rmap_src))
|
|
- / 3)
|
|
- == (GATE_SIZE * sizeof(uint32_t)));
|
|
-
|
|
- memcpy(gate_src_rmap_raw, &nexthop->gate, GATE_SIZE);
|
|
- memcpy(gate_src_rmap_raw + GATE_SIZE, &nexthop->src, GATE_SIZE);
|
|
- memcpy(gate_src_rmap_raw + (2 * GATE_SIZE), &nexthop->rmap_src,
|
|
- GATE_SIZE);
|
|
-
|
|
- key = jhash2(gate_src_rmap_raw, (GATE_SIZE * 3), key);
|
|
-
|
|
- return key;
|
|
-}
|
|
-
|
|
void nexthop_copy_no_recurse(struct nexthop *copy,
|
|
const struct nexthop *nexthop,
|
|
struct nexthop *rparent)
|
|
diff --git a/lib/nexthop.h b/lib/nexthop.h
|
|
index f35cc5e..9383799 100644
|
|
--- a/lib/nexthop.h
|
|
+++ b/lib/nexthop.h
|
|
@@ -23,6 +23,7 @@
|
|
#ifndef _LIB_NEXTHOP_H
|
|
#define _LIB_NEXTHOP_H
|
|
|
|
+#include "jhash.h"
|
|
#include "prefix.h"
|
|
#include "mpls.h"
|
|
#include "vxlan.h"
|
|
@@ -71,15 +72,48 @@ struct nexthop {
|
|
struct nexthop *next;
|
|
struct nexthop *prev;
|
|
|
|
- /*
|
|
- * What vrf is this nexthop associated with?
|
|
+
|
|
+ /* begin of hashed data - all fields from here onwards are given to
|
|
+ * jhash() as one consecutive chunk. DO NOT create "padding holes".
|
|
+ * DO NOT insert pointers that need to be deep-hashed.
|
|
+ *
|
|
+ * static_assert() below needs to be updated when fields are added
|
|
*/
|
|
+ char _hash_begin[0];
|
|
+
|
|
+ /* see above */
|
|
+ enum nexthop_types_t type;
|
|
+
|
|
+ /* What vrf is this nexthop associated with? */
|
|
vrf_id_t vrf_id;
|
|
|
|
/* Interface index. */
|
|
ifindex_t ifindex;
|
|
|
|
- enum nexthop_types_t type;
|
|
+ /* Type of label(s), if any */
|
|
+ enum lsp_types_t nh_label_type;
|
|
+
|
|
+ /* padding: keep 16 byte alignment here */
|
|
+
|
|
+ /* Nexthop address
|
|
+ * make sure all 16 bytes for IPv6 are zeroed when putting in an IPv4
|
|
+ * address since the entire thing is hashed as-is
|
|
+ */
|
|
+ union {
|
|
+ union g_addr gate;
|
|
+ enum blackhole_type bh_type;
|
|
+ };
|
|
+ union g_addr src;
|
|
+ union g_addr rmap_src; /* Src is set via routemap */
|
|
+
|
|
+ /* end of hashed data - remaining fields in this struct are not
|
|
+ * directly fed into jhash(). Most of them are actually part of the
|
|
+ * hash but have special rules or handling attached.
|
|
+ */
|
|
+ char _hash_end[0];
|
|
+
|
|
+ /* Weight of the nexthop ( for unequal cost ECMP ) */
|
|
+ uint8_t weight;
|
|
|
|
uint16_t flags;
|
|
#define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */
|
|
@@ -97,18 +131,15 @@ struct nexthop {
|
|
#define NEXTHOP_FLAG_EVPN (1 << 8) /* nexthop is EVPN */
|
|
#define NEXTHOP_FLAG_LINKDOWN (1 << 9) /* is not removed on link down */
|
|
|
|
+ /* which flags are part of nexthop_hash(). Should probably be split
|
|
+ * off into a separate field...
|
|
+ */
|
|
+#define NEXTHOP_FLAGS_HASHED NEXTHOP_FLAG_ONLINK
|
|
+
|
|
#define NEXTHOP_IS_ACTIVE(flags) \
|
|
(CHECK_FLAG(flags, NEXTHOP_FLAG_ACTIVE) \
|
|
&& !CHECK_FLAG(flags, NEXTHOP_FLAG_DUPLICATE))
|
|
|
|
- /* Nexthop address */
|
|
- union {
|
|
- union g_addr gate;
|
|
- enum blackhole_type bh_type;
|
|
- };
|
|
- union g_addr src;
|
|
- union g_addr rmap_src; /* Src is set via routemap */
|
|
-
|
|
/* Nexthops obtained by recursive resolution.
|
|
*
|
|
* If the nexthop struct needs to be resolved recursively,
|
|
@@ -119,15 +150,9 @@ struct nexthop {
|
|
/* Recursive parent */
|
|
struct nexthop *rparent;
|
|
|
|
- /* Type of label(s), if any */
|
|
- enum lsp_types_t nh_label_type;
|
|
-
|
|
/* Label(s) associated with this nexthop. */
|
|
struct mpls_label_stack *nh_label;
|
|
|
|
- /* Weight of the nexthop ( for unequal cost ECMP ) */
|
|
- uint8_t weight;
|
|
-
|
|
/* Count and index of corresponding backup nexthop(s) in a backup list;
|
|
* only meaningful if the HAS_BACKUP flag is set.
|
|
*/
|
|
@@ -153,6 +178,29 @@ struct nexthop {
|
|
struct nexthop_srv6 *nh_srv6;
|
|
};
|
|
|
|
+/* all hashed fields (including padding, if it is necessary to add) need to
|
|
+ * be listed in the static_assert below
|
|
+ */
|
|
+
|
|
+#define S(field) sizeof(((struct nexthop *)NULL)->field)
|
|
+
|
|
+static_assert(
|
|
+ offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin) ==
|
|
+ S(type) + S(vrf_id) + S(ifindex) + S(nh_label_type) + S(gate) + S(src) + S(rmap_src),
|
|
+ "struct nexthop contains padding, this can break things. insert _pad fields at appropriate places");
|
|
+
|
|
+#undef S
|
|
+
|
|
+/* this is here to show exactly what is meant by the comments above about
|
|
+ * the hashing
|
|
+ */
|
|
+static inline uint32_t _nexthop_hash_bytes(const struct nexthop *nh, uint32_t seed)
|
|
+{
|
|
+ return jhash(&nh->_hash_begin,
|
|
+ offsetof(struct nexthop, _hash_end) - offsetof(struct nexthop, _hash_begin),
|
|
+ seed);
|
|
+}
|
|
+
|
|
/* Utility to append one nexthop to another. */
|
|
#define NEXTHOP_APPEND(to, new) \
|
|
do { \
|
|
@@ -196,27 +244,11 @@ struct nexthop *nexthop_from_blackhole(enum blackhole_type bh_type,
|
|
/*
|
|
* Hash a nexthop. Suitable for use with hash tables.
|
|
*
|
|
- * This function uses the following values when computing the hash:
|
|
- * - vrf_id
|
|
- * - ifindex
|
|
- * - type
|
|
- * - gate
|
|
- *
|
|
- * nexthop
|
|
- * The nexthop to hash
|
|
- *
|
|
- * Returns:
|
|
- * 32-bit hash of nexthop
|
|
+ * Please double check the code on what is included in the hash, there was
|
|
+ * documentation here but it got outdated and the only thing worse than no
|
|
+ * doc is incorrect doc.
|
|
*/
|
|
uint32_t nexthop_hash(const struct nexthop *nexthop);
|
|
-/*
|
|
- * Hash a nexthop only on word-sized attributes:
|
|
- * - vrf_id
|
|
- * - ifindex
|
|
- * - type
|
|
- * - (some) flags
|
|
- */
|
|
-uint32_t nexthop_hash_quick(const struct nexthop *nexthop);
|
|
|
|
extern bool nexthop_same(const struct nexthop *nh1, const struct nexthop *nh2);
|
|
extern bool nexthop_same_no_labels(const struct nexthop *nh1,
|
|
--
|
|
2.47.3
|
|
|