forked from rpms/libvirt
229 lines
9.4 KiB
Diff
229 lines
9.4 KiB
Diff
From cfe170216accf60938ff4ea9440a4ac78b0bd83f Mon Sep 17 00:00:00 2001
|
|
Message-Id: <cfe170216accf60938ff4ea9440a4ac78b0bd83f@dist-git>
|
|
From: Dmytro Linkin <dlinkin@nvidia.com>
|
|
Date: Thu, 28 Jan 2021 23:17:29 -0500
|
|
Subject: [PATCH] util: Add phys_port_name support on virPCIGetNetName
|
|
|
|
virPCIGetNetName is used to get the name of the netdev associated with
|
|
a particular PCI device. This is used when we have a VF name, but need
|
|
the PF name in order to send a netlink command (e.g. in order to
|
|
get/set the MAC address of the VF).
|
|
|
|
In simple cases there is a single netdev associated with any PCI
|
|
device, so it is easy to figure out the PF netdev for a VF - just look
|
|
for the PCI device that has the VF listed in its "virtfns" directory;
|
|
the only name in the "net" subdirectory of that PCI device's sysfs
|
|
directory is the PF netdev that is upstream of the VF in question.
|
|
|
|
In some cases there can be more than one netdev in a PCI device's net
|
|
directory though. In the past, the only case of this was for SR-IOV
|
|
NICs that could have multiple PFs per PCI device. In this case, all
|
|
PF netdevs associated with a PCI address would be listed in the "net"
|
|
subdirectory of the PCI device's directory in sysfs. At the same time,
|
|
all VF netdevs and all PF netdevs have a phys_port_id in their sysfs,
|
|
so the way to learn the correct PF netdev for a particular VF netdev
|
|
is to search through the list of devices in the net subdirectory of
|
|
the PF's PCI device, looking for the one netdev with a "phys_port_id"
|
|
matching that of the VF netdev.
|
|
|
|
But starting in kernel 5.8, the NVIDIA Mellanox driver began linking
|
|
the VFs' representor netdevs to the PF PCI address [1], and so the VF
|
|
representor netdevs would also show up in the net
|
|
subdirectory. However, all of the devices that do so also only have a
|
|
single PF netdev for any given PCI address.
|
|
|
|
This means that the net directory of the PCI device can still hold
|
|
multiple net devices, but only one of them will be the PF netdev (the
|
|
others are VF representors):
|
|
|
|
$ ls '/sys/bus/pci/devices/0000:82:00.0/net'
|
|
ens1f0 eth0 eth1
|
|
|
|
In this case the way to find the PF device is to look at the
|
|
"phys_port_name" attribute of each netdev in sysfs. All PF devices
|
|
have a phys_port_name matching a particular regex
|
|
|
|
(p[0-9]+$)|(p[0-9]+s[0-9]+$)
|
|
|
|
Since there can only be one PF in the entire list of devices, once we
|
|
match that regex, we've found the PF netdev.
|
|
|
|
[1] - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/
|
|
commit/?id=123f0f53dd64b67e34142485fe866a8a581f12f1
|
|
|
|
Resolves: https://bugzilla.redhat.com/1918708
|
|
Co-Authored-by: Moshe Levi <moshele@nvidia.com>
|
|
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
|
|
Reviewed-by: Adrian Chiris <adrianc@nvidia.com>
|
|
Reviewed-by: Laine Stump <laine@redhat.com>
|
|
(cherry picked from commit 5b1c525b1f3608156884aed0dc5e925306c1e260)
|
|
|
|
Conflicts: src/util/virpci.c - upstream all DIR* were converted to use
|
|
g_autoptr, which permitted virPCIGetNetName() to be
|
|
simplified. Unfortunately, backporting this refactor would require
|
|
backporting an ever-ballooning set of patches, making the
|
|
possibility of causing a regression a very real danger. Instead,
|
|
one small refactor of virPCIGetNetName() that didn't affect any other
|
|
functions was backported, and this patch (adding phys_port_name
|
|
support) resolved the remaining conflicts by mimicking the current
|
|
upstream version of the function, but with all "return 0" replaced
|
|
by "ret = 0; goto cleanup;" and all "return -1" replaced by "goto
|
|
cleanup;" (the code at cleanup: just closes the DIR* and returns
|
|
the current value of ret). This will ensure identical behavior to
|
|
upstream.
|
|
Signed-off-by: Laine Stump <laine@redhat.com>
|
|
Message-Id: <20210129041729.1076345-4-laine@redhat.com>
|
|
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>
|
|
---
|
|
src/util/virpci.c | 93 ++++++++++++++++++++++++++++-------------------
|
|
src/util/virpci.h | 5 +++
|
|
2 files changed, 61 insertions(+), 37 deletions(-)
|
|
|
|
diff --git a/src/util/virpci.c b/src/util/virpci.c
|
|
index 00377eed31..d5c038b7fe 100644
|
|
--- a/src/util/virpci.c
|
|
+++ b/src/util/virpci.c
|
|
@@ -2424,9 +2424,9 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
|
|
* virPCIGetNetName:
|
|
* @device_link_sysfs_path: sysfs path to the PCI device
|
|
* @idx: used to choose which netdev when there are several
|
|
- * (ignored if physPortID is set)
|
|
+ * (ignored if physPortID is set or physPortName is available)
|
|
* @physPortID: match this string in the netdev's phys_port_id
|
|
- * (or NULL to ignore and use idx instead)
|
|
+ * (or NULL to ignore and use phys_port_name or idx instead)
|
|
* @netname: used to return the name of the netdev
|
|
* (set to NULL (but returns success) if there is no netdev)
|
|
*
|
|
@@ -2460,6 +2460,14 @@ virPCIGetNetName(const char *device_link_sysfs_path,
|
|
}
|
|
|
|
while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) {
|
|
+ /* save the first entry we find to use as a failsafe
|
|
+ * in case we don't match the phys_port_id. This is
|
|
+ * needed because some NIC drivers (e.g. i40e)
|
|
+ * implement phys_port_id for PFs, but not for VFs
|
|
+ */
|
|
+ if (!firstEntryName)
|
|
+ firstEntryName = g_strdup(entry->d_name);
|
|
+
|
|
/* if the caller sent a physPortID, compare it to the
|
|
* physportID of this netdev. If not, look for entry[idx].
|
|
*/
|
|
@@ -2470,50 +2478,61 @@ virPCIGetNetName(const char *device_link_sysfs_path,
|
|
goto cleanup;
|
|
|
|
/* if this one doesn't match, keep looking */
|
|
- if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) {
|
|
- /* save the first entry we find to use as a failsafe
|
|
- * in case we don't match the phys_port_id. This is
|
|
- * needed because some NIC drivers (e.g. i40e)
|
|
- * implement phys_port_id for PFs, but not for VFs
|
|
- */
|
|
- if (!firstEntryName)
|
|
- firstEntryName = g_strdup(entry->d_name);
|
|
-
|
|
+ if (STRNEQ_NULLABLE(physPortID, thisPhysPortID))
|
|
continue;
|
|
- }
|
|
+
|
|
} else {
|
|
- if (i++ < idx)
|
|
- continue;
|
|
- }
|
|
+ /* Most switch devices use phys_port_name instead of
|
|
+ * phys_port_id.
|
|
+ * NOTE: VFs' representors net devices can be linked to PF's PCI
|
|
+ * device, which mean that there'll be multiple net devices
|
|
+ * instances and to get a proper net device need to match on
|
|
+ * specific regex.
|
|
+ * To get PF netdev, for ex., used following regex:
|
|
+ * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
|
|
+ * or to get exact VF's netdev next regex is used:
|
|
+ * "pf0vf1$"
|
|
+ */
|
|
+ g_autofree char *thisPhysPortName = NULL;
|
|
|
|
- *netname = g_strdup(entry->d_name);
|
|
+ if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0)
|
|
+ goto cleanup;
|
|
|
|
- ret = 0;
|
|
- break;
|
|
- }
|
|
+ if (thisPhysPortName) {
|
|
+
|
|
+ /* if this one doesn't match, keep looking */
|
|
+ if (!virStringMatch(thisPhysPortName, VIR_PF_PHYS_PORT_NAME_REGEX))
|
|
+ continue;
|
|
|
|
- if (ret < 0) {
|
|
- if (physPortID) {
|
|
- if (firstEntryName) {
|
|
- /* we didn't match the provided phys_port_id, but this
|
|
- * is probably because phys_port_id isn't implemented
|
|
- * for this NIC driver, so just return the first
|
|
- * (probably only) netname we found.
|
|
- */
|
|
- *netname = firstEntryName;
|
|
- firstEntryName = NULL;
|
|
- ret = 0;
|
|
} else {
|
|
- virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
- _("Could not find network device with "
|
|
- "phys_port_id '%s' under PCI device at %s"),
|
|
- physPortID, device_link_sysfs_path);
|
|
+
|
|
+ if (i++ < idx)
|
|
+ continue;
|
|
}
|
|
- } else {
|
|
- ret = 0; /* no netdev at the given index is *not* an error */
|
|
}
|
|
+
|
|
+ *netname = g_strdup(entry->d_name);
|
|
+ ret = 0;
|
|
+ goto cleanup;
|
|
}
|
|
- cleanup:
|
|
+
|
|
+ if (firstEntryName) {
|
|
+ /* we didn't match the provided phys_port_id / find a
|
|
+ * phys_port_name matching VIR_PF_PHYS_PORT_NAME_REGEX / find
|
|
+ * as many net devices as the value of idx, but this is
|
|
+ * probably because phys_port_id / phys_port_name isn't
|
|
+ * implemented for this NIC driver, so just return the first
|
|
+ * (probably only) netname we found.
|
|
+ */
|
|
+ *netname = g_steal_pointer(&firstEntryName);
|
|
+ ret = 0;
|
|
+ goto cleanup;
|
|
+ }
|
|
+
|
|
+ virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
+ _("Could not find any network device under PCI device at %s"),
|
|
+ device_link_sysfs_path);
|
|
+cleanup:
|
|
VIR_DIR_CLOSE(dir);
|
|
return ret;
|
|
}
|
|
diff --git a/src/util/virpci.h b/src/util/virpci.h
|
|
index f6796fc422..e47c766918 100644
|
|
--- a/src/util/virpci.h
|
|
+++ b/src/util/virpci.h
|
|
@@ -49,6 +49,11 @@ struct _virZPCIDeviceAddress {
|
|
|
|
#define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
|
|
|
|
+/* Represents format of PF's phys_port_name in switchdev mode:
|
|
+ * 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file.
|
|
+ */
|
|
+#define VIR_PF_PHYS_PORT_NAME_REGEX "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
|
|
+
|
|
struct _virPCIDeviceAddress {
|
|
unsigned int domain;
|
|
unsigned int bus;
|
|
--
|
|
2.30.0
|
|
|