From cfe170216accf60938ff4ea9440a4ac78b0bd83f Mon Sep 17 00:00:00 2001 Message-Id: From: Dmytro Linkin Date: Thu, 28 Jan 2021 23:17:29 -0500 Subject: [PATCH] util: Add phys_port_name support on virPCIGetNetName virPCIGetNetName is used to get the name of the netdev associated with a particular PCI device. This is used when we have a VF name, but need the PF name in order to send a netlink command (e.g. in order to get/set the MAC address of the VF). In simple cases there is a single netdev associated with any PCI device, so it is easy to figure out the PF netdev for a VF - just look for the PCI device that has the VF listed in its "virtfns" directory; the only name in the "net" subdirectory of that PCI device's sysfs directory is the PF netdev that is upstream of the VF in question. In some cases there can be more than one netdev in a PCI device's net directory though. In the past, the only case of this was for SR-IOV NICs that could have multiple PF's per PCI device. In this case, all PF netdevs associated with a PCI address would be listed in the "net" subdirectory of the PCI device's directory in sysfs. At the same time, all VF netdevs and all PF netdevs have a phys_port_id in their sysfs, so the way to learn the correct PF netdev for a particular VF netdev is to search through the list of devices in the net subdirectory of the PF's PCI device, looking for the one netdev with a "phys_port_id" matching that of the VF netdev. But starting in kernel 5.8, the NVIDIA Mellanox driver began linking the VFs' representor netdevs to the PF PCI address [1], and so the VF representor netdevs would also show up in the net subdirectory. However, all of the devices that do so also only have a single PF netdev for any given PCI address. 
This means that the net directory of the PCI device can still hold multiple net devices, but only one of them will be the PF netdev (the others are VF representors): $ ls '/sys/bus/pci/devices/0000:82:00.0/net' ens1f0 eth0 eth1 In this case the way to find the PF device is to look at the "phys_port_name" attribute of each netdev in sysfs. All PF devices have a phys_port_name matching a particular regex (p[0-9]+$)|(p[0-9]+s[0-9]+$) Since there can only be one PF in the entire list of devices, once we match that regex, we've found the PF netdev. [1] - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=123f0f53dd64b67e34142485fe866a8a581f12f1 Resolves: https://bugzilla.redhat.com/1918708 Co-Authored-by: Moshe Levi Signed-off-by: Dmytro Linkin Reviewed-by: Adrian Chiris Reviewed-by: Laine Stump (cherry picked from commit 5b1c525b1f3608156884aed0dc5e925306c1e260) Conflicts: src/util/virpci.c - upstream, all DIR* were converted to use g_autoptr, which permitted virPCIGetNetName() to be simplified. Unfortunately, backporting this refactor would require backporting an ever-ballooning set of patches, making the possibility of causing a regression a very real danger. Instead, one small refactor of virPCIGetNetName() that didn't affect any other functions was backported, and this patch (adding phys_port_name support) resolved the remaining conflicts by mimicking the current upstream version of the function, but with all "return 0" replaced by "ret = 0; goto cleanup;" and all "return -1" replaced by "goto cleanup;" (the code at cleanup: just closes the DIR* and returns the current value of ret). This will ensure identical behavior to upstream. 
Signed-off-by: Laine Stump Message-Id: <20210129041729.1076345-4-laine@redhat.com> Reviewed-by: Jiri Denemark --- src/util/virpci.c | 93 ++++++++++++++++++++++++++++------------------- src/util/virpci.h | 5 +++ 2 files changed, 61 insertions(+), 37 deletions(-) diff --git a/src/util/virpci.c b/src/util/virpci.c index 00377eed31..d5c038b7fe 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -2424,9 +2424,9 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr, * virPCIGetNetName: * @device_link_sysfs_path: sysfs path to the PCI device * @idx: used to choose which netdev when there are several - * (ignored if physPortID is set) + * (ignored if physPortID is set or physPortName is available) * @physPortID: match this string in the netdev's phys_port_id - * (or NULL to ignore and use idx instead) + * (or NULL to ignore and use phys_port_name or idx instead) * @netname: used to return the name of the netdev * (set to NULL (but returns success) if there is no netdev) * @@ -2460,6 +2460,14 @@ virPCIGetNetName(const char *device_link_sysfs_path, } while (virDirRead(dir, &entry, pcidev_sysfs_net_path) > 0) { + /* save the first entry we find to use as a failsafe + * in case we don't match the phys_port_id. This is + * needed because some NIC drivers (e.g. i40e) + * implement phys_port_id for PFs, but not for VFs + */ + if (!firstEntryName) + firstEntryName = g_strdup(entry->d_name); + /* if the caller sent a physPortID, compare it to the * physportID of this netdev. If not, look for entry[idx]. */ @@ -2470,50 +2478,61 @@ virPCIGetNetName(const char *device_link_sysfs_path, goto cleanup; /* if this one doesn't match, keep looking */ - if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) { - /* save the first entry we find to use as a failsafe - * in case we don't match the phys_port_id. This is - * needed because some NIC drivers (e.g. 
i40e) - * implement phys_port_id for PFs, but not for VFs - */ - if (!firstEntryName) - firstEntryName = g_strdup(entry->d_name); - + if (STRNEQ_NULLABLE(physPortID, thisPhysPortID)) continue; - } + } else { - if (i++ < idx) - continue; - } + /* Most switch devices use phys_port_name instead of + * phys_port_id. + * NOTE: VF representor net devices can be linked to the PF's PCI + * device, which means that there can be multiple net device + * instances, and to get the proper net device we need to match on a + * specific regex. + * To get the PF netdev, for example, the following regex is used: + * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)" + * or to get an exact VF's netdev the following regex is used: + * "pf0vf1$" + */ + g_autofree char *thisPhysPortName = NULL; - *netname = g_strdup(entry->d_name); + if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0) + goto cleanup; - ret = 0; - break; - } + if (thisPhysPortName) { + + /* if this one doesn't match, keep looking */ + if (!virStringMatch(thisPhysPortName, VIR_PF_PHYS_PORT_NAME_REGEX)) + continue; - if (ret < 0) { - if (physPortID) { - if (firstEntryName) { - /* we didn't match the provided phys_port_id, but this - * is probably because phys_port_id isn't implemented - * for this NIC driver, so just return the first - * (probably only) netname we found. 
- */ - *netname = firstEntryName; - firstEntryName = NULL; - ret = 0; } else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Could not find network device with " - "phys_port_id '%s' under PCI device at %s"), - physPortID, device_link_sysfs_path); + + if (i++ < idx) + continue; } - } else { - ret = 0; /* no netdev at the given index is *not* an error */ } + + *netname = g_strdup(entry->d_name); + ret = 0; + goto cleanup; } - cleanup: + + if (firstEntryName) { + /* we didn't match the provided phys_port_id / find a + * phys_port_name matching VIR_PF_PHYS_PORT_NAME_REGEX / find + * as many net devices as the value of idx, but this is + * probably because phys_port_id / phys_port_name isn't + * implemented for this NIC driver, so just return the first + * (probably only) netname we found. + */ + *netname = g_steal_pointer(&firstEntryName); + ret = 0; + goto cleanup; + } + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not find any network device under PCI device at %s"), + device_link_sysfs_path); +cleanup: VIR_DIR_CLOSE(dir); return ret; } diff --git a/src/util/virpci.h b/src/util/virpci.h index f6796fc422..e47c766918 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -49,6 +49,11 @@ struct _virZPCIDeviceAddress { #define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d" +/* Represents format of PF's phys_port_name in switchdev mode: + * 'p%u' or 'p%us%u'. New line checked since value is read from sysfs file. + */ +#define VIR_PF_PHYS_PORT_NAME_REGEX "(p[0-9]+$)|(p[0-9]+s[0-9]+$)" + struct _virPCIDeviceAddress { unsigned int domain; unsigned int bus; -- 2.30.0