From 48c7645ce8849ac31298e6c2b1d5661d0f581279 Mon Sep 17 00:00:00 2001 From: Gris Ge Date: Mon, 17 May 2021 16:09:52 +0800 Subject: [PATCH 1/2] ovs: Fix `is_ovs_running()` in container environment. In k8s container environment, the OVS database socket /var/run/openvswitch/db.sock is mounted from host, so NM can managed it without the ovs daemon running in container. To support that, this patch removed the top level checking on `is_ovs_running()` and trust plugin raise the proper error on failure. Patched the NM plugin to check the error `NM.DeviceStateReason.OVSDB_FAILED` on activation failure, raise `NmstateDependencyError` if OVS DB failed to connected. NM will not raise any error when creating OVS internal interface with OVSDB mounted to /dev/null, NM will keep showing the OVS interface as ACTIVATING, changed the fallback checker to give only 30 seconds for OVS interface to exit `NM.DeviceState.PREPARE`, if not treat it as OVS daemon malfunctioning. Updated integration test case to mask(mount /dev/null) the OVS DB socket file for simulating the stopped OVS daemon. Signed-off-by: Gris Ge Signed-off-by: Fernando Fernandez Mancera --- libnmstate/ifaces/ovs.py | 15 ---------- libnmstate/nm/active_connection.py | 47 ++++++++++++++++++++++++++---- libnmstate/nm/plugin.py | 3 +- libnmstate/validator.py | 16 +++------- tests/integration/ovs_test.py | 41 +++++++++++--------------- 5 files changed, 64 insertions(+), 58 deletions(-) diff --git a/libnmstate/ifaces/ovs.py b/libnmstate/ifaces/ovs.py index 24d4aba..28892ad 100644 --- a/libnmstate/ifaces/ovs.py +++ b/libnmstate/ifaces/ovs.py @@ -19,7 +19,6 @@ from copy import deepcopy from operator import itemgetter -import subprocess import warnings from libnmstate.error import NmstateValueError @@ -252,20 +251,6 @@ class OvsInternalIface(BaseIface): self._info.pop(Interface.MAC, None) -def is_ovs_running(): - try: - subprocess.run( - ("systemctl", "status", "openvswitch"), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=True, - timeout=SYSTEMCTL_TIMEOUT_SECONDS, - ) - return True - except Exception: - return False - - def is_ovs_lag_port(port_state): return port_state.get(OVSBridge.Port.LINK_AGGREGATION_SUBTREE) is not None diff --git a/libnmstate/nm/active_connection.py b/libnmstate/nm/active_connection.py index ddf93a7..150256f 100644 --- a/libnmstate/nm/active_connection.py +++ b/libnmstate/nm/active_connection.py @@ -20,6 +20,7 @@ import logging from libnmstate.error import NmstateLibnmError +from libnmstate.error import NmstateDependencyError from libnmstate.error import NmstateInternalError from .common import GLib @@ -33,6 +34,7 @@ from .ipv6 import is_dynamic as is_ipv6_dynamic NM_AC_STATE_CHANGED_SIGNAL = "state-changed" FALLBACK_CHECKER_INTERNAL = 15 +MAX_OVS_IFACE_PREPARE_TIME = FALLBACK_CHECKER_INTERNAL * 2 GIO_ERROR_DOMAIN = "g-io-error-quark" @@ -92,6 +94,7 @@ class ProfileActivation: self._dev_handlers = set() self._action = None self._fallback_checker = None + self._fallback_checker_counter = 0 def run(self): specific_object = None @@ -336,19 +339,53 @@ class ProfileActivation: self._activation_clean_up() self._ctx.finish_async(self._action) elif not is_activating(self._nm_ac, self._nm_dev): - reason = f"{self._nm_ac.get_state_reason()}" + nm_ac_reason = f"{self._nm_ac.get_state_reason()}" + nm_dev_reason = None if self._nm_dev: - reason += f" {self._nm_dev.get_state_reason()}" + nm_dev_reason = self._nm_dev.get_state_reason() + + if nm_dev_reason == NM.DeviceStateReason.OVSDB_FAILED: + error = NmstateDependencyError( + f"{self._action} failed: failed to communicating with " + f"Open vSwitch database, {nm_dev_reason}" + ) + else: + reason = nm_ac_reason + ( + str(nm_dev_reason) if nm_dev_reason else "" + ) + error = NmstateLibnmError( + f"{self._action} failed: reason={reason}" + ) self._activation_clean_up() - self._ctx.fail( - NmstateLibnmError(f"{self._action} failed: reason={reason}") - ) + self._ctx.fail(error) def _fallback_checker_callback(self, _user_data): + self._fallback_checker_counter += 1 nm_dev = get_nm_dev(self._ctx, self._iface_name, self._iface_type) if nm_dev: self._nm_dev = nm_dev self._activation_progress_check() + # When OVSDB connection is invalid(such as been mounted as + # /dev/null), NM will hang on the activation of ovs internal + # interface with state ACITVATING with reason UNKNOWN forever with + # no state change signal. The fallback check only found it + # as activating which lead us hang till killed by idle timeout. + # To prevent that, when we found OVS interface interface in + # `NM.DeviceState.PREPARE` on in second call of fallbacker, + # we fail the action as NmstateDependencyError. + if ( + self._fallback_checker_counter + >= MAX_OVS_IFACE_PREPARE_TIME / FALLBACK_CHECKER_INTERNAL + and nm_dev.get_device_type() == NM.DeviceType.OVS_INTERFACE + and nm_dev.get_state() == NM.DeviceState.PREPARE + ): + self._ctx.fail( + NmstateDependencyError( + f"{self._action} failed: timeout on creating OVS " + "interface, please check Open vSwitch daemon" + ) + ) + return GLib.SOURCE_CONTINUE diff --git a/libnmstate/nm/plugin.py b/libnmstate/nm/plugin.py index 335d93c..da933b3 100644 --- a/libnmstate/nm/plugin.py +++ b/libnmstate/nm/plugin.py @@ -23,7 +23,6 @@ from operator import itemgetter from libnmstate.error import NmstateDependencyError from libnmstate.error import NmstateNotSupportedError from libnmstate.error import NmstateValueError -from libnmstate.ifaces.ovs import is_ovs_running from libnmstate.schema import DNS from libnmstate.schema import Interface from libnmstate.schema import InterfaceType @@ -103,7 +102,7 @@ class NetworkManagerPlugin(NmstatePlugin): @property def capabilities(self): capabilities = [] - if has_ovs_capability(self.client) and is_ovs_running(): + if has_ovs_capability(self.client): capabilities.append(NmstatePlugin.OVS_CAPABILITY) if has_team_capability(self.client): capabilities.append(NmstatePlugin.TEAM_CAPABILITY) diff --git a/libnmstate/validator.py b/libnmstate/validator.py index 02890b4..cd3b540 100644 --- a/libnmstate/validator.py +++ b/libnmstate/validator.py @@ -22,7 +22,6 @@ import logging import jsonschema as js -from libnmstate.ifaces.ovs import is_ovs_running from libnmstate.schema import Interface from libnmstate.schema import InterfaceType from libnmstate.error import NmstateDependencyError @@ -50,7 +49,6 @@ def validate_interface_capabilities(ifaces_state, capabilities): ifaces_types = {iface_state.get("type") for iface_state in ifaces_state} has_ovs_capability = NmstatePlugin.OVS_CAPABILITY in capabilities has_team_capability = NmstatePlugin.TEAM_CAPABILITY in capabilities - ovs_is_running = is_ovs_running() for iface_type in ifaces_types: is_ovs_type = iface_type in ( InterfaceType.OVS_BRIDGE, @@ -58,18 +56,12 @@ def validate_interface_capabilities(ifaces_state, capabilities): InterfaceType.OVS_PORT, ) if is_ovs_type and not has_ovs_capability: - if not ovs_is_running: - raise NmstateDependencyError( - "openvswitch service is not started." - ) - else: - raise NmstateDependencyError( - "Open vSwitch NetworkManager support not installed " - "and started" - ) + raise NmstateDependencyError( + "Open vSwitch support not properly installed or started" + ) elif iface_type == InterfaceType.TEAM and not has_team_capability: raise NmstateDependencyError( - "NetworkManager-team plugin not installed and started" + "Team support not properly installed or started" ) -- 2.31.1