From 676dfca09d9c783311a51a1c53fa0f7ecd95bd28 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 10 Sep 2021 13:38:19 -0400 Subject: [PATCH] [collect] Abstract transport protocol from SoSNode Since its addition to sos, collect has assumed the use of a system installation of SSH in order to connect to the nodes identified for collection. However, there may be use cases and desires to use other transport protocols. As such, provide an abstraction for these protocols in the form of the new `RemoteTransport` class that `SoSNode` will now leverage. So far an abstraction for the currently used SSH ControlPersist function is provided, along with a psuedo abstraction for local execution so that SoSNode does not directly need to make more "if local then foo" checks than are absolutely necessary. Related: #2668 Signed-off-by: Jake Hunsaker --- setup.py | 4 +- sos/collector/__init__.py | 54 +-- sos/collector/clusters/__init__.py | 4 +- sos/collector/clusters/jbon.py | 2 + sos/collector/clusters/kubernetes.py | 4 +- sos/collector/clusters/ocp.py | 6 +- sos/collector/clusters/ovirt.py | 10 +- sos/collector/clusters/pacemaker.py | 8 +- sos/collector/clusters/satellite.py | 4 +- sos/collector/sosnode.py | 388 +++++--------------- sos/collector/transports/__init__.py | 317 ++++++++++++++++ sos/collector/transports/control_persist.py | 199 ++++++++++ sos/collector/transports/local.py | 49 +++ 13 files changed, 705 insertions(+), 344 deletions(-) create mode 100644 sos/collector/transports/__init__.py create mode 100644 sos/collector/transports/control_persist.py create mode 100644 sos/collector/transports/local.py diff --git a/setup.py b/setup.py index 7653b59d..25e87a71 100644 --- a/setup.py +++ b/setup.py @@ -101,8 +101,8 @@ setup( 'sos.policies.distros', 'sos.policies.runtimes', 'sos.policies.package_managers', 'sos.policies.init_systems', 'sos.report', 'sos.report.plugins', 'sos.collector', - 'sos.collector.clusters', 'sos.cleaner', 'sos.cleaner.mappings', - 'sos.cleaner.parsers', 'sos.cleaner.archives' + 'sos.collector.clusters', 'sos.collector.transports', 'sos.cleaner', + 'sos.cleaner.mappings', 'sos.cleaner.parsers', 'sos.cleaner.archives' ], cmdclass=cmdclass, command_options=command_options, diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index b2a07f37..da912655 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -17,7 +17,6 @@ import re import string import socket import shutil -import subprocess import sys from datetime import datetime @@ -28,7 +27,6 @@ from pipes import quote from textwrap import fill from sos.cleaner import SoSCleaner from sos.collector.sosnode import SosNode -from sos.collector.exceptions import ControlPersistUnsupportedException from sos.options import ClusterOption from sos.component import SoSComponent from sos import __version__ @@ -154,7 +152,6 @@ class SoSCollector(SoSComponent): try: self.parse_node_strings() self.parse_cluster_options() - self._check_for_control_persist() self.log_debug('Executing %s' % ' '.join(s for s in sys.argv)) self.log_debug("Found cluster profiles: %s" % self.clusters.keys()) @@ -437,33 +434,6 @@ class SoSCollector(SoSComponent): action='extend', help='List of usernames to obfuscate') - def _check_for_control_persist(self): - """Checks to see if the local system supported SSH ControlPersist. - - ControlPersist allows OpenSSH to keep a single open connection to a - remote host rather than building a new session each time. This is the - same feature that Ansible uses in place of paramiko, which we have a - need to drop in sos-collector. - - This check relies on feedback from the ssh binary. The command being - run should always generate stderr output, but depending on what that - output reads we can determine if ControlPersist is supported or not. - - For our purposes, a host that does not support ControlPersist is not - able to run sos-collector. - - Returns - True if ControlPersist is supported, else raise Exception. - """ - ssh_cmd = ['ssh', '-o', 'ControlPersist'] - cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - out, err = cmd.communicate() - err = err.decode('utf-8') - if 'Bad configuration option' in err or 'Usage:' in err: - raise ControlPersistUnsupportedException - return True - def exit(self, msg, error=1): """Used to safely terminate if sos-collector encounters an error""" self.log_error(msg) @@ -455,7 +455,7 @@ class SoSCollector(SoSComponent): 'cmdlineopts': self.opts, 'need_sudo': True if self.opts.ssh_user != 'root' else False, 'tmpdir': self.tmpdir, - 'hostlen': len(self.opts.master) or len(self.hostname), + 'hostlen': max(len(self.opts.primary), len(self.hostname)), 'policy': self.policy } @@ -1020,9 +1020,10 @@ class SoSCollector(SoSComponent): self.node_list.append(self.hostname) self.reduce_node_list() try: - self.commons['hostlen'] = len(max(self.node_list, key=len)) + _node_max = len(max(self.node_list, key=len)) + self.commons['hostlen'] = max(_node_max, self.commons['hostlen']) except (TypeError, ValueError): - self.commons['hostlen'] = len(self.opts.master) + pass def _connect_to_node(self, node): """Try to connect to the node, and if we can add to the client list to @@ -1068,7 +1039,7 @@ class SoSCollector(SoSComponent): client.set_node_manifest(getattr(self.collect_md.nodes, node[0])) else: - client.close_ssh_session() + client.disconnect() except Exception: pass @@ -1077,12 +1048,11 @@ class SoSCollector(SoSComponent): provided on the command line """ disclaimer = ("""\ -This utility is used to collect sosreports from multiple \ -nodes simultaneously. It uses OpenSSH's ControlPersist feature \ -to connect to nodes and run commands remotely. If your system \ -installation of OpenSSH is older than 5.6, please upgrade. +This utility is used to collect sos reports from multiple \ +nodes simultaneously. Remote connections are made and/or maintained \ +to those nodes via well-known transport protocols such as SSH. -An archive of sosreport tarballs collected from the nodes will be \ +An archive of sos report tarballs collected from the nodes will be \ generated in %s and may be provided to an appropriate support representative. The generated archive may contain data considered sensitive \ @@ -1230,10 +1200,10 @@ this utility or remote systems that it connects to. self.log_error("Error running sosreport: %s" % err) def close_all_connections(self): - """Close all ssh sessions for nodes""" + """Close all sessions for nodes""" for client in self.client_list: - self.log_debug('Closing SSH connection to %s' % client.address) - client.close_ssh_session() + self.log_debug('Closing connection to %s' % client.address) + client.disconnect() def create_cluster_archive(self): """Calls for creation of tar archive then cleans up the temporary diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py index 2b5d7018..64ac2a44 100644 --- a/sos/collector/clusters/__init__.py +++ b/sos/collector/clusters/__init__.py @@ -188,8 +188,8 @@ class Cluster(): :rtype: ``dict`` """ res = self.master.run_command(cmd, get_pty=True, need_root=need_root) - if res['stdout']: - res['stdout'] = res['stdout'].replace('Password:', '') + if res['output']: + res['output'] = res['output'].replace('Password:', '') return res def setup(self): diff --git a/sos/collector/clusters/jbon.py b/sos/collector/clusters/jbon.py index 488fbd16..8f083ac6 100644 --- a/sos/collector/clusters/jbon.py +++ b/sos/collector/clusters/jbon.py @@ -28,3 +28,5 @@ class jbon(Cluster): # This should never be called, but as insurance explicitly never # allow this to be enabled via the determine_cluster() path return False + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py index cdbf8861..99f788dc 100644 --- a/sos/collector/clusters/kubernetes.py +++ b/sos/collector/clusters/kubernetes.py @@ -34,7 +34,7 @@ class kubernetes(Cluster): if res['status'] == 0: nodes = [] roles = [x for x in self.get_option('role').split(',') if x] - for nodeln in res['stdout'].splitlines()[1:]: + for nodeln in res['output'].splitlines()[1:]: node = nodeln.split() if not roles: nodes.append(node[0]) @@ -44,3 +44,5 @@ class kubernetes(Cluster): return nodes else: raise Exception('Node enumeration did not return usable output') + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py index 5479417d..ad97587f 100644 --- a/sos/collector/clusters/ocp.py +++ b/sos/collector/clusters/ocp.py @@ -93,7 +93,7 @@ class ocp(Cluster): res = self.exec_master_cmd(self.fmt_oc_cmd(cmd)) if res['status'] == 0: roles = [r for r in self.get_option('role').split(':')] - self.node_dict = self._build_dict(res['stdout'].splitlines()) + self.node_dict = self._build_dict(res['output'].splitlines()) for node in self.node_dict: if roles: for role in roles: @@ -103,7 +103,7 @@ class ocp(Cluster): nodes.append(node) else: msg = "'oc' command failed" - if 'Missing or incomplete' in res['stdout']: + if 'Missing or incomplete' in res['output']: msg = ("'oc' failed due to missing kubeconfig on master node." " Specify one via '-c ocp.kubeconfig='") raise Exception(msg) @@ -168,3 +168,5 @@ class ocp(Cluster): def set_node_options(self, node): # don't attempt OC API collections on non-primary nodes node.plugin_options.append('openshift.no-oc=on') + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py index 079a122e..bd2d0c74 100644 --- a/sos/collector/clusters/ovirt.py +++ b/sos/collector/clusters/ovirt.py @@ -98,7 +98,7 @@ class ovirt(Cluster): return [] res = self._run_db_query(self.dbquery) if res['status'] == 0: - nodes = res['stdout'].splitlines()[2:-1] + nodes = res['output'].splitlines()[2:-1] return [n.split('(')[0].strip() for n in nodes] else: raise Exception('database query failed, return code: %s' @@ -114,7 +114,7 @@ class ovirt(Cluster): engconf = '/etc/ovirt-engine/engine.conf.d/10-setup-database.conf' res = self.exec_primary_cmd('cat %s' % engconf, need_root=True) if res['status'] == 0: - config = res['stdout'].splitlines() + config = res['output'].splitlines() for line in config: try: k = str(line.split('=')[0]) @@ -141,7 +141,7 @@ class ovirt(Cluster): '--batch -o postgresql {}' ).format(self.conf['ENGINE_DB_PASSWORD'], sos_opt) db_sos = self.exec_primary_cmd(cmd, need_root=True) - for line in db_sos['stdout'].splitlines(): + for line in db_sos['output'].splitlines(): if fnmatch.fnmatch(line, '*sosreport-*tar*'): _pg_dump = line.strip() self.master.manifest.add_field('postgresql_dump', @@ -180,5 +180,7 @@ class rhhi_virt(rhv): ret = self._run_db_query('SELECT count(server_id) FROM gluster_server') if ret['status'] == 0: # if there are any entries in this table, RHHI-V is in use - return ret['stdout'].splitlines()[2].strip() != '0' + return ret['output'].splitlines()[2].strip() != '0' return False + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py index 034f3f3e..55024314 100644 --- a/sos/collector/clusters/pacemaker.py +++ b/sos/collector/clusters/pacemaker.py @@ -27,7 +27,7 @@ class pacemaker(Cluster): self.log_error('Cluster status could not be determined. Is the ' 'cluster running on this node?') return [] - if 'node names do not match' in self.res['stdout']: + if 'node names do not match' in self.res['output']: self.log_warn('Warning: node name mismatch reported. Attempts to ' 'connect to some nodes may fail.\n') return self.parse_pcs_output() @@ -41,17 +41,19 @@ class pacemaker(Cluster): return nodes def get_online_nodes(self): - for line in self.res['stdout'].splitlines(): + for line in self.res['output'].splitlines(): if line.startswith('Online:'): nodes = line.split('[')[1].split(']')[0] return [n for n in nodes.split(' ') if n] def get_offline_nodes(self): offline = [] - for line in self.res['stdout'].splitlines(): + for line in self.res['output'].splitlines(): if line.startswith('Node') and line.endswith('(offline)'): offline.append(line.split()[1].replace(':', '')) if line.startswith('OFFLINE:'): nodes = line.split('[')[1].split(']')[0] offline.extend([n for n in nodes.split(' ') if n]) return offline + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/clusters/satellite.py b/sos/collector/clusters/satellite.py index e123c8a3..7c21e553 100644 --- a/sos/collector/clusters/satellite.py +++ b/sos/collector/clusters/satellite.py @@ -28,7 +28,7 @@ class satellite(Cluster): res = self.exec_primary_cmd(cmd, need_root=True) if res['status'] == 0: nodes = [ - n.strip() for n in res['stdout'].splitlines() + n.strip() for n in res['output'].splitlines() if 'could not change directory' not in n ] return nodes @@ -38,3 +38,5 @@ class satellite(Cluster): if node.address == self.master.address: return 'satellite' return 'capsule' + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 4b1ee109..f79bd5ff 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -12,22 +12,16 @@ import fnmatch import inspect import logging import os -import pexpect import re -import shutil from distutils.version import LooseVersion from pipes import quote from sos.policies import load from sos.policies.init_systems import InitSystem -from sos.collector.exceptions import (InvalidPasswordException, - TimeoutPasswordAuthException, - PasswordRequestException, - AuthPermissionDeniedException, +from sos.collector.transports.control_persist import SSHControlPersist +from sos.collector.transports.local import LocalTransport +from sos.collector.exceptions import (CommandTimeoutException, ConnectionException, - CommandTimeoutException, - ConnectionTimeoutException, - ControlSocketMissingException, UnsupportedHostException) @@ -61,34 +61,25 @@ class SosNode(): 'sos_cmd': commons['sos_cmd'] } self.sos_bin = 'sosreport' - filt = ['localhost', '127.0.0.1'] self.soslog = logging.getLogger('sos') self.ui_log = logging.getLogger('sos_ui') - self.control_path = ("%s/.sos-collector-%s" - % (self.tmpdir, self.address)) - self.ssh_cmd = self._create_ssh_command() - if self.address not in filt: - try: - self.connected = self._create_ssh_session() - except Exception as err: - self.log_error('Unable to open SSH session: %s' % err) - raise - else: - self.connected = True - self.local = True - self.need_sudo = os.getuid() != 0 + self._transport = self._load_remote_transport(commons) + try: + self._transport.connect(self._password) + except Exception as err: + self.log_error('Unable to open remote session: %s' % err) + raise # load the host policy now, even if we don't want to load further # host information. This is necessary if we're running locally on the # cluster master but do not want a local report as we still need to do # package checks in that instance self.host = self.determine_host_policy() - self.get_hostname() + self.hostname = self._transport.hostname if self.local and self.opts.no_local: load_facts = False if self.connected and load_facts: if not self.host: - self.connected = False - self.close_ssh_session() + self._transport.disconnect() return None if self.local: if self.check_in_container(): @@ -103,11 +88,26 @@ class SosNode(): self.create_sos_container() self._load_sos_info() - def _create_ssh_command(self): - """Build the complete ssh command for this node""" - cmd = "ssh -oControlPath=%s " % self.control_path - cmd += "%s@%s " % (self.opts.ssh_user, self.address) - return cmd + @property + def connected(self): + if self._transport: + return self._transport.connected + # if no transport, we're running locally + return True + + def disconnect(self): + """Wrapper to close the remote session via our transport agent + """ + self._transport.disconnect() + + def _load_remote_transport(self, commons): + """Determine the type of remote transport to load for this node, then + return an instantiated instance of that transport + """ + if self.address in ['localhost', '127.0.0.1']: + self.local = True + return LocalTransport(self.address, commons) + return SSHControlPersist(self.address, commons) def _fmt_msg(self, msg): return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg) @@ -135,6 +135,7 @@ class SosNode(): self.manifest.add_field('policy', self.host.distro) self.manifest.add_field('sos_version', self.sos_info['version']) self.manifest.add_field('final_sos_command', '') + self.manifest.add_field('transport', self._transport.name) def check_in_container(self): """ @@ -160,13 +161,13 @@ class SosNode(): res = self.run_command(cmd, need_root=True) if res['status'] in [0, 125]: if res['status'] == 125: - if 'unable to retrieve auth token' in res['stdout']: + if 'unable to retrieve auth token' in res['output']: self.log_error( "Could not pull image. Provide either a username " "and password or authfile" ) raise Exception - elif 'unknown: Not found' in res['stdout']: + elif 'unknown: Not found' in res['output']: self.log_error('Specified image not found on registry') raise Exception # 'name exists' with code 125 means the container was @@ -181,11 +182,11 @@ class SosNode(): return True else: self.log_error("Could not start container after create: %s" - % ret['stdout']) + % ret['output']) raise Exception else: self.log_error("Could not create container on host: %s" - % res['stdout']) + % res['output']) raise Exception def get_container_auth(self): @@ -204,18 +205,11 @@ class SosNode(): def file_exists(self, fname, need_root=False): """Checks for the presence of fname on the remote node""" - if not self.local: - try: - res = self.run_command("stat %s" % fname, need_root=need_root) - return res['status'] == 0 - except Exception: - return False - else: - try: - os.stat(fname) - return True - except Exception: - return False + try: + res = self.run_command("stat %s" % fname, need_root=need_root) + return res['status'] == 0 + except Exception: + return False @property def _hostname(self): @@ -223,18 +217,6 @@ class SosNode(): return self.hostname return self.address - @property - def control_socket_exists(self): - """Check if the SSH control socket exists - - The control socket is automatically removed by the SSH daemon in the - event that the last connection to the node was greater than the timeout - set by the ControlPersist option. This can happen for us if we are - collecting from a large number of nodes, and the timeout expires before - we start collection. - """ - return os.path.exists(self.control_path) - def _sanitize_log_msg(self, msg): """Attempts to obfuscate sensitive information in log messages such as passwords""" @@ -264,12 +246,6 @@ class SosNode(): msg = '[%s:%s] %s' % (self._hostname, caller, msg) self.soslog.debug(msg) - def get_hostname(self): - """Get the node's hostname""" - sout = self.run_command('hostname') - self.hostname = sout['stdout'].strip() - self.log_info('Hostname set to %s' % self.hostname) - def _format_cmd(self, cmd): """If we need to provide a sudo or root password to a command, then here we prefix the command with the correct bits @@ -280,19 +256,6 @@ class SosNode(): return "sudo -S %s" % cmd return cmd - def _fmt_output(self, output=None, rc=0): - """Formats the returned output from a command into a dict""" - if rc == 0: - stdout = output - stderr = '' - else: - stdout = '' - stderr = output - res = {'status': rc, - 'stdout': stdout, - 'stderr': stderr} - return res - def _load_sos_info(self): """Queries the node for information about the installed version of sos """ @@ -306,7 +269,7 @@ class SosNode(): pkgs = self.run_command(self.host.container_version_command, use_container=True, need_root=True) if pkgs['status'] == 0: - ver = pkgs['stdout'].strip().split('-')[1] + ver = pkgs['output'].strip().split('-')[1] if ver: self.sos_info['version'] = ver else: @@ -321,18 +284,21 @@ class SosNode(): self.log_error('sos is not installed on this node') self.connected = False return False - cmd = 'sosreport -l' + # sos-4.0 changes the binary + if self.check_sos_version('4.0'): + self.sos_bin = 'sos report' + cmd = "%s -l" % self.sos_bin sosinfo = self.run_command(cmd, use_container=True, need_root=True) if sosinfo['status'] == 0: - self._load_sos_plugins(sosinfo['stdout']) + self._load_sos_plugins(sosinfo['output']) if self.check_sos_version('3.6'): self._load_sos_presets() def _load_sos_presets(self): - cmd = 'sosreport --list-presets' + cmd = '%s --list-presets' % self.sos_bin res = self.run_command(cmd, use_container=True, need_root=True) if res['status'] == 0: - for line in res['stdout'].splitlines(): + for line in res['output'].splitlines(): if line.strip().startswith('name:'): pname = line.split('name:')[1].strip() self.sos_info['presets'].append(pname) @@ -372,21 +338,7 @@ class SosNode(): """Reads the specified file and returns the contents""" try: self.log_info("Reading file %s" % to_read) - if not self.local: - res = self.run_command("cat %s" % to_read, timeout=5) - if res['status'] == 0: - return res['stdout'] - else: - if 'No such file' in res['stdout']: - self.log_debug("File %s does not exist on node" - % to_read) - else: - self.log_error("Error reading %s: %s" % - (to_read, res['stdout'].split(':')[1:])) - return '' - else: - with open(to_read, 'r') as rfile: - return rfile.read() + return self._transport.read_file(to_read) except Exception as err: self.log_error("Exception while reading %s: %s" % (to_read, err)) return '' @@ -400,7 +352,8 @@ class SosNode(): % self.commons['policy'].distro) return self.commons['policy'] host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(), - probe_runtime=True, remote_exec=self.ssh_cmd, + probe_runtime=True, + remote_exec=self._transport.remote_exec, remote_check=self.read_file('/etc/os-release')) if host: self.log_info("loaded policy %s for host" % host.distro) @@ -422,7 +375,7 @@ class SosNode(): return self.host.package_manager.pkg_by_name(pkg) is not None def run_command(self, cmd, timeout=180, get_pty=False, need_root=False, - force_local=False, use_container=False, env=None): + use_container=False, env=None): """Runs a given cmd, either via the SSH session or locally Arguments: @@ -433,58 +386,37 @@ class SosNode(): need_root - if a command requires root privileges, setting this to True tells sos-collector to format the command with sudo or su - as appropriate and to input the password - force_local - force a command to run locally. Mainly used for scp. use_container - Run this command in a container *IF* the host is containerized """ - if not self.control_socket_exists and not self.local: - self.log_debug('Control socket does not exist, attempting to ' - 're-create') + if not self.connected and not self.local: + self.log_debug('Node is disconnected, attempting to reconnect') try: - _sock = self._create_ssh_session() - if not _sock: - self.log_debug('Failed to re-create control socket') - raise ControlSocketMissingException + reconnected = self._transport.reconnect(self._password) + if not reconnected: + self.log_debug('Failed to reconnect to node') + raise ConnectionException except Exception as err: - self.log_error('Cannot run command: control socket does not ' - 'exist') - self.log_debug("Error while trying to create new SSH control " - "socket: %s" % err) + self.log_debug("Error while trying to reconnect: %s" % err) raise if use_container and self.host.containerized: cmd = self.host.format_container_command(cmd) if need_root: - get_pty = True cmd = self._format_cmd(cmd) - self.log_debug('Running command %s' % cmd) + if 'atomic' in cmd: get_pty = True - if not self.local and not force_local: - cmd = "%s %s" % (self.ssh_cmd, quote(cmd)) - else: - if get_pty: - cmd = "/bin/bash -c %s" % quote(cmd) + + if get_pty: + cmd = "/bin/bash -c %s" % quote(cmd) + if env: _cmd_env = self.env_vars env = _cmd_env.update(env) - res = pexpect.spawn(cmd, encoding='utf-8', env=env) - if need_root: - if self.need_sudo: - res.sendline(self.opts.sudo_pw) - if self.opts.become_root: - res.sendline(self.opts.root_password) - output = res.expect([pexpect.EOF, pexpect.TIMEOUT], - timeout=timeout) - if output == 0: - out = res.before - res.close() - rc = res.exitstatus - return {'status': rc, 'stdout': out} - elif output == 1: - raise CommandTimeoutException(cmd) + return self._transport.run_command(cmd, timeout, need_root, env) def sosreport(self): - """Run a sosreport on the node, then collect it""" + """Run an sos report on the node, then collect it""" try: path = self.execute_sos_command() if path: @@ -497,109 +429,6 @@ class SosNode(): pass self.cleanup() - def _create_ssh_session(self): - """ - Using ControlPersist, create the initial connection to the node. - - This will generate an OpenSSH ControlPersist socket within the tmp - directory created or specified for sos-collector to use. - - At most, we will wait 30 seconds for a connection. This involves a 15 - second wait for the initial connection attempt, and a subsequent 15 - second wait for a response when we supply a password. - - Since we connect to nodes in parallel (using the --threads value), this - means that the time between 'Connecting to nodes...' and 'Beginning - collection of sosreports' that users see can be up to an amount of time - equal to 30*(num_nodes/threads) seconds. - - Returns - True if session is successfully opened, else raise Exception - """ - # Don't use self.ssh_cmd here as we need to add a few additional - # parameters to establish the initial connection - self.log_info('Opening SSH session to create control socket') - connected = False - ssh_key = '' - ssh_port = '' - if self.opts.ssh_port != 22: - ssh_port = "-p%s " % self.opts.ssh_port - if self.opts.ssh_key: - ssh_key = "-i%s" % self.opts.ssh_key - cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto " - "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s " - "\"echo Connected\"" % (ssh_key, - ssh_port, - self.control_path, - self.opts.ssh_user, - self.address)) - res = pexpect.spawn(cmd, encoding='utf-8') - - connect_expects = [ - u'Connected', - u'password:', - u'.*Permission denied.*', - u'.* port .*: No route to host', - u'.*Could not resolve hostname.*', - pexpect.TIMEOUT - ] - - index = res.expect(connect_expects, timeout=15) - - if index == 0: - connected = True - elif index == 1: - if self._password: - pass_expects = [ - u'Connected', - u'Permission denied, please try again.', - pexpect.TIMEOUT - ] - res.sendline(self._password) - pass_index = res.expect(pass_expects, timeout=15) - if pass_index == 0: - connected = True - elif pass_index == 1: - # Note that we do not get an exitstatus here, so matching - # this line means an invalid password will be reported for - # both invalid passwords and invalid user names - raise InvalidPasswordException - elif pass_index == 2: - raise TimeoutPasswordAuthException - else: - raise PasswordRequestException - elif index == 2: - raise AuthPermissionDeniedException - elif index == 3: - raise ConnectionException(self.address, self.opts.ssh_port) - elif index == 4: - raise ConnectionException(self.address) - elif index == 5: - raise ConnectionTimeoutException - else: - raise Exception("Unknown error, client returned %s" % res.before) - if connected: - self.log_debug("Successfully created control socket at %s" - % self.control_path) - return True - return False - - def close_ssh_session(self): - """Remove the control socket to effectively terminate the session""" - if self.local: - return True - try: - res = self.run_command("rm -f %s" % self.control_path, - force_local=True) - if res['status'] == 0: - return True - self.log_error("Could not remove ControlPath %s: %s" - % (self.control_path, res['stdout'])) - return False - except Exception as e: - self.log_error('Error closing SSH session: %s' % e) - return False - def _preset_exists(self, preset): """Verifies if the given preset exists on the node""" return preset in self.sos_info['presets'] @@ -646,8 +475,8 @@ class SosNode(): self.cluster = cluster def update_cmd_from_cluster(self): - """This is used to modify the sosreport command run on the nodes. - By default, sosreport is run without any options, using this will + """This is used to modify the sos report command run on the nodes. + By default, sos report is run without any options, using this will allow the profile to specify what plugins to run or not and what options to use. @@ -727,10 +556,6 @@ class SosNode(): if self.opts.since: sos_opts.append('--since=%s' % quote(self.opts.since)) - # sos-4.0 changes the binary - if self.check_sos_version('4.0'): - self.sos_bin = 'sos report' - if self.check_sos_version('4.1'): if self.opts.skip_commands: sos_opts.append( @@ -811,7 +636,7 @@ class SosNode(): self.manifest.add_field('final_sos_command', self.sos_cmd) def determine_sos_label(self): - """Determine what, if any, label should be added to the sosreport""" + """Determine what, if any, label should be added to the sos report""" label = '' label += self.cluster.get_node_label(self) @@ -822,7 +647,7 @@ class SosNode(): if not label: return None - self.log_debug('Label for sosreport set to %s' % label) + self.log_debug('Label for sos report set to %s' % label) if self.check_sos_version('3.6'): lcmd = '--label' else: @@ -844,20 +669,20 @@ class SosNode(): def determine_sos_error(self, rc, stdout): if rc == -1: - return 'sosreport process received SIGKILL on node' + return 'sos report process received SIGKILL on node' if rc == 1: if 'sudo' in stdout: return 'sudo attempt failed' if rc == 127: - return 'sosreport terminated unexpectedly. Check disk space' + return 'sos report terminated unexpectedly. Check disk space' if len(stdout) > 0: return stdout.split('\n')[0:1] else: return 'sos exited with code %s' % rc def execute_sos_command(self): - """Run sosreport and capture the resulting file path""" - self.ui_msg('Generating sosreport...') + """Run sos report and capture the resulting file path""" + self.ui_msg('Generating sos report...') try: path = False checksum = False @@ -867,7 +692,7 @@ class SosNode(): use_container=True, env=self.sos_env_vars) if res['status'] == 0: - for line in res['stdout'].splitlines(): + for line in res['output'].splitlines(): if fnmatch.fnmatch(line, '*sosreport-*tar*'): path = line.strip() if line.startswith((" sha256\t", " md5\t")): @@ -884,44 +709,31 @@ class SosNode(): else: self.manifest.add_field('checksum_type', 'unknown') else: - err = self.determine_sos_error(res['status'], res['stdout']) - self.log_debug("Error running sosreport. rc = %s msg = %s" - % (res['status'], res['stdout'] or - res['stderr'])) + err = self.determine_sos_error(res['status'], res['output']) + self.log_debug("Error running sos report. rc = %s msg = %s" + % (res['status'], res['output'])) raise Exception(err) return path except CommandTimeoutException: self.log_error('Timeout exceeded') raise except Exception as e: - self.log_error('Error running sosreport: %s' % e) + self.log_error('Error running sos report: %s' % e) raise def retrieve_file(self, path): """Copies the specified file from the host to our temp dir""" destdir = self.tmpdir + '/' - dest = destdir + path.split('/')[-1] + dest = os.path.join(destdir, path.split('/')[-1]) try: - if not self.local: - if self.file_exists(path): - self.log_info("Copying remote %s to local %s" % - (path, destdir)) - cmd = "/usr/bin/scp -oControlPath=%s %s@%s:%s %s" % ( - self.control_path, - self.opts.ssh_user, - self.address, - path, - destdir - ) - res = self.run_command(cmd, force_local=True) - return res['status'] == 0 - else: - self.log_debug("Attempting to copy remote file %s, but it " - "does not exist on filesystem" % path) - return False + if self.file_exists(path): + self.log_info("Copying remote %s to local %s" % + (path, destdir)) + self._transport.retrieve_file(path, dest) else: - self.log_debug("Moving %s to %s" % (path, destdir)) - shutil.copy(path, dest) + self.log_debug("Attempting to copy remote file %s, but it " + "does not exist on filesystem" % path) + return False return True except Exception as err: self.log_debug("Failed to retrieve %s: %s" % (path, err)) @@ -933,7 +745,7 @@ class SosNode(): """ path = ''.join(path.split()) try: - if len(path) <= 2: # ensure we have a non '/' path + if len(path.split('/')) <= 2: # ensure we have a non '/' path self.log_debug("Refusing to remove path %s: appears to be " "incorrect and possibly dangerous" % path) return False @@ -959,14 +771,14 @@ class SosNode(): except Exception: self.log_error('Failed to make archive readable') return False - self.soslog.info('Retrieving sosreport from %s' % self.address) - self.ui_msg('Retrieving sosreport...') + self.soslog.info('Retrieving sos report from %s' % self.address) + self.ui_msg('Retrieving sos report...') ret = self.retrieve_file(self.sos_path) if ret: - self.ui_msg('Successfully collected sosreport') + self.ui_msg('Successfully collected sos report') self.file_list.append(self.sos_path.split('/')[-1]) else: - self.log_error('Failed to retrieve sosreport') + self.log_error('Failed to retrieve sos report') raise SystemExit return True else: @@ -976,8 +788,8 @@ class SosNode(): else: e = [x.strip() for x in self.stdout.readlines() if x.strip][-1] self.soslog.error( - 'Failed to run sosreport on %s: %s' % (self.address, e)) - self.log_error('Failed to run sosreport. %s' % e) + 'Failed to run sos report on %s: %s' % (self.address, e)) + self.log_error('Failed to run sos report. %s' % e) return False def remove_sos_archive(self): @@ -986,20 +798,20 @@ class SosNode(): if self.sos_path is None: return if 'sosreport' not in self.sos_path: - self.log_debug("Node sosreport path %s looks incorrect. Not " + self.log_debug("Node sos report path %s looks incorrect. Not " "attempting to remove path" % self.sos_path) return removed = self.remove_file(self.sos_path) if not removed: - self.log_error('Failed to remove sosreport') + self.log_error('Failed to remove sos report') def cleanup(self): """Remove the sos archive from the node once we have it locally""" self.remove_sos_archive() if self.sos_path: for ext in ['.sha256', '.md5']: - if os.path.isfile(self.sos_path + ext): - self.remove_file(self.sos_path + ext) + if self.remove_file(self.sos_path + ext): + break cleanup = self.host.set_cleanup_cmd() if cleanup: self.run_command(cleanup, need_root=True) @@ -1040,3 +852,5 @@ class SosNode(): msg = "Exception while making %s readable. Return code was %s" self.log_error(msg % (filepath, res['status'])) raise Exception + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py new file mode 100644 index 00000000..5be7dc6d --- /dev/null +++ b/sos/collector/transports/__init__.py @@ -0,0 +1,317 @@ +# Copyright Red Hat 2021, Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +import inspect +import logging +import pexpect +import re + +from pipes import quote +from sos.collector.exceptions import (ConnectionException, + CommandTimeoutException) + + +class RemoteTransport(): + """The base class used for defining supported remote transports to connect + to remote nodes in conjunction with `sos collect`. + + This abstraction is used to manage the backend connections to nodes so that + SoSNode() objects can be leveraged generically to connect to nodes, inspect + those nodes, and run commands on them. + """ + + name = 'undefined' + + def __init__(self, address, commons): + self.address = address + self.opts = commons['cmdlineopts'] + self.tmpdir = commons['tmpdir'] + self.need_sudo = commons['need_sudo'] + self._hostname = None + self.soslog = logging.getLogger('sos') + self.ui_log = logging.getLogger('sos_ui') + + def _sanitize_log_msg(self, msg): + """Attempts to obfuscate sensitive information in log messages such as + passwords""" + reg = r'(?P(pass|key|secret|PASS|KEY|SECRET).*?=)(?P.*?\s)' + return re.sub(reg, r'\g****** ', msg) + + def log_info(self, msg): + """Used to print and log info messages""" + caller = inspect.stack()[1][3] + lmsg = '[%s:%s] %s' % (self.hostname, caller, msg) + self.soslog.info(lmsg) + + def log_error(self, msg): + """Used to print and log error messages""" + caller = inspect.stack()[1][3] + lmsg = '[%s:%s] %s' % (self.hostname, caller, msg) + self.soslog.error(lmsg) + + def log_debug(self, msg): + """Used to print and log debug messages""" + msg = self._sanitize_log_msg(msg) + caller = inspect.stack()[1][3] + msg = '[%s:%s] %s' % (self.hostname, caller, msg) + self.soslog.debug(msg) + + @property + def hostname(self): + if self._hostname and 'localhost' not in self._hostname: + return self._hostname + return self.address + + @property + def connected(self): + """Is the transport __currently__ connected to the node, or otherwise + capable of seamlessly running a command or similar on the node? + """ + return False + + @property + def remote_exec(self): + """This is the command string needed to leverage the remote transport + when executing commands. For example, for an SSH transport this would + be the `ssh ` string prepended to any command so that the + command is executed by the ssh binary. + + This is also referenced by the `remote_exec` parameter for policies + when loading a policy for a remote node + """ + return None + + def connect(self, password): + """Perform the connection steps in order to ensure that we are able to + connect to the node for all future operations. Note that this should + not provide an interactive shell at this time. + """ + if self._connect(password): + if not self._hostname: + self._get_hostname() + return True + return False + + def _connect(self, password): + """Actually perform the connection requirements. Should be overridden + by specific transports that subclass RemoteTransport + """ + raise NotImplementedError("Transport %s does not define connect" + % self.name) + + def reconnect(self, password): + """Attempts to reconnect to the node using the standard connect() + but does not do so indefinitely. This imposes a strict number of retry + attempts before failing out + """ + attempts = 1 + last_err = 'unknown' + while attempts < 5: + self.log_debug("Attempting reconnect (#%s) to node" % attempts) + try: + if self.connect(password): + return True + except Exception as err: + self.log_debug("Attempt #%s exception: %s" % (attempts, err)) + last_err = err + attempts += 1 + self.log_error("Unable to reconnect to node after 5 attempts, " + "aborting.") + raise ConnectionException("last exception from transport: %s" + % last_err) + + def disconnect(self): + """Perform whatever steps are necessary, if any, to terminate any + connection to the node + """ + try: + if self._disconnect(): + self.log_debug("Successfully disconnected from node") + else: + self.log_error("Unable to successfully disconnect, see log for" + " more details") + except Exception as err: + self.log_error("Failed to disconnect: %s" % err) + + def _disconnect(self): + raise NotImplementedError("Transport %s does not define disconnect" + % self.name) + + def run_command(self, cmd, timeout=180, need_root=False, env=None): + """Run a command on the node, returning its output and exit code. + This should return the exit code of the command being executed, not the + exit code of whatever mechanism the transport uses to execute that + command + + :param cmd: The command to run + :type cmd: ``str`` + + :param timeout: The maximum time in seconds to allow the cmd to run + :type timeout: ``int`` + + :param get_pty: Does ``cmd`` require a pty? + :type get_pty: ``bool`` + + :param need_root: Does ``cmd`` require root privileges? + :type neeed_root: ``bool`` + + :param env: Specify env vars to be passed to the ``cmd`` + :type env: ``dict`` + + :returns: Output of ``cmd`` and the exit code + :rtype: ``dict`` with keys ``output`` and ``status`` + """ + self.log_debug('Running command %s' % cmd) + # currently we only use/support the use of pexpect for handling the + # execution of these commands, as opposed to directly invoking + # subprocess.Popen() in conjunction with tools like sshpass. + # If that changes in the future, we'll add decision making logic here + # to route to the appropriate handler, but for now we just go straight + # to using pexpect + return self._run_command_with_pexpect(cmd, timeout, need_root, env) + + def _format_cmd_for_exec(self, cmd): + """Format the command in the way needed for the remote transport to + successfully execute it as one would when manually executing it + + :param cmd: The command being executed, as formatted by SoSNode + :type cmd: ``str`` + + + :returns: The command further formatted as needed by this + transport + :rtype: ``str`` + """ + cmd = "%s %s" % (self.remote_exec, quote(cmd)) + cmd = cmd.lstrip() + return cmd + + def _run_command_with_pexpect(self, cmd, timeout, need_root, env): + """Execute the command using pexpect, which allows us to more easily + handle prompts and timeouts compared to directly leveraging the + subprocess.Popen() method. + + :param cmd: The command to execute. This will be automatically + formatted to use the transport. + :type cmd: ``str`` + + :param timeout: The maximum time in seconds to run ``cmd`` + :type timeout: ``int`` + + :param need_root: Does ``cmd`` need to run as root or with sudo? + :type need_root: ``bool`` + + :param env: Any env vars that ``cmd`` should be run with + :type env: ``dict`` + """ + cmd = self._format_cmd_for_exec(cmd) + result = pexpect.spawn(cmd, encoding='utf-8', env=env) + + _expects = [pexpect.EOF, pexpect.TIMEOUT] + if need_root and self.opts.ssh_user != 'root': + _expects.extend([ + '\\[sudo\\] password for .*:', + 'Password:' + ]) + + index = result.expect(_expects, timeout=timeout) + + if index in [2, 3]: + self._send_pexpect_password(index, result) + index = result.expect(_expects, timeout=timeout) + + if index == 0: + out = result.before + result.close() + return {'status': result.exitstatus, 'output': out} + elif index == 1: + raise CommandTimeoutException(cmd) + + def _send_pexpect_password(self, index, result): + """Handle password prompts for sudo and su usage for non-root SSH users + + :param index: The index pexpect.spawn returned to match against + either a sudo or su prompt + :type index: ``int`` + + :param result: The spawn running the command + :type result: ``pexpect.spawn`` + """ + if index == 2: + if not self.opts.sudo_pw and not self.opt.nopasswd_sudo: + msg = ("Unable to run command: sudo password " + "required but not provided") + self.log_error(msg) + raise Exception(msg) + result.sendline(self.opts.sudo_pw) + elif index == 3: + if not self.opts.root_password: + msg = ("Unable to run command as root: no root password given") + self.log_error(msg) + raise Exception(msg) + result.sendline(self.opts.root_password) + + def _get_hostname(self): + """Determine the hostname of the node and set that for future reference + and logging + + :returns: The hostname of the system, per the `hostname` command + :rtype: ``str`` + """ + _out = self.run_command('hostname') + if _out['status'] == 0: + self._hostname = _out['output'].strip() + self.log_info("Hostname set to %s" % self._hostname) + return self._hostname + + def retrieve_file(self, fname, dest): + """Copy a remote file, fname, to dest on the local node + + :param fname: The name of the file to retrieve + :type fname: ``str`` + + :param dest: Where to save the file to locally + :type dest: ``str`` + + :returns: True if file was successfully copied from remote, or False + :rtype: ``bool`` + """ + return self._retrieve_file(fname, dest) + + def _retrieve_file(self, fname, dest): + raise NotImplementedError("Transport %s does not support file copying" + % self.name) + + def read_file(self, fname): + """Read the given file fname and return its contents + + :param fname: The name of the file to read + :type fname: ``str`` + + :returns: The content of the file + :rtype: ``str`` + """ + self.log_debug("Reading file %s" % fname) + return self._read_file(fname) + + def _read_file(self, fname): + res = self.run_command("cat %s" % fname, timeout=5) + if res['status'] == 0: + return res['output'] + else: + if 'No such file' in res['output']: + self.log_debug("File %s does not exist on node" + % fname) + else: + self.log_error("Error reading %s: %s" % + (fname, res['output'].split(':')[1:])) + return '' + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/transports/control_persist.py b/sos/collector/transports/control_persist.py new file mode 100644 index 00000000..3e848b41 --- /dev/null +++ b/sos/collector/transports/control_persist.py @@ -0,0 +1,199 @@ +# Copyright Red Hat 2021, Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + + +import os +import pexpect +import subprocess + +from sos.collector.transports import RemoteTransport +from sos.collector.exceptions import (InvalidPasswordException, + TimeoutPasswordAuthException, + PasswordRequestException, + AuthPermissionDeniedException, + ConnectionException, + ConnectionTimeoutException, + ControlSocketMissingException, + ControlPersistUnsupportedException) +from sos.utilities import sos_get_command_output + + +class SSHControlPersist(RemoteTransport): + """A transport for collect that leverages OpenSSH's Control Persist + functionality which uses control sockets to transparently keep a connection + open to the remote host without needing to rebuild the SSH connection for + each and every command executed on the node + """ + + name = 'control_persist' + + def _check_for_control_persist(self): + """Checks to see if the local system supported SSH ControlPersist. + + ControlPersist allows OpenSSH to keep a single open connection to a + remote host rather than building a new session each time. This is the + same feature that Ansible uses in place of paramiko, which we have a + need to drop in sos-collector. + + This check relies on feedback from the ssh binary. The command being + run should always generate stderr output, but depending on what that + output reads we can determine if ControlPersist is supported or not. + + For our purposes, a host that does not support ControlPersist is not + able to run sos-collector. + + Returns + True if ControlPersist is supported, else raise Exception. + """ + ssh_cmd = ['ssh', '-o', 'ControlPersist'] + cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = cmd.communicate() + err = err.decode('utf-8') + if 'Bad configuration option' in err or 'Usage:' in err: + raise ControlPersistUnsupportedException + return True + + def _connect(self, password=''): + """ + Using ControlPersist, create the initial connection to the node. + + This will generate an OpenSSH ControlPersist socket within the tmp + directory created or specified for sos-collector to use. + + At most, we will wait 30 seconds for a connection. This involves a 15 + second wait for the initial connection attempt, and a subsequent 15 + second wait for a response when we supply a password. + + Since we connect to nodes in parallel (using the --threads value), this + means that the time between 'Connecting to nodes...' and 'Beginning + collection of sosreports' that users see can be up to an amount of time + equal to 30*(num_nodes/threads) seconds. + + Returns + True if session is successfully opened, else raise Exception + """ + try: + self._check_for_control_persist() + except ControlPersistUnsupportedException: + self.log_error("OpenSSH ControlPersist is not locally supported. " + "Please update your OpenSSH installation.") + raise + self.log_info('Opening SSH session to create control socket') + self.control_path = ("%s/.sos-collector-%s" % (self.tmpdir, + self.address)) + self.ssh_cmd = '' + connected = False + ssh_key = '' + ssh_port = '' + if self.opts.ssh_port != 22: + ssh_port = "-p%s " % self.opts.ssh_port + if self.opts.ssh_key: + ssh_key = "-i%s" % self.opts.ssh_key + + cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto " + "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s " + "\"echo Connected\"" % (ssh_key, + ssh_port, + self.control_path, + self.opts.ssh_user, + self.address)) + res = pexpect.spawn(cmd, encoding='utf-8') + + connect_expects = [ + u'Connected', + u'password:', + u'.*Permission denied.*', + u'.* port .*: No route to host', + u'.*Could not resolve hostname.*', + pexpect.TIMEOUT + ] + + index = res.expect(connect_expects, timeout=15) + + if index == 0: + connected = True + elif index == 1: + if password: + pass_expects = [ + u'Connected', + u'Permission denied, please try again.', + pexpect.TIMEOUT + ] + res.sendline(password) + pass_index = res.expect(pass_expects, timeout=15) + if pass_index == 0: + connected = True + elif pass_index == 1: + # Note that we do not get an exitstatus here, so matching + # this line means an invalid password will be reported for + # both invalid passwords and invalid user names + raise InvalidPasswordException + elif pass_index == 2: + raise TimeoutPasswordAuthException + else: + raise PasswordRequestException + elif index == 2: + raise AuthPermissionDeniedException + elif index == 3: + raise ConnectionException(self.address, self.opts.ssh_port) + elif index == 4: + raise ConnectionException(self.address) + elif index == 5: + raise ConnectionTimeoutException + else: + raise Exception("Unknown error, client returned %s" % res.before) + if connected: + if not os.path.exists(self.control_path): + raise ControlSocketMissingException + self.log_debug("Successfully created control socket at %s" + % self.control_path) + return True + return False + + def _disconnect(self): + if os.path.exists(self.control_path): + try: + os.remove(self.control_path) + return True + except Exception as err: + self.log_debug("Could not disconnect properly: %s" % err) + return False + self.log_debug("Control socket not present when attempting to " + "terminate session") + + @property + def connected(self): + """Check if the SSH control socket exists + + The control socket is automatically removed by the SSH daemon in the + event that the last connection to the node was greater than the timeout + set by the ControlPersist option. This can happen for us if we are + collecting from a large number of nodes, and the timeout expires before + we start collection. + """ + return os.path.exists(self.control_path) + + @property + def remote_exec(self): + if not self.ssh_cmd: + self.ssh_cmd = "ssh -oControlPath=%s %s@%s" % ( + self.control_path, self.opts.ssh_user, self.address + ) + return self.ssh_cmd + + def _retrieve_file(self, fname, dest): + cmd = "/usr/bin/scp -oControlPath=%s %s@%s:%s %s" % ( + self.control_path, self.opts.ssh_user, self.address, fname, dest + ) + res = sos_get_command_output(cmd) + return res['status'] == 0 + +# vim: set et ts=4 sw=4 : diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py new file mode 100644 index 00000000..a4897f19 --- /dev/null +++ b/sos/collector/transports/local.py @@ -0,0 +1,49 @@ +# Copyright Red Hat 2021, Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +import os +import shutil + +from sos.collector.transports import RemoteTransport + + +class LocalTransport(RemoteTransport): + """A 'transport' to represent a local node. This allows us to more easily + extend SoSNode() without having a ton of 'if local' or similar checks in + more places than we actually need them + """ + + name = 'local_node' + + def _connect(self, password): + return True + + def _disconnect(self): + return True + + @property + def connected(self): + return True + + def _retrieve_file(self, fname, dest): + self.log_debug("Moving %s to %s" % (fname, dest)) + shutil.copy(fname, dest) + + def _format_cmd_for_exec(self, cmd): + return cmd + + def _read_file(self, fname): + if os.path.exists(fname): + with open(fname, 'r') as rfile: + return rfile.read() + self.log_debug("No such file: %s" % fname) + return '' + +# vim: set et ts=4 sw=4 : -- 2.31.1 From 07d96d52ef69b9f8fe1ef32a1b88089d31c33fe8 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Mon, 27 Sep 2021 12:28:27 -0400 Subject: [PATCH 2/2] [plugins] Update plugins to use new os.path.join wrapper Updates plugins to use the new `self.path_join()` wrapper for `os.path.join()` so that these plugins now account for non-/ sysroots for their collections. Signed-off-by: Jake Hunsaker --- sos/report/plugins/__init__.py | 2 +- sos/report/plugins/azure.py | 4 +-- sos/report/plugins/collectd.py | 2 +- sos/report/plugins/container_log.py | 2 +- sos/report/plugins/corosync.py | 2 +- sos/report/plugins/docker_distribution.py | 5 ++-- sos/report/plugins/ds.py | 3 +-- sos/report/plugins/elastic.py | 4 ++- sos/report/plugins/etcd.py | 2 +- sos/report/plugins/gluster.py | 3 ++- sos/report/plugins/jars.py | 2 +- sos/report/plugins/kdump.py | 4 +-- sos/report/plugins/libvirt.py | 2 +- sos/report/plugins/logs.py | 8 +++--- sos/report/plugins/manageiq.py | 12 ++++----- sos/report/plugins/numa.py | 9 +++---- sos/report/plugins/openstack_instack.py | 2 +- sos/report/plugins/openstack_nova.py | 2 +- sos/report/plugins/openvswitch.py | 13 ++++----- sos/report/plugins/origin.py | 28 +++++++++++--------- sos/report/plugins/ovirt.py | 2 +- sos/report/plugins/ovirt_engine_backup.py | 5 ++-- sos/report/plugins/ovn_central.py | 26 +++++++++--------- sos/report/plugins/ovn_host.py | 4 +-- sos/report/plugins/pacemaker.py | 4 +-- sos/report/plugins/pcp.py | 32 +++++++++++------------ sos/report/plugins/postfix.py | 2 +- sos/report/plugins/postgresql.py | 2 +- sos/report/plugins/powerpc.py | 2 +- sos/report/plugins/processor.py | 3 +-- sos/report/plugins/python.py | 4 +-- sos/report/plugins/sar.py | 5 ++-- sos/report/plugins/sos_extras.py | 2 +- sos/report/plugins/ssh.py | 7 +++-- sos/report/plugins/unpackaged.py | 4 +-- sos/report/plugins/watchdog.py | 13 +++++---- sos/report/plugins/yum.py | 2 +- 37 files changed, 115 insertions(+), 115 deletions(-) diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py index 1f84bca4..ec138f83 100644 --- a/sos/report/plugins/__init__.py +++ b/sos/report/plugins/__init__.py @@ -2897,7 +2897,7 @@ class Plugin(): try: cmd_line_paths = glob.glob(cmd_line_glob) for path in cmd_line_paths: - f = open(path, 'r') + f = open(self.path_join(path), 'r') cmd_line = f.read().strip() if process in cmd_line: status = True diff --git a/sos/report/plugins/azure.py b/sos/report/plugins/azure.py index 45971a61..90999b3f 100644 --- a/sos/report/plugins/azure.py +++ b/sos/report/plugins/azure.py @@ -8,8 +8,8 @@ # # See the LICENSE file in the source distribution for further information. -import os from sos.report.plugins import Plugin, UbuntuPlugin, RedHatPlugin +import os class Azure(Plugin, UbuntuPlugin): @@ -38,7 +38,7 @@ class Azure(Plugin, UbuntuPlugin): for path, subdirs, files in os.walk("/var/log/azure"): for name in files: - self.add_copy_spec(os.path.join(path, name), sizelimit=limit) + self.add_copy_spec(self.path_join(path, name), sizelimit=limit) self.add_cmd_output(( 'curl -s -H Metadata:true ' diff --git a/sos/report/plugins/collectd.py b/sos/report/plugins/collectd.py index 80d4b00a..8584adf9 100644 --- a/sos/report/plugins/collectd.py +++ b/sos/report/plugins/collectd.py @@ -33,7 +33,7 @@ class Collectd(Plugin, IndependentPlugin): p = re.compile('^LoadPlugin.*') try: - with open("/etc/collectd.conf") as f: + with open(self.path_join("/etc/collectd.conf"), 'r') as f: for line in f: if p.match(line): self.add_alert("Active Plugin found: %s" % diff --git a/sos/report/plugins/container_log.py b/sos/report/plugins/container_log.py index 14e0b7d8..e8dedad2 100644 --- a/sos/report/plugins/container_log.py +++ b/sos/report/plugins/container_log.py @@ -29,6 +29,6 @@ class ContainerLog(Plugin, IndependentPlugin): """Collect *.log files from subdirs of passed root path """ for dirName, _, _ in os.walk(root): - self.add_copy_spec(os.path.join(dirName, '*.log')) + self.add_copy_spec(self.path_join(dirName, '*.log')) # vim: set et ts=4 sw=4 : diff --git a/sos/report/plugins/corosync.py b/sos/report/plugins/corosync.py index d74086e3..10e096c6 100644 --- a/sos/report/plugins/corosync.py +++ b/sos/report/plugins/corosync.py @@ -47,7 +47,7 @@ class Corosync(Plugin): # (it isnt precise but sufficient) pattern = r'^\s*(logging.)?logfile:\s*(\S+)$' try: - with open("/etc/corosync/corosync.conf") as f: + with open(self.path_join("/etc/corosync/corosync.conf"), 'r') as f: for line in f: if re.match(pattern, line): self.add_copy_spec(re.search(pattern, line).group(2)) diff --git a/sos/report/plugins/docker_distribution.py b/sos/report/plugins/docker_distribution.py index 84222ff7..e760f252 100644 --- a/sos/report/plugins/docker_distribution.py +++ b/sos/report/plugins/docker_distribution.py @@ -19,8 +19,9 @@ class DockerDistribution(Plugin): def setup(self): self.add_copy_spec('/etc/docker-distribution/') self.add_journal('docker-distribution') - if self.path_exists('/etc/docker-distribution/registry/config.yml'): - with open('/etc/docker-distribution/registry/config.yml') as f: + conf = self.path_join('/etc/docker-distribution/registry/config.yml') + if self.path_exists(conf): + with open(conf) as f: for line in f: if 'rootdirectory' in line: loc = line.split()[1] diff --git a/sos/report/plugins/ds.py b/sos/report/plugins/ds.py index addf49e1..43feb21e 100644 --- a/sos/report/plugins/ds.py +++ b/sos/report/plugins/ds.py @@ -11,7 +11,6 @@ # See the LICENSE file in the source distribution for further information. from sos.report.plugins import Plugin, RedHatPlugin -import os class DirectoryServer(Plugin, RedHatPlugin): @@ -47,7 +46,7 @@ class DirectoryServer(Plugin, RedHatPlugin): try: for d in self.listdir("/etc/dirsrv"): if d[0:5] == 'slapd': - certpath = os.path.join("/etc/dirsrv", d) + certpath = self.path_join("/etc/dirsrv", d) self.add_cmd_output("certutil -L -d %s" % certpath) self.add_cmd_output("dsctl %s healthcheck" % d) except OSError: diff --git a/sos/report/plugins/elastic.py b/sos/report/plugins/elastic.py index ad9a06ff..da2662bc 100644 --- a/sos/report/plugins/elastic.py +++ b/sos/report/plugins/elastic.py @@ -39,7 +39,9 @@ class Elastic(Plugin, IndependentPlugin): return hostname, port def setup(self): - els_config_file = "/etc/elasticsearch/elasticsearch.yml" + els_config_file = self.path_join( + "/etc/elasticsearch/elasticsearch.yml" + ) self.add_copy_spec(els_config_file) if self.get_option("all_logs"): diff --git a/sos/report/plugins/etcd.py b/sos/report/plugins/etcd.py index fd4f67eb..fe017e9f 100644 --- a/sos/report/plugins/etcd.py +++ b/sos/report/plugins/etcd.py @@ -62,7 +62,7 @@ class etcd(Plugin, RedHatPlugin): def get_etcd_url(self): try: - with open('/etc/etcd/etcd.conf', 'r') as ef: + with open(self.path_join('/etc/etcd/etcd.conf'), 'r') as ef: for line in ef: if line.startswith('ETCD_LISTEN_CLIENT_URLS'): return line.split('=')[1].replace('"', '').strip() diff --git a/sos/report/plugins/gluster.py b/sos/report/plugins/gluster.py index a44ffeb7..e518e3d3 100644 --- a/sos/report/plugins/gluster.py +++ b/sos/report/plugins/gluster.py @@ -35,9 +35,10 @@ class Gluster(Plugin, RedHatPlugin): ] for statedump_file in statedump_entries: statedumps_present = statedumps_present+1 + _spath = self.path_join(name_dir, statedump_file) ret = -1 while ret == -1: - with open(name_dir + '/' + statedump_file, 'r') as sfile: + with open(_spath, 'r') as sfile: last_line = sfile.readlines()[-1] ret = string.count(last_line, 'DUMP_END_TIME') diff --git a/sos/report/plugins/jars.py b/sos/report/plugins/jars.py index 0d3cf37e..4b98684e 100644 --- a/sos/report/plugins/jars.py +++ b/sos/report/plugins/jars.py @@ -63,7 +63,7 @@ class Jars(Plugin, RedHatPlugin): for location in locations: for dirpath, _, filenames in os.walk(location): for filename in filenames: - path = os.path.join(dirpath, filename) + path = self.path_join(dirpath, filename) if Jars.is_jar(path): jar_paths.append(path) diff --git a/sos/report/plugins/kdump.py b/sos/report/plugins/kdump.py index 757c2736..66565664 100644 --- a/sos/report/plugins/kdump.py +++ b/sos/report/plugins/kdump.py @@ -40,7 +40,7 @@ class RedHatKDump(KDump, RedHatPlugin): packages = ('kexec-tools',) def fstab_parse_fs(self, device): - with open('/etc/fstab', 'r') as fp: + with open(self.path_join('/etc/fstab'), 'r') as fp: for line in fp: if line.startswith((device)): return line.split()[1].rstrip('/') @@ -50,7 +50,7 @@ class RedHatKDump(KDump, RedHatPlugin): fs = "" path = "/var/crash" - with open('/etc/kdump.conf', 'r') as fp: + with open(self.path_join('/etc/kdump.conf'), 'r') as fp: for line in fp: if line.startswith("path"): path = line.split()[1] diff --git a/sos/report/plugins/libvirt.py b/sos/report/plugins/libvirt.py index be8120ff..5caa5802 100644 --- a/sos/report/plugins/libvirt.py +++ b/sos/report/plugins/libvirt.py @@ -55,7 +55,7 @@ class Libvirt(Plugin, IndependentPlugin): else: self.add_copy_spec("/var/log/libvirt") - if self.path_exists(self.join_sysroot(libvirt_keytab)): + if self.path_exists(self.path_join(libvirt_keytab)): self.add_cmd_output("klist -ket %s" % libvirt_keytab) self.add_cmd_output("ls -lR /var/lib/libvirt/qemu") diff --git a/sos/report/plugins/logs.py b/sos/report/plugins/logs.py index ee6bb98d..606e574a 100644 --- a/sos/report/plugins/logs.py +++ b/sos/report/plugins/logs.py @@ -24,15 +24,15 @@ class Logs(Plugin, IndependentPlugin): since = self.get_option("since") if self.path_exists('/etc/rsyslog.conf'): - with open('/etc/rsyslog.conf', 'r') as conf: + with open(self.path_join('/etc/rsyslog.conf'), 'r') as conf: for line in conf.readlines(): if line.startswith('$IncludeConfig'): confs += glob.glob(line.split()[1]) for conf in confs: - if not self.path_exists(conf): + if not self.path_exists(self.path_join(conf)): continue - config = self.join_sysroot(conf) + config = self.path_join(conf) logs += self.do_regex_find_all(r"^\S+\s+(-?\/.*$)\s+", config) for i in logs: @@ -60,7 +60,7 @@ class Logs(Plugin, IndependentPlugin): # - there is some data present, either persistent or runtime only # - systemd-journald service exists # otherwise fallback to collecting few well known logfiles directly - journal = any([self.path_exists(p + "/log/journal/") + journal = any([self.path_exists(self.path_join(p, "log/journal/")) for p in ["/var", "/run"]]) if journal and self.is_service("systemd-journald"): self.add_journal(since=since, tags='journal_full', priority=100) diff --git a/sos/report/plugins/manageiq.py b/sos/report/plugins/manageiq.py index 27ad6ef4..e20c4a2a 100644 --- a/sos/report/plugins/manageiq.py +++ b/sos/report/plugins/manageiq.py @@ -58,7 +58,7 @@ class ManageIQ(Plugin, RedHatPlugin): # Log files to collect from miq_dir/log/ miq_log_dir = os.path.join(miq_dir, "log") - miq_main_log_files = [ + miq_main_logs = [ 'ansible_tower.log', 'top_output.log', 'evm.log', @@ -81,16 +81,16 @@ class ManageIQ(Plugin, RedHatPlugin): self.add_copy_spec(list(self.files)) self.add_copy_spec([ - os.path.join(self.miq_conf_dir, x) for x in self.miq_conf_files + self.path_join(self.miq_conf_dir, x) for x in self.miq_conf_files ]) # Collect main log files without size limit. self.add_copy_spec([ - os.path.join(self.miq_log_dir, x) for x in self.miq_main_log_files + self.path_join(self.miq_log_dir, x) for x in self.miq_main_logs ], sizelimit=0) self.add_copy_spec([ - os.path.join(self.miq_log_dir, x) for x in self.miq_log_files + self.path_join(self.miq_log_dir, x) for x in self.miq_log_files ]) self.add_copy_spec([ @@ -101,8 +101,8 @@ class ManageIQ(Plugin, RedHatPlugin): if environ.get("APPLIANCE_PG_DATA"): pg_dir = environ.get("APPLIANCE_PG_DATA") self.add_copy_spec([ - os.path.join(pg_dir, 'pg_log'), - os.path.join(pg_dir, 'postgresql.conf') + self.path_join(pg_dir, 'pg_log'), + self.path_join(pg_dir, 'postgresql.conf') ]) # vim: set et ts=4 sw=4 : diff --git a/sos/report/plugins/numa.py b/sos/report/plugins/numa.py index 0faef8d2..9094baef 100644 --- a/sos/report/plugins/numa.py +++ b/sos/report/plugins/numa.py @@ -9,7 +9,6 @@ # See the LICENSE file in the source distribution for further information. from sos.report.plugins import Plugin, IndependentPlugin -import os.path class Numa(Plugin, IndependentPlugin): @@ -42,10 +41,10 @@ class Numa(Plugin, IndependentPlugin): ]) self.add_copy_spec([ - os.path.join(numa_path, "node*/meminfo"), - os.path.join(numa_path, "node*/cpulist"), - os.path.join(numa_path, "node*/distance"), - os.path.join(numa_path, "node*/hugepages/hugepages-*/*") + self.path_join(numa_path, "node*/meminfo"), + self.path_join(numa_path, "node*/cpulist"), + self.path_join(numa_path, "node*/distance"), + self.path_join(numa_path, "node*/hugepages/hugepages-*/*") ]) # vim: set et ts=4 sw=4 : diff --git a/sos/report/plugins/openstack_instack.py b/sos/report/plugins/openstack_instack.py index 7c56c162..5b4f7d41 100644 --- a/sos/report/plugins/openstack_instack.py +++ b/sos/report/plugins/openstack_instack.py @@ -68,7 +68,7 @@ class OpenStackInstack(Plugin): p = uc_config.get(opt) if p: if not os.path.isabs(p): - p = os.path.join('/home/stack', p) + p = self.path_join('/home/stack', p) self.add_copy_spec(p) except Exception: pass diff --git a/sos/report/plugins/openstack_nova.py b/sos/report/plugins/openstack_nova.py index 53210c48..f840081e 100644 --- a/sos/report/plugins/openstack_nova.py +++ b/sos/report/plugins/openstack_nova.py @@ -103,7 +103,7 @@ class OpenStackNova(Plugin): "nova-scheduler.log*" ] for novalog in novalogs: - self.add_copy_spec(os.path.join(novadir, novalog)) + self.add_copy_spec(self.path_join(novadir, novalog)) self.add_copy_spec([ "/etc/nova/", diff --git a/sos/report/plugins/openvswitch.py b/sos/report/plugins/openvswitch.py index 003596c6..179d1532 100644 --- a/sos/report/plugins/openvswitch.py +++ b/sos/report/plugins/openvswitch.py @@ -10,7 +10,6 @@ from sos.report.plugins import Plugin, RedHatPlugin, DebianPlugin, UbuntuPlugin -from os.path import join as path_join from os import environ import re @@ -65,7 +64,9 @@ class OpenVSwitch(Plugin): log_dirs.append(environ.get('OVS_LOGDIR')) if not all_logs: - self.add_copy_spec([path_join(ld, '*.log') for ld in log_dirs]) + self.add_copy_spec([ + self.path_join(ld, '*.log') for ld in log_dirs + ]) else: self.add_copy_spec(log_dirs) @@ -76,13 +77,13 @@ class OpenVSwitch(Plugin): ]) self.add_copy_spec([ - path_join('/usr/local/etc/openvswitch', 'conf.db'), - path_join('/etc/openvswitch', 'conf.db'), - path_join('/var/lib/openvswitch', 'conf.db'), + self.path_join('/usr/local/etc/openvswitch', 'conf.db'), + self.path_join('/etc/openvswitch', 'conf.db'), + self.path_join('/var/lib/openvswitch', 'conf.db'), ]) ovs_dbdir = environ.get('OVS_DBDIR') if ovs_dbdir: - self.add_copy_spec(path_join(ovs_dbdir, 'conf.db')) + self.add_copy_spec(self.path_join(ovs_dbdir, 'conf.db')) self.add_cmd_output([ # The '-t 5' adds an upper bound on how long to wait to connect diff --git a/sos/report/plugins/origin.py b/sos/report/plugins/origin.py index f9cc32c1..7df9c019 100644 --- a/sos/report/plugins/origin.py +++ b/sos/report/plugins/origin.py @@ -69,20 +69,21 @@ class OpenShiftOrigin(Plugin): def is_static_etcd(self): """Determine if we are on a node running etcd""" - return self.path_exists(os.path.join(self.static_pod_dir, "etcd.yaml")) + return self.path_exists(self.path_join(self.static_pod_dir, + "etcd.yaml")) def is_static_pod_compatible(self): """Determine if a node is running static pods""" return self.path_exists(self.static_pod_dir) def setup(self): - bstrap_node_cfg = os.path.join(self.node_base_dir, - "bootstrap-" + self.node_cfg_file) - bstrap_kubeconfig = os.path.join(self.node_base_dir, - "bootstrap.kubeconfig") - node_certs = os.path.join(self.node_base_dir, "certs", "*") - node_client_ca = os.path.join(self.node_base_dir, "client-ca.crt") - admin_cfg = os.path.join(self.master_base_dir, "admin.kubeconfig") + bstrap_node_cfg = self.path_join(self.node_base_dir, + "bootstrap-" + self.node_cfg_file) + bstrap_kubeconfig = self.path_join(self.node_base_dir, + "bootstrap.kubeconfig") + node_certs = self.path_join(self.node_base_dir, "certs", "*") + node_client_ca = self.path_join(self.node_base_dir, "client-ca.crt") + admin_cfg = self.path_join(self.master_base_dir, "admin.kubeconfig") oc_cmd_admin = "%s --config=%s" % ("oc", admin_cfg) static_pod_logs_cmd = "master-logs" @@ -92,11 +93,12 @@ class OpenShiftOrigin(Plugin): self.add_copy_spec([ self.master_cfg, self.master_env, - os.path.join(self.master_base_dir, "*.crt"), + self.path_join(self.master_base_dir, "*.crt"), ]) if self.is_static_pod_compatible(): - self.add_copy_spec(os.path.join(self.static_pod_dir, "*.yaml")) + self.add_copy_spec(self.path_join(self.static_pod_dir, + "*.yaml")) self.add_cmd_output([ "%s api api" % static_pod_logs_cmd, "%s controllers controllers" % static_pod_logs_cmd, @@ -177,9 +179,9 @@ class OpenShiftOrigin(Plugin): node_client_ca, bstrap_node_cfg, bstrap_kubeconfig, - os.path.join(self.node_base_dir, "*.crt"), - os.path.join(self.node_base_dir, "resolv.conf"), - os.path.join(self.node_base_dir, "node-dnsmasq.conf"), + self.path_join(self.node_base_dir, "*.crt"), + self.path_join(self.node_base_dir, "resolv.conf"), + self.path_join(self.node_base_dir, "node-dnsmasq.conf"), ]) self.add_journal(units="atomic-openshift-node") diff --git a/sos/report/plugins/ovirt.py b/sos/report/plugins/ovirt.py index 1de606be..09647bf1 100644 --- a/sos/report/plugins/ovirt.py +++ b/sos/report/plugins/ovirt.py @@ -216,7 +216,7 @@ class Ovirt(Plugin, RedHatPlugin): "isouploader.conf" ] for conf_file in passwd_files: - conf_path = os.path.join("/etc/ovirt-engine", conf_file) + conf_path = self.path_join("/etc/ovirt-engine", conf_file) self.do_file_sub( conf_path, r"passwd=(.*)", diff --git a/sos/report/plugins/ovirt_engine_backup.py b/sos/report/plugins/ovirt_engine_backup.py index 676e419e..7fb6a5c7 100644 --- a/sos/report/plugins/ovirt_engine_backup.py +++ b/sos/report/plugins/ovirt_engine_backup.py @@ -8,7 +8,6 @@ # # See the LICENSE file in the source distribution for further information. -import os from sos.report.plugins import (Plugin, RedHatPlugin) from datetime import datetime @@ -29,11 +28,11 @@ class oVirtEngineBackup(Plugin, RedHatPlugin): def setup(self): now = datetime.now().strftime("%Y%m%d%H%M%S") - backup_filename = os.path.join( + backup_filename = self.path_join( self.get_option("backupdir"), "engine-db-backup-%s.tar.gz" % (now) ) - log_filename = os.path.join( + log_filename = self.path_join( self.get_option("backupdir"), "engine-db-backup-%s.log" % (now) ) diff --git a/sos/report/plugins/ovn_central.py b/sos/report/plugins/ovn_central.py index d6647aad..914eda60 100644 --- a/sos/report/plugins/ovn_central.py +++ b/sos/report/plugins/ovn_central.py @@ -42,7 +42,7 @@ class OVNCentral(Plugin): return else: try: - with open(filename, 'r') as f: + with open(self.path_join(filename), 'r') as f: try: db = json.load(f) except Exception: @@ -71,13 +71,13 @@ class OVNCentral(Plugin): ovs_rundir = os.environ.get('OVS_RUNDIR') for pidfile in ['ovnnb_db.pid', 'ovnsb_db.pid', 'ovn-northd.pid']: self.add_copy_spec([ - os.path.join('/var/lib/openvswitch/ovn', pidfile), - os.path.join('/usr/local/var/run/openvswitch', pidfile), - os.path.join('/run/openvswitch/', pidfile), + self.path_join('/var/lib/openvswitch/ovn', pidfile), + self.path_join('/usr/local/var/run/openvswitch', pidfile), + self.path_join('/run/openvswitch/', pidfile), ]) if ovs_rundir: - self.add_copy_spec(os.path.join(ovs_rundir, pidfile)) + self.add_copy_spec(self.path_join(ovs_rundir, pidfile)) if self.get_option("all_logs"): self.add_copy_spec("/var/log/ovn/") @@ -104,7 +104,7 @@ class OVNCentral(Plugin): schema_dir = '/usr/share/openvswitch' - nb_tables = self.get_tables_from_schema(os.path.join( + nb_tables = self.get_tables_from_schema(self.path_join( schema_dir, 'ovn-nb.ovsschema')) self.add_database_output(nb_tables, nbctl_cmds, 'ovn-nbctl') @@ -116,7 +116,7 @@ class OVNCentral(Plugin): format(self.ovn_sbdb_sock_path), "output": "Leader: self"} if self.test_predicate(self, pred=SoSPredicate(self, cmd_outputs=co)): - sb_tables = self.get_tables_from_schema(os.path.join( + sb_tables = self.get_tables_from_schema(self.path_join( schema_dir, 'ovn-sb.ovsschema'), ['Logical_Flow']) self.add_database_output(sb_tables, sbctl_cmds, 'ovn-sbctl') cmds += sbctl_cmds @@ -134,14 +134,14 @@ class OVNCentral(Plugin): ovs_dbdir = os.environ.get('OVS_DBDIR') for dbfile in ['ovnnb_db.db', 'ovnsb_db.db']: self.add_copy_spec([ - os.path.join('/var/lib/openvswitch/ovn', dbfile), - os.path.join('/usr/local/etc/openvswitch', dbfile), - os.path.join('/etc/openvswitch', dbfile), - os.path.join('/var/lib/openvswitch', dbfile), - os.path.join('/var/lib/ovn/etc', dbfile), + self.path_join('/var/lib/openvswitch/ovn', dbfile), + self.path_join('/usr/local/etc/openvswitch', dbfile), + self.path_join('/etc/openvswitch', dbfile), + self.path_join('/var/lib/openvswitch', dbfile), + self.path_join('/var/lib/ovn/etc', dbfile) ]) if ovs_dbdir: - self.add_copy_spec(os.path.join(ovs_dbdir, dbfile)) + self.add_copy_spec(self.path_join(ovs_dbdir, dbfile)) self.add_journal(units="ovn-northd") diff --git a/sos/report/plugins/ovn_host.py b/sos/report/plugins/ovn_host.py index 3742c49f..78604a15 100644 --- a/sos/report/plugins/ovn_host.py +++ b/sos/report/plugins/ovn_host.py @@ -35,7 +35,7 @@ class OVNHost(Plugin): else: self.add_copy_spec("/var/log/ovn/*.log") - self.add_copy_spec([os.path.join(pp, pidfile) for pp in pid_paths]) + self.add_copy_spec([self.path_join(pp, pidfile) for pp in pid_paths]) self.add_copy_spec('/etc/sysconfig/ovn-controller') @@ -49,7 +49,7 @@ class OVNHost(Plugin): def check_enabled(self): return (any([self.path_isfile( - os.path.join(pp, pidfile)) for pp in pid_paths]) or + self.path_join(pp, pidfile)) for pp in pid_paths]) or super(OVNHost, self).check_enabled()) diff --git a/sos/report/plugins/pacemaker.py b/sos/report/plugins/pacemaker.py index 497807ff..6ce80881 100644 --- a/sos/report/plugins/pacemaker.py +++ b/sos/report/plugins/pacemaker.py @@ -129,7 +129,7 @@ class Pacemaker(Plugin): class DebianPacemaker(Pacemaker, DebianPlugin, UbuntuPlugin): def setup(self): - self.envfile = "/etc/default/pacemaker" + self.envfile = self.path_join("/etc/default/pacemaker") self.setup_crm_shell() self.setup_pcs() super(DebianPacemaker, self).setup() @@ -141,7 +141,7 @@ class DebianPacemaker(Pacemaker, DebianPlugin, UbuntuPlugin): class RedHatPacemaker(Pacemaker, RedHatPlugin): def setup(self): - self.envfile = "/etc/sysconfig/pacemaker" + self.envfile = self.path_join("/etc/sysconfig/pacemaker") self.setup_pcs() self.add_copy_spec("/etc/sysconfig/sbd") super(RedHatPacemaker, self).setup() diff --git a/sos/report/plugins/pcp.py b/sos/report/plugins/pcp.py index 9707d7a9..ad902332 100644 --- a/sos/report/plugins/pcp.py +++ b/sos/report/plugins/pcp.py @@ -41,7 +41,7 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): total_size = 0 for dirpath, dirnames, filenames in os.walk(path): for f in filenames: - fp = os.path.join(dirpath, f) + fp = self.path_join(dirpath, f) total_size += os.path.getsize(fp) return total_size @@ -86,7 +86,7 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): # unconditionally. Obviously if someone messes up their /etc/pcp.conf # in a ridiculous way (i.e. setting PCP_SYSCONF_DIR to '/') this will # break badly. - var_conf_dir = os.path.join(self.pcp_var_dir, 'config') + var_conf_dir = self.path_join(self.pcp_var_dir, 'config') self.add_copy_spec([ self.pcp_sysconf_dir, self.pcp_conffile, @@ -98,10 +98,10 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): # rpms. It does not make up for a lot of size but it contains many # files self.add_forbidden_path([ - os.path.join(var_conf_dir, 'pmchart'), - os.path.join(var_conf_dir, 'pmlogconf'), - os.path.join(var_conf_dir, 'pmieconf'), - os.path.join(var_conf_dir, 'pmlogrewrite') + self.path_join(var_conf_dir, 'pmchart'), + self.path_join(var_conf_dir, 'pmlogconf'), + self.path_join(var_conf_dir, 'pmieconf'), + self.path_join(var_conf_dir, 'pmlogrewrite') ]) # Take PCP_LOG_DIR/pmlogger/`hostname` + PCP_LOG_DIR/pmmgr/`hostname` @@ -121,13 +121,13 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): # we would collect everything if self.pcp_hostname != '': # collect pmmgr logs up to 'pmmgrlogs' size limit - path = os.path.join(self.pcp_log_dir, 'pmmgr', - self.pcp_hostname, '*') + path = self.path_join(self.pcp_log_dir, 'pmmgr', + self.pcp_hostname, '*') self.add_copy_spec(path, sizelimit=self.sizelimit, tailit=False) # collect newest pmlogger logs up to 'pmloggerfiles' count files_collected = 0 - path = os.path.join(self.pcp_log_dir, 'pmlogger', - self.pcp_hostname, '*') + path = self.path_join(self.pcp_log_dir, 'pmlogger', + self.pcp_hostname, '*') pmlogger_ls = self.exec_cmd("ls -t1 %s" % path) if pmlogger_ls['status'] == 0: for line in pmlogger_ls['output'].splitlines(): @@ -138,15 +138,15 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): self.add_copy_spec([ # Collect PCP_LOG_DIR/pmcd and PCP_LOG_DIR/NOTICES - os.path.join(self.pcp_log_dir, 'pmcd'), - os.path.join(self.pcp_log_dir, 'NOTICES*'), + self.path_join(self.pcp_log_dir, 'pmcd'), + self.path_join(self.pcp_log_dir, 'NOTICES*'), # Collect PCP_VAR_DIR/pmns - os.path.join(self.pcp_var_dir, 'pmns'), + self.path_join(self.pcp_var_dir, 'pmns'), # Also collect any other log and config files # (as suggested by fche) - os.path.join(self.pcp_log_dir, '*/*.log*'), - os.path.join(self.pcp_log_dir, '*/*/*.log*'), - os.path.join(self.pcp_log_dir, '*/*/config*') + self.path_join(self.pcp_log_dir, '*/*.log*'), + self.path_join(self.pcp_log_dir, '*/*/*.log*'), + self.path_join(self.pcp_log_dir, '*/*/config*') ]) # Collect a summary for the current day diff --git a/sos/report/plugins/postfix.py b/sos/report/plugins/postfix.py index 8f584430..3ca0c4ad 100644 --- a/sos/report/plugins/postfix.py +++ b/sos/report/plugins/postfix.py @@ -41,7 +41,7 @@ class Postfix(Plugin): ] fp = [] try: - with open('/etc/postfix/main.cf', 'r') as cffile: + with open(self.path_join('/etc/postfix/main.cf'), 'r') as cffile: for line in cffile.readlines(): # ignore comments and take the first word after '=' if line.startswith('#'): diff --git a/sos/report/plugins/postgresql.py b/sos/report/plugins/postgresql.py index bec0b019..00824db7 100644 --- a/sos/report/plugins/postgresql.py +++ b/sos/report/plugins/postgresql.py @@ -124,7 +124,7 @@ class RedHatPostgreSQL(PostgreSQL, SCLPlugin): # copy PG_VERSION and postmaster.opts for f in ["PG_VERSION", "postmaster.opts"]: - self.add_copy_spec(os.path.join(_dir, "data", f)) + self.add_copy_spec(self.path_join(_dir, "data", f)) class DebianPostgreSQL(PostgreSQL, DebianPlugin, UbuntuPlugin): diff --git a/sos/report/plugins/powerpc.py b/sos/report/plugins/powerpc.py index 4fb4f87c..50f88650 100644 --- a/sos/report/plugins/powerpc.py +++ b/sos/report/plugins/powerpc.py @@ -22,7 +22,7 @@ class PowerPC(Plugin, IndependentPlugin): def setup(self): try: - with open('/proc/cpuinfo', 'r') as fp: + with open(self.path_join('/proc/cpuinfo'), 'r') as fp: contents = fp.read() ispSeries = "pSeries" in contents isPowerNV = "PowerNV" in contents diff --git a/sos/report/plugins/processor.py b/sos/report/plugins/processor.py index 2df2dc9a..c3d8930c 100644 --- a/sos/report/plugins/processor.py +++ b/sos/report/plugins/processor.py @@ -7,7 +7,6 @@ # See the LICENSE file in the source distribution for further information. from sos.report.plugins import Plugin, IndependentPlugin -import os class Processor(Plugin, IndependentPlugin): @@ -41,7 +40,7 @@ class Processor(Plugin, IndependentPlugin): # cumulative directory size exceeds 25MB or even 100MB. cdirs = self.listdir('/sys/devices/system/cpu') self.add_copy_spec([ - os.path.join('/sys/devices/system/cpu', cdir) for cdir in cdirs + self.path_join('/sys/devices/system/cpu', cdir) for cdir in cdirs ]) self.add_cmd_output([ diff --git a/sos/report/plugins/python.py b/sos/report/plugins/python.py index e2ab39ab..a8ec0cd8 100644 --- a/sos/report/plugins/python.py +++ b/sos/report/plugins/python.py @@ -68,9 +68,9 @@ class RedHatPython(Python, RedHatPlugin): ] for py_path in py_paths: - for root, _, files in os.walk(py_path): + for root, _, files in os.walk(self.path_join(py_path)): for file_ in files: - filepath = os.path.join(root, file_) + filepath = self.path_join(root, file_) if filepath.endswith('.py'): try: with open(filepath, 'rb') as f: diff --git a/sos/report/plugins/sar.py b/sos/report/plugins/sar.py index 669f5d7b..b60005b1 100644 --- a/sos/report/plugins/sar.py +++ b/sos/report/plugins/sar.py @@ -7,7 +7,6 @@ # See the LICENSE file in the source distribution for further information. from sos.report.plugins import Plugin, RedHatPlugin, DebianPlugin, UbuntuPlugin -import os import re @@ -24,7 +23,7 @@ class Sar(Plugin,): "", False)] def setup(self): - self.add_copy_spec(os.path.join(self.sa_path, '*'), + self.add_copy_spec(self.path_join(self.sa_path, '*'), sizelimit=0 if self.get_option("all_sar") else None, tailit=False) @@ -44,7 +43,7 @@ class Sar(Plugin,): # as option for sadc for fname in dir_list: if sa_regex.match(fname): - sa_data_path = os.path.join(self.sa_path, fname) + sa_data_path = self.path_join(self.sa_path, fname) sar_filename = 'sar' + fname[2:] if sar_filename not in dir_list: sar_cmd = 'sh -c "sar -A -f %s"' % sa_data_path diff --git a/sos/report/plugins/sos_extras.py b/sos/report/plugins/sos_extras.py index ffde4138..55bc4dc0 100644 --- a/sos/report/plugins/sos_extras.py +++ b/sos/report/plugins/sos_extras.py @@ -58,7 +58,7 @@ class SosExtras(Plugin, IndependentPlugin): for path, dirlist, filelist in os.walk(self.extras_dir): for f in filelist: - _file = os.path.join(path, f) + _file = self.path_join(path, f) self._log_warn("Collecting data from extras file %s" % _file) try: for line in open(_file).read().splitlines(): diff --git a/sos/report/plugins/ssh.py b/sos/report/plugins/ssh.py index 971cda8b..9ac9dec0 100644 --- a/sos/report/plugins/ssh.py +++ b/sos/report/plugins/ssh.py @@ -42,7 +41,7 @@ class Ssh(Plugin, IndependentPlugin): try: for sshcfg in sshcfgs: tag = sshcfg.split('/')[-1] - with open(sshcfg, 'r') as cfgfile: + with open(self.path_join(sshcfg), 'r') as cfgfile: for line in cfgfile: # skip empty lines and comments if len(line.split()) == 0 or line.startswith('#'): diff --git a/sos/report/plugins/unpackaged.py b/sos/report/plugins/unpackaged.py index 9205e53f..772b1d1f 100644 --- a/sos/report/plugins/unpackaged.py +++ b/sos/report/plugins/unpackaged.py @@ -40,7 +40,7 @@ class Unpackaged(Plugin, RedHatPlugin): for e in exclude: dirs[:] = [d for d in dirs if d not in e] for name in files: - path = os.path.join(root, name) + path = self.path_join(root, name) try: if stat.S_ISLNK(os.lstat(path).st_mode): path = Path(path).resolve() @@ -49,7 +49,7 @@ class Unpackaged(Plugin, RedHatPlugin): file_list.append(os.path.realpath(path)) for name in dirs: file_list.append(os.path.realpath( - os.path.join(root, name))) + self.path_join(root, name))) return file_list diff --git a/sos/report/plugins/watchdog.py b/sos/report/plugins/watchdog.py index 1bf3f4cb..bf2dc9cb 100644 --- a/sos/report/plugins/watchdog.py +++ b/sos/report/plugins/watchdog.py @@ -11,7 +11,6 @@ from sos.report.plugins import Plugin, RedHatPlugin from glob import glob -import os class Watchdog(Plugin, RedHatPlugin): @@ -56,8 +55,8 @@ class Watchdog(Plugin, RedHatPlugin): Collect configuration files, custom executables for test-binary and repair-binary, and stdout/stderr logs. """ - conf_file = self.get_option('conf_file') - log_dir = '/var/log/watchdog' + conf_file = self.path_join(self.get_option('conf_file')) + log_dir = self.path_join('/var/log/watchdog') # Get service configuration and sysconfig files self.add_copy_spec([ @@ -80,15 +79,15 @@ class Watchdog(Plugin, RedHatPlugin): self._log_warn("Could not read %s: %s" % (conf_file, ex)) if self.get_option('all_logs'): - log_files = glob(os.path.join(log_dir, '*')) + log_files = glob(self.path_join(log_dir, '*')) else: - log_files = (glob(os.path.join(log_dir, '*.stdout')) + - glob(os.path.join(log_dir, '*.stderr'))) + log_files = (glob(self.path_join(log_dir, '*.stdout')) + + glob(self.path_join(log_dir, '*.stderr'))) self.add_copy_spec(log_files) # Get output of "wdctl " for each /dev/watchdog* - for dev in glob('/dev/watchdog*'): + for dev in glob(self.path_join('/dev/watchdog*')): self.add_cmd_output("wdctl %s" % dev) # vim: set et ts=4 sw=4 : diff --git a/sos/report/plugins/yum.py b/sos/report/plugins/yum.py index 148464cb..e5256642 100644 --- a/sos/report/plugins/yum.py +++ b/sos/report/plugins/yum.py @@ -61,7 +61,7 @@ class Yum(Plugin, RedHatPlugin): if not p.endswith(".py"): continue plugins = plugins + " " if len(plugins) else "" - plugins = plugins + os.path.join(YUM_PLUGIN_PATH, p) + plugins = plugins + self.path_join(YUM_PLUGIN_PATH, p) if len(plugins): self.add_cmd_output("rpm -qf %s" % plugins, suggest_filename="plugin-packages") -- 2.31.1 From f4af5efdc79aefe1aa685c36d095925bae14dc4a Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 28 Sep 2021 13:00:17 -0400 Subject: [PATCH 1/4] [collect] Add --transport option and allow clusters to set transport type Adds a new `--transport` option for users to be able to specify the type of transport to use when connecting to nodes. The default value of `auto` will defer to the cluster profile to set the transport type, which will continue to default to use OpenSSH's ControlPersist feature. Clusters may override the new `set_transport_type()` method to change the default transport used. If `--transport` is anything besides `auto`, then the cluster profile will not be deferred to when choosing a transport for each remote node. Signed-off-by: Jake Hunsaker --- man/en/sos-collect.1 | 15 +++++++++++++++ sos/collector/__init__.py | 6 ++++++ sos/collector/clusters/__init__.py | 10 ++++++++++ sos/collector/exceptions.py | 13 ++++++++++++- sos/collector/sosnode.py | 16 +++++++++++++++- 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 index e930023e..8ad4fe5e 100644 --- a/man/en/sos-collect.1 +++ b/man/en/sos-collect.1 @@ -43,6 +43,7 @@ sos collect \- Collect sosreports from multiple (cluster) nodes [\-\-sos-cmd SOS_CMD] [\-t|\-\-threads THREADS] [\-\-timeout TIMEOUT] + [\-\-transport TRANSPORT] [\-\-tmp\-dir TMP_DIR] [\-v|\-\-verbose] [\-\-verify] @@ -350,6 +351,20 @@ Note that sosreports are collected in parallel, so you can approximate the total runtime of sos collect via timeout*(number of nodes/jobs). Default is 180 seconds. +.TP +\fB\-\-transport\fR TRANSPORT +Specify the type of remote transport to use to manage connections to remote nodes. + +\fBsos collect\fR uses locally installed binaries to connect to and interact with remote +nodes, instead of directly establishing those connections. By default, OpenSSH's ControlPersist +feature is preferred, however certain cluster types may have preferences of their own for how +remote sessions should be established. + +The types of transports supported are currently as follows: + + \fBauto\fR Allow the cluster type to determine the transport used + \fBcontrol_persist\fR Use OpenSSH's ControlPersist feature. This is the default behavior + .TP \fB\-\-tmp\-dir\fR TMP_DIR Specify a temporary directory to save sos archives to. By default one will be created in diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index da912655..fecfe6aa 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -98,6 +98,7 @@ class SoSCollector(SoSComponent): 'ssh_port': 22, 'ssh_user': 'root', 'timeout': 600, + 'transport': 'auto', 'verify': False, 'usernames': [], 'upload': False, @@ -378,6 +379,8 @@ class SoSCollector(SoSComponent): help='Specify an SSH user. Default root') collect_grp.add_argument('--timeout', type=int, required=False, help='Timeout for sosreport on each node.') + collect_grp.add_argument('--transport', default='auto', type=str, + help='Remote connection transport to use') collect_grp.add_argument("--upload", action="store_true", default=False, help="Upload archive to a policy-default " @@ -813,6 +813,8 @@ class SoSCollector(SoSComponent): self.collect_md.add_field('cluster_type', self.cluster_type) if self.cluster: self.master.cluster = self.cluster + if self.opts.transport == 'auto': + self.opts.transport = self.cluster.set_transport_type() self.cluster.setup() if self.cluster.cluster_ssh_key: if not self.opts.ssh_key: @@ -1041,6 +1046,7 @@ class SoSCollector(SoSComponent): else: client.disconnect() except Exception: + # all exception logging is handled within SoSNode pass def intro(self): diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py index 64ac2a44..cf1e7a0b 100644 --- a/sos/collector/clusters/__init__.py +++ b/sos/collector/clusters/__init__.py @@ -149,6 +149,16 @@ class Cluster(): """ pass + def set_transport_type(self): + """The default connection type used by sos collect is to leverage the + local system's SSH installation using ControlPersist, however certain + cluster types may want to use something else. + + Override this in a specific cluster profile to set the ``transport`` + option according to what type of transport should be used. + """ + return 'control_persist' + def set_master_options(self, node): """If there is a need to set specific options in the sos command being run on the cluster's master nodes, override this method in the cluster diff --git a/sos/collector/exceptions.py b/sos/collector/exceptions.py index 1e44768b..2bb07e7b 100644 --- a/sos/collector/exceptions.py +++ b/sos/collector/exceptions.py @@ -94,6 +94,16 @@ class UnsupportedHostException(Exception): super(UnsupportedHostException, self).__init__(message) +class InvalidTransportException(Exception): + """Raised when a transport is requested but it does not exist or is + not supported locally""" + + def __init__(self, transport=None): + message = ("Connection failed: unknown or unsupported transport %s" + % transport if transport else '') + super(InvalidTransportException, self).__init__(message) + + __all__ = [ 'AuthPermissionDeniedException', 'CommandTimeoutException', @@ -104,5 +114,6 @@ __all__ = [ 'InvalidPasswordException', 'PasswordRequestException', 'TimeoutPasswordAuthException', - 'UnsupportedHostException' + 'UnsupportedHostException', + 'InvalidTransportException' ] diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index f79bd5ff..5c5c7201 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -22,7 +22,13 @@ from sos.collector.transports.control_persist import SSHControlPersist from sos.collector.transports.local import LocalTransport from sos.collector.exceptions import (CommandTimeoutException, ConnectionException, - UnsupportedHostException) + UnsupportedHostException, + InvalidTransportException) + +TRANSPORTS = { + 'local': LocalTransport, + 'control_persist': SSHControlPersist, +} class SosNode(): @@ -107,6 +113,14 @@ class SosNode(): if self.address in ['localhost', '127.0.0.1']: self.local = True return LocalTransport(self.address, commons) + elif self.opts.transport in TRANSPORTS.keys(): + return TRANSPORTS[self.opts.transport](self.address, commons) + elif self.opts.transport != 'auto': + self.log_error( + "Connection failed: unknown or unsupported transport %s" + % self.opts.transport + ) + raise InvalidTransportException(self.opts.transport) return SSHControlPersist(self.address, commons) def _fmt_msg(self, msg): -- 2.31.1 From dbc49345384404600f45d68b8d3c6541b2a26480 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 30 Sep 2021 10:38:18 -0400 Subject: [PATCH 2/4] [transports] Add 'oc' as a transport option for remote nodes This commit adds a new transport for `sos collect` by leveraging a locally available `oc` binary that has been properly configured for access to an OCP cluster. This transport will allow users to use `sos collect` to collect reports from an OCP cluster without directly connecting to any of the nodes involved. We do this by using the `oc` binary to first launch a pod on target node(s) and then exec our discovery commands and eventual `sos report` command to that pod. This in turn is dependent on a function API for the `oc` binary to communicate with. In the event that `oc` is not __locally__ available or is not properly configured, we will fallback to the current default of using SSH ControlPersist to directly connect to the nodes. Otherwise, the OCP cluster will attempt to automatically use this new transport. --- man/en/sos-collect.1 | 1 + sos/collector/clusters/ocp.py | 14 ++ sos/collector/sosnode.py | 8 +- sos/collector/transports/__init__.py | 20 ++- sos/collector/transports/oc.py | 220 +++++++++++++++++++++++++++ 5 files changed, 257 insertions(+), 6 deletions(-) create mode 100644 sos/collector/transports/oc.py diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 index 8ad4fe5e..a1f6c10e 100644 --- a/man/en/sos-collect.1 +++ b/man/en/sos-collect.1 @@ -364,6 +364,7 @@ The types of transports supported are currently as follows: \fBauto\fR Allow the cluster type to determine the transport used \fBcontrol_persist\fR Use OpenSSH's ControlPersist feature. This is the default behavior + \fBoc\fR Use a \fBlocally\fR configured \fBoc\fR binary to deploy collection pods on OCP nodes .TP \fB\-\-tmp\-dir\fR TMP_DIR diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py index ad97587f..a9357dbf 100644 --- a/sos/collector/clusters/ocp.py +++ b/sos/collector/clusters/ocp.py @@ -12,6 +12,7 @@ import os from pipes import quote from sos.collector.clusters import Cluster +from sos.utilities import is_executable class ocp(Cluster): @@ -83,6 +84,19 @@ class ocp(Cluster): nodes[_node[0]][column] = _node[idx[column]] return nodes + def set_transport_type(self): + if is_executable('oc'): + return 'oc' + self.log_info("Local installation of 'oc' not found or is not " + "correctly configured. Will use ControlPersist") + self.ui_log.warn( + "Preferred transport 'oc' not available, will fallback to SSH." + ) + if not self.opts.batch: + input("Press ENTER to continue connecting with SSH, or Ctrl+C to" + "abort.") + return 'control_persist' + def get_nodes(self): nodes = [] self.node_dict = {} diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 5c5c7201..8a9dbd7a 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -20,6 +20,7 @@ from sos.policies import load from sos.policies.init_systems import InitSystem from sos.collector.transports.control_persist import SSHControlPersist from sos.collector.transports.local import LocalTransport +from sos.collector.transports.oc import OCTransport from sos.collector.exceptions import (CommandTimeoutException, ConnectionException, UnsupportedHostException, @@ -28,6 +29,7 @@ from sos.collector.exceptions import (CommandTimeoutException, TRANSPORTS = { 'local': LocalTransport, 'control_persist': SSHControlPersist, + 'oc': OCTransport } @@ -421,13 +423,11 @@ class SosNode(): if 'atomic' in cmd: get_pty = True - if get_pty: - cmd = "/bin/bash -c %s" % quote(cmd) - if env: _cmd_env = self.env_vars env = _cmd_env.update(env) - return self._transport.run_command(cmd, timeout, need_root, env) + return self._transport.run_command(cmd, timeout, need_root, env, + get_pty) def sosreport(self): """Run an sos report on the node, then collect it""" diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py index 5be7dc6d..7bffee62 100644 --- a/sos/collector/transports/__init__.py +++ b/sos/collector/transports/__init__.py @@ -144,7 +144,8 @@ class RemoteTransport(): raise NotImplementedError("Transport %s does not define disconnect" % self.name) - def run_command(self, cmd, timeout=180, need_root=False, env=None): + def run_command(self, cmd, timeout=180, need_root=False, env=None, + get_pty=False): """Run a command on the node, returning its output and exit code. This should return the exit code of the command being executed, not the exit code of whatever mechanism the transport uses to execute that @@ -165,10 +166,15 @@ class RemoteTransport(): :param env: Specify env vars to be passed to the ``cmd`` :type env: ``dict`` + :param get_pty: Does ``cmd`` require execution with a pty? + :type get_pty: ``bool`` + :returns: Output of ``cmd`` and the exit code :rtype: ``dict`` with keys ``output`` and ``status`` """ self.log_debug('Running command %s' % cmd) + if get_pty: + cmd = "/bin/bash -c %s" % quote(cmd) # currently we only use/support the use of pexpect for handling the # execution of these commands, as opposed to directly invoking # subprocess.Popen() in conjunction with tools like sshpass. @@ -212,6 +218,13 @@ class RemoteTransport(): :type env: ``dict`` """ cmd = self._format_cmd_for_exec(cmd) + + # if for any reason env is empty, set it to None as otherwise + # pexpect interprets this to mean "run this command with no env vars of + # any kind" + if not env: + env = None + result = pexpect.spawn(cmd, encoding='utf-8', env=env) _expects = [pexpect.EOF, pexpect.TIMEOUT] @@ -268,6 +281,9 @@ class RemoteTransport(): _out = self.run_command('hostname') if _out['status'] == 0: self._hostname = _out['output'].strip() + + if not self._hostname: + self._hostname = self.address self.log_info("Hostname set to %s" % self._hostname) return self._hostname @@ -302,7 +318,7 @@ class RemoteTransport(): return self._read_file(fname) def _read_file(self, fname): - res = self.run_command("cat %s" % fname, timeout=5) + res = self.run_command("cat %s" % fname, timeout=10) if res['status'] == 0: return res['output'] else: diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py new file mode 100644 index 00000000..649037b9 --- /dev/null +++ b/sos/collector/transports/oc.py @@ -0,0 +1,220 @@ +# Copyright Red Hat 2021, Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +import json +import tempfile +import os + +from sos.collector.transports import RemoteTransport +from sos.utilities import (is_executable, sos_get_command_output, + SoSTimeoutError) + + +class OCTransport(RemoteTransport): + """This transport leverages the execution of commands via a locally + available and configured ``oc`` binary for OCPv4 environments. + + OCPv4 clusters generally discourage the use of SSH, so this transport may + be used to remove our use of SSH in favor of the environment provided + method of connecting to nodes and executing commands via debug pods. + + Note that this approach will generate multiple debug pods over the course + of our execution + """ + + name = 'oc' + project = 'sos-collect-tmp' + + def run_oc(self, cmd, **kwargs): + """Format and run a command with `oc` in the project defined for our + execution + """ + return sos_get_command_output( + "oc -n sos-collect-tmp %s" % cmd, + **kwargs + ) + + @property + def connected(self): + up = self.run_oc( + "wait --timeout=0s --for=condition=ready pod/%s" % self.pod_name + ) + return up['status'] == 0 + + def get_node_pod_config(self): + """Based on our template for the debug container, add the node-specific + items so that we can deploy one of these on each node we're collecting + from + """ + return { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "name": "%s-sos-collector" % self.address.split('.')[0], + "namespace": "sos-collect-tmp" + }, + "priorityClassName": "system-cluster-critical", + "spec": { + "volumes": [ + { + "name": "host", + "hostPath": { + "path": "/", + "type": "Directory" + } + }, + { + "name": "run", + "hostPath": { + "path": "/run", + "type": "Directory" + } + }, + { + "name": "varlog", + "hostPath": { + "path": "/var/log", + "type": "Directory" + } + }, + { + "name": "machine-id", + "hostPath": { + "path": "/etc/machine-id", + "type": "File" + } + } + ], + "containers": [ + { + "name": "sos-collector-tmp", + "image": "registry.redhat.io/rhel8/support-tools", + "command": [ + "/bin/bash" + ], + "env": [ + { + "name": "HOST", + "value": "/host" + } + ], + "resources": {}, + "volumeMounts": [ + { + "name": "host", + "mountPath": "/host" + }, + { + "name": "run", + "mountPath": "/run" + }, + { + "name": "varlog", + "mountPath": "/var/log" + }, + { + "name": "machine-id", + "mountPath": "/etc/machine-id" + } + ], + "securityContext": { + "privileged": True, + "runAsUser": 0 + }, + "stdin": True, + "stdinOnce": True, + "tty": True + } + ], + "restartPolicy": "Never", + "nodeName": self.address, + "hostNetwork": True, + "hostPID": True, + "hostIPC": True + } + } + + def _connect(self, password): + # the oc binary must be _locally_ available for this to work + if not is_executable('oc'): + return False + + # deploy the debug container we'll exec into + podconf = self.get_node_pod_config() + self.pod_name = podconf['metadata']['name'] + fd, self.pod_tmp_conf = tempfile.mkstemp(dir=self.tmpdir) + with open(fd, 'w') as cfile: + json.dump(podconf, cfile) + self.log_debug("Starting sos collector container '%s'" % self.pod_name) + # this specifically does not need to run with a project definition + out = sos_get_command_output( + "oc create -f %s" % self.pod_tmp_conf + ) + if (out['status'] != 0 or "pod/%s created" % self.pod_name not in + out['output']): + self.log_error("Unable to deploy sos collect pod") + self.log_debug("Debug pod deployment failed: %s" % out['output']) + return False + self.log_debug("Pod '%s' successfully deployed, waiting for pod to " + "enter ready state" % self.pod_name) + + # wait for the pod to report as running + try: + up = self.run_oc("wait --for=condition=Ready pod/%s --timeout=30s" + % self.pod_name, + # timeout is for local safety, not oc + timeout=40) + if not up['status'] == 0: + self.log_error("Pod not available after 30 seconds") + return False + except SoSTimeoutError: + self.log_error("Timeout while polling for pod readiness") + return False + except Exception as err: + self.log_error("Error while waiting for pod to be ready: %s" + % err) + return False + + return True + + def _format_cmd_for_exec(self, cmd): + if cmd.startswith('oc'): + return ("oc -n %s exec --request-timeout=0 %s -- chroot /host %s" + % (self.project, self.pod_name, cmd)) + return super(OCTransport, self)._format_cmd_for_exec(cmd) + + def run_command(self, cmd, timeout=180, need_root=False, env=None, + get_pty=False): + # debug pod setup is slow, extend all timeouts to account for this + if timeout: + timeout += 10 + + # since we always execute within a bash shell, force disable get_pty + # to avoid double-quoting + return super(OCTransport, self).run_command(cmd, timeout, need_root, + env, False) + + def _disconnect(self): + os.unlink(self.pod_tmp_conf) + removed = self.run_oc("delete pod %s" % self.pod_name) + if "deleted" not in removed['output']: + self.log_debug("Calling delete on pod '%s' failed: %s" + % (self.pod_name, removed)) + return False + return True + + @property + def remote_exec(self): + return ("oc -n %s exec --request-timeout=0 %s -- /bin/bash -c" + % (self.project, self.pod_name)) + + def _retrieve_file(self, fname, dest): + cmd = self.run_oc("cp %s:%s %s" % (self.pod_name, fname, dest)) + return cmd['status'] == 0 -- 2.31.1 From 460494c4296db1a7529b44fe8f6597544c917c02 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Mon, 11 Oct 2021 11:50:44 -0400 Subject: [PATCH 3/4] [ocp] Create temporary project and restrict default node list to masters Adds explicit setup of a new project to use in the `ocp` cluster and adds better handling of cluster setup generally, which the `ocp` cluster is the first to make use of. Included in this change is a correction to `Cluster.exec_primary_cmd()`'s use of `get_pty` to now be determined on if the primary node is the local node or not. Additionally, based on feedback from the OCP engineering team, by default restrict node lists to masters. Signed-off-by: Jake Hunsaker --- sos/collector/__init__.py | 5 ++++ sos/collector/clusters/__init__.py | 13 +++++++- sos/collector/clusters/ocp.py | 48 ++++++++++++++++++++++++++++-- sos/collector/transports/oc.py | 4 +-- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index fecfe6aa..a76f8a79 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -850,6 +850,7 @@ class SoSCollector(SoSComponent): "CTRL-C to quit\n") self.ui_log.info("") except KeyboardInterrupt: + self.cluster.cleanup() self.exit("Exiting on user cancel", 130) def configure_sos_cmd(self): @@ -1098,6 +1099,7 @@ this utility or remote systems that it connects to. self.archive.makedirs('sos_logs', 0o755) self.collect() + self.cluster.cleanup() self.cleanup() def collect(self): @@ -1156,9 +1158,11 @@ this utility or remote systems that it connects to. pool.shutdown(wait=True) except KeyboardInterrupt: self.log_error('Exiting on user cancel\n') + self.cluster.cleanup() os._exit(130) except Exception as err: self.log_error('Could not connect to nodes: %s' % err) + self.cluster.cleanup() os._exit(1) if hasattr(self.cluster, 'run_extra_cmd'): @@ -1199,6 +1199,7 @@ this utility or remote systems that it c arc_name = self.create_cluster_archive() else: msg = 'No sosreports were collected, nothing to archive...' + self.cluster.cleanup() self.exit(msg, 1) if self.opts.upload and self.policy.get_upload_url(): diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py index cf1e7a0b..2a4665a1 100644 --- a/sos/collector/clusters/__init__.py +++ b/sos/collector/clusters/__init__.py @@ -192,7 +192,8 @@ class Cluster(): :returns: The output and status of `cmd` :rtype: ``dict`` """ - res = self.master.run_command(cmd, get_pty=True, need_root=need_root) + pty = self.master.local is False + res = self.master.run_command(cmd, get_pty=pty, need_root=need_root) if res['output']: res['output'] = res['output'].replace('Password:', '') return res @@ -223,6 +224,16 @@ class Cluster(): return True return False + def cleanup(self): + """ + This may be overridden by clusters + + Perform any necessary cleanup steps required by the cluster profile. + This helps ensure that sos does make lasting changes to the environment + in which we are running + """ + pass + def get_nodes(self): """ This MUST be overridden by a cluster profile subclassing this class diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py index a9357dbf..92da4e6e 100644 --- a/sos/collector/clusters/ocp.py +++ b/sos/collector/clusters/ocp.py @@ -23,10 +23,12 @@ class ocp(Cluster): api_collect_enabled = False token = None + project = 'sos-collect-tmp' + oc_cluster_admin = None option_list = [ ('label', '', 'Colon delimited list of labels to select nodes with'), - ('role', '', 'Colon delimited list of roles to select nodes with'), + ('role', 'master', 'Colon delimited list of roles to filter on'), ('kubeconfig', '', 'Path to the kubeconfig file'), ('token', '', 'Service account token to use for oc authorization') ] @@ -58,6 +58,42 @@ class ocp(Cluster): _who = self.fmt_oc_cmd('whoami') return self.exec_master_cmd(_who)['status'] == 0 + def setup(self): + """Create the project that we will be executing in for any nodes' + collection via a container image + """ + if not self.set_transport_type() == 'oc': + return + + out = self.exec_master_cmd(self.fmt_oc_cmd("auth can-i '*' '*'")) + self.oc_cluster_admin = out['status'] == 0 + if not self.oc_cluster_admin: + self.log_debug("Check for cluster-admin privileges returned false," + " cannot create project in OCP cluster") + raise Exception("Insufficient permissions to create temporary " + "collection project.\nAborting...") + + self.log_info("Creating new temporary project '%s'" % self.project) + ret = self.exec_master_cmd("oc new-project %s" % self.project) + if ret['status'] == 0: + return True + + self.log_debug("Failed to create project: %s" % ret['output']) + raise Exception("Failed to create temporary project for collection. " + "\nAborting...") + + def cleanup(self): + """Remove the project we created to execute within + """ + if self.project: + ret = self.exec_master_cmd("oc delete project %s" % self.project) + if not ret['status'] == 0: + self.log_error("Error deleting temporary project: %s" + % ret['output']) + # don't leave the config on a non-existing project + self.exec_master_cmd("oc project default") + return True + def _build_dict(self, nodelist): """From the output of get_nodes(), construct an easier-to-reference dict of nodes that will be used in determining labels, master status, @@ -85,10 +123,10 @@ class ocp(Cluster): return nodes def set_transport_type(self): - if is_executable('oc'): + if is_executable('oc') or self.opts.transport == 'oc': return 'oc' self.log_info("Local installation of 'oc' not found or is not " - "correctly configured. Will use ControlPersist") + "correctly configured. Will use ControlPersist.") self.ui_log.warn( "Preferred transport 'oc' not available, will fallback to SSH." ) @@ -106,6 +144,10 @@ class ocp(Cluster): cmd += " -l %s" % quote(labels) res = self.exec_master_cmd(self.fmt_oc_cmd(cmd)) if res['status'] == 0: + if self.get_option('role') == 'master': + self.log_warn("NOTE: By default, only master nodes are listed." + "\nTo collect from all/more nodes, override the " + "role option with '-c ocp.role=role1:role2'") roles = [r for r in self.get_option('role').split(':')] self.node_dict = self._build_dict(res['output'].splitlines()) for node in self.node_dict: diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py index 649037b9..de044ccb 100644 --- a/sos/collector/transports/oc.py +++ b/sos/collector/transports/oc.py @@ -37,7 +37,7 @@ class OCTransport(RemoteTransport): execution """ return sos_get_command_output( - "oc -n sos-collect-tmp %s" % cmd, + "oc -n %s %s" % (self.project, cmd), **kwargs ) @@ -58,7 +58,7 @@ class OCTransport(RemoteTransport): "apiVersion": "v1", "metadata": { "name": "%s-sos-collector" % self.address.split('.')[0], - "namespace": "sos-collect-tmp" + "namespace": self.project }, "priorityClassName": "system-cluster-critical", "spec": { -- 2.31.1 From 1bc0e9fe32491e764e622368bfe216f97bf32620 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Mon, 4 Oct 2021 15:12:04 -0400 Subject: [PATCH 4/4] [sosnode] Fix typo and small logic break Fixes a typo in setting the non-primary node options from the ocp profile against the sosnode object. Second, fixes a small break in checksum handling for the manifest discovered during `oc` transport testing for edge cases. Signed-off-by: Jake Hunsaker --- sos/collector/clusters/ocp.py | 4 ++-- sos/collector/sosnode.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py index 92da4e6e..22a7289a 100644 --- a/sos/collector/clusters/ocp.py +++ b/sos/collector/clusters/ocp.py @@ -183,7 +183,7 @@ class ocp(Cluster): if self.api_collect_enabled: # a primary has already been enabled for API collection, disable # it among others - node.plugin_options.append('openshift.no-oc=on') + node.plugopts.append('openshift.no-oc=on') else: _oc_cmd = 'oc' if node.host.containerized: @@ -223,6 +223,6 @@ class ocp(Cluster): def set_node_options(self, node): # don't attempt OC API collections on non-primary nodes - node.plugin_options.append('openshift.no-oc=on') + node.plugopts.append('openshift.no-oc=on') # vim: set et ts=4 sw=4 : diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 8a9dbd7a..ab7f23cc 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -714,7 +714,7 @@ class SosNode(): elif line.startswith("The checksum is: "): checksum = line.split()[3] - if checksum is not None: + if checksum: self.manifest.add_field('checksum', checksum) if len(checksum) == 32: self.manifest.add_field('checksum_type', 'md5') @@ -722,6 +722,8 @@ class SosNode(): self.manifest.add_field('checksum_type', 'sha256') else: self.manifest.add_field('checksum_type', 'unknown') + else: + self.manifest.add_field('checksum_type', 'unknown') else: err = self.determine_sos_error(res['status'], res['output']) self.log_debug("Error running sos report. rc = %s msg = %s" -- 2.31.1 From 38a0533de3dd2613eefcc4865a2916e225e3ceed Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Tue, 9 Nov 2021 19:34:25 +0100 Subject: [PATCH] [presets] Optimise OCP preset for hundreds of network namespaces Sos report on OCP having hundreds of namespaces timeouts in networking plugin, as it collects >10 commands for each namespace. Let use a balanced approach in: - increasing network.timeout - limiting namespaces to traverse - disabling ethtool per namespace to ensure sos report successfully finish in a reasonable time, collecting rasonable amount of data. Resolves: #2754 Signed-off-by: Pavel Moravec --- sos/presets/redhat/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sos/presets/redhat/__init__.py b/sos/presets/redhat/__init__.py index e6d63611..865c9b6b 100644 --- a/sos/presets/redhat/__init__.py +++ b/sos/presets/redhat/__init__.py @@ -29,11 +29,15 @@ RHEL_DESC = RHEL_RELEASE_STR RHOSP = "rhosp" RHOSP_DESC = "Red Hat OpenStack Platform" +RHOSP_OPTS = SoSOptions(plugopts=[ + 'process.lsof=off', + 'networking.ethtool_namespaces=False', + 'networking.namespaces=200']) RHOCP = "ocp" RHOCP_DESC = "OpenShift Container Platform by Red Hat" -RHOSP_OPTS = SoSOptions(plugopts=[ - 'process.lsof=off', +RHOCP_OPTS = SoSOptions(all_logs=True, verify=True, plugopts=[ + 'networking.timeout=600', 'networking.ethtool_namespaces=False', 'networking.namespaces=200']) @@ -62,7 +66,7 @@ RHEL_PRESETS = { RHEL: PresetDefaults(name=RHEL, desc=RHEL_DESC), RHOSP: PresetDefaults(name=RHOSP, desc=RHOSP_DESC, opts=RHOSP_OPTS), RHOCP: PresetDefaults(name=RHOCP, desc=RHOCP_DESC, note=NOTE_SIZE_TIME, - opts=_opts_all_logs_verify), + opts=RHOCP_OPTS), RH_CFME: PresetDefaults(name=RH_CFME, desc=RH_CFME_DESC, note=NOTE_TIME, opts=_opts_verify), RH_SATELLITE: PresetDefaults(name=RH_SATELLITE, desc=RH_SATELLITE_DESC, -- 2.31.1 From 97b93c7f8755d04bdeb4f93759c20dcb787f2046 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 2 Nov 2021 11:34:13 -0400 Subject: [PATCH] [Plugin] Rework get_container_logs to be more useful `get_container_logs()` is now `add_container_logs()` to align it better with our more common `add_*` methods for plugin collections. Additionally, it has been extended to accept either a single string or a list of strings like the other methods, and plugin authors may now specify either specific container names or regexes. Signed-off-by: Jake Hunsaker --- sos/report/plugins/__init__.py | 22 +++++++++++++++++----- sos/report/plugins/rabbitmq.py | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py index 08eee118..4b0e4fd5 100644 --- a/sos/report/plugins/__init__.py +++ b/sos/report/plugins/__init__.py @@ -2366,20 +2366,32 @@ class Plugin(): return _runtime.volumes return [] - def get_container_logs(self, container, **kwargs): - """Helper to get the ``logs`` output for a given container + def add_container_logs(self, containers, get_all=False, **kwargs): + """Helper to get the ``logs`` output for a given container or list + of container names and/or regexes. Supports passthru of add_cmd_output() options - :param container: The name of the container to retrieve logs from - :type container: ``str`` + :param containers: The name of the container to retrieve logs from, + may be a single name or a regex + :type containers: ``str`` or ``list` of strs + + :param get_all: Should non-running containers also be queried? + Default: False + :type get_all: ``bool`` :param kwargs: Any kwargs supported by ``add_cmd_output()`` are supported here """ _runtime = self._get_container_runtime() if _runtime is not None: - self.add_cmd_output(_runtime.get_logs_command(container), **kwargs) + if isinstance(containers, str): + containers = [containers] + for container in containers: + _cons = self.get_all_containers_by_regex(container, get_all) + for _con in _cons: + cmd = _runtime.get_logs_command(_con[1]) + self.add_cmd_output(cmd, **kwargs) def fmt_container_cmd(self, container, cmd, quotecmd=False): """Format a command to be executed by the loaded ``ContainerRuntime`` diff --git a/sos/report/plugins/rabbitmq.py b/sos/report/plugins/rabbitmq.py index e84b52da..1bfa741f 100644 --- a/sos/report/plugins/rabbitmq.py +++ b/sos/report/plugins/rabbitmq.py @@ -32,7 +32,7 @@ class RabbitMQ(Plugin, IndependentPlugin): if in_container: for container in container_names: - self.get_container_logs(container) + self.add_container_logs(container) self.add_cmd_output( self.fmt_container_cmd(container, 'rabbitmqctl report'), foreground=True -- 2.31.1 From 8bf602108f75db10e449eff5e2266c6466504086 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Thu, 2 Dec 2021 16:30:44 +0100 Subject: [PATCH] [clusters:ocp] fix get_nodes function Signed-off-by: Nadia Pinaeva --- sos/collector/clusters/ocp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py index 22a7289a..2ce4e977 100644 --- a/sos/collector/clusters/ocp.py +++ b/sos/collector/clusters/ocp.py @@ -150,13 +150,13 @@ class ocp(Cluster): "role option with '-c ocp.role=role1:role2'") roles = [r for r in self.get_option('role').split(':')] self.node_dict = self._build_dict(res['output'].splitlines()) - for node in self.node_dict: + for node_name, node in self.node_dict.items(): if roles: for role in roles: - if role in node: - nodes.append(node) + if role == node['roles']: + nodes.append(node_name) else: - nodes.append(node) + nodes.append(node_name) else: msg = "'oc' command failed" if 'Missing or incomplete' in res['output']: -- 2.31.1 From 5d80ac6dc67e12ef00903436c088a1694f9a7dd7 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 1 Dec 2021 14:13:16 -0500 Subject: [PATCH] [collect] Catch command not found exceptions from pexpect When running a command that does not exist on the system, catch the resulting pexpect exception and return the proper status code rather than allowing an untrapped exception. Closes: #2768 Signed-off-by: Jake Hunsaker --- sos/collector/transports/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py index 7bffee62..33f2f66d 100644 --- a/sos/collector/transports/__init__.py +++ b/sos/collector/transports/__init__.py @@ -225,7 +225,11 @@ class RemoteTransport(): if not env: env = None - result = pexpect.spawn(cmd, encoding='utf-8', env=env) + try: + result = pexpect.spawn(cmd, encoding='utf-8', env=env) + except pexpect.exceptions.ExceptionPexpect as err: + self.log_debug(err.value) + return {'status': 127, 'output': ''} _expects = [pexpect.EOF, pexpect.TIMEOUT] if need_root and self.opts.ssh_user != 'root': -- 2.31.1 From decb5d26c165e664fa879a669f2d80165181f0e1 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 2 Dec 2021 14:02:17 -0500 Subject: [PATCH] [report,collect] Add option to control default container runtime Adds a new `--container-runtime` option that allows users to control what default container runtime is used by plugins for container based collections, effectively overriding policy defaults. If no runtimes are active, this option is effectively ignored. If however runtimes are active, but the requested one is not, raise an exception to abort collection with an appropriate message to the user. Signed-off-by: Jake Hunsaker --- man/en/sos-collect.1 | 6 ++++++ man/en/sos-report.1 | 19 +++++++++++++++++++ sos/collector/__init__.py | 4 ++++ sos/collector/sosnode.py | 6 ++++++ sos/report/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+) diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 index a1f6c10e..9b0a5d7b 100644 --- a/man/en/sos-collect.1 +++ b/man/en/sos-collect.1 @@ -11,6 +11,7 @@ sos collect \- Collect sosreports from multiple (cluster) nodes [\-\-chroot CHROOT] [\-\-case\-id CASE_ID] [\-\-cluster\-type CLUSTER_TYPE] + [\-\-container\-runtime RUNTIME] [\-e ENABLE_PLUGINS] [--encrypt-key KEY]\fR [--encrypt-pass PASS]\fR @@ -113,6 +114,11 @@ Example: \fBsos collect --cluster-type=kubernetes\fR will force the kubernetes p to be run, and thus set sosreport options and attempt to determine a list of nodes using that profile. .TP +\fB\-\-container\-runtime\fR RUNTIME +\fB sos report\fR option. Using this with \fBcollect\fR will pass this option thru +to nodes with sos version 4.3 or later. This option controls the default container +runtime plugins will use for collections. See \fBman sos-report\fR. +.TP \fB\-e\fR ENABLE_PLUGINS, \fB\-\-enable\-plugins\fR ENABLE_PLUGINS Sosreport option. Use this to enable a plugin that would otherwise not be run. diff --git a/man/en/sos-report.1 b/man/en/sos-report.1 index e8efc8f8..464a77e5 100644 --- a/man/en/sos-report.1 +++ b/man/en/sos-report.1 @@ -19,6 +19,7 @@ sos report \- Collect and package diagnostic and support data [--plugin-timeout TIMEOUT]\fR [--cmd-timeout TIMEOUT]\fR [--namespaces NAMESPACES]\fR + [--container-runtime RUNTIME]\fR [-s|--sysroot SYSROOT]\fR [-c|--chroot {auto|always|never}\fR [--tmp-dir directory]\fR @@ -299,6 +300,24 @@ Use '0' (default) for no limit - all namespaces will be used for collections. Note that specific plugins may provide a similar `namespaces` plugin option. If the plugin option is used, it will override this option. +.TP +.B \--container-runtime RUNTIME +Force the use of the specified RUNTIME as the default runtime that plugins will +use to collect data from and about containers and container images. By default, +the setting of \fBauto\fR results in the local policy determining what runtime +will be the default runtime (in configurations where multiple runtimes are installed +and active). + +If no container runtimes are active, this option is ignored. If there are runtimes +active, but not one with a name matching RUNTIME, sos will abort. + +Setting this to \fBnone\fR, \fBoff\fR, or \fBdisabled\fR will cause plugins to +\fBNOT\fR leverage any active runtimes for collections. Note that if disabled, plugins +specifically for runtimes (e.g. the podman or docker plugins) will still collect +general data about the runtime, but will not inspect existing containers or images. + +Default: 'auto' (policy determined) +.TP .B \--case-id NUMBER Specify a case identifier to associate with the archive. Identifiers may include alphanumeric characters, commas and periods ('.'). diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 42a7731d..3ad703d3 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -55,6 +55,7 @@ class SoSCollector(SoSComponent): 'clean': False, 'cluster_options': [], 'cluster_type': None, + 'container_runtime': 'auto', 'domains': [], 'enable_plugins': [], 'encrypt_key': '', @@ -268,6 +269,9 @@ class SoSCollector(SoSComponent): sos_grp.add_argument('--chroot', default='', choices=['auto', 'always', 'never'], help="chroot executed commands to SYSROOT") + sos_grp.add_argument("--container-runtime", default="auto", + help="Default container runtime to use for " + "collections. 'auto' for policy control.") sos_grp.add_argument('-e', '--enable-plugins', action="extend", help='Enable specific plugins for sosreport') sos_grp.add_argument('-k', '--plugin-option', '--plugopts', diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index ab7f23cc..f5957e17 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -586,6 +586,12 @@ class SosNode(): sos_opts.append('--cmd-timeout=%s' % quote(str(self.opts.cmd_timeout))) + if self.check_sos_version('4.3'): + if self.opts.container_runtime != 'auto': + sos_opts.append( + "--container-runtime=%s" % self.opts.container_runtime + ) + self.update_cmd_from_cluster() sos_cmd = sos_cmd.replace( diff --git a/sos/report/__init__.py b/sos/report/__init__.py index a6c72778..0daad82f 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -82,6 +82,7 @@ class SoSReport(SoSComponent): 'case_id': '', 'chroot': 'auto', 'clean': False, + 'container_runtime': 'auto', 'keep_binary_files': False, 'desc': '', 'domains': [], @@ -187,6 +188,7 @@ class SoSReport(SoSComponent): self.tempfile_util.clean() self._exit(1) + self._check_container_runtime() self._get_hardware_devices() self._get_namespaces() @@ -218,6 +220,9 @@ class SoSReport(SoSComponent): dest="chroot", default='auto', help="chroot executed commands to SYSROOT " "[auto, always, never] (default=auto)") + report_grp.add_argument("--container-runtime", default="auto", + help="Default container runtime to use for " + "collections. 'auto' for policy control.") report_grp.add_argument("--desc", "--description", type=str, action="store", default="", help="Description for a new preset",) @@ -373,6 +378,37 @@ class SoSReport(SoSComponent): } # TODO: enumerate network devices, preferably with devtype info + def _check_container_runtime(self): + """Check the loaded container runtimes, and the policy default runtime + (if set), against any requested --container-runtime value. This can be + useful for systems that have multiple runtimes, such as RHCOS, but do + not have a clearly defined 'default' (or one that is determined based + entirely on configuration). + """ + if self.opts.container_runtime != 'auto': + crun = self.opts.container_runtime.lower() + if crun in ['none', 'off', 'diabled']: + self.policy.runtimes = {} + self.soslog.info( + "Disabled all container runtimes per user option." + ) + elif not self.policy.runtimes: + msg = ("WARNING: No container runtimes are active, ignoring " + "option to set default runtime to '%s'\n" % crun) + self.soslog.warn(msg) + elif crun not in self.policy.runtimes.keys(): + valid = ', '.join(p for p in self.policy.runtimes.keys() + if p != 'default') + raise Exception("Cannot use container runtime '%s': no such " + "runtime detected. Available runtimes: %s" + % (crun, valid)) + else: + self.policy.runtimes['default'] = self.policy.runtimes[crun] + self.soslog.info( + "Set default container runtime to '%s'" + % self.policy.runtimes['default'].name + ) + def get_fibre_devs(self): """Enumerate a list of fibrechannel devices on this system so that plugins can iterate over them -- 2.31.1 From 9d4b5af39d76ac99afa40d004fe9888633218356 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 3 Dec 2021 13:37:09 -0500 Subject: [PATCH 1/2] [Plugin] Add container parameter for add_cmd_output() Adds a new `container` parameter for `Plugin.add_cmd_output()`, which if set will format all commands passed to that call for execution in the specified container. `Plugin.fmt_container_cmd()` is called for this purpose, and has been modified so that if the given container does not exist, an empty string is returned instead, thus preventing execution on the host. Signed-off-by: Jake Hunsaker --- sos/report/plugins/__init__.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py index e180ae17..3ff7c191 100644 --- a/sos/report/plugins/__init__.py +++ b/sos/report/plugins/__init__.py @@ -1707,7 +1707,7 @@ class Plugin(): chroot=True, runat=None, env=None, binary=False, sizelimit=None, pred=None, subdir=None, changes=False, foreground=False, tags=[], - priority=10, cmd_as_tag=False): + priority=10, cmd_as_tag=False, container=None): """Run a program or a list of programs and collect the output Output will be limited to `sizelimit`, collecting the last X amount @@ -1772,6 +1772,10 @@ class Plugin(): :param cmd_as_tag: Should the command string be automatically formatted to a tag? :type cmd_as_tag: ``bool`` + + :param container: Run the specified `cmds` inside a container with this + ID or name + :type container: ``str`` """ if isinstance(cmds, str): cmds = [cmds] @@ -1782,6 +1786,14 @@ class Plugin(): if pred is None: pred = self.get_predicate(cmd=True) for cmd in cmds: + if container: + ocmd = cmd + cmd = self.fmt_container_cmd(container, cmd) + if not cmd: + self._log_debug("Skipping command '%s' as the requested " + "container '%s' does not exist." + % (ocmd, container)) + continue self._add_cmd_output(cmd=cmd, suggest_filename=suggest_filename, root_symlink=root_symlink, timeout=timeout, stderr=stderr, chroot=chroot, runat=runat, @@ -2420,7 +2432,7 @@ class Plugin(): if self.container_exists(container): _runtime = self._get_container_runtime() return _runtime.fmt_container_cmd(container, cmd, quotecmd) - return cmd + return '' def is_module_loaded(self, module_name): """Determine whether specified module is loaded or not -- 2.31.1 From 874d2adfbff9e51dc902669af3c4a5083dbc19b1 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 3 Dec 2021 14:49:43 -0500 Subject: [PATCH 2/2] [plugins] Update existing plugins to use a_c_o container parameter Updates plugins currently calling `fmt_container_cmd()` in their `add_cmd_output()` calls to instead use the new `container` parameter and rely on the automatic formatting. Signed-off-by: Jake Hunsaker --- sos/report/plugins/opencontrail.py | 3 +-- sos/report/plugins/openstack_database.py | 20 ++++++-------------- sos/report/plugins/openstack_designate.py | 6 ++---- sos/report/plugins/openstack_ironic.py | 3 +-- sos/report/plugins/ovn_central.py | 7 +++---- sos/report/plugins/rabbitmq.py | 11 ++++++----- 9 files changed, 47 insertions(+), 69 deletions(-) diff --git a/sos/report/plugins/opencontrail.py b/sos/report/plugins/opencontrail.py index b368bffe..76c03e21 100644 --- a/sos/report/plugins/opencontrail.py +++ b/sos/report/plugins/opencontrail.py @@ -25,8 +25,7 @@ class OpenContrail(Plugin, IndependentPlugin): cnames = self.get_containers(get_all=True) cnames = [c[1] for c in cnames if 'opencontrail' in c[1]] for cntr in cnames: - _cmd = self.fmt_container_cmd(cntr, 'contrail-status') - self.add_cmd_output(_cmd) + self.add_cmd_output('contrail-status', container=cntr) else: self.add_cmd_output("contrail-status") diff --git a/sos/report/plugins/openstack_database.py b/sos/report/plugins/openstack_database.py index 1e98fabf..e9f84cf8 100644 --- a/sos/report/plugins/openstack_database.py +++ b/sos/report/plugins/openstack_database.py @@ -37,36 +37,28 @@ class OpenStackDatabase(Plugin): ] def setup(self): - - in_container = False # determine if we're running databases on the host or in a container _db_containers = [ 'galera-bundle-.*', # overcloud 'mysql' # undercloud ] + cname = None for container in _db_containers: cname = self.get_container_by_name(container) - if cname is not None: - in_container = True + if cname: break - if in_container: - fname = "clustercheck_%s" % cname - cmd = self.fmt_container_cmd(cname, 'clustercheck') - self.add_cmd_output(cmd, timeout=15, suggest_filename=fname) - else: - self.add_cmd_output('clustercheck', timeout=15) + fname = "clustercheck_%s" % cname if cname else None + self.add_cmd_output('clustercheck', container=cname, timeout=15, + suggest_filename=fname) if self.get_option('dump') or self.get_option('dumpall'): db_dump = self.get_mysql_db_string(container=cname) db_cmd = "mysqldump --opt %s" % db_dump - if in_container: - db_cmd = self.fmt_container_cmd(cname, db_cmd) - self.add_cmd_output(db_cmd, suggest_filename='mysql_dump.sql', - sizelimit=0) + sizelimit=0, container=cname) def get_mysql_db_string(self, container=None): diff --git a/sos/report/plugins/openstack_designate.py b/sos/report/plugins/openstack_designate.py index 0ae991b0..a2ea37ab 100644 --- a/sos/report/plugins/openstack_designate.py +++ b/sos/report/plugins/openstack_designate.py @@ -20,12 +20,10 @@ class OpenStackDesignate(Plugin): def setup(self): # collect current pool config - pools_cmd = self.fmt_container_cmd( - self.get_container_by_name(".*designate_central"), - "designate-manage pool generate_file --file /dev/stdout") self.add_cmd_output( - pools_cmd, + "designate-manage pool generate_file --file /dev/stdout", + container=self.get_container_by_name(".*designate_central"), suggest_filename="openstack_designate_current_pools.yaml" ) diff --git a/sos/report/plugins/openstack_ironic.py b/sos/report/plugins/openstack_ironic.py index c36fb6b6..49beb2d9 100644 --- a/sos/report/plugins/openstack_ironic.py +++ b/sos/report/plugins/openstack_ironic.py @@ -80,8 +80,7 @@ class OpenStackIronic(Plugin): 'ironic_pxe_tftp', 'ironic_neutron_agent', 'ironic_conductor', 'ironic_api']: if self.container_exists('.*' + container_name): - self.add_cmd_output(self.fmt_container_cmd(container_name, - 'rpm -qa')) + self.add_cmd_output('rpm -qa', container=container_name) else: self.conf_list = [ diff --git a/sos/report/plugins/ovn_central.py b/sos/report/plugins/ovn_central.py index 914eda60..ddbf288d 100644 --- a/sos/report/plugins/ovn_central.py +++ b/sos/report/plugins/ovn_central.py @@ -123,11 +123,10 @@ class OVNCentral(Plugin): # If OVN is containerized, we need to run the above commands inside # the container. - cmds = [ - self.fmt_container_cmd(self._container_name, cmd) for cmd in cmds - ] - self.add_cmd_output(cmds, foreground=True) + self.add_cmd_output( + cmds, foreground=True, container=self._container_name + ) self.add_copy_spec("/etc/sysconfig/ovn-northd") diff --git a/sos/report/plugins/rabbitmq.py b/sos/report/plugins/rabbitmq.py index 1bfa741f..607802e4 100644 --- a/sos/report/plugins/rabbitmq.py +++ b/sos/report/plugins/rabbitmq.py @@ -34,14 +34,15 @@ class RabbitMQ(Plugin, IndependentPlugin): for container in container_names: self.add_container_logs(container) self.add_cmd_output( - self.fmt_container_cmd(container, 'rabbitmqctl report'), + 'rabbitmqctl report', + container=container, foreground=True ) self.add_cmd_output( - self.fmt_container_cmd( - container, "rabbitmqctl eval " - "'rabbit_diagnostics:maybe_stuck().'"), - foreground=True, timeout=10 + "rabbitmqctl eval 'rabbit_diagnostics:maybe_stuck().'", + container=container, + foreground=True, + timeout=10 ) else: self.add_cmd_output("rabbitmqctl report") -- 2.31.1 From faa15754f82e9841cd624afe18dc2198644decdf Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 8 Dec 2021 13:51:20 -0500 Subject: [PATCH] [Policy,collect] Prevent remote node policies from setting local PATH This commit fixes an issue where policies loaded for remote nodes when using `sos collect` would override the PATH setting for the local policy, which in turn could prevent successful execution of cluster profile operations. Related: #2777 Signed-off-by: Jake Hunsaker --- sos/policies/__init__.py | 8 +++++--- sos/policies/distros/__init__.py | 6 ++++-- sos/policies/distros/debian.py | 3 ++- sos/policies/distros/redhat.py | 6 ++++-- sos/policies/distros/suse.py | 3 ++- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/sos/policies/__init__.py b/sos/policies/__init__.py index ef9188de..826d03a1 100644 --- a/sos/policies/__init__.py +++ b/sos/policies/__init__.py @@ -45,7 +45,7 @@ def load(cache={}, sysroot=None, init=None, probe_runtime=True, return cache['policy'] -class Policy(object): +class Policy(): """Policies represent distributions that sos supports, and define the way in which sos behaves on those distributions. A policy should define at minimum a way to identify the distribution, and a package manager to allow @@ -111,7 +111,7 @@ any third party. presets_path = PRESETS_PATH _in_container = False - def __init__(self, sysroot=None, probe_runtime=True): + def __init__(self, sysroot=None, probe_runtime=True, remote_exec=None): """Subclasses that choose to override this initializer should call super() to ensure that they get the required platform bits attached. super(SubClass, self).__init__(). Policies that require runtime @@ -122,7 +122,9 @@ any third party. self.probe_runtime = probe_runtime self.package_manager = PackageManager() self.valid_subclasses = [IndependentPlugin] - self.set_exec_path() + self.remote_exec = remote_exec + if not self.remote_exec: + self.set_exec_path() self.sysroot = sysroot self.register_presets(GENERIC_PRESETS) diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py index c69fc1e7..9c91a918 100644 --- a/sos/policies/distros/__init__.py +++ b/sos/policies/distros/__init__.py @@ -68,9 +68,11 @@ class LinuxPolicy(Policy): container_version_command = None container_authfile = None - def __init__(self, sysroot=None, init=None, probe_runtime=True): + def __init__(self, sysroot=None, init=None, probe_runtime=True, + remote_exec=None): super(LinuxPolicy, self).__init__(sysroot=sysroot, - probe_runtime=probe_runtime) + probe_runtime=probe_runtime, + remote_exec=remote_exec) if sysroot: self.sysroot = sysroot diff --git a/sos/policies/distros/debian.py b/sos/policies/distros/debian.py index 639fd5eb..41f09428 100644 --- a/sos/policies/distros/debian.py +++ b/sos/policies/distros/debian.py @@ -26,7 +26,8 @@ class DebianPolicy(LinuxPolicy): def __init__(self, sysroot=None, init=None, probe_runtime=True, remote_exec=None): super(DebianPolicy, self).__init__(sysroot=sysroot, init=init, - probe_runtime=probe_runtime) + probe_runtime=probe_runtime, + remote_exec=remote_exec) self.package_manager = DpkgPackageManager(chroot=self.sysroot, remote_exec=remote_exec) self.valid_subclasses += [DebianPlugin] diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py index 4b14abaf..eb75e15b 100644 --- a/sos/policies/distros/redhat.py +++ b/sos/policies/distros/redhat.py @@ -53,7 +53,8 @@ class RedHatPolicy(LinuxPolicy): def __init__(self, sysroot=None, init=None, probe_runtime=True, remote_exec=None): super(RedHatPolicy, self).__init__(sysroot=sysroot, init=init, - probe_runtime=probe_runtime) + probe_runtime=probe_runtime, + remote_exec=remote_exec) self.usrmove = False self.package_manager = RpmPackageManager(chroot=self.sysroot, @@ -76,7 +77,8 @@ class RedHatPolicy(LinuxPolicy): self.PATH = "/sbin:/bin:/usr/sbin:/usr/bin:/root/bin" self.PATH += os.pathsep + "/usr/local/bin" self.PATH += os.pathsep + "/usr/local/sbin" - self.set_exec_path() + if not self.remote_exec: + self.set_exec_path() self.load_presets() @classmethod diff --git a/sos/policies/distros/suse.py b/sos/policies/distros/suse.py index 1c1feff5..b9d4a3b1 100644 --- a/sos/policies/distros/suse.py +++ b/sos/policies/distros/suse.py @@ -25,7 +25,8 @@ class SuSEPolicy(LinuxPolicy): def __init__(self, sysroot=None, init=None, probe_runtime=True, remote_exec=None): super(SuSEPolicy, self).__init__(sysroot=sysroot, init=init, - probe_runtime=probe_runtime) + probe_runtime=probe_runtime, + remote_exec=remote_exec) self.valid_subclasses += [SuSEPlugin, RedHatPlugin] self.usrmove = False -- 2.31.1 From d4383fec5f8a80121aa4f5a37575b37988c51663 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Wed, 1 Dec 2021 12:23:34 +0100 Subject: [PATCH] Add crio runtime and openshift_ovn plugin openshift_ovn plugin collects logs from crio containers Fix get_container_by_name function returning container_id and not name Signed-off-by: Nadia Pinaeva --- sos/policies/distros/__init__.py | 4 +- sos/policies/runtimes/__init__.py | 2 +- sos/policies/runtimes/crio.py | 79 +++++++++++++++++++++++++++++ sos/report/plugins/openshift_ovn.py | 41 +++++++++++++++ 4 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 sos/policies/runtimes/crio.py create mode 100644 sos/report/plugins/openshift_ovn.py diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py index 9c91a918..7acc7e49 100644 --- a/sos/policies/distros/__init__.py +++ b/sos/policies/distros/__init__.py @@ -17,6 +17,7 @@ from sos import _sos as _ from sos.policies import Policy from sos.policies.init_systems import InitSystem from sos.policies.init_systems.systemd import SystemdInit +from sos.policies.runtimes.crio import CrioContainerRuntime from sos.policies.runtimes.podman import PodmanContainerRuntime from sos.policies.runtimes.docker import DockerContainerRuntime @@ -92,7 +93,8 @@ class LinuxPolicy(Policy): if self.probe_runtime: _crun = [ PodmanContainerRuntime(policy=self), - DockerContainerRuntime(policy=self) + DockerContainerRuntime(policy=self), + CrioContainerRuntime(policy=self) ] for runtime in _crun: if runtime.check_is_active(): diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py index 2e60ad23..4e9a45c1 100644 --- a/sos/policies/runtimes/__init__.py +++ b/sos/policies/runtimes/__init__.py @@ -100,7 +100,7 @@ class ContainerRuntime(): return None for c in self.containers: if re.match(name, c[1]): - return c[1] + return c[0] return None def get_images(self): diff --git a/sos/policies/runtimes/crio.py b/sos/policies/runtimes/crio.py new file mode 100644 index 00000000..980c3ea1 --- /dev/null +++ b/sos/policies/runtimes/crio.py @@ -0,0 +1,79 @@ +# Copyright (C) 2021 Red Hat, Inc., Nadia Pinaeva + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.policies.runtimes import ContainerRuntime +from sos.utilities import sos_get_command_output +from pipes import quote + + +class CrioContainerRuntime(ContainerRuntime): + """Runtime class to use for systems running crio""" + + name = 'crio' + binary = 'crictl' + + def get_containers(self, get_all=False): + """Get a list of containers present on the system. + + :param get_all: If set, include stopped containers as well + :type get_all: ``bool`` + """ + containers = [] + _cmd = "%s ps %s" % (self.binary, '-a' if get_all else '') + if self.active: + out = sos_get_command_output(_cmd, chroot=self.policy.sysroot) + if out['status'] == 0: + for ent in out['output'].splitlines()[1:]: + ent = ent.split() + # takes the form (container_id, container_name) + containers.append((ent[0], ent[-3])) + return containers + + def get_images(self): + """Get a list of images present on the system + + :returns: A list of 2-tuples containing (image_name, image_id) + :rtype: ``list`` + """ + images = [] + if self.active: + out = sos_get_command_output("%s images" % self.binary, + chroot=self.policy.sysroot) + if out['status'] == 0: + for ent in out['output'].splitlines(): + ent = ent.split() + # takes the form (image_name, image_id) + images.append((ent[0] + ':' + ent[1], ent[2])) + return images + + def fmt_container_cmd(self, container, cmd, quotecmd): + """Format a command to run inside a container using the runtime + + :param container: The name or ID of the container in which to run + :type container: ``str`` + + :param cmd: The command to run inside `container` + :type cmd: ``str`` + + :param quotecmd: Whether the cmd should be quoted. + :type quotecmd: ``bool`` + + :returns: Formatted string to run `cmd` inside `container` + :rtype: ``str`` + """ + if quotecmd: + quoted_cmd = quote(cmd) + else: + quoted_cmd = cmd + container_id = self.get_container_by_name(container) + return "%s %s %s" % (self.run_cmd, container_id, + quoted_cmd) if container_id is not None else '' + +# vim: set et ts=4 sw=4 : diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py new file mode 100644 index 00000000..168f1dd3 --- /dev/null +++ b/sos/report/plugins/openshift_ovn.py @@ -0,0 +1,41 @@ +# Copyright (C) 2021 Nadia Pinaeva + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.report.plugins import Plugin, RedHatPlugin + + +class OpenshiftOVN(Plugin, RedHatPlugin): + """This plugin is used to collect OCP 4.x OVN logs. + """ + short_desc = 'Openshift OVN' + plugin_name = "openshift_ovn" + containers = ('ovnkube-master', 'ovn-ipsec') + profiles = ('openshift',) + + def setup(self): + self.add_copy_spec([ + "/var/lib/ovn/etc/ovnnb_db.db", + "/var/lib/ovn/etc/ovnsb_db.db", + "/var/lib/openvswitch/etc/keys", + "/var/log/openvswitch/libreswan.log", + "/var/log/openvswitch/ovs-monitor-ipsec.log" + ]) + + self.add_cmd_output([ + 'ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ' + + 'cluster/status OVN_Northbound', + 'ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ' + + 'cluster/status OVN_Southbound'], + container='ovnkube-master') + self.add_cmd_output([ + 'ovs-appctl -t ovs-monitor-ipsec tunnels/show', + 'ipsec status', + 'certutil -L -d sql:/etc/ipsec.d'], + container='ovn-ipsec') -- 2.31.1 From 17218ca17e49cb8491c688095b56503d041c1ae9 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 9 Dec 2021 15:07:23 -0500 Subject: [PATCH 1/3] [ocp] Skip project setup whenever oc transport is not used Fixes a corner case where we would still attempt to create a new project within the OCP cluster even if we weren't using the `oc` transport. Signed-off-by: Jake Hunsaker --- sos/collector/clusters/ocp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py index 2ce4e977..56f8cc47 100644 --- a/sos/collector/clusters/ocp.py +++ b/sos/collector/clusters/ocp.py @@ -123,7 +123,9 @@ class ocp(Cluster): return nodes def set_transport_type(self): - if is_executable('oc') or self.opts.transport == 'oc': + if self.opts.transport != 'auto': + return self.opts.transport + if is_executable('oc'): return 'oc' self.log_info("Local installation of 'oc' not found or is not " "correctly configured. Will use ControlPersist.") -- 2.31.1 From 9faabdc3df08516a91c1adb3326bbf43db155f71 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 9 Dec 2021 16:04:39 -0500 Subject: [PATCH 2/3] [crio] Put inspect output in the containers subdir Given the environments where crio is run, having `crictl inspect` output in the main plugin directory can be a bit overwhelming. As such, put this output into a `containers` subdir, and nest container log output in a `containers/logs/` subdir. Signed-off-by: Jake Hunsaker --- sos/report/plugins/crio.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sos/report/plugins/crio.py b/sos/report/plugins/crio.py index cb2c9796..56cf64a7 100644 --- a/sos/report/plugins/crio.py +++ b/sos/report/plugins/crio.py @@ -79,10 +79,11 @@ class CRIO(Plugin, RedHatPlugin, UbuntuPlugin): pods = self._get_crio_list(pod_cmd) for container in containers: - self.add_cmd_output("crictl inspect %s" % container) + self.add_cmd_output("crictl inspect %s" % container, + subdir="containers") if self.get_option('logs'): self.add_cmd_output("crictl logs -t %s" % container, - subdir="containers", priority=100) + subdir="containers/logs", priority=100) for image in images: self.add_cmd_output("crictl inspecti %s" % image, subdir="images") -- 2.31.1 From 9118562c47fb521da3eeeed1a8746d45aaa769e7 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 9 Dec 2021 16:06:06 -0500 Subject: [PATCH 3/3] [networking] Put namespaced commands into subdirs Where networking namespaces are used, there tend to be large numbers of namespaces used. This in turn results in sos running and collecting very large numbers of namespaced commands. To aid in consumability, place these collections under a subdir for the namespace under another "namespaces" subdir within the plugin directory. Signed-off-by: Jake Hunsaker --- sos/report/plugins/networking.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/sos/report/plugins/networking.py b/sos/report/plugins/networking.py index 80e24abb..bcb5e6ae 100644 --- a/sos/report/plugins/networking.py +++ b/sos/report/plugins/networking.py @@ -198,6 +198,7 @@ class Networking(Plugin): pred=SoSPredicate(self, cmd_outputs=co6)) else None) for namespace in namespaces: + _subdir = "namespaces/%s" % namespace ns_cmd_prefix = cmd_prefix + namespace + " " self.add_cmd_output([ ns_cmd_prefix + "ip address show", @@ -213,29 +214,27 @@ class Networking(Plugin): ns_cmd_prefix + "netstat -s", ns_cmd_prefix + "netstat %s -agn" % self.ns_wide, ns_cmd_prefix + "nstat -zas", - ], priority=50) + ], priority=50, subdir=_subdir) self.add_cmd_output([ns_cmd_prefix + "iptables-save"], pred=iptables_with_nft, + subdir=_subdir, priority=50) self.add_cmd_output([ns_cmd_prefix + "ip6tables-save"], pred=ip6tables_with_nft, + subdir=_subdir, priority=50) ss_cmd = ns_cmd_prefix + "ss -peaonmi" # --allow-system-changes is handled directly in predicate # evaluation, so plugin code does not need to separately # check for it - self.add_cmd_output(ss_cmd, pred=ss_pred) - - # Collect ethtool commands only when ethtool_namespaces - # is set to true. - if self.get_option("ethtool_namespaces"): - # Devices that exist in a namespace use less ethtool - # parameters. Run this per namespace. - for namespace in self.get_network_namespaces( - self.get_option("namespace_pattern"), - self.get_option("namespaces")): - ns_cmd_prefix = cmd_prefix + namespace + " " + self.add_cmd_output(ss_cmd, pred=ss_pred, subdir=_subdir) + + # Collect ethtool commands only when ethtool_namespaces + # is set to true. + if self.get_option("ethtool_namespaces"): + # Devices that exist in a namespace use less ethtool + # parameters. Run this per namespace. netns_netdev_list = self.exec_cmd( ns_cmd_prefix + "ls -1 /sys/class/net/" ) @@ -250,9 +249,7 @@ class Networking(Plugin): ns_cmd_prefix + "ethtool -i " + eth, ns_cmd_prefix + "ethtool -k " + eth, ns_cmd_prefix + "ethtool -S " + eth - ], priority=50) - - return + ], priority=50, subdir=_subdir) class RedHatNetworking(Networking, RedHatPlugin): -- 2.31.1 From 4bf5f9143c962c839c1d27217ba74127551a5c00 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 17 Dec 2021 11:10:15 -0500 Subject: [PATCH] [transport] Detect retrieval failures and automatically retry If a paritcular attempt to retrieve a remote file fails, we should automatically retry that collection up to a certain point. This provides `sos collect` more resiliency for the collection of sos report archives. This change necessitates a change in how we handle the SoSNode flow for failed sos report retrievals, and as such contains minor fixes to transports to ensure that we do not incorrectly hit exceptions in error handling that were not previously possible with how we exited the SoSNode retrieval flow. Closes: #2777 Signed-off-by: Jake Hunsaker --- sos/collector/__init__.py | 5 +++-- sos/collector/clusters/ocp.py | 1 + sos/collector/sosnode.py | 17 ++++++++++------- sos/collector/transports/__init__.py | 15 ++++++++++++++- sos/collector/transports/local.py | 1 + sos/collector/transports/oc.py | 3 ++- 6 files changed, 31 insertions(+), 11 deletions(-) diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index b825d8fc..a25e794e 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -1221,8 +1221,9 @@ this utility or remote systems that it connects to. def close_all_connections(self): """Close all sessions for nodes""" for client in self.client_list: - self.log_debug('Closing connection to %s' % client.address) - client.disconnect() + if client.connected: + self.log_debug('Closing connection to %s' % client.address) + client.disconnect() def create_cluster_archive(self): """Calls for creation of tar archive then cleans up the temporary diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py index 56f8cc47..ae93ad58 100644 --- a/sos/collector/clusters/ocp.py +++ b/sos/collector/clusters/ocp.py @@ -92,6 +92,7 @@ class ocp(Cluster): % ret['output']) # don't leave the config on a non-existing project self.exec_master_cmd("oc project default") + self.project = None return True def _build_dict(self, nodelist): diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 1341e39f..925f2790 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -751,12 +751,11 @@ class SosNode(): if self.file_exists(path): self.log_info("Copying remote %s to local %s" % (path, destdir)) - self._transport.retrieve_file(path, dest) + return self._transport.retrieve_file(path, dest) else: self.log_debug("Attempting to copy remote file %s, but it " "does not exist on filesystem" % path) return False - return True except Exception as err: self.log_debug("Failed to retrieve %s: %s" % (path, err)) return False @@ -793,16 +792,20 @@ class SosNode(): except Exception: self.log_error('Failed to make archive readable') return False - self.soslog.info('Retrieving sos report from %s' % self.address) + self.log_info('Retrieving sos report from %s' % self.address) self.ui_msg('Retrieving sos report...') - ret = self.retrieve_file(self.sos_path) + try: + ret = self.retrieve_file(self.sos_path) + except Exception as err: + self.log_error(err) + return False if ret: self.ui_msg('Successfully collected sos report') self.file_list.append(self.sos_path.split('/')[-1]) + return True else: - self.log_error('Failed to retrieve sos report') - raise SystemExit - return True + self.ui_msg('Failed to retrieve sos report') + return False else: # sos sometimes fails but still returns a 0 exit code if self.stderr.read(): diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py index 33f2f66d..dcdebdde 100644 --- a/sos/collector/transports/__init__.py +++ b/sos/collector/transports/__init__.py @@ -303,7 +303,20 @@ class RemoteTransport(): :returns: True if file was successfully copied from remote, or False :rtype: ``bool`` """ - return self._retrieve_file(fname, dest) + attempts = 0 + try: + while attempts < 5: + attempts += 1 + ret = self._retrieve_file(fname, dest) + if ret: + return True + self.log_info("File retrieval attempt %s failed" % attempts) + self.log_info("File retrieval failed after 5 attempts") + return False + except Exception as err: + self.log_error("Exception encountered during retrieval attempt %s " + "for %s: %s" % (attempts, fname, err)) + raise err def _retrieve_file(self, fname, dest): raise NotImplementedError("Transport %s does not support file copying" diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py index a4897f19..2996d524 100644 --- a/sos/collector/transports/local.py +++ b/sos/collector/transports/local.py @@ -35,6 +35,7 @@ class LocalTransport(RemoteTransport): def _retrieve_file(self, fname, dest): self.log_debug("Moving %s to %s" % (fname, dest)) shutil.copy(fname, dest) + return True def _format_cmd_for_exec(self, cmd): return cmd diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py index de044ccb..720dd61d 100644 --- a/sos/collector/transports/oc.py +++ b/sos/collector/transports/oc.py @@ -202,7 +202,8 @@ class OCTransport(RemoteTransport): env, False) def _disconnect(self): - os.unlink(self.pod_tmp_conf) + if os.path.exists(self.pod_tmp_conf): + os.unlink(self.pod_tmp_conf) removed = self.run_oc("delete pod %s" % self.pod_name) if "deleted" not in removed['output']: self.log_debug("Calling delete on pod '%s' failed: %s" -- 2.31.1 From 304c9ef6c1015f1ebe1a8d569c3e16bada4d23f1 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Tue, 4 Jan 2022 16:37:09 +0100 Subject: [PATCH] Add cluster cleanup for all exit() calls Signed-off-by: Nadia Pinaeva --- sos/collector/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index a25e794e1..ffd63bc63 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -443,6 +443,7 @@ def add_parser_options(cls, parser): def exit(self, msg, error=1): """Used to safely terminate if sos-collector encounters an error""" + self.cluster.cleanup() self.log_error(msg) try: self.close_all_connections() @@ -858,8 +858,9 @@ class SoSCollector(SoSComponent): "CTRL-C to quit\n") self.ui_log.info("") except KeyboardInterrupt: - self.cluster.cleanup() self.exit("Exiting on user cancel", 130) + except Exception as e: + self.exit(repr(e), 1) def configure_sos_cmd(self): """Configures the sosreport command that is run on the nodes""" @@ -1185,7 +1185,6 @@ def collect(self): arc_name = self.create_cluster_archive() else: msg = 'No sosreports were collected, nothing to archive...' - self.cluster.cleanup() self.exit(msg, 1) if self.opts.upload and self.policy.get_upload_url(): From 2c3a647817dfbac36be3768acf6026e91d1a6e8f Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 21 Dec 2021 14:20:19 -0500 Subject: [PATCH] [options] Allow spaces in --keywords values in sos.conf The `--keywords` option supports spaces to allow for obfuscated phrases, not just words. This however breaks if a phrase is added to the config file *before* a run with the phrase in the cmdline option, due to the safeguards we have for all other values that do not support spaces. Add a check in our flow for updating options from the config file to not replace illegal spaces if we're checking the `keywords` option, for which spaces are legal. Signed-off-by: Jake Hunsaker --- sos/options.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sos/options.py b/sos/options.py index 7bea3ffc1..4846a5096 100644 --- a/sos/options.py +++ b/sos/options.py @@ -200,7 +200,10 @@ def _update_from_section(section, config): odict[rename_opts[key]] = odict.pop(key) # set the values according to the config file for key, val in odict.items(): - if isinstance(val, str): + # most option values do not tolerate spaces, special + # exception however for --keywords which we do want to + # support phrases, and thus spaces, for + if isinstance(val, str) and key != 'keywords': val = val.replace(' ', '') if key not in self.arg_defaults: # read an option that is not loaded by the current From f912fc9e31b406a24b7a9c012e12cda920632051 Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Mon, 10 Jan 2022 14:13:42 +0100 Subject: [PATCH] [collect] Deal None sos_version properly In case collector cluster hits an error during init, sos_version is None what LooseVersion can't compare properly and raises exception 'LooseVersion' object has no attribute 'version' Related: #2822 Signed-off-by: Pavel Moravec --- sos/collector/sosnode.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 925f27909..7bbe0cd1f 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -382,7 +382,8 @@ def check_sos_version(self, ver): given ver. This means that if the installed version is greater than ver, this will still return True """ - return LooseVersion(self.sos_info['version']) >= ver + return self.sos_info['version'] is not None and \ + LooseVersion(self.sos_info['version']) >= ver def is_installed(self, pkg): """Checks if a given package is installed on the node""" From 0c67e8ebaeef17dac3b5b9e42a59b4e673e4403b Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Mon, 10 Jan 2022 14:17:13 +0100 Subject: [PATCH] [collector] Cleanup cluster only if defined In case cluster init fails, self.cluster = None and its cleanup must be skipped. Resolves: #2822 Signed-off-by: Pavel Moravec --- sos/collector/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index ffd63bc63..3e22bca3e 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -443,7 +443,8 @@ def add_parser_options(cls, parser): def exit(self, msg, error=1): """Used to safely terminate if sos-collector encounters an error""" - self.cluster.cleanup() + if self.cluster: + self.cluster.cleanup() self.log_error(msg) try: self.close_all_connections() From ef27a6ee6737c23b3beda1437768a91679024697 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Fri, 3 Dec 2021 15:41:35 +0100 Subject: [PATCH] Add journal logs for NetworkManager plugin Signed-off-by: Nadia Pinaeva --- sos/report/plugins/networkmanager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sos/report/plugins/networkmanager.py b/sos/report/plugins/networkmanager.py index 30f99a1140..3aca0c7460 100644 --- a/sos/report/plugins/networkmanager.py +++ b/sos/report/plugins/networkmanager.py @@ -25,6 +25,8 @@ def setup(self): "/etc/NetworkManager/dispatcher.d" ]) + self.add_journal(units="NetworkManager") + # There are some incompatible changes in nmcli since # the release of NetworkManager >= 0.9.9. In addition, # NetworkManager >= 0.9.9 will use the long names of From 9eb60f0bb6ea36f9c1cf099c1fd20cf3938b4b26 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Mon, 17 Jan 2022 11:11:24 -0500 Subject: [PATCH] [clean] Ignore empty items for obfuscation better This commit fixes a couple edge cases where an item empty (e.g. and empty string '') was not being properly ignored, which in turned caused failures in writing both obfuscations and replacement files. This should no longer be possible. Signed-off-by: Jake Hunsaker --- sos/cleaner/mappings/__init__.py | 5 ++++- sos/cleaner/mappings/username_map.py | 2 +- sos/cleaner/parsers/username_parser.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py index 5cf5c8b2d..48171a052 100644 --- a/sos/cleaner/mappings/__init__.py +++ b/sos/cleaner/mappings/__init__.py @@ -49,6 +49,8 @@ def add(self, item): :param item: The plaintext object to obfuscate """ with self.lock: + if not item: + return item self.dataset[item] = self.sanitize_item(item) return self.dataset[item] @@ -67,7 +69,8 @@ def get(self, item): """Retrieve an item's obfuscated counterpart from the map. If the item does not yet exist in the map, add it by generating one on the fly """ - if self.ignore_item(item) or self.item_in_dataset_values(item): + if (not item or self.ignore_item(item) or + self.item_in_dataset_values(item)): return item if item not in self.dataset: return self.add(item) diff --git a/sos/cleaner/mappings/username_map.py b/sos/cleaner/mappings/username_map.py index 7ecccd7bc..ed6dc0912 100644 --- a/sos/cleaner/mappings/username_map.py +++ b/sos/cleaner/mappings/username_map.py @@ -24,7 +24,7 @@ class SoSUsernameMap(SoSMap): def load_names_from_options(self, opt_names): for name in opt_names: - if name not in self.dataset.keys(): + if name and name not in self.dataset.keys(): self.add(name) def sanitize_item(self, username): diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 49640f7fd..2853c860f 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -55,7 +55,7 @@ def load_usernames_into_map(self, content): user = line.split()[0] except Exception: continue - if user.lower() in self.skip_list: + if not user or user.lower() in self.skip_list: continue users.add(user) for each in users: From ed618678fd3d07e68e1a430eb7d225a9701332e0 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 13 Jan 2022 13:52:34 -0500 Subject: [PATCH] [clean,parsers] Build regex lists for static items only once For parsers such as the username and keyword parsers, we don't discover new items through parsing archives - these parsers use static lists determined before we begin the actual obfuscation process. As such, we can build a list of regexes for these static items once, and then reference those regexes during execution, rather than rebuilding the regex for each of these items for every obfuscation. For use cases where hundreds of items, e.g. hundreds of usernames, are being obfuscated this results in a significant performance increase. Individual per-file gains are minor - fractions of a second - however these gains build up over the course of the hundreds to thousands of files a typical archive can be expected to contain. Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 9 +++++++++ sos/cleaner/parsers/__init__.py | 10 ++++++++++ sos/cleaner/parsers/keyword_parser.py | 15 ++++++++++----- sos/cleaner/parsers/username_parser.py | 14 ++++++++------ tests/unittests/cleaner_tests.py | 1 + 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 5686e2131..b76bef644 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -294,6 +294,7 @@ def execute(self): # we have at least one valid target to obfuscate self.completed_reports = [] self.preload_all_archives_into_maps() + self.generate_parser_item_regexes() self.obfuscate_report_paths() if not self.completed_reports: @@ -498,6 +499,14 @@ def _replace_obfuscated_archives(self): shutil.move(archive.final_archive_path, dest) archive.final_archive_path = dest_name + def generate_parser_item_regexes(self): + """For the parsers that use prebuilt lists of items, generate those + regexes now since all the parsers should be preloaded by the archive(s) + as well as being handed cmdline options and mapping file configuration. + """ + for parser in self.parsers: + parser.generate_item_regexes() + def preload_all_archives_into_maps(self): """Before doing the actual obfuscation, if we have multiple archives to obfuscate then we need to preload each of them into the mappings diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index e62fd9384..6def863a6 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -46,9 +46,19 @@ class SoSCleanerParser(): map_file_key = 'unset' def __init__(self, config={}): + self.regexes = {} if self.map_file_key in config: self.mapping.conf_update(config[self.map_file_key]) + def generate_item_regexes(self): + """Generate regexes for items the parser will be searching for + repeatedly without needing to generate them for every file and/or line + we process + + Not used by all parsers. + """ + pass + def parse_line(self, line): """This will be called for every line in every file we process, so that every parser has a chance to scrub everything. diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py index 694c6073a..362a1929e 100644 --- a/sos/cleaner/parsers/keyword_parser.py +++ b/sos/cleaner/parsers/keyword_parser.py @@ -9,6 +9,7 @@ # See the LICENSE file in the source distribution for further information. import os +import re from sos.cleaner.parsers import SoSCleanerParser from sos.cleaner.mappings.keyword_map import SoSKeywordMap @@ -33,16 +34,20 @@ def __init__(self, config, keywords=None, keyword_file=None): # pre-generate an obfuscation mapping for each keyword # this is necessary for cases where filenames are being # obfuscated before or instead of file content - self.mapping.get(keyword) + self.mapping.get(keyword.lower()) self.user_keywords.append(keyword) if keyword_file and os.path.exists(keyword_file): with open(keyword_file, 'r') as kwf: self.user_keywords.extend(kwf.read().splitlines()) + def generate_item_regexes(self): + for kw in self.user_keywords: + self.regexes[kw] = re.compile(kw, re.I) + def parse_line(self, line): count = 0 - for keyword in sorted(self.user_keywords, reverse=True): - if keyword in line: - line = line.replace(keyword, self.mapping.get(keyword)) - count += 1 + for kwrd, reg in sorted(self.regexes.items(), key=len, reverse=True): + if reg.search(line): + line, _count = reg.subn(self.mapping.get(kwrd.lower()), line) + count += _count return line, count diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 3208a6557..49640f7fd 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -61,12 +61,14 @@ def load_usernames_into_map(self, content): for each in users: self.mapping.get(each) + def generate_item_regexes(self): + for user in self.mapping.dataset: + self.regexes[user] = re.compile(user, re.I) + def parse_line(self, line): count = 0 - for username in sorted(self.mapping.dataset.keys(), reverse=True): - _reg = re.compile(username, re.I) - if _reg.search(line): - line, count = _reg.subn( - self.mapping.get(username.lower()), line - ) + for user, reg in sorted(self.regexes.items(), key=len, reverse=True): + if reg.search(line): + line, _count = reg.subn(self.mapping.get(user.lower()), line) + count += _count return line, count diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py index cb20772fd..b59eade9a 100644 --- a/tests/unittests/cleaner_tests.py +++ b/tests/unittests/cleaner_tests.py @@ -105,6 +105,7 @@ def setUp(self): self.host_parser = SoSHostnameParser(config={}, opt_domains='foobar.com') self.kw_parser = SoSKeywordParser(config={}, keywords=['foobar']) self.kw_parser_none = SoSKeywordParser(config={}) + self.kw_parser.generate_item_regexes() def test_ip_parser_valid_ipv4_line(self): line = 'foobar foo 10.0.0.1/24 barfoo bar' From 2ae16e0245e1b01b8547e507abb69c11871a8467 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Mon, 21 Feb 2022 14:37:09 -0500 Subject: [PATCH] [sosnode] Handle downstream versioning for runtime option check First, adds parsing and formatting for an sos installation's release version according to the loaded package manager for that node. Adds a fallback version check for 4.2-13 for RHEL downstreams that backport the `container-runtime` option into sos-4.2. Carry this in upstream to account for use cases where a workstation used to run `collect` from may be from a different stream than those used by cluster nodes. Signed-off-by: Jake Hunsaker --- sos/collector/sosnode.py | 60 ++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 7bbe0cd1..d9b998b0 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -275,21 +275,34 @@ class SosNode(): def _load_sos_info(self): """Queries the node for information about the installed version of sos """ + ver = None + rel = None if self.host.container_version_command is None: pkg = self.host.package_manager.pkg_version(self.host.sos_pkg_name) if pkg is not None: ver = '.'.join(pkg['version']) - self.sos_info['version'] = ver + if pkg['release']: + rel = pkg['release'] + else: # use the containerized policy's command pkgs = self.run_command(self.host.container_version_command, use_container=True, need_root=True) if pkgs['status'] == 0: - ver = pkgs['output'].strip().split('-')[1] - if ver: - self.sos_info['version'] = ver - else: - self.sos_info['version'] = None + _, ver, rel = pkgs['output'].strip().split('-') + + if ver: + if len(ver.split('.')) == 2: + # safeguard against maintenance releases throwing off the + # comparison by LooseVersion + ver += '.0' + try: + ver += '-%s' % rel.split('.')[0] + except Exception as err: + self.log_debug("Unable to fully parse sos release: %s" % err) + + self.sos_info['version'] = ver + if self.sos_info['version']: self.log_info('sos version is %s' % self.sos_info['version']) else: @@ -381,9 +394,37 @@ class SosNode(): """Checks to see if the sos installation on the node is AT LEAST the given ver. This means that if the installed version is greater than ver, this will still return True + + :param ver: Version number we are trying to verify is installed + :type ver: ``str`` + + :returns: True if installed version is at least ``ver``, else False + :rtype: ``bool`` """ - return self.sos_info['version'] is not None and \ - LooseVersion(self.sos_info['version']) >= ver + def _format_version(ver): + # format the version we're checking to a standard form of X.Y.Z-R + try: + _fver = ver.split('-')[0] + _rel = '' + if '-' in ver: + _rel = '-' + ver.split('-')[-1].split('.')[0] + if len(_fver.split('.')) == 2: + _fver += '.0' + + return _fver + _rel + except Exception as err: + self.log_debug("Unable to format '%s': %s" % (ver, err)) + return ver + + _ver = _format_version(ver) + + try: + _node_ver = LooseVersion(self.sos_info['version']) + _test_ver = LooseVersion(_ver) + return _node_ver >= _test_ver + except Exception as err: + self.log_error("Error checking sos version: %s" % err) + return False def is_installed(self, pkg): """Checks if a given package is installed on the node""" @@ -587,7 +628,8 @@ class SosNode(): sos_opts.append('--cmd-timeout=%s' % quote(str(self.opts.cmd_timeout))) - if self.check_sos_version('4.3'): + # handle downstream versions that backported this option + if self.check_sos_version('4.3') or self.check_sos_version('4.2-13'): if self.opts.container_runtime != 'auto': sos_opts.append( "--container-runtime=%s" % self.opts.container_runtime -- 2.34.1