From fb57ab1960469b00a391105843e6065ff34f3d69 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Wed, 25 May 2022 03:44:11 -0400 Subject: [PATCH] import sos-4.2-19.el8_6 --- SOURCES/sos-bz2036697-ocp-backports.patch | 560 +++++++ SOURCES/sos-bz2071825-merged-8.6.z.patch | 1797 +++++++++++++++++++++ SPECS/sos.spec | 31 +- 3 files changed, 2387 insertions(+), 1 deletion(-) create mode 100644 SOURCES/sos-bz2071825-merged-8.6.z.patch diff --git a/SOURCES/sos-bz2036697-ocp-backports.patch b/SOURCES/sos-bz2036697-ocp-backports.patch index 3e53e93..9888165 100644 --- a/SOURCES/sos-bz2036697-ocp-backports.patch +++ b/SOURCES/sos-bz2036697-ocp-backports.patch @@ -3392,6 +3392,50 @@ index 8a9dbd7a..ab7f23cc 100644 -- 2.31.1 +From a7ffacd855fcf2e9a136c6946744cbc99bc91272 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Tue, 19 Oct 2021 14:31:37 -0400 +Subject: [PATCH] [Plugin] Wrap add_service_status to add_cmd_output + +The changes to allow writing command output to a file highlighted a +short coming in add_service_status - by wrapping to `_add_cmd_output()` +instead of `add_cmd_output()`, we are not applying the default values +for kwarg parameters and thus potentially causing undesired behavior +during the actual collection. + +Fix this by wrapping to `add_cmd_output()`, which will then in turn call +`_add_cmd_output()`. + +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/__init__.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index 38529a9d..98f163ab 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -2493,7 +2493,7 @@ class Plugin(): + :param services: Service name(s) to collect statuses for + :type services: ``str`` or a ``list`` of strings + +- :param kwargs: Optional arguments to pass to _add_cmd_output ++ :param kwargs: Optional arguments to pass to add_cmd_output + (timeout, predicate, suggest_filename,..) 
+ + """ +@@ -2508,7 +2508,7 @@ class Plugin(): + return + + for service in services: +- self._add_cmd_output(cmd="%s %s" % (query, service), **kwargs) ++ self.add_cmd_output("%s %s" % (query, service), **kwargs) + + def add_journal(self, units=None, boot=None, since=None, until=None, + lines=None, allfields=False, output=None, +-- +2.27.0 + From 38a0533de3dd2613eefcc4865a2916e225e3ceed Mon Sep 17 00:00:00 2001 From: Pavel Moravec Date: Tue, 9 Nov 2021 19:34:25 +0100 @@ -3526,6 +3570,32 @@ index e84b52da..1bfa741f 100644 -- 2.31.1 +From 83bade79da931225f12a1f40e576884bfe7173dd Mon Sep 17 00:00:00 2001 +From: Michael Cambria +Date: Fri, 12 Nov 2021 09:07:24 -0500 +Subject: [PATCH] Collect "ip route cache" data + +Signed-off-by: Michael Cambria +--- + sos/report/plugins/networking.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/sos/report/plugins/networking.py b/sos/report/plugins/networking.py +index f2bdca06..cc93c9e9 100644 +--- a/sos/report/plugins/networking.py ++++ b/sos/report/plugins/networking.py +@@ -95,6 +95,8 @@ class Networking(Plugin): + "networkctl status -a", + "ip route show table all", + "ip -6 route show table all", ++ "ip -d route show cache", ++ "ip -d -6 route show cache", + "ip -4 rule", + "ip -6 rule", + "ip -s -d link", +-- +2.27.0 + From 8bf602108f75db10e449eff5e2266c6466504086 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Thu, 2 Dec 2021 16:30:44 +0100 @@ -5024,6 +5094,496 @@ index cb20772fd..b59eade9a 100644 def test_ip_parser_valid_ipv4_line(self): line = 'foobar foo 10.0.0.1/24 barfoo bar' +From 134451fe8fc80c67b8714713ab49c55245fd7727 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 20 Jan 2022 10:21:32 -0500 +Subject: [PATCH 1/2] [Plugin] Add support for containers to `add_copy_spec()` + +This commit adds the ability for `add_copy_spec()` to copy files from +containers running on the host via a supported container runtime. + +`add_copy_spec()` now has a `container` parameter which, if set, will +generate the command needed to copy a file from a container to our temp +directory. This will be collected after host files but before command +collection. + +Runtimes have been updated with a `get_copy_command()` method that will +return the command string needed to copy a given file from a given +container to a given path on the host. Note that the `crio` runtime does +not currently provide a copy mechanism like `docker` or `podman`, so +file collections from containers will not be succesful on hosts using +that as their default runtime. + +Finally, the manifest entries for plugins have been updated with a new +`containers` dict field whose entries are container names that have been +collected from. Those fields are also dicts having the same `files` and +`commands` keys/content as those in the plugin entry directly. 
+ +Closes: #2439 + +Signed-off-by: Jake Hunsaker +--- + sos/policies/runtimes/__init__.py | 32 ++++++ + sos/policies/runtimes/crio.py | 3 + + sos/policies/runtimes/docker.py | 3 + + sos/report/plugins/__init__.py | 180 +++++++++++++++++++++++++----- + 4 files changed, 189 insertions(+), 29 deletions(-) + +diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py +index 4e9a45c1..5ac67354 100644 +--- a/sos/policies/runtimes/__init__.py ++++ b/sos/policies/runtimes/__init__.py +@@ -69,6 +69,12 @@ class ContainerRuntime(): + return True + return False + ++ def check_can_copy(self): ++ """Check if the runtime supports copying files out of containers and ++ onto the host filesystem ++ """ ++ return True ++ + def get_containers(self, get_all=False): + """Get a list of containers present on the system. + +@@ -199,5 +205,31 @@ class ContainerRuntime(): + """ + return "%s logs -t %s" % (self.binary, container) + ++ def get_copy_command(self, container, path, dest, sizelimit=None): ++ """Generate the command string used to copy a file out of a container ++ by way of the runtime. ++ ++ :param container: The name or ID of the container ++ :type container: ``str`` ++ ++ :param path: The path to copy from the container. Note that at ++ this time, no supported runtime supports globbing ++ :type path: ``str`` ++ ++ :param dest: The destination on the *host* filesystem to write ++ the file to ++ :type dest: ``str`` ++ ++ :param sizelimit: Limit the collection to the last X bytes of the ++ file at PATH ++ :type sizelimit: ``int`` ++ ++ :returns: Formatted runtime command to copy a file from a container ++ :rtype: ``str`` ++ """ ++ if sizelimit: ++ return "%s %s tail -c %s %s" % (self.run_cmd, container, sizelimit, ++ path) ++ return "%s cp %s:%s %s" % (self.binary, container, path, dest) + + # vim: set et ts=4 sw=4 : +diff --git a/sos/policies/runtimes/crio.py b/sos/policies/runtimes/crio.py +index 980c3ea1..55082d07 100644 +--- a/sos/policies/runtimes/crio.py ++++ b/sos/policies/runtimes/crio.py +@@ -19,6 +19,9 @@ class CrioContainerRuntime(ContainerRuntime): + name = 'crio' + binary = 'crictl' + ++ def check_can_copy(self): ++ return False ++ + def get_containers(self, get_all=False): + """Get a list of containers present on the system. 
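As a usage sketch of `get_copy_command()` defined above: assuming a podman-backed runtime where `binary` is 'podman' and `run_cmd` expands to 'podman exec', and with an invented container name and paths, the generated strings look like:

    rt.get_copy_command('web01', '/var/log/app.log', '/tmp/dest')
    # -> 'podman cp web01:/var/log/app.log /tmp/dest'

    rt.get_copy_command('web01', '/var/log/app.log', '/tmp/dest',
                        sizelimit=1048576)
    # -> 'podman exec web01 tail -c 1048576 /var/log/app.log'

Note that the sizelimit form writes to stdout rather than to `dest`, which is why the caller has to handle tailed output separately from a plain copy.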
+ +diff --git a/sos/policies/runtimes/docker.py b/sos/policies/runtimes/docker.py +index e81f580e..c0cc156c 100644 +--- a/sos/policies/runtimes/docker.py ++++ b/sos/policies/runtimes/docker.py +@@ -27,4 +27,7 @@ class DockerContainerRuntime(ContainerRuntime): + return True + return False + ++ def check_can_copy(self): ++ return self.check_is_active(sysroot=self.policy.sysroot) ++ + # vim: set et ts=4 sw=4 : +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index ca58c22c..0bdc1632 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -487,6 +487,7 @@ + self.commons = commons + self.forbidden_paths = [] + self.copy_paths = set() ++ self.container_copy_paths = [] + self.copy_strings = [] + self.collect_cmds = [] + self.sysroot = commons['sysroot'] +@@ -538,6 +539,7 @@ + self.manifest.add_field('command_timeout', self.cmdtimeout) + self.manifest.add_list('commands', []) + self.manifest.add_list('files', []) ++ self.manifest.add_field('containers', {}) + + def timeout_from_options(self, optname, plugoptname, default_timeout): + """Returns either the default [plugin|cmd] timeout value, the value as +@@ -1558,7 +1560,7 @@ class Plugin(): + self.manifest.files.append(manifest_data) + + def add_copy_spec(self, copyspecs, sizelimit=None, maxage=None, +- tailit=True, pred=None, tags=[]): ++ tailit=True, pred=None, tags=[], container=None): + """Add a file, directory, or regex matching filepaths to the archive + + :param copyspecs: A file, directory, or regex matching filepaths +@@ -1583,10 +1585,17 @@ class Plugin(): + for this collection + :type tags: ``str`` or a ``list`` of strings + ++ :param container: Container(s) from which this file should be copied ++ :type container: ``str`` or a ``list`` of strings ++ + `copyspecs` will be expanded and/or globbed as appropriate. Specifying + a directory here will cause the plugin to attempt to collect the entire + directory, recursively. + ++ If `container` is specified, `copyspecs` may only be explicit paths, ++ not globs as currently container runtimes do not support glob expansion ++ as part of the copy operation. ++ + Note that `sizelimit` is applied to each `copyspec`, not each file + individually. 
For example, a copyspec of + ``['/etc/foo', '/etc/bar.conf']`` and a `sizelimit` of 25 means that +@@ -1623,28 +1632,79 @@ class Plugin(): + if isinstance(tags, str): + tags = [tags] + ++ def get_filename_tag(fname): ++ """Generate a tag to add for a single file copyspec ++ ++ This tag will be set to the filename, minus any extensions ++ except '.conf' which will be converted to '_conf' ++ """ ++ fname = fname.replace('-', '_') ++ if fname.endswith('.conf'): ++ return fname.replace('.', '_') ++ return fname.split('.')[0] ++ + for copyspec in copyspecs: + if not (copyspec and len(copyspec)): + return False + +- if self.use_sysroot(): +- copyspec = self.path_join(copyspec) +- +- files = self._expand_copy_spec(copyspec) ++ if not container: ++ if self.use_sysroot(): ++ copyspec = self.path_join(copyspec) ++ files = self._expand_copy_spec(copyspec) ++ if len(files) == 0: ++ continue ++ else: ++ files = [copyspec] + +- if len(files) == 0: +- continue ++ _spec_tags = [] ++ if len(files) == 1: ++ _spec_tags = [get_filename_tag(files[0].split('/')[-1])] + +- def get_filename_tag(fname): +- """Generate a tag to add for a single file copyspec ++ _spec_tags.extend(tags) ++ _spec_tags = list(set(_spec_tags)) + +- This tag will be set to the filename, minus any extensions +- except '.conf' which will be converted to '_conf' +- """ +- fname = fname.replace('-', '_') +- if fname.endswith('.conf'): +- return fname.replace('.', '_') +- return fname.split('.')[0] ++ if container: ++ if isinstance(container, str): ++ container = [container] ++ for con in container: ++ if not self.container_exists(con): ++ continue ++ _tail = False ++ if sizelimit: ++ # to get just the size, stat requires a literal '%s' ++ # which conflicts with python string formatting ++ cmd = "stat -c %s " + copyspec ++ ret = self.exec_cmd(cmd, container=con) ++ if ret['status'] == 0: ++ try: ++ consize = int(ret['output']) ++ if consize > sizelimit: ++ _tail = True ++ except ValueError: ++ self._log_info( ++ "unable to determine size of '%s' in " ++ "container '%s'. Skipping collection." ++ % (copyspec, con) ++ ) ++ continue ++ else: ++ self._log_debug( ++ "stat of '%s' in container '%s' failed, " ++ "skipping collection: %s" ++ % (copyspec, con, ret['output']) ++ ) ++ continue ++ self.container_copy_paths.append( ++ (con, copyspec, sizelimit, _tail, _spec_tags) ++ ) ++ self._log_info( ++ "added collection of '%s' from container '%s'" ++ % (copyspec, con) ++ ) ++ # break out of the normal flow here as container file ++ # copies are done via command execution, not raw cp/mv ++ # operations ++ continue + + # Files hould be sorted in most-recently-modified order, so that + # we collect the newest data first before reaching the limit. 
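The `stat -c %s` construction above deserves a standalone note: `%s` is both stat's size format specifier and Python's substitution token, hence the string concatenation. A minimal sketch of the same size check outside of sos, assuming a podman runtime and invented paths:

    import subprocess

    def needs_tail(container, path, sizelimit):
        # '%s' is stat's 'size in bytes' format; concatenating keeps it
        # literal instead of being consumed by %-formatting
        cmd = 'stat -c %s ' + path
        ret = subprocess.run(['podman', 'exec', container] + cmd.split(),
                             capture_output=True, text=True)
        if ret.returncode != 0:
            return None  # stat failed; treat as 'skip this collection'
        try:
            return int(ret.stdout.strip()) > sizelimit
        except ValueError:
            return None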
+@@ -1668,12 +1728,6 @@ class Plugin(): + return False + return True + +- _spec_tags = [] +- if len(files) == 1: +- _spec_tags = [get_filename_tag(files[0].split('/')[-1])] +- +- _spec_tags.extend(tags) +- + if since or maxage: + files = list(filter(lambda f: time_filter(f), files)) + +@@ -1742,13 +1796,14 @@ class Plugin(): + # should collect the whole file and stop + limit_reached = (sizelimit and current_size == sizelimit) + +- _spec_tags = list(set(_spec_tags)) +- if self.manifest: +- self.manifest.files.append({ +- 'specification': copyspec, +- 'files_copied': _manifest_files, +- 'tags': _spec_tags +- }) ++ if not container: ++ # container collection manifest additions are handled later ++ if self.manifest: ++ self.manifest.files.append({ ++ 'specification': copyspec, ++ 'files_copied': _manifest_files, ++ 'tags': _spec_tags ++ }) + + def add_blockdev_cmd(self, cmds, devices='block', timeout=None, + sizelimit=None, chroot=True, runat=None, env=None, +@@ -2460,6 +2515,30 @@ class Plugin(): + chdir=runat, binary=binary, env=env, + foreground=foreground, stderr=stderr) + ++ def _add_container_file_to_manifest(self, container, path, arcpath, tags): ++ """Adds a file collection to the manifest for a particular container ++ and file path. ++ ++ :param container: The name of the container ++ :type container: ``str`` ++ ++ :param path: The filename from the container filesystem ++ :type path: ``str`` ++ ++ :param arcpath: Where in the archive the file is written to ++ :type arcpath: ``str`` ++ ++ :param tags: Metadata tags for this collection ++ :type tags: ``str`` or ``list`` of strings ++ """ ++ if container not in self.manifest.containers: ++ self.manifest.containers[container] = {'files': [], 'commands': []} ++ self.manifest.containers[container]['files'].append({ ++ 'specification': path, ++ 'files_copied': arcpath, ++ 'tags': tags ++ }) ++ + def _get_container_runtime(self, runtime=None): + """Based on policy and request by the plugin, return a usable + ContainerRuntime if one exists +@@ -2842,6 +2921,48 @@ class Plugin(): + self._do_copy_path(path) + self.generate_copyspec_tags() + ++ def _collect_container_copy_specs(self): ++ """Copy any requested files from containers here. This is done ++ separately from normal file collection as this requires the use of ++ a container runtime. ++ ++ This method will iterate over self.container_copy_paths which is a set ++ of 5-tuples as (container, path, sizelimit, stdout, tags). ++ """ ++ if not self.container_copy_paths: ++ return ++ rt = self._get_container_runtime() ++ if not rt: ++ self._log_info("Cannot collect container based files - no runtime " ++ "is present/active.") ++ return ++ if not rt.check_can_copy(): ++ self._log_info("Loaded runtime '%s' does not support copying " ++ "files from containers. 
Skipping collections.") ++ return ++ for contup in self.container_copy_paths: ++ con, path, sizelimit, tailit, tags = contup ++ self._log_info("collecting '%s' from container '%s'" % (path, con)) ++ ++ arcdest = "sos_containers/%s/%s" % (con, path.lstrip('/')) ++ self.archive.check_path(arcdest, P_FILE) ++ dest = self.archive.dest_path(arcdest) ++ ++ cpcmd = rt.get_copy_command( ++ con, path, dest, sizelimit=sizelimit if tailit else None ++ ) ++ cpret = self.exec_cmd(cpcmd, timeout=10) ++ ++ if cpret['status'] == 0: ++ if tailit: ++ # current runtimes convert files sent to stdout to tar ++ # archives, with no way to control that ++ self.archive.add_string(cpret['output'], arcdest) ++ self._add_container_file_to_manifest(con, path, arcdest, tags) ++ else: ++ self._log_info("error copying '%s' from container '%s': %s" ++ % (path, con, cpret['output'])) ++ + def _collect_cmds(self): + self.collect_cmds.sort(key=lambda x: x.priority) + for soscmd in self.collect_cmds: +@@ -2875,6 +2996,7 @@ class Plugin(): + """Collect the data for a plugin.""" + start = time() + self._collect_copy_specs() ++ self._collect_container_copy_specs() + self._collect_cmds() + self._collect_strings() + fields = (self.name(), time() - start) +-- +2.27.0 + + +From 5114e164e38f6aa09d1efdd30ab5d2e9266272cc Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 20 Jan 2022 11:30:30 -0500 +Subject: [PATCH 2/2] [Plugin] Add container-based command collections in + manifest + +Following the previous commit of adding a `containers` key to a Plugin's +manifest entry, we will now make an entry in the relevant `containers` +entry for commands executed in containers. + +Additionally, we will make a symlink from the relevant `sos_containers/` +path to the collection under `sos_commands/$plugin/`. This should allow +for easier spot analysis of a container by providing a kind of "micro +sos report" for each container collections happen from under the +`sos_containers/` top level directory. + +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/__init__.py | 41 ++++++++++++++++++++++++++++++++-- + 1 file changed, 39 insertions(+), 2 deletions(-) + +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index 0bdc1632..2988be08 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -2019,8 +2019,10 @@ class Plugin(): + if pred is None: + pred = self.get_predicate(cmd=True) + for cmd in cmds: ++ container_cmd = None + if container: + ocmd = cmd ++ container_cmd = (ocmd, container) + cmd = self.fmt_container_cmd(container, cmd) + if not cmd: + self._log_debug("Skipping command '%s' as the requested " +@@ -1805,7 +1807,8 @@ + env=env, binary=binary, sizelimit=sizelimit, + pred=pred, subdir=subdir, tags=tags, + changes=changes, foreground=foreground, +- priority=priority, cmd_as_tag=cmd_as_tag) ++ priority=priority, cmd_as_tag=cmd_as_tag, ++ container_cmd=container_cmd) + + def add_cmd_tags(self, tagdict): + """Retroactively add tags to any commands that have been run by this +@@ -2006,7 +2006,8 @@ + stderr=True, chroot=True, runat=None, env=None, + binary=False, sizelimit=None, subdir=None, + changes=False, foreground=False, tags=[], +- priority=10, cmd_as_tag=False): ++ priority=10, cmd_as_tag=False, ++ container_cmd=False): + """Execute a command and save the output to a file for inclusion in the + report. 
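To make the `sos_containers/` symlink layout described above concrete, here is a sketch recreating the link creation with invented plugin, container, and command names; the '../../../../' depth mirrors the four path components between the link and the archive root:

    import os

    plugin = 'openshift_ovn'
    container = 'ovnkube-node'
    outfn = 'sos_commands/%s/ovs-appctl_ct-zone-list' % plugin

    _cdir = 'sos_containers/%s/sos_commands/%s' % (container, plugin)
    os.makedirs(_cdir, exist_ok=True)
    # relative link back to the real collection under sos_commands/
    os.symlink('../../../../%s' % outfn,
               os.path.join(_cdir, 'ovs-appctl_ct-zone-list'))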
+ +@@ -2362,10 +2365,14 @@ class Plugin(): + os.path.join(self.archive.get_archive_path(), outfn) if outfn else + '' + ) ++ + if self.manifest: + manifest_cmd['filepath'] = outfn + manifest_cmd['run_time'] = run_time + self.manifest.commands.append(manifest_cmd) ++ if container_cmd: ++ self._add_container_cmd_to_manifest(manifest_cmd.copy(), ++ container_cmd) + return result + + def collect_cmd_output(self, cmd, suggest_filename=None, +@@ -2539,6 +2546,36 @@ class Plugin(): + 'tags': tags + }) + ++ def _add_container_cmd_to_manifest(self, manifest, contup): ++ """Adds a command collection to the manifest for a particular container ++ and creates a symlink to that collection from the relevant ++ sos_containers/ location ++ ++ :param manifest: The manifest entry for the command ++ :type manifest: ``dict`` ++ ++ :param contup: A tuple of (original_cmd, container_name) ++ :type contup: ``tuple`` ++ """ ++ ++ cmd, container = contup ++ if container not in self.manifest.containers: ++ self.manifest.containers[container] = {'files': [], 'commands': []} ++ manifest['exec'] = cmd ++ manifest['command'] = cmd.split(' ')[0] ++ manifest['parameters'] = cmd.split(' ')[1:] ++ ++ _cdir = "sos_containers/%s/sos_commands/%s" % (container, self.name()) ++ _outloc = "../../../../%s" % manifest['filepath'] ++ cmdfn = self._mangle_command(cmd) ++ conlnk = "%s/%s" % (_cdir, cmdfn) ++ ++ self.archive.check_path(conlnk, P_FILE) ++ os.symlink(_outloc, self.archive.dest_path(conlnk)) ++ ++ manifest['filepath'] = conlnk ++ self.manifest.containers[container]['commands'].append(manifest) ++ + def _get_container_runtime(self, runtime=None): + """Based on policy and request by the plugin, return a usable + ContainerRuntime if one exists +-- +2.27.0 + From 2ae16e0245e1b01b8547e507abb69c11871a8467 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Mon, 21 Feb 2022 14:37:09 -0500 diff --git a/SOURCES/sos-bz2071825-merged-8.6.z.patch b/SOURCES/sos-bz2071825-merged-8.6.z.patch new file mode 100644 index 0000000..ab6eff1 --- /dev/null +++ b/SOURCES/sos-bz2071825-merged-8.6.z.patch @@ -0,0 +1,1797 @@ +From cc60fa5ee25bffed9203a4f786256185b7fe0115 Mon Sep 17 00:00:00 2001 +From: Nadia Pinaeva +Date: Tue, 15 Mar 2022 11:49:57 +0100 +Subject: [PATCH] Add ovs datapath and groups collection commands Add + ct-zone-list command for openshift-ovn + +Signed-off-by: Nadia Pinaeva +--- + sos/report/plugins/openshift_ovn.py | 4 ++++ + sos/report/plugins/openvswitch.py | 3 +++ + 2 files changed, 7 insertions(+) + +diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py +index 168f1dd3..b4787b8e 100644 +--- a/sos/report/plugins/openshift_ovn.py ++++ b/sos/report/plugins/openshift_ovn.py +@@ -34,6 +34,10 @@ class OpenshiftOVN(Plugin, RedHatPlugin): + 'ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ' + + 'cluster/status OVN_Southbound'], + container='ovnkube-master') ++ self.add_cmd_output([ ++ 'ovs-appctl -t /var/run/ovn/ovn-controller.*.ctl ' + ++ 'ct-zone-list'], ++ container='ovnkube-node') + self.add_cmd_output([ + 'ovs-appctl -t ovs-monitor-ipsec tunnels/show', + 'ipsec status', +diff --git a/sos/report/plugins/openvswitch.py b/sos/report/plugins/openvswitch.py +index 179d1532..159b0bd2 100644 +--- a/sos/report/plugins/openvswitch.py ++++ b/sos/report/plugins/openvswitch.py +@@ -124,6 +124,8 @@ class OpenVSwitch(Plugin): + "ovs-vsctl -t 5 list interface", + # Capture OVS detailed information from all the bridges + "ovs-vsctl -t 5 list bridge", ++ # Capture OVS datapath list ++ "ovs-vsctl -t 5 list datapath", + # 
Capture DPDK queue to pmd mapping + "ovs-appctl dpif-netdev/pmd-rxq-show", + # Capture DPDK pmd stats +@@ -229,6 +231,7 @@ class OpenVSwitch(Plugin): + "ovs-ofctl queue-get-config %s" % br, + "ovs-ofctl queue-stats %s" % br, + "ovs-ofctl show %s" % br, ++ "ovs-ofctl dump-groups %s" % br, + ]) + + # Flow protocols currently supported +-- +2.27.0 + +From af40be92f502b35fa9d39ce4d4fea7d80c367830 Mon Sep 17 00:00:00 2001 +From: Nadia Pinaeva +Date: Tue, 15 Mar 2022 13:09:55 +0100 +Subject: [PATCH] Improve sos collect for OCP: 1. wait for sos tmp project to + be deleted (just calling delete changes project state to Terminating, and + running a new sos collect is not possible before this project is fully + deleted) 2. use --retries flag to copy sos reports from the nodes more + reliably. The flag has been recently added to kubectl, and the most reliable + way to check if it's available or not is to check command error output for + "unknown flag" substring + +Signed-off-by: Nadia Pinaeva +--- + sos/collector/clusters/ocp.py | 5 +++++ + sos/collector/transports/oc.py | 6 +++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index f1714239..9beb2f9b 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -128,6 +128,11 @@ + if not ret['status'] == 0: + self.log_error("Error deleting temporary project: %s" + % ret['output']) ++ ret = self.exec_primary_cmd("oc wait namespace/%s --for=delete " ++ "--timeout=30s" % self.project) ++ if not ret['status'] == 0: ++ self.log_error("Error waiting for temporary project to be " ++ "deleted: %s" % ret['output']) + # don't leave the config on a non-existing project + self.exec_master_cmd("oc project default") + self.project = None +diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py +index 0fc9eee8..90a802b2 100644 +--- a/sos/collector/transports/oc.py ++++ b/sos/collector/transports/oc.py +@@ -231,5 +231,9 @@ class OCTransport(RemoteTransport): + % (self.project, self.pod_name)) + + def _retrieve_file(self, fname, dest): +- cmd = self.run_oc("cp %s:%s %s" % (self.pod_name, fname, dest)) ++ # check if --retries flag is available for given version of oc ++ result = self.run_oc("cp --retries", stderr=True) ++ flags = '' if "unknown flag" in result["output"] else '--retries=5' ++ cmd = self.run_oc("cp %s %s:%s %s" ++ % (flags, self.pod_name, fname, dest)) + return cmd['status'] == 0 +-- +2.27.0 + +From 3b0676b90ff65f20eaba3062775ff72b89386ffc Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Tue, 22 Mar 2022 14:25:24 -0400 +Subject: [PATCH] [Plugin] Allow plugins to define default command environment + vars + +Adds the ability for plugins to define a default set of environment vars +to pass to all commands executed by the plugin. This may be done either +via the new `set_default_cmd_environment()` or +`add_default_cmd_environment()` methods. The former will override any +previously set values, whereas the latter will add/update/modify any +existing values. 
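As a usage sketch inside a hypothetical plugin's setup() (the plugin, command, and variable names are invented; only the two method names come from this patch):

    def setup(self):
        # replaces any previously set defaults wholesale
        self.set_default_cmd_environment({'LC_ALL': 'C', 'TZ': 'UTC'})
        # merges into the existing defaults
        self.add_default_cmd_environment({'MYAPP_DEBUG': '1'})
        # commands now inherit the defaults; an env passed per-call is
        # overlaid on top of them rather than replacing them
        self.add_cmd_output('myapp status', env={'TZ': 'EST5EDT'})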
+ +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/__init__.py | 55 ++++++++++++++++++- + .../plugin_tests/plugin_environment.py | 44 +++++++++++++++ + .../fake_plugins/default_env_test.py | 28 ++++++++++ + tests/unittests/plugin_tests.py | 15 +++++ + 4 files changed, 140 insertions(+), 2 deletions(-) + create mode 100644 tests/report_tests/plugin_tests/plugin_environment.py + create mode 100644 tests/test_data/fake_plugins/default_env_test.py + +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index 336b4d22..74b4f4be 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -571,6 +571,7 @@ class Plugin(): + self.manifest = None + self.skip_files = commons['cmdlineopts'].skip_files + self.skip_commands = commons['cmdlineopts'].skip_commands ++ self.default_environment = {} + + self.soslog = self.commons['soslog'] if 'soslog' in self.commons \ + else logging.getLogger('sos') +@@ -542,6 +542,52 @@ + self.manifest.add_list('files', []) + self.manifest.add_field('containers', {}) + ++ def set_default_cmd_environment(self, env_vars): ++ """ ++ Specify a collection of environment variables that should always be ++ passed to commands being executed by this plugin. ++ ++ :param env_vars: The environment variables and their values to set ++ :type env_vars: ``dict{ENV_VAR_NAME: ENV_VAR_VALUE}`` ++ """ ++ if not isinstance(env_vars, dict): ++ raise TypeError( ++ "Environment variables for Plugin must be specified by dict" ++ ) ++ self.default_environment = env_vars ++ self._log_debug("Default environment for all commands now set to %s" ++ % self.default_environment) ++ ++ def add_default_cmd_environment(self, env_vars): ++ """ ++ Add or modify a specific environment variable in the set of default ++ environment variables used by this Plugin. ++ ++ :param env_vars: The environment variables to add to the current ++ set of env vars in use ++ :type env_vars: ``dict`` ++ """ ++ if not isinstance(env_vars, dict): ++ raise TypeError("Environment variables must be added via dict") ++ self._log_debug("Adding %s to default environment" % env_vars) ++ self.default_environment.update(env_vars) ++ ++ def _get_cmd_environment(self, env=None): ++ """ ++ Get the merged set of environment variables for a command about to be ++ executed by this plugin. 
++ ++ :returns: The set of env vars to use for a command ++ :rtype: ``dict`` ++ """ ++ if env is None: ++ return self.default_environment ++ if not isinstance(env, dict): ++ raise TypeError("Command env vars must be passed as dict") ++ _env = self.default_environment.copy() ++ _env.update(env) ++ return _env ++ + def timeout_from_options(self, optname, plugoptname, default_timeout): + """Returns either the default [plugin|cmd] timeout value, the value as + provided on the commandline via -k plugin.[|cmd-]timeout=value, or the +@@ -2258,6 +2305,8 @@ class Plugin(): + + _tags = list(set(_tags)) + ++ _env = self._get_cmd_environment(env) ++ + if chroot or self.commons['cmdlineopts'].chroot == 'always': + root = self.sysroot + else: +@@ -2068,7 +2068,7 @@ + + result = sos_get_command_output( + cmd, timeout=timeout, stderr=stderr, chroot=root, +- chdir=runat, env=env, binary=binary, sizelimit=sizelimit, ++ chdir=runat, env=_env, binary=binary, sizelimit=sizelimit, + poller=self.check_timeout, foreground=foreground + ) + +@@ -2510,6 +2559,8 @@ class Plugin(): + else: + root = None + ++ _env = self._get_cmd_environment(env) ++ + if container: + if self._get_container_runtime() is None: + self._log_info("Cannot run cmd '%s' in container %s: no " +@@ -2522,7 +2573,7 @@ class Plugin(): + "container is running." % (cmd, container)) + + return sos_get_command_output(cmd, timeout=timeout, chroot=root, +- chdir=runat, binary=binary, env=env, ++ chdir=runat, binary=binary, env=_env, + foreground=foreground, stderr=stderr) + + def _add_container_file_to_manifest(self, container, path, arcpath, tags): +diff --git a/tests/report_tests/plugin_tests/plugin_environment.py b/tests/report_tests/plugin_tests/plugin_environment.py +new file mode 100644 +index 00000000..3158437a +--- /dev/null ++++ b/tests/report_tests/plugin_tests/plugin_environment.py +@@ -0,0 +1,44 @@ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++import os ++ ++from sos_tests import StageTwoReportTest ++ ++ ++class PluginDefaultEnvironmentTest(StageTwoReportTest): ++ """ ++ Ensure that being able to set a default set of environment variables is ++ working correctly and does not leave a lingering env var on the system ++ ++ :avocado: tags=stageone ++ """ ++ ++ install_plugins = ['default_env_test'] ++ sos_cmd = '-o default_env_test' ++ ++ def test_environment_used_in_cmd(self): ++ self.assertFileHasContent( ++ 'sos_commands/default_env_test/env_var_test', ++ 'Does Linus play hockey?' 
++ ) ++ ++ def test_environment_setting_logged(self): ++ self.assertSosLogContains( ++ 'Default environment for all commands now set to' ++ ) ++ ++ def test_environment_not_set_on_host(self): ++ self.assertTrue('TORVALDS' not in os.environ) ++ self.assertTrue('GREATESTSPORT' not in os.environ) ++ ++ def test_environment_not_captured(self): ++ # we should still have an empty environment file ++ self.assertFileCollected('environment') ++ self.assertFileNotHasContent('environment', 'TORVALDS') ++ self.assertFileNotHasContent('environment', 'GREATESTSPORT') +diff --git a/tests/test_data/fake_plugins/default_env_test.py b/tests/test_data/fake_plugins/default_env_test.py +new file mode 100644 +index 00000000..d1d1fb78 +--- /dev/null ++++ b/tests/test_data/fake_plugins/default_env_test.py +@@ -0,0 +1,28 @@ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++from sos.report.plugins import Plugin, IndependentPlugin ++ ++ ++class DefaultEnv(Plugin, IndependentPlugin): ++ ++ plugin_name = 'default_env_test' ++ short_desc = 'Fake plugin to test default env var handling' ++ ++ def setup(self): ++ self.set_default_cmd_environment({ ++ 'TORVALDS': 'Linus', ++ 'GREATESTSPORT': 'hockey' ++ }) ++ ++ self.add_cmd_output( ++ "sh -c 'echo Does '$TORVALDS' play '$GREATESTSPORT'?'", ++ suggest_filename='env_var_test' ++ ) ++ ++ self.add_env_var(['TORVALDS', 'GREATESTSPORT']) +diff --git a/tests/unittests/plugin_tests.py b/tests/unittests/plugin_tests.py +index 0dfa243d..e469b78e 100644 +--- a/tests/unittests/plugin_tests.py ++++ b/tests/unittests/plugin_tests.py +@@ -305,6 +305,21 @@ class PluginTests(unittest.TestCase): + p.postproc() + self.assertTrue(p.did_postproc) + ++ def test_set_default_cmd_env(self): ++ p = MockPlugin({ ++ 'sysroot': self.sysroot, ++ 'policy': LinuxPolicy(init=InitSystem(), probe_runtime=False), ++ 'cmdlineopts': MockOptions(), ++ 'devices': {} ++ }) ++ e = {'TORVALDS': 'Linus'} ++ p.set_default_cmd_environment(e) ++ self.assertEquals(p.default_environment, e) ++ add_e = {'GREATESTSPORT': 'hockey'} ++ p.add_default_cmd_environment(add_e) ++ self.assertEquals(p.default_environment['GREATESTSPORT'], 'hockey') ++ self.assertEquals(p.default_environment['TORVALDS'], 'Linus') ++ + + class AddCopySpecTests(unittest.TestCase): + +-- +2.27.0 + +From 1e12325efaa500d304dcbfbeeb50e72ed0f938f5 Mon Sep 17 00:00:00 2001 +From: Vladislav Walek <22072258+vwalek@users.noreply.github.com> +Date: Thu, 17 Mar 2022 14:10:26 -0700 +Subject: [PATCH] [openshift] Adding ability to use the localhost.kubeconfig + and KUBECONFIG env to use system:admin + +Signed-off-by: Vladislav Walek <22072258+vwalek@users.noreply.github.com> +--- + sos/report/plugins/openshift.py | 45 +++++++++++++++++++++++++++++++-- + 1 file changed, 43 insertions(+), 2 deletions(-) + +diff --git a/sos/report/plugins/openshift.py b/sos/report/plugins/openshift.py +index 5ae38178..d643f04c 100644 +--- a/sos/report/plugins/openshift.py ++++ b/sos/report/plugins/openshift.py +@@ -60,11 +60,18 @@ + profiles = ('openshift',) + packages = ('openshift-hyperkube',) + ++ master_localhost_kubeconfig = ( ++ '/etc/kubernetes/static-pod-resources/' ++ 'kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig' ++ ) ++ + option_list = [ + 
('token', 'admin token to allow API queries', 'fast', None), ++ ('kubeconfig', 'Path to a locally available kubeconfig file', ++ 'fast', None), + ('host', 'host address to use for oc login, including port', 'fast', + 'https://localhost:6443'), +- ('no-oc', 'do not collect `oc` command output', 'fast', False), ++ ('no-oc', 'do not collect `oc` command output', 'fast', True), + ('podlogs', 'collect logs from each pod', 'fast', True), + ('podlogs-filter', ('limit podlogs collection to pods matching this ' + 'regex'), 'fast', ''), +@@ -73,6 +80,10 @@ class Openshift(Plugin, RedHatPlugin): + """Check to see if we can run `oc` commands""" + return self.exec_cmd('oc whoami')['status'] == 0 + ++ def _check_localhost_kubeconfig(self): ++ """Check if the localhost.kubeconfig exists with system:admin user""" ++ return self.path_exists(self.get_option('kubeconfig')) ++ + def _check_oc_logged_in(self): + """See if we're logged in to the API service, and if not attempt to do + so using provided plugin options +@@ -80,8 +91,38 @@ class Openshift(Plugin, RedHatPlugin): + if self._check_oc_function(): + return True + +- # Not logged in currently, attempt to do so ++ if self.get_option('kubeconfig') is None: ++ # If admin doesn't add the kubeconfig ++ # use default localhost.kubeconfig ++ self.set_option( ++ 'kubeconfig', ++ self.master_localhost_kubeconfig ++ ) ++ ++ # Check first if we can use the localhost.kubeconfig before ++ # using token. We don't want to use 'host' option due we use ++ # cluster url from kubeconfig. Default is localhost. ++ if self._check_localhost_kubeconfig(): ++ self.set_default_cmd_environment({ ++ 'KUBECONFIG': self.get_option('kubeconfig') ++ }) ++ ++ oc_res = self.exec_cmd( ++ "oc login -u system:admin " ++ "--insecure-skip-tls-verify=True" ++ ) ++ if oc_res['status'] == 0 and self._check_oc_function(): ++ return True ++ ++ self._log_warn( ++ "The login command failed with status: %s and error: %s" ++ % (oc_res['status'], oc_res['output']) ++ ) ++ return False ++ ++ # If kubeconfig is not defined, check if token is provided. + token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None) ++ + if token: + oc_res = self.exec_cmd("oc login %s --token=%s " + "--insecure-skip-tls-verify=True" +-- +2.27.0 + +From 3b84b4ccfa9e4924a5a3829d3810568dfb69bf63 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Fri, 18 Mar 2022 16:25:35 -0400 +Subject: [PATCH 1/2] [pacemaker] Redesign node enumeration logic + +It has been found that `pcs status` output is liable to change, which +ends up breaking our parsing of node lists when using it on newer +versions. + +Instead, first try to parse through `crm_mon` output, which is what `pcs +status` uses under the hood, but as a stable and reliable xml format. + +Failing that, for example if the `--primary` node is not functioning as +part of the cluster, source `/etc/corosync/corosync.conf` instead. + +Related: RHBZ2065805 +Related: RHBZ2065811 + +Signed-off-by: Jake Hunsaker +--- + sos/collector/clusters/pacemaker.py | 110 +++++++++++++++++++--------- + 1 file changed, 76 insertions(+), 34 deletions(-) + +diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py +index 55024314..49d0ce51 100644 +--- a/sos/collector/clusters/pacemaker.py ++++ b/sos/collector/clusters/pacemaker.py +@@ -8,7 +8,11 @@ + # + # See the LICENSE file in the source distribution for further information. 
+ ++import re ++ + from sos.collector.clusters import Cluster ++from setuptools._vendor.packaging import version ++from xml.etree import ElementTree + + + class pacemaker(Cluster): +@@ -18,42 +22,80 @@ class pacemaker(Cluster): + packages = ('pacemaker',) + option_list = [ + ('online', True, 'Collect nodes listed as online'), +- ('offline', True, 'Collect nodes listed as offline') ++ ('offline', True, 'Collect nodes listed as offline'), ++ ('only-corosync', False, 'Only use corosync.conf to enumerate nodes') + ] + + def get_nodes(self): +- self.res = self.exec_primary_cmd('pcs status') +- if self.res['status'] != 0: +- self.log_error('Cluster status could not be determined. Is the ' +- 'cluster running on this node?') +- return [] +- if 'node names do not match' in self.res['output']: +- self.log_warn('Warning: node name mismatch reported. Attempts to ' +- 'connect to some nodes may fail.\n') +- return self.parse_pcs_output() +- +- def parse_pcs_output(self): +- nodes = [] +- if self.get_option('online'): +- nodes += self.get_online_nodes() +- if self.get_option('offline'): +- nodes += self.get_offline_nodes() +- return nodes +- +- def get_online_nodes(self): +- for line in self.res['output'].splitlines(): +- if line.startswith('Online:'): +- nodes = line.split('[')[1].split(']')[0] +- return [n for n in nodes.split(' ') if n] +- +- def get_offline_nodes(self): +- offline = [] +- for line in self.res['output'].splitlines(): +- if line.startswith('Node') and line.endswith('(offline)'): +- offline.append(line.split()[1].replace(':', '')) +- if line.startswith('OFFLINE:'): +- nodes = line.split('[')[1].split(']')[0] +- offline.extend([n for n in nodes.split(' ') if n]) +- return offline ++ self.nodes = [] ++ # try crm_mon first ++ try: ++ if not self.get_option('only-corosync'): ++ try: ++ self.get_nodes_from_crm() ++ except Exception as err: ++ self.log_warn("Falling back to sourcing corosync.conf. " ++ "Could not parse crm_mon output: %s" % err) ++ if not self.nodes: ++ # fallback to corosync.conf, in case the node we're inspecting ++ # is offline from the cluster ++ self.get_nodes_from_corosync() ++ except Exception as err: ++ self.log_error("Could not determine nodes from cluster: %s" % err) ++ ++ _shorts = [n for n in self.nodes if '.' not in n] ++ if _shorts: ++ self.log_warn( ++ "WARNING: Node addresses '%s' may not resolve locally if you " ++ "are not running on a node in the cluster. Try using option " ++ "'-c pacemaker.only-corosync' if these connections fail." ++ % ','.join(_shorts) ++ ) ++ return self.nodes ++ ++ def get_nodes_from_crm(self): ++ """ ++ Try to parse crm_mon output for node list and status. 
++ """ ++ xmlopt = '--output-as=xml' ++ # older pacemaker had a different option for xml output ++ _ver = self.exec_primary_cmd('crm_mon --version') ++ if _ver['status'] == 0: ++ cver = _ver['output'].split()[1].split('-')[0] ++ if not version.parse(cver) > version.parse('2.0.3'): ++ xmlopt = '--as-xml' ++ else: ++ return ++ _out = self.exec_primary_cmd( ++ "crm_mon --one-shot --inactive %s" % xmlopt, ++ need_root=True ++ ) ++ if _out['status'] == 0: ++ self.parse_crm_xml(_out['output']) ++ ++ def parse_crm_xml(self, xmlstring): ++ """ ++ Parse the xml output string provided by crm_mon ++ """ ++ _xml = ElementTree.fromstring(xmlstring) ++ nodes = _xml.find('nodes') ++ for node in nodes: ++ _node = node.attrib ++ if self.get_option('online') and _node['online'] == 'true': ++ self.nodes.append(_node['name']) ++ elif self.get_option('offline') and _node['online'] == 'false': ++ self.nodes.append(_node['name']) ++ ++ def get_nodes_from_corosync(self): ++ """ ++ As a fallback measure, read corosync.conf to get the node list. Note ++ that this prevents us from separating online nodes from offline nodes. ++ """ ++ self.log_warn("WARNING: unable to distinguish online nodes from " ++ "offline nodes when sourcing from corosync.conf") ++ cc = self.primary.read_file('/etc/corosync/corosync.conf') ++ nodes = re.findall(r'((\sring0_addr:)(.*))', cc) ++ for node in nodes: ++ self.nodes.append(node[-1].strip()) + + # vim: set et ts=4 sw=4 : +-- +2.27.0 + + +From 6701a7d77ecc998b018b54ecc00f9fd102ae9518 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 21 Mar 2022 12:05:59 -0400 +Subject: [PATCH 2/2] [clusters] Allow clusters to not add localhost to node + list + +For most of our supported clusters, we end up needing to add the +local host executing `sos collect` to the node list (unless `--no-local` +is used) as that accounts for the primary node that may otherwise be +left off. However, this is not helpful for clusters that may reports +node names as something other than resolveable names. In those cases, +such as with pacemaker, adding the local hostname may result in +duplicate collections. + +Add a toggle to cluster profiles via a new `strict_node_list` class attr +that, if True, will skip this addition. This toggle is default `False` +to preserve existing behavior, and is now enabled for `pacemaker` +specifically. + +Related: RHBZ#2065821 + +Signed-off-by: Jake Hunsaker +--- + sos/collector/__init__.py | 3 ++- + sos/collector/clusters/__init__.py | 4 ++++ + sos/collector/clusters/pacemaker.py | 1 + + 3 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index a8bb0064..d898ca34 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -1073,7 +1073,8 @@ class SoSCollector(SoSComponent): + for node in self.node_list: + if host == node.split('.')[0]: + self.node_list.remove(node) +- self.node_list.append(self.hostname) ++ if not self.cluster.strict_node_list: ++ self.node_list.append(self.hostname) + self.reduce_node_list() + try: + _node_max = len(max(self.node_list, key=len)) +diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py +index f3f550ad..f00677b8 100644 +--- a/sos/collector/clusters/__init__.py ++++ b/sos/collector/clusters/__init__.py +@@ -56,6 +56,10 @@ + sos_plugin_options = {} + sos_preset = '' + cluster_name = None ++ # set this to True if the local host running collect should *not* be ++ # forcibly added to the node list. 
This can be helpful in situations where ++ # the host's fqdn and the name the cluster uses are different ++ strict_node_list = False + + def __init__(self, commons): + self.master = None +diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py +index 49d0ce51..bebcb265 100644 +--- a/sos/collector/clusters/pacemaker.py ++++ b/sos/collector/clusters/pacemaker.py +@@ -20,6 +20,7 @@ class pacemaker(Cluster): + cluster_name = 'Pacemaker High Availability Cluster Manager' + sos_plugins = ['pacemaker'] + packages = ('pacemaker',) ++ strict_node_list = True + option_list = [ + ('online', True, 'Collect nodes listed as online'), + ('offline', True, 'Collect nodes listed as offline'), +-- +2.27.0 + +From 61765992812afb785e9552e01e3b5579118a6963 Mon Sep 17 00:00:00 2001 +From: Nadia Pinaeva +Date: Fri, 1 Apr 2022 12:05:36 +0200 +Subject: [PATCH] Add one more container for plugin enablement + +Signed-off-by: Nadia Pinaeva +--- + sos/report/plugins/openshift_ovn.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py +index b4787b8e..98522b1e 100644 +--- a/sos/report/plugins/openshift_ovn.py ++++ b/sos/report/plugins/openshift_ovn.py +@@ -16,7 +16,7 @@ class OpenshiftOVN(Plugin, RedHatPlugin): + """ + short_desc = 'Openshift OVN' + plugin_name = "openshift_ovn" +- containers = ('ovnkube-master', 'ovn-ipsec') ++ containers = ('ovnkube-master', 'ovnkube-node', 'ovn-ipsec') + profiles = ('openshift',) + + def setup(self): +-- +2.27.0 + +From d3aa071efc85507341cf65dd61414a734654f50a Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 28 Mar 2022 14:47:09 -0400 +Subject: [PATCH] [presets] Adjust OCP preset options + +Adjust the options used by the 'ocp' preset to better reflect the +current collection needs and approach. + +This includes disabling the `cgroups` plugin due to the large amount of +mostly irrelevant data captured due to the high number of containers +present on OCP nodes, ensuring the `--container-runtime` option is set +to `crio` to align container-based collections, disabling HTML report +generation and increasing the base log size rather than blindly enabling +all-logs. 
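Referring back to the `strict_node_list` change earlier in this series, a condensed sketch of the node-list handling it gates (hostnames invented, logic simplified from sos/collector/__init__.py):

    node_list = ['node1.example.com', 'node2.example.com']
    hostname = 'node1'  # local short hostname

    for node in list(node_list):
        if hostname == node.split('.')[0]:
            node_list.remove(node)
    # pacemaker sets strict_node_list = True, so the local name is not
    # re-added and the enumerated cluster names are used as-is
    if not cluster.strict_node_list:
        node_list.append(hostname)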
+ +Signed-off-by: Jake Hunsaker +--- + sos/presets/redhat/__init__.py | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/sos/presets/redhat/__init__.py b/sos/presets/redhat/__init__.py +index 865c9b6b..0b9f6f11 100644 +--- a/sos/presets/redhat/__init__.py ++++ b/sos/presets/redhat/__init__.py +@@ -36,10 +36,15 @@ RHOSP_OPTS = SoSOptions(plugopts=[ + + RHOCP = "ocp" + RHOCP_DESC = "OpenShift Container Platform by Red Hat" +-RHOCP_OPTS = SoSOptions(all_logs=True, verify=True, plugopts=[ +- 'networking.timeout=600', +- 'networking.ethtool_namespaces=False', +- 'networking.namespaces=200']) ++RHOCP_OPTS = SoSOptions( ++ verify=True, skip_plugins=['cgroups'], container_runtime='crio', ++ no_report=True, log_size=100, ++ plugopts=[ ++ 'crio.timeout=600', ++ 'networking.timeout=600', ++ 'networking.ethtool_namespaces=False', ++ 'networking.namespaces=200' ++ ]) + + RH_CFME = "cfme" + RH_CFME_DESC = "Red Hat CloudForms" +-- +2.27.0 + +From f2b67ab820070063995689fed03492cdaa012d01 Mon Sep 17 00:00:00 2001 +From: Nadia Pinaeva +Date: Fri, 1 Apr 2022 17:01:35 +0200 +Subject: [PATCH] Use /etc/os-release instead of /etc/redhat-release as the + most compatible way to find host release + +Signed-off-by: Nadia Pinaeva +--- + sos/policies/distros/redhat.py | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py +index 0c72a5e4..2e117f37 100644 +--- a/sos/policies/distros/redhat.py ++++ b/sos/policies/distros/redhat.py +@@ -40,7 +40,6 @@ class RedHatPolicy(LinuxPolicy): + ('Distribution Website', 'https://www.redhat.com/'), + ('Commercial Support', 'https://www.access.redhat.com/') + ] +- _redhat_release = '/etc/redhat-release' + _tmp_dir = "/var/tmp" + _in_container = False + default_scl_prefix = '/opt/rh' +@@ -471,7 +470,7 @@ support representative. + atomic = False + if ENV_HOST_SYSROOT not in os.environ: + return atomic +- host_release = os.environ[ENV_HOST_SYSROOT] + cls._redhat_release ++ host_release = os.environ[ENV_HOST_SYSROOT] + OS_RELEASE + if not os.path.exists(host_release): + return False + try: +@@ -558,7 +557,7 @@ support representative. + coreos = False + if ENV_HOST_SYSROOT not in os.environ: + return coreos +- host_release = os.environ[ENV_HOST_SYSROOT] + cls._redhat_release ++ host_release = os.environ[ENV_HOST_SYSROOT] + OS_RELEASE + try: + for line in open(host_release, 'r').read().splitlines(): + coreos |= 'Red Hat Enterprise Linux CoreOS' in line +-- +2.27.0 + +From ee0dd68199a2c9296eafe64ead5b2263c8270e4a Mon Sep 17 00:00:00 2001 +From: Nadia Pinaeva +Date: Wed, 6 Apr 2022 11:56:41 +0200 +Subject: [PATCH] Use --force-pull-image option for pods created with oc. 
Set + --force-pull-image=True by default, can be turned off with + --force-pull-image=False + +Signed-off-by: Nadia Pinaeva +--- + man/en/sos-collect.1 | 16 +++++++++++----- + sos/collector/__init__.py | 9 +++++---- + sos/collector/transports/oc.py | 2 ++ + sos/options.py | 20 ++++++++++++++------ + 4 files changed, 32 insertions(+), 15 deletions(-) + +diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 +index 9b0a5d7b..2f60332b 100644 +--- a/man/en/sos-collect.1 ++++ b/man/en/sos-collect.1 +@@ -28,7 +28,7 @@ + [\-\-no\-local] + [\-\-master MASTER] + [\-\-image IMAGE] +- [\-\-force-pull-image] ++ [\-\-force-pull-image TOGGLE, --pull TOGGLE] + [\-\-registry-user USER] + [\-\-registry-password PASSWORD] + [\-\-registry-authfile FILE] +@@ -262,10 +262,16 @@ Specify an image to use for the temporary container created for collections on + containerized host, if you do not want to use the default image specifed by the + host's policy. Note that this should include the registry. + .TP +-\fB\-\-force-pull-image\fR +-Use this option to force the container runtime to pull the specified image (even +-if it is the policy default image) even if the image already exists on the host. +-This may be useful to update an older container image on containerized hosts. ++\fB\-\-force-pull-image TOGGLE, \-\-pull TOGGLE\fR ++When collecting an sos report from a containerized host, force the host to always ++pull the specified image, even if that image already exists on the host. ++This is useful to ensure that the latest version of that image is always in use. ++Disabling this option will use whatever version of the image is present on the node, ++and only attempt a pull if there is no copy of the image present at all. ++ ++Enable with true/on/yes or disable with false/off/no ++ ++Default: true + .TP + \fB\-\-registry-user USER\fR + Specify the username to authenticate to the registry with in order to pull the container +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index d898ca34..66c3d932 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -27,7 +27,7 @@ + from textwrap import fill + from sos.cleaner import SoSCleaner + from sos.collector.sosnode import SosNode +-from sos.options import ClusterOption ++from sos.options import ClusterOption, str_to_bool + from sos.component import SoSComponent + from sos import __version__ + +@@ -85,7 +85,7 @@ class SoSCollector(SoSComponent): + 'encrypt_pass': '', + 'group': None, + 'image': '', +- 'force_pull_image': False, ++ 'force_pull_image': True, + 'jobs': 4, + 'keywords': [], + 'keyword_file': None, +@@ -357,8 +357,9 @@ class SoSCollector(SoSComponent): + collect_grp.add_argument('--image', + help=('Specify the container image to use for' + ' containerized hosts.')) +- collect_grp.add_argument('--force-pull-image', '--pull', default=False, +- action='store_true', ++ collect_grp.add_argument('--force-pull-image', '--pull', ++ default=True, choices=(True, False), ++ type=str_to_bool, + help='Force pull the container image even if ' + 'it already exists on the host') + collect_grp.add_argument('--registry-user', default=None, +diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py +index 90a802b2..8f6aa9b4 100644 +--- a/sos/collector/transports/oc.py ++++ b/sos/collector/transports/oc.py +@@ -147,6 +147,8 @@ class OCTransport(RemoteTransport): + "tty": True + } + ], ++ "imagePullPolicy": ++ "Always" if self.opts.force_pull_image else "IfNotPresent", + "restartPolicy": "Never", + "nodeName": 
self.address, + "hostNetwork": True, +diff --git a/sos/options.py b/sos/options.py +index 4846a509..2d5a5135 100644 +--- a/sos/options.py ++++ b/sos/options.py +@@ -18,6 +18,16 @@ def _is_seq(val): + return val_type is list or val_type is tuple + + ++def str_to_bool(val): ++ _val = val.lower() ++ if _val in ['true', 'on', 'yes']: ++ return True ++ elif _val in ['false', 'off', 'no']: ++ return False ++ else: ++ return None ++ ++ + class SoSOptions(): + + def _merge_opt(self, opt, src, is_default): +@@ -153,15 +163,13 @@ class SoSOptions(): + if isinstance(self.arg_defaults[key], list): + return [v for v in val.split(',')] + if isinstance(self.arg_defaults[key], bool): +- _val = val.lower() +- if _val in ['true', 'on', 'yes']: +- return True +- elif _val in ['false', 'off', 'no']: +- return False +- else: ++ val = str_to_bool(val) ++ if val is None: + raise Exception( + "Value of '%s' in %s must be True or False or analagous" + % (key, conf)) ++ else: ++ return val + if isinstance(self.arg_defaults[key], int): + try: + return int(val) +-- +2.27.0 + +From ade857c82926bb7de2c45232f00a3f346db4e59a Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 3 Nov 2021 16:28:10 -0400 +Subject: [PATCH] [collect] Update docstrings for collect for help command + +Updates the docstrings for collect components for the new `help` +command, including minor formatting changes to make the help output more +human-friendly. + +This includes docstring updates and changes to transports and cluster +profiles. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/clusters/jbon.py | 13 +++++--- + sos/collector/clusters/kubernetes.py | 7 ++++- + sos/collector/clusters/ocp.py | 35 ++++++++++++++++++++- + sos/collector/clusters/ovirt.py | 27 ++++++++++++++++ + sos/collector/clusters/satellite.py | 9 +++++- + sos/collector/transports/control_persist.py | 12 +++++-- + sos/collector/transports/local.py | 7 +++-- + sos/collector/transports/oc.py | 20 ++++++++++-- + 8 files changed, 114 insertions(+), 16 deletions(-) + +diff --git a/sos/collector/clusters/jbon.py b/sos/collector/clusters/jbon.py +index 8f083ac6..219e1901 100644 +--- a/sos/collector/clusters/jbon.py ++++ b/sos/collector/clusters/jbon.py +@@ -12,11 +12,14 @@ from sos.collector.clusters import Cluster + + + class jbon(Cluster): +- '''Just a Bunch of Nodes +- +- Used when --cluster-type=none (or jbon), to avoid cluster checks, and just +- use the provided --nodes list +- ''' ++ """ ++ Used when --cluster-type=none (or jbon) to avoid cluster checks, and just ++ use the provided --nodes list. ++ ++ Using this profile will skip any and all operations that a cluster profile ++ normally performs, and will not set any plugins, plugin options, or presets ++ for the sos report generated on the nodes provided by --nodes. ++ """ + + cluster_name = 'Just a Bunch Of Nodes (no cluster)' + packages = None +diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py +index 99f788dc..04752977 100644 +--- a/sos/collector/clusters/kubernetes.py ++++ b/sos/collector/clusters/kubernetes.py +@@ -13,7 +13,12 @@ from sos.collector.clusters import Cluster + + + class kubernetes(Cluster): +- ++ """ ++ The kuberentes cluster profile is intended to be used on kubernetes ++ clusters built from the upstream/source kubernetes (k8s) project. It is ++ not intended for use with other projects or platforms that are built ontop ++ of kubernetes. 
++ """ + cluster_name = 'Community Kubernetes' + packages = ('kubernetes-master',) + sos_plugins = ['kubernetes'] +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index ae93ad58..f1714239 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -16,7 +16,40 @@ from sos.utilities import is_executable + + + class ocp(Cluster): +- """OpenShift Container Platform v4""" ++ """ ++ This profile is for use with OpenShift Container Platform (v4) clusters ++ instead of the kubernetes profile. ++ ++ This profile will favor using the `oc` transport type, which means it will ++ leverage a locally installed `oc` binary. This is also how node enumeration ++ is done. To instead use SSH to connect to the nodes, use the ++ '--transport=control_persist' option. ++ ++ Thus, a functional `oc` binary for the user executing sos collect is ++ required. Functional meaning that the user can run `oc` commands with ++ clusterAdmin privileges. ++ ++ If this requires the use of a secondary configuration file, specify that ++ path with the 'kubeconfig' cluster option. ++ ++ Alternatively, provide a clusterAdmin access token either via the 'token' ++ cluster option or, preferably, the SOSOCPTOKEN environment variable. ++ ++ By default, this profile will enumerate only master nodes within the ++ cluster, and this may be changed by overriding the 'role' cluster option. ++ To collect from all nodes in the cluster regardless of role, use the form ++ -c ocp.role=''. ++ ++ Filtering nodes by a label applied to that node is also possible via the ++ label cluster option, though be aware that this is _combined_ with the role ++ option mentioned above. ++ ++ To avoid redundant collections of OCP API information (e.g. 'oc get' ++ commands), this profile will attempt to enable the openshift plugin on only ++ a single master node. If the none of the master nodes have a functional ++ 'oc' binary available, *and* the --no-local option is used, that means that ++ no API data will be collected. ++ """ + + cluster_name = 'OpenShift Container Platform v4' + packages = ('openshift-hyperkube', 'openshift-clients') +diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py +index bd2d0c74..4587a1ca 100644 +--- a/sos/collector/clusters/ovirt.py ++++ b/sos/collector/clusters/ovirt.py +@@ -17,6 +17,33 @@ ENGINE_KEY = '/etc/pki/ovirt-engine/keys/engine_id_rsa' + + + class ovirt(Cluster): ++ """ ++ This cluster profile is for the oVirt/RHV project which provides for a ++ virtualization cluster built ontop of KVM. ++ ++ Nodes enumerated will be hypervisors within the envrionment, not virtual ++ machines running on those hypervisors. By default, ALL hypervisors within ++ the environment are returned. This may be influenced by the 'cluster' and ++ 'datacenter' cluster options, which will limit enumeration to hypervisors ++ within the specific cluster and/or datacenter. The spm-only cluster option ++ may also be used to only collect from hypervisors currently holding the ++ SPM role. ++ ++ Optionally, to only collect an archive from manager and the postgresql ++ database, use the no-hypervisors cluster option. ++ ++ By default, a second archive from the manager will be collected that is ++ just the postgresql plugin configured in such a way that a dump of the ++ manager's database that can be explored and restored to other systems will ++ be collected. ++ ++ The ovirt profile focuses on the upstream, community ovirt project. 
+diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py
+index bd2d0c74..4587a1ca 100644
+--- a/sos/collector/clusters/ovirt.py
++++ b/sos/collector/clusters/ovirt.py
+@@ -17,6 +17,33 @@ ENGINE_KEY = '/etc/pki/ovirt-engine/keys/engine_id_rsa'
+ 
+ 
+ class ovirt(Cluster):
++    """
++    This cluster profile is for the oVirt/RHV project, which provides for a
++    virtualization cluster built on top of KVM.
++
++    Nodes enumerated will be hypervisors within the environment, not virtual
++    machines running on those hypervisors. By default, ALL hypervisors within
++    the environment are returned. This may be influenced by the 'cluster' and
++    'datacenter' cluster options, which will limit enumeration to hypervisors
++    within the specific cluster and/or datacenter. The spm-only cluster option
++    may also be used to only collect from hypervisors currently holding the
++    SPM role.
++
++    Optionally, to only collect an archive from the manager and the postgresql
++    database, use the no-hypervisors cluster option.
++
++    By default, a second archive from the manager will be collected that
++    contains just the postgresql plugin, configured to produce a dump of the
++    manager's database that can be explored and restored on other systems.
++
++    The ovirt profile focuses on the upstream, community ovirt project.
++
++    The rhv profile is for Red Hat customers running RHV (formerly RHEV).
++
++    The rhhi_virt profile is for Red Hat customers running RHV in a
++    hyper-converged setup and enables gluster collections.
++    """
+ 
+     cluster_name = 'Community oVirt'
+     packages = ('ovirt-engine',)
+diff --git a/sos/collector/clusters/satellite.py b/sos/collector/clusters/satellite.py
+index 7c21e553..5e28531d 100644
+--- a/sos/collector/clusters/satellite.py
++++ b/sos/collector/clusters/satellite.py
+@@ -13,7 +13,14 @@ from sos.collector.clusters import Cluster
+ 
+ 
+ class satellite(Cluster):
+-    """Red Hat Satellite 6"""
++    """
++    This profile is specifically for Red Hat Satellite 6, and not earlier
++    releases of Satellite.
++
++    While not technically a 'cluster' in the traditional sense, Satellite
++    does provide for 'capsule' nodes, which are what this profile aims to
++    enumerate beyond the 'primary' Satellite system.
++    """
+ 
+     cluster_name = 'Red Hat Satellite 6'
+     packages = ('satellite', 'satellite-installer')
+diff --git a/sos/collector/transports/control_persist.py b/sos/collector/transports/control_persist.py
+index 3e848b41..ae6c7e43 100644
+--- a/sos/collector/transports/control_persist.py
++++ b/sos/collector/transports/control_persist.py
+@@ -26,10 +26,18 @@ from sos.utilities import sos_get_command_output
+ 
+ 
+ class SSHControlPersist(RemoteTransport):
+-    """A transport for collect that leverages OpenSSH's Control Persist
++    """
++    A transport for collect that leverages OpenSSH's ControlPersist
+     functionality, which uses control sockets to transparently keep a connection
+     open to the remote host without needing to rebuild the SSH connection for
+-    each and every command executed on the node
++    each and every command executed on the node.
++
++    This transport will by default assume the use of SSH keys, meaning keys
++    have already been distributed to target nodes. If this is not the case,
++    users will need to provide a password using the --password or
++    --password-per-node option, depending on whether the password to connect
++    to all nodes is the same or not. Note that these options prevent the use
++    of the --batch option, as they require user input.
+     """
+ 
+     name = 'control_persist'
+diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py
+index 2996d524..dd72053c 100644
+--- a/sos/collector/transports/local.py
++++ b/sos/collector/transports/local.py
+@@ -15,9 +15,10 @@ from sos.collector.transports import RemoteTransport
+ 
+ 
+ class LocalTransport(RemoteTransport):
+-    """A 'transport' to represent a local node. This allows us to more easily
+-    extend SoSNode() without having a ton of 'if local' or similar checks in
+-    more places than we actually need them
++    """
++    A 'transport' to represent a local node. No remote connection is actually
++    made, and all commands set to be run by this transport are executed locally
++    without any wrappers.
+     """
+ 
+     name = 'local_node'
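# A generic illustration of the OpenSSH ControlPersist mechanism that the
# SSHControlPersist transport above builds on; this is not sos's actual
# connection code, and the socket path is a hypothetical example.
import subprocess

SSH_OPTS = [
    '-oControlMaster=auto',                    # create the master on first use
    '-oControlPath=/tmp/sos-collect-%h.sock',  # one control socket per host
    '-oControlPersist=600',                    # keep the master open 10 minutes
]

def node_cmd(host, cmd):
    # every call after the first reuses the master connection, so no new SSH
    # handshake (or password prompt) is required per command
    return subprocess.run(['ssh'] + SSH_OPTS + [host, cmd],
                          capture_output=True, text=True)

# res = node_cmd('node1.example.com', 'hostname')
# print(res.stdout)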
+diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
+index 720dd61d..0fc9eee8 100644
+--- a/sos/collector/transports/oc.py
++++ b/sos/collector/transports/oc.py
+@@ -18,15 +18,29 @@ from sos.utilities import (is_executable, sos_get_command_output,
+ 
+ 
+ class OCTransport(RemoteTransport):
+-    """This transport leverages the execution of commands via a locally
++    """
++    This transport leverages the execution of commands via a locally
+     available and configured ``oc`` binary for OCPv4 environments.
+ 
++    The location of the oc binary MUST be in the $PATH used by the locally
++    loaded SoS policy. Specifically, this means that the binary cannot be in
++    the running user's home directory, such as ~/.local/bin.
++
+     OCPv4 clusters generally discourage the use of SSH, so this transport may
+     be used to remove our use of SSH in favor of the environment-provided
+     method of connecting to nodes and executing commands via debug pods.
+ 
+-    Note that this approach will generate multiple debug pods over the course
+-    of our execution
++    The debug pod created will be a privileged pod that mounts the host's
++    filesystem internally so that sos report collections reflect the host, and
++    not the container in which it runs.
++
++    This transport will execute within a temporary 'sos-collect-tmp' project
++    created by the OCP cluster profile. The project will be removed at the end
++    of execution.
++
++    In the event of failures due to a misbehaving OCP API or oc binary, it is
++    recommended to fall back to the control_persist transport by manually
++    setting the --transport option.
+     """
+ 
+     name = 'oc'
+-- 
+2.27.0
+
+From ce289a3ae7101a898efdb84ddfd575576ba5819b Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker
+Date: Tue, 5 Apr 2022 11:32:11 -0400
+Subject: [PATCH] [ocp, openshift] Re-align API collection options and rename
+ option
+
+Previously, in #2888, the `openshift` plugin was extended to allow API
+collections by using a default-available kubeconfig file rather than
+relying on user-provided tokens. This also included flipping the default
+value of the `no-oc` plugin option to `True` (meaning do not collect API
+output by default).
+
+This worked for the plugin, but it introduced a gap in `sos collect`
+whereby the cluster profile could no longer reliably enable API
+collections when trying to leverage the new functionality of not
+requiring a user token.
+
+Fix this by updating the cluster profile to align with the new
+default-off approach to API collections.
+
+Along with this, add a toggle directly to the cluster profile to allow
+users to turn API collections on or off (default off). This is done via
+a new `with-api` cluster option (e.g. `-c ocp.with-api`). Further,
+rename the `openshift` plugin option from `no-oc` to `with-api`. This
+change not only makes the option's use case far more obvious, it also
+aligns the use of the option across both `collect` and `report`, so
+that users need only be aware of a single option for either method.
+
+The cluster profile also has logic to detect which plugin option,
+`no-oc` or `with-api`, to use based on the (RHEL) sos version installed
+on the nodes being inspected by the `ocp` cluster profile.
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/collector/clusters/ocp.py   | 72 +++++++++++++++++++++++++++------
+ sos/report/plugins/openshift.py | 26 +++++++-----
+ 2 files changed, 77 insertions(+), 21 deletions(-)
+
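# A self-contained demonstration of the version-gated toggle described in the
# commit message above and implemented in the _toggle_api_opt() method added
# below; StubNode is a hypothetical stand-in for the SoSNode objects the real
# method receives.
class StubNode:
    def __init__(self, has_new_sos):
        self.has_new_sos = has_new_sos
        self.plugopts = []

    def check_sos_version(self, ver):
        return self.has_new_sos

def toggle_api_opt(node, use_api):
    if node.check_sos_version('4.2-16'):
        # newer sos understands the with-api option directly
        opt, val = 'with-api', 'on' if use_api else 'off'
    else:
        # older sos only knows no-oc, whose boolean sense is inverted
        opt, val = 'no-oc', 'off' if use_api else 'on'
    node.plugopts.append("openshift.%s=%s" % (opt, val))

for new_sos in (True, False):
    node = StubNode(new_sos)
    toggle_api_opt(node, use_api=True)
    print(node.plugopts)  # ['openshift.with-api=on'] then ['openshift.no-oc=off']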
+diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
+index 9beb2f9b..e31d1903 100644
+--- a/sos/collector/clusters/ocp.py
++++ b/sos/collector/clusters/ocp.py
+@@ -30,7 +30,11 @@ class ocp(Cluster):
+     clusterAdmin privileges.
+ 
+     If this requires the use of a secondary configuration file, specify that
+-    path with the 'kubeconfig' cluster option.
++    path with the 'kubeconfig' cluster option. This config file will also be
++    used on a single master node to perform API collections if the `with-api`
++    option is enabled (default disabled). If no `kubeconfig` option is given,
++    but `with-api` is enabled, the cluster profile will attempt to use a
++    well-known default kubeconfig file if it is available on the host.
+ 
+     Alternatively, provide a clusterAdmin access token either via the 'token'
+     cluster option or, preferably, the SOSOCPTOKEN environment variable.
+@@ -45,7 +49,7 @@
+     option mentioned above.
+ 
+     To avoid redundant collections of OCP API information (e.g. 'oc get'
+-    commands), this profile will attempt to enable the openshift plugin on only
++    commands), this profile will attempt to enable API collections on only
+     a single master node. If none of the master nodes have a functional
+     'oc' binary available *and* the --no-local option is used, no API data
+     will be collected.
+@@ -63,7 +67,8 @@
+         ('label', '', 'Colon delimited list of labels to select nodes with'),
+         ('role', 'master', 'Colon delimited list of roles to filter on'),
+         ('kubeconfig', '', 'Path to the kubeconfig file'),
+-        ('token', '', 'Service account token to use for oc authorization')
++        ('token', '', 'Service account token to use for oc authorization'),
++        ('with-api', False, 'Collect OCP API data from a master node')
+     ]
+ 
+     def fmt_oc_cmd(self, cmd):
+@@ -219,13 +219,52 @@
+             return False
+         return 'master' in self.node_dict[sosnode.address]['roles']
+ 
++    def _toggle_api_opt(self, node, use_api):
++        """In earlier versions of sos, the openshift plugin option that is
++        used to toggle the API collections was called `no-oc` rather than
++        `with-api`. This older plugin option had the inverse logic of the
++        current `with-api` option.
++
++        Use this to toggle the correct plugin option given the node's sos
++        version. Note that the use of version 4.2 here is tied to the RHEL
++        release (the only use case for this cluster profile) rather than the
++        upstream version, given the backports to that downstream release.
++
++        :param node: The node being inspected for API collections
++        :type node: ``SoSNode``
++
++        :param use_api: Should this node enable API collections?
++ :type use_api: ``bool`` ++ """ ++ if node.check_sos_version('4.2-16'): ++ _opt = 'with-api' ++ _val = 'on' if use_api else 'off' ++ else: ++ _opt = 'no-oc' ++ _val = 'off' if use_api else 'on' ++ node.plugopts.append("openshift.%s=%s" % (_opt, _val)) ++ + def set_master_options(self, node): ++ + node.enable_plugins.append('openshift') ++ if not self.get_option('with-api'): ++ self._toggle_api_opt(node, False) ++ return + if self.api_collect_enabled: + # a primary has already been enabled for API collection, disable + # it among others +- node.plugopts.append('openshift.no-oc=on') ++ self._toggle_api_opt(node, False) + else: ++ # running in a container, so reference the /host mount point ++ master_kube = ( ++ '/host/etc/kubernetes/static-pod-resources/' ++ 'kube-apiserver-certs/secrets/node-kubeconfigs/' ++ 'localhost.kubeconfig' ++ ) ++ _optconfig = self.get_option('kubeconfig') ++ if _optconfig and not _optconfig.startswith('/host'): ++ _optconfig = '/host/' + _optconfig ++ _kubeconfig = _optconfig or master_kube + _oc_cmd = 'oc' + if node.host.containerized: + _oc_cmd = '/host/bin/oc' +@@ -244,17 +288,21 @@ class ocp(Cluster): + need_root=True) + if can_oc['status'] == 0: + # the primary node can already access the API ++ self._toggle_api_opt(node, True) + self.api_collect_enabled = True + elif self.token: + node.sos_env_vars['SOSOCPTOKEN'] = self.token ++ self._toggle_api_opt(node, True) ++ self.api_collect_enabled = True ++ elif node.file_exists(_kubeconfig): ++ # if the file exists, then the openshift sos plugin will use it ++ # if the with-api option is turned on ++ if not _kubeconfig == master_kube: ++ node.plugopts.append( ++ "openshift.kubeconfig=%s" % _kubeconfig ++ ) ++ self._toggle_api_opt(node, True) + self.api_collect_enabled = True +- elif self.get_option('kubeconfig'): +- kc = self.get_option('kubeconfig') +- if node.file_exists(kc): +- if node.host.containerized: +- kc = "/host/%s" % kc +- node.sos_env_vars['KUBECONFIG'] = kc +- self.api_collect_enabled = True + if self.api_collect_enabled: + msg = ("API collections will be performed on %s\nNote: API " + "collections may extend runtime by 10s of minutes\n" +@@ -264,6 +312,6 @@ class ocp(Cluster): + + def set_node_options(self, node): + # don't attempt OC API collections on non-primary nodes +- node.plugopts.append('openshift.no-oc=on') ++ self._toggle_api_opt(node, False) + + # vim: set et ts=4 sw=4 : +diff --git a/sos/report/plugins/openshift.py b/sos/report/plugins/openshift.py +index d643f04c..a41ab62b 100644 +--- a/sos/report/plugins/openshift.py ++++ b/sos/report/plugins/openshift.py +@@ -19,7 +19,10 @@ class Openshift(Plugin, RedHatPlugin): + further extending the kubernetes plugin (or the OCP 3.x extensions included + in the Red Hat version of the kube plugin). + +- By default, this plugin will collect cluster information and inspect the ++ This plugin may collect OCP API information when the `with-api` option is ++ enabled. This option is disabled by default. ++ ++ When enabled, this plugin will collect cluster information and inspect the + default namespaces/projects that are created during deployment - i.e. the + namespaces of the cluster projects matching openshift.* and kube.*. 
At the
+     time of this plugin's creation that number of default projects is already
+@@ -34,16 +37,20 @@ class Openshift(Plugin, RedHatPlugin):
+ 
+     Users will need to either:
+ 
+-    1) Provide the bearer token via the `-k openshift.token` option
+-    2) Provide the bearer token via the `SOSOCPTOKEN` environment variable
+-    3) Otherwise ensure that the root user can successfully run `oc` and
++    1) Accept the use of a well-known stock kubeconfig file provided via a
++       static pod resource for the kube-apiserver
++    2) Provide the bearer token via the `-k openshift.token` option
++    3) Provide the bearer token via the `SOSOCPTOKEN` environment variable
++    4) Otherwise ensure that the root user can successfully run `oc` and
+        get proper output prior to running this plugin
+ 
+ 
+-    It is highly suggested that option #2 be used first, as this will prevent
+-    the token from being recorded in output saved to the archive. Option #1 may
++    It is highly suggested that option #1 be used first, as this uses
++    well-known configurations and requires the least information from the
++    user. If using a token, it is recommended to use option #3 as this will
++    prevent the token from being recorded in output saved to the archive. Option #2 may
+     be used if this is considered an acceptable risk. It is not recommended to
+-    rely on option #3, though it will provide the functionality needed.
++    rely on option #4, though it will provide the functionality needed.
+     """
+ 
+     short_desc = 'Openshift Container Platform 4.x'
+@@ -71,6 +71,7 @@
+          'fast', None),
+         ('host', 'host address to use for oc login, including port', 'fast',
+          'https://localhost:6443'),
++        ('with-api', 'collect output from the OCP API', 'fast', False),
+         ('no-oc', 'do not collect `oc` command output', 'fast', True),
+         ('podlogs', 'collect logs from each pod', 'fast', True),
+         ('podlogs-filter', ('limit podlogs collection to pods matching this '
+@@ -212,7 +220,7 @@ class Openshift(Plugin, RedHatPlugin):
+         self.add_copy_spec('/etc/kubernetes/*')
+ 
+         # see if we run `oc` commands
+-        if not self.get_option('no-oc'):
++        if self.get_option('with-api'):
+             can_run_oc = self._check_oc_logged_in()
+         else:
+             can_run_oc = False
+-- 
+2.27.0
+
+From 2b2ad04884cfdda78c02a33a73603d249c0a4dd5 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker
+Date: Tue, 19 Oct 2021 11:51:56 -0400
+Subject: [PATCH 1/2] [Plugin] Allow writing command output directly to disk
+
+This commit addresses a long-standing ask in sos regarding resource
+consumption when sos is run with `--all-logs`.
+
+For executions where `--all-logs` is used, or for specific commands
+where `sizelimit` has been set to 0, `add_cmd_output()` and
+`add_journal()` will now instruct `sos_get_command_output()` to write
+output directly to a file rather than saving the output in memory.
+
+When this occurs, the `output` key in the returned dict will be empty.
+
+Note that this does not extend to `collect_cmd_output()` or `exec_cmd()`.
+
+Resolves: #1506
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/archive.py                 | 14 +++----
+ sos/report/__init__.py         | 10 ++++-
+ sos/report/plugins/__init__.py | 70 +++++++++++++++++++++++-----------
+ sos/utilities.py               | 69 ++++++++++++++++++++++++++++-----
+ 4 files changed, 123 insertions(+), 40 deletions(-)
+
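# A condensed sketch of the collection path the commit message above
# describes: when a destination file is given, the child process writes
# straight to disk and nothing is buffered in sos. This is an illustration,
# not the full sos_get_command_output() implementation; run() and the example
# paths here are hypothetical.
import os
import shlex
from subprocess import PIPE, STDOUT, Popen

def run(cmd, to_file=None):
    cmd_env = os.environ.copy()
    cmd_env['LC_ALL'] = 'C'  # consistent locale, as sos enforces
    _output = open(to_file, 'w') if to_file else PIPE
    p = Popen(shlex.split(cmd), stdout=_output, stderr=STDOUT, env=cmd_env)
    if to_file:
        p.wait()
        _output.close()
        stdout = ''  # mirrors the empty 'output' key noted above
    else:
        stdout = p.communicate()[0].decode()
    return {'status': p.returncode, 'output': stdout}

print(run('uname -r'))                         # output kept in memory
print(run('uname -r', to_file='/tmp/kernel'))  # output streamed to disk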
+diff --git a/sos/archive.py b/sos/archive.py
+index e3c68b77..e92d5d60 100644
+--- a/sos/archive.py
++++ b/sos/archive.py
+@@ -251,7 +251,7 @@ class FileCacheArchive(Archive):
+ 
+         return dest
+ 
+-    def _check_path(self, src, path_type, dest=None, force=False):
++    def check_path(self, src, path_type, dest=None, force=False):
+         """Check a new destination path in the archive.
+ 
+         Since it is possible for multiple plugins to collect the same
+@@ -345,7 +345,7 @@ class FileCacheArchive(Archive):
+         if not dest:
+             dest = src
+ 
+-        dest = self._check_path(dest, P_FILE)
++        dest = self.check_path(dest, P_FILE)
+         if not dest:
+             return
+ 
+@@ -384,7 +384,7 @@ class FileCacheArchive(Archive):
+         # over any existing content in the archive, since it is used by
+         # the Plugin postprocessing hooks to perform regex substitution
+         # on file content.
+-        dest = self._check_path(dest, P_FILE, force=True)
++        dest = self.check_path(dest, P_FILE, force=True)
+ 
+         f = codecs.open(dest, mode, encoding='utf-8')
+         if isinstance(content, bytes):
+@@ -397,7 +397,7 @@ class FileCacheArchive(Archive):
+ 
+     def add_binary(self, content, dest):
+         with self._path_lock:
+-            dest = self._check_path(dest, P_FILE)
++            dest = self.check_path(dest, P_FILE)
+             if not dest:
+                 return
+ 
+@@ -409,7 +409,7 @@ class FileCacheArchive(Archive):
+     def add_link(self, source, link_name):
+         self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source))
+         with self._path_lock:
+-            dest = self._check_path(link_name, P_LINK)
++            dest = self.check_path(link_name, P_LINK)
+             if not dest:
+                 return
+ 
+@@ -484,10 +484,10 @@ class FileCacheArchive(Archive):
+         """
+         # Establish path structure
+         with self._path_lock:
+-            self._check_path(path, P_DIR)
++            self.check_path(path, P_DIR)
+ 
+     def add_node(self, path, mode, device):
+-        dest = self._check_path(path, P_NODE)
++        dest = self.check_path(path, P_NODE)
+         if not dest:
+             return
+ 
+diff --git a/sos/report/__init__.py b/sos/report/__init__.py
+index 249ff119..46c7f219 100644
+--- a/sos/report/__init__.py
++++ b/sos/report/__init__.py
+@@ -476,7 +476,8 @@ class SoSReport(SoSComponent):
+             'verbosity': self.opts.verbosity,
+             'cmdlineopts': self.opts,
+             'devices': self.devices,
+-            'namespaces': self.namespaces
++            'namespaces': self.namespaces,
++            'tempfile_util': self.tempfile_util
+         }
+ 
+     def get_temp_file(self):
+@@ -1075,7 +1076,12 @@ class SoSReport(SoSComponent):
+                     _plug.manifest.add_field('end_time', end)
+                     _plug.manifest.add_field('run_time', end - start)
+             except TimeoutError:
+-                self.ui_log.error("\n Plugin %s timed out\n" % plugin[1])
++                msg = "Plugin %s timed out" % plugin[1]
++                # log to ui_log.error to show the user, and to soslog.info so
++                # that anyone investigating the sos execution has it all in
++                # one place, without notifying the user twice.
++ self.ui_log.error("\n %s\n" % msg) ++ self.soslog.info(msg) + self.running_plugs.remove(plugin[1]) + self.loaded_plugins[plugin[0]-1][1].set_timeout_hit() + pool.shutdown(wait=True) +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index 3ff7c191..38529a9d 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -15,6 +15,7 @@ from sos.utilities import (sos_get_command_output, import_module, grep, + path_exists, path_isdir, path_isfile, path_islink, + listdir, path_join) + ++from sos.archive import P_FILE + import os + import glob + import re +@@ -1686,6 +1687,8 @@ class Plugin(): + kwargs['priority'] = 10 + if 'changes' not in kwargs: + kwargs['changes'] = False ++ if self.get_option('all_logs') or kwargs['sizelimit'] == 0: ++ kwargs['to_file'] = True + soscmd = SoSCommand(**kwargs) + self._log_debug("packed command: " + soscmd.__str__()) + for _skip_cmd in self.skip_commands: +@@ -1707,7 +1710,8 @@ class Plugin(): + chroot=True, runat=None, env=None, binary=False, + sizelimit=None, pred=None, subdir=None, + changes=False, foreground=False, tags=[], +- priority=10, cmd_as_tag=False, container=None): ++ priority=10, cmd_as_tag=False, container=None, ++ to_file=False): + """Run a program or a list of programs and collect the output + + Output will be limited to `sizelimit`, collecting the last X amount +@@ -1776,6 +1780,10 @@ class Plugin(): + :param container: Run the specified `cmds` inside a container with this + ID or name + :type container: ``str`` ++ ++ :param to_file: Should command output be written directly to a new ++ file rather than stored in memory? ++ :type to_file: ``bool`` + """ + if isinstance(cmds, str): + cmds = [cmds] +@@ -1863,7 +1863,7 @@ + pred=pred, subdir=subdir, tags=tags, + changes=changes, foreground=foreground, + priority=priority, cmd_as_tag=cmd_as_tag, +- container_cmd=container_cmd) ++ to_file=to_file, container_cmd=container_cmd) + + def add_cmd_tags(self, tagdict): + """Retroactively add tags to any commands that have been run by this +@@ -2014,7 +2014,7 @@ + stderr=True, chroot=True, runat=None, env=None, + binary=False, sizelimit=None, subdir=None, + changes=False, foreground=False, tags=[], +- priority=10, cmd_as_tag=False, ++ priority=10, cmd_as_tag=False, to_file=False, + container_cmd=False): + """Execute a command and save the output to a file for inclusion in the + report. 
+@@ -2041,6 +2041,8 @@ + TTY + :param tags: Add tags in the archive manifest + :param cmd_as_tag: Format command string to tag ++ :param to_file: Write output directly to file instead ++ of saving in memory + + :returns: dict containing status, output, and filename in the + archive for the executed cmd +@@ -2074,27 +2074,46 @@ + else: + root = None + ++ if suggest_filename: ++ outfn = self._make_command_filename(suggest_filename, subdir) ++ else: ++ outfn = self._make_command_filename(cmd, subdir) ++ ++ outfn_strip = outfn[len(self.commons['cmddir'])+1:] ++ ++ if to_file: ++ self._log_debug("collecting '%s' output directly to disk" ++ % cmd) ++ self.archive.check_path(outfn, P_FILE) ++ out_file = os.path.join(self.archive.get_archive_path(), outfn) ++ else: ++ out_file = False ++ + start = time() + + result = sos_get_command_output( + cmd, timeout=timeout, stderr=stderr, chroot=root, + chdir=runat, env=_env, binary=binary, sizelimit=sizelimit, +- poller=self.check_timeout, foreground=foreground ++ poller=self.check_timeout, foreground=foreground, ++ to_file=out_file + ) + + end = time() + run_time = end - start + + if result['status'] == 124: +- self._log_warn( +- "command '%s' timed out after %ds" % (cmd, timeout) +- ) ++ warn = "command '%s' timed out after %ds" % (cmd, timeout) ++ self._log_warn(warn) ++ if to_file: ++ msg = (" - output up until the timeout may be available at " ++ "%s" % outfn) ++ self._log_debug("%s%s" % (warn, msg)) + + manifest_cmd = { + 'command': cmd.split(' ')[0], + 'parameters': cmd.split(' ')[1:], + 'exec': cmd, +- 'filepath': None, ++ 'filepath': outfn if to_file else None, + 'truncated': result['truncated'], + 'return_code': result['status'], + 'priority': priority, +@@ -2060,7 +2090,7 @@ class Plugin(): + result = sos_get_command_output( + cmd, timeout=timeout, chroot=False, chdir=runat, + env=env, binary=binary, sizelimit=sizelimit, +- poller=self.check_timeout ++ poller=self.check_timeout, to_file=out_file + ) + run_time = time() - start + self._log_debug("could not run '%s': command not found" % cmd) +@@ -2077,22 +2107,15 @@ class Plugin(): + if result['truncated']: + self._log_info("collected output of '%s' was truncated" + % cmd.split()[0]) +- +- if suggest_filename: +- outfn = self._make_command_filename(suggest_filename, subdir) +- else: +- outfn = self._make_command_filename(cmd, subdir) +- +- outfn_strip = outfn[len(self.commons['cmddir'])+1:] +- +- if result['truncated']: + linkfn = outfn + outfn = outfn.replace('sos_commands', 'sos_strings') + '.tailed' + +- if binary: +- self.archive.add_binary(result['output'], outfn) +- else: +- self.archive.add_string(result['output'], outfn) ++ if not to_file: ++ if binary: ++ self.archive.add_binary(result['output'], outfn) ++ else: ++ self.archive.add_string(result['output'], outfn) ++ + if result['truncated']: + # we need to manually build the relative path from the paths that + # exist within the build dir to properly drop these symlinks +@@ -2543,6 +2566,9 @@ class Plugin(): + all_logs = self.get_option("all_logs") + log_size = sizelimit or self.get_option("log_size") + log_size = max(log_size, journal_size) if not all_logs else 0 ++ if sizelimit == 0: ++ # allow for specific sizelimit overrides in plugins ++ log_size = 0 + + if isinstance(units, str): + units = [units] +diff --git a/sos/utilities.py b/sos/utilities.py +index 70d5c0ab..f422d7a0 100644 +--- a/sos/utilities.py ++++ b/sos/utilities.py +@@ -110,7 +110,8 @@ def is_executable(command, sysroot=None): + + def sos_get_command_output(command, 
timeout=TIMEOUT_DEFAULT, stderr=False,
+                            chroot=None, chdir=None, env=None, foreground=False,
+-                           binary=False, sizelimit=None, poller=None):
++                           binary=False, sizelimit=None, poller=None,
++                           to_file=False):
+     """Execute a command and return a dictionary of status and output,
+     optionally changing root or current working directory before
+     executing command.
+@@ -124,6 +125,12 @@ def sos_get_command_output(command, timeout=TIMEOUT_DEFAULT, stderr=False,
+         if (chdir):
+             os.chdir(chdir)
+ 
++        def _check_poller(proc):
++            if poller():
++                proc.terminate()
++                raise SoSTimeoutError
++            time.sleep(0.01)
++
+     cmd_env = os.environ.copy()
+     # ensure consistent locale for collected command output
+     cmd_env['LC_ALL'] = 'C'
+@@ -158,24 +158,46 @@
+             expanded_args.extend(expanded_arg)
+         else:
+             expanded_args.append(arg)
++    if to_file:
++        _output = open(to_file, 'w')
++    else:
++        _output = PIPE
+     try:
+-        p = Popen(expanded_args, shell=False, stdout=PIPE,
++        p = Popen(expanded_args, shell=False, stdout=_output,
+                   stderr=STDOUT if stderr else PIPE,
+                   bufsize=-1, env=cmd_env, close_fds=True,
+                   preexec_fn=_child_prep_fn)
+ 
+-        reader = AsyncReader(p.stdout, sizelimit, binary)
++        if not to_file:
++            reader = AsyncReader(p.stdout, sizelimit, binary)
++        else:
++            reader = FakeReader(p, binary)
++
+         if poller:
+             while reader.running:
+-                if poller():
+-                    p.terminate()
+-                    raise SoSTimeoutError
+-                time.sleep(0.01)
+-        stdout = reader.get_contents()
+-        truncated = reader.is_full
++                _check_poller(p)
++        else:
++            try:
++                # override timeout=0 to timeout=None, as Popen will treat the
++                # former as a literal 0-second timeout
++                p.wait(timeout if timeout else None)
++            except Exception:
++                p.terminate()
++                _output.close()
++                # until we separate timeouts from the `timeout` command
++                # handle per-cmd timeouts via Plugin status checks
++                return {'status': 124, 'output': reader.get_contents(),
++                        'truncated': reader.is_full}
++        if to_file:
++            _output.close()
++
++        # wait for Popen to set the returncode
+         while p.poll() is None:
+             pass
+ 
++        stdout = reader.get_contents()
++        truncated = reader.is_full
++
+     except OSError as e:
+         if e.errno == errno.ENOENT:
+             return {'status': 127, 'output': "", 'truncated': ''}
+@@ -256,6 +285,28 @@ def path_join(path, *p, sysroot=os.sep):
+     return os.path.join(path, *p)
+ 
+ 
++class FakeReader():
++    """Used as a replacement for AsyncReader when we are writing command
++    output directly to disk, allowing us to keep simpler flows for executing,
++    monitoring, and collecting command output.
++    """
++
++    def __init__(self, process, binary):
++        self.process = process
++        self.binary = binary
++
++    @property
++    def is_full(self):
++        return False
++
++    def get_contents(self):
++        return '' if not self.binary else b''
++
++    @property
++    def running(self):
++        return self.process.poll() is None
++
++
+ class AsyncReader(threading.Thread):
+     """Used to limit command output to a given size without deadlocking
+     sos.
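# FakeReader, added at the end of the patch above, exists so that
# sos_get_command_output() can poll one uniform interface (running, is_full,
# get_contents) whether output goes through an AsyncReader thread or straight
# to disk. A small stand-alone demonstration; StubProc is a hypothetical
# stand-in for a subprocess.Popen object.
class StubProc:
    def __init__(self, polls=3):
        self.polls = polls

    def poll(self):
        # returns None while "running", then an exit code, like Popen.poll()
        self.polls -= 1
        return None if self.polls > 0 else 0

class FakeReader:
    def __init__(self, process, binary):
        self.process = process
        self.binary = binary

    @property
    def is_full(self):
        return False  # nothing is ever truncated when writing to disk

    def get_contents(self):
        return '' if not self.binary else b''

    @property
    def running(self):
        return self.process.poll() is None

reader = FakeReader(StubProc(), binary=False)
while reader.running:
    pass  # the real loop invokes the plugin timeout poller here
print((reader.get_contents(), reader.is_full))  # ('', False)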
+-- +2.27.0 + diff --git a/SPECS/sos.spec b/SPECS/sos.spec index e7c9ca6..d0b093e 100644 --- a/SPECS/sos.spec +++ b/SPECS/sos.spec @@ -5,7 +5,7 @@ Summary: A set of tools to gather troubleshooting information from a system Name: sos Version: 4.2 -Release: 15%{?dist} +Release: 19%{?dist} Group: Applications/System Source0: https://github.com/sosreport/sos/archive/%{version}/sos-%{version}.tar.gz Source1: sos-audit-%{auditversion}.tgz @@ -44,6 +44,7 @@ Patch20: sos-bz2041488-virsh-in-foreground.patch Patch21: sos-bz2042966-ovn-proper-package-enablement.patch Patch22: sos-bz2054882-plugopt-logging-effective-opts.patch Patch23: sos-bz2055547-honour-plugins-timeout-hardcoded.patch +Patch24: sos-bz2071825-merged-8.6.z.patch %description Sos is a set of tools that gathers information about system @@ -77,6 +78,7 @@ support technicians and developers. %patch21 -p1 %patch22 -p1 %patch23 -p1 +%patch24 -p1 %build %py3_build @@ -143,6 +145,33 @@ of the system. Currently storage and filesystem commands are audited. %ghost /etc/audit/rules.d/40-sos-storage.rules %changelog +* Mon May 09 2022 Jan Jansky = 4.2-19 +- OCP backport + Resolves: bz2071824 +- [pacemaker] Update collect cluster profile for pacemaker + Resolves: bz2071695 +- [Plugin] oom excessive memory usage + Resolves: bz2071825 + +* Fri Apr 22 2022 Jan Jansky = 4.2-18 +- OCP backport + Resolves: bz2071824 +- [pacemaker] Update collect cluster profile for pacemaker + Resolves: bz2071695 +- [Plugin] oom excessive memory usage + Resolves: bz2071825 + +* Wed Apr 20 2022 Jan Jansky = 4.2-17 +- increased release version + +* Wed Apr 13 2022 Jan Jansky = 4.2-16 +- OCP backport + Resolves: bz2071824 +- [pacemaker] Update collect cluster profile for pacemaker + Resolves: bz2071695 +- [Plugin] oom excessive memory usage + Resolves: bz2071825 + * Wed Feb 23 2022 Pavel Moravec = 4.2-15 - [sosnode] Handle downstream versioning for runtime option Resolves: bz2036697