1798 lines
72 KiB
Diff
1798 lines
72 KiB
Diff
From cc60fa5ee25bffed9203a4f786256185b7fe0115 Mon Sep 17 00:00:00 2001
|
|
From: Nadia Pinaeva <npinaeva@redhat.com>
|
|
Date: Tue, 15 Mar 2022 11:49:57 +0100
|
|
Subject: [PATCH] Add ovs datapath and groups collection commands Add
|
|
ct-zone-list command for openshift-ovn
|
|
|
|
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
|
|
---
|
|
sos/report/plugins/openshift_ovn.py | 4 ++++
|
|
sos/report/plugins/openvswitch.py | 3 +++
|
|
2 files changed, 7 insertions(+)
|
|
|
|
diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py
|
|
index 168f1dd3..b4787b8e 100644
|
|
--- a/sos/report/plugins/openshift_ovn.py
|
|
+++ b/sos/report/plugins/openshift_ovn.py
|
|
@@ -34,6 +34,10 @@ class OpenshiftOVN(Plugin, RedHatPlugin):
|
|
'ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ' +
|
|
'cluster/status OVN_Southbound'],
|
|
container='ovnkube-master')
|
|
+ self.add_cmd_output([
|
|
+ 'ovs-appctl -t /var/run/ovn/ovn-controller.*.ctl ' +
|
|
+ 'ct-zone-list'],
|
|
+ container='ovnkube-node')
|
|
self.add_cmd_output([
|
|
'ovs-appctl -t ovs-monitor-ipsec tunnels/show',
|
|
'ipsec status',
|
|
diff --git a/sos/report/plugins/openvswitch.py b/sos/report/plugins/openvswitch.py
|
|
index 179d1532..159b0bd2 100644
|
|
--- a/sos/report/plugins/openvswitch.py
|
|
+++ b/sos/report/plugins/openvswitch.py
|
|
@@ -124,6 +124,8 @@ class OpenVSwitch(Plugin):
|
|
"ovs-vsctl -t 5 list interface",
|
|
# Capture OVS detailed information from all the bridges
|
|
"ovs-vsctl -t 5 list bridge",
|
|
+ # Capture OVS datapath list
|
|
+ "ovs-vsctl -t 5 list datapath",
|
|
# Capture DPDK queue to pmd mapping
|
|
"ovs-appctl dpif-netdev/pmd-rxq-show",
|
|
# Capture DPDK pmd stats
|
|
@@ -229,6 +231,7 @@ class OpenVSwitch(Plugin):
|
|
"ovs-ofctl queue-get-config %s" % br,
|
|
"ovs-ofctl queue-stats %s" % br,
|
|
"ovs-ofctl show %s" % br,
|
|
+ "ovs-ofctl dump-groups %s" % br,
|
|
])
|
|
|
|
# Flow protocols currently supported
|
|
--
|
|
2.27.0
|
|
|
|
From af40be92f502b35fa9d39ce4d4fea7d80c367830 Mon Sep 17 00:00:00 2001
|
|
From: Nadia Pinaeva <npinaeva@redhat.com>
|
|
Date: Tue, 15 Mar 2022 13:09:55 +0100
|
|
Subject: [PATCH] Improve sos collect for OCP: 1. wait for sos tmp project to
|
|
be deleted (just calling delete changes project state to Terminating, and
|
|
running a new sos collect is not possible before this project is fully
|
|
deleted) 2. use --retries flag to copy sos reports from the nodes more
|
|
reliably. The flag has been recently added to kubectl, and the most reliable
|
|
way to check if it's available or not is to check command error output for
|
|
"unknown flag" substring
|
|
|
|
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
|
|
---
|
|
sos/collector/clusters/ocp.py | 5 +++++
|
|
sos/collector/transports/oc.py | 6 +++++-
|
|
2 files changed, 10 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
|
|
index f1714239..9beb2f9b 100644
|
|
--- a/sos/collector/clusters/ocp.py
|
|
+++ b/sos/collector/clusters/ocp.py
|
|
@@ -128,6 +128,11 @@
|
|
if not ret['status'] == 0:
|
|
self.log_error("Error deleting temporary project: %s"
|
|
% ret['output'])
|
|
+ ret = self.exec_primary_cmd("oc wait namespace/%s --for=delete "
|
|
+ "--timeout=30s" % self.project)
|
|
+ if not ret['status'] == 0:
|
|
+ self.log_error("Error waiting for temporary project to be "
|
|
+ "deleted: %s" % ret['output'])
|
|
# don't leave the config on a non-existing project
|
|
self.exec_master_cmd("oc project default")
|
|
self.project = None
|
|
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
|
|
index 0fc9eee8..90a802b2 100644
|
|
--- a/sos/collector/transports/oc.py
|
|
+++ b/sos/collector/transports/oc.py
|
|
@@ -231,5 +231,9 @@ class OCTransport(RemoteTransport):
|
|
% (self.project, self.pod_name))
|
|
|
|
def _retrieve_file(self, fname, dest):
|
|
- cmd = self.run_oc("cp %s:%s %s" % (self.pod_name, fname, dest))
|
|
+ # check if --retries flag is available for given version of oc
|
|
+ result = self.run_oc("cp --retries", stderr=True)
|
|
+ flags = '' if "unknown flag" in result["output"] else '--retries=5'
|
|
+ cmd = self.run_oc("cp %s %s:%s %s"
|
|
+ % (flags, self.pod_name, fname, dest))
|
|
return cmd['status'] == 0
|
|
--
|
|
2.27.0
|
|
|
|
From 3b0676b90ff65f20eaba3062775ff72b89386ffc Mon Sep 17 00:00:00 2001
|
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
Date: Tue, 22 Mar 2022 14:25:24 -0400
|
|
Subject: [PATCH] [Plugin] Allow plugins to define default command environment
|
|
vars
|
|
|
|
Adds the ability for plugins to define a default set of environment vars
|
|
to pass to all commands executed by the plugin. This may be done either
|
|
via the new `set_default_cmd_environment()` or
|
|
`add_default_cmd_environment()` methods. The former will override any
|
|
previously set values, whereas the latter will add/update/modify any
|
|
existing values.
|
|
|
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
---
|
|
sos/report/plugins/__init__.py | 55 ++++++++++++++++++-
|
|
.../plugin_tests/plugin_environment.py | 44 +++++++++++++++
|
|
.../fake_plugins/default_env_test.py | 28 ++++++++++
|
|
tests/unittests/plugin_tests.py | 15 +++++
|
|
4 files changed, 140 insertions(+), 2 deletions(-)
|
|
create mode 100644 tests/report_tests/plugin_tests/plugin_environment.py
|
|
create mode 100644 tests/test_data/fake_plugins/default_env_test.py
|
|
|
|
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
|
|
index 336b4d22..74b4f4be 100644
|
|
--- a/sos/report/plugins/__init__.py
|
|
+++ b/sos/report/plugins/__init__.py
|
|
@@ -571,6 +571,7 @@ class Plugin():
|
|
self.manifest = None
|
|
self.skip_files = commons['cmdlineopts'].skip_files
|
|
self.skip_commands = commons['cmdlineopts'].skip_commands
|
|
+ self.default_environment = {}
|
|
|
|
self.soslog = self.commons['soslog'] if 'soslog' in self.commons \
|
|
else logging.getLogger('sos')
|
|
@@ -542,6 +542,52 @@
|
|
self.manifest.add_list('files', [])
|
|
self.manifest.add_field('containers', {})
|
|
|
|
+ def set_default_cmd_environment(self, env_vars):
|
|
+ """
|
|
+ Specify a collection of environment variables that should always be
|
|
+ passed to commands being executed by this plugin.
|
|
+
|
|
+ :param env_vars: The environment variables and their values to set
|
|
+ :type env_vars: ``dict{ENV_VAR_NAME: ENV_VAR_VALUE}``
|
|
+ """
|
|
+ if not isinstance(env_vars, dict):
|
|
+ raise TypeError(
|
|
+ "Environment variables for Plugin must be specified by dict"
|
|
+ )
|
|
+ self.default_environment = env_vars
|
|
+ self._log_debug("Default environment for all commands now set to %s"
|
|
+ % self.default_environment)
|
|
+
|
|
+ def add_default_cmd_environment(self, env_vars):
|
|
+ """
|
|
+ Add or modify a specific environment variable in the set of default
|
|
+ environment variables used by this Plugin.
|
|
+
|
|
+ :param env_vars: The environment variables to add to the current
|
|
+ set of env vars in use
|
|
+ :type env_vars: ``dict``
|
|
+ """
|
|
+ if not isinstance(env_vars, dict):
|
|
+ raise TypeError("Environment variables must be added via dict")
|
|
+ self._log_debug("Adding %s to default environment" % env_vars)
|
|
+ self.default_environment.update(env_vars)
|
|
+
|
|
+ def _get_cmd_environment(self, env=None):
|
|
+ """
|
|
+ Get the merged set of environment variables for a command about to be
|
|
+ executed by this plugin.
|
|
+
|
|
+ :returns: The set of env vars to use for a command
|
|
+ :rtype: ``dict``
|
|
+ """
|
|
+ if env is None:
|
|
+ return self.default_environment
|
|
+ if not isinstance(env, dict):
|
|
+ raise TypeError("Command env vars must be passed as dict")
|
|
+ _env = self.default_environment.copy()
|
|
+ _env.update(env)
|
|
+ return _env
|
|
+
|
|
def timeout_from_options(self, optname, plugoptname, default_timeout):
|
|
"""Returns either the default [plugin|cmd] timeout value, the value as
|
|
provided on the commandline via -k plugin.[|cmd-]timeout=value, or the
|
|
@@ -2258,6 +2305,8 @@ class Plugin():
|
|
|
|
_tags = list(set(_tags))
|
|
|
|
+ _env = self._get_cmd_environment(env)
|
|
+
|
|
if chroot or self.commons['cmdlineopts'].chroot == 'always':
|
|
root = self.sysroot
|
|
else:
|
|
@@ -2068,7 +2068,7 @@
|
|
|
|
result = sos_get_command_output(
|
|
cmd, timeout=timeout, stderr=stderr, chroot=root,
|
|
- chdir=runat, env=env, binary=binary, sizelimit=sizelimit,
|
|
+ chdir=runat, env=_env, binary=binary, sizelimit=sizelimit,
|
|
poller=self.check_timeout, foreground=foreground
|
|
)
|
|
|
|
@@ -2510,6 +2559,8 @@ class Plugin():
|
|
else:
|
|
root = None
|
|
|
|
+ _env = self._get_cmd_environment(env)
|
|
+
|
|
if container:
|
|
if self._get_container_runtime() is None:
|
|
self._log_info("Cannot run cmd '%s' in container %s: no "
|
|
@@ -2522,7 +2573,7 @@ class Plugin():
|
|
"container is running." % (cmd, container))
|
|
|
|
return sos_get_command_output(cmd, timeout=timeout, chroot=root,
|
|
- chdir=runat, binary=binary, env=env,
|
|
+ chdir=runat, binary=binary, env=_env,
|
|
foreground=foreground, stderr=stderr)
|
|
|
|
def _add_container_file_to_manifest(self, container, path, arcpath, tags):
|
|
diff --git a/tests/report_tests/plugin_tests/plugin_environment.py b/tests/report_tests/plugin_tests/plugin_environment.py
|
|
new file mode 100644
|
|
index 00000000..3158437a
|
|
--- /dev/null
|
|
+++ b/tests/report_tests/plugin_tests/plugin_environment.py
|
|
@@ -0,0 +1,44 @@
|
|
+# This file is part of the sos project: https://github.com/sosreport/sos
|
|
+#
|
|
+# This copyrighted material is made available to anyone wishing to use,
|
|
+# modify, copy, or redistribute it subject to the terms and conditions of
|
|
+# version 2 of the GNU General Public License.
|
|
+#
|
|
+# See the LICENSE file in the source distribution for further information.
|
|
+
|
|
+import os
|
|
+
|
|
+from sos_tests import StageTwoReportTest
|
|
+
|
|
+
|
|
+class PluginDefaultEnvironmentTest(StageTwoReportTest):
|
|
+ """
|
|
+ Ensure that being able to set a default set of environment variables is
|
|
+ working correctly and does not leave a lingering env var on the system
|
|
+
|
|
+ :avocado: tags=stageone
|
|
+ """
|
|
+
|
|
+ install_plugins = ['default_env_test']
|
|
+ sos_cmd = '-o default_env_test'
|
|
+
|
|
+ def test_environment_used_in_cmd(self):
|
|
+ self.assertFileHasContent(
|
|
+ 'sos_commands/default_env_test/env_var_test',
|
|
+ 'Does Linus play hockey?'
|
|
+ )
|
|
+
|
|
+ def test_environment_setting_logged(self):
|
|
+ self.assertSosLogContains(
|
|
+ 'Default environment for all commands now set to'
|
|
+ )
|
|
+
|
|
+ def test_environment_not_set_on_host(self):
|
|
+ self.assertTrue('TORVALDS' not in os.environ)
|
|
+ self.assertTrue('GREATESTSPORT' not in os.environ)
|
|
+
|
|
+ def test_environment_not_captured(self):
|
|
+ # we should still have an empty environment file
|
|
+ self.assertFileCollected('environment')
|
|
+ self.assertFileNotHasContent('environment', 'TORVALDS')
|
|
+ self.assertFileNotHasContent('environment', 'GREATESTSPORT')
|
|
diff --git a/tests/test_data/fake_plugins/default_env_test.py b/tests/test_data/fake_plugins/default_env_test.py
|
|
new file mode 100644
|
|
index 00000000..d1d1fb78
|
|
--- /dev/null
|
|
+++ b/tests/test_data/fake_plugins/default_env_test.py
|
|
@@ -0,0 +1,28 @@
|
|
+# This file is part of the sos project: https://github.com/sosreport/sos
|
|
+#
|
|
+# This copyrighted material is made available to anyone wishing to use,
|
|
+# modify, copy, or redistribute it subject to the terms and conditions of
|
|
+# version 2 of the GNU General Public License.
|
|
+#
|
|
+# See the LICENSE file in the source distribution for further information.
|
|
+
|
|
+from sos.report.plugins import Plugin, IndependentPlugin
|
|
+
|
|
+
|
|
+class DefaultEnv(Plugin, IndependentPlugin):
|
|
+
|
|
+ plugin_name = 'default_env_test'
|
|
+ short_desc = 'Fake plugin to test default env var handling'
|
|
+
|
|
+ def setup(self):
|
|
+ self.set_default_cmd_environment({
|
|
+ 'TORVALDS': 'Linus',
|
|
+ 'GREATESTSPORT': 'hockey'
|
|
+ })
|
|
+
|
|
+ self.add_cmd_output(
|
|
+ "sh -c 'echo Does '$TORVALDS' play '$GREATESTSPORT'?'",
|
|
+ suggest_filename='env_var_test'
|
|
+ )
|
|
+
|
|
+ self.add_env_var(['TORVALDS', 'GREATESTSPORT'])
|
|
diff --git a/tests/unittests/plugin_tests.py b/tests/unittests/plugin_tests.py
|
|
index 0dfa243d..e469b78e 100644
|
|
--- a/tests/unittests/plugin_tests.py
|
|
+++ b/tests/unittests/plugin_tests.py
|
|
@@ -305,6 +305,21 @@ class PluginTests(unittest.TestCase):
|
|
p.postproc()
|
|
self.assertTrue(p.did_postproc)
|
|
|
|
+ def test_set_default_cmd_env(self):
|
|
+ p = MockPlugin({
|
|
+ 'sysroot': self.sysroot,
|
|
+ 'policy': LinuxPolicy(init=InitSystem(), probe_runtime=False),
|
|
+ 'cmdlineopts': MockOptions(),
|
|
+ 'devices': {}
|
|
+ })
|
|
+ e = {'TORVALDS': 'Linus'}
|
|
+ p.set_default_cmd_environment(e)
|
|
+ self.assertEquals(p.default_environment, e)
|
|
+ add_e = {'GREATESTSPORT': 'hockey'}
|
|
+ p.add_default_cmd_environment(add_e)
|
|
+ self.assertEquals(p.default_environment['GREATESTSPORT'], 'hockey')
|
|
+ self.assertEquals(p.default_environment['TORVALDS'], 'Linus')
|
|
+
|
|
|
|
class AddCopySpecTests(unittest.TestCase):
|
|
|
|
--
|
|
2.27.0
|
|
|
|
From 1e12325efaa500d304dcbfbeeb50e72ed0f938f5 Mon Sep 17 00:00:00 2001
|
|
From: Vladislav Walek <22072258+vwalek@users.noreply.github.com>
|
|
Date: Thu, 17 Mar 2022 14:10:26 -0700
|
|
Subject: [PATCH] [openshift] Adding ability to use the localhost.kubeconfig
|
|
and KUBECONFIG env to use system:admin
|
|
|
|
Signed-off-by: Vladislav Walek <22072258+vwalek@users.noreply.github.com>
|
|
---
|
|
sos/report/plugins/openshift.py | 45 +++++++++++++++++++++++++++++++--
|
|
1 file changed, 43 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/sos/report/plugins/openshift.py b/sos/report/plugins/openshift.py
|
|
index 5ae38178..d643f04c 100644
|
|
--- a/sos/report/plugins/openshift.py
|
|
+++ b/sos/report/plugins/openshift.py
|
|
@@ -60,11 +60,18 @@
|
|
profiles = ('openshift',)
|
|
packages = ('openshift-hyperkube',)
|
|
|
|
+ master_localhost_kubeconfig = (
|
|
+ '/etc/kubernetes/static-pod-resources/'
|
|
+ 'kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig'
|
|
+ )
|
|
+
|
|
option_list = [
|
|
('token', 'admin token to allow API queries', 'fast', None),
|
|
+ ('kubeconfig', 'Path to a locally available kubeconfig file',
|
|
+ 'fast', None),
|
|
('host', 'host address to use for oc login, including port', 'fast',
|
|
'https://localhost:6443'),
|
|
- ('no-oc', 'do not collect `oc` command output', 'fast', False),
|
|
+ ('no-oc', 'do not collect `oc` command output', 'fast', True),
|
|
('podlogs', 'collect logs from each pod', 'fast', True),
|
|
('podlogs-filter', ('limit podlogs collection to pods matching this '
|
|
'regex'), 'fast', ''),
|
|
@@ -73,6 +80,10 @@ class Openshift(Plugin, RedHatPlugin):
|
|
"""Check to see if we can run `oc` commands"""
|
|
return self.exec_cmd('oc whoami')['status'] == 0
|
|
|
|
+ def _check_localhost_kubeconfig(self):
|
|
+ """Check if the localhost.kubeconfig exists with system:admin user"""
|
|
+ return self.path_exists(self.get_option('kubeconfig'))
|
|
+
|
|
def _check_oc_logged_in(self):
|
|
"""See if we're logged in to the API service, and if not attempt to do
|
|
so using provided plugin options
|
|
@@ -80,8 +91,38 @@ class Openshift(Plugin, RedHatPlugin):
|
|
if self._check_oc_function():
|
|
return True
|
|
|
|
- # Not logged in currently, attempt to do so
|
|
+ if self.get_option('kubeconfig') is None:
|
|
+ # If admin doesn't add the kubeconfig
|
|
+ # use default localhost.kubeconfig
|
|
+ self.set_option(
|
|
+ 'kubeconfig',
|
|
+ self.master_localhost_kubeconfig
|
|
+ )
|
|
+
|
|
+ # Check first if we can use the localhost.kubeconfig before
|
|
+ # using token. We don't want to use 'host' option due we use
|
|
+ # cluster url from kubeconfig. Default is localhost.
|
|
+ if self._check_localhost_kubeconfig():
|
|
+ self.set_default_cmd_environment({
|
|
+ 'KUBECONFIG': self.get_option('kubeconfig')
|
|
+ })
|
|
+
|
|
+ oc_res = self.exec_cmd(
|
|
+ "oc login -u system:admin "
|
|
+ "--insecure-skip-tls-verify=True"
|
|
+ )
|
|
+ if oc_res['status'] == 0 and self._check_oc_function():
|
|
+ return True
|
|
+
|
|
+ self._log_warn(
|
|
+ "The login command failed with status: %s and error: %s"
|
|
+ % (oc_res['status'], oc_res['output'])
|
|
+ )
|
|
+ return False
|
|
+
|
|
+ # If kubeconfig is not defined, check if token is provided.
|
|
token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
|
|
+
|
|
if token:
|
|
oc_res = self.exec_cmd("oc login %s --token=%s "
|
|
"--insecure-skip-tls-verify=True"
|
|
--
|
|
2.27.0
|
|
|
|
From 3b84b4ccfa9e4924a5a3829d3810568dfb69bf63 Mon Sep 17 00:00:00 2001
|
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
Date: Fri, 18 Mar 2022 16:25:35 -0400
|
|
Subject: [PATCH 1/2] [pacemaker] Redesign node enumeration logic
|
|
|
|
It has been found that `pcs status` output is liable to change, which
|
|
ends up breaking our parsing of node lists when using it on newer
|
|
versions.
|
|
|
|
Instead, first try to parse through `crm_mon` output, which is what `pcs
|
|
status` uses under the hood, but as a stable and reliable xml format.
|
|
|
|
Failing that, for example if the `--primary` node is not functioning as
|
|
part of the cluster, source `/etc/corosync/corosync.conf` instead.
|
|
|
|
Related: RHBZ2065805
|
|
Related: RHBZ2065811
|
|
|
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
---
|
|
sos/collector/clusters/pacemaker.py | 110 +++++++++++++++++++---------
|
|
1 file changed, 76 insertions(+), 34 deletions(-)
|
|
|
|
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
|
|
index 55024314..49d0ce51 100644
|
|
--- a/sos/collector/clusters/pacemaker.py
|
|
+++ b/sos/collector/clusters/pacemaker.py
|
|
@@ -8,7 +8,11 @@
|
|
#
|
|
# See the LICENSE file in the source distribution for further information.
|
|
|
|
+import re
|
|
+
|
|
from sos.collector.clusters import Cluster
|
|
+from setuptools._vendor.packaging import version
|
|
+from xml.etree import ElementTree
|
|
|
|
|
|
class pacemaker(Cluster):
|
|
@@ -18,42 +22,80 @@ class pacemaker(Cluster):
|
|
packages = ('pacemaker',)
|
|
option_list = [
|
|
('online', True, 'Collect nodes listed as online'),
|
|
- ('offline', True, 'Collect nodes listed as offline')
|
|
+ ('offline', True, 'Collect nodes listed as offline'),
|
|
+ ('only-corosync', False, 'Only use corosync.conf to enumerate nodes')
|
|
]
|
|
|
|
def get_nodes(self):
|
|
- self.res = self.exec_primary_cmd('pcs status')
|
|
- if self.res['status'] != 0:
|
|
- self.log_error('Cluster status could not be determined. Is the '
|
|
- 'cluster running on this node?')
|
|
- return []
|
|
- if 'node names do not match' in self.res['output']:
|
|
- self.log_warn('Warning: node name mismatch reported. Attempts to '
|
|
- 'connect to some nodes may fail.\n')
|
|
- return self.parse_pcs_output()
|
|
-
|
|
- def parse_pcs_output(self):
|
|
- nodes = []
|
|
- if self.get_option('online'):
|
|
- nodes += self.get_online_nodes()
|
|
- if self.get_option('offline'):
|
|
- nodes += self.get_offline_nodes()
|
|
- return nodes
|
|
-
|
|
- def get_online_nodes(self):
|
|
- for line in self.res['output'].splitlines():
|
|
- if line.startswith('Online:'):
|
|
- nodes = line.split('[')[1].split(']')[0]
|
|
- return [n for n in nodes.split(' ') if n]
|
|
-
|
|
- def get_offline_nodes(self):
|
|
- offline = []
|
|
- for line in self.res['output'].splitlines():
|
|
- if line.startswith('Node') and line.endswith('(offline)'):
|
|
- offline.append(line.split()[1].replace(':', ''))
|
|
- if line.startswith('OFFLINE:'):
|
|
- nodes = line.split('[')[1].split(']')[0]
|
|
- offline.extend([n for n in nodes.split(' ') if n])
|
|
- return offline
|
|
+ self.nodes = []
|
|
+ # try crm_mon first
|
|
+ try:
|
|
+ if not self.get_option('only-corosync'):
|
|
+ try:
|
|
+ self.get_nodes_from_crm()
|
|
+ except Exception as err:
|
|
+ self.log_warn("Falling back to sourcing corosync.conf. "
|
|
+ "Could not parse crm_mon output: %s" % err)
|
|
+ if not self.nodes:
|
|
+ # fallback to corosync.conf, in case the node we're inspecting
|
|
+ # is offline from the cluster
|
|
+ self.get_nodes_from_corosync()
|
|
+ except Exception as err:
|
|
+ self.log_error("Could not determine nodes from cluster: %s" % err)
|
|
+
|
|
+ _shorts = [n for n in self.nodes if '.' not in n]
|
|
+ if _shorts:
|
|
+ self.log_warn(
|
|
+ "WARNING: Node addresses '%s' may not resolve locally if you "
|
|
+ "are not running on a node in the cluster. Try using option "
|
|
+ "'-c pacemaker.only-corosync' if these connections fail."
|
|
+ % ','.join(_shorts)
|
|
+ )
|
|
+ return self.nodes
|
|
+
|
|
+ def get_nodes_from_crm(self):
|
|
+ """
|
|
+ Try to parse crm_mon output for node list and status.
|
|
+ """
|
|
+ xmlopt = '--output-as=xml'
|
|
+ # older pacemaker had a different option for xml output
|
|
+ _ver = self.exec_primary_cmd('crm_mon --version')
|
|
+ if _ver['status'] == 0:
|
|
+ cver = _ver['output'].split()[1].split('-')[0]
|
|
+ if not version.parse(cver) > version.parse('2.0.3'):
|
|
+ xmlopt = '--as-xml'
|
|
+ else:
|
|
+ return
|
|
+ _out = self.exec_primary_cmd(
|
|
+ "crm_mon --one-shot --inactive %s" % xmlopt,
|
|
+ need_root=True
|
|
+ )
|
|
+ if _out['status'] == 0:
|
|
+ self.parse_crm_xml(_out['output'])
|
|
+
|
|
+ def parse_crm_xml(self, xmlstring):
|
|
+ """
|
|
+ Parse the xml output string provided by crm_mon
|
|
+ """
|
|
+ _xml = ElementTree.fromstring(xmlstring)
|
|
+ nodes = _xml.find('nodes')
|
|
+ for node in nodes:
|
|
+ _node = node.attrib
|
|
+ if self.get_option('online') and _node['online'] == 'true':
|
|
+ self.nodes.append(_node['name'])
|
|
+ elif self.get_option('offline') and _node['online'] == 'false':
|
|
+ self.nodes.append(_node['name'])
|
|
+
|
|
+ def get_nodes_from_corosync(self):
|
|
+ """
|
|
+ As a fallback measure, read corosync.conf to get the node list. Note
|
|
+ that this prevents us from separating online nodes from offline nodes.
|
|
+ """
|
|
+ self.log_warn("WARNING: unable to distinguish online nodes from "
|
|
+ "offline nodes when sourcing from corosync.conf")
|
|
+ cc = self.primary.read_file('/etc/corosync/corosync.conf')
|
|
+ nodes = re.findall(r'((\sring0_addr:)(.*))', cc)
|
|
+ for node in nodes:
|
|
+ self.nodes.append(node[-1].strip())
|
|
|
|
# vim: set et ts=4 sw=4 :
|
|
--
|
|
2.27.0
|
|
|
|
|
|
From 6701a7d77ecc998b018b54ecc00f9fd102ae9518 Mon Sep 17 00:00:00 2001
|
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
Date: Mon, 21 Mar 2022 12:05:59 -0400
|
|
Subject: [PATCH 2/2] [clusters] Allow clusters to not add localhost to node
|
|
list
|
|
|
|
For most of our supported clusters, we end up needing to add the
|
|
local host executing `sos collect` to the node list (unless `--no-local`
|
|
is used) as that accounts for the primary node that may otherwise be
|
|
left off. However, this is not helpful for clusters that may reports
|
|
node names as something other than resolveable names. In those cases,
|
|
such as with pacemaker, adding the local hostname may result in
|
|
duplicate collections.
|
|
|
|
Add a toggle to cluster profiles via a new `strict_node_list` class attr
|
|
that, if True, will skip this addition. This toggle is default `False`
|
|
to preserve existing behavior, and is now enabled for `pacemaker`
|
|
specifically.
|
|
|
|
Related: RHBZ#2065821
|
|
|
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
---
|
|
sos/collector/__init__.py | 3 ++-
|
|
sos/collector/clusters/__init__.py | 4 ++++
|
|
sos/collector/clusters/pacemaker.py | 1 +
|
|
3 files changed, 7 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
|
|
index a8bb0064..d898ca34 100644
|
|
--- a/sos/collector/__init__.py
|
|
+++ b/sos/collector/__init__.py
|
|
@@ -1073,7 +1073,8 @@ class SoSCollector(SoSComponent):
|
|
for node in self.node_list:
|
|
if host == node.split('.')[0]:
|
|
self.node_list.remove(node)
|
|
- self.node_list.append(self.hostname)
|
|
+ if not self.cluster.strict_node_list:
|
|
+ self.node_list.append(self.hostname)
|
|
self.reduce_node_list()
|
|
try:
|
|
_node_max = len(max(self.node_list, key=len))
|
|
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
|
|
index f3f550ad..f00677b8 100644
|
|
--- a/sos/collector/clusters/__init__.py
|
|
+++ b/sos/collector/clusters/__init__.py
|
|
@@ -56,6 +56,10 @@
|
|
sos_plugin_options = {}
|
|
sos_preset = ''
|
|
cluster_name = None
|
|
+ # set this to True if the local host running collect should *not* be
|
|
+ # forcibly added to the node list. This can be helpful in situations where
|
|
+ # the host's fqdn and the name the cluster uses are different
|
|
+ strict_node_list = False
|
|
|
|
def __init__(self, commons):
|
|
self.master = None
|
|
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
|
|
index 49d0ce51..bebcb265 100644
|
|
--- a/sos/collector/clusters/pacemaker.py
|
|
+++ b/sos/collector/clusters/pacemaker.py
|
|
@@ -20,6 +20,7 @@ class pacemaker(Cluster):
|
|
cluster_name = 'Pacemaker High Availability Cluster Manager'
|
|
sos_plugins = ['pacemaker']
|
|
packages = ('pacemaker',)
|
|
+ strict_node_list = True
|
|
option_list = [
|
|
('online', True, 'Collect nodes listed as online'),
|
|
('offline', True, 'Collect nodes listed as offline'),
|
|
--
|
|
2.27.0
|
|
|
|
From 61765992812afb785e9552e01e3b5579118a6963 Mon Sep 17 00:00:00 2001
|
|
From: Nadia Pinaeva <npinaeva@redhat.com>
|
|
Date: Fri, 1 Apr 2022 12:05:36 +0200
|
|
Subject: [PATCH] Add one more container for plugin enablement
|
|
|
|
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
|
|
---
|
|
sos/report/plugins/openshift_ovn.py | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py
|
|
index b4787b8e..98522b1e 100644
|
|
--- a/sos/report/plugins/openshift_ovn.py
|
|
+++ b/sos/report/plugins/openshift_ovn.py
|
|
@@ -16,7 +16,7 @@ class OpenshiftOVN(Plugin, RedHatPlugin):
|
|
"""
|
|
short_desc = 'Openshift OVN'
|
|
plugin_name = "openshift_ovn"
|
|
- containers = ('ovnkube-master', 'ovn-ipsec')
|
|
+ containers = ('ovnkube-master', 'ovnkube-node', 'ovn-ipsec')
|
|
profiles = ('openshift',)
|
|
|
|
def setup(self):
|
|
--
|
|
2.27.0
|
|
|
|
From d3aa071efc85507341cf65dd61414a734654f50a Mon Sep 17 00:00:00 2001
|
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
Date: Mon, 28 Mar 2022 14:47:09 -0400
|
|
Subject: [PATCH] [presets] Adjust OCP preset options
|
|
|
|
Adjust the options used by the 'ocp' preset to better reflect the
|
|
current collection needs and approach.
|
|
|
|
This includes disabling the `cgroups` plugin due to the large amount of
|
|
mostly irrelevant data captured due to the high number of containers
|
|
present on OCP nodes, ensuring the `--container-runtime` option is set
|
|
to `crio` to align container-based collections, disabling HTML report
|
|
generation and increasing the base log size rather than blindly enabling
|
|
all-logs.
|
|
|
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
---
|
|
sos/presets/redhat/__init__.py | 13 +++++++++----
|
|
1 file changed, 9 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/sos/presets/redhat/__init__.py b/sos/presets/redhat/__init__.py
|
|
index 865c9b6b..0b9f6f11 100644
|
|
--- a/sos/presets/redhat/__init__.py
|
|
+++ b/sos/presets/redhat/__init__.py
|
|
@@ -36,10 +36,15 @@ RHOSP_OPTS = SoSOptions(plugopts=[
|
|
|
|
RHOCP = "ocp"
|
|
RHOCP_DESC = "OpenShift Container Platform by Red Hat"
|
|
-RHOCP_OPTS = SoSOptions(all_logs=True, verify=True, plugopts=[
|
|
- 'networking.timeout=600',
|
|
- 'networking.ethtool_namespaces=False',
|
|
- 'networking.namespaces=200'])
|
|
+RHOCP_OPTS = SoSOptions(
|
|
+ verify=True, skip_plugins=['cgroups'], container_runtime='crio',
|
|
+ no_report=True, log_size=100,
|
|
+ plugopts=[
|
|
+ 'crio.timeout=600',
|
|
+ 'networking.timeout=600',
|
|
+ 'networking.ethtool_namespaces=False',
|
|
+ 'networking.namespaces=200'
|
|
+ ])
|
|
|
|
RH_CFME = "cfme"
|
|
RH_CFME_DESC = "Red Hat CloudForms"
|
|
--
|
|
2.27.0
|
|
|
|
From f2b67ab820070063995689fed03492cdaa012d01 Mon Sep 17 00:00:00 2001
|
|
From: Nadia Pinaeva <npinaeva@redhat.com>
|
|
Date: Fri, 1 Apr 2022 17:01:35 +0200
|
|
Subject: [PATCH] Use /etc/os-release instead of /etc/redhat-release as the
|
|
most compatible way to find host release
|
|
|
|
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
|
|
---
|
|
sos/policies/distros/redhat.py | 5 ++---
|
|
1 file changed, 2 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
|
|
index 0c72a5e4..2e117f37 100644
|
|
--- a/sos/policies/distros/redhat.py
|
|
+++ b/sos/policies/distros/redhat.py
|
|
@@ -40,7 +40,6 @@ class RedHatPolicy(LinuxPolicy):
|
|
('Distribution Website', 'https://www.redhat.com/'),
|
|
('Commercial Support', 'https://www.access.redhat.com/')
|
|
]
|
|
- _redhat_release = '/etc/redhat-release'
|
|
_tmp_dir = "/var/tmp"
|
|
_in_container = False
|
|
default_scl_prefix = '/opt/rh'
|
|
@@ -471,7 +470,7 @@ support representative.
|
|
atomic = False
|
|
if ENV_HOST_SYSROOT not in os.environ:
|
|
return atomic
|
|
- host_release = os.environ[ENV_HOST_SYSROOT] + cls._redhat_release
|
|
+ host_release = os.environ[ENV_HOST_SYSROOT] + OS_RELEASE
|
|
if not os.path.exists(host_release):
|
|
return False
|
|
try:
|
|
@@ -558,7 +557,7 @@ support representative.
|
|
coreos = False
|
|
if ENV_HOST_SYSROOT not in os.environ:
|
|
return coreos
|
|
- host_release = os.environ[ENV_HOST_SYSROOT] + cls._redhat_release
|
|
+ host_release = os.environ[ENV_HOST_SYSROOT] + OS_RELEASE
|
|
try:
|
|
for line in open(host_release, 'r').read().splitlines():
|
|
coreos |= 'Red Hat Enterprise Linux CoreOS' in line
|
|
--
|
|
2.27.0
|
|
|
|
From ee0dd68199a2c9296eafe64ead5b2263c8270e4a Mon Sep 17 00:00:00 2001
|
|
From: Nadia Pinaeva <npinaeva@redhat.com>
|
|
Date: Wed, 6 Apr 2022 11:56:41 +0200
|
|
Subject: [PATCH] Use --force-pull-image option for pods created with oc. Set
|
|
--force-pull-image=True by default, can be turned off with
|
|
--force-pull-image=False
|
|
|
|
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
|
|
---
|
|
man/en/sos-collect.1 | 16 +++++++++++-----
|
|
sos/collector/__init__.py | 9 +++++----
|
|
sos/collector/transports/oc.py | 2 ++
|
|
sos/options.py | 20 ++++++++++++++------
|
|
4 files changed, 32 insertions(+), 15 deletions(-)
|
|
|
|
diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
|
|
index 9b0a5d7b..2f60332b 100644
|
|
--- a/man/en/sos-collect.1
|
|
+++ b/man/en/sos-collect.1
|
|
@@ -28,7 +28,7 @@
|
|
[\-\-no\-local]
|
|
[\-\-master MASTER]
|
|
[\-\-image IMAGE]
|
|
- [\-\-force-pull-image]
|
|
+ [\-\-force-pull-image TOGGLE, --pull TOGGLE]
|
|
[\-\-registry-user USER]
|
|
[\-\-registry-password PASSWORD]
|
|
[\-\-registry-authfile FILE]
|
|
@@ -262,10 +262,16 @@ Specify an image to use for the temporary container created for collections on
|
|
containerized host, if you do not want to use the default image specifed by the
|
|
host's policy. Note that this should include the registry.
|
|
.TP
|
|
-\fB\-\-force-pull-image\fR
|
|
-Use this option to force the container runtime to pull the specified image (even
|
|
-if it is the policy default image) even if the image already exists on the host.
|
|
-This may be useful to update an older container image on containerized hosts.
|
|
+\fB\-\-force-pull-image TOGGLE, \-\-pull TOGGLE\fR
|
|
+When collecting an sos report from a containerized host, force the host to always
|
|
+pull the specified image, even if that image already exists on the host.
|
|
+This is useful to ensure that the latest version of that image is always in use.
|
|
+Disabling this option will use whatever version of the image is present on the node,
|
|
+and only attempt a pull if there is no copy of the image present at all.
|
|
+
|
|
+Enable with true/on/yes or disable with false/off/no
|
|
+
|
|
+Default: true
|
|
.TP
|
|
\fB\-\-registry-user USER\fR
|
|
Specify the username to authenticate to the registry with in order to pull the container
|
|
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
|
|
index d898ca34..66c3d932 100644
|
|
--- a/sos/collector/__init__.py
|
|
+++ b/sos/collector/__init__.py
|
|
@@ -27,7 +27,7 @@
|
|
from textwrap import fill
|
|
from sos.cleaner import SoSCleaner
|
|
from sos.collector.sosnode import SosNode
|
|
-from sos.options import ClusterOption
|
|
+from sos.options import ClusterOption, str_to_bool
|
|
from sos.component import SoSComponent
|
|
from sos import __version__
|
|
|
|
@@ -85,7 +85,7 @@ class SoSCollector(SoSComponent):
|
|
'encrypt_pass': '',
|
|
'group': None,
|
|
'image': '',
|
|
- 'force_pull_image': False,
|
|
+ 'force_pull_image': True,
|
|
'jobs': 4,
|
|
'keywords': [],
|
|
'keyword_file': None,
|
|
@@ -357,8 +357,9 @@ class SoSCollector(SoSComponent):
|
|
collect_grp.add_argument('--image',
|
|
help=('Specify the container image to use for'
|
|
' containerized hosts.'))
|
|
- collect_grp.add_argument('--force-pull-image', '--pull', default=False,
|
|
- action='store_true',
|
|
+ collect_grp.add_argument('--force-pull-image', '--pull',
|
|
+ default=True, choices=(True, False),
|
|
+ type=str_to_bool,
|
|
help='Force pull the container image even if '
|
|
'it already exists on the host')
|
|
collect_grp.add_argument('--registry-user', default=None,
|
|
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
|
|
index 90a802b2..8f6aa9b4 100644
|
|
--- a/sos/collector/transports/oc.py
|
|
+++ b/sos/collector/transports/oc.py
|
|
@@ -147,6 +147,8 @@ class OCTransport(RemoteTransport):
|
|
"tty": True
|
|
}
|
|
],
|
|
+ "imagePullPolicy":
|
|
+ "Always" if self.opts.force_pull_image else "IfNotPresent",
|
|
"restartPolicy": "Never",
|
|
"nodeName": self.address,
|
|
"hostNetwork": True,
|
|
diff --git a/sos/options.py b/sos/options.py
|
|
index 4846a509..2d5a5135 100644
|
|
--- a/sos/options.py
|
|
+++ b/sos/options.py
|
|
@@ -18,6 +18,16 @@ def _is_seq(val):
|
|
return val_type is list or val_type is tuple
|
|
|
|
|
|
+def str_to_bool(val):
|
|
+ _val = val.lower()
|
|
+ if _val in ['true', 'on', 'yes']:
|
|
+ return True
|
|
+ elif _val in ['false', 'off', 'no']:
|
|
+ return False
|
|
+ else:
|
|
+ return None
|
|
+
|
|
+
|
|
class SoSOptions():
|
|
|
|
def _merge_opt(self, opt, src, is_default):
|
|
@@ -153,15 +163,13 @@ class SoSOptions():
|
|
if isinstance(self.arg_defaults[key], list):
|
|
return [v for v in val.split(',')]
|
|
if isinstance(self.arg_defaults[key], bool):
|
|
- _val = val.lower()
|
|
- if _val in ['true', 'on', 'yes']:
|
|
- return True
|
|
- elif _val in ['false', 'off', 'no']:
|
|
- return False
|
|
- else:
|
|
+ val = str_to_bool(val)
|
|
+ if val is None:
|
|
raise Exception(
|
|
"Value of '%s' in %s must be True or False or analagous"
|
|
% (key, conf))
|
|
+ else:
|
|
+ return val
|
|
if isinstance(self.arg_defaults[key], int):
|
|
try:
|
|
return int(val)
|
|
--
|
|
2.27.0
|
|
|
|
From ade857c82926bb7de2c45232f00a3f346db4e59a Mon Sep 17 00:00:00 2001
|
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
Date: Wed, 3 Nov 2021 16:28:10 -0400
|
|
Subject: [PATCH] [collect] Update docstrings for collect for help command
|
|
|
|
Updates the docstrings for collect components for the new `help`
|
|
command, including minor formatting changes to make the help output more
|
|
human-friendly.
|
|
|
|
This includes docstring updates and changes to transports and cluster
|
|
profiles.
|
|
|
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
---
|
|
sos/collector/clusters/jbon.py | 13 +++++---
|
|
sos/collector/clusters/kubernetes.py | 7 ++++-
|
|
sos/collector/clusters/ocp.py | 35 ++++++++++++++++++++-
|
|
sos/collector/clusters/ovirt.py | 27 ++++++++++++++++
|
|
sos/collector/clusters/satellite.py | 9 +++++-
|
|
sos/collector/transports/control_persist.py | 12 +++++--
|
|
sos/collector/transports/local.py | 7 +++--
|
|
sos/collector/transports/oc.py | 20 ++++++++++--
|
|
8 files changed, 114 insertions(+), 16 deletions(-)
|
|
|
|
diff --git a/sos/collector/clusters/jbon.py b/sos/collector/clusters/jbon.py
|
|
index 8f083ac6..219e1901 100644
|
|
--- a/sos/collector/clusters/jbon.py
|
|
+++ b/sos/collector/clusters/jbon.py
|
|
@@ -12,11 +12,14 @@ from sos.collector.clusters import Cluster
|
|
|
|
|
|
class jbon(Cluster):
|
|
- '''Just a Bunch of Nodes
|
|
-
|
|
- Used when --cluster-type=none (or jbon), to avoid cluster checks, and just
|
|
- use the provided --nodes list
|
|
- '''
|
|
+ """
|
|
+ Used when --cluster-type=none (or jbon) to avoid cluster checks, and just
|
|
+ use the provided --nodes list.
|
|
+
|
|
+ Using this profile will skip any and all operations that a cluster profile
|
|
+ normally performs, and will not set any plugins, plugin options, or presets
|
|
+ for the sos report generated on the nodes provided by --nodes.
|
|
+ """
|
|
|
|
cluster_name = 'Just a Bunch Of Nodes (no cluster)'
|
|
packages = None
|
|
diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
|
|
index 99f788dc..04752977 100644
|
|
--- a/sos/collector/clusters/kubernetes.py
|
|
+++ b/sos/collector/clusters/kubernetes.py
|
|
@@ -13,7 +13,12 @@ from sos.collector.clusters import Cluster
|
|
|
|
|
|
class kubernetes(Cluster):
|
|
-
|
|
+ """
|
|
+ The kuberentes cluster profile is intended to be used on kubernetes
|
|
+ clusters built from the upstream/source kubernetes (k8s) project. It is
|
|
+ not intended for use with other projects or platforms that are built ontop
|
|
+ of kubernetes.
|
|
+ """
|
|
cluster_name = 'Community Kubernetes'
|
|
packages = ('kubernetes-master',)
|
|
sos_plugins = ['kubernetes']
|
|
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
|
|
index ae93ad58..f1714239 100644
|
|
--- a/sos/collector/clusters/ocp.py
|
|
+++ b/sos/collector/clusters/ocp.py
|
|
@@ -16,7 +16,40 @@ from sos.utilities import is_executable
|
|
|
|
|
|
class ocp(Cluster):
|
|
- """OpenShift Container Platform v4"""
|
|
+ """
|
|
+ This profile is for use with OpenShift Container Platform (v4) clusters
|
|
+ instead of the kubernetes profile.
|
|
+
|
|
+ This profile will favor using the `oc` transport type, which means it will
|
|
+ leverage a locally installed `oc` binary. This is also how node enumeration
|
|
+ is done. To instead use SSH to connect to the nodes, use the
|
|
+ '--transport=control_persist' option.
|
|
+
|
|
+ Thus, a functional `oc` binary for the user executing sos collect is
|
|
+ required. Functional meaning that the user can run `oc` commands with
|
|
+ clusterAdmin privileges.
|
|
+
|
|
+ If this requires the use of a secondary configuration file, specify that
|
|
+ path with the 'kubeconfig' cluster option.
|
|
+
|
|
+ Alternatively, provide a clusterAdmin access token either via the 'token'
|
|
+ cluster option or, preferably, the SOSOCPTOKEN environment variable.
|
|
+
|
|
+ By default, this profile will enumerate only master nodes within the
|
|
+ cluster, and this may be changed by overriding the 'role' cluster option.
|
|
+ To collect from all nodes in the cluster regardless of role, use the form
|
|
+ -c ocp.role=''.
|
|
+
|
|
+ Filtering nodes by a label applied to that node is also possible via the
|
|
+ label cluster option, though be aware that this is _combined_ with the role
|
|
+ option mentioned above.
|
|
+
|
|
+ To avoid redundant collections of OCP API information (e.g. 'oc get'
|
|
+ commands), this profile will attempt to enable the openshift plugin on only
|
|
+ a single master node. If the none of the master nodes have a functional
|
|
+ 'oc' binary available, *and* the --no-local option is used, that means that
|
|
+ no API data will be collected.
|
|
+ """
|
|
|
|
cluster_name = 'OpenShift Container Platform v4'
|
|
packages = ('openshift-hyperkube', 'openshift-clients')
|
|
diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py
|
|
index bd2d0c74..4587a1ca 100644
|
|
--- a/sos/collector/clusters/ovirt.py
|
|
+++ b/sos/collector/clusters/ovirt.py
|
|
@@ -17,6 +17,33 @@ ENGINE_KEY = '/etc/pki/ovirt-engine/keys/engine_id_rsa'
|
|
|
|
|
|
class ovirt(Cluster):
|
|
+ """
|
|
+ This cluster profile is for the oVirt/RHV project which provides for a
|
|
+ virtualization cluster built ontop of KVM.
|
|
+
|
|
+ Nodes enumerated will be hypervisors within the envrionment, not virtual
|
|
+ machines running on those hypervisors. By default, ALL hypervisors within
|
|
+ the environment are returned. This may be influenced by the 'cluster' and
|
|
+ 'datacenter' cluster options, which will limit enumeration to hypervisors
|
|
+ within the specific cluster and/or datacenter. The spm-only cluster option
|
|
+ may also be used to only collect from hypervisors currently holding the
|
|
+ SPM role.
|
|
+
|
|
+ Optionally, to only collect an archive from manager and the postgresql
|
|
+ database, use the no-hypervisors cluster option.
|
|
+
|
|
+ By default, a second archive from the manager will be collected that is
|
|
+ just the postgresql plugin configured in such a way that a dump of the
|
|
+ manager's database that can be explored and restored to other systems will
|
|
+ be collected.
|
|
+
|
|
+ The ovirt profile focuses on the upstream, community ovirt project.
|
|
+
|
|
+ The rhv profile is for Red Hat customers running RHV (formerly RHEV).
|
|
+
|
|
+ The rhhi_virt profile is for Red Hat customers running RHV in a
|
|
+ hyper-converged setup and enables gluster collections.
|
|
+ """
|
|
|
|
cluster_name = 'Community oVirt'
|
|
packages = ('ovirt-engine',)
|
|
diff --git a/sos/collector/clusters/satellite.py b/sos/collector/clusters/satellite.py
|
|
index 7c21e553..5e28531d 100644
|
|
--- a/sos/collector/clusters/satellite.py
|
|
+++ b/sos/collector/clusters/satellite.py
|
|
@@ -13,7 +13,14 @@ from sos.collector.clusters import Cluster
|
|
|
|
|
|
class satellite(Cluster):
|
|
- """Red Hat Satellite 6"""
|
|
+ """
|
|
+ This profile is specifically for Red Hat Satellite 6, and not earlier
|
|
+ releases of Satellite.
|
|
+
|
|
+ While note technically a 'cluster' in the traditional sense, Satellite
|
|
+ does provide for 'capsule' nodes which is what this profile aims to
|
|
+ enumerate beyond the 'primary' Satellite system.
|
|
+ """
|
|
|
|
cluster_name = 'Red Hat Satellite 6'
|
|
packages = ('satellite', 'satellite-installer')
|
|
diff --git a/sos/collector/transports/control_persist.py b/sos/collector/transports/control_persist.py
|
|
index 3e848b41..ae6c7e43 100644
|
|
--- a/sos/collector/transports/control_persist.py
|
|
+++ b/sos/collector/transports/control_persist.py
|
|
@@ -26,10 +26,18 @@ from sos.utilities import sos_get_command_output
|
|
|
|
|
|
class SSHControlPersist(RemoteTransport):
|
|
- """A transport for collect that leverages OpenSSH's Control Persist
|
|
+ """
|
|
+ A transport for collect that leverages OpenSSH's ControlPersist
|
|
functionality which uses control sockets to transparently keep a connection
|
|
open to the remote host without needing to rebuild the SSH connection for
|
|
- each and every command executed on the node
|
|
+ each and every command executed on the node.
|
|
+
|
|
+ This transport will by default assume the use of SSH keys, meaning keys
|
|
+ have already been distributed to target nodes. If this is not the case,
|
|
+ users will need to provide a password using the --password or
|
|
+ --password-per-node option, depending on if the password to connect to all
|
|
+ nodes is the same or not. Note that these options prevent the use of the
|
|
+ --batch option, as they require user input.
|
|
"""
|
|
|
|
name = 'control_persist'
|
|
diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py
|
|
index 2996d524..dd72053c 100644
|
|
--- a/sos/collector/transports/local.py
|
|
+++ b/sos/collector/transports/local.py
|
|
@@ -15,9 +15,10 @@ from sos.collector.transports import RemoteTransport
|
|
|
|
|
|
class LocalTransport(RemoteTransport):
|
|
- """A 'transport' to represent a local node. This allows us to more easily
|
|
- extend SoSNode() without having a ton of 'if local' or similar checks in
|
|
- more places than we actually need them
|
|
+ """
|
|
+ A 'transport' to represent a local node. No remote connection is actually
|
|
+ made, and all commands set to be run by this transport are executed locally
|
|
+ without any wrappers.
|
|
"""
|
|
|
|
name = 'local_node'
|
|
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
|
|
index 720dd61d..0fc9eee8 100644
|
|
--- a/sos/collector/transports/oc.py
|
|
+++ b/sos/collector/transports/oc.py
|
|
@@ -18,15 +18,29 @@ from sos.utilities import (is_executable, sos_get_command_output,
|
|
|
|
|
|
class OCTransport(RemoteTransport):
|
|
- """This transport leverages the execution of commands via a locally
|
|
+ """
|
|
+ This transport leverages the execution of commands via a locally
|
|
available and configured ``oc`` binary for OCPv4 environments.
|
|
|
|
+ The location of the oc binary MUST be in the $PATH used by the locally
|
|
+ loaded SoS policy. Specifically this means that the binary cannot be in the
|
|
+ running user's home directory, such as ~/.local/bin.
|
|
+
|
|
OCPv4 clusters generally discourage the use of SSH, so this transport may
|
|
be used to remove our use of SSH in favor of the environment provided
|
|
method of connecting to nodes and executing commands via debug pods.
|
|
|
|
- Note that this approach will generate multiple debug pods over the course
|
|
- of our execution
|
|
+ The debug pod created will be a privileged pod that mounts the host's
|
|
+ filesystem internally so that sos report collections reflect the host, and
|
|
+ not the container in which it runs.
|
|
+
|
|
+ This transport will execute within a temporary 'sos-collect-tmp' project
|
|
+ created by the OCP cluster profile. The project will be removed at the end
|
|
+ of execution.
|
|
+
|
|
+ In the event of failures due to a misbehaving OCP API or oc binary, it is
|
|
+ recommended to fallback to the control_persist transport by manually
|
|
+ setting the --transport option.
|
|
"""
|
|
|
|
name = 'oc'
|
|
--
|
|
2.27.0
|
|
|
|
From ce289a3ae7101a898efdb84ddfd575576ba5819b Mon Sep 17 00:00:00 2001
|
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
Date: Tue, 5 Apr 2022 11:32:11 -0400
|
|
Subject: [PATCH] [ocp, openshift] Re-align API collection options and rename
|
|
option
|
|
|
|
Previously, in #2888, the `openshift` plugin was extended to allow API
|
|
collections by using a default-available kubeconfig file rather than
|
|
relying on user-provided tokens. This also included flipping the default
|
|
value of the `no-oc` plugin option to `True` (meaning do not collect API
|
|
output by default).
|
|
|
|
This worked for the plugin, but it introduced a gap in `sos collect`
|
|
whereby the cluster profile could no longer reliably enable API
|
|
collections when trying to leverage the new functionality of not
|
|
requiring a user token.
|
|
|
|
Fix this by updating the cluster profile to align with the new
|
|
default-off approach of API collections.
|
|
|
|
Along with this, add a toggle to the cluster profile directly to allow
|
|
users to toggle API collections on or off (default off) directly. This
|
|
is done via a new `with-api` cluster option (e.g. `-c ocp.with-api`).
|
|
Further, rename the `openshift` plugin option from `no-oc` to
|
|
`with-api`. This change not only makes the option use case far more
|
|
obvious, it will also align the use of the option to both `collect` and
|
|
`report` so that users need only be aware of a single option for either
|
|
method.
|
|
|
|
The cluster profile also has logic to detect which plugin option,
|
|
`no-oc` or `with-api` to use based on the (RHEL) sos version installed
|
|
on the nodes being inspected by the `ocp` cluster profile.
|
|
|
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
---
|
|
sos/collector/clusters/ocp.py | 72 +++++++++++++++++++++++++++------
|
|
sos/report/plugins/openshift.py | 26 +++++++-----
|
|
2 files changed, 77 insertions(+), 21 deletions(-)
|
|
|
|
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
|
|
index 9beb2f9b..e31d1903 100644
|
|
--- a/sos/collector/clusters/ocp.py
|
|
+++ b/sos/collector/clusters/ocp.py
|
|
@@ -30,7 +30,11 @@ class ocp(Cluster):
|
|
clusterAdmin privileges.
|
|
|
|
If this requires the use of a secondary configuration file, specify that
|
|
- path with the 'kubeconfig' cluster option.
|
|
+ path with the 'kubeconfig' cluster option. This config file will also be
|
|
+ used on a single master node to perform API collections if the `with-api`
|
|
+ option is enabled (default disabled). If no `kubeconfig` option is given,
|
|
+ but `with-api` is enabled, the cluster profile will attempt to use a
|
|
+ well-known default kubeconfig file if it is available on the host.
|
|
|
|
Alternatively, provide a clusterAdmin access token either via the 'token'
|
|
cluster option or, preferably, the SOSOCPTOKEN environment variable.
|
|
@@ -45,7 +49,7 @@ class ocp(Cluster):
|
|
option mentioned above.
|
|
|
|
To avoid redundant collections of OCP API information (e.g. 'oc get'
|
|
- commands), this profile will attempt to enable the openshift plugin on only
|
|
+ commands), this profile will attempt to enable the API collections on only
|
|
a single master node. If the none of the master nodes have a functional
|
|
'oc' binary available, *and* the --no-local option is used, that means that
|
|
no API data will be collected.
|
|
@@ -63,7 +67,8 @@ class ocp(Cluster):
|
|
('label', '', 'Colon delimited list of labels to select nodes with'),
|
|
('role', 'master', 'Colon delimited list of roles to filter on'),
|
|
('kubeconfig', '', 'Path to the kubeconfig file'),
|
|
- ('token', '', 'Service account token to use for oc authorization')
|
|
+ ('token', '', 'Service account token to use for oc authorization'),
|
|
+ ('with-api', False, 'Collect OCP API data from a master node')
|
|
]
|
|
|
|
def fmt_oc_cmd(self, cmd):
|
|
@@ -219,13 +219,52 @@
|
|
return False
|
|
return 'master' in self.node_dict[sosnode.address]['roles']
|
|
|
|
+ def _toggle_api_opt(self, node, use_api):
|
|
+ """In earlier versions of sos, the openshift plugin option that is
|
|
+ used to toggle the API collections was called `no-oc` rather than
|
|
+ `with-api`. This older plugin option had the inverse logic of the
|
|
+ current `with-api` option.
|
|
+
|
|
+ Use this to toggle the correct plugin option given the node's sos
|
|
+ version. Note that the use of version 4.2 here is tied to the RHEL
|
|
+ release (the only usecase for this cluster profile) rather than
|
|
+ the upstream version given the backports for that downstream.
|
|
+
|
|
+ :param node: The node being inspected for API collections
|
|
+ :type node: ``SoSNode``
|
|
+
|
|
+ :param use_api: Should this node enable API collections?
|
|
+ :type use_api: ``bool``
|
|
+ """
|
|
+ if node.check_sos_version('4.2-16'):
|
|
+ _opt = 'with-api'
|
|
+ _val = 'on' if use_api else 'off'
|
|
+ else:
|
|
+ _opt = 'no-oc'
|
|
+ _val = 'off' if use_api else 'on'
|
|
+ node.plugopts.append("openshift.%s=%s" % (_opt, _val))
|
|
+
|
|
def set_master_options(self, node):
|
|
+
|
|
node.enable_plugins.append('openshift')
|
|
+ if not self.get_option('with-api'):
|
|
+ self._toggle_api_opt(node, False)
|
|
+ return
|
|
if self.api_collect_enabled:
|
|
# a primary has already been enabled for API collection, disable
|
|
# it among others
|
|
- node.plugopts.append('openshift.no-oc=on')
|
|
+ self._toggle_api_opt(node, False)
|
|
else:
|
|
+ # running in a container, so reference the /host mount point
|
|
+ master_kube = (
|
|
+ '/host/etc/kubernetes/static-pod-resources/'
|
|
+ 'kube-apiserver-certs/secrets/node-kubeconfigs/'
|
|
+ 'localhost.kubeconfig'
|
|
+ )
|
|
+ _optconfig = self.get_option('kubeconfig')
|
|
+ if _optconfig and not _optconfig.startswith('/host'):
|
|
+ _optconfig = '/host/' + _optconfig
|
|
+ _kubeconfig = _optconfig or master_kube
|
|
_oc_cmd = 'oc'
|
|
if node.host.containerized:
|
|
_oc_cmd = '/host/bin/oc'
|
|
@@ -244,17 +288,21 @@ class ocp(Cluster):
|
|
need_root=True)
|
|
if can_oc['status'] == 0:
|
|
# the primary node can already access the API
|
|
+ self._toggle_api_opt(node, True)
|
|
self.api_collect_enabled = True
|
|
elif self.token:
|
|
node.sos_env_vars['SOSOCPTOKEN'] = self.token
|
|
+ self._toggle_api_opt(node, True)
|
|
+ self.api_collect_enabled = True
|
|
+ elif node.file_exists(_kubeconfig):
|
|
+ # if the file exists, then the openshift sos plugin will use it
|
|
+ # if the with-api option is turned on
|
|
+ if not _kubeconfig == master_kube:
|
|
+ node.plugopts.append(
|
|
+ "openshift.kubeconfig=%s" % _kubeconfig
|
|
+ )
|
|
+ self._toggle_api_opt(node, True)
|
|
self.api_collect_enabled = True
|
|
- elif self.get_option('kubeconfig'):
|
|
- kc = self.get_option('kubeconfig')
|
|
- if node.file_exists(kc):
|
|
- if node.host.containerized:
|
|
- kc = "/host/%s" % kc
|
|
- node.sos_env_vars['KUBECONFIG'] = kc
|
|
- self.api_collect_enabled = True
|
|
if self.api_collect_enabled:
|
|
msg = ("API collections will be performed on %s\nNote: API "
|
|
"collections may extend runtime by 10s of minutes\n"
|
|
@@ -264,6 +312,6 @@ class ocp(Cluster):
|
|
|
|
def set_node_options(self, node):
|
|
# don't attempt OC API collections on non-primary nodes
|
|
- node.plugopts.append('openshift.no-oc=on')
|
|
+ self._toggle_api_opt(node, False)
|
|
|
|
# vim: set et ts=4 sw=4 :
|
|
diff --git a/sos/report/plugins/openshift.py b/sos/report/plugins/openshift.py
|
|
index d643f04c..a41ab62b 100644
|
|
--- a/sos/report/plugins/openshift.py
|
|
+++ b/sos/report/plugins/openshift.py
|
|
@@ -19,7 +19,10 @@ class Openshift(Plugin, RedHatPlugin):
|
|
further extending the kubernetes plugin (or the OCP 3.x extensions included
|
|
in the Red Hat version of the kube plugin).
|
|
|
|
- By default, this plugin will collect cluster information and inspect the
|
|
+ This plugin may collect OCP API information when the `with-api` option is
|
|
+ enabled. This option is disabled by default.
|
|
+
|
|
+ When enabled, this plugin will collect cluster information and inspect the
|
|
default namespaces/projects that are created during deployment - i.e. the
|
|
namespaces of the cluster projects matching openshift.* and kube.*. At the
|
|
time of this plugin's creation that number of default projects is already
|
|
@@ -34,16 +37,20 @@ class Openshift(Plugin, RedHatPlugin):
|
|
|
|
Users will need to either:
|
|
|
|
- 1) Provide the bearer token via the `-k openshift.token` option
|
|
- 2) Provide the bearer token via the `SOSOCPTOKEN` environment variable
|
|
- 3) Otherwise ensure that the root user can successfully run `oc` and
|
|
+ 1) Accept the use of a well-known stock kubeconfig file provided via a
|
|
+ static pod resource for the kube-apiserver
|
|
+ 2) Provide the bearer token via the `-k openshift.token` option
|
|
+ 3) Provide the bearer token via the `SOSOCPTOKEN` environment variable
|
|
+ 4) Otherwise ensure that the root user can successfully run `oc` and
|
|
get proper output prior to running this plugin
|
|
|
|
|
|
- It is highly suggested that option #2 be used first, as this will prevent
|
|
- the token from being recorded in output saved to the archive. Option #1 may
|
|
+ It is highly suggested that option #1 be used first, as this uses well
|
|
+ known configurations and requires the least information from the user. If
|
|
+ using a token, it is recommended to use option #3 as this will prevent
|
|
+ the token from being recorded in output saved to the archive. Option #2 may
|
|
be used if this is considered an acceptable risk. It is not recommended to
|
|
- rely on option #3, though it will provide the functionality needed.
|
|
+ rely on option #4, though it will provide the functionality needed.
|
|
"""
|
|
|
|
short_desc = 'Openshift Container Platform 4.x'
|
|
@@ -71,6 +71,7 @@
|
|
'fast', None),
|
|
('host', 'host address to use for oc login, including port', 'fast',
|
|
'https://localhost:6443'),
|
|
+ ('with-api', 'collect output from the OCP API', 'fast', False),
|
|
('no-oc', 'do not collect `oc` command output', 'fast', True),
|
|
('podlogs', 'collect logs from each pod', 'fast', True),
|
|
('podlogs-filter', ('limit podlogs collection to pods matching this '
|
|
@@ -212,7 +220,7 @@ class Openshift(Plugin, RedHatPlugin):
|
|
self.add_copy_spec('/etc/kubernetes/*')
|
|
|
|
# see if we run `oc` commands
|
|
- if not self.get_option('no-oc'):
|
|
+ if self.get_option('with-api'):
|
|
can_run_oc = self._check_oc_logged_in()
|
|
else:
|
|
can_run_oc = False
|
|
--
|
|
2.27.0
|
|
|
|
From 2b2ad04884cfdda78c02a33a73603d249c0a4dd5 Mon Sep 17 00:00:00 2001
|
|
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
Date: Tue, 19 Oct 2021 11:51:56 -0400
|
|
Subject: [PATCH 1/2] [Plugin] Allow writing command output directly to disk
|
|
|
|
This commit addresses a long standing ask in sos, regarding resource
|
|
consumption when sos is run with `--all-logs`.
|
|
|
|
For executions where `--all-logs` is used, or for specific commands
|
|
where `sizelimit` has been set to 0, `add_cmd_output()` and
|
|
`add_journal()` will now instruct `sos_get_command_output()` to write
|
|
output directly to a file rather than saving the output in memory.
|
|
|
|
When this occurs, the `output` key in the returned dict will be empty.
|
|
|
|
Note that this does extend to `collect_cmd_output()` or `exec_cmd()`.
|
|
|
|
Resolves: #1506
|
|
|
|
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
---
|
|
sos/archive.py | 14 +++----
|
|
sos/report/__init__.py | 10 ++++-
|
|
sos/report/plugins/__init__.py | 70 +++++++++++++++++++++++-----------
|
|
sos/utilities.py | 69 ++++++++++++++++++++++++++++-----
|
|
4 files changed, 123 insertions(+), 40 deletions(-)
|
|
|
|
diff --git a/sos/archive.py b/sos/archive.py
|
|
index e3c68b77..e92d5d60 100644
|
|
--- a/sos/archive.py
|
|
+++ b/sos/archive.py
|
|
@@ -251,7 +251,7 @@ class FileCacheArchive(Archive):
|
|
|
|
return dest
|
|
|
|
- def _check_path(self, src, path_type, dest=None, force=False):
|
|
+ def check_path(self, src, path_type, dest=None, force=False):
|
|
"""Check a new destination path in the archive.
|
|
|
|
Since it is possible for multiple plugins to collect the same
|
|
@@ -345,7 +345,7 @@ class FileCacheArchive(Archive):
|
|
if not dest:
|
|
dest = src
|
|
|
|
- dest = self._check_path(dest, P_FILE)
|
|
+ dest = self.check_path(dest, P_FILE)
|
|
if not dest:
|
|
return
|
|
|
|
@@ -384,7 +384,7 @@ class FileCacheArchive(Archive):
|
|
# over any exixting content in the archive, since it is used by
|
|
# the Plugin postprocessing hooks to perform regex substitution
|
|
# on file content.
|
|
- dest = self._check_path(dest, P_FILE, force=True)
|
|
+ dest = self.check_path(dest, P_FILE, force=True)
|
|
|
|
f = codecs.open(dest, mode, encoding='utf-8')
|
|
if isinstance(content, bytes):
|
|
@@ -397,7 +397,7 @@ class FileCacheArchive(Archive):
|
|
|
|
def add_binary(self, content, dest):
|
|
with self._path_lock:
|
|
- dest = self._check_path(dest, P_FILE)
|
|
+ dest = self.check_path(dest, P_FILE)
|
|
if not dest:
|
|
return
|
|
|
|
@@ -409,7 +409,7 @@ class FileCacheArchive(Archive):
|
|
def add_link(self, source, link_name):
|
|
self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source))
|
|
with self._path_lock:
|
|
- dest = self._check_path(link_name, P_LINK)
|
|
+ dest = self.check_path(link_name, P_LINK)
|
|
if not dest:
|
|
return
|
|
|
|
@@ -484,10 +484,10 @@ class FileCacheArchive(Archive):
|
|
"""
|
|
# Establish path structure
|
|
with self._path_lock:
|
|
- self._check_path(path, P_DIR)
|
|
+ self.check_path(path, P_DIR)
|
|
|
|
def add_node(self, path, mode, device):
|
|
- dest = self._check_path(path, P_NODE)
|
|
+ dest = self.check_path(path, P_NODE)
|
|
if not dest:
|
|
return
|
|
|
|
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
|
|
index 249ff119..46c7f219 100644
|
|
--- a/sos/report/__init__.py
|
|
+++ b/sos/report/__init__.py
|
|
@@ -476,7 +476,8 @@ class SoSReport(SoSComponent):
|
|
'verbosity': self.opts.verbosity,
|
|
'cmdlineopts': self.opts,
|
|
'devices': self.devices,
|
|
- 'namespaces': self.namespaces
|
|
+ 'namespaces': self.namespaces,
|
|
+ 'tempfile_util': self.tempfile_util
|
|
}
|
|
|
|
def get_temp_file(self):
|
|
@@ -1075,7 +1076,12 @@ class SoSReport(SoSComponent):
|
|
_plug.manifest.add_field('end_time', end)
|
|
_plug.manifest.add_field('run_time', end - start)
|
|
except TimeoutError:
|
|
- self.ui_log.error("\n Plugin %s timed out\n" % plugin[1])
|
|
+ msg = "Plugin %s timed out" % plugin[1]
|
|
+ # log to ui_log.error to show the user, log to soslog.info
|
|
+ # so that someone investigating the sos execution has it all
|
|
+ # in one place, but without double notifying the user.
|
|
+ self.ui_log.error("\n %s\n" % msg)
|
|
+ self.soslog.info(msg)
|
|
self.running_plugs.remove(plugin[1])
|
|
self.loaded_plugins[plugin[0]-1][1].set_timeout_hit()
|
|
pool.shutdown(wait=True)
|
|
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
|
|
index 3ff7c191..38529a9d 100644
|
|
--- a/sos/report/plugins/__init__.py
|
|
+++ b/sos/report/plugins/__init__.py
|
|
@@ -15,6 +15,7 @@ from sos.utilities import (sos_get_command_output, import_module, grep,
|
|
path_exists, path_isdir, path_isfile, path_islink,
|
|
listdir, path_join)
|
|
|
|
+from sos.archive import P_FILE
|
|
import os
|
|
import glob
|
|
import re
|
|
@@ -1686,6 +1687,8 @@ class Plugin():
|
|
kwargs['priority'] = 10
|
|
if 'changes' not in kwargs:
|
|
kwargs['changes'] = False
|
|
+ if self.get_option('all_logs') or kwargs['sizelimit'] == 0:
|
|
+ kwargs['to_file'] = True
|
|
soscmd = SoSCommand(**kwargs)
|
|
self._log_debug("packed command: " + soscmd.__str__())
|
|
for _skip_cmd in self.skip_commands:
|
|
@@ -1707,7 +1710,8 @@ class Plugin():
|
|
chroot=True, runat=None, env=None, binary=False,
|
|
sizelimit=None, pred=None, subdir=None,
|
|
changes=False, foreground=False, tags=[],
|
|
- priority=10, cmd_as_tag=False, container=None):
|
|
+ priority=10, cmd_as_tag=False, container=None,
|
|
+ to_file=False):
|
|
"""Run a program or a list of programs and collect the output
|
|
|
|
Output will be limited to `sizelimit`, collecting the last X amount
|
|
@@ -1776,6 +1780,10 @@ class Plugin():
|
|
:param container: Run the specified `cmds` inside a container with this
|
|
ID or name
|
|
:type container: ``str``
|
|
+
|
|
+ :param to_file: Should command output be written directly to a new
|
|
+ file rather than stored in memory?
|
|
+ :type to_file: ``bool``
|
|
"""
|
|
if isinstance(cmds, str):
|
|
cmds = [cmds]
|
|
@@ -1863,7 +1863,7 @@
|
|
pred=pred, subdir=subdir, tags=tags,
|
|
changes=changes, foreground=foreground,
|
|
priority=priority, cmd_as_tag=cmd_as_tag,
|
|
- container_cmd=container_cmd)
|
|
+ to_file=to_file, container_cmd=container_cmd)
|
|
|
|
def add_cmd_tags(self, tagdict):
|
|
"""Retroactively add tags to any commands that have been run by this
|
|
@@ -2014,7 +2014,7 @@
|
|
stderr=True, chroot=True, runat=None, env=None,
|
|
binary=False, sizelimit=None, subdir=None,
|
|
changes=False, foreground=False, tags=[],
|
|
- priority=10, cmd_as_tag=False,
|
|
+ priority=10, cmd_as_tag=False, to_file=False,
|
|
container_cmd=False):
|
|
"""Execute a command and save the output to a file for inclusion in the
|
|
report.
|
|
@@ -2041,6 +2041,8 @@
|
|
TTY
|
|
:param tags: Add tags in the archive manifest
|
|
:param cmd_as_tag: Format command string to tag
|
|
+ :param to_file: Write output directly to file instead
|
|
+ of saving in memory
|
|
|
|
:returns: dict containing status, output, and filename in the
|
|
archive for the executed cmd
|
|
@@ -2074,27 +2074,46 @@
|
|
else:
|
|
root = None
|
|
|
|
+ if suggest_filename:
|
|
+ outfn = self._make_command_filename(suggest_filename, subdir)
|
|
+ else:
|
|
+ outfn = self._make_command_filename(cmd, subdir)
|
|
+
|
|
+ outfn_strip = outfn[len(self.commons['cmddir'])+1:]
|
|
+
|
|
+ if to_file:
|
|
+ self._log_debug("collecting '%s' output directly to disk"
|
|
+ % cmd)
|
|
+ self.archive.check_path(outfn, P_FILE)
|
|
+ out_file = os.path.join(self.archive.get_archive_path(), outfn)
|
|
+ else:
|
|
+ out_file = False
|
|
+
|
|
start = time()
|
|
|
|
result = sos_get_command_output(
|
|
cmd, timeout=timeout, stderr=stderr, chroot=root,
|
|
chdir=runat, env=_env, binary=binary, sizelimit=sizelimit,
|
|
- poller=self.check_timeout, foreground=foreground
|
|
+ poller=self.check_timeout, foreground=foreground,
|
|
+ to_file=out_file
|
|
)
|
|
|
|
end = time()
|
|
run_time = end - start
|
|
|
|
if result['status'] == 124:
|
|
- self._log_warn(
|
|
- "command '%s' timed out after %ds" % (cmd, timeout)
|
|
- )
|
|
+ warn = "command '%s' timed out after %ds" % (cmd, timeout)
|
|
+ self._log_warn(warn)
|
|
+ if to_file:
|
|
+ msg = (" - output up until the timeout may be available at "
|
|
+ "%s" % outfn)
|
|
+ self._log_debug("%s%s" % (warn, msg))
|
|
|
|
manifest_cmd = {
|
|
'command': cmd.split(' ')[0],
|
|
'parameters': cmd.split(' ')[1:],
|
|
'exec': cmd,
|
|
- 'filepath': None,
|
|
+ 'filepath': outfn if to_file else None,
|
|
'truncated': result['truncated'],
|
|
'return_code': result['status'],
|
|
'priority': priority,
|
|
@@ -2060,7 +2090,7 @@ class Plugin():
|
|
result = sos_get_command_output(
|
|
cmd, timeout=timeout, chroot=False, chdir=runat,
|
|
env=env, binary=binary, sizelimit=sizelimit,
|
|
- poller=self.check_timeout
|
|
+ poller=self.check_timeout, to_file=out_file
|
|
)
|
|
run_time = time() - start
|
|
self._log_debug("could not run '%s': command not found" % cmd)
|
|
@@ -2077,22 +2107,15 @@ class Plugin():
|
|
if result['truncated']:
|
|
self._log_info("collected output of '%s' was truncated"
|
|
% cmd.split()[0])
|
|
-
|
|
- if suggest_filename:
|
|
- outfn = self._make_command_filename(suggest_filename, subdir)
|
|
- else:
|
|
- outfn = self._make_command_filename(cmd, subdir)
|
|
-
|
|
- outfn_strip = outfn[len(self.commons['cmddir'])+1:]
|
|
-
|
|
- if result['truncated']:
|
|
linkfn = outfn
|
|
outfn = outfn.replace('sos_commands', 'sos_strings') + '.tailed'
|
|
|
|
- if binary:
|
|
- self.archive.add_binary(result['output'], outfn)
|
|
- else:
|
|
- self.archive.add_string(result['output'], outfn)
|
|
+ if not to_file:
|
|
+ if binary:
|
|
+ self.archive.add_binary(result['output'], outfn)
|
|
+ else:
|
|
+ self.archive.add_string(result['output'], outfn)
|
|
+
|
|
if result['truncated']:
|
|
# we need to manually build the relative path from the paths that
|
|
# exist within the build dir to properly drop these symlinks
|
|
@@ -2543,6 +2566,9 @@ class Plugin():
|
|
all_logs = self.get_option("all_logs")
|
|
log_size = sizelimit or self.get_option("log_size")
|
|
log_size = max(log_size, journal_size) if not all_logs else 0
|
|
+ if sizelimit == 0:
|
|
+ # allow for specific sizelimit overrides in plugins
|
|
+ log_size = 0
|
|
|
|
if isinstance(units, str):
|
|
units = [units]
|
|
diff --git a/sos/utilities.py b/sos/utilities.py
|
|
index 70d5c0ab..f422d7a0 100644
|
|
--- a/sos/utilities.py
|
|
+++ b/sos/utilities.py
|
|
@@ -110,7 +110,8 @@ def is_executable(command, sysroot=None):
|
|
|
|
def sos_get_command_output(command, timeout=TIMEOUT_DEFAULT, stderr=False,
|
|
chroot=None, chdir=None, env=None, foreground=False,
|
|
- binary=False, sizelimit=None, poller=None):
|
|
+ binary=False, sizelimit=None, poller=None,
|
|
+ to_file=False):
|
|
"""Execute a command and return a dictionary of status and output,
|
|
optionally changing root or current working directory before
|
|
executing command.
|
|
@@ -124,6 +125,12 @@ def sos_get_command_output(command, timeout=TIMEOUT_DEFAULT, stderr=False,
|
|
if (chdir):
|
|
os.chdir(chdir)
|
|
|
|
+ def _check_poller(proc):
|
|
+ if poller():
|
|
+ proc.terminate()
|
|
+ raise SoSTimeoutError
|
|
+ time.sleep(0.01)
|
|
+
|
|
cmd_env = os.environ.copy()
|
|
# ensure consistent locale for collected command output
|
|
cmd_env['LC_ALL'] = 'C'
|
|
@@ -158,24 +158,46 @@
|
|
expanded_args.extend(expanded_arg)
|
|
else:
|
|
expanded_args.append(arg)
|
|
+ if to_file:
|
|
+ _output = open(to_file, 'w')
|
|
+ else:
|
|
+ _output = PIPE
|
|
try:
|
|
- p = Popen(expanded_args, shell=False, stdout=PIPE,
|
|
+ p = Popen(expanded_args, shell=False, stdout=_output,
|
|
stderr=STDOUT if stderr else PIPE,
|
|
bufsize=-1, env=cmd_env, close_fds=True,
|
|
preexec_fn=_child_prep_fn)
|
|
|
|
- reader = AsyncReader(p.stdout, sizelimit, binary)
|
|
+ if not to_file:
|
|
+ reader = AsyncReader(p.stdout, sizelimit, binary)
|
|
+ else:
|
|
+ reader = FakeReader(p, binary)
|
|
+
|
|
if poller:
|
|
while reader.running:
|
|
- if poller():
|
|
- p.terminate()
|
|
- raise SoSTimeoutError
|
|
- time.sleep(0.01)
|
|
- stdout = reader.get_contents()
|
|
- truncated = reader.is_full
|
|
+ _check_poller(p)
|
|
+ else:
|
|
+ try:
|
|
+ # override timeout=0 to timeout=None, as Popen will treat the
|
|
+ # former as a literal 0-second timeout
|
|
+ p.wait(timeout if timeout else None)
|
|
+ except Exception:
|
|
+ p.terminate()
|
|
+ _output.close()
|
|
+ # until we separate timeouts from the `timeout` command
|
|
+ # handle per-cmd timeouts via Plugin status checks
|
|
+ return {'status': 124, 'output': reader.get_contents(),
|
|
+ 'truncated': reader.is_full}
|
|
+ if to_file:
|
|
+ _output.close()
|
|
+
|
|
+ # wait for Popen to set the returncode
|
|
while p.poll() is None:
|
|
pass
|
|
|
|
+ stdout = reader.get_contents()
|
|
+ truncated = reader.is_full
|
|
+
|
|
except OSError as e:
|
|
if e.errno == errno.ENOENT:
|
|
return {'status': 127, 'output': "", 'truncated': ''}
|
|
@@ -256,6 +285,28 @@ def path_join(path, *p, sysroot=os.sep):
|
|
return os.path.join(path, *p)
|
|
|
|
|
|
+class FakeReader():
|
|
+ """Used as a replacement AsyncReader for when we are writing directly to
|
|
+ disk, and allows us to keep more simplified flows for executing,
|
|
+ monitoring, and collecting command output.
|
|
+ """
|
|
+
|
|
+ def __init__(self, process, binary):
|
|
+ self.process = process
|
|
+ self.binary = binary
|
|
+
|
|
+ @property
|
|
+ def is_full(self):
|
|
+ return False
|
|
+
|
|
+ def get_contents(self):
|
|
+ return '' if not self.binary else b''
|
|
+
|
|
+ @property
|
|
+ def running(self):
|
|
+ return self.process.poll() is None
|
|
+
|
|
+
|
|
class AsyncReader(threading.Thread):
|
|
"""Used to limit command output to a given size without deadlocking
|
|
sos.
|
|
--
|
|
2.27.0
|
|
|