setroubleshoot/SOURCES/0002-framework-util-optimiz...

154 lines
5.3 KiB
Diff

From 6fbf777bc59c005e04d4616b9aebeb7c7f0866c0 Mon Sep 17 00:00:00 2001
From: Vit Mojzis <vmojzis@redhat.com>
Date: Wed, 13 Jan 2021 12:43:54 +0100
Subject: [PATCH] framework/util: optimize get_rpm_nvr_by_type by adding a
cache
The cache build could be optimized by assuming that all modules with
priority 100 are part of selinux-policy-<policytype> package. This way
the cache would only have to contain types from modules of other
priorities.
Another optimization would be making the cache persistent. This way it
would only have to be rebuilt on policy reload (sedispatch could trigger
cache rebuild over dbus).
My testing shows a significant time saving when processing multiple AVCs:
setroubleshoot-server-3.3.24-1.el8:
real 2m26.075s
user 2m17.989s
sys 0m5.916s
Cache:
real 0m15.337s
user 0m11.004s
sys 0m3.995s
But curiously, there is also a small time saving for individual AVCs
(measured by forcing the cache rebuild for each call of
get_rpm_nvr_by_type):
real 1m40.393s
user 1m32.830s
sys 0m5.960s
It could be caused by processing policy modules line by line, which quickly
eliminates all lines not starting with "(type " (as opposed to comparing
the regular expression with the whole file).
---
framework/src/setroubleshoot/util.py | 86 +++++++++++++++++++++-------
1 file changed, 65 insertions(+), 21 deletions(-)
diff --git a/framework/src/setroubleshoot/util.py b/framework/src/setroubleshoot/util.py
index 023d1c4..657c882 100755
--- a/framework/src/setroubleshoot/util.py
+++ b/framework/src/setroubleshoot/util.py
@@ -114,6 +114,12 @@ hex_re = re.compile('^[A-Fa-f0-9]+$')
href_re = re.compile(r'<a\s*href="([^"]+)"[^<]*</a>')
name_at_domain_re = re.compile(r'^([^\s@]+)@([^\s@]+)$')
audit_decode_re = re.compile(r'^\s*"([^"]+)"\s*$')
+# regexp matching lines containing type definitions, eg. (type lib_t)
+# contains only 1 group that matches the type name
+typedef_regexp = re.compile(r"\s*\(\s*type\s+([\w-]+)\s*\)\s*")
+#Dictionary with all types defined in the module store as keys
+#and corresponding module paths as values. Used by get_rpm_nvr_by_type
+module_type_cache = None
log_level = syslog.LOG_WARNING
@@ -425,33 +431,71 @@ Finds an SELinux module which defines given SELinux type
'mysql-selinux-...
"""
+
+ if module_type_cache is None:
+ build_module_type_cache()
+ if module_type_cache is None:
+ return None
+
+ path = module_type_cache.get(selinux_type, None)
+
+ return get_package_nvr_by_file_path(path)
+
+# check if given string represents an integer
+def __str_is_int(str):
+ try:
+ int(str)
+ return True
+ except:
+ return False
+
+def build_module_type_cache():
+ """
+Creates a dictionary with all types defined in the module store as keys
+and corresponding module paths as values.
+The dictionary is stored in "module_type_cache" to be used by
+"get_rpm_nvr_by_type"
+ """
retval, policytype = selinux.selinux_getpolicytype()
+
if retval != 0:
- return None
- typedef = "(type {})\n".format(selinux_type)
- modules = []
- for (dirpath, dirnames, filenames) in os.walk("/var/lib/selinux/{}/active/modules".format(policytype)):
- if "cil" in filenames:
- try:
- defined = False
+ return
+
+ module_type_dict = dict()
+
+ priorities = []
+
+ # get list of module priorities, present in the module store, sorted by integer value
+ with os.scandir("/var/lib/selinux/{}/active/modules".format(policytype)) as module_store:
+ priorities = sorted([x.name for x in module_store if x.is_dir() and __str_is_int(x.name)], key = lambda x: int(x))
+
+ for dir in priorities:
+ # find individual modules in each priority and identify type definitions
+ for (dirpath, dirnames, filenames) in os.walk("/var/lib/selinux/{}/active/modules/{}".format(policytype,dir)):
+ if "cil" in filenames:
try:
- # cil files are bzip2'ed by default
- defined = typedef.encode() in bz2.open("{}/cil".format(dirpath))
- except:
- # maybe cil file is not bzip2'ed, try plain text
- defined = typedef in open("{}/cil".format(dirpath))
+ try:
+ # cil files are bzip2'ed by default
+ f = bz2.open("{}/cil".format(dirpath), mode = 'rt')
- if defined:
- modules.append(dirpath)
- except:
- # something's wrong, move on
- # FIXME: log a problem?
- pass
+ except:
+ # maybe cil file is not bzip2'ed, try plain text
+ f = open("{}/cil".format(dirpath))
- if len(modules) > 0:
- return get_package_nvr_by_file_path(sorted(modules)[-1])
+ for line in f:
+ result = typedef_regexp.match(line)
+ if result:
+ module_type_dict[result.group(1)] = dirpath
- return None
+ f.close()
+
+ except:
+ # something's wrong, move on
+ # FIXME: log a problem?
+ pass
+
+ global module_type_cache
+ module_type_cache = module_type_dict
def get_rpm_nvr_by_scontext(scontext, use_dbus=False):
"""
--
2.29.2