pungi/bin/comps_filter
Lubomír Sedlář 65910f2c33 Open files as binary where needed
In many cases we need to open files in binary mode to avoid errors on Py3
about writing binary data to a file opened in text mode.

Signed-off-by: Lubomír Sedlář <lsedlar@redhat.com>
2017-08-28 13:47:18 +02:00

207 lines
7.7 KiB
Python
Executable File

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import fnmatch
import argparse
import lxml.etree
import re
class CompsFilter(object):
    """
    Parsed comps XML document with arch filtering and clean-up operations.

    The document is parsed once in the constructor. Every ``filter_*`` and
    ``remove_*`` method edits the parsed tree in place; ``write()`` serializes
    the current state of the tree.
    """

    def __init__(self, file_obj, reindent=False):
        """
        Parse the comps XML found in ``file_obj``.

        :param file_obj: source handed to ``lxml.etree.parse``
        :param reindent: when true, blank text is dropped while parsing and
                         ``write()`` pretty-prints the output
        """
        self.reindent = reindent
        parser = None
        if self.reindent:
            # Whitespace-only text nodes are discarded so that pretty-printing
            # in write() can lay the document out from scratch.
            parser = lxml.etree.XMLParser(remove_blank_text=True)
        self.tree = lxml.etree.parse(file_obj, parser=parser)
        self.encoding = "utf-8"

    @staticmethod
    def _split_arches(value):
        """
        Split an 'arch' attribute value into a list of arch names.
        Names are separated by commas and/or spaces; empty items are dropped.
        """
        return [name for name in re.split(r"[, ]+", value) if name]

    @staticmethod
    def _detach(element):
        """
        Remove the element from its parent.
        """
        element.getparent().remove(element)

    def _remove_all(self, xpath):
        """
        Remove every element matched by the xpath expression.
        """
        for element in self.tree.xpath(xpath):
            self._detach(element)

    def _remove_undefined_group_refs(self, section_xpath):
        """
        Remove 'grouplist/groupid' entries under the sections matched by
        section_xpath that do not name a group defined in the document.
        """
        # Build the set of defined group ids once; membership tests are then
        # constant time instead of a list scan per reference.
        defined = set(self.tree.xpath("/comps/group/id/text()"))
        for ref in self.tree.xpath(section_xpath + "/grouplist/groupid"):
            if ref.text not in defined:
                self._detach(ref)

    def _filter_elements_by_arch(self, xpath, arch, only_arch=False):
        """
        Filter elements matched by xpath according to arch.
        Elements whose 'arch' attribute lists the arch are kept and lose the
        attribute; elements whose 'arch' attribute does not list it are removed.
        Elements without the attribute are kept unless only_arch is set.
        """
        if only_arch:
            # remove all elements without the 'arch' attribute
            self._remove_all(xpath + "[not(@arch)]")

        for element in self.tree.xpath(xpath + "[@arch]"):
            if arch in self._split_arches(element.attrib["arch"]):
                # remove the 'arch' attribute
                del element.attrib["arch"]
            else:
                # remove elements not matching the arch
                self._detach(element)

    def filter_packages(self, arch, only_arch=False):
        """
        Filter packages according to arch.
        If only_arch is set, then only packages for the specified arch are preserved.
        Multiple arches separated by comma can be specified in the XML.
        """
        self._filter_elements_by_arch("/comps/group/packagelist/packagereq", arch, only_arch)

    def filter_groups(self, arch, only_arch=False):
        """
        Filter groups according to arch.
        If only_arch is set, then only groups for the specified arch are preserved.
        Multiple arches separated by comma can be specified in the XML.
        """
        self._filter_elements_by_arch("/comps/group", arch, only_arch)

    def filter_environments(self, arch, only_arch=False):
        """
        Filter environments according to arch.
        If only_arch is set, then only environments for the specified arch are preserved.
        Multiple arches separated by comma can be specified in the XML.
        """
        self._filter_elements_by_arch("/comps/environment", arch, only_arch)

    def filter_category_groups(self):
        """
        Remove undefined groups from categories.
        """
        self._remove_undefined_group_refs("/comps/category")

    def remove_empty_groups(self, keep_empty=None):
        """
        Remove all groups without packages.
        Groups whose id matches any of the fnmatch patterns in keep_empty
        are preserved even when they are empty.
        """
        keep_empty = keep_empty or []
        for group in self.tree.xpath("/comps/group[not(packagelist/packagereq)]"):
            ids = group.xpath("id/text()")
            # A group with a missing or empty <id> is matched as the empty
            # string instead of raising IndexError.
            group_id = ids[0] if ids else ""
            if not any(fnmatch.fnmatch(group_id, pattern) for pattern in keep_empty):
                self._detach(group)

    def remove_empty_categories(self):
        """
        Remove all categories without groups.
        """
        self._remove_all("/comps/category[not(grouplist/groupid)]")

    def remove_categories(self):
        """
        Remove all categories.
        """
        self._remove_all("/comps/category")

    def remove_langpacks(self):
        """
        Remove all langpacks.
        """
        self._remove_all("/comps/langpacks")

    def remove_translations(self):
        """
        Remove all translations, i.e. every element with an xml:lang attribute.
        """
        self._remove_all("//*[@xml:lang]")

    def filter_environment_groups(self):
        """
        Remove undefined groups from environments.
        """
        self._remove_undefined_group_refs("/comps/environment")

    def remove_empty_environments(self):
        """
        Remove all environments without groups.
        """
        self._remove_all("/comps/environment[not(grouplist/groupid)]")

    def remove_environments(self):
        """
        Remove all environments.
        """
        self._remove_all("/comps/environment")

    def write(self, file_obj):
        """
        Serialize the document to file_obj, followed by a newline.
        The XML declaration is included and the output is pretty-printed when
        reindent was requested. file_obj must accept bytes (binary mode),
        because the trailing newline is written as a bytes object.
        """
        self.tree.write(
            file_obj,
            pretty_print=self.reindent,
            xml_declaration=True,
            encoding=self.encoding,
        )
        file_obj.write(b"\n")
def main():
    """
    Command-line entry point: filter a comps file for one arch.

    The comps file is parsed, packages/groups/environments are filtered by
    --arch, empty or dangling entries are cleaned up (unless --no-cleanup),
    the optional --remove-* steps are applied, and the result is written to
    --output or, when that is not given, to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", help="redirect output to a file")
    parser.add_argument("--arch", required=True,
                        help="filter groups and packages according to an arch")
    parser.add_argument("--arch-only-groups", default=False, action="store_true",
                        help="keep only arch groups, remove the rest")
    parser.add_argument("--arch-only-packages", default=False, action="store_true",
                        help="keep only arch packages, remove the rest")
    parser.add_argument("--arch-only-environments", default=False, action="store_true",
                        help="keep only arch environments, remove the rest")
    parser.add_argument("--remove-categories", default=False, action="store_true",
                        help="remove all categories")
    parser.add_argument("--remove-langpacks", default=False, action="store_true",
                        help="remove the langpacks section")
    parser.add_argument("--remove-translations", default=False, action="store_true",
                        help="remove all translations")
    parser.add_argument("--remove-environments", default=False, action="store_true",
                        help="remove all environment sections")
    parser.add_argument("--keep-empty-group", default=[], action="append", metavar="GROUPID",
                        help="keep groups even if they are empty")
    parser.add_argument("--no-cleanup", default=False, action="store_true",
                        help="don't remove empty groups and categories")
    parser.add_argument("--no-reindent", default=False, action="store_true",
                        help="don't re-indent the output")
    parser.add_argument("comps_file", metavar='COMPS_FILE')
    opts = parser.parse_args()

    # The document is fully parsed by the constructor, so the input file can
    # be closed before any filtering starts.
    with open(opts.comps_file, "rb") as file_obj:
        f = CompsFilter(file_obj, reindent=not opts.no_reindent)

    f.filter_packages(opts.arch, opts.arch_only_packages)
    f.filter_groups(opts.arch, opts.arch_only_groups)
    f.filter_environments(opts.arch, opts.arch_only_environments)

    if not opts.no_cleanup:
        # Empty groups go first so that the category/environment passes below
        # also drop references to the groups removed here.
        f.remove_empty_groups(keep_empty=opts.keep_empty_group)
        f.filter_category_groups()
        f.remove_empty_categories()
        f.filter_environment_groups()
        f.remove_empty_environments()

    if opts.remove_categories:
        f.remove_categories()

    if opts.remove_langpacks:
        f.remove_langpacks()

    if opts.remove_translations:
        f.remove_translations()

    if opts.remove_environments:
        f.remove_environments()

    if opts.output:
        # Close the output file deterministically instead of leaking the handle.
        with open(opts.output, "wb") as output:
            f.write(output)
    else:
        # The writer emits bytes. On Python 3 sys.stdout is a text stream, so
        # use its underlying binary buffer; Python 2's sys.stdout has no
        # 'buffer' attribute and accepts bytes directly.
        f.write(getattr(sys.stdout, "buffer", sys.stdout))


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()