#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import fnmatch
import argparse
import re
from io import BytesIO, StringIO, TextIOBase

import lxml.etree


class CompsFilter(object):
    """Filter a comps XML document in place.

    The document is parsed once in the constructor; every ``filter_*`` and
    ``remove_*`` method mutates ``self.tree`` directly.  Use :meth:`write`,
    :meth:`pprint` or :meth:`xml` to serialize the result.
    """

    def __init__(self, file_obj, reindent=False):
        """Parse the comps XML from *file_obj*.

        :param file_obj: file name or file-like object handed to
            ``lxml.etree.parse``.
        :param reindent: when true, blank text nodes are dropped while parsing
            and the output is pretty-printed on serialization.
        """
        self.reindent = reindent
        parser = None
        if self.reindent:
            # Pretty-printing only re-indents when the original whitespace-only
            # text nodes have been discarded by the parser.
            parser = lxml.etree.XMLParser(remove_blank_text=True)
        self.tree = lxml.etree.parse(file_obj, parser=parser)
        self.encoding = "utf-8"

    def _remove_elements(self, xpath):
        """Detach every element matched by *xpath* from its parent."""
        for element in self.tree.xpath(xpath):
            element.getparent().remove(element)

    def _filter_undefined_groupids(self, parent_xpath):
        """Drop ``grouplist/groupid`` children of *parent_xpath* that name no defined group."""
        # A set gives O(1) membership checks; the ids are plain strings.
        defined = set(self.tree.xpath("/comps/group/id/text()"))
        for groupid in self.tree.xpath(parent_xpath + "/grouplist/groupid"):
            if groupid.text not in defined:
                groupid.getparent().remove(groupid)

    def _filter_elements_by_arch(self, xpath, arch, only_arch=False):
        """Keep only the elements matched by *xpath* that apply to *arch*.

        Elements carrying an ``arch`` attribute are removed unless *arch* is
        one of the listed values (separated by commas and/or spaces); on the
        survivors the attribute itself is stripped.  Elements without the
        attribute are kept, unless *only_arch* is set, in which case they are
        removed as well.
        """
        if only_arch:
            # remove all elements without the 'arch' attribute
            self._remove_elements(xpath + "[not(@arch)]")

        for element in self.tree.xpath(xpath + "[@arch]"):
            arches = [
                item
                for item in re.split(r"[, ]+", element.attrib.get("arch"))
                if item
            ]
            if arch not in arches:
                # remove elements not matching the arch
                element.getparent().remove(element)
            else:
                # remove the 'arch' attribute
                del element.attrib["arch"]

    def filter_packages(self, arch, only_arch=False):
        """
        Filter packages according to arch.
        If only_arch is set, then only packages for the specified arch are preserved.
        Multiple arches separated by comma can be specified in the XML.
        """
        self._filter_elements_by_arch(
            "/comps/group/packagelist/packagereq", arch, only_arch
        )

    def filter_groups(self, arch, only_arch=False):
        """
        Filter groups according to arch.
        If only_arch is set, then only groups for the specified arch are preserved.
        Multiple arches separated by comma can be specified in the XML.
        """
        self._filter_elements_by_arch("/comps/group", arch, only_arch)

    def filter_category_groups(self):
        """
        Remove undefined groups from categories.
        """
        self._filter_undefined_groupids("/comps/category")

    def remove_empty_groups(self, keep_empty=None):
        """
        Remove all groups without packages.

        :param keep_empty: optional list of fnmatch-style patterns; an empty
            group whose id matches any of them is kept.
        """
        keep_empty = keep_empty or []
        for group in self.tree.xpath("/comps/group"):
            if group.xpath("packagelist/packagereq"):
                continue
            group_id = group.xpath("id/text()")[0]
            if any(fnmatch.fnmatch(group_id, pattern) for pattern in keep_empty):
                continue
            group.getparent().remove(group)

    def remove_empty_categories(self):
        """
        Remove all categories without groups.
        """
        for category in self.tree.xpath("/comps/category"):
            if not category.xpath("grouplist/groupid"):
                category.getparent().remove(category)

    def remove_categories(self):
        """
        Remove all categories.
        """
        self._remove_elements("/comps/category")

    def remove_langpacks(self):
        """
        Remove all langpacks.
        """
        self._remove_elements("/comps/langpacks")

    def remove_translations(self):
        """
        Remove all translations.
        """
        # Translated elements are the ones carrying an xml:lang attribute.
        self._remove_elements("//*[@xml:lang]")

    def filter_environment_groups(self):
        """
        Remove undefined groups from environments.
        """
        self._filter_undefined_groupids("/comps/environment")

    def remove_empty_environments(self):
        """
        Remove all environments without groups.
        """
        for environment in self.tree.xpath("/comps/environment"):
            if not environment.xpath("grouplist/groupid"):
                environment.getparent().remove(environment)

    def remove_environments(self):
        """
        Remove all environments.
        """
        self._remove_elements("/comps/environment")

    def write(self, file_obj):
        """Serialize the document, followed by a newline, to *file_obj*.

        lxml produces bytes when an encoding is given, so the document is first
        rendered into a buffer.  Text streams (``io.TextIOBase`` instances such
        as ``io.StringIO`` or files opened in text mode) receive the decoded
        string; any other file object receives the raw bytes.
        """
        buf = BytesIO()
        self.tree.write(
            buf,
            pretty_print=self.reindent,
            xml_declaration=True,
            encoding=self.encoding,
        )
        data = buf.getvalue() + b"\n"
        if isinstance(file_obj, TextIOBase):
            file_obj.write(data.decode(self.encoding))
        else:
            file_obj.write(data)

    def pprint(self):
        """Write the document to standard output."""
        binary_stdout = getattr(sys.stdout, "buffer", None)
        if binary_stdout is None:
            # No separate binary layer (e.g. stdout replaced by a StringIO).
            self.write(sys.stdout)
            return
        # Write bytes directly so the output matches the declared encoding
        # regardless of the locale; flush around it to keep ordering with any
        # text already written through sys.stdout.
        sys.stdout.flush()
        self.write(binary_stdout)
        binary_stdout.flush()

    def xml(self):
        """Return the serialized document as a text string."""
        out = StringIO()
        self.write(out)
        return out.getvalue()


def main():
    """Command-line entry point: filter a comps file and print or save the result."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", help="redirect output to a file")
    parser.add_argument("--arch", help="filter groups and packages according to an arch")
    parser.add_argument("--arch-only-groups", default=False, action="store_true",
                        help="keep only arch groups, remove the rest")
    parser.add_argument("--arch-only-packages", default=False, action="store_true",
                        help="keep only arch packages, remove the rest")
    parser.add_argument("--remove-categories", default=False, action="store_true",
                        help="remove all categories")
    parser.add_argument("--remove-langpacks", default=False, action="store_true",
                        help="remove the langpacks section")
    parser.add_argument("--remove-translations", default=False, action="store_true",
                        help="remove all translations")
    parser.add_argument("--remove-environments", default=False, action="store_true",
                        help="remove all environment sections")
    parser.add_argument("--keep-empty-group", default=[], action="append", metavar="GROUPID",
                        help="keep groups even if they are empty")
    parser.add_argument("--no-cleanup", default=False, action="store_true",
                        help="don't remove empty groups and categories")
    parser.add_argument("--no-reindent", default=False, action="store_true",
                        help="don't re-indent the output")
    parser.add_argument("comps_file", metavar='COMPS_FILE')
    opts = parser.parse_args()

    if opts.arch is None:
        parser.error("please specify arch")

    # Binary mode lets the XML parser honour the document's own encoding
    # declaration; the file is fully parsed inside the constructor, so it can
    # be closed right away.
    with open(opts.comps_file, "rb") as file_obj:
        f = CompsFilter(file_obj, reindent=not opts.no_reindent)

    f.filter_packages(opts.arch, opts.arch_only_packages)
    f.filter_groups(opts.arch, opts.arch_only_groups)

    if not opts.no_cleanup:
        f.remove_empty_groups(keep_empty=opts.keep_empty_group)
        f.filter_category_groups()
        f.remove_empty_categories()
        f.filter_environment_groups()
        f.remove_empty_environments()

    if opts.remove_categories:
        f.remove_categories()

    if opts.remove_langpacks:
        f.remove_langpacks()

    if opts.remove_translations:
        f.remove_translations()

    if opts.remove_environments:
        f.remove_environments()

    if opts.output:
        # Binary mode: the serialized bytes already carry the declared encoding.
        with open(opts.output, "wb") as out:
            f.write(out)
    else:
        f.pprint()


if __name__ == "__main__":
    main()