#!/usr/bin/env python3 """ Usage: schema_parser.py SCHEMA [--output=ARG] Arguments: SCHEMA Kiwi RelaxNG schema file to parse Options: --output This is output file (stdout used if not present) """ import docopt from lxml import etree from collections import namedtuple import os.path import logging import sys logging.basicConfig(level=logging.WARNING) class SchemaNode(object): Child = namedtuple('Child', ['node', 'properties']) @classmethod def is_type(self, node): pass def __init__(self, node, namespaces): self.logger = logging.getLogger(self.__class__.__name__) self.node = node self.namespaces = namespaces ref = node.xpath( '(./parent::rng:define[@name])[1]', namespaces=namespaces ) if len(ref): self.reference = ref[0].attrib['name'] else: self.reference = None self.documentation = None self.paths = [] element_tree = node.getroottree() path = element_tree.getelementpath(node) self.x_path = path.replace('{%s}' % namespaces['rng'], 'rng:') self.properties = ['optional', 'oneOrMore', 'zeroOrMore'] def get_documentation(self): for candidate in self.node: if candidate.tag.endswith('documentation'): self.documentation = candidate.text.replace('\n', ' ') if self.documentation is None: self.documentation = '' self.logger.warning( 'Node %s has no documentation in schema', self.name ) return self.documentation def query(self, query, ret_type=None): nodes = [] for node in self.node.xpath(query, namespaces=self.namespaces): if ret_type is None: nodes.append(SchemaNode(node, self.namespaces)) else: nodes.append(ret_type(node, self.namespaces)) return nodes def get_children_by_type(self, ret_type, stop_types=None): if stop_types is None: stop_types = [] # Gets direct children of this node in its tree children = self.children_in_tree(ret_type, stop_types) # Follow references that are defined somewhere else in tree s_types = [ret_type] + stop_types for reference in self.children_in_tree(Reference, s_types): if not reference.node.name.endswith('any'): children += reference.node.resolve_reference( ret_type, stop_types, reference.properties ) return children def children_in_tree(self, ret_type, stop_types, properties=None): if properties is None: properties = [] children = [] for child in self.node: if self._is_in_types(child, stop_types): continue if ret_type.is_type(child): children.append(SchemaNode.Child( node=ret_type(child, self.namespaces), properties=self._get_properties(child, properties) )) else: children += SchemaNode(child, self.namespaces).children_in_tree( ret_type, stop_types, self._get_properties(child, properties) ) return children def _get_properties(self, node, properties): props = [] for prop in self.properties: if node.tag.endswith(prop): props = [prop] break return props + properties def _is_in_types(self, node, types): for t in types: if t.is_type(node): return True return False class Reference(SchemaNode): @classmethod def is_type(self, node): if 'name' in node.attrib and node.tag.endswith('ref'): return True return False def __init__(self, node, namespaces): super(Reference, self).__init__(node, namespaces) if not self.is_type(node): raise Exception('The given node is not a reference') self.name = self.node.attrib['name'] def resolve_reference(self, ret_type, stop_types, properties): define = self._find_define() if define is None: return [] # Gets direct children of this node in its tree nodes = define.children_in_tree(ret_type, stop_types, properties) # Follow references that are defined somewhere else in tree s_types = stop_types + [ret_type] for reference in define.children_in_tree( Reference, s_types, properties ): nodes += reference.node.resolve_reference( ret_type, stop_types, reference.properties ) return nodes def _find_define(self): node = self.query( '//rng:define[@name=\'%s\']' % self.name ) if len(node): return node[0] class Attribute(SchemaNode): @classmethod def is_type(self, node): if 'name' in node.attrib and node.tag.endswith('attribute'): return True return False def __init__(self, node, namespaces): super(Attribute, self).__init__(node, namespaces) if not self.is_type(node): raise Exception('The given node is not an attribute') self.documentation = None self.name = self.node.attrib['name'] class Element(SchemaNode): Child = namedtuple('Child', ['x_path', 'properties']) @classmethod def is_type(self, node): if 'name' in node.attrib and node.tag.endswith('element'): return True return False def __init__(self, node, namespaces): super(Element, self).__init__(node, namespaces) if not self.is_type(node): raise Exception('The given node is not an element') self.children = None self.name = self.node.attrib['name'] self.attributes = self.get_children_by_type(Attribute, [Element]) def get_parent_paths(self): parents = [] for path in filter(lambda x: '.' in x, self.paths): parent = path.rsplit('.' + self.get_name(), 1)[0] if len(parent): parents.append(parent) return parents def get_children(self): if self.children is None: self.find_children() return self.children def find_children(self): self.children = [] elements = [] for child in self.get_children_by_type(Element, [Attribute]): self.children.append(Element.Child( x_path=child.node.x_path, properties=child.properties )) elements.append(child.node) return elements class RNGSchemaParser(object): def __init__(self, schema): self.logger = logging.getLogger(self.__class__.__name__) self.schema_tree = etree.parse(schema) etree.strip_tags(self.schema_tree, etree.Comment) self.nspaces = self.schema_tree.getroot().nsmap self.nspaces.pop(None) if 'rng' not in self.nspaces: raise Exception('\'rng:\' namespace must be defined in namespaces map') self.root_nodes = [] for start in self.schema_tree.xpath('//rng:start', namespaces=self.nspaces): self.root_nodes += SchemaNode(start, self.nspaces).get_children_by_type( Element, [Attribute] ) if not self.root_nodes: raise Exception('Not valid RelaxNG start tag') self.elements = dict() for root_node in self.root_nodes: self._find_elements(root_node.node, None) def x_path_query(self, query): return self.schema_tree.xpath(query, namespaces=self.nspaces) def write_element_doc(self, element, current_path, output): if current_path is None: current_path = [] element_path = '.'.join(current_path + [element.name]) level = element_path.count('.') titles = '-_.,:;#~<>^*' doc = '.. _k.%s:\n\n' % element_path doc += '%s\n' % element.name doc += titles[level] * len(element.name) + '\n\n' if len(element.get_documentation()): doc += '%s\n\n' % element.get_documentation() parents = [] for path in [x for x in element.paths if len(x) > 1]: parents.append(':ref:`k.%s`' % '.'.join(path[:-1])) if len(parents): doc += 'Parents:\n' doc += ' These elements contain ``%s``: ' % element.name doc += ', '.join(parents) doc += '\n\n' children = [] for child in element.get_children(): children.append(':ref:`%s ` %s' % ( self.elements[child.x_path].name, '.'.join([element_path, self.elements[child.x_path].name]), self._properties_to_doc(child.properties) )) if len(children): doc += 'Children:\n' doc += ' The following elements occur in ``%s``: ' % element.name doc += ', '.join(children) doc += '\n\n' if len(element.attributes): doc += 'List of attributes for ``%s``:\n\n' % element.name for attribute in element.attributes: doc += '* ``%s`` %s: %s\n' % ( attribute.node.name, self._properties_to_doc(attribute.properties), attribute.node.get_documentation() ) doc += '\n' if output: with open(output, 'a') as f: f.write(doc) else: sys.stdout.write(doc) for child in element.get_children(): self.write_element_doc( self.elements[child.x_path], current_path + [element.name], output ) def legend(self): doc = '.. hint:: **Element quantifiers**\n\n'\ ' * **Optional** elements are qualified with _`[?]`\n'\ ' * Elements that occur **one or more** times are qualified with _`[+]`\n'\ ' * Elements that occur **zero or more** times are qualified with _`[*]`\n'\ ' * Required elements are not qualified\n\n' return doc def _properties_to_doc(self, properties): if not properties: return '' if properties[0] == 'optional': return '`[?]`_' elif properties[0] == 'oneOrMore': return '`[+]`_' elif properties[0] == 'zeroOrMore': return '`[*]`_' return '' def _find_elements(self, element, path): if path is None: path = [] if element.x_path not in self.elements: self.elements[element.x_path] = element for child in element.find_children(): self._find_elements(child, path + [element.name]) self.elements[element.x_path].paths.append(path + [element.name]) if __name__ == "__main__": try: logger = logging.getLogger(__name__) arguments = docopt.docopt(__doc__) ofile = arguments['--output'] if os.path.splitext(arguments['SCHEMA'])[-1] == '.rnc': raise OSError("Need RNG instead of RNC") if ofile and os.path.isfile(ofile): logger.warning('Output file %s will be overwritten', ofile) if os.path.isfile(arguments['SCHEMA']): parser = RNGSchemaParser(arguments['SCHEMA']) value = parser.x_path_query( '(//rng:attribute[@name=\'schemaversion\']/rng:value)[1]' ) if value: version = value[0].text else: raise Exception('Schema version not found!') title = 'Schema Documentation %s\n' % version title += '=' * len(title) + '\n\n' if ofile: with open(ofile, 'w') as f: f.write(title) f.write(parser.legend()) else: sys.stdout.write(title) for element in parser.root_nodes: parser.write_element_doc( parser.elements[element.node.x_path], None, ofile ) else: raise Exception('File not found: %s' % arguments['SCHEMA']) except Exception as e: logger.error('Schema parser failed', exc_info=True) exit(2)