Source code for geos.xml_tools.xml_formatter

import os
from lxml import etree as ElementTree  # type: ignore[import]
import re
from typing import List, Any, TextIO
from geos.xml_tools import command_line_parsers


def format_attribute( attribute_indent: str, ka: str, attribute_value: str ) -> str:
    """Normalize the whitespace of an xml attribute value.

    Every comma is followed by exactly one space, braces are padded with a
    single space on the inside, and runs of whitespace are collapsed.  A value
    that is a brace-enclosed list of brace-enclosed groups is additionally
    broken after each inner group so that every group sits on its own line.

    Args:
        attribute_indent (str): Attribute indent string
        ka (str): Attribute name
        attribute_value (str): Attribute value

    Returns:
        str: Formatted attribute value
    """
    # Spacing rules, applied in order: commas, opening braces, closing braces,
    # and finally whitespace consolidation
    spacing_rules = ( ( r",\s*", ", " ), ( r"{\s*", "{ " ), ( r"\s*}", " }" ), ( r"\s+", " " ) )
    formatted = attribute_value
    for pattern, replacement in spacing_rules:
        formatted = re.sub( pattern, replacement, formatted )

    # Only nested lists are spread over several lines
    if not re.match( r"\s*{\s*({[-+.,0-9a-zA-Z\s]*},?\s*)*\s*}", formatted ):
        return formatted

    # Cut right after every "}," that closes an inner group
    cuts: List[ Any ] = [ found.end() for found in re.finditer( r"}\s*,", formatted ) ]
    starts = [ None ] + cuts
    stops = cuts + [ None ]
    pieces = [ formatted[ start:stop ].strip() for start, stop in zip( starts, stops ) ]

    # Continuation lines are indented by the attribute indent, the attribute
    # name and four further columns
    continuation = '\n%s' % ( ' ' * ( len( attribute_indent ) + len( ka ) + 4 ) )
    return continuation.join( pieces )
def _escape_attribute_value( value: str ) -> str:
    """Escape the characters that cannot appear literally in a double-quoted xml attribute."""
    # '&' has to be replaced first so the entities produced afterwards are not escaped again
    return value.replace( '&', '&amp;' ).replace( '<', '&lt;' ).replace( '"', '&quot;' )


def format_xml_level( output: TextIO,
                      node: ElementTree.Element,
                      level: int,
                      indent: str = ' ' * 2,
                      block_separation_max_depth: int = 2,
                      modify_attribute_indent: bool = False,
                      sort_attributes: bool = False,
                      close_tag_newline: bool = False,
                      include_namespace: bool = False ) -> None:
    """Recursively write an xml element and its children in formatted form

    Comments are written verbatim.  For every other element the opening tag is
    written, followed by one attribute per line, followed by either the
    formatted children and a closing tag or a self-closing '/>'.

    At the root level (level == 0) the attributes found on the element are not
    written: they are replaced by the schema namespace attributes when
    include_namespace is set and dropped otherwise.

    Args:
        output (file): the output text file handle
        node (lxml.etree.Element): the current xml element
        level (int): the xml depth
        indent (str): the xml indent style
        block_separation_max_depth (int): the maximum depth to separate adjacent elements
        modify_attribute_indent (bool): option to have flexible attribute indentation
        sort_attributes (bool): option to sort attributes alphabetically
        close_tag_newline (bool): option to place close tag on a separate line
        include_namespace (bool): option to include the xml namespace in the output
    """
    # Handle comments
    if node.tag is ElementTree.Comment:
        output.write( '\n%s<!--%s-->' % ( indent * level, node.text ) )
        return

    # Write opening line
    opening_line = '\n%s<%s' % ( indent * level, node.tag )
    output.write( opening_line )

    # Write attributes
    # NOTE(review): because of this guard the namespace is not inserted on a root
    # element that carries no attributes at all - confirm this is intended
    if len( node.attrib ) > 0:
        # Choose indentation
        attribute_indent = indent * ( level + 1 )
        if modify_attribute_indent:
            # opening_line starts with '\n', so its length is the column that
            # follows '<tag ' - hanging attributes line up under the first one
            attribute_indent = ' ' * len( opening_line )

        # Work on a copy so that formatting never alters the parsed tree
        attribute_dict = {}
        if level == 0 and include_namespace:
            # Handle the optional namespace information at the root level
            # Note: preferably, this would point to a schema we host online
            attribute_dict[ 'xmlns:xsi' ] = 'http://www.w3.org/2001/XMLSchema-instance'
            attribute_dict[ 'xsi:noNamespaceSchemaLocation' ] = '/usr/gapps/GEOS/schema/schema.xsd'
        elif level > 0:
            attribute_dict = dict( node.attrib )

        # Sort attribute names
        akeys = list( attribute_dict.keys() )
        if sort_attributes:
            akeys = sorted( akeys )

        # Format attributes
        for ka in akeys:
            # Avoid formatting mathpresso expressions
            if not ( node.tag in [ "SymbolicFunction", "CompositeFunction" ] and ka == "expression" ):
                attribute_dict[ ka ] = format_attribute( attribute_indent, ka, attribute_dict[ ka ] )

        for ii, k in enumerate( akeys ):
            # The parser hands back decoded text, so markup characters have to be
            # re-escaped or the written file would no longer be well-formed
            value = _escape_attribute_value( attribute_dict[ k ] )
            if ii == 0 and modify_attribute_indent:
                output.write( ' %s="%s"' % ( k, value ) )
            else:
                output.write( '\n%s%s="%s"' % ( attribute_indent, k, value ) )

    # Write children
    if len( node ):
        output.write( '>' )
        last_index = len( node ) - 1
        for ii, child in enumerate( node ):
            format_xml_level( output, child, level + 1, indent, block_separation_max_depth, modify_attribute_indent,
                              sort_attributes, close_tag_newline, include_namespace )

            # Add space between blocks (a comment stays attached to the element after it)
            if level < block_separation_max_depth and ii < last_index and child.tag is not ElementTree.Comment:
                output.write( '\n' )

        # Write the end tag
        output.write( '\n%s</%s>' % ( indent * level, node.tag ) )
    elif close_tag_newline:
        output.write( '\n%s/>' % ( indent * level ) )
    else:
        output.write( '/>' )
def format_file( input_fname: str,
                 indent_size: int = 2,
                 indent_style: bool = False,
                 block_separation_max_depth: int = 2,
                 alphebitize_attributes: bool = False,
                 close_style: bool = False,
                 namespace: bool = False ) -> None:
    """Format an xml file in place

    The file is parsed, re-written into an in-memory buffer and only then
    saved over the original, so a failure while formatting leaves the input
    file untouched.  Comments located before and after the root element are
    preserved.

    Args:
        input_fname (str): Input file name
        indent_size (int): Indent size
        indent_style (bool): Style of indentation (0=fixed, 1=hanging)
        block_separation_max_depth (int): Max depth to separate xml blocks
        alphebitize_attributes (bool): Alphebitize attributes
        close_style (bool): Style of close tag (0=same line, 1=new line)
        namespace (bool): Insert this namespace in the xml description

    Raises:
        Exception: if the input file cannot be parsed as xml
    """
    # Local import: only this function needs the in-memory buffer
    from io import StringIO

    fname = os.path.expanduser( input_fname )
    try:
        tree = ElementTree.parse( fname )
    except ElementTree.ParseError as err:
        print( '\nCould not load file: %s' % ( fname ) )
        print( err.msg )
        raise Exception( '\nCheck input file!' ) from err

    root = tree.getroot()
    # Siblings of the root are the nodes that sit outside of it in the document
    prologue_comments = [ tmp.text for tmp in root.itersiblings( preceding=True ) ]
    epilog_comments = [ tmp.text for tmp in root.itersiblings() ]

    # Build the complete result before touching the file on disk
    formatted = StringIO()
    formatted.write( '<?xml version=\"1.0\" ?>\n' )

    # Preceding siblings are visited nearest-first, so reverse them to restore document order
    for comment in reversed( prologue_comments ):
        formatted.write( '\n<!--%s-->' % ( comment ) )

    format_xml_level( formatted,
                      root,
                      0,
                      indent=' ' * indent_size,
                      block_separation_max_depth=block_separation_max_depth,
                      modify_attribute_indent=indent_style,
                      sort_attributes=alphebitize_attributes,
                      close_tag_newline=close_style,
                      include_namespace=namespace )

    for comment in epilog_comments:
        formatted.write( '\n<!--%s-->' % ( comment ) )
    formatted.write( '\n' )

    # The declaration written above names no encoding, which xml readers take
    # to mean UTF-8, so the file must not be written with the locale encoding
    with open( fname, 'w', encoding='utf-8' ) as f:
        f.write( formatted.getvalue() )
def main() -> None:
    """Command line entry point of the xml formatter

    Builds the argument parser with
    command_line_parsers.build_xml_formatter_input_parser, parses the command
    line and passes the resulting options on to format_file.

    Args:
        input (str): Input file name
        -i/--indent (int): Indent size
        -s/--style (int): Indent style
        -d/--depth (int): Block separation depth
        -a/--alphebitize (int): Alphebitize attributes
        -c/--close (int): Close tag style
        -n/--namespace (int): Include namespace
    """
    cli_args = command_line_parsers.build_xml_formatter_input_parser().parse_args()

    # Translate the command line option names into format_file keywords
    target_file = cli_args.input
    formatter_options = {
        'indent_size': cli_args.indent,
        'indent_style': cli_args.style,
        'block_separation_max_depth': cli_args.depth,
        'alphebitize_attributes': cli_args.alphebitize,
        'close_style': cli_args.close,
        'namespace': cli_args.namespace,
    }
    format_file( target_file, **formatter_options )
# Run the formatter when the module is executed as a script
if __name__ == "__main__":
    main()