Source code for dorie.parscraper.readers.xml

from __future__ import absolute_import

import xml.etree.ElementTree as ET
from warnings import warn
from collections import OrderedDict

from dorie.parscraper.warnings import XMLWarning
from dorie.parscraper.parameter import Parameter


def parse(filename):
    """
    Read and parse a DORiE parameter XML file.

    Parsing is done with the :mod:`xml.etree.ElementTree` package. The XML
    file is expected to have the following structure:

    :menuselection:`<dorie> --> <category name="..."> --> <parameter name="..."> --> (attributes)`

    Available attributes are all attributes of the
    :class:`dorie.parscraper.parameter.Parameter` class, except the ones
    starting with an underscore. The text of every attribute element is
    stored on the parameter with leading and trailing whitespace removed
    from each line; an empty element is stored as an empty string.

    A ``hidden`` XML attribute on a ``<parameter>`` element takes precedence
    over a ``hidden`` XML attribute on its ``<category>``. The values
    ``true``, ``1`` and ``yes`` (case-insensitive) mark the parameter as
    hidden, any other value marks it as not hidden. If neither element
    carries the attribute, ``Parameter.hidden`` is left untouched.

    :param str filename: Path to the input XML file
    :rtype: OrderedDict
    :return: all parsed category names as keys, and a list of all parameters
        in the corresponding category as values (instances of
        :class:`Parameter`)
    :raises XMLWarning: issued through :func:`warnings.warn` if unknown,
        redundant, or malformatted XML elements are encountered. The
        offending element is skipped and parsing continues.
    """
    truthy = ("true", "1", "yes")

    root = ET.parse(filename).getroot()
    xml_parameters = OrderedDict()

    for child in root:
        if child.tag != "category":
            warn("Unknown element {0}".format(child.tag), XMLWarning)
            continue

        # A missing and an empty 'name' are reported identically.
        category = child.attrib.get("name")
        if not category:
            warn("Encountered category without proper 'name' attribute",
                 XMLWarning)
            continue

        category_parameters = []
        for grandchild in child:
            if grandchild.tag != "parameter":
                # Report the offending element itself (not its parent
                # category) and use the same warning class as everywhere else.
                warn("Unknown element {0}".format(grandchild.tag), XMLWarning)
                continue

            key = grandchild.attrib.get("name")
            if not key:
                warn("Encountered parameter without proper 'name' attribute",
                     XMLWarning)
                continue

            p = Parameter(category, key)

            # The parameter's own 'hidden' flag overrides the category's.
            if "hidden" in grandchild.attrib:
                p.hidden = grandchild.attrib["hidden"].lower() in truthy
            elif "hidden" in child.attrib:
                p.hidden = child.attrib["hidden"].lower() in truthy

            for attribute in grandchild:
                if hasattr(p, attribute.tag) and not attribute.tag.startswith("_"):
                    # ElementTree reports the text of an empty element
                    # (e.g. ``<definition/>``) as None.
                    text = attribute.text or ""
                    stripped_text = "\n".join(
                        line.strip() for line in text.split("\n"))
                    setattr(p, attribute.tag, stripped_text)
                else:
                    warn("Unrecognized attribute {0} for {1}.{2}".format(
                        attribute.tag, category, key), XMLWarning)

            # Membership relies on the equality defined by Parameter.
            if p not in category_parameters:
                category_parameters.append(p)
            else:
                warn("{0}.{1} is defined multiple times".format(category, key),
                     XMLWarning)

        # A category name that occurs more than once replaces the entry
        # stored for its earlier occurrence.
        xml_parameters[category] = category_parameters

    return xml_parameters