Source code for cobra.io.sbml3

# -*- coding: utf-8 -*-

from __future__ import absolute_import

import re
from ast import And, BoolOp, Name, Or
from bz2 import BZ2File
from collections import defaultdict
from decimal import Decimal
from gzip import GzipFile
from tempfile import NamedTemporaryFile
from warnings import catch_warnings, simplefilter, warn

from six import iteritems, string_types

from cobra.core import Gene, Metabolite, Model, Reaction
from cobra.core.gene import parse_gpr
from cobra.manipulation.modify import _renames
from cobra.manipulation.validate import check_metabolite_compartment_formula
from cobra.util.solver import set_objective

try:
    from lxml.etree import (
        parse, Element, SubElement, ElementTree, register_namespace,
        ParseError, XPath)

    _with_lxml = True
except ImportError:
    _with_lxml = False
    try:
        from xml.etree.cElementTree import (
            parse, Element, SubElement, ElementTree, register_namespace,
            ParseError)
    except ImportError:
        XPath = None
        from xml.etree.ElementTree import (
            parse, Element, SubElement, ElementTree, register_namespace,
            ParseError)

# use sbml level 2 from sbml.py (which uses libsbml). Eventually, it would
# be nice to use the libSBML converters directly instead.
try:
    import libsbml
except ImportError:
    libsbml = None
else:
    from cobra.io.sbml import create_cobra_model_from_sbml_file as read_sbml2
    from cobra.io.sbml import write_cobra_model_to_sbml_file as write_sbml2

try:
    from optlang.symbolics import Basic
except ImportError:
    class Basic:
        pass

# deal with namespaces
namespaces = {"fbc": "http://www.sbml.org/sbml/level3/version1/fbc/version2",
              "sbml": "http://www.sbml.org/sbml/level3/version1/core",
              "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
              "bqbiol": "http://biomodels.net/biology-qualifiers/"}
for key in namespaces:
    register_namespace(key, namespaces[key])


[docs]def ns(query):
    """replace prefixes with namespace"""
    for prefix, uri in iteritems(namespaces):
        query = query.replace(prefix + ":", "{" + uri + "}")
    return query


# XPATH query wrappers
fbc_prefix = "{" + namespaces["fbc"] + "}"
sbml_prefix = "{" + namespaces["sbml"] + "}"

SBML_DOT = "__SBML_DOT__"
# FBC TAGS
OR_TAG = ns("fbc:or")
AND_TAG = ns("fbc:and")
GENEREF_TAG = ns("fbc:geneProductRef")
GPR_TAG = ns("fbc:geneProductAssociation")
GENELIST_TAG = ns("fbc:listOfGeneProducts")
GENE_TAG = ns("fbc:geneProduct")
# XPATHS
BOUND_XPATH = ns("sbml:listOfParameters/sbml:parameter[@value]")
COMPARTMENT_XPATH = ns("sbml:listOfCompartments/sbml:compartment")
GENES_XPATH = GENELIST_TAG + "/" + GENE_TAG
SPECIES_XPATH = ns("sbml:listOfSpecies/sbml:species[@boundaryCondition='%s']")
OBJECTIVES_XPATH = ns("fbc:objective[@fbc:id='%s']/"
                      "fbc:listOfFluxObjectives/"
                      "fbc:fluxObjective")
LONG_SHORT_DIRECTION = {'maximize': 'max', 'minimize': 'min'}
SHORT_LONG_DIRECTION = {'min': 'minimize', 'max': 'maximize'}

if _with_lxml:
    RDF_ANNOTATION_XPATH = ("sbml:annotation/rdf:RDF/"
                            "rdf:Description[@rdf:about=$metaid]/"
                            "*[self::bqbiol:isEncodedBy or self::bqbiol:is]/"
                            "rdf:Bag/rdf:li/@rdf:resource")
    extract_rdf_annotation = XPath(RDF_ANNOTATION_XPATH, namespaces=namespaces,
                                   smart_strings=False)
else:
    RDF_ANNOTATION_XPATH = ns("sbml:annotation/rdf:RDF/"
                              "rdf:Description[@rdf:about='%s']/"
                              "bqbiol:isEncodedBy/"
                              "rdf:Bag/rdf:li[@rdf:resource]")

    def extract_rdf_annotation(sbml_element, metaid):
        search_xpath = RDF_ANNOTATION_XPATH % metaid
        for i in sbml_element.iterfind(search_xpath):
            yield get_attrib(i, "rdf:resource")
        for i in sbml_element.iterfind(search_xpath.replace(
                "isEncodedBy", "is")):
            yield get_attrib(i, "rdf:resource")


[docs]class CobraSBMLError(Exception):
    pass


[docs]def get_attrib(tag, attribute, type=lambda x: x, require=False):
    value = tag.get(ns(attribute))
    if require and value is None:
        msg = "required attribute '%s' not found in tag '%s'" % \
              (attribute, tag.tag)
        if tag.get("id") is not None:
            msg += " with id '%s'" % tag.get("id")
        elif tag.get("name") is not None:
            msg += " with name '%s'" % tag.get("name")
        raise CobraSBMLError(msg)
    return type(value) if value is not None else None


[docs]def set_attrib(xml, attribute_name, value):
    if value is None or value == "":
        return
    xml.set(ns(attribute_name), str(value))


[docs]def parse_stream(filename):
    """parses filename or compressed stream to xml"""
    try:
        if hasattr(filename, "read"):
            return parse(filename)
        elif filename.endswith(".gz"):
            with GzipFile(filename) as infile:
                return parse(infile)
        elif filename.endswith(".bz2"):
            with BZ2File(filename) as infile:
                return parse(infile)
        else:
            return parse(filename)
    except ParseError as e:
        raise CobraSBMLError("Malformed XML file: " + str(e))


# string utility functions
[docs]def clip(string, prefix):
    """clips a prefix from the beginning of a string if it exists

    >>> clip("R_pgi", "R_")
    "pgi"

    """
    return string[len(prefix):] if string.startswith(prefix) else string


[docs]def strnum(number):
    """Utility function to convert a number to a string"""
    if isinstance(number, (Decimal, Basic, str)):
        return str(number)
    s = "%.15g" % number
    return s.rstrip(".")


[docs]def construct_gpr_xml(parent, expression):
    """create gpr xml under parent node"""
    if isinstance(expression, BoolOp):
        op = expression.op
        if isinstance(op, And):
            new_parent = SubElement(parent, AND_TAG)
        elif isinstance(op, Or):
            new_parent = SubElement(parent, OR_TAG)
        else:
            raise Exception("unsupported operation " + op.__class__)
        for arg in expression.values:
            construct_gpr_xml(new_parent, arg)
    elif isinstance(expression, Name):
        gene_elem = SubElement(parent, GENEREF_TAG)
        set_attrib(gene_elem, "fbc:geneProduct", "G_" + expression.id)
    else:
        raise Exception("unsupported operation  " + repr(expression))


[docs]def annotate_cobra_from_sbml(cobra_element, sbml_element):
    sbo_term = sbml_element.get("sboTerm")
    if sbo_term is not None:
        cobra_element.annotation["SBO"] = sbo_term
    meta_id = get_attrib(sbml_element, "metaid")
    if meta_id is None:
        return
    annotation = cobra_element.annotation
    for uri in extract_rdf_annotation(sbml_element, metaid="#" + meta_id):
        if not uri.startswith("http://identifiers.org/"):
            warn("%s does not start with http://identifiers.org/" % uri)
            continue
        try:
            provider, identifier = uri[23:].split("/", 1)
        except ValueError:
            warn("%s does not conform to http://identifiers.org/provider/id"
                 % uri)
            continue
        # handle multiple id's in the same database
        if provider in annotation:
            # make into a list if necessary
            if isinstance(annotation[provider], string_types):
                annotation[provider] = [annotation[provider]]
            annotation[provider].append(identifier)
        else:
            cobra_element.annotation[provider] = identifier


[docs]def annotate_sbml_from_cobra(sbml_element, cobra_element):
    if len(cobra_element.annotation) == 0:
        return
    # get the id so we can set the metaid
    tag = sbml_element.tag
    if tag.startswith(sbml_prefix) or tag[0] != "{":
        prefix = ""
    elif tag.startswith(fbc_prefix):
        prefix = fbc_prefix
    else:
        raise ValueError("Can not annotate " + repr(sbml_element))
    id = sbml_element.get(prefix + "id")
    if len(id) == 0:
        raise ValueError("%s does not have id set" % repr(sbml_element))
    set_attrib(sbml_element, "metaid", id)
    annotation = SubElement(sbml_element, ns("sbml:annotation"))
    rdf_desc = SubElement(SubElement(annotation, ns("rdf:RDF")),
                          ns("rdf:Description"))
    set_attrib(rdf_desc, "rdf:about", "#" + id)
    bag = SubElement(SubElement(rdf_desc, ns("bqbiol:is")),
                     ns("rdf:Bag"))
    for provider, identifiers in sorted(iteritems(cobra_element.annotation)):
        if provider == "SBO":
            set_attrib(sbml_element, "sboTerm", identifiers)
            continue
        if isinstance(identifiers, string_types):
            identifiers = (identifiers,)
        for identifier in identifiers:
            li = SubElement(bag, ns("rdf:li"))
            set_attrib(li, "rdf:resource", "http://identifiers.org/%s/%s" %
                       (provider, identifier))


[docs]def parse_xml_into_model(xml, number=float):
    xml_model = xml.find(ns("sbml:model"))
    if get_attrib(xml_model, "fbc:strict") != "true":
        warn('loading SBML model without fbc:strict="true"')

    model_id = get_attrib(xml_model, "id")
    model = Model(model_id)
    model.name = xml_model.get("name")

    model.compartments = {c.get("id"): c.get("name") for c in
                          xml_model.findall(COMPARTMENT_XPATH)}
    # add metabolites
    for species in xml_model.findall(SPECIES_XPATH % 'false'):
        met = get_attrib(species, "id", require=True)
        met = Metabolite(clip(met, "M_"))
        met.name = species.get("name")
        annotate_cobra_from_sbml(met, species)
        met.compartment = species.get("compartment")
        met.charge = get_attrib(species, "fbc:charge", int)
        met.formula = get_attrib(species, "fbc:chemicalFormula")
        model.add_metabolites([met])
    # Detect boundary metabolites - In case they have been mistakenly
    # added. They should not actually appear in a model
    boundary_metabolites = {clip(i.get("id"), "M_")
                            for i in xml_model.findall(SPECIES_XPATH % 'true')}

    # add genes
    for sbml_gene in xml_model.iterfind(GENES_XPATH):
        gene_id = get_attrib(sbml_gene, "fbc:id").replace(SBML_DOT, ".")
        gene = Gene(clip(gene_id, "G_"))
        gene.name = get_attrib(sbml_gene, "fbc:name")
        if gene.name is None:
            gene.name = get_attrib(sbml_gene, "fbc:label")
        annotate_cobra_from_sbml(gene, sbml_gene)
        model.genes.append(gene)

    def process_gpr(sub_xml):
        """recursively convert gpr xml to a gpr string"""
        if sub_xml.tag == OR_TAG:
            return "( " + ' or '.join(process_gpr(i) for i in sub_xml) + " )"
        elif sub_xml.tag == AND_TAG:
            return "( " + ' and '.join(process_gpr(i) for i in sub_xml) + " )"
        elif sub_xml.tag == GENEREF_TAG:
            gene_id = get_attrib(sub_xml, "fbc:geneProduct", require=True)
            return clip(gene_id, "G_")
        else:
            raise Exception("unsupported tag " + sub_xml.tag)

    bounds = {bound.get("id"): get_attrib(bound, "value", type=number)
              for bound in xml_model.iterfind(BOUND_XPATH)}
    # add reactions
    reactions = []
    for sbml_reaction in xml_model.iterfind(
            ns("sbml:listOfReactions/sbml:reaction")):
        reaction = get_attrib(sbml_reaction, "id", require=True)
        reaction = Reaction(clip(reaction, "R_"))
        reaction.name = sbml_reaction.get("name")
        annotate_cobra_from_sbml(reaction, sbml_reaction)
        lb_id = get_attrib(sbml_reaction, "fbc:lowerFluxBound", require=True)
        ub_id = get_attrib(sbml_reaction, "fbc:upperFluxBound", require=True)
        try:
            reaction.upper_bound = bounds[ub_id]
            reaction.lower_bound = bounds[lb_id]
        except KeyError as e:
            raise CobraSBMLError("No constant bound with id '%s'" % str(e))
        reactions.append(reaction)

        stoichiometry = defaultdict(lambda: 0)
        for species_reference in sbml_reaction.findall(
                ns("sbml:listOfReactants/sbml:speciesReference")):
            met_name = clip(species_reference.get("species"), "M_")
            stoichiometry[met_name] -= \
                number(species_reference.get("stoichiometry"))
        for species_reference in sbml_reaction.findall(
                ns("sbml:listOfProducts/sbml:speciesReference")):
            met_name = clip(species_reference.get("species"), "M_")
            stoichiometry[met_name] += \
                get_attrib(species_reference, "stoichiometry",
                           type=number, require=True)
        # needs to have keys of metabolite objects, not ids
        object_stoichiometry = {}
        for met_id in stoichiometry:
            if met_id in boundary_metabolites:
                warn("Boundary metabolite '%s' used in reaction '%s'" %
                     (met_id, reaction.id))
                continue
            try:
                metabolite = model.metabolites.get_by_id(met_id)
            except KeyError:
                warn("ignoring unknown metabolite '%s' in reaction %s" %
                     (met_id, reaction.id))
                continue
            object_stoichiometry[metabolite] = stoichiometry[met_id]
        reaction.add_metabolites(object_stoichiometry)
        # set gene reaction rule
        gpr_xml = sbml_reaction.find(GPR_TAG)
        if gpr_xml is not None and len(gpr_xml) != 1:
            warn("ignoring invalid geneAssociation for " + repr(reaction))
            gpr_xml = None
        gpr = process_gpr(gpr_xml[0]) if gpr_xml is not None else ''
        # remove outside parenthesis, if any
        if gpr.startswith("(") and gpr.endswith(")"):
            gpr = gpr[1:-1].strip()
        gpr = gpr.replace(SBML_DOT, ".")
        reaction.gene_reaction_rule = gpr
    try:
        model.add_reactions(reactions)
    except ValueError as e:
        warn(str(e))

    # objective coefficients are handled after all reactions are added
    obj_list = xml_model.find(ns("fbc:listOfObjectives"))
    if obj_list is None:
        warn("listOfObjectives element not found")
        return model
    target_objective_id = get_attrib(obj_list, "fbc:activeObjective")
    target_objective = obj_list.find(
        ns("fbc:objective[@fbc:id='{}']".format(target_objective_id)))
    obj_direction_long = get_attrib(target_objective, "fbc:type")
    obj_direction = LONG_SHORT_DIRECTION[obj_direction_long]

    obj_query = OBJECTIVES_XPATH % target_objective_id
    coefficients = {}
    for sbml_objective in obj_list.findall(obj_query):
        rxn_id = clip(get_attrib(sbml_objective, "fbc:reaction"), "R_")
        try:
            objective_reaction = model.reactions.get_by_id(rxn_id)
        except KeyError:
            raise CobraSBMLError("Objective reaction '%s' not found" % rxn_id)
        try:
            coefficients[objective_reaction] = get_attrib(
                sbml_objective, "fbc:coefficient", type=number)
        except ValueError as e:
            warn(str(e))
    set_objective(model, coefficients)
    model.solver.objective.direction = obj_direction
    return model


[docs]def model_to_xml(cobra_model, units=True):
    xml = Element("sbml", xmlns=namespaces["sbml"], level="3", version="1",
                  sboTerm="SBO:0000624")
    set_attrib(xml, "fbc:required", "false")
    xml_model = SubElement(xml, "model")
    set_attrib(xml_model, "fbc:strict", "true")
    if cobra_model.id is not None:
        xml_model.set("id", cobra_model.id)
    if cobra_model.name is not None:
        xml_model.set("name", cobra_model.name)

    # if using units, add in mmol/gdw/hr
    if units:
        unit_def = SubElement(
            SubElement(xml_model, "listOfUnitDefinitions"),
            "unitDefinition", id="mmol_per_gDW_per_hr")
        list_of_units = SubElement(unit_def, "listOfUnits")
        SubElement(list_of_units, "unit", kind="mole", scale="-3",
                   multiplier="1", exponent="1")
        SubElement(list_of_units, "unit", kind="gram", scale="0",
                   multiplier="1", exponent="-1")
        SubElement(list_of_units, "unit", kind="second", scale="0",
                   multiplier="3600", exponent="-1")

    # create the element for the flux objective
    obj_list_tmp = SubElement(xml_model, ns("fbc:listOfObjectives"))
    set_attrib(obj_list_tmp, "fbc:activeObjective", "obj")
    obj_list_tmp = SubElement(obj_list_tmp, ns("fbc:objective"))
    set_attrib(obj_list_tmp, "fbc:id", "obj")
    set_attrib(obj_list_tmp, "fbc:type",
               SHORT_LONG_DIRECTION[cobra_model.objective.direction])
    flux_objectives_list = SubElement(obj_list_tmp,
                                      ns("fbc:listOfFluxObjectives"))

    # create the element for the flux bound parameters
    parameter_list = SubElement(xml_model, "listOfParameters")
    param_attr = {"constant": "true"}
    if units:
        param_attr["units"] = "mmol_per_gDW_per_hr"
    # the most common bounds are the minimum, maximum, and 0
    if len(cobra_model.reactions) > 0:
        min_value = min(cobra_model.reactions.list_attr("lower_bound"))
        max_value = max(cobra_model.reactions.list_attr("upper_bound"))
    else:
        min_value = -1000
        max_value = 1000

    SubElement(parameter_list, "parameter", value=strnum(min_value),
               id="cobra_default_lb", sboTerm="SBO:0000626", **param_attr)
    SubElement(parameter_list, "parameter", value=strnum(max_value),
               id="cobra_default_ub", sboTerm="SBO:0000626", **param_attr)
    SubElement(parameter_list, "parameter", value="0",
               id="cobra_0_bound", sboTerm="SBO:0000626", **param_attr)

    def create_bound(reaction, bound_type):
        """returns the str id of the appropriate bound for the reaction

        The bound will also be created if necessary"""
        value = getattr(reaction, bound_type)
        if value == min_value:
            return "cobra_default_lb"
        elif value == 0:
            return "cobra_0_bound"
        elif value == max_value:
            return "cobra_default_ub"
        else:
            param_id = "R_" + reaction.id + "_" + bound_type
            SubElement(parameter_list, "parameter", id=param_id,
                       value=strnum(value), sboTerm="SBO:0000625",
                       **param_attr)
            return param_id

    # add in compartments
    compartments_list = SubElement(xml_model, "listOfCompartments")
    compartments = cobra_model.compartments
    for compartment, name in iteritems(compartments):
        SubElement(compartments_list, "compartment", id=compartment, name=name,
                   constant="true")

    # add in metabolites
    species_list = SubElement(xml_model, "listOfSpecies")
    for met in cobra_model.metabolites:
        species = SubElement(species_list, "species",
                             id="M_" + met.id,
                             # Useless required SBML parameters
                             constant="false",
                             boundaryCondition="false",
                             hasOnlySubstanceUnits="false")
        set_attrib(species, "name", met.name)
        annotate_sbml_from_cobra(species, met)
        set_attrib(species, "compartment", met.compartment)
        set_attrib(species, "fbc:charge", met.charge)
        set_attrib(species, "fbc:chemicalFormula", met.formula)

    # add in genes
    if len(cobra_model.genes) > 0:
        genes_list = SubElement(xml_model, GENELIST_TAG)
        for gene in cobra_model.genes:
            gene_id = gene.id.replace(".", SBML_DOT)
            sbml_gene = SubElement(genes_list, GENE_TAG)
            set_attrib(sbml_gene, "fbc:id", "G_" + gene_id)
            name = gene.name
            if name is None or len(name) == 0:
                name = gene.id
            set_attrib(sbml_gene, "fbc:label", gene_id)
            set_attrib(sbml_gene, "fbc:name", name)
            annotate_sbml_from_cobra(sbml_gene, gene)

    # add in reactions
    reactions_list = SubElement(xml_model, "listOfReactions")
    for reaction in cobra_model.reactions:
        id = "R_" + reaction.id
        sbml_reaction = SubElement(
            reactions_list, "reaction",
            id=id,
            # Useless required SBML parameters
            fast="false",
            reversible=str(reaction.lower_bound < 0).lower())
        set_attrib(sbml_reaction, "name", reaction.name)
        annotate_sbml_from_cobra(sbml_reaction, reaction)
        # add in bounds
        set_attrib(sbml_reaction, "fbc:upperFluxBound",
                   create_bound(reaction, "upper_bound"))
        set_attrib(sbml_reaction, "fbc:lowerFluxBound",
                   create_bound(reaction, "lower_bound"))

        # objective coefficient
        if reaction.objective_coefficient != 0:
            objective = SubElement(flux_objectives_list,
                                   ns("fbc:fluxObjective"))
            set_attrib(objective, "fbc:reaction", id)
            set_attrib(objective, "fbc:coefficient",
                       strnum(reaction.objective_coefficient))

        # stoichiometry
        reactants = {}
        products = {}
        for metabolite, stoichiomety in iteritems(reaction._metabolites):
            met_id = "M_" + metabolite.id
            if stoichiomety > 0:
                products[met_id] = strnum(stoichiomety)
            else:
                reactants[met_id] = strnum(-stoichiomety)
        if len(reactants) > 0:
            reactant_list = SubElement(sbml_reaction, "listOfReactants")
            for met_id, stoichiomety in sorted(iteritems(reactants)):
                SubElement(reactant_list, "speciesReference", species=met_id,
                           stoichiometry=stoichiomety, constant="true")
        if len(products) > 0:
            product_list = SubElement(sbml_reaction, "listOfProducts")
            for met_id, stoichiomety in sorted(iteritems(products)):
                SubElement(product_list, "speciesReference", species=met_id,
                           stoichiometry=stoichiomety, constant="true")

        # gene reaction rule
        gpr = reaction.gene_reaction_rule
        if gpr is not None and len(gpr) > 0:
            gpr = gpr.replace(".", SBML_DOT)
            gpr_xml = SubElement(sbml_reaction, GPR_TAG)
            try:
                parsed, _ = parse_gpr(gpr)
                construct_gpr_xml(gpr_xml, parsed.body)
            except Exception as e:
                print("failed on '%s' in %s" %
                      (reaction.gene_reaction_rule, repr(reaction)))
                raise e

    return xml


[docs]def read_sbml_model(filename, number=float, **kwargs):
    if not _with_lxml:
        warn("Install lxml for faster SBML I/O", ImportWarning)
    xmlfile = parse_stream(filename)
    xml = xmlfile.getroot()
    # use libsbml if not l3v1 with fbc v2
    use_libsbml = (xml.get("level") != "3" or xml.get("version") != "1" or
                   get_attrib(xml, "fbc:required") is None)
    if use_libsbml:
        if libsbml is None:
            raise ImportError("libSBML required for fbc < 2")
        # libsbml needs a file string, so write to temp file if a file handle
        if hasattr(filename, "read"):
            with NamedTemporaryFile(suffix=".xml", delete=False) as outfile:
                xmlfile.write(outfile, encoding="UTF-8")
            filename = outfile.name
        return read_sbml2(filename, **kwargs)
    try:
        return parse_xml_into_model(xml, number=number)
    except Exception:
        raise CobraSBMLError(
            "Something went wrong reading the model. You can get a detailed "
            "report using the `cobra.io.sbml3.validate_sbml_model` function "
            "or using the online validator at http://sbml.org/validator")


id_required = {ns(i) for i in ("sbml:model", "sbml:reaction:", "sbml:species",
                               "fbc:geneProduct", "sbml:compartment",
                               "sbml:parameter", "sbml:UnitDefinition",
                               "fbc:objective")}
invalid_id_detector = re.compile("|".join(re.escape(i[0]) for i in _renames))


[docs]def validate_sbml_model(filename, check_model=True):
    """Returns the model along with a list of errors.

    Parameters
    ----------
    filename : str
        The filename of the SBML model to be validated.
    check_model: bool, optional
        Whether to also check some basic model properties such as reaction
        boundaries and compartment formulas.

    Returns
    -------
    model : :class:`~cobra.core.Model.Model` object
        The cobra model if the file could be read succesfully or None
        otherwise.
    errors : dict
        Warnings and errors grouped by their respective types.

    Raises
    ------
    CobraSBMLError
        If the file is not a valid SBML Level 3 file with FBC.
    """
    xmlfile = parse_stream(filename)
    xml = xmlfile.getroot()
    # use libsbml if not l3v1 with fbc v2
    use_libsbml = (xml.get("level") != "3" or xml.get("version") != "1" or
                   get_attrib(xml, "fbc:required") is None)
    if use_libsbml:
        raise CobraSBMLError("XML is not SBML level 3 v1 with fbc v2")

    errors = {k: [] for k in ("validator", "warnings", "SBML errors", "other")}

    def err(err_msg, group="validator"):
        errors[group].append(err_msg)

    # make sure there is exactly one model
    xml_models = xml.findall(ns("sbml:model"))
    if len(xml_models) == 0:
        raise CobraSBMLError("No SBML model detected in file")
    if len(xml_models) > 1:
        err("More than 1 SBML model detected in file")
    xml_model = xml_models[0]

    # make sure all sbml id's are valid
    all_ids = set()
    for element in xmlfile.iter():
        if element.tag.startswith(fbc_prefix):
            prefix = fbc_prefix
        elif element.tag.startswith(sbml_prefix):
            prefix = ""
        else:
            continue
        str_id = element.get(prefix + "id", None)
        element_name = element.tag.split("}")[-1]
        id_repr = "%s id '%s' " % (element_name, str_id)
        if str_id is None or len(str_id) == 0:
            if element.tag in id_required:
                err(element_name + " missing id")
        else:
            if str_id in all_ids:
                err("duplicate id for " + id_repr)
            all_ids.add(str_id)
            try:
                str_id.encode("ascii")
            except UnicodeEncodeError as e:
                err("invalid character '%s' found in %s" %
                    (str_id[e.start:e.end], id_repr))
            if invalid_id_detector.search(str_id):
                bad_chars = "".join(invalid_id_detector.findall(str_id))
                err("invalid character%s '%s' found in %s" %
                    ("s" if len(bad_chars) > 1 else "", bad_chars, id_repr))
            if not str_id[0].isalpha():
                err("%s does not start with alphabet character" % id_repr)

    # check SBO terms
    for element in xml.findall(".//*[@sboTerm]"):
        sbo_term = element.get("sboTerm")
        if not sbo_term.startswith("SBO:"):
            err("sboTerm '%s' does not begin with 'SBO:'" % sbo_term)

    # ensure can be made into model
    # all warnings generated while loading will be logged as errors
    with catch_warnings(record=True) as warning_list:
        simplefilter("always")
        try:
            model = parse_xml_into_model(xml)
        except CobraSBMLError as e:
            err(str(e), "SBML errors")
            return None, errors
        except Exception as e:
            err(str(e), "other")
            return None, errors
    errors["warnings"].extend(str(i.message) for i in warning_list)

    # check genes
    xml_genes = {
        get_attrib(i, "fbc:id").replace(SBML_DOT, ".")
        for i in xml_model.iterfind(GENES_XPATH)}
    for gene in model.genes:
        if "G_" + gene.id not in xml_genes and gene.id not in xml_genes:
            err("No gene specfied with id 'G_%s'" % gene.id)

    if check_model:
        errors["validator"].extend(check_metabolite_compartment_formula(model))

    return model, errors


[docs]def write_sbml_model(cobra_model, filename, use_fbc_package=True, **kwargs):
    if not _with_lxml:
        warn("Install lxml for faster SBML I/O", ImportWarning)
    if not use_fbc_package:
        if libsbml is None:
            raise ImportError("libSBML required to write non-fbc models")
        write_sbml2(cobra_model, filename, use_fbc_package=False, **kwargs)
        return
    # create xml
    xml = model_to_xml(cobra_model, **kwargs)
    write_args = {"encoding": "UTF-8", "xml_declaration": True}
    if _with_lxml:
        write_args["pretty_print"] = True
        write_args["pretty_print"] = True
    else:
        indent_xml(xml)
    # write xml to file
    should_close = True
    if hasattr(filename, "write"):
        xmlfile = filename
        should_close = False
    elif filename.endswith(".gz"):
        xmlfile = GzipFile(filename, "wb")
    elif filename.endswith(".bz2"):
        xmlfile = BZ2File(filename, "wb")
    else:
        xmlfile = open(filename, "wb")
    ElementTree(xml).write(xmlfile, **write_args)
    if should_close:
        xmlfile.close()


# inspired by http://effbot.org/zone/element-lib.htm#prettyprint
[docs]def indent_xml(elem, level=0):
    """indent xml for pretty printing"""
    i = "\n" + level * "  "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + "  "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for elem in elem:
            indent_xml(elem, level + 1)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i