Source code for test_sbml

# -*- coding: utf-8 -*-
"""
Testing SBML functionality based on libsbml.
"""

from __future__ import absolute_import

from collections import namedtuple
from os import unlink
from os.path import join, split
from pickle import load
from tempfile import gettempdir

import pytest

import cobra
from cobra import Model
from cobra.io import read_sbml_model, validate_sbml_model, write_sbml_model


[docs]config = cobra.Configuration() # for default bounds
try: import jsonschema except ImportError:
[docs] jsonschema = None
# ---------------------------------- # Definition of SBML files to test # ----------------------------------
[docs]IOTrial = namedtuple( "IOTrial", [ "name", "reference_file", "test_file", "read_function", "write_function", "validation_function",
], )
[docs]trials = [ IOTrial( "fbc2", "mini.pickle", "mini_fbc2.xml", read_sbml_model, write_sbml_model, validate_sbml_model, ), IOTrial( "fbc2Gz", "mini.pickle", "mini_fbc2.xml.gz", read_sbml_model, write_sbml_model, None, ), IOTrial( "fbc2Bz2", "mini.pickle", "mini_fbc2.xml.bz2", read_sbml_model, write_sbml_model, None, ), IOTrial( "fbc1", "mini.pickle", "mini_fbc1.xml", read_sbml_model, write_sbml_model, None ), IOTrial("cobra", None, "mini_cobra.xml", read_sbml_model, write_sbml_model, None),
]
[docs]trial_names = [node.name for node in trials]
@pytest.mark.parametrize("trial", trials) def test_validate(trial, data_directory): """ Test validation function. """ if trial.validation_function is None: pytest.skip("not implemented") test_file = join(data_directory, trial.test_file) trial.validation_function(test_file)
[docs]class TestCobraIO: """ Tests the read and write functions. """ @classmethod
[docs] def compare_models(cls, name, model1, model2): assert len(model1.reactions) == len(model2.reactions) assert len(model1.metabolites) == len(model2.metabolites) assert model1.objective.direction == model2.objective.direction for attr in ( "id", "name", "lower_bound", "upper_bound", "objective_coefficient", "gene_reaction_rule", ): assert getattr(model1.reactions[0], attr) == getattr( model2.reactions[0], attr ) assert getattr(model1.reactions[5], attr) == getattr( model2.reactions[5], attr ) assert getattr(model1.reactions[-1], attr) == getattr( model2.reactions[-1], attr ) for attr in ("id", "name", "compartment", "formula", "charge"): assert getattr(model1.metabolites[0], attr) == getattr( model2.metabolites[0], attr ) assert getattr(model1.metabolites[5], attr) == getattr( model2.metabolites[5], attr ) assert getattr(model1.metabolites[-1], attr) == getattr( model2.metabolites[-1], attr ) assert len(model1.reactions[0].metabolites) == len( model2.reactions[0].metabolites ) assert len(model1.reactions[8].metabolites) == len( model2.reactions[8].metabolites ) assert len(model1.reactions[-1].metabolites) == len( model2.reactions[-1].metabolites ) assert len(model1.genes) == len(model2.genes) # ensure they have the same solution max solution1 = model1.optimize() solution2 = model2.optimize() assert ( solution1.status == "infeasible" and solution2.status == "infeasible" ) or abs(solution1.objective_value - solution2.objective_value) < 0.001 # ensure the references are correct assert model2.metabolites[0]._model is model2 assert model2.reactions[0]._model is model2 assert model2.genes[0]._model is model2
@classmethod
[docs] def extra_comparisons(cls, name, model1, model2): assert model1.compartments == model2.compartments # FIXME: problems of duplicate annotations in test data # ('cas': ['56-65-5', '56-65-5']) # assert dict(model1.metabolites[4].annotation) == dict( # model2.metabolites[4].annotation) d1 = model1.reactions[4].annotation d2 = model2.reactions[4].annotation assert list(d1.keys()) == list(d2.keys()) for k in d1: assert set(d1[k]) == set(d2[k]) assert dict(model1.reactions[4].annotation) == dict( model2.reactions[4].annotation ) assert dict(model1.genes[5].annotation) == dict(model2.genes[5].annotation) for attr in ("id", "name"): assert getattr(model1.genes[0], attr) == getattr(model2.genes[0], attr) assert getattr(model1.genes[10], attr) == getattr(model2.genes[10], attr) assert getattr(model1.genes[-1], attr) == getattr(model2.genes[-1], attr)
[docs] def test_read_1(self, io_trial): name, reference_model, test_model, _ = io_trial if name in ["fbc1"]: pytest.xfail("not supported") if reference_model: self.compare_models(name, reference_model, test_model)
[docs] def test_read_2(self, io_trial): name, reference_model, test_model, _ = io_trial if name in ["fbc1", "mat", "cobra", "raven-mat"]: pytest.xfail("not supported") if reference_model: self.extra_comparisons(name, reference_model, test_model)
[docs] def test_write_1(self, io_trial): name, _, test_model, reread_model = io_trial if name in ["fbc1", "raven-mat"]: pytest.xfail("not supported") self.compare_models(name, test_model, reread_model)
[docs] def test_write_2(self, io_trial): name, _, test_model, reread_model = io_trial if name in ["fbc1", "mat", "cobra", "raven-mat"]: pytest.xfail("not supported") self.extra_comparisons(name, test_model, reread_model)
@pytest.fixture(scope="module", params=trials, ids=trial_names)
[docs]def io_trial(request, data_directory): reference_model = None if request.param.reference_file: with open(join(data_directory, request.param.reference_file), "rb") as infile: reference_model = load(infile) test_model = request.param.read_function( join(data_directory, request.param.test_file) ) test_output_filename = join(gettempdir(), split(request.param.test_file)[-1]) # test writing the model within a context with a non-empty stack with test_model: test_model.objective = test_model.objective request.param.write_function(test_model, test_output_filename) reread_model = request.param.read_function(test_output_filename) unlink(test_output_filename) return request.param.name, reference_model, test_model, reread_model
[docs]def test_filehandle(data_directory, tmp_path): """Test reading and writing to file handle.""" with open(join(data_directory, "mini_fbc2.xml"), "r") as f_in: model1 = read_sbml_model(f_in) assert model1 is not None sbml_path = join(str(tmp_path), "test.xml") with open(sbml_path, "w") as f_out: write_sbml_model(model1, f_out) with open(sbml_path, "r") as f_in: model2 = read_sbml_model(f_in) TestCobraIO.compare_models(name="filehandle", model1=model1, model2=model2)
[docs]def test_from_sbml_string(data_directory): """Test reading from SBML string.""" sbml_path = join(data_directory, "mini_fbc2.xml") with open(sbml_path, "r") as f_in: sbml_str = f_in.read() model1 = read_sbml_model(sbml_str) model2 = read_sbml_model(sbml_path) TestCobraIO.compare_models(name="read from string", model1=model1, model2=model2)
@pytest.mark.skip(reason="Model history currently not written")
[docs]def test_model_history(tmp_path): """Testing reading and writing of ModelHistory.""" model = Model("test") model._sbml = { "creators": [ { "familyName": "Mustermann", "givenName": "Max", "organisation": "Muster University", "email": "muster@university.com", } ] } sbml_path = join(str(tmp_path), "test.xml") with open(sbml_path, "w") as f_out: write_sbml_model(model, f_out) with open(sbml_path, "r") as f_in: model2 = read_sbml_model(f_in) assert "creators" in model2._sbml assert len(model2._sbml["creators"]) is 1 c = model2._sbml["creators"][0] assert c["familyName"] == "Mustermann" assert c["givenName"] == "Max" assert c["organisation"] == "Muster University" assert c["email"] == "muster@university.com"
[docs]def test_groups(data_directory, tmp_path): """Testing reading and writing of groups""" sbml_path = join(data_directory, "e_coli_core.xml") model = read_sbml_model(sbml_path) assert model.groups is not None assert len(model.groups) == 10 g1 = model.groups[0] assert len(g1.members) == 6 temp_path = join(str(tmp_path), "test.xml") with open(temp_path, "w") as f_out: write_sbml_model(model, f_out) with open(temp_path, "r") as f_in: model2 = read_sbml_model(f_in) assert model2.groups is not None assert len(model2.groups) == 10 g1 = model2.groups[0] assert len(g1.members) == 6
[docs]def test_missing_flux_bounds1(data_directory): sbml_path = join(data_directory, "annotation.xml") with open(sbml_path, "r") as f_in: # missing flux bounds are set to cobra.configuration.bounds model, errors = validate_sbml_model(f_in, set_missing_bounds=True) r1 = model.reactions.R1 assert r1.lower_bound == config.lower_bound assert r1.upper_bound == config.upper_bound
[docs]def test_missing_flux_bounds2(data_directory): sbml_path = join(data_directory, "annotation.xml") with open(sbml_path, "r") as f_in: # missing flux bounds are set to [-INF, INF] model, errors = validate_sbml_model(f_in, set_missing_bounds=False) r1 = model.reactions.R1 assert r1.lower_bound == config.lower_bound assert r1.upper_bound == config.upper_bound
[docs]def test_validate(data_directory): """Test the validation code. """ sbml_path = join(data_directory, "mini_fbc2.xml") with open(sbml_path, "r") as f_in: model1, errors = validate_sbml_model(f_in, check_modeling_practice=True) assert model1 assert errors assert len(errors["SBML_WARNING"]) == 0
[docs]def test_validation_warnings(data_directory): """Test the validation warnings. """ sbml_path = join(data_directory, "validation.xml") with open(sbml_path, "r") as f_in: model1, errors = validate_sbml_model(f_in, check_modeling_practice=True) assert model1 assert errors assert len(errors["COBRA_WARNING"]) == 3 assert "No objective in listOfObjectives" in errors["COBRA_WARNING"]
[docs]def test_infinity_bounds(data_directory, tmp_path): """Test infinity bound example. """ sbml_path = join(data_directory, "fbc_ex1.xml") model = read_sbml_model(sbml_path) # check that simulation works solution = model.optimize() # check that values are set r = model.reactions.get_by_id("EX_X") assert r.lower_bound == -float("Inf") assert r.upper_bound == float("Inf") temp_path = join(str(tmp_path), "test.xml") with open(temp_path, "w") as f_out: write_sbml_model(model, f_out) with open(temp_path, "r") as f_in: model2 = read_sbml_model(f_in) r = model2.reactions.get_by_id("EX_X") assert r.lower_bound == -float("Inf") assert r.upper_bound == float("Inf")
[docs]def test_boundary_conditions(data_directory): """Test infinity bound example. """ sbml_path1 = join(data_directory, "fbc_ex1.xml") model1 = read_sbml_model(sbml_path1) sol1 = model1.optimize() # model with species boundaryCondition==True sbml_path2 = join(data_directory, "fbc_ex2.xml") model2 = read_sbml_model(sbml_path2) sol2 = model2.optimize() r = model2.reactions.get_by_id("EX_X") assert r.lower_bound == config.lower_bound assert r.upper_bound == config.upper_bound assert sol1.objective_value == sol2.objective_value
[docs]def test_gprs(data_directory, tmp_path): """Test that GPRs are written and read correctly""" model1 = read_sbml_model(join(data_directory, "iJO1366.xml.gz")) sbml_path = join(str(tmp_path), "test.xml") with open(sbml_path, "w") as f_out: write_sbml_model(model1, f_out) with open(sbml_path, "r") as f_in: model2 = read_sbml_model(f_in) for r1 in model1.reactions: rid = r1.id r2 = model2.reactions.get_by_id(rid) gpr1 = r1.gene_reaction_rule gpr2 = r2.gene_reaction_rule assert gpr1 == gpr2
[docs]def test_identifiers_annotation(): from cobra.io.sbml import _parse_annotation_info for uri in [ "http://identifiers.org/chebi/CHEBI:000123", "https://identifiers.org/chebi/CHEBI:000123", "http://identifiers.org/CHEBI:000123", "https://identifiers.org/CHEBI:000123", ]: data = _parse_annotation_info(uri) assert data assert data[0] == "chebi" assert data[1] == "CHEBI:000123" for uri in [ "http://identifiers.org/taxonomy/9602", "https://identifiers.org/taxonomy/9602", "http://identifiers.org/taxonomy:9602", "https://identifiers.org/taxonomy:9602", ]: data = _parse_annotation_info(uri) assert data assert data[0] == "taxonomy" assert data[1] == "9602" for uri in [ "http://identifier.org/taxonomy/9602", "https://test.com", ]: data = _parse_annotation_info(uri) assert data is None
[docs]def test_smbl_with_notes(data_directory, tmp_path): """Test that NOTES in the RECON 2.2 style are written and read correctly""" sbml_path = join(data_directory, "example_notes.xml") model = read_sbml_model(sbml_path) assert model.metabolites is not None metabolite_notes = { "2hb_e": {"CHARGE": "-1", "FORMULA": "C4H7O3", "SMILES": "CCC(O)C(O)=O"}, "nad_e": { "CHARGE": "-1", "FORMULA": "C21H26N7O14P2", "SMILES": "NC(=O)c1ccc[n+](c1)[C@@H]1O[C@H](COP([O-])(" "=O)OP([O-])(=O)OC[C@H]2O[C@H]([C@H](O)[" "C@@H]2O)n2cnc3c(N)ncnc23)[C@@H](O)[C@H]1O", }, "h_e": {"CHARGE": "1", "FORMULA": "H", "SMILES": "[1H+]"}, "2obut_e": {"CHARGE": "-1", "FORMULA": "C4H5O3", "SMILES": "CCC(=O)C([O-])=O"}, "nadh_e": { "CHARGE": "-2", "FORMULA": "C21H27N7O14P2", "SMILES": "NC(=O)C1=CN(C=CC1)[C@@H]1O[C@H](COP([O-])(" "=O)OP([O-])(=O)OC[C@H]2O[C@H]([C@H](O)[" "C@@H]2O)n2cnc3c(N)ncnc23)[C@@H](O)[C@H]1O", }, } metabolite_annotations = { "2hb_e": { "sbo": "SBO:0000247", "inchi": "InChI=1S/C4H8O3/c1-2-3(5)4(6)7/h3,5H,2H2,1H3," "(H,6,7)", "chebi": "CHEBI:1148", }, "nad_e": { "sbo": "SBO:0000247", "inchi": "InChI=1S/C21H27N7O14P2/c22-17-12-19(" "25-7-24-17)28(8-26-12)21-16(32)14(30)11(" "41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(29)15(" "31)20(40-10)27-3-1-2-9(4-27)18(" "23)33/h1-4,7-8,10-11,13-16,20-21,29-32H,5-6H2," "(H5-,22,23,24,25,33,34,35,36,37)/p-1/t10-," "11-,13-,14-,15-,16-,20-,21-/m1/s1", "chebi": "CHEBI:57540", }, "h_e": { "sbo": "SBO:0000247", "inchi": "InChI=1S/p+1/i/hH", "chebi": "CHEBI:24636", }, "2obut_e": { "sbo": "SBO:0000247", "inchi": "InChI=1S/C4H6O3/c1-2-3(5)4(6)7/h2H2,1H3,(H,6," "7)/p-1", "chebi": "CHEBI:16763", }, "nadh_e": { "sbo": "SBO:0000247", "inchi": "InChI=1S/C21H29N7O14P2/c22-17-12-19(" "25-7-24-17)28(8-26-12)21-16(32)14(30)11(" "41-21)6-39-44(36,37)42-43(34,35)38-5-10-13(" "29)15(31)20(40-10)27-3-1-2-9(4-27)18(" "23)33/h1,3-4,7-8,10-11,13-16,20-21,29-32H,2," "5-6H2,(H2,23,33)(H,34,35)(H,36,37)(H2,22,24," "25)/p-2/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1", "chebi": "CHEBI:57945", }, } reaction_notes = { "CONFIDENCE_LEVEL": "4", "NOTES": "NCD", "SUBSYSTEM": "Propanoate metabolism", "GENE_ASSOCIATION": "(HGNC:8546 and HGNC:8548) or" " (HGNC:8547 and HGNC:8548)", } reaction_annotations = { "sbo": "SBO:0000176", "ec-code": "1.1.1.27", "pubmed": ["10108", "21765"], } for met_id in metabolite_notes: assert model.metabolites.has_id(met_id) for note_key in metabolite_notes[met_id].keys(): assert note_key in model.metabolites.get_by_id(met_id).notes assert ( metabolite_notes[met_id][note_key] == model.metabolites.get_by_id(met_id).notes[note_key] ) for annotation_key in metabolite_annotations[met_id].keys(): assert annotation_key in model.metabolites.get_by_id(met_id).annotation print(met_id) assert ( metabolite_annotations[met_id][annotation_key] == model.metabolites.get_by_id(met_id).annotation[annotation_key] ) for note_key in reaction_notes.keys(): assert note_key in model.reactions[0].notes.keys() assert reaction_notes[note_key] == model.reactions[0].notes[note_key] assert ( model.reactions[0].gene_reaction_rule == "(HGNC:8546 and HGNC:8548) or " "(HGNC:8547 and HGNC:8548)" ) assert len(model.groups) == 1 for annotation_key in reaction_annotations.keys(): assert annotation_key in model.reactions[0].annotation.keys() assert ( reaction_annotations[annotation_key] == model.reactions[0].annotation[annotation_key]
)