Source code for cobra.flux_analysis.gapfilling

"""Provide the base class and utility function for gap filling."""


import logging
from typing import TYPE_CHECKING, Dict, List, Optional

from optlang.interface import OPTIMAL
from optlang.symbolics import Zero

from ..core import Model
from ..util import fix_objective_as_constraint, interface_to_str


if TYPE_CHECKING:
    from cobra import Reaction


[docs]logger = logging.getLogger(__name__)
[docs]class GapFiller: r""" The base class for performing gap filling. This class implements gap filling based on a mixed-integer approach, very similar to that described in [1]_ and the 'no-growth but growth' part of [2]_ but with minor adjustments. In short, we add indicator variables for using the reactions in the universal model, z_i and then solve problem minimize: \sum_i c_i * z_i s.t. : Sv = 0 v_o \ge t lb_i \le v_i \le ub_i v_i = 0 if z_i = 0 where lb, ub are respectively the upper, lower flux bounds for reaction i, c_i is a cost parameter and the objective v_o is greater than the lower bound t. The default costs are 1 for reactions from the universal model, 100 for exchange (uptake) reactions added and 1 for added demand reactions. Note that this is a mixed-integer linear program and as such will be expensive to solve for large models. Consider using alternatives [3]_ such as CORDA instead [4,5]_ . Parameters ---------- model : cobra.Model The model to perform gap filling on. universal : cobra.Model, optional A universal model with reactions that can be used to complete the `model` (default None). lower_bound : float, optional The minimally accepted flux for the objective in the filled model (default 0.05). penalties : dict, optional A dictionary with keys being 'universal' (all reactions included in the universal model), 'exchange' and 'demand' (all additionally added exchange and demand reactions) for the three reaction types. Can also have reaction identifiers for reaction specific costs. Defaults are 1, 100 and 1 respectively (default None). exchange_reactions : bool, optional Consider adding exchange (uptake) reactions for all metabolites in the model (default False). demand_reactions : bool, optional Consider adding demand reactions for all metabolites (default True). integer_threshold : float, optional The threshold at which a value is considered non-zero (aka integrality threshold). If gapfilled models fail to validate, you may want to lower this value (default 1E-6). Attributes ---------- indicators: list of optlang.interface.Variable The list of symbolic indicator variables. costs: dict of {optlang.interface.Variable: float} The dictionary with symbolic variables as keys and their cost as values. References ---------- .. [1] Reed, Jennifer L., Trina R. Patel, Keri H. Chen, Andrew R. Joyce, Margaret K. Applebee, Christopher D. Herring, Olivia T. Bui, Eric M. Knight, Stephen S. Fong, and Bernhard O. Palsson. “Systems Approach to Refining Genome Annotation.” Proceedings of the National Academy of Sciences 103, no. 46 (2006): 17480–17484. .. [2] Kumar, Vinay Satish, and Costas D. Maranas. “GrowMatch: An Automated Method for Reconciling In Silico/In Vivo Growth Predictions.” Edited by Christos A. Ouzounis. PLoS Computational Biology 5, no. 3 (March 13, 2009): e1000308. doi:10.1371/journal.pcbi.1000308. .. [3] http://opencobra.github.io/cobrapy/tags/gapfilling/ .. [4] Schultz, André, and Amina A. Qutub. “Reconstruction of Tissue-Specific Metabolic Networks Using CORDA.” Edited by Costas D. Maranas. PLOS Computational Biology 12, no. 3 (March 4, 2016): e1004808. doi:10.1371/journal.pcbi.1004808. .. [5] Diener, Christian https://github.com/cdiener/corda """ def __init__( self, model: Model, universal: Optional[Model] = None, lower_bound: float = 0.05, penalties: Optional[Dict[str, "Reaction"]] = None, exchange_reactions: bool = False, demand_reactions: bool = True, integer_threshold: float = 1e-6, **kwargs, ) -> None: """Initialize a new GapFiller object. Other Parameters ---------------- kwargs : Further keyword arguments are passed on to the parent class. """ self.original_model = model self.lower_bound = lower_bound self.model = model.copy() tolerances = self.model.solver.configuration.tolerances try: tolerances.integrality = integer_threshold except AttributeError: logger.warning( f"The current solver interface {interface_to_str(self.model.problem)} " f"doesn't support setting the integrality tolerance." ) # TODO (Midnighter): One could debate how useful it is to compare against this # threshold when it is not supported by the chosen solver. self.integer_threshold = integer_threshold self.universal = universal.copy() if universal else Model("universal") self.penalties = dict(universal=1, exchange=100, demand=1) if penalties is not None: self.penalties.update(penalties) self.indicators = [] self.costs = {} self.extend_model(exchange_reactions, demand_reactions) fix_objective_as_constraint(self.model, bound=lower_bound) self.add_switches_and_objective()
[docs] def extend_model( self, exchange_reactions: bool = False, demand_reactions: bool = True ) -> None: """Extend gap filling model. Add reactions from universal model and optionally exchange and demand reactions for all metabolites in the model to perform gap filling on. Parameters ---------- exchange_reactions : bool, optional Consider adding exchange (uptake) reactions for all metabolites in the model (default False). demand_reactions : bool, optional Consider adding demand reactions for all metabolites (default True). """ for rxn in self.universal.reactions: rxn.gapfilling_type = "universal" new_metabolites = self.universal.metabolites.query( lambda metabolite: metabolite not in self.model.metabolites ) self.model.add_metabolites(new_metabolites) existing_exchanges = [] for rxn in self.universal.boundary: existing_exchanges = existing_exchanges + [ met.id for met in list(rxn.metabolites) ] for met in self.model.metabolites: if exchange_reactions: # check for exchange reaction in model already if met.id not in existing_exchanges: rxn = self.universal.add_boundary( met, type="exchange_smiley", lb=-1000, ub=0, reaction_id=f"EX_{met.id}", ) rxn.gapfilling_type = "exchange" if demand_reactions: rxn = self.universal.add_boundary( met, type="demand_smiley", lb=0, ub=1000, reaction_id=f"DM_{met.id}", ) rxn.gapfilling_type = "demand" new_reactions = self.universal.reactions.query( lambda reaction: reaction not in self.model.reactions ) self.model.add_reactions(new_reactions)
[docs] def update_costs(self) -> None: """Update coefficients for the indicator variables in the objective. Done incrementally so that second time the function is called, active indicators in the current solutions gets higher cost than the unused indicators. """ for var in self.indicators: if var not in self.costs: self.costs[var] = var.cost else: if var._get_primal() > self.integer_threshold: self.costs[var] += var.cost self.model.objective.set_linear_coefficients(self.costs)
[docs] def add_switches_and_objective(self) -> None: """Update gap filling model with switches and indicator objective.""" constraints = [] big_m = max(max(abs(b) for b in r.bounds) for r in self.model.reactions) prob = self.model.problem for rxn in self.model.reactions: if not hasattr(rxn, "gapfilling_type"): continue indicator = prob.Variable( name=f"indicator_{rxn.id}", lb=0, ub=1, type="binary" ) if rxn.id in self.penalties: indicator.cost = self.penalties[rxn.id] else: indicator.cost = self.penalties[rxn.gapfilling_type] indicator.rxn_id = rxn.id self.indicators.append(indicator) # if z = 1 v_i is allowed non-zero # v_i - Mz <= 0 and v_i + Mz >= 0 constraint_lb = prob.Constraint( rxn.flux_expression - big_m * indicator, ub=0, name=f"constraint_lb_{rxn.id}", sloppy=True, ) constraint_ub = prob.Constraint( rxn.flux_expression + big_m * indicator, lb=0, name=f"constraint_ub_{rxn.id}", sloppy=True, ) constraints.extend([constraint_lb, constraint_ub]) self.model.add_cons_vars(self.indicators) self.model.add_cons_vars(constraints, sloppy=True) self.model.objective = prob.Objective(Zero, direction="min", sloppy=True) self.model.objective.set_linear_coefficients({i: 1 for i in self.indicators}) self.update_costs()
[docs] def fill(self, iterations: int = 1) -> List[List["Reaction"]]: """Perform the gap filling. With every iteration, it solves the model, updates the costs and records the used reactions. Parameters ---------- iterations : int, optional The number of rounds of gap filling to perform. For every iteration, the penalty for every used reaction increases linearly. This way, the algorithm is encouraged to search for alternative solutions which may include previously used reactions i.e., with enough iterations pathways including 10 steps will eventually be reported even if the shortest pathway is a single reaction (default 1). Returns ------- list of list of cobra.Reaction A list of lists where each element is a list of reactions that were used to gap fill the model. Raises ------ RuntimeError If the model fails to be validated (i.e. the original model with the proposed reactions added, still cannot get the required flux through the objective). """ used_reactions = [] for _ in range(iterations): self.model.slim_optimize( error_value=None, message="gap filling optimization failed" ) solution = [ self.model.reactions.get_by_id(ind.rxn_id) for ind in self.indicators if ind._get_primal() > self.integer_threshold ] if not self.validate(solution): raise RuntimeError( "Failed to validate gap filled model, " "try lowering the integer threshold." ) used_reactions.append(solution) self.update_costs() return used_reactions
[docs] def validate(self, reactions: List["Reaction"]) -> bool: """Validate the model. Parameters ---------- reactions: list of cobra.Reaction The reactions to add to the model for validation. Returns ------- bool Whether the model is valid or not. """ with self.original_model as model: mets = [x.metabolites for x in reactions] all_keys = set().union(*(d.keys() for d in mets)) model.add_metabolites(all_keys) model.add_reactions(reactions) model.slim_optimize() return ( model.solver.status == OPTIMAL and model.solver.objective.value >= self.lower_bound
)
[docs]def gapfill( model: Model, universal: Optional[Model] = None, lower_bound: float = 0.05, penalties: Optional[Dict[str, "Reaction"]] = None, demand_reactions: bool = True, exchange_reactions: bool = False, iterations: int = 1, ): """Perform gap filling on a model. Parameters ---------- model : cobra.Model The model to perform gap filling on. universal : cobra.Model, optional A universal model with reactions that can be used to complete the model. Only gapfill considering demand and exchange reactions if left missing (default None). lower_bound : float, optional The minimally accepted flux for the objective in the filled model. (default 0.05). penalties : dict, optional A dictionary with keys being 'universal' (all reactions included in the universal model), 'exchange' and 'demand' (all additionally added exchange and demand reactions) for the three reaction types. Can also have reaction identifiers for reaction specific costs. Defaults are 1, 100 and 1 respectively (default None). exchange_reactions : bool, optional Consider adding exchange (uptake) reactions for all metabolites in the model (default False). demand_reactions : bool, optional Consider adding demand reactions for all metabolites (default True). iterations : int, optional The number of rounds of gap filling to perform. For every iteration, the penalty for every used reaction increases linearly. This way, the algorithm is encouraged to search for alternative solutions which may include previously used reactions i.e., with enough iterations pathways including 10 steps will eventually be reported even if the shortest pathway is a single reaction (default 1). Returns ------- list of list of cobra.Reaction A list of lists with on set of reactions that completes the model per requested iteration. Examples -------- >>> from cobra.io import load_model >>> from cobra import Model >>> from cobra.flux_analysis import gapfill >>> model = load_model("iYS1720") >>> universal = Model("universal") >>> universal.add_reactions([model.reactions.GF6PTA.copy()]) >>> model.remove_reactions([model.reactions.GF6PTA]) >>> gapfill(model, universal) [[<Reaction GF6PTA at 0x12206a280>]] """ gapfiller = GapFiller( model, universal=universal, lower_bound=lower_bound, penalties=penalties, demand_reactions=demand_reactions, exchange_reactions=exchange_reactions, ) return gapfiller.fill(iterations=iterations)