"""Provide the base class and utility function for gap filling."""
import logging
from typing import TYPE_CHECKING, Dict, List, Optional
from optlang.interface import OPTIMAL
from optlang.symbolics import Zero
from ..core import Model
from ..util import fix_objective_as_constraint, interface_to_str
if TYPE_CHECKING:
from cobra import Reaction
# Module-level logger, named after this module so messages can be filtered
# per module by the application's logging configuration.
logger = logging.getLogger(__name__)
class GapFiller:
    r"""
    The base class for performing gap filling.

    This class implements gap filling based on a mixed-integer approach,
    very similar to that described in [1]_ and the 'no-growth but growth'
    part of [2]_ but with minor adjustments. In short, we add indicator
    variables for using the reactions in the universal model, z_i and then
    solve problem

        minimize: \sum_i c_i * z_i
        s.t.    : Sv = 0
                  v_o \ge t
                  lb_i \le v_i \le ub_i
                  v_i = 0 if z_i = 0

    where lb, ub are respectively the lower, upper flux bounds for reaction i,
    c_i is a cost parameter and the objective v_o is greater than the lower
    bound t. The default costs are 1 for reactions from the universal model,
    100 for exchange (uptake) reactions added and 1 for added demand reactions.

    Note that this is a mixed-integer linear program and as such will be
    expensive to solve for large models. Consider using alternatives [3]_
    such as CORDA instead [4,5]_ .

    Parameters
    ----------
    model : cobra.Model
        The model to perform gap filling on.
    universal : cobra.Model, optional
        A universal model with reactions that can be used to complete the
        `model` (default None).
    lower_bound : float, optional
        The minimally accepted flux for the objective in the filled model
        (default 0.05).
    penalties : dict, optional
        A dictionary with keys being 'universal' (all reactions included in
        the universal model), 'exchange' and 'demand' (all additionally
        added exchange and demand reactions) for the three reaction types.
        Can also have reaction identifiers for reaction specific costs.
        Defaults are 1, 100 and 1 respectively (default None).
    exchange_reactions : bool, optional
        Consider adding exchange (uptake) reactions for all metabolites
        in the model (default False).
    demand_reactions : bool, optional
        Consider adding demand reactions for all metabolites (default True).
    integer_threshold : float, optional
        The threshold at which a value is considered non-zero (aka
        integrality threshold). If gapfilled models fail to validate,
        you may want to lower this value (default 1E-6).

    Attributes
    ----------
    indicators: list of optlang.interface.Variable
        The list of symbolic indicator variables.
    costs: dict of {optlang.interface.Variable: float}
        The dictionary with symbolic variables as keys and their cost as
        values.

    References
    ----------
    .. [1] Reed, Jennifer L., Trina R. Patel, Keri H. Chen, Andrew R. Joyce,
       Margaret K. Applebee, Christopher D. Herring, Olivia T. Bui, Eric M.
       Knight, Stephen S. Fong, and Bernhard O. Palsson. “Systems Approach
       to Refining Genome Annotation.” Proceedings of the National Academy
       of Sciences 103, no. 46 (2006): 17480–17484.
    .. [2] Kumar, Vinay Satish, and Costas D. Maranas. “GrowMatch: An
       Automated Method for Reconciling In Silico/In Vivo Growth
       Predictions.” Edited by Christos A. Ouzounis. PLoS Computational
       Biology 5, no. 3 (March 13, 2009): e1000308.
       doi:10.1371/journal.pcbi.1000308.
    .. [3] http://opencobra.github.io/cobrapy/tags/gapfilling/
    .. [4] Schultz, André, and Amina A. Qutub. “Reconstruction of
       Tissue-Specific Metabolic Networks Using CORDA.” Edited by Costas D.
       Maranas. PLOS Computational Biology 12, no. 3 (March 4, 2016):
       e1004808. doi:10.1371/journal.pcbi.1004808.
    .. [5] Diener, Christian https://github.com/cdiener/corda

    """

    def __init__(
        self,
        model: "Model",
        universal: Optional["Model"] = None,
        lower_bound: float = 0.05,
        penalties: Optional[Dict[str, float]] = None,
        exchange_reactions: bool = False,
        demand_reactions: bool = True,
        integer_threshold: float = 1e-6,
        **kwargs,
    ) -> None:
        """Initialize a new GapFiller object.

        The given `model` is kept untouched as ``original_model`` (it is only
        used for validation); all gap filling machinery is built on a copy.

        Other Parameters
        ----------------
        kwargs :
            Accepted for call compatibility; this initializer does not use
            them.

        """
        self.original_model = model
        self.lower_bound = lower_bound
        self.model = model.copy()

        # Not every solver interface exposes an integrality tolerance; only
        # warn in that case rather than failing the construction.
        tolerances = self.model.solver.configuration.tolerances
        try:
            tolerances.integrality = integer_threshold
        except AttributeError:
            logger.warning(
                f"The current solver interface {interface_to_str(self.model.problem)} "
                f"doesn't support setting the integrality tolerance."
            )
        # TODO (Midnighter): One could debate how useful it is to compare against this
        #  threshold when it is not supported by the chosen solver.
        self.integer_threshold = integer_threshold

        # Work on a copy of the universal model because `extend_model` tags
        # its reactions and adds boundary reactions to it.
        self.universal = universal.copy() if universal else Model("universal")

        # Start from the default costs and let user supplied entries (type
        # keys or reaction identifiers) override / extend them.
        self.penalties = dict(universal=1, exchange=100, demand=1)
        if penalties is not None:
            self.penalties.update(penalties)

        self.indicators = []
        self.costs = {}

        self.extend_model(exchange_reactions, demand_reactions)
        # The required objective flux becomes a constraint so that the
        # objective itself can be replaced by the indicator cost function.
        fix_objective_as_constraint(self.model, bound=lower_bound)
        self.add_switches_and_objective()

    def extend_model(
        self, exchange_reactions: bool = False, demand_reactions: bool = True
    ) -> None:
        """Extend gap filling model.

        Add reactions from universal model and optionally exchange and
        demand reactions for all metabolites in the model to perform
        gap filling on.

        Every candidate reaction is tagged with a ``gapfilling_type``
        attribute ("universal", "exchange" or "demand"), which is later used
        to select the reactions that receive an indicator variable and to
        look up their default penalty.

        Parameters
        ----------
        exchange_reactions : bool, optional
            Consider adding exchange (uptake) reactions for all metabolites
            in the model (default False).
        demand_reactions : bool, optional
            Consider adding demand reactions for all metabolites
            (default True).

        """
        for rxn in self.universal.reactions:
            rxn.gapfilling_type = "universal"

        new_metabolites = self.universal.metabolites.query(
            lambda metabolite: metabolite not in self.model.metabolites
        )
        self.model.add_metabolites(new_metabolites)

        # Identifiers of all metabolites that already take part in a boundary
        # reaction of the universal model. Collected once, as a set, so the
        # per-metabolite membership test below is a constant-time lookup.
        existing_exchanges = {
            met.id
            for rxn in self.universal.boundary
            for met in rxn.metabolites
        }

        for met in self.model.metabolites:
            if exchange_reactions:
                # Skip metabolites that are already covered by a boundary
                # reaction of the universal model.
                if met.id not in existing_exchanges:
                    rxn = self.universal.add_boundary(
                        met,
                        type="exchange_smiley",
                        lb=-1000,
                        ub=0,
                        reaction_id=f"EX_{met.id}",
                    )
                    rxn.gapfilling_type = "exchange"
            if demand_reactions:
                rxn = self.universal.add_boundary(
                    met,
                    type="demand_smiley",
                    lb=0,
                    ub=1000,
                    reaction_id=f"DM_{met.id}",
                )
                rxn.gapfilling_type = "demand"

        # Finally copy over every candidate reaction the model lacks.
        new_reactions = self.universal.reactions.query(
            lambda reaction: reaction not in self.model.reactions
        )
        self.model.add_reactions(new_reactions)

    def update_costs(self) -> None:
        """Update coefficients for the indicator variables in the objective.

        Done incrementally so that second time the function is called,
        active indicators in the current solutions gets higher cost than the
        unused indicators.

        An indicator seen for the first time is registered with its base
        cost. On later calls, the base cost is added once more for every
        indicator whose primal value exceeds ``integer_threshold``.
        """
        for var in self.indicators:
            if var not in self.costs:
                self.costs[var] = var.cost
            elif var._get_primal() > self.integer_threshold:
                self.costs[var] += var.cost
        self.model.objective.set_linear_coefficients(self.costs)

    def add_switches_and_objective(self) -> None:
        """Update gap filling model with switches and indicator objective.

        For every reaction tagged with a ``gapfilling_type`` a binary
        indicator variable and two big-M constraints are created, so that
        the reaction can only carry flux when its indicator is switched on.
        The model objective is replaced by a minimization over the
        indicators, weighted by their costs.
        """
        constraints = []
        # Big-M constant: the largest absolute flux bound of any reaction.
        big_m = max(max(abs(b) for b in r.bounds) for r in self.model.reactions)
        prob = self.model.problem

        for rxn in self.model.reactions:
            # Reactions of the original model carry no tag and stay
            # unconditionally available.
            if not hasattr(rxn, "gapfilling_type"):
                continue
            indicator = prob.Variable(
                name=f"indicator_{rxn.id}", lb=0, ub=1, type="binary"
            )
            # A reaction specific penalty takes precedence over the penalty
            # of the reaction's type.
            if rxn.id in self.penalties:
                indicator.cost = self.penalties[rxn.id]
            else:
                indicator.cost = self.penalties[rxn.gapfilling_type]
            indicator.rxn_id = rxn.id
            self.indicators.append(indicator)

            # if z = 1 v_i is allowed non-zero
            # v_i - Mz <= 0   and   v_i + Mz >= 0
            constraint_lb = prob.Constraint(
                rxn.flux_expression - big_m * indicator,
                ub=0,
                name=f"constraint_lb_{rxn.id}",
                sloppy=True,
            )
            constraint_ub = prob.Constraint(
                rxn.flux_expression + big_m * indicator,
                lb=0,
                name=f"constraint_ub_{rxn.id}",
                sloppy=True,
            )
            constraints.extend([constraint_lb, constraint_ub])

        self.model.add_cons_vars(self.indicators)
        self.model.add_cons_vars(constraints, sloppy=True)
        self.model.objective = prob.Objective(Zero, direction="min", sloppy=True)
        self.model.objective.set_linear_coefficients({i: 1 for i in self.indicators})
        # Replace the unit coefficients by the configured penalties.
        self.update_costs()

    def fill(self, iterations: int = 1) -> List[List["Reaction"]]:
        """Perform the gap filling.

        With every iteration, it solves the model, updates the costs and
        records the used reactions.

        Parameters
        ----------
        iterations : int, optional
            The number of rounds of gap filling to perform. For every
            iteration, the penalty for every used reaction increases
            linearly. This way, the algorithm is encouraged to search for
            alternative solutions which may include previously used
            reactions i.e., with enough iterations pathways including 10
            steps will eventually be reported even if the shortest pathway
            is a single reaction (default 1).

        Returns
        -------
        list of list of cobra.Reaction
            A list of lists where each element is a list of reactions that
            were used to gap fill the model.

        Raises
        ------
        RuntimeError
            If the model fails to be validated (i.e. the original model with
            the proposed reactions added, still cannot get the required flux
            through the objective).

        """
        used_reactions = []
        for _ in range(iterations):
            self.model.slim_optimize(
                error_value=None, message="gap filling optimization failed"
            )
            # A candidate reaction is part of the solution when its indicator
            # is switched on, i.e. its value exceeds the threshold.
            solution = [
                self.model.reactions.get_by_id(ind.rxn_id)
                for ind in self.indicators
                if ind._get_primal() > self.integer_threshold
            ]
            if not self.validate(solution):
                raise RuntimeError(
                    "Failed to validate gap filled model, "
                    "try lowering the integer threshold."
                )
            used_reactions.append(solution)
            # Make the reactions just used more expensive for the next round.
            self.update_costs()
        return used_reactions

    def validate(self, reactions: List["Reaction"]) -> bool:
        """Validate the model.

        The reactions (and their metabolites) are added to the original
        model inside a ``with`` block on that model, and the model is then
        optimized.

        Parameters
        ----------
        reactions: list of cobra.Reaction
            The reactions to add to the model for validation.

        Returns
        -------
        bool
            Whether the model is valid or not, i.e. whether the solver
            status is optimal and the objective value reaches
            ``lower_bound``.

        """
        with self.original_model as model:
            stoichiometries = [rxn.metabolites for rxn in reactions]
            needed_metabolites = set().union(
                *(stoich.keys() for stoich in stoichiometries)
            )
            model.add_metabolites(needed_metabolites)
            model.add_reactions(reactions)
            model.slim_optimize()
            return (
                model.solver.status == OPTIMAL
                and model.solver.objective.value >= self.lower_bound
            )
def gapfill(
    model: Model,
    universal: Optional[Model] = None,
    lower_bound: float = 0.05,
    penalties: Optional[Dict[str, float]] = None,
    demand_reactions: bool = True,
    exchange_reactions: bool = False,
    iterations: int = 1,
    integer_threshold: float = 1e-6,
) -> List[List["Reaction"]]:
    """Perform gap filling on a model.

    Convenience wrapper that builds a `GapFiller` from the given arguments
    and returns the result of its `fill` method.

    Parameters
    ----------
    model : cobra.Model
        The model to perform gap filling on.
    universal : cobra.Model, optional
        A universal model with reactions that can be used to complete the
        model. Only gapfill considering demand and exchange reactions if
        left missing (default None).
    lower_bound : float, optional
        The minimally accepted flux for the objective in the filled model.
        (default 0.05).
    penalties : dict, optional
        A dictionary with keys being 'universal' (all reactions included in
        the universal model), 'exchange' and 'demand' (all additionally
        added exchange and demand reactions) for the three reaction types.
        Can also have reaction identifiers for reaction specific costs.
        Defaults are 1, 100 and 1 respectively (default None).
    exchange_reactions : bool, optional
        Consider adding exchange (uptake) reactions for all metabolites
        in the model (default False).
    demand_reactions : bool, optional
        Consider adding demand reactions for all metabolites (default True).
    iterations : int, optional
        The number of rounds of gap filling to perform. For every iteration,
        the penalty for every used reaction increases linearly. This way,
        the algorithm is encouraged to search for alternative solutions
        which may include previously used reactions i.e., with enough
        iterations pathways including 10 steps will eventually be reported
        even if the shortest pathway is a single reaction (default 1).
    integer_threshold : float, optional
        The threshold at which a value is considered non-zero (aka
        integrality threshold). If gapfilled models fail to validate,
        you may want to lower this value (default 1E-6).

    Returns
    -------
    list of list of cobra.Reaction
        A list of lists with one set of reactions that completes the model
        per requested iteration.

    Raises
    ------
    RuntimeError
        If a proposed set of reactions fails validation against the
        original model.

    Examples
    --------
    >>> from cobra.io import load_model
    >>> from cobra import Model
    >>> from cobra.flux_analysis import gapfill
    >>> model = load_model("iYS1720")
    >>> universal = Model("universal")
    >>> universal.add_reactions([model.reactions.GF6PTA.copy()])
    >>> model.remove_reactions([model.reactions.GF6PTA])
    >>> gapfill(model, universal)
    [[<Reaction GF6PTA at 0x12206a280>]]

    """
    gapfiller = GapFiller(
        model,
        universal=universal,
        lower_bound=lower_bound,
        penalties=penalties,
        demand_reactions=demand_reactions,
        exchange_reactions=exchange_reactions,
        # Forwarded so callers can act on the "try lowering the integer
        # threshold" advice without instantiating GapFiller themselves.
        integer_threshold=integer_threshold,
    )
    return gapfiller.fill(iterations=iterations)