# Source code for cobra.flux_analysis.gapfilling

```
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from optlang.interface import OPTIMAL
from optlang.symbolics import Zero, add
from cobra.core import Model
from cobra.util import fix_objective_as_constraint
[docs]class GapFiller(object):
"""Class for performing gap filling.
This class implements gap filling based on a mixed-integer approach,
very similar to that described in [1]_ and the 'no-growth but growth'
part of [2]_ but with minor adjustments. In short, we add indicator
variables for using the reactions in the universal model, z_i and then
solve problem
minimize \sum_i c_i * z_i
s.t. Sv = 0
v_o >= t
lb_i <= v_i <= ub_i
v_i = 0 if z_i = 0
where lb, ub are the upper, lower flux bounds for reaction i, c_i is a
cost parameter and the objective v_o is greater than the lower bound t.
The default costs are 1 for reactions from the universal model, 100 for
exchange (uptake) reactions added and 1 for added demand reactions.
Note that this is a mixed-integer linear program and as such will
expensive to solve for large models. Consider using alternatives [3]_
such as CORDA instead [4,5]_.
Parameters
----------
model : cobra.Model
The model to perform gap filling on.
universal : cobra.Model
A universal model with reactions that can be used to complete the
model.
lower_bound : float
The minimally accepted flux for the objective in the filled model.
penalties : dict, None
A dictionary with keys being 'universal' (all reactions included in
the universal model), 'exchange' and 'demand' (all additionally
added exchange and demand reactions) for the three reaction types.
Can also have reaction identifiers for reaction specific costs.
Defaults are 1, 100 and 1 respectively.
integer_threshold : float
The threshold at which a value is considered non-zero (aka
integrality threshold). If gapfilled models fail to validate,
you may want to lower this value.
exchange_reactions : bool
Consider adding exchange (uptake) reactions for all metabolites
in the model.
demand_reactions : bool
Consider adding demand reactions for all metabolites.
References
----------
.. [1] Reed, Jennifer L., Trina R. Patel, Keri H. Chen, Andrew R. Joyce,
Margaret K. Applebee, Christopher D. Herring, Olivia T. Bui, Eric M.
Knight, Stephen S. Fong, and Bernhard O. Palsson. “Systems Approach
to Refining Genome Annotation.” Proceedings of the National Academy
of Sciences 103, no. 46 (2006): 17480–17484.
[2] Kumar, Vinay Satish, and Costas D. Maranas. “GrowMatch: An
Automated Method for Reconciling In Silico/In Vivo Growth
Predictions.” Edited by Christos A. Ouzounis. PLoS Computational
Biology 5, no. 3 (March 13, 2009): e1000308.
doi:10.1371/journal.pcbi.1000308.
[3] http://opencobra.github.io/cobrapy/tags/gapfilling/
[4] Schultz, André, and Amina A. Qutub. “Reconstruction of
Tissue-Specific Metabolic Networks Using CORDA.” Edited by Costas D.
Maranas. PLOS Computational Biology 12, no. 3 (March 4, 2016):
e1004808. doi:10.1371/journal.pcbi.1004808.
[5] Diener, Christian https://github.com/cdiener/corda
"""
def __init__(
self,
model,
universal=None,
lower_bound=0.05,
penalties=None,
exchange_reactions=False,
demand_reactions=True,
integer_threshold=1e-6,
):
self.original_model = model
self.lower_bound = lower_bound
self.model = model.copy()
tolerances = self.model.solver.configuration.tolerances
tolerances.integrality = integer_threshold
self.universal = universal.copy() if universal else Model("universal")
self.penalties = dict(universal=1, exchange=100, demand=1)
if penalties is not None:
self.penalties.update(penalties)
self.integer_threshold = integer_threshold
self.indicators = list()
self.costs = dict()
self.extend_model(exchange_reactions, demand_reactions)
fix_objective_as_constraint(self.model, bound=lower_bound)
self.add_switches_and_objective()
[docs] def extend_model(self, exchange_reactions=False, demand_reactions=True):
"""Extend gapfilling model.
Add reactions from universal model and optionally exchange and
demand reactions for all metabolites in the model to perform
gapfilling on.
Parameters
----------
exchange_reactions : bool
Consider adding exchange (uptake) reactions for all metabolites
in the model.
demand_reactions : bool
Consider adding demand reactions for all metabolites.
"""
for rxn in self.universal.reactions:
rxn.gapfilling_type = "universal"
new_metabolites = self.universal.metabolites.query(
lambda metabolite: metabolite not in self.model.metabolites
)
self.model.add_metabolites(new_metabolites)
existing_exchanges = []
for rxn in self.universal.boundary:
existing_exchanges = existing_exchanges + [
met.id for met in list(rxn.metabolites)
]
for met in self.model.metabolites:
if exchange_reactions:
# check for exchange reaction in model already
if met.id not in existing_exchanges:
rxn = self.universal.add_boundary(
met,
type="exchange_smiley",
lb=-1000,
ub=0,
reaction_id="EX_{}".format(met.id),
)
rxn.gapfilling_type = "exchange"
if demand_reactions:
rxn = self.universal.add_boundary(
met,
type="demand_smiley",
lb=0,
ub=1000,
reaction_id="DM_{}".format(met.id),
)
rxn.gapfilling_type = "demand"
new_reactions = self.universal.reactions.query(
lambda reaction: reaction not in self.model.reactions
)
self.model.add_reactions(new_reactions)
[docs] def update_costs(self):
"""Update the coefficients for the indicator variables in the objective.
Done incrementally so that second time the function is called,
active indicators in the current solutions gets higher cost than the
unused indicators.
"""
for var in self.indicators:
if var not in self.costs:
self.costs[var] = var.cost
else:
if var._get_primal() > self.integer_threshold:
self.costs[var] += var.cost
self.model.objective.set_linear_coefficients(self.costs)
[docs] def add_switches_and_objective(self):
"""Update gapfilling model with switches and the indicator objective."""
constraints = list()
big_m = max(max(abs(b) for b in r.bounds) for r in self.model.reactions)
prob = self.model.problem
for rxn in self.model.reactions:
if not hasattr(rxn, "gapfilling_type"):
continue
indicator = prob.Variable(
name="indicator_{}".format(rxn.id), lb=0, ub=1, type="binary"
)
if rxn.id in self.penalties:
indicator.cost = self.penalties[rxn.id]
else:
indicator.cost = self.penalties[rxn.gapfilling_type]
indicator.rxn_id = rxn.id
self.indicators.append(indicator)
# if z = 1 v_i is allowed non-zero
# v_i - Mz <= 0 and v_i + Mz >= 0
constraint_lb = prob.Constraint(
rxn.flux_expression - big_m * indicator,
ub=0,
name="constraint_lb_{}".format(rxn.id),
sloppy=True,
)
constraint_ub = prob.Constraint(
rxn.flux_expression + big_m * indicator,
lb=0,
name="constraint_ub_{}".format(rxn.id),
sloppy=True,
)
constraints.extend([constraint_lb, constraint_ub])
self.model.add_cons_vars(self.indicators)
self.model.add_cons_vars(constraints, sloppy=True)
self.model.objective = prob.Objective(Zero, direction="min", sloppy=True)
self.model.objective.set_linear_coefficients({i: 1 for i in self.indicators})
self.update_costs()
[docs] def fill(self, iterations=1):
"""Perform the gapfilling by iteratively solving the model, updating
the costs and recording the used reactions.
Parameters
----------
iterations : int
The number of rounds of gapfilling to perform. For every
iteration, the penalty for every used reaction increases
linearly. This way, the algorithm is encouraged to search for
alternative solutions which may include previously used
reactions. I.e., with enough iterations pathways including 10
steps will eventually be reported even if the shortest pathway
is a single reaction.
Returns
-------
iterable
A list of lists where each element is a list reactions that were
used to gapfill the model.
Raises
------
RuntimeError
If the model fails to be validated (i.e. the original model with
the proposed reactions added, still cannot get the required flux
through the objective).
"""
used_reactions = list()
for i in range(iterations):
self.model.slim_optimize(
error_value=None, message="gapfilling optimization failed"
)
solution = [
self.model.reactions.get_by_id(ind.rxn_id)
for ind in self.indicators
if ind._get_primal() > self.integer_threshold
]
if not self.validate(solution):
raise RuntimeError(
"failed to validate gapfilled model, "
"try lowering the integer_threshold"
)
used_reactions.append(solution)
self.update_costs()
return used_reactions
[docs] def validate(self, reactions):
with self.original_model as model:
mets = [x.metabolites for x in reactions]
all_keys = set().union(*(d.keys() for d in mets))
model.add_metabolites(all_keys)
model.add_reactions(reactions)
model.slim_optimize()
return (
model.solver.status == OPTIMAL
and model.solver.objective.value >= self.lower_bound
)
[docs]def gapfill(
model,
universal=None,
lower_bound=0.05,
penalties=None,
demand_reactions=True,
exchange_reactions=False,
iterations=1,
):
"""Perform gapfilling on a model.
See documentation for the class GapFiller.
Parameters
----------
model : cobra.Model
The model to perform gap filling on.
universal : cobra.Model, None
A universal model with reactions that can be used to complete the
model. Only gapfill considering demand and exchange reactions if
left missing.
lower_bound : float
The minimally accepted flux for the objective in the filled model.
penalties : dict, None
A dictionary with keys being 'universal' (all reactions included in
the universal model), 'exchange' and 'demand' (all additionally
added exchange and demand reactions) for the three reaction types.
Can also have reaction identifiers for reaction specific costs.
Defaults are 1, 100 and 1 respectively.
iterations : int
The number of rounds of gapfilling to perform. For every iteration,
the penalty for every used reaction increases linearly. This way,
the algorithm is encouraged to search for alternative solutions
which may include previously used reactions. I.e., with enough
iterations pathways including 10 steps will eventually be reported
even if the shortest pathway is a single reaction.
exchange_reactions : bool
Consider adding exchange (uptake) reactions for all metabolites
in the model.
demand_reactions : bool
Consider adding demand reactions for all metabolites.
Returns
-------
iterable
list of lists with on set of reactions that completes the model per
requested iteration.
Examples
--------
import cobra as ct
>>> from cobra import Model
>>> from cobra.flux_analysis import gapfill
>>> model = ct.create_test_model("salmonella")
>>> universal = Model('universal')
>>> universal.add_reactions(model.reactions.GF6PTA.copy())
>>> model.remove_reactions([model.reactions.GF6PTA])
>>> gapfill(model, universal)
"""
gapfiller = GapFiller(
model,
universal=universal,
lower_bound=lower_bound,
penalties=penalties,
demand_reactions=demand_reactions,
exchange_reactions=exchange_reactions,
)
return gapfiller.fill(iterations=iterations)
```