"""Provide functions for reaction and gene deletions."""
from functools import partial
from itertools import product
from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union
import pandas as pd
from optlang.exceptions import SolverError
from ..core import Configuration, Gene, Model, Reaction
from ..util import ProcessPool
from ..util import solver as sutil
from .moma import add_moma
from .room import add_room
if TYPE_CHECKING:
from cobra import Solution
# Shared package settings. `_multi_deletion` falls back to
# `configuration.processes` when no explicit process count is passed.
configuration = Configuration()
def _get_growth(model: Model) -> Tuple[float, str]:
    """Optimize the `model` and report its growth and the solver status.

    If the solver holds a variable named ``moma_old_objective``, the primal
    value of that variable is reported as growth after optimizing. Otherwise
    the value returned by ``model.slim_optimize()`` is reported.

    Parameters
    ----------
    model : cobra.Model
        The model to obtain growth for.

    Returns
    -------
    tuple of (float, str)
        The obtained growth value and the solver status. The growth is nan
        if a ``SolverError`` is raised while optimizing.

    """
    try:
        tracks_old_objective = "moma_old_objective" in model.solver.variables
        growth_value = model.slim_optimize()
        if tracks_old_objective:
            # The optimized value is discarded here; the growth is read from
            # the dedicated variable instead.
            growth_value = model.solver.variables.moma_old_objective.primal
    except SolverError:
        growth_value = float("nan")
    return growth_value, model.solver.status
def _reaction_deletion(
    model: Model, reaction_ids: List[str]
) -> Tuple[List[str], float, str]:
    """Knock out the given reactions and measure the resulting growth.

    Parameters
    ----------
    model : cobra.Model
        The model to perform reaction deletion on.
    reaction_ids : list of str
        The reaction IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        The `reaction_ids` as passed in, the growth of the model and the
        solver status.

    """
    with model:
        # Knock-outs and the growth measurement both happen inside the
        # model context.
        for reaction in map(model.reactions.get_by_id, reaction_ids):
            reaction.knock_out()
        outcome = _get_growth(model)
    return (reaction_ids, *outcome)
def _reaction_deletion_worker(ids: List[str]) -> Tuple[List[str], float, str]:
    """Run a reaction deletion against the model stored for this worker.

    The model is read from the module-level ``_model``, which
    `_init_worker` assigns.

    Parameters
    ----------
    ids : list of str
        The reaction IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        The reaction IDs knocked out, the growth of the model and the
        solver status.

    """
    global _model
    knocked_out, growth, status = _reaction_deletion(_model, ids)
    return knocked_out, growth, status
def _gene_deletion(model: Model, gene_ids: List[str]) -> Tuple[List[str], float, str]:
    """Knock out the given genes and measure the resulting growth.

    Parameters
    ----------
    model : cobra.Model
        The model to perform gene deletion on.
    gene_ids : list of str
        The gene IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        The `gene_ids` as passed in, the growth of the model and the solver
        status.

    """
    with model:
        # Knock-outs and the growth measurement both happen inside the
        # model context.
        for gene in map(model.genes.get_by_id, gene_ids):
            gene.knock_out()
        outcome = _get_growth(model)
    return (gene_ids, *outcome)
def _gene_deletion_worker(ids: List[str]) -> Tuple[List[str], float, str]:
    """Run a gene deletion against the model stored for this worker.

    The model is read from the module-level ``_model``, which
    `_init_worker` assigns.

    Parameters
    ----------
    ids : list of str
        The gene IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        The gene IDs knocked out, the growth of the model and the solver
        status.

    """
    global _model
    knocked_out, growth, status = _gene_deletion(_model, ids)
    return knocked_out, growth, status
def _init_worker(model: Model) -> None:
    """Store the `model` in the module-level ``_model`` of this process.

    `_multi_deletion` passes this function as the pool ``initializer``; the
    worker functions `_reaction_deletion_worker` and `_gene_deletion_worker`
    read ``_model`` afterwards.

    Parameters
    ----------
    model : cobra.Model
        The model the worker functions should operate on.

    """
    global _model
    _model = model
def _multi_deletion(
    model: Model,
    entity: str,
    element_lists: List[Union[Gene, Reaction]],
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Provide a common interface for single or multiple knockouts.

    Every combination drawn from the cartesian product of `element_lists`
    is knocked out once. Combinations are compared as unordered sets, so
    duplicates (including permutations of the same elements) are evaluated
    only a single time.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    entity : {"gene", "reaction"}
        The entity to knockout.
    element_lists : list
        One list of identifiers per knockout position, as produced by
        `_element_lists`. The lists are combined with `itertools.product`.
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None). The value
        is capped at the number of distinct knockout combinations.
    **kwargs :
        Passed on to `add_room` when a ROOM method is selected.

    Returns
    -------
    pandas.DataFrame
        One row per distinct knockout combination with the columns

        ids : set of str
            The gene or reaction identifiers that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    Raises
    ------
    RuntimeError
        If `method` is "moma" and the model's solver is not listed in
        ``sutil.qp_solvers``.
    KeyError
        If `entity` is neither "gene" nor "reaction".

    """
    solver = sutil.interface_to_str(model.problem.__name__)
    if method == "moma" and solver not in sutil.qp_solvers:
        raise RuntimeError(
            f"Cannot use MOMA since '{solver}' is not QP-capable. "
            "Please choose a different solver or use FBA only."
        )

    if processes is None:
        processes = configuration.processes

    def _to_frame(outcomes):
        # Each outcome is an (ids, growth, status) triple from a deletion.
        rows = [(set(ids), growth, status) for ids, growth, status in outcomes]
        return pd.DataFrame(rows, columns=["ids", "growth", "status"])

    with model:
        if "moma" in method:
            add_moma(model, solution=solution, linear="linear" in method)
        elif "room" in method:
            add_room(model, solution=solution, linear="linear" in method, **kwargs)

        knockouts = {frozenset(combo) for combo in product(*element_lists)}
        processes = min(processes, len(knockouts))

        if processes <= 1:
            # Serial path: run every knockout directly on `model`.
            delete = {"gene": _gene_deletion, "reaction": _reaction_deletion}[entity]
            return _to_frame(map(partial(delete, model), knockouts))

        worker = {
            "gene": _gene_deletion_worker,
            "reaction": _reaction_deletion_worker,
        }[entity]
        chunk_size = len(knockouts) // processes
        with ProcessPool(
            processes, initializer=_init_worker, initargs=(model,)
        ) as pool:
            # The lazy result iterator is consumed while the pool is open.
            return _to_frame(
                pool.imap_unordered(worker, knockouts, chunksize=chunk_size)
            )
def _entities_ids(entities: List[Union[str, Gene, Reaction]]) -> List[str]:
    """Return the IDs of the `entities`.

    Each element is resolved on its own: an element with an ``id`` attribute
    contributes that attribute, any other element (e.g. an ID string) is
    passed through unchanged. Lists mixing objects and ID strings are
    therefore handled, and `entities` is iterated exactly once.

    Parameters
    ----------
    entities : list of str or cobra.Gene or cobra.Reaction
        The list of entities whose IDs need to be returned.

    Returns
    -------
    list of str
        The IDs of the `entities`, in the order given.

    """
    return [getattr(entity, "id", entity) for entity in entities]
def _element_lists(
    entities: List[Union[str, Gene, Reaction]],
    *ids: Optional[List[Union[str, Gene, Reaction]]],
) -> List[List[str]]:
    """Resolve each of the given lists to a list of IDs.

    Parameters
    ----------
    entities : list of str or cobra.Gene or cobra.Reaction
        The fallback entities used when the first list in `ids` is None.
    *ids : list of str or cobra.Gene or cobra.Reaction, or None
        The lists to resolve. At least one must be given. A first list of
        None is replaced by `entities`; any later list that is None reuses
        the ID list resolved for the position before it.

    Returns
    -------
    list of list of str
        One list of IDs per list passed in `ids`, in the same order.

    """
    id_lists = list(ids)
    if id_lists[0] is None:
        id_lists[0] = entities
    result = [_entities_ids(id_lists[0])]
    for candidate in id_lists[1:]:
        # None means "same as the previous position"; the same list object
        # is reused rather than copied.
        result.append(result[-1] if candidate is None else _entities_ids(candidate))
    return result
def single_reaction_deletion(
    model: Model,
    reaction_list: Optional[List[Union[Reaction, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each reaction from `reaction_list`.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    reaction_list : list of cobra.Reaction or str, optional
        The reactions to be knocked out. If not passed, all the reactions
        from the model are used (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        A representation of all single reaction deletions with the columns

        ids : set of str
            The reaction identifier that was knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    targets = _element_lists(model.reactions, reaction_list)
    return _multi_deletion(
        model,
        "reaction",
        element_lists=targets,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )
def single_gene_deletion(
    model: Model,
    gene_list: Optional[List[Union[Gene, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each gene from `gene_list`.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    gene_list : list of cobra.Gene or str, optional
        The genes to be deleted. If not passed, all the genes from the
        model are used (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        A representation of all single gene deletions with the columns

        ids : set of str
            The gene identifier that was knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    targets = _element_lists(model.genes, gene_list)
    return _multi_deletion(
        model,
        "gene",
        element_lists=targets,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )
def double_reaction_deletion(
    model: Model,
    reaction_list1: Optional[List[Union[Reaction, str]]] = None,
    reaction_list2: Optional[List[Union[Reaction, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each reaction pair from the combinations of two given lists.

    We say 'pair' here but the order does not matter.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    reaction_list1 : list of cobra.Reaction or str, optional
        The first reaction list to be deleted. If not passed,
        all the reactions from the model are used (default None).
    reaction_list2 : list of cobra.Reaction or str, optional
        The second reaction list to be deleted. If not passed, the
        reactions resolved for `reaction_list1` are reused (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        A representation of all combinations of reaction deletions with the
        columns

        ids : set of str
            The reaction identifiers that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    first_ids, second_ids = _element_lists(
        model.reactions, reaction_list1, reaction_list2
    )
    return _multi_deletion(
        model,
        "reaction",
        element_lists=[first_ids, second_ids],
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )
def double_gene_deletion(
    model: Model,
    gene_list1: Optional[List[Union[Gene, str]]] = None,
    gene_list2: Optional[List[Union[Gene, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each gene pair from the combination of two given lists.

    We say 'pair' here but the order does not matter.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    gene_list1 : list of cobra.Gene or str, optional
        The first gene list to be deleted. If not passed,
        all the genes from the model are used (default None).
    gene_list2 : list of cobra.Gene or str, optional
        The second gene list to be deleted. If not passed, the genes
        resolved for `gene_list1` are reused (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        A representation of all combinations of gene deletions with the
        columns

        ids : set of str
            The gene identifiers that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    first_ids, second_ids = _element_lists(model.genes, gene_list1, gene_list2)
    return _multi_deletion(
        model,
        "gene",
        element_lists=[first_ids, second_ids],
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )
@pd.api.extensions.register_dataframe_accessor("knockout")
class KnockoutAccessor:
    """Access unique combinations of reactions in deletion results.

    This allows access in the form of `results.knockout[rxn1]` or
    `results.knockout["rxn1_id"]`. Each individual entry will return a
    deletion so `results.knockout[rxn1, rxn2]` will return two deletions
    (for individual knockouts of rxn1 and rxn2 respectively).

    Multi-deletions can be accessed by passing in sets like
    `results.knockout[{rxn1, rxn2}]` which denotes the double deletion of
    both reactions. Thus, the following are allowed index elements:

    - single reactions or genes (depending on whether it is a gene or
      reaction deletion)
    - single reaction IDs or gene IDs
    - lists of single reaction IDs or gene IDs (will return one row for
      each element in the list)
    - sets (or frozensets) of reactions or genes (for multi-deletions)
    - sets (or frozensets) of reaction IDs or gene IDs
    - lists of sets of objects or IDs (to get several multi-deletions)

    Objects and ID strings may be mixed freely, both within a list and
    within a set.

    Parameters
    ----------
    pandas_obj : pandas.DataFrame
        A result from one of the deletion methods.

    """

    def __init__(self, pandas_obj: Union[pd.DataFrame, pd.Series]) -> None:
        """Validate the deletion result and keep a reference to it."""
        self._validate(pandas_obj)
        self._result = pandas_obj

    @staticmethod
    def _validate(obj: pd.DataFrame) -> None:
        """Validate the object given.

        Parameters
        ----------
        obj : pandas.DataFrame
            The object to validate.

        Raises
        ------
        AttributeError
            If any of the columns 'ids', 'growth' or 'status' is missing.

        """
        # verify it is a deletion result
        required = ("ids", "growth", "status")
        if any(name not in obj.columns for name in required):
            raise AttributeError("Must be DataFrame returned by a deletion method.")

    @staticmethod
    def _to_id_set(item) -> set:
        """Turn one index element into the set of IDs it denotes."""
        members = item if isinstance(item, (set, frozenset)) else (item,)
        # Resolve per member so that objects and ID strings can be mixed.
        return {getattr(member, "id", member) for member in members}

    def __getitem__(
        self,
        args: Union[
            Gene,
            List[Gene],
            Set[Gene],
            List[Set[Gene]],
            Reaction,
            List[Reaction],
            Set[Reaction],
            List[Set[Reaction]],
            str,
            List[str],
            Set[str],
            List[Set[str]],
        ],
    ) -> pd.DataFrame:
        """Return the deletion result for a particular set of knocked entities.

        Parameters
        ----------
        args : cobra.Reaction, cobra.Gene, str, set, or list
            The deletions to be returned. Accepts:

            - single reactions or genes
            - single reaction IDs or gene IDs
            - lists of single reaction IDs or gene IDs
            - sets (or frozensets) of reactions or genes
            - sets (or frozensets) of reaction IDs or gene IDs
            - lists of sets of objects or IDs

            A tuple is treated like a list. An empty list or tuple selects
            no rows.

        Returns
        -------
        pandas.DataFrame
            The deletion result where the chosen entities have been deleted.
            Each row denotes a deletion. All columns are kept, also when no
            row matches.

        Raises
        ------
        ValueError
            If the first index element is not a reaction, gene, str, set or
            frozenset.

        """
        if not isinstance(args, (tuple, list)):
            args = [args]
        allowed = (Reaction, Gene, str, set, frozenset)
        if args and not isinstance(args[0], allowed):
            raise ValueError(
                "Allowed indices are single cobra.Reaction or cobra.Gene, "
                "lists of cobra.Reaction of cobra.Gene, or lists of sets "
                "of cobra.Reaction or cobra.Gene."
            )
        wanted = [self._to_id_set(item) for item in args]
        # A boolean Series always filters rows. A plain (possibly empty)
        # list could be taken for a column selection instead.
        mask = pd.Series(
            [ids in wanted for ids in self._result["ids"]],
            index=self._result.index,
            dtype=bool,
        )
        return self._result[mask]