Source code for cobra.flux_analysis.deletion

"""Provide functions for reaction and gene deletions."""

from functools import partial
from itertools import product
from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union

import pandas as pd
from optlang.exceptions import SolverError

from ..core import Configuration, Gene, Model, Reaction
from ..util import ProcessPool
from ..util import solver as sutil
from .moma import add_moma
from .room import add_room


if TYPE_CHECKING:
    from cobra import Solution


# Shared configuration object; `_multi_deletion` falls back to
# `configuration.processes` when the caller passes no process count.
configuration = Configuration()
def _get_growth(model: Model) -> Tuple[float, str]:
    """Optimize `model` and report its growth together with the solver status.

    Parameters
    ----------
    model : cobra.Model
        The model to obtain growth for.

    Returns
    -------
    tuple of (float, str)
        The growth value and ``model.solver.status``. The growth is nan if
        the solver raises a ``SolverError`` while optimizing.

    Notes
    -----
    If the solver holds a variable named ``moma_old_objective``, the growth is
    that variable's primal value after optimization instead of the value
    returned by ``slim_optimize`` (presumably the variable is created by
    `add_moma` -- confirm there).

    """
    try:
        has_moma_objective = "moma_old_objective" in model.solver.variables
        objective_value = model.slim_optimize()
        if has_moma_objective:
            growth = model.solver.variables.moma_old_objective.primal
        else:
            growth = objective_value
    except SolverError:
        growth = float("nan")
    status = model.solver.status
    return growth, status
def _reaction_deletion(
    model: Model, reaction_ids: List[str]
) -> Tuple[List[str], float, str]:
    """Knock out a group of reactions and evaluate the resulting growth.

    Parameters
    ----------
    model : cobra.Model
        The model to perform reaction deletion on.
    reaction_ids : list of str
        The reaction IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        A tuple containing reaction IDs knocked out, growth of the model and
        the solver status.

    """
    # Knock-outs and the optimization both happen inside the model context.
    with model:
        for reaction_id in reaction_ids:
            reaction = model.reactions.get_by_id(reaction_id)
            reaction.knock_out()
        outcome = _get_growth(model)
    growth, status = outcome
    return reaction_ids, growth, status
def _reaction_deletion_worker(ids: List[str]) -> Tuple[List[str], float, str]:
    """Run a reaction deletion against the worker process' model.

    The model is taken from the module-level global ``_model``, which is
    assigned by `_init_worker`.

    Parameters
    ----------
    ids : list of str
        The reaction IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        A tuple containing reaction IDs knocked out, growth of the model and
        the solver status.

    """
    global _model
    knockout_result = _reaction_deletion(_model, ids)
    return knockout_result
def _gene_deletion(model: Model, gene_ids: List[str]) -> Tuple[List[str], float, str]:
    """Knock out a group of genes and evaluate the resulting growth.

    Parameters
    ----------
    model : cobra.Model
        The model to perform gene deletion on.
    gene_ids : list of str
        The gene IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        A tuple containing gene IDs knocked out, growth of the model and the
        solver status.

    """
    # Knock-outs and the optimization both happen inside the model context.
    with model:
        for identifier in gene_ids:
            gene = model.genes.get_by_id(identifier)
            gene.knock_out()
        outcome = _get_growth(model)
    growth, status = outcome
    return gene_ids, growth, status
def _gene_deletion_worker(ids: List[str]) -> Tuple[List[str], float, str]:
    """Run a gene deletion against the worker process' model.

    The model is taken from the module-level global ``_model``, which is
    assigned by `_init_worker`.

    Parameters
    ----------
    ids : list of str
        The gene IDs to knock-out.

    Returns
    -------
    tuple of (list of str, float, str)
        A tuple containing gene IDs knocked out, growth of the model and the
        solver status.

    """
    global _model
    knockout_result = _gene_deletion(_model, ids)
    return knockout_result
def _init_worker(model: Model) -> None:
    """Initialize a worker process.

    Stores `model` in the module-level global ``_model`` so that
    `_reaction_deletion_worker` and `_gene_deletion_worker` can read it.
    `_multi_deletion` passes this function as the ``initializer`` of its
    process pool.

    Parameters
    ----------
    model : cobra.Model
        The model the worker process should operate on.

    """
    global _model
    _model = model
def _multi_deletion(
    model: Model,
    entity: str,
    element_lists: List[Union[Gene, Reaction]],
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Provide a common interface for single or multiple knockouts.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    entity : {"gene", "reaction"}
        The entity to knockout. Any other value raises a ``KeyError``.
    element_lists : list of list of str
        One list of identifiers per knockout position. The Cartesian product
        of these lists is formed and each combination is reduced to the set
        of its distinct identifiers, so order and repetition within a
        combination do not matter.
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None). The value is
        capped at the number of distinct knockouts.
    **kwargs :
        Passed on to `add_room` when a ROOM method is selected.

    Returns
    -------
    pandas.DataFrame
        One row per distinct knockout combination with the columns

        ids : set of str
            The gene or reaction identifiers that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    Raises
    ------
    RuntimeError
        If `method` is "moma" and the model's solver is not listed in
        ``sutil.qp_solvers``.

    """
    solver = sutil.interface_to_str(model.problem.__name__)
    if method == "moma" and solver not in sutil.qp_solvers:
        raise RuntimeError(
            f"Cannot use MOMA since '{solver}' is not QP-capable. "
            "Please choose a different solver or use FBA only."
        )

    if processes is None:
        processes = configuration.processes

    def extract_knockout_results(result_iter):
        """Collect (ids, growth, status) triples into a DataFrame."""
        rows = []
        for ids, growth, status in result_iter:
            rows.append((set(ids), growth, status))
        return pd.DataFrame(rows, columns=["ids", "growth", "status"])

    with model:
        use_linear = "linear" in method
        if "moma" in method:
            add_moma(model, solution=solution, linear=use_linear)
        elif "room" in method:
            add_room(model, solution=solution, linear=use_linear, **kwargs)

        # frozensets make (a, b) and (b, a) collapse into a single knockout.
        knockouts = {frozenset(comb) for comb in product(*element_lists)}
        processes = min(processes, len(knockouts))

        if processes > 1:
            worker = {
                "gene": _gene_deletion_worker,
                "reaction": _reaction_deletion_worker,
            }[entity]
            chunk_size = len(knockouts) // processes
            with ProcessPool(
                processes, initializer=_init_worker, initargs=(model,)
            ) as pool:
                results = extract_knockout_results(
                    pool.imap_unordered(worker, knockouts, chunksize=chunk_size)
                )
        else:
            worker = {"gene": _gene_deletion, "reaction": _reaction_deletion}[entity]
            results = extract_knockout_results(
                map(partial(worker, model), knockouts)
            )
    return results
def _entities_ids(entities: List[Union[str, Gene, Reaction]]) -> List[str]:
    """Return the IDs of the `entities`.

    Each element is resolved on its own: an element with an ``id`` attribute
    contributes that attribute, any other element (e.g. a plain ID string)
    is passed through unchanged. Resolving per element means that lists
    mixing objects and ID strings are converted correctly and that one-shot
    iterators are consumed exactly once without losing elements.

    Parameters
    ----------
    entities : list of str or cobra.Gene or cobra.Reaction
        The list of entities whose IDs need to be returned.

    Returns
    -------
    list of str
        The IDs of the `entities`, in iteration order.

    """
    return [getattr(entity, "id", entity) for entity in entities]
def _element_lists(
    entities: List[Union[str, Gene, Reaction]], *ids: List[str]
) -> List[List[str]]:
    """Resolve the knockout candidate lists into lists of IDs.

    Parameters
    ----------
    entities : list of str or cobra.Gene or cobra.Reaction
        The fallback entities, used when the first element of `ids` is None.
    *ids : list of str or cobra.Gene or cobra.Reaction, or None
        One candidate list per knockout position. At least one must be
        given. A first list of None is replaced by `entities`; any later
        list of None reuses the resolved list of the position before it.

    Returns
    -------
    list of list of str
        One list of IDs per element of `ids`, in the same order.

    Raises
    ------
    IndexError
        If no `ids` are passed at all.

    """
    candidates = list(ids)
    first = entities if candidates[0] is None else candidates[0]
    result = [_entities_ids(first)]
    for candidate in candidates[1:]:
        # A missing list shares (not copies) the previous position's IDs.
        result.append(result[-1] if candidate is None else _entities_ids(candidate))
    return result
def single_reaction_deletion(
    model: Model,
    reaction_list: Optional[List[Union[Reaction, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each reaction from `reaction_list`.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    reaction_list : list of cobra.Reaction or str, optional
        The reactions to be knocked out. If not passed, all the reactions
        from the model are used (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        One row per single reaction deletion with the columns

        ids : set of str
            The reaction identifier that was knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    knockout_lists = _element_lists(model.reactions, reaction_list)
    return _multi_deletion(
        model,
        "reaction",
        element_lists=knockout_lists,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )
def single_gene_deletion(
    model: Model,
    gene_list: Optional[List[Union[Gene, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each gene from `gene_list`.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    gene_list : list of cobra.Gene or str, optional
        The genes to be knocked out. If not passed, all the genes from the
        model are used (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        One row per single gene deletion with the columns

        ids : set of str
            The gene identifier that was knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    knockout_lists = _element_lists(model.genes, gene_list)
    return _multi_deletion(
        model,
        "gene",
        element_lists=knockout_lists,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )
def double_reaction_deletion(
    model: Model,
    reaction_list1: Optional[List[Union[Reaction, str]]] = None,
    reaction_list2: Optional[List[Union[Reaction, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each reaction pair from the combinations of two given lists.

    We say 'pair' here but the order does not matter.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    reaction_list1 : list of cobra.Reaction or str, optional
        The first reaction list to be deleted. If not passed, all the
        reactions from the model are used (default None).
    reaction_list2 : list of cobra.Reaction or str, optional
        The second reaction list to be deleted. If not passed, the first
        list (after its own default has been applied) is reused
        (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        One row per distinct combination of reaction deletions with the
        columns

        ids : set of str
            The reaction identifiers that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    first_ids, second_ids = _element_lists(
        model.reactions, reaction_list1, reaction_list2
    )
    pair_lists = [first_ids, second_ids]
    return _multi_deletion(
        model,
        "reaction",
        element_lists=pair_lists,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )
def double_gene_deletion(
    model: Model,
    gene_list1: Optional[List[Union[Gene, str]]] = None,
    gene_list2: Optional[List[Union[Gene, str]]] = None,
    method: str = "fba",
    solution: Optional["Solution"] = None,
    processes: Optional[int] = None,
    **kwargs,
) -> pd.DataFrame:
    """Knock out each gene pair from the combination of two given lists.

    We say 'pair' here but the order does not matter.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    gene_list1 : list of cobra.Gene or str, optional
        The first gene list to be deleted. If not passed, all the genes from
        the model are used (default None).
    gene_list2 : list of cobra.Gene or str, optional
        The second gene list to be deleted. If not passed, the first list
        (after its own default has been applied) is reused (default None).
    method : {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate (default "fba").
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM
        (default None).
    processes : int, optional
        The number of parallel processes to run. Can speed up the computations
        if the number of knockouts to perform is large. If not passed,
        will be set to `configuration.processes` (default None).
    **kwargs :
        Keyword arguments are passed on to underlying simulation functions
        such as `add_room`.

    Returns
    -------
    pandas.DataFrame
        One row per distinct combination of gene deletions with the columns

        ids : set of str
            The gene identifiers that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    gene_list1, gene_list2 = _element_lists(model.genes, gene_list1, gene_list2)
    return _multi_deletion(
        model,
        "gene",
        element_lists=[gene_list1, gene_list2],
        method=method,
        solution=solution,
        processes=processes,
        **kwargs,
    )


@pd.api.extensions.register_dataframe_accessor("knockout")
class KnockoutAccessor:
    """Access unique combinations of reactions in deletion results.

    This allows access in the form of `results.knockout[rxn1]` or
    `results.knockout["rxn1_id"]`. Each individual entry will return a
    deletion so `results.knockout[rxn1, rxn2]` will return two deletions
    (for individual knockouts of rxn1 and rxn2 respectively). Multi-deletions
    can be accessed by passing in sets like
    `results.knockout[{rxn1, rxn2}]` which denotes the double deletion of both
    reactions. Thus, the following are allowed index elements:

    - single reactions or genes (depending on whether it is a gene or
      reaction deletion)
    - single reaction IDs or gene IDs
    - lists of single reaction IDs or gene IDs (will return one row for
      each element in the list)
    - sets of reactions or genes (for multi-deletions)
    - sets of reaction IDs or gene IDs
    - lists of sets of objects or IDs (to get several multi-deletions)

    Parameters
    ----------
    pandas_obj : pandas.DataFrame or pandas.Series
        A result from one of the deletion methods.

    """

    def __init__(self, pandas_obj: Union[pd.DataFrame, pd.Series]) -> None:
        """Set up the accessor."""
        self._validate(pandas_obj)
        self._result = pandas_obj

    @staticmethod
    def _validate(obj: pd.DataFrame) -> None:
        """Validate the object given.

        Parameters
        ----------
        obj : pandas.DataFrame
            The object to validate.

        Raises
        ------
        AttributeError
            If `obj` lacks any of the columns "ids", "growth" or "status",
            i.e. it does not look like the result of a deletion method.

        """
        required = ("ids", "growth", "status")
        if any(name not in obj.columns for name in required):
            raise AttributeError("Must be DataFrame returned by a deletion method.")

    def __getitem__(
        self,
        args: Union[
            Gene,
            List[Gene],
            Set[Gene],
            List[Set[Gene]],
            Reaction,
            List[Reaction],
            Set[Reaction],
            List[Set[Reaction]],
            str,
            List[str],
            Set[str],
            List[Set[str]],
        ],
    ) -> pd.DataFrame:
        """Return the deletion result for a particular set of knocked entities.

        Parameters
        ----------
        args : cobra.Reaction, cobra.Gene, str, set, or list
            The deletions to be returned. Accepts:

            - single reactions or genes
            - single reaction IDs or gene IDs
            - lists (or tuples) of single reactions, genes or their IDs
            - sets (or frozensets) of reactions or genes
            - sets (or frozensets) of reaction IDs or gene IDs
            - lists (or tuples) of sets of objects or IDs

            Objects and ID strings may be mixed within one list or set. The
            kind of lookup (single entries vs. sets) is decided from the
            first element. See the docs for usage examples.

        Returns
        -------
        pandas.DataFrame
            The deletion result where the chosen entities have been deleted.
            Each row denotes a deletion. An empty list or tuple selects no
            rows; the columns are kept.

        Raises
        ------
        ValueError
            If any other object is used as index for lookup.

        """
        if not isinstance(args, (tuple, list)):
            args = [args]
        if not args:
            # Nothing requested: empty selection instead of failing on args[0].
            return self._result.iloc[0:0]

        first = args[0]
        if isinstance(first, (set, frozenset)):
            # Each set denotes one multi-deletion; resolve members one by one
            # so objects and ID strings can be mixed.
            wanted = {
                frozenset(getattr(member, "id", member) for member in group)
                for group in args
            }
        # IDs are the common case, so test for `str` first.
        elif isinstance(first, str) or isinstance(first, (Reaction, Gene)):
            # Each entry denotes its own single deletion.
            wanted = {frozenset([getattr(entry, "id", entry)]) for entry in args}
        else:
            raise ValueError(
                "Allowed indices are single cobra.Reaction or cobra.Gene, "
                "lists of cobra.Reaction of cobra.Gene, or lists of sets "
                "of cobra.Reaction or cobra.Gene."
            )

        # Hash lookup per row; only real sets can equal a requested knockout.
        found = [
            isinstance(ids, (set, frozenset)) and frozenset(ids) in wanted
            for ids in self._result["ids"]
        ]
        # `.loc` keeps the columns even when the result frame has no rows.
        return self._result.loc[found]