Source code for cobra.flux_analysis.deletion

# -*- coding: utf-8 -*-

import logging
import multiprocessing
from builtins import dict, map
from functools import partial
from itertools import product
from typing import List, Set, Union

import pandas as pd
from optlang.exceptions import SolverError

from cobra.core import Configuration, Gene, Reaction
from cobra.flux_analysis.moma import add_moma
from cobra.flux_analysis.room import add_room
from cobra.manipulation.delete import find_gene_knockout_reactions
from cobra.util import solver as sutil


# Module-level logger, named after this module's import path.
LOGGER = logging.getLogger(__name__)

# Shared cobra configuration instance; ``_multi_deletion`` reads its
# ``processes`` attribute when the caller does not pass ``processes``.
CONFIGURATION = Configuration()
def _reactions_knockouts_with_restore(model, reactions):
    """
    Knock out a group of reactions together and measure the resulting growth.

    The knockouts are applied inside a ``with model:`` block so that the
    model's context manager can revert them when the block exits.

    Parameters
    ----------
    model : cobra.Model
        The model to simulate.
    reactions : list of cobra.Reaction
        The reactions to knock out simultaneously. The collection is iterated
        twice, so it must not be a one-shot iterator.

    Returns
    -------
    tuple
        ``(ids, growth, status)`` where ``ids`` is the list of knocked-out
        reaction IDs, ``growth`` is the value returned by ``_get_growth`` and
        ``status`` is ``model.solver.status`` read after that call.

    """
    with model:
        for rxn in reactions:
            rxn.knock_out()
        knocked_out_ids = [rxn.id for rxn in reactions]
        growth = _get_growth(model)
        # Read the status while still inside the context, i.e. before the
        # knockouts are rolled back.
        status = model.solver.status
    return knocked_out_ids, growth, status
def _get_growth(model):
    """
    Optimize the model and return the growth value of interest.

    Parameters
    ----------
    model : cobra.Model
        The model to optimize.

    Returns
    -------
    float
        The primal value of the ``moma_old_objective`` solver variable when
        such a variable exists on the model's solver, otherwise the value
        returned by ``model.slim_optimize()``. ``nan`` if a ``SolverError``
        is raised along the way.

    """
    try:
        uses_moma = "moma_old_objective" in model.solver.variables
        value = model.slim_optimize()
        if uses_moma:
            # With the MOMA variable present the optimization result itself is
            # discarded; the growth is read from the dedicated variable.
            value = model.solver.variables.moma_old_objective.primal
    except SolverError:
        return float("nan")
    return value
def _reaction_deletion(model, ids):
    """
    Knock out the reactions with the given IDs and report the growth.

    Parameters
    ----------
    model : cobra.Model
        The model to simulate.
    ids : iterable of str
        IDs of the reactions to knock out together.

    Returns
    -------
    tuple
        The ``(ids, growth, status)`` triple produced by
        ``_reactions_knockouts_with_restore``.

    """
    lookup = model.reactions.get_by_id
    targets = [lookup(reaction_id) for reaction_id in ids]
    return _reactions_knockouts_with_restore(model, targets)
def _gene_deletion(model, ids):
    """
    Knock out the genes with the given IDs together and report the growth.

    Parameters
    ----------
    model : cobra.Model
        The model to simulate.
    ids : iterable of str
        IDs of the genes to knock out simultaneously.

    Returns
    -------
    tuple
        ``(ids, growth, status)`` where ``ids`` is the argument passed in,
        and ``growth`` and ``status`` come from
        ``_reactions_knockouts_with_restore``.

    """
    genes = [model.genes.get_by_id(gene_id) for gene_id in ids]
    # Evaluate all genes of the combination in ONE call. Looking them up one
    # gene at a time and merging the results misses reactions that are only
    # disabled when several genes are lost together (e.g. a rule such as
    # "g1 or g2"), which is exactly the case multi-gene deletions exist for.
    # The result is materialized because it is iterated more than once below.
    disabled_reactions = list(find_gene_knockout_reactions(model, genes))
    _, growth, status = _reactions_knockouts_with_restore(model, disabled_reactions)
    return (ids, growth, status)
def _reaction_deletion_worker(ids):
    """
    Run a reaction deletion against the per-process model.

    Intended as the task function of a worker process: the model is not
    passed in but read from the module-level ``_model``, which
    ``_init_worker`` assigns.

    Parameters
    ----------
    ids : iterable of str
        IDs of the reactions to knock out together.

    Returns
    -------
    tuple
        The result of ``_reaction_deletion`` for the per-process model.

    """
    # ``_model`` is only read here, so no ``global`` declaration is needed.
    return _reaction_deletion(_model, ids)
def _gene_deletion_worker(ids):
    """
    Run a gene deletion against the per-process model.

    Intended as the task function of a worker process: the model is not
    passed in but read from the module-level ``_model``, which
    ``_init_worker`` assigns.

    Parameters
    ----------
    ids : iterable of str
        IDs of the genes to knock out together.

    Returns
    -------
    tuple
        The result of ``_gene_deletion`` for the per-process model.

    """
    # ``_model`` is only read here, so no ``global`` declaration is needed.
    return _gene_deletion(_model, ids)
def _init_worker(model):
    """
    Store the model in the module-level ``_model`` variable.

    Used as the ``initializer`` of the multiprocessing pool so that the
    ``*_worker`` functions can reach the model without receiving it as an
    argument for every task.

    Parameters
    ----------
    model : cobra.Model
        The model the worker functions of this process should operate on.

    """
    global _model
    _model = model
def _multi_deletion(
    model, entity, element_lists, method="fba", solution=None, processes=None, **kwargs
):
    """
    Provide a common interface for single or multiple knockouts.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    entity : 'gene' or 'reaction'
        The entity to knockout (``cobra.Gene`` or ``cobra.Reaction``).
    element_lists : list
        List of iterables of gene or reaction IDs. One element is drawn from
        each iterable to form a combination; combinations are treated as
        unordered sets, so duplicates (including reorderings) are evaluated
        only once.
    method: {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate.
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM.
    processes : int, optional
        The number of parallel processes to run. If not passed,
        ``CONFIGURATION.processes`` is used. The value is capped at the
        number of combinations to evaluate.
    kwargs :
        Only used with the ROOM methods, where they are passed on to
        ``add_room``.

    Returns
    -------
    pandas.DataFrame
        One row per unique combination of deleted entities, with the columns

        ids : set of str
            The gene or reaction identifiers that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    Raises
    ------
    RuntimeError
        If ``method`` is "moma" but the model's solver is not listed in
        ``sutil.qp_solvers``.
    KeyError
        If ``entity`` is neither "gene" nor "reaction".

    """
    solver = sutil.interface_to_str(model.problem.__name__)
    if method == "moma" and solver not in sutil.qp_solvers:
        raise RuntimeError(
            "Cannot use MOMA since '{}' is not QP-capable. "
            "Please choose a different solver or use FBA only.".format(solver)
        )

    if processes is None:
        processes = CONFIGURATION.processes

    def extract_knockout_results(result_iter):
        """Collect ``(ids, growth, status)`` triples into a data frame."""
        return pd.DataFrame(
            [(set(ids), growth, status) for (ids, growth, status) in result_iter],
            columns=["ids", "growth", "status"],
        )

    with model:
        if "moma" in method:
            add_moma(model, solution=solution, linear="linear" in method)
        elif "room" in method:
            add_room(model, solution=solution, linear="linear" in method, **kwargs)

        # frozensets make (a, b) and (b, a) the same task and collapse (a, a)
        # into the single deletion of a.
        args = {frozenset(comb) for comb in product(*element_lists)}
        processes = min(processes, len(args))

        if processes > 1:
            worker = {
                "gene": _gene_deletion_worker,
                "reaction": _reaction_deletion_worker,
            }[entity]
            # processes <= len(args) here, so the chunk size is at least 1.
            chunk_size = len(args) // processes
            # The context manager terminates the pool if anything below
            # raises, so worker processes are never left behind.
            with multiprocessing.Pool(
                processes, initializer=_init_worker, initargs=(model,)
            ) as pool:
                results = extract_knockout_results(
                    pool.imap_unordered(worker, args, chunksize=chunk_size)
                )
                # All results are collected; shut the workers down gracefully.
                pool.close()
                pool.join()
        else:
            worker = {"gene": _gene_deletion, "reaction": _reaction_deletion}[entity]
            results = extract_knockout_results(map(partial(worker, model), args))
        return results
def _entities_ids(entities):
    """
    Return the IDs of the given entities as a list.

    Parameters
    ----------
    entities : iterable
        Either objects exposing an ``id`` attribute or the IDs themselves.

    Returns
    -------
    list
        ``[e.id for e in entities]`` when every element has an ``id``
        attribute, otherwise the elements themselves, unchanged.

    """
    # Materialize once up front: with a one-shot iterator, a failed ``.id``
    # pass would otherwise have consumed elements before the fallback runs,
    # silently dropping them from the result.
    items = list(entities)
    try:
        return [item.id for item in items]
    except AttributeError:
        return items
def _element_lists(entities, *ids):
    """
    Resolve the user-supplied entity lists into lists of IDs.

    Parameters
    ----------
    entities : iterable
        The fallback collection used when the first entry of ``ids`` is
        ``None``.
    *ids : iterable or None
        One or more collections of entities (or their IDs). A ``None`` in
        first position is replaced by ``entities``; a ``None`` in any later
        position reuses the ID list resolved for the position before it.

    Returns
    -------
    list of list
        One ID list per entry of ``ids``, in the same order.

    """
    requested = list(ids)
    if requested[0] is None:
        requested[0] = entities
    resolved = [_entities_ids(requested[0])]
    for candidate in requested[1:]:
        resolved.append(
            resolved[-1] if candidate is None else _entities_ids(candidate)
        )
    return resolved
def single_reaction_deletion(
    model, reaction_list=None, method="fba", solution=None, processes=None, **kwargs
):
    """
    Knock out each reaction from a given list, one at a time.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    reaction_list : iterable, optional
        ``cobra.Reaction``s (or their IDs) to be deleted. If not passed, all
        the reactions from the model are used.
    method: {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate.
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM.
    processes : int, optional
        The number of parallel processes to run. Forwarded to
        ``_multi_deletion``, which picks a default when it is ``None``.
    kwargs :
        Forwarded unchanged to ``_multi_deletion``.

    Returns
    -------
    pandas.DataFrame
        The frame built by ``_multi_deletion``: one row per deleted reaction
        with the columns

        ids : set of str
            The identifier of the reaction that was knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    targets = _element_lists(model.reactions, reaction_list)
    return _multi_deletion(
        model,
        "reaction",
        element_lists=targets,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs
    )
def single_gene_deletion(
    model, gene_list=None, method="fba", solution=None, processes=None, **kwargs
):
    """
    Knock out each gene from a given list, one at a time.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    gene_list : iterable, optional
        ``cobra.Gene``s (or their IDs) to be deleted. If not passed, all the
        genes from the model are used.
    method: {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate.
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM.
    processes : int, optional
        The number of parallel processes to run. Forwarded to
        ``_multi_deletion``, which picks a default when it is ``None``.
    kwargs :
        Forwarded unchanged to ``_multi_deletion``.

    Returns
    -------
    pandas.DataFrame
        The frame built by ``_multi_deletion``: one row per deleted gene with
        the columns

        ids : set of str
            The identifier of the gene that was knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    targets = _element_lists(model.genes, gene_list)
    return _multi_deletion(
        model,
        "gene",
        element_lists=targets,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs
    )
def double_reaction_deletion(
    model,
    reaction_list1=None,
    reaction_list2=None,
    method="fba",
    solution=None,
    processes=None,
    **kwargs
):
    """
    Knock out each reaction pair from the combinations of two given lists.

    We say 'pair' here but the order does not matter.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    reaction_list1 : iterable, optional
        First iterable of ``cobra.Reaction``s (or their IDs) to be deleted.
        If not passed, all the reactions from the model are used.
    reaction_list2 : iterable, optional
        Second iterable of ``cobra.Reaction``s (or their IDs) to be deleted.
        If not passed, the same reactions as for the first list are used.
    method: {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate.
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM.
    processes : int, optional
        The number of parallel processes to run. Forwarded to
        ``_multi_deletion``, which picks a default when it is ``None``.
    kwargs :
        Forwarded unchanged to ``_multi_deletion``.

    Returns
    -------
    pandas.DataFrame
        The frame built by ``_multi_deletion``: one row per unique
        combination of reactions with the columns

        ids : set of str
            The identifiers of the reactions that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    # ``_element_lists`` returns one ID list per list argument, i.e. exactly
    # the two-element list that ``_multi_deletion`` expects.
    id_lists = _element_lists(model.reactions, reaction_list1, reaction_list2)
    return _multi_deletion(
        model,
        "reaction",
        element_lists=id_lists,
        method=method,
        solution=solution,
        processes=processes,
        **kwargs
    )
def double_gene_deletion(
    model,
    gene_list1=None,
    gene_list2=None,
    method="fba",
    solution=None,
    processes=None,
    **kwargs
):
    """
    Knock out each gene pair from the combination of two given lists.

    We say 'pair' here but the order does not matter.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model to perform deletions in.
    gene_list1 : iterable, optional
        First iterable of ``cobra.Gene``s (or their IDs) to be deleted. If
        not passed, all the genes from the model are used.
    gene_list2 : iterable, optional
        Second iterable of ``cobra.Gene``s (or their IDs) to be deleted. If
        not passed, the same genes as for the first list are used.
    method: {"fba", "moma", "linear moma", "room", "linear room"}, optional
        Method used to predict the growth rate.
    solution : cobra.Solution, optional
        A previous solution to use as a reference for (linear) MOMA or ROOM.
    processes : int, optional
        The number of parallel processes to run. Forwarded to
        ``_multi_deletion``, which picks a default when it is ``None``.
    kwargs :
        Forwarded unchanged to ``_multi_deletion``.

    Returns
    -------
    pandas.DataFrame
        The frame built by ``_multi_deletion``: one row per unique
        combination of genes with the columns

        ids : set of str
            The identifiers of the genes that were knocked out.
        growth : float
            The growth rate of the adjusted model.
        status : str
            The solution's status.

    """
    gene_list1, gene_list2 = _element_lists(model.genes, gene_list1, gene_list2)
    return _multi_deletion(
        model,
        "gene",
        element_lists=[gene_list1, gene_list2],
        method=method,
        solution=solution,
        processes=processes,
        **kwargs
    )


@pd.api.extensions.register_dataframe_accessor("knockout")
class KnockoutAccessor:
    """Access unique combinations of reactions in deletion results.

    This allows access in the form of `results.knockout[rxn1]` or
    `results.knockout["rxn1_id"]`. Each individual entry will return a
    deletion so `results.knockout[rxn1, rxn2]` will return two deletions
    (for individual knockouts of rxn1 and rxn2 respectively). Multi-deletions
    can be accessed by passing in sets like `results.knockout[{rxn1, rxn2}]`
    which denotes the double deletion of both reactions.
    Thus, the following are allowed index elements:

    - single reactions or genes (depending on whether it is a gene or
      reaction deletion)
    - single reaction IDs or gene IDs
    - lists of single reactions, genes or their IDs (will return one row for
      each element in the list)
    - sets of reactions or genes (for multi-deletions)
    - sets of reaction IDs or gene IDs
    - list of sets of objects or IDs (to get several multi-deletions)

    """

    def __init__(self, pandas_obj: pd.DataFrame) -> None:
        """Set up the accessor.

        Parameters
        ----------
        pandas_obj : pd.DataFrame
            A result from one of the deletion methods.

        Raises
        ------
        AttributeError
            If the frame lacks one of the columns of a deletion result.

        """
        self._validate(pandas_obj)
        self._result = pandas_obj

    @staticmethod
    def _validate(obj: pd.DataFrame) -> None:
        """Check that ``obj`` has the columns of a deletion result.

        Raises
        ------
        AttributeError
            If any of the columns "ids", "growth" or "status" is missing.

        """
        required = ("ids", "growth", "status")
        if not all(name in obj.columns for name in required):
            raise AttributeError("Must be DataFrame returned by a deletion method.")

    def __getitem__(
        self,
        args: Union[
            Gene,
            List[Gene],
            Set[Gene],
            List[Set[Gene]],
            Reaction,
            List[Reaction],
            Set[Reaction],
            List[Set[Reaction]],
            str,
            List[str],
            Set[str],
            List[Set[str]],
        ],
    ) -> pd.DataFrame:
        """Return the deletion result for a particular set of knocked entities.

        Parameters
        ----------
        args : cobra.Reaction, cobra.Gene, str, set, or list
            The deletions to be returned. Accepts:

            - single reactions or genes
            - single reaction IDs or gene IDs
            - lists of single reactions, genes or their IDs (objects and IDs
              may be mixed)
            - sets (or frozensets) of reactions or genes
            - sets (or frozensets) of reaction IDs or gene IDs
            - list of sets of objects or IDs

            An empty list selects no rows.

        Returns
        -------
        pd.DataFrame
            The deletion result where the chosen entities have been deleted.
            Each row denotes a deletion.

        Raises
        ------
        ValueError
            If the first index element is none of the accepted types.

        """
        if not isinstance(args, (tuple, list)):
            args = [args]

        # Every requested deletion is normalized to a set of ID strings so it
        # can be compared against the sets stored in the ``ids`` column. IDs
        # are resolved per element (objects expose ``.id``, strings are used
        # as they are), which lets objects and IDs be mixed in one request.
        if not args:
            wanted = []
        elif isinstance(args[0], (Reaction, Gene, str)):
            wanted = [{getattr(obj, "id", obj)} for obj in args]
        elif isinstance(args[0], (set, frozenset)):
            wanted = [
                {getattr(elem, "id", elem) for elem in group} for group in args
            ]
        else:
            raise ValueError(
                "Allowed indices are single Reactions or Genes, "
                "lists of Reactions or Genes, or lists of sets "
                "of Reactions or Genes."
            )
        found = [ids in wanted for ids in self._result["ids"]]
        return self._result[found]