Source code for cobra.summary.model_summary

"""Provide the model summary class."""


import logging
from operator import attrgetter
from typing import TYPE_CHECKING, Dict, List, Optional, Union

import pandas as pd

from cobra.core import Reaction
from cobra.flux_analysis import flux_variability_analysis, pfba
from cobra.summary import Summary
from cobra.util.solver import linear_reaction_coefficients


if TYPE_CHECKING:
    from cobra.core import Metabolite, Model, Solution


# Module-level logger named after this module; used by ModelSummary to report
# when a new flux distribution or flux variability analysis is computed.
logger = logging.getLogger(__name__)
class ModelSummary(Summary):
    """
    Define the model summary.

    Attributes
    ----------
    uptake_flux : pandas.DataFrame
        A pandas DataFrame of only the uptake fluxes.
    secretion_flux : pandas.DataFrame
        A pandas DataFrame of only the secretion fluxes.

    See Also
    --------
    Summary : Parent that defines further attributes.
    MetaboliteSummary
    ReactionSummary

    """

    def __init__(
        self,
        *,
        model: "Model",
        solution: Optional["Solution"] = None,
        fva: Optional[Union[float, pd.DataFrame]] = None,
        **kwargs,
    ):
        """
        Initialize a model summary.

        Parameters
        ----------
        model : cobra.Model
            The metabolic model for which to generate a model summary.
        solution : cobra.Solution, optional
            A previous model solution to use for generating the summary. If
            ``None``, the summary method will generate a parsimonious flux
            distribution (default None).
        fva : pandas.DataFrame or float, optional
            Whether or not to include flux variability analysis in the output.
            If given, `fva` should either be a previous FVA solution matching the
            model or a number between 0 and 1 representing the fraction of the
            optimum objective to be searched (default None).

        Other Parameters
        ----------------
        kwargs :
            Further keyword arguments are passed on to the parent class.

        See Also
        --------
        Summary : Parent that has further default parameters.
        MetaboliteSummary
        ReactionSummary

        """
        super().__init__(**kwargs)
        # All of the following are filled in by `_generate`.
        self._objective: Optional[Dict["Reaction", float]] = None
        self._objective_value: Optional[float] = None
        self._boundary: Optional[List["Reaction"]] = None
        self._boundary_metabolites: Optional[List["Metabolite"]] = None
        self.uptake_flux: Optional[pd.DataFrame] = None
        self.secretion_flux: Optional[pd.DataFrame] = None
        self._generate(model, solution, fva)

    def _generate(
        self,
        model: "Model",
        solution: Optional["Solution"],
        fva: Optional[Union[float, pd.DataFrame]],
    ) -> None:
        """
        Prepare the data for the summary instance.

        Parameters
        ----------
        model : cobra.Model
            The metabolic model for which to generate a model summary.
        solution : cobra.Solution, optional
            A previous model solution to use for generating the summary. If
            ``None``, the summary method will generate a parsimonious flux
            distribution.
        fva : pandas.DataFrame or float, optional
            Whether or not to include flux variability analysis in the output.
            If given, `fva` should either be a previous FVA solution matching the
            model or a number between 0 and 1 representing the fraction of the
            optimum objective to be searched.

        """
        super()._generate(model=model, solution=solution, fva=fva)

        coefficients = linear_reaction_coefficients(model)

        if solution is None:
            logger.info("Generating new parsimonious flux distribution.")
            solution = pfba(model)

        # Treat any plain number as the fraction of optimum so that integer
        # values such as ``fva=1`` work too. ``bool`` is an ``int`` subclass but
        # is not a meaningful fraction, so it is deliberately excluded.
        if isinstance(fva, (int, float)) and not isinstance(fva, bool):
            logger.info("Performing flux variability analysis.")
            fva = flux_variability_analysis(
                model=model,
                reaction_list=model.boundary,
                fraction_of_optimum=fva,
            )

        if coefficients:
            self._objective = {
                rxn.copy(): coef for rxn, coef in coefficients.items()
            }
            self._objective_value = sum(
                solution[rxn.id] * coef for rxn, coef in self._objective.items()
            )
        else:
            logger.warning(
                "Non-linear or non-reaction model objective. Falling back to minimal "
                "display."
            )
            self._objective = {
                Reaction(id="Expression", name="Expression"): float("nan")
            }
            self._objective_value = float("nan")

        # Work on copies, sorted by reaction identifier.
        self._boundary = [
            rxn.copy() for rxn in sorted(model.boundary, key=attrgetter("id"))
        ]
        self._boundary_metabolites = [
            met.copy() for rxn in self._boundary for met in rxn.metabolites
        ]

        # The reactions and metabolites are paired positionally.
        # NOTE(review): this assumes one metabolite per boundary reaction - confirm.
        flux = pd.DataFrame(
            data=[
                (rxn.id, met.id, rxn.get_coefficient(met.id), solution[rxn.id])
                for rxn, met in zip(self._boundary, self._boundary_metabolites)
            ],
            columns=["reaction", "metabolite", "factor", "flux"],
            index=[r.id for r in self._boundary],
        )
        # Scale fluxes by stoichiometric coefficient.
        flux["flux"] *= flux["factor"]

        if fva is not None:
            flux = flux.join(fva)
            view = flux[["flux", "minimum", "maximum"]]
            # Set fluxes below model tolerance to zero.
            flux[["flux", "minimum", "maximum"]] = view.where(
                view.abs() >= model.tolerance, 0
            )
            # Create the scaled compound flux.
            flux[["minimum", "maximum"]] = flux[["minimum", "maximum"]].mul(
                flux["factor"], axis=0
            )
            # Negative factors invert the minimum/maximum relationship.
            negative = flux["factor"] < 0
            tmp = flux.loc[negative, "maximum"]
            flux.loc[negative, "maximum"] = flux.loc[negative, "minimum"]
            flux.loc[negative, "minimum"] = tmp
            # Add zero to turn negative zero into positive zero for nicer display
            # later.
            flux[["flux", "minimum", "maximum"]] += 0
            columns = ["flux", "minimum", "maximum", "reaction", "metabolite"]
        else:
            # Set fluxes below model tolerance to zero.
            flux.loc[flux["flux"].abs() < model.tolerance, "flux"] = 0
            # Add zero to turn negative zero into positive zero for nicer display
            # later.
            flux["flux"] += 0
            columns = ["flux", "reaction", "metabolite"]

        # Uptake table: positive scaled fluxes, or zero fluxes whose factor is
        # positive.
        is_uptake = (flux["flux"] > 0) | ((flux["flux"] == 0) & (flux["factor"] > 0))
        self.uptake_flux = flux.loc[is_uptake, columns].copy()

        # Secretion table: negative scaled fluxes, or zero fluxes whose factor is
        # negative.
        is_secretion = (flux["flux"] < 0) | (
            (flux["flux"] == 0) & (flux["factor"] < 0)
        )
        self.secretion_flux = flux.loc[is_secretion, columns].copy()

        self._flux = flux

    def _display_flux(
        self, frame: pd.DataFrame, names: bool, element: str, threshold: float
    ) -> pd.DataFrame:
        """
        Transform a flux data frame for display.

        Parameters
        ----------
        frame : pandas.DataFrame
            Either the uptake or the secretion fluxes.
        names : bool
            Whether or not elements should be displayed by their common names.
        element : str
            The atomic element to summarize fluxes for.
        threshold : float
            Hide fluxes below the threshold from being displayed.

        Returns
        -------
        pandas.DataFrame
            The transformed pandas DataFrame with flux percentages and reaction
            definitions.

        """
        has_range = "minimum" in frame.columns and "maximum" in frame.columns

        # Keep a row when the flux or, if present, either FVA bound reaches the
        # threshold. The copy keeps the instance's own tables unmodified.
        if has_range:
            frame = frame.loc[
                (frame["flux"].abs() >= threshold)
                | (frame["minimum"].abs() >= threshold)
                | (frame["maximum"].abs() >= threshold),
                :,
            ].copy()
        else:
            frame = frame.loc[frame["flux"].abs() >= threshold, :].copy()

        metabolites = {m.id: m for m in self._boundary_metabolites}

        element_num = f"{element}-Number"
        frame[element_num] = [
            metabolites[met_id].elements.get(element, 0)
            for met_id in frame["metabolite"]
        ]

        element_percent = f"{element}-Flux"
        frame[element_percent] = frame[element_num] * frame["flux"].abs()
        total = frame[element_percent].sum()
        # Only normalize when there is any element flux; avoids dividing by zero.
        if total > 0.0:
            frame[element_percent] /= total
        frame[element_percent] = [f"{x:.2%}" for x in frame[element_percent]]

        if names:
            frame["metabolite"] = [
                metabolites[met_id].name for met_id in frame["metabolite"]
            ]

        if has_range:
            frame["range"] = list(
                frame[["minimum", "maximum"]].itertuples(index=False, name=None)
            )
            return frame[
                [
                    "metabolite",
                    "reaction",
                    "flux",
                    "range",
                    element_num,
                    element_percent,
                ]
            ]
        return frame[["metabolite", "reaction", "flux", element_num, element_percent]]

    @staticmethod
    def _string_table(frame: pd.DataFrame, float_format: str, column_width: int) -> str:
        """
        Create a pretty string representation of the data frame.

        Parameters
        ----------
        frame : pandas.DataFrame
            A pandas DataFrame of fluxes. It is not modified.
        float_format : str
            Format string for floats.
        column_width : int
            The maximum column width for each row.

        Returns
        -------
        str
            The data frame formatted as a pretty string.

        """
        # Title-case the headers on a renamed copy so that the caller's frame
        # keeps its original column labels.
        table = frame.rename(columns=str.title)
        return table.to_string(
            header=True,
            index=False,
            na_rep="",
            formatters={
                "Flux": f"{{:{float_format}}}".format,
                "Range": lambda pair: (
                    f"[{pair[0]:{float_format}}; {pair[1]:{float_format}}]"
                ),
            },
            max_colwidth=column_width,
        )

    @staticmethod
    def _html_table(frame: pd.DataFrame, float_format: str) -> str:
        """
        Create an HTML representation of the data frame.

        Parameters
        ----------
        frame : pandas.DataFrame
            A pandas DataFrame of fluxes. It is not modified.
        float_format : str
            Format string for floats.

        Returns
        -------
        str
            The data frame formatted as HTML.

        """
        # Title-case the headers on a renamed copy so that the caller's frame
        # keeps its original column labels.
        table = frame.rename(columns=str.title)
        return table.to_html(
            header=True,
            index=False,
            na_rep="",
            formatters={
                "Flux": f"{{:{float_format}}}".format,
                # Same "[min; max]" layout as the plain-text table.
                "Range": lambda pair: (
                    f"[{pair[0]:{float_format}}; {pair[1]:{float_format}}]"
                ),
            },
        )

    def _string_objective(self, names: bool) -> str:
        """
        Return a string representation of the objective.

        Parameters
        ----------
        names : bool
            Whether or not elements should be displayed by their common names.

        Returns
        -------
        str
            The objective expression and value as a string.

        """
        if names:
            terms = [f"{coef} {rxn.name}" for rxn, coef in self._objective.items()]
        else:
            terms = [f"{coef} {rxn.id}" for rxn, coef in self._objective.items()]
        objective = " + ".join(terms)
        return f"{objective} = {self._objective_value}"

    def to_string(
        self,
        names: bool = False,
        element: str = "C",
        threshold: Optional[float] = None,
        float_format: str = ".4G",
        column_width: int = 79,
    ) -> str:
        """
        Return a pretty string representation of the model summary.

        Parameters
        ----------
        names : bool, optional
            Whether or not elements should be displayed by their common names
            (default False).
        element : str, optional
            The atomic element to summarize uptake and secretion for
            (default 'C').
        threshold : float, optional
            Hide fluxes below the threshold from being displayed. If no value is
            given, the model tolerance is used (default None).
        float_format : str, optional
            Format string for floats (default '.4G').
        column_width : int, optional
            The maximum column width for each row (default 79).

        Returns
        -------
        str
            The summary formatted as a pretty string.

        """
        threshold = self._normalize_threshold(threshold)

        objective = self._string_objective(names)

        uptake = self._string_table(
            self._display_flux(self.uptake_flux, names, element, threshold),
            float_format,
            column_width,
        )

        secretion = self._string_table(
            self._display_flux(self.secretion_flux, names, element, threshold),
            float_format,
            column_width,
        )

        return (
            f"Objective\n"
            f"=========\n"
            f"{objective}\n\n"
            f"Uptake\n"
            f"------\n"
            f"{uptake}\n\n"
            f"Secretion\n"
            f"---------\n"
            f"{secretion}\n"
        )

    def to_html(
        self,
        names: bool = False,
        element: str = "C",
        threshold: Optional[float] = None,
        float_format: str = ".4G",
    ) -> str:
        """
        Return a rich HTML representation of the model summary.

        Parameters
        ----------
        names : bool, optional
            Whether or not elements should be displayed by their common names
            (default False).
        element : str, optional
            The atomic element to summarize uptake and secretion for
            (default 'C').
        threshold : float, optional
            Hide fluxes below the threshold from being displayed. If no value is
            given, the model tolerance is used (default None).
        float_format : str, optional
            Format string for floats (default '.4G').

        Returns
        -------
        str
            The summary formatted as HTML.

        """
        threshold = self._normalize_threshold(threshold)

        objective = self._string_objective(names)

        uptake = self._html_table(
            self._display_flux(self.uptake_flux, names, element, threshold),
            float_format,
        )

        secretion = self._html_table(
            self._display_flux(self.secretion_flux, names, element, threshold),
            float_format,
        )

        return (
            f"<h3>Objective</h3>"
            f"<p>{objective}</p>"
            f"<h4>Uptake</h4>"
            f"{uptake}"
            f"<h4>Secretion</h4>"
            f"{secretion}"
        )