# Source code for cobra.summary.model_summary

"""Provide the model summary class."""


import logging
from operator import attrgetter
from typing import TYPE_CHECKING, Dict, List, Optional, Union

import pandas as pd

from cobra.core import Reaction
from cobra.flux_analysis import flux_variability_analysis, pfba
from cobra.summary import Summary
from cobra.util.solver import linear_reaction_coefficients


if TYPE_CHECKING:
    from cobra.core import Metabolite, Model, Solution


# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class ModelSummary(Summary):
    """
    Define the model summary.

    Attributes
    ----------
    uptake_flux : pandas.DataFrame
        A pandas DataFrame of only the uptake fluxes.
    secretion_flux : pandas.DataFrame
        A pandas DataFrame of only the secretion fluxes.

    See Also
    --------
    Summary : Parent that defines further attributes.
    MetaboliteSummary
    ReactionSummary

    """

    def __init__(
        self,
        *,
        model: "Model",
        solution: Optional["Solution"] = None,
        fva: Optional[Union[float, pd.DataFrame]] = None,
        **kwargs,
    ):
        """
        Initialize a model summary.

        All arguments are keyword-only. The summary data is computed immediately
        by delegating to ``_generate``.

        Parameters
        ----------
        model : cobra.Model
            The metabolic model for which to generate a model summary.
        solution : cobra.Solution, optional
            A previous model solution to use for generating the summary. If
            ``None``, a parsimonious flux distribution is computed
            (default None).
        fva : pandas.DataFrame or float, optional
            Whether or not to include flux variability analysis in the output.
            If given, `fva` should either be a previous FVA solution matching the
            model or a float between 0 and 1 representing the fraction of the
            optimum objective to be searched (default None).

        Other Parameters
        ----------------
        kwargs :
            Further keyword arguments are passed on to the parent class.

        See Also
        --------
        Summary : Parent that has further default parameters.
        MetaboliteSummary
        ReactionSummary

        """
        super().__init__(**kwargs)
        # Public result tables; filled in by `_generate`.
        self.uptake_flux: Optional[pd.DataFrame] = None
        self.secretion_flux: Optional[pd.DataFrame] = None
        # Private state describing the objective and the boundary of the model;
        # also filled in by `_generate`.
        self._boundary = None
        self._boundary_metabolites = None
        self._objective = None
        self._objective_value = None
        self._generate(model, solution, fva)

[docs]    def _generate(
        self,
        model: "Model",
        solution: Optional["Solution"],
        fva: Optional[Union[float, pd.DataFrame]],
    ) -> None:
        """
        Prepare the data for the summary instance.

        Parameters
        ----------
        model : cobra.Model
            The metabolic model for which to generate a metabolite summary.
        solution : cobra.Solution, optional
            A previous model solution to use for generating the summary. If
            ``None``, the summary method will generate a parsimonious flux
            distribution.
        fva : pandas.DataFrame or float, optional
            Whether or not to include flux variability analysis in the output.
            If given, `fva` should either be a previous FVA solution matching the
            model or a float between 0 and 1 representing the fraction of the
            optimum objective to be searched.

        """
        super()._generate(model=model, solution=solution, fva=fva)

        coefficients = linear_reaction_coefficients(model)
        if solution is None:
            logger.info("Generating new parsimonious flux distribution.")
            solution = pfba(model)

        if isinstance(fva, float):
            logger.info("Performing flux variability analysis.")
            fva = flux_variability_analysis(
                model=model,
                reaction_list=model.boundary,
                fraction_of_optimum=fva,
            )
        if coefficients:
            self._objective: Dict["Reaction", float] = {
                rxn.copy(): coef for rxn, coef in coefficients.items()
            }
            self._objective_value: float = sum(
                solution[rxn.id] * coef for rxn, coef in self._objective.items()
            )
        else:
            logger.warning(
                "Non-linear or non-reaction model objective. Falling back to minimal "
                "display."
            )
            self._objective = {
                Reaction(id="Expression", name="Expression"): float("nan")
            }
            self._objective_value: float = float("nan")
        self._boundary: List["Reaction"] = [
            rxn.copy() for rxn in sorted(model.boundary, key=attrgetter("id"))
        ]
        self._boundary_metabolites: List["Metabolite"] = [
            met.copy() for rxn in self._boundary for met in rxn.metabolites
        ]
        flux = pd.DataFrame(
            data=[
                (rxn.id, met.id, rxn.get_coefficient(met.id), solution[rxn.id])
                for rxn, met in zip(self._boundary, self._boundary_metabolites)
            ],
            columns=["reaction", "metabolite", "factor", "flux"],
            index=[r.id for r in self._boundary],
        )
        # Scale fluxes by stoichiometric coefficient.
        flux["flux"] *= flux["factor"]

        if fva is not None:
            flux = flux.join(fva)
            view = flux[["flux", "minimum", "maximum"]]
            # Set fluxes below model tolerance to zero.
            flux[["flux", "minimum", "maximum"]] = view.where(
                view.abs() >= model.tolerance, 0
            )
            # Create the scaled compound flux.
            flux[["minimum", "maximum"]] = flux[["minimum", "maximum"]].mul(
                flux["factor"], axis=0
            )
            # Negative factors invert the minimum/maximum relationship.
            negative = flux["factor"] < 0
            tmp = flux.loc[negative, "maximum"]
            flux.loc[negative, "maximum"] = flux.loc[negative, "minimum"]
            flux.loc[negative, "minimum"] = tmp
            # Add zero to turn negative zero into positive zero for nicer display later.
            flux[["flux", "minimum", "maximum"]] += 0
        else:
            # Set fluxes below model tolerance to zero.
            flux.loc[flux["flux"].abs() < model.tolerance, "flux"] = 0
            # Add zero to turn negative zero into positive zero for nicer display later.
            flux["flux"] += 0

        # Create production table from producing fluxes or zero fluxes where the
        # metabolite is a product in the reaction.
        is_produced = (flux["flux"] > 0) | ((flux["flux"] == 0) & (flux["factor"] > 0))
        if fva is not None:
            self.uptake_flux = flux.loc[
                is_produced, ["flux", "minimum", "maximum", "reaction", "metabolite"]
            ].copy()
        else:
            self.uptake_flux = flux.loc[
                is_produced, ["flux", "reaction", "metabolite"]
            ].copy()

        # Create consumption table from consuming fluxes or zero fluxes where the
        # metabolite is a substrate in the reaction.
        is_consumed = (flux["flux"] < 0) | ((flux["flux"] == 0) & (flux["factor"] < 0))
        if fva is not None:
            self.secretion_flux = flux.loc[
                is_consumed, ["flux", "minimum", "maximum", "reaction", "metabolite"]
            ].copy()
        else:
            self.secretion_flux = flux.loc[
                is_consumed, ["flux", "reaction", "metabolite"]
            ].copy()

        self._flux = flux

[docs]    def _display_flux(
        self, frame: pd.DataFrame, names: bool, element: str, threshold: float
    ) -> pd.DataFrame:
        """
        Transform a flux data frame for display.

        Parameters
        ----------
        frame : pandas.DataFrame
            Either the producing or the consuming fluxes.
        names : bool
            Whether or not elements should be displayed by their common names.
        element : str
            The atomic element to summarize fluxes for.
        threshold : float
            Hide fluxes below the threshold from being displayed.

        Returns
        -------
        pandas.DataFrame
            The transformed pandas DataFrame with flux percentages and reaction
            definitions.

        """
        if "minimum" in frame.columns and "maximum" in frame.columns:
            frame = frame.loc[
                (frame["flux"].abs() >= threshold)
                | (frame["minimum"].abs() >= threshold)
                | (frame["maximum"].abs() >= threshold),
                :,
            ].copy()
        else:
            frame = frame.loc[frame["flux"].abs() >= threshold, :].copy()

        metabolites = {m.id: m for m in self._boundary_metabolites}

        element_num = f"{element}-Number"
        frame[element_num] = [
            metabolites[met_id].elements.get(element, 0)
            for met_id in frame["metabolite"]
        ]
        element_percent = f"{element}-Flux"
        frame[element_percent] = frame[element_num] * frame["flux"].abs()
        total = frame[element_percent].sum()
        if total > 0.0:
            frame[element_percent] /= total
        frame[element_percent] = [f"{x:.2%}" for x in frame[element_percent]]

        if names:
            frame["metabolite"] = [
                metabolites[met_id].name for met_id in frame["metabolite"]
            ]

        if "minimum" in frame.columns and "maximum" in frame.columns:
            frame["range"] = list(
                frame[["minimum", "maximum"]].itertuples(index=False, name=None)
            )
            return frame[
                [
                    "metabolite",
                    "reaction",
                    "flux",
                    "range",
                    element_num,
                    element_percent,
                ]
            ]
        else:
            return frame[
                ["metabolite", "reaction", "flux", element_num, element_percent]
            ]

    @staticmethod
[docs]    def _string_table(frame: pd.DataFrame, float_format: str, column_width: int) -> str:
        """
        Create a pretty string representation of the data frame.

        Parameters
        ----------
        frame : pandas.DataFrame
            A pandas DataFrame of fluxes.
        float_format : str
            Format string for floats.
        column_width : int
            The maximum column width for each row.

        Returns
        -------
        str
            The data frame formatted as a pretty string.

        """
        frame.columns = [header.title() for header in frame.columns]
        return frame.to_string(
            header=True,
            index=False,
            na_rep="",
            formatters={
                "Flux": f"{{:{float_format}}}".format,
                "Range": lambda pair: f"[{pair[0]:{float_format}}; "
                f"{pair[1]:{float_format}}]",
            },
            max_colwidth=column_width,
        )

    @staticmethod
[docs]    def _html_table(frame: pd.DataFrame, float_format: str) -> str:
        """
        Create an HTML representation of the data frame.

        Parameters
        ----------
        frame : pandas.DataFrame
            A pandas DataFrame of fluxes.
        float_format : str
            Format string for floats.

        Returns
        -------
        str
            The data frame formatted as HTML.

        """
        frame.columns = [header.title() for header in frame.columns]
        return frame.to_html(
            header=True,
            index=False,
            na_rep="",
            formatters={
                "Flux": f"{{:{float_format}}}".format,
                "Range": lambda pair: f"[{pair[0]:{float_format}}; "
                f" {pair[1]:{float_format}}]",
            },
        )

[docs]    def _string_objective(self, names: bool) -> str:
        """
        Return a string representation of the objective.

        Parameters
        ----------
        names : bool, optional
            Whether or not elements should be displayed by their common names.

        Returns
        -------
        str
            The objective expression and value as a string.

        """
        if names:
            objective = " + ".join(
                [f"{coef} {rxn.name}" for rxn, coef in self._objective.items()]
            )
        else:
            objective = " + ".join(
                [f"{coef} {rxn.id}" for rxn, coef in self._objective.items()]
            )
        return f"{objective} = {self._objective_value}"

[docs]    def to_string(
        self,
        names: bool = False,
        element: str = "C",
        threshold: Optional[float] = None,
        float_format: str = ".4G",
        column_width: int = 79,
    ) -> str:
        """
        Return a pretty string representation of the model summary.

        Parameters
        ----------
        names : bool, optional
            Whether or not elements should be displayed by their common names
            (default False).
        element : str, optional
            The atomic element to summarize uptake and secretion for (default 'C').
        threshold : float, optional
            Hide fluxes below the threshold from being displayed. If no value is
            given, the model tolerance is used (default None).
        float_format : str, optional
            Format string for floats (default '.4G').
        column_width : int, optional
            The maximum column width for each row (default 79).

        Returns
        -------
        str
            The summary formatted as a pretty string.

        """
        threshold = self._normalize_threshold(threshold)

        objective = self._string_objective(names)

        uptake = self._string_table(
            self._display_flux(self.uptake_flux, names, element, threshold),
            float_format,
            column_width,
        )

        secretion = self._string_table(
            self._display_flux(self.secretion_flux, names, element, threshold),
            float_format,
            column_width,
        )

        return (
            f"Objective\n"
            f"=========\n"
            f"{objective}\n\n"
            f"Uptake\n"
            f"------\n"
            f"{uptake}\n\n"
            f"Secretion\n"
            f"---------\n"
            f"{secretion}\n"
        )

[docs]    def to_html(
        self,
        names: bool = False,
        element: str = "C",
        threshold: Optional[float] = None,
        float_format: str = ".4G",
    ) -> str:
        """
        Return a rich HTML representation of the model summary.

        Parameters
        ----------
        names : bool, optional
            Whether or not elements should be displayed by their common names
            (default False).
        element : str, optional
            The atomic element to summarize uptake and secretion for (default 'C').
        threshold : float, optional
            Hide fluxes below the threshold from being displayed. If no value is
            given, the model tolerance is used (default None).
        float_format : str, optional
            Format string for floats (default '.4G').

        Returns
        -------
        str
            The summary formatted as HTML.

        """
        threshold = self._normalize_threshold(threshold)

        objective = self._string_objective(names)

        uptake = self._html_table(
            self._display_flux(self.uptake_flux, names, element, threshold),
            float_format,
        )

        secretion = self._html_table(
            self._display_flux(self.secretion_flux, names, element, threshold),
            float_format,
        )

        return (
            f"<h3>Objective</h3>"
            f"<p>{objective}</p>"
            f"<h4>Uptake</h4>"
            f"{uptake}"
            f"<h4>Secretion</h4>"
            f"{secretion}"
        )