Source code for cobra.summary.metabolite_summary

"""Provide the metabolite summary class."""


import logging
from operator import attrgetter
from textwrap import shorten
from typing import TYPE_CHECKING, List, Optional, Union

import pandas as pd

from cobra.flux_analysis import flux_variability_analysis, pfba
from cobra.summary import Summary


if TYPE_CHECKING:
    from cobra.core import Metabolite, Model, Reaction, Solution


[docs]logger = logging.getLogger(__name__)


[docs]class MetaboliteSummary(Summary):
    """
    Define the metabolite summary.

    Attributes
    ----------
    producing_flux : pandas.DataFrame
        A pandas DataFrame of only the producing fluxes.
    consuming_flux : pandas.DataFrame
        A pandas DataFrame of only the consuming fluxes.

    See Also
    --------
    Summary : Parent that defines further attributes.
    ReactionSummary
    ModelSummary

    """

    def __init__(
        self,
        *,
        metabolite: "Metabolite",
        model: "Model",
        solution: Optional["Solution"] = None,
        fva: Optional[Union[float, pd.DataFrame]] = None,
        **kwargs,
    ) -> None:
        """
        Initialize a metabolite summary.

        Parameters
        ----------
        metabolite : cobra.Metabolite
            The metabolite object whose summary we intend to get.
        model : cobra.Model
            The metabolic model for which to generate a metabolite summary.
        solution : cobra.Solution, optional
            A previous model solution to use for generating the summary. If
            ``None``, the summary method will generate a parsimonious flux
            distribution (default None).
        fva : pandas.DataFrame or float, optional
            Whether or not to include flux variability analysis in the output.
            If given, `fva` should either be a previous FVA solution matching the
            model or a float between 0 and 1 representing the fraction of the
            optimum objective to be searched (default None).

        Other Parameters
        ----------------
        kwargs :
            Further keyword arguments are passed on to the parent class.

        See Also
        --------
        Summary : Parent that has further default parameters.
        ReactionSummary
        ModelSummary

        """
        super().__init__(**kwargs)
        self._metabolite = metabolite.copy()
        self._reactions: List["Reaction"] = [
            r.copy() for r in sorted(metabolite.reactions, key=attrgetter("id"))
        ]
        self.producing_flux: Optional[pd.DataFrame] = None
        self.consuming_flux: Optional[pd.DataFrame] = None
        self._generate(model, solution, fva)

[docs]    def _generate(
        self,
        model: "Model",
        solution: Optional["Solution"],
        fva: Optional[Union[float, pd.DataFrame]],
    ) -> None:
        """
        Prepare the data for the summary instance.

        Parameters
        ----------
        model : cobra.Model
            The metabolic model for which to generate a metabolite summary.
        solution : cobra.Solution, optional
            A previous model solution to use for generating the summary. If
            ``None``, the summary method will generate a parsimonious flux
            distribution.
        fva : pandas.DataFrame or float, optional
            Whether or not to include flux variability analysis in the output.
            If given, `fva` should either be a previous FVA solution matching the
            model or a float between 0 and 1 representing the fraction of the
            optimum objective to be searched.

        """
        super()._generate(model=model, solution=solution, fva=fva)

        if solution is None:
            logger.info("Generating new parsimonious flux distribution.")
            solution = pfba(model)

        if isinstance(fva, float):
            logger.info("Performing flux variability analysis.")
            fva = flux_variability_analysis(
                model=model,
                reaction_list=[r.id for r in self._reactions],
                fraction_of_optimum=fva,
            )

        # Create the basic flux table.
        flux = pd.DataFrame(
            data=[
                (r.id, solution[r.id], r.get_coefficient(self._metabolite.id),)
                for r in self._reactions
            ],
            columns=["reaction", "flux", "factor"],
            index=[r.id for r in self._reactions],
        )
        # Scale fluxes by stoichiometric coefficient.
        flux["flux"] *= flux["factor"]

        if fva is not None:
            flux = flux.join(fva)
            view = flux[["flux", "minimum", "maximum"]]
            # Set fluxes below model tolerance to zero.
            flux[["flux", "minimum", "maximum"]] = view.where(
                view.abs() >= model.tolerance, 0
            )
            # Create the scaled compound flux.
            flux[["minimum", "maximum"]] = flux[["minimum", "maximum"]].mul(
                flux["factor"], axis=0
            )
            # Negative factors invert the minimum/maximum relationship.
            negative = flux["factor"] < 0
            tmp = flux.loc[negative, "maximum"]
            flux.loc[negative, "maximum"] = flux.loc[negative, "minimum"]
            flux.loc[negative, "minimum"] = tmp
            # Add zero to turn negative zero into positive zero for nicer display later.
            flux[["flux", "minimum", "maximum"]] += 0
        else:
            # Set fluxes below model tolerance to zero.
            flux.loc[flux["flux"].abs() < model.tolerance, "flux"] = 0
            # Add zero to turn negative zero into positive zero for nicer display later.
            flux["flux"] += 0

        # Create production table from producing fluxes or zero fluxes where the
        # metabolite is a product in the reaction.
        is_produced = (flux["flux"] > 0) | ((flux["flux"] == 0) & (flux["factor"] > 0))
        if fva is not None:
            self.producing_flux = flux.loc[
                is_produced, ["flux", "minimum", "maximum", "reaction"]
            ].copy()
        else:
            self.producing_flux = flux.loc[is_produced, ["flux", "reaction"]].copy()
        production = self.producing_flux["flux"].abs()
        self.producing_flux["percent"] = production / production.sum()

        # Create consumption table from consuming fluxes or zero fluxes where the
        # metabolite is a substrate in the reaction.
        is_consumed = (flux["flux"] < 0) | ((flux["flux"] == 0) & (flux["factor"] < 0))
        if fva is not None:
            self.consuming_flux = flux.loc[
                is_consumed, ["flux", "minimum", "maximum", "reaction"]
            ].copy()
        else:
            self.consuming_flux = flux.loc[is_consumed, ["flux", "reaction"]].copy()
        consumption = self.consuming_flux["flux"].abs()
        self.consuming_flux["percent"] = consumption / consumption.sum()

        self._flux = flux

[docs]    def _display_flux(
        self, frame: pd.DataFrame, names: bool, threshold: float
    ) -> pd.DataFrame:
        """
        Transform a flux data frame for display.

        Parameters
        ----------
        frame : pandas.DataFrame
            Either the producing or the consuming fluxes.
        names : bool
            Whether or not elements should be displayed by their common names.
        threshold : float
            Hide fluxes below the threshold from being displayed.

        Returns
        -------
        pandas.DataFrame
            The transformed pandas DataFrame with flux percentages and reaction
            definitions.

        """
        if "minimum" in frame.columns and "maximum" in frame.columns:
            frame = frame.loc[
                (frame["flux"].abs() >= threshold)
                | (frame["minimum"].abs() >= threshold)
                | (frame["maximum"].abs() >= threshold),
                :,
            ].copy()
        else:
            frame = frame.loc[frame["flux"].abs() >= threshold, :].copy()
        reactions = {r.id: r for r in self._reactions}
        frame["definition"] = [
            reactions[rxn_id].build_reaction_string(names)
            for rxn_id in frame["reaction"]
        ]
        if "minimum" in frame.columns and "maximum" in frame.columns:
            frame["range"] = list(
                frame[["minimum", "maximum"]].itertuples(index=False, name=None)
            )
            return frame[["percent", "flux", "range", "reaction", "definition"]]
        else:
            return frame[["percent", "flux", "reaction", "definition"]]

    @staticmethod
[docs]    def _string_table(frame: pd.DataFrame, float_format: str, column_width: int) -> str:
        """
        Create a pretty string representation of the data frame.

        Parameters
        ----------
        frame : pandas.DataFrame
            A pandas DataFrame of fluxes.
        float_format : str
            Format string for floats.
        column_width : int
            The maximum column width for each row.

        Returns
        -------
        str
            The data frame formatted as a pretty string.

        """
        frame.columns = [header.title() for header in frame.columns]
        return frame.to_string(
            header=True,
            index=False,
            na_rep="",
            formatters={
                "Percent": "{:.2%}".format,
                "Flux": f"{{:{float_format}}}".format,
                "Range": lambda pair: f"[{pair[0]:{float_format}}; "
                f"{pair[1]:{float_format}}]",
            },
            max_colwidth=column_width,
        )

    @staticmethod
[docs]    def _html_table(frame: pd.DataFrame, float_format: str) -> str:
        """
        Create an HTML representation of the data frame.

        Parameters
        ----------
        frame : pandas.DataFrame
            A pandas DataFrame of fluxes.
        float_format : str
            Format string for floats.

        Returns
        -------
        str
            The data frame formatted as HTML.

        """
        frame.columns = [header.title() for header in frame.columns]
        return frame.to_html(
            header=True,
            index=False,
            na_rep="",
            formatters={
                "Percent": "{:.2%}".format,
                "Flux": f"{{:{float_format}}}".format,
                "Range": lambda pair: f"[{pair[0]:{float_format}}; "
                f" {pair[1]:{float_format}}]",
            },
        )

[docs]    def to_string(
        self,
        names: bool = False,
        threshold: Optional[float] = None,
        float_format: str = ".4G",
        column_width: int = 79,
    ) -> str:
        """
        Return a pretty string representation of the metabolite summary.

        Parameters
        ----------
        names : bool, optional
            Whether or not elements should be displayed by their common names
            (default False).
        threshold : float, optional
            Hide fluxes below the threshold from being displayed. If no value is
            given, the model tolerance is used (default None).
        float_format : str, optional
            Format string for floats (default '.4G').
        column_width : int, optional
            The maximum column width for each row (default 79).

        Returns
        -------
        str
            The summary formatted as a pretty string.

        """
        threshold = self._normalize_threshold(threshold)

        if names:
            metabolite = shorten(
                self._metabolite.name, width=column_width, placeholder="..."
            )
        else:
            metabolite = shorten(
                self._metabolite.id, width=column_width, placeholder="..."
            )

        production = self._string_table(
            self._display_flux(self.producing_flux, names, threshold),
            float_format,
            column_width,
        )

        consumption = self._string_table(
            self._display_flux(self.consuming_flux, names, threshold),
            float_format,
            column_width,
        )

        return (
            f"{metabolite}\n"
            f"{'=' * len(metabolite)}\n"
            f"Formula: {self._metabolite.formula}\n\n"
            f"Producing Reactions\n"
            f"-------------------\n"
            f"{production}\n\n"
            f"Consuming Reactions\n"
            f"-------------------\n"
            f"{consumption}"
        )

[docs]    def to_html(
        self,
        names: bool = False,
        threshold: Optional[float] = None,
        float_format: str = ".4G",
    ) -> str:
        """
        Return a rich HTML representation of the metabolite summary.

        Parameters
        ----------
        names : bool, optional
            Whether or not elements should be displayed by their common names
            (default False).
        threshold : float, optional
            Hide fluxes below the threshold from being displayed. If no value is
            given, the model tolerance is used (default None).
        float_format : str, optional
            Format string for floats (default '.4G').

        Returns
        -------
        str
            The summary formatted as HTML.

        """
        threshold = self._normalize_threshold(threshold)

        if names:
            metabolite = self._metabolite.name
        else:
            metabolite = self._metabolite.id

        production = self._html_table(
            self._display_flux(self.producing_flux, names, threshold), float_format,
        )

        consumption = self._html_table(
            self._display_flux(self.consuming_flux, names, threshold), float_format,
        )

        return (
            f"<h3>{metabolite}</h3>"
            f"<p>{self._metabolite.formula}</p>"
            f"<h4>Producing Reactions</h4>"
            f"{production}"
            f"<h4>Consuming Reactions</h4>"
            f"{consumption}"
        )