Source code for cobra.flux_analysis.summary

from six.moves import zip_longest
from six import print_, iteritems

import pandas as pd
from tabulate import tabulate

from .variability import flux_variability_analysis


[docs]def format_long_string(string, max_length): if len(string) > max_length: string = string[:max_length - 3] string += '...' return string
[docs]def metabolite_summary(met, threshold=0.01, fva=False, floatfmt='.3g', **solver_args): """Print a summary of the reactions which produce and consume this metabolite threshold: float a value below which to ignore reaction fluxes fva: float (0->1), or None Whether or not to include flux variability analysis in the output. If given, fva should be a float between 0 and 1, representing the fraction of the optimum objective to be searched. floatfmt: string format method for floats, passed to tabulate. Default is '.3g'. """ def rxn_summary(r): out = { 'id': format_long_string(r.id, 10), 'flux': r.x * r.metabolites[met], 'reaction': format_long_string(r.reaction, 40 if fva else 50), } if rxn_summary.fva_results is not False: fmax = rxn_summary.fva_results.loc[r.id, 'maximum'] fmin = rxn_summary.fva_results.loc[r.id, 'minimum'] imax = r.metabolites[met] * fmax imin = r.metabolites[met] * fmin # Correct 'max' and 'min' for negative values out.update({ 'fmin': imin if abs(imin) <= abs(imax) else imax, 'fmax': imax if abs(imin) <= abs(imax) else imin, }) return out if fva: rxn_summary.fva_results = pd.DataFrame(flux_variability_analysis( met.model, met.reactions, fraction_of_optimum=fva, **solver_args)).T else: rxn_summary.fva_results = False flux_summary = pd.DataFrame((rxn_summary(r) for r in met.reactions)) assert flux_summary.flux.sum() < 1E-6, "Error in flux balance" flux_summary = _process_flux_dataframe(flux_summary, fva, threshold, floatfmt) flux_summary['percent'] = 0 total_flux = flux_summary[flux_summary.is_input].flux.sum() flux_summary.loc[flux_summary.is_input, 'percent'] = \ flux_summary.loc[flux_summary.is_input, 'flux'] / total_flux flux_summary.loc[~flux_summary.is_input, 'percent'] = \ flux_summary.loc[~flux_summary.is_input, 'flux'] / total_flux flux_summary['percent'] = flux_summary.percent.apply( lambda x: '{:.0%}'.format(x)) if fva: flux_table = tabulate( flux_summary.loc[:, ['percent', 'flux', 'fva_fmt', 'id', 'reaction']].values, floatfmt=floatfmt, headers=['%', 'FLUX', 'RANGE', 'RXN ID', 'REACTION']).split('\n') else: flux_table = tabulate( flux_summary.loc[:, ['percent', 'flux', 'id', 'reaction']].values, floatfmt=floatfmt, headers=['%', 'FLUX', 'RXN ID', 'REACTION'] ).split('\n') flux_table_head = flux_table[:2] met_tag = "{0} ({1})".format(format_long_string(met.name, 45), format_long_string(met.id, 10)) head = "PRODUCING REACTIONS -- " + met_tag print_(head) print_("-" * len(head)) print_('\n'.join(flux_table_head)) print_('\n'.join( pd.np.array(flux_table[2:])[flux_summary.is_input.values])) print_() print_("CONSUMING REACTIONS -- " + met_tag) print_("-" * len(head)) print_('\n'.join(flux_table_head)) print_('\n'.join( pd.np.array(flux_table[2:])[~flux_summary.is_input.values]))
[docs]def model_summary(model, threshold=1E-8, fva=None, floatfmt='.3g', **solver_args): """Print a summary of the input and output fluxes of the model. threshold: float tolerance for determining if a flux is zero (not printed) fva: int or None Whether or not to calculate and report flux variability in the output summary floatfmt: string format method for floats, passed to tabulate. Default is '.3g'. """ # Create a dataframe of objective fluxes obj_fluxes = pd.DataFrame({key: key.x * value for key, value in iteritems(model.objective)}, index=['flux']).T obj_fluxes['id'] = obj_fluxes.apply( lambda x: format_long_string(x.name.id, 15), 1) # Build a dictionary of metabolite production from the boundary reactions boundary_reactions = model.reactions.query(lambda x: x, 'boundary') # Calculate FVA results if requested if fva: fva_results = pd.DataFrame( flux_variability_analysis(model, reaction_list=boundary_reactions, fraction_of_optimum=fva, **solver_args)).T metabolite_fluxes = {} for rxn in boundary_reactions: for met, stoich in iteritems(rxn.metabolites): metabolite_fluxes[met] = { 'id': format_long_string(met.id, 15), 'flux': stoich * rxn.x} if fva: imin = stoich * fva_results.loc[rxn.id]['minimum'] imax = stoich * fva_results.loc[rxn.id]['maximum'] # Correct 'max' and 'min' for negative values metabolite_fluxes[met].update({ 'fmin': imin if abs(imin) <= abs(imax) else imax, 'fmax': imax if abs(imin) <= abs(imax) else imin, }) # Generate a dataframe of boundary fluxes metabolite_fluxes = pd.DataFrame(metabolite_fluxes).T metabolite_fluxes = _process_flux_dataframe( metabolite_fluxes, fva, threshold, floatfmt) # Begin building string output table def get_str_table(species_df, fva=False): """Formats a string table for each column""" if not fva: return tabulate(species_df.loc[:, ['id', 'flux']].values, floatfmt=floatfmt, tablefmt='plain').split('\n') else: return tabulate( species_df.loc[:, ['id', 'flux', 'fva_fmt']].values, floatfmt=floatfmt, tablefmt='simple', headers=['id', 'Flux', 'Range']).split('\n') in_table = get_str_table( metabolite_fluxes[metabolite_fluxes.is_input], fva=fva) out_table = get_str_table( metabolite_fluxes[~metabolite_fluxes.is_input], fva=fva) obj_table = get_str_table(obj_fluxes, fva=False) # Print nested output table print_(tabulate( [entries for entries in zip_longest(in_table, out_table, obj_table)], headers=['IN FLUXES', 'OUT FLUXES', 'OBJECTIVES'], tablefmt='simple'))
def _process_flux_dataframe(flux_dataframe, fva, threshold, floatfmt): """Some common methods for processing a database of flux information into print-ready formats. Used in both model_summary and metabolite_summary. """ # Drop unused boundary fluxes if not fva: flux_dataframe = flux_dataframe[ flux_dataframe.flux.abs() > threshold].copy() else: flux_dataframe = flux_dataframe[ (flux_dataframe.flux.abs() > threshold) | (flux_dataframe.fmin.abs() > threshold) | (flux_dataframe.fmax.abs() > threshold)].copy() # Make all fluxes positive if not fva: flux_dataframe['is_input'] = flux_dataframe.flux >= 0 flux_dataframe.flux = \ flux_dataframe.flux.abs().astype('float').round(6) else: def get_direction(flux, fmin, fmax): """ decide whether or not to reverse a flux to make it positive """ if flux < 0: return -1 elif flux > 0: return 1 elif (fmax > 0) & (fmin <= 0): return 1 elif (fmax < 0) & (fmin >= 0): return -1 elif ((fmax + fmin)/2) < 0: return -1 else: return 1 sign = flux_dataframe.apply( lambda x: get_direction(x.flux, x.fmin, x.fmax), 1) flux_dataframe['is_input'] = sign == 1 flux_dataframe.loc[:, ['flux', 'fmin', 'fmax']] = \ flux_dataframe.loc[:, ['flux', 'fmin', 'fmax']].multiply( sign, 0).astype('float').round(6) flux_dataframe.loc[:, ['flux', 'fmin', 'fmax']] = \ flux_dataframe.loc[:, ['flux', 'fmin', 'fmax']].applymap( lambda x: x if abs(x) > 1E-6 else 0) if fva: flux_dataframe['fva_fmt'] = flux_dataframe.apply( lambda x: ("[{0.fmin:" + floatfmt + "}, {0.fmax:" + floatfmt + "}]").format(x), 1) flux_dataframe = flux_dataframe.sort_values( by=['flux', 'fmax', 'fmin', 'id'], ascending=[False, False, False, True]) else: flux_dataframe = flux_dataframe.sort_values( by=['flux', 'id'], ascending=[False, True]) return flux_dataframe