Source code for cobra.core.gene

# -*- coding: utf-8 -*-

from __future__ import absolute_import

import re
from ast import (
    AST,
    And,
    BitAnd,
    BitOr,
    BoolOp,
    Expression,
    Module,
    Name,
    NodeTransformer,
    NodeVisitor,
    Or,
)
from ast import parse as ast_parse
from copy import deepcopy
from keyword import kwlist
from typing import FrozenSet, Iterable, Set, Tuple, Union
from warnings import warn

from cobra.core.dictlist import DictList
from cobra.core.species import Species
from cobra.util import resettable
from cobra.util.util import format_long_string


# Reserved words cannot appear as identifiers in the expression handed to the
# Python parser, so they get escaped first. "and" / "or" are the rule operators
# themselves and are therefore excluded; "True" / "False" are appended explicitly.
keywords = [word for word in kwlist if word not in ("and", "or")]
keywords += ["True", "False"]

# Zero-width match located directly in front of any reserved word.
keyword_re = re.compile(r"(?=\b(%s)\b)" % "|".join(keywords))

# Zero-width match located directly in front of a token that starts with a digit.
number_start_re = re.compile(r"(?=\b[0-9])")

# (character, placeholder) pairs: characters that are not valid in a Python
# identifier are swapped for the placeholder before parsing and swapped back
# afterwards.
replacements = (
    (".", "__COBRA_DOT__"),
    ("'", "__COBRA_SQUOTE__"),
    ('"', "__COBRA_DQUOTE__"),
    (":", "__COBRA_COLON__"),
    ("/", "__COBRA_FSLASH__"),
    ("\\", "__COBRA_BSLASH"),
    ("-", "__COBRA_DASH__"),
    ("=", "__COBRA_EQ__"),
)
class GPRWalker(NodeVisitor):
    """Identifies genes in an AST/GPR tree.

    Walks over the tree and collects the ``id`` of every ``Name`` node it
    reaches into ``gene_set``.

    Attributes
    ----------
    gene_set : set
        The identifiers found so far.
    """

    def __init__(self):
        NodeVisitor.__init__(self)
        self.gene_set = set()

    def visit_Name(self, node: Name) -> None:
        """Record the identifier carried by a ``Name`` node."""
        self.gene_set.add(node.id)

    def visit_BoolOp(self, node: BoolOp) -> None:
        """Visit every operand of a boolean operation exactly once.

        The operands are iterated explicitly instead of going through
        ``generic_visit``: the generic traversal only descends into
        list-valued fields, whereas this loop works for any iterable of
        operands (a tuple, for instance). Calling both, as was done
        previously, visited list-valued operands twice per level, which
        doubles the work for every level of nesting.
        """
        for val in node.values:
            self.visit(val)
class GPRCleaner(NodeTransformer):
    """Parses compiled ast of a gene_reaction_rule and identifies genes.

    Parts of the tree are rewritten to allow periods in gene ID's and
    bitwise boolean operations. The tree handed to ``visit`` is modified in
    place.

    Attributes
    ----------
    gene_set : set
        The (unescaped) gene identifiers encountered while visiting.
    """

    def __init__(self):
        NodeTransformer.__init__(self)
        self.gene_set = set()

    def visit_Name(self, node):
        """Undo the identifier escaping on a ``Name`` node and record its id.

        Strips a leading ``__cobra_escape__`` marker, turns every placeholder
        from ``replacements`` back into its original character, stores the
        resulting id in ``gene_set`` and returns the (mutated) node.
        """
        escape_prefix = "__cobra_escape__"
        if node.id.startswith(escape_prefix):
            node.id = node.id[len(escape_prefix):]
        for char, escaped in replacements:
            if escaped in node.id:
                node.id = node.id.replace(escaped, char)
        self.gene_set.add(node.id)
        return node

    def visit_BinOp(self, node):
        """Rewrite bitwise ``&`` / ``|`` into boolean ``and`` / ``or``.

        Raises
        ------
        TypeError
            If the binary operator is anything other than ``&`` or ``|``.
        """
        # Clean the operands first so nested expressions are rewritten too.
        self.generic_visit(node)
        # ``values`` is a list, as the ast node specification requires;
        # generic traversal helpers only descend into list-valued fields.
        if isinstance(node.op, BitAnd):
            return BoolOp(And(), [node.left, node.right])
        elif isinstance(node.op, BitOr):
            return BoolOp(Or(), [node.left, node.right])
        else:
            raise TypeError("unsupported operation '%s'" % node.op.__class__.__name__)
def parse_gpr(str_expr: str) -> Tuple:
    """Parse GPR into AST.

    Parameters
    ----------
    str_expr : string
        string with the gene reaction rule to parse

    Returns
    -------
    tuple
        Two elements: the parsed rule (a ``GPR`` instance) and its gene ids
        as returned by ``GPR.genes``.

    .. deprecated ::
        Use GPR.from_string(str_expr) in the future. Because of the GPR()
        class, this function will be removed.
    """
    # stacklevel=2 attributes the warning to the caller of this function.
    warn(
        "parse_gpr() will be removed soon. "
        "Use GPR.from_string(str_expr) in the future",
        DeprecationWarning,
        stacklevel=2,
    )
    gpr_tree = GPR.from_string(str_expr)
    return gpr_tree, gpr_tree.genes
class Gene(Species):
    """A Gene in a cobra model.

    Parameters
    ----------
    id : string
        The identifier to associate the gene with
    name: string
        A longer human readable name for the gene
    functional: bool
        Indicates whether the gene is functional.  If it is not functional
        then it cannot be used in an enzyme complex nor can its products be
        used.
    """

    def __init__(self, id=None, name="", functional=True):
        """Initialize a gene.

        Parameters
        ----------
        id: str
            A string that will identify the gene.
        name: str
            A (longer) string that will identify the gene. Can have more
            special characters.
        functional: bool
            A flag whether or not the gene is functional
        """
        Species.__init__(self, id=id, name=name)
        self._functional = functional

    @property
    def functional(self):
        """Flag indicating if the gene is functional.

        Changing the flag is reverted upon exit if executed within the model
        as context.
        """
        return self._functional

    @functional.setter
    @resettable
    def functional(self, value):
        if not isinstance(value, bool):
            raise ValueError("expected boolean")
        self._functional = value

    def knock_out(self):
        """Knockout gene by marking it as non-functional.

        Marks the gene as non-functional and sets the bounds of every
        associated reaction that is no longer functional to ``(0, 0)``.
        The change is reverted upon exit if executed within the model as
        context.
        """
        self.functional = False
        for reaction in self.reactions:
            if not reaction.functional:
                reaction.bounds = (0, 0)

    def remove_from_model(
        self, model=None, make_dependent_reactions_nonfunctional=True
    ):
        """Removes the association.

        Parameters
        ----------
        model : cobra model
           The model to remove the gene from
        make_dependent_reactions_nonfunctional : bool
           If True then replace the gene with 'False' in the gene
           association, else replace the gene with 'True'

        Raises
        ------
        Exception
            If ``model`` is given and differs from the gene's model, or if
            the gene is not in a model at all.

        .. deprecated :: 0.4
            Use cobra.manipulation.delete_model_genes to simulate knockouts
            and cobra.manipulation.remove_genes to remove genes from
            the model.
        """
        warn("Use cobra.manipulation.remove_genes instead")
        if model is not None:
            if model != self._model:
                raise Exception(
                    "%s is a member of %s, not %s"
                    % (repr(self), repr(self._model), repr(model))
                )
        if self._model is None:
            raise Exception("%s is not in a model" % repr(self))

        if make_dependent_reactions_nonfunctional:
            gene_state = "False"
        else:
            gene_state = "True"

        # Matches a gene id only when it is delimited by a space, a
        # parenthesis or the start/end of the rule. Written as a raw string
        # so that ``\(`` and ``\)`` are not invalid string escape sequences.
        gene_re_template = r"(^|(?<=( |\()))%s(?=( |\)|$))"
        the_gene_re = re.compile(gene_re_template % re.escape(self.id))

        # remove reference to the gene in all groups
        associated_groups = self._model.get_associated_groups(self)
        for group in associated_groups:
            group.remove_members(self)

        self._model.genes.remove(self)
        self._model = None

        # Iterate over a copy: the set is cleared at the end of the method.
        for the_reaction in list(self._reaction):
            the_reaction._gene_reaction_rule = the_gene_re.sub(
                gene_state, the_reaction.gene_reaction_rule
            )
            the_reaction._genes.remove(self)
            # Now, deactivate the reaction if its gene association evaluates
            # to False
            the_gene_reaction_relation = the_reaction.gene_reaction_rule
            for other_gene in the_reaction._genes:
                other_gene_re = re.compile(
                    gene_re_template % re.escape(other_gene.id)
                )
                the_gene_reaction_relation = other_gene_re.sub(
                    "True", the_gene_reaction_relation
                )

            # NOTE(review): eval() executes the rule text as Python code.
            # This is unsafe if gene reaction rules can come from untrusted
            # model files; kept as-is to preserve behaviour.
            if not eval(the_gene_reaction_relation):
                the_reaction.lower_bound = 0
                the_reaction.upper_bound = 0
        self._reaction.clear()

    def _repr_html_(self):
        """Return an HTML table summarising the gene."""
        return """
        <table>
            <tr>
                <td><strong>Gene identifier</strong></td><td>{id}</td>
            </tr><tr>
                <td><strong>Name</strong></td><td>{name}</td>
            </tr><tr>
                <td><strong>Memory address</strong></td>
                <td>{address}</td>
            </tr><tr>
                <td><strong>Functional</strong></td><td>{functional}</td>
            </tr><tr>
                <td><strong>In {n_reactions} reaction(s)</strong></td><td>
                    {reactions}</td>
            </tr>
        </table>""".format(
            id=self.id,
            name=self.name,
            functional=self.functional,
            address="0x0%x" % id(self),
            n_reactions=len(self.reactions),
            reactions=format_long_string(", ".join(r.id for r in self.reactions), 200),
        )
class GPR(Module):
    """A Gene Reaction rule in a cobra model, using AST as base class.

    Parameters
    ----------
    gpr_from : Expression or Module or AST
        A GPR in AST format
    """

    def __init__(self, gpr_from: Union[Expression, Module, AST] = None, **kwargs):
        """Build a GPR from an already parsed AST.

        Parameters
        ----------
        gpr_from : Expression or Module, optional
            Parsed rule. It is cleaned in place by ``GPRCleaner`` and its
            body is deep-copied into this object.
        **kwargs
            Passed on to the ``Module`` initializer.

        Raises
        ------
        TypeError
            If ``gpr_from`` is a string (use ``from_string`` instead), is of
            any other unsupported type, or contains nodes that cannot be
            evaluated as a rule.
        """
        super().__init__(**kwargs)
        self._genes = set()
        self.body = None
        if gpr_from:
            if isinstance(gpr_from, str):
                raise TypeError(
                    f"GPR accepts AST, not string. "
                    f'Next time, use GPR().from_string("{gpr_from}")'
                )
            elif isinstance(gpr_from, (Expression, Module)):
                cleaner = GPRCleaner()
                cleaner.visit(gpr_from)
                self._genes = deepcopy(cleaner.gene_set)
                # noinspection PyTypeChecker
                self.body = deepcopy(gpr_from.body)
                # Evaluating once raises TypeError early if the tree holds
                # node types the evaluator does not support.
                self.eval()
            else:
                raise TypeError("GPR requires AST Expression or Module")

    @classmethod
    def from_string(cls, string_gpr: str) -> "GPR":
        """Construct a GPR from a string.

        Parameters
        ----------
        string_gpr: str
            a string that describes the gene rules, in a format like
            A & B

        Returns
        -------
        GPR:
            A new GPR whose body is the parsed AST of the rule and whose
            genes are the gene ids found in it. An empty GPR is returned
            for an empty string or (after warnings) for a rule that cannot
            be parsed.

        Raises
        ------
        TypeError
            If ``string_gpr`` is not a ``str``.
        """
        if not isinstance(string_gpr, str):
            raise TypeError(
                f"{cls.__name__}.from_string "
                f"requires a str argument, not {type(string_gpr)}."
            )
        gpr = cls()
        uppercase_AND = re.compile(r"\bAND\b")
        uppercase_OR = re.compile(r"\bOR\b")

        def _escape(rule: str) -> str:
            """Turn a rule into text the Python parser accepts as identifiers."""
            for char, escaped in replacements:
                if char in rule:
                    rule = rule.replace(char, escaped)
            rule = keyword_re.sub("__cobra_escape__", rule)
            return number_start_re.sub("__cobra_escape__", rule)

        str_expr = string_gpr.strip()
        if len(str_expr) == 0:
            gpr.body = None
            return gpr
        escaped_str = _escape(str_expr)
        try:
            tree = ast_parse(escaped_str, "<string>", "eval")
        except (SyntaxError, TypeError):
            if "AND" in string_gpr or "OR" in string_gpr:
                warn(
                    f"Uppercase AND/OR found in rule '{string_gpr}'.",
                    SyntaxWarning,
                )
                string_gpr = uppercase_AND.sub("and", string_gpr)
                string_gpr = uppercase_OR.sub("or", string_gpr)
            try:
                # Retry on the stripped and re-escaped rule. Parsing the raw
                # string here would lose the escaping, so gene ids that start
                # with a digit or contain special characters would not be
                # read as names.
                tree = ast_parse(_escape(string_gpr.strip()), "<string>", "eval")
            except SyntaxError as e:
                warn(
                    f"Malformed gene_reaction_rule '{string_gpr}' for {repr(gpr)}",
                    SyntaxWarning,
                )
                warn("GPR will be empty")
                warn(e.msg)
                return gpr
        return cls(tree)

    @property
    def genes(self) -> FrozenSet:
        """To check the genes.

        This property updates the genes before returning them, in case the
        GPR was changed and the genes weren't.

        Returns
        -------
        genes: frozenset
            All the genes in a frozen set. Do not try to change them with
            this property.
        """
        self.update_genes()
        return frozenset(self._genes)

    def update_genes(self) -> None:
        """Update genes, used after changes in GPR.

        Walks along the AST tree of the GPR class, and modifies self._genes.
        Nothing is changed when the body is empty.
        """
        if self.body:
            walker = GPRWalker()
            walker.visit(self)
            self._genes = deepcopy(walker.gene_set)

    def _eval_gpr(
        self,
        expr: Union[Expression, list, BoolOp, Name],
        knockouts: Union[DictList, set],
    ) -> bool:
        """Evaluate compiled ast of gene_reaction_rule with knockouts.

        Parameters
        ----------
        expr : Expression or GPR or list or BoolOp or Name
            The ast of the gene reaction rule
        knockouts : DictList, set
            Set of genes that are knocked out

        Returns
        -------
        bool
            True if the gene reaction rule is true with the given knockouts
            otherwise false

        Raises
        ------
        TypeError
            If the tree contains a node or operator that is not supported.
        """
        # just always call the recursions as self._eval_gpr(a, b)
        if isinstance(expr, (Expression, GPR)):
            if not expr.body:
                return True
            return self._eval_gpr(expr.body, knockouts)
        elif isinstance(expr, Name):
            return expr.id not in knockouts
        elif isinstance(expr, BoolOp):
            op = expr.op
            if isinstance(op, Or):
                # noinspection PyTypeChecker
                return any(self._eval_gpr(i, knockouts) for i in expr.values)
            elif isinstance(op, And):
                # noinspection PyTypeChecker
                return all(self._eval_gpr(i, knockouts) for i in expr.values)
            else:
                raise TypeError(f"Unsupported operation: {op.__class__.__name__}")
        elif expr is None:
            return True
        else:
            raise TypeError(f"Unsupported operation: {repr(expr)}")

    def eval(self, knockouts: Union[DictList, Set, str, Iterable] = None) -> bool:
        """Evaluate compiled ast of gene_reaction_rule with knockouts.

        This function calls _eval_gpr, but allows more flexibility in input,
        including name, and list.

        Parameters
        ----------
        knockouts
            Which gene or genes to knock out. A single string is treated as
            one gene id.

        Returns
        -------
        bool
            True if the gene reaction rule is true with the given knockouts
            otherwise false
        """
        if knockouts is None:
            knockouts = set()
        elif isinstance(knockouts, str):
            # Wrap a single id so that membership tests compare whole ids
            # instead of doing substring matching on the string.
            knockouts = {knockouts}
        if self.body:
            return self._eval_gpr(self.body, knockouts=knockouts)
        else:
            return True

    def _ast2str(
        self,
        expr: Union[Expression, BoolOp, Name, list],
        level: int = 0,
        names: dict = None,
    ) -> str:
        """Convert compiled ast to gene_reaction_rule str.

        Parameters
        ----------
        expr : AST or GPR or list or Name or BoolOp
            the (sub)tree of a gene reaction rule, e.g. the tree of
            "a and b"
        level : int
            internal use only; nested boolean operations (level > 0) are
            wrapped in parentheses
        names : dict
            Dict where each element id a gene identifier and the value is the
            gene name. Use this to get a rule str which uses names instead.
            This should be done for display purposes only. All
            gene_reaction_rule strings which are computed with should use
            the id.

        Returns
        ------
        string
            The gene reaction rule

        Raises
        ------
        TypeError
            If the tree contains a node or operator that is not supported.
        """
        if isinstance(expr, (Expression, GPR)):
            return self._ast2str(expr.body, 0, names) if expr.body else ""
        elif isinstance(expr, Name):
            return names.get(expr.id, expr.id) if names else expr.id
        elif isinstance(expr, BoolOp):
            op = expr.op
            if isinstance(op, Or):
                # noinspection PyTypeChecker
                str_exp = " or ".join(
                    self._ast2str(i, level + 1, names) for i in expr.values
                )
            elif isinstance(op, And):
                # noinspection PyTypeChecker
                str_exp = " and ".join(
                    self._ast2str(i, level + 1, names) for i in expr.values
                )
            else:
                # noinspection PyTypeChecker
                raise TypeError(f"Unsupported operation: {op.__class__.__name__}")
            return f"({str_exp})" if level else str_exp
        elif expr is None or (isinstance(expr, list) and len(expr) == 0):
            return ""
        else:
            raise TypeError(f"Unsupported operation: {repr(expr)}")

    def to_string(self, names: dict = None) -> str:
        """Convert compiled ast to gene_reaction_rule str.

        Parameters
        ----------
        self : GPR
           compiled ast Module describing GPR
        names: dict
           dictionary of gene ids to gene names. If this is empty,
           returns gene ids

        Returns
        ------
        string
            The gene reaction rule

        Notes
        -----
        Calls _ast2str()
        """
        # noinspection PyTypeChecker
        return self._ast2str(self, names=names)

    def copy(self):
        """Copy a GPR."""
        return deepcopy(self)

    def __repr__(self):
        return "%s.%s(%r)" % (
            self.__class__.__module__,
            self.__class__.__qualname__,
            self.to_string(),
        )

    def __str__(self):
        """Convert compiled ast to gene_reaction_rule str.

        Parameters
        ----------
        self : GPR
           compiled ast Module describing GPR

        Returns
        ------
        string
            The gene reaction rule
        """
        return self.to_string(names={})

    def _repr_html_(self):
        """Return an HTML snippet showing the rule string."""
        return """<p><strong>GPR</strong></p><p>{gpr}</p>""".format(
            gpr=format_long_string(self.to_string(), 100)
        )

    # The method was originally published under this misspelled name; the
    # alias keeps existing callers working.
    _repr_html__ = _repr_html_

    # def as_symbolic(self):
    #     # ...
def eval_gpr(expr, knockouts):
    """Evaluate compiled ast of gene_reaction_rule with knockouts.

    .. deprecated ::
        Use GPR().eval() in the future. Because of the GPR() class,
        this function will be removed.

    Parameters
    ----------
    expr : Expression or GPR
        The ast of the gene reaction rule
    knockouts : DictList, set
        Set of genes that are knocked out

    Returns
    -------
    bool
        True if the gene reaction rule is true with the given knockouts
        otherwise false
    """
    # stacklevel=2 attributes the warning to the caller of this function.
    warn(
        "eval_gpr() will be removed soon. "
        "Use GPR().eval(knockouts) in the future",
        DeprecationWarning,
        stacklevel=2,
    )
    if isinstance(expr, GPR):
        return expr.eval(knockouts=knockouts)
    else:
        # Anything that is not already a GPR is wrapped in one first.
        return GPR(expr).eval(knockouts=knockouts)
# functions for gene reaction rules
def ast2str(expr: Union[Expression, GPR], level: int = 0, names: dict = None) -> str:
    """Convert compiled ast to gene_reaction_rule str.

    Parameters
    ----------
    expr : AST or GPR
        AST or GPR
    level : int
        internal use only; not used by this wrapper
    names : dict
        Dict where each element id a gene identifier and the value is the
        gene name. Use this to get a rule str which uses names instead. This
        should be done for display purposes only. All gene_reaction_rule
        strings which are computed with should use the id.

    Returns
    ------
    string
        The gene reaction rule

    .. deprecated ::
        Use GPR.to_string(names=) in the future. Because of the GPR() class,
        this function will be removed.
    """
    # stacklevel=2 attributes the warning to the caller of this function.
    warn(
        "ast2str() will be removed soon. Use gpr.to_string(names=names) in the future",
        DeprecationWarning,
        stacklevel=2,
    )
    if isinstance(expr, GPR):
        return expr.to_string(names=names)
    else:
        # Anything that is not already a GPR is wrapped in one first.
        return GPR(expr).to_string(names=names)