# Source code for cobra.core.gene

"""Provide functions for dealing with genes and gene product rules (GPR)."""

import logging
import re
from ast import (
    AST,
    And,
    BinOp,
    BitAnd,
    BitOr,
    BoolOp,
    Expression,
    Module,
    Name,
    NodeTransformer,
    NodeVisitor,
    Or,
    copy_location,
)
from ast import parse as ast_parse
from copy import deepcopy
from keyword import kwlist
from typing import FrozenSet, Iterable, Optional, Set, Tuple, Union
from warnings import warn

import sympy.logic.boolalg as spl
from sympy import Symbol

from cobra.core.dictlist import DictList
from cobra.core.species import Species
from cobra.util import resettable
from cobra.util.util import format_long_string


# TODO - When https://github.com/symengine/symengine.py/issues/334 is resolved,
#  change sympy.Symbol (above in imports) to optlang.symbolics.Symbol

logger = logging.getLogger(__name__)

# Gene ids that collide with a Python keyword cannot be parsed into
# ``ast.Name`` nodes, so they get an escape prefix before parsing.  ``and``
# and ``or`` are excluded because they are the GPR operators themselves.
keywords = [kw for kw in kwlist if kw not in ("and", "or")]
# ``kwlist`` already lists True/False on Python 3; only add them when they
# are missing so the pattern below has no duplicated alternatives.
keywords += [kw for kw in ("True", "False") if kw not in keywords]

# Zero-width match in front of every whole-word keyword.
keyword_re = re.compile(rf"(?=\b({'|'.join(keywords)})\b)")
# Zero-width match in front of every token that starts with a digit.
number_start_re = re.compile(r"(?=\b[0-9])")

# (character, escape token) pairs: ``GPR.from_string`` replaces each character
# by its token before parsing and ``GPRCleaner.visit_Name`` reverses it.
# NOTE: the backslash token lacks the trailing "__" the other tokens have.
# Escaping and unescaping both read this table, so it still round-trips.
replacements = (
    (".", "__COBRA_DOT__"),
    ("'", "__COBRA_SQUOTE__"),
    ('"', "__COBRA_DQUOTE__"),
    (":", "__COBRA_COLON__"),
    ("/", "__COBRA_FSLASH__"),
    ("\\", "__COBRA_BSLASH"),
    ("-", "__COBRA_DASH__"),
    ("=", "__COBRA_EQ__"),
)
class GPRWalker(NodeVisitor):
    """Identifies genes in an AST/GPR tree.

    Walks over the tree and collects the id of each ``Name`` node in
    ``gene_set``.
    """

    def __init__(self, **kwargs) -> None:
        """Initialize a new object.

        Other Parameters
        ----------------
        **kwargs:
            Further keyword arguments are passed on to the parent class.
        """
        super().__init__(**kwargs)
        self.gene_set = set()

    def visit_Name(self, node: Name) -> None:
        """Visit a Gene node and add the id to the gene_set.

        Parameters
        ----------
        node: ast.Name
            The node to visit
        """
        self.gene_set.add(node.id)

    def visit_BoolOp(self, node: BoolOp) -> None:
        """Visit a BoolOp node (AND/OR) and descend into its children.

        Parameters
        ----------
        node: ast.BoolOp
            The node to visit
        """
        # generic_visit() already visits every entry of node.values.  Visiting
        # them a second time would double the work at each nesting level.
        self.generic_visit(node)
class GPRCleaner(NodeTransformer):
    """Parses compiled ast of a gene_reaction_rule and identifies genes.

    Parts of the tree are rewritten to allow periods (and other escaped
    characters) in gene ID's and to turn bitwise ``&``/``|`` into boolean
    ``and``/``or``.
    """

    # Prefix placed in front of ids that would otherwise not parse as a name.
    _ESCAPE_PREFIX = "__cobra_escape__"

    def __init__(self, **kwargs) -> None:
        """Initialize a new object.

        Other Parameters
        ----------------
        **kwargs:
            Further keyword arguments are passed on to the parent class.
        """
        super().__init__(**kwargs)
        self.gene_set = set()

    def visit_Name(self, node: Name) -> Name:
        """Visit a Gene node, restore its original id and record it.

        The escape prefix is stripped and every escape token from the
        module-level ``replacements`` table is turned back into its character.

        Parameters
        ----------
        node: ast.Name
            The node to visit

        Returns
        -------
        node: ast.Name
            The same node, with the id changed in place.
        """
        if node.id.startswith(self._ESCAPE_PREFIX):
            node.id = node.id[len(self._ESCAPE_PREFIX):]
        for char, escaped in replacements:
            if escaped in node.id:
                node.id = node.id.replace(escaped, char)
        self.gene_set.add(node.id)
        return node

    def visit_BinOp(self, node: BinOp) -> BoolOp:
        """Replace a bitwise ``&``/``|`` node by the boolean AND/OR node.

        Parameters
        ----------
        node: ast.BinOp
            The node to visit. Operators other than ``&`` and ``|`` will cause
            an error.

        Returns
        -------
        ast.BoolOp
            A new boolean node whose values are the transformed operands.

        Raises
        ------
        TypeError
            If the operator is neither ``&`` nor ``|``.
        """
        # Transform the operands first so nested bitwise operators and gene
        # names are already cleaned when the replacement node is built.
        self.generic_visit(node)
        if isinstance(node.op, BitAnd):
            bool_op = And()
        elif isinstance(node.op, BitOr):
            bool_op = Or()
        else:
            raise TypeError(f"unsupported operation '{node.op.__class__.__name__}'")
        # ``values`` is a list (as produced by the parser), and the new node
        # keeps the source position of the node it replaces.
        return copy_location(BoolOp(bool_op, [node.left, node.right]), node)
def parse_gpr(str_expr: str) -> Tuple:
    """Parse GPR into AST.

    Parameters
    ----------
    str_expr : string
        string with the gene reaction rule to parse

    Returns
    -------
    tuple
        The ``GPR`` built from the string and a frozenset of its gene ids.

    .. deprecated ::
        Use GPR.from_string(str_expr) in the future. Because of the GPR()
        class, this function will be removed.
    """
    warn(
        "parse_gpr() will be removed soon. "
        "Use GPR.from_string(str_expr) in the future",
        DeprecationWarning,
        stacklevel=2,  # attribute the warning to the caller, not this wrapper
    )
    gpr_tree = GPR.from_string(str_expr)
    return gpr_tree, gpr_tree.genes
class Gene(Species):
    """A Gene in a cobra model.

    Parameters
    ----------
    id : string
        The identifier to associate the gene with
    name: string
        A longer human readable name for the gene
    functional: bool
        Indicates whether the gene is functional.  If it is not functional
        then it cannot be used in an enzyme complex nor can its products be
        used.
    """

    # noinspection PyShadowingBuiltins
    def __init__(
        self, id: str = None, name: str = "", functional: bool = True
    ) -> None:
        """Create a gene.

        Parameters
        ----------
        id: str
            Identifier of the gene.
        name: str
            Longer, human readable label; may contain more special characters.
        functional: bool
            Initial value of the ``functional`` flag.
        """
        super().__init__(id=id, name=name)
        self._functional = functional

    @property
    def functional(self) -> bool:
        """Flag indicating if the gene is functional.

        Changing the flag is reverted upon exit if executed within the model
        as context.
        """
        return self._functional

    @functional.setter
    @resettable
    def functional(self, value: bool) -> None:
        # Only real booleans are accepted; anything else is rejected before
        # the stored flag is touched.
        if isinstance(value, bool):
            self._functional = value
            return
        raise ValueError("expected boolean")

    def knock_out(self) -> None:
        """Knockout gene by marking it as non-functional.

        Marks the gene as non-functional and sets the bounds of every
        associated reaction that is no longer functional to zero.
        The change is reverted upon exit if executed within the model as
        context.
        """
        self.functional = False
        for reaction in self.reactions:
            if reaction.functional:
                continue
            reaction.bounds = (0, 0)

    def _repr_html_(self):
        """Return an HTML table summarising the gene."""
        reaction_ids = format_long_string(
            ", ".join(r.id for r in self.reactions), 200
        )
        return f"""
        <table>
            <tr>
                <td><strong>Gene identifier</strong></td><td>{self.id}</td>
            </tr><tr>
                <td><strong>Name</strong></td><td>{self.name}</td>
            </tr><tr>
                <td><strong>Memory address</strong></td>
                <td>{id(self):#x}</td>
            </tr><tr>
                <td><strong>Functional</strong></td><td>{self.functional}</td>
            </tr><tr>
                <td><strong>In {len(self.reactions)} reaction(s)</strong></td><td>
                    {reaction_ids}
                </td>
            </tr>
        </table>"""
class GPR(Module):
    """A Gene Reaction rule in a cobra model, using AST as base class.

    Parameters
    ----------
    gpr_from : Expression or Module or AST
        A GPR in AST format
    """

    def __init__(self, gpr_from: Union[Expression, Module, AST] = None, **kwargs):
        """Initialize a GPR.

        Parameters
        ----------
        gpr_from: Expression, Module, AST
            An AST expression that will be parsed to GPR. The tree is cleaned
            in place by ``GPRCleaner`` and its body is deep-copied.
        **kwargs:
            Further keyword arguments are passed on to the parent class.

        Raises
        ------
        TypeError
            If `gpr_from` is a non-empty string or any other object that is
            not an ``Expression`` or ``Module``, or if the tree contains
            unsupported nodes.
        """
        super().__init__(**kwargs)
        self._genes = set()
        self.body: Optional[list] = None
        if gpr_from:
            if isinstance(gpr_from, str):
                raise TypeError(
                    f"GPR accepts AST, not string. "
                    f'Next time, use GPR().from_string("{gpr_from}")'
                )
            elif isinstance(gpr_from, (Expression, Module)):
                cleaner = GPRCleaner()
                cleaner.visit(gpr_from)
                self._genes = deepcopy(cleaner.gene_set)
                self.body = deepcopy(gpr_from.body)
                # Evaluating once surfaces unsupported nodes as a TypeError
                # at construction time.
                self.eval()
            else:
                raise TypeError("GPR requires AST Expression or Module")

    @classmethod
    def from_string(cls, string_gpr: str) -> "GPR":
        """Construct a GPR from a string.

        Parameters
        ----------
        string_gpr: str
            a string that describes the gene rules, in a format like
            A & B

        Returns
        -------
        GPR:
            A new GPR whose body is the parsed AST of the rule and whose genes
            are the gene ids found in it. An empty GPR is returned for an
            empty or malformed rule (the latter with a warning).

        Raises
        ------
        TypeError
            If `string_gpr` is not a str.
        """
        if not isinstance(string_gpr, str):
            raise TypeError(
                f"{cls.__name__}.from_string "
                f"requires a str argument, not {type(string_gpr)}."
            )
        gpr = cls()
        uppercase_and = re.compile(r"\bAND\b")
        uppercase_or = re.compile(r"\bOR\b")
        str_expr = string_gpr.strip()
        if not str_expr:
            gpr.body = None
            return gpr
        # Make the ids parseable as Python names: escape special characters,
        # keywords and ids that start with a digit.
        for char, escaped in replacements:
            if char in str_expr:
                str_expr = str_expr.replace(char, escaped)
        escaped_str = keyword_re.sub("__cobra_escape__", str_expr)
        escaped_str = number_start_re.sub("__cobra_escape__", escaped_str)
        # Some mat models have () in gr_rules which leads to a complicated
        # error later
        escaped_str = escaped_str.replace("()", "")
        try:
            tree = ast_parse(escaped_str, "<string>", "eval")
        except (SyntaxError, TypeError) as e:
            # Only blame uppercase operators when they occur as whole words;
            # a plain substring test also fires on ids such as "YOR123W".
            if uppercase_and.search(escaped_str) or uppercase_or.search(escaped_str):
                # noinspection PyTypeChecker
                logger.warning(
                    f"Uppercase AND/OR found in rule '{string_gpr}'.",
                )
                # Only SyntaxError carries ``msg``.
                logger.warning(getattr(e, "msg", str(e)))
                warn(
                    "Uppercase AND/OR found in rule '{}'.".format(string_gpr),
                    SyntaxWarning,
                )
                escaped_str = uppercase_and.sub("and", escaped_str)
                escaped_str = uppercase_or.sub("or", escaped_str)
            try:
                tree = ast_parse(escaped_str, "<string>", "eval")
            except (SyntaxError, TypeError):
                # noinspection PyTypeChecker
                logger.warning(
                    f"Malformed gene_reaction_rule '{escaped_str}' for {string_gpr}",
                    exc_info=1,
                )
                logger.warning("GPR will be empty")
                warn(
                    "Malformed gene_reaction_rule '{}'".format(escaped_str),
                    SyntaxWarning,
                )
                return gpr
        gpr = cls(tree)
        gpr.update_genes()
        return gpr

    @property
    def genes(self) -> FrozenSet:
        """To check the genes.

        This property updates the genes before returning them, in case the GPR
        was changed and the genes weren't.

        Returns
        -------
        genes: frozenset
            All the genes in a frozen set. Do not try to change them with this
            property.
        """
        self.update_genes()
        return frozenset(self._genes)

    def update_genes(self) -> None:
        """Update genes, used after changes in GPR.

        Walks along the AST tree of the GPR class, and modifies self._genes.
        An empty GPR has no genes.
        """
        if self.body:
            walker = GPRWalker()
            walker.visit(self)
            self._genes = set(walker.gene_set)
        else:
            # Do not keep genes of a body that has since been removed.
            self._genes = set()

    def _eval_gpr(
        self,
        expr: Union[Expression, list, BoolOp, Name],
        knockouts: Union[DictList, set],
    ) -> bool:
        """Evaluate compiled ast of gene_reaction_rule with knockouts.

        Parameters
        ----------
        expr : Expression or GPR or list or BoolOp or Name
            The ast of the gene reaction rule
        knockouts : DictList, set
            Set of genes that are knocked out

        Returns
        -------
        bool
            True if the gene reaction rule is true with the given knockouts
            otherwise false

        Raises
        ------
        TypeError
            If the tree contains a node or operator other than Name, AND, OR.
        """
        # just always call the recursions as self._eval_gpr(a, b)
        if isinstance(expr, (Expression, GPR)):
            if not expr.body:
                return True
            return self._eval_gpr(expr.body, knockouts)
        elif isinstance(expr, Name):
            return expr.id not in knockouts
        elif isinstance(expr, BoolOp):
            op = expr.op
            if isinstance(op, Or):
                # noinspection PyTypeChecker
                return any(self._eval_gpr(i, knockouts) for i in expr.values)
            elif isinstance(op, And):
                # noinspection PyTypeChecker
                return all(self._eval_gpr(i, knockouts) for i in expr.values)
            else:
                raise TypeError(f"Unsupported operation: {op.__class__.__name__}")
        elif expr is None:
            return True
        else:
            raise TypeError(f"Unsupported operation: {repr(expr)}")

    def eval(self, knockouts: Union[DictList, Set, str, Iterable] = None) -> bool:
        """Evaluate compiled ast of gene_reaction_rule with knockouts.

        This function calls _eval_gpr, but allows more flexibility in input,
        including name, and list.

        Parameters
        ----------
        knockouts
            Which gene or genes to knock out. A single str is treated as the
            id of one knocked-out gene.

        Returns
        -------
        bool
            True if the gene reaction rule is true with the given knockouts
            otherwise false
        """
        if knockouts is None:
            knockouts = set()
        elif isinstance(knockouts, str):
            # A bare string is one gene id, not a collection of characters.
            # Without wrapping, ``in`` would do substring matching on it.
            knockouts = {knockouts}
        if self.body:
            return self._eval_gpr(self.body, knockouts=knockouts)
        else:
            return True

    def _ast2str(
        self,
        expr: Union["GPR", Expression, BoolOp, Name, list],
        level: int = 0,
        names: dict = None,
    ) -> str:
        """Convert compiled ast to gene_reaction_rule str.

        Parameters
        ----------
        expr : AST or GPR or list or Name or BoolOp
            compiled ast of a gene reaction rule, e.g. of "a and b"
        level : int
            internal use only; nested operations (level > 0) are wrapped in
            parentheses
        names : dict
            Dict where each element id a gene identifier and the value is the
            gene name. Use this to get a rule str which uses names instead.
            This should be done for display purposes only. All
            gene_reaction_rule strings which are computed with should use the
            id.

        Returns
        ------
        string
            The gene reaction rule

        Raises
        ------
        TypeError
            If the tree contains a node or operator other than Name, AND, OR.
        """
        if isinstance(expr, (Expression, GPR)):
            return self._ast2str(expr.body, 0, names) if expr.body else ""
        elif isinstance(expr, Name):
            return names.get(expr.id, expr.id) if names else expr.id
        elif isinstance(expr, BoolOp):
            op = expr.op
            if isinstance(op, Or):
                # noinspection PyTypeChecker
                str_exp = " or ".join(
                    self._ast2str(i, level + 1, names) for i in expr.values
                )
            elif isinstance(op, And):
                # noinspection PyTypeChecker
                str_exp = " and ".join(
                    self._ast2str(i, level + 1, names) for i in expr.values
                )
            else:
                # ``__name__`` (not ``__name``, which would be name-mangled
                # inside the class and raise AttributeError).
                raise TypeError(f"Unsupported operation: {op.__class__.__name__}")
            return f"({str_exp})" if level else str_exp
        elif expr is None or (isinstance(expr, list) and len(expr) == 0):
            return ""
        else:
            raise TypeError(f"Unsupported operation: {repr(expr)}")

    def to_string(self, names: dict = None) -> str:
        """Convert compiled ast to gene_reaction_rule str.

        Parameters
        ----------
        self : GPR
           compiled ast Module describing GPR
        names: dict
            dictionary of gene ids to gene names. If this is empty,
            returns gene ids

        Returns
        ------
        string
            The gene reaction rule

        Notes
        -----
        Calls _ast2str()
        """
        return self._ast2str(self, names=names)

    def copy(self):
        """Copy a GPR (deep copy)."""
        return deepcopy(self)

    def __copy__(self) -> "GPR":
        """Return the same deep copy as copy()."""
        return self.copy()

    def __repr__(self) -> str:
        """Return the GPR with module, class, and code to recreate it."""
        return (
            f"{self.__class__.__module__}.{self.__class__.__qualname__}"
            f"({self.to_string()!r})"
        )

    def __str__(self) -> str:
        """Convert compiled ast to gene_reaction_rule str.

        Parameters
        ----------
        self : GPR
           compiled ast Module describing GPR

        Returns
        ------
        string
            The gene reaction rule
        """
        return self.to_string(names={})

    def _repr_html_(self) -> str:
        """Return the rule string wrapped in a short HTML snippet."""
        return f"""<p><strong>GPR</strong></p><p>{format_long_string(self.to_string(), 100)}</p>"""

    def as_symbolic(
        self,
        names: dict = None,
    ) -> Union[spl.Or, spl.And, Symbol]:
        """Convert compiled ast to sympy expression.

        Parameters
        ----------
        self : GPR
           compiled ast Module describing GPR
        names: dict
            dictionary of gene ids to gene names. If this is empty,
            returns sympy expression using gene ids. Genes missing from the
            dictionary keep their id.

        Returns
        ------
        Symbol or BooleanFunction
            SYMPY expression (Symbol or And or Or). Symbol("") if the GPR is
            empty

        Notes
        -----
        Calls _symbolic_gpr()
        """
        # noinspection PyTypeChecker
        if names:
            # Fall back to the id for genes without a name, as _ast2str does.
            GPRGene_dict = {gid: Symbol(names.get(gid, gid)) for gid in self.genes}
        else:
            GPRGene_dict = None
        return self._symbolic_gpr(self, GPRGene_dict=GPRGene_dict)

    def _symbolic_gpr(
        self,
        expr: Union["GPR", Expression, BoolOp, Name, list] = None,
        GPRGene_dict: dict = None,
    ) -> Union[spl.Or, spl.And, Symbol]:
        """Parse gpr into SYMPY using ast similar to _ast2str().

        Parameters
        ----------
        expr : AST or GPR or list or Name or BoolOp
            compiled GPR
        GPRGene_dict: dict
            dictionary from gene id to GPRGeneSymbol. When omitted, one symbol
            per gene id is created.

        Returns
        -------
        Symbol or BooleanFunction
            SYMPY expression (Symbol or And or Or). Symbol("") if the GPR is
            empty

        Raises
        ------
        TypeError
            If the tree contains a node or operator other than Name, AND, OR.
        """
        if GPRGene_dict is None:
            # Only a GPR knows its genes; for any other (or no) expression
            # use the genes of this object instead of failing.
            gene_source = expr if isinstance(expr, GPR) else self
            GPRGene_dict = {gid: Symbol(name=gid) for gid in gene_source.genes}
        if isinstance(expr, (Expression, GPR)):
            if not expr.body:
                return Symbol("")
            return self._symbolic_gpr(expr.body, GPRGene_dict)
        if isinstance(expr, Name):
            symbol = GPRGene_dict.get(expr.id)
            # A gene missing from the mapping is represented by its own id.
            return Symbol(expr.id) if symbol is None else symbol
        if isinstance(expr, BoolOp):
            op = expr.op
            if isinstance(op, Or):
                combine = spl.Or
            elif isinstance(op, And):
                combine = spl.And
            else:
                raise TypeError("Unsupported operation " + op.__class__.__name__)
            # noinspection PyTypeChecker
            return combine(*[self._symbolic_gpr(i, GPRGene_dict) for i in expr.values])
        if not expr:
            return Symbol("")
        raise TypeError("Unsupported Expression " + repr(expr))

    @classmethod
    def from_symbolic(cls, sympy_gpr: Union[spl.BooleanFunction, Symbol]) -> "GPR":
        """Construct a GPR from a sympy expression.

        Parameters
        ----------
        sympy_gpr: sympy
            a sympy that describes the gene rules, being a Symbol for single
            genes or a BooleanFunction for AND/OR relationships

        Returns
        -------
        GPR:
            A new GPR whose body is the AST built from the expression and
            whose genes are the gene ids found in it. Symbol("") gives an
            empty GPR.

        Raises
        ------
        TypeError
            If `sympy_gpr` is neither a BooleanFunction nor a Symbol, or if it
            contains an operation other than And/Or.
        """

        def _sympy_to_ast(
            sympy_expr: Union[spl.BooleanFunction, Symbol]
        ) -> Union[BoolOp, Name]:
            """Translate And/Or/Symbol recursively into BoolOp/Name nodes."""
            if sympy_expr.func is spl.Or:
                return BoolOp(
                    op=Or(), values=[_sympy_to_ast(i) for i in sympy_expr.args]
                )
            elif sympy_expr.func is spl.And:
                return BoolOp(
                    op=And(), values=[_sympy_to_ast(i) for i in sympy_expr.args]
                )
            elif not sympy_expr.args:
                return Name(id=sympy_expr.name)
            else:
                raise TypeError(f"Unsupported operation: {sympy_expr.func}")

        if not isinstance(sympy_gpr, (spl.BooleanFunction, Symbol)):
            raise TypeError(
                f"{cls.__name__}.from_symbolic "
                f"requires a sympy BooleanFunction or "
                f"Symbol argument, not {type(sympy_gpr)}."
            )
        gpr = cls()
        if sympy_gpr == Symbol(""):
            gpr.body = None
            return gpr
        try:
            tree = Expression(_sympy_to_ast(sympy_gpr))
        except SyntaxError as e:
            logger.warning(
                f"Problem with sympy expression '{sympy_gpr}' for {repr(gpr)}",
            )
            logger.warning("GPR will be empty")
            logger.warning(e.msg)
            return gpr
        gpr = cls(tree)
        gpr.update_genes()
        return gpr

    def __eq__(self, other) -> bool:
        """Check equality of GPR via symbolic equality.

        Two empty GPRs are equal. Objects that are not GPRs are never equal
        to a GPR.
        """
        if not isinstance(other, GPR):
            # Let Python fall back to its default comparison instead of
            # failing on a missing ``body`` attribute.
            return NotImplemented
        if not self.body and not other.body:
            return True
        if not self.body or not other.body:
            return False
        self_symb = self.as_symbolic()
        other_symb = other.as_symbolic()
        if isinstance(self_symb, Symbol) and isinstance(other_symb, Symbol):
            return self_symb == other_symb
        if isinstance(self_symb, Symbol) or isinstance(other_symb, Symbol):
            return False
        return self_symb.equals(other_symb)
def eval_gpr(expr: Union[Expression, GPR], knockouts: Union[DictList, set]) -> bool:
    """Evaluate compiled ast of gene_reaction_rule with knockouts.

    .. deprecated ::
        Use GPR().eval() in the future. Because of the GPR() class,
        this function will be removed.

    Parameters
    ----------
    expr : Expression or GPR
        The ast of the gene reaction rule
    knockouts : DictList, set
        Set of genes that are knocked out

    Returns
    -------
    bool
        True if the gene reaction rule is true with the given knockouts
        otherwise false
    """
    warn(
        "eval_gpr() will be removed soon. "
        "Use GPR().eval(knockouts) in the future",
        DeprecationWarning,
        stacklevel=2,  # attribute the warning to the caller, not this wrapper
    )
    # Anything that is not already a GPR is wrapped in one first.
    gpr = expr if isinstance(expr, GPR) else GPR(expr)
    return gpr.eval(knockouts=knockouts)
# functions for gene reaction rules
def ast2str(expr: Union[Expression, GPR], level: int = 0, names: dict = None) -> str:
    """Convert compiled ast to gene_reaction_rule str.

    Parameters
    ----------
    expr : AST or GPR
        AST or GPR
    level : int
        internal use only. Ignored because of GPR() class, kept only for
        interface consistency with code still using ast2str.
    names : dict
        Dict where each element id a gene identifier and the value is the
        gene name. Use this to get a rule str which uses names instead. This
        should be done for display purposes only. All gene_reaction_rule
        strings which are computed with should use the id.

    Returns
    ------
    string
        The gene reaction rule

    .. deprecated ::
        Use GPR.to_string(names=) in the future. Because of the GPR() class,
        this function will be removed.
    """
    warn(
        "ast2str() will be removed soon. Use gpr.to_string(names=names) in the future",
        DeprecationWarning,
        stacklevel=2,  # attribute the warning to the caller, not this wrapper
    )
    # Anything that is not already a GPR is wrapped in one first.
    gpr = expr if isinstance(expr, GPR) else GPR(expr)
    return gpr.to_string(names=names)