Source code for molSimplify.Classes.mol2D

import networkx as nx
import numpy as np
from typing import List, Union
from packaging import version
from molSimplify.Classes.globalvars import globalvars
from molSimplify.Classes.mol3D import mol3D as Mol3D

try:
    from openbabel import openbabel  # version 3 style import
except ImportError:
    import openbabel  # fallback to version 2


[docs]class Mol2D(nx.Graph):

    def __repr__(self):
        atom_counts = {}
        for _, sym in self.nodes.data(data="symbol", default="X"):
            if sym not in atom_counts:
                atom_counts[sym] = 1
            else:
                atom_counts[sym] += 1

        symbols = globalvars().elementsbynum()
        formula = ""
        for sym in sorted(atom_counts.keys(),
                          key=lambda x: symbols.index(x), reverse=True):
            formula += f"{sym}{atom_counts[sym]}"
        return f"Mol2D({formula})"

[docs]    @classmethod
    def from_smiles(cls, smiles: str):
        """Create a Mol2D object from a SMILES string.

        Parameters
        ----------
        smiles : str
            SMILES representation of the molecule.

        Returns
        -------
        Mol2D
            Mol2D object of the molecule

        Examples
        --------
        Create a furan molecule from SMILES:

        >>> mol = Mol2D.from_smiles("o1cccc1")
        >>> mol
        Mol2D(O1C4H4)
        """
        mol = cls()

        # Load using openbabel OBMol
        obConversion = openbabel.OBConversion()
        OBMol = openbabel.OBMol()
        obConversion.SetInFormat('smi')
        obConversion.ReadString(OBMol, smiles)
        OBMol.AddHydrogens()

        symbols = globalvars().elementsbynum()
        # Add atoms
        for i, atom in enumerate(openbabel.OBMolAtomIter(OBMol)):
            sym = symbols[atom.GetAtomicNum() - 1]
            mol.add_node(i, symbol=sym)

        # Add bonds
        for bond in openbabel.OBMolBondIter(OBMol):
            # Subtract 1 because of zero indexing vs. one indexing
            mol.add_edge(bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1)

        return mol

[docs]    @classmethod
    def from_mol2_file(cls, filename):
        mol = cls()

        with open(filename, "r") as fin:
            lines = fin.readlines()

        # Read counts line:
        sp = lines[2].split()
        n_atoms = int(sp[0])
        n_bonds = int(sp[1])

        atom_start = lines.index("@<TRIPOS>ATOM\n") + 1
        for i, line in enumerate(lines[atom_start:atom_start + n_atoms]):
            sp = line.split()
            sym = sp[5].split(".")[0]
            mol.add_node(i, symbol=sym)

        bond_start = lines.index("@<TRIPOS>BOND\n") + 1
        for line in lines[bond_start:bond_start + n_bonds]:
            sp = line.split()
            # Subtract 1 because of zero indexing vs. one indexing
            mol.add_edge(int(sp[1]) - 1, int(sp[2]) - 1)

        return mol

[docs]    @classmethod
    def from_mol_file(cls, filename):
        mol = cls()

        with open(filename, "r") as fin:
            lines = fin.readlines()

        # Read counts line:
        sp = lines[3].split()
        n_atoms = int(sp[0])
        n_bonds = int(sp[1])

        # Add atoms (offset of 4 for the header lines):
        for i, line in enumerate(lines[4:4 + n_atoms]):
            sp = line.split()
            mol.add_node(i, symbol=sp[3])

        # Add bonds:
        for line in lines[4 + n_atoms:4 + n_atoms + n_bonds]:
            sp = line.split()
            # Subtract 1 because of zero indexing vs. one indexing
            mol.add_edge(int(sp[0]) - 1, int(sp[1]) - 1)

        return mol

[docs]    @classmethod
    def from_mol3d(cls, mol3d: Mol3D):
        if len(mol3d.graph) == 0:
            raise ValueError("Mol3D object does not have molecular graph attached.")

        mol = cls()

        for i, atom in enumerate(mol3d.atoms):
            mol.add_node(i, symbol=atom.sym)

        bonds = ((int(e[0]), int(e[1])) for e in zip(*mol3d.graph.nonzero()))
        mol.add_edges_from(bonds)
        return mol

[docs]    def graph_hash(self) -> str:
        """Calculates the node attributed graph hash of the molecule.

        Returns
        -------
        str
            node attributed graph hash

        Examples
        --------
        Create a furan molecule from SMILES:

        >>> mol = Mol2D.from_smiles("o1cccc1")

        and calculate the node attributed graph hash:

        >>> mol.graph_hash()
        'f6090276d7369c0c0a535113ec1d97cf'
        """
        # This is necessary because networkx < 2.7 had a bug in the implementation
        # of weisfeiler_lehman_graph_hash
        # https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
        assert version.parse(nx.__version__) >= version.parse("2.7")
        return nx.weisfeiler_lehman_graph_hash(self, node_attr="symbol")

[docs]    def graph_hash_edge_attr(self) -> str:
        """Calculates the edge attributed graph hash of the molecule.

        Returns
        -------
        str
            edge attributed graph hash

        Examples
        --------
        Create a furan molecule from SMILES:

        >>> mol = Mol2D.from_smiles("o1cccc1")

        and calculate the edge attributed graph hash:

        >>> mol.graph_hash_edge_attr()
        'a9b6fbc7b5f53613546d5e91a7544ed6'
        """
        # This is necessary because networkx < 2.7 had a bug in the implementation
        # of weisfeiler_lehman_graph_hash
        # https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
        assert version.parse(nx.__version__) >= version.parse("2.7")
        # Copy orginal graph before adding edge attributes
        G = self.copy()

        for i, j in G.edges:
            G.edges[i, j]["label"] = "".join(sorted([G.nodes[i]["symbol"],
                                                     G.nodes[j]["symbol"]]))

        return nx.weisfeiler_lehman_graph_hash(G, edge_attr="label")

[docs]    def graph_determinant(self, return_string: bool = True) -> Union[str, float]:
        """Calculates the molecular graph determinant.

        Parameters
        ----------
        return_string : bool, optional
            Flag to return the determinant as a string. Default is True.

        Returns
        -------
        str | float
            graph determinant

        Examples
        --------
        Create a furan molecule from SMILES:

        >>> mol = Mol2D.from_smiles("o1cccc1")

        and calculate the graph determinant:

        >>> mol.graph_determinant()
        '-19404698740'
        """
        atomic_masses = globalvars().amass()
        weights = np.array(
            [
                atomic_masses[sym][0]
                for _, sym in self.nodes(data="symbol", default="X")
            ]
        )
        adjacency = nx.to_numpy_array(self)
        mat = np.outer(weights, weights) * adjacency
        np.fill_diagonal(mat, weights)
        with np.errstate(over='raise'):
            try:
                det = np.linalg.det(mat)
            except (np.linalg.LinAlgError, FloatingPointError):
                (sign, det) = np.linalg.slogdet(mat)
                if sign != 0:
                    det = sign*det
        if return_string:
            det = str(det)
            if "e+" in det:
                sp = det.split("e+")
                det = sp[0][0:12] + "e+" + sp[1]
            else:
                det = det[0:12]
        return det

[docs]    def find_metal(self, transition_metals_only: bool = True) -> List[int]:
        """
        Find indices of metal(s) in a Mol2D class.

        Parameters
        ----------
            transition_metals_only : bool, optional
                Only find transition metals. Default is true.

        Returns
        -------
            metal_list : list
                List of indices of metal atoms in Mol2D.

        Examples
        --------
        Build Vanadyl acetylacetonate from SMILES:

        >>> mol = Mol2D.from_smiles("CC(=[O+]1)C=C(C)O[V-3]12(#[O+])OC(C)=CC(C)=[O+]2")
        >>> mol.find_metal()
        [7]
        """
        globs = globalvars()

        metal_list = []
        for i, sym in self.nodes.data(data="symbol", default="X"):
            if sym in globs.metalslist(transition_metals_only=transition_metals_only):
                metal_list.append(i)
        return metal_list