import networkx as nx
import numpy as np
from typing import List, Union
from packaging import version
from molSimplify.Classes.globalvars import globalvars
from molSimplify.Classes.mol3D import mol3D as Mol3D
try:
from openbabel import openbabel # version 3 style import
except ImportError:
import openbabel # fallback to version 2
[docs]class Mol2D(nx.Graph):
def __repr__(self):
atom_counts = {}
for _, sym in self.nodes.data(data="symbol", default="X"):
if sym not in atom_counts:
atom_counts[sym] = 1
else:
atom_counts[sym] += 1
symbols = globalvars().elementsbynum()
formula = ""
for sym in sorted(atom_counts.keys(),
key=lambda x: symbols.index(x), reverse=True):
formula += f"{sym}{atom_counts[sym]}"
return f"Mol2D({formula})"
[docs] @classmethod
def from_smiles(cls, smiles: str):
"""Create a Mol2D object from a SMILES string.
Parameters
----------
smiles : str
SMILES representation of the molecule.
Returns
-------
Mol2D
Mol2D object of the molecule
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
>>> mol
Mol2D(O1C4H4)
"""
mol = cls()
# Load using openbabel OBMol
obConversion = openbabel.OBConversion()
OBMol = openbabel.OBMol()
obConversion.SetInFormat('smi')
obConversion.ReadString(OBMol, smiles)
OBMol.AddHydrogens()
symbols = globalvars().elementsbynum()
# Add atoms
for i, atom in enumerate(openbabel.OBMolAtomIter(OBMol)):
sym = symbols[atom.GetAtomicNum() - 1]
mol.add_node(i, symbol=sym)
# Add bonds
for bond in openbabel.OBMolBondIter(OBMol):
# Subtract 1 because of zero indexing vs. one indexing
mol.add_edge(bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1)
return mol
[docs] @classmethod
def from_mol2_file(cls, filename):
mol = cls()
with open(filename, "r") as fin:
lines = fin.readlines()
# Read counts line:
sp = lines[2].split()
n_atoms = int(sp[0])
n_bonds = int(sp[1])
atom_start = lines.index("@<TRIPOS>ATOM\n") + 1
for i, line in enumerate(lines[atom_start:atom_start + n_atoms]):
sp = line.split()
sym = sp[5].split(".")[0]
mol.add_node(i, symbol=sym)
bond_start = lines.index("@<TRIPOS>BOND\n") + 1
for line in lines[bond_start:bond_start + n_bonds]:
sp = line.split()
# Subtract 1 because of zero indexing vs. one indexing
mol.add_edge(int(sp[1]) - 1, int(sp[2]) - 1)
return mol
[docs] @classmethod
def from_mol_file(cls, filename):
mol = cls()
with open(filename, "r") as fin:
lines = fin.readlines()
# Read counts line:
sp = lines[3].split()
n_atoms = int(sp[0])
n_bonds = int(sp[1])
# Add atoms (offset of 4 for the header lines):
for i, line in enumerate(lines[4:4 + n_atoms]):
sp = line.split()
mol.add_node(i, symbol=sp[3])
# Add bonds:
for line in lines[4 + n_atoms:4 + n_atoms + n_bonds]:
sp = line.split()
# Subtract 1 because of zero indexing vs. one indexing
mol.add_edge(int(sp[0]) - 1, int(sp[1]) - 1)
return mol
[docs] @classmethod
def from_mol3d(cls, mol3d: Mol3D):
if len(mol3d.graph) == 0:
raise ValueError("Mol3D object does not have molecular graph attached.")
mol = cls()
for i, atom in enumerate(mol3d.atoms):
mol.add_node(i, symbol=atom.sym)
bonds = ((int(e[0]), int(e[1])) for e in zip(*mol3d.graph.nonzero()))
mol.add_edges_from(bonds)
return mol
[docs] def graph_hash(self) -> str:
"""Calculates the node attributed graph hash of the molecule.
Returns
-------
str
node attributed graph hash
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
and calculate the node attributed graph hash:
>>> mol.graph_hash()
'f6090276d7369c0c0a535113ec1d97cf'
"""
# This is necessary because networkx < 2.7 had a bug in the implementation
# of weisfeiler_lehman_graph_hash
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
assert version.parse(nx.__version__) >= version.parse("2.7")
return nx.weisfeiler_lehman_graph_hash(self, node_attr="symbol")
[docs] def graph_hash_edge_attr(self) -> str:
"""Calculates the edge attributed graph hash of the molecule.
Returns
-------
str
edge attributed graph hash
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
and calculate the edge attributed graph hash:
>>> mol.graph_hash_edge_attr()
'a9b6fbc7b5f53613546d5e91a7544ed6'
"""
# This is necessary because networkx < 2.7 had a bug in the implementation
# of weisfeiler_lehman_graph_hash
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
assert version.parse(nx.__version__) >= version.parse("2.7")
# Copy orginal graph before adding edge attributes
G = self.copy()
for i, j in G.edges:
G.edges[i, j]["label"] = "".join(sorted([G.nodes[i]["symbol"],
G.nodes[j]["symbol"]]))
return nx.weisfeiler_lehman_graph_hash(G, edge_attr="label")
[docs] def graph_determinant(self, return_string: bool = True) -> Union[str, float]:
"""Calculates the molecular graph determinant.
Parameters
----------
return_string : bool, optional
Flag to return the determinant as a string. Default is True.
Returns
-------
str | float
graph determinant
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
and calculate the graph determinant:
>>> mol.graph_determinant()
'-19404698740'
"""
atomic_masses = globalvars().amass()
weights = np.array(
[
atomic_masses[sym][0]
for _, sym in self.nodes(data="symbol", default="X")
]
)
adjacency = nx.to_numpy_array(self)
mat = np.outer(weights, weights) * adjacency
np.fill_diagonal(mat, weights)
with np.errstate(over='raise'):
try:
det = np.linalg.det(mat)
except (np.linalg.LinAlgError, FloatingPointError):
(sign, det) = np.linalg.slogdet(mat)
if sign != 0:
det = sign*det
if return_string:
det = str(det)
if "e+" in det:
sp = det.split("e+")
det = sp[0][0:12] + "e+" + sp[1]
else:
det = det[0:12]
return det