Package rdkit :: Package Chem :: Module SATIS
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.SATIS

  1  # $Id: SATIS.py 1528 2010-09-26 17:04:37Z glandrum $ 
  2  # 
  3  # Copyright (C) 2001-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """  Functionality for SATIS typing atoms 
 12   
 13  """ 
 14  from rdkit import Chem 
 15   
 16  _debug = 0 
 17   
 18  # 
 19  #  These are SMARTS patterns for the special cases used in 
 20  #  SATIS typing. 
 21  # 
 22  aldehydePatt = Chem.MolFromSmarts('[CD2]=[OD1]') 
 23  ketonePatt = Chem.MolFromSmarts('[CD3]=[OD1]') 
 24  amidePatt = Chem.MolFromSmarts('[CD3](=[OD1])-[#7]') 
 25  esterPatt = Chem.MolFromSmarts('C(=[OD1])-O-[#6]') 
 26  carboxylatePatt = Chem.MolFromSmarts('C(=[OD1])-[OX1]') 
 27  carboxylPatt = Chem.MolFromSmarts('C(=[OD1])-[OX2]') 
 28   
 29  specialCases = ((carboxylatePatt,97), 
 30                  (esterPatt,96), 
 31                  (carboxylPatt,98), 
 32                  (amidePatt,95), 
 33                  (ketonePatt,94), 
 34                  (aldehydePatt,93)) 
 35   
 36   
37 -def SATISTypes(mol,neighborsToInclude=4):
38 """ returns SATIS codes for all atoms in a molecule 39 40 The SATIS definition used is from: 41 J. Chem. Inf. Comput. Sci. _39_ 751-757 (1999) 42 43 each SATIS code is a string consisting of _neighborsToInclude_ + 1 44 2 digit numbers 45 46 **Arguments** 47 48 - mol: a molecule 49 50 - neighborsToInclude (optional): the number of neighbors to include 51 in the SATIS codes 52 53 **Returns** 54 55 a list of strings nAtoms long 56 57 """ 58 global specialCases 59 60 nAtoms = mol.GetNumAtoms() 61 atomicNums = [0]*nAtoms 62 atoms = mol.GetAtoms() 63 for i in xrange(nAtoms): 64 atomicNums[i] = atoms[i].GetAtomicNum() 65 66 nSpecialCases = len(specialCases) 67 specialCaseMatches = [None]*nSpecialCases 68 for i,(patt,idx) in enumerate(specialCases): 69 if mol.HasSubstructMatch(patt): 70 specialCaseMatches[i] = mol.GetSubstructMatches(patt) 71 else: 72 specialCaseMatches[i] = () 73 74 codes = [None]*nAtoms 75 for i in range(nAtoms): 76 code = [99]*(neighborsToInclude+1) 77 atom = atoms[i] 78 atomIdx = atom.GetIdx() 79 code[0] = min(atom.GetAtomicNum(),99) 80 bonds = atom.GetBonds() 81 nBonds = len(bonds) 82 otherIndices = [-1]*nBonds 83 if _debug: print code[0], 84 for j in range(nBonds): 85 otherIndices[j] = bonds[j].GetOtherAtom(atom).GetIdx() 86 if _debug: print otherIndices[j], 87 if _debug: print 88 otherNums = [atomicNums[x] for x in otherIndices] + \ 89 [1]*atom.GetTotalNumHs() 90 otherNums.sort() 91 92 nOthers = len(otherNums) 93 if nOthers > neighborsToInclude: 94 otherNums.reverse() 95 otherNums = otherNums[:neighborsToInclude] 96 otherNums.reverse() 97 for j in range(neighborsToInclude): 98 code[j+1] = min(otherNums[j],99) 99 else: 100 for j in range(nOthers): 101 code[j+1] = min(otherNums[j],99) 102 if nOthers < neighborsToInclude and code[0] in [6,8]: 103 found = 0 104 for j in range(nSpecialCases): 105 for matchTuple in specialCaseMatches[j]: 106 if atomIdx in matchTuple: 107 code[-1] = specialCases[j][1] 108 found = 1 109 break 110 if found: 111 break 112 113 codes[i] = ''.join(['%02d'%(x) for x in code]) 114 return codes
115 116 if __name__ == '__main__': 117 smis = ['CC(=O)NC','CP(F)(Cl)(Br)(O)', 118 'O=CC(=O)C','C(=O)OCC(=O)O','C(=O)[O-]'] 119 for smi in smis: 120 print smi 121 m = Chem.MolFromSmiles(smi) 122 codes = SATISTypes(m) 123 print codes 124