Package rdkit :: Package Chem :: Module Crippen
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Crippen

  1  # $Id: Crippen.py 1528 2010-09-26 17:04:37Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Atom-based calculation of LogP and MR using Crippen's approach 
 12   
 13   
 14      Reference: 
 15        S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999) 
 16   
 17   
 18  """ 
 19  import os 
 20  from rdkit import RDConfig 
 21  from rdkit import Chem 
 22  from rdkit.Chem import rdMolDescriptors 
 23  import numpy 
 24   
 25  _smartsPatterns = {} 
 26  _patternOrder = [] 
 27  # this is the file containing the atom contributions 
 28  defaultPatternFileName = os.path.join(RDConfig.RDDataDir,'Crippen.txt') 
 29   
30 -def _ReadPatts(fileName):
31 """ *Internal Use Only* 32 33 parses the pattern list from the data file 34 35 """ 36 patts = {} 37 order = [] 38 for line in open(fileName,'r').xreadlines(): 39 if line[0] != '#': 40 splitLine = line.split('\t') 41 if len(splitLine)>=4 and splitLine[0] != '': 42 sma = splitLine[1] 43 if sma!='SMARTS': 44 sma.replace('"','') 45 try: 46 p = Chem.MolFromSmarts(sma) 47 except: 48 pass 49 else: 50 if p: 51 if len(splitLine[0])>1 and splitLine[0][1] not in 'S0123456789': 52 cha = splitLine[0][:2] 53 else: 54 cha = splitLine[0][0] 55 logP = float(splitLine[2]) 56 if splitLine[3] != '': 57 mr = float(splitLine[3]) 58 else: 59 mr = 0.0 60 if cha not in order: 61 order.append(cha) 62 l = patts.get(cha,[]) 63 l.append((sma,p,logP,mr)) 64 patts[cha] = l 65 else: 66 print 'Problems parsing smarts: %s'%(sma) 67 return order,patts
68 69 _GetAtomContribs=rdMolDescriptors._CalcCrippenContribs
70 -def _pyGetAtomContribs(mol,patts=None,order=None,verbose=0,force=0):
71 """ *Internal Use Only* 72 73 calculates atomic contributions to the LogP and MR values 74 75 if the argument *force* is not set, we'll use the molecules stored 76 _crippenContribs value when possible instead of re-calculating. 77 78 **Note:** Changes here affect the version numbers of MolLogP and MolMR 79 as well as the VSA descriptors in Chem.MolSurf 80 81 """ 82 if not force and hasattr(mol,'_crippenContribs'): 83 return mol._crippenContribs 84 85 if patts is None: 86 patts = _smartsPatterns 87 order = _patternOrder 88 89 nAtoms = mol.GetNumAtoms() 90 atomContribs = [(0.,0.)]*nAtoms 91 doneAtoms=[0]*nAtoms 92 nAtomsFound=0 93 done = False 94 for cha in order: 95 pattVect = patts[cha] 96 for sma,patt,logp,mr in pattVect: 97 #print 'try:',entry[0] 98 for match in mol.GetSubstructMatches(patt,False,False): 99 firstIdx = match[0] 100 if not doneAtoms[firstIdx]: 101 doneAtoms[firstIdx]=1 102 atomContribs[firstIdx] = (logp,mr) 103 if verbose: 104 print '\tAtom %d: %s %4.4f %4.4f'%(match[0],sma,logp,mr) 105 nAtomsFound+=1 106 if nAtomsFound>=nAtoms: 107 done=True 108 break 109 if done: break 110 mol._crippenContribs = atomContribs 111 return atomContribs
112
113 -def _Init():
114 global _smartsPatterns,_patternOrder 115 if _smartsPatterns == {}: 116 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName)
117
118 -def _pyMolLogP(inMol,patts=None,order=None,verbose=0,addHs=1):
119 """ DEPRECATED 120 """ 121 if addHs < 0: 122 mol = Chem.AddHs(inMol,1) 123 elif addHs > 0: 124 mol = Chem.AddHs(inMol,0) 125 else: 126 mol = inMol 127 128 if patts is None: 129 global _smartsPatterns,_patternOrder 130 if _smartsPatterns == {}: 131 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 132 patts = _smartsPatterns 133 order = _patternOrder 134 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 135 return numpy.sum(atomContribs,0)[0]
136 _pyMolLogP.version="1.1.0" 137
138 -def _pyMolMR(inMol,patts=None,order=None,verbose=0,addHs=1):
139 """ DEPRECATED 140 """ 141 if addHs < 0: 142 mol = Chem.AddHs(inMol,1) 143 elif addHs > 0: 144 mol = Chem.AddHs(inMol,0) 145 else: 146 mol = inMol 147 148 if patts is None: 149 global _smartsPatterns,_patternOrder 150 if _smartsPatterns == {}: 151 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 152 patts = _smartsPatterns 153 order = _patternOrder 154 155 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 156 return numpy.sum(atomContribs,0)[1]
157 _pyMolMR.version="1.1.0" 158 159 MolLogP=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[0] 160 MolLogP.version=rdMolDescriptors._CalcCrippenDescriptors_version 161 MolLogP.__doc__=""" Wildman-Crippen LogP value 162 163 Uses an atom-based scheme based on the values in the paper: 164 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 165 166 **Arguments** 167 168 - inMol: a molecule 169 170 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 171 If true, hydrogens will be added to the molecule and used in the calculation. 172 173 """ 174 175 MolMR=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[1] 176 MolMR.version=rdMolDescriptors._CalcCrippenDescriptors_version 177 MolMR.__doc__=""" Wildman-Crippen MR value 178 179 Uses an atom-based scheme based on the values in the paper: 180 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 181 182 **Arguments** 183 184 - inMol: a molecule 185 186 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 187 If true, hydrogens will be added to the molecule and used in the calculation. 188 189 """ 190 191 if __name__=='__main__': 192 import sys 193 194 if len(sys.argv): 195 ms = [] 196 verbose=0 197 if '-v' in sys.argv: 198 verbose=1 199 sys.argv.remove('-v') 200 for smi in sys.argv[1:]: 201 ms.append((smi,Chem.MolFromSmiles(smi))) 202 203 for smi,m in ms: 204 print 'Mol: %s'%(smi) 205 logp = MolLogP(m,verbose=verbose) 206 print '----' 207 mr = MolMR(m,verbose=verbose) 208 print 'Res:',logp,mr 209 newM = Chem.AddHs(m) 210 logp = MolLogP(newM,addHs=0) 211 mr = MolMR(newM,addHs=0) 212 print '\t',logp,mr 213 print '-*-*-*-*-*-*-*-*' 214