Package rdkit :: Package Chem :: Module MolSurf
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.MolSurf

  1  # $Id: MolSurf.py 1673 2011-03-21 05:02:12Z glandrum $ 
  2  # 
  3  # Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Exposes functionality for MOE-like approximate molecular surface area 
 12  descriptors. 
 13   
 14    The MOE-like VSA descriptors are also calculated here 
 15   
 16  """ 
 17  from rdkit import Chem 
 18  from rdkit.Chem.PeriodicTable import numTable 
 19  from rdkit.Chem import Crippen 
 20  from rdkit.Chem import rdPartialCharges,rdMolDescriptors 
 21  import numpy 
 22  import bisect 
 23  radCol = 5 
 24  bondScaleFacts = [ .1,0,.2,.3] # aromatic,single,double,triple 
 25   
26 -def _LabuteHelper(mol,includeHs=1,force=0):
27 """ *Internal Use Only* 28 helper function for LabuteASA calculation 29 returns an array of atomic contributions to the ASA 30 31 **Note:** Changes here affect the version numbers of all ASA descriptors 32 33 """ 34 if not force: 35 try: 36 res = mol._labuteContribs 37 except AttributeError: 38 pass 39 else: 40 if res: 41 return res 42 tpl = rdMolDescriptors._CalcLabuteASAContribs(mol,includeHs) 43 ats,hs=tpl 44 Vi=[hs]+list(ats) 45 mol._labuteContribs=Vi 46 return Vi
47 -def _pyLabuteHelper(mol,includeHs=1,force=0):
48 """ *Internal Use Only* 49 helper function for LabuteASA calculation 50 returns an array of atomic contributions to the ASA 51 52 **Note:** Changes here affect the version numbers of all ASA descriptors 53 54 """ 55 import math 56 if not force: 57 try: 58 res = mol._labuteContribs 59 except AttributeError: 60 pass 61 else: 62 if res.all(): 63 return res 64 65 nAts = mol.GetNumAtoms() 66 Vi = numpy.zeros(nAts+1,'d') 67 rads = numpy.zeros(nAts+1,'d') 68 69 # 0 contains the H information 70 rads[0] = numTable[1][radCol] 71 for i in xrange(nAts): 72 rads[i+1] = numTable[mol.GetAtomWithIdx(i).GetAtomicNum()][radCol] 73 74 # start with explicit bonds 75 for bond in mol.GetBonds(): 76 idx1 = bond.GetBeginAtomIdx()+1 77 idx2 = bond.GetEndAtomIdx()+1 78 Ri = rads[idx1] 79 Rj = rads[idx2] 80 81 if not bond.GetIsAromatic(): 82 bij = Ri+Rj - bondScaleFacts[bond.GetBondType()] 83 else: 84 bij = Ri+Rj - bondScaleFacts[0] 85 dij = min( max( abs(Ri-Rj), bij), Ri+Rj) 86 Vi[idx1] += Rj*Rj - (Ri-dij)**2 / dij 87 Vi[idx2] += Ri*Ri - (Rj-dij)**2 / dij 88 89 # add in hydrogens 90 if includeHs: 91 j = 0 92 Rj = rads[j] 93 for i in xrange(1,nAts+1): 94 Ri = rads[i] 95 bij = Ri+Rj 96 dij = min( max( abs(Ri-Rj), bij), Ri+Rj) 97 Vi[i] += Rj*Rj - (Ri-dij)**2 / dij 98 Vi[j] += Ri*Ri - (Rj-dij)**2 / dij 99 100 for i in xrange(nAts+1): 101 Ri = rads[i] 102 Vi[i] = 4*math.pi * Ri**2 - math.pi * Ri * Vi[i] 103 104 mol._labuteContribs=Vi 105 return Vi
106 107 #def SMR_VSA(mol,bins=[0.11,0.26,0.35,0.39,0.44,0.485,0.56]): 108 # original default bins from assuming Labute values are logs 109 # mrBins=[1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63] 110 mrBins=[1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63,3.8,4.0]
111 -def pySMR_VSA_(mol,bins=None,force=1):
112 """ *Internal Use Only* 113 """ 114 if not force: 115 try: 116 res = mol._smrVSA 117 except AttributeError: 118 pass 119 else: 120 if res.all(): 121 return res 122 123 if bins is None: bins = mrBins 124 Crippen._Init() 125 propContribs = Crippen._GetAtomContribs(mol,force=force) 126 volContribs = _LabuteHelper(mol) 127 128 ans = numpy.zeros(len(bins)+1,'d') 129 for i in range(len(propContribs)): 130 prop = propContribs[i] 131 vol = volContribs[i+1] 132 if prop is not None: 133 bin = bisect.bisect_right(bins,prop[1]) 134 ans[bin] += vol 135 136 mol._smrVSA=ans 137 return ans
138 SMR_VSA_ = rdMolDescriptors.SMR_VSA_ 139 140 # 141 # Original bins (from Labute paper) are: 142 # [-0.4,-0.2,0,0.1,0.15,0.2,0.25,0.3,0.4] 143 # 144 logpBins=[-0.4,-0.2,0,0.1,0.15,0.2,0.25,0.3,0.4,0.5,0.6]
145 -def pySlogP_VSA_(mol,bins=None,force=1):
146 """ *Internal Use Only* 147 """ 148 if not force: 149 try: 150 res = mol._slogpVSA 151 except AttributeError: 152 pass 153 else: 154 if res.all(): 155 return res 156 157 if bins is None: bins = logpBins 158 Crippen._Init() 159 propContribs = Crippen._GetAtomContribs(mol,force=force) 160 volContribs = _LabuteHelper(mol) 161 162 ans = numpy.zeros(len(bins)+1,'d') 163 for i in range(len(propContribs)): 164 prop = propContribs[i] 165 vol = volContribs[i+1] 166 if prop is not None: 167 bin = bisect.bisect_right(bins,prop[0]) 168 ans[bin] += vol 169 170 mol._slogpVSA=ans 171 return ans
172 SlogP_VSA_ = rdMolDescriptors.SlogP_VSA_ 173 174 chgBins=[-.3,-.25,-.20,-.15,-.10,-.05,0,.05,.10,.15,.20,.25,.30]
175 -def pyPEOE_VSA_(mol,bins=None,force=1):
176 """ *Internal Use Only* 177 """ 178 if not force: 179 try: 180 res = mol._peoeVSA 181 except AttributeError: 182 pass 183 else: 184 if res.all(): 185 return res 186 if bins is None: bins = chgBins 187 Crippen._Init() 188 #print '\ts:',repr(mol.GetMol()) 189 #print '\t\t:',len(mol.GetAtoms()) 190 rdPartialCharges.ComputeGasteigerCharges(mol) 191 192 #propContribs = [float(x.GetProp('_GasteigerCharge')) for x in mol.GetAtoms()] 193 propContribs=[] 194 for at in mol.GetAtoms(): 195 p = at.GetProp('_GasteigerCharge') 196 try: 197 v = float(p) 198 except ValueError: 199 v = 0.0 200 propContribs.append(v) 201 #print '\tp',propContribs 202 volContribs = _LabuteHelper(mol) 203 #print '\tv',volContribs 204 205 ans = numpy.zeros(len(bins)+1,'d') 206 for i in range(len(propContribs)): 207 prop = propContribs[i] 208 vol = volContribs[i+1] 209 if prop is not None: 210 bin = bisect.bisect_right(bins,prop) 211 ans[bin] += vol 212 213 mol._peoeVSA=ans 214 return ans
215 PEOE_VSA_=rdMolDescriptors.PEOE_VSA_ 216 217 #------------------------------------------------- 218 # install the various VSA descriptors in the namespace 219 # (this is used by AvailDescriptors)
220 -def _InstallDescriptors():
221 for i in range(len(mrBins)): 222 fn = lambda x,y=i:SMR_VSA_(x,force=0)[y] 223 if i > 0: 224 fn.__doc__="MOE MR VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,mrBins[i-1],mrBins[i]) 225 else: 226 fn.__doc__="MOE MR VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,mrBins[i]) 227 name="SMR_VSA%d"%(i+1) 228 fn.version="1.0.1" 229 globals()[name]=fn 230 i+=1 231 fn = lambda x,y=i:SMR_VSA_(x,force=0)[y] 232 fn.__doc__="MOE MR VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,mrBins[i-1]) 233 fn.version="1.0.1" 234 name="SMR_VSA%d"%(i+1) 235 globals()[name]=fn 236 237 for i in range(len(logpBins)): 238 fn = lambda x,y=i:SlogP_VSA_(x,force=0)[y] 239 if i > 0: 240 fn.__doc__="MOE logP VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,logpBins[i-1], 241 logpBins[i]) 242 else: 243 fn.__doc__="MOE logP VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,logpBins[i]) 244 name="SlogP_VSA%d"%(i+1) 245 fn.version="1.0.1" 246 globals()[name]=fn 247 i+=1 248 fn = lambda x,y=i:SlogP_VSA_(x,force=0)[y] 249 fn.__doc__="MOE logP VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,logpBins[i-1]) 250 fn.version="1.0.1" 251 name="SlogP_VSA%d"%(i+1) 252 globals()[name]=fn 253 254 for i in range(len(chgBins)): 255 fn = lambda x,y=i:PEOE_VSA_(x,force=0)[y] 256 if i > 0: 257 fn.__doc__="MOE Charge VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,chgBins[i-1], 258 chgBins[i]) 259 else: 260 fn.__doc__="MOE Chage VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,chgBins[i]) 261 name="PEOE_VSA%d"%(i+1) 262 fn.version="1.0.1" 263 globals()[name]=fn 264 i+=1 265 fn = lambda x,y=i:PEOE_VSA_(x,force=0)[y] 266 fn.version="1.0.1" 267 fn.__doc__="MOE Charge VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,chgBins[i-1]) 268 name="PEOE_VSA%d"%(i+1) 269 globals()[name]=fn 270 fn = None
271 # Change log for the MOE-type descriptors: 272 # version 1.0.1: optimizations, values unaffected 273 _InstallDescriptors() 274
275 -def pyLabuteASA(mol,includeHs=1):
276 """ calculates Labute's Approximate Surface Area (ASA from MOE) 277 278 Definition from P. Labute's article in the Journal of the Chemical Computing Group 279 and J. Mol. Graph. Mod. _18_ 464-477 (2000) 280 281 """ 282 Vi = _LabuteHelper(mol,includeHs=includeHs) 283 return sum(Vi)
284 pyLabuteASA.version="1.0.1" 285 # Change log for LabuteASA: 286 # version 1.0.1: optimizations, values unaffected 287 LabuteASA=lambda *x,**y:rdMolDescriptors.CalcLabuteASA(*x,**y) 288 LabuteASA.version=rdMolDescriptors._CalcLabuteASA_version 289 290
291 -def _pyTPSAContribs(mol,verbose=False):
292 """ DEPRECATED: this has been reimplmented in C++ 293 calculates atomic contributions to a molecules TPSA 294 295 Algorithm described in: 296 P. Ertl, B. Rohde, P. Selzer 297 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 298 Contributions and Its Application to the Prediction of Drug Transport 299 Properties, J.Med.Chem. 43, 3714-3717, 2000 300 301 Implementation based on the Daylight contrib program tpsa.c 302 303 NOTE: The JMC paper describing the TPSA algorithm includes 304 contributions from sulfur and phosphorus, however according to 305 Peter Ertl (personal communication, 2010) the correlation of TPSA 306 with various ADME properties is better if only contributions from 307 oxygen and nitrogen are used. This matches the daylight contrib 308 implementation. 309 310 """ 311 res = [0]*mol.GetNumAtoms() 312 for i in range(mol.GetNumAtoms()): 313 atom = mol.GetAtomWithIdx(i) 314 atNum = atom.GetAtomicNum() 315 if atNum in [7,8]: 316 #nHs = atom.GetImplicitValence()-atom.GetHvyValence() 317 nHs = atom.GetTotalNumHs() 318 chg = atom.GetFormalCharge() 319 isArom = atom.GetIsAromatic() 320 in3Ring = atom.IsInRingSize(3) 321 322 bonds = atom.GetBonds() 323 numNeighbors = atom.GetDegree() 324 nSing = 0 325 nDoub = 0 326 nTrip = 0 327 nArom = 0 328 for bond in bonds: 329 otherAt = bond.GetOtherAtom(atom) 330 if otherAt.GetAtomicNum()!=1: 331 if bond.GetIsAromatic(): 332 nArom += 1 333 else: 334 ord = bond.GetBondType() 335 if ord == Chem.BondType.SINGLE: 336 nSing += 1 337 elif ord == Chem.BondType.DOUBLE: 338 nDoub += 1 339 elif ord == Chem.BondType.TRIPLE: 340 nTrip += 1 341 else: 342 numNeighbors-=1 343 nHs += 1 344 tmp = -1 345 if atNum == 7: 346 if numNeighbors == 1: 347 if nHs==0 and nTrip==1 and chg==0: tmp = 23.79 348 elif nHs==1 and nDoub==1 and chg==0: tmp = 23.85 349 elif nHs==2 and nSing==1 and chg==0: tmp = 26.02 350 elif nHs==2 and nDoub==1 and chg==1: tmp = 25.59 351 elif nHs==3 and nSing==1 and chg==1: tmp = 27.64 352 elif numNeighbors == 2: 353 if nHs==0 and nSing==1 and nDoub==1 and chg==0: tmp = 12.36 354 elif nHs==0 and nTrip==1 and nDoub==1 and chg==0: tmp = 13.60 355 elif nHs==1 and nSing==2 and chg==0: 356 if not in3Ring: tmp = 12.03 357 else: tmp=21.94 358 elif nHs==0 and nTrip==1 and nSing==1 and chg==1: tmp = 4.36 359 elif nHs==1 and nDoub==1 and nSing==1 and chg==1: tmp = 13.97 360 elif nHs==2 and nSing==2 and chg==1: tmp = 16.61 361 elif nHs==0 and nArom==2 and chg==0: tmp = 12.89 362 elif nHs==1 and nArom==2 and chg==0: tmp = 15.79 363 elif nHs==1 and nArom==2 and chg==1: tmp = 14.14 364 elif numNeighbors == 3: 365 if nHs==0 and nSing==3 and chg==0: 366 if not in3Ring: tmp = 3.24 367 else: tmp = 3.01 368 elif nHs==0 and nSing==1 and nDoub==2 and chg==0: tmp = 11.68 369 elif nHs==0 and nSing==2 and nDoub==1 and chg==1: tmp = 3.01 370 elif nHs==1 and nSing==3 and chg==1: tmp = 4.44 371 elif nHs==0 and nArom==3 and chg==0: tmp = 4.41 372 elif nHs==0 and nSing==1 and nArom==2 and chg==0: tmp = 4.93 373 elif nHs==0 and nDoub==1 and nArom==2 and chg==0: tmp = 8.39 374 elif nHs==0 and nArom==3 and chg==1: tmp = 4.10 375 elif nHs==0 and nSing==1 and nArom==2 and chg==1: tmp = 3.88 376 elif numNeighbors == 4: 377 if nHs==0 and nSing==4 and chg==1: tmp = 0.00 378 if tmp < 0.0: 379 tmp = 30.5 - numNeighbors * 8.2 + nHs * 1.5 380 if tmp < 0.0: tmp = 0.0 381 elif atNum==8: 382 #print nHs,nSing,chg 383 if numNeighbors == 1: 384 if nHs==0 and nDoub==1 and chg==0: tmp = 17.07 385 elif nHs==1 and nSing==1 and chg==0: tmp = 20.23 386 elif nHs==0 and nSing==1 and chg==-1: tmp = 23.06 387 elif numNeighbors == 2: 388 if nHs==0 and nSing==2 and chg==0: 389 if not in3Ring: tmp = 9.23 390 else: tmp = 12.53 391 elif nHs==0 and nArom==2 and chg==0: tmp = 13.14 392 393 if tmp < 0.0: 394 tmp = 28.5 - numNeighbors * 8.6 + nHs * 1.5 395 if tmp < 0.0: tmp = 0.0 396 if verbose: 397 print '\t',atom.GetIdx(),atom.GetSymbol(),atNum,nHs,nSing,nDoub,nTrip,nArom,chg,tmp 398 399 res[atom.GetIdx()] = tmp 400 return res
401
402 -def _pyTPSA(mol,verbose=False):
403 """ DEPRECATED: this has been reimplmented in C++ 404 calculates the polar surface area of a molecule based upon fragments 405 406 Algorithm in: 407 P. Ertl, B. Rohde, P. Selzer 408 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 409 Contributions and Its Application to the Prediction of Drug Transport 410 Properties, J.Med.Chem. 43, 3714-3717, 2000 411 412 Implementation based on the Daylight contrib program tpsa.c 413 """ 414 contribs = _pyTPSAContribs(mol,verbose=verbose) 415 res = 0.0 416 for contrib in contribs: 417 res += contrib 418 return res
419 _pyTPSA.version="1.0.1" 420 421 TPSA=lambda *x,**y:rdMolDescriptors.CalcTPSA(*x,**y) 422 TPSA.version=rdMolDescriptors._CalcTPSA_version 423 424 425 if __name__ == '__main__': 426 smis = ['C','CC','CCC','CCCC','CO','CCO','COC'] 427 smis = ['C(=O)O','c1ccccc1'] 428 for smi in smis: 429 m = Chem.MolFromSmiles(smi) 430 #print smi, LabuteASA(m); 431 print '-----------\n',smi 432 #print 'M:',['% 4.2f'%x for x in SMR_VSA_(m)] 433 #print 'L:',['% 4.2f'%x for x in SlogP_VSA_(m)] 434 print 'P:',['% 4.2f'%x for x in PEOE_VSA_(m)] 435 print 'P:',['% 4.2f'%x for x in PEOE_VSA_(m)] 436 print 437