1
2
3
4
5
6
7
8
9
10
11 from rdkit import RDConfig
12
13 import sys,time,math
14 from rdkit.ML.Data import Stats
15 import rdkit.DistanceGeometry as DG
16 from rdkit import Chem
17 import numpy
18 from rdkit.Chem import rdDistGeom as MolDG
19 from rdkit.Chem import ChemicalFeatures
20 from rdkit.Chem import ChemicalForceFields
21 import Pharmacophore,ExcludedVolume
22 from rdkit import Geometry
23 _times = {}
24
25 from rdkit import RDLogger as logging
26 logger = logging.logger()
27 defaultFeatLength=2.0
28
30 """ returns a list of the heavy-atom neighbors of the
31 atom passed in:
32
33 >>> m = Chem.MolFromSmiles('CCO')
34 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(0))
35 >>> len(l)
36 1
37 >>> isinstance(l[0],Chem.Atom)
38 True
39 >>> l[0].GetIdx()
40 1
41
42 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(1))
43 >>> len(l)
44 2
45 >>> l[0].GetIdx()
46 0
47 >>> l[1].GetIdx()
48 2
49
50 """
51 res=[]
52 for nbr in atom.GetNeighbors():
53 if nbr.GetAtomicNum() != 1:
54 res.append(nbr)
55 return res
56
58 """ Adds an entry at the end of the bounds matrix for a point at
59 the center of a multi-point feature
60
61 returns a 2-tuple:
62 new bounds mat
63 index of point added
64
65 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
66 >>> match = [0,1,2]
67 >>> bm,idx = ReplaceGroup(match,boundsMat,slop=0.0)
68
69 the index is at the end:
70 >>> idx
71 3
72
73 and the matrix is one bigger:
74 >>> bm.shape
75 (4, 4)
76
77 but the original bounds mat is not altered:
78 >>> boundsMat.shape
79 (3, 3)
80
81
82 We make the assumption that the points of the
83 feature form a regular polygon, are listed in order
84 (i.e. pt 0 is a neighbor to pt 1 and pt N-1)
85 and that the replacement point goes at the center:
86 >>> print ', '.join(['%.3f'%x for x in bm[-1]])
87 0.577, 0.577, 0.577, 0.000
88 >>> print ', '.join(['%.3f'%x for x in bm[:,-1]])
89 1.155, 1.155, 1.155, 0.000
90
91 The slop argument (default = 0.01) is fractional:
92 >>> bm,idx = ReplaceGroup(match,boundsMat)
93 >>> print ', '.join(['%.3f'%x for x in bm[-1]])
94 0.572, 0.572, 0.572, 0.000
95 >>> print ', '.join(['%.3f'%x for x in bm[:,-1]])
96 1.166, 1.166, 1.166, 0.000
97
98
99 """
100 maxVal = -1000.0
101 minVal = 1e8
102 nPts = len(match)
103 for i in range(nPts):
104 idx0 = match[i]
105 if i<nPts-1:
106 idx1 = match[i+1]
107 else:
108 idx1 = match[0]
109 if idx1<idx0:
110 idx0,idx1 = idx1,idx0
111 minVal = min(minVal,bounds[idx1,idx0])
112 maxVal = max(maxVal,bounds[idx0,idx1])
113 maxVal *= (1+slop)
114 minVal *= (1-slop)
115
116 scaleFact = 1.0/(2.0*math.sin(math.pi/nPts))
117 minVal *= scaleFact
118 maxVal *= scaleFact
119
120 replaceIdx = bounds.shape[0]
121 if not useDirs:
122 bm = numpy.zeros((bounds.shape[0]+1,bounds.shape[1]+1),numpy.float)
123 else:
124 bm = numpy.zeros((bounds.shape[0]+2,bounds.shape[1]+2),numpy.float)
125 bm[0:bounds.shape[0],0:bounds.shape[1]]=bounds
126 bm[:replaceIdx,replaceIdx]=1000.
127
128 if useDirs:
129 bm[:replaceIdx+1,replaceIdx+1]=1000.
130
131 bm[replaceIdx,replaceIdx+1]=dirLength+slop
132 bm[replaceIdx+1,replaceIdx]=dirLength-slop
133
134 for idx1 in match:
135 bm[idx1,replaceIdx]=maxVal
136 bm[replaceIdx,idx1]=minVal
137 if useDirs:
138
139 bm[idx1,replaceIdx+1] = numpy.sqrt(bm[replaceIdx,replaceIdx+1]**2+maxVal**2)
140 bm[replaceIdx+1,idx1] = numpy.sqrt(bm[replaceIdx+1,replaceIdx]**2+minVal**2)
141 return bm,replaceIdx
142
143 -def EmbedMol(mol,bm,atomMatch=None,weight=2.0,randomSeed=-1,
144 excludedVolumes=None):
145 """ Generates an embedding for a molecule based on a bounds matrix and adds
146 a conformer (id 0) to the molecule
147
148 if the optional argument atomMatch is provided, it will be used to provide
149 supplemental weights for the embedding routine (used in the optimization
150 phase to ensure that the resulting geometry really does satisfy the
151 pharmacophore).
152
153 if the excludedVolumes is provided, it should be a sequence of
154 ExcludedVolume objects
155
156 >>> m = Chem.MolFromSmiles('c1ccccc1C')
157 >>> bounds = MolDG.GetMoleculeBoundsMatrix(m)
158 >>> bounds.shape
159 (7, 7)
160 >>> m.GetNumConformers()
161 0
162 >>> EmbedMol(m,bounds,randomSeed=23)
163 >>> m.GetNumConformers()
164 1
165
166
167 """
168 nAts = mol.GetNumAtoms()
169 weights=[]
170 if(atomMatch):
171 for i in range(len(atomMatch)):
172 for j in range(i+1,len(atomMatch)):
173 weights.append((i,j,weight))
174 if(excludedVolumes):
175 for vol in excludedVolumes:
176 idx = vol.index
177
178 for i in range(nAts):
179 weights.append((i,idx,weight))
180 coords = DG.EmbedBoundsMatrix(bm,weights=weights,numZeroFail=1,randomSeed=randomSeed)
181
182
183
184 conf = Chem.Conformer(nAts)
185 conf.SetId(0)
186 for i in range(nAts):
187 conf.SetAtomPosition(i,list(coords[i]))
188 if excludedVolumes:
189 for vol in excludedVolumes:
190 vol.pos = numpy.array(coords[vol.index])
191
192
193 mol.AddConformer(conf)
194
196 """ Adds a set of excluded volumes to the bounds matrix
197 and returns the new matrix
198
199 excludedVolumes is a list of ExcludedVolume objects
200
201
202 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
203 >>> ev1 = ExcludedVolume.ExcludedVolume(([(0,),0.5,1.0],),exclusionDist=1.5)
204 >>> bm = AddExcludedVolumes(boundsMat,(ev1,))
205
206 the results matrix is one bigger:
207 >>> bm.shape
208 (4, 4)
209
210 and the original bounds mat is not altered:
211 >>> boundsMat.shape
212 (3, 3)
213
214 >>> print ', '.join(['%.3f'%x for x in bm[-1]])
215 0.500, 1.500, 1.500, 0.000
216 >>> print ', '.join(['%.3f'%x for x in bm[:,-1]])
217 1.000, 3.000, 3.000, 0.000
218
219 """
220 oDim = bm.shape[0]
221 dim = oDim+len(excludedVolumes)
222 res = numpy.zeros((dim,dim),numpy.float)
223 res[:oDim,:oDim] = bm
224 for i,vol in enumerate(excludedVolumes):
225 bmIdx = oDim+i
226 vol.index = bmIdx
227
228
229 res[bmIdx,:bmIdx] = vol.exclusionDist
230 res[:bmIdx,bmIdx] = 1000.0
231
232
233 for indices,minV,maxV in vol.featInfo:
234 for index in indices:
235 try:
236 res[bmIdx,index] = minV
237 res[index,bmIdx] = maxV
238 except IndexError:
239 logger.error('BAD INDEX: res[%d,%d], shape is %s'%(bmIdx,index,str(res.shape)))
240 raise IndexError
241
242
243 for j in range(bmIdx+1,dim):
244 res[bmIdx,j:dim] = 0
245 res[j:dim,bmIdx] = 1000
246
247 if smoothIt: DG.DoTriangleSmoothing(res)
248 return res
249
253 """ loops over a distance bounds matrix and replaces the elements
254 that are altered by a pharmacophore
255
256 **NOTE** this returns the resulting bounds matrix, but it may also
257 alter the input matrix
258
259 atomMatch is a sequence of sequences containing atom indices
260 for each of the pharmacophore's features.
261
262 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
263 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
264 ... ]
265 >>> pcophore=Pharmacophore.Pharmacophore(feats)
266 >>> pcophore.setLowerBound(0,1, 1.0)
267 >>> pcophore.setUpperBound(0,1, 2.0)
268
269 >>> boundsMat = numpy.array([[0.0,3.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
270 >>> atomMatch = ((0,),(1,))
271 >>> bm = UpdatePharmacophoreBounds(boundsMat,atomMatch,pcophore)
272
273
274 In this case, there are no multi-atom features, so the result matrix
275 is the same as the input:
276 >>> bm is boundsMat
277 True
278
279 this means, of course, that the input boundsMat is altered:
280 >>> print ', '.join(['%.3f'%x for x in boundsMat[0]])
281 0.000, 2.000, 3.000
282 >>> print ', '.join(['%.3f'%x for x in boundsMat[1]])
283 1.000, 0.000, 3.000
284 >>> print ', '.join(['%.3f'%x for x in boundsMat[2]])
285 2.000, 2.000, 0.000
286
287 """
288 replaceMap = {}
289 for i,matchI in enumerate(atomMatch):
290 if len(matchI)>1:
291 bm,replaceIdx = ReplaceGroup(matchI,bm,useDirs=useDirs)
292 replaceMap[i] = replaceIdx
293
294 for i,matchI in enumerate(atomMatch):
295 mi = replaceMap.get(i,matchI[0])
296 for j in range(i+1,len(atomMatch)):
297 mj = replaceMap.get(j,atomMatch[j][0])
298 if mi<mj:
299 idx0,idx1 = mi,mj
300 else:
301 idx0,idx1 = mj,mi
302 bm[idx0,idx1] = pcophore.getUpperBound(i,j)
303 bm[idx1,idx0] = pcophore.getLowerBound(i,j)
304
305 return bm
306
307
308
309 -def EmbedPharmacophore(mol,atomMatch,pcophore,randomSeed=-1,count=10,smoothFirst=True,
310 silent=False,bounds=None,excludedVolumes=None,targetNumber=-1,
311 useDirs=False):
312 """ Generates one or more embeddings for a molecule that satisfy a pharmacophore
313
314 atomMatch is a sequence of sequences containing atom indices
315 for each of the pharmacophore's features.
316
317 - count: is the maximum number of attempts to make a generating an embedding
318 - smoothFirst: toggles triangle smoothing of the molecular bounds matix
319 - bounds: if provided, should be the molecular bounds matrix. If this isn't
320 provided, the matrix will be generated.
321 - targetNumber: if this number is positive, it provides a maximum number
322 of embeddings to generate (i.e. we'll have count attempts to generate
323 targetNumber embeddings).
324
325 returns: a 3 tuple:
326 1) the molecular bounds matrix adjusted for the pharmacophore
327 2) a list of embeddings (molecules with a single conformer)
328 3) the number of failed attempts at embedding
329
330
331 >>> m = Chem.MolFromSmiles('OCCN')
332 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
333 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
334 ... ]
335 >>> pcophore=Pharmacophore.Pharmacophore(feats)
336 >>> pcophore.setLowerBound(0,1, 2.5)
337 >>> pcophore.setUpperBound(0,1, 3.5)
338 >>> atomMatch = ((0,),(3,))
339
340 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
341 >>> len(embeds)
342 10
343 >>> nFail
344 0
345
346 Set up a case that can't succeed:
347 >>> pcophore=Pharmacophore.Pharmacophore(feats)
348 >>> pcophore.setLowerBound(0,1, 2.0)
349 >>> pcophore.setUpperBound(0,1, 2.1)
350 >>> atomMatch = ((0,),(3,))
351
352 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
353 >>> len(embeds)
354 0
355 >>> nFail
356 10
357
358 """
359 global _times
360 if not hasattr(mol,'_chiralCenters'):
361 mol._chiralCenters = Chem.FindMolChiralCenters(mol)
362
363 if bounds is None:
364 bounds = MolDG.GetMoleculeBoundsMatrix(mol)
365 if smoothFirst: DG.DoTriangleSmoothing(bounds)
366
367 bm = bounds.copy()
368
369
370
371
372
373
374 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore,useDirs=useDirs,mol=mol)
375
376 if excludedVolumes:
377 bm = AddExcludedVolumes(bm,excludedVolumes,smoothIt=False)
378
379 if not DG.DoTriangleSmoothing(bm):
380 raise ValueError,"could not smooth bounds matrix"
381
382
383
384
385
386
387
388
389 if targetNumber<=0:
390 targetNumber=count
391 nFailed = 0
392 res = []
393 for i in range(count):
394 tmpM = bm[:,:]
395 m2 = Chem.Mol(mol.ToBinary())
396 t1 = time.time()
397 try:
398 if randomSeed<=0:
399 seed = i*10+1
400 else:
401 seed = i*10+randomSeed
402 EmbedMol(m2,tmpM,atomMatch,randomSeed=seed,
403 excludedVolumes=excludedVolumes)
404 except ValueError:
405 if not silent:
406 logger.info('Embed failed')
407 nFailed += 1
408 else:
409 t2 = time.time()
410 _times['embed'] = _times.get('embed',0)+t2-t1
411 keepIt=True
412 for idx,stereo in mol._chiralCenters:
413 if stereo in ('R','S'):
414 vol = ComputeChiralVolume(m2,idx)
415 if (stereo=='R' and vol>=0) or \
416 (stereo=='S' and vol<=0):
417 keepIt=False
418 break
419 if keepIt:
420 res.append(m2)
421 else:
422 logger.debug('Removed embedding due to chiral constraints.')
423 if len(res)==targetNumber: break
424 return bm,res,nFailed
425
427 """ provides an OS independent way of detecting NaNs
428 This is intended to be used with values returned from the C++
429 side of things.
430
431 We can't actually test this from Python (which traps
432 zero division errors), but it would work something like
433 this if we could:
434 >>> isNaN(0)
435 False
436
437 #>>> isNan(1/0)
438 #True
439
440 """
441 if v!=v and sys.platform=='win32':
442 return True
443 elif v==0 and v==1 and sys.platform!='win32':
444 return True
445 return False
446
447
448 -def OptimizeMol(mol,bm,atomMatches=None,excludedVolumes=None,
449 forceConstant=1200.0,
450 maxPasses=5,verbose=False):
451 """ carries out a UFF optimization for a molecule optionally subject
452 to the constraints in a bounds matrix
453
454 - atomMatches, if provided, is a sequence of sequences
455 - forceConstant is the force constant of the spring used to enforce
456 the constraints
457
458 returns a 2-tuple:
459 1) the energy of the initial conformation
460 2) the energy post-embedding
461 NOTE that these energies include the energies of the constraints
462
463
464
465 >>> m = Chem.MolFromSmiles('OCCN')
466 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
467 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
468 ... ]
469 >>> pcophore=Pharmacophore.Pharmacophore(feats)
470 >>> pcophore.setLowerBound(0,1, 2.5)
471 >>> pcophore.setUpperBound(0,1, 2.8)
472 >>> atomMatch = ((0,),(3,))
473 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
474 >>> len(embeds)
475 10
476 >>> testM = embeds[0]
477
478 Do the optimization:
479 >>> e1,e2 = OptimizeMol(testM,bm,atomMatches=atomMatch)
480
481 Optimizing should have lowered the energy:
482 >>> e2 < e1
483 True
484
485 Check the constrained distance:
486 >>> conf = testM.GetConformer(0)
487 >>> p0 = conf.GetAtomPosition(0)
488 >>> p3 = conf.GetAtomPosition(3)
489 >>> d03 = p0.Distance(p3)
490 >>> d03 >= pcophore.getLowerBound(0,1)-.01
491 True
492 >>> d03 <= pcophore.getUpperBound(0,1)+.01
493 True
494
495 If we optimize without the distance constraints (provided via the atomMatches
496 argument) we're not guaranteed to get the same results, particularly in a case
497 like the current one where the pharmcophore brings the atoms uncomfortably
498 close together:
499 >>> testM = embeds[1]
500 >>> e1,e2 = OptimizeMol(testM,bm)
501 >>> e2 < e1
502 True
503 >>> conf = testM.GetConformer(0)
504 >>> p0 = conf.GetAtomPosition(0)
505 >>> p3 = conf.GetAtomPosition(3)
506 >>> d03 = p0.Distance(p3)
507 >>> d03 >= pcophore.getLowerBound(0,1)-.01
508 True
509 >>> d03 <= pcophore.getUpperBound(0,1)+.01
510 False
511
512 """
513 try:
514 ff = ChemicalForceFields.UFFGetMoleculeForceField(mol)
515 except:
516 logger.info('Problems building molecular forcefield',exc_info=True)
517 return -1.0,-1.0
518
519 weights=[]
520 if(atomMatches):
521 for k in range(len(atomMatches)):
522 for i in atomMatches[k]:
523 for l in range(k+1,len(atomMatches)):
524 for j in atomMatches[l]:
525 weights.append((i,j))
526 for i,j in weights:
527 if j<i:
528 i,j = j,i
529 minV = bm[j,i]
530 maxV = bm[i,j]
531 ff.AddDistanceConstraint(i,j,minV,maxV,forceConstant)
532 if excludedVolumes:
533 nAts = mol.GetNumAtoms()
534 conf = mol.GetConformer()
535 idx = nAts
536 for exVol in excludedVolumes:
537 assert exVol.pos is not None
538 logger.debug('ff.AddExtraPoint(%.4f,%.4f,%.4f)'%(exVol.pos[0],exVol.pos[1],
539 exVol.pos[2]))
540 ff.AddExtraPoint(exVol.pos[0],exVol.pos[1],exVol.pos[2],True)
541 indices = []
542 for localIndices,foo,bar in exVol.featInfo:
543 indices += list(localIndices)
544 for i in range(nAts):
545 v = numpy.array(conf.GetAtomPosition(i))-numpy.array(exVol.pos)
546 d = numpy.sqrt(numpy.dot(v,v))
547 if i not in indices:
548 if d<5.0:
549 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%d,%.0f)'%(i,idx,exVol.exclusionDist,1000,forceConstant))
550 ff.AddDistanceConstraint(i,idx,exVol.exclusionDist,1000,
551 forceConstant)
552
553 else:
554 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%.3f,%.0f)'%(i,idx,bm[exVol.index,i],bm[i,exVol.index],forceConstant))
555 ff.AddDistanceConstraint(i,idx,bm[exVol.index,i],bm[i,exVol.index],
556 forceConstant)
557 idx += 1
558 ff.Initialize()
559 e1 = ff.CalcEnergy()
560 if isNaN(e1):
561 raise ValueError,'bogus energy'
562
563 if verbose:
564 print Chem.MolToMolBlock(mol)
565 for i,vol in enumerate(excludedVolumes):
566 pos = ff.GetExtraPointPos(i)
567 print >>sys.stderr,' % 7.4f % 7.4f % 7.4f As 0 0 0 0 0 0 0 0 0 0 0 0'%tuple(pos)
568 needsMore=ff.Minimize()
569 nPasses=0
570 while needsMore and nPasses<maxPasses:
571 needsMore=ff.Minimize()
572 nPasses+=1
573 e2 = ff.CalcEnergy()
574 if isNaN(e2):
575 raise ValueError,'bogus energy'
576
577 if verbose:
578 print '--------'
579 print Chem.MolToMolBlock(mol)
580 for i,vol in enumerate(excludedVolumes):
581 pos = ff.GetExtraPointPos(i)
582 print >>sys.stderr,' % 7.4f % 7.4f % 7.4f Sb 0 0 0 0 0 0 0 0 0 0 0 0'%tuple(pos)
583 ff = None
584 return e1,e2
585
586 -def EmbedOne(mol,name,match,pcophore,count=1,silent=0,**kwargs):
587 """ generates statistics for a molecule's embeddings
588
589 Four energies are computed for each embedding:
590 1) E1: the energy (with constraints) of the initial embedding
591 2) E2: the energy (with constraints) of the optimized embedding
592 3) E3: the energy (no constraints) the geometry for E2
593 4) E4: the energy (no constraints) of the optimized free-molecule
594 (starting from the E3 geometry)
595
596 Returns a 9-tuple:
597 1) the mean value of E1
598 2) the sample standard deviation of E1
599 3) the mean value of E2
600 4) the sample standard deviation of E2
601 5) the mean value of E3
602 6) the sample standard deviation of E3
603 7) the mean value of E4
604 8) the sample standard deviation of E4
605 9) The number of embeddings that failed
606
607 """
608 global _times
609 atomMatch = [list(x.GetAtomIds()) for x in match]
610 bm,ms,nFailed = EmbedPharmacophore(mol,atomMatch,pcophore,count=count,
611 silent=silent,**kwargs)
612 e1s = []
613 e2s = []
614 e3s = []
615 e4s = []
616 d12s = []
617 d23s = []
618 d34s = []
619 for m in ms:
620 t1 = time.time()
621 try:
622 e1,e2 = OptimizeMol(m,bm,atomMatch)
623 except ValueError:
624 pass
625 else:
626 t2 = time.time()
627 _times['opt1'] = _times.get('opt1',0)+t2-t1
628
629 e1s.append(e1)
630 e2s.append(e2)
631
632 d12s.append(e1-e2)
633 t1 = time.time()
634 try:
635 e3,e4 = OptimizeMol(m,bm)
636 except ValueError:
637 pass
638 else:
639 t2 = time.time()
640 _times['opt2'] = _times.get('opt2',0)+t2-t1
641 e3s.append(e3)
642 e4s.append(e4)
643 d23s.append(e2-e3)
644 d34s.append(e3-e4)
645 count += 1
646 try:
647 e1,e1d = Stats.MeanAndDev(e1s)
648 except:
649 e1 = -1.0
650 e1d=-1.0
651 try:
652 e2,e2d = Stats.MeanAndDev(e2s)
653 except:
654 e2 = -1.0
655 e2d=-1.0
656 try:
657 e3,e3d = Stats.MeanAndDev(e3s)
658 except:
659 e3 = -1.0
660 e3d=-1.0
661
662 try:
663 e4,e4d = Stats.MeanAndDev(e4s)
664 except:
665 e4 = -1.0
666 e4d=-1.0
667 if not silent:
668 print '%s(%d): %.2f(%.2f) -> %.2f(%.2f) : %.2f(%.2f) -> %.2f(%.2f)'%(name,nFailed,e1,e1d,e2,e2d,e3,e3d,e4,e4d)
669 return e1,e1d,e2,e2d,e3,e3d,e4,e4d,nFailed
670
672 """ generates a list of all possible mappings of a pharmacophore to a molecule
673
674 Returns a 2-tuple:
675 1) a boolean indicating whether or not all features were found
676 2) a list, numFeatures long, of sequences of features
677
678
679 >>> import os.path
680 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
681 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
682 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
683 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
684 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
685 >>> m = Chem.MolFromSmiles('FCCN')
686 >>> match,mList = MatchPharmacophoreToMol(m,featFactory,pcophore)
687 >>> match
688 True
689
690 Two feature types:
691 >>> len(mList)
692 2
693
694 The first feature type, Acceptor, has two matches:
695 >>> len(mList[0])
696 2
697 >>> mList[0][0].GetAtomIds()
698 (0,)
699 >>> mList[0][1].GetAtomIds()
700 (3,)
701
702 The first feature type, Donor, has a single match:
703 >>> len(mList[1])
704 1
705 >>> mList[1][0].GetAtomIds()
706 (3,)
707
708 """
709 return MatchFeatsToMol(mol, featFactory, pcophore.getFeatures())
710
712 """ **INTERNAL USE ONLY**
713
714 >>> import os.path
715 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
716 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
717 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
718 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
719 >>> m = Chem.MolFromSmiles('FCCN')
720 >>> d =_getFeatDict(m,featFactory,activeFeats)
721 >>> d.keys()
722 ['Donor', 'Acceptor']
723 >>> donors = d['Donor']
724 >>> len(donors)
725 1
726 >>> donors[0].GetAtomIds()
727 (3,)
728 >>> acceptors = d['Acceptor']
729 >>> len(acceptors)
730 2
731 >>> acceptors[0].GetAtomIds()
732 (0,)
733 >>> acceptors[1].GetAtomIds()
734 (3,)
735
736 """
737 molFeats = {}
738 for feat in features:
739 family = feat.GetFamily()
740 if not molFeats.has_key(family):
741 matches = featFactory.GetFeaturesForMol(mol,includeOnly=family)
742 molFeats[family] = matches
743 return molFeats
744
746 """ generates a list of all possible mappings of each feature to a molecule
747
748 Returns a 2-tuple:
749 1) a boolean indicating whether or not all features were found
750 2) a list, numFeatures long, of sequences of features
751
752
753 >>> import os.path
754 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
755 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
756 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
757 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
758 >>> m = Chem.MolFromSmiles('FCCN')
759 >>> match,mList = MatchFeatsToMol(m,featFactory,activeFeats)
760 >>> match
761 True
762
763 Two feature types:
764 >>> len(mList)
765 2
766
767 The first feature type, Acceptor, has two matches:
768 >>> len(mList[0])
769 2
770 >>> mList[0][0].GetAtomIds()
771 (0,)
772 >>> mList[0][1].GetAtomIds()
773 (3,)
774
775 The first feature type, Donor, has a single match:
776 >>> len(mList[1])
777 1
778 >>> mList[1][0].GetAtomIds()
779 (3,)
780
781 """
782 molFeats = _getFeatDict(mol,featFactory,features)
783 res = []
784 for feat in features:
785 matches = molFeats.get(feat.GetFamily(),[])
786 if len(matches) == 0 :
787 return False, None
788 res.append(matches)
789 return True, res
790
792 """ This generator takes a sequence of sequences as an argument and
793 provides all combinations of the elements of the subsequences:
794
795 >>> gen = CombiEnum(((1,2),(10,20)))
796 >>> gen.next()
797 [1, 10]
798 >>> gen.next()
799 [1, 20]
800
801 >>> [x for x in CombiEnum(((1,2),(10,20)))]
802 [[1, 10], [1, 20], [2, 10], [2, 20]]
803
804 >>> [x for x in CombiEnum(((1,2),(10,20),(100,200)))]
805 [[1, 10, 100], [1, 10, 200], [1, 20, 100], [1, 20, 200], [2, 10, 100], [2, 10, 200], [2, 20, 100], [2, 20, 200]]
806
807 """
808 if not len(sequence):
809 yield []
810 elif len(sequence)==1:
811 for entry in sequence[0]:
812 yield [entry]
813 else:
814 for entry in sequence[0]:
815 for subVal in CombiEnum(sequence[1:]):
816 yield [entry]+subVal
817
819 """ removes rows from a bounds matrix that are
820 that are greater than a threshold value away from a set of
821 other points
822
823 returns the modfied bounds matrix
824
825 The goal of this function is to remove rows from the bounds matrix
826 that correspond to atoms that are likely to be quite far from
827 the pharmacophore we're interested in. Because the bounds smoothing
828 we eventually have to do is N^3, this can be a big win
829
830 >>> boundsMat = numpy.array([[0.0,3.0,4.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
831 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),3.5)
832 >>> bm.shape
833 (2, 2)
834
835 we don't touch the input matrix:
836 >>> boundsMat.shape
837 (3, 3)
838
839 >>> print ', '.join(['%.3f'%x for x in bm[0]])
840 0.000, 3.000
841 >>> print ', '.join(['%.3f'%x for x in bm[1]])
842 2.000, 0.000
843
844 if the threshold is high enough, we don't do anything:
845 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
846 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),5.0)
847 >>> bm.shape
848 (3, 3)
849
850 If there's a max value that's close enough to *any* of the indices
851 we pass in, we'll keep it:
852 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
853 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,1),3.5)
854 >>> bm.shape
855 (3, 3)
856
857 """
858 nPts = bm.shape[0]
859 k = numpy.zeros(nPts,numpy.int0)
860 for idx in indices: k[idx]=1
861 for i in indices:
862 row = bm[i]
863 for j in range(i+1,nPts):
864 if not k[j] and row[j]<maxThresh:
865 k[j]=1
866 keep = numpy.nonzero(k)[0]
867 bm2 = numpy.zeros((len(keep),len(keep)),numpy.float)
868 for i,idx in enumerate(keep):
869 row = bm[idx]
870 bm2[i] = numpy.take(row,keep)
871 return bm2
872
874 """
875
876 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
877 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
878 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', Geometry.Point3D(5.12, 0.908, 0.0)),
879 ... ]
880 >>> pcophore=Pharmacophore.Pharmacophore(feats)
881 >>> pcophore.setLowerBound(0,1, 1.1)
882 >>> pcophore.setUpperBound(0,1, 1.9)
883 >>> pcophore.setLowerBound(0,2, 2.1)
884 >>> pcophore.setUpperBound(0,2, 2.9)
885 >>> pcophore.setLowerBound(1,2, 2.1)
886 >>> pcophore.setUpperBound(1,2, 3.9)
887
888 >>> bounds = numpy.array([[0,2,3],[1,0,4],[2,3,0]],numpy.float)
889 >>> CoarseScreenPharmacophore(((0,),(1,)),bounds,pcophore)
890 True
891
892 >>> CoarseScreenPharmacophore(((0,),(2,)),bounds,pcophore)
893 False
894
895 >>> CoarseScreenPharmacophore(((1,),(2,)),bounds,pcophore)
896 False
897
898 >>> CoarseScreenPharmacophore(((0,),(1,),(2,)),bounds,pcophore)
899 True
900
901 >>> CoarseScreenPharmacophore(((1,),(0,),(2,)),bounds,pcophore)
902 False
903
904 >>> CoarseScreenPharmacophore(((2,),(1,),(0,)),bounds,pcophore)
905 False
906
907 # we ignore the point locations here and just use their definitions:
908 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
909 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
910 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', Geometry.Point3D(5.12, 0.908, 0.0)),
911 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
912 ... ]
913 >>> pcophore=Pharmacophore.Pharmacophore(feats)
914 >>> pcophore.setLowerBound(0,1, 2.1)
915 >>> pcophore.setUpperBound(0,1, 2.9)
916 >>> pcophore.setLowerBound(0,2, 2.1)
917 >>> pcophore.setUpperBound(0,2, 2.9)
918 >>> pcophore.setLowerBound(0,3, 2.1)
919 >>> pcophore.setUpperBound(0,3, 2.9)
920 >>> pcophore.setLowerBound(1,2, 1.1)
921 >>> pcophore.setUpperBound(1,2, 1.9)
922 >>> pcophore.setLowerBound(1,3, 1.1)
923 >>> pcophore.setUpperBound(1,3, 1.9)
924 >>> pcophore.setLowerBound(2,3, 1.1)
925 >>> pcophore.setUpperBound(2,3, 1.9)
926 >>> bounds = numpy.array([[0,3,3,3],[2,0,2,2],[2,1,0,2],[2,1,1,0]],numpy.float)
927
928 >>> CoarseScreenPharmacophore(((0,),(1,),(2,),(3,)),bounds,pcophore)
929 True
930
931 >>> CoarseScreenPharmacophore(((0,),(1,),(3,),(2,)),bounds,pcophore)
932 True
933
934 >>> CoarseScreenPharmacophore(((1,),(0,),(3,),(2,)),bounds,pcophore)
935 False
936
937 """
938 for k in range(len(atomMatch)):
939 if len(atomMatch[k])==1:
940 for l in range(k+1,len(atomMatch)):
941 if len(atomMatch[l])==1:
942 idx0 = atomMatch[k][0]
943 idx1 = atomMatch[l][0]
944 if idx1<idx0:
945 idx0,idx1=idx1,idx0
946 if bounds[idx1,idx0] >= pcophore.getUpperBound(k, l) or \
947 bounds[idx0,idx1] <= pcophore.getLowerBound(k, l) :
948 if verbose:
949 print '\t (%d,%d) [%d,%d] fail'%(idx1,idx0,k,l)
950 print '\t %f,%f - %f,%f'%(bounds[idx1,idx0],pcophore.getUpperBound(k,l),
951 bounds[idx0,idx1],pcophore.getLowerBound(k,l))
952
953
954
955
956 return False
957 return True
958
960 """ checks to see if a particular mapping of features onto
961 a molecule satisfies a pharmacophore's 2D restrictions
962
963
964 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
965 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
966 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
967 >>> pcophore.setUpperBound2D(0,1,3)
968 >>> m = Chem.MolFromSmiles('FCC(N)CN')
969 >>> Check2DBounds(((0,),(3,)),m,pcophore)
970 True
971 >>> Check2DBounds(((0,),(5,)),m,pcophore)
972 False
973
974 """
975 dm = Chem.GetDistanceMatrix(mol,False,False,False)
976 nFeats = len(atomMatch)
977 for i in range(nFeats):
978 for j in range(i+1,nFeats):
979 lowerB = pcophore._boundsMat2D[j,i]
980 upperB = pcophore._boundsMat2D[i,j]
981 dij=10000
982 for atomI in atomMatch[i]:
983 for atomJ in atomMatch[j]:
984 try:
985 dij = min(dij,dm[atomI,atomJ])
986 except IndexError:
987 print 'bad indices:',atomI,atomJ
988 print ' shape:',dm.shape
989 print ' match:',atomMatch
990 print ' mol:'
991 print Chem.MolToMolBlock(mol)
992 raise IndexError
993 if dij<lowerB or dij>upperB:
994 return False
995 return True
996
997 -def _checkMatch(match,mol,bounds,pcophore,use2DLimits):
998 """ **INTERNAL USE ONLY**
999
1000 checks whether a particular atom match can be satisfied by
1001 a molecule
1002
1003 """
1004 atomMatch = ChemicalFeatures.GetAtomMatch(match)
1005 if not atomMatch:
1006 return None
1007 elif use2DLimits:
1008 if not Check2DBounds(atomMatch,mol,pcophore):
1009 return None
1010 if not CoarseScreenPharmacophore(atomMatch,bounds,pcophore):
1011 return None
1012 return atomMatch
1013
1014 -def ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=False,
1015 index=0,soFar=[]):
1016 """ Enumerates the list of atom mappings a molecule
1017 has to a particular pharmacophore.
1018 We do check distance bounds here.
1019
1020
1021 """
1022 nMatches = len(matches)
1023 if index>=nMatches:
1024 yield soFar,[]
1025 elif index==nMatches-1:
1026 for entry in matches[index]:
1027 nextStep = soFar+[entry]
1028 if index != 0:
1029 atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
1030 else:
1031 atomMatch = ChemicalFeatures.GetAtomMatch(nextStep)
1032 if atomMatch:
1033 yield soFar+[entry],atomMatch
1034 else:
1035 for entry in matches[index]:
1036 nextStep = soFar+[entry]
1037 if index != 0:
1038 atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
1039 if not atomMatch:
1040 continue
1041 for val in ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=use2DLimits,
1042 index=index+1,soFar=nextStep):
1043 if val:
1044 yield val
1045
1046 -def MatchPharmacophore(matches,bounds,pcophore,useDownsampling=False,
1047 use2DLimits=False,mol=None,excludedVolumes=None,
1048 useDirs=False):
1049 """
1050
1051 if use2DLimits is set, the molecule must also be provided and topological
1052 distances will also be used to filter out matches
1053
1054 """
1055 for match,atomMatch in ConstrainedEnum(matches,mol,pcophore,bounds,
1056 use2DLimits=use2DLimits):
1057 bm = bounds.copy()
1058 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore,useDirs=useDirs,mol=mol);
1059
1060 if excludedVolumes:
1061 localEvs = []
1062 for eV in excludedVolumes:
1063 featInfo = []
1064 for i,entry in enumerate(atomMatch):
1065 info = list(eV.featInfo[i])
1066 info[0] = entry
1067 featInfo.append(info)
1068 localEvs.append(ExcludedVolume.ExcludedVolume(featInfo,eV.index,
1069 eV.exclusionDist))
1070 bm = AddExcludedVolumes(bm,localEvs,smoothIt=False)
1071
1072 sz = bm.shape[0]
1073 if useDownsampling:
1074 indices = []
1075 for entry in atomMatch:
1076 indices.extend(entry)
1077 if excludedVolumes:
1078 for vol in localEvs:
1079 indices.append(vol.index)
1080 bm = DownsampleBoundsMatrix(bm,indices)
1081 if DG.DoTriangleSmoothing(bm):
1082 return 0,bm,match,(sz,bm.shape[0])
1083
1084 return 1,None,None,None
1085
1086 -def GetAllPharmacophoreMatches(matches,bounds,pcophore,useDownsampling=0,
1087 progressCallback=None,
1088 use2DLimits=False,mol=None,
1089 verbose=False):
1090 res = []
1091 nDone = 0
1092 for match in CombiEnum(matches):
1093 atomMatch = ChemicalFeatures.GetAtomMatch(match)
1094 if atomMatch and use2DLimits and mol:
1095 pass2D = Check2DBounds(atomMatch,mol,pcophore)
1096 if verbose:
1097 print '..',atomMatch
1098 print ' ..Pass2d:',pass2D
1099 else:
1100 pass2D = True
1101 if atomMatch and pass2D and \
1102 CoarseScreenPharmacophore(atomMatch,bounds,pcophore,verbose=verbose):
1103 if verbose:
1104 print ' ..CoarseScreen: Pass'
1105
1106 bm = bounds.copy()
1107 if verbose:
1108 print 'pre update:'
1109 for row in bm:
1110 print ' ',' '.join(['% 4.2f'%x for x in row])
1111 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore);
1112 sz = bm.shape[0]
1113 if verbose:
1114 print 'pre downsample:'
1115 for row in bm:
1116 print ' ',' '.join(['% 4.2f'%x for x in row])
1117
1118 if useDownsampling:
1119 indices = []
1120 for entry in atomMatch:
1121 indices += list(entry)
1122 bm = DownsampleBoundsMatrix(bm,indices)
1123 if verbose:
1124 print 'post downsample:'
1125 for row in bm:
1126 print ' ',' '.join(['% 4.2f'%x for x in row])
1127
1128 if DG.DoTriangleSmoothing(bm):
1129 res.append(match)
1130 elif verbose:
1131 print 'cannot smooth'
1132 nDone+=1
1133 if progressCallback:
1134 progressCallback(nDone)
1135 return res
1136
1137
1139 """ Computes the chiral volume of an atom
1140
1141 We're using the chiral volume formula from Figure 7 of
1142 Blaney and Dixon, Rev. Comp. Chem. V, 299-335 (1994)
1143
1144 >>> import os.path
1145 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
1146
1147 R configuration atoms give negative volumes:
1148 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r.mol'))
1149 >>> Chem.AssignStereochemistry(mol)
1150 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1151 'R'
1152 >>> ComputeChiralVolume(mol,1) < 0
1153 True
1154
1155 S configuration atoms give positive volumes:
1156 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s.mol'))
1157 >>> Chem.AssignStereochemistry(mol)
1158 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1159 'S'
1160 >>> ComputeChiralVolume(mol,1) > 0
1161 True
1162
1163 Non-chiral (or non-specified) atoms give zero volume:
1164 >>> ComputeChiralVolume(mol,0) == 0.0
1165 True
1166
1167 We also work on 3-coordinate atoms (with implicit Hs):
1168 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r-3.mol'))
1169 >>> Chem.AssignStereochemistry(mol)
1170 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1171 'R'
1172 >>> ComputeChiralVolume(mol,1)<0
1173 True
1174
1175 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s-3.mol'))
1176 >>> Chem.AssignStereochemistry(mol)
1177 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1178 'S'
1179 >>> ComputeChiralVolume(mol,1)>0
1180 True
1181
1182
1183
1184 """
1185 conf = mol.GetConformer(confId)
1186 Chem.AssignStereochemistry(mol)
1187 center = mol.GetAtomWithIdx(centerIdx)
1188 if not center.HasProp('_CIPCode'):
1189 return 0.0
1190
1191 nbrs = center.GetNeighbors()
1192 nbrRanks = []
1193 for nbr in nbrs:
1194 rank = int(nbr.GetProp('_CIPRank'))
1195 pos = conf.GetAtomPosition(nbr.GetIdx())
1196 nbrRanks.append((rank,pos))
1197
1198
1199
1200 if len(nbrRanks)==3:
1201 nbrRanks.append((-1,conf.GetAtomPosition(centerIdx)))
1202 nbrRanks.sort()
1203
1204 ps = [x[1] for x in nbrRanks]
1205 v1 = ps[0]-ps[3]
1206 v2 = ps[1]-ps[3]
1207 v3 = ps[2]-ps[3]
1208
1209 res = v1.DotProduct(v2.CrossProduct(v3))
1210 return res
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1222 import doctest,sys
1223 return doctest.testmod(sys.modules["__main__"])
1224
1225 if __name__ == '__main__':
1226 import sys
1227 failed,tried = _test()
1228 sys.exit(failed)
1229