1
2
3
4
5
6
7 import RDConfig
8
9 import sys,time,math,sets
10 from ML.Data import Stats
11 import DistanceGeometry as DG
12 import Chem
13 import numpy
14 from Chem import rdDistGeom as MolDG
15 from Chem import ChemicalFeatures
16 from Chem import ChemicalForceFields
17 import Pharmacophore,ExcludedVolume
18 import Geometry
19 _times = {}
20
21 import RDLogger as logging
22 logger = logging.logger()
23 defaultFeatLength=2.0
24
26 """ returns a list of the heavy-atom neighbors of the
27 atom passed in:
28
29 >>> m = Chem.MolFromSmiles('CCO')
30 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(0))
31 >>> len(l)
32 1
33 >>> isinstance(l[0],Chem.Atom)
34 True
35 >>> l[0].GetIdx()
36 1
37
38 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(1))
39 >>> len(l)
40 2
41 >>> l[0].GetIdx()
42 0
43 >>> l[1].GetIdx()
44 2
45
46 """
47 res=[]
48 for nbr in atom.GetNeighbors():
49 if nbr.GetAtomicNum() != 1:
50 res.append(nbr)
51 return res
52
54 """ Adds an entry at the end of the bounds matrix for a point at
55 the center of a multi-point feature
56
57 returns a 2-tuple:
58 new bounds mat
59 index of point added
60
61 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
62 >>> match = [0,1,2]
63 >>> bm,idx = ReplaceGroup(match,boundsMat,slop=0.0)
64
65 the index is at the end:
66 >>> idx
67 3
68
69 and the matrix is one bigger:
70 >>> bm.shape
71 (4, 4)
72
73 but the original bounds mat is not altered:
74 >>> boundsMat.shape
75 (3, 3)
76
77
78 We make the assumption that the points of the
79 feature form a regular polygon and the replacement
80 point goes at the center:
81 >>> print ', '.join(['%.3f'%x for x in bm[-1]])
82 0.577, 0.577, 0.577, 0.000
83 >>> print ', '.join(['%.3f'%x for x in bm[:,-1]])
84 1.155, 1.155, 1.155, 0.000
85
86 The slop argument (default = 0.01) is fractional:
87 >>> bm,idx = ReplaceGroup(match,boundsMat)
88 >>> print ', '.join(['%.3f'%x for x in bm[-1]])
89 0.572, 0.572, 0.572, 0.000
90 >>> print ', '.join(['%.3f'%x for x in bm[:,-1]])
91 1.166, 1.166, 1.166, 0.000
92
93
94 """
95 maxVal = -1000.0
96 minVal = 1e8
97 nPts = len(match)
98 for i in range(nPts):
99 for j in range(i+1,nPts):
100 idx0 = match[i]
101 idx1 = match[j]
102 if idx1<idx0:
103 idx0,idx1 = idx1,idx0
104 minVal = min(minVal,bounds[idx1,idx0])
105 maxVal = max(maxVal,bounds[idx0,idx1])
106 maxVal *= (1+slop)
107 minVal *= (1-slop)
108
109 scaleFact = 1.0/(2.0*math.sin(math.pi/nPts))
110 minVal *= scaleFact
111 maxVal *= scaleFact
112
113 replaceIdx = bounds.shape[0]
114 if not useDirs:
115 bm = numpy.zeros((bounds.shape[0]+1,bounds.shape[1]+1),numpy.float)
116 else:
117 bm = numpy.zeros((bounds.shape[0]+2,bounds.shape[1]+2),numpy.float)
118 bm[0:bounds.shape[0],0:bounds.shape[1]]=bounds
119 bm[:replaceIdx,replaceIdx]=1000.
120
121 if useDirs:
122 bm[:replaceIdx+1,replaceIdx+1]=1000.
123
124 bm[replaceIdx,replaceIdx+1]=dirLength+slop
125 bm[replaceIdx+1,replaceIdx]=dirLength-slop
126
127 for idx1 in match:
128 bm[idx1,replaceIdx]=maxVal
129 bm[replaceIdx,idx1]=minVal
130 if useDirs:
131
132 bm[idx1,replaceIdx+1] = numpy.sqrt(bm[replaceIdx,replaceIdx+1]**2+maxVal**2)
133 bm[replaceIdx+1,idx1] = numpy.sqrt(bm[replaceIdx+1,replaceIdx]**2+minVal**2)
134 return bm,replaceIdx
135
136 -def EmbedMol(mol,bm,atomMatch=None,weight=2.0,randomSeed=-1,
137 excludedVolumes=None):
138 """ Generates an embedding for a molecule based on a bounds matrix and adds
139 a conformer (id 0) to the molecule
140
141 if the optional argument atomMatch is provided, it will be used to provide
142 supplemental weights for the embedding routine (used in the optimization
143 phase to ensure that the resulting geometry really does satisfy the
144 pharmacophore).
145
146 if the excludedVolumes is provided, it should be a sequence of
147 ExcludedVolume objects
148
149 >>> m = Chem.MolFromSmiles('c1ccccc1C')
150 >>> bounds = MolDG.GetMoleculeBoundsMatrix(m)
151 >>> bounds.shape
152 (7, 7)
153 >>> m.GetNumConformers()
154 0
155 >>> EmbedMol(m,bounds,randomSeed=23)
156 >>> m.GetNumConformers()
157 1
158
159
160 """
161 nAts = mol.GetNumAtoms()
162 weights=[]
163 if(atomMatch):
164 for i in range(len(atomMatch)):
165 for j in range(i+1,len(atomMatch)):
166 weights.append((i,j,weight))
167 if(excludedVolumes):
168 for vol in excludedVolumes:
169 idx = vol.index
170
171 for i in range(nAts):
172 weights.append((i,idx,weight))
173 coords = DG.EmbedBoundsMatrix(bm,weights=weights,numZeroFail=1,randomSeed=randomSeed)
174
175
176
177 conf = Chem.Conformer(nAts)
178 conf.SetId(0)
179 for i in range(nAts):
180 conf.SetAtomPosition(i,list(coords[i]))
181 if excludedVolumes:
182 for vol in excludedVolumes:
183 vol.pos = numpy.array(coords[vol.index])
184
185
186 mol.AddConformer(conf)
187
189 """ Adds a set of excluded volumes to the bounds matrix
190 and returns the new matrix
191
192 excludedVolumes is a list of ExcludedVolume objects
193
194
195 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
196 >>> ev1 = ExcludedVolume.ExcludedVolume(([(0,),0.5,1.0],),exclusionDist=1.5)
197 >>> bm = AddExcludedVolumes(boundsMat,(ev1,))
198
199 the results matrix is one bigger:
200 >>> bm.shape
201 (4, 4)
202
203 and the original bounds mat is not altered:
204 >>> boundsMat.shape
205 (3, 3)
206
207 >>> print ', '.join(['%.3f'%x for x in bm[-1]])
208 0.500, 1.500, 1.500, 0.000
209 >>> print ', '.join(['%.3f'%x for x in bm[:,-1]])
210 1.000, 3.000, 3.000, 0.000
211
212 """
213 oDim = bm.shape[0]
214 dim = oDim+len(excludedVolumes)
215 res = numpy.zeros((dim,dim),numpy.float)
216 res[:oDim,:oDim] = bm
217 for i,vol in enumerate(excludedVolumes):
218 bmIdx = oDim+i
219 vol.index = bmIdx
220
221
222 res[bmIdx,:bmIdx] = vol.exclusionDist
223 res[:bmIdx,bmIdx] = 1000.0
224
225
226 for indices,minV,maxV in vol.featInfo:
227 for index in indices:
228 try:
229 res[bmIdx,index] = minV
230 res[index,bmIdx] = maxV
231 except IndexError:
232 logger.error('BAD INDEX: res[%d,%d], shape is %s'%(bmIdx,index,str(res.shape)))
233 raise IndexError
234
235
236 for j in range(bmIdx+1,dim):
237 res[bmIdx,j:dim] = 0
238 res[j:dim,bmIdx] = 1000
239
240 if smoothIt: DG.DoTriangleSmoothing(res)
241 return res
242
246 """ loops over a distance bounds matrix and replaces the elements
247 that are altered by a pharmacophore
248
249 **NOTE** this returns the resulting bounds matrix, but it may also
250 alter the input matrix
251
252 atomMatch is a sequence of sequences containing atom indices
253 for each of the pharmacophore's features.
254
255 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
256 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
257 ... ]
258 >>> pcophore=Pharmacophore.Pharmacophore(feats)
259 >>> pcophore.setLowerBound(0,1, 1.0)
260 >>> pcophore.setUpperBound(0,1, 2.0)
261
262 >>> boundsMat = numpy.array([[0.0,3.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
263 >>> atomMatch = ((0,),(1,))
264 >>> bm = UpdatePharmacophoreBounds(boundsMat,atomMatch,pcophore)
265
266
267 In this case, there are no multi-atom features, so the result matrix
268 is the same as the input:
269 >>> bm is boundsMat
270 True
271
272 this means, of course, that the input boundsMat is altered:
273 >>> print ', '.join(['%.3f'%x for x in boundsMat[0]])
274 0.000, 2.000, 3.000
275 >>> print ', '.join(['%.3f'%x for x in boundsMat[1]])
276 1.000, 0.000, 3.000
277 >>> print ', '.join(['%.3f'%x for x in boundsMat[2]])
278 2.000, 2.000, 0.000
279
280 """
281 replaceMap = {}
282 for i,matchI in enumerate(atomMatch):
283 if len(matchI)>1:
284 bm,replaceIdx = ReplaceGroup(matchI,bm,useDirs=useDirs)
285 replaceMap[i] = replaceIdx
286
287 for i,matchI in enumerate(atomMatch):
288 mi = replaceMap.get(i,matchI[0])
289 for j in range(i+1,len(atomMatch)):
290 mj = replaceMap.get(j,atomMatch[j][0])
291 if mi<mj:
292 idx0,idx1 = mi,mj
293 else:
294 idx0,idx1 = mj,mi
295 bm[idx0,idx1] = pcophore.getUpperBound(i,j)
296 bm[idx1,idx0] = pcophore.getLowerBound(i,j)
297
298 return bm
299
300
301
302 -def EmbedPharmacophore(mol,atomMatch,pcophore,randomSeed=-1,count=10,smoothFirst=True,
303 silent=False,bounds=None,excludedVolumes=None,targetNumber=-1,
304 useDirs=False):
305 """ Generates one or more embeddings for a molecule that satisfy a pharmacophore
306
307 atomMatch is a sequence of sequences containing atom indices
308 for each of the pharmacophore's features.
309
310 - count: is the maximum number of attempts to make a generating an embedding
311 - smoothFirst: toggles triangle smoothing of the molecular bounds matix
312 - bounds: if provided, should be the molecular bounds matrix. If this isn't
313 provided, the matrix will be generated.
314 - targetNumber: if this number is positive, it provides a maximum number
315 of embeddings to generate (i.e. we'll have count attempts to generate
316 targetNumber embeddings).
317
318 returns: a 3 tuple:
319 1) the molecular bounds matrix adjusted for the pharmacophore
320 2) a list of embeddings (molecules with a single conformer)
321 3) the number of failed attempts at embedding
322
323
324 >>> m = Chem.MolFromSmiles('OCCN')
325 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
326 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
327 ... ]
328 >>> pcophore=Pharmacophore.Pharmacophore(feats)
329 >>> pcophore.setLowerBound(0,1, 2.5)
330 >>> pcophore.setUpperBound(0,1, 3.5)
331 >>> atomMatch = ((0,),(3,))
332
333 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
334 >>> len(embeds)
335 10
336 >>> nFail
337 0
338
339 Set up a case that can't succeed:
340 >>> pcophore=Pharmacophore.Pharmacophore(feats)
341 >>> pcophore.setLowerBound(0,1, 2.0)
342 >>> pcophore.setUpperBound(0,1, 2.1)
343 >>> atomMatch = ((0,),(3,))
344
345 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
346 >>> len(embeds)
347 0
348 >>> nFail
349 10
350
351 """
352 global _times
353 if not hasattr(mol,'_chiralCenters'):
354 mol._chiralCenters = Chem.FindMolChiralCenters(mol)
355
356 if bounds is None:
357 bounds = MolDG.GetMoleculeBoundsMatrix(mol)
358 if smoothFirst: DG.DoTriangleSmoothing(bounds)
359
360 bm = bounds.copy()
361
362
363
364
365
366
367 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore,useDirs=useDirs,mol=mol)
368
369 if excludedVolumes:
370 bm = AddExcludedVolumes(bm,excludedVolumes,smoothIt=False)
371
372 if not DG.DoTriangleSmoothing(bm):
373 raise ValueError,"could not smooth bounds matrix"
374
375
376
377
378
379
380
381
382 if targetNumber<=0:
383 targetNumber=count
384 nFailed = 0
385 res = []
386 for i in range(count):
387 tmpM = bm[:,:]
388 m2 = Chem.Mol(mol.ToBinary())
389 t1 = time.time()
390 try:
391 if randomSeed<=0:
392 seed = i*10+1
393 else:
394 seed = i*10+randomSeed
395 EmbedMol(m2,tmpM,atomMatch,randomSeed=seed,
396 excludedVolumes=excludedVolumes)
397 except ValueError:
398 if not silent:
399 logger.info('Embed failed')
400 nFailed += 1
401 else:
402 t2 = time.time()
403 _times['embed'] = _times.get('embed',0)+t2-t1
404 keepIt=True
405 for idx,stereo in mol._chiralCenters:
406 if stereo in ('R','S'):
407 vol = ComputeChiralVolume(m2,idx)
408 if (stereo=='R' and vol>=0) or \
409 (stereo=='S' and vol<=0):
410 keepIt=False
411 break
412 if keepIt:
413 res.append(m2)
414 else:
415 logger.debug('Removed embedding due to chiral constraints.')
416 if len(res)==targetNumber: break
417 return bm,res,nFailed
418
420 """ provides an OS independent way of detecting NaNs
421 This is intended to be used with values returned from the C++
422 side of things.
423
424 We can't actually test this from Python (which traps
425 zero division errors), but it would work something like
426 this if we could:
427 >>> isNaN(0)
428 False
429
430 #>>> isNan(1/0)
431 #True
432
433 """
434 if v!=v and sys.platform=='win32':
435 return True
436 elif v==0 and v==1 and sys.platform!='win32':
437 return True
438 return False
439
440
441 -def OptimizeMol(mol,bm,atomMatches=None,excludedVolumes=None,
442 forceConstant=1200.0,
443 maxPasses=5,verbose=False):
444 """ carries out a UFF optimization for a molecule optionally subject
445 to the constraints in a bounds matrix
446
447 - atomMatches, if provided, is a sequence of sequences
448 - forceConstant is the force constant of the spring used to enforce
449 the constraints
450
451 returns a 2-tuple:
452 1) the energy of the initial conformation
453 2) the energy post-embedding
454 NOTE that these energies include the energies of the constraints
455
456
457
458 >>> m = Chem.MolFromSmiles('OCCN')
459 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
460 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
461 ... ]
462 >>> pcophore=Pharmacophore.Pharmacophore(feats)
463 >>> pcophore.setLowerBound(0,1, 2.5)
464 >>> pcophore.setUpperBound(0,1, 2.8)
465 >>> atomMatch = ((0,),(3,))
466 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
467 >>> len(embeds)
468 10
469 >>> testM = embeds[0]
470
471 Do the optimization:
472 >>> e1,e2 = OptimizeMol(testM,bm,atomMatches=atomMatch)
473
474 Optimizing should have lowered the energy:
475 >>> e2 < e1
476 True
477
478 Check the constrained distance:
479 >>> conf = testM.GetConformer(0)
480 >>> p0 = conf.GetAtomPosition(0)
481 >>> p3 = conf.GetAtomPosition(3)
482 >>> d03 = p0.Distance(p3)
483 >>> d03 >= pcophore.getLowerBound(0,1)-.01
484 True
485 >>> d03 <= pcophore.getUpperBound(0,1)+.01
486 True
487
488 If we optimize without the distance constraints (provided via the atomMatches
489 argument) we're not guaranteed to get the same results, particularly in a case
490 like the current one where the pharmcophore brings the atoms uncomfortably
491 close together:
492 >>> testM = embeds[1]
493 >>> e1,e2 = OptimizeMol(testM,bm)
494 >>> e2 < e1
495 True
496 >>> conf = testM.GetConformer(0)
497 >>> p0 = conf.GetAtomPosition(0)
498 >>> p3 = conf.GetAtomPosition(3)
499 >>> d03 = p0.Distance(p3)
500 >>> d03 >= pcophore.getLowerBound(0,1)-.01
501 True
502 >>> d03 <= pcophore.getUpperBound(0,1)+.01
503 False
504
505 """
506 try:
507 ff = ChemicalForceFields.UFFGetMoleculeForceField(mol)
508 except:
509 logger.info('Problems building molecular forcefield',exc_info=True)
510 return -1.0,-1.0
511
512 weights=[]
513 if(atomMatches):
514 for k in range(len(atomMatches)):
515 for i in atomMatches[k]:
516 for l in range(k+1,len(atomMatches)):
517 for j in atomMatches[l]:
518 weights.append((i,j))
519 for i,j in weights:
520 if j<i:
521 i,j = j,i
522 minV = bm[j,i]
523 maxV = bm[i,j]
524 ff.AddDistanceConstraint(i,j,minV,maxV,forceConstant)
525 if excludedVolumes:
526 nAts = mol.GetNumAtoms()
527 conf = mol.GetConformer()
528 idx = nAts
529 for exVol in excludedVolumes:
530 assert exVol.pos is not None
531 logger.debug('ff.AddExtraPoint(%.4f,%.4f,%.4f)'%(exVol.pos[0],exVol.pos[1],
532 exVol.pos[2]))
533 ff.AddExtraPoint(exVol.pos[0],exVol.pos[1],exVol.pos[2],True)
534 indices = []
535 for localIndices,foo,bar in exVol.featInfo:
536 indices += list(localIndices)
537 for i in range(nAts):
538 v = numpy.array(conf.GetAtomPosition(i))-numpy.array(exVol.pos)
539 d = numpy.sqrt(numpy.dot(v,v))
540 if i not in indices:
541 if d<5.0:
542 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%d,%.0f)'%(i,idx,exVol.exclusionDist,1000,forceConstant))
543 ff.AddDistanceConstraint(i,idx,exVol.exclusionDist,1000,
544 forceConstant)
545
546 else:
547 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%.3f,%.0f)'%(i,idx,bm[exVol.index,i],bm[i,exVol.index],forceConstant))
548 ff.AddDistanceConstraint(i,idx,bm[exVol.index,i],bm[i,exVol.index],
549 forceConstant)
550 idx += 1
551 ff.Initialize()
552 e1 = ff.CalcEnergy()
553 if isNaN(e1):
554 raise ValueError,'bogus energy'
555
556 if verbose:
557 print Chem.MolToMolBlock(mol)
558 for i,vol in enumerate(excludedVolumes):
559 pos = ff.GetExtraPointPos(i)
560 print >>sys.stderr,' % 7.4f % 7.4f % 7.4f As 0 0 0 0 0 0 0 0 0 0 0 0'%tuple(pos)
561 needsMore=ff.Minimize()
562 nPasses=0
563 while needsMore and nPasses<maxPasses:
564 needsMore=ff.Minimize()
565 nPasses+=1
566 e2 = ff.CalcEnergy()
567 if isNaN(e2):
568 raise ValueError,'bogus energy'
569
570 if verbose:
571 print '--------'
572 print Chem.MolToMolBlock(mol)
573 for i,vol in enumerate(excludedVolumes):
574 pos = ff.GetExtraPointPos(i)
575 print >>sys.stderr,' % 7.4f % 7.4f % 7.4f Sb 0 0 0 0 0 0 0 0 0 0 0 0'%tuple(pos)
576 ff = None
577 return e1,e2
578
579 -def EmbedOne(mol,name,match,pcophore,count=1,silent=0,**kwargs):
580 """ generates statistics for a molecule's embeddings
581
582 Four energies are computed for each embedding:
583 1) E1: the energy (with constraints) of the initial embedding
584 2) E2: the energy (with constraints) of the optimized embedding
585 3) E3: the energy (no constraints) the geometry for E2
586 4) E4: the energy (no constraints) of the optimized free-molecule
587 (starting from the E3 geometry)
588
589 Returns a 9-tuple:
590 1) the mean value of E1
591 2) the sample standard deviation of E1
592 3) the mean value of E2
593 4) the sample standard deviation of E2
594 5) the mean value of E3
595 6) the sample standard deviation of E3
596 7) the mean value of E4
597 8) the sample standard deviation of E4
598 9) The number of embeddings that failed
599
600 """
601 global _times
602 atomMatch = [list(x.GetAtomIds()) for x in match]
603 bm,ms,nFailed = EmbedPharmacophore(mol,atomMatch,pcophore,count=count,
604 silent=silent,**kwargs)
605 e1s = []
606 e2s = []
607 e3s = []
608 e4s = []
609 d12s = []
610 d23s = []
611 d34s = []
612 for m in ms:
613 t1 = time.time()
614 try:
615 e1,e2 = OptimizeMol(m,bm,atomMatch)
616 except ValueError:
617 pass
618 else:
619 t2 = time.time()
620 _times['opt1'] = _times.get('opt1',0)+t2-t1
621
622 e1s.append(e1)
623 e2s.append(e2)
624
625 d12s.append(e1-e2)
626 t1 = time.time()
627 try:
628 e3,e4 = OptimizeMol(m,bm)
629 except ValueError:
630 pass
631 else:
632 t2 = time.time()
633 _times['opt2'] = _times.get('opt2',0)+t2-t1
634 e3s.append(e3)
635 e4s.append(e4)
636 d23s.append(e2-e3)
637 d34s.append(e3-e4)
638 count += 1
639 try:
640 e1,e1d = Stats.MeanAndDev(e1s)
641 except:
642 e1 = -1.0
643 e1d=-1.0
644 try:
645 e2,e2d = Stats.MeanAndDev(e2s)
646 except:
647 e2 = -1.0
648 e2d=-1.0
649 try:
650 e3,e3d = Stats.MeanAndDev(e3s)
651 except:
652 e3 = -1.0
653 e3d=-1.0
654
655 try:
656 e4,e4d = Stats.MeanAndDev(e4s)
657 except:
658 e4 = -1.0
659 e4d=-1.0
660 if not silent:
661 print '%s(%d): %.2f(%.2f) -> %.2f(%.2f) : %.2f(%.2f) -> %.2f(%.2f)'%(name,nFailed,e1,e1d,e2,e2d,e3,e3d,e4,e4d)
662 return e1,e1d,e2,e2d,e3,e3d,e4,e4d,nFailed
663
665 """ generates a list of all possible mappings of a pharmacophore to a molecule
666
667 Returns a 2-tuple:
668 1) a boolean indicating whether or not all features were found
669 2) a list, numFeatures long, of sequences of features
670
671
672 >>> import os.path
673 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
674 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
675 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
676 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
677 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
678 >>> m = Chem.MolFromSmiles('FCCN')
679 >>> match,mList = MatchPharmacophoreToMol(m,featFactory,pcophore)
680 >>> match
681 True
682
683 Two feature types:
684 >>> len(mList)
685 2
686
687 The first feature type, Acceptor, has two matches:
688 >>> len(mList[0])
689 2
690 >>> mList[0][0].GetAtomIds()
691 (0,)
692 >>> mList[0][1].GetAtomIds()
693 (3,)
694
695 The first feature type, Donor, has a single match:
696 >>> len(mList[1])
697 1
698 >>> mList[1][0].GetAtomIds()
699 (3,)
700
701 """
702 return MatchFeatsToMol(mol, featFactory, pcophore.getFeatures())
703
705 """ **INTERNAL USE ONLY**
706
707 >>> import os.path
708 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
709 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
710 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
711 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
712 >>> m = Chem.MolFromSmiles('FCCN')
713 >>> d =_getFeatDict(m,featFactory,activeFeats)
714 >>> d.keys()
715 ['Donor', 'Acceptor']
716 >>> donors = d['Donor']
717 >>> len(donors)
718 1
719 >>> donors[0].GetAtomIds()
720 (3,)
721 >>> acceptors = d['Acceptor']
722 >>> len(acceptors)
723 2
724 >>> acceptors[0].GetAtomIds()
725 (0,)
726 >>> acceptors[1].GetAtomIds()
727 (3,)
728
729 """
730 molFeats = {}
731 for feat in features:
732 family = feat.GetFamily()
733 if not molFeats.has_key(family):
734 matches = featFactory.GetFeaturesForMol(mol,includeOnly=family)
735 molFeats[family] = matches
736 return molFeats
737
739 """ generates a list of all possible mappings of each feature to a molecule
740
741 Returns a 2-tuple:
742 1) a boolean indicating whether or not all features were found
743 2) a list, numFeatures long, of sequences of features
744
745
746 >>> import os.path
747 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
748 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(dataDir,'BaseFeatures.fdef'))
749 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
750 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
751 >>> m = Chem.MolFromSmiles('FCCN')
752 >>> match,mList = MatchFeatsToMol(m,featFactory,activeFeats)
753 >>> match
754 True
755
756 Two feature types:
757 >>> len(mList)
758 2
759
760 The first feature type, Acceptor, has two matches:
761 >>> len(mList[0])
762 2
763 >>> mList[0][0].GetAtomIds()
764 (0,)
765 >>> mList[0][1].GetAtomIds()
766 (3,)
767
768 The first feature type, Donor, has a single match:
769 >>> len(mList[1])
770 1
771 >>> mList[1][0].GetAtomIds()
772 (3,)
773
774 """
775 molFeats = _getFeatDict(mol,featFactory,features)
776 res = []
777 for feat in features:
778 matches = molFeats.get(feat.GetFamily(),[])
779 if len(matches) == 0 :
780 return False, None
781 res.append(matches)
782 return True, res
783
785 """ This generator takes a sequence of sequences as an argument and
786 provides all combinations of the elements of the subsequences:
787
788 >>> gen = CombiEnum(((1,2),(10,20)))
789 >>> gen.next()
790 [1, 10]
791 >>> gen.next()
792 [1, 20]
793
794 >>> [x for x in CombiEnum(((1,2),(10,20)))]
795 [[1, 10], [1, 20], [2, 10], [2, 20]]
796
797 >>> [x for x in CombiEnum(((1,2),(10,20),(100,200)))]
798 [[1, 10, 100], [1, 10, 200], [1, 20, 100], [1, 20, 200], [2, 10, 100], [2, 10, 200], [2, 20, 100], [2, 20, 200]]
799
800 """
801 if not len(sequence):
802 yield []
803 elif len(sequence)==1:
804 for entry in sequence[0]:
805 yield [entry]
806 else:
807 for entry in sequence[0]:
808 for subVal in CombiEnum(sequence[1:]):
809 yield [entry]+subVal
810
812 """ removes rows from a bounds matrix that are
813 that are greater than a threshold value away from a set of
814 other points
815
816 returns the modfied bounds matrix
817
818 The goal of this function is to remove rows from the bounds matrix
819 that correspond to atoms that are likely to be quite far from
820 the pharmacophore we're interested in. Because the bounds smoothing
821 we eventually have to do is N^3, this can be a big win
822
823 >>> boundsMat = numpy.array([[0.0,3.0,4.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
824 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),3.5)
825 >>> bm.shape
826 (2, 2)
827
828 we don't touch the input matrix:
829 >>> boundsMat.shape
830 (3, 3)
831
832 >>> print ', '.join(['%.3f'%x for x in bm[0]])
833 0.000, 3.000
834 >>> print ', '.join(['%.3f'%x for x in bm[1]])
835 2.000, 0.000
836
837 if the threshold is high enough, we don't do anything:
838 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
839 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),5.0)
840 >>> bm.shape
841 (3, 3)
842
843 If there's a max value that's close enough to *any* of the indices
844 we pass in, we'll keep it:
845 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
846 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,1),3.5)
847 >>> bm.shape
848 (3, 3)
849
850 """
851 nPts = bm.shape[0]
852 k = numpy.zeros(nPts,numpy.int0)
853 for idx in indices: k[idx]=1
854 for i in indices:
855 row = bm[i]
856 for j in range(i+1,nPts):
857 if not k[j] and row[j]<maxThresh:
858 k[j]=1
859 keep = numpy.nonzero(k)[0]
860 bm2 = numpy.zeros((len(keep),len(keep)),numpy.float)
861 for i,idx in enumerate(keep):
862 row = bm[idx]
863 bm2[i] = numpy.take(row,keep)
864 return bm2
865
867 """
868
869 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
870 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
871 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', Geometry.Point3D(5.12, 0.908, 0.0)),
872 ... ]
873 >>> pcophore=Pharmacophore.Pharmacophore(feats)
874 >>> pcophore.setLowerBound(0,1, 1.1)
875 >>> pcophore.setUpperBound(0,1, 1.9)
876 >>> pcophore.setLowerBound(0,2, 2.1)
877 >>> pcophore.setUpperBound(0,2, 2.9)
878 >>> pcophore.setLowerBound(1,2, 2.1)
879 >>> pcophore.setUpperBound(1,2, 3.9)
880
881 >>> bounds = numpy.array([[0,2,3],[1,0,4],[2,3,0]],numpy.float)
882 >>> CoarseScreenPharmacophore(((0,),(1,)),bounds,pcophore)
883 True
884
885 >>> CoarseScreenPharmacophore(((0,),(2,)),bounds,pcophore)
886 False
887
888 >>> CoarseScreenPharmacophore(((1,),(2,)),bounds,pcophore)
889 False
890
891 >>> CoarseScreenPharmacophore(((0,),(1,),(2,)),bounds,pcophore)
892 True
893
894 >>> CoarseScreenPharmacophore(((1,),(0,),(2,)),bounds,pcophore)
895 False
896
897 >>> CoarseScreenPharmacophore(((2,),(1,),(0,)),bounds,pcophore)
898 False
899
900 # we ignore the point locations here and just use their definitions:
901 >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
902 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
903 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1', Geometry.Point3D(5.12, 0.908, 0.0)),
904 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
905 ... ]
906 >>> pcophore=Pharmacophore.Pharmacophore(feats)
907 >>> pcophore.setLowerBound(0,1, 2.1)
908 >>> pcophore.setUpperBound(0,1, 2.9)
909 >>> pcophore.setLowerBound(0,2, 2.1)
910 >>> pcophore.setUpperBound(0,2, 2.9)
911 >>> pcophore.setLowerBound(0,3, 2.1)
912 >>> pcophore.setUpperBound(0,3, 2.9)
913 >>> pcophore.setLowerBound(1,2, 1.1)
914 >>> pcophore.setUpperBound(1,2, 1.9)
915 >>> pcophore.setLowerBound(1,3, 1.1)
916 >>> pcophore.setUpperBound(1,3, 1.9)
917 >>> pcophore.setLowerBound(2,3, 1.1)
918 >>> pcophore.setUpperBound(2,3, 1.9)
919 >>> bounds = numpy.array([[0,3,3,3],[2,0,2,2],[2,1,0,2],[2,1,1,0]],numpy.float)
920
921 >>> CoarseScreenPharmacophore(((0,),(1,),(2,),(3,)),bounds,pcophore)
922 True
923
924 >>> CoarseScreenPharmacophore(((0,),(1,),(3,),(2,)),bounds,pcophore)
925 True
926
927 >>> CoarseScreenPharmacophore(((1,),(0,),(3,),(2,)),bounds,pcophore)
928 False
929
930 """
931 for k in range(len(atomMatch)):
932 if len(atomMatch[k])==1:
933 for l in range(k+1,len(atomMatch)):
934 if len(atomMatch[l])==1:
935 idx0 = atomMatch[k][0]
936 idx1 = atomMatch[l][0]
937 if idx1<idx0:
938 idx0,idx1=idx1,idx0
939 if bounds[idx1,idx0] >= pcophore.getUpperBound(k, l) or \
940 bounds[idx0,idx1] <= pcophore.getLowerBound(k, l) :
941 if verbose:
942 print '\t (%d,%d) [%d,%d] fail'%(idx1,idx0,k,l)
943 print '\t %f,%f - %f,%f'%(bounds[idx1,idx0],pcophore.getUpperBound(k,l),
944 bounds[idx0,idx1],pcophore.getLowerBound(k,l))
945
946
947
948
949 return False
950 return True
951
953 """ checks to see if a particular mapping of features onto
954 a molecule satisfies a pharmacophore's 2D restrictions
955
956
957 >>> activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
958 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
959 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
960 >>> pcophore.setUpperBound2D(0,1,3)
961 >>> m = Chem.MolFromSmiles('FCC(N)CN')
962 >>> Check2DBounds(((0,),(3,)),m,pcophore)
963 True
964 >>> Check2DBounds(((0,),(5,)),m,pcophore)
965 False
966
967 """
968 dm = Chem.GetDistanceMatrix(mol,False,False,False)
969 nFeats = len(atomMatch)
970 for i in range(nFeats):
971 for j in range(i+1,nFeats):
972 lowerB = pcophore._boundsMat2D[j,i]
973 upperB = pcophore._boundsMat2D[i,j]
974 for atomI in atomMatch[i]:
975 for atomJ in atomMatch[j]:
976 try:
977 dij = dm[atomI,atomJ]
978 except IndexError:
979 print 'bad indices:',atomI,atomJ
980 print ' shape:',dm.shape
981 print ' match:',atomMatch
982 print ' mol:'
983 print Chem.MolToMolBlock(mol)
984 raise IndexError
985
986 if dij<lowerB or dij>upperB:
987 return False
988 return True
989
990 -def _checkMatch(match,mol,bounds,pcophore,use2DLimits):
991 """ **INTERNAL USE ONLY**
992
993 checks whether a particular atom match can be satisfied by
994 a molecule
995
996 """
997 atomMatch = ChemicalFeatures.GetAtomMatch(match)
998 if not atomMatch:
999 return None
1000 elif use2DLimits:
1001 if not Check2DBounds(atomMatch,mol,pcophore):
1002 return None
1003 if not CoarseScreenPharmacophore(atomMatch,bounds,pcophore):
1004 return None
1005 return atomMatch
1006
1007 -def ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=False,
1008 index=0,soFar=[]):
1009 """ Enumerates the list of atom mappings a molecule
1010 has to a particular pharmacophore.
1011 We do check distance bounds here.
1012
1013
1014 """
1015 nMatches = len(matches)
1016 if index>=nMatches:
1017 yield soFar,[]
1018 elif index==nMatches-1:
1019 for entry in matches[index]:
1020 nextStep = soFar+[entry]
1021 if index != 0:
1022 atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
1023 else:
1024 atomMatch = ChemicalFeatures.GetAtomMatch(nextStep)
1025 if atomMatch:
1026 yield soFar+[entry],atomMatch
1027 else:
1028 for entry in matches[index]:
1029 nextStep = soFar+[entry]
1030 if index != 0:
1031 atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
1032 if not atomMatch:
1033 continue
1034 for val in ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=use2DLimits,
1035 index=index+1,soFar=nextStep):
1036 if val:
1037 yield val
1038
1039 -def MatchPharmacophore(matches,bounds,pcophore,useDownsampling=False,
1040 use2DLimits=False,mol=None,excludedVolumes=None,
1041 useDirs=False):
1042 """
1043
1044 if use2DLimits is set, the molecule must also be provided and topological
1045 distances will also be used to filter out matches
1046
1047 """
1048 for match,atomMatch in ConstrainedEnum(matches,mol,pcophore,bounds,
1049 use2DLimits=use2DLimits):
1050 bm = bounds.copy()
1051 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore,useDirs=useDirs,mol=mol);
1052
1053 if excludedVolumes:
1054 localEvs = []
1055 for eV in excludedVolumes:
1056 featInfo = []
1057 for i,entry in enumerate(atomMatch):
1058 info = list(eV.featInfo[i])
1059 info[0] = entry
1060 featInfo.append(info)
1061 localEvs.append(ExcludedVolume.ExcludedVolume(featInfo,eV.index,
1062 eV.exclusionDist))
1063 bm = AddExcludedVolumes(bm,localEvs,smoothIt=False)
1064
1065 sz = bm.shape[0]
1066 if useDownsampling:
1067 indices = []
1068 for entry in atomMatch:
1069 indices.extend(entry)
1070 if excludedVolumes:
1071 for vol in localEvs:
1072 indices.append(vol.index)
1073 bm = DownsampleBoundsMatrix(bm,indices)
1074 if DG.DoTriangleSmoothing(bm):
1075 return 0,bm,match,(sz,bm.shape[0])
1076
1077 return 1,None,None,None
1078
1079 -def GetAllPharmacophoreMatches(matches,bounds,pcophore,useDownsampling=0,
1080 progressCallback=None,
1081 use2DLimits=False,mol=None,
1082 verbose=False):
1083 res = []
1084 nDone = 0
1085 for match in CombiEnum(matches):
1086 atomMatch = ChemicalFeatures.GetAtomMatch(match)
1087 if atomMatch and use2DLimits and mol:
1088 pass2D = Check2DBounds(atomMatch,mol,pcophore)
1089 if verbose:
1090 print '..',atomMatch
1091 print ' ..Pass2d:',pass2D
1092 else:
1093 pass2D = True
1094 if atomMatch and pass2D and \
1095 CoarseScreenPharmacophore(atomMatch,bounds,pcophore,verbose=verbose):
1096 if verbose:
1097 print ' ..CoarseScreen: Pass'
1098
1099 bm = bounds.copy()
1100 if verbose:
1101 print 'pre update:'
1102 for row in bm:
1103 print ' ',' '.join(['% 4.2f'%x for x in row])
1104 bm = UpdatePharmacophoreBounds(bm,atomMatch,pcophore);
1105 sz = bm.shape[0]
1106 if verbose:
1107 print 'pre downsample:'
1108 for row in bm:
1109 print ' ',' '.join(['% 4.2f'%x for x in row])
1110
1111 if useDownsampling:
1112 indices = []
1113 for entry in atomMatch:
1114 indices += list(entry)
1115 bm = DownsampleBoundsMatrix(bm,indices)
1116 if verbose:
1117 print 'post downsample:'
1118 for row in bm:
1119 print ' ',' '.join(['% 4.2f'%x for x in row])
1120
1121 if DG.DoTriangleSmoothing(bm):
1122 res.append(match)
1123 elif verbose:
1124 print 'cannot smooth'
1125 nDone+=1
1126 if progressCallback:
1127 progressCallback(nDone)
1128 return res
1129
1130
1132 """ Computes the chiral volume of an atom
1133
1134 We're using the chiral volume formula from Figure 7 of
1135 Blaney and Dixon, Rev. Comp. Chem. V, 299-335 (1994)
1136
1137 >>> import os.path
1138 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
1139
1140 R configuration atoms give negative volumes:
1141 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r.mol'))
1142 >>> Chem.AssignAtomChiralCodes(mol)
1143 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1144 'R'
1145 >>> ComputeChiralVolume(mol,1) < 0
1146 True
1147
1148 S configuration atoms give positive volumes:
1149 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s.mol'))
1150 >>> Chem.AssignAtomChiralCodes(mol)
1151 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1152 'S'
1153 >>> ComputeChiralVolume(mol,1) > 0
1154 True
1155
1156 Non-chiral (or non-specified) atoms give zero volume:
1157 >>> ComputeChiralVolume(mol,0) == 0.0
1158 True
1159
1160 We also work on 3-coordinate atoms (with implicit Hs):
1161 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r-3.mol'))
1162 >>> Chem.AssignAtomChiralCodes(mol)
1163 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1164 'R'
1165 >>> ComputeChiralVolume(mol,1)<0
1166 True
1167
1168 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s-3.mol'))
1169 >>> Chem.AssignAtomChiralCodes(mol)
1170 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1171 'S'
1172 >>> ComputeChiralVolume(mol,1)>0
1173 True
1174
1175
1176
1177 """
1178 conf = mol.GetConformer(confId)
1179 Chem.AssignAtomChiralCodes(mol)
1180 center = mol.GetAtomWithIdx(centerIdx)
1181 if not center.HasProp('_CIPCode'):
1182 return 0.0
1183
1184 nbrs = center.GetNeighbors()
1185 nbrRanks = []
1186 for nbr in nbrs:
1187 rank = int(nbr.GetProp('_CIPRank'))
1188 pos = conf.GetAtomPosition(nbr.GetIdx())
1189 nbrRanks.append((rank,pos))
1190
1191
1192
1193 if len(nbrRanks)==3:
1194 nbrRanks.append((-1,conf.GetAtomPosition(centerIdx)))
1195 nbrRanks.sort()
1196
1197 ps = [x[1] for x in nbrRanks]
1198 v1 = ps[0]-ps[3]
1199 v2 = ps[1]-ps[3]
1200 v3 = ps[2]-ps[3]
1201
1202 res = v1.DotProduct(v2.CrossProduct(v3))
1203 return res
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1215 import doctest,sys
1216 return doctest.testmod(sys.modules["__main__"])
1217
1218 if __name__ == '__main__':
1219 import sys
1220 failed,tried = _test()
1221 sys.exit(failed)
1222