1
2
3
4
5
6
7 """ data structures for holding 2D pharmacophore signatures
8
9
10 See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way
11 pharmacophores are broken into triangles and labelled.
12
13 See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit
14 numbering
15
16 """
17 import Chem
18 from Chem.Pharm2D import Utils
19 from DataStructs import SparseBitVect as BitVect
20
21 _verbose = 0
22
23 _sigVersion=100
24
26 """
27
28 **Notes on Use**
29
30 - After any changes, the Init() method should be called
31
32 **Important Attributes**
33
34 - patterns: a list of SmartsPatterns used to determine the features
35 patterns can be initialized directly from the list or from a list of SMARTS
36 strings
37
38 - bins: the list of distance bins to be used.
39
40 - minCount/maxCount: the minimum/maximum number of points to be included
41 in a pharmacophore
42
43 - shortestPathsOnly: toggles limiting the path-discovery algorithms to
44 only find the shortest paths between 2 features.
45
46 - includeBondOrder: toggles inclusion of bond order in consideration of
47 shortest paths.
48
49 **Limitations of Current Implementation**
50
51 - All distances have the same numbers of bins
52
53 - Only shortest-path matches are implemented
54
55
56 """
def __init__(self,patts=None,bins=None,labels=None,minCnt=2,maxCnt=4,
             shortestPathsOnly=1,includeBondOrder=0):
    """ Constructor

    **Arguments**

      - patts: (optional) a sequence of feature patterns; a shallow copy
        (made by slicing) is stored

      - bins: (optional) a sequence of distance bins; a shallow copy
        (made by slicing) is stored

      - labels: (optional) a sequence of labels; a shallow copy
        (made by slicing) is stored

      - minCnt/maxCnt: (optional) the minimum/maximum number of points
        to be included in a pharmacophore

      - shortestPathsOnly: (optional) stored as given, it is not
        validated here

      - includeBondOrder: (optional) stored as given

    **Notes**

      - no bit vector is allocated here (it is left as None);
        self._initLocals() is called last to reset the internal state

    """
    # simple settings are stored untouched
    self._sigVersion = _sigVersion
    self._minCnt = minCnt
    self._maxCnt = maxCnt
    self._shortestPathsOnly = shortestPathsOnly
    self._includeBondOrder = includeBondOrder
    self._bv = None

    # sequences are copied so that later changes made by the caller to
    # its own lists do not leak into the signature
    if patts is None:
        self._patts = None
    else:
        self._patts = patts[:]

    if labels is None:
        self._labels = None
    else:
        self._labels = labels[:]

    if bins is None:
        self.bins = None
    else:
        self.bins = bins[:]

    self._initLocals()
76
78 """ Internal use only
79
80 """
81 self._bv = None
82 self._size = -1
83 self._starts = {}
84 self._scaffolds = []
85
86
88 """ used by the pickling machinery
89
90 """
91 res = {'_minCnt':self._minCnt,
92 '_maxCnt':self._maxCnt,
93 '_shortestPathsOnly':self._shortestPathsOnly,
94 '_includeBondOrder':self._includeBondOrder,
95 'bins': self.bins,
96 '_bv':self._bv,
97 '_labels':self._labels,
98 '_sigVersion':self._sigVersion,
99 }
100 res['_patts'] = [Chem.MolToSmarts(x) for x in self._patts]
101
102 return res
121 if idx < 0 or idx >= self.GetSize():
122 raise IndexError,'Index %d invalid'%(idx)
123 return self._bv[idx]
124
125
127 self._patts = patts[:]
129 import Chem
130 self._patts = [None]*len(smarts)
131 for i in range(len(smarts)):
132 p = Chem.MolFromSmarts(smarts[i])
133 self._patts[i] = p
135 return self._patts[which]
137 return len(self._patts)
138
142 return self._labels[which]
143
144
146 """ bins should be a list of 2-tuples """
147 self.bins = bins[:]
149 return self.bins[which]
151 return len(self.bins)
153 return self.bins[0][0]
155 return self.bins[-1][1]
156
157
162
167
169 if not val:
170 raise ValueError,'only shortest paths signatures are currently supported'
171 self._shortestPathsOnly = val
173 return self._shortestPathsOnly
174
176 self._includeBondOrder = val
178 return self._includeBondOrder
179
182
def Init(self,createBitVect=1):
    """ Recomputes the internal layout of the signature.  This **must**
    be called after making any changes to the signature

    For every point count between the minimum and maximum count the
    offset of its first bit is recorded in self._starts, the possible
    scaffolds (distance-bin combinations) are stored in self._scaffolds
    (indexed by the number of distances for that point count) and the
    total number of bits is stored in self._size.

    **Arguments**

      - createBitVect: (optional) if this is nonzero, a new bit vector
        of the computed size is allocated.  Otherwise the existing bit
        vect is kept untouched (the caller has to make sure it is big
        enough)

    """
    maxPts = self.GetMaxCount()
    # the scaffold table is indexed by number of distances, so it is
    # sized from the distance list of the next larger point count
    self._scaffolds = [0]*len(Utils.nPointDistDict[maxPts+1])

    total = 0
    for nPts in range(self.GetMinCount(),maxPts+1):
        # bits for nPts-point pharmacophores start where the previous
        # point count stopped
        self._starts[nPts] = total
        nDists = len(Utils.nPointDistDict[nPts])
        scaffolds = Utils.GetPossibleScaffolds(nPts,self.bins)
        self._scaffolds[nDists] = scaffolds
        # one bit per (pattern combination, scaffold) pair
        total += Utils.NumCombinations(self.GetNumPatterns(),nPts) * len(scaffolds)

    self._size = total
    if createBitVect:
        self._bv = BitVect(self._size)
208
209
211 """ OBSOLETE: this has been rewritten in C++
212 Internal use only
213 Returns the index of a bin defined by a set of distances.
214
215 **Arguments**
216
217 - dists: a sequence of distances (not binned)
218
219 - bins: a sorted sequence of distance bins (2-tuples)
220
221 - scaffolds: a list of possible scaffolds (bin combinations)
222
223 **Returns**
224
225 an integer bin index
226
227 **Note**
228
229 the value returned here is not an index in the overall
230 signature. It is, rather, an offset of a scaffold in the
231 possible combinations of distance bins for a given
232 proto-pharmacophore.
233
234 """
235 nBins = len(bins)
236 nDists = len(dists)
237 whichBins = [0]*nDists
238
239
240
241
242
243
244 for i in range(nDists):
245 dist = dists[i]
246 where = -1
247
248
249 startP,endP = 0,len(bins)
250 while startP<endP:
251 midP = (startP+endP) // 2
252 begBin,endBin = bins[midP]
253 if dist < begBin:
254 endP = midP
255 elif dist >= endBin:
256 startP = midP+1
257 else:
258 where = midP
259 break
260 if where < 0:
261 return None
262 whichBins[i] = where
263 res = scaffolds.index(tuple(whichBins))
264 return res
265
def GetBitIdx(self,patts,dists,checkPatts=1):
    """ returns the index for a pharmacophore described using a set of
    patterns and distances

    **Arguments**

      - patts: a sequence of pattern indices

      - dists: a sequence of distance between the patterns, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - checkPatts: (optional) if nonzero, the pattern vector is
        checked to ensure it is sorted

    **Returns**

      the integer bit index

    **Raises**

      - IndexError: if the number of patterns is outside the
        [minCnt,maxCnt] range, if a pattern index is out of range, or
        if the bin lookup raises ValueError

      - ValueError: if checkPatts is nonzero and patts is not sorted

    """
    nPoints = len(patts)
    if nPoints < self._minCnt:
        raise IndexError('bad number of patterns')
    if nPoints > self._maxCnt:
        raise IndexError('bad number of patterns')

    # first bit belonging to pharmacophores with this many points
    startIdx = self._starts[nPoints]

    nPatts = len(self._patts)
    if checkPatts:
        tmp = list(patts)
        tmp.sort()
        if tmp != list(patts):
            raise ValueError('pattern vector not sorted')

    # looking only at patts[0] for the lower bound relies on the
    # pattern vector being sorted
    if patts[0] < 0:
        raise IndexError('bad pattern index')
    if max(patts) >= nPatts:
        raise IndexError('bad pattern index')

    # offset of this combination of patterns among all combinations
    # with nPoints points, scaled by the number of scaffolds per
    # combination
    offset = Utils.CountUpTo(nPatts,nPoints,patts)
    if _verbose:
        print('offset for patts %s: %d'%(str(patts),offset))
    scaffolds = self._scaffolds[len(dists)]
    offset *= len(scaffolds)

    try:
        if _verbose:
            print('>>>>>>>>>>>>>>>>>>>>>>>')
            print('\tScaffolds: %s %s'%(repr(scaffolds),type(scaffolds)))
            print('\tDists: %s %s'%(repr(dists),type(dists)))
            print('\tbins: %s %s'%(repr(self.bins),type(self.bins)))
        binIdx = self._findBinIdx(dists,self.bins,scaffolds)
    except ValueError:
        raise IndexError('distance bin not found')

    # NOTE(review): if self._findBinIdx returns None instead of raising
    # (e.g. for a distance outside of every bin) the addition below
    # fails with a TypeError -- confirm whether callers rely on that
    return startIdx + offset + binIdx
322
def SetBit(self,patts,dists,checkPatts=1):
    """ sets the bit defined by a collection of patterns and distances

    **Arguments**

      - patts: a sequence of pattern indices

      - dists: a sequence of distance between the patterns, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - checkPatts: (optional) if nonzero, the pattern vector is
        checked to ensure it is sorted

    **Returns**

      the original status of the bit (whether or not it was set)

    **Raises**

      - IndexError: if the calculated index is not smaller than the
        size of the signature (errors raised by GetBitIdx() are passed
        through unchanged)

    """
    idx = self.GetBitIdx(patts,dists,checkPatts=checkPatts)
    if _verbose:
        print('*--> setting bit: %d'%(idx))
        print('\tfrom patts: %s and dists: %s\n'%(repr(patts),repr(dists)))
    size = self.GetSize()
    if idx >= size:
        raise IndexError('bad index (%d) calculated. %d is the max'%(idx,size))
    return self._bv.SetBit(idx)
349
def GetBit(self,patts,dists,checkPatts=1):
    """ returns the value of the bit defined by a collection of patterns
    and distances

    **Arguments**

      - patts: a sequence of pattern indices

      - dists: a sequence of distance between the patterns, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - checkPatts: (optional) if nonzero, the pattern vector is
        checked to ensure it is sorted

    **Returns**

      whether or not the bit is set

    **Raises**

      - IndexError: if the calculated index is not smaller than the
        size of the signature (errors raised by GetBitIdx() are passed
        through unchanged)

    """
    idx = self.GetBitIdx(patts,dists,checkPatts=checkPatts)
    size = self.GetSize()
    if idx >= size:
        raise IndexError('bad index (%d) calculated. %d is the max'%(idx,size))
    return self._bv.GetBit(idx)
373
375 """ returns our on bits
376
377 """
378 return self._bv.GetOnBits()
379
381 """ returns information about the given bit
382
383 **Arguments**
384
385 - idx: the bit index to be considered
386
387 **Returns**
388
389 a 3-tuple:
390
391 1) the number of points in the pharmacophore
392
393 2) the proto-pharmacophore (tuple of pattern indices)
394
395 3) the scaffold (tuple of distance indices)
396
397 """
398 if idx >= self.GetSize():
399 raise IndexError,'bad index (%d) queried. %d is the max'%(idx,self.GetSize())
400
401 nPts = self.GetMinCount()
402 while nPts < self.GetMaxCount() and self._starts[nPts+1]<=idx:
403 nPts+=1
404
405
406 offsetFromStart = idx - self._starts[nPts]
407 if _verbose:
408 print '\t %d Points, %d offset'%(nPts,offsetFromStart)
409
410
411 nDists = len(Utils.nPointDistDict[nPts])
412 scaffolds = self._scaffolds[nDists]
413
414 nScaffolds = len(scaffolds)
415
416
417 protoIdx = offsetFromStart / nScaffolds
418 indexCombos = Utils.GetIndexCombinations(self.GetNumPatterns(),nPts)
419 combo = indexCombos[protoIdx]
420 if _verbose:
421 print '\t combo: %s'%(str(combo))
422
423
424 scaffoldIdx = offsetFromStart % nScaffolds
425 scaffold = scaffolds[scaffoldIdx]
426 if _verbose:
427 print '\t scaffold: %s'%(str(scaffold))
428
429 return nPts,combo,scaffold
430
432 """ returns HTML with a description of the bit
433
434 **Arguments**
435
436 - bitIdx: an integer bit index
437
438 - includeBins: (optional) if nonzero, information about the bins will be
439 included as well
440
441 - fullPage: (optional) if nonzero, html headers and footers will
442 be included (so as to make the output a complete page)
443
444 **Returns**
445
446 a string with the HTML
447
448 """
449 nPts,combo,scaffold = self.GetBitInfo(bitIdx)
450 labels = [self._labels[x] for x in combo]
451 dMat = zeros((nPts,nPts),Int)
452 dVect = Utils.nPointDistDict[nPts]
453 for idx in range(len(dVect)):
454 i,j = dVect[idx]
455 dMat[i,j] = scaffold[idx]
456 dMat[j,i] = scaffold[idx]
457 if fullPage:
458 lines = ['<html><body>']
459 else:
460 lines = []
461 lines.append("""<h2>Bit %d</h2>
462 <p><b>Num Points:</b> %d
463 """%(bitIdx,nPts))
464 lines.append('<p><b>Distances</b><table border=1>')
465 hdr = ' '.join(['<th>%s</th>'%x for x in labels])
466 lines.append('<tr><td></td>%s</tr>'%(hdr))
467 for i in range(nPts):
468 row = ' '.join(['<td>%s</td>'%(str(dMat[i,x])) for x in range(nPts)])
469 lines.append('<tr><th>%s</th>%s</tr>'%(labels[i],row))
470 lines.append('</table>')
471
472 if includeBins:
473 lines.append('<p> <b>Distance Bin Information</b>')
474 lines.append('<table border=1>')
475 lines.append('<tr><td>bin</td><td>begin</td><td>end</td></tr>')
476 for idx in range(self.GetNumBins()):
477 beg,end = self.GetBin(idx)
478 lines.append('<tr><td>%d</td><td>%d</td><td>%d</td></tr>'%(idx,beg,end))
479 lines.append('</table>')
480 if fullPage:
481 lines.append("</body></html>")
482 return '\n'.join(lines)
483
484
485
# If the compiled helper module can be imported, its FindBinIdx replaces
# Pharm2DSig._findBinIdx for every instance; when the import fails the
# class is left untouched and the failure is deliberately ignored.
try:
    from Chem.Pharmacophores import cUtils
except ImportError:
    pass
else:
    Pharm2DSig._findBinIdx = cUtils.FindBinIdx
492
493
494 if __name__=='__main__':
503
505 sig = Pharm2DSig()
506 sig.SetPatternsFromSmarts(['O','N'])
507 sig.SetBins([(0,2),(2,4),(4,8)])
508 sig.SetMinCount(2)
509 sig.SetMaxCount(3)
510 sig.Init()
511 vs = [((0,0),[1]),((1,1),[1]),((0,0),[2]),((0,0),[6]),((0,1),[1])]
512 for patts,dist in vs:
513 idx = sig.GetBitIdx(patts,dist)
514 print patts,dist,idx
515
517 import Chem
518 import Generate
519 sig = Pharm2DSig()
520 sig.SetPatternsFromSmarts(['O'])
521 sig.SetBins([(1,3),(3,4),(4,8)])
522 sig.SetMinCount(2)
523 sig.SetMaxCount(3)
524 sig.Init()
525
526
527
528
529 mol = Chem.MolFromSmiles('OCCC1COCCO1')
530 try:
531 Generate.Gen2DFingerprint(mol,sig)
532 except TypeError:
533 import traceback
534 traceback.print_exc()
535 print '---------c'
536 patts,dist = [0,0],[4]
537
538
539 sig.SetBit(patts,dist)
540
541
542
543
545 sig = Pharm2DSig()
546 sig.SetPatternsFromSmarts(['[OD1]','[OD2]','[ND2]','[N]'])
547 sig.SetBins([(0,2),(2,4),(4,6),(6,8),(8,100)])
548 sig.SetMinCount(2)
549 sig.SetMaxCount(4)
550 sig.Init()
551 vs = [((0,0),[1]),((1,1),[1]),((0,0),[2]),((0,0),[6]),((0,1),[1]),((0,0,0),[1,1,1]),((0,0,0),[1,1,3]),
552 ((0,0,0),[3,1,2]),((0,0,1),[1,1,1]),]
553 for patts,dist in vs:
554 print patts,dist,sig.GetBitIdx(patts,dist)
555
556 test2()
557