Package Chem :: Package Pharm2D :: Module Signature
[hide private]
[frames | no frames]

Source Code for Module Chem.Pharm2D.Signature

  1  # $Id: Signature.py 742 2008-07-05 07:42:38Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ data structures for holding 2D pharmacophore signatures 
  8   
  9   
 10    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 11    pharmacophores are broken into triangles and labelled. 
 12   
 13    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 14    numbering 
 15   
 16  """ 
 17  import Chem 
 18  from Chem.Pharm2D import Utils 
 19  from DataStructs import SparseBitVect as BitVect 
 20   
 21  _verbose = 0 
 22   
 23  _sigVersion=100 
 24   
class Pharm2DSig(object):
    """ Holds a 2D pharmacophore signature and maps pharmacophores to bits

    **Notes on Use**

      - After any changes, the Init() method should be called

    **Important Attributes**

      - patterns: a list of SmartsPatterns used to determine the features
        patterns can be initialized directly from the list or from a list
        of SMARTS strings

      - bins: the list of distance bins to be used.

      - minCount/maxCount: the minimum/maximum number of points to be
        included in a pharmacophore

      - shortestPathsOnly: toggles limiting the path-discovery algorithms to
        only find the shortest paths between 2 features.

      - includeBondOrder: toggles inclusion of bond order in consideration
        of shortest paths.

    **Limitations of Current Implementation**

      - All distances have the same numbers of bins

      - Only shortest-path matches are implemented

    """

    def __init__(self, patts=None, bins=None, labels=None, minCnt=2, maxCnt=4,
                 shortestPathsOnly=1, includeBondOrder=0):
        """ Stores (shallow copies of) the configuration.

        Init() is *not* called here; call it once the signature is fully
        configured.

        """
        self._sigVersion = _sigVersion
        self._bv = None
        self._patts = None
        if patts is not None:
            self._patts = patts[:]
        self._labels = None
        if labels is not None:
            self._labels = labels[:]

        self.bins = None
        if bins is not None:
            self.bins = bins[:]
        self._minCnt = minCnt
        self._maxCnt = maxCnt
        self._shortestPathsOnly = shortestPathsOnly
        self._includeBondOrder = includeBondOrder
        self._initLocals()

    def _initLocals(self):
        """ Internal use only

        Resets the values that Init() computes.

        """
        self._bv = None
        self._size = -1
        self._starts = {}
        self._scaffolds = []

    def __getstate__(self):
        """ used by the pickling machinery

        The patterns are stored as SMARTS strings (or None if no patterns
        have been set) rather than as pattern objects.

        """
        res = {
            '_minCnt': self._minCnt,
            '_maxCnt': self._maxCnt,
            '_shortestPathsOnly': self._shortestPathsOnly,
            '_includeBondOrder': self._includeBondOrder,
            'bins': self.bins,
            '_bv': self._bv,
            '_labels': self._labels,
            '_sigVersion': self._sigVersion,
        }
        # an unconfigured signature has no patterns; do not iterate over None
        if self._patts is None:
            res['_patts'] = None
        else:
            res['_patts'] = [Chem.MolToSmarts(x) for x in self._patts]

        return res

    def __setstate__(self, state):
        """ used by the pickling machinery

        Rebuilds the patterns from their SMARTS and recomputes the derived
        tables while keeping the stored bit vector.

        """
        self.__dict__.update(state)
        patts = state['_patts']
        if patts is not None:
            self.SetPatternsFromSmarts(patts)
        # _initLocals() clears _bv, so hold on to the unpickled one
        bv = self._bv
        self._initLocals()
        self._bv = bv
        if not hasattr(self, '_sigVersion'):
            self._sigVersion = _sigVersion
        # Init() needs both patterns and bins; a signature pickled before it
        # was configured is restored unconfigured.
        if self._patts is not None and self.bins is not None:
            self.Init(createBitVect=0)

    def __len__(self):
        return self.GetSize()

    def __getitem__(self, idx):
        if idx < 0 or idx >= self.GetSize():
            raise IndexError('Index %d invalid' % (idx))
        return self._bv[idx]

    def SetPatterns(self, patts):
        self._patts = patts[:]

    def SetPatternsFromSmarts(self, smarts):
        """ builds the pattern list from a sequence of SMARTS strings """
        self._patts = [Chem.MolFromSmarts(sma) for sma in smarts]

    def GetPattern(self, which):
        return self._patts[which]

    def GetNumPatterns(self):
        return len(self._patts)

    def SetLabels(self, labels):
        self._labels = labels[:]

    def GetLabel(self, which):
        return self._labels[which]

    def SetBins(self, bins):
        """ bins should be a list of 2-tuples """
        self.bins = bins[:]

    def GetBin(self, which):
        return self.bins[which]

    def GetNumBins(self):
        return len(self.bins)

    def GetMinDist(self):
        return self.bins[0][0]

    def GetMaxDist(self):
        return self.bins[-1][1]

    def SetMinCount(self, min):
        self._minCnt = min

    def GetMinCount(self):
        return self._minCnt

    def SetMaxCount(self, max):
        self._maxCnt = max

    def GetMaxCount(self):
        return self._maxCnt

    def SetShortestPathsOnly(self, val):
        if not val:
            raise ValueError('only shortest paths signatures are currently supported')
        self._shortestPathsOnly = val

    def GetShortestPathsOnly(self):
        return self._shortestPathsOnly

    def SetIncludeBondOrder(self, val):
        self._includeBondOrder = val

    def GetIncludeBondOrder(self):
        return self._includeBondOrder

    def GetSize(self):
        return self._size

    def Init(self, createBitVect=1):
        """ Initializes internal parameters.  This **must** be called after
          making any changes to the signature

          **Arguments**

            - createBitVect: (optional) if this is nonzero, the bit vector
              used to store the on bits will be allocated.  Otherwise the
              existing bit vect will be reused (it better be big enough)

        """
        accum = 0
        # _scaffolds is indexed by the number of distances in a pharmacophore
        self._scaffolds = [0] * (len(Utils.nPointDistDict[self.GetMaxCount() + 1]))
        for i in range(self.GetMinCount(), self.GetMaxCount() + 1):
            # bits for i-point pharmacophores start where the previous ended
            self._starts[i] = accum
            nDistsHere = len(Utils.nPointDistDict[i])
            scaffoldsHere = Utils.GetPossibleScaffolds(i, self.bins)
            nBitsHere = len(scaffoldsHere)
            self._scaffolds[nDistsHere] = scaffoldsHere
            pointsHere = Utils.NumCombinations(self.GetNumPatterns(), i) * nBitsHere

            accum += pointsHere
        self._size = accum
        if createBitVect:
            self._bv = BitVect(self._size)

    def _findBinIdx(self, dists, bins, scaffolds):
        """ OBSOLETE: this has been rewritten in C++
        Internal use only
         Returns the index of a bin defined by a set of distances.

         **Arguments**

           - dists: a sequence of distances (not binned)

           - bins: a sorted sequence of distance bins (2-tuples)

           - scaffolds: a list of possible scaffolds (bin combinations)

         **Returns**

           an integer bin index, or None if one of the distances does not
           fall in any bin

         **Raises**

           ValueError if the combination of bins is not in scaffolds

         **Note**

           the value returned here is not an index in the overall
           signature.  It is, rather, an offset of a scaffold in the
           possible combinations of distance bins for a given
           proto-pharmacophore.

        """
        nDists = len(dists)
        whichBins = [0] * nDists

        # This would be a ton easier if we had contiguous bins
        # i.e. if we could maintain the bins as a list of bounds)
        # because then we could use Python's bisect module.
        # Since we can't do that, we've got to do our own binary
        # search here.
        for i in range(nDists):
            dist = dists[i]
            where = -1

            # do a simple binary search; bins are half-open: [beg, end)
            startP, endP = 0, len(bins)
            while startP < endP:
                midP = (startP + endP) // 2
                begBin, endBin = bins[midP]
                if dist < begBin:
                    endP = midP
                elif dist >= endBin:
                    startP = midP + 1
                else:
                    where = midP
                    break
            if where < 0:
                return None
            whichBins[i] = where
        res = scaffolds.index(tuple(whichBins))
        return res

    def GetBitIdx(self, patts, dists, checkPatts=1):
        """ returns the index for a pharmacophore described using a set of
          patterns and distances

        **Arguments**

          - patts: a sequence of pattern indices

          - dists: a sequence of distance between the patterns, only the
            unique distances should be included, and they should be in the
            order defined in Utils.

          - checkPatts: (optional) if nonzero, the pattern vector is
            checked to ensure it is sorted

        **Returns**

          the integer bit index

        **Raises**

          - IndexError: bad number of patterns, bad pattern index, or a
            distance / bin combination that is not part of the signature

          - ValueError: checkPatts is set and patts is not sorted

        """
        nPoints = len(patts)
        if nPoints < self._minCnt:
            raise IndexError('bad number of patterns')
        if nPoints > self._maxCnt:
            raise IndexError('bad number of patterns')

        # this is the start of the nPoint-point pharmacophores
        startIdx = self._starts[nPoints]

        #
        # now we need to map the pattern indices to an offset from startIdx
        #
        nPatts = len(self._patts)
        if checkPatts:
            if sorted(patts) != list(patts):
                raise ValueError('pattern vector not sorted')
        # use min() so the check also holds when patts is not sorted
        if min(patts) < 0:
            raise IndexError('bad pattern index')
        if max(patts) >= nPatts:
            raise IndexError('bad pattern index')
        offset = Utils.CountUpTo(nPatts, nPoints, patts)
        if _verbose:
            print('offset for patts %s: %d' % (str(patts), offset))
        scaffolds = self._scaffolds[len(dists)]
        offset *= len(scaffolds)

        if _verbose:
            print('>>>>>>>>>>>>>>>>>>>>>>>')
            print('\tScaffolds: %s %s' % (repr(scaffolds), type(scaffolds)))
            print('\tDists: %s %s' % (repr(dists), type(dists)))
            print('\tbins: %s %s' % (repr(self.bins), type(self.bins)))
        try:
            binIdx = self._findBinIdx(dists, self.bins, scaffolds)
        except ValueError:
            raise IndexError('distance bin not found')
        # the pure-Python _findBinIdx signals "distance outside all bins"
        # with None; adding that to an int would be a TypeError
        if binIdx is None:
            raise IndexError('distance bin not found')

        return startIdx + offset + binIdx

    def SetBit(self, patts, dists, checkPatts=1):
        """ sets the bit defined by a collection of patterns and distances

        **Arguments**

          - patts: a sequence of pattern indices

          - dists: a sequence of distance between the patterns, only the
            unique distances should be included, and they should be in the
            order defined in Utils.

          - checkPatts: (optional) if nonzero, the pattern vector is
            checked to ensure it is sorted

        **Returns**

          the original status of the bit (whether or not it was set)

        """
        idx = self.GetBitIdx(patts, dists, checkPatts=checkPatts)
        if _verbose:
            print('*--> setting bit: %d' % (idx))
            print('\tfrom patts: %s and dists: %s\n' % (repr(patts), repr(dists)))
        if idx >= self.GetSize():
            raise IndexError('bad index (%d) calculated. %d is the max' % (idx, self.GetSize()))
        return self._bv.SetBit(idx)

    def GetBit(self, patts, dists, checkPatts=1):
        """ returns the value of the bit defined by a collection of patterns
          and distances

        **Arguments**

          - patts: a sequence of pattern indices

          - dists: a sequence of distance between the patterns, only the
            unique distances should be included, and they should be in the
            order defined in Utils.

          - checkPatts: (optional) if nonzero, the pattern vector is
            checked to ensure it is sorted

        **Returns**

          whether or not the bit is set

        """
        idx = self.GetBitIdx(patts, dists, checkPatts=checkPatts)
        if idx >= self.GetSize():
            raise IndexError('bad index (%d) calculated. %d is the max' % (idx, self.GetSize()))
        return self._bv.GetBit(idx)

    def GetOnBits(self):
        """ returns our on bits

        """
        return self._bv.GetOnBits()

    def GetBitInfo(self, idx):
        """ returns information about the given bit

         **Arguments**

           - idx: the bit index to be considered

         **Returns**

           a 3-tuple:

             1) the number of points in the pharmacophore

             2) the proto-pharmacophore (tuple of pattern indices)

             3) the scaffold (tuple of distance indices)

         **Raises**

           IndexError if idx is negative or not smaller than GetSize()

        """
        # a negative idx would otherwise wrap around in the lookups below
        if idx < 0 or idx >= self.GetSize():
            raise IndexError('bad index (%d) queried. %d is the max' % (idx, self.GetSize()))
        # first figure out how many points are in the p'cophore
        nPts = self.GetMinCount()
        while nPts < self.GetMaxCount() and self._starts[nPts + 1] <= idx:
            nPts += 1

        # how far are we in from the start point?
        offsetFromStart = idx - self._starts[nPts]
        if _verbose:
            print('\t %d Points, %d offset' % (nPts, offsetFromStart))

        # lookup the number of scaffolds
        nDists = len(Utils.nPointDistDict[nPts])
        scaffolds = self._scaffolds[nDists]

        nScaffolds = len(scaffolds)

        # figure out to which proto-pharmacophore we belong
        # (floor division: the result is used as a list index)
        protoIdx = offsetFromStart // nScaffolds
        indexCombos = Utils.GetIndexCombinations(self.GetNumPatterns(), nPts)
        combo = indexCombos[protoIdx]
        if _verbose:
            print('\t combo: %s' % (str(combo)))

        # and which scaffold:
        scaffoldIdx = offsetFromStart % nScaffolds
        scaffold = scaffolds[scaffoldIdx]
        if _verbose:
            print('\t scaffold: %s' % (str(scaffold)))

        return nPts, combo, scaffold

    def GetBitDescription(self, bitIdx, includeBins=0, fullPage=1):
        """  returns HTML with a description of the bit

         **Arguments**

           - bitIdx: an integer bit index

           - includeBins: (optional) if nonzero, information about the bins
             will be included as well

           - fullPage: (optional) if nonzero, html headers and footers will
             be included (so as to make the output a complete page)

         **Returns**

           a string with the HTML

        """
        nPts, combo, scaffold = self.GetBitInfo(bitIdx)
        labels = [self._labels[x] for x in combo]
        # symmetric nPts x nPts table of distance-bin indices; a plain nested
        # list is enough here and needs no array package
        dMat = [[0] * nPts for _ in range(nPts)]
        dVect = Utils.nPointDistDict[nPts]
        for idx, (i, j) in enumerate(dVect):
            dMat[i][j] = scaffold[idx]
            dMat[j][i] = scaffold[idx]
        if fullPage:
            lines = ['<html><body>']
        else:
            lines = []
        lines.append("""<h2>Bit %d</h2>
    <p><b>Num Points:</b> %d
    """ % (bitIdx, nPts))
        lines.append('<p><b>Distances</b><table border=1>')
        hdr = ' '.join(['<th>%s</th>' % x for x in labels])
        lines.append('<tr><td></td>%s</tr>' % (hdr))
        for i in range(nPts):
            row = ' '.join(['<td>%s</td>' % (str(dMat[i][x])) for x in range(nPts)])
            lines.append('<tr><th>%s</th>%s</tr>' % (labels[i], row))
        lines.append('</table>')

        if includeBins:
            lines.append('<p> <b>Distance Bin Information</b>')
            lines.append('<table border=1>')
            lines.append('<tr><td>bin</td><td>begin</td><td>end</td></tr>')
            for idx in range(self.GetNumBins()):
                beg, end = self.GetBin(idx)
                lines.append('<tr><td>%d</td><td>%d</td><td>%d</td></tr>' % (idx, beg, end))
            lines.append('</table>')
        if fullPage:
            lines.append("</body></html>")
        return '\n'.join(lines)


# Prefer the compiled bin lookup when the extension module is available;
# otherwise the pure-Python Pharm2DSig._findBinIdx stays in place.
try:
    from Chem.Pharmacophores import cUtils
except ImportError:
    pass
else:
    Pharm2DSig._findBinIdx = cUtils.FindBinIdx


if __name__ == '__main__':
    # Ad-hoc smoke tests; only test2() is run when the module is executed.
    # All print calls take a single pre-formatted string so the output is
    # the same under Python 2 and Python 3.

    def test1():
        """ prints the size of a small 2-pattern, 3-bin signature """
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['O', 'N'])
        sig.SetBins([(1, 2), (2, 4), (4, 8)])
        sig.SetMinCount(2)
        sig.SetMaxCount(3)
        sig.Init()
        print(sig.GetSize())

    def test2():
        """ prints the bit index for a handful of 2-point pharmacophores """
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['O', 'N'])
        sig.SetBins([(0, 2), (2, 4), (4, 8)])
        sig.SetMinCount(2)
        sig.SetMaxCount(3)
        sig.Init()
        vs = [((0, 0), [1]), ((1, 1), [1]), ((0, 0), [2]), ((0, 0), [6]), ((0, 1), [1])]
        for patts, dist in vs:
            idx = sig.GetBitIdx(patts, dist)
            print('%s %s %s' % (patts, dist, idx))

    def test4():
        """ fingerprints a molecule, then sets one bit by hand """
        import Chem
        import Generate
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['O'])
        sig.SetBins([(1, 3), (3, 4), (4, 8)])
        sig.SetMinCount(2)
        sig.SetMaxCount(3)
        sig.Init()
        mol = Chem.MolFromSmiles('OCCC1COCCO1')
        try:
            Generate.Gen2DFingerprint(mol, sig)
        except TypeError:
            import traceback
            traceback.print_exc()
        print('---------c')
        patts, dist = [0, 0], [4]
        sig.SetBit(patts, dist)

    def test3():
        """ prints bit indices for 2- and 3-point pharmacophores """
        sig = Pharm2DSig()
        sig.SetPatternsFromSmarts(['[OD1]', '[OD2]', '[ND2]', '[N]'])
        sig.SetBins([(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)])
        sig.SetMinCount(2)
        sig.SetMaxCount(4)
        sig.Init()
        vs = [((0, 0), [1]), ((1, 1), [1]), ((0, 0), [2]), ((0, 0), [6]), ((0, 1), [1]),
              ((0, 0, 0), [1, 1, 1]), ((0, 0, 0), [1, 1, 3]),
              ((0, 0, 0), [3, 1, 2]), ((0, 0, 1), [1, 1, 1]), ]
        for patts, dist in vs:
            print('%s %s %s' % (patts, dist, sig.GetBitIdx(patts, dist)))

    test2()