Package rdkit :: Package Chem :: Package Pharm2D :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm2D.Utils

  1  # $Id: Utils.py 1528 2010-09-26 17:04:37Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ utility functionality for the 2D pharmacophores code 
 12   
 13    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 14    pharmacophores are broken into triangles and labelled. 
 15   
 16    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 17    numbering 
 18   
 19  """ 
 20  import numpy 
 21   
 22  # 
 23  #  number of points in a scaffold -> sequence of distances (p1,p2) in 
 24  #   the scaffold    
 25  # 
 26  nPointDistDict = { 
 27    2: ((0,1),), 
 28    3: ((0,1),(0,2), 
 29        (1,2)), 
 30    4: ((0,1),(0,2),(0,3), 
 31        (1,2),(2,3)), 
 32    5: ((0,1),(0,2),(0,3),(0,4), 
 33        (1,2),(2,3),(3,4)), 
 34    6: ((0,1),(0,2),(0,3),(0,4),(0,5), 
 35        (1,2),(2,3),(3,4),(4,5)), 
 36    7: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6), 
 37        (1,2),(2,3),(3,4),(4,5),(5,6)), 
 38    8: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7), 
 39        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7)), 
 40    9: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8), 
 41        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8)), 
 42    10: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),(0,9), 
 43         (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8),(8,9)), 
 44    } 
 45   
 46  # 
 47  #  number of distances in a scaffold -> number of points in the scaffold 
 48  # 
 49  nDistPointDict = { 
 50    1:2, 
 51    3:3, 
 52    5:4, 
 53    7:5, 
 54    9:6, 
 55    11:7, 
 56    13:8, 
 57    15:9, 
 58    17:10, 
 59    } 
 60   
 61  _trianglesInPharmacophore = {} 
62 -def GetTriangles(nPts):
63 """ returns a tuple with the distance indices for 64 triangles composing an nPts-pharmacophore 65 66 """ 67 global _trianglesInPharmacophore 68 if nPts < 3: return [] 69 res = _trianglesInPharmacophore.get(nPts,[]) 70 if not res: 71 idx1,idx2,idx3=(0,1,nPts-1) 72 while idx1<nPts-2: 73 res.append((idx1,idx2,idx3)) 74 idx1 += 1 75 idx2 += 1 76 idx3 += 1 77 res = tuple(res) 78 _trianglesInPharmacophore[nPts] = res 79 return res
80 81
82 -def _fact(x):
83 if x <= 1: 84 return 1 85 86 accum = 1 87 for i in range(x): 88 accum *= i+1 89 return accum
90
91 -def BinsTriangleInequality(d1,d2,d3):
92 """ checks the triangle inequality for combinations 93 of distance bins. 94 95 the general triangle inequality is: 96 d1 + d2 >= d3 97 the conservative binned form of this is: 98 d1(upper) + d2(upper) >= d3(lower) 99 100 """ 101 if d1[1]+d2[1]<d3[0]: return False 102 if d2[1]+d3[1]<d1[0]: return False 103 if d3[1]+d1[1]<d2[0]: return False 104 105 return True
106
107 -def ScaffoldPasses(combo,bins=None):
108 """ checks the scaffold passed in to see if all 109 contributing triangles can satisfy the triangle inequality 110 111 the scaffold itself (encoded in combo) is a list of binned distances 112 113 """ 114 # this is the number of points in the pharmacophore 115 nPts = nDistPointDict[len(combo)] 116 tris = GetTriangles(nPts) 117 for tri in tris: 118 ds = [bins[combo[x]] for x in tri] 119 if not BinsTriangleInequality(ds[0],ds[1],ds[2]): 120 return False 121 return True
122 123 124 _numCombDict = {}
125 -def NumCombinations(nItems,nSlots):
126 """ returns the number of ways to fit nItems into nSlots 127 128 We assume that (x,y) and (y,x) are equivalent, and 129 (x,x) is allowed. 130 131 General formula is, for N items and S slots: 132 res = (N+S-1)! / ( (N-1)! * S! ) 133 134 """ 135 global _numCombDict 136 res = _numCombDict.get((nItems,nSlots),-1) 137 if res == -1: 138 res = _fact(nItems+nSlots-1) / (_fact(nItems-1)*_fact(nSlots)) 139 _numCombDict[(nItems,nSlots)] = res 140 return res
141 142 _verbose = 0 143 144 _countCache={}
145 -def CountUpTo(nItems,nSlots,vs,idx=0,startAt=0):
146 """ Figures out where a given combination of indices would 147 occur in the combinatorial explosion generated by _GetIndexCombinations_ 148 149 **Arguments** 150 151 - nItems: the number of items to distribute 152 153 - nSlots: the number of slots in which to distribute them 154 155 - vs: a sequence containing the values to find 156 157 - idx: used in the recursion 158 159 - startAt: used in the recursion 160 161 **Returns** 162 163 an integer 164 165 """ 166 global _countCache 167 if _verbose: 168 print ' '*idx,'CountUpTo(%d)'%idx,vs[idx],startAt 169 if idx==0 and _countCache.has_key((nItems,nSlots,tuple(vs))): 170 return _countCache[(nItems,nSlots,tuple(vs))] 171 elif idx >= nSlots: 172 accum = 0 173 elif idx == nSlots-1: 174 accum = vs[idx]-startAt 175 else: 176 accum = 0 177 # get the digit at idx correct 178 for i in range(startAt,vs[idx]): 179 nLevsUnder = nSlots-idx-1 180 nValsOver = nItems-i 181 if _verbose: 182 print ' '*idx,' ',i,nValsOver,nLevsUnder,\ 183 NumCombinations(nValsOver,nLevsUnder) 184 accum += NumCombinations(nValsOver,nLevsUnder) 185 accum += CountUpTo(nItems,nSlots,vs,idx+1,vs[idx]) 186 if _verbose: print ' '*idx,'>',accum 187 if idx == 0: 188 _countCache[(nItems,nSlots,tuple(vs))] = accum 189 return accum
190 191 _indexCombinations={}
192 -def GetIndexCombinations(nItems,nSlots,slot=0,lastItemVal=0):
193 """ Generates all combinations of nItems in nSlots without including 194 duplicates 195 196 **Arguments** 197 198 - nItems: the number of items to distribute 199 200 - nSlots: the number of slots in which to distribute them 201 202 - slot: used in recursion 203 204 - lastItemVal: used in recursion 205 206 **Returns** 207 208 a list of lists 209 210 """ 211 global _indexCombinations 212 if not slot and _indexCombinations.has_key((nItems,nSlots)): 213 res = _indexCombinations[(nItems,nSlots)] 214 elif slot >= nSlots: 215 res = [] 216 elif slot == nSlots-1: 217 res = [[x] for x in range(lastItemVal,nItems)] 218 else: 219 res = [] 220 for x in range(lastItemVal,nItems): 221 tmp = GetIndexCombinations(nItems,nSlots,slot+1,x) 222 for entry in tmp: 223 res.append([x]+entry) 224 if not slot: 225 _indexCombinations[(nItems,nSlots)] = res 226 return res
227
228 -def GetAllCombinations(choices,noDups=1,which=0):
229 """ Does the combinatorial explosion of the possible combinations 230 of the elements of _choices_. 231 232 **Arguments** 233 234 - choices: sequence of sequences with the elements to be enumerated 235 236 - noDups: (optional) if this is nonzero, results with duplicates, 237 e.g. (1,1,0), will not be generated 238 239 - which: used in recursion 240 241 **Returns** 242 243 a list of lists 244 245 >>> GetAllCombinations([(0,),(1,),(2,)]) 246 [[0, 1, 2]] 247 >>> GetAllCombinations([(0,),(1,3),(2,)]) 248 [[0, 1, 2], [0, 3, 2]] 249 250 >>> GetAllCombinations([(0,1),(1,3),(2,)]) 251 [[0, 1, 2], [0, 3, 2], [1, 3, 2]] 252 253 """ 254 if which >= len(choices): 255 res = [] 256 elif which == len(choices)-1: 257 res = [[x] for x in choices[which]] 258 else: 259 res = [] 260 tmp = GetAllCombinations(choices,noDups=noDups, 261 which=which+1) 262 for thing in choices[which]: 263 for other in tmp: 264 if not noDups or thing not in other: 265 res.append([thing]+other) 266 return res
267
268 -def GetUniqueCombinations(choices,classes,which=0):
269 """ Does the combinatorial explosion of the possible combinations 270 of the elements of _choices_. 271 272 """ 273 assert len(choices)==len(classes) 274 if which >= len(choices): 275 res = [] 276 elif which == len(choices)-1: 277 res = [[(classes[which],x)] for x in choices[which]] 278 else: 279 res = [] 280 tmp = GetUniqueCombinations(choices,classes, 281 which=which+1) 282 for thing in choices[which]: 283 for other in tmp: 284 idxThere=0 285 for x in other: 286 if x[1]==thing:idxThere+=1 287 if not idxThere: 288 newL = [(classes[which],thing)]+other 289 newL.sort() 290 if newL not in res: 291 res.append(newL) 292 return res
293
294 -def UniquifyCombinations(combos):
295 """ uniquifies the combinations in the argument 296 297 **Arguments**: 298 299 - combos: a sequence of sequences 300 301 **Returns** 302 303 - a list of tuples containing the unique combos 304 305 """ 306 print '>>> u:',combos 307 resD = {} 308 for combo in combos: 309 k = combo[:] 310 k.sort() 311 resD[tuple(k)] = tuple(combo) 312 print ' >>> u:',resD.values() 313 return resD.values()
314
315 -def GetPossibleScaffolds(nPts,bins,useTriangleInequality=True):
316 """ gets all realizable scaffolds (passing the triangle inequality) with the 317 given number of points and returns them as a list of tuples 318 319 """ 320 if nPts < 2: 321 res = 0 322 elif nPts == 2: 323 res = [(x,) for x in range(len(bins))] 324 else: 325 nDists = len(nPointDistDict[nPts]) 326 combos = GetAllCombinations([range(len(bins))]*nDists,noDups=0) 327 res = [] 328 for combo in combos: 329 if not useTriangleInequality or ScaffoldPasses(combo,bins): 330 res.append(tuple(combo)) 331 return res
332
333 -def OrderTriangle(featIndices,dists):
334 """ 335 put the distances for a triangle into canonical order 336 337 It's easy if the features are all different: 338 >>> OrderTriangle([0,2,4],[1,2,3]) 339 ([0, 2, 4], [1, 2, 3]) 340 341 It's trickiest if they are all the same: 342 >>> OrderTriangle([0,0,0],[1,2,3]) 343 ([0, 0, 0], [3, 2, 1]) 344 >>> OrderTriangle([0,0,0],[2,1,3]) 345 ([0, 0, 0], [3, 2, 1]) 346 >>> OrderTriangle([0,0,0],[1,3,2]) 347 ([0, 0, 0], [3, 2, 1]) 348 >>> OrderTriangle([0,0,0],[3,1,2]) 349 ([0, 0, 0], [3, 2, 1]) 350 >>> OrderTriangle([0,0,0],[3,2,1]) 351 ([0, 0, 0], [3, 2, 1]) 352 353 >>> OrderTriangle([0,0,1],[3,2,1]) 354 ([0, 0, 1], [3, 2, 1]) 355 >>> OrderTriangle([0,0,1],[1,3,2]) 356 ([0, 0, 1], [1, 3, 2]) 357 >>> OrderTriangle([0,0,1],[1,2,3]) 358 ([0, 0, 1], [1, 3, 2]) 359 >>> OrderTriangle([0,0,1],[1,3,2]) 360 ([0, 0, 1], [1, 3, 2]) 361 362 """ 363 if len(featIndices)!=3: raise ValueError,'bad indices' 364 if len(dists)!=3: raise ValueError,'bad dists' 365 366 fs = set(featIndices) 367 if len(fs)==3: 368 return featIndices,dists 369 370 dSums=[0]*3 371 dSums[0] = dists[0]+dists[1] 372 dSums[1] = dists[0]+dists[2] 373 dSums[2] = dists[1]+dists[2] 374 mD = max(dSums) 375 if len(fs)==1: 376 if dSums[0]==mD: 377 if dists[0]>dists[1]: 378 ireorder=(0,1,2) 379 dreorder=(0,1,2) 380 else: 381 ireorder=(0,2,1) 382 dreorder=(1,0,2) 383 elif dSums[1]==mD: 384 if dists[0]>dists[2]: 385 ireorder=(1,0,2) 386 dreorder=(0,2,1) 387 else: 388 ireorder=(1,2,0) 389 dreorder=(2,0,1) 390 else: 391 if dists[1]>dists[2]: 392 ireorder = (2,0,1) 393 dreorder=(1,2,0) 394 else: 395 ireorder = (2,1,0) 396 dreorder=(2,1,0) 397 else: 398 # two classes 399 if featIndices[0]==featIndices[1]: 400 if dists[1]>dists[2]: 401 ireorder=(0,1,2) 402 dreorder=(0,1,2) 403 else: 404 ireorder=(1,0,2) 405 dreorder=(0,2,1) 406 elif featIndices[0]==featIndices[2]: 407 if dists[0]>dists[2]: 408 ireorder=(0,1,2) 409 dreorder=(0,1,2) 410 else: 411 ireorder=(2,1,0) 412 dreorder=(2,1,0) 413 else: #featIndices[1]==featIndices[2]: 414 if dists[0]>dists[1]: 415 ireorder=(0,1,2) 416 dreorder=(0,1,2) 417 else: 418 ireorder=(0,2,1) 419 dreorder=(1,0,2) 420 dists = [dists[x] for x in dreorder] 421 featIndices = [featIndices[x] for x in ireorder] 422 return featIndices,dists
423 424 #------------------------------------ 425 # 426 # doctest boilerplate 427 #
428 -def _test():
429 import doctest,sys 430 return doctest.testmod(sys.modules["__main__"])
431 432 433 if __name__ == '__main__': 434 import sys 435 failed,tried = _test() 436 sys.exit(failed) 437