Package Chem :: Package Pharm2D :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Chem.Pharm2D.Utils

  1  # $Id: Utils.py 742 2008-07-05 07:42:38Z glandrum $ 
  2  # 
  3  # Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ utility functionality for the 2D pharmacophores code 
  8   
  9    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 10    pharmacophores are broken into triangles and labelled. 
 11   
 12    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 13    numbering 
 14   
 15  """ 
 16  import numpy 
 17   
 18  # 
 19  #  number of points in a scaffold -> sequence of distances (p1,p2) in 
 20  #   the scaffold    
 21  # 
 22  nPointDistDict = { 
 23    2: ((0,1),), 
 24    3: ((0,1),(0,2), 
 25        (1,2)), 
 26    4: ((0,1),(0,2),(0,3), 
 27        (1,2),(2,3)), 
 28    5: ((0,1),(0,2),(0,3),(0,4), 
 29        (1,2),(2,3),(3,4)), 
 30    6: ((0,1),(0,2),(0,3),(0,4),(0,5), 
 31        (1,2),(2,3),(3,4),(4,5)), 
 32    7: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6), 
 33        (1,2),(2,3),(3,4),(4,5),(5,6)), 
 34    8: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7), 
 35        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7)), 
 36    9: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8), 
 37        (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8)), 
 38    10: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),(0,9), 
 39         (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8),(8,9)), 
 40    } 
 41   
 42  # 
 43  #  number of distances in a scaffold -> number of points in the scaffold 
 44  # 
 45  nDistPointDict = { 
 46    1:2, 
 47    3:3, 
 48    5:4, 
 49    7:5, 
 50    9:6, 
 51    11:7, 
 52    13:8, 
 53    15:9, 
 54    17:10, 
 55    } 
 56   
 57  _trianglesInPharmacophore = {} 
58 -def GetTriangles(nPts):
59 """ returns a tuple with the distance indices for 60 triangles composing an nPts-pharmacophore 61 62 """ 63 global _trianglesInPharmacophore 64 if nPts < 3: return [] 65 res = _trianglesInPharmacophore.get(nPts,[]) 66 if not res: 67 idx1,idx2,idx3=(0,1,nPts-1) 68 while idx1<nPts-2: 69 res.append((idx1,idx2,idx3)) 70 idx1 += 1 71 idx2 += 1 72 idx3 += 1 73 res = tuple(res) 74 _trianglesInPharmacophore[nPts] = res 75 return res
76 77
78 -def _fact(x):
79 if x <= 1: 80 return 1 81 82 accum = 1 83 for i in range(x): 84 accum *= i+1 85 return accum
86
87 -def BinsTriangleInequality(d1,d2,d3):
88 """ checks the triangle inequality for combinations 89 of distance bins. 90 91 the general triangle inequality is: 92 d1 + d2 >= d3 93 the conservative binned form of this is: 94 d1(upper) + d2(upper) >= d3(lower) 95 96 """ 97 if d1[1]+d2[1]<d3[0]: return 0 98 if d2[1]+d3[1]<d1[0]: return 0 99 if d3[1]+d1[1]<d2[0]: return 0 100 101 return 1
102
103 -def ScaffoldPasses(combo,bins=None):
104 """ checks the scaffold passed in to see if all 105 contributing triangles can satisfy the triangle inequality 106 107 the scaffold itself (encoded in combo) is a list of binned distances 108 109 """ 110 # this is the number of points in the pharmacophore 111 nPts = nDistPointDict[len(combo)] 112 tris = GetTriangles(nPts) 113 for tri in tris: 114 ds = [bins[combo[x]] for x in tri] 115 if not BinsTriangleInequality(ds[0],ds[1],ds[2]): 116 return 0 117 return 1
118 119 120 _numCombDict = {}
121 -def NumCombinations(nItems,nSlots):
122 """ returns the number of ways to fit nItems into nSlots 123 124 We assume that (x,y) and (y,x) are equivalent, and 125 (x,x) is allowed. 126 127 General formula is, for N items and S slots: 128 res = (N+S-1)! / ( (N-1)! * S! ) 129 130 """ 131 global _numCombDict 132 res = _numCombDict.get((nItems,nSlots),-1) 133 if res == -1: 134 res = _fact(nItems+nSlots-1) / (_fact(nItems-1)*_fact(nSlots)) 135 _numCombDict[(nItems,nSlots)] = res 136 return res
137 138 _verbose = 0 139 140 _countCache={}
141 -def CountUpTo(nItems,nSlots,vs,idx=0,startAt=0):
142 """ Figures out where a given combination of indices would 143 occur in the combinatorial explosion generated by _GetIndexCombinations_ 144 145 **Arguments** 146 147 - nItems: the number of items to distribute 148 149 - nSlots: the number of slots in which to distribute them 150 151 - vs: a sequence containing the values to find 152 153 - idx: used in the recursion 154 155 - startAt: used in the recursion 156 157 **Returns** 158 159 an integer 160 161 """ 162 global _countCache 163 if _verbose: 164 print ' '*idx,'CountUpTo(%d)'%idx,vs[idx],startAt 165 if idx==0 and _countCache.has_key((nItems,nSlots,tuple(vs))): 166 return _countCache[(nItems,nSlots,tuple(vs))] 167 elif idx >= nSlots: 168 accum = 0 169 elif idx == nSlots-1: 170 accum = vs[idx]-startAt 171 else: 172 accum = 0 173 # get the digit at idx correct 174 for i in range(startAt,vs[idx]): 175 nLevsUnder = nSlots-idx-1 176 nValsOver = nItems-i 177 if _verbose: 178 print ' '*idx,' ',i,nValsOver,nLevsUnder,\ 179 NumCombinations(nValsOver,nLevsUnder) 180 accum += NumCombinations(nValsOver,nLevsUnder) 181 accum += CountUpTo(nItems,nSlots,vs,idx+1,vs[idx]) 182 if _verbose: print ' '*idx,'>',accum 183 if idx == 0: 184 _countCache[(nItems,nSlots,tuple(vs))] = accum 185 return accum
186 187 _indexCombinations={}
188 -def GetIndexCombinations(nItems,nSlots,slot=0,lastItemVal=0):
189 """ Generates all combinations of nItems in nSlots without including 190 duplicates 191 192 **Arguments** 193 194 - nItems: the number of items to distribute 195 196 - nSlots: the number of slots in which to distribute them 197 198 - slot: used in recursion 199 200 - lastItemVal: used in recursion 201 202 **Returns** 203 204 a list of lists 205 206 """ 207 global _indexCombinations 208 if not slot and _indexCombinations.has_key((nItems,nSlots)): 209 res = _indexCombinations[(nItems,nSlots)] 210 elif slot >= nSlots: 211 res = [] 212 elif slot == nSlots-1: 213 res = [[x] for x in range(lastItemVal,nItems)] 214 else: 215 res = [] 216 for x in range(lastItemVal,nItems): 217 tmp = GetIndexCombinations(nItems,nSlots,slot+1,x) 218 for entry in tmp: 219 res.append([x]+entry) 220 if not slot: 221 _indexCombinations[(nItems,nSlots)] = res 222 return res
223
224 -def GetAllCombinations(choices,noDups=1,which=0):
225 """ Does the combinatorial explosion of the possible combinations 226 of the elements of _choices_. 227 228 **Arguments** 229 230 - choices: sequence of sequences with the elements to be enumerated 231 232 - noDups: (optional) if this is nonZero, duplicates will not be included 233 in the values returned 234 235 - noDups: (optional) if this is nonzero, results with duplicates, 236 e.g. (1,1,0), will not be generated 237 238 - which: used in recursion 239 240 **Returns** 241 242 a list of lists 243 244 """ 245 if which >= len(choices): 246 res = [] 247 elif which == len(choices)-1: 248 res = [[x] for x in choices[which]] 249 else: 250 res = [] 251 tmp = GetAllCombinations(choices,noDups=noDups, 252 which=which+1) 253 for thing in choices[which]: 254 for other in tmp: 255 if not noDups or thing not in other: 256 res.append([thing]+other) 257 return res
258
259 -def UniquifyCombinations(combos):
260 """ uniquifies the combinations in the argument 261 262 **Arguments**: 263 264 - combos: a sequence of sequences 265 266 **Returns** 267 268 - a list of tuples containing the unique combos 269 270 **Notes** 271 272 - the order of the indices of the individual combos in the 273 results list is modified (they are sorted) 274 275 """ 276 resD = {} 277 for combo in combos: 278 k = combo[:] 279 k.sort() 280 resD[tuple(k)] = tuple(combo) 281 return resD.values()
282
283 -def GetPossibleScaffolds(nPts,bins):
284 """ gets all realizable scaffolds (passing the triangle inequality) with the 285 given number of points and returns them as a list of tuples 286 287 """ 288 if nPts < 2: 289 res = 0 290 elif nPts == 2: 291 res = [(x,) for x in range(len(bins))] 292 else: 293 nDists = len(nPointDistDict[nPts]) 294 combos = GetAllCombinations([range(len(bins))]*nDists,noDups=0) 295 res = [] 296 for combo in combos: 297 if ScaffoldPasses(combo,bins): 298 res.append(tuple(combo)) 299 return res
300 301 302 303 if __name__ == '__main__': 304 things = [(1,0),(0,1)] 305 for thing in things: 306 print thing,CountUpTo(3,2,thing) 307 308 309 #r = GetIndexCombinations(3,3) 310 #for thing in r: 311 # print '\t',thing 312 313 #print '$$$$$$$$$' 314 #choices = [[1,3,7],[1,4,6],[10,11]] 315 #combs = GetAllCombinations(choices) 316 #for comb in combs: 317 # print '\t',comb 318