1
2
3
4
5
6
7
8
9
10
11 """ utility functionality for the 2D pharmacophores code
12
13 See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way
14 pharmacophores are broken into triangles and labelled.
15
16 See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit
17 numbering
18
19 """
20 import numpy
21
22
23
24
25
26 nPointDistDict = {
27 2: ((0,1),),
28 3: ((0,1),(0,2),
29 (1,2)),
30 4: ((0,1),(0,2),(0,3),
31 (1,2),(2,3)),
32 5: ((0,1),(0,2),(0,3),(0,4),
33 (1,2),(2,3),(3,4)),
34 6: ((0,1),(0,2),(0,3),(0,4),(0,5),
35 (1,2),(2,3),(3,4),(4,5)),
36 7: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),
37 (1,2),(2,3),(3,4),(4,5),(5,6)),
38 8: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),
39 (1,2),(2,3),(3,4),(4,5),(5,6),(6,7)),
40 9: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),
41 (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8)),
42 10: ((0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),(0,9),
43 (1,2),(2,3),(3,4),(4,5),(5,6),(6,7),(7,8),(8,9)),
44 }
45
46
47
48
49 nDistPointDict = {
50 1:2,
51 3:3,
52 5:4,
53 7:5,
54 9:6,
55 11:7,
56 13:8,
57 15:9,
58 17:10,
59 }
60
61 _trianglesInPharmacophore = {}
80
81
83 if x <= 1:
84 return 1
85
86 accum = 1
87 for i in range(x):
88 accum *= i+1
89 return accum
90
92 """ checks the triangle inequality for combinations
93 of distance bins.
94
95 the general triangle inequality is:
96 d1 + d2 >= d3
97 the conservative binned form of this is:
98 d1(upper) + d2(upper) >= d3(lower)
99
100 """
101 if d1[1]+d2[1]<d3[0]: return False
102 if d2[1]+d3[1]<d1[0]: return False
103 if d3[1]+d1[1]<d2[0]: return False
104
105 return True
106
108 """ checks the scaffold passed in to see if all
109 contributing triangles can satisfy the triangle inequality
110
111 the scaffold itself (encoded in combo) is a list of binned distances
112
113 """
114
115 nPts = nDistPointDict[len(combo)]
116 tris = GetTriangles(nPts)
117 for tri in tris:
118 ds = [bins[combo[x]] for x in tri]
119 if not BinsTriangleInequality(ds[0],ds[1],ds[2]):
120 return False
121 return True
122
123
124 _numCombDict = {}
126 """ returns the number of ways to fit nItems into nSlots
127
128 We assume that (x,y) and (y,x) are equivalent, and
129 (x,x) is allowed.
130
131 General formula is, for N items and S slots:
132 res = (N+S-1)! / ( (N-1)! * S! )
133
134 """
135 global _numCombDict
136 res = _numCombDict.get((nItems,nSlots),-1)
137 if res == -1:
138 res = _fact(nItems+nSlots-1) / (_fact(nItems-1)*_fact(nSlots))
139 _numCombDict[(nItems,nSlots)] = res
140 return res
141
142 _verbose = 0
143
144 _countCache={}
145 -def CountUpTo(nItems,nSlots,vs,idx=0,startAt=0):
146 """ Figures out where a given combination of indices would
147 occur in the combinatorial explosion generated by _GetIndexCombinations_
148
149 **Arguments**
150
151 - nItems: the number of items to distribute
152
153 - nSlots: the number of slots in which to distribute them
154
155 - vs: a sequence containing the values to find
156
157 - idx: used in the recursion
158
159 - startAt: used in the recursion
160
161 **Returns**
162
163 an integer
164
165 """
166 global _countCache
167 if _verbose:
168 print ' '*idx,'CountUpTo(%d)'%idx,vs[idx],startAt
169 if idx==0 and _countCache.has_key((nItems,nSlots,tuple(vs))):
170 return _countCache[(nItems,nSlots,tuple(vs))]
171 elif idx >= nSlots:
172 accum = 0
173 elif idx == nSlots-1:
174 accum = vs[idx]-startAt
175 else:
176 accum = 0
177
178 for i in range(startAt,vs[idx]):
179 nLevsUnder = nSlots-idx-1
180 nValsOver = nItems-i
181 if _verbose:
182 print ' '*idx,' ',i,nValsOver,nLevsUnder,\
183 NumCombinations(nValsOver,nLevsUnder)
184 accum += NumCombinations(nValsOver,nLevsUnder)
185 accum += CountUpTo(nItems,nSlots,vs,idx+1,vs[idx])
186 if _verbose: print ' '*idx,'>',accum
187 if idx == 0:
188 _countCache[(nItems,nSlots,tuple(vs))] = accum
189 return accum
190
191 _indexCombinations={}
193 """ Generates all combinations of nItems in nSlots without including
194 duplicates
195
196 **Arguments**
197
198 - nItems: the number of items to distribute
199
200 - nSlots: the number of slots in which to distribute them
201
202 - slot: used in recursion
203
204 - lastItemVal: used in recursion
205
206 **Returns**
207
208 a list of lists
209
210 """
211 global _indexCombinations
212 if not slot and _indexCombinations.has_key((nItems,nSlots)):
213 res = _indexCombinations[(nItems,nSlots)]
214 elif slot >= nSlots:
215 res = []
216 elif slot == nSlots-1:
217 res = [[x] for x in range(lastItemVal,nItems)]
218 else:
219 res = []
220 for x in range(lastItemVal,nItems):
221 tmp = GetIndexCombinations(nItems,nSlots,slot+1,x)
222 for entry in tmp:
223 res.append([x]+entry)
224 if not slot:
225 _indexCombinations[(nItems,nSlots)] = res
226 return res
227
229 """ Does the combinatorial explosion of the possible combinations
230 of the elements of _choices_.
231
232 **Arguments**
233
234 - choices: sequence of sequences with the elements to be enumerated
235
236 - noDups: (optional) if this is nonzero, results with duplicates,
237 e.g. (1,1,0), will not be generated
238
239 - which: used in recursion
240
241 **Returns**
242
243 a list of lists
244
245 >>> GetAllCombinations([(0,),(1,),(2,)])
246 [[0, 1, 2]]
247 >>> GetAllCombinations([(0,),(1,3),(2,)])
248 [[0, 1, 2], [0, 3, 2]]
249
250 >>> GetAllCombinations([(0,1),(1,3),(2,)])
251 [[0, 1, 2], [0, 3, 2], [1, 3, 2]]
252
253 """
254 if which >= len(choices):
255 res = []
256 elif which == len(choices)-1:
257 res = [[x] for x in choices[which]]
258 else:
259 res = []
260 tmp = GetAllCombinations(choices,noDups=noDups,
261 which=which+1)
262 for thing in choices[which]:
263 for other in tmp:
264 if not noDups or thing not in other:
265 res.append([thing]+other)
266 return res
267
269 """ Does the combinatorial explosion of the possible combinations
270 of the elements of _choices_.
271
272 """
273 assert len(choices)==len(classes)
274 if which >= len(choices):
275 res = []
276 elif which == len(choices)-1:
277 res = [[(classes[which],x)] for x in choices[which]]
278 else:
279 res = []
280 tmp = GetUniqueCombinations(choices,classes,
281 which=which+1)
282 for thing in choices[which]:
283 for other in tmp:
284 idxThere=0
285 for x in other:
286 if x[1]==thing:idxThere+=1
287 if not idxThere:
288 newL = [(classes[which],thing)]+other
289 newL.sort()
290 if newL not in res:
291 res.append(newL)
292 return res
293
295 """ uniquifies the combinations in the argument
296
297 **Arguments**:
298
299 - combos: a sequence of sequences
300
301 **Returns**
302
303 - a list of tuples containing the unique combos
304
305 """
306 print '>>> u:',combos
307 resD = {}
308 for combo in combos:
309 k = combo[:]
310 k.sort()
311 resD[tuple(k)] = tuple(combo)
312 print ' >>> u:',resD.values()
313 return resD.values()
314
316 """ gets all realizable scaffolds (passing the triangle inequality) with the
317 given number of points and returns them as a list of tuples
318
319 """
320 if nPts < 2:
321 res = 0
322 elif nPts == 2:
323 res = [(x,) for x in range(len(bins))]
324 else:
325 nDists = len(nPointDistDict[nPts])
326 combos = GetAllCombinations([range(len(bins))]*nDists,noDups=0)
327 res = []
328 for combo in combos:
329 if not useTriangleInequality or ScaffoldPasses(combo,bins):
330 res.append(tuple(combo))
331 return res
332
334 """
335 put the distances for a triangle into canonical order
336
337 It's easy if the features are all different:
338 >>> OrderTriangle([0,2,4],[1,2,3])
339 ([0, 2, 4], [1, 2, 3])
340
341 It's trickiest if they are all the same:
342 >>> OrderTriangle([0,0,0],[1,2,3])
343 ([0, 0, 0], [3, 2, 1])
344 >>> OrderTriangle([0,0,0],[2,1,3])
345 ([0, 0, 0], [3, 2, 1])
346 >>> OrderTriangle([0,0,0],[1,3,2])
347 ([0, 0, 0], [3, 2, 1])
348 >>> OrderTriangle([0,0,0],[3,1,2])
349 ([0, 0, 0], [3, 2, 1])
350 >>> OrderTriangle([0,0,0],[3,2,1])
351 ([0, 0, 0], [3, 2, 1])
352
353 >>> OrderTriangle([0,0,1],[3,2,1])
354 ([0, 0, 1], [3, 2, 1])
355 >>> OrderTriangle([0,0,1],[1,3,2])
356 ([0, 0, 1], [1, 3, 2])
357 >>> OrderTriangle([0,0,1],[1,2,3])
358 ([0, 0, 1], [1, 3, 2])
359 >>> OrderTriangle([0,0,1],[1,3,2])
360 ([0, 0, 1], [1, 3, 2])
361
362 """
363 if len(featIndices)!=3: raise ValueError,'bad indices'
364 if len(dists)!=3: raise ValueError,'bad dists'
365
366 fs = set(featIndices)
367 if len(fs)==3:
368 return featIndices,dists
369
370 dSums=[0]*3
371 dSums[0] = dists[0]+dists[1]
372 dSums[1] = dists[0]+dists[2]
373 dSums[2] = dists[1]+dists[2]
374 mD = max(dSums)
375 if len(fs)==1:
376 if dSums[0]==mD:
377 if dists[0]>dists[1]:
378 ireorder=(0,1,2)
379 dreorder=(0,1,2)
380 else:
381 ireorder=(0,2,1)
382 dreorder=(1,0,2)
383 elif dSums[1]==mD:
384 if dists[0]>dists[2]:
385 ireorder=(1,0,2)
386 dreorder=(0,2,1)
387 else:
388 ireorder=(1,2,0)
389 dreorder=(2,0,1)
390 else:
391 if dists[1]>dists[2]:
392 ireorder = (2,0,1)
393 dreorder=(1,2,0)
394 else:
395 ireorder = (2,1,0)
396 dreorder=(2,1,0)
397 else:
398
399 if featIndices[0]==featIndices[1]:
400 if dists[1]>dists[2]:
401 ireorder=(0,1,2)
402 dreorder=(0,1,2)
403 else:
404 ireorder=(1,0,2)
405 dreorder=(0,2,1)
406 elif featIndices[0]==featIndices[2]:
407 if dists[0]>dists[2]:
408 ireorder=(0,1,2)
409 dreorder=(0,1,2)
410 else:
411 ireorder=(2,1,0)
412 dreorder=(2,1,0)
413 else:
414 if dists[0]>dists[1]:
415 ireorder=(0,1,2)
416 dreorder=(0,1,2)
417 else:
418 ireorder=(0,2,1)
419 dreorder=(1,0,2)
420 dists = [dists[x] for x in dreorder]
421 featIndices = [featIndices[x] for x in ireorder]
422 return featIndices,dists
423
424
425
426
427
429 import doctest,sys
430 return doctest.testmod(sys.modules["__main__"])
431
432
433 if __name__ == '__main__':
434 import sys
435 failed,tried = _test()
436 sys.exit(failed)
437