1
2
3
4
5
6
7
8
9
10
11 """ class definitions for similarity screening
12
13 See _SimilarityScreener_ for an overview of the required API
14
15 """
16 from rdkit import DataStructs
17 from rdkit.DataStructs import TopNContainer
18 from rdkit import RDConfig
19
21 """ base class
22
23 important attributes:
24 probe: the probe fingerprint against which we screen.
25
26 metric: a function that takes two arguments and returns a similarity
27 measure between them
28
29 dataSource: the source pool from which to draw, needs to support
30 a next() method
31
32 fingerprinter: a function that takes a molecule and returns a
33 fingerprint of the appropriate format
34
35
36 **Notes**
37 subclasses must support either an iterator interface
38 or __len__ and __getitem__
39 """
def __init__(self, probe=None, metric=None, dataSource=None,
             fingerprinter=None):
    """ stores the screening configuration on the instance

    Arguments (all optional, each defaults to None):

      probe: the probe fingerprint against which we screen.

      metric: a function that takes two arguments and returns a
        similarity measure between them

      dataSource: the source pool from which to draw

      fingerprinter: a function that takes a molecule and returns a
        fingerprint of the appropriate format

    **Notes**

      - each argument is stored unchanged on the attribute of the same
        name; nothing is validated or called here.

    """
    self.metric = metric
    self.dataSource = dataSource
    self.fingerprinter = fingerprinter
    self.probe = probe
45
47 """ used to reset screeners that behave as iterators
48 """
49 pass
50
51
53 """ sets our probe fingerprint """
54 self.probe = probeFingerprint
55
57 """ returns a fingerprint for a single probe object
58
59 This is potentially useful in initializing our internal
60 probe object.
61
62 """
63 return self.fingerprinter(probe)
64
66 """ Used to return all compounds that have a similarity
67 to the probe at or above a threshold value
68
69 **Notes**:
70
71 - This is as lazy as possible, so the data source isn't
72 queried until the client asks for a hit.
73
74 - In addition to being lazy, this class is as thin as possible.
75 (Who'd have thought it was possible!)
76 Hits are *not* stored locally, so if a client resets
77 the iteration and starts over, the same amount of work must
78 be done to retrieve the hits.
79
80 - The thinness and laziness force us to support only forward
81 iteration (not random access)
82
83 """
88
89
91 """ *Internal use only* """
92 done = 0
93 res = None
94 sim = 0
95 while not done:
96
97
98 obj = self.dataIter.next()
99 fp = self.fingerprinter(obj)
100 sim = DataStructs.FingerprintSimilarity(fp,self.probe,self.metric)
101 if sim >= self.threshold:
102 res = obj
103 done = 1
104 return sim,res
105
107 """ used to reset our internal state so that iteration
108 starts again from the beginning
109 """
110 self.dataSource.reset()
111 self.dataIter = iter(self.dataSource)
113 """ returns an iterator for this screener
114 """
115 self.Reset()
116 return self
118 """ required part of iterator interface """
119 return self._nextMatch()
120
122 """ A screener that only returns the top N hits found
123
124 **Notes**
125
126 - supports forward iteration and getitem
127
128 """
134
143 if self._pos >= self.numToGet:
144 raise StopIteration
145 else:
146 res = self.topN[self._pos]
147 self._pos += 1
148 return res
149
156
158 if self.topN is None:
159 self._initTopN()
160 return self.numToGet
161
166