1
2
3
4
5
6
7
8
9
10
11 from rdkit import DataStructs
12 import copy,struct,cPickle
13
15 """
16
17 >>> vc = VectCollection()
18 >>> bv1 = DataStructs.ExplicitBitVect(10)
19 >>> bv1.SetBitsFromList((1,3,5))
20 >>> vc.AddVect(1,bv1)
21 >>> bv1 = DataStructs.ExplicitBitVect(10)
22 >>> bv1.SetBitsFromList((6,8))
23 >>> vc.AddVect(2,bv1)
24 >>> len(vc)
25 10
26 >>> vc.GetNumBits()
27 10
28 >>> vc[0]
29 0
30 >>> vc[1]
31 1
32 >>> vc[9]
33 0
34 >>> vc[6]
35 1
36 >>> vc.GetBit(6)
37 1
38 >>> list(vc.GetOnBits())
39 [1, 3, 5, 6, 8]
40
Keys must be unique, so adding a vector under an existing key replaces
the vector previously stored under that key:
43 >>> bv1 = DataStructs.ExplicitBitVect(10)
44 >>> bv1.SetBitsFromList((7,9))
45 >>> vc.AddVect(1,bv1)
46 >>> len(vc)
47 10
48 >>> vc[1]
49 0
50 >>> vc[9]
51 1
52 >>> vc[6]
53 1
54
We can also query the children, i.e. the stored (key, vector) pairs:
56 >>> vc.NumChildren()
57 2
58 >>> cs = vc.GetChildren()
59 >>> id,fp = cs[0]
60 >>> id
61 1
62 >>> list(fp.GetOnBits())
63 [7, 9]
64 >>> id,fp = cs[1]
65 >>> id
66 2
67 >>> list(fp.GetOnBits())
68 [6, 8]
69
70 attach/detach operations:
71 >>> bv1 = DataStructs.ExplicitBitVect(10)
72 >>> bv1.SetBitsFromList((5,6))
73 >>> vc.AddVect(3,bv1)
74 >>> vc.NumChildren()
75 3
76 >>> list(vc.GetOnBits())
77 [5, 6, 7, 8, 9]
78 >>> vc.DetachVectsNotMatchingBit(6)
79 >>> vc.NumChildren()
80 2
81 >>> list(vc.GetOnBits())
82 [5, 6, 8]
83
84
85 >>> bv1 = DataStructs.ExplicitBitVect(10)
86 >>> bv1.SetBitsFromList((7,9))
87 >>> vc.AddVect(1,bv1)
88 >>> vc.NumChildren()
89 3
90 >>> list(vc.GetOnBits())
91 [5, 6, 7, 8, 9]
92 >>> vc.DetachVectsMatchingBit(6)
93 >>> vc.NumChildren()
94 1
95 >>> list(vc.GetOnBits())
96 [7, 9]
97
98
99 to copy VectCollections, use the copy module:
100 >>> bv1 = DataStructs.ExplicitBitVect(10)
101 >>> bv1.SetBitsFromList((5,6))
102 >>> vc.AddVect(3,bv1)
103 >>> list(vc.GetOnBits())
104 [5, 6, 7, 9]
105 >>> vc2 = copy.copy(vc)
106 >>> vc.DetachVectsNotMatchingBit(6)
107 >>> list(vc.GetOnBits())
108 [5, 6]
109 >>> list(vc2.GetOnBits())
110 [5, 6, 7, 9]
111
112 The Uniquify() method can be used to remove duplicate vectors:
113 >>> vc = VectCollection()
114 >>> bv1 = DataStructs.ExplicitBitVect(10)
115 >>> bv1.SetBitsFromList((7,9))
116 >>> vc.AddVect(1,bv1)
117 >>> vc.AddVect(2,bv1)
118 >>> bv1 = DataStructs.ExplicitBitVect(10)
119 >>> bv1.SetBitsFromList((2,3,5))
120 >>> vc.AddVect(3,bv1)
121 >>> vc.NumChildren()
122 3
123 >>> vc.Uniquify()
124 >>> vc.NumChildren()
125 2
126
127
128
129 """
# Start with an empty collection. The four assignments are independent of
# each other, so their order does not matter.
self.__needReset = True  # the cached OR-vector must be built before use
self.__numBits = -1      # placeholder until Reset() reads a real size
self.__orVect = None     # cache for the OR of all stored vectors
self.__vects = {}        # maps key -> vector
135
136
138 if self.__needReset:
139 self.Reset()
140 return self.__orVect
# Read-only attribute-style access to the OR-vector (no setter or deleter).
orVect = property(fget=GetOrVect)
142
# Store `vect` under `id`; an entry already stored under the same `id` is
# overwritten. The cached OR-vector is then out of date, so flag it.
self.__vects[id] = vect
self.__needReset = True
146
148 if not self.__needReset:
149 return
150 self.__orVect=None
151 if not self.__vects:
152 return
153 ks = self.__vects.keys()
154 self.__orVect = copy.copy(self.__vects[ks[0]])
155 self.__numBits = self.__orVect.GetNumBits()
156 for i in range(1,len(ks)):
157 self.__orVect |= self.__vects[ks[i]]
158 self.__needReset=False
159
161 return len(self.__vects.keys())
162
164 return tuple(self.__vects.iteritems())
165
167 if self.__needReset:
168 self.Reset()
169 return self[id]
172
174 if self.__needReset:
175 self.Reset()
176 return self.__orVect.GetOnBits()
177
# Drop every stored vector that does NOT have `bit` set.
# Iterate over a snapshot because entries are deleted along the way.
for key, vect in list(self.__vects.items()):
    if vect.GetBit(bit):
        continue
    del self.__vects[key]
    # Only an actual removal invalidates the cached OR-vector.
    self.__needReset = True
184
# Drop every stored vector that DOES have `bit` set.
# Iterate over a snapshot because entries are deleted along the way.
for key, vect in list(self.__vects.items()):
    if not vect.GetBit(bit):
        continue
    del self.__vects[key]
    # Only an actual removal invalidates the cached OR-vector.
    self.__needReset = True
191
# Remove duplicate vectors: two vectors count as duplicates when their
# on-bit lists are equal. Of each group of duplicates, the entry met first
# in the dictionary's iteration order is the one that is kept.
#
# A set of on-bit signatures replaces the former pairwise comparison, so
# this is a single pass over the stored vectors.
seen = set()
unique = {}
for key, vect in self.__vects.items():
    signature = tuple(vect.GetOnBits())
    if signature in seen:
        continue
    seen.add(signature)
    unique[key] = vect

# Fix: this used to assign `self.__needsReset` (extra "s"), which silently
# created an unrelated attribute and never flagged the cache as stale.
self.__needReset = True
if verbose:
    # Same text as before; this form is valid with or without the print
    # statement.
    print('uniquify: %d -> %d' % (len(self.__vects), len(unique)))
self.__vects = unique
218
219
221 if self.__needReset:
222 self.Reset()
223 return self.__numBits
225 if self.__needReset:
226 self.Reset()
227 return self.__orVect.GetBit(id)
228
229
230
231
233 pkl = struct.pack('I',len(self.__vects))
234 for k,v in self.__vects.iteritems():
235 pkl += struct.pack('I',k)
236 p = v.ToBinary()
237 l = len(p)
238 pkl += struct.pack('I%ds'%(l),l,p)
239 return pkl
240
# Rebuild the collection from the byte string `pkl`. Expected layout
# (native unsigned ints, struct code 'I'):
#   count, then for each vector: key, payload length, payload bytes
# Start from the same empty state a fresh collection has.
self.__vects = {}
self.__orVect = None
self.__numBits = -1
self.__needReset = True

int_size = struct.calcsize('I')
pos = 0
(count,) = struct.unpack_from('I', pkl, pos)
pos += int_size
for _ in range(count):
    (key,) = struct.unpack_from('I', pkl, pos)
    pos += int_size
    (length,) = struct.unpack_from('I', pkl, pos)
    pos += int_size
    payload_fmt = '%ds' % length
    (payload,) = struct.unpack_from(payload_fmt, pkl, pos)
    pos += struct.calcsize(payload_fmt)
    # The payload is handed straight to the bit-vector constructor.
    self.AddVect(key, DataStructs.ExplicitBitVect(payload))
259
260
261
262
263
264
265
267 import doctest,sys
268 return doctest.testmod(sys.modules["__main__"])
269
270
if __name__ == '__main__':
    # Script entry point: run the doctests and use the number of failures
    # as the process exit status (0 means every example passed).
    import sys
    failures, _attempted = _test()
    sys.exit(failures)
275