1
2
3
4
5
6
7 import Chem
8 from Chem import rdMolDescriptors
9 import math
10
12 """
13
14 **Arguments**:
15
16 - the code to be considered
17
18 - branchSubtract: (optional) the constant that was subtracted off
19 the number of neighbors before integrating it into the code.
20 This is used by the topological torsions code.
21
22
23 >>> m = Chem.MolFromSmiles('C=CC(=O)O')
24 >>> code = GetAtomCode(m.GetAtomWithIdx(0))
25 >>> ExplainAtomCode(code)
26 ('C', 1, 1)
27 >>> code = GetAtomCode(m.GetAtomWithIdx(1))
28 >>> ExplainAtomCode(code)
29 ('C', 2, 1)
30 >>> code = GetAtomCode(m.GetAtomWithIdx(2))
31 >>> ExplainAtomCode(code)
32 ('C', 3, 1)
33 >>> code = GetAtomCode(m.GetAtomWithIdx(3))
34 >>> ExplainAtomCode(code)
35 ('O', 1, 1)
36 >>> code = GetAtomCode(m.GetAtomWithIdx(4))
37 >>> ExplainAtomCode(code)
38 ('O', 1, 0)
39
40 """
41 typeMask = (1<<rdMolDescriptors.AtomPairsParameters.numTypeBits)-1
42 branchMask = (1<<rdMolDescriptors.AtomPairsParameters.numBranchBits)-1
43 piMask = (1<<rdMolDescriptors.AtomPairsParameters.numPiBits)-1
44
45 nBranch = int(code&branchMask)
46
47 code = code>>rdMolDescriptors.AtomPairsParameters.numBranchBits
48 nPi = int(code&piMask)
49
50 code = code>>rdMolDescriptors.AtomPairsParameters.numPiBits
51
52 typeIdx=int(code&typeMask)
53 if typeIdx<len(rdMolDescriptors.AtomPairsParameters.atomTypes):
54 atomNum = rdMolDescriptors.AtomPairsParameters.atomTypes[typeIdx]
55 atomSymbol=Chem.GetPeriodicTable().GetElementSymbol(atomNum)
56 else:
57 atomSymbol='X'
58 return (atomSymbol,nBranch,nPi)
59
60 GetAtomCode=rdMolDescriptors.GetAtomPairAtomCode
61
63 """ Returns the number of electrons an atom is using for pi bonding
64
65 >>> m = Chem.MolFromSmiles('C=C')
66 >>> NumPiElectrons(m.GetAtomWithIdx(0))
67 1
68
69 >>> m = Chem.MolFromSmiles('C#CC')
70 >>> NumPiElectrons(m.GetAtomWithIdx(0))
71 2
72 >>> NumPiElectrons(m.GetAtomWithIdx(1))
73 2
74
75 >>> m = Chem.MolFromSmiles('O=C=CC')
76 >>> NumPiElectrons(m.GetAtomWithIdx(0))
77 1
78 >>> NumPiElectrons(m.GetAtomWithIdx(1))
79 2
80 >>> NumPiElectrons(m.GetAtomWithIdx(2))
81 1
82 >>> NumPiElectrons(m.GetAtomWithIdx(3))
83 0
84
85 FIX: this behaves oddly in these cases:
86 >>> m = Chem.MolFromSmiles('S(=O)(=O)')
87 >>> NumPiElectrons(m.GetAtomWithIdx(0))
88 2
89
90 >>> m = Chem.MolFromSmiles('S(=O)(=O)(O)O')
91 >>> NumPiElectrons(m.GetAtomWithIdx(0))
92 0
93
94 In the second case, the S atom is tagged as sp3 hybridized.
95
96 """
97
98 res = 0
99 if atom.GetIsAromatic():
100 res = 1
101 elif atom.GetHybridization() != Chem.HybridizationType.SP3:
102
103
104 res = atom.GetExplicitValence() - atom.GetDegree()
105 return res
106
107
109 """ Returns the number of bits in common between two vectors
110
111 **Arguments**:
112
113 - two vectors (sequences of bit ids)
114
115 **Returns**: an integer
116
117 **Notes**
118
119 - the vectors must be sorted
120
121 - duplicate bit IDs are counted more than once
122
123 >>> BitsInCommon( (1,2,3,4,10), (2,4,6) )
124 2
125
126 Here's how duplicates are handled:
127 >>> BitsInCommon( (1,2,2,3,4), (2,2,4,5,6) )
128 3
129
130 """
131 res = 0
132 v2Pos = 0
133 nV2 = len(v2)
134 for val in v1:
135 while v2Pos<nV2 and v2[v2Pos]<val:
136 v2Pos+=1
137 if v2Pos >= nV2:
138 break
139 if v2[v2Pos]==val:
140 res += 1
141 v2Pos += 1
142 return res
143
144
146 """ Implements the DICE similarity metric.
147 This is the recommended metric in both the Topological torsions
148 and Atom pairs papers.
149
150 **Arguments**:
151
152 - two vectors (sequences of bit ids)
153
154 **Returns**: a float.
155
156 **Notes**
157
158 - the vectors must be sorted
159
160
161 >>> DiceSimilarity( (1,2,3), (1,2,3) )
162 1.0
163 >>> DiceSimilarity( (1,2,3), (5,6) )
164 0.0
165 >>> DiceSimilarity( (1,2,3,4), (1,3,5,7) )
166 0.5
167 >>> DiceSimilarity( (1,2,3,4,5,6), (1,3) )
168 0.5
169
170 Note that duplicate bit IDs count multiple times:
171 >>> DiceSimilarity( (1,1,3,4,5,6), (1,1) )
172 0.5
173
174 but only if they are duplicated in both vectors:
175 >>> DiceSimilarity( (1,1,3,4,5,6), (1,) )==2./7
176 True
177
178 """
179 denom = 1.0*(len(v1)+len(v2))
180 if not denom:
181 res = 0.0
182 else:
183 if bounds and (min(len(v1),len(v2))/denom) < bounds:
184 numer = 0.0
185 else:
186 numer = 2.0*BitsInCommon(v1,v2)
187 res = numer/denom
188
189 return res
190
191
193 """ Returns the Dot product between two vectors:
194
195 **Arguments**:
196
197 - two vectors (sequences of bit ids)
198
199 **Returns**: an integer
200
201 **Notes**
202
203 - the vectors must be sorted
204
205 - duplicate bit IDs are counted more than once
206
207 >>> Dot( (1,2,3,4,10), (2,4,6) )
208 2
209
210 Here's how duplicates are handled:
211 >>> Dot( (1,2,2,3,4), (2,2,4,5,6) )
212 5
213 >>> Dot( (1,2,2,3,4), (2,4,5,6) )
214 2
215 >>> Dot( (1,2,2,3,4), (5,6) )
216 0
217 >>> Dot( (), (5,6) )
218 0
219
220 """
221 res = 0
222 nV1 = len(v1)
223 nV2 = len(v2)
224 i = 0
225 j = 0
226 while i<nV1:
227 v1Val = v1[i]
228 v1Count = 1
229 i+=1
230 while i<nV1 and v1[i]==v1Val:
231 v1Count += 1
232 i+=1
233 while j<nV2 and v2[j]<v1Val:
234 j+=1
235 if j < nV2 and v2[j]==v1Val:
236 v2Val = v2[j]
237 v2Count = 1
238 j+=1
239 while j<nV2 and v2[j]==v1Val:
240 v2Count+=1
241 j+=1
242 commonCount=min(v1Count,v2Count)
243 res += commonCount*commonCount
244 elif j>=nV2:
245 break
246 return res
247
249 """ Implements the Cosine similarity metric.
250 This is the recommended metric in the LaSSI paper
251
252 **Arguments**:
253
254 - two vectors (sequences of bit ids)
255
256 **Returns**: a float.
257
258 **Notes**
259
260 - the vectors must be sorted
261
262 >>> print '%.3f'%CosineSimilarity( (1,2,3,4,10), (2,4,6) )
263 0.516
264 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (2,2,4,5,6) )
265 0.714
266 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (1,2,2,3,4) )
267 1.000
268 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (5,6,7) )
269 0.000
270 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), () )
271 0.000
272
273 """
274 d1 = Dot(v1,v1)
275 d2 = Dot(v2,v2)
276 denom = math.sqrt(d1*d2)
277 if not denom:
278 res = 0.0
279 else:
280 numer = Dot(v1,v2)
281 res = numer/denom
282 return res
283
284
285
286
287
288
289
290
292 import doctest,sys
293 return doctest.testmod(sys.modules["__main__"])
294
295
296 if __name__ == '__main__':
297 import sys
298 failed,tried = _test()
299 sys.exit(failed)
300