1
2
3
4
5
6
7
8
9
10
11 from rdkit import Chem
12 from rdkit.Chem import rdMolDescriptors
13 import math
14
16 """
17
18 **Arguments**:
19
20 - the code to be considered
21
22 - branchSubtract: (optional) the constant that was subtracted off
23 the number of neighbors before integrating it into the code.
24 This is used by the topological torsions code.
25
26
27 >>> m = Chem.MolFromSmiles('C=CC(=O)O')
28 >>> code = GetAtomCode(m.GetAtomWithIdx(0))
29 >>> ExplainAtomCode(code)
30 ('C', 1, 1)
31 >>> code = GetAtomCode(m.GetAtomWithIdx(1))
32 >>> ExplainAtomCode(code)
33 ('C', 2, 1)
34 >>> code = GetAtomCode(m.GetAtomWithIdx(2))
35 >>> ExplainAtomCode(code)
36 ('C', 3, 1)
37 >>> code = GetAtomCode(m.GetAtomWithIdx(3))
38 >>> ExplainAtomCode(code)
39 ('O', 1, 1)
40 >>> code = GetAtomCode(m.GetAtomWithIdx(4))
41 >>> ExplainAtomCode(code)
42 ('O', 1, 0)
43
44 """
45 typeMask = (1<<rdMolDescriptors.AtomPairsParameters.numTypeBits)-1
46 branchMask = (1<<rdMolDescriptors.AtomPairsParameters.numBranchBits)-1
47 piMask = (1<<rdMolDescriptors.AtomPairsParameters.numPiBits)-1
48
49 nBranch = int(code&branchMask)
50
51 code = code>>rdMolDescriptors.AtomPairsParameters.numBranchBits
52 nPi = int(code&piMask)
53
54 code = code>>rdMolDescriptors.AtomPairsParameters.numPiBits
55
56 typeIdx=int(code&typeMask)
57 if typeIdx<len(rdMolDescriptors.AtomPairsParameters.atomTypes):
58 atomNum = rdMolDescriptors.AtomPairsParameters.atomTypes[typeIdx]
59 atomSymbol=Chem.GetPeriodicTable().GetElementSymbol(atomNum)
60 else:
61 atomSymbol='X'
62 return (atomSymbol,nBranch,nPi)
63
64 GetAtomCode=rdMolDescriptors.GetAtomPairAtomCode
65
67 """ Returns the number of electrons an atom is using for pi bonding
68
69 >>> m = Chem.MolFromSmiles('C=C')
70 >>> NumPiElectrons(m.GetAtomWithIdx(0))
71 1
72
73 >>> m = Chem.MolFromSmiles('C#CC')
74 >>> NumPiElectrons(m.GetAtomWithIdx(0))
75 2
76 >>> NumPiElectrons(m.GetAtomWithIdx(1))
77 2
78
79 >>> m = Chem.MolFromSmiles('O=C=CC')
80 >>> NumPiElectrons(m.GetAtomWithIdx(0))
81 1
82 >>> NumPiElectrons(m.GetAtomWithIdx(1))
83 2
84 >>> NumPiElectrons(m.GetAtomWithIdx(2))
85 1
86 >>> NumPiElectrons(m.GetAtomWithIdx(3))
87 0
88
89 FIX: this behaves oddly in these cases:
90 >>> m = Chem.MolFromSmiles('S(=O)(=O)')
91 >>> NumPiElectrons(m.GetAtomWithIdx(0))
92 2
93
94 >>> m = Chem.MolFromSmiles('S(=O)(=O)(O)O')
95 >>> NumPiElectrons(m.GetAtomWithIdx(0))
96 0
97
98 In the second case, the S atom is tagged as sp3 hybridized.
99
100 """
101
102 res = 0
103 if atom.GetIsAromatic():
104 res = 1
105 elif atom.GetHybridization() != Chem.HybridizationType.SP3:
106
107
108 res = atom.GetExplicitValence() - atom.GetDegree()
109 return res
110
111
113 """ Returns the number of bits in common between two vectors
114
115 **Arguments**:
116
117 - two vectors (sequences of bit ids)
118
119 **Returns**: an integer
120
121 **Notes**
122
123 - the vectors must be sorted
124
125 - duplicate bit IDs are counted more than once
126
127 >>> BitsInCommon( (1,2,3,4,10), (2,4,6) )
128 2
129
130 Here's how duplicates are handled:
131 >>> BitsInCommon( (1,2,2,3,4), (2,2,4,5,6) )
132 3
133
134 """
135 res = 0
136 v2Pos = 0
137 nV2 = len(v2)
138 for val in v1:
139 while v2Pos<nV2 and v2[v2Pos]<val:
140 v2Pos+=1
141 if v2Pos >= nV2:
142 break
143 if v2[v2Pos]==val:
144 res += 1
145 v2Pos += 1
146 return res
147
148
150 """ Implements the DICE similarity metric.
151 This is the recommended metric in both the Topological torsions
152 and Atom pairs papers.
153
154 **Arguments**:
155
156 - two vectors (sequences of bit ids)
157
158 **Returns**: a float.
159
160 **Notes**
161
162 - the vectors must be sorted
163
164
165 >>> DiceSimilarity( (1,2,3), (1,2,3) )
166 1.0
167 >>> DiceSimilarity( (1,2,3), (5,6) )
168 0.0
169 >>> DiceSimilarity( (1,2,3,4), (1,3,5,7) )
170 0.5
171 >>> DiceSimilarity( (1,2,3,4,5,6), (1,3) )
172 0.5
173
174 Note that duplicate bit IDs count multiple times:
175 >>> DiceSimilarity( (1,1,3,4,5,6), (1,1) )
176 0.5
177
178 but only if they are duplicated in both vectors:
179 >>> DiceSimilarity( (1,1,3,4,5,6), (1,) )==2./7
180 True
181
182 """
183 denom = 1.0*(len(v1)+len(v2))
184 if not denom:
185 res = 0.0
186 else:
187 if bounds and (min(len(v1),len(v2))/denom) < bounds:
188 numer = 0.0
189 else:
190 numer = 2.0*BitsInCommon(v1,v2)
191 res = numer/denom
192
193 return res
194
195
197 """ Returns the Dot product between two vectors:
198
199 **Arguments**:
200
201 - two vectors (sequences of bit ids)
202
203 **Returns**: an integer
204
205 **Notes**
206
207 - the vectors must be sorted
208
209 - duplicate bit IDs are counted more than once
210
211 >>> Dot( (1,2,3,4,10), (2,4,6) )
212 2
213
214 Here's how duplicates are handled:
215 >>> Dot( (1,2,2,3,4), (2,2,4,5,6) )
216 5
217 >>> Dot( (1,2,2,3,4), (2,4,5,6) )
218 2
219 >>> Dot( (1,2,2,3,4), (5,6) )
220 0
221 >>> Dot( (), (5,6) )
222 0
223
224 """
225 res = 0
226 nV1 = len(v1)
227 nV2 = len(v2)
228 i = 0
229 j = 0
230 while i<nV1:
231 v1Val = v1[i]
232 v1Count = 1
233 i+=1
234 while i<nV1 and v1[i]==v1Val:
235 v1Count += 1
236 i+=1
237 while j<nV2 and v2[j]<v1Val:
238 j+=1
239 if j < nV2 and v2[j]==v1Val:
240 v2Val = v2[j]
241 v2Count = 1
242 j+=1
243 while j<nV2 and v2[j]==v1Val:
244 v2Count+=1
245 j+=1
246 commonCount=min(v1Count,v2Count)
247 res += commonCount*commonCount
248 elif j>=nV2:
249 break
250 return res
251
253 """ Implements the Cosine similarity metric.
254 This is the recommended metric in the LaSSI paper
255
256 **Arguments**:
257
258 - two vectors (sequences of bit ids)
259
260 **Returns**: a float.
261
262 **Notes**
263
264 - the vectors must be sorted
265
266 >>> print '%.3f'%CosineSimilarity( (1,2,3,4,10), (2,4,6) )
267 0.516
268 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (2,2,4,5,6) )
269 0.714
270 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (1,2,2,3,4) )
271 1.000
272 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), (5,6,7) )
273 0.000
274 >>> print '%.3f'%CosineSimilarity( (1,2,2,3,4), () )
275 0.000
276
277 """
278 d1 = Dot(v1,v1)
279 d2 = Dot(v2,v2)
280 denom = math.sqrt(d1*d2)
281 if not denom:
282 res = 0.0
283 else:
284 numer = Dot(v1,v2)
285 res = numer/denom
286 return res
287
288
289
290
291
292
293
294
296 import doctest,sys
297 return doctest.testmod(sys.modules["__main__"])
298
299
300 if __name__ == '__main__':
301 import sys
302 failed,tried = _test()
303 sys.exit(failed)
304