1
2
3
4
5
6
7
8
9
10
11 """ Calculation of topological/topochemical descriptors.
12
13
14
15 """
16 from rdkit import Chem
17 from rdkit.Chem import Graphs
18 from rdkit.Chem import rdchem
19
20 from rdkit.Chem import pyPeriodicTable as PeriodicTable
21 import numpy
22 import math
23 from rdkit.ML.InfoTheory import entropy
24
25 periodicTable = rdchem.GetPeriodicTable()
26
27 _log2val = math.log(2)
30
32 """ *Internal Use Only*
33
34 this is just a row sum of the matrix... simple, neh?
35
36 """
37 if not onlyOnes:
38 res = sum(mat)
39 else:
40 res = sum(numpy.equal(mat,1))
41 return res
42
44 """ *Internal Use Only*
45
46 """
47 res = mol.GetNumBonds()
48 return res
49
51 """ *Internal Use Only*
52
53 """
54 res = {}
55 for v in arr:
56 res[v] = res.get(v,0)+1
57 return res
58
59
61 """ calculate the Hall-Kier alpha value for a molecule
62
63 From equations (58) of Rev. Comp. Chem. vol 2, 367-422, (1991)
64
65 """
66 alphaSum = 0.0
67 rC = PeriodicTable.nameTable['C'][5]
68 for atom in m.GetAtoms():
69 atNum=atom.GetAtomicNum()
70 if not atNum: continue
71 symb = atom.GetSymbol()
72 alphaV = PeriodicTable.hallKierAlphas.get(symb,None)
73 if alphaV is not None:
74 hyb = atom.GetHybridization()-2
75 if(hyb<len(alphaV)):
76 alpha = alphaV[hyb]
77 if alpha is None:
78 alpha = alphaV[-1]
79 else:
80 alpha = alphaV[-1]
81 else:
82 rA = PeriodicTable.nameTable[symb][5]
83 alpha = rA/rC - 1
84 alphaSum += alpha
85 return alphaSum
86 HallKierAlpha.version="1.0.2"
87
88 -def Ipc(mol, avg = 0, dMat = None, forceDMat = 0):
89 """This returns the information content of the coefficients of the characteristic
90 polynomial of the adjacency matrix of a hydrogen-suppressed graph of a molecule.
91
92 'avg = 1' returns the information content divided by the total population.
93
94 From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67, 4517-4533 (1977)
95
96 """
97 if forceDMat or dMat is None:
98 if forceDMat:
99 dMat = Chem.GetDistanceMatrix(mol,0)
100 mol._adjMat = dMat
101 else:
102 try:
103 dMat = mol._adjMat
104 except AttributeError:
105 dMat = Chem.GetDistanceMatrix(mol,0)
106 mol._adjMat = dMat
107
108 adjMat = numpy.equal(dMat,1)
109 cPoly = abs(Graphs.CharacteristicPolynomial(mol, adjMat))
110 if avg:
111 return entropy.InfoEntropy(cPoly)
112 else:
113 return sum(cPoly)*entropy.InfoEntropy(cPoly)
114 Ipc.version="1.0.0"
115
116
117
119 """ Hall-Kier Kappa1 value
120
121 From equations (58) and (59) of Rev. Comp. Chem. vol 2, 367-422, (1991)
122
123 """
124 P1 = mol.GetNumBonds(1)
125 A = mol.GetNumAtoms(onlyHeavy=1)
126 alpha = HallKierAlpha(mol)
127 denom = P1 + alpha
128 if denom:
129 kappa = (A + alpha)*(A + alpha - 1)**2 / denom**2
130 else:
131 kappa = 0.0
132 return kappa
133 Kappa1.version="1.0.0"
134
135
137 """ Hall-Kier Kappa2 value
138
139 From equations (58) and (60) of Rev. Comp. Chem. vol 2, 367-422, (1991)
140
141 """
142 P2 = len(Chem.FindAllPathsOfLengthN(mol,2))
143 A = mol.GetNumAtoms(onlyHeavy=1)
144 alpha = HallKierAlpha(mol)
145 denom = (P2 + alpha)**2
146 if denom:
147 kappa = (A + alpha - 1)*(A + alpha - 2)**2 / denom
148 else:
149 kappa = 0
150 return kappa
151 Kappa2.version="1.0.0"
152
154 """ Hall-Kier Kappa3 value
155
156 From equations (58), (61) and (62) of Rev. Comp. Chem. vol 2, 367-422, (1991)
157
158 """
159 P3 = len(Chem.FindAllPathsOfLengthN(mol,3))
160 A = mol.GetNumAtoms(1)
161 alpha = HallKierAlpha(mol)
162 denom = (P3 + alpha)**2
163 if denom:
164 if A % 2 == 1:
165 kappa = (A + alpha - 1)*(A + alpha - 3)**2 / denom
166 else:
167 kappa = (A + alpha - 2)*(A + alpha - 3)**2 / denom
168 else:
169 kappa = 0
170 return kappa
171 Kappa3.version="1.0.0"
172
174 """ From equations (1),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
175
176 """
177 deltas = [x.GetDegree() for x in mol.GetAtoms()]
178 while 0 in deltas:
179 deltas.remove(0)
180 deltas = numpy.array(deltas,'d')
181 res = sum(numpy.sqrt(1./deltas))
182 return res
183 Chi0.version="1.0.0"
184
185
187 """ From equations (1),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
188
189 """
190 c1s = [x.GetBeginAtom().GetDegree()*x.GetEndAtom().GetDegree() for x in mol.GetBonds()]
191 while 0 in c1s:
192 c1s.remove(0)
193 c1s = numpy.array(c1s,'d')
194 res = sum(numpy.sqrt(1./c1s))
195 return res
196 Chi1.version="1.0.0"
197
200
218
220 """ From equations (5),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
221
222 """
223 deltas = _hkDeltas(mol)
224 while 0 in deltas:
225 deltas.remove(0)
226 res = sum(numpy.sqrt(1./numpy.array(deltas)))
227 return res
228 Chi0v.version="1.0.0"
229
231 """ From equations (5),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
232
233 """
234 deltas = numpy.array(_hkDeltas(mol,skipHs=0))
235 res = 0.0
236 for bond in mol.GetBonds():
237 v = deltas[bond.GetBeginAtomIdx()]*deltas[bond.GetEndAtomIdx()]
238 if v != 0.0:
239 res += numpy.sqrt(1./v)
240 return res
241 Chi1v.version="1.0.0"
242
244 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
245
246 **NOTE**: because the current path finding code does, by design,
247 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
248 length 3), values of ChiNv with N >= 3 may give results that differ
249 from those provided by the old code in molecules that have rings of
250 size 3.
251
252 """
253 deltas = numpy.array(_hkDeltas(mol,skipHs=0))
254 accum = 0.0
255
256 for path in Chem.FindAllPathsOfLengthN(mol,order+1,useBonds=0):
257 ats = []
258 cAccum = 1.0
259
260 for idx in path:
261 cAccum *= deltas[idx]
262 if cAccum:
263 accum += 1./numpy.sqrt(cAccum)
264 return accum
265
267 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
268
269 """
270 return ChiNv_(mol,2)
271 Chi2v.version="1.0.0"
272
274 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
275
276 """
277 return ChiNv_(mol,3)
278 Chi3v.version="1.0.0"
279
281 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
282
283 **NOTE**: because the current path finding code does, by design,
284 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
285 length 3), values of Chi4v may give results that differ from those
286 provided by the old code in molecules that have 3 rings.
287
288 """
289 return ChiNv_(mol,4)
290 Chi4v.version="1.0.0"
291
293 """ Similar to Hall Kier Chi0v, but uses nVal instead of valence
294 This makes a big difference after we get out of the first row.
295
296 """
297 deltas = [_nVal(x) for x in mol.GetAtoms()]
298 while deltas.count(0):
299 deltas.remove(0)
300 deltas = numpy.array(deltas,'d')
301 res = sum(numpy.sqrt(1./deltas))
302 return res
303 Chi0n.version="1.0.0"
304
306 """ Similar to Hall Kier Chi1v, but uses nVal instead of valence
307
308 """
309 delts = numpy.array([_nVal(x) for x in mol.GetAtoms()],'d')
310 res = 0.0
311 for bond in mol.GetBonds():
312 v = delts[bond.GetBeginAtomIdx()]*delts[bond.GetEndAtomIdx()]
313 if v != 0.0:
314 res += numpy.sqrt(1./v)
315 return res
316 Chi1n.version="1.0.0"
318 """ Similar to Hall Kier ChiNv, but uses nVal instead of valence
319 This makes a big difference after we get out of the first row.
320
321 **NOTE**: because the current path finding code does, by design,
322 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
323 length 3), values of ChiNn with N >= 3 may give results that differ
324 from those provided by the old code in molecules that have rings of
325 size 3.
326
327 """
328 deltas = numpy.array([_nVal(x) for x in mol.GetAtoms()],'d')
329 accum = 0.0
330 for path in Chem.FindAllPathsOfLengthN(mol,order+1,useBonds=0):
331 cAccum = 1.0
332 for idx in path:
333 cAccum *= deltas[idx]
334 if cAccum:
335 accum += 1./numpy.sqrt(cAccum)
336 return accum
337
339 """ Similar to Hall Kier Chi2v, but uses nVal instead of valence
340 This makes a big difference after we get out of the first row.
341
342 """
343 return ChiNn_(mol,2)
344 Chi2n.version="1.0.0"
345
347 """ Similar to Hall Kier Chi3v, but uses nVal instead of valence
348 This makes a big difference after we get out of the first row.
349
350 """
351 return ChiNn_(mol,3)
352 Chi3n.version="1.0.0"
353
355 """ Similar to Hall Kier Chi4v, but uses nVal instead of valence
356 This makes a big difference after we get out of the first row.
357
358
359 **NOTE**: because the current path finding code does, by design,
360 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
361 length 3), values of Chi4n may give results that differ from those
362 provided by the old code in molecules that have 3 rings.
363
364 """
365 return ChiNn_(mol,4)
366 Chi4n.version="1.0.0"
367
368
369 -def BalabanJ(mol,dMat=None,forceDMat=0):
370 """ Calculate Balaban's J value for a molecule
371
372 **Arguments**
373
374 - mol: a molecule
375
376 - dMat: (optional) a distance/adjacency matrix for the molecule, if this
377 is not provide, one will be calculated
378
379 - forceDMat: (optional) if this is set, the distance/adjacency matrix
380 will be recalculated regardless of whether or not _dMat_ is provided
381 or the molecule already has one
382
383 **Returns**
384
385 - a float containing the J value
386
387 We follow the notation of Balaban's paper:
388 Chem. Phys. Lett. vol 89, 399-404, (1982)
389
390 """
391
392 if forceDMat or dMat is None:
393 if forceDMat:
394
395 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1)
396 mol._balabanMat = dMat
397 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
398 mol._adjMat = adjMat
399 else:
400 try:
401
402 dMat = mol._balabanMat
403 except AttributeError:
404
405 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=0,prefix="Balaban")
406
407 mol._balabanMat = dMat
408 try:
409 adjMat = mol._adjMat
410 except AttributeError:
411 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
412 mol._adjMat = adjMat
413 else:
414 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
415
416 s = _VertexDegrees(dMat)
417 q = _NumAdjacencies(mol,dMat)
418 n = mol.GetNumAtoms()
419 mu = q - n + 1
420
421 sum = 0.
422 nS = len(s)
423 for i in range(nS):
424 si = s[i]
425 for j in range(i,nS):
426 if adjMat[i,j] == 1:
427 sum += 1./numpy.sqrt(si*s[j])
428
429 if mu+1 != 0:
430 J = float(q) / float(mu + 1) * sum
431 else:
432 J = 0
433
434 return J
435 BalabanJ.version="1.0.0"
436
437
438
439
440
441
442
443
445 """
446 Used by BertzCT
447
448 vdList: the number of neighbors each atom has
449 bdMat: "balaban" distance matrix
450
451 """
452 if forceBDMat:
453 bdMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1,
454 prefix="Balaban")
455 mol._balabanMat = bdMat
456
457 atomIdx = 0
458 keysSeen = []
459 symList = [0]*numAtoms
460 for i in range(numAtoms):
461 tmpList = bdMat[i].tolist()
462 tmpList.sort()
463 theKey = tuple(['%.4f'%x for x in tmpList[:cutoff]])
464 try:
465 idx = keysSeen.index(theKey)
466 except ValueError:
467 idx = len(keysSeen)
468 keysSeen.append(theKey)
469 symList[i] = idx+1
470 return tuple(symList)
471
473 """
474 Used by BertzCT
475 """
476 if atom1Id < atom2Id:
477 theKey = (atom1Id,atom2Id)
478 else:
479 theKey = (atom2Id,atom1Id)
480 tmp = bondDic[theKey]
481 if tmp == Chem.BondType.AROMATIC:
482 tmp = 1.5
483 else:
484 tmp = float(tmp)
485
486 return tmp
487
488
490 """
491 Used by BertzCT
492 """
493 connectionList = connectionDict.values()
494 totConnections = sum(connectionList)
495 connectionIE = totConnections*(entropy.InfoEntropy(numpy.array(connectionList)) +
496 math.log(totConnections)/_log2val)
497 atomTypeList = atomTypeDict.values()
498 atomTypeIE = numAtoms*entropy.InfoEntropy(numpy.array(atomTypeList))
499 return atomTypeIE + connectionIE
500
502 """ _Internal Use Only_
503 Used by BertzCT
504
505 """
506 bondDict = {}
507 nList = [None]*numAtoms
508 vdList = [0]*numAtoms
509 for aBond in mol.GetBonds():
510 atom1=aBond.GetBeginAtomIdx()
511 atom2=aBond.GetEndAtomIdx()
512 if atom1>atom2: atom2,atom1=atom1,atom2
513 if not aBond.GetIsAromatic():
514 bondDict[(atom1,atom2)] = aBond.GetBondType()
515 else:
516
517 bondDict[(atom1,atom2)] = Chem.BondType.AROMATIC
518 if nList[atom1] is None:
519 nList[atom1] = [atom2]
520 elif atom2 not in nList[atom1]:
521 nList[atom1].append(atom2)
522 if nList[atom2] is None:
523 nList[atom2] = [atom1]
524 elif atom1 not in nList[atom2]:
525 nList[atom2].append(atom1)
526
527 for i,element in enumerate(nList):
528 try:
529 element.sort()
530 vdList[i] = len(element)
531 except:
532 vdList[i] = 0
533 return bondDict, nList, vdList
534
535 -def BertzCT(mol, cutoff = 100, dMat = None, forceDMat = 1):
536 """ A topological index meant to quantify "complexity" of molecules.
537
538 Consists of a sum of two terms, one representing the complexity
539 of the bonding, the other representing the complexity of the
540 distribution of heteroatoms.
541
542 From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)
543
544 "cutoff" is an integer value used to limit the computational
545 expense. A cutoff value tells the program to consider vertices
546 topologically identical if their distance vectors (sets of
547 distances to all other vertices) are equal out to the "cutoff"th
548 nearest-neighbor.
549
550 **NOTE** The original implementation had the following comment:
551 > this implementation treats aromatic rings as the
552 > corresponding Kekule structure with alternating bonds,
553 > for purposes of counting "connections".
554 Upon further thought, this is the WRONG thing to do. It
555 results in the possibility of a molecule giving two different
556 CT values depending on the kekulization. For example, in the
557 old implementation, these two SMILES:
558 CC2=CN=C1C3=C(C(C)=C(C=N3)C)C=CC1=C2C
559 CC3=CN=C2C1=NC=C(C)C(C)=C1C=CC2=C3C
560 which correspond to differentk kekule forms, yield different
561 values.
562 The new implementation uses consistent (aromatic) bond orders
563 for aromatic bonds.
564
565 THIS MEANS THAT THIS IMPLEMENTATION IS NOT BACKWARDS COMPATIBLE.
566
567 Any molecule containing aromatic rings will yield different
568 values with this implementation. The new behavior is the correct
569 one, so we're going to live with the breakage.
570
571 **NOTE** this barfs if the molecule contains a second (or
572 nth) fragment that is one atom.
573
574 """
575 atomTypeDict = {}
576 connectionDict = {}
577 numAtoms = mol.GetNumAtoms()
578 if forceDMat or dMat is None:
579 if forceDMat:
580
581 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
582 mol._adjMat = dMat
583 else:
584 try:
585 dMat = mol._adjMat
586 except AttributeError:
587 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
588 mol._adjMat = dMat
589
590 if numAtoms < 2:
591 return 0
592
593 bondDict, neighborList, vdList = _CreateBondDictEtc(mol, numAtoms)
594 symmetryClasses = _AssignSymmetryClasses(mol, vdList, dMat, forceDMat, numAtoms, cutoff)
595
596 for atomIdx in range(numAtoms):
597 hingeAtomNumber = mol.GetAtomWithIdx(atomIdx).GetAtomicNum()
598 atomTypeDict[hingeAtomNumber] = atomTypeDict.get(hingeAtomNumber,0)+1
599
600 hingeAtomClass = symmetryClasses[atomIdx]
601 numNeighbors = vdList[atomIdx]
602 for i in range(numNeighbors):
603 neighbor_iIdx = neighborList[atomIdx][i]
604 NiClass = symmetryClasses[neighbor_iIdx]
605 bond_i_order = _LookUpBondOrder(atomIdx, neighbor_iIdx, bondDict)
606
607 if (bond_i_order > 1) and (neighbor_iIdx > atomIdx):
608 numConnections = bond_i_order*(bond_i_order - 1)/2
609 connectionKey = (min(hingeAtomClass, NiClass), max(hingeAtomClass, NiClass))
610 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
611
612 for j in range(i+1, numNeighbors):
613 neighbor_jIdx = neighborList[atomIdx][j]
614 NjClass = symmetryClasses[neighbor_jIdx]
615 bond_j_order = _LookUpBondOrder(atomIdx, neighbor_jIdx, bondDict)
616 numConnections = bond_i_order*bond_j_order
617 connectionKey = (min(NiClass, NjClass), hingeAtomClass, max(NiClass, NjClass))
618 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
619
620 if not connectionDict:
621 connectionDict = {'a':1}
622
623 ks = connectionDict.keys()
624 ks.sort()
625 return _CalculateEntropies(connectionDict, atomTypeDict, numAtoms)
626 BertzCT.version="2.0.0"
627
628
629
630
631
632
633
634
635
636
637