1
2
3
4
5
6
7
8
9
10
11
12
13 """ EState fingerprinting
14
15 """
16 import numpy
17 from rdkit.Chem.EState import EStateIndices
18 from rdkit.Chem.EState import AtomTypes
19
def FingerprintMol(mol):
    """Generate the EState fingerprint for a molecule.

    Concept from the paper: Hall and Kier JCICS _35_ 1039-1045 (1995)

    Arguments:
      - mol: the molecule to fingerprint; it is handed to EStateIndices() and
        queried with GetSubstructMatches() for every EState atom-type pattern.

    Returns:
      a 2-tuple (counts, sums) of numpy arrays, each with one entry per
      pattern in AtomTypes.esPatterns:
        - counts (ints): the number of times each possible atom type is hit
        - sums (floats): the sum of the EState indices for atoms of each type
    """
    # Build the atom-type pattern table if it has not been built yet.
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()
    esIndices = EStateIndices(mol)

    nPatts = len(AtomTypes.esPatterns)
    # Use the builtin types as dtypes: the numpy.int / numpy.float aliases
    # were deprecated in NumPy 1.20 and removed in NumPy 1.24.
    counts = numpy.zeros(nPatts, int)
    sums = numpy.zeros(nPatts, float)

    for i, (_name, pattern) in enumerate(AtomTypes.esPatterns):
        matches = mol.GetSubstructMatches(pattern, uniquify=1)
        counts[i] = len(matches)
        for match in matches:
            # Only the first atom of each match contributes its EState index.
            sums[i] += esIndices[match[0]]
    return counts, sums
45
46
if __name__ == '__main__':
    # Demo: for a few SMILES, print the atom types, the non-empty fingerprint
    # entries and the per-atom EState indices.
    from rdkit import Chem
    smis = ['CC', 'CCC', 'c1[nH]cnc1CC(N)C(O)=O', 'NCCc1ccc(O)c(O)c1']
    for smi in smis:
        m = Chem.MolFromSmiles(smi)
        # Each print gets one pre-formatted string so the output is identical
        # under the Python 2 print statement and the Python 3 print function.
        print('%s %s' % (smi, Chem.MolToSmiles(m)))
        types = AtomTypes.TypeAtoms(m)
        for i in range(m.GetNumAtoms()):
            print('%d %4s: %s' % (i + 1, m.GetAtomWithIdx(i).GetSymbol(), str(types[i])))
        es = EStateIndices(m)
        counts, sums = FingerprintMol(m)
        for i, (name, _patt) in enumerate(AtomTypes.esPatterns):
            # Only report atom types that actually occur in the molecule.
            if counts[i]:
                print('%6s, % 2d, % 5.4f' % (name, counts[i], sums[i]))
        for i in range(len(es)):
            print('% 2d, % 5.4f' % (i + 1, es[i]))
        print('--------')
65