1
2
3
4
5
6
7
8
9
10
11 """ Functionality for SATIS typing atoms
12
13 """
14 from rdkit import Chem
15
16 _debug = 0
17
18
19
20
21
22 aldehydePatt = Chem.MolFromSmarts('[CD2]=[OD1]')
23 ketonePatt = Chem.MolFromSmarts('[CD3]=[OD1]')
24 amidePatt = Chem.MolFromSmarts('[CD3](=[OD1])-[#7]')
25 esterPatt = Chem.MolFromSmarts('C(=[OD1])-O-[#6]')
26 carboxylatePatt = Chem.MolFromSmarts('C(=[OD1])-[OX1]')
27 carboxylPatt = Chem.MolFromSmarts('C(=[OD1])-[OX2]')
28
29 specialCases = ((carboxylatePatt,97),
30 (esterPatt,96),
31 (carboxylPatt,98),
32 (amidePatt,95),
33 (ketonePatt,94),
34 (aldehydePatt,93))
35
36
38 """ returns SATIS codes for all atoms in a molecule
39
40 The SATIS definition used is from:
41 J. Chem. Inf. Comput. Sci. _39_ 751-757 (1999)
42
43 each SATIS code is a string consisting of _neighborsToInclude_ + 1
44 2 digit numbers
45
46 **Arguments**
47
48 - mol: a molecule
49
50 - neighborsToInclude (optional): the number of neighbors to include
51 in the SATIS codes
52
53 **Returns**
54
55 a list of strings nAtoms long
56
57 """
58 global specialCases
59
60 nAtoms = mol.GetNumAtoms()
61 atomicNums = [0]*nAtoms
62 atoms = mol.GetAtoms()
63 for i in xrange(nAtoms):
64 atomicNums[i] = atoms[i].GetAtomicNum()
65
66 nSpecialCases = len(specialCases)
67 specialCaseMatches = [None]*nSpecialCases
68 for i,(patt,idx) in enumerate(specialCases):
69 if mol.HasSubstructMatch(patt):
70 specialCaseMatches[i] = mol.GetSubstructMatches(patt)
71 else:
72 specialCaseMatches[i] = ()
73
74 codes = [None]*nAtoms
75 for i in range(nAtoms):
76 code = [99]*(neighborsToInclude+1)
77 atom = atoms[i]
78 atomIdx = atom.GetIdx()
79 code[0] = min(atom.GetAtomicNum(),99)
80 bonds = atom.GetBonds()
81 nBonds = len(bonds)
82 otherIndices = [-1]*nBonds
83 if _debug: print code[0],
84 for j in range(nBonds):
85 otherIndices[j] = bonds[j].GetOtherAtom(atom).GetIdx()
86 if _debug: print otherIndices[j],
87 if _debug: print
88 otherNums = [atomicNums[x] for x in otherIndices] + \
89 [1]*atom.GetTotalNumHs()
90 otherNums.sort()
91
92 nOthers = len(otherNums)
93 if nOthers > neighborsToInclude:
94 otherNums.reverse()
95 otherNums = otherNums[:neighborsToInclude]
96 otherNums.reverse()
97 for j in range(neighborsToInclude):
98 code[j+1] = min(otherNums[j],99)
99 else:
100 for j in range(nOthers):
101 code[j+1] = min(otherNums[j],99)
102 if nOthers < neighborsToInclude and code[0] in [6,8]:
103 found = 0
104 for j in range(nSpecialCases):
105 for matchTuple in specialCaseMatches[j]:
106 if atomIdx in matchTuple:
107 code[-1] = specialCases[j][1]
108 found = 1
109 break
110 if found:
111 break
112
113 codes[i] = ''.join(['%02d'%(x) for x in code])
114 return codes
115
116 if __name__ == '__main__':
117 smis = ['CC(=O)NC','CP(F)(Cl)(Br)(O)',
118 'O=CC(=O)C','C(=O)OCC(=O)O','C(=O)[O-]']
119 for smi in smis:
120 print smi
121 m = Chem.MolFromSmiles(smi)
122 codes = SATISTypes(m)
123 print codes
124