1
2
3
4
5
6
7 """ Contains an implementation of Atom-pair fingerprints, as
8 described in:
9
10 R.E. Carhart, D.H. Smith, R. Venkataraghavan;
11 "Atom Pairs as Molecular Features in Structure-Activity Studies:
12 Definition and Applications" JCICS 25, 64-73 (1985).
13
14 """
15 from DataStructs import IntSparseIntVect
16 import Chem
17 from Chem import rdMolDescriptors
18 from Chem.AtomPairs import Utils
19 import DataStructs
20
# Public aliases: both names are bound to the same rdMolDescriptors function.
GetAtomPairFingerprint = rdMolDescriptors.GetAtomPairFingerprint
GetAtomPairFingerprintAsIntVect = rdMolDescriptors.GetAtomPairFingerprint

# Bit layout of a pair code, lowest bits first (see pyScorePair): the path
# length occupies numPathBits bits and is followed by two atom codes, each
# shifted by a further codeSize bits.
numPathBits = rdMolDescriptors.AtomPairsParameters.numPathBits
_maxPathLen = (1 << numPathBits) - 1
numFpBits = numPathBits + 2 * rdMolDescriptors.AtomPairsParameters.codeSize
# Plain int literal on purpose: the ``1L`` long suffix is a syntax error on
# Python 3, and Python 2 promotes the shifted value to a long when needed.
fpLen = 1 << numFpBits
28
def pyScorePair(at1, at2, dist, atomCodes=None):
    """ Returns a score for an individual atom pair.

    The score is a single integer built from three fields:

      - the path length (``int(dist) % _maxPathLen``) in the lowest bits,

      - the smaller of the two atom codes, shifted left by numPathBits,

      - the larger of the two atom codes, shifted left by
        codeSize + numPathBits.

    Because the codes are ordered with min()/max(), the result does not
    depend on the order in which the two atoms are passed.

    **Arguments**:

      - at1, at2: the two atoms; only consulted (via Utils.GetAtomCode)
        when atomCodes is not provided

      - dist: the path length between the two atoms

      - atomCodes: (optional) a pair of precomputed atom codes; when it is
        None or empty the codes are computed from at1 and at2

    **Returns**: the integer pair score

    >>> m = Chem.MolFromSmiles('CCCCC')
    >>> c1 = Utils.GetAtomCode(m.GetAtomWithIdx(0))
    >>> c2 = Utils.GetAtomCode(m.GetAtomWithIdx(1))
    >>> c3 = Utils.GetAtomCode(m.GetAtomWithIdx(2))
    >>> t = 1 | min(c1,c2)<<numPathBits | max(c1,c2)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits)
    >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)==t
    True
    >>> pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(0),1)==t
    True
    >>> t = 2 | min(c1,c3)<<numPathBits | max(c1,c3)<<(rdMolDescriptors.AtomPairsParameters.codeSize+numPathBits)
    >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2)==t
    True
    >>> pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2,
    ...  atomCodes=(Utils.GetAtomCode(m.GetAtomWithIdx(0)),Utils.GetAtomCode(m.GetAtomWithIdx(2))))==t
    True

    """
    if atomCodes:
        code1, code2 = atomCodes
    else:
        # No usable precomputed codes: derive them from the atoms themselves.
        code1 = Utils.GetAtomCode(at1)
        code2 = Utils.GetAtomCode(at2)

    codeSize = rdMolDescriptors.AtomPairsParameters.codeSize
    # Order the codes so that (at1, at2) and (at2, at1) give the same score.
    lowCode = min(code1, code2)
    highCode = max(code1, code2)

    accum = int(dist) % _maxPathLen
    accum |= lowCode << numPathBits
    accum |= highCode << (codeSize + numPathBits)
    return accum
58
def ExplainPairScore(score):
    """ Decodes an atom-pair score into its three components.

    The score is split using the same bit layout that pyScorePair writes:
    the lowest numPathBits bits hold the path length, the next codeSize
    bits hold the first atom code and the codeSize bits above that hold
    the second atom code.

    **Arguments**:

      - score: an integer atom-pair score

    **Returns**: a 3-tuple
      (Utils.ExplainAtomCode(code1), dist, Utils.ExplainAtomCode(code2))

    >>> m = Chem.MolFromSmiles('C=CC')
    >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1)
    >>> ExplainPairScore(score)
    (('C', 1, 1), 1, ('C', 2, 1))
    >>> score = pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2)
    >>> ExplainPairScore(score)
    (('C', 1, 0), 2, ('C', 1, 1))
    >>> score = pyScorePair(m.GetAtomWithIdx(1),m.GetAtomWithIdx(2),1)
    >>> ExplainPairScore(score)
    (('C', 1, 0), 1, ('C', 2, 1))
    >>> score = pyScorePair(m.GetAtomWithIdx(2),m.GetAtomWithIdx(1),1)
    >>> ExplainPairScore(score)
    (('C', 1, 0), 1, ('C', 2, 1))

    """
    codeSize = rdMolDescriptors.AtomPairsParameters.codeSize
    codeMask = (1 << codeSize) - 1
    pathMask = (1 << numPathBits) - 1

    # Peel the fields off from the low end of the score.
    dist = score & pathMask
    code1 = (score >> numPathBits) & codeMask
    code2 = (score >> (numPathBits + codeSize)) & codeMask

    return Utils.ExplainAtomCode(code1), dist, Utils.ExplainAtomCode(code2)
87
def GetAtomPairFingerprintAsBitVect(mol):
    """ Returns the Atom-pair fingerprint for a molecule as
    a SparseBitVect. Note that this doesn't match the standard
    definition of atom pairs, which uses counts of the
    pairs, not just their presence.

    **Arguments**:

      - mol: a molecule

    **Returns**: a SparseBitVect

    >>> m = Chem.MolFromSmiles('CCC')
    >>> v = [ pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1),
    ...       pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2),
    ...     ]
    >>> v.sort()
    >>> fp = GetAtomPairFingerprintAsBitVect(m)
    >>> list(fp.GetOnBits())==v
    True

    """
    res = DataStructs.SparseBitVect(fpLen)
    fp = rdMolDescriptors.GetAtomPairFingerprint(mol)
    # Only the keys (the pair codes) matter here: every pair that occurs at
    # least once sets its bit, and the occurrence counts are discarded.
    for val in fp.GetNonzeroElements():
        res.SetBit(val)
    return res
115
116
117
118
119
121 import doctest,sys
122 return doctest.testmod(sys.modules["__main__"])
123
124
if __name__ == '__main__':
    import sys
    failed, tried = _test()
    # The number of failed doctests becomes the exit status (0 == success).
    sys.exit(failed)
129