1
2
3
4
5
6
7
8
9
10
# The module is intentionally disabled: the unconditional raise below makes
# any import or execution fail immediately with 'not finished yet'.
# The call form of raise is used because it parses under Python 2 and 3.
raise NotImplementedError('not finished yet')
""" lazy generator of 2D pharmacophore signature data

"""
15 import rdkit.Chem
16 from rdkit.Chem.Pharm2D import SigFactory,Matcher,Utils
17
class Generator(object):
    """ lazily evaluates individual bits of a 2D pharmacophore signature

    Important attributes:

     - mol: the molecule whose signature is being worked with

     - sigFactory : the SigFactory object with signature parameters
                NOTE: no preprocessing is carried out for _sigFactory_.
                      It *must* be pre-initialized.

     - sig: the signature object used for size checks and bit matching

     - dMat: the distance matrix used when matching bits

     - bits: a dict mapping bit index -> 0/1 for bits that have already been
       queried, or None when caching is disabled

     - pattMatches: one entry per (non-skipped) feature family, each holding
       the atom-id groups of the molecule's features in that family

    **Notes**

     - bits are only evaluated when they are requested, either through
       GetBit() or through indexing (gen[idx])

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

        **Arguments**

         - sigFactory: a signature factory, see class docs

         - mol: a molecule, see class docs

         - dMat: (optional) a distance matrix for the molecule.  If this
           is not provided, one will be calculated

         - bitCache: (optional) if nonzero, a local cache of which bits
           have been queried will be maintained.  Otherwise things must
           be recalculated each time a bit is queried.

        **Raises**

         - ValueError: if sigFactory is not a SigFactory.SigFactory instance

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol

        # GetBit() and __len__() read self.sig, which was never assigned.
        # NOTE(review): obtained the same way the __main__ driver of this
        # file obtains a signature (factory.GetSignature()); confirm against
        # the SigFactory API in use.
        self.sig = sigFactory.GetSignature()

        if dMat is None:
            useBO = sigFactory.includeBondOrder
            # the file-level "import rdkit.Chem" only binds the name "rdkit",
            # so the function has to be reached through the package path
            dMat = rdkit.Chem.GetDistanceMatrix(mol, useBO)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        featFactory = sigFactory.featFactory
        skipFeats = sigFactory.skipFeats
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in skipFeats]

        # collect, per family, the atom ids of every feature found in the
        # molecule; skipped families have no entry and are ignored
        featMatches = dict((fam, []) for fam in featFamilies)
        for feat in featFactory.GetFeaturesForMol(mol):
            fam = feat.GetFamily()
            if fam in featMatches:
                featMatches[fam].append(feat.GetAtomIds())

        # NOTE(review): the previous code discarded the dict built above,
        # refilled the list through GetMolFeature() calls with no arguments,
        # and then assigned an undefined name.  The matches are now stored in
        # feature-family order; confirm that this is the layout that
        # Matcher.GetAtomsMatchingBit expects for matchingAtoms.
        self.pattMatches = [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns a bool indicating whether or not the bit is set

        **Arguments**

         - idx: the index of the bit to query

        **Returns**

          1 if at least one match for the bit is found, 0 otherwise

        **Raises**

         - IndexError: if idx is negative or not smaller than the signature
           size

        """
        if idx < 0 or idx >= self.sig.GetSize():
            raise IndexError('Index %d invalid' % (idx))

        # serve repeated queries from the cache when caching is enabled
        if self.bits is not None and idx in self.bits:
            return self.bits[idx]

        # justOne=1: only the existence of a match matters here
        tmp = Matcher.GetAtomsMatchingBit(self.sig, idx, self.mol,
                                          dMat=self.dMat, justOne=1,
                                          matchingAtoms=self.pattMatches)
        res = 1 if tmp else 0

        if self.bits is not None:
            self.bits[idx] = res
        return res

    def __len__(self):
        """ allows class to support len()

        """
        return self.sig.GetSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
          Calls self.GetBit()

        """
        return self.GetBit(itm)
111
112
113
114
if __name__ == '__main__':
    # Timing comparison: query the same random bits for a set of molecules,
    # first through the lazy Generator, then through fully generated
    # fingerprints.
    import time
    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate
    import random

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    # the context manager closes the file once the lines have been read
    with open(RDConfig.RDDataDir + "/NCI/first_5K.smi", 'r') as inF:
        inD = inF.readlines()[:nToDo]

    mols = []
    for line in inD:
        # the SMILES is the first tab-separated field; strip() returns a new
        # string, so its result has to be kept
        smi = line.split('\t')[0].strip()
        mols.append(Chem.MolFromSmiles(smi))
    nMols = len(mols)

    sig = factory.GetSignature()

    nBits = 300
    # fixed seed so both timing runs query the same bits on every execution
    random.seed(23)
    bits = [random.randint(0, sig.GetSize() - 1) for x in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, nMols))
        gen = Generator(factory, mol)
        for bit in bits:
            # the value is discarded; only the cost of the lookup matters
            gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, nMols))
        fp = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            # the value is discarded; only the cost of the lookup matters
            fp[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))
156