Package rdkit :: Package Chem :: Package Pharm2D :: Module LazyGenerator
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Pharm2D.LazyGenerator

  1  # $Id: LazyGenerator.py 1528 2010-09-26 17:04:37Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  raise NotImplementedError,'not finished yet' 
 12  """ lazy generator of 2D pharmacophore signature data 
 13   
 14  """ 
 15  import rdkit.Chem 
 16  from rdkit.Chem.Pharm2D import SigFactory,Matcher,Utils 
 17   
class Generator(object):
    """Lazily evaluates single bits of a 2D pharmacophore signature.

    Important attributes:

     - mol: the molecule whose signature is being worked with

     - sigFactory: the SigFactory object with signature parameters
       NOTE: no preprocessing is carried out for _sigFactory_.
       It *must* be pre-initialized.

     - dMat: the distance matrix used for the molecule

     - bits: dict mapping already-queried bit indices to 0/1, or None
       when caching is disabled

     - pattMatches: one list per retained feature family (families listed
       in sigFactory.skipFeats are dropped); each entry holds the atom ids
       of the molecule's features from that family

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

        **Arguments**

          - sigFactory: a signature factory, see class docs

          - mol: a molecule, see class docs

          - dMat: (optional) a distance matrix for the molecule.  If this
            is not provided, one will be calculated

          - bitCache: (optional) if nonzero, a local cache of which bits
            have been queried will be maintained.  Otherwise things must
            be recalculated each time a bit is queried.

        **Raises**

          - ValueError: if sigFactory is not a SigFactory.SigFactory

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol

        if dMat is None:
            useBO = sigFactory.includeBondOrder
            # The module only does ``import rdkit.Chem``, so the bare name
            # ``Chem`` is not bound at module level; use the qualified name.
            dMat = rdkit.Chem.GetDistanceMatrix(mol, useBO)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        featFactory = sigFactory.featFactory
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in sigFactory.skipFeats]

        # Group the atom ids of every retained feature by its family.
        featMatches = dict((fam, []) for fam in featFamilies)
        for feat in featFactory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family not in sigFactory.skipFeats:
                featMatches[family].append(feat.GetAtomIds())

        # NOTE(review): the list follows the order of featFamilies as built
        # above; confirm this matches the feature indexing that
        # Matcher.GetAtomsMatchingBit expects for matchingAtoms.
        self.pattMatches = [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns 1 if the bit is set for the molecule, 0 otherwise

        **Arguments**

          - idx: index of the bit to be queried

        **Raises**

          - IndexError: if idx is negative or not below the signature size

        """
        # NOTE(review): the previous code read ``self.sig``, which the
        # constructor never assigned.  The factory is used instead, on the
        # assumption that SigFactory provides GetSigSize() and that
        # Matcher.GetAtomsMatchingBit takes the factory as its first
        # argument -- confirm against the RDKit version in use.
        if idx < 0 or idx >= self.sigFactory.GetSigSize():
            raise IndexError('Index %d invalid' % (idx))
        if self.bits is not None and idx in self.bits:
            return self.bits[idx]

        matches = Matcher.GetAtomsMatchingBit(self.sigFactory, idx, self.mol,
                                              dMat=self.dMat, justOne=1,
                                              matchingAtoms=self.pattMatches)
        res = 1 if matches else 0

        if self.bits is not None:
            self.bits[idx] = res
        return res

    def __len__(self):
        """ allows class to support len()

        """
        return self.sigFactory.GetSigSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
          Calls self.GetBit()

        """
        return self.GetBit(itm)
if __name__ == '__main__':
    # Timing comparison: query a fixed random set of bits for each molecule
    # through the lazy Generator, then through fully generated fingerprints.
    import os
    import random
    import time

    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    smiFileName = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
    # Close the file deterministically instead of leaking the handle.
    with open(smiFileName, 'r') as inF:
        inD = inF.readlines()[:nToDo]

    # The SMILES is the first tab-separated field.  str.strip() returns a new
    # string, so its result has to be used (it was discarded before).
    mols = [Chem.MolFromSmiles(line.split('\t')[0].strip()) for line in inD]

    # NOTE(review): assumes the object returned by GetSignature() exposes
    # GetSize(); confirm against the RDKit version in use.
    sig = factory.GetSignature()

    nBits = 300
    random.seed(23)
    bits = [random.randint(0, sig.GetSize() - 1) for x in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        gen = Generator(factory, mol)
        for bit in bits:
            v = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        sig = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            v = sig[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))