Package rdkit :: Package Chem :: Package Fingerprints :: Module SimilarityScreener
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Fingerprints.SimilarityScreener

  1  # $Id: SimilarityScreener.py 1528 2010-09-26 17:04:37Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2006 Greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ class definitions for similarity screening 
 12   
 13  See _SimilarityScreener_ for overview of required API 
 14   
 15  """ 
 16  from rdkit import DataStructs 
 17  from rdkit.DataStructs import TopNContainer 
 18  from rdkit import RDConfig 
 19   
class SimilarityScreener(object):
    """ Common base class for the similarity screeners.

      Attributes stored on every instance:

        probe: the fingerprint that candidates are compared against.

        metric: a two-argument callable returning a similarity measure
          for a pair of fingerprints.

        dataSource: the pool of candidate objects to draw from; the
          original documentation requires it to support a next() method.

        fingerprinter: a callable that takes a molecule and returns a
          fingerprint of the appropriate format.

      **Notes**

        - subclasses must support either an iterator interface
          or __len__ and __getitem__

    """

    def __init__(self, probe=None, metric=None, dataSource=None,
                 fingerprinter=None):
        """ stores the four collaborators; every one defaults to None """
        self.probe = probe
        self.fingerprinter = fingerprinter
        self.dataSource = dataSource
        self.metric = metric

    def Reset(self):
        """ hook for screeners that behave as iterators

          The base class keeps no iteration state, so this does nothing.

        """
        return None

    # FIX: add setters/getters for attributes
    def SetProbe(self, probeFingerprint):
        """ replaces the probe fingerprint used for comparisons """
        self.probe = probeFingerprint

    def GetSingleFingerprint(self, probe):
        """ runs our fingerprinter on a single probe object

          This is potentially useful for building the value that is
          later handed to SetProbe().

        """
        fingerprint = self.fingerprinter(probe)
        return fingerprint
64
class ThresholdScreener(SimilarityScreener):
    """ Used to return all compounds whose similarity to the probe
      is at or above a threshold value

     **Notes**:

       - This is as lazy as possible, so the data source isn't
         queried until the client asks for a hit.

       - In addition to being lazy, this class is as thin as possible.
         Hits are *not* stored locally, so if a client resets
         the iteration and starts over, the same amount of work must
         be done to retrieve the hits.

       - The thinness and laziness forces us to support only forward
         iteration (not random access)

       - Both the Python 2 (next) and Python 3 (__next__) iterator
         protocols are supported.

    """

    def __init__(self, threshold, **kwargs):
        """ threshold is the minimum similarity (inclusive) for a hit;
          the remaining keyword arguments are passed on to
          SimilarityScreener.__init__
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.threshold = threshold
        self.dataIter = iter(self.dataSource)

    # FIX: add setters/getters for attributes

    def _nextMatch(self):
        """ *Internal use only*

          Pulls objects from the data iterator until one reaches the
          threshold and returns a (similarity, object) 2-tuple for it.

          When the data iterator is exhausted, the StopIteration raised
          by next() propagates to the caller; that is how iteration
          ends when no further match exists.

        """
        while True:
            # the builtin next() works with both Python 2 and Python 3
            # iterators, unlike calling the .next() method directly
            obj = next(self.dataIter)
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            if sim >= self.threshold:
                return sim, obj

    def Reset(self):
        """ used to reset our internal state so that iteration
          starts again from the beginning

          The data source must provide a reset() method.

        """
        self.dataSource.reset()
        self.dataIter = iter(self.dataSource)

    def __iter__(self):
        """ returns an iterator for this screener

          Note that this resets the data source first.

        """
        self.Reset()
        return self

    def next(self):
        """ required part of iterator interface """
        return self._nextMatch()

    # Python 3 spells the iterator method __next__; keep next() so that
    # existing callers continue to work.
    __next__ = next
120
class TopNScreener(SimilarityScreener):
    """ A screener that only returns the top N hits found

      **Notes**

        - supports forward iteration and getitem

        - the data source is not read until the results are first
          needed; the ranked results are then cached in self.topN

        - Both the Python 2 (next) and Python 3 (__next__) iterator
          protocols are supported.

    """

    def __init__(self, num, **kwargs):
        """ num is the number of hits to keep; the remaining keyword
          arguments are passed on to SimilarityScreener.__init__
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.numToGet = num
        self.topN = None  # built lazily by _initTopN()
        self._pos = 0

    def Reset(self):
        """ restarts iteration; the cached results are kept """
        self._pos = 0

    def __iter__(self):
        """ returns an iterator for this screener, building the
          results first if that has not happened yet
        """
        if self.topN is None:
            self._initTopN()
        self.Reset()
        return self

    def next(self):
        """ returns the entry of the results container at the current
          position and advances the position

          Raises StopIteration once numToGet entries have been returned.

        """
        if self._pos >= self.numToGet:
            raise StopIteration
        # build the results on demand, as __len__ and __getitem__ do, so
        # that next() also works when __iter__ was never called
        if self.topN is None:
            self._initTopN()
        res = self.topN[self._pos]
        self._pos += 1
        return res

    # Python 3 spells the iterator method __next__; keep next() so that
    # existing callers continue to work.
    __next__ = next

    def _initTopN(self):
        """ *Internal use only*

          Fingerprints every object in the data source, scores it
          against the probe and inserts (similarity, object) into a
          TopNContainer of size numToGet.

        """
        self.topN = TopNContainer.TopNContainer(self.numToGet)
        for obj in self.dataSource:
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            self.topN.Insert(sim, obj)

    def __len__(self):
        """ returns numToGet (the requested number of hits), building
          the results first if necessary
        """
        if self.topN is None:
            self._initTopN()
        return self.numToGet

    def __getitem__(self, idx):
        """ returns entry idx of the results container, building the
          results first if necessary
        """
        if self.topN is None:
            self._initTopN()
        return self.topN[idx]
166