Package Chem :: Package Fingerprints :: Module SimilarityScreener
[hide private]
[frames | no frames]

Source Code for Module Chem.Fingerprints.SimilarityScreener

  1  # $Id: SimilarityScreener.py 2 2006-05-06 22:54:39Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2006 Greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  """ class definitions for similarity screening 
  8   
  9  See _SimilarityScreener_ for overview of required API 
 10   
 11  """ 
 12  import DataStructs 
 13  from DataStructs import TopNContainer 
 14  import RDConfig 
 15   
class SimilarityScreener(object):
    """ base class for the similarity screeners

      important attributes:
        probe: the probe fingerprint against which we screen.

        metric: a function that takes two arguments and returns a similarity
                measure between them

        dataSource: the source pool from which to draw, needs to support
                a next() method

        fingerprinter: a function that takes a molecule and returns a
               fingerprint of the appropriate format


      **Notes**
         subclasses must support either an iterator interface
         or __len__ and __getitem__
    """

    def __init__(self, probe=None, metric=None, dataSource=None, fingerprinter=None):
        """ stores the four collaborators as attributes; nothing is validated here """
        self.probe = probe
        self.metric = metric
        self.dataSource = dataSource
        self.fingerprinter = fingerprinter

    def Reset(self):
        """ used to reset screeners that behave as iterators

          The base class keeps no iteration state, so this is a no-op.
        """
        return None

    # FIX: add setters/getters for attributes
    def SetProbe(self, probeFingerprint):
        """ replaces the probe fingerprint we screen against """
        self.probe = probeFingerprint

    def GetSingleFingerprint(self, probe):
        """ runs our fingerprinter on a single probe object and returns the result

          This is potentially useful in initializing our internal
          probe object.

        """
        fingerprint = self.fingerprinter(probe)
        return fingerprint
60
class ThresholdScreener(SimilarityScreener):
    """ Used to return all compounds that have a similarity
      to the probe beyond a threshold value

     **Notes**:

       - This is as lazy as possible, so the data source isn't
         queried until the client asks for a hit.

       - In addition to being lazy, this class is as thin as possible.
         (Who'd have thought it was possible!)
         Hits are *not* stored locally, so if a client resets
         the iteration and starts over, the same amount of work must
         be done to retrieve the hits.

       - The thinness and laziness forces us to support only forward
         iteration (not random access)

       - Both next() and __next__() are provided, so the screener can be
         used as an iterator under either iterator protocol.

    """

    def __init__(self, threshold, **kwargs):
        """ threshold: minimum similarity (inclusive) for an object to be a hit

          Remaining keyword arguments are forwarded to
          SimilarityScreener.__init__.  An iterator over the data source is
          created immediately, so dataSource must be iterable at this point.
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.threshold = threshold
        self.dataIter = iter(self.dataSource)

    # FIX: add setters/getters for attributes

    def _nextMatch(self):
        """ *Internal use only*

          Pulls objects from the data iterator, fingerprints each one and
          compares it to the probe, stopping at the first object whose
          similarity is >= the threshold.

          Returns a 2-tuple: (similarity, object)

          Raises StopIteration when the data source runs out before a
          match is found.
        """
        # Looping over the stored iterator (rather than calling its
        # version-specific next method) resumes where the previous call
        # stopped and does not depend on the interpreter version.
        for obj in self.dataIter:
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            if sim >= self.threshold:
                return sim, obj
        # data source exhausted: that's how we stop when no match is found
        raise StopIteration

    def Reset(self):
        """ used to reset our internal state so that iteration
          starts again from the beginning

          Calls reset() on the data source, then builds a fresh iterator
          over it.
        """
        self.dataSource.reset()
        self.dataIter = iter(self.dataSource)

    def __iter__(self):
        """ returns an iterator for this screener

          Note that this resets the data source first, so every
          "for hit in screener" loop starts from the beginning.
        """
        self.Reset()
        return self

    def next(self):
        """ required part of iterator interface """
        return self._nextMatch()

    def __next__(self):
        """ iterator interface under the __next__ spelling; same as next() """
        return self.next()
116
class TopNScreener(SimilarityScreener):
    """ A screener that only returns the top N hits found

      **Notes**

        - supports forward iteration and getitem

        - the whole data source is scored the first time results are
          needed (iteration, len() or indexing); the results are then
          cached in self.topN

    """

    def __init__(self, num, **kwargs):
        """ num: the number of hits to keep

          Remaining keyword arguments are forwarded to
          SimilarityScreener.__init__.  No screening is done here.
        """
        SimilarityScreener.__init__(self, **kwargs)
        self.numToGet = num
        self.topN = None  # filled lazily by _initTopN()
        self._pos = 0     # cursor used by next()

    def Reset(self):
        """ rewinds the iteration cursor; cached hits are kept """
        self._pos = 0

    def __iter__(self):
        """ returns an iterator for this screener, restarting from the first hit """
        if self.topN is None:
            self._initTopN()
        self.Reset()
        return self

    def next(self):
        """ returns the entry at the current cursor position and advances

          Raises StopIteration once numToGet entries have been returned.
        """
        if self._pos >= self.numToGet:
            raise StopIteration
        # Same lazy initialisation as __iter__/__len__/__getitem__, so that
        # calling next() on a fresh screener does not index into None.
        if self.topN is None:
            self._initTopN()
        res = self.topN[self._pos]
        self._pos += 1
        return res

    def __next__(self):
        """ iterator interface under the __next__ spelling; same as next() """
        return self.next()

    def _initTopN(self):
        """ *Internal use only*

          Scores every object in the data source against the probe and
          inserts (similarity, object) into a TopNContainer of size numToGet.
        """
        self.topN = TopNContainer.TopNContainer(self.numToGet)
        for obj in self.dataSource:
            fp = self.fingerprinter(obj)
            sim = DataStructs.FingerprintSimilarity(fp, self.probe, self.metric)
            self.topN.Insert(sim, obj)

    def __len__(self):
        """ returns numToGet (the requested number of hits), after making
          sure the screen has been run
        """
        if self.topN is None:
            self._initTopN()
        return self.numToGet

    def __getitem__(self, idx):
        """ returns the container entry at idx, running the screen first if needed

          NOTE(review): the entry format is whatever TopNContainer's own
          __getitem__ returns -- presumably a (similarity, object) pair; confirm.
        """
        if self.topN is None:
            self._initTopN()
        return self.topN[idx]
162