BitOps.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2008 greg Landrum and Rational Discovery LLC
00003 //
00004 //  @@ All Rights Reserved @@
00005 //
00006 #ifndef __RD_BITOPS_H__
00007 #define __RD_BITOPS_H__
00008 /*! \file BitOps.h
00009 
00010   \brief Contains general bit-comparison and similarity operations.
00011 
00012   The notation used to document the similarity metrics is:
00013     - \c V1_n: number of bits in vector 1
00014     - \c V1_o: number of on bits in vector 1
00015     - <tt>(V1&V2)_o</tt>: number of on bits in the intersection of vectors 1 and 2
00016   
00017  */
00018 
00019 #include "BitVects.h"
00020 #include <string>
00021 
00022 
//! general purpose wrapper for calculating the similarity between two bvs
//! that may be of unequal size (will automatically fold as appropriate)
/*!
  When the two vectors report different sizes via \c GetNumBits(), the longer
  one is folded with FoldFingerprint() by the integer quotient of the two
  sizes and the metric is evaluated on the folded copy. The arguments
  themselves are never modified.

  \param bv1            the first bit vector
  \param bv2            the second bit vector
  \param metric         the similarity function applied to the (possibly
                        folded) pair
  \param returnDistance (optional) if true, <tt>1 - metric</tt> is returned
                        instead of the metric value itself

  \return the value produced by \c metric, or one minus that value when
     \c returnDistance is set

  <b>Note:</b> the temporary created by folding is released even if
     \c metric throws.
*/
template <typename T>
double SimilarityWrapper(const T &bv1,const T &bv2,
                         const double (*metric)(const T &,const T &),
                         bool returnDistance=false){
  // By default the metric sees the caller's vectors; whichever side is
  // longer gets redirected to a folded copy below.
  const T *lhs = &bv1;
  const T *rhs = &bv2;
  T *folded = 0;  // owns the folded temporary, if one is created

  if(bv1.GetNumBits()>bv2.GetNumBits()){
    folded = FoldFingerprint(bv1,bv1.GetNumBits()/bv2.GetNumBits());
    lhs = folded;
  } else if(bv2.GetNumBits()>bv1.GetNumBits()){
    folded = FoldFingerprint(bv2,bv2.GetNumBits()/bv1.GetNumBits());
    rhs = folded;
  }

  double res=0.0;
  try {
    res = metric(*lhs,*rhs);
  } catch(...) {
    // do not leak the folded copy when the metric reports an error
    delete folded;
    throw;
  }
  delete folded;  // deleting a null pointer is a no-op (equal-size case)

  if(returnDistance) res = 1.0-res;
  return res;
}
//! \overload
/*!
  Variant for metrics that take two additional \c double parameters.
  When the two vectors report different sizes via \c GetNumBits(), the longer
  one is folded with FoldFingerprint() by the integer quotient of the two
  sizes and the metric is evaluated on the folded copy. The arguments
  themselves are never modified.

  \param bv1            the first bit vector
  \param bv2            the second bit vector
  \param a              first extra argument, forwarded unchanged to \c metric
  \param b              second extra argument, forwarded unchanged to \c metric
  \param metric         the similarity function applied to the (possibly
                        folded) pair together with \c a and \c b
  \param returnDistance (optional) if true, <tt>1 - metric</tt> is returned
                        instead of the metric value itself

  \return the value produced by \c metric, or one minus that value when
     \c returnDistance is set

  <b>Note:</b> the temporary created by folding is released even if
     \c metric throws.
*/
template <typename T>
double SimilarityWrapper(const T &bv1,const T &bv2,double a,double b,
                         const double (*metric)(const T &,const T &,double,double),
                         bool returnDistance=false){
  // By default the metric sees the caller's vectors; whichever side is
  // longer gets redirected to a folded copy below.
  const T *lhs = &bv1;
  const T *rhs = &bv2;
  T *folded = 0;  // owns the folded temporary, if one is created

  if(bv1.GetNumBits()>bv2.GetNumBits()){
    folded = FoldFingerprint(bv1,bv1.GetNumBits()/bv2.GetNumBits());
    lhs = folded;
  } else if(bv2.GetNumBits()>bv1.GetNumBits()){
    folded = FoldFingerprint(bv2,bv2.GetNumBits()/bv1.GetNumBits());
    rhs = folded;
  }

  double res=0.0;
  try {
    res = metric(*lhs,*rhs,a,b);
  } catch(...) {
    // do not leak the folded copy when the metric reports an error
    delete folded;
    throw;
  }
  delete folded;  // deleting a null pointer is a no-op (equal-size case)

  if(returnDistance) res = 1.0-res;
  return res;
}
00064 
00065 
00066 bool AllProbeBitsMatch(const char *probe,const char *ref);
00067 bool AllProbeBitsMatch(const std::string &probe,const std::string &ref);
00068 
00069   
00070 template <typename T1>
00071 bool AllProbeBitsMatch(const T1 &probe,const std::string &pkl);
00072 
00073 
00074 //! returns the number of on bits in common between two bit vectors
00075 /*!
00076   \return (bv1&bv2)_o
00077 */
00078 template <typename T1, typename T2>
00079 int
00080 NumOnBitsInCommon(const T1& bv1,const T2& bv2);
00081 
00082 int
00083 NumOnBitsInCommon(const ExplicitBitVect & bv1,const ExplicitBitVect & bv2);
00084 
00085 //! returns the Tanimoto similarity between two bit vects
00086 /*!
00087   \return <tt>(bv1&bv2)_o / [bv1_o + bv2_o - (bv1&bv2)_o]</tt>
00088 */
00089 template <typename T1, typename T2>
00090 const double
00091 TanimotoSimilarity(const T1& bv1,const T2& bv2);
00092 
00093 //! returns the Cosine similarity between two bit vects
00094 /*!
00095   \return <tt>(bv1&bv2)_o / sqrt(bv1_o * bv2_o)</tt>
00096 */
00097 template <typename T1, typename T2>
00098 const double
00099 CosineSimilarity(const T1& bv1,
00100                  const T2& bv2);
00101 
00102 //! returns the Kulczynski similarity between two bit vects
00103 /*!
00104   \return <tt>(bv1&bv2)_o * [bv1_o + bv2_o] / [2 * bv1_o * bv2_o]</tt>
00105 */
00106 template <typename T1, typename T2>
00107 const double
00108 KulczynskiSimilarity(const T1& bv1,
00109                      const T2& bv2);
00110 
00111 //! returns the Dice similarity between two bit vects
00112 /*!
00113   \return <tt>2*(bv1&bv2)_o / [bv1_o + bv2_o]</tt>
00114 */
00115 template <typename T1, typename T2>
00116 const double
00117 DiceSimilarity(const T1& bv1,
00118                const T2& bv2);
00119 
00120 //! returns the Tversky similarity between two bit vects
00121 /*!
00122   \return <tt>(bv1&bv2)_o / [a*bv1_o + b*bv2_o + (1 - a - b)*(bv1&bv2)_o]</tt>
00123 
00124   Notes:
00125    - 0 <= a,b <= 1
00126    - Tversky(a=1,b=1) = Tanimoto
00127    - Tversky(a=1/2,b=1/2) = Dice
00128  
00129 */
00130 template <typename T1, typename T2>
00131 const double
00132 TverskySimilarity(const T1& bv1,
00133                   const T2& bv2,double a,double b);
00134 
00135 //! returns the Sokal similarity between two bit vects
00136 /*!
00137   \return <tt>(bv1&bv2)_o / [2*bv1_o + 2*bv2_o - 3*(bv1&bv2)_o]</tt>
00138 */
00139 template <typename T1, typename T2>
00140 const double
00141 SokalSimilarity(const T1& bv1,
00142                 const T2& bv2);
00143 
00144 //! returns the McConnaughey similarity between two bit vects
00145 /*!
00146   \return <tt>[(bv1&bv2)_o * (bv1_o + bv2_o) - (bv1_o * bv2_o)] / (bv1_o * bv2_o)</tt>
00147 */
00148 template <typename T1, typename T2>
00149 const double
00150 McConnaugheySimilarity(const T1& bv1,
00151                        const T2& bv2);
00152 
00153 //! returns the Asymmetric similarity between two bit vects
00154 /*!
00155   \return <tt>(bv1&bv2)_o / min(bv1_o,bv2_o)</tt>
00156 */
00157 template <typename T1, typename T2>
00158 const double
00159 AsymmetricSimilarity(const T1& bv1,
00160                      const T2& bv2);
00161 
00162 //! returns the Braun-Blanquet similarity between two bit vects
00163 /*!
00164   \return <tt>(bv1&bv2)_o / max(bv1_o,bv2_o)</tt>
00165 */
00166 template <typename T1, typename T2>
00167 const double
00168 BraunBlanquetSimilarity(const T1& bv1,
00169                         const T2& bv2);
00170 
00171 //! returns the Russel similarity between two bit vects
00172 /*!
00173   \return <tt>(bv1&bv2)_o / bv1_o</tt>
00174 
00175   <b>Note:</b> this operation is non-commutative:
00176     RusselSimilarity(bv1,bv2) != RusselSimilarity(bv2,bv1)
00177 
00178 */
00179 template <typename T1, typename T2>
00180 const double
00181 RusselSimilarity(const T1& bv1,
00182                  const T2& bv2);
00183 
00184 
00185 //! returns the on bit similarity between two bit vects
00186 /*!
00187   \return <tt>(bv1&bv2)_o / (bv1|bv2)_o </tt>
00188 */
00189 template <typename T1, typename T2>
00190 const double
00191 OnBitSimilarity(const T1& bv1,const T2& bv2);
00192 
00193 //! returns the number of common bits (on and off) between two bit vects
00194 /*!
00195   \return <tt>bv1_n - (bv1^bv2)_o</tt>
00196 */
00197 template <typename T1, typename T2>
00198 const int
00199 NumBitsInCommon(const T1& bv1,const T2& bv2);
00200 
00201 //! returns the common-bit similarity (on and off) between two bit vects
00202 /*!
00203   \return <tt>[bv1_n - (bv1^bv2)_o] / bv1_n</tt>
00204 */
00205 template <typename T1, typename T2>
00206 const double
00207 AllBitSimilarity(const T1& bv1,const T2& bv2);
00208 
00209 //! returns an IntVect with indices of all on bits in common between two bit vects
00210 template <typename T1, typename T2>
00211 IntVect
00212 OnBitsInCommon(const T1& bv1,const T2& bv2);
00213 
00214 //! returns an IntVect with indices of all off bits in common between two bit vects
00215 template <typename T1, typename T2>
00216 IntVect
00217 OffBitsInCommon(const T1& bv1,const T2& bv2);
00218 
00219 //! returns the on-bit projected similarities between two bit vects
00220 /*!
00221   \return two values, as a DoubleVect:
00222       - <tt>(bv1&bv2)_o / bv1_o</tt> 
00223       - <tt>(bv1&bv2)_o / bv2_o</tt> 
00224 */
00225 template <typename T1, typename T2>
00226 DoubleVect
00227 OnBitProjSimilarity(const T1& bv1,const T2& bv2);
00228 
00229 //! returns the off-bit projected similarities between two bit vects
00230 /*!
00231   \return two values, as a DoubleVect:
00232      - <tt>[bv1_n - (bv1|bv2)_o] / [bv1_n - bv1_o]</tt> 
00233      - <tt>[bv2_n - (bv1|bv2)_o] / [bv2_n - bv2_o]</tt> 
00234 
00235    <b>Note:</b> <tt>bv1_n = bv2_n</tt>
00236       
00237 */
00238 template <typename T1, typename T2>
00239 DoubleVect
00240 OffBitProjSimilarity(const T1& bv1,const T2& bv2);
00241 
00242 
00243 //! folds a bit vector \c factor times and returns the result
00244 /*!
00245   \param bv1    the vector to be folded
00246   \param factor (optional) the number of times to fold it
00247   
00248   \return a pointer to the folded fingerprint, which is
00249      <tt>bv1_n/factor</tt> long.
00250      
00251    <b>Note:</b> The caller is responsible for <tt>delete</tt>ing the result.
00252  */
00253 template <typename T1>
00254 T1 *
00255 FoldFingerprint(const T1& bv1,unsigned int factor=2);
00256 
00257 //! returns a text representation of a bit vector (a string of 0s and 1s)
00258 /*!
00259   \param bv1    the vector to be converted to text
00260   
00261   \return an std::string
00262 
00263  */
00264 template <typename T1>
00265 std::string
00266 BitVectToText(const T1& bv1);
00267 
00268 
00269 
00270 #endif

Generated on Tue Oct 7 06:10:10 2008 for RDCode by  doxygen 1.5.5