CorrMatGenerator.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2006 Rational Discovery LLC
00003 //
00004 //  @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 #ifndef _RD_CORRMATGENERATOR_H_
00011 #define _RD_CORRMATGENERATOR_H_
00012 
00013 #include <RDGeneral/types.h>
00014 #include <DataStructs/BitVects.h>
00015 #include <boost/dynamic_bitset.hpp>
00016 
00017 namespace RDInfoTheory {
00018   //FIX: won't worry about it now, but this class can be templated by the type of 
00019   // container for the bit list and type of descriptors (fingerprint vs. real valued)
00020   class BitCorrMatGenerator {
00021     /*! \brief A class to generate a correlation matrix for a bunch of fingerprints
00022      *
00023      *  The correlation matrix is done only for the bit IDs that are set by a call to the 
00024      *  function setDescriptorIdList
00025      *  
00026      *    cr = CorrMatGenerator();
00027      *    cr.setDescriptorIdList(descList);
00028      *    for each fingerprint in list of fingerprints {
00029      *        cr.collectVotes(fingerprint);
00030      *    }
00031      *    double *corrMat = cr.getCorrMat()
00032      *  
00033      *  The resulting correlation matrix is a one dimension matrix with only the lower triangle elements
00034      *  of the symmetric matrix
00035      */
00036   public:
00037     BitCorrMatGenerator() {
00038       this->initGenerator();
00039     };
00040 
00041     ~BitCorrMatGenerator() {
00042       if (dp_corrMat) {
00043         delete [] dp_corrMat;
00044       }
00045     }
00046 
00047     void initGenerator() {
00048       dp_corrMat = 0;
00049       d_descs.resize(0);
00050       d_nExamples = 0;
00051     };
00052 
00053     /*! \brief Set the list bits that we are interested in correlating
00054      *
00055      *  \param bitIdList is a list of bit ids that need to be correlated e.g. a list top ranked ensemble 
00056      *  of bits 
00057      */
00058     void setBitIdList(const RDKit::INT_VECT &bitIdList) {
00059       d_descs = bitIdList;
00060       int i, nd = d_descs.size();
00061       int nelem = nd*(nd-1)/2;
00062       if (dp_corrMat != 0) {
00063         delete [] dp_corrMat;
00064       }
00065       dp_corrMat = new double[nd*(nd-1)/2];
00066       for (i = 0; i < nelem; i++) {
00067         dp_corrMat[i] = 0.0;
00068       }
00069     };
00070 
00071     //! \brief get the number of examples we used so far to compute the correlation matrix
00072     int getNumExamples() const {
00073       return d_nExamples;
00074     };
00075 
00076     //! \brief Get the list of bits ID that are used to generate the correlation matrix
00077     RDKit::INT_VECT getCorrBitList() const {
00078       return d_descs;
00079     };
00080 
00081     //! \brief Gets a pointer to the correlation matrix
00082     double *getCorrMat() {
00083       return dp_corrMat;
00084     };
00085     
00086     //! \brief For each pair of on bits (bi, bj) in fp increase the correlation count
00087     //    for the pair by 1
00088     void collectVotes(const BitVect &fp) {
00089       unsigned int nd = d_descs.size();
00090       // use a temporary bit vector to first mask the fingerprint
00091       ExplicitBitVect ebv(nd);
00092       int bi;
00093       for (unsigned int i = 0; i < nd; i++) {
00094         bi = d_descs[i];
00095         if (fp[bi]) {
00096           ebv.setBit(i);
00097         }
00098       }
00099       for (unsigned i = 1; i < nd; i++) {
00100         unsigned int itab = i*(i-1)/2;
00101         if (ebv[i]) {
00102           for (unsigned int j = 0; j < i; j++) {
00103             if ( ebv[j]) {
00104               dp_corrMat[itab + j] += 1;
00105             }
00106           }
00107         }
00108       }
00109       d_nExamples++;
00110     };
00111 
00112   private:
00113     RDKit::INT_VECT d_descs;
00114     double *dp_corrMat;
00115     int d_nExamples;
00116   };
00117 
00118 }
00119 
00120 #endif
00121     
00122