CorrMatGenerator.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2006 Rational Discovery LLC
00003 //
00004 //  @@ All Rights Reserved  @@
00005 //
00006 #ifndef _RD_CORRMATGENERATOR_H_
00007 #define _RD_CORRMATGENERATOR_H_
00008 
00009 #include <RDGeneral/types.h>
00010 #include <DataStructs/BitVects.h>
00011 #include <boost/dynamic_bitset.hpp>
00012 
00013 namespace RDInfoTheory {
00014   // FIXME: not a priority right now, but this class could be templated on the type of
00015   // container used for the bit list and on the type of descriptor (fingerprint vs. real-valued)
00016   class BitCorrMatGenerator {
00017     /*! \brief A class to generate a correlation matrix for a bunch of fingerprints
00018      *
00019      *  The correlation matrix is done only for the bit IDs that are set by a call to the 
00020      *  function setDescriptorIdList
00021      *  
00022      *    cr = CorrMatGenerator();
00023      *    cr.setDescriptorIdList(descList);
00024      *    for each fingerprint in list of fingerprints {
00025      *        cr.collectVotes(fingerprint);
00026      *    }
00027      *    double *corrMat = cr.getCorrMat()
00028      *  
00029      *  The resulting correlation matrix is a one dimension matrix with only the lower triangle elements
00030      *  of the symmetric matrix
00031      */
00032   public:
00033     BitCorrMatGenerator() {
00034       this->initGenerator();
00035     };
00036 
00037     ~BitCorrMatGenerator() {
00038       if (dp_corrMat) {
00039         delete [] dp_corrMat;
00040       }
00041     }
00042 
00043     void initGenerator() {
00044       dp_corrMat = 0;
00045       d_descs.resize(0);
00046       d_nExamples = 0;
00047     };
00048 
00049     /*! \brief Set the list bits that we are interested in correlating
00050      *
00051      *  \param bitIdList is a list of bit ids that need to be correlated e.g. a list top ranked ensemble 
00052      *  of bits 
00053      */
00054     void setBitIdList(const RDKit::INT_VECT &bitIdList) {
00055       d_descs = bitIdList;
00056       int i, nd = d_descs.size();
00057       int nelem = nd*(nd-1)/2;
00058       if (dp_corrMat != 0) {
00059         delete [] dp_corrMat;
00060       }
00061       dp_corrMat = new double[nd*(nd-1)/2];
00062       for (i = 0; i < nelem; i++) {
00063         dp_corrMat[i] = 0.0;
00064       }
00065     };
00066 
00067     //! \brief get the number of examples we used so far to compute the correlation matrix
00068     int getNumExamples() const {
00069       return d_nExamples;
00070     };
00071 
00072     //! \brief Get the list of bits ID that are used to generate the correlation matrix
00073     RDKit::INT_VECT getCorrBitList() const {
00074       return d_descs;
00075     };
00076 
00077     //! \brief Gets a pointer to the correlation matrix
00078     double *getCorrMat() {
00079       return dp_corrMat;
00080     };
00081     
00082     //! \brief For each pair of on bits (bi, bj) in fp increase the correlation count
00083     //    for the pair by 1
00084     void collectVotes(const BitVect &fp) {
00085       unsigned int nd = d_descs.size();
00086       // use a temporary bit vector to first mask the fingerprint
00087       ExplicitBitVect ebv(nd);
00088       int bi;
00089       for (unsigned int i = 0; i < nd; i++) {
00090         bi = d_descs[i];
00091         if (fp[bi]) {
00092           ebv.SetBit(i);
00093         }
00094       }
00095       for (unsigned i = 1; i < nd; i++) {
00096         unsigned int itab = i*(i-1)/2;
00097         if (ebv[i]) {
00098           for (unsigned int j = 0; j < i; j++) {
00099             if ( ebv[j]) {
00100               dp_corrMat[itab + j] += 1;
00101             }
00102           }
00103         }
00104       }
00105       d_nExamples++;
00106     };
00107 
00108   private:
00109     RDKit::INT_VECT d_descs;
00110     double *dp_corrMat;
00111     int d_nExamples;
00112   };
00113 
00114 }
00115 
00116 #endif
00117     
00118 

Generated on Tue Oct 7 06:10:10 2008 for RDCode by  doxygen 1.5.5