MetricMatrixCalc.h

Go to the documentation of this file.
00001 // $Id: MetricMatrixCalc.h 1528 2010-09-26 17:04:37Z glandrum $
00002 //
00003 //  Copyright (C) 2003-2006 Rational Discovery LLC
00004 //
00005 //  @@ All Rights Reserved @@
00006 //  This file is part of the RDKit.
00007 //  The contents are covered by the terms of the BSD license
00008 //  which is included in the file license.txt, found at the root
00009 //  of the RDKit source tree.
00010 //
00011 #ifndef __RD_METRICMATRIXCAL_H__
00012 #define __RD_METRICMATRIXCAL_H__
00013 
00014 #include "MetricFuncs.h"
00015 #include <RDGeneral/Invariant.h>
00016 
00017 namespace RDDataManip {
00018   
00019   /*! \brief A generic metric matrix calculator (e.g similarity matrix or
00020    *         distance matrix) 
00021    *
00022    *  This templated class needs some explanation
00023    *    vectType is a container that can support [] operator 
00024    *    entryType is the type of entry that is returned by the [] operator
00025    *  Examples of the container include PySequenceHolder which is wrapper around 
00026    *  a python sequence objects like lists and tuples.
00027    *  Examples of the entryType include a sequence of double, floats, and ExplicitBitVects 
00028    *
00029    */
00030   template <class vectType, class entryType> class MetricMatrixCalc {
00031   public:
00032     /*! \brief Default Constructor
00033      *
00034      */
00035     MetricMatrixCalc() {};
00036     
00037     /*! \brief Set the metric function
00038      *
00039      * Set the pointer to the mertic funvtion to be used by the metric calculator
00040      *
00041      * ARGUMENTS:
00042      *
00043      *  mFunc - pointer to the metric funtion
00044      */
00045     void setMetricFunc(double (*mFunc)(const entryType &, const entryType &, unsigned int)) {
00046       dp_metricFunc = mFunc;
00047     }
00048 
00049     /*! \brief The calculator function
00050      *
00051      * ARGUMENTS:
00052      *
00053      *  descrips - vectType container with a entryType for each item
00054      *  nItems - the number of item in the descripts.
00055      *           In several cases this argument is irrelvant since vectType probably supports
00056      *           a size() member function, But we would like this interface to take for example 
00057      *           a double** and correctly parse the row and columns.
00058      *  dim - the dimension of the sequences
00059      *  distMat - pointer to an array to write the distance matrix to
00060      *            it is assumed that the right sized array has already be allocated.
00061      *
00062      * FIX: we can probably make this function create the correct sized distMat and return
00063      * it to the caller, but when pushing he result out to a python array not sure how to
00064      * avoid copy the entire distance matrix in that case
00065      *
00066      * RETURNS:
00067      * 
00068      *  pointer to a 1D array of doubles. Only the lower triangle elements are
00069      *  included in the array
00070      */
00071     void calcMetricMatrix(const vectType &descripts, unsigned int nItems, unsigned int dim,
00072                           double *distMat) {
00073       CHECK_INVARIANT(distMat, "invalid pointer to a distance matix");
00074       
00075       for (unsigned int i = 1; i < nItems; i++) {
00076         unsigned int itab = i*(i-1)/2;
00077         for (unsigned int j = 0; j < i; j++) {
00078           distMat[itab+j] = dp_metricFunc(descripts[i], descripts[j], dim);
00079         }
00080       }
00081     };
00082     
00083   private:
00084     // pointer to the metric function
00085     /*! \brief pointer to the metric function
00086      *
00087      * In several cases the last argument 'dim' should be irrelevant, 
00088      * For example when entryType is a bit vector the size is of the vector
00089      * or the dimension can be obtained by asking the bit vector itself. However
00090      * we woul like this interface to support other containers lines double* 
00091      * in which case the 'dim' value is useful in cumputing the metric.
00092      */  
00093     double (*dp_metricFunc)(const entryType &, const entryType &, unsigned int);
00094     
00095   };
00096 };
00097 
00098 #endif