RankAtoms.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 
00011 //! \file RankAtoms.h
00012 /*!
00013     \brief Utility functionality used by atom rankers.
00014 */
00015 #ifndef _RD_RANKATOMS_H_
00016 #define _RD_RANKATOMS_H_
00017 
00018 #include <queue>
00019 #include <vector>
00020 #include <list>
00021 #include <algorithm>
00022 #include <boost/foreach.hpp>
00023 
00024 namespace RankAtoms {
00025   typedef std::vector<int> INT_VECT;
00026   typedef std::list<int> INT_LIST;
00027 
00028   //! utility function for ranking atoms
00029   void updateInPlayIndices(const INT_VECT &ranks,INT_LIST &indicesInPlay);
00030 
00031   //! returns the count of unique items in an std::vector
00032   template <typename T>
00033   unsigned int countClasses(const std::vector<T> &vect){
00034     std::vector<T> sortedVect = vect;
00035     std::sort(sortedVect.begin(),sortedVect.end(),std::less<T>());
00036     typename std::vector<T>::iterator newEnd=std::unique(sortedVect.begin(),sortedVect.end());
00037     return newEnd-sortedVect.begin();
00038   }
00039 
00040   //! functor for implementing > on two std::pairs.  The first entries are compared.
00041   template <typename T>
00042   struct pairGTFunctor {
00043     bool operator() (const std::pair<T,int> &v1,const std::pair<T,int> &v2){
00044       return v1.first > v2.first;
00045     }
00046   };
00047 
00048   //! function for implementing < on two std::pairs.  The first entries are compared.
00049   template <typename T>
00050   bool pairLess(const std::pair<T,int> &v1,const std::pair<T,int> &v2){
00051     return v1.first < v2.first;
00052   }
00053 
00054   template <typename T>
00055   class argless {
00056   public:
00057     argless(const T& c) : container(c) {};
00058     bool operator() (unsigned int v1,unsigned int v2){
00059       return container[v1]<container[v2];
00060     }
00061     const T &container;
00062   };
00063 
00064   
00065   //! ranks the entries in a vector
00066   /*!
00067     \param vect the vector to rank
00068     \param res  is used to return the ranks of each entry
00069   */
00070   template <typename T>  
00071   void rankVect(const std::vector<T> &vect,INT_VECT &res){
00072     PRECONDITION(res.size()>=vect.size(),"vector size mismatch");
00073     unsigned int nEntries = vect.size();
00074 
00075     std::vector< unsigned int > indices(nEntries);
00076     for(unsigned int i=0;i<nEntries;++i) indices[i]=i; 
00077     std::sort(indices.begin(),indices.end(),argless< std::vector<T> >(vect) );
00078 
00079     int currRank=0;
00080     T lastV = vect[indices[0]];
00081     BOOST_FOREACH(unsigned int idx,indices){
00082       T v = vect[idx];
00083       if(v==lastV){
00084         res[idx] = currRank;
00085       } else {
00086         res[idx] = ++currRank;
00087         lastV = v;
00088       }
00089     }
00090   }    
00091 
00092   //! finds the relative rankings of the entries in \c vals.
00093   /*!
00094     \param nAtoms         the number of points in play
00095     \param vals           the values to be ranked
00096     \param indicesInPlay  a list containing the indices that
00097            are being considered (only those entries in \c ranks
00098            that appear in \c indicesInPlay will be modified)
00099     \param ranks          the current ranks of entries, this is updated
00100            with new ranks
00101   */
00102   template <typename T>
00103   void sortAndRankVect(unsigned int nAtoms,
00104                        const std::vector<T> &vals,
00105                        const INT_LIST &indicesInPlay,
00106                        INT_VECT &ranks) {
00107     // --------------
00108     //
00109     // start by getting the internal ranking of the values passed in
00110     //
00111     // --------------
00112     INT_VECT newRanks(vals.size());
00113     rankVect(vals,newRanks);
00114 
00115     // --------------
00116     //  
00117     // At the end of this operation, fixedRanks will contain the ranks
00118     // of atoms that are no longer active (-1 for active atoms).
00119     //
00120     // --------------
00121     BOOST_FOREACH(int idx,indicesInPlay){
00122       ranks[idx] = -1;
00123     }
00124 
00125 #ifdef VERYVERBOSE_CANON
00126     std::cout << "new: ";
00127     debugVect(newRanks);
00128     std::cout << "fixed: ";
00129     debugVect(ranks);
00130 #endif
00131 
00132     INT_VECT idxVect;
00133     idxVect.assign(indicesInPlay.begin(),indicesInPlay.end());
00134 
00135     // -------------
00136     //
00137     //  Loop over all the new ranks.  We'll know that we're done
00138     //  when currNewRank > maxNewRank
00139     //
00140     // -------------
00141 
00142     int currNewRank= *(std::min_element(newRanks.begin(),newRanks.end()));
00143     int maxNewRank = *(std::max_element(newRanks.begin(),newRanks.end()));
00144     while(currNewRank<=maxNewRank){
00145       //
00146       // If this rank is already present in ranks, increment
00147       //  this rank and all new ranks that are higher:
00148       //
00149       while(std::find(ranks.begin(),ranks.end(),currNewRank)!=ranks.end()){
00150         BOOST_FOREACH(int &rank,newRanks){
00151           if(rank>=currNewRank)
00152             ++rank;
00153         }
00154         // increment both the current rank *and* the maximum new rank
00155         ++currNewRank;
00156         ++maxNewRank;
00157       }
00158 
00159       //
00160       //  now grab all entries with this new rank and copy them into
00161       //  the ranks list
00162       //
00163       INT_VECT::iterator ivIt=std::find(newRanks.begin(),newRanks.end(),currNewRank);
00164       while(ivIt!=newRanks.end()){
00165         int offset=ivIt-newRanks.begin();
00166         int idx = idxVect[offset];
00167         ranks[idx] = currNewRank;
00168         ++ivIt;
00169         ivIt=std::find(ivIt,newRanks.end(),currNewRank);
00170       }
00171       ++currNewRank;
00172     }
00173   }
00174 }
00175 #endif