Subgraphs.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 
00011 /*! \file Subgraphs.h
00012 
00013    \brief functionality for finding subgraphs and paths in molecules
00014 
00015    Difference between _subgraphs_ and _paths_ :
00016       Subgraphs are potentially branched, whereas paths (in our 
00017       terminology at least) cannot be.  So, the following graph:
00018 \verbatim 
00019             C--0--C--1--C--3--C
00020                   |
00021                   2
00022                   |
00023                   C
00024 \endverbatim
00025       has 3 _subgraphs_ of length 3: (0,1,2),(0,1,3),(2,1,3)
00026       but only 2 _paths_ of length 3: (0,1,3),(2,1,3)
00027 */
00028 #ifndef _RD_SUBGRAPHS_H_
00029 #define _RD_SUBGRAPHS_H_
00030 
00031 #include <vector>
00032 #include <list>
00033 #include <map>
00034 
00035 
00036 namespace RDKit{
00037   class ROMol;
00038   // NOTE: before replacing the defn of PATH_TYPE: be aware that
00039   // we do occasionally use reverse iterators on these things, so
00040   // replacing with a slist would probably be a bad idea.
00041   typedef std::vector<int> PATH_TYPE;
00042   typedef std::list< PATH_TYPE > PATH_LIST;
00043   typedef PATH_LIST::const_iterator PATH_LIST_CI;
00044 
00045   typedef std::map<int, PATH_LIST> INT_PATH_LIST_MAP;
00046   typedef INT_PATH_LIST_MAP::const_iterator INT_PATH_LIST_MAP_CI;
00047   typedef INT_PATH_LIST_MAP::iterator INT_PATH_LIST_MAP_I;
00048 
00049   // --- --- --- --- --- --- --- --- --- --- --- --- --- 
00050   //
00051   //
00052   // --- --- --- --- --- --- --- --- --- --- --- --- --- 
00053 
00054   //! \brief find all bond subgraphs in a range of sizes
00055   /*!
00056    *   \param mol - the molecule to be considered
00057    *   \param lowerLen - the minimum subgraph size to find 
00058    *   \param upperLen - the maximum subgraph size to find
00059    *   \param useHs     - if set, hydrogens in the graph will be considered
00060    *                      eligible to be in paths. NOTE: this will not add
00061    *                      Hs to the graph. 
00062    *   \param rootedAtAtom - if non-negative, only subgraphs that start at
00063    *                         this atom will be returned.
00064    *
00065    *   The result is a map from subgraph size -> list of paths
00066    *               (i.e. list of list of bond indices)
00067   */
00068   INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen,
00069                                                   unsigned int upperLen, bool useHs=false,
00070                                                   int rootedAtAtom=-1);
00071 
00072   //! \brief find all bond subgraphs of a particular size
00073   /*!
00074    *   \param mol - the molecule to be considered
00075    *   \param targetLen - the length of the subgraphs to be returned
00076    *   \param useHs     - if set, hydrogens in the graph will be considered
00077    *                      eligible to be in paths. NOTE: this will not add
00078    *                      Hs to the graph. 
00079    *   \param rootedAtAtom - if non-negative, only subgraphs that start at
00080    *                         this atom will be returned.
00081    *
00082    *
00083    *   The result is a list of paths (i.e. list of list of bond indices)
00084   */
00085   PATH_LIST findAllSubgraphsOfLengthN(const ROMol &mol,unsigned int targetLen,
00086                                       bool useHs=false,int rootedAtAtom=-1);
00087 
00088   //! \brief find unique bond subgraphs of a particular size
00089   /*!
00090    *   \param mol - the molecule to be considered
00091    *   \param targetLen - the length of the subgraphs to be returned
00092    *   \param useHs     - if set, hydrogens in the graph will be considered
00093    *                      eligible to be in paths. NOTE: this will not add
00094    *                      Hs to the graph. 
00095    *   \param useBO     - if set, bond orders will be considered when uniquifying
00096    *                      the paths
00097    *   \param rootedAtAtom - if non-negative, only subgraphs that start at
00098    *                         this atom will be returned.
00099    *
00100    *   The result is a list of paths (i.e. list of list of bond indices)
00101   */
00102   PATH_LIST findUniqueSubgraphsOfLengthN(const ROMol &mol,unsigned int targetLen,
00103                                          bool useHs=false,bool useBO=true,
00104                                          int rootedAtAtom=-1);
00105   //! \brief find all paths of a particular size
00106   /*!
00107    *   \param mol - the molecule to be considered
00108    *   \param targetLen - the length of the paths to be returned
00109    *   \param useBonds  - if set, the path indices will be bond indices,
00110    *                      not atom indices
00111    *   \param useHs     - if set, hydrogens in the graph will be considered
00112    *                      eligible to be in paths. NOTE: this will not add
00113    *                      Hs to the graph. 
00114    *   \param rootedAtAtom - if non-negative, only subgraphs that start at
00115    *                         this atom will be returned.
00116    *
00117    *   The result is a list of paths (i.e. list of list of bond indices)
00118   */
00119   PATH_LIST findAllPathsOfLengthN(const ROMol &mol,unsigned int targetLen,
00120                                   bool useBonds=true,bool useHs=false,
00121                                   int rootedAtAtom=-1);
00122   INT_PATH_LIST_MAP findAllPathsOfLengthsMtoN(const ROMol &mol,unsigned int lowerLen,
00123                                               unsigned int upperLen,
00124                                               bool useBonds=true,bool useHs=false,
00125                                               int rootedAtAtom=-1);
00126 
00127   //! \brief find bond subgraphs of a particular radius around an atom
00128   /*!
00129    *   \param mol - the molecule to be considered
00130    *   \param radius - the radius of the subgraphs to be considered
00131    *   \param rootedAtAtom - the atom to consider
00132    *   \param useHs     - if set, hydrogens in the graph will be considered
00133    *                      eligible to be in paths. NOTE: this will not add
00134    *                      Hs to the graph. 
00135    *
00136    *   The result is a path (a vector of bond indices)
00137   */
00138   PATH_TYPE findAtomEnvironmentOfRadiusN(const ROMol &mol,unsigned int radius,
00139                                          unsigned int rootedAtAtom,bool useHs=false);
00140 
00141 }
00142 
00143   
00144 #endif