00001 // 00002 // Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC 00003 // 00004 // @@ All Rights Reserved @@ 00005 // This file is part of the RDKit. 00006 // The contents are covered by the terms of the BSD license 00007 // which is included in the file license.txt, found at the root 00008 // of the RDKit source tree. 00009 // 00010 00011 /*! \file Subgraphs.h 00012 00013 \brief functionality for finding subgraphs and paths in molecules 00014 00015 Difference between _subgraphs_ and _paths_ : 00016 Subgraphs are potentially branched, whereas paths (in our 00017 terminology at least) cannot be. So, the following graph: 00018 \verbatim 00019 C--0--C--1--C--3--C 00020 | 00021 2 00022 | 00023 C 00024 \endverbatim 00025 has 3 _subgraphs_ of length 3: (0,1,2),(0,1,3),(2,1,3) 00026 but only 2 _paths_ of length 3: (0,1,3),(2,1,3) 00027 */ 00028 #ifndef _RD_SUBGRAPHS_H_ 00029 #define _RD_SUBGRAPHS_H_ 00030 00031 #include <vector> 00032 #include <list> 00033 #include <map> 00034 00035 00036 namespace RDKit{ 00037 class ROMol; 00038 // NOTE: before replacing the defn of PATH_TYPE: be aware that 00039 // we do occasionally use reverse iterators on these things, so 00040 // replacing with a slist would probably be a bad idea. 00041 typedef std::vector<int> PATH_TYPE; 00042 typedef std::list< PATH_TYPE > PATH_LIST; 00043 typedef PATH_LIST::const_iterator PATH_LIST_CI; 00044 00045 typedef std::map<int, PATH_LIST> INT_PATH_LIST_MAP; 00046 typedef INT_PATH_LIST_MAP::const_iterator INT_PATH_LIST_MAP_CI; 00047 typedef INT_PATH_LIST_MAP::iterator INT_PATH_LIST_MAP_I; 00048 00049 // --- --- --- --- --- --- --- --- --- --- --- --- --- 00050 // 00051 // 00052 // --- --- --- --- --- --- --- --- --- --- --- --- --- 00053 00054 //! \brief find all bond subgraphs in a range of sizes 00055 /*! 00056 * \param mol - the molecule to be considered 00057 * \param lowerLen - the minimum subgraph size to find 00058 * \param upperLen - the maximum subgraph size to find 00059 * \param useHs - if set, hydrogens in the graph will be considered 00060 * eligible to be in paths. NOTE: this will not add 00061 * Hs to the graph. 00062 * \param rootedAtAtom - if non-negative, only subgraphs that start at 00063 * this atom will be returned. 00064 * 00065 * The result is a map from subgraph size -> list of paths 00066 * (i.e. list of list of bond indices) 00067 */ 00068 INT_PATH_LIST_MAP findAllSubgraphsOfLengthsMtoN(const ROMol &mol, unsigned int lowerLen, 00069 unsigned int upperLen, bool useHs=false, 00070 int rootedAtAtom=-1); 00071 00072 //! \brief find all bond subgraphs of a particular size 00073 /*! 00074 * \param mol - the molecule to be considered 00075 * \param targetLen - the length of the subgraphs to be returned 00076 * \param useHs - if set, hydrogens in the graph will be considered 00077 * eligible to be in paths. NOTE: this will not add 00078 * Hs to the graph. 00079 * \param rootedAtAtom - if non-negative, only subgraphs that start at 00080 * this atom will be returned. 00081 * 00082 * 00083 * The result is a list of paths (i.e. list of list of bond indices) 00084 */ 00085 PATH_LIST findAllSubgraphsOfLengthN(const ROMol &mol,unsigned int targetLen, 00086 bool useHs=false,int rootedAtAtom=-1); 00087 00088 //! \brief find unique bond subgraphs of a particular size 00089 /*! 00090 * \param mol - the molecule to be considered 00091 * \param targetLen - the length of the subgraphs to be returned 00092 * \param useHs - if set, hydrogens in the graph will be considered 00093 * eligible to be in paths. NOTE: this will not add 00094 * Hs to the graph. 00095 * \param useBO - if set, bond orders will be considered when uniquifying 00096 * the paths 00097 * \param rootedAtAtom - if non-negative, only subgraphs that start at 00098 * this atom will be returned. 00099 * 00100 * The result is a list of paths (i.e. list of list of bond indices) 00101 */ 00102 PATH_LIST findUniqueSubgraphsOfLengthN(const ROMol &mol,unsigned int targetLen, 00103 bool useHs=false,bool useBO=true, 00104 int rootedAtAtom=-1); 00105 //! \brief find all paths of a particular size 00106 /*! 00107 * \param mol - the molecule to be considered 00108 * \param targetLen - the length of the paths to be returned 00109 * \param useBonds - if set, the path indices will be bond indices, 00110 * not atom indices 00111 * \param useHs - if set, hydrogens in the graph will be considered 00112 * eligible to be in paths. NOTE: this will not add 00113 * Hs to the graph. 00114 * \param rootedAtAtom - if non-negative, only subgraphs that start at 00115 * this atom will be returned. 00116 * 00117 * The result is a list of paths (i.e. list of list of bond indices) 00118 */ 00119 PATH_LIST findAllPathsOfLengthN(const ROMol &mol,unsigned int targetLen, 00120 bool useBonds=true,bool useHs=false, 00121 int rootedAtAtom=-1); 00122 INT_PATH_LIST_MAP findAllPathsOfLengthsMtoN(const ROMol &mol,unsigned int lowerLen, 00123 unsigned int upperLen, 00124 bool useBonds=true,bool useHs=false, 00125 int rootedAtAtom=-1); 00126 00127 //! \brief find bond subgraphs of a particular radius around an atom 00128 /*! 00129 * \param mol - the molecule to be considered 00130 * \param radius - the radius of the subgraphs to be considered 00131 * \param rootedAtAtom - the atom to consider 00132 * \param useHs - if set, hydrogens in the graph will be considered 00133 * eligible to be in paths. NOTE: this will not add 00134 * Hs to the graph. 00135 * 00136 * The result is a path (a vector of bond indices) 00137 */ 00138 PATH_TYPE findAtomEnvironmentOfRadiusN(const ROMol &mol,unsigned int radius, 00139 unsigned int rootedAtAtom,bool useHs=false); 00140 00141 } 00142 00143 00144 #endif
1.7.1