00001 // 00002 // 00003 // Copyright (c) 2009-2010, Novartis Institutes for BioMedical Research Inc. 00004 // All rights reserved. 00005 // 00006 // Redistribution and use in source and binary forms, with or without 00007 // modification, are permitted provided that the following conditions are 00008 // met: 00009 // 00010 // * Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // * Redistributions in binary form must reproduce the above 00013 // copyright notice, this list of conditions and the following 00014 // disclaimer in the documentation and/or other materials provided 00015 // with the distribution. 00016 // * Neither the name of Novartis Institutes for BioMedical Research Inc. 00017 // nor the names of its contributors may be used to endorse or promote 00018 // products derived from this software without specific prior written permission. 00019 // 00020 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00021 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00022 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00023 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 00024 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00026 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00030 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // Created by Greg Landrum, July 2008 00033 // 00034 // 00035 00036 /*! 
\file MorganFingerprints.h 00037 00038 */ 00039 #ifndef __RD_MORGANFPS_H__ 00040 #define __RD_MORGANFPS_H__ 00041 00042 #include <vector> 00043 #include <map> 00044 #include <DataStructs/SparseIntVect.h> 00045 #include <DataStructs/ExplicitBitVect.h> 00046 #include <boost/cstdint.hpp> 00047 00048 namespace RDKit { 00049 class ROMol; 00050 namespace MorganFingerprints { 00051 extern std::vector<std::string> defaultFeatureSmarts; 00052 extern std::vector<ROMOL_SPTR> defaultFeatureMatchers; 00053 00054 typedef std::map<boost::uint32_t,std::vector<std::pair<boost::uint32_t,boost::uint32_t> > > BitInfoMap; 00055 00056 const std::string morganFingerprintVersion="1.0.0"; 00057 00058 //! returns the Morgan fingerprint for a molecule 00059 /*! 00060 These fingerprints are similar to the well-known ECFP or 00061 FCFP fingerprints, depending on which invariants are used. 00062 00063 The algorithm used is described in the paper 00064 Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54 (2010) 00065 http://dx.doi.org/10.1021/ci100050t 00066 00067 The original implementation was done using this paper: 00068 D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005) 00069 and an unpublished technical report: 00070 http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc 00071 00072 \param mol: the molecule to be fingerprinted 00073 \param radius: the number of iterations to grow the fingerprint 00074 \param invariants : optional pointer to a set of atom invariants to 00075 be used. By default ECFP-type invariants are used 00076 (calculated by getConnectivityInvariants()) 00077 \param fromAtoms : if this is provided, only the atoms in the vector will be 00078 used as centers in the fingerprint 00079 \param useChirality : if set, additional information will be added to the fingerprint 00080 when chiral atoms are discovered. 
This will cause \verbatim C[C@H](F)Cl, 00081 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate different fingerprints. 00082 \param useBondTypes : if set, bond types will be included as part of the hash for 00083 calculating bits 00084 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that 00085 have a nonzero invariant. 00086 \param atomsSettingBits : if nonzero, this will be used to return information 00087 about the atoms that set each particular bit. 00088 The keys are the map are bit ids, the values 00089 are lists of (atomId, radius) pairs. 00090 00091 \return a pointer to the fingerprint. The client is 00092 responsible for calling delete on this. 00093 00094 */ 00095 SparseIntVect<boost::uint32_t> * 00096 getFingerprint(const ROMol &mol, 00097 unsigned int radius, 00098 std::vector<boost::uint32_t> *invariants=0, 00099 const std::vector<boost::uint32_t> *fromAtoms=0, 00100 bool useChirality=false, 00101 bool useBondTypes=true, 00102 bool onlyNonzeroInvariants=false, 00103 BitInfoMap *atomsSettingBits=0); 00104 00105 00106 //! returns the Morgan fingerprint for a molecule as a bit vector 00107 /*! 00108 see documentation for getFingerprint() for theory/references 00109 00110 \param mol: the molecule to be fingerprinted 00111 \param radius: the number of iterations to grow the fingerprint 00112 \param nBits: the number of bits in the final fingerprint 00113 \param invariants : optional pointer to a set of atom invariants to 00114 be used. By default ECFP-type invariants are used 00115 (calculated by getConnectivityInvariants()) 00116 \param fromAtoms : if this is provided, only the atoms in the vector will be 00117 used as centers in the fingerprint 00118 \param useChirality : if set, additional information will be added to the fingerprint 00119 when chiral atoms are discovered. This will cause \verbatim C[C@H](F)Cl, 00120 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate different fingerprints. 
00121 \param useBondTypes : if set, bond types will be included as part of the hash for 00122 calculating bits 00123 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that 00124 have a nonzero invariant. 00125 \param atomsSettingBits : if nonzero, this will be used to return information 00126 about the atoms that set each particular bit. 00127 The keys are the map are bit ids, the values 00128 are lists of (atomId, radius) pairs. 00129 00130 \return a pointer to the fingerprint. The client is 00131 responsible for calling delete on this. 00132 00133 */ 00134 ExplicitBitVect * 00135 getFingerprintAsBitVect(const ROMol &mol, 00136 unsigned int radius, 00137 unsigned int nBits, 00138 std::vector<boost::uint32_t> *invariants=0, 00139 const std::vector<boost::uint32_t> *fromAtoms=0, 00140 bool useChirality=false, 00141 bool useBondTypes=true, 00142 bool onlyNonzeroInvariants=false, 00143 BitInfoMap *atomsSettingBits=0); 00144 00145 //! returns the connectivity invariants for a molecule 00146 /*! 00147 00148 \param mol : the molecule to be considered 00149 \param invars : used to return the results 00150 \param includeRingMembership : if set, whether or not the atom is in 00151 a ring will be used in the invariant list. 00152 */ 00153 void getConnectivityInvariants(const ROMol &mol, 00154 std::vector<boost::uint32_t> &invars, 00155 bool includeRingMembership=true); 00156 const std::string morganConnectivityInvariantVersion="1.0.0"; 00157 00158 //! returns the feature invariants for a molecule 00159 /*! 00160 00161 \param mol: the molecule to be considered 00162 \param invars : used to return the results 00163 \param patterns: if provided should contain the queries used to assign atom-types. 00164 if not provided, feature definitions adapted from reference: 00165 Gobbi and Poppinger, Biotech. Bioeng. 
_61_ 47-54 (1998) 00166 will be used for Donor, Acceptor, Aromatic, Halogen, Basic, Acidic 00167 00168 */ 00169 void getFeatureInvariants(const ROMol &mol, 00170 std::vector<boost::uint32_t> &invars, 00171 std::vector<ROMOL_SPTR> *patterns=0); 00172 const std::string morganFeatureInvariantVersion="0.1.0"; 00173 00174 } // end of namespace MorganFingerprints 00175 } 00176 00177 #endif
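// 1.7.1  (NOTE(review): presumably the documentation generator's version footer picked up with the rendered listing, not part of the header - confirm)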