00001 // 00002 // Copyright (C) 2007-2011 Greg Landrum 00003 // 00004 // @@ All Rights Reserved @@ 00005 // This file is part of the RDKit. 00006 // The contents are covered by the terms of the BSD license 00007 // which is included in the file license.txt, found at the root 00008 // of the RDKit source tree. 00009 // 00010 00011 /*! \file AtomPairs.h 00012 00013 */ 00014 #ifndef __RD_ATOMPAIRS_H__ 00015 #define __RD_ATOMPAIRS_H__ 00016 00017 #include <DataStructs/SparseIntVect.h> 00018 #include <DataStructs/BitVects.h> 00019 #include <boost/cstdint.hpp> 00020 namespace RDKit { 00021 class Atom; 00022 00023 namespace AtomPairs { 00024 const std::string atomPairsVersion="1.1.0"; 00025 const unsigned int numTypeBits=4; 00026 const unsigned int atomNumberTypes[1<<numTypeBits]={5,6,7,8,9,14,15,16,17,33,34,35,51,52,43}; 00027 const unsigned int numPiBits=2; 00028 const unsigned int maxNumPi=(1<<numPiBits)-1; 00029 const unsigned int numBranchBits=3; 00030 const unsigned int maxNumBranches=(1<<numBranchBits)-1; 00031 const unsigned int codeSize=numTypeBits+numPiBits+numBranchBits; 00032 const unsigned int numPathBits=5; 00033 const unsigned int maxPathLen=(1<<numPathBits)-1; 00034 const unsigned int numAtomPairFingerprintBits=numPathBits+2*codeSize; 00035 00036 //! returns a numeric code for the atom (the atom's hash in the 00037 //! atom-pair scheme) 00038 /*! 00039 \param atom the atom to be considered 00040 \param branchSubtract (optional) a constant to subtract from 00041 the number of neighbors when the hash 00042 is calculated (used in the topological 00043 torsions code) 00044 */ 00045 boost::uint32_t getAtomCode(const Atom *atom,unsigned int branchSubtract=0); 00046 00047 //! returns an atom pair hash based on two atom hashes and the 00048 //! distance between the atoms. 00049 /*! 
00050 \param codeI the hash for the first atom 00051 \param codeJ the hash for the second atom 00052 \param dist the distance (number of bonds) between the two 00053 atoms 00054 */ 00055 boost::uint32_t getAtomPairCode(boost::uint32_t codeI,boost::uint32_t codeJ, 00056 unsigned int dist); 00057 00058 //! returns the atom-pair fingerprint for a molecule 00059 /*! 00060 The algorithm used is described here: 00061 R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as 00062 Molecular Features in Structure-Activity Studies: Definition 00063 and Applications" JCICS 25, 64-73 (1985). 00064 00065 00066 \param mol: the molecule to be fingerprinted 00067 \param minLength: minimum distance between atoms to be 00068 considered in a pair. Default is 1 bond. 00069 \param maxLength: maximum distance between atoms to be 00070 considered in a pair. 00071 Default is maxPathLen-1 bonds. 00072 \param fromAtoms: if provided, only atom pairs that involve 00073 the specified atoms will be included in the 00074 fingerprint 00075 \param ignoreAtoms: if provided, any atom pairs that include 00076 the specified atoms will not be included in the 00077 fingerprint 00078 00079 \return a pointer to the fingerprint. The client is 00080 responsible for calling delete on this. 00081 00082 */ 00083 SparseIntVect<boost::int32_t> * 00084 getAtomPairFingerprint(const ROMol &mol, 00085 unsigned int minLength,unsigned int maxLength, 00086 const std::vector<boost::uint32_t> *fromAtoms=0, 00087 const std::vector<boost::uint32_t> *ignoreAtoms=0); 00088 //! \overload 00089 SparseIntVect<boost::int32_t> * 00090 getAtomPairFingerprint(const ROMol &mol, 00091 const std::vector<boost::uint32_t> *fromAtoms=0, 00092 const std::vector<boost::uint32_t> *ignoreAtoms=0); 00093 00094 00095 //! returns the hashed atom-pair fingerprint for a molecule 00096 /*! 
00097 \param mol: the molecule to be fingerprinted 00098 \param nBits: the length of the fingerprint to generate 00099 \param minLength: minimum distance between atoms to be 00100 considered in a pair. Default is 1 bond. 00101 \param maxLength: maximum distance between atoms to be 00102 considered in a pair. 00103 Default is maxPathLen-1 bonds. 00104 \param fromAtoms: if provided, only atom pairs that involve 00105 the specified atoms will be included in the 00106 fingerprint 00107 \param ignoreAtoms: if provided, any atom pairs that include 00108 the specified atoms will not be included in the 00109 fingerprint 00110 00111 \return a pointer to the fingerprint. The client is 00112 responsible for calling delete on this. 00113 00114 */ 00115 SparseIntVect<boost::int32_t> * 00116 getHashedAtomPairFingerprint(const ROMol &mol, 00117 unsigned int nBits=2048, 00118 unsigned int minLength=1, 00119 unsigned int maxLength=maxPathLen-1, 00120 const std::vector<boost::uint32_t> *fromAtoms=0, 00121 const std::vector<boost::uint32_t> *ignoreAtoms=0); 00122 //! returns the hashed atom-pair fingerprint for a molecule as a bit vector 00123 /*! 00124 \param mol: the molecule to be fingerprinted 00125 \param nBits: the length of the fingerprint to generate 00126 \param minLength: minimum distance between atoms to be 00127 considered in a pair. Default is 1 bond. 00128 \param maxLength: maximum distance between atoms to be 00129 considered in a pair. 00130 Default is maxPathLen-1 bonds. 00131 \param fromAtoms: if provided, only atom pairs that involve 00132 the specified atoms will be included in the 00133 fingerprint 00134 \param ignoreAtoms: if provided, any atom pairs that include 00135 the specified atoms will not be included in the 00136 fingerprint 00137 \param nBitsPerEntry: number of bits to use in simulating counts 00138 00139 \return a pointer to the fingerprint. The client is 00140 responsible for calling delete on this. 
00141 00142 */ 00143 ExplicitBitVect * 00144 getHashedAtomPairFingerprintAsBitVect(const ROMol &mol, 00145 unsigned int nBits=2048, 00146 unsigned int minLength=1, 00147 unsigned int maxLength=maxPathLen-1, 00148 const std::vector<boost::uint32_t> *fromAtoms=0, 00149 const std::vector<boost::uint32_t> *ignoreAtoms=0, 00150 unsigned int nBitsPerEntry=4); 00151 00152 00153 //! returns an topological torsion hash based on the atom hashes 00154 //! passed in 00155 /*! 00156 \param atomCodes the vector of atom hashes 00157 */ 00158 boost::uint64_t getTopologicalTorsionCode(const std::vector<boost::uint32_t> &atomCodes); 00159 00160 //! returns the topological-torsion fingerprint for a molecule 00161 /*! 00162 The algorithm used is described here: 00163 R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan; 00164 "Topological Torsion: A New Molecular Descriptor for SAR Applications. 00165 Comparison with Other Descriptors" JCICS 27, 82-85 (1987). 00166 00167 \param mol: the molecule to be fingerprinted 00168 \param targetSize: the number of atoms to include in the "torsions" 00169 \param fromAtoms: if provided, only torsions that start or end at 00170 the specified atoms will be included in the 00171 fingerprint 00172 \param ignoreAtoms: if provided, any torsions that include 00173 the specified atoms will not be included in the 00174 fingerprint 00175 00176 \return a pointer to the fingerprint. The client is 00177 responsible for calling delete on this. 00178 00179 */ 00180 SparseIntVect<boost::int64_t > * 00181 getTopologicalTorsionFingerprint(const ROMol &mol, 00182 unsigned int targetSize=4, 00183 const std::vector<boost::uint32_t> *fromAtoms=0, 00184 const std::vector<boost::uint32_t> *ignoreAtoms=0); 00185 //! returns a hashed topological-torsion fingerprint for a molecule 00186 /*! 00187 The algorithm used is described here: 00188 R. Nilakantan, N. Bauman, J. S. Dixon, R. 
Venkataraghavan; 00189 "Topological Torsion: A New Molecular Descriptor for SAR Applications. 00190 Comparison with Other Descriptors" JCICS 27, 82-85 (1987). 00191 00192 \param mol: the molecule to be fingerprinted 00193 \param nBits: number of bits to include in the fingerprint 00194 \param targetSize: the number of atoms to include in the "torsions" 00195 \param fromAtoms: if provided, only torsions that start or end at 00196 the specified atoms will be included in the 00197 fingerprint 00198 \param ignoreAtoms: if provided, any torsions that include 00199 the specified atoms will not be included in the 00200 fingerprint 00201 00202 \return a pointer to the fingerprint. The client is 00203 responsible for calling delete on this. 00204 00205 */ 00206 SparseIntVect<boost::int64_t > * 00207 getHashedTopologicalTorsionFingerprint(const ROMol &mol, 00208 unsigned int nBits=2048, 00209 unsigned int targetSize=4, 00210 const std::vector<boost::uint32_t> *fromAtoms=0, 00211 const std::vector<boost::uint32_t> *ignoreAtoms=0); 00212 //! returns a hashed topological-torsion fingerprint for a molecule as a bit vector 00213 /*! 00214 \param mol: the molecule to be fingerprinted 00215 \param nBits: number of bits to include in the fingerprint 00216 \param targetSize: the number of atoms to include in the "torsions" 00217 \param fromAtoms: if provided, only torsions that start or end at 00218 the specified atoms will be included in the 00219 fingerprint 00220 \param ignoreAtoms: if provided, any torsions that include 00221 the specified atoms will not be included in the 00222 fingerprint 00223 \param nBitsPerEntry: number of bits to use in simulating counts 00224 00225 \return a pointer to the fingerprint. The client is 00226 responsible for calling delete on this. 
00227 00228 */ 00229 ExplicitBitVect * 00230 getHashedTopologicalTorsionFingerprintAsBitVect(const ROMol &mol, 00231 unsigned int nBits=2048, 00232 unsigned int targetSize=4, 00233 const std::vector<boost::uint32_t> *fromAtoms=0, 00234 const std::vector<boost::uint32_t> *ignoreAtoms=0, 00235 unsigned int nBitsPerEntry=4); 00236 } 00237 } 00238 00239 #endif
// 1.7.1  (apparently the version footer of the tool that generated this listing; not part of the header)