Fingerprints.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2010 Greg Landrum and Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 #ifndef _RD_FINGERPRINTS_H_
00011 #define _RD_FINGERPRINTS_H_
00012 
00013 class ExplicitBitVect;
00014 namespace RDKit{
00015   class ROMol;
00016 
00017   //! \brief Generates a topological (Daylight like) fingerprint for a molecule
00018   //!        using an alternate (faster) hashing algorithm
00019   /*!
00020 
00021     \param mol:          the molecule to be fingerprinted
00022     \param minPath:      the minimum path length (in bonds) to be included
00023     \param maxPath:      the maximum path length (in bonds) to be included
00024     \param fpSize:       the size of the fingerprint
00025     \param nBitsPerHash: the number of bits to be set by each path
00026     \param useHs:        toggles inclusion of Hs in paths (if the molecule has explicit Hs)
00027     \param tgtDensity:   if the generated fingerprint is below this density, it will
00028                          be folded until the density is reached.
00029     \param minSize:      the minimum size to which the fingerprint will be
00030                          folded
00031     \param branchedPaths: toggles generation of branched subgraphs, not just linear paths
00032 
00033     \return the molecular fingerprint, as an ExplicitBitVect
00034 
00035     <b>Notes:</b>
00036       - the caller is responsible for <tt>delete</tt>ing the result
00037     
00038   */
00039   ExplicitBitVect *RDKFingerprintMol(const ROMol &mol,
00040                                      unsigned int minPath=1,
00041                                      unsigned int maxPath=7,
00042                                      unsigned int fpSize=2048,
00043                                      unsigned int nBitsPerHash=2,
00044                                      bool useHs=true,
00045                                      double tgtDensity=0.0,
00046                                      unsigned int minSize=128,
00047                                      bool branchedPaths=true);
00048   const std::string RDKFingerprintMolVersion="1.0.0"; // NOTE(review): presumably the version tag of the RDKFingerprintMol algorithm -- confirm
00049 
00050 
00051   //! \brief Generates a topological (Daylight like) fingerprint for a molecule
00052   //!        using a layer-based hashing algorithm
00053   /*!
00054 
00055     <b>Experimental:</b> This function is experimental. The API or results may change from
00056     release to release.
00057     
00058     \param mol:          the molecule to be fingerprinted
00059     \param layerFlags:   the layers to be included (see below)
00060     \param minPath:      the minimum path length (in bonds) to be included
00061     \param maxPath:      the maximum path length (in bonds) to be included
00062     \param fpSize:       the size of the fingerprint
00063     \param tgtDensity:   if the generated fingerprint is below this density, it will
00064                          be folded until the density is reached.
00065     \param minSize:      the minimum size to which the fingerprint will be
00066                          folded
00067     \param atomCounts:   if provided, this will be used to provide the count of the number
00068                          of paths that set bits each atom is involved in. The vector should
00069                          have at least as many entries as the molecule has atoms and is not
00070                          zeroed out here.
00071     \param setOnlyBits:  if provided, only bits that are set in this bit vector will be set
00072                          in the result. This is essentially the same as doing:
00073                             (*res) &= (*setOnlyBits);
00074                          but also has an impact on the atomCounts (if being used)
00075     \param branchedPaths: toggles generation of branched subgraphs, not just linear paths
00076 
00077     \return the molecular fingerprint, as an ExplicitBitVect
00078 
00079     <b>Notes:</b>
00080       - the caller is responsible for <tt>delete</tt>ing the result
00081 
00082     <b>Layer definitions:</b>
00083        - 0x01: pure topology
00084        - 0x02: bond order
00085        - 0x04: atom types
00086        - 0x08: presence of rings
00087        - 0x10: ring sizes
00088        - 0x20: aromaticity
00089   */
00090   ExplicitBitVect *LayeredFingerprintMol(const ROMol &mol,
00091                                          unsigned int layerFlags=0xFFFFFFFF,
00092                                          unsigned int minPath=1,unsigned int maxPath=7,
00093                                          unsigned int fpSize=2048,
00094                                          double tgtDensity=0.0,unsigned int minSize=128,
00095                                          std::vector<unsigned int> *atomCounts=0,
00096                                          ExplicitBitVect *setOnlyBits=0,
00097                                          bool branchedPaths=true);
00098   const unsigned int maxFingerprintLayers=10; // NOTE(review): presumably an upper bound on the number of layers; only six (0x01-0x20) are documented above -- confirm
00099   const std::string LayeredFingerprintMolVersion="0.5.0"; // NOTE(review): presumably the version tag of the LayeredFingerprintMol algorithm -- confirm
00100   const unsigned int substructLayers=0x07; // 0x01|0x02|0x04: the pure topology, bond order and atom types layers listed above
00101 
00102   //! \brief Generates a topological fingerprint for a molecule
00103   //!        using a series of pre-defined structural patterns
00104   /*!
00105 
00106     <b>Experimental:</b> This function is experimental. The API or results may change from
00107     release to release.
00108     
00109     \param mol:          the molecule to be fingerprinted
00110     \param layerFlags:   the layers to be included [not used in this release] (see below)
00111     \param minPath:      the minimum path length (in bonds) to be included
00112     \param maxPath:      the maximum path length (in bonds) to be included
00113     \param fpSize:       the size of the fingerprint
00114     \param atomCounts:   if provided, this will be used to provide the count of the number
00115                          of paths that set bits each atom is involved in. The vector should
00116                          have at least as many entries as the molecule has atoms and is not
00117                          zeroed out here.
00118     \param setOnlyBits:  if provided, only bits that are set in this bit vector will be set
00119                          in the result. This is essentially the same as doing:
00120                             (*res) &= (*setOnlyBits);
00121                          but also has an impact on the atomCounts (if being used)
00122     \param branchedPaths: toggles generation of branched subgraphs, not just linear paths
00123 
00124     \return the molecular fingerprint, as an ExplicitBitVect
00125 
00126     <b>Notes:</b>
00127       - the caller is responsible for <tt>delete</tt>ing the result
00128 
00129     <b>Layer definitions:</b>
00130        - 0x01: pure topology
00131        - 0x02: bond order
00132        - 0x04: atom types
00133        - 0x08: presence of rings
00134        - 0x10: ring sizes
00135        - 0x20: aromaticity
00136   */
00137   ExplicitBitVect *LayeredFingerprintMol2(const ROMol &mol,
00138                                          unsigned int layerFlags=0xFFFFFFFF,
00139                                          unsigned int minPath=1,unsigned int maxPath=7,
00140                                          unsigned int fpSize=2048,
00141                                          std::vector<unsigned int> *atomCounts=0,
00142                                          ExplicitBitVect *setOnlyBits=0,
00143                                          bool branchedPaths=true);
00144 
00145 
00146 }
00147 
00148 #endif