Embedder.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 
00011 #ifndef _RD_EMBEDDER_H_
00012 #define _RD_EMBEDDER_H_
00013 
00014 #include <map>
00015 #include <Geometry/point.h>
00016 #include <GraphMol/ROMol.h>
00017 
00018 namespace RDKit {
00019   namespace DGeomHelpers {
00020     
00021     //! Compute an embedding (in 3D) for the specified molecule using Distance Geometry
00022     /*!
00023       The following operations are performed (in order) here:
00024        -# Build a distance bounds matrix based on the topology, including 1-5
00025           distances but not vdW scaling
00026        -# Triangle smooth this bounds matrix
00027        -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
00028           scaling, and repeat step 2
00029        -# Pick a distance matrix at random using the bounds matrix
00030        -# Compute initial coordinates from the distance matrix
00031        -# Repeat steps 4 and 5 until maxIterations is reached or embedding is successful
00032        -# Adjust initial coordinates by minimizing a Distance Violation error function
00033 
00034        **NOTE**: if the molecule has multiple fragments, they will be embedded separately;
00035          this means that they will likely occupy the same region of space.
00036        
00037       \param mol            Molecule of interest
00038       \param maxIterations  Max. number of times the embedding will be tried if coordinates are
00039                             not obtained successfully. The default value is 10x the number of atoms.
00040       \param seed           provides a seed for the random number generator (so that the same
00041                             coordinates can be obtained for a molecule on multiple runs).
00042                             If negative, the RNG will not be seeded.
00043       \param clearConfs     Clear all existing conformations on the molecule
00044       \param useRandomCoords  Start the embedding from random coordinates instead of
00045                               using eigenvalues of the distance matrix.
00046       \param boxSizeMult    Determines the size of the box that is used for
00047                             random coordinates. If this is a positive number, the
00048                             side length will equal the largest element of the distance
00049                             matrix times \c boxSizeMult. If this is a negative number,
00050                             the side length will equal \c -boxSizeMult (i.e. independent
00051                             of the elements of the distance matrix).
00052       \param randNegEig     Picks coordinates at random when an embedding process produces
00053                             negative eigenvalues
00054       \param numZeroFail    Fail embedding if we find this many or more zero eigenvalues
00055                             (within a tolerance)
00056       \param coordMap  a map of int to Point3D, between atom IDs and
00057                        their locations.  If this container is provided, the coordinates
00058                        are used to set distance constraints on the embedding. The resulting
00059                        conformer(s) should have distances between the specified atoms that
00060                        reproduce those between the points in \c coordMap. Because the embedding
00061                        produces a molecule in an arbitrary reference frame, an alignment step
00062                        is required to actually reproduce the provided coordinates.
00063       \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
00064                                (this shouldn't normally be altered in client code).
00065       \param basinThresh    set the basin threshold for the DGeom force field,
00066                             (this shouldn't normally be altered in client code).
00067 
00068 
00069       \return ID of the conformation added to the molecule, -1 if the embedding failed
00070     */
00071     int EmbedMolecule(ROMol &mol, unsigned int maxIterations=0, int seed=-1,
00072                       bool clearConfs=true,
00073                       bool useRandomCoords=false,double boxSizeMult=2.0,
00074                       bool randNegEig=true,
00075                       unsigned int numZeroFail=1,
00076                       const std::map<int,RDGeom::Point3D> *coordMap=0,
00077                       double optimizerForceTol=1e-3,
00078                       double basinThresh=5.0);
00079 
00080     //! Embed multiple conformations for a molecule
00081     /*!
00082       This is roughly equivalent to calling EmbedMolecule multiple times, except that the bounds
00083       matrix is computed only once from the topology
00084 
00085        **NOTE**: if the molecule has multiple fragments, they will be embedded separately;
00086          this means that they will likely occupy the same region of space.
00087 
00088 
00089       \param mol            Molecule of interest
00090       \param numConfs       Number of conformations to be generated
00091       \param maxIterations  Max. number of times the embedding will be tried if coordinates are
00092                             not obtained successfully. The default is 30 (presumably 0 selects 10x the number of atoms; confirm).
00093       \param seed           provides a seed for the random number generator (so that the same
00094                             coordinates can be obtained for a molecule on multiple runs).
00095                             If negative, the RNG will not be seeded.
00096       \param clearConfs     Clear all existing conformations on the molecule
00097       \param useRandomCoords  Start the embedding from random coordinates instead of
00098                               using eigenvalues of the distance matrix.
00099       \param boxSizeMult    Determines the size of the box that is used for
00100                             random coordinates. If this is a positive number, the
00101                             side length will equal the largest element of the distance
00102                             matrix times \c boxSizeMult. If this is a negative number,
00103                             the side length will equal \c -boxSizeMult (i.e. independent
00104                             of the elements of the distance matrix).
00105       \param randNegEig     Picks coordinates at random when an embedding process produces
00106                             negative eigenvalues
00107       \param numZeroFail    Fail embedding if we find this many or more zero eigenvalues
00108                             (within a tolerance)
00109       \param pruneRmsThresh Retain only the conformations out of 'numConfs' after embedding that are
00110                             at least this far apart from each other. RMSD is computed on the heavy atoms.
00111                             Pruning is greedy; i.e. the first embedded conformation is retained and from
00112                             then on only those that are at least pruneRmsThresh away from already
00113                             retained conformations are kept. The pruning is done after embedding and
00114                             bounds violation minimization. No pruning by default.
00115       \param coordMap  a map of int to Point3D, between atom IDs and
00116                        their locations.  If this container is provided, the coordinates
00117                        are used to set distance constraints on the embedding. The resulting
00118                        conformer(s) should have distances between the specified atoms that
00119                        reproduce those between the points in \c coordMap. Because the embedding
00120                        produces a molecule in an arbitrary reference frame, an alignment step
00121                        is required to actually reproduce the provided coordinates.
00122 
00123       \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
00124                                (this shouldn't normally be altered in client code).
00125       \param basinThresh    set the basin threshold for the DGeom force field,
00126                             (this shouldn't normally be altered in client code).
00127 
00128       \return an INT_VECT of conformer ids
00129 
00130     */
00131     INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs=10,
00132                                 unsigned int maxIterations=30, 
00133                                 int seed=-1, bool clearConfs=true, 
00134                                 bool useRandomCoords=false,double boxSizeMult=2.0,
00135                                 bool randNegEig=true, unsigned int numZeroFail=1,
00136                                 double pruneRmsThresh=-1.0,
00137                                 const std::map<int,RDGeom::Point3D> *coordMap=0,
00138                                 double optimizerForceTol=1e-3,double basinThresh=5.0);
00139 
00140   }
00141 }
00142 
00143 #endif