00001 // 00002 // Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC 00003 // 00004 // @@ All Rights Reserved @@ 00005 // This file is part of the RDKit. 00006 // The contents are covered by the terms of the BSD license 00007 // which is included in the file license.txt, found at the root 00008 // of the RDKit source tree. 00009 // 00010 00011 #ifndef _RD_EMBEDDER_H_ 00012 #define _RD_EMBEDDER_H_ 00013 00014 #include <map> 00015 #include <Geometry/point.h> 00016 #include <GraphMol/ROMol.h> 00017 00018 namespace RDKit { 00019 namespace DGeomHelpers { 00020 00021 //! Compute an embedding (in 3D) for the specified molecule using Distance Geometry 00022 /*! 00023 The following operations are performed (in order) here: 00024 -# Build a distance bounds matrix based on the topology, including 1-5 00025 distances but not VDW scaling 00026 -# Triangle smooth this bounds matrix 00027 -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW 00028 scaling, and repeat step 2 00029 -# Pick a distance matrix at random using the bounds matrix 00030 -# Compute initial coordinates from the distance matrix 00031 -# Repeat steps 3 and 4 until maxIterations is reached or embedding is successful 00032 -# Adjust initial coordinates by minimizing a Distance Violation error function 00033 00034 **NOTE**: if the molecule has multiple fragments, they will be embedded separately, 00035 this means that they will likely occupy the same region of space. 00036 00037 \param mol Molecule of interest 00038 \param maxIterations Max. number of times the embedding will be tried if coordinates are 00039 not obtained successfully. The default value is 10x the number of atoms. 00040 \param seed provides a seed for the random number generator (so that the same 00041 coordinates can be obtained for a molecule on multiple runs) 00042 If negative, the RNG will not be seeded. 00043 \param clearConfs Clear all existing conformations on the molecule 00044 \param useRandomCoords Start the embedding from random coordinates instead of 00045 using eigenvalues of the distance matrix. 00046 \param boxSizeMult Determines the size of the box that is used for 00047 random coordinates. If this is a positive number, the 00048 side length will equal the largest element of the distance 00049 matrix times \c boxSizeMult. If this is a negative number, 00050 the side length will equal \c -boxSizeMult (i.e. independent 00051 of the elements of the distance matrix). 00052 \param randNegEig Picks coordinates at random when a embedding process produces 00053 negative eigenvalues 00054 \param numZeroFail Fail embedding if we find this many or more zero eigenvalues 00055 (within a tolerance) 00056 \param coordMap a map of int to Point3D, between atom IDs and their locations 00057 their locations. If this container is provided, the coordinates 00058 are used to set distance constraints on the embedding. The resulting 00059 conformer(s) should have distances between the specified atoms that 00060 reproduce those between the points in \c coordMap. Because the embedding 00061 produces a molecule in an arbitrary reference frame, an alignment step 00062 is required to actually reproduce the provided coordinates. 00063 \param optimizerForceTol set the tolerance on forces in the distgeom optimizer 00064 (this shouldn't normally be altered in client code). 00065 \param basinThresh set the basin threshold for the DGeom force field, 00066 (this shouldn't normally be altered in client code). 00067 00068 00069 \return ID of the conformations added to the molecule, -1 if the emdedding failed 00070 */ 00071 int EmbedMolecule(ROMol &mol, unsigned int maxIterations=0, int seed=-1, 00072 bool clearConfs=true, 00073 bool useRandomCoords=false,double boxSizeMult=2.0, 00074 bool randNegEig=true, 00075 unsigned int numZeroFail=1, 00076 const std::map<int,RDGeom::Point3D> *coordMap=0, 00077 double optimizerForceTol=1e-3, 00078 double basinThresh=5.0); 00079 00080 //*! Embed multiple conformations for a molecule 00081 /*! 00082 This is kind of equivalent to calling EmbedMolecule multiple times - just that the bounds 00083 matrix is computed only once from the topology 00084 00085 **NOTE**: if the molecule has multiple fragments, they will be embedded separately, 00086 this means that they will likely occupy the same region of space. 00087 00088 00089 \param mol Molecule of interest 00090 \param numConfs Number of conformations to be generated 00091 \param maxIterations Max. number of times the embedding will be tried if coordinates are 00092 not obtained successfully. The default value is 10x the number of atoms. 00093 \param seed provides a seed for the random number generator (so that the same 00094 coordinates can be obtained for a molecule on multiple runs). 00095 If negative, the RNG will not be seeded. 00096 \param clearConfs Clear all existing conformations on the molecule 00097 \param useRandomCoords Start the embedding from random coordinates instead of 00098 using eigenvalues of the distance matrix. 00099 \param boxSizeMult Determines the size of the box that is used for 00100 random coordinates. If this is a positive number, the 00101 side length will equal the largest element of the distance 00102 matrix times \c boxSizeMult. If this is a negative number, 00103 the side length will equal \c -boxSizeMult (i.e. independent 00104 of the elements of the distance matrix). 00105 \param randNegEig Picks coordinates at random when a embedding process produces 00106 negative eigenvalues 00107 \param numZeroFail Fail embedding if we find this many or more zero eigenvalues 00108 (within a tolerance) 00109 \param pruneRmsThresh Retain only the conformations out of 'numConfs' after embedding that are 00110 at least this far apart from each other. RMSD is computed on the heavy atoms. 00111 Prunining is greedy; i.e. the first embedded conformation is retained and from 00112 then on only those that are atleast pruneRmsThresh away from already 00113 retained conformations are kept. The pruning is done after embedding and 00114 bounds violation minimization. No pruning by default. 00115 \param coordMap a map of int to Point3D, between atom IDs and their locations 00116 their locations. If this container is provided, the coordinates 00117 are used to set distance constraints on the embedding. The resulting 00118 conformer(s) should have distances between the specified atoms that 00119 reproduce those between the points in \c coordMap. Because the embedding 00120 produces a molecule in an arbitrary reference frame, an alignment step 00121 is required to actually reproduce the provided coordinates. 00122 00123 \param optimizerForceTol set the tolerance on forces in the DGeom optimizer 00124 (this shouldn't normally be altered in client code). 00125 \param basinThresh set the basin threshold for the DGeom force field, 00126 (this shouldn't normally be altered in client code). 00127 00128 \return an INT_VECT of conformer ids 00129 00130 */ 00131 INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs=10, 00132 unsigned int maxIterations=30, 00133 int seed=-1, bool clearConfs=true, 00134 bool useRandomCoords=false,double boxSizeMult=2.0, 00135 bool randNegEig=true, unsigned int numZeroFail=1, 00136 double pruneRmsThresh=-1.0, 00137 const std::map<int,RDGeom::Point3D> *coordMap=0, 00138 double optimizerForceTol=1e-3,double basinThresh=5.0); 00139 00140 } 00141 } 00142 00143 #endif
1.7.1