RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Embedder.h
Go to the documentation of this file.
1//
2// Copyright (C) 2004-2025 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_EMBEDDER_H_GUARD
13#define RD_EMBEDDER_H_GUARD
14
15#include <map>
16#include <utility>
17#include <Geometry/point.h>
18#include <GraphMol/ROMol.h>
19#include <boost/shared_ptr.hpp>
21
22namespace RDKit {
23namespace DGeomHelpers {
24
40
41//! Parameter object for controlling embedding
42/*!
43 numConfs Number of conformations to be generated
44 numThreads Sets the number of threads to use (more than one thread
45 will only be used if the RDKit was built with multithread
46 support). If set to zero, the max supported by the system will
47 be used.
48 maxIterations Max. number of times the embedding will be tried if
49 coordinates are not obtained successfully. The default
50 value is 10x the number of atoms.
51 randomSeed provides a seed for the random number generator (so that
52 the same coordinates can be obtained for a
53 molecule on multiple runs) If -1, the
54 RNG will not be seeded.
55 clearConfs Clear all existing conformations on the molecule
56 useRandomCoords Start the embedding from random coordinates instead of
57 using eigenvalues of the distance matrix.
58 boxSizeMult Determines the size of the box that is used for
59 random coordinates. If this is a positive number, the
60 side length will equal the largest element of the distance
61 matrix times \c boxSizeMult. If this is a negative number,
62 the side length will equal \c -boxSizeMult (i.e. independent
63 of the elements of the distance matrix).
64 randNegEig Picks coordinates at random when an embedding process produces
65 negative eigenvalues
66 numZeroFail Fail embedding if we find this many or more zero eigenvalues
67 (within a tolerance)
68 pruneRmsThresh Retain only the conformations out of 'numConfs' after
69 embedding that are at least this far apart from each other.
70 RMSD is computed on the heavy atoms.
71 Pruning is greedy; i.e. the first embedded conformation is
72 retained and from then on only those that are at least
73 \c pruneRmsThresh away from already
74 retained conformations are kept. The pruning is done
75 after embedding and bounds violation minimization.
76 No pruning by default.
77 coordMap a map of int to Point3D, between atom IDs and
78 their locations. If this container is provided, the
79 coordinates are used to set distance constraints on the
80 embedding. The resulting conformer(s) should have distances
81 between the specified atoms that reproduce those between the
82 points in \c coordMap. Because the embedding produces a
83 molecule in an arbitrary reference frame, an alignment step
84 is required to actually reproduce the provided coordinates.
85 optimizerForceTol set the tolerance on forces in the DGeom optimizer
86 (this shouldn't normally be altered in client code).
87 ignoreSmoothingFailures try to embed the molecule even if triangle bounds
88 smoothing fails
89 enforceChirality enforce the correct chirality if chiral centers are present
90 useExpTorsionAnglePrefs impose experimental torsion-angle preferences
91 useBasicKnowledge impose "basic knowledge" terms such as flat
92 aromatic rings, ketones, etc.
93 ETversion version of the experimental torsion-angle preferences
94 verbose print output of experimental torsion-angle preferences
95 basinThresh set the basin threshold for the DGeom force field,
96 (this shouldn't normally be altered in client code).
97 onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
98 boundsMat custom bound matrix to specify upper and lower bounds of atom
99 pairs
100 embedFragmentsSeparately embed each fragment of molecule in turn
101 useSmallRingTorsions optional torsions to improve small ring conformer
102 sampling
103 useMacrocycleTorsions optional torsions to improve macrocycle conformer
104 sampling
105 useMacrocycle14config If 1-4 distances bound heuristics for
106 macrocycles is used
107 timeout time out in seconds
108 CPCI custom coulombic interactions between atom pairs
109 callback void pointer to a function for reporting progress,
110 will be called with the current iteration number.
111 forceTransAmides constrain amide bonds to be trans.
112 useSymmetryForPruning use molecule symmetry when doing the RMSD pruning.
113 NOTE that for reasons of computational efficiency,
114 setting this will also set onlyHeavyAtomsForRMS to
115 true.
116 trackFailures keep track of which checks during the embedding process fail
117 failures if trackFailures is true, this is used to track the number
118 of times each embedding check fails
119 enableSequentialRandomSeeds handle the random number seeds so that
120 conformer generation can be restarted
121*/
123 unsigned int maxIterations{0};
125 int randomSeed{-1};
126 bool clearConfs{true};
127 bool useRandomCoords{false};
128 double boxSizeMult{2.0};
129 bool randNegEig{true};
130 unsigned int numZeroFail{1};
131 const std::map<int, RDGeom::Point3D> *coordMap{nullptr};
132 double optimizerForceTol{1e-3};
136 bool useBasicKnowledge{false};
137 bool verbose{false};
138 double basinThresh{5.0};
139 double pruneRmsThresh{-1.0};
141 unsigned int ETversion{2};
142 boost::shared_ptr<const DistGeom::BoundsMatrix> boundsMat;
147 unsigned int timeout{0};
148 std::shared_ptr<std::map<std::pair<unsigned int, unsigned int>, double>> CPCI;
149 void (*callback)(unsigned int);
153 bool trackFailures{false};
154 std::vector<unsigned int> failures;
157
158 EmbedParameters() : boundsMat(nullptr), CPCI(nullptr), callback(nullptr) {}
160 unsigned int maxIterations, int numThreads, int randomSeed,
161 bool clearConfs, bool useRandomCoords, double boxSizeMult,
162 bool randNegEig, unsigned int numZeroFail,
163 const std::map<int, RDGeom::Point3D> *coordMap, double optimizerForceTol,
167 unsigned int ETversion = 2,
168 const DistGeom::BoundsMatrix *boundsMat = nullptr,
169 bool embedFragmentsSeparately = true, bool useSmallRingTorsions = false,
170 bool useMacrocycleTorsions = false, bool useMacrocycle14config = false,
171 unsigned int timeout = 0,
172 std::shared_ptr<std::map<std::pair<unsigned int, unsigned int>, double>>
173 CPCI = nullptr,
174 void (*callback)(unsigned int) = nullptr)
200 CPCI(std::move(CPCI)),
202};
203
204//! update parameters from a JSON string
206 EmbedParameters &params, const std::string &json);
207
208//! export parameters to JSON string
210 const EmbedParameters &params);
211
212//! Embed multiple conformations for a molecule
214 unsigned int numConfs,
215 EmbedParameters &params);
216inline INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs,
217 EmbedParameters &params) {
218 INT_VECT res;
219 EmbedMultipleConfs(mol, res, numConfs, params);
220 return res;
221}
222
223//! Compute an embedding (in 3D) for the specified molecule using Distance
224/// Geometry
225inline int EmbedMolecule(ROMol &mol, EmbedParameters &params) {
226 INT_VECT confIds;
227 EmbedMultipleConfs(mol, confIds, 1, params);
228
229 int res;
230 if (confIds.size()) {
231 res = confIds[0];
232 } else {
233 res = -1;
234 }
235 return res;
236}
237
238//! Compute an embedding (in 3D) for the specified molecule using Distance
239/// Geometry
240/*!
241 The following operations are performed (in order) here:
242 -# Build a distance bounds matrix based on the topology, including 1-5
243 distances but not VDW scaling
244 -# Triangle smooth this bounds matrix
245 -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
246 scaling, and repeat step 2
247 -# Pick a distance matrix at random using the bounds matrix
248 -# Compute initial coordinates from the distance matrix
249 -# Repeat steps 4 and 5 until maxIterations is reached or embedding is
250 successful
251 -# Adjust initial coordinates by minimizing a Distance Violation error
252 function
253 **NOTE**: if the molecule has multiple fragments, they will be embedded
254 separately,
255 this means that they will likely occupy the same region of space.
256 \param mol Molecule of interest
257 \param maxIterations Max. number of times the embedding will be tried if
258 coordinates are not obtained successfully. The default
259 value is 10x the number of atoms.
260 \param seed provides a seed for the random number generator (so that
261 the same coordinates can be obtained for a molecule on
262 multiple runs). If negative, the RNG will not be seeded.
263 \param clearConfs Clear all existing conformations on the molecule
264 \param useRandomCoords Start the embedding from random coordinates instead of
265 using eigenvalues of the distance matrix.
266 \param boxSizeMult Determines the size of the box that is used for
267 random coordinates. If this is a positive number, the
268 side length will equal the largest element of the
269 distance matrix times \c boxSizeMult. If this is a
270 negative number, the side length will equal
271 \c -boxSizeMult (i.e. independent of the elements of the
272 distance matrix).
273 \param randNegEig Picks coordinates at random when an embedding process
274 produces negative eigenvalues
275 \param numZeroFail Fail embedding if we find this many or more zero
276 eigenvalues (within a tolerance)
277 \param coordMap a map of int to Point3D, between atom IDs and
278 their locations. If this container is provided, the
279 coordinates are used to set distance constraints on the
280 embedding. The resulting conformer(s) should have distances
281 between the specified atoms that reproduce those between the
282 points in \c coordMap. Because the embedding produces a
283 molecule in an arbitrary reference frame, an alignment step
284 is required to actually reproduce the provided coordinates.
285 \param optimizerForceTol set the tolerance on forces in the distgeom optimizer
286 (this shouldn't normally be altered in client code).
287 \param ignoreSmoothingFailures try to embed the molecule even if triangle
288 bounds smoothing fails
289 \param enforceChirality enforce the correct chirality if chiral centers are
290 present
291 \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
292 \param useBasicKnowledge impose "basic knowledge" terms such as flat
293 aromatic rings, ketones, etc.
294 \param verbose print output of experimental torsion-angle preferences
295 \param basinThresh set the basin threshold for the DGeom force field,
296 (this shouldn't normally be altered in client code).
297 \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
298 \param ETversion version of torsion preferences to use
299 \param useSmallRingTorsions optional torsions to improve small ring
300 conformer sampling
301 \param useMacrocycleTorsions optional torsions to improve macrocycle
302 conformer sampling
303 \param useMacrocycle14config If 1-4 distances bound heuristics for
304 macrocycles is used
305
306 \return ID of the conformer added to the molecule, -1 if the embedding failed
307*/
308inline int EmbedMolecule(
309 ROMol &mol, unsigned int maxIterations = 0, int seed = -1,
310 bool clearConfs = true, bool useRandomCoords = false,
311 double boxSizeMult = 2.0, bool randNegEig = true,
312 unsigned int numZeroFail = 1,
313 const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
314 double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
315 bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
316 bool useBasicKnowledge = false, bool verbose = false,
317 double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false,
318 unsigned int ETversion = 2, bool useSmallRingTorsions = false,
319 bool useMacrocycleTorsions = true, bool useMacrocycle14config = true) {
320 EmbedParameters params(
321 maxIterations, 1, seed, clearConfs, useRandomCoords, boxSizeMult,
322 randNegEig, numZeroFail, coordMap, optimizerForceTol,
323 ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
324 useBasicKnowledge, verbose, basinThresh, -1.0, onlyHeavyAtomsForRMS,
325 ETversion, nullptr, true, useSmallRingTorsions, useMacrocycleTorsions,
326 useMacrocycle14config);
327 return EmbedMolecule(mol, params);
328};
329
330//! Embed multiple conformations for a molecule
331/*!
332 This is kind of equivalent to calling EmbedMolecule multiple times - just that
333 the bounds
334 matrix is computed only once from the topology
335 **NOTE**: if the molecule has multiple fragments, they will be embedded
336 separately,
337 this means that they will likely occupy the same region of space.
338 \param mol Molecule of interest
339 \param res Used to return the resulting conformer ids
340 \param numConfs Number of conformations to be generated
341 \param numThreads Sets the number of threads to use (more than one thread
342 will only be used if the RDKit was built with
343 multithread
344 support). If set to zero, the max supported by the
345 system
346 will be used.
347 \param maxIterations Max. number of times the embedding will be tried if
348 coordinates are not obtained successfully. The default
349 value is 10x the number of atoms.
350 \param seed provides a seed for the random number generator (so that
351 the same coordinates can be obtained for a molecule on
352 multiple runs). If negative, the RNG will not be seeded.
353 \param clearConfs Clear all existing conformations on the molecule
354 \param useRandomCoords Start the embedding from random coordinates instead of
355 using eigenvalues of the distance matrix.
356 \param boxSizeMult Determines the size of the box that is used for
357 random coordinates. If this is a positive number, the
358 side length will equal the largest element of the
359 distance matrix times \c boxSizeMult. If this is a
360 negative number, the side length will equal
361 \c -boxSizeMult (i.e. independent of the elements of the
362 distance matrix).
363 \param randNegEig Picks coordinates at random when an embedding process
364 produces negative eigenvalues
365 \param numZeroFail Fail embedding if we find this many or more zero
366 eigenvalues (within a tolerance)
367 \param pruneRmsThresh Retain only the conformations out of 'numConfs' after
368 embedding that are at least this far apart from each
369 other. RMSD is computed on the heavy atoms.
370 Pruning is greedy; i.e. the first embedded conformation
371 is retained and from then on only those that are at
372 least
373 pruneRmsThresh away from already retained conformations
374 are kept. The pruning is done after embedding and
375 bounds violation minimization. No pruning by default.
376 \param coordMap a map of int to Point3D, between atom IDs and
377 their locations. If this container is provided, the
378 coordinates are used to set distance constraints on the
379 embedding. The resulting conformer(s) should have distances
380 between the specified atoms that reproduce those between the
381 points in \c coordMap. Because the embedding produces a
382 molecule in an arbitrary reference frame, an alignment step
383 is required to actually reproduce the provided coordinates.
384 \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
385 (this shouldn't normally be altered in client code).
386 \param ignoreSmoothingFailures try to embed the molecule even if triangle
387 bounds smoothing fails
388 \param enforceChirality enforce the correct chirality if chiral centers are
389 present
390 \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
391 \param useBasicKnowledge impose "basic knowledge" terms such as flat
392 aromatic rings, ketones, etc.
393 \param verbose print output of experimental torsion-angle preferences
394 \param basinThresh set the basin threshold for the DGeom force field,
395 (this shouldn't normally be altered in client code).
396 \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
397 \param ETversion version of torsion preferences to use
398 \param useSmallRingTorsions optional torsions to improve small ring
399 conformer sampling
400 \param useMacrocycleTorsions optional torsions to improve macrocycle
401 conformer sampling
402 \param useMacrocycle14config If 1-4 distances bound heuristics for
403 macrocycles is used
404
405*/
407 ROMol &mol, INT_VECT &res, unsigned int numConfs = 10, int numThreads = 1,
408 unsigned int maxIterations = 30, int seed = -1, bool clearConfs = true,
409 bool useRandomCoords = false, double boxSizeMult = 2.0,
410 bool randNegEig = true, unsigned int numZeroFail = 1,
411 double pruneRmsThresh = -1.0,
412 const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
413 double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
414 bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
415 bool useBasicKnowledge = false, bool verbose = false,
416 double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false,
417 unsigned int ETversion = 2, bool useSmallRingTorsions = false,
418 bool useMacrocycleTorsions = true, bool useMacrocycle14config = true,
419 unsigned int timeout = 0) {
420 EmbedParameters params(
421 maxIterations, numThreads, seed, clearConfs, useRandomCoords, boxSizeMult,
422 randNegEig, numZeroFail, coordMap, optimizerForceTol,
423 ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
424 useBasicKnowledge, verbose, basinThresh, pruneRmsThresh,
425 onlyHeavyAtomsForRMS, ETversion, nullptr, true, useSmallRingTorsions,
426 useMacrocycleTorsions, useMacrocycle14config, timeout);
427 EmbedMultipleConfs(mol, res, numConfs, params);
428};
429//! \overload
431 ROMol &mol, unsigned int numConfs = 10, unsigned int maxIterations = 30,
432 int seed = -1, bool clearConfs = true, bool useRandomCoords = false,
433 double boxSizeMult = 2.0, bool randNegEig = true,
434 unsigned int numZeroFail = 1, double pruneRmsThresh = -1.0,
435 const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
436 double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
437 bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
438 bool useBasicKnowledge = false, bool verbose = false,
439 double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false,
440 unsigned int ETversion = 2, bool useSmallRingTorsions = false,
441 bool useMacrocycleTorsions = false, bool useMacrocycle14config = false,
442 unsigned int timeout = 0) {
443 EmbedParameters params(
444 maxIterations, 1, seed, clearConfs, useRandomCoords, boxSizeMult,
445 randNegEig, numZeroFail, coordMap, optimizerForceTol,
446 ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
447 useBasicKnowledge, verbose, basinThresh, pruneRmsThresh,
448 onlyHeavyAtomsForRMS, ETversion, nullptr, true, useSmallRingTorsions,
449 useMacrocycleTorsions, useMacrocycle14config, timeout);
450 INT_VECT res;
451 EmbedMultipleConfs(mol, res, numConfs, params);
452 return res;
453};
454
455//! Parameters corresponding to Sereina Riniker's KDG approach
456RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters KDG;
457//! Parameters corresponding to Sereina Riniker's ETDG approach
458RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETDG;
459//! Parameters corresponding to Sereina Riniker's ETDG approach - version 2
460RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETDGv2;
461//! Parameters corresponding to Sereina Riniker's ETKDG approach
462RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDG;
463//! Parameters corresponding to Sereina Riniker's ETKDG approach - version 2
464RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDGv2;
465//! Parameters corresponding to the improved ETKDG by Wang, Witek, Landrum and Riniker
466//! (10.1021/acs.jcim.0c00025) - the macrocycle part
467RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDGv3;
468//! Parameters corresponding to the improved ETKDG by Wang, Witek, Landrum and Riniker
469//! (10.1021/acs.jcim.0c00025) - the small ring part
470RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters srETKDGv3;
471} // namespace DGeomHelpers
472} // namespace RDKit
473
474#endif
Defines the primary molecule class ROMol as well as associated typedefs.
Class to store the distance bound.
#define RDKIT_DISTGEOMHELPERS_EXPORT
Definition export.h:129
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDGv2
Parameters corresponding to Sereina Riniker's ETKDG approach - version 2.
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETDG
Parameters corresponding to Sereina Riniker's ETDG approach.
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDGv3
RDKIT_DISTGEOMHELPERS_EXPORT void updateEmbedParametersFromJSON(EmbedParameters &params, const std::string &json)
update parameters from a JSON string
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDG
Parameters corresponding to Sereina Riniker's ETKDG approach.
RDKIT_DISTGEOMHELPERS_EXPORT void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs, EmbedParameters &params)
Embed multiple conformations for a molecule.
RDKIT_DISTGEOMHELPERS_EXPORT std::string embedParametersToJSON(const EmbedParameters &params)
export parameters to JSON string
int EmbedMolecule(ROMol &mol, EmbedParameters &params)
Definition Embedder.h:225
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters srETKDGv3
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters KDG
Parameters corresponding to Sereina Riniker's KDG approach.
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETDGv2
Parameters corresponding to Sereina Riniker's ETDG approach - version 2.
Std stuff.
std::vector< int > INT_VECT
Definition types.h:303
Parameter object for controlling embedding.
Definition Embedder.h:122
void(* callback)(unsigned int)
Definition Embedder.h:149
std::vector< unsigned int > failures
Definition Embedder.h:154
const std::map< int, RDGeom::Point3D > * coordMap
Definition Embedder.h:131
boost::shared_ptr< const DistGeom::BoundsMatrix > boundsMat
Definition Embedder.h:142
std::shared_ptr< std::map< std::pair< unsigned int, unsigned int >, double > > CPCI
Definition Embedder.h:148
EmbedParameters(unsigned int maxIterations, int numThreads, int randomSeed, bool clearConfs, bool useRandomCoords, double boxSizeMult, bool randNegEig, unsigned int numZeroFail, const std::map< int, RDGeom::Point3D > *coordMap, double optimizerForceTol, bool ignoreSmoothingFailures, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose, double basinThresh, double pruneRmsThresh, bool onlyHeavyAtomsForRMS, unsigned int ETversion=2, const DistGeom::BoundsMatrix *boundsMat=nullptr, bool embedFragmentsSeparately=true, bool useSmallRingTorsions=false, bool useMacrocycleTorsions=false, bool useMacrocycle14config=false, unsigned int timeout=0, std::shared_ptr< std::map< std::pair< unsigned int, unsigned int >, double > > CPCI=nullptr, void(*callback)(unsigned int)=nullptr)
Definition Embedder.h:159