RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
AlignMolecules.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2025 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_ALIGNMOLECULES_H
12#define RD_ALIGNMOLECULES_H
13
15#include <Numerics/Vector.h>
16#include <vector>
17
18namespace RDKit {
19typedef std::vector<std::pair<int, int>> MatchVectType;
20
21class Conformer;
22class ROMol;
23class RWMol;
24namespace MolAlign {
//! Exception class declared in the MolAlign namespace; carries a text message
/*!
  The message passed at construction is copied into the exception object and
  is returned by what() for as long as the exception is alive.
*/
class RDKIT_MOLALIGN_EXPORT MolAlignException : public std::exception {
 public:
  //! construct with an error message
  /*!
    \param msg  NUL-terminated message text; a null pointer is treated as an
                empty message (constructing a std::string from a null
                pointer would be undefined behaviour)
  */
  MolAlignException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct with an error message
  /*!
    \param msg  message text; taken by const reference so that it is copied
                exactly once, into the stored member
  */
  MolAlignException(const std::string &msg) : _msg(msg) {}
  //! get the error message
  /*!
    The returned pointer refers to storage owned by this exception object.
  */
  const char *what() const noexcept override { return _msg.c_str(); }
  ~MolAlignException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the error message
};
38
40 int maxMatches = 1e6; //< if map is empty, this will be the max number of
41 /// matches found in a SubstructMatch().
43 true; //< if set, conjugated
44 /// terminal functional groups
45 /// (like nitro or carboxylate)
46 /// will be considered symmetrically
47 bool ignoreHs = true; //< ignore Hs in the alignment. Note that atoms in map
48 /// will always be used
49 int numThreads = 1;
50 std::vector<MatchVectType> map; //< a vector of vectors of pairs of atom IDs
51 /// (probe AtomId, ref AtomId) used to
52 /// compute the alignments.
53 /// If not provided, these will be
54 /// generated using a substructure search.
56 nullptr; //< weights for each pair of atoms
57};
58
59//! Alignment functions
60
61//! Compute the transformation required to align a molecule
62/*!
63 The 3D transformation required to align the specified conformation in the
64 probe molecule to a specified conformation in the reference molecule is
65 computed so that the root mean squared distance between a specified set of
66 atoms is minimized
67
68 \param prbMol molecule that is to be aligned
69 \param refMol molecule used as the reference for the alignment
70 \param trans storage for the computed transform
71 \param prbCid ID of the conformation in the probe to be used
72 for the alignment (defaults to first conformation)
73 \param refCid ID of the conformation in the ref molecule to which
74 the alignment is computed (defaults to first conformation)
75 \param atomMap a vector of pairs of atom IDs (probe AtomId, ref AtomId)
76 used to compute the alignments. If this mapping is
77 not specified an attempt is made to generate one by
78 substructure matching
79 \param weights Optionally specify weights for each of the atom pairs
80 \param reflect if true reflect the conformation of the probe molecule
81 \param maxIters maximum number of iterations used in minimizing the RMSD
82
83 <b>Returns</b>
84 RMSD value
85*/
87 const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &trans,
88 int prbCid = -1, int refCid = -1, const MatchVectType *atomMap = nullptr,
89 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
90 unsigned int maxIters = 50);
91
92//! Optimally (minimum RMSD) align a molecule to another molecule
93/*!
94 The 3D transformation required to align the specified conformation in the
95 probe molecule to a specified conformation in the reference molecule is
96 computed so that the root mean squared distance between a specified set of
97 atoms is minimized. This transform is then applied to the specified
98 conformation in the probe molecule
99
100 \param prbMol molecule that is to be aligned
101 \param refMol molecule used as the reference for the alignment
102 \param prbCid ID of the conformation in the probe to be used
103 for the alignment (defaults to first conformation)
104 \param refCid ID of the conformation in the ref molecule to which
105 the alignment is computed (defaults to first conformation)
106 \param atomMap a vector of pairs of atom IDs (probe AtomId, ref AtomId)
107 used to compute the alignments. If this mapping is
108 not specified an attempt is made to generate one by
109 substructure matching
110 \param weights Optionally specify weights for each of the atom pairs
111 \param reflect if true reflect the conformation of the probe molecule
112 \param maxIters maximum number of iterations used in minimizing the RMSD
113
114 <b>Returns</b>
115 RMSD value
116*/
118 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
119 const MatchVectType *atomMap = nullptr,
120 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
121 unsigned int maxIters = 50);
122
123//! Compute the optimal RMS, transformation and atom map for aligning
124//! two molecules, taking symmetry into account. Molecule coordinates
125//! are left unaltered.
126/*!
127 This function will attempt to align all permutations of matching atom
128 orders in both molecules, for some molecules it will lead to 'combinatorial
129 explosion' especially if hydrogens are present.
130 Use 'RDKit::MolAlign::getAlignmentTransform' to align molecules
131 without changing the atom order.
132
133 \param prbMol the molecule to be aligned to the reference
134 \param refMol the reference molecule
135 \param bestTrans storage for the best computed transform
136 \param bestMatch storage for the MatchVectType corresponding to
137 the best match found.
138 \param params parameters for the alignment
139 \param prbCid (optional) probe conformation to use
140 \param refCid (optional) reference conformation to use
141 \param reflect if true reflect the conformation of the probe molecule
142 \param maxIters maximum number of iterations used in minimizing the RMSD
143
144 <b>Returns</b>
145 Best RMSD value found
146*/
148 const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &bestTrans,
149 MatchVectType &bestMatch, const BestAlignmentParams &params,
150 int prbCid = -1, int refCid = -1, bool reflect = false,
151 unsigned int maxIters = 50);
152
153//! Compute the optimal RMS, transformation and atom map for aligning
154//! two molecules, taking symmetry into account. Molecule coordinates
155//! are left unaltered.
156/*!
157 This function will attempt to align all permutations of matching atom
158 orders in both molecules, for some molecules it will lead to 'combinatorial
159 explosion' especially if hydrogens are present.
160 Use 'RDKit::MolAlign::getAlignmentTransform' to align molecules
161 without changing the atom order.
162
163 \param prbMol the molecule to be aligned to the reference
164 \param refMol the reference molecule
165 \param bestTrans storage for the best computed transform
166 \param bestMatch storage for the MatchVectType corresponding to
167 the best match found.
168 \param prbCid (optional) probe conformation to use
169 \param refCid (optional) reference conformation to use
170 \param map (optional) a vector of vectors of pairs of atom IDs
171 (probe AtomId, ref AtomId) used to compute the alignments.
172 If not provided, these will be generated using a
173 substructure search.
174 \param maxMatches (optional) if map is empty, this will be the max number of
175 matches found in a SubstructMatch().
176 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
177 terminal functional groups (like nitro or carboxylate)
178 will be considered symmetrically
179 \param weights (optional) weights for each pair of atoms.
180 \param reflect if true reflect the conformation of the probe molecule
181 \param maxIters maximum number of iterations used in minimizing the RMSD
182 \param numThreads (optional) number of threads to use during the calculation
183
184 <b>Returns</b>
185 Best RMSD value found
186*/
188 const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &bestTrans,
189 MatchVectType &bestMatch, int prbCid = -1, int refCid = -1,
190 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
191 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
192 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
193 unsigned int maxIters = 50, int numThreads = 1) {
194 bool ignoreHs = false;
196 prbMol, refMol, bestTrans, bestMatch,
197 BestAlignmentParams{maxMatches, symmetrizeConjugatedTerminalGroups,
198 ignoreHs, numThreads, map, weights},
199 prbCid, refCid, reflect, maxIters);
200}
201
202//! Returns the optimal RMS for aligning two molecules, taking
203/// symmetry into account. As a side-effect, the probe molecule is
204/// left in the aligned state.
205/*!
206 This function will attempt to align all permutations of matching atom
207 orders in both molecules, for some molecules it will lead to 'combinatorial
208 explosion' especially if hydrogens are present.
209 Use 'RDKit::MolAlign::alignMol' to align molecules without changing the
210 atom order.
211
212 \param prbMol the molecule to be aligned to the reference
213 \param refMol the reference molecule
214 \param params parameters for the alignment
215 \param prbCid (optional) probe conformation to use
216 \param refCid (optional) reference conformation to use
217
218 <b>Returns</b>
219 Best RMSD value found
220*/
221RDKIT_MOLALIGN_EXPORT double getBestRMS(ROMol &prbMol, const ROMol &refMol,
222 const BestAlignmentParams &params,
223 int prbCid = -1, int refCid = -1);
224
225//! Returns the optimal RMS for aligning two molecules, taking
226/// symmetry into account. As a side-effect, the probe molecule is
227/// left in the aligned state.
228/*!
229 This function will attempt to align all permutations of matching atom
230 orders in both molecules, for some molecules it will lead to 'combinatorial
231 explosion' especially if hydrogens are present.
232 Use 'RDKit::MolAlign::alignMol' to align molecules without changing the
233 atom order.
234
235 \param prbMol the molecule to be aligned to the reference
236 \param refMol the reference molecule
237 \param prbCid (optional) probe conformation to use
238 \param refCid (optional) reference conformation to use
239 \param map (optional) a vector of vectors of pairs of atom IDs
240 (probe AtomId, ref AtomId) used to compute the alignments.
241 If not provided, these will be generated using a
242 substructure search.
243 \param maxMatches (optional) if map is empty, this will be the max number of
244 matches found in a SubstructMatch().
245 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
246 terminal functional groups (like nitro or carboxylate)
247 will be considered symmetrically
248 \param weights (optional) weights for each pair of atoms.
249 \param numThreads (optional) number of threads to use during the calculation
250
251 <b>Returns</b>
252 Best RMSD value found
253*/
254inline double getBestRMS(
255 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
256 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
257 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
258 const RDNumeric::DoubleVector *weights = nullptr, int numThreads = 1) {
259 bool ignoreHs = false;
260 return getBestRMS(
261 prbMol, refMol,
262 BestAlignmentParams{maxMatches, symmetrizeConjugatedTerminalGroups,
263 ignoreHs, numThreads, map, weights},
264 prbCid, refCid);
265};
266
267//! Returns the symmetric distance matrix between the conformers of a
268//! molecule.
269/// getBestRMS() is used to calculate the inter-conformer distances
270/*!
271 This function will attempt to align all permutations of matching atom
272 orders in both molecules, for some molecules it will lead to 'combinatorial
273 explosion' especially if hydrogens are present.
274
275 \param mol the molecule to be considered
276 \param params parameters for the matching; the number of threads to
277 use is taken from params.numThreads (there is no separate argument)
278
279 <b>Returns</b>
280 a vector with the RMSD values stored in the order:
281 [(1,0), (2,0), (2,1), (3,0), (3,1), (3,2), ...]
282*/
284 const ROMol &mol, const BestAlignmentParams &params);
285
286//! Returns the symmetric distance matrix between the conformers of a
287//! molecule.
288/// getBestRMS() is used to calculate the inter-conformer distances
289/*!
290 This function will attempt to align all permutations of matching atom
291 orders in both molecules, for some molecules it will lead to 'combinatorial
292 explosion' especially if hydrogens are present.
293
294 \param mol the molecule to be considered
295 \param numThreads (optional) number of threads to use during the calculation
296 \param map (optional) a vector of vectors of pairs of atom IDs
297 (probe AtomId, ref AtomId) used to compute the alignments.
298 If not provided, these will be generated using a
299 substructure search.
300 \param maxMatches (optional) if map is empty, this will be the max number of
301 matches found in a SubstructMatch().
302 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
303 terminal functional groups (like nitro or carboxylate)
304 will be considered symmetrically
305 \param weights (optional) weights for each pair of atoms.
306
307 <b>Returns</b>
308 a vector with the RMSD values stored in the order:
309 [(1,0), (2,0), (2,1), (3,0), (3,1), (3,2), ...]
310*/
311inline std::vector<double> getAllConformerBestRMS(
312 const ROMol &mol, int numThreads = 1,
313 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
314 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
315 const RDNumeric::DoubleVector *weights = nullptr) {
316 bool ignoreHs = false;
318 mol, BestAlignmentParams{maxMatches, symmetrizeConjugatedTerminalGroups,
319 ignoreHs, numThreads, map, weights});
320}
321
322//! Returns the RMS between two molecules, taking symmetry into account.
323//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
324//! probe molecules are not aligned to the reference ahead of the
325//! RMS calculation. This is useful, for example, to compute
326//! the RMSD between docking poses and the co-crystallized ligand.
327/*!
328 This function will attempt to match all permutations of matching atom
329 orders in both molecules, for some molecules it will lead to 'combinatorial
330 explosion' especially if hydrogens are present.
331
332 \param prbMol the molecule to be aligned to the reference
333 \param refMol the reference molecule
334 \param prbCid (optional) probe conformation to use
335 \param refCid (optional) reference conformation to use
336 \param map (optional) a vector of vectors of pairs of atom IDs
337 (probe AtomId, ref AtomId) used to compute the alignments.
338 If not provided, these will be generated using a
339 substructure search.
340 \param maxMatches (optional) if map is empty, this will be the max number of
341 matches found in a SubstructMatch().
342 \param symmetrizeConjugatedTerminalGroups (optional) if set, conjugated
343 terminal functional groups (like nitro or carboxylate)
344 will be considered symmetrically
345 \param weights (optional) weights for each pair of atoms.
346
347 <b>Returns</b>
348 Best RMSD value found
349*/
351 ROMol &prbMol, const ROMol &refMol, int prbCid = -1, int refCid = -1,
352 const std::vector<MatchVectType> &map = std::vector<MatchVectType>(),
353 int maxMatches = 1e6, bool symmetrizeConjugatedTerminalGroups = true,
354 const RDNumeric::DoubleVector *weights = nullptr);
355
356//! Returns the RMS between two molecules, taking symmetry into account.
357//! In contrast to getBestRMS, the RMS is computed "in place", i.e.
358//! probe molecules are not aligned to the reference ahead of the
359//! RMS calculation. This is useful, for example, to compute
360//! the RMSD between docking poses and the co-crystallized ligand.
361/*!
362 This function will attempt to match all permutations of matching atom
363 orders in both molecules, for some molecules it will lead to 'combinatorial
364 explosion' especially if hydrogens are present.
365
366 \param prbMol the molecule to be aligned to the reference
367 \param refMol the reference molecule
368 \param prbCid (optional) probe conformation to use
369 \param refCid (optional) reference conformation to use
370 \param map (optional) a vector of vectors of pairs of atom IDs
371 (probe AtomId, ref AtomId) used to compute the alignments.
372 If not provided, these will be generated using a
373 substructure search.
374 \param maxMatches (optional) if map is empty, this will be the max number of
375 matches found in a SubstructMatch().
376 \param weights (optional) weights for each pair of atoms.
377
378 <b>Returns</b>
379 Best RMSD value found
380*/
381RDKIT_MOLALIGN_EXPORT double CalcRMS(ROMol &prbMol, const ROMol &refMol,
382 int prbCid, int refCid,
383 const std::vector<MatchVectType> &map,
384 int maxMatches,
385 const RDNumeric::DoubleVector *weights);
386
387//! Align the conformations of a molecule using a common set of atoms. If
388/// the molecule contains queries, then the queries must also match exactly.
389/*!
390 \param mol The molecule of interest.
391 \param atomIds vector of atoms to be used to generate the alignment.
392 All atoms will be used if not specified
393 \param confIds vector of conformations to align - defaults to all
394 \param weights (optional) weights for each pair of atoms.
395 \param reflect toggles reflecting (about the origin) the alignment
396 \param maxIters the maximum number of iterations to attempt
397 \param RMSlist if nonzero, this will be used to return the RMS values
398 between the reference conformation and the other aligned
399 conformations
400*/
402 ROMol &mol, const std::vector<unsigned int> *atomIds = nullptr,
403 const std::vector<unsigned int> *confIds = nullptr,
404 const RDNumeric::DoubleVector *weights = nullptr, bool reflect = false,
405 unsigned int maxIters = 50, std::vector<double> *RMSlist = nullptr);
406
407namespace details {
408//! Converts terminal atoms in groups like nitro or carboxylate to be symmetry
409/// equivalent
411} // namespace details
412} // namespace MolAlign
413} // namespace RDKit
414#endif
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
~MolAlignException() noexcept override=default
MolAlignException(const char *msg)
construct with an error message
MolAlignException(const std::string msg)
construct with an error message
const char * what() const noexcept override
get the error message
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_MOLALIGN_EXPORT
Definition export.h:289
RDKIT_MOLALIGN_EXPORT void symmetrizeTerminalAtoms(RWMol &mol)
RDKIT_MOLALIGN_EXPORT double getAlignmentTransform(const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &trans, int prbCid=-1, int refCid=-1, const MatchVectType *atomMap=nullptr, const RDNumeric::DoubleVector *weights=nullptr, bool reflect=false, unsigned int maxIters=50)
Alignment functions.
RDKIT_MOLALIGN_EXPORT std::vector< double > getAllConformerBestRMS(const ROMol &mol, const BestAlignmentParams &params)
RDKIT_MOLALIGN_EXPORT double alignMol(ROMol &prbMol, const ROMol &refMol, int prbCid=-1, int refCid=-1, const MatchVectType *atomMap=nullptr, const RDNumeric::DoubleVector *weights=nullptr, bool reflect=false, unsigned int maxIters=50)
Optimally (minimum RMSD) align a molecule to another molecule.
RDKIT_MOLALIGN_EXPORT double getBestRMS(ROMol &prbMol, const ROMol &refMol, const BestAlignmentParams &params, int prbCid=-1, int refCid=-1)
RDKIT_MOLALIGN_EXPORT double CalcRMS(ROMol &prbMol, const ROMol &refMol, int prbCid=-1, int refCid=-1, const std::vector< MatchVectType > &map=std::vector< MatchVectType >(), int maxMatches=1e6, bool symmetrizeConjugatedTerminalGroups=true, const RDNumeric::DoubleVector *weights=nullptr)
RDKIT_MOLALIGN_EXPORT double getBestAlignmentTransform(const ROMol &prbMol, const ROMol &refMol, RDGeom::Transform3D &bestTrans, MatchVectType &bestMatch, const BestAlignmentParams &params, int prbCid=-1, int refCid=-1, bool reflect=false, unsigned int maxIters=50)
RDKIT_MOLALIGN_EXPORT void alignMolConformers(ROMol &mol, const std::vector< unsigned int > *atomIds=nullptr, const std::vector< unsigned int > *confIds=nullptr, const RDNumeric::DoubleVector *weights=nullptr, bool reflect=false, unsigned int maxIters=50, std::vector< double > *RMSlist=nullptr)
RDKIT_MOLALIGN_EXPORT const RDGeom::POINT3D_VECT * reflect(const Conformer &conf)
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
Vector< double > DoubleVector
Definition Vector.h:296
const RDNumeric::DoubleVector * weights
std::vector< MatchVectType > map
bool symmetrizeConjugatedTerminalGroups
matches found in a SubstructMatch().