RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2024 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_MOL_OPS_H
12#define RD_MOL_OPS_H
13
14#include <vector>
15#include <map>
16#include <list>
18#include <boost/smart_ptr.hpp>
19#include <boost/dynamic_bitset.hpp>
21#include <RDGeneral/types.h>
23#include "SanitException.h"
25
26RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
27namespace RDKit {
28class ROMol;
29class RWMol;
30class Atom;
31class Bond;
32class Conformer;
//! a vector of doubles
//! NOTE(review): presumably holds invariant values, going by the name - confirm
//! against the code that uses it
33typedef std::vector<double> INVAR_VECT;
//! mutable iterator over an INVAR_VECT
34typedef INVAR_VECT::iterator INVAR_VECT_I;
//! read-only iterator over an INVAR_VECT
35typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
36
37//! \brief Groups a variety of molecular query and transformation operations.
38namespace MolOps {
39
40//! return the number of electrons available on an atom to donate for
41/// aromaticity
42/*!
43 The result is determined using the default valency, number of lone pairs,
44 number of bonds and the formal charge. Note that the atom may not donate
45 all of these electrons to a ring for aromaticity (also used in Conjugation
46 and hybridization code).
47
48 \param at the atom of interest
49
50 \return the number of electrons
51*/
53
54//! sums up all atomic formal charges and returns the result
56
57//! returns whether or not the given Atom is involved in a conjugated bond
59
60//! find fragments (disconnected components of the molecular graph)
61/*!
62
63 \param mol the molecule of interest
64 \param mapping used to return the mapping of Atoms->fragments.
65 On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
66 and will contain the fragment assignment for each Atom.
67
68 \return the number of fragments found.
69
70*/
71RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
72 std::vector<int> &mapping);
73//! find fragments (disconnected components of the molecular graph)
74/*!
75
76 \param mol the molecule of interest
77 \param frags used to return the Atoms in each fragment
78 On return \c frags will be \c numFrags long, and each entry
79 will contain the indices of the Atoms in that fragment.
80
81 \return the number of fragments found.
82
83*/
85 const ROMol &mol, std::vector<std::vector<int>> &frags);
86
87//! splits a molecule into its component fragments
88/// (disconnected components of the molecular graph)
89/*!
90
91 \param mol the molecule of interest
92 \param molFrags used to return the disconnected fragments as molecules.
93 Any contents on input will be cleared.
94 \param sanitizeFrags toggles sanitization of the fragments after
95 they are built
96 \param frags used to return the mapping of Atoms->fragments.
97 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
98 on return and will contain the fragment assignment for each Atom.
99 \param fragsMolAtomMapping used to return the Atoms in each fragment
100 If provided, \c fragsMolAtomMapping will be \c numFrags long on return, and each entry
101 will contain the indices of the Atoms in that fragment.
102 \param copyConformers toggles copying conformers of the fragments after
103 they are built
104 \return the number of fragments found.
105
106*/
108 const ROMol &mol, std::vector<std::unique_ptr<ROMol>> &molFrags,
109 bool sanitizeFrags = true, std::vector<int> *frags = nullptr,
110 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
111 bool copyConformers = true);
112
113//! splits a molecule into its component fragments
114/// (disconnected components of the molecular graph)
115/*!
116
117 \param mol the molecule of interest
118 \param sanitizeFrags toggles sanitization of the fragments after
119 they are built (default: \c true)
120 \param frags (optional) used to return the mapping of Atoms->fragments.
121 If provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
122 on return and will contain the fragment assignment for each Atom.
123 \param fragsMolAtomMapping (optional) used to return the Atoms in each fragment.
124 If provided, \c fragsMolAtomMapping will be \c numFrags long on return, and each entry
125 will contain the indices of the Atoms in that fragment.
126 \param copyConformers toggles copying conformers of the fragments after
127 they are built (default: \c true)
128 \return a vector of the fragments as <tt>boost::shared_ptr</tt>s to ROMols
129
130*/
131RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
132 const ROMol &mol, bool sanitizeFrags = true,
133 std::vector<int> *frags = nullptr,
134 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
135 bool copyConformers = true);
136
137//! splits a molecule into pieces based on labels assigned using a query
138/*!
139
 \tparam T the label type returned by \c query; it is also the key type of
 the returned map and the element type of \c whiteList
140 \param mol the molecule of interest
141 \param query the function used to "label" the molecule for fragmentation;
 it takes the molecule and an Atom pointer and returns a label of type \c T
142 \param sanitizeFrags toggles sanitization of the fragments after
143 they are built (default: \c true)
144 \param whiteList if provided, only labels in the list will be kept
145 \param negateList if true, the white list logic will be inverted: only labels
146 not in the list will be kept
147
148 \return a map from each label to the fragment (as a
 <tt>boost::shared_ptr<ROMol></tt>) carrying that label
149
150*/
151
152template <typename T>
153RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
154getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
155 bool sanitizeFrags = true,
156 const std::vector<T> *whiteList = nullptr,
157 bool negateList = false);
158//! splits a molecule into pieces based on labels assigned using a query,
159//! putting them into a map of std::unique_ptr<ROMol>.
160/*!
161
162 \param mol the molecule of interest
163 \param query the query used to "label" the molecule for fragmentation
164 \param molFrags used to return the disconnected fragments as molecules.
165 Any contents on input will be cleared.
166 \param sanitizeFrags toggles sanitization of the fragments after
167 they are built
168 \param whiteList if provided, only labels in the list will be kept
169 \param negateList if true, the white list logic will be inverted: only labels
170 not in the list will be kept
171
172 \return the number of fragments
173
174*/
175template <typename T>
177 const ROMol &mol, T (*query)(const ROMol &, const Atom *),
178 std::map<T, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags = true,
179 const std::vector<T> *whiteList = nullptr, bool negateList = false);
180
181#if 0
 // NOTE: the declaration below is compiled out by this #if 0 / #endif pair,
 // so findSpanningTree is not declared by this header as it stands.
182 //! finds a molecule's minimum spanning tree (MST)
183 /*!
184 \param mol the molecule of interest
185 \param mst used to return the MST as a vector of bond indices
186 */
187 RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
188#endif
189
190//! \name Dealing with hydrogens
191//! @{
192
194 bool explicitOnly = false; /**< only add explicit Hs */
195 bool addCoords = false; /**< add coordinates for the Hs */
196 bool addResidueInfo = false; /**< add residue info to the Hs */
197 bool skipQueries =
198 false; /**< do not add Hs to query atoms or atoms with query bonds */
199};
200//! adds Hs to a molecule as explicit Atoms
201/*!
202 \param mol the molecule to add Hs to
203 \param params parameters controlling which Hs are added.
204 \param onlyOnAtoms (optional) if provided, this should be a vector of
205 IDs of the atoms that will be considered for H addition.
206
207 <b>Notes:</b>
208 - it makes no sense to use the \c addCoords option if the molecule's
209 heavy atoms don't already have coordinates.
210 - the molecule is modified
211 */
213 const UINT_VECT *onlyOnAtoms = nullptr);
214
215//! returns a copy of a molecule with hydrogens added in as explicit Atoms
216/*!
217 \param mol the molecule to add Hs to
218 \param explicitOnly (optional) if this is \c true, only explicit Hs will be
219 added
220 \param addCoords (optional) If this is true, estimates for the atomic
221 coordinates
222 of the added Hs will be used.
223 \param onlyOnAtoms (optional) if provided, this should be a vector of
224 IDs of the atoms that will be considered for H addition.
225 \param addResidueInfo (optional) if this is true, add residue info to
226 hydrogen atoms (useful for PDB files).
227
228 \return the new molecule
229
230 <b>Notes:</b>
231 - it makes no sense to use the \c addCoords option if the molecule's
232 heavy
233 atoms don't already have coordinates.
234 - the caller is responsible for <tt>delete</tt>ing the pointer this
235 returns.
236 */
237inline ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
238 bool addCoords = false,
239 const UINT_VECT *onlyOnAtoms = nullptr,
240 bool addResidueInfo = false) {
241 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
242 std::unique_ptr<RWMol> res{new RWMol(mol)};
244 return static_cast<ROMol *>(res.release());
245}
246//! \overload
247/// modifies the molecule in place
/// Convenience wrapper that packs the individual boolean flags into an
/// AddHsParameters object and forwards to the AddHsParameters-based overload.
/*!
 \param mol the molecule to add Hs to (modified in place)
 \param explicitOnly copied into the \c explicitOnly field of the parameters
 \param addCoords copied into the \c addCoords field of the parameters
 \param onlyOnAtoms (optional) passed through unchanged to the forwarded call
 \param addResidueInfo copied into the \c addResidueInfo field of the parameters
*/
248inline void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
249 const UINT_VECT *onlyOnAtoms = nullptr,
250 bool addResidueInfo = false) {
 // Only the first three members are initialized explicitly; any remaining
 // AddHsParameters members keep their in-class default values.
251 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
 // Delegate the actual work to the parameter-struct overload.
252 addHs(mol, ps, onlyOnAtoms);
253}
254
255//! Sets Cartesian coordinates for a terminal atom.
256//! Useful for growing an atom off a molecule with sensible
257//! coordinates based on the geometry of the neighbor.
258/*!
259 NOTE: this sets appropriate coordinates in all of the molecule's
260 conformers.
261
262 \param mol the molecule the atoms belong to
263 \param idx index of the terminal atom whose coordinates are set
264 \param otherIdx index of the bonded neighbor atom
265*/
266
268 unsigned int otherIdx);
269
270//! returns a copy of a molecule with hydrogens removed
271/*!
272 \param mol the molecule to remove Hs from
273 \param implicitOnly if this is \c true, only implicit Hs will be
274 removed
275 \param updateExplicitCount (optional) If this is \c true, when explicit
276 Hs are removed from the graph, the heavy atom to which they are bound will
277 have its counter of explicit Hs increased.
278 \param sanitize (optional) If this is \c true, the final molecule will be
279 sanitized
280
281 \return the new molecule
282
283 <b>Notes:</b>
284 - Hydrogens which aren't connected to a heavy atom will not be
285 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
286 all atoms removed.
287 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
288 will not be removed.
289 - two coordinate Hs, like the central H in C[H-]C, will not be removed
290 - Hs connected to dummy atoms will not be removed
291 - Hs that are part of the definition of double bond Stereochemistry
292 will not be removed
293 - Hs that are not connected to anything else will not be removed
294 - Hs that have a query defined (i.e. hasQuery() returns true) will not
295 be removed
296
297 - the caller is responsible for <tt>delete</tt>ing the pointer this
298 returns.
299*/
300[[deprecated("Please use the version with RemoveHsParameters")]]
302 bool updateExplicitCount = false,
303 bool sanitize = true);
304//! \overload
305/// modifies the molecule in place
306[[deprecated("Please use the version with RemoveHsParameters")]]
308 bool updateExplicitCount = false,
309 bool sanitize = true);
311 bool removeDegreeZero = false; /**< hydrogens that have no bonds */
312 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
313 bool removeOnlyHNeighbors =
314 false; /**< hydrogens with bonds only to other hydrogens */
315 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
316 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
317 isotopes and keeps track of the heavy atom the isotopes were attached to in
318 the private _isotopicHs atom property, so they are re-added by AddHs() as
319 the original isotopes if possible*/
320 bool removeDummyNeighbors =
321 false; /**< hydrogens with at least one dummy-atom neighbor */
322 bool removeDefiningBondStereo =
323 false; /**< hydrogens defining bond stereochemistry */
324 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
325 bool removeWithQuery = false; /**< hydrogens with queries defined */
326 bool removeMapped = true; /**< mapped hydrogens */
327 bool removeInSGroups = true; /**< part of a SubstanceGroup.
328 An H atom will only be removed if it doesn't cause any SGroup to become empty,
329 and if it doesn't play a special role in the SGroup (XBOND, attach point
330 or a CState) */
331 bool showWarnings = true; /**< display warnings for Hs that are not removed */
332 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
333 bool updateExplicitCount =
334 false; /**< DEPRECATED equivalent of updateExplicitCount */
335 bool removeHydrides = true; /**< Removing Hydrides */
336 bool removeNontetrahedralNeighbors =
337 false; /**< remove Hs which are bonded to atoms with specified
338 non-tetrahedral stereochemistry */
339};
340
341//! \overload
342/// modifies the molecule in place
345 bool sanitize = true);
346//! \overload
347/// The caller owns the pointer this returns
349 const ROMol &mol, const RemoveHsParameters &ps = RemoveHsParameters(),
350 bool sanitize = true);
351
352//! removes all Hs from a molecule
/*!
 \param mol the molecule to remove Hs from; it is taken by non-const
 reference and nothing is returned
 \param sanitize defaults to \c true.
 NOTE(review): presumably the molecule is sanitized after the Hs are
 removed when this is set - confirm against the implementation.
*/
353RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
354//! \overload
355/// The caller owns the pointer this returns
357 bool sanitize = true);
358
359//! returns a copy of a molecule with hydrogens removed and added as queries
360//! to the heavy atoms to which they are bound.
361/*!
362 This is really intended to be used with molecules that contain QueryAtoms
363
364 \param mol the molecule to remove Hs from
365
366 \return the new molecule
367
368 <b>Notes:</b>
369 - Atoms that do not already have hydrogen count queries will have one
370 added, other H-related queries will not be touched. Examples:
371 - C[H] -> [C;!H0]
372 - [C;H1][H] -> [C;H1]
373 - [C;H2][H] -> [C;H2]
374 - Hydrogens which aren't connected to a heavy atom will not be
375 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
376 all atoms removed.
377 - the caller is responsible for <tt>delete</tt>ing the pointer this
378 returns.
379 - By default all hydrogens are removed, however if
380 mergeUnmappedOnly is true, any hydrogen participating
381 in an atom map will be retained
382
383*/
385 bool mergeUnmappedOnly = false,
386 bool mergeIsotopes = false);
387//! \overload
388/// modifies the molecule in place
390 bool mergeUnmappedOnly = false,
391 bool mergeIsotopes = false);
392
393//! returns a pair of booleans (hasQueryHs, hasUnmergeableQueryHs)
394/*!
395 This is really intended to be used with molecules that contain QueryAtoms,
396 such as when checking SMARTS patterns for explicit hydrogens.
397
398
399 \param mol the molecule to check for query Hs
400 \return a std::pair of bools: \c first is true if the molecule has query
401 hydrogens; \c second is true if those query Hs cannot be removed by
402 mergeQueryHs
403*/
404RDKIT_GRAPHMOL_EXPORT std::pair<bool, bool> hasQueryHs(const ROMol &mol);
405
415
416//! Parameters controlling the behavior of MolOps::adjustQueryProperties
417/*!
418
419 Note that some of the options here are either directly contradictory or make
420 no sense when combined with each other. We generally assume that client code
421 is doing something sensible and don't attempt to detect possible conflicts
422 or problems.
423
424*/
426 bool adjustDegree = true; /**< add degree queries */
427 std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
428
429 bool adjustRingCount = false; /**< add ring-count queries */
430 std::uint32_t adjustRingCountFlags =
432
433 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
434 labels to any-atom queries */
435
436 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
437
438 bool makeBondsGeneric =
439 false; /**< convert bonds to generic queries (any bonds) */
440 std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
441
442 bool makeAtomsGeneric =
443 false; /**< convert atoms to generic queries (any atoms) */
444 std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
445
446 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
447 overall degree */
448 std::uint32_t adjustHeavyDegreeFlags =
450
451 bool adjustRingChain = false; /**< add ring-chain queries */
452 std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
453
454 bool useStereoCareForBonds =
455 false; /**< remove stereochemistry info from double bonds that do not
456 have the stereoCare property set */
457
458 bool adjustConjugatedFiveRings =
459 false; /**< sets bond queries in conjugated five-rings to
460 SINGLE|DOUBLE|AROMATIC */
461
462 bool setMDLFiveRingAromaticity =
463 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
464 software as documented in the Chemical Representation Guide */
465
466 bool adjustSingleBondsToDegreeOneNeighbors =
467 false; /**< sets single bonds between aromatic or conjugated atoms and
468 degree one neighbors to SINGLE|AROMATIC */
469
470 bool adjustSingleBondsBetweenAromaticAtoms =
471 false; /**< sets non-ring single bonds between two aromatic or
472 conjugated atoms to SINGLE|AROMATIC */
473
474 //! \brief returns an AdjustQueryParameters object with all adjustments
475 //! disabled
478 res.adjustDegree = false;
479 res.makeDummiesQueries = false;
480 res.aromatizeIfPossible = false;
481 return res;
482 }
484};
485
486//! updates an AdjustQueryParameters object from a JSON string
488 MolOps::AdjustQueryParameters &p, const std::string &json);
489
490//! returns a copy of a molecule with query properties adjusted
491/*!
492 \param mol the molecule to adjust
493 \param params controls the adjustments made
494
495 \return the new molecule, the caller owns the memory
496*/
498 const ROMol &mol, const AdjustQueryParameters *params = nullptr);
499//! \overload
500/// modifies the molecule in place
502 RWMol &mol, const AdjustQueryParameters *params = nullptr);
503
504//! returns a copy of a molecule with the atoms renumbered
505/*!
506
507 \param mol the molecule to work with
508 \param newOrder the new ordering of the atoms (should be numAtoms long)
509 for example: if newOrder is [3,2,0,1], then atom 3 in the original
510 molecule will be atom 0 in the new one
511
512 \return the new molecule
513
514 <b>Notes:</b>
515 - the caller is responsible for <tt>delete</tt>ing the pointer this
516 returns.
517
518*/
520 const ROMol &mol, const std::vector<unsigned int> &newOrder);
521
522//! @}
523
524//! \name Sanitization
525//! @{
526
527// clang-format off
529 SANITIZE_NONE = 0x0,
530 SANITIZE_CLEANUP = 0x1,
532 SANITIZE_SYMMRINGS = 0x4,
533 SANITIZE_KEKULIZE = 0x8,
539 SANITIZE_ADJUSTHS = 0x200,
542 SANITIZE_ALL = 0xFFFFFFF
543);
544// clang-format on
545
546//! \brief carries out a collection of tasks for cleaning up a molecule and
547//! ensuring that it makes "chemical sense"
548/*!
549 This function calls the following in sequence:
550 -# MolOps::cleanUp()
551 -# mol.updatePropertyCache()
552 -# MolOps::symmetrizeSSSR()
553 -# MolOps::Kekulize()
554 -# MolOps::assignRadicals()
555 -# MolOps::setAromaticity()
556 -# MolOps::setConjugation()
557 -# MolOps::setHybridization()
558 -# MolOps::cleanupChirality()
559 -# MolOps::adjustHs()
560 -# mol.updatePropertyCache()
561
562 \param mol : the RWMol to be cleaned
563
564 \param operationThatFailed : the first (if any) sanitization operation that
565 fails is set here.
566 The values are taken from the \c SanitizeFlags
567 enum. On success, the value is \c
568 SanitizeFlags::SANITIZE_NONE
569
570 \param sanitizeOps : the bits here are used to set which sanitization
571 operations are carried out. The elements of the \c
572 SanitizeFlags enum define the operations.
573
574 <b>Notes:</b>
575 - If there is a failure in the sanitization, a \c MolSanitizeException
576 will be thrown.
577 - in general the user of this function should cast the molecule following
578 this function to a ROMol, so that new atoms and bonds cannot be added to
579 the molecule and screw up the sanitizing that has been done here
580*/
582 RWMol &mol, unsigned int &operationThatFailed,
583 unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
584//! \overload
586
587//! \brief Identifies chemistry problems (things that don't make chemical
588//! sense) in a molecule
589/*!
590 This function uses the operations in sanitizeMol but does not change
591 the input structure and returns a list of the problems encountered instead
592 of stopping at the first failure.
593
594 The problems this looks for come from the sanitization operations:
595 -# mol.updatePropertyCache() : Unreasonable valences
596 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
597 in rings, aromatic bonds to non-aromatic atoms.
598
599 \param mol : the ROMol to be checked (it is not modified)
600
601 \param sanitizeOps : the bits here are used to set which sanitization
602 operations are carried out. The elements of the \c
603 SanitizeFlags enum define the operations.
 Defaults to \c SanitizeFlags::SANITIZE_ALL.
604
605 \return a vector of <tt>std::unique_ptr</tt>s to \c MolSanitizeException
606 objects that indicate what problems were encountered
607
608*/
610std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
611 const ROMol &mol, unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
612
613//! Possible aromaticity models
614/*!
615- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
616- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
617Book)
618- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
619does not consider the outer envelope of fused rings)
620- \c AROMATICITY_MDL
621- \c AROMATICITY_MMFF94 the aromaticity model used by the MMFF94 force field
622- \c AROMATICITY_CUSTOM uses a caller-provided function
623*/
624typedef enum {
625 AROMATICITY_DEFAULT = 0x0, ///< future proofing
630 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
632
633//! sets the aromaticity model for a molecule to MMFF94
635
636//! Sets up the aromaticity for a molecule
637/*!
638
639 This is what happens here:
640 -# find all the simple rings by calling the findSSSR function
641 -# loop over all the Atoms in each ring and mark them if they are
642 candidates
643 for aromaticity. A ring atom is a candidate if it can spare electrons
644 to the ring and if it's from the first two rows of the periodic table.
645 -# based on the candidate atoms, mark the rings to be either candidates
646 or non-candidates. A ring is a candidate only if all its atoms are
647 candidates
648 -# apply Hueckel rule to each of the candidate rings to check if the ring
649 can be
650 aromatic
651
652 \param mol the RWMol of interest
653 \param model the aromaticity model to use
654 \param func a custom function for assigning aromaticity (only used when
655 model=\c AROMATICITY_CUSTOM)
656
657 \return >0 on success, <= 0 otherwise
658
659 <b>Assumptions:</b>
660 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
661 been called)
662
663*/
666 int (*func)(RWMol &) = nullptr);
667
668//! Designed to be called by the sanitizer to handle special cases before
669/// anything is done.
670/*!
671
672 Currently this:
673 - modifies nitro groups, so that the nitrogen does not have an
674 unreasonable valence of 5, as follows:
675 - the nitrogen gets a positive charge
676 - one of the oxygens gets a negative charge and the double bond to
677 this oxygen is changed to a single bond. The net result is that nitro groups
678 can be counted on to be: \c "[N+](=O)[O-]"
679 - modifies halogen-oxygen containing species as follows:
680 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
681 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
682 \c [Cl,Br,I](=O)O -> [X+]([O-])O
683 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
684
685 \param mol the molecule of interest
686
687*/
689
690//! Designed to be called by the sanitizer to handle special cases for
691//! organometallic species before valence is perceived
692/*!
693
694 \b Note that this function is experimental and may either change in
695 behavior or be replaced with something else in future releases.
696
697 Currently this:
698 - replaces single bonds between "hypervalent" organic atoms and metals
699 with dative bonds (this is following an IUPAC recommendation:
700 https://iupac.qmul.ac.uk/tetrapyrrole/TP8.html)
701
702 \param mol the molecule of interest
703
704*/
706
707//! Called by the sanitizer to assign radical counts to atoms
709
710//! adjust the number of implicit and explicit Hs for special cases
711/*!
712
713 Currently this:
714 - modifies aromatic nitrogens so that, when appropriate, they have an
715 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
716
717 \param mol the molecule of interest
718
719 <b>Assumptions</b>
720 - this is called after the molecule has been sanitized,
721 aromaticity has been perceived, and the implicit valence of
722 everything has been calculated.
723
724*/
726
727//! Kekulizes the molecule
728/*!
729
730 \param mol the molecule of interest
731
732 \param markAtomsBonds if this is set to true, \c isAromatic boolean
733 settings on both the Bonds and Atoms are turned to false following the
734 Kekulization, otherwise they are left alone in their original state.
735
736 \param maxBackTracks the maximum number of attempts at back-tracking. The
737 algorithm uses a back-tracking procedure to revisit a previous setting of
738 double bond if we hit a wall in the kekulization process
739
740 <b>Notes:</b>
741 - this does not modify query bonds which have bond type queries (like
742 those which come from SMARTS) or rings containing them.
743 - even if \c markAtomsBonds is \c false the \c BondType for all modified
744 aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
745 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
746
747*/
749 unsigned int maxBackTracks = 100);
750//! Kekulizes the molecule if possible. If the kekulization fails the molecule
751//! will not be modified
752/*!
753
754 \param mol the molecule of interest
755
756 \param markAtomsBonds if this is set to true, \c isAromatic boolean
757 settings on both the Bonds and Atoms are turned to false following the
758 Kekulization, otherwise they are left alone in their original state.
759
760 \param maxBackTracks the maximum number of attempts at back-tracking. The
761 algorithm uses a back-tracking procedure to revisit a previous setting of
762 double bond if we hit a wall in the kekulization process
763
764 \returns whether or not the kekulization succeeded
765
766 <b>Notes:</b>
767 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
768 bonds will be changed from \c RDKit::Bond::AROMATIC to \c
769 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
770
771*/
773 bool markAtomsBonds = true,
774 unsigned int maxBackTracks = 100);
775
776//! flags the molecule's conjugated bonds
778
779//! calculates and sets the hybridization of all of a molecule's Atoms
781
782//! @}
783
784//! \name Ring finding and SSSR
785//! @{
786
787//! finds a molecule's Smallest Set of Smallest Rings
788/*!
789 Currently this implements a modified form of Figueras algorithm
790 (JCICS - Vol. 36, No. 5, 1996, 986-991)
791
792 \param mol the molecule of interest
793 \param res used to return the vector of rings. Each entry is a vector with
794 atom indices. This information is also stored in the molecule's
795 RingInfo structure, so this argument is optional (see overload)
796 \param includeDativeBonds - determines whether or not dative bonds are used
797 in the ring finding.
798
799 \return number of smallest rings found
800
801 Base algorithm:
802 - The original algorithm starts by finding representative degree 2
803 nodes.
804 - Representative because if a series of deg 2 nodes are found only
805 one of them is picked.
806 - The smallest ring around each of them is found.
807 - The bonds that connect to this degree 2 node are then chopped off,
808 yielding
809 new deg two nodes
810 - The process is repeated on the new deg 2 nodes.
811 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
812 with it is found. A bond from this is "carefully" (look in the paper)
813 selected and chopped, yielding deg 2 nodes. The process is same as
814 above once this is done.
815
816 Our Modifications:
817 - If available, more than one smallest ring around a representative deg 2
818 node will be computed and stored
819 - Typically 3 rings are found around a degree 3 node (when no deg 2s are
820 available)
821 and all the bonds to that node are chopped.
822 - The extra rings that were found in this process are removed after all
823 the nodes have been covered.
824
825 These changes were motivated by several factors:
826 - We believe the original algorithm fails to find the correct SSSR
827 (finds the correct number of them but the wrong ones) on some sample
828 mols
829 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
830 done. The extra rings this process adds can be quite useful.
831*/
833 std::vector<std::vector<int>> &res,
834 bool includeDativeBonds = false,
835 bool includeHydrogenBonds = false);
836//! \overload
838 std::vector<std::vector<int>> *res = nullptr,
839 bool includeDativeBonds = false,
840 bool includeHydrogenBonds = false);
841
842//! use a DFS algorithm to identify ring bonds and atoms in a molecule
843/*!
844 \b NOTE: though the RingInfo structure is populated by this function,
845 the only really reliable calls that can be made are to check if
846 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
847 return values >0
848*/
850
852
853//! symmetrize the molecule's Smallest Set of Smallest Rings
854/*!
855 SSSR rings obtained from "findSSSR" can be non-unique in some cases.
856 For example, cubane has five SSSR rings, not six as one would hope.
857
858 This function adds additional rings to the SSSR list if necessary
859 to make the list symmetric, e.g. all atoms in cubane will be part of the
860 same number of SSSRs. This function chooses these extra rings from the extra
861 rings computed and discarded during findSSSR. The new rings are chosen such
862 that:
863 - replacing a same sized ring in the SSSR list with an extra ring yields
864 the same union of bond IDs as the original SSSR list
865
866 \param mol - the molecule of interest
867 \param res used to return the vector of rings. Each entry is a vector with
868 atom indices. This information is also stored in the molecule's
869 RingInfo structure, so this argument is optional (see overload)
870 \param includeDativeBonds - determines whether or not dative bonds are used
871 in the ring finding.
872
873 \return the total number of rings = (new rings + old SSSRs)
874
875 <b>Notes:</b>
876 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
877 first
878*/
880 std::vector<std::vector<int>> &res,
881 bool includeDativeBonds = false,
882 bool includeHydrogenBonds = false);
883//! \overload
885 bool includeDativeBonds = false,
886 bool includeHydrogenBonds = false);
887
888//! @}
889
890//! \name Shortest paths and other matrices
891//! @{
892
893//! returns a molecule's adjacency matrix
894/*!
895 \param mol the molecule of interest
896 \param useBO toggles use of bond orders in the matrix
897 \param emptyVal sets the empty value (for non-adjacent atoms)
898 \param force forces calculation of the matrix, even if already
899 computed
900 \param propNamePrefix used to set the cached property name
901 \param bondsToUse used to limit which bonds are considered
902
903 \return the adjacency matrix.
904
905 <b>Notes</b>
906 - The result of this is cached in the molecule's local property
907 dictionary, which will handle deallocation. The caller should <b>not</b> \c
908 delete this pointer.
909
910*/
912 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
913 const char *propNamePrefix = nullptr,
914 const boost::dynamic_bitset<> *bondsToUse = nullptr);
915
916//! Computes the molecule's topological distance matrix
917/*!
918 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
919
920 \param mol the molecule of interest
921 \param useBO toggles use of bond orders in the matrix
922 \param useAtomWts sets the diagonal elements of the result to
923 6.0/(atomic number) so that the matrix can be used to calculate
924 Balaban J values. This does not affect the bond weights.
925 \param force forces calculation of the matrix, even if already
926 computed
927 \param propNamePrefix used to set the cached property name
928
929 \return the distance matrix.
930
931 <b>Notes</b>
932 - The result of this is cached in the molecule's local property
933 dictionary, which will handle deallocation. The caller should <b>not</b> \c
934 delete this pointer.
935
936
937*/
939 const ROMol &mol, bool useBO = false, bool useAtomWts = false,
940 bool force = false, const char *propNamePrefix = nullptr);
941
942//! Computes the molecule's topological distance matrix
943/*!
944 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
945
946 \param mol the molecule of interest
947 \param activeAtoms only elements corresponding to these atom indices
948 will be included in the calculation
949 \param bonds only bonds found in this list will be included in the
950 calculation
951 \param useBO toggles use of bond orders in the matrix
952 \param useAtomWts sets the diagonal elements of the result to
953 6.0/(atomic number) so that the matrix can be used to calculate
954 Balaban J values. This does not affect the bond weights.
955
956 \return the distance matrix.
957
958 <b>Notes</b>
959 - The results of this call are not cached; the caller <b>should</b> \c
960 delete
961 this pointer.
962
963
964*/
966 const ROMol &mol, const std::vector<int> &activeAtoms,
967 const std::vector<const Bond *> &bonds, bool useBO = false,
968 bool useAtomWts = false);
969
970//! Computes the molecule's 3D distance matrix
971/*!
972
973 \param mol the molecule of interest
974 \param confId the conformer to use
975 \param useAtomWts sets the diagonal elements of the result to
976 6.0/(atomic number)
977 \param force forces calculation of the matrix, even if already
978 computed
979 \param propNamePrefix used to set the cached property name
980 (if set to an empty string, the matrix will not be
981 cached)
982
983 \return the distance matrix.
984
985 <b>Notes</b>
986 - If propNamePrefix is not empty the result of this is cached in the
987 molecule's local property dictionary, which will handle deallocation.
988 In other cases the caller is responsible for freeing the memory.
989
990*/
992 const ROMol &mol, int confId = -1, bool useAtomWts = false,
993 bool force = false, const char *propNamePrefix = nullptr);
994
995//! Find the shortest path between two atoms
996/*!
997 Uses the Bellman-Ford algorithm
998
999 \param mol molecule of interest
1000 \param aid1 index of the first atom
1001 \param aid2 index of the second atom
1002
1003 \return a std::list with the indices of the atoms along the shortest
1004 path
1005
1006 <b>Notes:</b>
1007 - the starting and end atoms are included in the path
1008 - if no path is found, an empty path is returned
1009
1010*/
1011RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
1012 int aid2);
1013
1014//! @}
1015
1016//! \name Stereochemistry
1017//! @{
1018
1019// class to hold hybridizations
1020
1022 public:
1024 throw FileParseException("not to be called without a mol parameter");
1025 };
1028 throw FileParseException("not to be called without a mol parameter");
1029 };
1030
1031 ~Hybridizations() = default;
1032
1034 return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1035 }
1036 // Atom::HybridizationType &operator[](unsigned int idx) {
1037 // return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1038 // d_hybridizations[d_hybridizations[idx]];
1039 // }
1040
1041 // // void clear() { d_hybridizations.clear(); }
1042 // // void resize(unsigned int sz) { d_hybridizations.resize(sz); }
1043 unsigned int size() const { return d_hybridizations.size(); }
1044
1045 private:
1046 std::vector<int> d_hybridizations;
1047};
1048
1049//! removes bogus chirality markers (e.g. tetrahedral flags on non-sp3
1050//! centers):
1052
1053//! removes bogus atropisomeric markers (e.g. those without sp2 begin and end
1054//! atoms):
1057//! \overload
1059
1060//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms
1061/*!
1062 \param mol the molecule of interest
1063 \param confId the conformer to use
1064 \param replaceExistingTags if this flag is true, any existing atomic chiral
1065 tags will be replaced
1066
1067 If the conformer provided is not a 3D conformer, nothing will be done.
1068
1069
1070 NOTE that this does not check to see if atoms are chiral centers (i.e. all
1071 substituents are different), it merely sets the chiral type flags based on
1072 the coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if
1073 you want chiral flags only on actual stereocenters.
1074*/
1076 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1077
1078//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
1079//! stereo flags to its bonds
1080/*!
1081
1082 \param mol the molecule of interest
1083 \param confId the conformer to use
1084 \param replaceExistingTags if this flag is true, any existing info about
1085 stereochemistry will be replaced
1086
1087 If the conformer provided is not a 3D conformer, nothing will be done.
1088*/
1090 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1091
1092//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms
1093/*!
1094
1095 \param mol the molecule of interest
1096 \param confId the conformer to use
1097 \param replaceExistingTags if this flag is true, any existing info about
1098 stereochemistry will be replaced
1099*/
1101 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1102
1103//! \deprecated: this function will be removed in a future release. Use
1104//! setDoubleBondNeighborDirections() instead
1106 int confId = -1);
1107//! Sets bond directions based on double bond stereochemistry
1109 ROMol &mol, const Conformer *conf = nullptr);
1110//! removes directions from single bonds. The property _UnknownStereo will be
1111//! set on wiggly bonds
1113 bool onlyWedgeFlags = false);
1114
1115//! removes directions from all bonds. The property _UnknownStereo will be set
1116//! on wiggly bonds
1118//! removes directions from all bonds. The property _UnknownStereo will be set
1119//! on wiggly bonds
1121 bool onlyWedgeFlags = false);
1122
1123//! Assign CIS/TRANS bond stereochemistry tags based on neighboring
1124//! directions
1126
1127//! Assign stereochemistry tags to atoms and bonds.
1128/*!
1129 If useLegacyStereoPerception is true, it also does the CIP stereochemistry
1130 assignment for the molecule's atoms (R/S) and double bonds (Z/E).
1131 This assignment is based on legacy code which is fast, but is
1132 known to incorrectly assign CIP labels in some cases.
1133 Instead, to assign CIP labels based on an accurate, though slower,
1134 implementation of the CIP rules, call CIPLabeler::assignCIPLabels().
1135 Chiral atoms will have a property '_CIPCode' indicating their chiral code.
1136
1137 \param mol the molecule to use
1138 \param cleanIt if true, any existing values of the property `_CIPCode`
1139 will be cleared, atoms with a chiral specifier that aren't
1140 actually chiral (e.g. atoms with duplicate
1141 substituents or only 2 substituents, etc.) will have
1142 their chiral code set to CHI_UNSPECIFIED. Bonds with
1143 STEREOCIS/STEREOTRANS specified that have duplicate
1144 substituents based upon the CIP atom ranks will be
1145 marked STEREONONE.
1146 \param force causes the calculation to be repeated even if it has
1147 already been done
1148 \param flagPossibleStereoCenters set the _ChiralityPossible property on
1149 atoms that are possible stereocenters
1150
1151 <b>Notes:</b>
1152 - Throughout we assume that we're working with a hydrogen-suppressed
1153 graph.
1154
1155*/
1157 ROMol &mol, bool cleanIt = false, bool force = false,
1158 bool flagPossibleStereoCenters = false);
1159//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
1160/// (i.e. Z/E)
1161/*!
1162
1163 \param mol the molecule of interest
1164*/
1166
1167//! \brief finds bonds that could be cis/trans in a molecule and mark them as
1168//! Bond::STEREOANY.
1169/*!
1170 \param mol the molecule of interest
1171 \param cleanIt toggles removal of stereo flags from double bonds that can
1172 not have stereochemistry
1173
1174 This function finds any double bonds that can potentially be part of
1175 a cis/trans system. No attempt is made here to mark them cis or
1176 trans. No attempt is made to detect double bond stereo in ring systems.
1177
1178 This function is useful in the following situations:
1179 - when parsing a mol file; for the bonds marked here, coordinate
1180 information on the neighbors can be used to identify cis or trans
1181 states
1182 - when writing a mol file; bonds that can be cis/trans but not marked as
1183 either need to be specially marked in the mol file
1184 - finding double bonds with unspecified stereochemistry so they
1185 can be enumerated for downstream 3D tools
1186
1187 The CIPranks on the neighboring atoms are checked in this function. The
1188 _CIPCode property is set to any on the double bond.
1189*/
1191 bool cleanIt = false);
1192//! \brief Uses the molParity atom property to assign ChiralType to a
1193//! molecule's atoms
1194/*!
1195 \param mol the molecule of interest
1196 \param replaceExistingTags if this flag is true, any existing atomic chiral
1197 tags will be replaced
1198*/
1200 ROMol &mol, bool replaceExistingTags = true);
1201
1202//! @}
1203
1204//! returns the number of atoms which have a particular property set
1206 const ROMol &mol, std::string prop);
1207
1208//! returns whether or not a molecule needs to have Hs added to it.
1210
1211//! \brief Replaces haptic bonds with explicit dative bonds.
1212/*!
1213 *
1214 * @param mol the molecule of interest
1215 *
1216 * One way of showing haptic bonds (such as cyclopentadiene to iron in
1217 * ferrocene) is to use a dummy atom with a dative bond to the iron atom with
1218 * the bond labelled with the atoms involved in the organic end of the bond.
1219 * Another way is to have explicit dative bonds from the atoms of the haptic
1220 * group to the metal atom. This function converts the former representation
1221 * to the latter.
1222 */
1224
1225//! \overload modifies molecule in place.
1227
1228//! \brief Replaces explicit dative bonds with haptic bonds.
1229/*!
1230 *
1231 * @param mol the molecule of interest
1232 *
1233 * Does the reverse of hapticBondsToDative. If there are multiple contiguous
1234 * atoms attached by dative bonds to an atom (probably a metal atom), the
1235 * dative bonds will be replaced by a dummy atom in their centre attached to
1236 * the (metal) atom by a dative bond, which is labelled with ENDPTS of the
1237 * atoms that had the original dative bonds.
1238 */
1240
1241//! \overload modifies molecule in place.
1243
1244/*!
1245 Calculates a molecule's average molecular weight
1246
1247 \param mol the molecule of interest
1248 \param onlyHeavy (optional) if this is true (the default is false),
1249 only heavy atoms will be included in the MW calculation
1250
1251 \return the AMW
1252*/
1254 bool onlyHeavy = false);
1255/*!
1256 Calculates a molecule's exact molecular weight
1257
1258 \param mol the molecule of interest
1259 \param onlyHeavy (optional) if this is true (the default is false),
1260 only heavy atoms will be included in the MW calculation
1261
1262 \return the exact MW
1263*/
1265 bool onlyHeavy = false);
1266
1267/*!
1268 Calculates a molecule's formula
1269
1270 \param mol the molecule of interest
1271 \param separateIsotopes if true, isotopes will show up separately in the
1272 formula. So C[13CH2]O will give the formula: C[13C]H6O
1273 \param abbreviateHIsotopes if true, 2H and 3H will be represented as
1274 D and T instead of [2H] and [3H]. This only applies if \c
1275 separateIsotopes is true
1276
1277 \return the formula as a string
1278*/
1280 const ROMol &mol, bool separateIsotopes = false,
1281 bool abbreviateHIsotopes = true);
1282
1283namespace details {
1284//! not recommended for use in other code
1285RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(
1286 RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1287 boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds = true,
1288 unsigned int maxBackTracks = 100);
1289
1290// If the bond is dative, and it has a common_properties::MolFileBondEndPts
1291// prop, returns a vector of the indices of the atoms mentioned in the prop.
1292RDKIT_GRAPHMOL_EXPORT std::vector<int> hapticBondEndpoints(const Bond *bond);
1293
1294} // namespace details
1295
1296//! attachment points encoded as attachPt properties are added to the graph as
1297/// dummy atoms
1298/*!
1299 *
1300 * @param mol the molecule of interest
1301 * @param addAsQueries if true, the dummy atoms will be added as null queries
1302 * (i.e. they will match any atom in a substructure search)
1303 * @param addCoords if true and the molecule has one or more conformers,
1304 * positions for the attachment points will be added to the conformer(s).
1305 *
1306 */
1308 bool addAsQueries = true,
1309 bool addCoords = true);
1310//! dummy atoms in the graph are removed and replaced with attachment point
1311//! annotations on the attached atoms
1312/*!
1313 *
1314 * @param mol the molecule of interest
1315 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1316 * property will be collapsed
1317 *
1318 * In order for a dummy atom to be considered for collapsing it must have:
1319 * - degree 1 with a single or unspecified bond
1320 * - the bond to it can not be wedged
1321 * - either no query or be an AtomNullQuery
1322 *
1323 */
1325 bool markedOnly = true);
1326
1327namespace details {
1328//! adds a single attachment point, as a dummy atom, to the specified atom
1329/// of the molecule
1330/*!
1331 *
1332 * @param mol the molecule of interest
1333 * @param atomIdx the index of the atom to which the attachment point should
1334 * be added
1335 * @param val the attachment point value. Should be 1 or 2
1336 * @param addAsQuery if true, the dummy atom will be added as a null query
1337 * (i.e. it will match any atom in a substructure search)
1338 * @param addCoords if true and the molecule has one or more conformers,
1339 * positions for the attachment points will be added to the conformer(s).
1340 * @return NOTE(review): presumably the index of the added dummy atom - confirm
1341 */
1342RDKIT_GRAPHMOL_EXPORT unsigned int addExplicitAttachmentPoint(
1343 RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery = true,
1344 bool addCoords = true);
1345
1346//! returns whether or not an atom is an attachment point
1347/*!
1348 *
1349 * @param atom the atom of interest
1350 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1351 * property will be considered
1352 *
1353 * In order for a dummy atom to be considered for collapsing it must have:
1354 * - degree 1 with a single or unspecified bond
1355 * - the bond to it can not be wedged
1356 * - either no query or be an AtomNullQuery
1357 *
1358 */
1359RDKIT_GRAPHMOL_EXPORT bool isAttachmentPoint(const Atom *atom,
1360 bool markedOnly = true);
1361
1362} // namespace details
1363
1364} // namespace MolOps
1365} // namespace RDKit
1366
1367#endif
#define BETTER_ENUM(Enum, Underlying,...)
Definition BetterEnums.h:17
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition Atom.h:75
HybridizationType
store hybridization
Definition Atom.h:88
class for representing a bond
Definition Bond.h:47
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
used by various file parsing classes to indicate a parse error
unsigned int size() const
Definition MolOps.h:1043
Atom::HybridizationType operator[](int idx)
Definition MolOps.h:1033
Hybridizations(const Hybridizations &)
Definition MolOps.h:1027
Hybridizations(const ROMol &mol)
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:233
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms and bonds.
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT std::string getMolFormula(const ROMol &mol, bool separateIsotopes=false, bool abbreviateHIsotopes=true)
RDKIT_GRAPHMOL_EXPORT void cleanupAtropisomers(RWMol &mol, Hybridizations &hybridizations)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double getExactMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT std::pair< bool, bool > hasQueryHs(const ROMol &mol)
returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition MolOps.h:624
@ AROMATICITY_RDKIT
Definition MolOps.h:626
@ AROMATICITY_MDL
Definition MolOps.h:628
@ AROMATICITY_CUSTOM
use a function
Definition MolOps.h:630
@ AROMATICITY_DEFAULT
future proofing
Definition MolOps.h:625
@ AROMATICITY_MMFF94
Definition MolOps.h:629
@ AROMATICITY_SIMPLE
Definition MolOps.h:627
RDKIT_GRAPHMOL_EXPORT void cleanUpOrganometallics(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * hapticBondsToDative(const ROMol &mol)
Replaces haptic bond with explicit dative bonds.
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void clearSingleBondDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false, bool mergeIsotopes=false)
RDKIT_GRAPHMOL_EXPORT void expandAttachmentPoints(RWMol &mol, bool addAsQueries=true, bool addCoords=true)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT ROMol * dativeBondsToHaptic(const ROMol &mol)
Replaces explicit dative bonds with haptic.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol)
sets the aromaticity model for a molecule to MMFF94
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT void clearAllBondDirFlags(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT void clearDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, const AddHsParameters &params, const UINT_VECT *onlyOnAtoms=nullptr)
adds Hs to a molecule as explicit Atoms
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Stoms
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void collapseAttachmentPoints(RWMol &mol, bool markedOnly=true)
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT double getAvgMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
AdjustQueryWhichFlags
Definition MolOps.h:406
@ ADJUST_IGNORERINGS
Definition MolOps.h:409
@ ADJUST_IGNORENONE
Definition MolOps.h:407
@ ADJUST_IGNOREMAPPED
Definition MolOps.h:412
@ ADJUST_IGNORENONDUMMIES
Definition MolOps.h:411
@ ADJUST_IGNOREDUMMIES
Definition MolOps.h:410
@ ADJUST_IGNORECHAINS
Definition MolOps.h:408
@ ADJUST_IGNOREALL
Definition MolOps.h:413
Std stuff.
std::vector< double > INVAR_VECT
Definition MolOps.h:33
bool rdvalue_is(const RDValue_cast_t)
INVAR_VECT::iterator INVAR_VECT_I
Definition MolOps.h:34
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition MolOps.h:35
std::vector< UINT > UINT_VECT
Definition types.h:319
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition MolOps.h:425
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition MolOps.h:476