RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2024 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_MOL_OPS_H
12#define RD_MOL_OPS_H
13
14#include <vector>
15#include <map>
16#include <list>
18#include <boost/smart_ptr.hpp>
19#include <boost/dynamic_bitset.hpp>
21#include <RDGeneral/types.h>
23#include "SanitException.h"
25
26RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
27namespace RDKit {
28class ROMol;
29class RWMol;
30class Atom;
31class Bond;
32class Conformer;
33typedef std::vector<double> INVAR_VECT;
34typedef INVAR_VECT::iterator INVAR_VECT_I;
35typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
36
37//! \brief Groups a variety of molecular query and transformation operations.
38namespace MolOps {
39
40//! return the number of electrons available on an atom to donate for
41/// aromaticity
42/*!
43 The result is determined using the default valency, number of lone pairs,
44 number of bonds and the formal charge. Note that the atom may not donate
45 all of these electrons to a ring for aromaticity (also used in Conjugation
46 and hybridization code).
47
48 \param at the atom of interest
49
50 \return the number of electrons
51*/
53
54//! sums up all atomic formal charges and returns the result
56
57//! returns whether or not the given Atom is involved in a conjugated bond
59
60//! find fragments (disconnected components of the molecular graph)
61/*!
62
63 \param mol the molecule of interest
64 \param mapping used to return the mapping of Atoms->fragments.
65 On return \c mapping will be <tt>mol->getNumAtoms()</tt> long
66 and will contain the fragment assignment for each Atom
67
68 \return the number of fragments found.
69
70*/
71RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
72 std::vector<int> &mapping);
73//! find fragments (disconnected components of the molecular graph)
74/*!
75
76 \param mol the molecule of interest
77 \param frags used to return the Atoms in each fragment
78 On return \c mapping will be \c numFrags long, and each entry
79 will contain the indices of the Atoms in that fragment.
80
81 \return the number of fragments found.
82
83*/
85 const ROMol &mol, std::vector<std::vector<int>> &frags);
86
87//! splits a molecule into its component fragments
88/// (disconnected components of the molecular graph)
89/*!
90
91 \param mol the molecule of interest
92 \param molFrags used to return the disconnected fragments as molecules.
93 Any contents on input will be cleared.
94 \param sanitizeFrags toggles sanitization of the fragments after
95 they are built
96 \param frags used to return the mapping of Atoms->fragments.
97 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
98 on return and will contain the fragment assignment for each Atom.
99 \param fragsMolAtomMapping used to return the Atoms in each fragment
100 On return \c mapping will be \c numFrags long, and each entry
101 will contain the indices of the Atoms in that fragment.
102 \param copyConformers toggles copying conformers of the fragments after
103 they are built
104 \return the number of fragments found.
105
106*/
108 const ROMol &mol, std::vector<std::unique_ptr<ROMol>> &molFrags,
109 bool sanitizeFrags = true, std::vector<int> *frags = nullptr,
110 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
111 bool copyConformers = true);
112
113//! splits a molecule into its component fragments
114/// (disconnected components of the molecular graph)
115/*!
116
117 \param mol the molecule of interest
118 \param sanitizeFrags toggles sanitization of the fragments after
119 they are built
120 \param frags used to return the mapping of Atoms->fragments.
121 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
122 on return and will contain the fragment assignment for each Atom
123 \param fragsMolAtomMapping used to return the Atoms in each fragment
124 On return \c mapping will be \c numFrags long, and each entry
125 will contain the indices of the Atoms in that fragment.
126 \param copyConformers toggles copying conformers of the fragments after
127 they are built
128 \return a vector of the fragments as smart pointers to ROMols
129
130*/
131RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
132 const ROMol &mol, bool sanitizeFrags = true,
133 std::vector<int> *frags = nullptr,
134 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
135 bool copyConformers = true);
136
137//! splits a molecule into pieces based on labels assigned using a query
138/*!
139
140 \param mol the molecule of interest
141 \param query the query used to "label" the molecule for fragmentation
142 \param sanitizeFrags toggles sanitization of the fragments after
143 they are built
144 \param whiteList if provided, only labels in the list will be kept
145 \param negateList if true, the white list logic will be inverted: only labels
146 not in the list will be kept
147
148 \return a map of the fragments and their labels
149
150*/
151
152template <typename T>
153RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
154getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
155 bool sanitizeFrags = true,
156 const std::vector<T> *whiteList = nullptr,
157 bool negateList = false);
158//! splits a molecule into pieces based on labels assigned using a query,
159//! putting them into a map of std::unique_ptr<ROMol>.
160/*!
161
162 \param mol the molecule of interest
163 \param query the query used to "label" the molecule for fragmentation
164 \param molFrags used to return the disconnected fragments as molecules.
165 Any contents on input will be cleared.
166 \param sanitizeFrags toggles sanitization of the fragments after
167 they are built
168 \param whiteList if provided, only labels in the list will be kept
169 \param negateList if true, the white list logic will be inverted: only labels
170 not in the list will be kept
171
172 \return the number of fragments
173
174*/
175template <typename T>
177 const ROMol &mol, T (*query)(const ROMol &, const Atom *),
178 std::map<T, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags = true,
179 const std::vector<T> *whiteList = nullptr, bool negateList = false);
180
181//! \name Dealing with hydrogens
182//{@
183
185 bool explicitOnly = false; /**< only add explicit Hs */
186 bool addCoords = false; /**< add coordinates for the Hs */
187 bool addResidueInfo = false; /**< add residue info to the Hs */
189 false; /**< do not add Hs to query atoms or atoms with query bonds */
190};
191//! adds Hs to a molecule as explicit Atoms
192/*!
193 \param mol the molecule to add Hs to
194 \param params parameters controlling which Hs are added.
195 \param onlyOnAtoms (optional) if provided, this should be a vector of
196 IDs of the atoms that will be considered for H addition.
197
198 <b>Notes:</b>
199 - it makes no sense to use the \c addCoords option if the molecule's
200 heavy atoms don't already have coordinates.
201 - the molecule is modified
202 */
204 const UINT_VECT *onlyOnAtoms = nullptr);
205
206//! returns a copy of a molecule with hydrogens added in as explicit Atoms
207/*!
208 \param mol the molecule to add Hs to
209 \param explicitOnly (optional) if this \c true, only explicit Hs will be
210 added
211 \param addCoords (optional) If this is true, estimates for the atomic
212 coordinates
213 of the added Hs will be used.
214 \param onlyOnAtoms (optional) if provided, this should be a vector of
215 IDs of the atoms that will be considered for H addition.
216 \param addResidueInfo (optional) if this is true, add residue info to
217 hydrogen atoms (useful for PDB files).
218
219 \return the new molecule
220
221 <b>Notes:</b>
222 - it makes no sense to use the \c addCoords option if the molecule's
223 heavy
224 atoms don't already have coordinates.
225 - the caller is responsible for <tt>delete</tt>ing the pointer this
226 returns.
227 */
228inline ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
229 bool addCoords = false,
230 const UINT_VECT *onlyOnAtoms = nullptr,
231 bool addResidueInfo = false) {
232 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
233 std::unique_ptr<RWMol> res{new RWMol(mol)};
234 addHs(*res, ps, onlyOnAtoms);
235 return static_cast<ROMol *>(res.release());
236}
237//! \overload
238/// modifies the molecule in place
239inline void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
240 const UINT_VECT *onlyOnAtoms = nullptr,
241 bool addResidueInfo = false) {
242 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
243 addHs(mol, ps, onlyOnAtoms);
244}
245
246//! Sets Cartesian coordinates for a terminal atom.
247//! Useful for growing an atom off a molecule with sensible
248//! coordinates based on the geometry of the neighbor.
249/*!
250 NOTE: this sets appropriate coordinates in all of the molecule's
251 conformers.
252
253 \param mol the molecule the atoms belong to
254 \param idx index of the terminal atom whose coordinates are set
255 \param otherIdx index of the bonded neighbor atom
256*/
257
259 unsigned int otherIdx);
260
261//! returns a copy of a molecule with hydrogens removed
262/*!
263 \param mol the molecule to remove Hs from
264 \param implicitOnly if this \c true, only implicit Hs will be
265 removed
266 \param updateExplicitCount (optional) If this is \c true, when explicit
267 Hs are removed from the graph, the heavy atom to which they are bound will
268 have its counter of explicit Hs increased.
269 \param sanitize: (optional) If this is \c true, the final molecule will be
270 sanitized
271
272 \return the new molecule
273
274 <b>Notes:</b>
275 - Hydrogens which aren't connected to a heavy atom will not be
276 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
277 all atoms removed.
278 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
279 will not be removed.
280 - two coordinate Hs, like the central H in C[H-]C, will not be removed
281 - Hs connected to dummy atoms will not be removed
282 - Hs that are part of the definition of double bond Stereochemistry
283 will not be removed
284 - Hs that are not connected to anything else will not be removed
285 - Hs that have a query defined (i.e. hasQuery() returns true) will not
286 be removed
287
288 - the caller is responsible for <tt>delete</tt>ing the pointer this
289 returns.
290*/
291[[deprecated(
292 "Please use the version with RemoveHsParameters")]] RDKIT_GRAPHMOL_EXPORT
293 ROMol *
294 removeHs(const ROMol &mol, bool implicitOnly,
295 bool updateExplicitCount = false, bool sanitize = true);
296//! \overload
297/// modifies the molecule in place
298[[deprecated(
299 "Please use the version with RemoveHsParameters")]] RDKIT_GRAPHMOL_EXPORT void
300removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount = false,
301 bool sanitize = true);
303 bool removeDegreeZero = false; /**< hydrogens that have no bonds */
304 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
306 false; /**< hydrogens with bonds only to other hydrogens */
307 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
308 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
309 isotopes and keeps track of the heavy atom the isotopes were attached to in
310 the private _isotopicHs atom property, so they are re-added by AddHs() as
311 the original isotopes if possible*/
313 false; /**< hydrogens with at least one dummy-atom neighbor */
315 false; /**< hydrogens defining bond stereochemistry */
316 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
317 bool removeWithQuery = false; /**< hydrogens with queries defined */
318 bool removeMapped = true; /**< mapped hydrogens */
319 bool removeInSGroups = true; /**< part of a SubstanceGroup.
320 An H atom will only be removed if it doesn't cause any SGroup to become empty,
321 and if it doesn't play a special role in the SGroup (XBOND, attach point
322 or a CState) */
323 bool showWarnings = true; /**< display warnings for Hs that are not removed */
324 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
326 false; /**< DEPRECATED equivalent of updateExplicitCount */
327 bool removeHydrides = false; /**< Removing Hydrides */
329 false; /**< remove Hs which are bonded to atoms with specified
330 non-tetrahedral stereochemistry */
331};
332
333//! \overload
334/// modifies the molecule in place
336 RWMol &mol, const RemoveHsParameters &ps = RemoveHsParameters(),
337 bool sanitize = true);
338//! \overload
339/// The caller owns the pointer this returns
341 const ROMol &mol, const RemoveHsParameters &ps = RemoveHsParameters(),
342 bool sanitize = true);
343
344//! removes all Hs from a molecule
345RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
346//! \overload
347/// The caller owns the pointer this returns
349 bool sanitize = true);
350
351//! returns a copy of a molecule with hydrogens removed and added as queries
352//! to the heavy atoms to which they are bound.
353/*!
354 This is really intended to be used with molecules that contain QueryAtoms
355
356 \param mol the molecule to remove Hs from
357
358 \return the new molecule
359
360 <b>Notes:</b>
361 - Atoms that do not already have hydrogen count queries will have one
362 added, other H-related queries will not be touched. Examples:
363 - C[H] -> [C;!H0]
364 - [C;H1][H] -> [C;H1]
365 - [C;H2][H] -> [C;H2]
366 - Hydrogens which aren't connected to a heavy atom will not be
367 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
368 all atoms removed.
369 - the caller is responsible for <tt>delete</tt>ing the pointer this
370 returns.
371 - By default all hydrogens are removed, however if
372 mergeUnmappedOnly is true, any hydrogen participating
373 in an atom map will be retained
374
375*/
377 bool mergeUnmappedOnly = false,
378 bool mergeIsotopes = false);
379//! \overload
380/// modifies the molecule in place
382 bool mergeUnmappedOnly = false,
383 bool mergeIsotopes = false);
384
385//! returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
386/*!
387 This is really intended to be used with molecules that contain QueryAtoms
388 such as when checking smarts patterns for explicit hydrogens
389
390
391 \param mol the molecule to check for query Hs from
392 \return std::pair if pair.first is true if the molecule has query
393 hydrogens, if pair.second is true, the queryHs cannot be removed my
394 mergeQueryHs
395*/
396RDKIT_GRAPHMOL_EXPORT std::pair<bool, bool> hasQueryHs(const ROMol &mol);
397
407
408//! Parameters controlling the behavior of MolOps::adjustQueryProperties
409/*!
410
411 Note that some of the options here are either directly contradictory or make
412 no sense when combined with each other. We generally assume that client code
413 is doing something sensible and don't attempt to detect possible conflicts
414 or problems.
415
416*/
418 bool adjustDegree = true; /**< add degree queries */
420
421 bool adjustRingCount = false; /**< add ring-count queries */
422 std::uint32_t adjustRingCountFlags =
424
425 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
426 labels to any-atom queries */
427
428 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
429
431 false; /**< convert bonds to generic queries (any bonds) */
433
435 false; /**< convert atoms to generic queries (any atoms) */
437
438 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
439 overall degree */
442
443 bool adjustRingChain = false; /**< add ring-chain queries */
445
447 false; /**< remove stereochemistry info from double bonds that do not
448 have the stereoCare property set */
449
451 false; /**< sets bond queries in conjugated five-rings to
452 SINGLE|DOUBLE|AROMATIC */
453
455 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
456 software as documented in the Chemical Representation Guide */
457
459 false; /**< sets single bonds between aromatic or conjugated atoms and
460 degree one neighbors to SINGLE|AROMATIC */
461
463 false; /**< sets non-ring single bonds between two aromatic or
464 conjugated atoms to SINGLE|AROMATIC */
465
466 //! \brief returns an AdjustQueryParameters object with all adjustments
467 //! disabled
470 res.adjustDegree = false;
471 res.makeDummiesQueries = false;
472 res.aromatizeIfPossible = false;
473 return res;
474 }
476};
477
478//! updates an AdjustQueryParameters object from a JSON string
480 MolOps::AdjustQueryParameters &p, const std::string &json);
481
482//! returns a copy of a molecule with query properties adjusted
483/*!
484 \param mol the molecule to adjust
485 \param params controls the adjustments made
486
487 \return the new molecule, the caller owns the memory
488*/
490 const ROMol &mol, const AdjustQueryParameters *params = nullptr);
491//! \overload
492/// modifies the molecule in place
494 RWMol &mol, const AdjustQueryParameters *params = nullptr);
495
496//! returns a copy of a molecule with the atoms renumbered
497/*!
498
499 \param mol the molecule to work with
500 \param newOrder the new ordering of the atoms (should be numAtoms long)
501 for example: if newOrder is [3,2,0,1], then atom 3 in the original
502 molecule will be atom 0 in the new one
503
504 \return the new molecule
505
506 <b>Notes:</b>
507 - the caller is responsible for <tt>delete</tt>ing the pointer this
508 returns.
509
510*/
512 const ROMol &mol, const std::vector<unsigned int> &newOrder);
513
514//! @}
515
516//! \name Sanitization
517/// {
518
519// clang-format off
520BETTER_ENUM(SanitizeFlags, unsigned int,
521 SANITIZE_NONE = 0x0,
522 SANITIZE_CLEANUP = 0x1,
523 SANITIZE_PROPERTIES = 0x2,
524 SANITIZE_SYMMRINGS = 0x4,
525 SANITIZE_KEKULIZE = 0x8,
526 SANITIZE_FINDRADICALS = 0x10,
527 SANITIZE_SETAROMATICITY = 0x20,
528 SANITIZE_SETCONJUGATION = 0x40,
529 SANITIZE_SETHYBRIDIZATION = 0x80,
530 SANITIZE_CLEANUPCHIRALITY = 0x100,
531 SANITIZE_ADJUSTHS = 0x200,
532 SANITIZE_CLEANUP_ORGANOMETALLICS = 0x400,
533 SANITIZE_CLEANUPATROPISOMERS = 0x800,
534 SANITIZE_ALL = 0xFFFFFFF
535);
536// clang-format on
537
538//! \brief carries out a collection of tasks for cleaning up a molecule and
539//! ensuring that it makes "chemical sense"
540/*!
541 This functions calls the following in sequence
542 -# MolOps::cleanUp()
543 -# mol.updatePropertyCache()
544 -# MolOps::symmetrizeSSSR()
545 -# MolOps::Kekulize()
546 -# MolOps::assignRadicals()
547 -# MolOps::setAromaticity()
548 -# MolOps::setConjugation()
549 -# MolOps::setHybridization()
550 -# MolOps::cleanupChirality()
551 -# MolOps::adjustHs()
552 -# mol.updatePropertyCache()
553
554 \param mol : the RWMol to be cleaned
555
556 \param operationThatFailed : the first (if any) sanitization operation that
557 fails is set here.
558 The values are taken from the \c SanitizeFlags
559 enum. On success, the value is \c
560 SanitizeFlags::SANITIZE_NONE
561
562 \param sanitizeOps : the bits here are used to set which sanitization
563 operations are carried out. The elements of the \c
564 SanitizeFlags enum define the operations.
565
566 <b>Notes:</b>
567 - If there is a failure in the sanitization, a \c MolSanitizeException
568 will be thrown.
569 - in general the user of this function should cast the molecule following
570 this function to a ROMol, so that new atoms and bonds cannot be added to
571 the molecule and screw up the sanitizing that has been done here
572*/
574 RWMol &mol, unsigned int &operationThatFailed,
575 unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
576//! \overload
578
579//! \brief Identifies chemistry problems (things that don't make chemical
580//! sense) in a molecule
581/*!
582 This functions uses the operations in sanitizeMol but does not change
583 the input structure and returns a list of the problems encountered instead
584 of stopping at the first failure,
585
586 The problems this looks for come from the sanitization operations:
587 -# mol.updatePropertyCache() : Unreasonable valences
588 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
589 in rings, aromatic bonds to non-aromatic atoms.
590
591 \param mol : the ROMol to be cleaned
592
593 \param sanitizeOps : the bits here are used to set which sanitization
594 operations are carried out. The elements of the \c
595 SanitizeFlags enum define the operations.
596
597 \return a vector of \c MolSanitizeException values that indicate what
598 problems were encountered
599
600*/
602std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
603 const ROMol &mol, unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
604
605//! Possible aromaticity models
606/*!
607- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
608- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
609Book)
610- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
611does not consider the outer envelope of fused rings)
612- \c AROMATICITY_MDL
613- \c AROMATICIT_MMFF94 the aromaticity model used by the MMFF94 force field
614- \c AROMATICITY_CUSTOM uses a caller-provided function
615*/
616typedef enum {
617 AROMATICITY_DEFAULT = 0x0, ///< future proofing
622 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
624
625//! sets the aromaticity model for a molecule to MMFF94
627
628//! Sets up the aromaticity for a molecule
629/*!
630
631 This is what happens here:
632 -# find all the simple rings by calling the findSSSR function
633 -# loop over all the Atoms in each ring and mark them if they are
634 candidates
635 for aromaticity. A ring atom is a candidate if it can spare electrons
636 to the ring and if it's from the first two rows of the periodic table.
637 -# based on the candidate atoms, mark the rings to be either candidates
638 or non-candidates. A ring is a candidate only if all its atoms are
639 candidates
640 -# apply Hueckel rule to each of the candidate rings to check if the ring
641 can be
642 aromatic
643
644 \param mol the RWMol of interest
645 \param model the aromaticity model to use
646 \param func a custom function for assigning aromaticity (only used when
647 model=\c AROMATICITY_CUSTOM)
648
649 \return >0 on success, <= 0 otherwise
650
651 <b>Assumptions:</b>
652 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
653 been called)
654
655*/
658 int (*func)(RWMol &) = nullptr);
659
660//! Designed to be called by the sanitizer to handle special cases before
661/// anything is done.
662/*!
663
664 Currently this:
665 - modifies nitro groups, so that the nitrogen does not have an
666 unreasonable valence of 5, as follows:
667 - the nitrogen gets a positive charge
668 - one of the oxygens gets a negative charge and the double bond to
669 this oxygen is changed to a single bond The net result is that nitro groups
670 can be counted on to be: \c "[N+](=O)[O-]"
671 - modifies halogen-oxygen containing species as follows:
672 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
673 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
674 \c [Cl,Br,I](=O)O -> [X+]([O-])O
675 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
676
677 \param mol the molecule of interest
678
679*/
681
682//! Designed to be called by the sanitizer to handle special cases for
683//! organometallic species before valence is perceived
684/*!
685
686 \b Note that this function is experimental and may either change in
687 behavior or be replaced with something else in future releases.
688
689 Currently this:
690 - replaces single bonds between "hypervalent" organic atoms and metals
691 with dative bonds (this is following an IUPAC recommendation:
692 https://iupac.qmul.ac.uk/tetrapyrrole/TP8.html)
693
694 \param mol the molecule of interest
695
696*/
698
699//! Called by the sanitizer to assign radical counts to atoms
701
702//! adjust the number of implicit and explicit Hs for special cases
703/*!
704
705 Currently this:
706 - modifies aromatic nitrogens so that, when appropriate, they have an
707 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1"
708
709 \param mol the molecule of interest
710
711 <b>Assumptions</b>
712 - this is called after the molecule has been sanitized,
713 aromaticity has been perceived, and the implicit valence of
714 everything has been calculated.
715
716*/
718
719//! Kekulizes the molecule
720/*!
721
722 \param mol the molecule of interest
723
724 \param markAtomsBonds if this is set to true, \c isAromatic boolean
725 settings on both the Bonds and Atoms are turned to false following the
726 Kekulization, otherwise they are left alone in their original state.
727
728 \param maxBackTracks the maximum number of attempts at back-tracking. The
729 algorithm uses a back-tracking procedure to revisit a previous setting of
730 double bond if we hit a wall in the kekulization process
731
732 <b>Notes:</b>
733 - this does not modify query bonds which have bond type queries (like
734 those which come from SMARTS) or rings containing them.
735 - even if \c markAtomsBonds is \c false the \c BondType for all modified
736 aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
737 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
738
739*/
740RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
741 unsigned int maxBackTracks = 100);
742//! Kekulizes the molecule if possible. If the kekulization fails the molecule
743//! will not be modified
744/*!
745
746 \param mol the molecule of interest
747
748 \param markAtomsBonds if this is set to true, \c isAromatic boolean
749 settings on both the Bonds and Atoms are turned to false following the
750 Kekulization, otherwise they are left alone in their original state.
751
752 \param maxBackTracks the maximum number of attempts at back-tracking. The
753 algorithm uses a back-tracking procedure to revisit a previous setting of
754 double bond if we hit a wall in the kekulization process
755
756 \returns whether or not the kekulization succeeded
757
758 <b>Notes:</b>
759 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
760 bonds will be changed from \c RDKit::Bond::AROMATIC to \c
761 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
762
763*/
765 bool markAtomsBonds = true,
766 unsigned int maxBackTracks = 100);
767
768//! flags the molecule's conjugated bonds
770
771//! calculates and sets the hybridization of all a molecule's Stoms
773
774//! @}
775
776//! \name Ring finding and SSSR
777//! @{
778
779//! finds a molecule's Smallest Set of Smallest Rings
780/*!
781 Currently this implements a modified form of Figueras algorithm
782 (JCICS - Vol. 36, No. 5, 1996, 986-991)
783
784 \param mol the molecule of interest
785 \param res used to return the vector of rings. Each entry is a vector with
786 atom indices. This information is also stored in the molecule's
787 RingInfo structure, so this argument is optional (see overload)
788 \param includeDativeBonds - determines whether or not dative bonds are used
789 in the ring finding.
790 \param includeHydrogenBonds - determines whether or not hydrogen bonds are
791 used in the ring finding.
792
793 \return number of smallest rings found
794
795 Base algorithm:
796 - The original algorithm starts by finding representative degree 2
797 nodes.
798 - Representative because if a series of deg 2 nodes are found only
799 one of them is picked.
800 - The smallest ring around each of them is found.
801 - The bonds that connect to this degree 2 node are them chopped off,
802 yielding
803 new deg two nodes
804 - The process is repeated on the new deg 2 nodes.
805 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
806 with it is found. A bond from this is "carefully" (look in the paper)
807 selected and chopped, yielding deg 2 nodes. The process is same as
808 above once this is done.
809
810 Our Modifications:
811 - If available, more than one smallest ring around a representative deg 2
812 node will be computed and stored
813 - Typically 3 rings are found around a degree 3 node (when no deg 2s are
814 available)
815 and all the bond to that node are chopped.
816 - The extra rings that were found in this process are removed after all
817 the nodes have been covered.
818
819 These changes were motivated by several factors:
820 - We believe the original algorithm fails to find the correct SSSR
821 (finds the correct number of them but the wrong ones) on some sample
822 mols
823 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
824 done. The extra rings this process adds can be quite useful.
825*/
827 std::vector<std::vector<int>> &res,
828 bool includeDativeBonds = false,
829 bool includeHydrogenBonds = false);
830//! \overload
832 std::vector<std::vector<int>> *res = nullptr,
833 bool includeDativeBonds = false,
834 bool includeHydrogenBonds = false);
835
836//! use a DFS algorithm to identify ring bonds and atoms in a molecule
837/*!
838 \b NOTE: though the RingInfo structure is populated by this function,
839 the only really reliable calls that can be made are to check if
840 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
841 return values >0
842*/
844
846 bool includeDativeBonds = false,
847 bool includeHydrogenBonds = false);
848
849//! symmetrize the molecule's Smallest Set of Smallest Rings
850/*!
851 SSSR rings obtained from "findSSSR" can be non-unique in some case.
852 For example, cubane has five SSSR rings, not six as one would hope.
853
854 This function adds additional rings to the SSSR list if necessary
855 to make the list symmetric, e.g. all atoms in cubane will be part of the
856 same number of SSSRs. This function choses these extra rings from the extra
857 rings computed and discarded during findSSSR. The new ring are chosen such
858 that:
859 - replacing a same sized ring in the SSSR list with an extra ring yields
860 the same union of bond IDs as the original SSSR list
861
862 \param mol - the molecule of interest
863 \param res used to return the vector of rings. Each entry is a vector with
864 atom indices. This information is also stored in the molecule's
865 RingInfo structure, so this argument is optional (see overload)
866 \param includeDativeBonds - determines whether or not dative bonds are used
867 in the ring finding.
868 \param includeHydrogenBonds - determines whether or not hydrogen bonds are
869 used in the ring finding.
870
871 \return the total number of rings = (new rings + old SSSRs)
872
873 <b>Notes:</b>
874 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
875 first
876*/
878 std::vector<std::vector<int>> &res,
879 bool includeDativeBonds = false,
880 bool includeHydrogenBonds = false);
881//! \overload
883 bool includeDativeBonds = false,
884 bool includeHydrogenBonds = false);
885
886//! @}
887
888//! \name Shortest paths and other matrices
889//! @{
890
891//! returns a molecule's adjacency matrix
892/*!
893 \param mol the molecule of interest
894 \param useBO toggles use of bond orders in the matrix
895 \param emptyVal sets the empty value (for non-adjacent atoms)
896 \param force forces calculation of the matrix, even if already
897 computed
898 \param propNamePrefix used to set the cached property name
899 \param bondsToUse used to limit which bonds are considered
900
901 \return the adjacency matrix.
902
903 <b>Notes</b>
904 - The result of this is cached in the molecule's local property
905 dictionary, which will handle deallocation. The caller should <b>not</b> \c
906 delete this pointer.
907
908*/
910 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
911 const char *propNamePrefix = nullptr,
912 const boost::dynamic_bitset<> *bondsToUse = nullptr);
913
914//! Computes the molecule's topological distance matrix
915/*!
916 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
917
918 \param mol the molecule of interest
919 \param useBO toggles use of bond orders in the matrix
920 \param useAtomWts sets the diagonal elements of the result to
921 6.0/(atomic number) so that the matrix can be used to calculate
922 Balaban J values. This does not affect the bond weights.
923 \param force forces calculation of the matrix, even if already
924 computed
925 \param propNamePrefix used to set the cached property name
926
927 \return the distance matrix.
928
929 <b>Notes</b>
930 - The result of this is cached in the molecule's local property
931 dictionary, which will handle deallocation. The caller should <b>not</b> \c
932 delete this pointer.
933
934
935*/
937 const ROMol &mol, bool useBO = false, bool useAtomWts = false,
938 bool force = false, const char *propNamePrefix = nullptr);
939
940//! Computes the molecule's topological distance matrix
941/*!
942 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
943
944 \param mol the molecule of interest
945 \param activeAtoms only elements corresponding to these atom indices
946 will be included in the calculation
947 \param bonds only bonds found in this list will be included in the
948 calculation
949 \param useBO toggles use of bond orders in the matrix
950 \param useAtomWts sets the diagonal elements of the result to
951 6.0/(atomic number) so that the matrix can be used to calculate
952 Balaban J values. This does not affect the bond weights.
953
954 \return the distance matrix.
955
956 <b>Notes</b>
957 - The results of this call are not cached, the caller <b>should</b> \c
958 delete
959 this pointer.
960
961
962*/
964 const ROMol &mol, const std::vector<int> &activeAtoms,
965 const std::vector<const Bond *> &bonds, bool useBO = false,
966 bool useAtomWts = false);
967
968//! Computes the molecule's 3D distance matrix
969/*!
970
971 \param mol the molecule of interest
972 \param confId the conformer to use
973 \param useAtomWts sets the diagonal elements of the result to
974 6.0/(atomic number)
975 \param force forces calculation of the matrix, even if already
976 computed
977 \param propNamePrefix used to set the cached property name
978 (if set to an empty string, the matrix will not be
979 cached)
980
981 \return the distance matrix.
982
983 <b>Notes</b>
984 - If propNamePrefix is not empty the result of this is cached in the
985 molecule's local property dictionary, which will handle deallocation.
986 In other cases the caller is responsible for freeing the memory.
987
988*/
990 const ROMol &mol, int confId = -1, bool useAtomWts = false,
991 bool force = false, const char *propNamePrefix = nullptr);
992
993//! Find the shortest path between two atoms
994/*!
995 Uses the Bellman-Ford algorithm
996
997 \param mol molecule of interest
998 \param aid1 index of the first atom
999 \param aid2 index of the second atom
1000
1001 \return an std::list with the indices of the atoms along the shortest
1002 path
1003
1004 <b>Notes:</b>
1005 - the starting and end atoms are included in the path
1006 - if no path is found, an empty path is returned
1007
1008*/
1009RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
1010 int aid2);
1011
1012//! @}
1013
1014//! \name Stereochemistry
1015//! @{
1016
1017// class to hold hybridizations
1018
1020 public:
1022 throw FileParseException("not to be called without a mol parameter");
1023 };
1026 throw FileParseException("not to be called without a mol parameter");
1027 };
1028
1029 ~Hybridizations() = default;
1030
1032 return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1033 }
1034 // Atom::HybridizationType &operator[](unsigned int idx) {
1035 // return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1036 // d_hybridizations[d_hybridizations[idx]];
1037 // }
1038
1039 // // void clear() { d_hybridizations.clear(); }
1040 // // void resize(unsigned int sz) { d_hybridizations.resize(sz); }
1041 unsigned int size() const { return d_hybridizations.size(); }
1042
1043 private:
1044 std::vector<int> d_hybridizations;
1045};
1046
1047//! removes bogus chirality markers (e.g. tetrahedral flags on non-sp3
1048//! centers):
1050
1051//! removes bogus atropisomeric markers (e.g. those without sp2 begin and end
1052//! atoms):
1054 Hybridizations &hybridizations);
1055//! \overload
1057
1058//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms
1059/*!
1060 \param mol the molecule of interest
1061 \param confId the conformer to use
1062 \param replaceExistingTags if this flag is true, any existing atomic chiral
1063 tags will be replaced
1064
1065 If the conformer provided is not a 3D conformer, nothing will be done.
1066
1067
1068 NOTE that this does not check to see if atoms are chiral centers (i.e. all
1069 substituents are different), it merely sets the chiral type flags based on
1070 the coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if
1071 you want chiral flags only on actual stereocenters.
1072*/
1074 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1075
1076//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
1077//! stereo flags to its bonds
1078/*!
1079
1080 \param mol the molecule of interest
1081 \param confId the conformer to use
1082 \param replaceExistingTags if this flag is true, any existing info about
1083 stereochemistry will be replaced
1084
1085 If the conformer provided is not a 3D conformer, nothing will be done.
1086*/
1088 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1089
1090//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms
1091/*!
1092
1093 \param mol the molecule of interest
1094 \param confId the conformer to use
1095 \param replaceExistingTags if this flag is true, any existing info about
1096 stereochemistry will be replaced
1097*/
1099 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1100
1101//! \deprecated: this function will be removed in a future release. Use
1102//! setDoubleBondNeighborDirections() instead
1104 int confId = -1);
1105//! Sets bond directions based on double bond stereochemistry
1107 ROMol &mol, const Conformer *conf = nullptr);
1108//! removes directions from single bonds. The property _UnknownStereo will be
1109//! set on wiggly bonds
1111 bool onlyWedgeFlags = false);
1112
1113//! removes directions from all bonds. The property _UnknownStereo will be set
1114//! on wiggly bonds
1116//! removes directions from all bonds. The property _UnknownStereo will be set
1117//! on wiggly bonds
1119 bool onlyWedgeFlags = false);
1120
1121//! Assign CIS/TRANS bond stereochemistry tags based on neighboring
1122//! directions
1124
1125//! Assign stereochemistry tags to atoms and bonds.
1126/*!
1127 If useLegacyStereoPerception is true, it also does the CIP stereochemistry
1128 assignment for the molecule's atoms (R/S) and double bonds (Z/E).
1129 This assignment is based on legacy code which is fast, but is
1130 known to incorrectly assign CIP labels in some cases.
1131 instead, to assign CIP labels based on an accurate, though slower,
1132 implementation of the CIP rules, call CIPLabeler::assignCIPLabels().
1133 Chiral atoms will have a property '_CIPCode' indicating their chiral code.
1134
1135 \param mol the molecule to use
1136 \param cleanIt if true, any existing values of the property `_CIPCode`
1137 will be cleared, atoms with a chiral specifier that aren't
1138 actually chiral (e.g. atoms with duplicate
1139 substituents or only 2 substituents, etc.) will have
1140 their chiral code set to CHI_UNSPECIFIED. Bonds with
1141 STEREOCIS/STEREOTRANS specified that have duplicate
1142 substituents based upon the CIP atom ranks will be
1143 marked STEREONONE.
1144 \param force causes the calculation to be repeated even if it has
1145 already been done
1146 \param flagPossibleStereoCenters set the _ChiralityPossible property on
1147 atoms that are possible stereocenters
1148
1149 <b>Notes:M</b>
1150 - Throughout we assume that we're working with a hydrogen-suppressed
1151 graph.
1152
1153*/
1155 ROMol &mol, bool cleanIt = false, bool force = false,
1156 bool flagPossibleStereoCenters = false);
1157//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
1158/// i.e. Z/E)
1159/*!
1160
1161 \param mol the molecule of interest
1162*/
1164
1165//! \brief finds bonds that could be cis/trans in a molecule and mark them as
1166//! Bond::STEREOANY.
1167/*!
1168 \param mol the molecule of interest
1169 \param cleanIt toggles removal of stereo flags from double bonds that can
1170 not have stereochemistry
1171
1172 This function finds any double bonds that can potentially be part of
1173 a cis/trans system. No attempt is made here to mark them cis or
1174 trans. No attempt is made to detect double bond stereo in ring systems.
1175
1176 This function is useful in the following situations:
1177 - when parsing a mol file; for the bonds marked here, coordinate
1178 information on the neighbors can be used to identify cis or trans
1179 states
1180 - when writing a mol file; bonds that can be cis/trans but not marked as
1181 either need to be specially marked in the mol file
1182 - finding double bonds with unspecified stereochemistry so they
1183 can be enumerated for downstream 3D tools
1184
1185 The CIPranks on the neighboring atoms are checked in this function. The
1186 _CIPCode property if set to any on the double bond.
1187*/
1189 bool cleanIt = false);
1190//! \brief Uses the molParity atom property to assign ChiralType to a
1191//! molecule's atoms
1192/*!
1193 \param mol the molecule of interest
1194 \param replaceExistingTags if this flag is true, any existing atomic chiral
1195 tags will be replaced
1196*/
1198 ROMol &mol, bool replaceExistingTags = true);
1199
1200//! @}
1201
1202//! returns the number of atoms which have a particular property set
1204 const ROMol &mol, std::string prop);
1205
1206//! returns whether or not a molecule needs to have Hs added to it.
1208
1209//! \brief Replaces haptic bond with explicit dative bonds.
1210/*!
1211 *
1212 * @param mol the molecule of interest
1213 *
1214 * One way of showing haptic bonds (such as cyclopentadiene to iron in
1215 * ferrocene) is to use a dummy atom with a dative bond to the iron atom with
1216 * the bond labelled with the atoms involved in the organic end of the bond.
1217 * Another way is to have explicit dative bonds from the atoms of the haptic
1218 * group to the metal atom. This function converts the former representation
1219 * to the latter.
1220 */
1222
1223//! \overload modifies molecule in place.
1225
1226//! \brief Replaces explicit dative bonds with haptic.
1227/*!
1228 *
1229 * @param mol the molecule of interest
1230 *
1231 * Does the reverse of hapticBondsToDative. If there are multiple contiguous
1232 * atoms attached by dative bonds to an atom (probably a metal atom), the
1233 * dative bonds will be replaced by a dummy atom in their centre attached to
1234 * the (metal) atom by a dative bond, which is labelled with ENDPTS of the
1235 * atoms that had the original dative bonds.
1236 */
1238
1239//! \overload modifies molecule in place.
1241
1242/*!
1243 Calculates a molecule's average molecular weight
1244
1245 \param mol the molecule of interest
1246 \param onlyHeavy (optional) if this is true (the default is false),
1247 only heavy atoms will be included in the MW calculation
1248
1249 \return the AMW
1250*/
1252 bool onlyHeavy = false);
1253/*!
1254 Calculates a molecule's exact molecular weight
1255
1256 \param mol the molecule of interest
1257 \param onlyHeavy (optional) if this is true (the default is false),
1258 only heavy atoms will be included in the MW calculation
1259
1260 \return the exact MW
1261*/
1263 bool onlyHeavy = false);
1264
1265/*!
1266 Calculates a molecule's formula
1267
1268 \param mol the molecule of interest
1269 \param separateIsotopes if true, isotopes will show up separately in the
1270 formula. So C[13CH2]O will give the formula: C[13C]H6O
1271 \param abbreviateHIsotopes if true, 2H and 3H will be represented as
1272 D and T instead of [2H] and [3H]. This only applies if \c
1273 separateIsotopes is true
1274
1275 \return the formula as a string
1276*/
1278 const ROMol &mol, bool separateIsotopes = false,
1279 bool abbreviateHIsotopes = true);
1280
1281namespace details {
1282//! not recommended for use in other code
1284 RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1285 boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds = true,
1286 unsigned int maxBackTracks = 100);
1287
1288// If the bond is dative, and it has a common_properties::MolFileBondEndPts
1289// prop, returns a vector of the indices of the atoms mentioned in the prop.
1290RDKIT_GRAPHMOL_EXPORT std::vector<int> hapticBondEndpoints(const Bond *bond);
1291
1292} // namespace details
1293
1294//! attachment points encoded as attachPt properties are added to the graph as
1295/// dummy atoms
1296/*!
1297 *
1298 * @param mol the molecule of interest
1299 * @param addAsQueries if true, the dummy atoms will be added as null queries
1300 * (i.e. they will match any atom in a substructure search)
1301 * @param addCoords if true and the molecule has one or more conformers,
1302 * positions for the attachment points will be added to the conformer(s).
1303 *
1304 */
1306 bool addAsQueries = true,
1307 bool addCoords = true);
1308//! dummy atoms in the graph are removed and replaced with attachment point
1309//! annotations on the attached atoms
1310/*!
1311 *
1312 * @param mol the molecule of interest
1313 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1314 * property will be collapsed
1315 *
1316 * In order for a dummy atom to be considered for collapsing it must have:
1317 * - degree 1 with a single or unspecified bond
1318 * - the bond to it can not be wedged
1319 * - either no query or be an AtomNullQuery
1320 *
1321 */
1323 bool markedOnly = true);
1324
1325namespace details {
1326//! attachment points encoded as attachPt properties are added to the graph as
1327/// dummy atoms
1328/*!
1329 *
1330 * @param mol the molecule of interest
1331 * @param atomIdx the index of the atom to which the attachment point should
1332 * be added
1333 * @param val the attachment point value. Should be 1 or 2
1334 * @param addAsQueries if true, the dummy atoms will be added as null queries
1335 * (i.e. they will match any atom in a substructure search)
1336 * @param addCoords if true and the molecule has one or more conformers,
1337 * positions for the attachment points will be added to the conformer(s).
1338 *
1339 */
1341 RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery = true,
1342 bool addCoords = true);
1343
1344//! returns whether or not an atom is an attachment point
1345/*!
1346 *
1347 * @param mol the molecule of interest
1348 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1349 * property will be collapsed
1350 *
1351 * In order for a dummy atom to be considered for collapsing it must have:
1352 * - degree 1 with a single or unspecified bond
1353 * - the bond to it can not be wedged
1354 * - either no query or be an AtomNullQuery
1355 *
1356 */
1358 bool markedOnly = true);
1359
1360} // namespace details
1361
1362} // namespace MolOps
1363} // namespace RDKit
1364
1365#endif
#define BETTER_ENUM(Enum, Underlying,...)
Definition BetterEnums.h:17
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition Atom.h:74
HybridizationType
store hybridization
Definition Atom.h:92
class for representing a bond
Definition Bond.h:46
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
used by various file parsing classes to indicate a parse error
unsigned int size() const
Definition MolOps.h:1041
Atom::HybridizationType operator[](int idx)
Definition MolOps.h:1031
Hybridizations(const Hybridizations &)
Definition MolOps.h:1025
Hybridizations(const ROMol &mol)
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:249
RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(RWMol &mol, const boost::dynamic_bitset<> &atomsToUse, boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
not recommended for use in other code
RDKIT_GRAPHMOL_EXPORT std::vector< int > hapticBondEndpoints(const Bond *bond)
RDKIT_GRAPHMOL_EXPORT bool isAttachmentPoint(const Atom *atom, bool markedOnly=true)
returns whether or not an atom is an attachment point
RDKIT_GRAPHMOL_EXPORT unsigned int addExplicitAttachmentPoint(RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery=true, bool addCoords=true)
Groups a variety of molecular query and transformation operations.
Definition MolOps.h:38
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms and bonds.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT std::string getMolFormula(const ROMol &mol, bool separateIsotopes=false, bool abbreviateHIsotopes=true)
RDKIT_GRAPHMOL_EXPORT void cleanupAtropisomers(RWMol &mol, Hybridizations &hybridizations)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT double getExactMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT std::pair< bool, bool > hasQueryHs(const ROMol &mol)
returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition MolOps.h:616
@ AROMATICITY_RDKIT
Definition MolOps.h:618
@ AROMATICITY_MDL
Definition MolOps.h:620
@ AROMATICITY_CUSTOM
use a function
Definition MolOps.h:622
@ AROMATICITY_DEFAULT
future proofing
Definition MolOps.h:617
@ AROMATICITY_MMFF94
Definition MolOps.h:621
@ AROMATICITY_SIMPLE
Definition MolOps.h:619
RDKIT_GRAPHMOL_EXPORT void cleanUpOrganometallics(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * hapticBondsToDative(const ROMol &mol)
Replaces haptic bond with explicit dative bonds.
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void clearSingleBondDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false, bool mergeIsotopes=false)
RDKIT_GRAPHMOL_EXPORT void expandAttachmentPoints(RWMol &mol, bool addAsQueries=true, bool addCoords=true)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT ROMol * dativeBondsToHaptic(const ROMol &mol)
Replaces explicit dative bonds with haptic.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol)
sets the aromaticity model for a molecule to MMFF94
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT void clearAllBondDirFlags(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT void clearDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, const AddHsParameters &params, const UINT_VECT *onlyOnAtoms=nullptr)
adds Hs to a molecule as explicit Atoms
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Stoms
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void collapseAttachmentPoints(RWMol &mol, bool markedOnly=true)
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT double getAvgMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
AdjustQueryWhichFlags
Definition MolOps.h:398
@ ADJUST_IGNORERINGS
Definition MolOps.h:401
@ ADJUST_IGNORENONE
Definition MolOps.h:399
@ ADJUST_IGNOREMAPPED
Definition MolOps.h:404
@ ADJUST_IGNORENONDUMMIES
Definition MolOps.h:403
@ ADJUST_IGNOREDUMMIES
Definition MolOps.h:402
@ ADJUST_IGNORECHAINS
Definition MolOps.h:400
@ ADJUST_IGNOREALL
Definition MolOps.h:405
Std stuff.
std::vector< double > INVAR_VECT
Definition MolOps.h:33
INVAR_VECT::iterator INVAR_VECT_I
Definition MolOps.h:34
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition MolOps.h:35
std::vector< UINT > UINT_VECT
Definition types.h:322
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition MolOps.h:417
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition MolOps.h:468