RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstanceGroup.h
Go to the documentation of this file.
1//
2//
3// Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11/*! \file SubstanceGroup.h
12
13 \brief Defines the SubstanceGroup class
14
15*/
16#include <RDGeneral/export.h>
17#ifndef _RD_SGROUP_H
18#define _RD_SGROUP_H
19
20#include <utility>
21#include <unordered_map>
22
23#include <Geometry/point.h>
24#include <RDGeneral/types.h>
25#include <RDGeneral/RDProps.h>
26#include <boost/smart_ptr.hpp>
27
28namespace RDKit {
29class ROMol;
30class RWMol;
31class Bond;
32class Atom;
33
34//! used to indicate errors from incorrect sgroup access
36 : public std::runtime_error {
37 public:
38 //! construct with an error message
39 SubstanceGroupException(const char *msg) : std::runtime_error(msg) {}
40 //! construct with an error message
41 SubstanceGroupException(const std::string &msg) : std::runtime_error(msg) {}
42};
43
44//! The class for representing SubstanceGroups
45/*!
46 <b>Notes:</b>
47 - These are inspired by the SGroups in the MDL formats
48 - Implementation is based on 2010 MDL SD specification:
49 http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
50 - See SGroups.md for further, more comprehensive notes.
51
52*/
53
55 public:
56 //! Bond type (see V3000 spec)
57 enum class BondType {
58 XBOND, // External/Crossing bond
59 CBOND, // Internal/Contained bond
60 };
61
62 typedef std::array<RDGeom::Point3D, 3> Bracket;
63
64 //! Data structure for SAP lines (see V3000 spec)
65 //! lvIdx may not be set; this is signaled with the value -1
66 struct AttachPoint {
67 unsigned int aIdx;
68 int lvIdx;
69 std::string id;
70 bool operator==(const AttachPoint &other) const {
71 return aIdx == other.aIdx && lvIdx == other.lvIdx && id == other.id;
72 }
73 };
74
75 //! See specification for V3000 CSTATE
76 //! vector may or may not be considered, depending on TYPE
77 struct CState {
78 unsigned int bondIdx;
80 bool operator==(const CState &other) const {
81 // note that we ignore coordinates for this
82 return bondIdx == other.bondIdx;
83 }
84 };
85
86//! No default constructor
87#ifndef SWIG
88 // Unfortunately, SWIG generated wrapper code uses temporary variables that
89 // require a default ctor not be deleted.
90 SubstanceGroup() = delete;
91#endif // !SWIG
92
93 //! Main Constructor. Ownership is only set on this side of the relationship:
94 //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
95 //! on the other side.
96 SubstanceGroup(ROMol *owning_mol, const std::string &type);
97
98 SubstanceGroup(const SubstanceGroup &other) = default;
99 SubstanceGroup &operator=(const SubstanceGroup &other) = default;
100
101 SubstanceGroup(SubstanceGroup &&other) noexcept : RDProps(std::move(other)) {
102 dp_mol = std::exchange(other.dp_mol, nullptr);
103 d_atoms = std::move(other.d_atoms);
104 d_patoms = std::move(other.d_patoms);
105 d_bonds = std::move(other.d_bonds);
106 d_brackets = std::move(other.d_brackets);
107 d_cstates = std::move(other.d_cstates);
108 d_saps = std::move(other.d_saps);
109 }
110
112 if (this == &other) {
113 return *this;
114 }
115 RDProps::operator=(std::move(other));
116 dp_mol = std::exchange(other.dp_mol, nullptr);
117 d_atoms = std::move(other.d_atoms);
118 d_patoms = std::move(other.d_patoms);
119 d_bonds = std::move(other.d_bonds);
120 d_brackets = std::move(other.d_brackets);
121 d_cstates = std::move(other.d_cstates);
122 d_saps = std::move(other.d_saps);
123 return *this;
124 }
125
126 //! Destructor
127 ~SubstanceGroup() = default;
128
129 //! returns whether or not this belongs to a molecule
130 bool hasOwningMol() const { return dp_mol != nullptr; }
131
132 //! Get the molecule that owns this instance
134 PRECONDITION(dp_mol, "no owner");
135 return *dp_mol;
136 }
137
138 //! returns whether or not this group is valid; invalid groups must be
139 //! ignored.
140 bool getIsValid() const { return d_isValid; }
141
142 //! set whether or not this group is valid; invalid groups must be ignored.
143 void setIsValid(bool isValid) { d_isValid = isValid; }
144
145 //! get the index of this sgroup in dp_mol's sgroups vector
146 //! (do not mistake this for the ID!)
147 unsigned int getIndexInMol() const;
148
149 /* Atom and Bond methods */
150 void addAtomWithIdx(unsigned int idx);
151 void addParentAtomWithIdx(unsigned int idx);
152 void addBondWithIdx(unsigned int idx);
153 void addAtomWithBookmark(int mark);
155 void addBondWithBookmark(int mark);
156
157 // These methods should be handled with care, since they can leave
158 // Attachment points and CStates in an invalid state!
159 void removeAtomWithIdx(unsigned int idx);
160 void removeParentAtomWithIdx(unsigned int idx);
161 void removeBondWithIdx(unsigned int idx);
162
163 void addBracket(const Bracket &bracket);
164 void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
165 void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);
166
167 BondType getBondType(unsigned int bondIdx) const;
168
169 const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
170 const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
171 const std::vector<unsigned int> &getBonds() const { return d_bonds; }
172
173 void setAtoms(std::vector<unsigned int> atoms);
174 void setParentAtoms(std::vector<unsigned int> patoms);
175 void setBonds(std::vector<unsigned int> bonds);
176
177 const std::vector<Bracket> &getBrackets() const { return d_brackets; }
178 const std::vector<CState> &getCStates() const { return d_cstates; }
179 const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }
180
181 std::vector<Bracket> &getBrackets() { return d_brackets; }
182 std::vector<CState> &getCStates() { return d_cstates; }
183 std::vector<AttachPoint> &getAttachPoints() { return d_saps; }
184
185 void clearBrackets() { d_brackets.clear(); }
186 void clearCStates() { d_cstates.clear(); }
187 void clearAttachPoints() { d_saps.clear(); }
188
189 //! adjusts our atom IDs to reflect that an atom has been removed from the
190 //! parent molecule
191 //! decrements all atom IDs that are higher than \c atomIdx
192 //! raises a \c SubstanceGroupException if \c atomIdx is actually part of
193 //! this substance group
194 //! \returns whether or not anything was changed
195 bool adjustToRemovedAtom(unsigned int atomIdx);
196
197 //! \returns whether or not the specified atom is part of the
198 //! definition of this substance group
199 bool includesAtom(unsigned int atomIdx) const;
200
201 //! adjusts our bond IDs to reflect that a bond has been removed from the
202 //! parent molecule
203 //! decrements all bond IDs that are higher than \c bondIdx
204 //! raises a \c SubstanceGroupException if \c bondIdx is actually part of
205 //! this substance group
206 //! \returns whether or not anything was changed
207 bool adjustToRemovedBond(unsigned int bondIdx);
208
209 //! \returns whether or not the specified bond is part of the
210 //! definition of this substance group
211 bool includesBond(unsigned int bondIdx) const;
212
213 //! Set owning molecule
214 //! This only updates atoms and bonds; parent sgroup has to be updated
215 //! independently, since parent might not exist at the time this is
216 //! called.
217 void setOwningMol(ROMol *mol);
218
219 bool operator==(const SubstanceGroup &other) const {
220 // we ignore brackets and cstates, which involve coordinates
221 return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
222 d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
223 d_saps == other.d_saps;
224 }
225
226 private:
227 ROMol *dp_mol = nullptr; // owning molecule
228
229 bool d_isValid = true;
230
231 std::vector<unsigned int> d_atoms;
232 std::vector<unsigned int> d_patoms;
233 std::vector<unsigned int> d_bonds;
234
235 std::vector<Bracket> d_brackets;
236 std::vector<CState> d_cstates;
237 std::vector<AttachPoint> d_saps;
238}; // class SubstanceGroup
239
241
242const std::vector<std::string> sGroupTypes = {
243 // polymer sgroups:
244 "SRU", "MON", "COP", "CRO", "GRA", "MOD", "MER", "ANY",
245 // formulations/mixtures:
246 "COM", "MIX", "FOR",
247 // other
248 "SUP", "MUL", "DAT", "GEN"};
249
250const std::vector<std::string> sGroupSubtypes = {"ALT", "RAN", "BLO"};
251const std::vector<std::string> sGroupConnectTypes = {"HH", "HT", "EU"};
252const std::vector<std::string> sGroupClasses = {
253 "AA", "dAA", "DNA", "RNA", "SUGAR", "BASE",
254 "PHOSPHATE", "LINKER", "CHEM", "LGRP", "MODAA", "MODdAA",
255 "MODDNA", "MODRNA", "XLINKAA", "XLINKdAA", "XLINKDNA", "XLINKRNA",
256};
257
258RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);
259
260RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);
261
262RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);
263
264RDKIT_GRAPHMOL_EXPORT bool isValidClass(const std::string &sgroupClass);
265
267 unsigned int id);
268
269} // namespace SubstanceGroupChecks
270
271//! \name SubstanceGroups and molecules
272//! @{
273
274RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
275 ROMol &mol);
276RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
277 const ROMol &mol);
278
279//! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
280//! references to the SubstanceGroup exist.
281/*!
282 \param sgroup - SubstanceGroup to be added to the molecule.
283*/
285 SubstanceGroup sgroup);
286
287//! Removes SubstanceGroups which reference a particular atom index
288/*!
289 \param mol - molecule to be edited.
290 \param idx - atom index
291*/
293 RWMol &mol, unsigned int idx);
294//! Removes SubstanceGroups which reference a particular bond index
295/*!
296 \param mol - molecule to be edited.
297 \param idx - bond index
298*/
300 RWMol &mol, unsigned int idx);
301//! @}
302
303} // namespace RDKit
304
305//! allows SubstanceGroup objects to be dumped to streams
306RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
307 const RDKit::SubstanceGroup &sg);
308#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg)
allows SubstanceGroup objects to be dumped to streams
The class for representing atoms.
Definition Atom.h:74
class for representing a bond
Definition Bond.h:46
RDProps & operator=(const RDProps &rhs)
Definition RDProps.h:24
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
SubstanceGroupException(const std::string &msg)
construct with an error message
SubstanceGroupException(const char *msg)
construct with an error message
The class for representing SubstanceGroups.
const std::vector< unsigned int > & getBonds() const
void addBondWithIdx(unsigned int idx)
void setOwningMol(ROMol *mol)
SubstanceGroup & operator=(const SubstanceGroup &other)=default
void setParentAtoms(std::vector< unsigned int > patoms)
void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr)
void setBonds(std::vector< unsigned int > bonds)
~SubstanceGroup()=default
Destructor.
void setIsValid(bool isValid)
set whether or not this group is valid; invalid groups must be ignored.
const std::vector< unsigned int > & getAtoms() const
void addParentAtomWithBookmark(int mark)
void setAtoms(std::vector< unsigned int > atoms)
bool adjustToRemovedBond(unsigned int bondIdx)
void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector)
ROMol & getOwningMol() const
Get the molecule that owns this instance.
const std::vector< Bracket > & getBrackets() const
SubstanceGroup()=delete
No default constructor.
bool adjustToRemovedAtom(unsigned int atomIdx)
bool operator==(const SubstanceGroup &other) const
BondType
Bond type (see V3000 spec)
SubstanceGroup(const SubstanceGroup &other)=default
SubstanceGroup(SubstanceGroup &&other) noexcept
void addBondWithBookmark(int mark)
const std::vector< unsigned int > & getParentAtoms() const
void addAtomWithBookmark(int mark)
bool includesAtom(unsigned int atomIdx) const
std::vector< Bracket > & getBrackets()
void removeParentAtomWithIdx(unsigned int idx)
SubstanceGroup(ROMol *owning_mol, const std::string &type)
void addParentAtomWithIdx(unsigned int idx)
void addAtomWithIdx(unsigned int idx)
const std::vector< CState > & getCStates() const
std::array< RDGeom::Point3D, 3 > Bracket
void addBracket(const Bracket &bracket)
const std::vector< AttachPoint > & getAttachPoints() const
std::vector< CState > & getCStates()
bool hasOwningMol() const
returns whether or not this belongs to a molecule
bool includesBond(unsigned int bondIdx) const
void removeAtomWithIdx(unsigned int idx)
std::vector< AttachPoint > & getAttachPoints()
void removeBondWithIdx(unsigned int idx)
BondType getBondType(unsigned int bondIdx) const
SubstanceGroup & operator=(SubstanceGroup &&other) noexcept
unsigned int getIndexInMol() const
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:249
const std::vector< std::string > sGroupClasses
RDKIT_GRAPHMOL_EXPORT bool isValidClass(const std::string &sgroupClass)
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type)
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type)
const std::vector< std::string > sGroupConnectTypes
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id)
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type)
const std::vector< std::string > sGroupSubtypes
const std::vector< std::string > sGroupTypes
Std stuff.
RDKIT_GRAPHMOL_EXPORT std::vector< SubstanceGroup > & getSubstanceGroups(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingBond(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular bond index.
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingAtom(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular atom index.
bool operator==(const AttachPoint &other) const
bool operator==(const CState &other) const