RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
ChemTransforms.h
Go to the documentation of this file.
1//
2// Copyright (C) 2006-2012 Greg Landrum
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef _RD_CHEMTRANSFORMS_H__
12#define _RD_CHEMTRANSFORMS_H__
13
14#include <boost/smart_ptr.hpp>
15#include <vector>
16
18#include "MolFragmenter.h"
19
20namespace RDKit {
21class ROMol;
22typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
23
24//! \brief Returns a copy of an ROMol with the atoms and bonds that
25//! match a pattern removed.
26/*!
27 \param mol the ROMol of interest
28 \param query the query ROMol
29 \param onlyFrags if this is set, atoms will only be removed if
30 the entire fragment in which they are found is
31 matched by the query.
32 \param useChirality - if set, match the query using chirality
33
34 \return a copy of \c mol with the matching atoms and bonds (if any)
35 removed.
36*/
38 const ROMol &query,
39 bool onlyFrags = false,
40 bool useChirality = false);
41
42//! \brief Returns a list of copies of an ROMol with the atoms and bonds that
43//! match a pattern replaced with the atoms contained in another molecule.
44/*!
45 Bonds are created between the joining atom in the existing molecule
46 and the atoms in the new molecule. So, using SMILES instead of molecules:
47 replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
48 ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
49 replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
50 ['[X]NCCN[X]']
51 Chains should be handled "correctly":
52 replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
53 ['C[X]C']
54 As should rings:
55 replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
56 ['C1[X]C1']
57 And higher order branches:
58 replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
59 ['C[X](C)C']
60 Note that the client is responsible for making sure that the
61 resulting molecule actually makes sense - this function does not
62 perform sanitization.
63
64 \param mol the ROMol of interest
65 \param query the query ROMol
66 \param replacement the ROMol to be inserted
67 \param replaceAll if this is true, only a single result, with all
68 occurrences
69 of the substructure replaced, will be returned.
70 \param replacementConnectionPoint index of the atom in the replacement
71 that
72 the bond should be made to
73 \param useChirality - if set, match the query using chirality
74
75
76 \return a vector of pointers to copies of \c mol with the matching atoms
77 and bonds (if any) replaced
78
79*/
81 const ROMol &mol, const ROMol &query, const ROMol &replacement,
82 bool replaceAll = false, unsigned int replacementConnectionPoint = 0,
83 bool useChirality = false);
84
85//! \brief Returns a copy of an ROMol with the atoms and bonds that
86//! don't fall within a substructure match removed.
87//!
88//! dummy atoms are left to indicate attachment points.
89//!
90/*!
91 \param mol the ROMol of interest
92 \param coreQuery a query ROMol to be used to match the core
93 \param useChirality - if set, match the coreQuery using chirality
94
95 \return a copy of \c mol with the non-matching atoms and bonds (if any)
96 removed and dummies at the connection points.
97*/
98
100 const ROMol &coreQuery,
101 bool useChirality = false);
102
103//! \brief Returns a copy of an ROMol with the atoms and bonds that
104//! are referenced by the MatchVector removed.
105//! MatchVector must be defined between mol and the specified core.
106//!
107//! dummy atoms are left to indicate attachment points.
108//! These dummy atoms can be labeled either by the matching index
109//! in the query or by an arbitrary "first match" found.
110//! Additional matching options are given below.
111//!
112/*!
113 Note that this is essentially identical to the replaceSidechains function,
114 except we
115 invert the query and replace the atoms that *do* match the query.
116
117 \param mol - the ROMol of interest
118 \param core - the core being matched against
119 \param matchVect - a matchVect of the type returned by Substructure
120 Matching
121 \param replaceDummies - if set, atoms matching dummies in the core
122 will also be replaced
123 \param labelByIndex - if set, the dummy atoms at attachment points are
124 labelled with the index+1 of the corresponding atom in the core
125 \param requireDummyMatch - if set, only side chains that are connected to
126 atoms in the core that have attached dummies will be considered. Molecules
127 that have sidechains attached at other points will be rejected (NULL returned).
128
129 \return a copy of \c mol with the matching atoms and bonds (if any)
130 removed and dummies at the connection points. The client is
131 responsible
132 for deleting this molecule. If the core query is not matched, NULL
133 is returned.
134*/
136 const ROMol &core,
137 const MatchVectType &matchVect,
138 bool replaceDummies = true,
139 bool labelByIndex = false,
140 bool requireDummyMatch = false);
141
142//! \brief Returns a copy of an ROMol with the atoms and bonds that
143//! do fall within a substructure match removed.
144//!
145//! dummy atoms are left to indicate attachment points.
146//!
147/*!
148 Note that this is essentially identical to the replaceSidechains function,
149 except we
150 invert the query and replace the atoms that *do* match the query.
151
152 \param mol - the ROMol of interest
153 \param coreQuery - a query ROMol to be used to match the core
154 \param replaceDummies - if set, atoms matching dummies in the core will also
155 be replaced
156 \param labelByIndex - if set, the dummy atoms at attachment points are
157 labelled with the
158 index+1 of the corresponding atom in the core
159 \param requireDummyMatch - if set, only side chains that are connected to
160 atoms in
161 the core that have attached dummies will be
162 considered.
163 Molecules that have sidechains that are attached
164 at other points will be rejected (NULL returned).
165 \param useChirality - if set, match the coreQuery using chirality
166
167 \return a copy of \c mol with the matching atoms and bonds (if any)
168 removed and dummies at the connection points. The client is
169 responsible
170 for deleting this molecule. If the core query is not matched, NULL
171 is returned.
172*/
174 const ROMol &coreQuery,
175 bool replaceDummies = true,
176 bool labelByIndex = false,
177 bool requireDummyMatch = false,
178 bool useChirality = false);
179
180//! \brief Carries out a Murcko decomposition on the molecule provided
181//!
182/*!
183
184 \param mol - the ROMol of interest
185
186 \return a new ROMol with the Murcko scaffold
187 The client is responsible for deleting this molecule.
188*/
190
191//! \brief Combines two molecules to create a new one
192//!
193/*!
194
195 \param mol1 - the first ROMol to be combined
196 \param mol2 - the second ROMol to be combined
197 \param offset - a constant offset to be added to every
198 atom position in mol2
199
200 \return a new ROMol with the two molecules combined.
201 The new molecule has not been sanitized.
202 The client is responsible for deleting this molecule.
203*/
205 const ROMol &mol1, const ROMol &mol2,
206 RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));
207
208//! \brief Adds named recursive queries to a molecule's atoms based on atom
209/// labels
210//!
211/*!
212
213 \param mol - the molecule to be modified
214 \param queries - the dictionary of named queries to add
215 \param propName - the atom property to use to get query names
216 \param reactantLabels - to store pairs of (atom index, query string)
217
218
219 NOTES:
220 - existing query information, if present, will be supplemented (AND logic)
221 - non-query atoms will be replaced with query atoms using only the query
222 logic
223 - query names can be present as comma separated lists, they will then
224 be combined using OR logic.
225 - throws a KeyErrorException if a particular query name is not present
226 in \c queries
227
228*/
230 ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
231 const std::string &propName,
232 std::vector<std::pair<unsigned int, std::string>> *reactantLabels =
233 nullptr);
234
235//! \brief parses a query definition file and sets up a set of definitions
236//! suitable for use by addRecursiveQueries()
237/*!
238
239 \param filename - the name of the file to be read
240 \param queryDefs - the dictionary of named queries (return value)
241 \param standardize - if true, query names will be converted to lower
242 case
243 \param delimiter - the line delimiter in the file
244 \param comment - text used to recognize comment lines
245 \param nameColumn - column with the names of queries
246 \param smartsColumn - column with the SMARTS definitions of the queries
247
248*/
250 const std::string &filename, std::map<std::string, ROMOL_SPTR> &queryDefs,
251 bool standardize = true, const std::string &delimiter = "\t",
252 const std::string &comment = "//", unsigned int nameColumn = 0,
253 unsigned int smartsColumn = 1);
254//! \overload
256 std::istream *inStream, std::map<std::string, ROMOL_SPTR> &queryDefs,
257 bool standardize = true, const std::string &delimiter = "\t",
258 const std::string &comment = "//", unsigned int nameColumn = 0,
259 unsigned int smartsColumn = 1);
260//! \brief equivalent to parseQueryDefFile() but the query definitions are
261/// explicitly passed in
263 const std::string &queryDefText,
264 std::map<std::string, ROMOL_SPTR> &queryDefs, bool standardize = true,
265 const std::string &delimiter = "\t", const std::string &comment = "//",
266 unsigned int nameColumn = 0, unsigned int smartsColumn = 1);
267
268namespace details {
269//! not recommended for use in other code
271 const ROMol &mol, RWMol &res, boost::dynamic_bitset<> &removedAtoms);
273 const std::map<const Atom *, Atom *> &molAtomMap, const ROMol &mol,
274 RWMol &newMol);
275} // namespace details
276
277} // namespace RDKit
278#endif
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_CHEMTRANSFORMS_EXPORT
Definition export.h:65
RDKIT_CHEMTRANSFORMS_EXPORT void updateSubMolConfs(const ROMol &mol, RWMol &res, boost::dynamic_bitset<> &removedAtoms)
not recommended for use in other code
RDKIT_CHEMTRANSFORMS_EXPORT void copyStereoGroups(const std::map< const Atom *, Atom * > &molAtomMap, const ROMol &mol, RWMol &newMol)
Std stuff.
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceCore(const ROMol &mol, const ROMol &core, const MatchVectType &matchVect, bool replaceDummies=true, bool labelByIndex=false, bool requireDummyMatch=false)
Returns a copy of an ROMol with the atoms and bonds that are referenced by the MatchVector removed....
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combines two molecules to create a new one.
RDKIT_CHEMTRANSFORMS_EXPORT std::vector< ROMOL_SPTR > replaceSubstructs(const ROMol &mol, const ROMol &query, const ROMol &replacement, bool replaceAll=false, unsigned int replacementConnectionPoint=0, bool useChirality=false)
Returns a list of copies of an ROMol with the atoms and bonds that match a pattern replaced with the ...
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefText(const std::string &queryDefText, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
equivalent to parseQueryDefFile() but the query definitions are explicitly passed in
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * deleteSubstructs(const ROMol &mol, const ROMol &query, bool onlyFrags=false, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that match a pattern removed.
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * MurckoDecompose(const ROMol &mol)
Carries out a Murcko decomposition on the molecule provided.
RDKIT_CHEMTRANSFORMS_EXPORT void addRecursiveQueries(ROMol &mol, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::pair< unsigned int, std::string > > *reactantLabels=nullptr)
Adds named recursive queries to a molecule's atoms based on atom labels.
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(const std::string &filename, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
parses a query definition file and sets up a set of definitions suitable for use by addRecursiveQueri...
boost::shared_ptr< ROMol > ROMOL_SPTR
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceSidechains(const ROMol &mol, const ROMol &coreQuery, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that don't fall within a substructure match remov...