RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstructMatch.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2025 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SUBSTRUCTMATCH_H
12#define RD_SUBSTRUCTMATCH_H
13
14// std bits
15#include <vector>
16
17#include <unordered_set>
18#include <functional>
19#include <unordered_map>
20#include <cstdint>
21#include <string>
22#include <span>
23
24#include <boost/dynamic_bitset.hpp>
25#if BOOST_VERSION >= 107100
26#define RDK_INTERNAL_BITSET_HAS_HASH
27#endif
28
30
31namespace RDKit {
32class ROMol;
33class Atom;
34class Bond;
35class ResonanceMolSupplier;
36class MolBundle;
37
38//! \brief used to return matches from substructure searching.
39//! The format is (queryAtomIdx, molAtomIdx)
40typedef std::vector<std::pair<int, int>> MatchVectType;
41
43 bool useChirality = false; //!< Use chirality in determining whether or not
44 //!< atoms/bonds match
45 bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
46 //!< determining whether atoms/bonds match
47 bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
48 //!< match each other
49 bool useQueryQueryMatches = false; //!< Consider query-query matches, not
50 //!< just simple matches
51 bool useGenericMatchers = false; //!< Looks for generic atoms in the query
52 //!< and uses them as part of the matching
53 bool recursionPossible = true; //!< Allow recursive queries
54 bool uniquify = true; //!< uniquify (by atom index) match results
55 unsigned int maxMatches = 1000; //!< maximum number of matches to return
56 int numThreads = 1; //!< number of threads to use when multi-threading
57 //!< is possible. 0 selects the number of
58 //!< concurrent threads supported by the hardware
59 //!< negative values are added to the number of
60 //!< concurrent threads supported by the hardware
61 std::vector<std::string> atomProperties; //!< atom properties that must be
62 //!< equivalent in order to match
63 std::vector<std::string> bondProperties; //!< bond properties that must be
64 //!< equivalent in order to match
65 std::function<bool(const ROMol &mol,
66 std::span<const unsigned int> match)>
67 extraFinalCheck; //!< a function to be called at the end to validate a
68 //!< match
69 unsigned int maxRecursiveMatches =
70 1000; //!< maximum number of matches that the recursive substructure
71 //!< matching should return
72 bool specifiedStereoQueryMatchesUnspecified =
73 false; //!< If set, query atoms and bonds with specified stereochemistry
74 //!< will match atoms and bonds with unspecified stereochemistry
75 bool aromaticMatchesSingleOrDouble = false; //!< Aromatic bonds match single
76 //!< or double bonds
78};
79
81 SubstructMatchParameters &params, const std::string &json);
83 const SubstructMatchParameters &params);
84
85//! Find substructure matches for a query in a molecule
86/*!
87 \param mol The ROMol to be searched
88 \param query The query ROMol
89 \param matchParams Parameters controlling the matching
90
91 \return The matches, if any
92
93*/
95 const ROMol &mol, const ROMol &query,
97
98//! Find all substructure matches for a query in a ResonanceMolSupplier object
99/*!
100 \param resMolSuppl The ResonanceMolSupplier object to be searched
101 \param query The query ROMol
102 \param matchParams Parameters controlling the matching
103
104 \return The matches, if any
105
106*/
110
112 const MolBundle &bundle, const ROMol &query,
115 const ROMol &mol, const MolBundle &query,
118 const MolBundle &bundle, const MolBundle &query,
120
121//! Find a substructure match for a query
122/*!
123 \param mol The object to be searched
124 \param query The query
125 \param matchVect Used to return the match
126 (pre-existing contents will be deleted)
127 \param recursionPossible flags whether or not recursive matches are allowed
128 \param useChirality use atomic CIP codes as part of the comparison
129 \param useQueryQueryMatches if set, the contents of atom and bond queries
130 will be used as part of the matching
131
132 \return whether or not a match was found
133
134*/
135template <typename T1, typename T2>
136bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
137 bool recursionPossible = true, bool useChirality = false,
138 bool useQueryQueryMatches = false) {
// Convenience wrapper: copies the boolean flags into `params`, asks the
// params-taking SubstructMatch() overload for at most one match, and hands
// that match back to the caller through `matchVect`.
// NOTE(review): listing line 139 is absent from this extract; presumably it
// declares `params` (a SubstructMatchParameters) - confirm against the header.
140 params.recursionPossible = recursionPossible;
141 params.useChirality = useChirality;
142 params.useQueryQueryMatches = useQueryQueryMatches;
143 params.maxMatches = 1;  // a single match is all this overload reports
144 std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
145 if (matchVects.size()) {
146 matchVect = matchVects.front();  // copy the first match out
147 } else {
148 matchVect.clear();  // nothing found: discard any pre-existing contents
149 }
// true only when a non-empty match ended up in matchVect
150 return matchVect.size() != 0;
151};
152
153//! Find all substructure matches for a query
154/*!
155 \param mol The object to be searched
156 \param query The query
157 \param matchVect Used to return the matches
158 (pre-existing contents will be deleted)
159 \param uniquify Toggles uniquification (by atom index) of the results
160 \param recursionPossible flags whether or not recursive matches are allowed
161 \param useChirality use atomic CIP codes as part of the comparison
162 \param useQueryQueryMatches if set, the contents of atom and bond queries
163 will be used as part of the matching
164 \param maxMatches The maximum number of matches that will be returned.
165 In high-symmetry cases with medium-sized molecules, it is
166 very
167 easy to end up with a combinatorial explosion in the
168 number of
169 possible matches. This argument prevents that from having
170 unintended consequences
171
172 \return the number of matches found
173
174*/
175template <typename T1, typename T2>
176unsigned int SubstructMatch(T1 &mol, const T2 &query,
177 std::vector<MatchVectType> &matchVect,
178 bool uniquify = true, bool recursionPossible = true,
179 bool useChirality = false,
180 bool useQueryQueryMatches = false,
181 unsigned int maxMatches = 1000,
182 int numThreads = 1) {
// Convenience wrapper: forwards each argument to the like-named field of
// `params`, then delegates to the params-taking SubstructMatch() overload.
// NOTE(review): listing line 183 is absent from this extract; presumably it
// declares `params` (a SubstructMatchParameters) - confirm against the header.
184 params.uniquify = uniquify;
185 params.recursionPossible = recursionPossible;
186 params.useChirality = useChirality;
187 params.useQueryQueryMatches = useQueryQueryMatches;
188 params.maxMatches = maxMatches;
189 params.numThreads = numThreads;
// the assignment replaces whatever matchVect held before the call
190 matchVect = SubstructMatch(mol, query, params);
// the return value is the number of matches now stored in matchVect
191 return static_cast<unsigned int>(matchVect.size());
192};
193
194// ----------------------------------------------
195//
196// find one match in ResonanceMolSupplier object
197//
198template <>
200 const ROMol &query, MatchVectType &matchVect,
201 bool recursionPossible, bool useChirality,
202 bool useQueryQueryMatches) {
// Explicit specialization of the single-match wrapper; the body searches
// `resMolSupplier` and reports at most one match through `matchVect`.
// NOTE(review): listing lines 199 and 203 are absent from this extract.
// Presumably 199 carries the function name and the `resMolSupplier`
// parameter and 203 declares `params` - confirm against the header.
204 params.recursionPossible = recursionPossible;
205 params.useChirality = useChirality;
206 params.useQueryQueryMatches = useQueryQueryMatches;
207 params.maxMatches = 1;  // a single match is all this overload reports
208 std::vector<MatchVectType> matchVects =
209 SubstructMatch(resMolSupplier, query, params);
210 if (matchVects.size()) {
211 matchVect = matchVects.front();  // copy the first match out
212 } else {
213 matchVect.clear();  // nothing found: discard any pre-existing contents
214 }
// true only when a non-empty match ended up in matchVect
215 return matchVect.size() != 0;
216}
217
218template <>
220 const ROMol &query,
221 std::vector<MatchVectType> &matchVect,
222 bool uniquify, bool recursionPossible,
223 bool useChirality, bool useQueryQueryMatches,
224 unsigned int maxMatches, int numThreads) {
// Explicit specialization of the all-matches wrapper; the body searches
// `resMolSupplier` and stores every returned match in `matchVect`.
// NOTE(review): listing lines 219 and 225 are absent from this extract.
// Presumably 219 carries the function name and the `resMolSupplier`
// parameter and 225 declares `params` - confirm against the header.
226 params.uniquify = uniquify;
227 params.recursionPossible = recursionPossible;
228 params.useChirality = useChirality;
229 params.useQueryQueryMatches = useQueryQueryMatches;
230 params.maxMatches = maxMatches;
231 params.numThreads = numThreads;
// the assignment replaces whatever matchVect held before the call
232 matchVect = SubstructMatch(resMolSupplier, query, params);
// the return value is the number of matches now stored in matchVect
233 return static_cast<unsigned int>(matchVect.size());
234};
235
236//! Class used as a final step to confirm whether or not a given atom->atom
237//! mapping is a valid substructure match.
239 public:
240 MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
241 const SubstructMatchParameters &ps);
242
243 bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);
244
245 private:
246 const ROMol &d_query;
247 const ROMol &d_mol;
248 const SubstructMatchParameters &d_params;
249 std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
250#ifdef RDK_INTERNAL_BITSET_HAS_HASH
251 // Boost 1.71 added support for std::hash with dynamic_bitset.
252 using HashedStorageType = boost::dynamic_bitset<>;
253#else
254 // otherwise we use a less elegant solution
255 using HashedStorageType = std::string;
256#endif
257 std::unordered_set<HashedStorageType> matchesSeen;
258};
259
260} // namespace RDKit
261
262#endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:59
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[])
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition export.h:537
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
RDKIT_SUBSTRUCTMATCH_EXPORT std::string substructMatchParamsToJSON(const SubstructMatchParameters &params)
std::function< bool(const ROMol &mol, std::span< const unsigned int > match)> extraFinalCheck
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
std::vector< std::string > atomProperties
std::vector< std::string > bondProperties
bool recursionPossible
Allow recursive queries.