RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MorganGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_MORGANGEN_H_2018_07
13#define RD_MORGANGEN_H_2018_07
14
16#include <cstdint>
17
18namespace RDKit {
19
21
22/**
23 \brief Default atom invariants generator for Morgan fingerprint, generates
24 ECFP-type invariants
25
26 */
29 bool df_includeRingMembership;
30
31 public:
32 /**
33 \brief Construct a new MorganAtomInvGenerator object
34
35 \param includeRingMembership : if set, whether or not the atom is in a ring
36 will be used in the invariant list.
37 */
38 MorganAtomInvGenerator(const bool includeRingMembership = true);
39
40 std::vector<std::uint32_t> *getAtomInvariants(
41 const ROMol &mol) const override;
42
43 std::string infoString() const override;
44 void toJSON(boost::property_tree::ptree &pt) const override;
45 void fromJSON(const boost::property_tree::ptree &) override;
46 MorganAtomInvGenerator *clone() const override;
47};
48
49/**
50 \brief Alternative atom invariants generator for Morgan fingerprint, generates
51 FCFP-type invariants
52
53 */
56 std::vector<const ROMol *> *dp_patterns;
57
58 public:
59 /**
60 \brief Construct a new MorganFeatureAtomInvGenerator object
61
62 \param patterns : if provided, should contain the queries used to assign
63 atom-types. If not provided, feature definitions adapted from reference:
64 Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
65 Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
66 */
67 MorganFeatureAtomInvGenerator(std::vector<const ROMol *> *patterns = nullptr);
68
69 std::vector<std::uint32_t> *getAtomInvariants(
70 const ROMol &mol) const override;
71
72 std::string infoString() const override;
73 void toJSON(boost::property_tree::ptree &pt) const override;
74 void fromJSON(const boost::property_tree::ptree &) override;
76};
77
78/**
79 \brief Bond invariants generator for Morgan fingerprint
80
81 */
84 bool df_useBondTypes;
85 bool df_useChirality;
86
87 public:
88 /**
89 \brief Construct a new MorganBondInvGenerator object
90
91 \param useBondTypes : if set, bond types will be included as a part of the
92 bond invariants
93 \param useChirality : if set, chirality information will be included as a
94 part of the bond invariants
95 */
96 MorganBondInvGenerator(const bool useBondTypes = true,
97 const bool useChirality = false);
98
99 std::vector<std::uint32_t> *getBondInvariants(
100 const ROMol &mol) const override;
101
102 std::string infoString() const override;
103 void toJSON(boost::property_tree::ptree &pt) const override;
104 void fromJSON(const boost::property_tree::ptree &pt) override;
105 MorganBondInvGenerator *clone() const override;
106 ~MorganBondInvGenerator() override = default;
107};
108
109/**
110 \brief Class for holding Morgan fingerprint specific arguments
111
112 */
114 public:
116 unsigned int d_radius = 3;
118 bool df_useBondTypes = true;
119
120 std::string infoString() const override;
121 void toJSON(boost::property_tree::ptree &pt) const override;
122 void fromJSON(const boost::property_tree::ptree &pt) override;
123
124 /**
125 \brief Construct a new MorganArguments object
126
127 \param radius the number of iterations to grow the fingerprint
128 \param countSimulation if set, use count simulation while generating the
129 fingerprint
130 \param includeChirality if set, chirality information will be added to the
131 generated bit id, independently from bond invariants
132 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
133 have a nonzero invariant
134 \param countBounds boundaries for count simulation, corresponding bit will
135 be set if the count is higher than the number provided for that spot
136 \param fpSize size of the generated fingerprint, does not affect the sparse
137 versions
138 \param includeRedundantEnvironments if set, redundant environments will be
139 included in the fingerprint
140 \param useBondTypes if set, bond types will be included in the fingerprint
141 */
142 MorganArguments(unsigned int radius = 3, bool countSimulation = false,
143 bool includeChirality = false,
144 bool onlyNonzeroInvariants = false,
145 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
146 std::uint32_t fpSize = 2048,
147 bool includeRedundantEnvironments = false,
148 bool useBondTypes = true)
149 : FingerprintArguments(countSimulation, countBounds, fpSize, 1,
150 includeChirality),
151 df_onlyNonzeroInvariants(onlyNonzeroInvariants),
152 d_radius(radius),
153 df_includeRedundantEnvironments(includeRedundantEnvironments),
154 df_useBondTypes(useBondTypes) {};
155};
156
157/**
158 \brief Class for holding the bit-id created from Morgan fingerprint
159 environments and the additional data necessary for extra outputs
160
161 */
162template <typename OutputType>
164 : public AtomEnvironment<OutputType> {
165 const OutputType d_code;
166 const unsigned int d_atomId;
167 const unsigned int d_layer;
168 const ROMol *d_mol = nullptr;
169
170 public:
171 OutputType getBitId(
172 FingerprintArguments *arguments, // unused
173 const std::vector<std::uint32_t> *atomInvariants, // unused
174 const std::vector<std::uint32_t> *bondInvariants, // unused
175 AdditionalOutput *additionalOutput, // unused
176 const bool hashResults = false, // unused
177 const std::uint64_t fpSize = 0 // unused
178 ) const override;
180 size_t bitId) const override;
181
182 /**
183 \brief Construct a new MorganAtomEnv object
184
185 \param code bit id generated from this environment
186 \param atomId atom id of the atom at the center of this environment
187 \param layer radius of this environment
188 */
189 MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
190 const unsigned int layer, const ROMol *mol)
191 : d_code(code), d_atomId(atomId), d_layer(layer), d_mol(mol) {}
192};
193
194/**
195 \brief Class that generates atom environments for Morgan fingerprint
196
197 */
198template <typename OutputType>
200 : public AtomEnvironmentGenerator<OutputType> {
201 public:
202 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
203 const ROMol &mol, FingerprintArguments *arguments,
204 const std::vector<std::uint32_t> *fromAtoms,
205 const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
206 const AdditionalOutput *additionalOutput,
207 const std::vector<std::uint32_t> *atomInvariants,
208 const std::vector<std::uint32_t> *bondInvariants,
209 const bool hashResults = false) const override;
210
211 std::string infoString() const override;
212 void toJSON(boost::property_tree::ptree &pt) const override;
213 void fromJSON(const boost::property_tree::ptree &pt) override;
214
215 OutputType getResultSize() const override;
216};
217
218/**
219 \brief Get a fingerprint generator for Morgan fingerprint
220
221 \tparam OutputType determines the size of the bitIds and the result, can be 32
222 or 64 bit unsigned integer
223
224 \param radius the number of iterations to grow the fingerprint
225
226 \param countSimulation if set, use count simulation while generating the
227 fingerprint
228
229 \param includeChirality if set, chirality information will be added to the
230 generated bit id, independently from bond invariants
231
232 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
233 have a nonzero invariant
234
235 \param countBounds boundaries for count simulation, corresponding bit will be
236 set if the count is higher than the number provided for that spot
237
238 \param fpSize size of the generated fingerprint, does not affect the sparse
239 versions
240 \param countSimulation if set, use count simulation while generating the
241 fingerprint
242 \param includeChirality sets includeChirality flag for both MorganArguments
243 and the default bond generator MorganBondInvGenerator
244 \param useBondTypes if set, bond types will be included as a part of the
245 default bond invariants
246 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
247 have a nonzero invariant
248 \param includeRedundantEnvironments if set, redundant environments will be
249 included in the fingerprint
250 \param atomInvariantsGenerator custom atom invariants generator to use
251 \param bondInvariantsGenerator custom bond invariants generator to use
252 \param ownsAtomInvGen if set, the atom invariants generator is destroyed with
253 the fingerprint generator
254 \param ownsBondInvGen if set, the bond invariants generator is destroyed with
255 the fingerprint generator
256
257 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
258
259This generator supports the following \c AdditionalOutput types:
260 - \c atomToBits : which bits each atom is the central atom for
261 - \c atomCounts : how many bits each atom sets
262 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
263
264 */
265template <typename OutputType>
267 unsigned int radius, bool countSimulation, bool includeChirality,
268 bool useBondTypes, bool onlyNonzeroInvariants,
269 bool includeRedundantEnvironments,
270 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
271 BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
272 std::uint32_t fpSize = 2048,
273 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
274 bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
275//! \overload
276template <typename OutputType>
278 const MorganArguments &args,
279 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
280 BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
281 bool ownsAtomInvGen = false, bool ownsBondInvGen = false);
282
283/**
284 \brief Get a fingerprint generator for Morgan fingerprint
285
286 \tparam OutputType determines the size of the bitIds and the result, can be 32
287 or 64 bit unsigned integer
288
289 \param radius the number of iterations to grow the fingerprint
290
291 \param countSimulation if set, use count simulation while generating the
292 fingerprint
293
294 \param includeChirality if set, chirality information will be added to the
295 generated bit id, independently from bond invariants
296
297 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
298 have a nonzero invariant
299
300 \param countBounds boundaries for count simulation, corresponding bit will be
301 set if the count is higher than the number provided for that spot
302
303 \param fpSize size of the generated fingerprint, does not affect the sparse
304 versions
305 \param countSimulation if set, use count simulation while generating the
306 fingerprint
307 \param includeChirality sets includeChirality flag for both MorganArguments
308 and the default bond generator MorganBondInvGenerator
309 \param useBondTypes if set, bond types will be included as a part of the
310 default bond invariants
311 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
312 have a nonzero invariant
313 \param atomInvariantsGenerator custom atom invariants generator to use
314 \param bondInvariantsGenerator custom bond invariants generator to use
315 \param ownsAtomInvGen if set, the atom invariants generator is destroyed with
316 the fingerprint generator
317 \param ownsBondInvGen if set, the bond invariants generator is destroyed with
318 the fingerprint generator
319
320 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
321
322This generator supports the following \c AdditionalOutput types:
323 - \c atomToBits : which bits each atom is the central atom for
324 - \c atomCounts : how many bits each atom sets
325 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
326
327 */
328template <typename OutputType>
330 unsigned int radius, bool countSimulation = false,
331 bool includeChirality = false, bool useBondTypes = true,
332 bool onlyNonzeroInvariants = false,
333 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
334 BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
335 std::uint32_t fpSize = 2048,
336 std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
337 bool ownsAtomInvGen = false, bool ownsBondInvGen = false) {
339 radius, countSimulation, includeChirality, useBondTypes,
340 onlyNonzeroInvariants, false, atomInvariantsGenerator,
341 bondInvariantsGenerator, fpSize, countBounds, ownsAtomInvGen,
342 ownsBondInvGen);
343};
344
345} // namespace MorganFingerprint
346} // namespace RDKit
347
348#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
abstract base class for bond invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
FingerprintArguments(bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature=1, bool includeChirality=false)
class that generates same fingerprint style for different output formats
Class for holding Morgan fingerprint specific arguments.
void toJSON(boost::property_tree::ptree &pt) const override
void fromJSON(const boost::property_tree::ptree &pt) override
MorganArguments(unsigned int radius=3, bool countSimulation=false, bool includeChirality=false, bool onlyNonzeroInvariants=false, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, bool includeRedundantEnvironments=false, bool useBondTypes=true)
Construct a new MorganArguments object.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
MorganAtomEnv(const std::uint32_t code, const unsigned int atomId, const unsigned int layer, const ROMol *mol)
Construct a new MorganAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
MorganAtomInvGenerator(const bool includeRingMembership=true)
Construct a new MorganAtomInvGenerator object.
void fromJSON(const boost::property_tree::ptree &) override
MorganAtomInvGenerator * clone() const override
void toJSON(boost::property_tree::ptree &pt) const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this BondInvariantsGenerator and its arguments
MorganBondInvGenerator * clone() const override
MorganBondInvGenerator(const bool useBondTypes=true, const bool useChirality=false)
Construct a new MorganBondInvGenerator object.
void toJSON(boost::property_tree::ptree &pt) const override
std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const override
get bond invariants from a molecule
void fromJSON(const boost::property_tree::ptree &pt) override
Class that generates atom environments for Morgan fingerprint.
void toJSON(boost::property_tree::ptree &pt) const override
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
void fromJSON(const boost::property_tree::ptree &pt) override
MorganFeatureAtomInvGenerator(std::vector< const ROMol * > *patterns=nullptr)
Construct a new MorganFeatureAtomInvGenerator object.
void fromJSON(const boost::property_tree::ptree &) override
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
MorganFeatureAtomInvGenerator * clone() const override
void toJSON(boost::property_tree::ptree &pt) const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:193
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(unsigned int radius, bool countSimulation, bool includeChirality, bool useBondTypes, bool onlyNonzeroInvariants, bool includeRedundantEnvironments, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, std::uint32_t fpSize=2048, std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, bool ownsAtomInvGen=false, bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
Std stuff.