RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
RDKitFPGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2022 Boran Adas and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_RDFINGERPRINTGEN_H_2018_07
13#define RD_RDFINGERPRINTGEN_H_2018_07
14
16
17namespace RDKit {
18namespace RDKitFP {
19
21 public:
22 unsigned int d_minPath = 1;
23 unsigned int d_maxPath = 7;
24 bool df_useHs = true;
25 bool df_branchedPaths = true;
26 bool df_useBondOrder = true;
27
28 std::string infoString() const override;
29 void toJSON(boost::property_tree::ptree &pt) const override;
30 void fromJSON(const boost::property_tree::ptree &pt) override;
31
32 /**
33 \brief Construct a new RDKitFPArguments object
34
35 \param minPath the minimum path length (in bonds) to be included
36 \param maxPath the maximum path length (in bonds) to be included
37 \param useHs toggles inclusion of Hs in paths (if the molecule has
38 explicit Hs)
39 \param branchedPaths toggles generation of branched subgraphs, not just
40 linear paths
41 \param useBondOrder toggles inclusion of bond orders in the path hashes
42 \param countSimulation if set, use count simulation while
43 generating the fingerprint
44 \param countBounds boundaries for count simulation, corresponding bit will
45 be set if the count is higher than the number provided for that spot
46 \param fpSize size of the generated fingerprint, does not affect the sparse
47 versions
48 \param numBitsPerFeature controls the number of bits that are set for each
49 path/subgraph found
50
51 */
52 RDKitFPArguments(unsigned int minPath = 1, unsigned int maxPath = 7,
53 bool useHs = true, bool branchedPaths = true,
54 bool useBondOrder = true, bool countSimulation = false,
55 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
56 std::uint32_t fpSize = 2048,
57 std::uint32_t numBitsPerFeature = 2);
58};
59
62 public:
63 std::vector<std::uint32_t> *getAtomInvariants(
64 const ROMol &mol) const override;
65
66 std::string infoString() const override;
67 void toJSON(boost::property_tree::ptree &pt) const override;
68 void fromJSON(const boost::property_tree::ptree &pt) override;
69
70 RDKitFPAtomInvGenerator *clone() const override;
71};
72
73template <typename OutputType>
75 : public AtomEnvironment<OutputType> {
76 const OutputType d_bitId;
77 const boost::dynamic_bitset<> d_atomsInPath;
78 const INT_VECT d_bondPath;
79
80 public:
81 OutputType getBitId(
82 FingerprintArguments *arguments, // unused
83 const std::vector<std::uint32_t> *atomInvariants, // unused
84 const std::vector<std::uint32_t> *bondInvariants, // unused
85 AdditionalOutput *additionalOutput, // unused
86 bool hashResults = false, // unused
87 const std::uint64_t fpSize = 0 // unused
88 ) const override;
90 size_t bitId) const override;
91
92 /**
93 \brief Construct a new RDKitFPAtomEnv object
94
95 \param bitId bitId generated for this environment
96 \param atomsInPath holds atoms in this environment to set additional output
97 \param bondPath the bond path defining the environment
98
99 */
100 RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath,
101 INT_VECT bondPath)
102 : d_bitId(bitId),
103 d_atomsInPath(std::move(atomsInPath)),
104 d_bondPath(std::move(bondPath)) {}
105};
106
107template <typename OutputType>
109 : public AtomEnvironmentGenerator<OutputType> {
110 public:
111 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
112 const ROMol &mol, FingerprintArguments *arguments,
113 const std::vector<std::uint32_t> *fromAtoms,
114 const std::vector<std::uint32_t> *ignoreAtoms, int confId,
115 const AdditionalOutput *additionalOutput,
116 const std::vector<std::uint32_t> *atomInvariants,
117 const std::vector<std::uint32_t> *bondInvariants,
118 bool hashResults = false) const override;
119
120 std::string infoString() const override;
121 void toJSON(boost::property_tree::ptree &pt) const override;
122 void fromJSON(const boost::property_tree::ptree &pt) override;
123
124 OutputType getResultSize() const override;
125
126};  // end of class (namespace RDKitFP is closed further below)
127
128/**
129 \brief Get a RDKit fingerprint generator with given parameters
130
131 \tparam OutputType determines the size of the bitIds and the result, can be 32
132 or 64 bit unsigned integer
133 \param minPath the minimum path length (in bonds) to be included
134 \param maxPath the maximum path length (in bonds) to be included
135 \param useHs toggles inclusion of Hs in paths (if the molecule has
136 explicit Hs)
137 \param branchedPaths toggles generation of branched subgraphs, not just
138 linear paths
139 \param useBondOrder toggles inclusion of bond orders in the path hashes
140 \param atomInvariantsGenerator custom atom invariants generator to use
141 \param countSimulation if set, use count simulation while
142 generating the fingerprint
143 \param countBounds boundaries for count simulation, corresponding bit will be
144 set if the count is higher than the number provided for that spot
145 \param fpSize size of the generated fingerprint, does not affect the sparse
146 versions
147 \param numBitsPerFeature controls the number of bits that are set for each
148 path/subgraph found
149 \param ownsAtomInvGen if set, the atom invariants generator is destroyed
150 along with the fingerprint generator
151
152 \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
153
154 This generator supports the following \c AdditionalOutput types:
155 - \c atomToBits : which bits each atom is involved in
156 - \c atomCounts : how many bits each atom sets
157 - \c bitPaths : map from bitId to vectors of bond indices for the individual
158 subgraphs
159
160 */
161template <typename OutputType>
163 unsigned int minPath = 1, unsigned int maxPath = 7, bool useHs = true,
164 bool branchedPaths = true, bool useBondOrder = true,
165 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
166 bool countSimulation = false,
167 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
168 std::uint32_t fpSize = 2048, std::uint32_t numBitsPerFeature = 2,
169 bool ownsAtomInvGen = false);
170//! \overload
171template <typename OutputType>
173 const RDKitFPArguments &args,
174 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
175 bool ownsAtomInvGen = false);
176
177} // namespace RDKitFP
178} // namespace RDKit
179
180#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
FingerprintArguments(bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature=1, bool includeChirality=false)
class that generates same fingerprint style for different output formats
void toJSON(boost::property_tree::ptree &pt) const override
void fromJSON(const boost::property_tree::ptree &pt) override
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
RDKitFPArguments(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2)
Construct a new RDKitFPArguments object.
RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath, INT_VECT bondPath)
Construct a new RDKitFPAtomEnv object.
OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *additionalOutput, bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
void updateAdditionalOutput(AdditionalOutput *output, size_t bitId) const override
void fromJSON(const boost::property_tree::ptree &pt) override
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
void toJSON(boost::property_tree::ptree &pt) const override
std::string infoString() const override
method that returns information about this \c AtomInvariantsGenerator and its arguments
RDKitFPAtomInvGenerator * clone() const override
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
void toJSON(boost::property_tree::ptree &pt) const override
void fromJSON(const boost::property_tree::ptree &pt) override
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this /c AtomEnvironmentGenerator and its arguments if any
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:193
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2, bool ownsAtomInvGen=false)
Get a RDKit fingerprint generator with given parameters.
Std stuff.
std::vector< int > INT_VECT
Definition types.h:303