RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MorganFingerprints.h
Go to the documentation of this file.
1//
2//
3// Copyright (c) 2009-2010, Novartis Institutes for BioMedical Research Inc.
4// and other RDKit contributors
5//
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14// * Redistributions in binary form must reproduce the above
15// copyright notice, this list of conditions and the following
16// disclaimer in the documentation and/or other materials provided
17// with the distribution.
18// * Neither the name of Novartis Institutes for BioMedical Research Inc.
19// nor the names of its contributors may be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22//
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Created by Greg Landrum, July 2008
36//
37//
38
39/*! \file MorganFingerprints.h
40
41*/
42#include <RDGeneral/export.h>
43#ifndef __RD_MORGANFPS_H__
44#define __RD_MORGANFPS_H__
45
46#include <vector>
47#include <map>
50#include <cstdint>
52
53namespace RDKit {
54class ROMol;
55namespace MorganFingerprints {
//! maps a bit id to the list of (atomId, radius) pairs that set that bit
using BitInfoMap =
    std::map<std::uint32_t,
             std::vector<std::pair<std::uint32_t, std::uint32_t>>>;
59
//! version identifier for the Morgan fingerprints
const std::string morganFingerprintVersion = "1.0.0";
61
62//! returns the Morgan fingerprint for a molecule
63/*!
64 These fingerprints are similar to the well-known ECFP or
65 FCFP fingerprints, depending on which invariants are used.
66
67 The algorithm used is described in the paper
68 Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
69 (2010)
70 https://doi.org/10.1021/ci100050t
71
72 The original implementation was done using this paper:
73 D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
74 and an unpublished technical report:
75 http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
76
77 \param mol: the molecule to be fingerprinted
78 \param radius: the number of iterations to grow the fingerprint
79 \param invariants : optional pointer to a set of atom invariants to
80 be used. By default ECFP-type invariants are used
81 (calculated by getConnectivityInvariants())
82 \param fromAtoms : if this is provided, only the atoms in the vector will be
83 used as centers in the fingerprint
84 \param useChirality : if set, additional information will be added to the
85 fingerprint
86 when chiral atoms are discovered. This will cause
87 \verbatim C[C@H](F)Cl,
88 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
89 different fingerprints.
90 \param useBondTypes : if set, bond types will be included as part of the hash
91 for
92 calculating bits
93 \param useCounts : if set, counts of the features will be used
94 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
95 have a nonzero invariant.
96 \param atomsSettingBits : if nonzero, this will be used to return information
97 about the atoms that set each particular bit.
98 The keys are the map are bit ids, the values
99 are lists of (atomId, radius) pairs.
100 \param includeRedundantEnvironments : if set, the check for redundant atom
101 environments will not be done.
102
103 \return a pointer to the fingerprint. The client is
104 responsible for calling delete on this.
105
106*/
108 const ROMol &mol, unsigned int radius,
109 std::vector<boost::uint32_t> *invariants = nullptr,
110 const std::vector<boost::uint32_t> *fromAtoms = nullptr,
111 bool useChirality = false, bool useBondTypes = true, bool useCounts = true,
112 bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
113 bool includeRedundantEnvironments = false);
114
115//! returns the Morgan fingerprint for a molecule
116/*!
117 These fingerprints are similar to the well-known ECFP or
118 FCFP fingerprints, depending on which invariants are used.
119
120 The algorithm used is described in the paper
121 Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
122 (2010)
123 https://doi.org/10.1021/ci100050t
124
125 The original implementation was done using this paper:
126 D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
127 and an unpublished technical report:
128 http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
129
130 \param mol: the molecule to be fingerprinted
131 \param radius: the number of iterations to grow the fingerprint
132 \param invariants : optional pointer to a set of atom invariants to
133 be used. By default ECFP-type invariants are used
134 (calculated by getConnectivityInvariants())
135 \param fromAtoms : if this is provided, only the atoms in the vector will be
136 used as centers in the fingerprint
137 \param useChirality : if set, additional information will be added to the
138 fingerprint
139 when chiral atoms are discovered. This will cause
140 \verbatim C[C@H](F)Cl,
141 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
142 different fingerprints.
143 \param useBondTypes : if set, bond types will be included as part of the hash
144 for
145 calculating bits
146 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
147 have a nonzero invariant.
148 \param atomsSettingBits : if nonzero, this will be used to return information
149 about the atoms that set each particular bit.
150 The keys are the map are bit ids, the values
151 are lists of (atomId, radius) pairs.
152 \param includeRedundantEnvironments : if set, the check for redundant atom
153 environments will not be done.
154
155 \return a pointer to the fingerprint. The client is
156 responsible for calling delete on this.
157
158*/
160 const ROMol &mol, unsigned int radius, unsigned int nBits = 2048,
161 std::vector<boost::uint32_t> *invariants = nullptr,
162 const std::vector<boost::uint32_t> *fromAtoms = nullptr,
163 bool useChirality = false, bool useBondTypes = true,
164 bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
165 bool includeRedundantEnvironments = false);
166
167//! returns the Morgan fingerprint for a molecule as a bit vector
168/*!
169 see documentation for getFingerprint() for theory/references
170
171 \param mol: the molecule to be fingerprinted
172 \param radius: the number of iterations to grow the fingerprint
173 \param nBits: the number of bits in the final fingerprint
174 \param invariants : optional pointer to a set of atom invariants to
175 be used. By default ECFP-type invariants are used
176 (calculated by getConnectivityInvariants())
177 \param fromAtoms : if this is provided, only the atoms in the vector will be
178 used as centers in the fingerprint
179 \param useChirality : if set, additional information will be added to the
180 fingerprint
181 when chiral atoms are discovered. This will cause
182 \verbatim C[C@H](F)Cl,
183 C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
184 different fingerprints.
185 \param useBondTypes : if set, bond types will be included as part of the hash
186 for
187 calculating bits
188 \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
189 have a nonzero invariant.
190 \param atomsSettingBits : if nonzero, this will be used to return information
191 about the atoms that set each particular bit.
192 The keys are the map are bit ids, the values
193 are lists of (atomId, radius) pairs.
194 \param includeRedundantEnvironments : if set, the check for redundant atom
195 environments will not be done.
196
197 \return a pointer to the fingerprint. The client is
198 responsible for calling delete on this.
199
200*/
202 const ROMol &mol, unsigned int radius, unsigned int nBits,
203 std::vector<std::uint32_t> *invariants = nullptr,
204 const std::vector<std::uint32_t> *fromAtoms = nullptr,
205 bool useChirality = false, bool useBondTypes = true,
206 bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
207 bool includeRedundantEnvironments = false);
208
209} // end of namespace MorganFingerprints
210} // namespace RDKit
211
212#endif
a class for bit vectors that are densely occupied
a class for efficiently storing sparse vectors of ints
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
std::map< std::uint32_t, std::vector< std::pair< std::uint32_t, std::uint32_t > > > BitInfoMap
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getFingerprint(const ROMol &mol, unsigned int radius, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool useCounts=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getFingerprintAsBitVect(const ROMol &mol, unsigned int radius, unsigned int nBits, std::vector< std::uint32_t > *invariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule as a bit vector
const std::string morganFingerprintVersion
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getHashedFingerprint(const ROMol &mol, unsigned int radius, unsigned int nBits=2048, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
Std stuff.
bool rdvalue_is(const RDValue_cast_t)