RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
AtomPairs.h
Go to the documentation of this file.
1//
2// Copyright (C) 2007-2013 Greg Landrum
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11/*! \file AtomPairs.h
12
13
14 A few quick notes about fingerprint size and the way chirality is handled in
15 these functions.
16
17 By default the atom-pair and topological-torsion fingerprints do not include any
18 information about
19 chirality; the atom invariants only include information about the atomic
20 number,
21 number of pi electrons, and degree.
22 When chirality is included, two additional bits are added to the atom
23 invariants to flag R/S/no
24 chirality. These additional bits change the size of the atom invariants and
25 either the size
26 of the final fingerprint (atom pairs) or the maximum allowed path length
27 (torsions). This means
28 that even fingerprints for achiral molecules are different when
29 includeChirality is true.
30
31*/
32#include <RDGeneral/export.h>
33#ifndef __RD_ATOMPAIRS_H__
34#define __RD_ATOMPAIRS_H__
35
38#include <cstdint>
40namespace RDKit {
41class Atom;
42
43namespace AtomPairs {
44const std::string atomPairsVersion = "1.1.0";
45
46//! returns the atom-pair fingerprint for a molecule
47/*!
48 The algorithm used is described here:
49 R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as
50 Molecular Features in Structure-Activity Studies: Definition
51 and Applications" JCICS 25, 64-73 (1985).
52
53
54 \param mol: the molecule to be fingerprinted
55 \param minLength: minimum distance between atoms to be
56 considered in a pair. Default is 1 bond.
57 \param maxLength: maximum distance between atoms to be
58 considered in a pair.
59 Default is maxPathLen-1 bonds.
60 \param fromAtoms: if provided, only atom pairs that involve
61 the specified atoms will be included in the
62 fingerprint
63 \param ignoreAtoms: if provided, any atom pairs that include
64 the specified atoms will not be included in the
65 fingerprint
66 \param atomInvariants: a list of invariants to use for the atom hashes
67 note: only the first \c codeSize bits of each
68 invariant are used.
69 \param includeChirality: if set, chirality will be used in the atom invariants
70 (note: this is ignored if atomInvariants are
71 provided)
72 \param use2D: if set, the 2D (topological) distance matrix is used.
73 \param confId: the conformation to use if 3D distances are being used
74
75
76 \return a pointer to the fingerprint. The client is
77 responsible for calling delete on this.
78
79*/
80[[deprecated("please use AtomPairGenerator")]]
82 const ROMol &mol, unsigned int minLength, unsigned int maxLength,
83 const std::vector<std::uint32_t> *fromAtoms = nullptr,
84 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
85 const std::vector<std::uint32_t> *atomInvariants = nullptr,
86 bool includeChirality = false, bool use2D = true, int confId = -1);
87//! \overload
88[[deprecated("please use AtomPairGenerator")]]
90 const ROMol &mol, const std::vector<std::uint32_t> *fromAtoms = nullptr,
91 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
92 const std::vector<std::uint32_t> *atomInvariants = nullptr,
93 bool includeChirality = false, bool use2D = true, int confId = -1);
94
95//! returns the hashed atom-pair fingerprint for a molecule
96/*!
97 \param mol: the molecule to be fingerprinted
98 \param nBits: the length of the fingerprint to generate
99 \param minLength: minimum distance between atoms to be
100 considered in a pair. Default is 1 bond.
101 \param maxLength: maximum distance between atoms to be
102 considered in a pair.
103 Default is maxPathLen-1 bonds.
104 \param fromAtoms: if provided, only atom pairs that involve
105 the specified atoms will be included in the
106 fingerprint
107 \param ignoreAtoms: if provided, any atom pairs that include
108 the specified atoms will not be included in the
109 fingerprint
110 \param atomInvariants: a list of invariants to use for the atom hashes
111 note: only the first \c codeSize bits of each
112 invariant are used.
113 \param includeChirality: if set, chirality will be used in the atom invariants
114 (note: this is ignored if atomInvariants are
115 provided)
116 \param use2D: if set, the 2D (topological) distance matrix is used.
117 \param confId: the conformation to use if 3D distances are being used
118 \return a pointer to the fingerprint. The client is
119 responsible for calling delete on this.
120
121*/
122[[deprecated("please use AtomPairGenerator")]]
125 const ROMol &mol, unsigned int nBits = 2048, unsigned int minLength = 1,
126 unsigned int maxLength = maxPathLen - 1,
127 const std::vector<std::uint32_t> *fromAtoms = nullptr,
128 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
129 const std::vector<std::uint32_t> *atomInvariants = nullptr,
130 bool includeChirality = false, bool use2D = true, int confId = -1);
131//! returns the hashed atom-pair fingerprint for a molecule as a bit vector
132/*!
133 \param mol: the molecule to be fingerprinted
134 \param nBits: the length of the fingerprint to generate
135 \param minLength: minimum distance between atoms to be
136 considered in a pair. Default is 1 bond.
137 \param maxLength: maximum distance between atoms to be
138 considered in a pair.
139 Default is maxPathLen-1 bonds.
140 \param fromAtoms: if provided, only atom pairs that involve
141 the specified atoms will be included in the
142 fingerprint
143 \param ignoreAtoms: if provided, any atom pairs that include
144 the specified atoms will not be included in the
145 fingerprint
146 \param atomInvariants: a list of invariants to use for the atom hashes
147 note: only the first \c codeSize bits of each
148 invariant are used.
149 \param nBitsPerEntry: number of bits to use in simulating counts
150 \param includeChirality: if set, chirality will be used in the atom invariants
151 (note: this is ignored if atomInvariants are
152 provided)
153 \param use2D: if set, the 2D (topological) distance matrix is used.
154 \param confId: the conformation to use if 3D distances are being used
155
156 \return a pointer to the fingerprint. The client is
157 responsible for calling delete on this.
158
159*/
160[[deprecated("please use AtomPairGenerator")]]
163 const ROMol &mol, unsigned int nBits = 2048, unsigned int minLength = 1,
164 unsigned int maxLength = maxPathLen - 1,
165 const std::vector<std::uint32_t> *fromAtoms = nullptr,
166 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
167 const std::vector<std::uint32_t> *atomInvariants = nullptr,
168 unsigned int nBitsPerEntry = 4, bool includeChirality = false,
169 bool use2D = true, int confId = -1);
170
171//! returns the topological-torsion fingerprint for a molecule
172/*!
173 The algorithm used is described here:
174 R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan;
175 "Topological Torsion: A New Molecular Descriptor for SAR Applications.
176 Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
177
178 \param mol: the molecule to be fingerprinted
179 \param targetSize: the number of atoms to include in the "torsions"
180 \param fromAtoms: if provided, only torsions that start or end at
181 the specified atoms will be included in the
182 fingerprint
183 \param ignoreAtoms: if provided, any torsions that include
184 the specified atoms will not be included in the
185 fingerprint
186 \param atomInvariants: a list of invariants to use for the atom hashes
187 note: only the first \c codeSize bits of each
188 invariant are used.
189 \param includeChirality: if set, chirality will be used in the atom invariants
190 (note: this is ignored if atomInvariants are
191 provided)
192
193 \return a pointer to the fingerprint. The client is
194 responsible for calling delete on this.
195
196*/
197[[deprecated("please use TopologicalTorsionGenerator")]]
200 const ROMol &mol, unsigned int targetSize = 4,
201 const std::vector<std::uint32_t> *fromAtoms = nullptr,
202 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
203 const std::vector<std::uint32_t> *atomInvariants = nullptr,
204 bool includeChirality = false);
205//! returns a hashed topological-torsion fingerprint for a molecule
206/*!
207 The algorithm used is described here:
208 R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan;
209 "Topological Torsion: A New Molecular Descriptor for SAR Applications.
210 Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
211
212 \param mol: the molecule to be fingerprinted
213 \param nBits: number of bits to include in the fingerprint
214 \param targetSize: the number of atoms to include in the "torsions"
215 \param fromAtoms: if provided, only torsions that start or end at
216 the specified atoms will be included in the
217 fingerprint
218 \param ignoreAtoms: if provided, any torsions that include
219 the specified atoms will not be included in the
220 fingerprint
221 \param atomInvariants: a list of invariants to use for the atom hashes
222 note: only the first \c codeSize bits of each
223 invariant are used.
224 \param includeChirality: if set, chirality will be used in the atom invariants
225 (note: this is ignored if atomInvariants are
226 provided)
227
228 \return a pointer to the fingerprint. The client is
229 responsible for calling delete on this.
230
231*/
232[[deprecated("please use TopologicalTorsionGenerator")]]
235 const ROMol &mol, unsigned int nBits = 2048, unsigned int targetSize = 4,
236 const std::vector<std::uint32_t> *fromAtoms = nullptr,
237 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
238 const std::vector<std::uint32_t> *atomInvariants = nullptr,
239 bool includeChirality = false);
240//! returns a hashed topological-torsion fingerprint for a molecule as a bit
241//! vector
242/*!
243 \param mol: the molecule to be fingerprinted
244 \param nBits: number of bits to include in the fingerprint
245 \param targetSize: the number of atoms to include in the "torsions"
246 \param fromAtoms: if provided, only torsions that start or end at
247 the specified atoms will be included in the
248 fingerprint
249 \param ignoreAtoms: if provided, any torsions that include
250 the specified atoms will not be included in the
251 fingerprint
252 \param atomInvariants: a list of invariants to use for the atom hashes
253 note: only the first \c codeSize bits of each
254 invariant are used.
255 \param nBitsPerEntry: number of bits to use in simulating counts
256 \param includeChirality: if set, chirality will be used in the atom invariants
257 (note: this is ignored if atomInvariants are
258 provided)
259
260 \return a pointer to the fingerprint. The client is
261 responsible for calling delete on this.
262
263*/
264[[deprecated("please use TopologicalTorsionGenerator")]]
267 const ROMol &mol, unsigned int nBits = 2048, unsigned int targetSize = 4,
268 const std::vector<std::uint32_t> *fromAtoms = nullptr,
269 const std::vector<std::uint32_t> *ignoreAtoms = nullptr,
270 const std::vector<std::uint32_t> *atomInvariants = nullptr,
271 unsigned int nBitsPerEntry = 4, bool includeChirality = false);
272} // namespace AtomPairs
273} // namespace RDKit
274
275#endif
Pulls in all the BitVect classes.
a class for bit vectors that are densely occupied
a class for efficiently storing sparse vectors of ints
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getTopologicalTorsionFingerprint(const ROMol &mol, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false)
returns the topological-torsion fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getHashedAtomPairFingerprint(const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the hashed atom-pair fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getHashedTopologicalTorsionFingerprint(const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false)
returns a hashed topological-torsion fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedTopologicalTorsionFingerprintAsBitVect(const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false)
const std::string atomPairsVersion
Definition AtomPairs.h:44
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedAtomPairFingerprintAsBitVect(const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, unsigned int nBitsPerEntry=4, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the hashed atom-pair fingerprint for a molecule as a bit vector
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getAtomPairFingerprint(const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the atom-pair fingerprint for a molecule
const unsigned int maxPathLen
Std stuff.
bool rdvalue_is(const RDValue_cast_t)