RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
FingerprintUtil.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018 Boran Adas, Google Summer of Code
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_FINGERPRINTUTIL_H_2018_07
13#define RD_FINGERPRINTUTIL_H_2018_07
14
15#include <GraphMol/RDKitBase.h>
18#include <cstdint>
19#include <tuple>
20#include <vector>
21#include <map>
23
25
26namespace RDKit {
27namespace AtomPairs {
//! sizes the atomNumberTypes table: it has (1 << numTypeBits) entries
const unsigned int numTypeBits = 4;
//! lookup table of atom types (the values look like atomic numbers - TODO
//! confirm). Only 15 initializers are given for the 16 slots, so the final
//! entry is zero-initialized.
const unsigned int atomNumberTypes[1 << numTypeBits] = {
    5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 51, 52, 43};
const unsigned int numPiBits = 2;
//! largest value that fits in numPiBits bits
const unsigned int maxNumPi = (1 << numPiBits) - 1;
const unsigned int numBranchBits = 3;
//! largest value that fits in numBranchBits bits
const unsigned int maxNumBranches = (1 << numBranchBits) - 1;
const unsigned int numChiralBits = 2;
// NOTE(review): the definition of codeSize was missing from this listing even
// though numAtomPairFingerprintBits depends on it. It is restored here as the
// sum of the non-chiral bit widths, which agrees with the "chirality is not
// included" remark below - confirm against the upstream header.
const unsigned int codeSize = numTypeBits + numPiBits + numBranchBits;
const unsigned int numPathBits = 5;
//! largest value that fits in numPathBits bits
const unsigned int maxPathLen = (1 << numPathBits) - 1;
const unsigned int numAtomPairFingerprintBits =
    numPathBits + 2 * codeSize;  // note that this is only accurate if chirality
                                 // is not included
42
43//! returns a numeric code for the atom (the atom's hash in the
44//! atom-pair scheme)
45/*!
46 \param atom the atom to be considered
47 \param branchSubtract (optional) a constant to subtract from
48 the number of neighbors when the hash
49 is calculated (used in the topological
50 torsions code)
51 \param includeChirality toggles the inclusions of bits indicating R/S
52 chirality
53*/
55 const Atom *atom, unsigned int branchSubtract = 0,
56 bool includeChirality = false);
57
58//! returns an atom pair hash based on two atom hashes and the
59//! distance between the atoms.
60/*!
61 \param codeI the hash for the first atom
62 \param codeJ the hash for the second atom
63 \param dist the distance (number of bonds) between the two
64 atoms
65 \param includeChirality toggles the inclusions of bits indicating R/S
66 chirality
67*/
69 std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist,
70 bool includeChirality = false);
71
72//! returns an topological torsion hash based on the atom hashes
73//! passed in
74/*!
75 \param atomCodes the vector of atom hashes
76*/
78 const std::vector<std::uint32_t> &atomCodes, bool includeChirality = false);
79
81 const std::vector<std::uint32_t> &pathCodes);
82
83} // namespace AtomPairs
84
85namespace MorganFingerprints {
86
88 public:
90 ss_matcher(const std::string &pattern);
91
92 // const RDKit::ROMOL_SPTR &getMatcher() const { return m_matcher; }
93 const RDKit::ROMol *getMatcher() const;
94
95 private:
96 RDKit::ROMOL_SPTR m_matcher;
97};
98
99typedef std::tuple<boost::dynamic_bitset<>, uint32_t, unsigned int> AccumTuple;
100
101RDKIT_FINGERPRINTS_EXPORT extern std::vector<std::string> defaultFeatureSmarts;
102
103//! returns the connectivity invariants for a molecule
104/*!
105
106 \param mol : the molecule to be considered
107 \param invars : used to return the results
108 \param includeRingMembership : if set, whether or not the atom is in
109 a ring will be used in the invariant list.
110*/
112 const ROMol &mol, std::vector<std::uint32_t> &invars,
113 bool includeRingMembership = true);
114const std::string morganConnectivityInvariantVersion = "1.0.0";
115
116//! returns the feature invariants for a molecule
117/*!
118
119 \param mol: the molecule to be considered
120 \param invars : used to return the results
121 \param patterns: if provided should contain the queries used to assign
122 atom-types.
123 if not provided, feature definitions adapted from reference:
124 Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
125 will be used for Donor, Acceptor, Aromatic, Halogen, Basic,
126 Acidic
127
128*/
130 const ROMol &mol, std::vector<std::uint32_t> &invars,
131 std::vector<const ROMol *> *patterns = nullptr);
132const std::string morganFeatureInvariantVersion = "0.1.0";
133
134} // namespace MorganFingerprints
135
136namespace RDKitFPUtils {
137
138RDKIT_FINGERPRINTS_EXPORT void buildDefaultRDKitFingerprintAtomInvariants(
139 const ROMol &mol, std::vector<std::uint32_t> &lAtomInvariants);
140
141RDKIT_FINGERPRINTS_EXPORT void enumerateAllPaths(
142 const ROMol &mol, std::map<int, std::list<std::vector<int>>> &allPaths,
143 const std::vector<std::uint32_t> *fromAtoms, bool branchedPaths, bool useHs,
144 unsigned int minPath, unsigned int maxPath);
145
146RDKIT_FINGERPRINTS_EXPORT void identifyQueryBonds(
147 const ROMol &mol, std::vector<const Bond *> &bondCache,
148 std::vector<short> &isQueryBond);
149
150RDKIT_FINGERPRINTS_EXPORT std::vector<unsigned int> generateBondHashes(
151 const ROMol &mol, boost::dynamic_bitset<> &atomsInPath,
152 const std::vector<const Bond *> &bondCache,
153 const std::vector<short> &isQueryBond, const std::vector<int> &path,
154 bool useBondOrder, const std::vector<std::uint32_t> *atomInvariants);
155
156} // namespace RDKitFPUtils
157
158} // namespace RDKit
159
160#endif
Pulls in all the BitVect classes.
pulls in the core RDKit functionality
functionality for finding subgraphs and paths in molecules
The class for representing atoms.
Definition Atom.h:75
const RDKit::ROMol * getMatcher() const
ss_matcher(const std::string &pattern)
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
const unsigned int numPiBits
RDKIT_FINGERPRINTS_EXPORT std::uint64_t getTopologicalTorsionCode(const std::vector< std::uint32_t > &atomCodes, bool includeChirality=false)
const unsigned int numPathBits
const unsigned int maxNumPi
const unsigned int numChiralBits
const unsigned int atomNumberTypes[1<< numTypeBits]
const unsigned int maxNumBranches
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomPairCode(std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist, bool includeChirality=false)
const unsigned int codeSize
const unsigned int numTypeBits
const unsigned int numBranchBits
const unsigned int numAtomPairFingerprintBits
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getTopologicalTorsionHash(const std::vector< std::uint32_t > &pathCodes)
const unsigned int maxPathLen
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomCode(const Atom *atom, unsigned int branchSubtract=0, bool includeChirality=false)
RDKIT_FINGERPRINTS_EXPORT std::vector< std::string > defaultFeatureSmarts
RDKIT_FINGERPRINTS_EXPORT void getFeatureInvariants(const ROMol &mol, std::vector< std::uint32_t > &invars, std::vector< const ROMol * > *patterns=nullptr)
returns the feature invariants for a molecule
const std::string morganFeatureInvariantVersion
const std::string morganConnectivityInvariantVersion
RDKIT_FINGERPRINTS_EXPORT void getConnectivityInvariants(const ROMol &mol, std::vector< std::uint32_t > &invars, bool includeRingMembership=true)
returns the connectivity invariants for a molecule
std::tuple< boost::dynamic_bitset<>, uint32_t, unsigned int > AccumTuple
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
boost::shared_ptr< ROMol > ROMOL_SPTR