RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
FingerprintUtil.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018 Boran Adas, Google Summer of Code
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_FINGERPRINTUTIL_H_2018_07
13#define RD_FINGERPRINTUTIL_H_2018_07
14
15#include <GraphMol/RDKitBase.h>
18#include <cstdint>
19#include <tuple>
20#include <vector>
21#include <map>
23
25
26namespace RDKit {
27namespace AtomPairs {
28const unsigned int numTypeBits = 4;  // sizes the table below: 1 << 4 == 16 slots
29const unsigned int atomNumberTypes[1 << numTypeBits] = {
30 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 51, 52, 43};  // 15 initializers for 16 slots, so the last slot is zero-initialized; NOTE(review): values look like atomic numbers - confirm against getAtomCode
31const unsigned int numPiBits = 2;
32const unsigned int maxNumPi = (1 << numPiBits) - 1;  // == 3, the largest value representable in numPiBits bits
33const unsigned int numBranchBits = 3;
34const unsigned int maxNumBranches = (1 << numBranchBits) - 1;  // == 7, the largest value representable in numBranchBits bits
35const unsigned int numChiralBits = 2;
37const unsigned int numPathBits = 5;
38const unsigned int maxPathLen = (1 << numPathBits) - 1;  // == 31, the largest value representable in numPathBits bits
39const unsigned int numAtomPairFingerprintBits =
40 numPathBits + 2 * codeSize; // note that this is only accurate if chirality
41 // is not included
42
43//! returns a numeric code for the atom (the atom's hash in the
44//! atom-pair scheme)
45/*!
46 \param atom the atom to be considered
47 \param branchSubtract (optional) a constant to subtract from
48 the number of neighbors when the hash
49 is calculated (used in the topological
50 torsions code)
51 \param includeChirality toggles the inclusion of bits indicating R/S
52 chirality
53*/
55 const Atom *atom, unsigned int branchSubtract = 0,
56 bool includeChirality = false);
57
58//! returns an atom pair hash based on two atom hashes and the
59//! distance between the atoms.
60/*!
61 \param codeI the hash for the first atom
62 \param codeJ the hash for the second atom
63 \param dist the distance (number of bonds) between the two
64 atoms
65 \param includeChirality toggles the inclusion of bits indicating R/S
66 chirality
67*/
69 std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist,
70 bool includeChirality = false);
71
72//! returns a topological torsion hash based on the atom hashes
73//! passed in
74/*!
75 \param atomCodes the vector of atom hashes
76*/
78 const std::vector<std::uint32_t> &atomCodes, bool includeChirality = false);
79
81 const std::vector<std::uint32_t> &pathCodes);
82} // namespace AtomPairs
83
84namespace MorganFingerprints {
85
87 public:
89 ss_matcher(const std::string &pattern);
90
91 // const RDKit::ROMOL_SPTR &getMatcher() const { return m_matcher; }
92 const RDKit::ROMol *getMatcher() const;
93
94 private:
95 RDKit::ROMOL_SPTR m_matcher;
96};
97
98typedef std::tuple<boost::dynamic_bitset<>, uint32_t, unsigned int> AccumTuple;
99
100RDKIT_FINGERPRINTS_EXPORT extern std::vector<std::string> defaultFeatureSmarts;
101
102//! returns the connectivity invariants for a molecule
103/*!
104
105 \param mol : the molecule to be considered
106 \param invars : used to return the results
107 \param includeRingMembership : if set, whether or not the atom is in
108 a ring will be used in the invariant list.
109*/
111 const ROMol &mol, std::vector<std::uint32_t> &invars,
112 bool includeRingMembership = true);
113const std::string morganConnectivityInvariantVersion = "1.0.0";
114
115//! returns the feature invariants for a molecule
116/*!
117
118 \param mol : the molecule to be considered
119 \param invars : used to return the results
120 \param patterns : if provided, should contain the queries used to assign
121 atom-types.
122 if not provided, feature definitions adapted from reference:
123 Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998)
124 will be used for Donor, Acceptor, Aromatic, Halogen, Basic,
125 Acidic
126
127*/
129 const ROMol &mol, std::vector<std::uint32_t> &invars,
130 std::vector<const ROMol *> *patterns = nullptr);
131const std::string morganFeatureInvariantVersion = "0.1.0";
132
133} // namespace MorganFingerprints
134
135namespace RDKitFPUtils {
136
138 const ROMol &mol, std::vector<std::uint32_t> &lAtomInvariants);
139
141 const ROMol &mol, std::map<int, std::list<std::vector<int>>> &allPaths,
142 const std::vector<std::uint32_t> *fromAtoms, bool branchedPaths, bool useHs,
143 unsigned int minPath, unsigned int maxPath);
144
146 const ROMol &mol, std::vector<const Bond *> &bondCache,
147 std::vector<short> &isQueryBond);
148
150 const ROMol &mol, boost::dynamic_bitset<> &atomsInPath,
151 const std::vector<const Bond *> &bondCache,
152 const std::vector<short> &isQueryBond, const std::vector<int> &path,
153 bool useBondOrder, const std::vector<std::uint32_t> *atomInvariants);
154
155} // namespace RDKitFPUtils
156
157} // namespace RDKit
158
159#endif
Pulls in all the BitVect classes.
pulls in the core RDKit functionality
functionality for finding subgraphs and paths in molecules
The class for representing atoms.
Definition Atom.h:75
const RDKit::ROMol * getMatcher() const
ss_matcher(const std::string &pattern)
#define RDKIT_FINGERPRINTS_EXPORT
Definition export.h:177
const unsigned int numPiBits
RDKIT_FINGERPRINTS_EXPORT std::uint64_t getTopologicalTorsionCode(const std::vector< std::uint32_t > &atomCodes, bool includeChirality=false)
const unsigned int numPathBits
const unsigned int maxNumPi
const unsigned int numChiralBits
const unsigned int atomNumberTypes[1<< numTypeBits]
const unsigned int maxNumBranches
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomPairCode(std::uint32_t codeI, std::uint32_t codeJ, unsigned int dist, bool includeChirality=false)
const unsigned int codeSize
const unsigned int numTypeBits
const unsigned int numBranchBits
const unsigned int numAtomPairFingerprintBits
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getTopologicalTorsionHash(const std::vector< std::uint32_t > &pathCodes)
const unsigned int maxPathLen
RDKIT_FINGERPRINTS_EXPORT std::uint32_t getAtomCode(const Atom *atom, unsigned int branchSubtract=0, bool includeChirality=false)
RDKIT_FINGERPRINTS_EXPORT std::vector< std::string > defaultFeatureSmarts
RDKIT_FINGERPRINTS_EXPORT void getFeatureInvariants(const ROMol &mol, std::vector< std::uint32_t > &invars, std::vector< const ROMol * > *patterns=nullptr)
returns the feature invariants for a molecule
const std::string morganFeatureInvariantVersion
const std::string morganConnectivityInvariantVersion
RDKIT_FINGERPRINTS_EXPORT void getConnectivityInvariants(const ROMol &mol, std::vector< std::uint32_t > &invars, bool includeRingMembership=true)
returns the connectivity invariants for a molecule
std::tuple< boost::dynamic_bitset<>, uint32_t, unsigned int > AccumTuple
RDKIT_FINGERPRINTS_EXPORT void enumerateAllPaths(const ROMol &mol, std::map< int, std::list< std::vector< int > > > &allPaths, const std::vector< std::uint32_t > *fromAtoms, bool branchedPaths, bool useHs, unsigned int minPath, unsigned int maxPath)
RDKIT_FINGERPRINTS_EXPORT void identifyQueryBonds(const ROMol &mol, std::vector< const Bond * > &bondCache, std::vector< short > &isQueryBond)
RDKIT_FINGERPRINTS_EXPORT std::vector< unsigned int > generateBondHashes(const ROMol &mol, boost::dynamic_bitset<> &atomsInPath, const std::vector< const Bond * > &bondCache, const std::vector< short > &isQueryBond, const std::vector< int > &path, bool useBondOrder, const std::vector< std::uint32_t > *atomInvariants)
RDKIT_FINGERPRINTS_EXPORT void buildDefaultRDKitFingerprintAtomInvariants(const ROMol &mol, std::vector< std::uint32_t > &lAtomInvariants)
Std stuff.
bool rdvalue_is(const RDValue_cast_t)
boost::shared_ptr< ROMol > ROMOL_SPTR