RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstructureCache.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Novartis Institutes for BioMedical Research
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#pragma once
12#include <list>
13#include <vector>
14#include <string>
15#include <stdexcept>
16#include "../RDKitBase.h"
17#include "Graph.h"
18#include "Seed.h"
19#include "DebugTrace.h" // algorithm filter definitions
20
21namespace RDKit {
22namespace FMCS {
24 public:
25#pragma pack(push, 1)
27 typedef unsigned long long TValue;
28 TValue Value{0};
29
30 public:
32 };
33#pragma pack(pop)
34
35 struct HashKey {
37
38 public:
39 void computeKey(const Seed &seed,
40 const std::vector<unsigned int> &queryAtomLabels,
41 const std::vector<unsigned int> &queryBondLabels) {
42 computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
43 }
44
45 private:
46 void computeMorganCodeHash(
47 const Seed &seed, const std::vector<unsigned int> &queryAtomLabels,
48 const std::vector<unsigned int> &queryBondLabels) {
49 size_t nv = seed.getNumAtoms();
50 size_t ne = seed.getNumBonds();
51 std::vector<unsigned long> currCodes(nv);
52 std::vector<unsigned long> prevCodes(nv);
53 size_t nIterations = seed.getNumBonds();
54 if (nIterations > 5) {
55 nIterations = 5;
56 }
57
58 for (unsigned int seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
59 ++seedAtomIdx) {
60 currCodes[seedAtomIdx] = queryAtomLabels.at(
61 seed.MoleculeFragment.Atoms.at(seedAtomIdx)->getIdx());
62 }
63
64 for (size_t iter = 0; iter < nIterations; ++iter) {
65 for (size_t i = 0; i < nv; ++i) {
66 prevCodes[i] = currCodes[i];
67 }
68
69 for (size_t seedBondIdx = 0; seedBondIdx < ne; ++seedBondIdx) {
70 const Bond *bond = seed.MoleculeFragment.Bonds[seedBondIdx];
71 unsigned int order = queryBondLabels.at(
72 seed.MoleculeFragment.Bonds.at(seedBondIdx)->getIdx());
73 unsigned int atom1 = seed.MoleculeFragment.SeedAtomIdxMap
74 .find(bond->getBeginAtomIdx())
75 ->second;
76 unsigned int atom2 =
77 seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getEndAtomIdx())
78 ->second;
79 unsigned int v1 = prevCodes[atom1];
80 unsigned int v2 = prevCodes[atom2];
81
82 currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
83 currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
84 }
85 }
86
87 KeyNumericMetrics::TValue result = 0;
88 for (unsigned int seedAtomIdx = 0; seedAtomIdx < nv; ++seedAtomIdx) {
89 unsigned long code = currCodes[seedAtomIdx];
90 result += code * (code + 6849) + 29;
91 }
92
93 NumericMetrics.Value = result;
94 }
95
96 // not implemented yet:
97 /*
98 void computeFingerprint(const Seed& seed)
99 {
100 unsigned int radius = seed.getNumBonds();
101 if (radius > 5)
102 radius = 5;
103 ExplicitBitVect *mf =
104 RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
105 radius); //SLOW !!!
106 // ...
107 delete mf;
108 NumericMetrics.Field.hasFingerprint = 1;
109 }
110 */
111 };
112
113 typedef HashKey TKey;
114 typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
115 private:
116 std::vector<TIndexEntry> ValueStorage;
117 std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
118 public:
119 // returns computed key, and pointer to index entry with a set of subgraphs
120 // corresponding to the key if found.
121 // then caller must find exactly matched subgraph in the result set with own
122 // search algorithm,
123 // including a resolving of collisions of hash key
124 TIndexEntry *find(const Seed &seed,
125 const std::vector<unsigned int> &queryAtomLabels,
126 const std::vector<unsigned int> &queryBondLabels,
127 TKey &key) { // compute key and find entry
128 key.computeKey(seed, queryAtomLabels, queryBondLabels);
129 const auto entryit = NumericIndex.find(key.NumericMetrics.Value);
130 if (NumericIndex.end() != entryit) {
131 return &ValueStorage[entryit->second];
132 }
133 return nullptr; // not found
134 }
135
136 // if find() did not found any entry for this key of seed a new entry will be
137 // created
138 void add(const Seed &seed, TKey &key,
139 TIndexEntry *entry) { // "compute" value and store it in NEW entry
140 // if not found
141 if (!entry) {
142 try {
143 ValueStorage.emplace_back();
144 } catch (...) {
145 return; // not enough memory room to add the item, but it's just a
146 // cache
147 }
148 entry = &ValueStorage.back();
149 }
150 entry->push_back(seed.Topology);
151
152 if (!NumericIndex
153 .insert(std::make_pair(key.NumericMetrics.Value,
154 ValueStorage.size() - 1))
155 .second) {
156 return; // not enough memory room to add the item, but it is just cache
157 }
158 }
159
160 size_t keyssize() const { // for statistics only
161 return ValueStorage.size();
162 }
163
164 size_t fullsize() const { // for statistics only
165 return std::accumulate(
166 ValueStorage.begin(), ValueStorage.end(), 0,
167 [](const auto &acc, const auto &v) { return acc + v.size(); });
168 }
169};
170} // namespace FMCS
171} // namespace RDKit
std::list< FMCS::Graph > TIndexEntry
TIndexEntry * find(const Seed &seed, const std::vector< unsigned int > &queryAtomLabels, const std::vector< unsigned int > &queryBondLabels, TKey &key)
void add(const Seed &seed, TKey &key, TIndexEntry *entry)
#define RDKIT_FMCS_EXPORT
Definition export.h:153
Std stuff.
void computeKey(const Seed &seed, const std::vector< unsigned int > &queryAtomLabels, const std::vector< unsigned int > &queryBondLabels)