RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSpaceSearcher.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11// This file declares an abstract base class for searching a synthon
12// space. Concrete derived classes include SynthonSpaceSubstructureSearcher
13// and SynthonSpaceFingerprintSearcher.
14
15#ifndef SYNTHONSPACESEARCHER_H
16#define SYNTHONSPACESEARCHER_H
17
18#include <chrono>
19#include <random>
20
21#include <RDGeneral/export.h>
25#include <boost/spirit/home/support/common_terminals.hpp>
26
27using Clock = std::chrono::steady_clock;
28using TimePoint = std::chrono::time_point<Clock>;
29
30namespace RDKit {
31class ROMol;
32
33namespace SynthonSpaceSearch {
34
35// Abstract base class for searching the SynthonSpace.
37 public:
40 const SynthonSpaceSearchParams &params,
46
47 virtual ~SynthonSpaceSearcher() = default;
48
50
51 SynthonSpace &getSpace() const { return d_space; }
52 const ROMol &getQuery() const { return d_query; }
53 const SynthonSpaceSearchParams &getParams() const { return d_params; }
54
55 // Do the search of this fragSet against the SynthonSet in the
56 // appropriate way, for example by substructure or fingerprint
57 // similarity.
58 virtual std::vector<std::unique_ptr<SynthonSpaceHitSet>> searchFragSet(
59 const std::vector<std::unique_ptr<ROMol>> &fragSet,
60 const SynthonSet &reaction) const = 0;
61
62 // Make the hit, constructed from a specific combination of
63 // synthons in the hitset, and verify that it matches the
64 // query in the appropriate way. There'll be 1 entry in synthNums
65 // for each synthon list in the hitset. Returns an empty pointer
66 // if the hit isn't accepted for whatever reason.
67 std::unique_ptr<ROMol> buildAndVerifyHit(
69 const std::vector<size_t> &synthNums) const;
70
71 protected:
72 // Checks that the given molecule is definitely a hit according to
73 // the derived class' criteria. This function checks the chiralAtomCount
74 // if appropriate, which requires a non-const ROMol.
75 virtual bool verifyHit(ROMol &mol) const;
76
77 // Do a check against number of heavy atoms etc., if the options call for
78 // it. This can be done without having to build the full molecule from the
79 // synthons. Some of the search methods (fingerprints, for example) can do
80 // additional quick checks on whether this set of synthons can match the query
81 // without building the full molecule.
83 const std::vector<size_t> &synthNums) const;
84
85 private:
86 std::unique_ptr<std::mt19937> d_randGen;
87
88 const ROMol &d_query;
89 const SynthonSpaceSearchParams &d_params;
90 SynthonSpace &d_space;
91
92 // Some of the search methods might need extra setup of the fragment
93 // sets. The FingerprintSearcher, for example, needs fingerprints
94 // for all the fragments. The SubstructureSearcher needs connector
95 // regions and information about them.
96 virtual void extraSearchSetup(
97 [[maybe_unused]] std::vector<std::vector<std::unique_ptr<ROMol>>>
98 &fragSets) {}
99
100 std::vector<std::unique_ptr<SynthonSpaceHitSet>> doTheSearch(
101 std::vector<std::vector<std::unique_ptr<ROMol>>> &fragSets,
102 const TimePoint *endTime, bool &timedOut, std::uint64_t &totHits);
103
104 // Build the molecules from the synthons identified in hitsets.
105 // Checks that all the results produced match the
106 // query. Duplicates by name are not returned,
107 // but duplicate SMILES from different reactions will be.
108 // Hitsets will be re-ordered on exit.
109 void buildHits(std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
110 const TimePoint *endTime, bool &timedOut,
111 std::vector<std::unique_ptr<ROMol>> &results) const;
112 void buildAllHits(
113 const std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
114 const TimePoint *endTime, bool &timedOut,
115 std::vector<std::unique_ptr<ROMol>> &results) const;
116 void makeHitsFromToTry(
117 const std::vector<
118 std::pair<const SynthonSpaceHitSet *, std::vector<size_t>>> &toTry,
119 const TimePoint *endTime,
120 std::vector<std::unique_ptr<ROMol>> &results) const;
121 void processToTrySet(
122 std::vector<std::pair<const SynthonSpaceHitSet *, std::vector<size_t>>>
123 &toTry,
124 const TimePoint *endTime,
125 std::vector<std::unique_ptr<ROMol>> &results) const;
126
127 // Get the subset of synthons for the given reaction to use for this
128 // enumeration.
129 std::vector<std::vector<ROMol *>> getSynthonsToUse(
130 const std::vector<boost::dynamic_bitset<>> &synthonsToUse,
131 const std::string &reaction_id) const;
132};
133
134} // namespace SynthonSpaceSearch
135} // namespace RDKit
136#endif // SYNTHONSPACESEARCHER_H
std::chrono::steady_clock Clock
std::chrono::time_point< Clock > TimePoint
contains a class for searching combinatorial libraries in Synthon format such as Enamine REAL.
SynthonSpaceSearcher(const ROMol &query, const SynthonSpaceSearchParams &params, SynthonSpace &space)
SynthonSpaceSearcher(SynthonSpaceSearcher &&other)=delete
SynthonSpaceSearcher(const SynthonSpaceSearcher &other)=delete
virtual std::vector< std::unique_ptr< SynthonSpaceHitSet > > searchFragSet(const std::vector< std::unique_ptr< ROMol > > &fragSet, const SynthonSet &reaction) const =0
virtual bool verifyHit(ROMol &mol) const
SynthonSpaceSearcher & operator=(const SynthonSpaceSearcher &other)=delete
const SynthonSpaceSearchParams & getParams() const
std::unique_ptr< ROMol > buildAndVerifyHit(const SynthonSpaceHitSet *hitset, const std::vector< size_t > &synthNums) const
SynthonSpaceSearcher & operator=(SynthonSpaceSearcher &&other)=delete
virtual bool quickVerify(const SynthonSpaceHitSet *hitset, const std::vector< size_t > &synthNums) const
Std stuff.
bool rdvalue_is(const RDValue_cast_t)