RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSpaceSearcher.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11// This file declares an abstract base class for searching a synthon
12// space. Concrete derived classes include SynthonSpaceSubstructureSearcher
13// and SynthonSpaceFingerprintSearcher.
14
15#ifndef SYNTHONSPACESEARCHER_H
16#define SYNTHONSPACESEARCHER_H
17
18#include <chrono>
19#include <random>
20
21#include <RDGeneral/export.h>
25#include <boost/spirit/home/support/common_terminals.hpp>
26
27using Clock = std::chrono::steady_clock;
28using TimePoint = std::chrono::time_point<Clock>;
29
30namespace RDKit {
31class ROMol;
32
33namespace SynthonSpaceSearch {
34
35// Abstract base class for searching the SynthonSpace.
37 public:
40 const SynthonSpaceSearchParams &params,
41 SynthonSpace &space);
46
47 virtual ~SynthonSpaceSearcher() = default;
48
51
52 SynthonSpace &getSpace() const { return d_space; }
53 const ROMol &getQuery() const { return d_query; }
54 const SynthonSpaceSearchParams &getParams() const { return d_params; }
55
56 // Do the search of this fragSet against the SynthonSet in the
57 // appropriate way, for example by substructure or fingerprint
58 // similarity.
59 virtual std::vector<std::unique_ptr<SynthonSpaceHitSet>> searchFragSet(
60 const std::vector<std::unique_ptr<ROMol>> &fragSet,
61 const SynthonSet &reaction) const = 0;
62
63 // Make the hit, constructed from a specific combination of
64 // synthons in the hitset, and verify that it matches the
65 // query in the appropriate way. There'll be 1 entry in synthNums
66 // for each synthon list in the hitset. Returns an empty pointer
67 // if the hit isn't accepted for whatever reason.
68 std::unique_ptr<ROMol> buildAndVerifyHit(
69 const SynthonSpaceHitSet *hitset,
70 const std::vector<size_t> &synthNums) const;
71
72 protected:
73 // Checks that the given molecule is definitely a hit according to
74 // the derived class's criteria. This function checks the chiralAtomCount
75 // if appropriate, which requires a non-const ROMol.
76 virtual bool verifyHit(ROMol &mol) const;
77
78 // Do a check against number of heavy atoms etc. if options call for it
79 // which can be done without having to build the full molecule from the
80 // synthons. Some of the search methods (fingerprints, for example) can do
81 // additional quick checks on whether this set of synthons can match the query
82 // without building the full molecule.
83 virtual bool quickVerify(const SynthonSpaceHitSet *hitset,
84 const std::vector<size_t> &synthNums) const;
85
86 private:
87 std::unique_ptr<std::mt19937> d_randGen;
88
89 const ROMol &d_query;
90 const SynthonSpaceSearchParams &d_params;
91 SynthonSpace &d_space;
92
93 // Generally, the search needs the query fragmented into no more than
94 // the largest number of synthon sets in any reaction. Substructure search
95 // needs more than that, sometimes.
96 virtual unsigned int getNumQueryFragmentsRequired();
97 // Some of the search methods might need extra setup of the fragment
98 // sets. The FingerprintSearcher, for example, needs fingerprints
99 // for all the fragments. The SubstructureSearcher needs connector
100 // regions and information about them.
101 virtual void extraSearchSetup(
102 [[maybe_unused]] std::vector<std::vector<std::unique_ptr<ROMol>>>
103 &fragSets) {}
104
105 std::vector<std::unique_ptr<SynthonSpaceHitSet>> assembleHitSets(
106 const TimePoint *endTime, bool &timedOut, std::uint64_t &totHits);
107
108 std::vector<std::unique_ptr<SynthonSpaceHitSet>> doTheSearch(
109 std::vector<std::vector<std::unique_ptr<ROMol>>> &fragSets,
110 const TimePoint *endTime, bool &timedOut, std::uint64_t &totHits);
111
112 // Build the molecules from the synthons identified in hitsets.
113 // Checks that all the results produced match the
114 // query. Duplicates by name are not returned,
115 // but duplicate SMILES from different reactions will be.
116 // Hitsets will be re-ordered on exit.
117 void buildHits(std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
118 const TimePoint *endTime, bool &timedOut,
119 std::vector<std::unique_ptr<ROMol>> &results) const;
120 void buildAllHits(
121 const std::vector<std::unique_ptr<SynthonSpaceHitSet>> &hitsets,
122 const TimePoint *endTime, bool &timedOut,
123 std::vector<std::unique_ptr<ROMol>> &results) const;
124 void makeHitsFromToTry(
125 const std::vector<
126 std::pair<const SynthonSpaceHitSet *, std::vector<size_t>>> &toTry,
127 const TimePoint *endTime,
128 std::vector<std::unique_ptr<ROMol>> &results) const;
129 void processToTrySet(
130 std::vector<std::pair<const SynthonSpaceHitSet *, std::vector<size_t>>>
131 &toTry,
132 const TimePoint *endTime,
133 std::vector<std::unique_ptr<ROMol>> &results) const;
134
135 // Get the subset of synthons for the given reaction to use for this
136 // enumeration.
137 std::vector<std::vector<ROMol *>> getSynthonsToUse(
138 const std::vector<boost::dynamic_bitset<>> &synthonsToUse,
139 const std::string &reaction_id) const;
140};
141
142} // namespace SynthonSpaceSearch
143} // namespace RDKit
144#endif // SYNTHONSPACESEARCHER_H
std::chrono::steady_clock Clock
std::chrono::time_point< Clock > TimePoint
contains a class for searching combinatorial libraries in Synthon format such as Enamine REAL.
SynthonSpaceSearcher(const ROMol &query, const SynthonSpaceSearchParams &params, SynthonSpace &space)
SynthonSpaceSearcher(SynthonSpaceSearcher &&other)=delete
void search(const SearchResultCallback &cb)
SynthonSpaceSearcher(const SynthonSpaceSearcher &other)=delete
virtual std::vector< std::unique_ptr< SynthonSpaceHitSet > > searchFragSet(const std::vector< std::unique_ptr< ROMol > > &fragSet, const SynthonSet &reaction) const =0
virtual bool verifyHit(ROMol &mol) const
SynthonSpaceSearcher & operator=(const SynthonSpaceSearcher &other)=delete
const SynthonSpaceSearchParams & getParams() const
std::unique_ptr< ROMol > buildAndVerifyHit(const SynthonSpaceHitSet *hitset, const std::vector< size_t > &synthNums) const
SynthonSpaceSearcher & operator=(SynthonSpaceSearcher &&other)=delete
virtual bool quickVerify(const SynthonSpaceHitSet *hitset, const std::vector< size_t > &synthNums) const
std::function< bool(std::vector< std::unique_ptr< ROMol > > &)> SearchResultCallback
Std stuff.