RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SynthonSet.h
Go to the documentation of this file.
1//
2// Copyright (C) David Cosgrove 2024.
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#ifndef RDKIT_SYNTHONSET_H
12#define RDKIT_SYNTHONSET_H
13
14#include <iosfwd>
15#include <string>
16#include <vector>
17
18#include <boost/dynamic_bitset.hpp>
19
20#include <RDGeneral/export.h>
24
25namespace RDKit {
26class ROMol;
27
28namespace SynthonSpaceSearch {
29class Synthon;
30class SynthonSpace;
32
33// This class holds pointers to all the synthons for a particular
34// reaction. The synthons themselves are in a pool in the
35// SynthonSpace.
37 public:
// Default construction leaves the id empty.
38 SynthonSet() = default;
// Constructs a set whose identifier (d_id) is a copy of id.
39 explicit SynthonSet(const std::string &id) : d_id(id) {}
// Copy and move construction are both deleted.
40 SynthonSet(const SynthonSet &rhs) = delete;
41 SynthonSet(SynthonSet &&rhs) = delete;
42
// Returns a const reference to the identifier (d_id) of this set.
43 const std::string &getId() const { return d_id; }
// Returns a const reference to d_synthons: one inner vector per synthon
// set, each entry pairing the synthon's ID in this reaction with a
// pointer to the Synthon.
44 const std::vector<std::vector<std::pair<std::string, Synthon *>>> &
45 getSynthons() const {
46 return d_synthons;
47 }
// Returns a const reference to d_connectors, the bitset showing which
// connectors are present across the synthon sets.
48 const boost::dynamic_bitset<> &getConnectors() const { return d_connectors; }
// Returns a const reference to d_synthConnPatts, the per-synthon-set
// connector patterns (element i has a bit set for each connector used
// by synthon set i).
49 const std::vector<boost::dynamic_bitset<>> &getSynthonConnectorPatterns()
50 const {
51 return d_synthConnPatts;
52 }
53 const std::vector<std::shared_ptr<ROMol>> &getConnectorRegions() const;
54 const std::vector<std::string> &getConnectorRegionSmiles() const;
55 const std::vector<std::unique_ptr<ExplicitBitVect>> &getConnRegFPs() const;
56 const std::unique_ptr<ExplicitBitVect> &getAddFP() const;
57 const std::unique_ptr<ExplicitBitVect> &getSubtractFP() const;
58 const std::vector<int> &getNumConnectors() const;
59 std::uint64_t getNumProducts() const;
60 bool hasFingerprints() const;
// Returns d_numRingFormers, the number of rings that may be formed by
// the synthons.
62 unsigned int getNumRingFormers() const { return d_numRingFormers; }
63
64 // Writes to/reads from a binary stream.
65 void writeToDBStream(std::ostream &os) const;
66 void readFromDBStream(std::istream &is, const SynthonSpace &space,
67 std::uint32_t version);
68 // write the enumerated molecules to the stream in SMILES format.
69 void enumerateToStream(std::ostream &os) const;
70
71 // This stores the pointer to the Synthon, but doesn't manage
72 // it and should never delete it.
73 void addSynthon(int synthonSetNum, Synthon *newSynthon,
74 const std::string &synthonId);
75
76 // Sometimes the synthon sets are numbered from 1 in the text file,
77 // in which case there'll be an empty set 0.
79
80 // The bonds in the synthons may not be the same as in the products, and
81 // this is a problem for aromatic ring creation in particular. Such as:
82 // [1*]=CC=C[2*] and [1*]Nc1c([2*])cccc1 giving c1ccc2ncccc2c1. So
83 // make versions of the synthons that reflect this, stored as searchMol
84 // in each synthon.
86
87 // Build the connector regions and their fingerprints. Only used when
88 // creating a SynthonSpace from a text file.
90
91 // Scan through the connectors ([1*], [2*] etc.) in the synthons
92 // and set bits in d_connectors accordingly. Also removes any empty
93 // reagent sets, which might be because the synthon numbers start from
94 // 1 rather than 0. Only used when creating a SynthonSpace from a text
95 // file.
97
101 unsigned int numBits);
102
103 // Return the molecules for synthons for which the bits are true.
104 // Obviously requires that reqSynths has the same dimensions as
105 // d_synthons.
106 std::vector<std::vector<ROMol *>> getSynthons(
107 const std::vector<boost::dynamic_bitset<>> &reqSynths) const;
108
109 std::string buildProductName(const std::vector<size_t> &synthNums) const;
110 std::unique_ptr<ROMol> buildProduct(
111 const std::vector<size_t> &synthNums) const;
112
114
115 private:
116 std::string d_id;
117 // The lists of synthons. A product of the reaction is created by
118 // combining 1 synthon from each of the outer vectors. The actual
119 // Synthon objects are held in the SynthonSpace which manages all
120 // the memory. In different reactions/SynthonSets the same Synthon
121 // can have different IDs, so we need to keep the ID here rather
122 // than in the Synthon, whose primary key is its SMILES string.
123 std::vector<std::vector<std::pair<std::string, Synthon *>>> d_synthons;
124 // MAX_CONNECTOR_NUM+1 bits showing which connectors are present in all the
125 // synthon sets.
126 boost::dynamic_bitset<> d_connectors;
127 // And the connector patterns for each synthon set. If synthon set 0
128 // has connectors 1 and 3, then d_synthConnPatts[0] will have bits
129 // 1 and 3 set.
130 std::vector<boost::dynamic_bitset<>> d_synthConnPatts;
131
132 // The connector regions of a molecule are the pieces of up to 3 bonds from
133 // a connector atom into the molecule. We keep a vector of all the ones
134 // present in the synthons in the set, plus a fingerprint for each.
135 // If a query fragment doesn't have a connector region in common with
136 // any of the synthons it can be assumed that the fragment won't have
137 // a match in this SynthonSet.
138 std::vector<std::shared_ptr<ROMol>> d_connectorRegions;
139 std::vector<std::string> d_connRegSmis;
140 // The fingerprints of the connector regions.
141 std::vector<std::unique_ptr<ExplicitBitVect>> d_connRegFPs;
142
143 // When doing an approximate FP similarity by ORing together
144 // the synthonFPs, adding d_addFP and subtracting d_subtractFP
145 // accounts (a bit) for the joins and the dummy atoms
146 // respectively.
147 std::unique_ptr<ExplicitBitVect> d_addFP;
148 std::unique_ptr<ExplicitBitVect> d_subtractFP;
149
150 // The number of connectors in the synthons in each synthon set.
151 std::vector<int> d_numConnectors;
152 // The number of rings that may be formed by the synthons. If there
153 // is a pair of synthons A([1*])[2*] and B([1*])[2*], 1 ring can be
154 // formed.
155 unsigned int d_numRingFormers{0};
156};
157
158} // namespace SynthonSpaceSearch
159
160} // namespace RDKit
161
162#endif // RDKIT_SYNTHONSET_H
class that generates same fingerprint style for different output formats
const std::unique_ptr< ExplicitBitVect > & getSubtractFP() const
const std::vector< std::vector< std::pair< std::string, Synthon * > > > & getSynthons() const
Definition SynthonSet.h:45
const std::unique_ptr< ExplicitBitVect > & getAddFP() const
const std::vector< std::unique_ptr< ExplicitBitVect > > & getConnRegFPs() const
SynthonSet(SynthonSet &&rhs)=delete
void enumerateToStream(std::ostream &os) const
void addSynthon(int synthonSetNum, Synthon *newSynthon, const std::string &synthonId)
std::unique_ptr< ROMol > buildProduct(const std::vector< size_t > &synthNums) const
void writeToDBStream(std::ostream &os) const
void buildAddAndSubtractFPs(const FingerprintGenerator< std::uint64_t > &fpGen, unsigned int numBits)
void readFromDBStream(std::istream &is, const SynthonSpace &space, std::uint32_t version)
void buildSynthonFingerprints(const FingerprintGenerator< std::uint64_t > &fpGen)
const std::vector< std::shared_ptr< ROMol > > & getConnectorRegions() const
SynthonSet(const std::string &id)
Definition SynthonSet.h:39
std::string buildProductName(const std::vector< size_t > &synthNums) const
const std::vector< int > & getNumConnectors() const
std::uint64_t getNumProducts() const
std::vector< std::vector< ROMol * > > getSynthons(const std::vector< boost::dynamic_bitset<> > &reqSynths) const
unsigned int getNumRingFormers() const
Definition SynthonSet.h:62
const std::vector< boost::dynamic_bitset<> > & getSynthonConnectorPatterns() const
Definition SynthonSet.h:49
const std::vector< std::string > & getConnectorRegionSmiles() const
const boost::dynamic_bitset & getConnectors() const
Definition SynthonSet.h:48
SynthonSet(const SynthonSet &rhs)=delete
const std::string & getId() const
Definition SynthonSet.h:43
#define RDKIT_SYNTHONSPACESEARCH_EXPORT
Definition export.h:577
Std stuff.