RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MultiFPBReader.h
Go to the documentation of this file.
1//
2// Copyright (c) 2016-2022 Greg Landrum
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_MULTIFPBREADER_H_APR2016
12#define RD_MULTIFPBREADER_H_APR2016
13/*! \file MultiFPBReader.h
14
15 \brief contains a class for reading and searching collections of FPB files
16
17 \b Note that this functionality is experimental and the API may change
18 in future releases.
19*/
20
24#include <tuple>
25
26namespace RDKit {
27
28//! class for reading and searching multiple FPB files
29/*!
30 basic usage:
31 \code
32 FPBReader r1("foo1.fpb"),r2("foo2.fpb");
33 std::vector<FPBReader *> readers;
34 readers.push_back(&r1);
35 readers.push_back(&r2);
36 MultiFPBReader fpbs(readers);
37 fpbs.init();
38 boost::shared_ptr<ExplicitBitVect> ebv = fpbs.getReader(0)->getFP(95);
39 std::vector<std::tuple<double,unsigned int, unsigned int> > nbrs =
40 fpbs.getTanimotoNeighbors(*ebv.get(), 0.70);
41 \endcode
42
43 \b Note: this functionality is experimental and the API may change
44 in future releases.
45
46 <b>Note on thread safety</b>
47 Operations that involve reading from FPB files are not thread safe.
48 This means that the \c init() method is not thread safe and none of the
49 search operations are thread safe when an \c FPBReader is initialized in
50 \c lazyRead mode.
51
52*/
54 public:
55 typedef std::tuple<double, unsigned int, unsigned int> ResultTuple;
57
58 /*!
59 \param initOnSearch: if this is true, the \c init() method on child readers
60 will not be called until the first search is done. This is useful with large
61 FPB readers.
62 */
63 MultiFPBReader(bool initOnSearch)
64 : df_init(false),
65 df_initOnSearch(initOnSearch),
66 df_takeOwnership(false) {}
67 /*!
68 \param readers: the set of FPBReader objects to use.
69 \param takeOwnership: if true, we own the memory for the FPBReaders
70 \param initOnSearch: if this is true, the \c init() method on child readers
71 will not be called until the first search is done. This is useful with large
72 FPB readers.
73 */
74 MultiFPBReader(std::vector<FPBReader *> &readers, bool takeOwnership = false,
75 bool initOnSearch = false);
76
78 df_init = false;
79 if (df_takeOwnership) {
80 for (auto &rdr : d_readers) {
81 delete rdr;
82 }
83 d_readers.clear();
84 }
85 }
86
87 //! Read the data from the file and initialize internal data structures
88 /*!
89 This must be called before most of the other methods of this class.
90 It calls the \c init() method on each of the child FPBReaders
91
92 */
93 void init();
94
95 //! returns the number of readers
96 unsigned int length() const { return d_readers.size(); }
97 //! returns the number of bits in our fingerprints (all readers are expected
98 //! to have the same length)
99 unsigned int nBits() const;
100
101 //! returns a particular reader
102 /*!
103
104 \param which: the reader to return
105
106 */
107 FPBReader *getReader(unsigned int which);
108
109 //! adds a new FPBReader to our list
110 /*!
111
112 This does no error checking on the reader, so be careful.
113
114 If \c takeOwnership is \c true then we will take ownership of the memory.
115
116 \param rdr: the reader to add. If we have already been initialized, the
117 reader's \c init() method will be called
118
119 \returns a count of the current number of readers
120 */
121 unsigned int addReader(FPBReader *rdr) {
122 PRECONDITION(rdr, "no reader provided");
123 d_readers.push_back(rdr);
124 if (df_init) {
125 rdr->init();
126 }
127 return d_readers.size();
128 }
129
130 //! returns tanimoto neighbors that are within a similarity threshold
131 /*!
132 The result vector of (similarity,index,reader) tuples is sorted in order
133 of decreasing similarity
134
135 \param bv the query fingerprint
136 \param threshold the minimum similarity to return
137 \param numThreads Sets the number of threads to use (more than one thread
138 will only be used if the RDKit was built with multithread support). If set to
139 zero, the max supported by the system will be used.
140
141 */
142 std::vector<ResultTuple> getTanimotoNeighbors(const std::uint8_t *bv,
143 double threshold = 0.7,
144 int numThreads = 1) const;
145 //! \overload
146 std::vector<ResultTuple> getTanimotoNeighbors(
147 boost::shared_array<std::uint8_t> bv, double threshold = 0.7,
148 int numThreads = 1) const {
149 return getTanimotoNeighbors(bv.get(), threshold, numThreads);
150 }
151 //! \overload
152 std::vector<ResultTuple> getTanimotoNeighbors(const ExplicitBitVect &ebv,
153 double threshold = 0.7,
154 int numThreads = 1) const;
155
156 //! returns Tversky neighbors that are within a similarity threshold
157 /*!
158 The result vector of (similarity,index,reader) tuples is sorted in order
159 of decreasing similarity
160
161 \param bv the query fingerprint
162 \param ca the Tversky a coefficient
163 \param cb the Tversky b coefficient
164 \param threshold the minimum similarity to return
165 \param numThreads Sets the number of threads to use (more than one thread
166 will only be used if the RDKit was built with multithread support). If set to
167 zero, the max supported by the system will be used.
168
169 */
170 std::vector<ResultTuple> getTverskyNeighbors(const std::uint8_t *bv,
171 double ca, double cb,
172 double threshold = 0.7,
173 int numThreads = 1) const;
174 //! \overload
175 std::vector<ResultTuple> getTverskyNeighbors(
176 boost::shared_array<std::uint8_t> bv, double ca, double cb,
177 double threshold = 0.7, int numThreads = 1) const {
178 return getTverskyNeighbors(bv.get(), ca, cb, threshold, numThreads);
179 }
180 //! \overload
181 std::vector<ResultTuple> getTverskyNeighbors(const ExplicitBitVect &ebv,
182 double ca, double cb,
183 double threshold = 0.7,
184 int numThreads = 1) const;
185
186 //! returns indices of all fingerprints that completely contain this one
187 /*! (i.e. where all the bits set in the query are also set in the db
188 molecule)
189 */
190 std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
191 const std::uint8_t *bv, int numThreads = 1) const;
192 //! \overload
193 std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
194 boost::shared_array<std::uint8_t> bv, int numThreads = 1) const {
195 return getContainingNeighbors(bv.get(), numThreads);
196 }
197 //! \overload
198 std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
199 const ExplicitBitVect &ebv, int numThreads = 1) const;
200
201 private:
202 std::vector<FPBReader *> d_readers;
203 bool df_init{false}, df_initOnSearch{false}, df_takeOwnership{false};
204
205 // disable automatic copy constructors and assignment operators
206 // for this class and its subclasses. They will likely be
207 // carrying around stream pointers and copying those is a recipe
208 // for disaster.
209 MultiFPBReader(const MultiFPBReader &);
210 MultiFPBReader &operator=(const MultiFPBReader &);
211};
212} // namespace RDKit
213#endif
contains a simple class for reading and searching FPB files
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
a class for bit vectors that are densely occupied
class for reading and searching FPB files
Definition FPBReader.h:58
void init()
Read the data from the file and initialize internal data structures.
class for reading and searching multiple FPB files
std::vector< ResultTuple > getTanimotoNeighbors(const std::uint8_t *bv, double threshold=0.7, int numThreads=1) const
returns tanimoto neighbors that are within a similarity threshold
std::vector< ResultTuple > getTverskyNeighbors(const std::uint8_t *bv, double ca, double cb, double threshold=0.7, int numThreads=1) const
returns Tversky neighbors that are within a similarity threshold
unsigned int length() const
returns the number of readers
std::vector< ResultTuple > getTverskyNeighbors(const ExplicitBitVect &ebv, double ca, double cb, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
unsigned int addReader(FPBReader *rdr)
adds a new FPBReader to our list
void init()
Read the data from the file and initialize internal data structures.
MultiFPBReader(std::vector< FPBReader * > &readers, bool takeOwnership=false, bool initOnSearch=false)
MultiFPBReader(bool initOnSearch)
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(boost::shared_array< std::uint8_t > bv, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
std::tuple< double, unsigned int, unsigned int > ResultTuple
unsigned int nBits() const
std::vector< ResultTuple > getTanimotoNeighbors(const ExplicitBitVect &ebv, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
std::vector< ResultTuple > getTverskyNeighbors(boost::shared_array< std::uint8_t > bv, double ca, double cb, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(const std::uint8_t *bv, int numThreads=1) const
returns indices of all fingerprints that completely contain this one
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(const ExplicitBitVect &ebv, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
std::vector< ResultTuple > getTanimotoNeighbors(boost::shared_array< std::uint8_t > bv, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
FPBReader * getReader(unsigned int which)
returns a particular reader
#define RDKIT_DATASTRUCTS_EXPORT
Definition export.h:81
Std stuff.