RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MultithreadedSmilesMolSupplier.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Shrey Aryan
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifdef RDK_BUILD_THREADSAFE_SSS
11#ifndef MULTITHREADED_SMILES_MOL_SUPPLIER
12#define MULTITHREADED_SMILES_MOL_SUPPLIER
14namespace RDKit {
15namespace v2 {
16namespace FileParsers {
17//! This class is still a bit experimental and the public API may change
18//! in future releases.
   //!
   //! Specialization of MultithreadedMolSupplier that is configured through
   //! SmilesMolSupplierParams. It overrides the base-class hooks that pull one
   //! text record from the input (extractNextRecord) and turn a record into a
   //! molecule (processMoleculeRecord).
19class RDKIT_FILEPARSERS_EXPORT MultithreadedSmilesMolSupplier
20 : public MultithreadedMolSupplier {
21 public:
   //! construct from a file name
   //! \param fileName    name of the input file
   //! \param params      settings of type Parameters (defaults to Parameters())
   //! \param parseParams SMILES-specific settings (defaults to
   //!                    SmilesMolSupplierParams())
22 explicit MultithreadedSmilesMolSupplier(
23 const std::string &fileName, const Parameters &params = Parameters(),
24 const SmilesMolSupplierParams &parseParams = SmilesMolSupplierParams());
25
   //! construct from an already-open input stream
   //! \param inStream      stream to read from
   //! \param takeOwnership defaults to true; presumably the supplier then
   //!                      becomes responsible for deleting inStream - confirm
   //!                      in the implementation file
   //! \param params        settings of type Parameters
   //! \param parseParams   SMILES-specific settings
26 explicit MultithreadedSmilesMolSupplier(
27 std::istream *inStream, bool takeOwnership = true,
28 const Parameters &params = Parameters(),
29 const SmilesMolSupplierParams &parseParams = SmilesMolSupplierParams());
   //! default constructor (defined out of line)
30 MultithreadedSmilesMolSupplier();
   //! calls close() on destruction
   // NOTE(review): virtual calls made while the destructor runs resolve to
   // this class's overrides, not to those of a more-derived class. The ';'
   // after the body is an empty declaration and has no effect.
31 virtual ~MultithreadedSmilesMolSupplier() {close();};
32
   //! intentionally empty override
33 void init() override {}
34 //! returns df_end
35 bool getEnd() const override;
36 //! reads and processes the title line
37 void processTitleLine();
38 //! reads next record and returns whether or not EOF was hit
   //! \param record  receives the text of the record
   //! \param lineNum receives the line number of the record
   //! \param index   receives the index of the record
39 bool extractNextRecord(std::string &record, unsigned int &lineNum,
40 unsigned int &index) override;
41 //! parses the record and returns the resulting molecule
   // NOTE(review): the result is a raw pointer; ownership is presumably handed
   // to the caller - confirm against the base class before relying on it.
42 RWMol *processMoleculeRecord(const std::string &record,
43 unsigned int lineNum) override;
44
45 protected:
   //! override of the base-class hook for closing the input stream(s)
46 void closeStreams() override;
47
48 private:
   //! shared set-up helper taking the same settings as the public constructors
   // presumably called by both of them - confirm in the implementation file
49 void initFromSettings(
50 bool takeOwnership, const Parameters &params,
51 const SmilesMolSupplierParams &parseParams = SmilesMolSupplierParams());
52
53 private:
54 bool df_end = false; //!< have we reached the end of the file?
   // NOTE(review): d_line is signed, while extractNextRecord() and
   // processMoleculeRecord() pass line numbers as unsigned int.
55 int d_line = 0; //!< line number we are currently on
56 STR_VECT d_props; //!< vector of property names
57 unsigned int d_currentRecordId = 1; //!< current record id (starts at 1)
58 SmilesMolSupplierParams d_parseParams; //!< SMILES parsing settings
59};
60} // namespace FileParsers
61} // namespace v2
62
63inline namespace v1 {
64class RDKIT_FILEPARSERS_EXPORT MultithreadedSmilesMolSupplier
65 : public MolSupplier {
66 //! v1-API wrapper: each non-default constructor packs its arguments into the
67 //! v2 parameter structs and stores a new ContainedType in dp_supplier
68 public:
   //! the v2 supplier type that the constructors create and the accessors
   //! below cast dp_supplier to
69 using ContainedType = v2::FileParsers::MultithreadedSmilesMolSupplier;
   //! default constructor; does not assign dp_supplier
70 MultithreadedSmilesMolSupplier() {}
   //! construct from a file name
   //! \param fileName         name of the input file
   //! \param delimiter        copied to parseParams.delimiter
   //! \param smilesColumn     copied to parseParams.smilesColumn
   //! \param nameColumn       copied to parseParams.nameColumn
   //! \param titleLine        copied to parseParams.titleLine
   //! \param sanitize         copied to parseParams.parseParameters.sanitize
   //! \param numWriterThreads copied to params.numWriterThreads
   //! \param sizeInputQueue   copied to params.sizeInputQueue
   //! \param sizeOutputQueue  copied to params.sizeOutputQueue
71 explicit MultithreadedSmilesMolSupplier(
72 const std::string &fileName, const std::string &delimiter = " \t",
73 int smilesColumn = 0, int nameColumn = 1, bool titleLine = true,
74 bool sanitize = true, unsigned int numWriterThreads = 1,
75 size_t sizeInputQueue = 5, size_t sizeOutputQueue = 5) {
   // threading / queue settings
76 v2::FileParsers::MultithreadedSmilesMolSupplier::Parameters params;
77 params.numWriterThreads = numWriterThreads;
78 params.sizeInputQueue = sizeInputQueue;
79 params.sizeOutputQueue = sizeOutputQueue;
   // SMILES parsing settings
80 v2::FileParsers::SmilesMolSupplierParams parseParams;
81 parseParams.delimiter = delimiter;
82 parseParams.smilesColumn = smilesColumn;
83 parseParams.nameColumn = nameColumn;
84 parseParams.titleLine = titleLine;
85 parseParams.parseParameters.sanitize = sanitize;
86
   // hand the newly created v2 supplier to dp_supplier
87 dp_supplier.reset(new v2::FileParsers::MultithreadedSmilesMolSupplier(
88 fileName, params, parseParams));
89 }
90
   //! construct from an already-open input stream
   //! \param inStream      stream to read from
   //! \param takeOwnership forwarded unchanged to the v2 constructor
   //! The remaining parameters are mapped onto the v2 parameter structs
   //! exactly as in the file-name constructor.
   // NOTE(review): the parameter marshalling below duplicates the file-name
   // constructor line for line; keep the two in sync when editing either.
91 explicit MultithreadedSmilesMolSupplier(
92 std::istream *inStream, bool takeOwnership = true,
93 const std::string &delimiter = " \t", int smilesColumn = 0,
94 int nameColumn = 1, bool titleLine = true, bool sanitize = true,
95 unsigned int numWriterThreads = 1, size_t sizeInputQueue = 5,
96 size_t sizeOutputQueue = 5) {
97 v2::FileParsers::MultithreadedSmilesMolSupplier::Parameters params;
98 params.numWriterThreads = numWriterThreads;
99 params.sizeInputQueue = sizeInputQueue;
100 params.sizeOutputQueue = sizeOutputQueue;
101 v2::FileParsers::SmilesMolSupplierParams parseParams;
102 parseParams.delimiter = delimiter;
103 parseParams.smilesColumn = smilesColumn;
104 parseParams.nameColumn = nameColumn;
105 parseParams.titleLine = titleLine;
106 parseParams.parseParameters.sanitize = sanitize;
107
108 dp_supplier.reset(new v2::FileParsers::MultithreadedSmilesMolSupplier(
109 inStream, takeOwnership, params, parseParams));
110 }
111
112 //! forwards to the contained supplier's getEOFHitOnRead(); returns false
    //! only as a fallback when dp_supplier is not set
113 bool getEOFHitOnRead() const {
114 if (dp_supplier) {
    // the constructors in this class only ever store a ContainedType here
115 return static_cast<ContainedType *>(dp_supplier.get())->getEOFHitOnRead();
116 }
117 return false;
118 }
119
120 //! returns the record id of the last extracted item
121 //! Note: d_LastRecordId is initially 0, so the value 0 is returned
122 //! if and only if the function is called before extracting the first
123 //! record. Requires dp_supplier to be set (checked with PRECONDITION).
124 unsigned int getLastRecordId() const {
125 PRECONDITION(dp_supplier, "no supplier");
126 return static_cast<ContainedType *>(dp_supplier.get())->getLastRecordId();
127 }
128 //! returns the text block for the last extracted item
    //! Requires dp_supplier to be set (checked with PRECONDITION).
129 std::string getLastItemText() const {
130 PRECONDITION(dp_supplier, "no supplier");
131 return static_cast<ContainedType *>(dp_supplier.get())->getLastItemText();
132 }
133};
134} // namespace v1
135
136} // namespace RDKit
137#endif
138#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:177
Std stuff.
std::vector< std::string > STR_VECT
Definition Dict.h:29