RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MultithreadedSmilesMolSupplier.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Shrey Aryan
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifdef RDK_BUILD_THREADSAFE_SSS
11#ifndef MULTITHREADED_SMILES_MOL_SUPPLIER
12#define MULTITHREADED_SMILES_MOL_SUPPLIER
14namespace RDKit {
15namespace v2 {
16namespace FileParsers {
17//! This class is still a bit experimental and the public API may change
18//! in future releases.
21 public:
22 explicit MultithreadedSmilesMolSupplier(
23 const std::string &fileName, const Parameters &params = Parameters(),
24 const SmilesMolSupplierParams &parseParams = SmilesMolSupplierParams());
25
26 explicit MultithreadedSmilesMolSupplier(
27 std::istream *inStream, bool takeOwnership = true,
28 const Parameters &params = Parameters(),
29 const SmilesMolSupplierParams &parseParams = SmilesMolSupplierParams());
30 MultithreadedSmilesMolSupplier();
31 ~MultithreadedSmilesMolSupplier() override;
32
33 void init() override {}
34 //! returns df_end
35 bool getEnd() const override;
36 //! reads and processes the title line
37 void processTitleLine();
38 //! reads next record and returns whether or not EOF was hit
39 bool extractNextRecord(std::string &record, unsigned int &lineNum,
40 unsigned int &index) override;
41 //! parses the record and returns the resulting molecule
42 RWMol *processMoleculeRecord(const std::string &record,
43 unsigned int lineNum) override;
44
45 private:
46 void initFromSettings(
47 bool takeOwnership, const Parameters &params,
48 const SmilesMolSupplierParams &parseParams = SmilesMolSupplierParams());
49
50 private:
51 bool df_end = false; //!< have we reached the end of the file?
52 int d_line = 0; //!< line number we are currently on
53 STR_VECT d_props; //!< vector of property names
54 unsigned int d_currentRecordId = 1; //!< current record id
55 SmilesMolSupplierParams d_parseParams;
56};
57} // namespace FileParsers
58} // namespace v2
59
60inline namespace v1 {
62 : public MolSupplier {
63 //! v1-API wrapper that forwards to a contained
64 //! v2::FileParsers::MultithreadedSmilesMolSupplier to supply molecules
65 public:
66 using ContainedType = v2::FileParsers::MultithreadedSmilesMolSupplier;
67 MultithreadedSmilesMolSupplier() {}
68 explicit MultithreadedSmilesMolSupplier(
69 const std::string &fileName, const std::string &delimiter = " \t",
70 int smilesColumn = 0, int nameColumn = 1, bool titleLine = true,
71 bool sanitize = true, unsigned int numWriterThreads = 1,
72 size_t sizeInputQueue = 5, size_t sizeOutputQueue = 5) {
73 v2::FileParsers::MultithreadedSmilesMolSupplier::Parameters params;
74 params.numWriterThreads = numWriterThreads;
75 params.sizeInputQueue = sizeInputQueue;
76 params.sizeOutputQueue = sizeOutputQueue;
77 v2::FileParsers::SmilesMolSupplierParams parseParams;
78 parseParams.delimiter = delimiter;
79 parseParams.smilesColumn = smilesColumn;
80 parseParams.nameColumn = nameColumn;
81 parseParams.titleLine = titleLine;
82 parseParams.parseParameters.sanitize = sanitize;
83
84 dp_supplier.reset(new v2::FileParsers::MultithreadedSmilesMolSupplier(
85 fileName, params, parseParams));
86 }
87
88 explicit MultithreadedSmilesMolSupplier(
89 std::istream *inStream, bool takeOwnership = true,
90 const std::string &delimiter = " \t", int smilesColumn = 0,
91 int nameColumn = 1, bool titleLine = true, bool sanitize = true,
92 unsigned int numWriterThreads = 1, size_t sizeInputQueue = 5,
93 size_t sizeOutputQueue = 5) {
94 v2::FileParsers::MultithreadedSmilesMolSupplier::Parameters params;
95 params.numWriterThreads = numWriterThreads;
96 params.sizeInputQueue = sizeInputQueue;
97 params.sizeOutputQueue = sizeOutputQueue;
98 v2::FileParsers::SmilesMolSupplierParams parseParams;
99 parseParams.delimiter = delimiter;
100 parseParams.smilesColumn = smilesColumn;
101 parseParams.nameColumn = nameColumn;
102 parseParams.titleLine = titleLine;
103 parseParams.parseParameters.sanitize = sanitize;
104
105 dp_supplier.reset(new v2::FileParsers::MultithreadedSmilesMolSupplier(
106 inStream, takeOwnership, params, parseParams));
107 }
108
109 //! included for the interface; forwards to the contained supplier and returns false if there is none
110 bool getEOFHitOnRead() const {
111 if (dp_supplier) {
112 return static_cast<ContainedType *>(dp_supplier.get())->getEOFHitOnRead();
113 }
114 return false;
115 }
116
117 //! returns the record id of the last extracted item
118 //! Note: d_LastRecordId is initially 0, so the value 0 is returned
119 //! if and only if the function is called before extracting the first
120 //! record
121 unsigned int getLastRecordId() const {
122 PRECONDITION(dp_supplier, "no supplier");
123 return static_cast<ContainedType *>(dp_supplier.get())->getLastRecordId();
124 }
125 //! returns the text block for the last extracted item
126 std::string getLastItemText() const {
127 PRECONDITION(dp_supplier, "no supplier");
128 return static_cast<ContainedType *>(dp_supplier.get())->getLastItemText();
129 }
130};
131} // namespace v1
132
133} // namespace RDKit
134#endif
135#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:161
Std stuff.
std::vector< std::string > STR_VECT
Definition Dict.h:29
bool rdvalue_is(const RDValue_cast_t)