RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MultithreadedSDMolSupplier.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Shrey Aryan
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifdef RDK_BUILD_THREADSAFE_SSS
11#ifndef MULTITHREADED_SD_MOL_SUPPLIER
12#define MULTITHREADED_SD_MOL_SUPPLIER
14namespace RDKit {
15namespace v2 {
16namespace FileParsers {
17
18//! This class is still a bit experimental and the public API may change
19//! in future releases.
20class RDKIT_FILEPARSERS_EXPORT MultithreadedSDMolSupplier
21 : public MultithreadedMolSupplier {
22 public:
//! construct from a file name; \c params and \c parseParams fall back to
//! default-constructed values when omitted
23 explicit MultithreadedSDMolSupplier(
24 const std::string &fileName, const Parameters &params = Parameters(),
25 const MolFileParserParams &parseParams = MolFileParserParams());
26
//! construct from an input stream
//! NOTE(review): \c takeOwnership presumably makes the supplier responsible
//! for deleting \c inStream - confirm against the implementation
27 explicit MultithreadedSDMolSupplier(
28 std::istream *inStream, bool takeOwnership = true,
29 const Parameters &params = Parameters(),
30 const MolFileParserParams &parseParams = MolFileParserParams());
31
32 MultithreadedSDMolSupplier();
//! calls close() on destruction (close() is not declared in this class;
//! presumably inherited from MultithreadedMolSupplier)
33 virtual ~MultithreadedSDMolSupplier() {close();}
//! intentionally empty override
34 void init() override {}
35
//! NOTE(review): defined elsewhere; presumably updates the end-of-file
//! state (df_end) - confirm
36 void checkForEnd();
37 bool getEnd() const override;
//! sets the df_processPropertyLists flag
38 void setProcessPropertyLists(bool val) { df_processPropertyLists = val; }
//! returns the df_processPropertyLists flag
39 bool getProcessPropertyLists() const { return df_processPropertyLists; }
//! returns the df_eofHitOnRead flag
40 bool getEOFHitOnRead() const { return df_eofHitOnRead; }
41
42 //! reads next record and returns whether or not EOF was hit
43 bool extractNextRecord(std::string &record, unsigned int &lineNum,
44 unsigned int &index) override;
//! NOTE(review): defined elsewhere; presumably reads the property block
//! from \c inStream into \c mol - confirm
45 void readMolProps(RWMol &mol, std::istringstream &inStream);
46 //! parses the record and returns the resulting molecule
47 RWMol *processMoleculeRecord(const std::string &record,
48 unsigned int lineNum) override;
49 protected:
50 void closeStreams() override;
51
52 private:
//! helper taking the same settings as the constructors; defined elsewhere
//! (presumably shared by both non-default constructors - confirm)
53 void initFromSettings(bool takeOwnership, const Parameters &params,
54 const MolFileParserParams &parseParams);
55
56 bool df_end = false; //!< have we reached the end of the file?
57 int d_line = 0; //!< line number we are currently on
58 bool df_processPropertyLists = true; //!< see set/getProcessPropertyLists()
59 bool df_eofHitOnRead = false; //!< value reported by getEOFHitOnRead()
60 unsigned int d_currentRecordId = 1; //!< current record id
61 MolFileParserParams d_parseParams; //!< mol-file parser parameters
62};
63} // namespace FileParsers
64} // namespace v2
65
66inline namespace v1 {
67class RDKIT_FILEPARSERS_EXPORT MultithreadedSDMolSupplier : public MolSupplier {
68 //! v1-style wrapper: builds a v2::FileParsers::MultithreadedSDMolSupplier
69 //! from individual arguments and forwards calls to it through dp_supplier
70 public:
71 using ContainedType = v2::FileParsers::MultithreadedSDMolSupplier;
//! does not create a wrapped supplier in its body; the PRECONDITION-guarded
//! accessors below require dp_supplier to be set
72 MultithreadedSDMolSupplier() {}
//! construct from a file name: the queue/thread arguments are packed into
//! the v2 Parameters, the parsing flags into MolFileParserParams, and the
//! wrapped v2 supplier is created from them
73 explicit MultithreadedSDMolSupplier(
74 const std::string &fileName, bool sanitize = true, bool removeHs = true,
75 bool strictParsing = true, unsigned int numWriterThreads = 1,
76 size_t sizeInputQueue = 5, size_t sizeOutputQueue = 5) {
77 v2::FileParsers::MultithreadedSDMolSupplier::Parameters params;
78 params.numWriterThreads = numWriterThreads;
79 params.sizeInputQueue = sizeInputQueue;
80 params.sizeOutputQueue = sizeOutputQueue;
81 v2::FileParsers::MolFileParserParams parseParams;
82 parseParams.sanitize = sanitize;
83 parseParams.removeHs = removeHs;
84 parseParams.strictParsing = strictParsing;
85
86 dp_supplier.reset(new v2::FileParsers::MultithreadedSDMolSupplier(
87 fileName, params, parseParams));
88 }
89
//! construct from an input stream: same argument packing as the file-name
//! constructor; \c inStream and \c takeOwnership are passed through to the
//! v2 supplier unchanged
90 explicit MultithreadedSDMolSupplier(
91 std::istream *inStream, bool takeOwnership = true, bool sanitize = true,
92 bool removeHs = true, bool strictParsing = true,
93 unsigned int numWriterThreads = 1, size_t sizeInputQueue = 5,
94 size_t sizeOutputQueue = 5) {
95 v2::FileParsers::MultithreadedSDMolSupplier::Parameters params;
96 params.numWriterThreads = numWriterThreads;
97 params.sizeInputQueue = sizeInputQueue;
98 params.sizeOutputQueue = sizeOutputQueue;
99 v2::FileParsers::MolFileParserParams parseParams;
100 parseParams.sanitize = sanitize;
101 parseParams.removeHs = removeHs;
102 parseParams.strictParsing = strictParsing;
103
104 dp_supplier.reset(new v2::FileParsers::MultithreadedSDMolSupplier(
105 inStream, takeOwnership, params, parseParams));
106 }
107
108 //! returns the wrapped supplier's getEOFHitOnRead(); false if dp_supplier is unset
109 bool getEOFHitOnRead() const {
110 if (dp_supplier) {
111 return static_cast<ContainedType *>(dp_supplier.get())->getEOFHitOnRead();
112 }
113 return false;
114 }
115
116 //! returns the record id of the last extracted item
117 //! Note: d_LastRecordId = 0, initially therefore the value 0 is returned
118 //! if and only if the function is called before extracting the first
119 //! record
120 unsigned int getLastRecordId() const {
121 PRECONDITION(dp_supplier, "no supplier");
122 return static_cast<ContainedType *>(dp_supplier.get())->getLastRecordId();
123 }
124 //! returns the text block for the last extracted item
125 std::string getLastItemText() const {
126 PRECONDITION(dp_supplier, "no supplier");
127 return static_cast<ContainedType *>(dp_supplier.get())->getLastItemText();
128 }
//! forwards \c val to the wrapped supplier's setProcessPropertyLists()
129 void setProcessPropertyLists(bool val) {
130 PRECONDITION(dp_supplier, "no supplier");
131 static_cast<ContainedType *>(dp_supplier.get())
132 ->setProcessPropertyLists(val);
133 }
//! returns the wrapped supplier's getProcessPropertyLists()
134 bool getProcessPropertyLists() const {
135 PRECONDITION(dp_supplier, "no supplier");
136 return static_cast<ContainedType *>(dp_supplier.get())
137 ->getProcessPropertyLists();
138 }
139};
140} // namespace v1
141} // namespace RDKit
142#endif
143#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:177
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.