RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MultithreadedSDMolSupplier.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Shrey Aryan
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#ifdef RDK_BUILD_THREADSAFE_SSS
11#ifndef MULTITHREADED_SD_MOL_SUPPLIER
12#define MULTITHREADED_SD_MOL_SUPPLIER
14namespace RDKit {
15namespace v2 {
16namespace FileParsers {
17
18//! This class is still a bit experimental and the public API may change
19//! in future releases.
22 public:
//! Constructs a supplier from the file name \c fileName.
//! \param params       supplier settings; defaults to a default-constructed
//!                     Parameters
//! \param parseParams  mol-file parsing options; defaults to a
//!                     default-constructed MolFileParserParams
23 explicit MultithreadedSDMolSupplier(
24 const std::string &fileName, const Parameters &params = Parameters(),
25 const MolFileParserParams &parseParams = MolFileParserParams());
26
//! Constructs a supplier that reads from \c inStream.
//! \param takeOwnership  defaults to true; presumably makes the supplier
//!                       responsible for deleting \c inStream - TODO confirm
//!                       against the implementation (not part of this header)
//! \param params         supplier settings, as for the file-name constructor
//! \param parseParams    mol-file parsing options, as for the file-name
//!                       constructor
27 explicit MultithreadedSDMolSupplier(
28 std::istream *inStream, bool takeOwnership = true,
29 const Parameters &params = Parameters(),
30 const MolFileParserParams &parseParams = MolFileParserParams());
31
//! default constructor; declared here, defined out of line
32 MultithreadedSDMolSupplier();
//! destructor override; declared here, defined out of line
33 ~MultithreadedSDMolSupplier() override;
//! empty override: init() does nothing for this class
34 void init() override {}
35
//! declared here, defined out of line; presumably updates the end-of-file
//! state (df_end) - TODO confirm against the implementation
36 void checkForEnd();
//! override declared here, defined out of line; presumably reports df_end -
//! TODO confirm against the implementation
37 bool getEnd() const override;
//! stores \c val in df_processPropertyLists (which is initialized to true)
38 void setProcessPropertyLists(bool val) { df_processPropertyLists = val; }
//! returns the current value of df_processPropertyLists
39 bool getProcessPropertyLists() const { return df_processPropertyLists; }
//! returns df_eofHitOnRead (which is initialized to false)
40 bool getEOFHitOnRead() const { return df_eofHitOnRead; }
41
42 //! reads next record and returns whether or not EOF was hit
//! \c record, \c lineNum and \c index are non-const references; presumably
//! they are filled in by the call - TODO confirm against the implementation
43 bool extractNextRecord(std::string &record, unsigned int &lineNum,
44 unsigned int &index) override;
//! declared here, defined out of line; presumably reads the properties for
//! \c mol from \c inStream - TODO confirm against the implementation
45 void readMolProps(RWMol &mol, std::istringstream &inStream);
46 //! parses the record and returns the resulting molecule
//! NOTE(review): returns a raw RWMol pointer; who owns it is not stated in
//! this header - confirm against the base class and the implementation
47 RWMol *processMoleculeRecord(const std::string &record,
48 unsigned int lineNum) override;
49
50 private:
//! set-up helper taking the same settings as the public constructors;
//! presumably called by them - TODO confirm (definition is not in this header)
51 void initFromSettings(bool takeOwnership, const Parameters &params,
52 const MolFileParserParams &parseParams);
53
54 Parameters d_params; //!< supplier settings (presumably the constructor's params - TODO confirm)
55 bool df_end = false; //!< have we reached the end of the file?
56 int d_line = 0; //!< line number we are currently on
57 bool df_processPropertyLists = true; //!< flag accessed by set/getProcessPropertyLists()
58 bool df_eofHitOnRead = false; //!< flag returned by getEOFHitOnRead()
59 unsigned int d_currentRecordId = 1; //!< current record id
60 MolFileParserParams d_parseParams; //!< parsing options (presumably the constructor's parseParams - TODO confirm)
61};
62} // namespace FileParsers
63} // namespace v2
64
65inline namespace v1 {
67 //! v1-style wrapper around v2::FileParsers::MultithreadedSDMolSupplier, used to
68 //! concurrently supply molecules one at a time
69 public:
//! the v2 supplier type that the accessors of this class cast dp_supplier to
70 using ContainedType = v2::FileParsers::MultithreadedSDMolSupplier;
//! default constructor: its body is empty, so it creates no wrapped supplier
71 MultithreadedSDMolSupplier() {}
//! Creates the wrapped v2 supplier from the file name \c fileName.
//! \param sanitize, removeHs, strictParsing
//!        copied into the v2 MolFileParserParams given to the wrapped supplier
//! \param numWriterThreads, sizeInputQueue, sizeOutputQueue
//!        copied into the v2 supplier Parameters given to the wrapped supplier
72 explicit MultithreadedSDMolSupplier(
73 const std::string &fileName, bool sanitize = true, bool removeHs = true,
74 bool strictParsing = true, unsigned int numWriterThreads = 1,
75 size_t sizeInputQueue = 5, size_t sizeOutputQueue = 5) {
// translate the flat v1 argument list into the two v2 settings structs
76 v2::FileParsers::MultithreadedSDMolSupplier::Parameters params;
77 params.numWriterThreads = numWriterThreads;
78 params.sizeInputQueue = sizeInputQueue;
79 params.sizeOutputQueue = sizeOutputQueue;
80 v2::FileParsers::MolFileParserParams parseParams;
81 parseParams.sanitize = sanitize;
82 parseParams.removeHs = removeHs;
83 parseParams.strictParsing = strictParsing;
84
// dp_supplier is not declared in this listing; presumably an owning smart
// pointer inherited from the base class - TODO confirm
85 dp_supplier.reset(new v2::FileParsers::MultithreadedSDMolSupplier(
86 fileName, params, parseParams));
87 }
88
//! Creates the wrapped v2 supplier reading from \c inStream.
//! \param inStream, takeOwnership
//!        forwarded unchanged to the v2 constructor; takeOwnership presumably
//!        decides whether the supplier deletes the stream - TODO confirm
//! \param sanitize, removeHs, strictParsing
//!        copied into the v2 MolFileParserParams given to the wrapped supplier
//! \param numWriterThreads, sizeInputQueue, sizeOutputQueue
//!        copied into the v2 supplier Parameters given to the wrapped supplier
89 explicit MultithreadedSDMolSupplier(
90 std::istream *inStream, bool takeOwnership = true, bool sanitize = true,
91 bool removeHs = true, bool strictParsing = true,
92 unsigned int numWriterThreads = 1, size_t sizeInputQueue = 5,
93 size_t sizeOutputQueue = 5) {
// translate the flat v1 argument list into the two v2 settings structs
94 v2::FileParsers::MultithreadedSDMolSupplier::Parameters params;
95 params.numWriterThreads = numWriterThreads;
96 params.sizeInputQueue = sizeInputQueue;
97 params.sizeOutputQueue = sizeOutputQueue;
98 v2::FileParsers::MolFileParserParams parseParams;
99 parseParams.sanitize = sanitize;
100 parseParams.removeHs = removeHs;
101 parseParams.strictParsing = strictParsing;
102
// dp_supplier is not declared in this listing; presumably an owning smart
// pointer inherited from the base class - TODO confirm
103 dp_supplier.reset(new v2::FileParsers::MultithreadedSDMolSupplier(
104 inStream, takeOwnership, params, parseParams));
105 }
106
107 //! returns the wrapped supplier's getEOFHitOnRead() value, or false if no supplier is set
//! (a missing supplier is not treated as a PRECONDITION failure here)
108 bool getEOFHitOnRead() const {
109 if (dp_supplier) {
110 return static_cast<ContainedType *>(dp_supplier.get())->getEOFHitOnRead();
111 }
112 return false;
113 }
114
115 //! returns the record id of the last extracted item
116 //! Note: d_LastRecordId is initially 0, so the value 0 is returned
117 //! if and only if the function is called before extracting the first
118 //! record
//! Forwards to the wrapped supplier; PRECONDITION fails if no supplier is set.
119 unsigned int getLastRecordId() const {
120 PRECONDITION(dp_supplier, "no supplier");
121 return static_cast<ContainedType *>(dp_supplier.get())->getLastRecordId();
122 }
123 //! returns the text block for the last extracted item
//! Forwards to the wrapped supplier; PRECONDITION fails if no supplier is set.
124 std::string getLastItemText() const {
125 PRECONDITION(dp_supplier, "no supplier");
126 return static_cast<ContainedType *>(dp_supplier.get())->getLastItemText();
127 }
//! passes \c val to the wrapped supplier's setProcessPropertyLists()
//! PRECONDITION fails if no supplier is set.
128 void setProcessPropertyLists(bool val) {
129 PRECONDITION(dp_supplier, "no supplier");
130 static_cast<ContainedType *>(dp_supplier.get())
131 ->setProcessPropertyLists(val);
132 }
//! returns the wrapped supplier's getProcessPropertyLists() value
//! PRECONDITION fails if no supplier is set.
133 bool getProcessPropertyLists() const {
134 PRECONDITION(dp_supplier, "no supplier");
135 return static_cast<ContainedType *>(dp_supplier.get())
136 ->getProcessPropertyLists();
137 }
138};
139} // namespace v1
140} // namespace RDKit
141#endif
142#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:161
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
bool rdvalue_is(const RDValue_cast_t)