RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolWriters.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2017 Greg Landrum, Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef _RD_MOLWRITERS_H_
13#define _RD_MOLWRITERS_H_
14
15#include <RDGeneral/types.h>
16
17#include <iostream>
18#include <memory>
19#include <string>
20
21#include <boost/noncopyable.hpp>
22
23#ifdef RDK_BUILD_MAEPARSER_SUPPORT
24namespace schrodinger {
25namespace mae {
26class Writer;
27} // namespace mae
28} // namespace schrodinger
29#endif // RDK_BUILD_MAEPARSER_SUPPORT
30
31#include <GraphMol/ROMol.h>
32
33namespace RDKit {
34
//! Conformer id used as the default \c confId argument by the writers
//! declared in this header.
// Declared constexpr (instead of a mutable static) so that every translation
// unit including this header sees the same compile-time constant and nobody
// can modify a per-translation-unit copy by accident.
static constexpr int defaultConfId = -1;
36
37class RDKIT_FILEPARSERS_EXPORT MolWriter : private boost::noncopyable {
38 public:
39 virtual ~MolWriter() {}
40 virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
41 virtual void flush() = 0;
42 virtual void close() = 0;
43 virtual void setProps(const STR_VECT &propNames) = 0;
44 virtual unsigned int numMols() const = 0;
45};
46
47//! The SmilesWriter is for writing molecules and properties to
48//! delimited text files.
/******************************************************************************
 * A SMILES table writer - this is how it is used:
 *  - create a SmilesWriter with an output file name (or an ostream), a
 *    delimiter, and a list of properties that need to be written out
 *  - then call the write function for each molecule that needs to be
 *    written out
 ******************************************************************************/
59 public:
60 /*!
61 \param fileName : filename to write to ("-" to write to stdout)
62 \param delimiter : delimiter to use in the text file
63 \param nameHeader : used to label the name column in the output. If this
64 is provided as the empty string, no names will be
65 written.
66 \param includeHeader : toggles inclusion of a header line in the output
67 \param isomericSmiles : toggles generation of isomeric SMILES
68 \param kekuleSmiles : toggles the generation of kekule SMILES
69
70 */
71 SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
72 const std::string &nameHeader = "Name",
73 bool includeHeader = true, bool isomericSmiles = true,
74 bool kekuleSmiles = false);
75 //! \overload
76 SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
77 std::string nameHeader = "Name", bool includeHeader = true,
78 bool takeOwnership = false, bool isomericSmiles = true,
79 bool kekuleSmiles = false);
80
81 ~SmilesWriter() override;
82
//! \brief set the names of the properties that need to be
//! written out for each molecule
85 void setProps(const STR_VECT &propNames) override;
86
87 //! \brief write a new molecule to the file
88 void write(const ROMol &mol, int confId = defaultConfId) override;
89
90 //! \brief flush the ostream
91 void flush() override {
92 PRECONDITION(dp_ostream, "no output stream");
93 try {
94 dp_ostream->flush();
95 } catch (...) {
96 try {
97 if (dp_ostream->good()) {
98 dp_ostream->setstate(std::ios::badbit);
99 }
100 } catch (const std::runtime_error &) {
101 }
102 }
103 }
104
105 //! \brief close our stream (the writer cannot be used again)
106 void close() override {
107 if (dp_ostream) {
108 flush();
109 }
110 if (df_owner) {
111 delete dp_ostream;
112 df_owner = false;
113 }
114 dp_ostream = nullptr;
115 }
116
117 //! \brief get the number of molecules written so far
118 unsigned int numMols() const override { return d_molid; }
119
120 private:
121 // local initialization
122 void init(const std::string &delimiter, const std::string &nameHeader,
123 bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
124
125 // dumps a header line to the output stream
126 void dumpHeader() const;
127
128 std::ostream *dp_ostream;
129 bool df_owner;
130 bool df_includeHeader; // whether or not to include a title line
131 unsigned int d_molid; // the number of the molecules we wrote so far
132 std::string d_delim; // delimiter string between various records
133 std::string d_nameHeader; // header for the name column in the output file
134 STR_VECT d_props; // list of property name that need to be written out
135 bool df_isomericSmiles; // whether or not to do isomeric smiles
136 bool df_kekuleSmiles; // whether or not to do kekule smiles
137};
138
139//! The SDWriter is for writing molecules and properties to
140//! SD files
/******************************************************************************
 * An SD file (or stream) writer - this is how it is used:
 *  - create an SDWriter with an output file name (or an ostream),
 *    and a list of properties that need to be written out
 *  - then call the write function for each molecule that needs to be
 *    written out
 ******************************************************************************/
149 public:
150 /*!
151 \param fileName : filename to write to ("-" to write to stdout)
152 */
153 SDWriter(const std::string &fileName);
154 SDWriter(std::ostream *outStream, bool takeOwnership = false);
155
156 ~SDWriter() override;
157
//! \brief set the names of the properties that need to be
//! written out for each molecule
160 void setProps(const STR_VECT &propNames) override;
161
162 //! \brief return the text that would be written to the file
163 static std::string getText(const ROMol &mol, int confId = defaultConfId,
164 bool kekulize = true, bool force_V3000 = false,
165 int molid = -1, STR_VECT *propNames = nullptr);
166
167 //! \brief write a new molecule to the file
168 void write(const ROMol &mol, int confId = defaultConfId) override;
169
170 //! \brief flush the ostream
171 void flush() override {
172 PRECONDITION(dp_ostream, "no output stream");
173 try {
174 dp_ostream->flush();
175 } catch (...) {
176 try {
177 if (dp_ostream->good()) {
178 dp_ostream->setstate(std::ios::badbit);
179 }
180 } catch (const std::runtime_error &) {
181 }
182 }
183 }
184
185 //! \brief close our stream (the writer cannot be used again)
186 void close() override {
187 if (dp_ostream) {
188 flush();
189 }
190 if (df_owner) {
191 delete dp_ostream;
192 df_owner = false;
193 }
194 dp_ostream = nullptr;
195 }
196
197 //! \brief get the number of molecules written so far
198 unsigned int numMols() const override { return d_molid; }
199
200 void setForceV3000(bool val) { df_forceV3000 = val; }
201 bool getForceV3000() const { return df_forceV3000; }
202
203 void setKekulize(bool val) { df_kekulize = val; }
204 bool getKekulize() const { return df_kekulize; }
205
206 private:
207 void writeProperty(const ROMol &mol, const std::string &name);
208
209 std::ostream *dp_ostream;
210 bool df_owner;
211 unsigned int d_molid; // the number of the molecules we wrote so far
212 STR_VECT d_props; // list of property name that need to be written out
213 bool df_forceV3000; // force writing the mol blocks as V3000
214 bool df_kekulize; // toggle kekulization of molecules on writing
215};
216
217//! The TDTWriter is for writing molecules and properties to
218//! TDT files
/******************************************************************************
 * A TDT file (or stream) writer - this is how it is used:
 *  - create a TDTWriter with an output file name (or an ostream),
 *    and a list of properties that need to be written out
 *  - then call the write function for each molecule that needs to be
 *    written out
 ******************************************************************************/
227 public:
228 /*!
229 \param fileName : filename to write to ("-" to write to stdout)
230 */
231 TDTWriter(const std::string &fileName);
232 TDTWriter(std::ostream *outStream, bool takeOwnership = false);
233
234 ~TDTWriter() override;
235
//! \brief set the names of the properties that need to be
//! written out for each molecule
238 void setProps(const STR_VECT &propNames) override;
239
240 //! \brief write a new molecule to the file
241 void write(const ROMol &mol, int confId = defaultConfId) override;
242
243 //! \brief flush the ostream
244 void flush() override {
245 PRECONDITION(dp_ostream, "no output stream");
246 try {
247 dp_ostream->flush();
248 } catch (...) {
249 try {
250 if (dp_ostream->good()) {
251 dp_ostream->setstate(std::ios::badbit);
252 }
253 } catch (const std::runtime_error &) {
254 }
255 }
256 }
257
258 //! \brief close our stream (the writer cannot be used again)
259 void close() override {
260 if (dp_ostream) {
261 // if we've written any mols, finish with a "|" line
262 if (d_molid > 0) {
263 *dp_ostream << "|\n";
264 }
265 flush();
266 }
267 if (df_owner) {
268 delete dp_ostream;
269 df_owner = false;
270 }
271 dp_ostream = nullptr;
272 }
273
274 //! \brief get the number of molecules written so far
275 unsigned int numMols() const override { return d_molid; }
276
277 void setWrite2D(bool state = true) { df_write2D = state; }
278 bool getWrite2D() const { return df_write2D; }
279
280 void setWriteNames(bool state = true) { df_writeNames = state; }
281 bool getWriteNames() const { return df_writeNames; }
282
283 void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; }
284 unsigned int getNumDigits() const { return d_numDigits; }
285
286 private:
287 void writeProperty(const ROMol &mol, const std::string &name);
288
289 std::ostream *dp_ostream;
290 bool df_owner;
291 unsigned int d_molid; // the number of molecules we wrote so far
292 STR_VECT d_props; // list of property name that need to be written out
293 bool df_write2D; // write 2D coordinates instead of 3D
294 bool df_writeNames; // write a name record for each molecule
295 unsigned int
296 d_numDigits; // number of digits to use in our output of coordinates;
297};
298
299//! The PDBWriter is for writing molecules to Brookhaven Protein
300//! DataBank format files.
302 public:
303 PDBWriter(const std::string &fileName, unsigned int flavor = 0);
304 PDBWriter(std::ostream *outStream, bool takeOwnership = false,
305 unsigned int flavor = 0);
306 ~PDBWriter() override;
307
308 //! \brief write a new molecule to the file
309 void write(const ROMol &mol, int confId = defaultConfId) override;
310
311 void setProps(const STR_VECT &) override {}
312
313 //! \brief flush the ostream
314 void flush() override {
315 PRECONDITION(dp_ostream, "no output stream");
316 try {
317 dp_ostream->flush();
318 } catch (...) {
319 try {
320 if (dp_ostream->good()) {
321 dp_ostream->setstate(std::ios::badbit);
322 }
323 } catch (const std::runtime_error &) {
324 }
325 }
326 }
327
328 //! \brief close our stream (the writer cannot be used again)
329 void close() override {
330 if (dp_ostream) {
331 flush();
332 }
333 if (df_owner) {
334 delete dp_ostream;
335 df_owner = false;
336 }
337 dp_ostream = nullptr;
338 }
339
340 //! \brief get the number of molecules written so far
341 unsigned int numMols() const override { return d_count; }
342
343 private:
344 std::ostream *dp_ostream;
345 unsigned int d_flavor;
346 unsigned int d_count;
347 bool df_owner;
348};
349
350#ifdef RDK_BUILD_MAEPARSER_SUPPORT
351
352class RDKIT_FILEPARSERS_EXPORT MaeWriter : public MolWriter {
353 /**************************************************************************************
354 * A highly experimental Maestro file (or stream) writer. Many features are
355 * not supported yet, e.g. chirality and bond stereo, stereo groups, substance
356 * groups, isotopes or dummy atoms. Note that except for stereochemistry
357 * labels these aren't supported by the MaeMolSupplier either.
358 *
359 * Usage:
360 * - create a MaeWriter with an output file name (or a ostream),
361 * and a list of mol/atom/bond properties that need to be written out.
362 * If no property names are specified, all properties will be exported.
363 * Properties that are specified, but are not present will be ignored.
364 * - then, a call is made to the write function for each molecule
365 * that needs to be written out.
366 *
367 * Notes:
368 * - kekulization is mandatory, as the Maestro format does not
369 * have the concept of an aromatic bond.
370 * - Ownership of the output stream is mandatory, since it needs
371 * to be managed though a shared_ptr, as this is what maeparser
372 * writer takes.
373 ***************************************************************************************/
374 public:
375 /*!
376 \param fileName : filename to write to (stdout is *not* supported)
377 */
378 MaeWriter(const std::string &fileName);
379
380 /*!
381 \note Note that this takes ownership of the output stream.
382 */
383 MaeWriter(std::ostream *outStream);
384
385 MaeWriter(std::shared_ptr<std::ostream> outStream);
386
387 ~MaeWriter() override;
388
389 //! \brief set a vector of property names that are need to be
390 //! written out for each molecule
391 void setProps(const STR_VECT &propNames) override;
392
393 //! \brief return the text that would be written to the file
394 static std::string getText(const ROMol &mol, int confId = defaultConfId,
395 const STR_VECT &propNames = STR_VECT());
396
397 //! \brief write a new molecule to the file.
398 void write(const ROMol &mol, int confId = defaultConfId) override;
399
400 //! \brief flush the ostream
401 void flush() override;
402 //! \brief close our stream (the writer cannot be used again)
403 void close() override;
404
405 //! \brief get the number of molecules written so far
406 unsigned int numMols() const override { return d_molid; }
407
408 protected:
409 MaeWriter() = default; // used in the Python wrapper
410
411 std::shared_ptr<std::ostream> dp_ostream;
412
413 private:
414 void open();
415
416 std::unique_ptr<schrodinger::mae::Writer> dp_writer;
417 unsigned d_molid = 0; // the number of the molecules we wrote so far
418 STR_VECT d_props; // list of property name that need to be written out
419};
420
421#endif // RDK_BUILD_MAEPARSER_SUPPORT
422
423} // namespace RDKit
424
425#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
virtual void flush()=0
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
virtual ~MolWriter()
Definition MolWriters.h:39
virtual void close()=0
virtual void setProps(const STR_VECT &propNames)=0
virtual unsigned int numMols() const =0
PDBWriter(const std::string &fileName, unsigned int flavor=0)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void flush() override
flush the ostream
Definition MolWriters.h:314
PDBWriter(std::ostream *outStream, bool takeOwnership=false, unsigned int flavor=0)
void setProps(const STR_VECT &) override
Definition MolWriters.h:311
~PDBWriter() override
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:341
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:329
~SDWriter() override
bool getForceV3000() const
Definition MolWriters.h:201
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:198
SDWriter(std::ostream *outStream, bool takeOwnership=false)
bool getKekulize() const
Definition MolWriters.h:204
void flush() override
flush the ostream
Definition MolWriters.h:171
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
static std::string getText(const ROMol &mol, int confId=defaultConfId, bool kekulize=true, bool force_V3000=false, int molid=-1, STR_VECT *propNames=nullptr)
return the text that would be written to the file
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:186
void setForceV3000(bool val)
Definition MolWriters.h:200
SDWriter(const std::string &fileName)
void setKekulize(bool val)
Definition MolWriters.h:203
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:118
SmilesWriter(const std::string &fileName, const std::string &delimiter=" ", const std::string &nameHeader="Name", bool includeHeader=true, bool isomericSmiles=true, bool kekuleSmiles=false)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
~SmilesWriter() override
SmilesWriter(std::ostream *outStream, std::string delimiter=" ", std::string nameHeader="Name", bool includeHeader=true, bool takeOwnership=false, bool isomericSmiles=true, bool kekuleSmiles=false)
This is an overloaded member function, provided for convenience. It differs from the above function o...
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:106
void flush() override
flush the ostream
Definition MolWriters.h:91
~TDTWriter() override
bool getWrite2D() const
Definition MolWriters.h:278
void setNumDigits(unsigned int numDigits)
Definition MolWriters.h:283
void setWrite2D(bool state=true)
Definition MolWriters.h:277
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:275
unsigned int getNumDigits() const
Definition MolWriters.h:284
TDTWriter(std::ostream *outStream, bool takeOwnership=false)
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:259
TDTWriter(const std::string &fileName)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void setWriteNames(bool state=true)
Definition MolWriters.h:280
void flush() override
flush the ostream
Definition MolWriters.h:244
bool getWriteNames() const
Definition MolWriters.h:281
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:161
Std stuff.
std::vector< std::string > STR_VECT
Definition Dict.h:29
bool rdvalue_is(const RDValue_cast_t)
static int defaultConfId
Definition MolWriters.h:35