RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolWriters.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2017 Greg Landrum, Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef _RD_MOLWRITERS_H_
13#define _RD_MOLWRITERS_H_
14
15#include <RDGeneral/types.h>
16
17#include <memory>
18#include <string>
19
20#include <boost/noncopyable.hpp>
21
22#ifdef RDK_BUILD_MAEPARSER_SUPPORT
23namespace schrodinger {
24namespace mae {
25class Writer;
26} // namespace mae
27} // namespace schrodinger
28#endif // RDK_BUILD_MAEPARSER_SUPPORT
29
30#include <GraphMol/ROMol.h>
31
32namespace RDKit {
33
//! Default value for the \c confId argument of the writers' \c write() and
//! \c getText() methods.
/*!
  Declared \c constexpr (rather than a plain mutable \c static) so that the
  value cannot be modified by accident and so that the default arguments
  naming it in the class definitions below denote the same constant in every
  translation unit that includes this header.
*/
static constexpr int defaultConfId = -1;
35
36class RDKIT_FILEPARSERS_EXPORT MolWriter : private boost::noncopyable {
37 public:
38 virtual ~MolWriter() {}
39 virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
40 virtual void flush() = 0;
41 virtual void close() = 0;
42 virtual void setProps(const STR_VECT &propNames) = 0;
43 virtual unsigned int numMols() const = 0;
44};
45
46//! The SmilesWriter is for writing molecules and properties to
47//! delimited text files.
49 /******************************************************************************
50 * A Smiles Table writer - this is how it is used
51 * - create a SmilesWriter with a output file name (or a ostream), a
52 *delimiter,
53 * and a list of properties that need to be written out
54 * - then a call is made to the write function for each molecule that needs
55 *to
56 * be written out
57 ******************************************************************************/
58 public:
59 /*!
60 \param fileName : filename to write to ("-" to write to stdout)
61 \param delimiter : delimiter to use in the text file
62 \param nameHeader : used to label the name column in the output. If this
63 is provided as the empty string, no names will be
64 written.
65 \param includeHeader : toggles inclusion of a header line in the output
66 \param isomericSmiles : toggles generation of isomeric SMILES
67 \param kekuleSmiles : toggles the generation of kekule SMILES
68
69 */
70 SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
71 const std::string &nameHeader = "Name",
72 bool includeHeader = true, bool isomericSmiles = true,
73 bool kekuleSmiles = false);
74 //! \overload
75 SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
76 std::string nameHeader = "Name", bool includeHeader = true,
77 bool takeOwnership = false, bool isomericSmiles = true,
78 bool kekuleSmiles = false);
79
80 ~SmilesWriter() override;
81
82 //! \brief set a vector of property names that are need to be
83 //! written out for each molecule
84 void setProps(const STR_VECT &propNames) override;
85
86 //! \brief write a new molecule to the file
87 void write(const ROMol &mol, int confId = defaultConfId) override;
88
89 //! \brief flush the ostream
90 void flush() override {
91 PRECONDITION(dp_ostream, "no output stream");
92 try {
93 dp_ostream->flush();
94 } catch (...) {
95 try {
96 if (dp_ostream->good()) {
97 dp_ostream->setstate(std::ios::badbit);
98 }
99 } catch (const std::runtime_error &) {
100 }
101 }
102 }
103
104 //! \brief close our stream (the writer cannot be used again)
105 void close() override {
106 if (dp_ostream) {
107 flush();
108 }
109 if (df_owner) {
110 delete dp_ostream;
111 df_owner = false;
112 }
113 dp_ostream = nullptr;
114 }
115
116 //! \brief get the number of molecules written so far
117 unsigned int numMols() const override { return d_molid; }
118
119 private:
120 // local initialization
121 void init(const std::string &delimiter, const std::string &nameHeader,
122 bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
123
124 // dumps a header line to the output stream
125 void dumpHeader() const;
126
127 std::ostream *dp_ostream;
128 bool df_owner;
129 bool df_includeHeader; // whether or not to include a title line
130 unsigned int d_molid; // the number of the molecules we wrote so far
131 std::string d_delim; // delimiter string between various records
132 std::string d_nameHeader; // header for the name column in the output file
133 STR_VECT d_props; // list of property name that need to be written out
134 bool df_isomericSmiles; // whether or not to do isomeric smiles
135 bool df_kekuleSmiles; // whether or not to do kekule smiles
136};
137
138//! The SDWriter is for writing molecules and properties to
139//! SD files
141 /**************************************************************************************
142 * A SD file ( or stream) writer - this is how it is used
143 * - create a SDWriter with a output file name (or a ostream),
144 * and a list of properties that need to be written out
145 * - then a call is made to the write function for each molecule that needs
146 *to be written out
147 **********************************************************************************************/
148 public:
149 /*!
150 \param fileName : filename to write to ("-" to write to stdout)
151 */
152 SDWriter(const std::string &fileName);
153 SDWriter(std::ostream *outStream, bool takeOwnership = false);
154
155 ~SDWriter() override;
156
157 //! \brief set a vector of property names that are need to be
158 //! written out for each molecule
159 void setProps(const STR_VECT &propNames) override;
160
161 //! \brief return the text that would be written to the file
162 static std::string getText(const ROMol &mol, int confId = defaultConfId,
163 bool kekulize = true, bool force_V3000 = false,
164 int molid = -1, STR_VECT *propNames = nullptr);
165
166 //! \brief write a new molecule to the file
167 void write(const ROMol &mol, int confId = defaultConfId) override;
168
169 //! \brief flush the ostream
170 void flush() override {
171 PRECONDITION(dp_ostream, "no output stream");
172 try {
173 dp_ostream->flush();
174 } catch (...) {
175 try {
176 if (dp_ostream->good()) {
177 dp_ostream->setstate(std::ios::badbit);
178 }
179 } catch (const std::runtime_error &) {
180 }
181 }
182 }
183
184 //! \brief close our stream (the writer cannot be used again)
185 void close() override {
186 if (dp_ostream) {
187 flush();
188 }
189 if (df_owner) {
190 delete dp_ostream;
191 df_owner = false;
192 }
193 dp_ostream = nullptr;
194 }
195
196 //! \brief get the number of molecules written so far
197 unsigned int numMols() const override { return d_molid; }
198
199 void setForceV3000(bool val) { df_forceV3000 = val; }
200 bool getForceV3000() const { return df_forceV3000; }
201
202 void setKekulize(bool val) { df_kekulize = val; }
203 bool getKekulize() const { return df_kekulize; }
204
205 private:
206 void writeProperty(const ROMol &mol, const std::string &name);
207
208 std::ostream *dp_ostream;
209 bool df_owner;
210 unsigned int d_molid; // the number of the molecules we wrote so far
211 STR_VECT d_props; // list of property name that need to be written out
212 bool df_forceV3000; // force writing the mol blocks as V3000
213 bool df_kekulize; // toggle kekulization of molecules on writing
214};
215
216//! The TDTWriter is for writing molecules and properties to
217//! TDT files
219 /**************************************************************************************
220 * A TDT file ( or stream) writer - this is how it is used
221 * - create a TDTWriter with a output file name (or a ostream),
222 * and a list of properties that need to be written out
223 * - then a call is made to the write function for each molecule that needs
224 *to be written out
225 **********************************************************************************************/
226 public:
227 /*!
228 \param fileName : filename to write to ("-" to write to stdout)
229 */
230 TDTWriter(const std::string &fileName);
231 TDTWriter(std::ostream *outStream, bool takeOwnership = false);
232
233 ~TDTWriter() override;
234
235 //! \brief set a vector of property names that are need to be
236 //! written out for each molecule
237 void setProps(const STR_VECT &propNames) override;
238
239 //! \brief write a new molecule to the file
240 void write(const ROMol &mol, int confId = defaultConfId) override;
241
242 //! \brief flush the ostream
243 void flush() override {
244 PRECONDITION(dp_ostream, "no output stream");
245 try {
246 dp_ostream->flush();
247 } catch (...) {
248 try {
249 if (dp_ostream->good()) {
250 dp_ostream->setstate(std::ios::badbit);
251 }
252 } catch (const std::runtime_error &) {
253 }
254 }
255 }
256
257 //! \brief close our stream (the writer cannot be used again)
258 void close() override {
259 if (dp_ostream) {
260 // if we've written any mols, finish with a "|" line
261 if (d_molid > 0) {
262 *dp_ostream << "|\n";
263 }
264 flush();
265 }
266 if (df_owner) {
267 delete dp_ostream;
268 df_owner = false;
269 }
270 dp_ostream = nullptr;
271 }
272
273 //! \brief get the number of molecules written so far
274 unsigned int numMols() const override { return d_molid; }
275
276 void setWrite2D(bool state = true) { df_write2D = state; }
277 bool getWrite2D() const { return df_write2D; }
278
279 void setWriteNames(bool state = true) { df_writeNames = state; }
280 bool getWriteNames() const { return df_writeNames; }
281
282 void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; }
283 unsigned int getNumDigits() const { return d_numDigits; }
284
285 private:
286 void writeProperty(const ROMol &mol, const std::string &name);
287
288 std::ostream *dp_ostream;
289 bool df_owner;
290 unsigned int d_molid; // the number of molecules we wrote so far
291 STR_VECT d_props; // list of property name that need to be written out
292 bool df_write2D; // write 2D coordinates instead of 3D
293 bool df_writeNames; // write a name record for each molecule
294 unsigned int
295 d_numDigits; // number of digits to use in our output of coordinates;
296};
297
298//! The PDBWriter is for writing molecules to Brookhaven Protein
299//! DataBank format files.
301 public:
302 PDBWriter(const std::string &fileName, unsigned int flavor = 0);
303 PDBWriter(std::ostream *outStream, bool takeOwnership = false,
304 unsigned int flavor = 0);
305 ~PDBWriter() override;
306
307 //! \brief write a new molecule to the file
308 void write(const ROMol &mol, int confId = defaultConfId) override;
309
310 void setProps(const STR_VECT &) override {}
311
312 //! \brief flush the ostream
313 void flush() override {
314 PRECONDITION(dp_ostream, "no output stream");
315 try {
316 dp_ostream->flush();
317 } catch (...) {
318 try {
319 if (dp_ostream->good()) {
320 dp_ostream->setstate(std::ios::badbit);
321 }
322 } catch (const std::runtime_error &) {
323 }
324 }
325 }
326
327 //! \brief close our stream (the writer cannot be used again)
328 void close() override {
329 if (dp_ostream) {
330 flush();
331 }
332 if (df_owner) {
333 delete dp_ostream;
334 df_owner = false;
335 }
336 dp_ostream = nullptr;
337 }
338
339 //! \brief get the number of molecules written so far
340 unsigned int numMols() const override { return d_count; }
341
342 private:
343 std::ostream *dp_ostream;
344 unsigned int d_flavor;
345 unsigned int d_count;
346 bool df_owner;
347};
348
349#ifdef RDK_BUILD_MAEPARSER_SUPPORT
350
351class RDKIT_FILEPARSERS_EXPORT MaeWriter : public MolWriter {
352 /**************************************************************************************
353 * A highly experimental Maestro file (or stream) writer. Many features are
354 * not supported yet, e.g. chirality and bond stereo, stereo groups, substance
355 * groups, isotopes or dummy atoms. Note that except for stereochemistry
356 * labels these aren't supported by the MaeMolSupplier either.
357 *
358 * Usage:
359 * - create a MaeWriter with an output file name (or a ostream),
360 * and a list of mol/atom/bond properties that need to be written out.
361 * If no property names are specified, all properties will be exported.
362 * Properties that are specified, but are not present will be ignored.
363 * - then, a call is made to the write function for each molecule
364 * that needs to be written out.
365 *
366 * Notes:
367 * - kekulization is mandatory, as the Maestro format does not
368 * have the concept of an aromatic bond.
369 * - Ownership of the output stream is mandatory, since it needs
370 * to be managed though a shared_ptr, as this is what maeparser
371 * writer takes.
372 ***************************************************************************************/
373 public:
374 /*!
375 \param fileName : filename to write to (stdout is *not* supported)
376 */
377 MaeWriter(const std::string &fileName);
378
379 /*!
380 \note Note that this takes ownership of the output stream.
381 */
382 MaeWriter(std::ostream *outStream);
383
384 MaeWriter(std::shared_ptr<std::ostream> outStream);
385
386 ~MaeWriter() override;
387
388 //! \brief set a vector of property names that are need to be
389 //! written out for each molecule
390 void setProps(const STR_VECT &propNames) override;
391
392 //! \brief return the text that would be written to the file
393 static std::string getText(const ROMol &mol, int confId = defaultConfId,
394 const STR_VECT &propNames = STR_VECT());
395
396 //! \brief write a new molecule to the file.
397 void write(const ROMol &mol, int confId = defaultConfId) override;
398
399 //! \brief flush the ostream
400 void flush() override;
401 //! \brief close our stream (the writer cannot be used again)
402 void close() override;
403
404 //! \brief get the number of molecules written so far
405 unsigned int numMols() const override { return d_molid; }
406
407 protected:
408 MaeWriter() = default; // used in the Python wrapper
409
410 std::shared_ptr<std::ostream> dp_ostream;
411
412 private:
413 void open();
414
415 std::unique_ptr<schrodinger::mae::Writer> dp_writer;
416 unsigned d_molid = 0; // the number of the molecules we wrote so far
417 STR_VECT d_props; // list of property name that need to be written out
418};
419
420#endif // RDK_BUILD_MAEPARSER_SUPPORT
421
422} // namespace RDKit
423
424#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
Defines the primary molecule class ROMol as well as associated typedefs.
virtual void flush()=0
virtual void write(const ROMol &mol, int confId=defaultConfId)=0
virtual ~MolWriter()
Definition MolWriters.h:38
virtual void close()=0
virtual void setProps(const STR_VECT &propNames)=0
virtual unsigned int numMols() const =0
PDBWriter(const std::string &fileName, unsigned int flavor=0)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void flush() override
flush the ostream
Definition MolWriters.h:313
PDBWriter(std::ostream *outStream, bool takeOwnership=false, unsigned int flavor=0)
void setProps(const STR_VECT &) override
Definition MolWriters.h:310
~PDBWriter() override
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:340
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:328
~SDWriter() override
bool getForceV3000() const
Definition MolWriters.h:200
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:197
SDWriter(std::ostream *outStream, bool takeOwnership=false)
bool getKekulize() const
Definition MolWriters.h:203
void flush() override
flush the ostream
Definition MolWriters.h:170
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
static std::string getText(const ROMol &mol, int confId=defaultConfId, bool kekulize=true, bool force_V3000=false, int molid=-1, STR_VECT *propNames=nullptr)
return the text that would be written to the file
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:185
void setForceV3000(bool val)
Definition MolWriters.h:199
SDWriter(const std::string &fileName)
void setKekulize(bool val)
Definition MolWriters.h:202
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:117
SmilesWriter(const std::string &fileName, const std::string &delimiter=" ", const std::string &nameHeader="Name", bool includeHeader=true, bool isomericSmiles=true, bool kekuleSmiles=false)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
~SmilesWriter() override
SmilesWriter(std::ostream *outStream, std::string delimiter=" ", std::string nameHeader="Name", bool includeHeader=true, bool takeOwnership=false, bool isomericSmiles=true, bool kekuleSmiles=false)
This is an overloaded member function, provided for convenience. It differs from the above function o...
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:105
void flush() override
flush the ostream
Definition MolWriters.h:90
~TDTWriter() override
bool getWrite2D() const
Definition MolWriters.h:277
void setNumDigits(unsigned int numDigits)
Definition MolWriters.h:282
void setWrite2D(bool state=true)
Definition MolWriters.h:276
void setProps(const STR_VECT &propNames) override
set a vector of property names that are need to be written out for each molecule
unsigned int numMols() const override
get the number of molecules written so far
Definition MolWriters.h:274
unsigned int getNumDigits() const
Definition MolWriters.h:283
TDTWriter(std::ostream *outStream, bool takeOwnership=false)
void close() override
close our stream (the writer cannot be used again)
Definition MolWriters.h:258
TDTWriter(const std::string &fileName)
void write(const ROMol &mol, int confId=defaultConfId) override
write a new molecule to the file
void setWriteNames(bool state=true)
Definition MolWriters.h:279
void flush() override
flush the ostream
Definition MolWriters.h:243
bool getWriteNames() const
Definition MolWriters.h:280
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:177
Std stuff.
std::vector< std::string > STR_VECT
Definition Dict.h:29
static int defaultConfId
Definition MolWriters.h:34