MolWriters.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2002-2010 Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 
00011 #ifndef _RD_MOLWRITERS_H_
00012 #define _RD_MOLWRITERS_H_
00013 
00014 #include <RDGeneral/types.h>
00015 
00016 #include <string>
00017 #include <iostream>
00018 #include <GraphMol/ROMol.h>
00019 
00020 namespace RDKit {
00021 
00022   static int defaultConfId=-1;
00023   class MolWriter {
00024   public:
00025     virtual ~MolWriter() {}
00026     virtual void write(ROMol &mol,int confId=defaultConfId) = 0;
00027     virtual void flush() = 0;
00028     virtual void close() = 0;
00029     virtual void setProps(const STR_VECT &propNames)=0;
00030     virtual unsigned int numMols() const =0;
00031   };
00032 
00033   //! The SmilesWriter is for writing molecules and properties to
00034   //! delimited text files.
00035   class SmilesWriter : public MolWriter {
00036     /******************************************************************************
00037      * A Smiles Table writer - this is how it is used
00038      *  - create a SmilesWriter with a output file name (or a ostream), a delimiter,
00039      *     and a list of properties that need to be written out
00040      *  - then a call is made to the write function for each molecule that needs to
00041      *     be written out
00042      ******************************************************************************/
00043   public:
00044     /*!
00045       \param fileName       : filename to write to ("-" to write to stdout)
00046       \param delimiter      : delimiter to use in the text file
00047       \param nameHeader     : used to label the name column in the output. If this
00048                               is provided as the empty string, no names will be written.
00049       \param includeHeader  : toggles inclusion of a header line in the output
00050       \param isomericSmiles : toggles generation of isomeric SMILES
00051       \param kekuleSmiles   : toggles the generation of kekule SMILES
00052 
00053      */
00054     SmilesWriter(std::string fileName, 
00055                  std::string delimiter=" ",
00056                  std::string nameHeader="Name",
00057                  bool includeHeader=true,
00058                  bool isomericSmiles=false,
00059                  bool kekuleSmiles=false);
00060     //! \overload
00061     SmilesWriter(std::ostream *outStream, 
00062                  std::string delimiter=" ",
00063                  std::string nameHeader="Name",
00064                  bool includeHeader=true,
00065                  bool takeOwnership=false,
00066                  bool isomericSmiles=false,
00067                  bool kekuleSmiles=false);
00068                  
00069     ~SmilesWriter();
00070 
00071     //! \brief set a vector of property names that are need to be
00072     //! written out for each molecule
00073     void setProps(const STR_VECT &propNames);
00074 
00075     //! \brief write a new molecule to the file
00076     void write(ROMol &mol,int confId=defaultConfId);
00077 
00078     //! \brief flush the ostream
00079     void flush() {
00080       PRECONDITION(dp_ostream,"no output stream");
00081       dp_ostream->flush();
00082     };
00083 
00084     //! \brief close our stream (the writer cannot be used again)
00085     void close() {
00086       PRECONDITION(dp_ostream,"no output stream");
00087       dp_ostream->flush();
00088       if(df_owner) {
00089         delete dp_ostream;
00090         df_owner=false;
00091       }
00092       dp_ostream=NULL;
00093     };
00094 
00095     //! \brief get the number of molecules written so far
00096     unsigned int numMols() const { return d_molid;} ;
00097 
00098   private:
00099     // local initialization
00100     void init(std::string delimiter,std::string nameHeader,
00101               bool includeHeader,
00102               bool isomericSmiles,
00103               bool kekuleSmiles);
00104 
00105 
00106     // dumps a header line to the output stream
00107     void dumpHeader() const;
00108 
00109 
00110     std::ostream *dp_ostream;
00111     bool df_owner;
00112     bool df_includeHeader; // whether or not to include a title line
00113     unsigned int d_molid; // the number of the molecules we wrote so far
00114     std::string d_delim; // delimiter string between various records
00115     std::string d_nameHeader; // header for the name column in the output file
00116     STR_VECT d_props; // list of property name that need to be written out
00117     bool df_isomericSmiles; // whether or not to do isomeric smiles
00118     bool df_kekuleSmiles; // whether or not to do kekule smiles
00119   };
00120 
00121 
00122   //! The SDWriter is for writing molecules and properties to
00123   //! SD files 
00124   class SDWriter : public MolWriter {
00125     /**************************************************************************************
00126      * A SD file ( or stream) writer - this is how it is used
00127      *  - create a SDMolWriter with a output file name (or a ostream),
00128      *     and a list of properties that need to be written out
00129      *  - then a call is made to the write function for each molecule that needs to be written out
00130      **********************************************************************************************/
00131   public:
00132     /*!
00133       \param fileName       : filename to write to ("-" to write to stdout)
00134      */
00135     SDWriter(std::string fileName);
00136     SDWriter(std::ostream *outStream,bool takeOwnership=false);
00137 
00138     ~SDWriter();
00139 
00140     //! \brief set a vector of property names that are need to be
00141     //! written out for each molecule
00142     void setProps(const STR_VECT &propNames);
00143 
00144     //! \brief write a new molecule to the file
00145     void write(ROMol &mol, int confId=defaultConfId);
00146 
00147     //! \brief flush the ostream
00148     void flush() { 
00149       PRECONDITION(dp_ostream,"no output stream");
00150       dp_ostream->flush();
00151     } ;
00152 
00153     //! \brief close our stream (the writer cannot be used again)
00154     void close() {
00155       PRECONDITION(dp_ostream,"no output stream");
00156       // if we've written any mols, finish with a "$$$$" line
00157       if (d_molid > 0) {
00158         (*dp_ostream) << "$$$$\n";
00159       }
00160       dp_ostream->flush();
00161       if(df_owner) {
00162         delete dp_ostream;
00163         df_owner=false;
00164       }
00165       dp_ostream=NULL;
00166     };
00167 
00168     //! \brief get the number of molecules written so far
00169     unsigned int numMols() const { return d_molid; };
00170 
00171   private:
00172     void writeProperty(const ROMol &mol, std::string name);
00173 
00174     std::ostream *dp_ostream;
00175     bool df_owner;
00176     unsigned int d_molid; // the number of the molecules we wrote so far
00177     STR_VECT d_props; // list of property name that need to be written out
00178   };
00179 
00180   //! The TDTWriter is for writing molecules and properties to
00181   //! TDT files 
00182   class TDTWriter : public MolWriter {
00183     /**************************************************************************************
00184      * A TDT file ( or stream) writer - this is how it is used
00185      *  - create a TDTWriter with a output file name (or a ostream),
00186      *     and a list of properties that need to be written out
00187      *  - then a call is made to the write function for each molecule that needs to be written out
00188      **********************************************************************************************/
00189   public:
00190     /*!
00191       \param fileName       : filename to write to ("-" to write to stdout)
00192      */
00193     TDTWriter(std::string fileName);
00194     TDTWriter(std::ostream *outStream,bool takeOwnership=false);
00195 
00196     ~TDTWriter();
00197 
00198     //! \brief set a vector of property names that are need to be
00199     //! written out for each molecule
00200     void setProps(const STR_VECT &propNames);
00201 
00202     //! \brief write a new molecule to the file
00203     void write(ROMol &mol, int confId=defaultConfId);
00204 
00205     //! \brief flush the ostream
00206     void flush() { 
00207       PRECONDITION(dp_ostream,"no output stream");
00208       dp_ostream->flush();
00209     };
00210 
00211     //! \brief close our stream (the writer cannot be used again)
00212     void close() {
00213       PRECONDITION(dp_ostream,"no output stream");
00214       dp_ostream->flush();
00215       if(df_owner) {
00216         delete dp_ostream;
00217         df_owner=false;
00218       }
00219       dp_ostream=NULL;
00220     };
00221 
00222     //! \brief get the number of molecules written so far
00223     unsigned int numMols() const { return d_molid; };
00224 
00225     void setWrite2D(bool state=true) { df_write2D=state; };
00226     bool getWrite2D() const { return df_write2D; };
00227 
00228     void setWriteNames(bool state=true) { df_writeNames=state; };
00229     bool getWriteNames() const { return df_writeNames; };
00230 
00231     void setNumDigits(unsigned int numDigits) { d_numDigits=numDigits; };
00232     unsigned int getNumDigits() const { return d_numDigits;};
00233     
00234   private:
00235     void writeProperty(const ROMol &mol, std::string name);
00236 
00237     std::ostream *dp_ostream;
00238     bool df_owner;
00239     unsigned int d_molid; // the number of molecules we wrote so far
00240     STR_VECT d_props; // list of property name that need to be written out
00241     bool df_write2D; // write 2D coordinates instead of 3D
00242     bool df_writeNames; // write a name record for each molecule
00243     unsigned int d_numDigits; // number of digits to use in our output of coordinates;
00244   };
00245 
00246 }
00247 
00248 #endif
00249