FileParsers.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 #ifndef _RD_FILEPARSERS_H
00011 #define _RD_FILEPARSERS_H
00012 
00013 #include <RDGeneral/types.h>
00014 #include <GraphMol/RDKitBase.h>
00015 
00016 #include <string>
00017 #include <iostream>
00018 #include <vector>
00019 #include <exception>
00020 
00021 #include <boost/shared_ptr.hpp>
00022 
00023 namespace RDKit{
00024   const int MOLFILE_MAXLINE=256; //!< NOTE(review): presumably the maximum line length used when reading mol files - confirm against the parsers
00025   std::string strip(const std::string &orig); //!< returns a new string built from \c orig; NOTE(review): presumably with surrounding whitespace removed - confirm in the implementation
00026 
00027   //-----
00028   // mol files
00029   //-----
00030   typedef std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT; //!< a vector of RWMOL_SPTR
00031   //! \brief construct a molecule from MDL mol data in a stream
00032   /*! 
00033    *   \param inStream - pointer to the stream containing the data
00034    *   \param line     - current line number (used for error reporting)
00035    *   \param sanitize - toggles sanitization and stereochemistry
00036    *                     perception of the molecule
00037    *   \param removeHs - toggles removal of Hs from the molecule. H removal
00038    *                     is only done if the molecule is sanitized
00039    */
00040   RWMol *MolDataStreamToMol(std::istream *inStream, unsigned int &line,
00041                             bool sanitize=true,bool removeHs=true);
00042   //! \overload
00043   RWMol *MolDataStreamToMol(std::istream &inStream, unsigned int &line,
00044                             bool sanitize=true,bool removeHs=true);
00045   //! \brief construct a molecule from an MDL mol block
00046   /*! 
00047    *   \param molBlock - string containing the mol block
00048    *   \param sanitize - toggles sanitization and stereochemistry
00049    *                     perception of the molecule
00050    *   \param removeHs - toggles removal of Hs from the molecule. H removal
00051    *                     is only done if the molecule is sanitized
00052    */
00053   RWMol *MolBlockToMol(const std::string &molBlock, bool sanitize=true,
00054                        bool removeHs=true);
00055   
00056   //! \brief construct a molecule from an MDL mol file
00057   /*! 
00058    *   \param fName    - string containing the name of the file to read
00059    *   \param sanitize - toggles sanitization and stereochemistry
00060    *                     perception of the molecule
00061    *   \param removeHs - toggles removal of Hs from the molecule. H removal
00062    *                     is only done if the molecule is sanitized
00063    */
00064   RWMol *MolFileToMol(std::string fName, bool sanitize=true,
00065                       bool removeHs=true);
00066 
00067   //! \brief generates an MDL mol block for a molecule
00068   /*! 
00069    *   \param mol           - the molecule in question
00070    *   \param includeStereo - toggles inclusion of stereochemistry information
00071    *   \param confId        - selects the conformer to be used
00072    *   \param kekulize      - triggers kekulization of the molecule before it is written
00073    */
00074   std::string MolToMolBlock(const ROMol &mol,bool includeStereo=true,
00075                             int confId=-1,bool kekulize=true);
00076   //! \brief writes a molecule to an MDL mol file
00077   /*! 
00078    *   \param mol           - the molecule in question
00079    *   \param fName         - the name of the file to use
00080    *   \param includeStereo - toggles inclusion of stereochemistry information
00081    *   \param confId        - selects the conformer to be used
00082    *   \param kekulize      - triggers kekulization of the molecule before it is written
00083    */
00084   void MolToMolFile(const ROMol &mol,std::string fName,bool includeStereo=true,
00085                     int confId=-1,bool kekulize=true);
00086 
00087 
00088   //-----
00089   //  TPL handling:
00090   //-----
00091 
00092   //! \brief translate TPL data (BioCad format) into a multi-conf molecule
00093   /*!
00094     \param inStream:      the stream from which to read
00095     \param line:          used to track the line number of errors
00096     \param sanitize:      toggles sanitization and stereochemistry
00097                           perception of the molecule
00098     \param skipFirstConf: according to the TPL format description, the atomic
00099                           coords in the atom-information block describe the first
00100                           conformation and the first conf block describes the second
00101                           conformation. The CombiCode, on the other hand, writes
00102                           the first conformation data both to the atom-information
00103                           block and to the first conf block. We want to be able to
00104                           read CombiCode-style tpls, so this mis-feature is
00105                           accepted by the parser when this flag is set.
00106   */
00107   RWMol *TPLDataStreamToMol(std::istream *inStream, unsigned int &line,
00108                             bool sanitize=true,
00109                             bool skipFirstConf=false);
00110 
00111   //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
00112   /*!
00113     \param fName:         the name of the file from which to read
00114     \param sanitize:      toggles sanitization and stereochemistry
00115                           perception of the molecule
00116     \param skipFirstConf: according to the TPL format description, the atomic
00117                           coords in the atom-information block describe the first
00118                           conformation and the first conf block describes the second
00119                           conformation. The CombiCode, on the other hand, writes
00120                           the first conformation data both to the atom-information
00121                           block and to the first conf block. We want to be able to
00122                           read CombiCode-style tpls, so this mis-feature is
00123                           accepted by the parser when this flag is set.
00124   */
00125   RWMol *TPLFileToMol(std::string fName,bool sanitize=true,
00126                       bool skipFirstConf=false);
00127 
00128   std::string MolToTPLText(const ROMol &mol, // the molecule to convert
00129                            std::string partialChargeProp="_GasteigerCharge", // NOTE(review): presumably the name of the property holding partial charges - confirm
00130                            bool writeFirstConfTwice=false); //!< returns TPL (BioCad format) text for a molecule. NOTE(review): writeFirstConfTwice presumably produces the CombiCode-style output described for skipFirstConf in the TPL readers - confirm
00131   void MolToTPLFile(const ROMol &mol,std::string fName, // the molecule to write and the target file name
00132                     std::string partialChargeProp="_GasteigerCharge", // NOTE(review): presumably the name of the property holding partial charges - confirm
00133                     bool writeFirstConfTwice=false); //!< writes a molecule to a TPL (BioCad format) file. NOTE(review): writeFirstConfTwice presumably produces the CombiCode-style output described for skipFirstConf in the TPL readers - confirm
00134 
00135   //-----
00136   //  MOL2 handling
00137   //-----
00138 
00139   typedef enum {
00140     CORINA=0  //!< supports output from Corina and some dbtranslate output
00141   } Mol2Type;
00142 
00143   //! \brief construct a molecule from a Tripos mol2 file
00144   /*! 
00145    *
00146    *   \param fName    - string containing the file name
00147    *   \param sanitize - toggles sanitization of the molecule
00148    *   \param removeHs - toggles removal of Hs from the molecule. H removal
00149    *                     is only done if the molecule is sanitized
00150    *   \param variant  - the atom type definitions to use
00151    */
00152   RWMol *Mol2FileToMol(std::string fName,bool sanitize=true,bool removeHs=true,
00153                        Mol2Type variant=CORINA);
00154 
00155   //! \brief construct a molecule from Tripos mol2 data in a stream
00156   /*!  
00157    *   \param inStream - pointer to the stream containing the data
00158    *   \param sanitize - toggles sanitization of the molecule
00159    *   \param removeHs - toggles removal of Hs from the molecule. H removal
00160    *                     is only done if the molecule is sanitized
00161    *   \param variant  - the atom type definitions to use
00162    */
00163   RWMol *Mol2DataStreamToMol(std::istream *inStream,bool sanitize=true,bool removeHs=true,
00164                              Mol2Type variant=CORINA);
00165   //! \overload
00166   RWMol *Mol2DataStreamToMol(std::istream &inStream,bool sanitize=true,bool removeHs=true,
00167                              Mol2Type variant=CORINA);
00168 
00169   //! \brief construct a molecule from a Tripos mol2 block
00170   /*! 
00171    *   \param molBlock - string containing the mol2 block
00172    *   \param sanitize - toggles sanitization of the molecule
00173    *   \param removeHs - toggles removal of Hs from the molecule. H removal
00174    *                     is only done if the molecule is sanitized
00175    *   \param variant  - the atom type definitions to use
00176    */
00177   RWMol *Mol2BlockToMol(const std::string &molBlock,bool sanitize=true,bool removeHs=true,
00178                         Mol2Type variant=CORINA);
00179 
00180 }
00181 
00182 #endif