00001 // 00002 // Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC 00003 // 00004 // @@ All Rights Reserved @@ 00005 // 00006 #ifndef _RD_FILEPARSERS_H 00007 #define _RD_FILEPARSERS_H 00008 00009 #include <RDGeneral/types.h> 00010 #include <GraphMol/RDKitBase.h> 00011 00012 #include <string> 00013 #include <iostream> 00014 #include <vector> 00015 #include <exception> 00016 00017 #include <boost/shared_ptr.hpp> 00018 00019 namespace RDKit{ 00020 const int MOLFILE_MAXLINE=256; 00021 std::string strip(const std::string &orig); 00022 00023 //----- 00024 // mol files 00025 //----- 00026 typedef std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT; 00027 // \brief construct a molecule from MDL mol data in a stream 00028 /*! 00029 * \param inStream - stream containing the data 00030 * \param line - current line number (used for error reporting) 00031 * \param sanitize - toggles sanitization and stereochemistry 00032 * perception of the molecule 00033 * \param removeHs - toggles removal of Hs from the molecule. H removal 00034 * is only done if the molecule is sanitized 00035 */ 00036 RWMol *MolDataStreamToMol(std::istream *inStream, unsigned int &line, 00037 bool sanitize=true,bool removeHs=true); 00038 // \overload 00039 RWMol *MolDataStreamToMol(std::istream &inStream, unsigned int &line, 00040 bool sanitize=true,bool removeHs=true); 00041 // \brief construct a molecule from an MDL mol block 00042 /*! 00043 * \param molBlock - string containing the mol block 00044 * \param sanitize - toggles sanitization and stereochemistry 00045 * perception of the molecule 00046 * \param removeHs - toggles removal of Hs from the molecule. H removal 00047 * is only done if the molecule is sanitized 00048 */ 00049 RWMol *MolBlockToMol(const std::string &molBlock, bool sanitize=true, 00050 bool removeHs=true); 00051 00052 // \brief construct a molecule from an MDL mol file 00053 /*! 00054 * \param fName - string containing the file name 00055 * \param sanitize - toggles sanitization and stereochemistry 00056 * perception of the molecule 00057 * \param removeHs - toggles removal of Hs from the molecule. H removal 00058 * is only done if the molecule is sanitized 00059 */ 00060 RWMol *MolFileToMol(std::string fName, bool sanitize=true, 00061 bool removeHs=true); 00062 00063 // \brief generates an MDL mol block for a molecule 00064 /*! 00065 * \param mol - the molecule in question 00066 * \param includeStereo - toggles inclusion of stereochemistry information 00067 * \param confId - selects the conformer to be used 00068 */ 00069 std::string MolToMolBlock(const ROMol &mol,bool includeStereo=true, 00070 int confId=-1); 00071 // \brief construct a molecule from an MDL mol file 00072 /*! 00073 * \param mol - the molecule in question 00074 * \param fName - the name of the file to use 00075 * \param includeStereo - toggles inclusion of stereochemistry information 00076 * \param confId - selects the conformer to be used 00077 */ 00078 void MolToMolFile(const ROMol &mol,std::string fName,bool includeStereo=true, 00079 int confId=-1); 00080 00081 00082 //----- 00083 // TPL handling: 00084 //----- 00085 00086 //! \brief translate TPL data (BioCad format) into a multi-conf molecule 00087 /*! 00088 \param inStream: the stream from which to read 00089 \param line: used to track the line number of errors 00090 \param sanitize: toggles sanitization of the molecule 00091 \param skipFirstConf: according to the TPL format description, the atomic 00092 coords in the atom-information block describe the first 00093 conformation and the first conf block describes second 00094 conformation. The CombiCode, on the other hand, writes 00095 the first conformation data both to the atom-information 00096 block and to the first conf block. We want to be able to 00097 read CombiCode-style tpls, so we'll allow this mis-feature 00098 to be parsed when this flag is set. 00099 */ 00100 RWMol *TPLDataStreamToMol(std::istream *inStream, unsigned int &line, 00101 bool sanitize=true, 00102 bool skipFirstConf=false); 00103 00104 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file 00105 /*! 00106 \param fName: the name of the file from which to read 00107 \param sanitize: toggles sanitization of the molecule 00108 \param skipFirstConf: according to the TPL format description, the atomic 00109 coords in the atom-information block describe the first 00110 conformation and the first conf block describes second 00111 conformation. The CombiCode, on the other hand, writes 00112 the first conformation data both to the atom-information 00113 block and to the first conf block. We want to be able to 00114 read CombiCode-style tpls, so we'll allow this mis-feature 00115 to be parsed when this flag is set. 00116 */ 00117 RWMol *TPLFileToMol(std::string fName,bool sanitize=true, 00118 bool skipFirstConf=false); 00119 00120 std::string MolToTPLText(const ROMol &mol, 00121 std::string partialChargeProp="_GasteigerCharge", 00122 bool writeFirstConfTwice=false); 00123 void MolToTPLFile(const ROMol &mol,std::string fName, 00124 std::string partialChargeProp="_GasteigerCharge", 00125 bool writeFirstConfTwice=false); 00126 00127 00128 00129 } 00130 00131 #endif
1.5.5