SLNParseOps.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
00003 //  All rights reserved.
00004 // 
00005 // Redistribution and use in source and binary forms, with or without
00006 // modification, are permitted provided that the following conditions are
00007 // met: 
00008 //
00009 //     * Redistributions of source code must retain the above copyright 
00010 //       notice, this list of conditions and the following disclaimer.
00011 //     * Redistributions in binary form must reproduce the above
00012 //       copyright notice, this list of conditions and the following 
00013 //       disclaimer in the documentation and/or other materials provided 
00014 //       with the distribution.
00015 //     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
00016 //       nor the names of its contributors may be used to endorse or promote 
00017 //       products derived from this software without specific prior
00018 //       written permission.
00019 //
00020 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00021 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00022 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00023 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00024 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00026 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00027 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00028 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00030 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031 //
00032 // Created by Greg Landrum, September 2006
00033 //
00034 #ifndef __RD_SLNPARSEOPS_H__
00035 #define __RD_SLNPARSEOPS_H__
00036 
00037 #include <vector>
00038 #include <GraphMol/SLNParse/SLNParse.h>
00039 #include <GraphMol/SLNParse/SLNAttribs.h>
00040 #include <GraphMol/RDKitBase.h>
00041 #include <GraphMol/RDKitQueries.h>
00042 #include <boost/lexical_cast.hpp>
00043 
00044 namespace RDKit{
00045   namespace SLNParse{
00046     namespace {
00047       //!  set a bookmark in the molecule if the atom has an associated ID:
00048       void bookmarkAtomID(RWMol *mp,Atom *atom){
00049         PRECONDITION(mp,"bad molecule");
00050         PRECONDITION(atom,"bad atom");
00051         if(atom->hasProp("_AtomID")){
00052           unsigned int label;
00053           atom->getProp("_AtomID",label);
00054           if(mp->hasAtomBookmark(label)){
00055             std::stringstream err;
00056             err << "SLN Parser error: Atom ID " << label << " used a second time.";
00057             throw SLNParseException(err.str());
00058           }
00059           if(mp->hasBondBookmark(label)){
00060             std::stringstream err;
00061             err << "SLN Parser error: Atom ID " << label << " appears *after* its ring closure.";
00062             throw SLNParseException(err.str());
00063           }
00064           mp->setAtomBookmark(atom,label);
00065         }
00066       }
00067 
00068       //! adds a bond, being careful to handle aromaticity properly
00069       template<typename BondType>
00070       void addBondToMol(RWMol *mp,BondType *bond){
00071         PRECONDITION(mp,"null molecule");
00072         PRECONDITION(bond,"null bond");
00073         mp->addBond(bond,true);
00074         if(bond->getBondType()==Bond::AROMATIC){
00075           // SLN doesn't have aromatic atom types, aromaticity is a property
00076           // of the bonds themselves, so we need to set the atom types:
00077           bond->setIsAromatic(true);
00078           bond->getBeginAtom()->setIsAromatic(true);
00079           bond->getEndAtom()->setIsAromatic(true);
00080         }
00081       }
00082     }// end of anonymous namespace
00083 
00084     // ------------------------------------------------------------------------------------
00085     //! initialize a molecule
00086     template <typename AtomType>
00087     int startMol(std::vector<RWMol *> &molList,AtomType *firstAtom,bool doingQuery){
00088       PRECONDITION(firstAtom,"empty atom");
00089       RWMol *mp = new RWMol();
00090       mp->addAtom(firstAtom,true,true);
00091       bookmarkAtomID(mp,firstAtom);
00092 
00093       if(!doingQuery){
00094         // add any hydrogens that are set on the atom, otherwise getting the numbering right
00095         // is just too hard:
00096         for(unsigned int i=0;i<firstAtom->getNumExplicitHs();++i){
00097           int hIdx=mp->addAtom(new Atom(1),false,true);
00098           mp->addBond(0,hIdx,Bond::SINGLE);
00099         }
00100         firstAtom->setNumExplicitHs(0);
00101       }
00102 
00103       int sz = molList.size();
00104       molList.push_back(mp);
00105       return sz;
00106     };
00107 
00108     // ------------------------------------------------------------------------------------
00109     //! adds an atom to a molecule
00110     template<typename AtomType,typename BondType>
00111     void addAtomToMol(std::vector<RWMol *> &molList,unsigned int idx,AtomType *atom,
00112                       BondType *bond,bool doingQuery){
00113       PRECONDITION(idx<molList.size(),"bad index");
00114       RWMol *mp=molList[idx];
00115       PRECONDITION(mp,"null molecule");
00116       PRECONDITION(atom,"empty atom");
00117       PRECONDITION(bond,"null bond");
00118 
00119       Atom *a1 = mp->getActiveAtom();
00120       int atomIdx1=a1->getIdx();
00121       int atomIdx2=mp->addAtom(atom,true,true);
00122       bookmarkAtomID(mp,atom);
00123       bond->setOwningMol(mp);
00124       bond->setBeginAtomIdx(atomIdx1);
00125       bond->setEndAtomIdx(atomIdx2);
00126       addBondToMol(mp,bond);
00127     
00128       if(!doingQuery){
00129         // add any hydrogens that are set on the atom, otherwise getting the numbering right
00130         // is just too hard:
00131         for(unsigned int i=0;i<atom->getNumExplicitHs();++i){
00132           int hIdx=mp->addAtom(new Atom(1),false,true);
00133           mp->addBond(atomIdx2,hIdx,Bond::SINGLE);
00134         }
00135         atom->setNumExplicitHs(0);
00136       }      
00137     }
00138     //! \overload
00139     template<typename AtomType>
00140     void addAtomToMol(std::vector<RWMol *> &molList,unsigned int idx,AtomType *atom,bool doingQuery){
00141       addAtomToMol(molList,idx,atom,new Bond(Bond::SINGLE),doingQuery);
00142     }
00143 
00144     // ------------------------------------------------------------------------------------
00145     //! closes an indexed ring in a molecule using the bond provided
00146     // The bond is formed from the atom in the molecule with the
00147     // corresponding bookmark to the active atom
00148     //
00149     template <typename BondType>
00150     void closeRingBond(std::vector<RWMol *> &molList,unsigned int molIdx,
00151                        unsigned int ringIdx,BondType *bond,
00152                        bool postponeAllowed=true){
00153       PRECONDITION(molIdx<molList.size(),"bad index");
00154       RWMol *mp=molList[molIdx];
00155       PRECONDITION(mp,"null molecule");
00156       PRECONDITION(bond,"Null bond");
00157 
00158       if(!mp->hasAtomBookmark(ringIdx)){
00159         if(postponeAllowed){
00160           // save it for later:
00161           bond->setOwningMol(mp);
00162           bond->setEndAtomIdx(mp->getActiveAtom()->getIdx());
00163           mp->setBondBookmark(bond,ringIdx);
00164           return;
00165         } else {
00166           std::stringstream err;
00167           err << "SLN Parser error: Ring closure " << ringIdx << " does not have a corresponding opener.";
00168           throw SLNParseException(err.str());
00169         }
00170       }
00171       Atom *opener=mp->getAtomWithBookmark(ringIdx);
00172       CHECK_INVARIANT(opener,"invalid atom");
00173 
00174       Atom *closer=mp->getActiveAtom();
00175       bond->setOwningMol(mp);
00176       bond->setBeginAtom(opener);
00177       bond->setEndAtom(closer);
00178       addBondToMol(mp,bond);
00179     };
00180     //! \overload
00181     void closeRingBond(std::vector<RWMol *> &molList,unsigned int molIdx,unsigned int ringIdx){
00182       closeRingBond(molList,molIdx,ringIdx,new Bond(Bond::SINGLE));
00183     };
00184 
00185     // ------------------------------------------------------------------------------------
00186     // NOTE: this takes over responsibility for the bond
00187     template <typename BondType>
00188     int addBranchToMol(std::vector<RWMol *> &molList,unsigned int molIdx,
00189                        unsigned int branchIdx,BondType *&bond){
00190       PRECONDITION(molIdx<molList.size(),"bad index");
00191       RWMol *mp=molList[molIdx];
00192       PRECONDITION(mp,"null molecule");
00193       PRECONDITION(branchIdx<molList.size(),"bad index");
00194       RWMol *branch=molList[branchIdx];
00195       PRECONDITION(branch,"null branch");
00196       PRECONDITION(bond,"null bond");
00197 
00198       unsigned int activeAtomIdx=mp->getActiveAtom()->getIdx();
00199       unsigned int nOrigAtoms=mp->getNumAtoms();
00200 
00201       //
00202       // Add the fragment's atoms and bonds to the molecule:
00203       //
00204       mp->insertMol(*branch);
00205 
00206       // copy in any atom bookmarks from the branch:
00207       for(ROMol::ATOM_BOOKMARK_MAP::const_iterator bmIt=branch->getAtomBookmarks()->begin();
00208           bmIt != branch->getAtomBookmarks()->end();++bmIt){
00209         if(bmIt->first<0) continue;
00210         if(mp->hasAtomBookmark(bmIt->first)){
00211           std::stringstream err;
00212           err << "SLN Parser error: Atom ID " << bmIt->first << " used a second time.";
00213           throw SLNParseException(err.str());
00214         } else if(mp->hasBondBookmark(bmIt->first)){
00215             std::stringstream err;
00216             err << "SLN Parser error: Atom ID " << bmIt->first << " appears *after* its ring closure.";
00217             throw SLNParseException(err.str());
00218           }
00219         else {
00220           CHECK_INVARIANT(bmIt->second.size()==1,"bad atom bookmark list on branch");
00221           Atom *tgtAtom=mp->getAtomWithIdx((*bmIt->second.begin())->getIdx()+nOrigAtoms);
00222           mp->setAtomBookmark(tgtAtom,bmIt->first);
00223         }
00224       }
00225       
00226       // loop over bond bookmarks in the branch and close the corresponding rings
00227       for(ROMol::BOND_BOOKMARK_MAP::const_iterator bmIt=branch->getBondBookmarks()->begin();
00228           bmIt != branch->getBondBookmarks()->end();++bmIt){
00229         CHECK_INVARIANT(bmIt->second.size()>=1,"bad bond bookmark list on branch");
00230         for(ROMol::BOND_PTR_LIST::const_iterator bondIt=bmIt->second.begin();
00231             bondIt!=bmIt->second.end();++bondIt){
00232           Bond *tgtBond=*bondIt;
00233           if(bmIt->first>0 && mp->hasAtomBookmark(bmIt->first)){
00234             Atom *tmpAtom=mp->getActiveAtom();
00235             mp->setActiveAtom(mp->getAtomWithIdx(tgtBond->getEndAtomIdx()+nOrigAtoms));
00236             closeRingBond(molList,molIdx,bmIt->first,tgtBond,false);
00237             mp->setActiveAtom(tmpAtom);
00238           } else {
00239             // no partner found yet, copy into this mol:
00240             tgtBond->setOwningMol(mp);
00241             tgtBond->setEndAtomIdx(tgtBond->getEndAtomIdx()+nOrigAtoms);
00242             mp->setBondBookmark(tgtBond,bmIt->first);
00243           }
00244         }
00245       }
00246       
00247       // set the connecting bond:
00248       if(bond->getBondType()!=Bond::IONIC){
00249         bond->setOwningMol(mp);
00250         bond->setBeginAtomIdx(activeAtomIdx);
00251         bond->setEndAtomIdx(nOrigAtoms);
00252         addBondToMol(mp,bond);
00253       } else {
00254         delete bond;
00255       }
00256       bond=0;
00257         
00258       delete branch;
00259       unsigned int sz = molList.size();
00260       if ( sz==branchIdx+1) {
00261         molList.resize( sz-1 );
00262       }
00263       return molIdx;
00264     };
00265     //! \overload
00266     int addBranchToMol(std::vector<RWMol *> &molList,unsigned int molIdx,unsigned int branchIdx){
00267       Bond *newBond=new Bond(Bond::SINGLE);
00268       return addBranchToMol(molList,molIdx,branchIdx,newBond);
00269     };
00270 
00271     // ------------------------------------------------------------------------------------
00272     //! adds the atoms and bonds from a fragment to the molecule, sets no bond between them
00273     int addFragToMol(std::vector<RWMol *> &molList,unsigned int molIdx,unsigned int fragIdx){
00274       Bond *newBond=new Bond(Bond::IONIC);
00275       return addBranchToMol(molList,molIdx,fragIdx,newBond);
00276     }
00277 
00278     //! convenience function to convert the argument to a string
00279     template <typename T>
00280     std::string convertToString(T val){
00281       std::string res=boost::lexical_cast<std::string>(val);
00282       return  res;
00283     }
00284 
00285     void CleanupAfterParseError(RWMol *mol){
00286       PRECONDITION(mol,"no molecule");
00287       // blow out any partial bonds:
00288       RWMol::BOND_BOOKMARK_MAP *marks = mol->getBondBookmarks();
00289       RWMol::BOND_BOOKMARK_MAP::iterator markI=marks->begin();
00290       while(markI != marks->end()){
00291         RWMol::BOND_PTR_LIST &bonds=markI->second;
00292         for(RWMol::BOND_PTR_LIST::iterator bondIt=bonds.begin();
00293             bondIt!=bonds.end();++bondIt){
00294           delete *bondIt;
00295         }
00296         ++markI;
00297       }
00298     }
00299   } // end of namespace SLNParse
00300 } // end of namespace RDKit
00301 #endif