ChemTransforms.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2006-2011 Greg Landrum
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 #ifndef _RD_CHEMTRANSFORMS_H__
00011 #define _RD_CHEMTRANSFORMS_H__
00012 
00013 #include <boost/smart_ptr.hpp>
00014 #include <vector>
00015 
00016 namespace RDKit{
00017   class ROMol;  // forward declaration: this header only uses ROMol via pointer, reference or shared_ptr
00018   typedef boost::shared_ptr<ROMol>    ROMOL_SPTR;  //!< boost::shared_ptr handle to an ROMol
00019 
00020   //! \brief Returns a copy of an ROMol with the atoms and bonds that 
00021   //!      match a pattern removed.
00022   /*!
00023       \param mol        the ROMol of interest
00024       \param query      the query ROMol describing the pattern to be removed
00025       \param replaceAll if set, all matches of the query in \c mol are removed;
00026                         by default (false) only the first match is removed.
00027 
00028       \return a copy of \c mol with the matching atoms and bonds (if any) removed.
00029               NOTE(review): returned as a raw pointer; presumably the caller must delete it - confirm.
00030   */
00031   ROMol *deleteSubstructs(const ROMol &mol, const ROMol &query,
00032                           bool replaceAll=false);
00033 
00034   //! \brief Returns a list of copies of an ROMol with the atoms and bonds that 
00035   //!      match a pattern replaced with the atoms contained in another molecule.
00036   /*!
00037      Bonds are created between the joining atom in the existing molecule
00038      and the atoms in the new molecule. So, using SMILES instead of molecules:
00039             replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
00040                   ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
00041             replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
00042                   ['[X]NCCN[X]']
00043      Chains should be handled "correctly":
00044             replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
00045                   ['C[X]C']
00046      As should rings:
00047             replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
00048                   ['C1[X]C1']
00049      And higher order branches:
00050             replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
00051                   ['C[X](C)C']
00052      Note that the client is responsible for making sure that the
00053        resulting molecule actually makes sense - this function does not
00054        perform sanitization.
00055 
00056       \param mol         the ROMol of interest
00057       \param query       the query ROMol
00058       \param replacement the ROMol to be inserted
00059       \param replaceAll  if this is true, only a single result, with all occurrences
00060                          of the substructure replaced, will be returned.
00061                          If false (the default), each match is replaced in its own copy, as in the first example above.
00062       \return a vector of shared pointers (ROMOL_SPTR) to copies of \c mol with the
00063           matching atoms and bonds (if any) replaced
00064 
00065   */
00066   std::vector<ROMOL_SPTR> replaceSubstructs(const ROMol &mol, const ROMol &query,
00067                                             const ROMol &replacement,
00068                                             bool replaceAll=false);
00069 
00070   //! \brief Returns a copy of an ROMol with the atoms and bonds that 
00071   //!      don't fall within a substructure match removed.
00072   //!
00073   //!   Dummy atoms are left to indicate attachment points.
00074   //!
00075   /*!
00076       \param mol       the ROMol of interest
00077       \param coreQuery a query ROMol to be used to match the core; atoms of \c mol
00078                        outside the match are the sidechains that get removed
00079       \return a copy of \c mol with the non-matching atoms and bonds (if any) removed
00080               and dummies at the connection points. NOTE(review): raw pointer; presumably caller-owned - confirm.
00081   */
00082   ROMol *replaceSidechains(const ROMol &mol, const ROMol &coreQuery);
00083                           
00084   //! \brief Returns a copy of an ROMol with the atoms and bonds that 
00085   //!      do fall within a substructure match removed.
00086   //!
00087   //!   Dummy atoms are left to indicate attachment points.
00088   //!
00089   /*!
00090       Note that this is essentially identical to the replaceSidechains function, except we
00091       invert the query and replace the atoms that *do* match the query.
00092 
00093       \param mol               the ROMol of interest
00094       \param coreQuery         a query ROMol to be used to match the core
00095       \param replaceDummies    if set (the default), atoms matching dummies in the core will also be replaced
00096       \param labelByIndex      if set (default: false), the dummy atoms at attachment points are labelled
00097                                with the index+1 of the corresponding atom in the core
00098       \param requireDummyMatch if set (default: false), only side chains that are connected to atoms in
00099                                the core that have attached dummies will be considered.
00100                                Molecules that have sidechains that are attached
00101                                at other points will be rejected (NULL returned).
00102 
00103       \return a copy of \c mol with the matching (core) atoms and bonds
00104               removed and dummies at the connection points. The client is responsible
00105               for deleting this molecule. If the core query is not matched, NULL is returned.
00106   */
00107   ROMol *replaceCore(const ROMol &mol, const ROMol &coreQuery,
00108                      bool replaceDummies=true,bool labelByIndex=false,
00109                      bool requireDummyMatch=false);
00110         
00111   //! NOTE(review): undocumented in this header; presumably returns the Murcko framework (scaffold) of \c mol as a new, caller-owned molecule - confirm against the implementation.
00112   ROMol *MurckoDecompose(const ROMol &mol);
00113 
00114   
00115 }
00116 
00117 #endif
00118 
00119 
00120 
00121