Reaction.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (c) 2007, Novartis Institutes for BioMedical Research Inc.
00003 //  All rights reserved.
00004 // 
00005 // Redistribution and use in source and binary forms, with or without
00006 // modification, are permitted provided that the following conditions are
00007 // met: 
00008 //
00009 //     * Redistributions of source code must retain the above copyright 
00010 //       notice, this list of conditions and the following disclaimer.
00011 //     * Redistributions in binary form must reproduce the above
00012 //       copyright notice, this list of conditions and the following 
00013 //       disclaimer in the documentation and/or other materials provided 
00014 //       with the distribution.
00015 //     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
00016 //       nor the names of its contributors may be used to endorse or promote 
00017 //       products derived from this software without specific prior written permission.
00018 //
00019 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00020 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00021 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00022 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00023 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00025 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00026 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00027 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00029 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030 //
00031 
00032 #ifndef __RD_REACTION_H_17Aug2006__
00033 #define __RD_REACTION_H_17Aug2006__
00034 
00035 #include <GraphMol/RDKitBase.h>
00036 #include <GraphMol/Substruct/SubstructMatch.h>
00037 #include <vector>
00038 
00039 namespace RDKit{
00040   class ReactionPickler;
00041      
00042   //! used to indicate an error in the chemical reaction engine
00043   class ChemicalReactionException : public std::exception {
00044   public:
00045     //! construct with an error message
00046     explicit ChemicalReactionException(const char *msg) : _msg(msg) {};
00047     //! construct with an error message
00048     explicit ChemicalReactionException(const std::string msg) : _msg(msg) {};
00049     //! get the error message
00050     const char *message () const { return _msg.c_str(); };
00051     ~ChemicalReactionException () throw () {};
00052   private:
00053     std::string _msg;
00054   };
00055    
00056   //! This is a class for storing and applying general chemical reactions.
00057   /*!
00058      basic usage will be something like:
00059      
00060      \verbatim
00061      ChemicalReaction rxn;
00062      rxn.addReactantTemplate(r1);
00063      rxn.addReactantTemplate(r2);
00064      rxn.addProductTemplate(p1);
00065      rxn.initReactantMatchers();
00066      
00067      MOL_SPTR_VECT prods;
00068      for(MOL_SPTR_VECT::const_iterator r1It=reactantSet1.begin();
00069          r1It!=reactantSet1.end();++r1It;){
00070        for(MOL_SPTR_VECT::const_iterator r2It=reactantSet2.begin();
00071            r2It!=reactantSet2.end();++r2It;){
00072          MOL_SPTR_VECT rVect(2);
00073          rVect[0] = *r1It;
00074          rVect[1] = *r2It;
00075              
00076          std::vector<MOL_SPTR_VECT> lprods;
00077          lprods = rxn.runReactants(rVect);
00078          for(std::vector<MOL_SPTR_VECT>::const_iterator lpIt=lprods.begin();
00079             lpIt!=lprods.end();++lpIt){
00080             // we know this is a single-product reaction:
00081             prods.push_back((*lpIt)[0]);
00082          }
00083        }     
00084      }
00085      \endverbatim     
00086 
00087   */
00088   class ChemicalReaction {
00089   public:
00090     ChemicalReaction() : df_needsInit(true), df_implicitProperties(false) {};
00091     ChemicalReaction(const ChemicalReaction &other){
00092         df_needsInit=true;
00093         df_implicitProperties=other.df_implicitProperties;
00094         m_reactantTemplates=other.m_reactantTemplates;
00095         m_productTemplates=other.m_productTemplates;
00096     }
00097     //! construct a reaction from a pickle string
00098     ChemicalReaction(const std::string &binStr);
00099 
00100     //! Adds a new reactant template
00101     /*!
00102       \return the number of reactants
00103 
00104     */
00105     unsigned int addReactantTemplate(ROMOL_SPTR mol){
00106       this->df_needsInit = true;
00107       this->m_reactantTemplates.push_back(mol);
00108       return this->m_reactantTemplates.size();
00109     }
00110 
00111     //! Adds a new product template
00112     /*!
00113       \return the number of products
00114 
00115     */
00116     unsigned int addProductTemplate(ROMOL_SPTR mol){
00117       this->m_productTemplates.push_back(mol);
00118       return this->m_productTemplates.size();
00119     }
00120     
00121       
00122     //! Runs the reaction on a set of reactants
00123     /*!
00124      
00125       \param reactants: the reactants to be used. The length of this must be equal to
00126                         this->getNumReactantTemplates()
00127                          
00128       \return a vector of vectors of products. Each subvector will be 
00129               this->getNumProductTemplates() long.          
00130       
00131       We return a vector of vectors of products because each individual template may 
00132       map multiple times onto its reactant. This leads to multiple possible result
00133       sets.
00134     */
00135     std::vector<MOL_SPTR_VECT> runReactants(const MOL_SPTR_VECT reactants) const;
00136 
00137     MOL_SPTR_VECT::const_iterator beginReactantTemplates() const {
00138         return this->m_reactantTemplates.begin();    
00139     }
00140     MOL_SPTR_VECT::const_iterator endReactantTemplates() const {
00141         return this->m_reactantTemplates.end();    
00142     }
00143 
00144     MOL_SPTR_VECT::const_iterator beginProductTemplates() const {
00145         return this->m_productTemplates.begin();    
00146     }
00147     MOL_SPTR_VECT::const_iterator endProductTemplates() const {
00148         return this->m_productTemplates.end();    
00149     }
00150 
00151     MOL_SPTR_VECT::iterator beginReactantTemplates() {
00152         return this->m_reactantTemplates.begin();    
00153     }
00154     MOL_SPTR_VECT::iterator endReactantTemplates() {
00155         return this->m_reactantTemplates.end();    
00156     }
00157 
00158     MOL_SPTR_VECT::iterator beginProductTemplates() {
00159         return this->m_productTemplates.begin();    
00160     }
00161     MOL_SPTR_VECT::iterator endProductTemplates() {
00162         return this->m_productTemplates.end();    
00163     }
00164     unsigned int getNumReactantTemplates() const { return this->m_reactantTemplates.size(); };
00165     unsigned int getNumProductTemplates() const { return this->m_productTemplates.size(); };
00166 
00167     //! initializes our internal reactant-matching datastructures.
00168     /*! 
00169         This must be called after adding reactants and before calling
00170         runReactants.
00171     */
00172     void initReactantMatchers();
00173 
00174     bool isInitialized() const { return !df_needsInit; };
00175     
00176     //! validates the reactants and products to make sure the reaction seems "reasonable"
00177     /*! 
00178         \return   true if the reaction validates without errors (warnings do not stop
00179                   validation)
00180          
00181         \param numWarnings: used to return the number of validation warnings
00182         \param numErrors:   used to return the number of validation errors
00183         
00184         \param silent: If this bool is true, no messages will be logged during the validation. 
00185                        By default, validation problems are reported to the warning and error 
00186                        logs depending on their severity.
00187                        
00188     */
00189     bool validate(unsigned int &numWarnings,unsigned int &numErrors,bool silent=false) const;
00190         
00191 
00192     //! returns whether or not the reaction uses implicit
00193     //! properties on the product atoms
00194     /*!
00195 
00196       This toggles whether or not unspecified atomic properties in the
00197       products are considered to be implicit and should be copied from
00198       the actual reactants. This is necessary due to a semantic difference
00199       between the "reaction SMARTS" approach and the MDL RXN
00200       approach:
00201         In "reaction SMARTS", this reaction:
00202           [C:1]-[Br:2].[O-:3]>>[C:1]-[O:3].[Br-:2]
00203         applied to [CH4+]Br should yield [CH4+]O
00204         Something similar drawn in an rxn file, and applied to
00205         [CH4+]Br should yield [CH3]O. 
00206         In rxn there is no charge on the product C because nothing is
00207         specified in the rxn file; in "SMARTS" the charge from the
00208         actual reactants is not *removed* because no charge is
00209         specified in the reaction.
00210 
00211     */
00212     bool getImplicitPropertiesFlag() const { return df_implicitProperties; };
00213     //! sets the implicit properties flag. See the documentation for
00214     //! getImplicitProertiesFlag() for a discussion of what this means.
00215     void setImplicitPropertiesFlag(bool val) { df_implicitProperties=val; };
00216 
00217   private:
00218     bool df_needsInit;
00219     bool df_implicitProperties;
00220     MOL_SPTR_VECT m_reactantTemplates,m_productTemplates;
00221     ChemicalReaction &operator=(const ChemicalReaction &); // disable assignment
00222     MOL_SPTR_VECT generateOneProductSet(const MOL_SPTR_VECT &reactants,
00223                                         const std::vector<MatchVectType> &reactantsMatch) const;
00224   };
00225 
00226   //! tests whether or not the molecule has a substructure match
00227   //! to any of the reaction's reactants
00228   //! the \c which argument is used to return which of the reactants
00229   //! the molecule matches. If there's no match, it is equal to the number
00230   //! of reactants on return
00231   bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn,const ROMol &mol,
00232                                       unsigned int &which);
00233   //! tests whether or not the molecule has a substructure match
00234   //! to any of the reaction's products
00235   //! the \c which argument is used to return which of the products
00236   //! the molecule matches. If there's no match, it is equal to the number
00237   //! of products on return
00238   bool isMoleculeProductOfReaction(const ChemicalReaction &rxn,const ROMol &mol,
00239                                    unsigned int &which);
00240 
00241   
00242 } // end of RDKit namespace
00243 
00244 namespace RDDepict {
00245   //! \brief Generate 2D coordinates (a depiction) for a reaction
00246   /*! 
00247 
00248     \param rxn the reaction were are interested in
00249 
00250     \param spacing the spacing between components of the reaction
00251 
00252     \param updateProps if set, properties such as conjugation and
00253         hybridization will be calculated for the reactant and product
00254         templates before generating coordinates. This should result in
00255         better depictions, but can lead to errors in some cases.
00256 
00257     \param canonOrient canonicalize the orientation so that the long
00258     axes align with the x-axis etc.
00259 
00260     \param nFlipsPerSample - the number of rotatable bonds that are
00261     flipped at random for each sample
00262 
00263     \param nSamples - the number of samples
00264 
00265     \param sampleSeed - seed for the random sampling process
00266 
00267     \param permuteDeg4Nodes - try permuting the drawing order of bonds around
00268           atoms with four neighbors in order to improve the depiction
00269 
00270     for the other parameters see the documentation for compute2DCoords()
00271 
00272   */
00273   void compute2DCoordsForReaction(RDKit::ChemicalReaction &rxn,
00274                                   double spacing=2.0,
00275                                   bool updateProps=true,
00276                                   bool canonOrient=false,
00277                                   unsigned int nFlipsPerSample=0,
00278                                   unsigned int nSamples=0,
00279                                   int sampleSeed=0,
00280                                   bool permuteDeg4Nodes=false);
00281 
00282 } // end of RDDepict namespace
00283 
00284 #endif