EmbeddedFrag.h

Go to the documentation of this file.
00001 //
00002 //  Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
00003 //
00004 //   @@ All Rights Reserved @@
00005 //  This file is part of the RDKit.
00006 //  The contents are covered by the terms of the BSD license
00007 //  which is included in the file license.txt, found at the root
00008 //  of the RDKit source tree.
00009 //
00010 #ifndef _RD_EMBEDDED_FRAG_H_
00011 #define _RD_EMBEDDED_FRAG_H_
00012 
00013 #include <RDGeneral/types.h>
00014 #include <Geometry/Transform2D.h>
00015 #include <Geometry/point.h>
00016 #include "DepictUtils.h" 
00017 #include <boost/smart_ptr.hpp>
00018 
00019 
00020 namespace RDKit {
00021   class ROMol;
00022   class Bond;
00023 }
00024 
00025 namespace RDDepict {
00026   typedef boost::shared_array<double> DOUBLE_SMART_PTR;
00027   
00028   //! Class that contains the data for an atoms that has alredy been embedded
00029   class EmbeddedAtom {
00030   public:
00031     typedef enum {
00032       UNSPECIFIED=0,
00033       CISTRANS,
00034       RING} EAtomType;
00035 
00036     EmbeddedAtom() : aid(0), angle(-1.0), nbr1(-1), nbr2(-1), 
00037                      CisTransNbr(-1), ccw(true), rotDir(0), d_density(-1.0){
00038       neighs.clear();
00039     }
00040 
00041     EmbeddedAtom(unsigned int aid, const RDGeom::Point2D &pos) :
00042       aid(aid), angle(-1.0), nbr1(-1), nbr2(-1), 
00043       CisTransNbr(-1), ccw(true), rotDir(0), d_density(-1.0) {
00044       loc = pos;
00045     }
00046       
00047     EmbeddedAtom& operator=(const EmbeddedAtom& other) {
00048       loc = other.loc;
00049       angle = other.angle;
00050       nbr1 = other.nbr1;
00051       nbr2 = other.nbr2;
00052       CisTransNbr = other.CisTransNbr;
00053       rotDir = other.rotDir;
00054       normal = other.normal;
00055       ccw = other.ccw;
00056       neighs = other.neighs;
00057       d_density = other.d_density;
00058       return *this;
00059     }
00060 
00061     void Transform(const RDGeom::Transform2D &trans) {
00062       RDGeom::Point2D temp = loc + normal;
00063       trans.TransformPoint(loc);
00064       trans.TransformPoint(temp);
00065       normal = temp - loc;
00066     }
00067 
00068     void Reflect(const RDGeom::Point2D &loc1,
00069                  const RDGeom::Point2D &loc2) {
00070       RDGeom::Point2D temp = loc + normal; 
00071       loc = reflectPoint(loc, loc1, loc2);
00072       temp = reflectPoint(temp, loc1, loc2);
00073       normal = temp - loc;
00074       ccw = (!ccw);
00075     }
00076     
00077     unsigned int aid; // the id of the atom
00078 
00079     //! the angle that is already takes at this atom, so any new atom attaching to this
00080     //! atom with have to fall in the available part
00081     double angle; 
00082     
00083     //! the first neighbor of this atom that form the 'angle'
00084     int nbr1;
00085     
00086     //! the second neighbor of atom that from the 'angle'
00087     int nbr2;
00088     
00089     //! is this is a cis/trans atom the neighbor of this atom that is involved in the
00090     //! cis/trans system - defaults to -1
00091     int CisTransNbr;
00092 
00093     //! which direction do we rotate this normal to add the next bond 
00094     //! if ccw is true we rotate counter cloack wise, otherwise rotate clock wise, by an angle that is
00095     //! <= PI/2
00096     bool ccw;
00097 
00098     //! rotation direction around this atom when adding new atoms,
00099     //! we determine this for the first neighbor and stick to this direction after that
00100     //! useful only on atoms that are degree >= 4
00101     int rotDir;
00102 
00103     RDGeom::Point2D loc; // the current location of this atom
00104     //! this is a normal vector to one of the bonds that added this atom
00105     //! it provides the side on which we want to add a new bond to this atom,
00106     //! this is only relevant when we are dealing with non ring atoms. We would like to draw chains in
00107     //! a zig-zag manner
00108     RDGeom::Point2D normal;
00109 
00110     //! and these are the atom IDs of the neighbors that still need to be embedded
00111     RDKit::INT_VECT neighs;
00112 
00113     // density of the atoms around this atoms
00114     // - this is sum of inverse of the square of distances to other atoms from this atom
00115     // Used in the collision removal code - initialized to -1.0
00116     double d_density;
00117   };
00118 
00119 
00120 
00121   typedef std::map<unsigned int, EmbeddedAtom> INT_EATOM_MAP;
00122   typedef INT_EATOM_MAP::iterator INT_EATOM_MAP_I;
00123   typedef INT_EATOM_MAP::const_iterator INT_EATOM_MAP_CI;
00124   
00125 
00126   //! Class containing a fragment of a molecule that has already been embedded
00127   /*
00128     Here is how this class is designed to be used
00129     - find a set of fused rings and compte the coordinates for the atoms in those ring
00130     - them grow this sytem either by adding non ring neighbors
00131     - or by adding other embedded fragment
00132     - so at the end of the process  the whole molecule end up being one these embedded frag objects
00133   */
00134   class EmbeddedFrag {
00135     // REVIEW: think about moving member functions up to global level and just using
00136     // this class as a container
00137 
00138   public:
00139     //! Default constructor
00140     EmbeddedFrag() : d_done(false), dp_mol(0){
00141       d_eatoms.clear();
00142       d_attachPts.clear();
00143     };
00144 
00145     //! Intializer from a single atom id 
00146     /*!
00147       A single Embedded Atom with this atom ID is added and placed at the origin
00148     */
00149     EmbeddedFrag(unsigned int aid, const RDKit::ROMol *mol);
00150 
00151     //! Constructor when the coordinates have been specified for a set of atoms
00152     /*! 
00153        This simply initialized a set of EmbeddedAtom to have the same coordinates as the 
00154        one's specified. No testing is done to verify any kind of ocrrectlness. Also
00155        this fragment is less ready (to expand and add new neighbors) than when using other 
00156        constructors. This is because:
00157        - the user may have specified coords for only a part of the atoms in a fused ring systems
00158          in which case we need to find these atoms and merge these ring systems to this fragment
00159        - The atoms are not yet aware of their neighbor (what is left to add etc.) this again
00160          depends on  atoms properly so that new 
00161          neighbors can be added to them
00162     */
00163     EmbeddedFrag(const RDKit::ROMol *mol, const RDGeom::INT_POINT2D_MAP &coordMap);
00164 
00165     //! Initializer from a set of fused rings
00166     /*!
00167       ARGUMENTS:
00168       \param mol        the molecule of interest
00169       \param fusedRings a vector of rings, each ring is a list of atom ids 
00170     */
00171     EmbeddedFrag(const RDKit::ROMol *mol, const RDKit::VECT_INT_VECT &fusedRings);
00172 
00173     //! Initializer for a cis/trans system using the double bond
00174     /*!
00175       ARGUMENTS:
00176       \param dblBond   the double bond that is involed in the cis/trans configuration
00177     */
00178     explicit EmbeddedFrag(const RDKit::Bond* dblBond);
00179 
00180     //! Expand this embedded system by adding negihboring atoms or other embedded systems
00181     /*!
00182 
00183       Note that both nratms and efrags are modified in this function
00184       as we start merging them with the current fragment
00185 
00186     */
00187     void expandEfrag(RDKit::INT_LIST &nratms, std::list<EmbeddedFrag> &efrags);
00188                      
00189     //! Add a new non-ring atom to this object
00190     /*
00191       ARGUMENTS:
00192       \param aid     ID of the atom to be added
00193       \param toAid   ID of the atom that is already in this object to which this atom is added
00194     */
00195     void addNonRingAtom(unsigned int aid, unsigned int toAid);
00196 
00197     //! Merge this embedded object with another embedded fragment
00198     /*!
00199 
00200       The transformation (rotation + translation required to attached
00201       the passed in object will be computed and applied. The
00202       coordinates of the atoms in this object will remain fixed We
00203       will assume that there are no common atoms between the two
00204       fragments to start with
00205       
00206       ARGUMENTS:
00207       \param  embObj  another EmbeddedFrag object to be merged with this object
00208       \param  toAid   the atom in this embedded fragment to which the new object will be attached
00209       \param  nbrAid  the atom in the other fragment to attach to
00210     */
00211     void mergeNoCommon(EmbeddedFrag &embObj, unsigned int toAid, 
00212                        unsigned int nbrAid);
00213 
00214     //! Merge this embedded object with another embedded fragment
00215     /*!
00216 
00217       The transformation (rotation + translation required to attached
00218       the passed in object will be computed and applied. The
00219       coordinates of the atoms in this object will remain fixed This
00220       already know there are a atoms in common and we will use them to
00221       merge things
00222       
00223       ARGUMENTS:
00224       \param embObj    another EmbeddedFrag object to be merged with this object
00225       \param commAtms  a vector of ids of the common atoms
00226 
00227     */
00228     void mergeWithCommon(EmbeddedFrag &embObj, RDKit::INT_VECT &commAtms);
00229 
00230     void mergeFragsWithComm(std::list<EmbeddedFrag> &efrags);
00231 
00232     //! Mark this fragment to be done for final embedding
00233     void markDone() {
00234       d_done = true;
00235     }
00236 
00237     //! If this fragment done for the final embedding
00238     bool isDone() {
00239       return d_done;
00240     }
00241 
00242     //! Get the molecule that this embedded fragmetn blongs to
00243     const RDKit::ROMol *getMol() const { return dp_mol;}
00244 
00245     //! Find the common atom ids between this fragment and a second one
00246     RDKit::INT_VECT findCommonAtoms(const EmbeddedFrag &efrag2);
00247 
00248     //! Find a neighbor to a non-ring atom among the already embedded atoms
00249     /*!
00250       ARGUMENTS:
00251       \param aid  the atom id of interest
00252      
00253       RETURNS:
00254       \return the id of the atom if we found a neighbor
00255                    -1 otherwise
00256                    
00257       NOTE: by definition we can have only one neighbor in the embdded system. 
00258     */
00259     int findNeighbor(unsigned int aid);
00260 
00261     //! Tranform this object to a new coordinates system
00262     /*!
00263       ARGUMENTS:
00264       \param trans : the transformation that need to be applied to the atoms in this object
00265     */
00266     void Transform(const RDGeom::Transform2D &trans);
00267 
00268     void Reflect(const RDGeom::Point2D &loc1,
00269                  const RDGeom::Point2D &loc2);
00270 
00271     const INT_EATOM_MAP &GetEmbeddedAtoms() const {
00272       return d_eatoms;
00273     }
00274 
00275     void Translate(const RDGeom::Point2D &shift) {
00276       INT_EATOM_MAP_I eari;
00277       for (eari = d_eatoms.begin(); eari != d_eatoms.end(); eari++) {
00278         eari->second.loc += shift;
00279       }
00280     }
00281 
00282     EmbeddedAtom GetEmbeddedAtom(unsigned int aid) const {
00283       INT_EATOM_MAP_CI posi = d_eatoms.find(aid);
00284       if (posi == d_eatoms.end()) {
00285         PRECONDITION(0, "Embedded atom does not contain embedded atom specified");
00286       }
00287       return posi->second;
00288     }
00289 
00290     //! the number of atoms in the embedded system
00291     int Size() const {
00292       return d_eatoms.size();
00293     }
00294     
00295     //! \brief compute a box that encloses the fragment
00296     void computeBox();
00297 
00298     //! \brief Flip atoms on one side of a bond - used in removing colissions
00299     /*!
00300       ARGUMENTS:
00301       \param bondId - the bond used as the mirror to flip
00302     */
00303     void flipAboutBond(unsigned int bondId);
00304 
00305     void openAngles(const double *dmat, unsigned int aid1, unsigned int aid2);
00306 
00307     std::vector<PAIR_I_I> findCollisions(const double *dmat, bool includeBonds=1);
00308 
00309     void computeDistMat(DOUBLE_SMART_PTR &dmat);
00310 
00311     double mimicDistMatAndDensityCostFunc(const DOUBLE_SMART_PTR *dmat, 
00312                                           double mimicDmatWt);
00313 
00314     void permuteBonds(unsigned int aid, unsigned int aid1, unsigned int aid2);
00315 
00316     void randomSampleFlipsAndPermutations(unsigned int nBondsPerSample=3,
00317                                           unsigned int nSamples=100, int seed=100,
00318                                           const DOUBLE_SMART_PTR *dmat=0, 
00319                                           double mimicDmatWt=0.0,
00320                                           bool permuteDeg4Nodes=false);
00321 
00322     //! Remove collisions in a structure by flipping rotable bonds 
00323     //! along the shortest path between two colliding atoms
00324     void removeCollisionsBondFlip();
00325 
00326     //! Remove collision by opening angles at the offending atoms
00327     void removeCollisionsOpenAngles();
00328 
00329     //! Remove collisions by shortening bonds along the shortest path between the atoms
00330     void removeCollisionsShortenBonds();
00331 
00332     //! helpers funtions to 
00333     
00334 
00335     //! \brief make list of neighbors for each atom in the embedded system that
00336     //!  still need to be embedded
00337     void setupNewNeighs(); 
00338 
00339     //! update the  unembedded neighbor atom list for a specified atom
00340     void updateNewNeighs(unsigned int aid); 
00341 
00342     //! \brief Find all atoms in this embedded system that are
00343     //!  within a specified distant of a point
00344     int findNumNeigh(const RDGeom::Point2D &pt, double radius);
00345 
00346 
00347     inline double getBoxPx() {return d_px;}
00348     inline double getBoxNx() {return d_nx;}
00349     inline double getBoxPy() {return d_py;}
00350     inline double getBoxNy() {return d_ny;}
00351 
00352     void canonicalizeOrientation();
00353 
00354     
00355   private:
00356 
00357     double totalDensity();
00358 
00359     void embedFusedRings(const RDKit::VECT_INT_VECT &fusedRings);
00360 
00361     //! \brief Find a transform to join a ring to the current embedded frag when we
00362     //! have only on common atom
00363     /*!
00364       So this is the state of affairs assumed here:
00365       - we already have some rings in the fused system embeded and the
00366         coordinates for the atoms
00367       - the coordinates for the atoms in the new ring (with the center
00368         of rings at the origin) are available nringCors. we want to
00369         translate and rotate this ring to join with the already
00370         embeded rings.
00371       - only one atom is common between this new ring and the atoms
00372         that are already embedded
00373       - so we need to compute a transform that includes a translation
00374         so that the common atom overlaps and the rotation to minimize
00375         overalp with other atoms.
00376         
00377       Here's what is done:
00378       - we bisect the remaining sweep angle at the common atom and
00379         attach the new ring such that the center of the new ring falls
00380         on this bisecting line
00381         
00382       NOTE: It is assumed here that the original coordinates for the
00383       new ring are such that the center is at the origin (this is the
00384       way rings come out of embedRing)
00385     */
00386     RDGeom::Transform2D computeOneAtomTrans(unsigned int commAid,
00387                                             const EmbeddedFrag &other);
00388     
00389 
00390     RDGeom::Transform2D computeTwoAtomTrans(unsigned int aid1, unsigned int aid2, 
00391                                             const RDGeom::INT_POINT2D_MAP &nringCor);
00392         
00393     //! Merge a ring with already embedded atoms
00394     /*!
00395       It is assumed that the new rings has already been oriented
00396       correctly etc.  This function just update all the relevant data,
00397       like the neighbor information and the sweep angle
00398     */
00399     void mergeRing(const EmbeddedFrag &embRing, unsigned int nCommon, 
00400                    const RDKit::INT_VECT &pinAtoms);
00401 
00402     //! Reflect a fragment if necessary through a line connecting two atoms
00403     /*!
00404 
00405       We want add the new fragment such that, most of its atoms fall
00406       on the side opoiste to where the atoms already embedded are aid1
00407       and aid2 give the atoms that were used to align the new ring to
00408       the embedded atoms and we will assume that that process has
00409       already taken place (i.e. transformRing has been called)
00410 
00411     */
00412     void reflectIfNecessaryDensity(EmbeddedFrag &embFrag, unsigned int aid1, 
00413                                    unsigned int aid2);
00414 
00415     //! Reflect a fragment if necessary based on the cis/trans specification
00416     /*!
00417 
00418       we want to add the new fragment such that the cis/trans
00419       specification on bond between aid1 and aid2 is not violated. We
00420       will assume that aid1 and aid2 from this fragments as well as
00421       embFrag are already aligned to each other.
00422       
00423       \param embFrag   the fragment that will be reflected if necessary
00424       \param ctCase    which fragment if the cis/trans dbl bond
00425                         - 1 means embFrag is the cis/trans fragment
00426                         - 2 mean "this" is the cis/trans fragment
00427       \param aid1      first atom that forms the plane (line) of reflection 
00428       \param aid2      seconf atom that forms the plane of reflection
00429     */
00430     void reflectIfNecessaryCisTrans(EmbeddedFrag &embFrag, unsigned int ctCase, 
00431                                     unsigned int aid1, unsigned int aid2);
00432 
00433     //! Reflect a fragment if necessary based on a thrid common point
00434     /*!
00435 
00436       we want add the new fragment such that the thrid point falls on
00437       the same side of aid1 and aid2. We will assume that aid1 and
00438       aid2 from this fragments as well as embFrag are already aligned
00439       to each other.
00440 
00441     */
00442     void reflectIfNecessaryThirdPt(EmbeddedFrag &embFrag, unsigned int aid1, unsigned int aid2, 
00443                                    unsigned int aid3);
00444 
00445     //! \brief Initialize this fragment from a ring and coordinates for its atoms
00446     /*!
00447       ARGUMENTS:
00448       /param ring     a vector of atom ids in the ring; it is assumed that there in 
00449                       clockwise or anti-clockwise order
00450       /param nringMap a map of atomId to coordinate map for the atoms in the ring
00451     */
00452     void initFromRingCoords(const RDKit::INT_VECT &ring, 
00453                             const RDGeom::INT_POINT2D_MAP &nringMap);
00454 
00455     //! Helper function to addNonRingAtom to a specified atoms in the fragment
00456     /*
00457       Add an atom to this embedded fragment when the fragment already
00458       has a atleast two previously added neighbors to 'toAid'. In this
00459       case we have to choose where the the new neighbor goes based on
00460       the angle that is already taken around the atom.
00461       
00462       ARGUMENTS:
00463       \param aid    ID of the atom to be added
00464       \param toAid  ID of the atom that is already in this object to which this atom is added
00465     */
00466     void addAtomToAtomWithAng(unsigned int aid, unsigned int toAid);
00467 
00468 
00469     //! Helper function to addNonRingAtom to a specified atoms in the fragment
00470     /*!
00471 
00472       Add an atom (aid) to an atom (toAid) in this embedded fragment
00473       when 'toAid' has one or no neighbors previously added. In this
00474       case where the new atom should fall is determined by the degree
00475       of 'toAid' and the congestion around it.
00476       
00477       ARGUMENTS:
00478       \param  aid     ID of the atom to be added
00479       \param  toAid   ID of the atom that is already in this object to which this atom is added
00480       \param  mol     the molecule we are dealing with
00481     */
00482     void addAtomToAtomWithNoAng(unsigned int aid, 
00483                                 unsigned int toAid); //, const RDKit::ROMol *mol);
00484 
00485     //! Helper funtion to contructor that takes predefined coordinates
00486     /*! 
00487 
00488       Given an atom with more than 2 neighbors all embedded in this
00489       fragment this function tries to determine
00490 
00491       - how much of an angle if left for any new neighbors yet to be
00492         added
00493       - which atom should we rotate when we add a new neighbor and in
00494         which direction (clockwise or anticlockwise
00495         
00496       This is how it works 
00497       - find the pair of nbrs that have the largest angle
00498       - this will most likely be the angle that is available - unless
00499         we have fused rings and we found on of the ring angle !!!! -
00500         in this cae we find the next best
00501       - find the smallest anngle that contains one of these nbrs -
00502         this determined which
00503       - way we want to rotate
00504      
00505       ARGUMENTS:
00506       \param aid        the atom id where we are centered right now
00507       \param doneNbrs   list of neighbors that are already embedded around aid
00508     */
00509     void computeNbrsAndAng(unsigned int aid, const RDKit::INT_VECT &doneNbrs);
00510     //const RDKit::ROMol *mol);
00511 
00512     //! are we embedded with the final (molecule) coordinates
00513     bool d_done;
00514     double d_px, d_nx, d_py, d_ny;
00515 
00516     //! a map that takes one from teh atom id to the embeddedatom object for that atom.
00517     INT_EATOM_MAP d_eatoms;
00518 
00519     //RDKit::INT_DEQUE d_attachPts;
00520     RDKit::INT_LIST d_attachPts;
00521 
00522     // pointer to the owning molecule
00523     const RDKit::ROMol *dp_mol;
00524 
00525   };
00526 
00527   
00528 }
00529 
00530 #endif