Package rdkit :: Package Chem :: Module AllChem
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.AllChem

  1  # $Id: AllChem.py 1729 2011-05-21 03:46:05Z glandrum $ 
  2  # 
  3  #  Copyright (C) 2006-2011  greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Import all RDKit chemistry modules 
 12    
 13  """ 
 14  from rdkit import rdBase 
 15  from rdkit import RDConfig 
 16  from rdkit import DataStructs 
 17  from rdkit.Geometry import rdGeometry 
 18  from rdkit.Chem import * 
 19  from rdkit.Chem.rdPartialCharges import * 
 20  from rdkit.Chem.rdDepictor import * 
 21  from rdkit.Chem.rdForceFieldHelpers import * 
 22  from rdkit.Chem.ChemicalFeatures import * 
 23  from rdkit.Chem.rdDistGeom import * 
 24  from rdkit.Chem.rdMolAlign import * 
 25  from rdkit.Chem.rdMolTransforms import * 
 26  from rdkit.Chem.rdShapeHelpers import * 
 27  from rdkit.Chem.rdChemReactions import * 
 28  from rdkit.Chem.rdSLNParse import * 
 29  from rdkit.Chem.rdMolDescriptors import * 
 30  from rdkit import ForceField 
 31  Mol.Compute2DCoords = Compute2DCoords 
 32  Mol.ComputeGasteigerCharges = ComputeGasteigerCharges 
 33  import numpy 
 34   
35 -def TransformMol(mol,tform,confId=-1,keepConfs=False):
36 """ Applies the transformation (usually a 4x4 double matrix) to a molecule 37 if keepConfs is False then all but that conformer are removed 38 39 """ 40 refConf = mol.GetConformer(confId) 41 TransformConformer(refConf,tform) 42 if not keepConfs: 43 if confId==-1: confId=0 44 allConfIds = [c.GetId() for c in mol.GetConformers()] 45 for id in allConfIds: 46 if not id==confId: mol.RemoveConformer(id) 47 #reset the conf Id to zero since there is only one conformer left 48 mol.GetConformer(confId).SetId(0)
49
50 -def ComputeMolShape(mol,confId=-1,boxDim=(20,20,20),spacing=0.5,**kwargs):
51 """ returns a grid representation of the molecule's shape 52 """ 53 res = rdGeometry.UniformGrid3D(boxDim[0],boxDim[1],boxDim[2],spacing=spacing) 54 EncodeShape(mol,res,confId,**kwargs) 55 return res
56
57 -def ComputeMolVolume(mol,confId=-1,gridSpacing=0.2,boxMargin=2.0):
58 """ Calculates the volume of a particular conformer of a molecule 59 based on a grid-encoding of the molecular shape. 60 61 """ 62 mol = rdchem.Mol(mol.ToBinary()) 63 conf = mol.GetConformer(confId) 64 CanonicalizeConformer(conf) 65 box = ComputeConfBox(conf) 66 sideLen = ( box[1].x-box[0].x + 2*boxMargin, \ 67 box[1].y-box[0].y + 2*boxMargin, \ 68 box[1].z-box[0].z + 2*boxMargin ) 69 shape = rdGeometry.UniformGrid3D(sideLen[0],sideLen[1],sideLen[2], 70 spacing=gridSpacing) 71 EncodeShape(mol,shape,confId,ignoreHs=False,vdwScale=1.0) 72 voxelVol = gridSpacing**3 73 occVect = shape.GetOccupancyVect() 74 voxels = [1 for x in occVect if x==3] 75 vol = voxelVol*len(voxels) 76 return vol
77
78 -def GenerateDepictionMatching2DStructure(mol,reference,confId=-1, 79 referencePattern=None, 80 acceptFailure=False, 81 **kwargs):
82 """ Generates a depiction for a molecule where a piece of the molecule 83 is constrained to have the same coordinates as a reference. 84 85 This is useful for, for example, generating depictions of SAR data 86 sets so that the cores of the molecules are all oriented the same 87 way. 88 89 Arguments: 90 - mol: the molecule to be aligned, this will come back 91 with a single conformer. 92 - reference: a molecule with the reference atoms to align to; 93 this should have a depiction. 94 - confId: (optional) the id of the reference conformation to use 95 - referencePattern: (optional) an optional molecule to be used to 96 generate the atom mapping between the molecule 97 and the reference. 98 - acceptFailure: (optional) if True, standard depictions will be generated 99 for molecules that don't have a substructure match to the 100 reference; if False, a ValueError will be raised 101 102 """ 103 if reference and referencePattern: 104 if not reference.GetNumAtoms(onlyHeavy=True)==referencePattern.GetNumAtoms(onlyHeavy=True): 105 raise ValueError,'When a pattern is provided, it must have the same number of atoms as the reference' 106 referenceMatch = reference.GetSubstructMatch(referencePattern) 107 if not referenceMatch: 108 raise ValueError,"Reference does not map to itself" 109 else: 110 referenceMatch = range(reference.GetNumAtoms(onlyHeavy=True)) 111 if referencePattern: 112 match = mol.GetSubstructMatch(referencePattern) 113 else: 114 match = mol.GetSubstructMatch(reference) 115 116 if not match: 117 if not acceptFailure: 118 raise ValueError,'Substructure match with reference not found.' 119 else: 120 coordMap={} 121 else: 122 conf = reference.GetConformer() 123 coordMap={} 124 for i,idx in enumerate(match): 125 pt3 = conf.GetAtomPosition(referenceMatch[i]) 126 pt2 = rdGeometry.Point2D(pt3.x,pt3.y) 127 coordMap[idx] = pt2 128 Compute2DCoords(mol,clearConfs=True,coordMap=coordMap,canonOrient=False)
129
130 -def GenerateDepictionMatching3DStructure(mol,reference,confId=-1, 131 **kwargs):
132 """ Generates a depiction for a molecule where a piece of the molecule 133 is constrained to have coordinates similar to those of a 3D reference 134 structure. 135 136 Arguments: 137 - mol: the molecule to be aligned, this will come back 138 with a single conformer. 139 - reference: a molecule with the reference atoms to align to; 140 this should have a depiction. 141 - confId: (optional) the id of the reference conformation to use 142 143 """ 144 nAts = mol.GetNumAtoms() 145 dm = [] 146 conf = reference.GetConformer(confId) 147 for i in range(nAts): 148 pi = conf.GetAtomPosition(i) 149 #npi.z=0 150 for j in range(i+1,nAts): 151 pj = conf.GetAtomPosition(j) 152 #pj.z=0 153 dm.append((pi-pj).Length()) 154 dm = numpy.array(dm) 155 Compute2DCoordsMimicDistmat(mol,dm,**kwargs)
156
157 -def GetBestRMS(ref,probe,refConfId=-1,probeConfId=-1,maps=None):
158 """ Returns the optimal RMS for aligning two molecules, taking 159 symmetry into account. As a side-effect, the probe molecule is 160 left in the aligned state. 161 162 Arguments: 163 - ref: the reference molecule 164 - probe: the molecule to be aligned to the reference 165 - refConfId: (optional) reference conformation to use 166 - probeConfId: (optional) probe conformation to use 167 - maps: (optional) a list of lists of (probeAtomId,refAtomId) 168 tuples with the atom-atom mappings of the two molecules. 169 If not provided, these will be generated using a substructure 170 search. 171 172 """ 173 if not maps: 174 query = RemoveHs(probe) 175 matches = ref.GetSubstructMatches(query,uniquify=False) 176 if not matches: 177 raise ValueError,'mol %s does not match mol %s'%(ref.GetProp('_Name'), 178 probe.GetProp('_Name')) 179 maps = [list(enumerate(match)) for match in matches] 180 bestRMS=1000. 181 for amap in maps: 182 rms=AlignMol(probe,ref,probeConfId,refConfId,atomMap=amap) 183 if rms<bestRMS: 184 bestRMS=rms 185 bestMap = amap 186 187 # finally repeate the best alignment : 188 if bestMap != amap: 189 AlignMol(probe,ref,probeConfId,refConfId,atomMap=bestMap) 190 return bestRMS
191
192 -def EnumerateLibraryFromReaction(reaction,sidechainSets) :
193 """ Returns a generator for the virtual library defined by 194 a reaction and a sequence of sidechain sets 195 196 >>> from rdkit import Chem 197 >>> from rdkit.Chem import AllChem 198 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')] 199 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')] 200 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]') 201 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1]) 202 >>> [Chem.MolToSmiles(x[0]) for x in list(r)] 203 ['CNC=O', 'CCNC=O', 'CNC(=O)C', 'CCNC(=O)C'] 204 205 Note that this is all done in a lazy manner, so "infinitely" large libraries can 206 be done without worrying about running out of memory. Your patience will run out first: 207 208 Define a set of 10000 amines: 209 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000)) 210 211 ... a set of 10000 acids 212 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000)) 213 214 ... now the virtual library (1e8 compounds in principle): 215 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines]) 216 217 ... look at the first 4 compounds: 218 >>> [Chem.MolToSmiles(r.next()[0]) for x in range(4)] 219 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O'] 220 221 222 """ 223 if len(sidechainSets) != reaction.GetNumReactantTemplates(): 224 raise ValueError,'%d sidechains provided, %d required'%(len(sidechainSets),reaction.getNumReactantTemplates()) 225 226 def _combiEnumerator(items,depth=0): 227 for item in items[depth]: 228 if depth+1 < len(items): 229 v = _combiEnumerator(items,depth+1) 230 for entry in v: 231 l=[item] 232 l.extend(entry) 233 yield l 234 else: 235 yield [item]
236 for chains in _combiEnumerator(sidechainSets): 237 prodSets = reaction.RunReactants(chains) 238 for prods in prodSets: 239 yield prods 240
241 -def ConstrainedEmbed(mol,core,useTethers=True,coreConfId=-1, 242 randomseed=2342):
243 """ generates an embedding of a molecule where part of the molecule 244 is constrained to have particular coordinates 245 246 Arguments 247 - mol: the molecule to embed 248 - core: the molecule to use as a source of constraints 249 - useTethers: (optional) if True, the final conformation will be 250 optimized subject to a series of extra forces that pull the 251 matching atoms to the positions of the core atoms. Otherwise 252 simple distance constraints based on the core atoms will be 253 used in the optimization. 254 - coreConfId: (optional) id of the core conformation to use 255 - randomSeed: (optional) seed for the random number generator 256 257 258 An example, start by generating a template with a 3D structure: 259 >>> from rdkit.Chem import AllChem 260 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1") 261 >>> AllChem.EmbedMolecule(template) 262 0 263 >>> AllChem.UFFOptimizeMolecule(template) 264 0 265 266 Here's a molecule: 267 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3") 268 269 Now do the constrained embedding 270 >>> newmol=AllChem.ConstrainedEmbed(mol, template) 271 272 Demonstrate that the positions are the same: 273 >>> newp=newmol.GetConformer().GetAtomPosition(0) 274 >>> molp=mol.GetConformer().GetAtomPosition(0) 275 >>> list(newp-molp)==[0.0,0.0,0.0] 276 True 277 >>> newp=newmol.GetConformer().GetAtomPosition(1) 278 >>> molp=mol.GetConformer().GetAtomPosition(1) 279 >>> list(newp-molp)==[0.0,0.0,0.0] 280 True 281 282 """ 283 match = mol.GetSubstructMatch(core) 284 if not match: 285 raise ValueError,"molecule doesn't match the core" 286 coordMap={} 287 coreConf = core.GetConformer(coreConfId) 288 for i,idxI in enumerate(match): 289 corePtI = coreConf.GetAtomPosition(i) 290 coordMap[idxI]=corePtI 291 292 ci = EmbedMolecule(mol,coordMap=coordMap,randomSeed=randomseed) 293 if ci<0: 294 raise ValueError,'Could not embed molecule.' 295 296 algMap=[(j,i) for i,j in enumerate(match)] 297 298 if not useTethers: 299 # clean up the conformation 300 ff = UFFGetMoleculeForceField(mol,confId=0) 301 for i,idxI in enumerate(match): 302 for j in range(i+1,len(match)): 303 idxJ = match[j] 304 d = coordMap[idxI].Distance(coordMap[idxJ]) 305 ff.AddDistanceConstraint(idxI,idxJ,d,d,100.) 306 ff.Initialize() 307 n=4 308 more=ff.Minimize() 309 while more and n: 310 more=ff.Minimize() 311 n-=1 312 # rotate the embedded conformation onto the core: 313 rms =AlignMol(mol,core,atomMap=algMap) 314 else: 315 # rotate the embedded conformation onto the core: 316 rms = AlignMol(mol,core,atomMap=algMap) 317 ff = UFFGetMoleculeForceField(mol,confId=0) 318 conf = core.GetConformer() 319 for i in range(core.GetNumAtoms()): 320 p =conf.GetAtomPosition(i) 321 pIdx=ff.AddExtraPoint(p.x,p.y,p.z,fixed=True)-1 322 ff.AddDistanceConstraint(pIdx,match[i],0,0,100.) 323 ff.Initialize() 324 n=4 325 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3) 326 while more and n: 327 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3) 328 n-=1 329 # realign 330 rms = AlignMol(mol,core,atomMap=algMap) 331 mol.SetProp('EmbedRMS',str(rms)) 332 return mol
333 334 335 #------------------------------------ 336 # 337 # doctest boilerplate 338 #
339 -def _test():
340 import doctest,sys 341 return doctest.testmod(sys.modules["__main__"])
342 343 344 if __name__ == '__main__': 345 import sys 346 failed,tried = _test() 347 sys.exit(failed) 348