1
2
3
4
5
6
7
8
9
10
11 """ Import all RDKit chemistry modules
12
13 """
14 from rdkit import rdBase
15 from rdkit import RDConfig
16 from rdkit import DataStructs
17 from rdkit.Geometry import rdGeometry
18 from rdkit.Chem import *
19 from rdkit.Chem.rdPartialCharges import *
20 from rdkit.Chem.rdDepictor import *
21 from rdkit.Chem.rdForceFieldHelpers import *
22 from rdkit.Chem.ChemicalFeatures import *
23 from rdkit.Chem.rdDistGeom import *
24 from rdkit.Chem.rdMolAlign import *
25 from rdkit.Chem.rdMolTransforms import *
26 from rdkit.Chem.rdShapeHelpers import *
27 from rdkit.Chem.rdChemReactions import *
28 from rdkit.Chem.rdSLNParse import *
29 from rdkit.Chem.rdMolDescriptors import *
30 from rdkit import ForceField
31 Mol.Compute2DCoords = Compute2DCoords
32 Mol.ComputeGasteigerCharges = ComputeGasteigerCharges
33 import numpy
34
49
51 """ returns a grid representation of the molecule's shape
52 """
53 res = rdGeometry.UniformGrid3D(boxDim[0],boxDim[1],boxDim[2],spacing=spacing)
54 EncodeShape(mol,res,confId,**kwargs)
55 return res
56
77
82 """ Generates a depiction for a molecule where a piece of the molecule
83 is constrained to have the same coordinates as a reference.
84
85 This is useful for, for example, generating depictions of SAR data
86 sets so that the cores of the molecules are all oriented the same
87 way.
88
89 Arguments:
90 - mol: the molecule to be aligned, this will come back
91 with a single conformer.
92 - reference: a molecule with the reference atoms to align to;
93 this should have a depiction.
94 - confId: (optional) the id of the reference conformation to use
95 - referencePattern: (optional) an optional molecule to be used to
96 generate the atom mapping between the molecule
97 and the reference.
98 - acceptFailure: (optional) if True, standard depictions will be generated
99 for molecules that don't have a substructure match to the
100 reference; if False, a ValueError will be raised
101
102 """
103 if reference and referencePattern:
104 if not reference.GetNumAtoms(onlyHeavy=True)==referencePattern.GetNumAtoms(onlyHeavy=True):
105 raise ValueError,'When a pattern is provided, it must have the same number of atoms as the reference'
106 referenceMatch = reference.GetSubstructMatch(referencePattern)
107 if not referenceMatch:
108 raise ValueError,"Reference does not map to itself"
109 else:
110 referenceMatch = range(reference.GetNumAtoms(onlyHeavy=True))
111 if referencePattern:
112 match = mol.GetSubstructMatch(referencePattern)
113 else:
114 match = mol.GetSubstructMatch(reference)
115
116 if not match:
117 if not acceptFailure:
118 raise ValueError,'Substructure match with reference not found.'
119 else:
120 coordMap={}
121 else:
122 conf = reference.GetConformer()
123 coordMap={}
124 for i,idx in enumerate(match):
125 pt3 = conf.GetAtomPosition(referenceMatch[i])
126 pt2 = rdGeometry.Point2D(pt3.x,pt3.y)
127 coordMap[idx] = pt2
128 Compute2DCoords(mol,clearConfs=True,coordMap=coordMap,canonOrient=False)
129
132 """ Generates a depiction for a molecule where a piece of the molecule
133 is constrained to have coordinates similar to those of a 3D reference
134 structure.
135
136 Arguments:
137 - mol: the molecule to be aligned, this will come back
138 with a single conformer.
139 - reference: a molecule with the reference atoms to align to;
140 this should have a depiction.
141 - confId: (optional) the id of the reference conformation to use
142
143 """
144 nAts = mol.GetNumAtoms()
145 dm = []
146 conf = reference.GetConformer(confId)
147 for i in range(nAts):
148 pi = conf.GetAtomPosition(i)
149
150 for j in range(i+1,nAts):
151 pj = conf.GetAtomPosition(j)
152
153 dm.append((pi-pj).Length())
154 dm = numpy.array(dm)
155 Compute2DCoordsMimicDistmat(mol,dm,**kwargs)
156
157 -def GetBestRMS(ref,probe,refConfId=-1,probeConfId=-1,maps=None):
158 """ Returns the optimal RMS for aligning two molecules, taking
159 symmetry into account. As a side-effect, the probe molecule is
160 left in the aligned state.
161
162 Arguments:
163 - ref: the reference molecule
164 - probe: the molecule to be aligned to the reference
165 - refConfId: (optional) reference conformation to use
166 - probeConfId: (optional) probe conformation to use
167 - maps: (optional) a list of lists of (probeAtomId,refAtomId)
168 tuples with the atom-atom mappings of the two molecules.
169 If not provided, these will be generated using a substructure
170 search.
171
172 """
173 if not maps:
174 query = RemoveHs(probe)
175 matches = ref.GetSubstructMatches(query,uniquify=False)
176 if not matches:
177 raise ValueError,'mol %s does not match mol %s'%(ref.GetProp('_Name'),
178 probe.GetProp('_Name'))
179 maps = [list(enumerate(match)) for match in matches]
180 bestRMS=1000.
181 for amap in maps:
182 rms=AlignMol(probe,ref,probeConfId,refConfId,atomMap=amap)
183 if rms<bestRMS:
184 bestRMS=rms
185 bestMap = amap
186
187
188 if bestMap != amap:
189 AlignMol(probe,ref,probeConfId,refConfId,atomMap=bestMap)
190 return bestRMS
191
193 """ Returns a generator for the virtual library defined by
194 a reaction and a sequence of sidechain sets
195
196 >>> from rdkit import Chem
197 >>> from rdkit.Chem import AllChem
198 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')]
199 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')]
200 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]')
201 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1])
202 >>> [Chem.MolToSmiles(x[0]) for x in list(r)]
203 ['CNC=O', 'CCNC=O', 'CNC(=O)C', 'CCNC(=O)C']
204
205 Note that this is all done in a lazy manner, so "infinitely" large libraries can
206 be done without worrying about running out of memory. Your patience will run out first:
207
208 Define a set of 10000 amines:
209 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000))
210
211 ... a set of 10000 acids
212 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000))
213
214 ... now the virtual library (1e8 compounds in principle):
215 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines])
216
217 ... look at the first 4 compounds:
218 >>> [Chem.MolToSmiles(r.next()[0]) for x in range(4)]
219 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O']
220
221
222 """
223 if len(sidechainSets) != reaction.GetNumReactantTemplates():
224 raise ValueError,'%d sidechains provided, %d required'%(len(sidechainSets),reaction.getNumReactantTemplates())
225
226 def _combiEnumerator(items,depth=0):
227 for item in items[depth]:
228 if depth+1 < len(items):
229 v = _combiEnumerator(items,depth+1)
230 for entry in v:
231 l=[item]
232 l.extend(entry)
233 yield l
234 else:
235 yield [item]
236 for chains in _combiEnumerator(sidechainSets):
237 prodSets = reaction.RunReactants(chains)
238 for prods in prodSets:
239 yield prods
240
241 -def ConstrainedEmbed(mol,core,useTethers=True,coreConfId=-1,
242 randomseed=2342):
243 """ generates an embedding of a molecule where part of the molecule
244 is constrained to have particular coordinates
245
246 Arguments
247 - mol: the molecule to embed
248 - core: the molecule to use as a source of constraints
249 - useTethers: (optional) if True, the final conformation will be
250 optimized subject to a series of extra forces that pull the
251 matching atoms to the positions of the core atoms. Otherwise
252 simple distance constraints based on the core atoms will be
253 used in the optimization.
254 - coreConfId: (optional) id of the core conformation to use
255 - randomSeed: (optional) seed for the random number generator
256
257
258 An example, start by generating a template with a 3D structure:
259 >>> from rdkit.Chem import AllChem
260 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1")
261 >>> AllChem.EmbedMolecule(template)
262 0
263 >>> AllChem.UFFOptimizeMolecule(template)
264 0
265
266 Here's a molecule:
267 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3")
268
269 Now do the constrained embedding
270 >>> newmol=AllChem.ConstrainedEmbed(mol, template)
271
272 Demonstrate that the positions are the same:
273 >>> newp=newmol.GetConformer().GetAtomPosition(0)
274 >>> molp=mol.GetConformer().GetAtomPosition(0)
275 >>> list(newp-molp)==[0.0,0.0,0.0]
276 True
277 >>> newp=newmol.GetConformer().GetAtomPosition(1)
278 >>> molp=mol.GetConformer().GetAtomPosition(1)
279 >>> list(newp-molp)==[0.0,0.0,0.0]
280 True
281
282 """
283 match = mol.GetSubstructMatch(core)
284 if not match:
285 raise ValueError,"molecule doesn't match the core"
286 coordMap={}
287 coreConf = core.GetConformer(coreConfId)
288 for i,idxI in enumerate(match):
289 corePtI = coreConf.GetAtomPosition(i)
290 coordMap[idxI]=corePtI
291
292 ci = EmbedMolecule(mol,coordMap=coordMap,randomSeed=randomseed)
293 if ci<0:
294 raise ValueError,'Could not embed molecule.'
295
296 algMap=[(j,i) for i,j in enumerate(match)]
297
298 if not useTethers:
299
300 ff = UFFGetMoleculeForceField(mol,confId=0)
301 for i,idxI in enumerate(match):
302 for j in range(i+1,len(match)):
303 idxJ = match[j]
304 d = coordMap[idxI].Distance(coordMap[idxJ])
305 ff.AddDistanceConstraint(idxI,idxJ,d,d,100.)
306 ff.Initialize()
307 n=4
308 more=ff.Minimize()
309 while more and n:
310 more=ff.Minimize()
311 n-=1
312
313 rms =AlignMol(mol,core,atomMap=algMap)
314 else:
315
316 rms = AlignMol(mol,core,atomMap=algMap)
317 ff = UFFGetMoleculeForceField(mol,confId=0)
318 conf = core.GetConformer()
319 for i in range(core.GetNumAtoms()):
320 p =conf.GetAtomPosition(i)
321 pIdx=ff.AddExtraPoint(p.x,p.y,p.z,fixed=True)-1
322 ff.AddDistanceConstraint(pIdx,match[i],0,0,100.)
323 ff.Initialize()
324 n=4
325 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3)
326 while more and n:
327 more=ff.Minimize(energyTol=1e-4,forceTol=1e-3)
328 n-=1
329
330 rms = AlignMol(mol,core,atomMap=algMap)
331 mol.SetProp('EmbedRMS',str(rms))
332 return mol
333
334
335
336
337
338
340 import doctest,sys
341 return doctest.testmod(sys.modules["__main__"])
342
343
344 if __name__ == '__main__':
345 import sys
346 failed,tried = _test()
347 sys.exit(failed)
348