1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008)
35
36 """
37 from rdkit import Chem
38 from rdkit.Chem import rdChemReactions as Reactions
39 import sys,re,random
40
41
42 environs = {
43 'L1':'[C;D3]([#0,#6,#7,#8])(=O)',
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63 'L3':'[O;D2]-;!@[#0,#6,#1]',
64 'L4':'[C;!D1;!$(C=*)]-;!@[#6]',
65
66 'L5':'[N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]',
67 'L6':'[C;D3;!R](=O)-;!@[#0,#6,#7,#8]',
68 'L7a':'[C;D2,D3]-[#6]',
69 'L7b':'[C;D2,D3]-[#6]',
70 '#L8':'[C;!R;!D1]-;!@[#6]',
71 'L8':'[C;!R;!D1;!$(C!-*)]',
72 'L9':'[n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]',
73 'L10':'[N;R;$(N(@C(=O))@[C,N,O,S])]',
74 'L11':'[S;D2](-;!@[#0,#6])',
75 'L12':'[S;D4]([#6,#0])(=O)(=O)',
76 'L13':'[C;$(C(-;@[C,N,O,S])-;@[N,O,S])]',
77 'L14':'[c;$(c(:[c,n,o,s]):[n,o,s])]',
78 'L14b':'[c;$(c(:[c,n,o,s]):[n,o,s])]',
79 'L15':'[C;$(C(-;@C)-;@C)]',
80 'L16':'[c;$(c(:c):c)]',
81 'L16b':'[c;$(c(:c):c)]',
82 }
83 reactionDefs = (
84
85 [
86 ('1','3','-'),
87 ('1','5','-'),
88 ('1','10','-'),
89 ],
90
91
92 [
93 ('3','4','-'),
94 ('3','13','-'),
95 ('3','14','-'),
96 ('3','15','-'),
97 ('3','16','-'),
98 ],
99
100
101 [
102 ('4','5','-'),
103 ('4','11','-'),
104 ],
105
106
107 [
108 ('5','12','-'),
109 ('5','14','-'),
110 ('5','16','-'),
111 ('5','13','-'),
112 ('5','15','-'),
113 ],
114
115
116 [
117 ('6','13','-'),
118 ('6','14','-'),
119 ('6','15','-'),
120 ('6','16','-'),
121 ],
122
123
124 [
125 ('7a','7b','='),
126 ],
127
128
129 [
130 ('8','9','-'),
131 ('8','10','-'),
132 ('8','13','-'),
133 ('8','14','-'),
134 ('8','15','-'),
135 ('8','16','-'),
136 ],
137
138
139 [
140 ('9','13','-'),
141 ('9','14','-'),
142 ('9','15','-'),
143 ('9','16','-'),
144 ],
145
146
147 [
148 ('10','13','-'),
149 ('10','14','-'),
150 ('10','15','-'),
151 ('10','16','-'),
152 ],
153
154
155 [
156 ('11','13','-'),
157 ('11','14','-'),
158 ('11','15','-'),
159 ('11','16','-'),
160 ],
161
162
163
164
165
166 [
167 ('13','14','-'),
168 ('13','15','-'),
169 ('13','16','-'),
170 ],
171
172
173 [
174 ('14','14','-'),
175 ('14','15','-'),
176 ('14','16','-'),
177 ],
178
179
180 [
181 ('15','16','-'),
182 ],
183
184
185 [
186 ('16','16','-'),
187 ],
188 )
189 import copy
190 smartsGps=copy.deepcopy(reactionDefs)
191 for gp in smartsGps:
192 for j,defn in enumerate(gp):
193 g1,g2,bnd = defn
194 r1=environs['L'+g1]
195 r2=environs['L'+g2]
196 g1 = re.sub('[a-z,A-Z]','',g1)
197 g2 = re.sub('[a-z,A-Z]','',g2)
198 sma='[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]'%(r1,bnd,r2,g1,g2)
199 gp[j] =sma
200
201 for gp in smartsGps:
202 for defn in gp:
203 try:
204 t=Reactions.ReactionFromSmarts(defn)
205 t.Initialize()
206 except:
207 print defn
208 raise
209
210 environMatchers={}
211 for env,sma in environs.iteritems():
212 environMatchers[env]=Chem.MolFromSmarts(sma)
213
214 bondMatchers=[]
215 for i,compats in enumerate(reactionDefs):
216 tmp=[]
217 for i1,i2,bType in compats:
218 e1 = environs['L%s'%i1]
219 e2 = environs['L%s'%i2]
220 patt = '[$(%s)]%s;!@[$(%s)]'%(e1,bType,e2)
221 patt = Chem.MolFromSmarts(patt)
222 tmp.append((i1,i2,bType,patt))
223 bondMatchers.append(tmp)
224
225 reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps])
226 reverseReactions = []
227 for i,rxnSet in enumerate(smartsGps):
228 for j,sma in enumerate(rxnSet):
229 rs,ps = sma.split('>>')
230 sma = '%s>>%s'%(ps,rs)
231 rxn = Reactions.ReactionFromSmarts(sma)
232 labels = re.findall(r'\[([0-9]+?)\*\]',ps)
233 rxn._matchers=[Chem.MolFromSmiles('[%s*]'%x) for x in labels]
234 reverseReactions.append(rxn)
235
237 """ returns the bonds in a molecule that BRICS would cleave
238
239 >>> from rdkit import Chem
240 >>> m = Chem.MolFromSmiles('CCCOCC')
241 >>> res = list(FindBRICSBonds(m))
242 >>> res
243 [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]
244
245 a more complicated case:
246 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
247 >>> res = list(FindBRICSBonds(m))
248 >>> res
249 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]
250
251 we can also randomize the order of the results:
252 >>> random.seed(23)
253 >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
254 >>> sorted(res)
255 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]
256
257 Note that this is a generator function :
258 >>> res = FindBRICSBonds(m)
259 >>> res
260 <generator object ...>
261 >>> res.next()
262 ((3, 2), ('3', '4'))
263
264 >>> m = Chem.MolFromSmiles('CC=CC')
265 >>> res = list(FindBRICSBonds(m))
266 >>> sorted(res)
267 [((1, 2), ('7', '7'))]
268
269 make sure we don't match ring bonds:
270 >>> m = Chem.MolFromSmiles('O=C1NCCC1')
271 >>> list(FindBRICSBonds(m))
272 []
273
274 another nice one, make sure environment 8 doesn't match something connected
275 to a ring atom:
276 >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
277 >>> list(FindBRICSBonds(m))
278 []
279
280 """
281 letter = re.compile('[a-z,A-Z]')
282 indices = range(len(bondMatchers))
283 bondsDone=set()
284 if randomizeOrder: random.shuffle(indices)
285
286 envMatches={}
287 for env,patt in environMatchers.iteritems():
288 envMatches[env]=mol.HasSubstructMatch(patt)
289 for gpIdx in indices:
290 if randomizeOrder:
291 compats =bondMatchers[gpIdx][:]
292 random.shuffle(compats)
293 else:
294 compats = bondMatchers[gpIdx]
295 for i1,i2,bType,patt in compats:
296 if not envMatches['L'+i1] or not envMatches['L'+i2]: continue
297 matches = mol.GetSubstructMatches(patt)
298 i1 = letter.sub('',i1)
299 i2 = letter.sub('',i2)
300 for match in matches:
301 if match not in bondsDone and (match[1],match[0]) not in bondsDone:
302 bondsDone.add(match)
303 yield(((match[0],match[1]),(i1,i2)))
304
306 """ breaks the BRICS bonds in a molecule and returns the results
307
308 >>> from rdkit import Chem
309 >>> m = Chem.MolFromSmiles('CCCOCC')
310 >>> m2=BreakBRICSBonds(m)
311 >>> Chem.MolToSmiles(m2,True)
312 '[4*]CC.[4*]CCC.[3*]O[3*]'
313
314 a more complicated case:
315 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
316 >>> m2=BreakBRICSBonds(m)
317 >>> Chem.MolToSmiles(m2,True)
318 '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[6*]C(=O)CC[4*]'
319
320 can also specify a limited set of bonds to work with:
321 >>> m = Chem.MolFromSmiles('CCCOCC')
322 >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
323 >>> Chem.MolToSmiles(m2,True)
324 '[3*]OCC.[4*]CCC'
325
326 this can be used as an alternate approach for doing a BRICS decomposition by
327 following BreakBRICSBonds with a call to Chem.GetMolFrags:
328 >>> m = Chem.MolFromSmiles('CCCOCC')
329 >>> m2=BreakBRICSBonds(m)
330 >>> frags = Chem.GetMolFrags(m2,asMols=True)
331 >>> [Chem.MolToSmiles(x,True) for x in frags]
332 ['[4*]CCC', '[3*]O[3*]', '[4*]CC']
333
334 """
335 if not bonds:
336 bonds = FindBRICSBonds(mol)
337 if not bonds:
338 return Chem.Mol(mol.ToBinary())
339 eMol = Chem.EditableMol(mol)
340 nAts = mol.GetNumAtoms()
341
342 dummyPositions=[]
343 for indices,dummyTypes in bonds:
344 ia,ib = indices
345 obond = mol.GetBondBetweenAtoms(ia,ib)
346 bondType=obond.GetBondType()
347 eMol.RemoveBond(ia,ib)
348
349 da,db = dummyTypes
350 atoma = Chem.Atom(0)
351 atoma.SetMass(int(da))
352 atoma.SetNoImplicit(True)
353 idxa = nAts
354 nAts+=1
355 eMol.AddAtom(atoma)
356 eMol.AddBond(ia,idxa,bondType)
357
358 atomb = Chem.Atom(0)
359 atomb.SetMass(int(db))
360 atomb.SetNoImplicit(True)
361 idxb = nAts
362 nAts+=1
363 eMol.AddAtom(atomb)
364 eMol.AddBond(ib,idxb,bondType)
365 if mol.GetNumConformers():
366 dummyPositions.append((idxa,ib))
367 dummyPositions.append((idxb,ia))
368 res = eMol.GetMol()
369 if sanitize:
370 Chem.SanitizeMol(res)
371 if mol.GetNumConformers():
372 for conf in mol.GetConformers():
373 resConf = res.GetConformer(conf.GetId())
374 for ia,pa in dummyPositions:
375 resConf.SetAtomPosition(ia,conf.GetAtomPosition(pa))
376 return res
377
378 -def BRICSDecompose(mol,allNodes=None,minFragmentSize=1,onlyUseReactions=None,
379 silent=True,keepNonLeafNodes=False,singlePass=False,returnMols=False):
380 """ returns the BRICS decomposition for a molecule
381
382 >>> from rdkit import Chem
383 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
384 >>> res = list(BRICSDecompose(m))
385 >>> sorted(res)
386 ['[14*]c1ncccc1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[8*]C[4*]']
387
388 >>> res = BRICSDecompose(m,returnMols=True)
389 >>> res[0]
390 <rdkit.Chem.rdchem.Mol object ...>
391 >>> smis = [Chem.MolToSmiles(x,True) for x in res]
392 >>> sorted(smis)
393 ['[14*]c1ncccc1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[8*]C[4*]']
394
395 nexavar, an example from the paper (corrected):
396 >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
397 >>> res = list(BRICSDecompose(m))
398 >>> sorted(res)
399 ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1nccc([16*])c1', '[16*]c1cc([16*])c(Cl)cc1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']
400
401 it's also possible to keep pieces that haven't been fully decomposed:
402 >>> m = Chem.MolFromSmiles('CCCOCC')
403 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
404 >>> sorted(res)
405 ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']
406
407 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
408 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
409 >>> sorted(res)
410 ['CCCOCc1cc(-c2ncccc2)ccc1', '[14*]c1ncccc1', '[16*]c1cc(-c2ncccc2)ccc1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc(CO[3*])c1', '[16*]c1cccc(C[4*])c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OCc1cc(-c2ncccc2)ccc1', '[3*]O[3*]', '[4*]CCC', '[4*]Cc1cc(-c2ncccc2)ccc1', '[8*]COCCC', '[8*]CO[3*]', '[8*]C[4*]']
411
412 or to only do a single pass of decomposition:
413 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
414 >>> res = list(BRICSDecompose(m,singlePass=True))
415 >>> sorted(res)
416 ['CCCOCc1cc(-c2ncccc2)ccc1', '[14*]c1ncccc1', '[16*]c1cc(-c2ncccc2)ccc1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cc(-c2ncccc2)ccc1', '[4*]CCC', '[4*]Cc1cc(-c2ncccc2)ccc1', '[8*]COCCC']
417
418
419 """
420 global reactions
421 mSmi = Chem.MolToSmiles(mol,1)
422
423 if allNodes is None:
424 allNodes=set()
425
426 if mSmi in allNodes:
427 return set()
428
429 activePool={mSmi:mol}
430 allNodes.add(mSmi)
431 foundMols={mSmi:mol}
432 for gpIdx,reactionGp in enumerate(reactions):
433 newPool = {}
434 while activePool:
435 matched=False
436 nSmi = activePool.keys()[0]
437 mol = activePool.pop(nSmi)
438 for rxnIdx,reaction in enumerate(reactionGp):
439 if onlyUseReactions and (gpIdx,rxnIdx) not in onlyUseReactions:
440 continue
441 if not silent:
442 print '--------'
443 print smartsGps[gpIdx][rxnIdx]
444 ps = reaction.RunReactants((mol,))
445 if ps:
446 if not silent: print nSmi,'->',len(ps),'products'
447 for prodSeq in ps:
448 seqOk=True
449
450 prodSeq = [(prod.GetNumAtoms(onlyHeavy=True),prod) for prod in prodSeq]
451 prodSeq.sort()
452 for nats,prod in prodSeq:
453 try:
454 Chem.SanitizeMol(prod)
455 except:
456 continue
457 pSmi = Chem.MolToSmiles(prod,1)
458 if minFragmentSize>0:
459 nDummies = pSmi.count('*')
460 if nats-nDummies<minFragmentSize:
461 seqOk=False
462 break
463 prod.pSmi = pSmi
464 if seqOk:
465 matched=True
466 for nats,prod in prodSeq:
467 pSmi = prod.pSmi
468
469 if pSmi not in allNodes:
470 if not singlePass:
471 activePool[pSmi] = prod
472 allNodes.add(pSmi)
473 foundMols[pSmi]=prod
474 if singlePass or keepNonLeafNodes or not matched:
475 newPool[nSmi]=mol
476 activePool = newPool
477 if not (singlePass or keepNonLeafNodes):
478 if not returnMols:
479 res = set(activePool.keys())
480 else:
481 res = activePool.values()
482 else:
483 if not returnMols:
484 res = allNodes
485 else:
486 res = foundMols.values()
487 return res
488
489
490 import random
491 dummyPattern=Chem.MolFromSmiles('[*]')
492 -def BRICSBuild(fragments,onlyCompleteMols=True,seeds=None,uniquify=True,
493 scrambleReagents=True,maxDepth=3):
494 seen = set()
495 if not seeds:
496 seeds = list(fragments)
497 if scrambleReagents:
498 seeds = list(seeds)
499 random.shuffle(seeds)
500 if scrambleReagents:
501 tempReactions = list(reverseReactions)
502 random.shuffle(tempReactions)
503 else:
504 tempReactions=reverseReactions
505 for seed in seeds:
506 seedIsR1=False
507 seedIsR2=False
508 nextSteps=[]
509 for rxn in tempReactions:
510 if seed.HasSubstructMatch(rxn._matchers[0]):
511 seedIsR1=True
512 if seed.HasSubstructMatch(rxn._matchers[1]):
513 seedIsR2=True
514 for fragment in fragments:
515 ps = None
516 if fragment.HasSubstructMatch(rxn._matchers[0]):
517 if seedIsR2:
518 ps = rxn.RunReactants((fragment,seed))
519 if fragment.HasSubstructMatch(rxn._matchers[1]):
520 if seedIsR1:
521 ps = rxn.RunReactants((seed,fragment))
522 if ps:
523 for p in ps:
524 if uniquify:
525 pSmi =Chem.MolToSmiles(p[0],True)
526 if pSmi in seen:
527 continue
528 else:
529 seen.add(pSmi)
530 if p[0].HasSubstructMatch(dummyPattern):
531 nextSteps.append(p[0])
532 if not onlyCompleteMols:
533 yield p[0]
534 else:
535 yield p[0]
536 if nextSteps and maxDepth>0:
537 for p in BRICSBuild(fragments,onlyCompleteMols=onlyCompleteMols,
538 seeds=nextSteps,uniquify=uniquify,
539 maxDepth=maxDepth-1):
540 if uniquify:
541 pSmi =Chem.MolToSmiles(p,True)
542 if pSmi in seen:
543 continue
544 else:
545 seen.add(pSmi)
546 yield p
547
548
549
550
551
552
553
554
555
556
557
559 import doctest,sys
560 return doctest.testmod(sys.modules["__main__"],
561 optionflags=doctest.ELLIPSIS+doctest.NORMALIZE_WHITESPACE)
562
563 if __name__=='__main__':
564 import unittest
618
620
621 m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
622 res = BRICSDecompose(m)
623 self.failUnless(res)
624 self.failUnless(len(res)==9,res)
625
627 m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1')
628 res = BRICSDecompose(m)
629 self.failUnless(res)
630 self.failUnless(len(res)==5,res)
631 self.failUnless('[5*]N[5*]' in res,res)
632 self.failUnless('[16*]c1ccccc1' in res,res)
633 self.failUnless('[8*]C(F)(F)F' in res,res)
634
635
637 allNodes = set()
638 m = Chem.MolFromSmiles('c1ccccc1OCCC')
639 res = BRICSDecompose(m,allNodes=allNodes)
640 self.failUnless(res)
641 leaves=res
642 self.failUnless(len(leaves)==3,leaves)
643 self.failUnless(len(allNodes)==6,allNodes)
644 res = BRICSDecompose(m,allNodes=allNodes)
645 self.failIf(res)
646 self.failUnless(len(allNodes)==6,allNodes)
647
648 m = Chem.MolFromSmiles('c1ccccc1OCCCC')
649 res = BRICSDecompose(m,allNodes=allNodes)
650 self.failUnless(res)
651 leaves.update(res)
652 self.failUnless(len(allNodes)==9,allNodes)
653 self.failUnless(len(leaves)==4,leaves)
654
655
656 m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC')
657 res = BRICSDecompose(m,allNodes=allNodes)
658 self.failUnless(res)
659 leaves.update(res)
660 self.failUnless(len(leaves)==8,leaves)
661 self.failUnless(len(allNodes)==18,allNodes)
662
664 allNodes = set()
665 frags = [
666 '[14*]c1ncncn1',
667 '[16*]c1ccccc1',
668 '[14*]c1ncccc1',
669 ]
670 frags = [Chem.MolFromSmiles(x) for x in frags]
671 res = BRICSBuild(frags)
672 self.failUnless(res)
673 res= list(res)
674 self.failUnless(len(res)==6)
675 smis = [Chem.MolToSmiles(x,True) for x in res]
676 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
677 self.failUnless('c1ccc(-c2ncccc2)cc1' in smis)
678
680 allNodes = set()
681 frags = [
682 '[3*]O[3*]',
683 '[16*]c1ccccc1',
684 ]
685 frags = [Chem.MolFromSmiles(x) for x in frags]
686 res = BRICSBuild(frags)
687 self.failUnless(res)
688 res=list(res)
689 smis = [Chem.MolToSmiles(x,True) for x in res]
690 self.failUnless(len(smis)==2,smis)
691 self.failUnless('c1ccc(Oc2ccccc2)cc1' in smis)
692 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
693
694
695
697 allNodes = set()
698 frags = [
699 '[16*]c1ccccc1',
700 '[3*]OC',
701 '[9*]n1cccc1',
702 ]
703 frags = [Chem.MolFromSmiles(x) for x in frags]
704 res = BRICSBuild(frags)
705 self.failUnless(res)
706 res= list(res)
707 self.failUnless(len(res)==3)
708 smis = [Chem.MolToSmiles(x,True) for x in res]
709 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
710 self.failUnless('COc1ccccc1' in smis)
711 self.failUnless('c1ccc(-n2cccc2)cc1' in smis)
712
714 allNodes = set()
715 frags = [
716 '[16*]c1ccccc1',
717 '[3*]OC',
718 '[3*]OCC(=O)[6*]',
719 ]
720 frags = [Chem.MolFromSmiles(x) for x in frags]
721 res = BRICSBuild(frags)
722 self.failUnless(res)
723 res= list(res)
724 smis = [Chem.MolToSmiles(x,True) for x in res]
725 self.failUnless(len(res)==3)
726 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
727 self.failUnless('COc1ccccc1' in smis)
728 self.failUnless('O=C(COc1ccccc1)c1ccccc1' in smis)
729
731 random.seed(23)
732 base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1")
733 catalog = BRICSDecompose(base)
734 self.failUnless(len(catalog)==5,catalog)
735
736 catalog = [Chem.MolFromSmiles(x) for x in catalog]
737 ms = [Chem.MolToSmiles(x) for x in BRICSBuild(catalog,maxDepth=4)]
738 self.failUnless(len(ms)==36,ms)
739
740
741 ts = ['n1cnc(C2CNC2)nc1','n1cnc(-c2ncncn2)nc1','C(OC1CNC1)C(C1CC1)OC1CNC1',
742 'n1cnc(OC(COC2CNC2)C2CC2)nc1','n1cnc(OCC(OC2CNC2)C2CNC2)nc1']
743 ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x),True) for x in ts]
744 for t in ts:
745 self.failUnless(t in ms,(t,ms))
746
748 m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1')
749 res=BRICSDecompose(m)
750 self.failUnlessEqual(len(res),7)
751 self.failUnless('[3*]O[3*]' in res)
752 self.failIf('[14*]c1ncnc(NCCCC)n1' in res)
753 res = BRICSDecompose(m,singlePass=True)
754 self.failUnlessEqual(len(res),13)
755 self.failUnless('[3*]OCC' in res)
756 self.failUnless('[14*]c1ncnc(NCCCC)n1' in res)
757
762
764
765 molblock="""
766 RDKit 3D
767
768 13 14 0 0 0 0 0 0 0 0999 V2000
769 -1.2004 0.5900 0.6110 C 0 0 0 0 0 0 0 0 0 0 0 0
770 -2.2328 1.3173 0.0343 C 0 0 0 0 0 0 0 0 0 0 0 0
771 -3.4299 0.6533 -0.1500 C 0 0 0 0 0 0 0 0 0 0 0 0
772 -3.3633 -0.7217 -0.3299 C 0 0 0 0 0 0 0 0 0 0 0 0
773 -2.1552 -1.3791 -0.2207 C 0 0 0 0 0 0 0 0 0 0 0 0
774 -1.1425 -0.7969 0.5335 C 0 0 0 0 0 0 0 0 0 0 0 0
775 0.1458 -1.4244 0.4108 O 0 0 0 0 0 0 0 0 0 0 0 0
776 1.2976 -0.7398 -0.1026 C 0 0 0 0 0 0 0 0 0 0 0 0
777 2.4889 -0.7939 0.5501 N 0 0 0 0 0 0 0 0 0 0 0 0
778 3.4615 0.1460 0.3535 C 0 0 0 0 0 0 0 0 0 0 0 0
779 3.0116 1.4034 -0.0296 C 0 0 0 0 0 0 0 0 0 0 0 0
780 1.9786 1.4264 -0.9435 C 0 0 0 0 0 0 0 0 0 0 0 0
781 1.1399 0.3193 -0.9885 C 0 0 0 0 0 0 0 0 0 0 0 0
782 1 2 2 0
783 2 3 1 0
784 3 4 2 0
785 4 5 1 0
786 5 6 2 0
787 6 7 1 0
788 7 8 1 0
789 8 9 2 0
790 9 10 1 0
791 10 11 2 0
792 11 12 1 0
793 12 13 2 0
794 6 1 1 0
795 13 8 1 0
796 M END
797 """
798 m = Chem.MolFromMolBlock(molblock)
799 pieces = BreakBRICSBonds(m)
800 frags = Chem.GetMolFrags(pieces,asMols=True)
801 self.failUnlessEqual(len(frags),3)
802 self.failUnlessEqual(frags[0].GetNumAtoms(),7)
803 self.failUnlessEqual(frags[1].GetNumAtoms(),3)
804 self.failUnlessEqual(frags[2].GetNumAtoms(),7)
805
806 c1 = m.GetConformer()
807 c2 = frags[0].GetConformer()
808 for i in range(6):
809 p1 = c1.GetAtomPosition(i)
810 p2 = c2.GetAtomPosition(i)
811 self.failUnlessEqual((p1-p2).Length(),0.0)
812 p1 = c1.GetAtomPosition(6)
813 p2 = c2.GetAtomPosition(6)
814 self.failUnlessEqual((p1-p2).Length(),0.0)
815
816 c2 = frags[2].GetConformer()
817 for i in range(6):
818 p1 = c1.GetAtomPosition(i+7)
819 p2 = c2.GetAtomPosition(i)
820 self.failUnlessEqual((p1-p2).Length(),0.0)
821 p1 = c1.GetAtomPosition(6)
822 p2 = c2.GetAtomPosition(6)
823 self.failUnlessEqual((p1-p2).Length(),0.0)
824
825 c2 = frags[1].GetConformer()
826 for i in range(1):
827 p1 = c1.GetAtomPosition(i+6)
828 p2 = c2.GetAtomPosition(i)
829 self.failUnlessEqual((p1-p2).Length(),0.0)
830 p1 = c1.GetAtomPosition(7)
831 p2 = c2.GetAtomPosition(1)
832 self.failUnlessEqual((p1-p2).Length(),0.0)
833 p1 = c1.GetAtomPosition(5)
834 p2 = c2.GetAtomPosition(2)
835 self.failUnlessEqual((p1-p2).Length(),0.0)
836
837
838
839 molblock="""
840 RDKit 2D
841
842 13 14 0 0 0 0 0 0 0 0999 V2000
843 -1.2990 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
844 -2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
845 -3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
846 -3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
847 -2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
848 -1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
849 -0.0000 1.3846 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
850 1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
851 1.2990 -0.8654 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
852 2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
853 3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
854 3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
855 2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
856 1 2 2 0
857 2 3 1 0
858 3 4 2 0
859 4 5 1 0
860 5 6 2 0
861 6 7 1 0
862 7 8 1 0
863 8 9 2 0
864 9 10 1 0
865 10 11 2 0
866 11 12 1 0
867 12 13 2 0
868 6 1 1 0
869 13 8 1 0
870 M END
871 """
872 m2 = Chem.MolFromMolBlock(molblock)
873 m.AddConformer(m2.GetConformer(),assignId=True)
874 self.failUnlessEqual(m.GetNumConformers(),2)
875
876 pieces = BreakBRICSBonds(m)
877 frags = Chem.GetMolFrags(pieces,asMols=True)
878 self.failUnlessEqual(len(frags),3)
879 self.failUnlessEqual(frags[0].GetNumAtoms(),7)
880 self.failUnlessEqual(frags[1].GetNumAtoms(),3)
881 self.failUnlessEqual(frags[2].GetNumAtoms(),7)
882 self.failUnlessEqual(frags[0].GetNumConformers(),2)
883 self.failUnlessEqual(frags[1].GetNumConformers(),2)
884 self.failUnlessEqual(frags[2].GetNumConformers(),2)
885
886 c1 = m.GetConformer(0)
887 c2 = frags[0].GetConformer(0)
888 for i in range(6):
889 p1 = c1.GetAtomPosition(i)
890 p2 = c2.GetAtomPosition(i)
891 self.failUnlessEqual((p1-p2).Length(),0.0)
892 p1 = c1.GetAtomPosition(6)
893 p2 = c2.GetAtomPosition(6)
894 self.failUnlessEqual((p1-p2).Length(),0.0)
895
896 c2 = frags[2].GetConformer(0)
897 for i in range(6):
898 p1 = c1.GetAtomPosition(i+7)
899 p2 = c2.GetAtomPosition(i)
900 self.failUnlessEqual((p1-p2).Length(),0.0)
901 p1 = c1.GetAtomPosition(6)
902 p2 = c2.GetAtomPosition(6)
903 self.failUnlessEqual((p1-p2).Length(),0.0)
904
905 c2 = frags[1].GetConformer(0)
906 for i in range(1):
907 p1 = c1.GetAtomPosition(i+6)
908 p2 = c2.GetAtomPosition(i)
909 self.failUnlessEqual((p1-p2).Length(),0.0)
910 p1 = c1.GetAtomPosition(7)
911 p2 = c2.GetAtomPosition(1)
912 self.failUnlessEqual((p1-p2).Length(),0.0)
913 p1 = c1.GetAtomPosition(5)
914 p2 = c2.GetAtomPosition(2)
915 self.failUnlessEqual((p1-p2).Length(),0.0)
916
917 c1 = m.GetConformer(1)
918 c2 = frags[0].GetConformer(1)
919 for i in range(6):
920 p1 = c1.GetAtomPosition(i)
921 p2 = c2.GetAtomPosition(i)
922 self.failUnlessEqual((p1-p2).Length(),0.0)
923 p1 = c1.GetAtomPosition(6)
924 p2 = c2.GetAtomPosition(6)
925 self.failUnlessEqual((p1-p2).Length(),0.0)
926
927 c2 = frags[2].GetConformer(1)
928 for i in range(6):
929 p1 = c1.GetAtomPosition(i+7)
930 p2 = c2.GetAtomPosition(i)
931 self.failUnlessEqual((p1-p2).Length(),0.0)
932 p1 = c1.GetAtomPosition(6)
933 p2 = c2.GetAtomPosition(6)
934 self.failUnlessEqual((p1-p2).Length(),0.0)
935
936 c2 = frags[1].GetConformer(1)
937 for i in range(1):
938 p1 = c1.GetAtomPosition(i+6)
939 p2 = c2.GetAtomPosition(i)
940 self.failUnlessEqual((p1-p2).Length(),0.0)
941 p1 = c1.GetAtomPosition(7)
942 p2 = c2.GetAtomPosition(1)
943 self.failUnlessEqual((p1-p2).Length(),0.0)
944 p1 = c1.GetAtomPosition(5)
945 p2 = c2.GetAtomPosition(2)
946 self.failUnlessEqual((p1-p2).Length(),0.0)
947
949 m = Chem.MolFromSmiles('CCS(=O)(=O)NCC')
950 res=list(FindBRICSBonds(m))
951 self.failUnlessEqual(len(res),2,res)
952 atIds = [x[0] for x in res]
953 atIds.sort()
954 self.failUnlessEqual(atIds,[(5,2), (6,5)])
955
956
957
958
959
960 failed,tried = _test()
961 if failed:
962 sys.exit(failed)
963
964 unittest.main()
965