Package rdkit :: Package Chem :: Module BRICS
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.BRICS

  1  # $Id: BRICS.py 1676 2011-03-26 06:30:10Z glandrum $ 
  2  # 
  3  #  Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Greg Landrum, Nov 2008 
 33   
 34  """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008) 
 35   
 36  """ 
 37  from rdkit import Chem 
 38  from rdkit.Chem import rdChemReactions as Reactions 
 39  import sys,re,random 
 40   
 41  # These are the definitions that will be applied to fragment molecules: 
 42  environs = { 
 43    'L1':'[C;D3]([#0,#6,#7,#8])(=O)', 
 44    # 
 45    # After some discussion, the L2 definitions ("N.pl3" in the original 
 46    # paper) have been removed and incorporated into a (almost) general 
 47    # purpose amine definition in L5 ("N.sp3" in the paper). 
 48    # 
 49    # The problem is one of consistency. 
 50    #    Based on the original definitions you should get the following 
 51    #    fragmentations: 
 52    #      C1CCCCC1NC(=O)C -> C1CCCCC1N[2*].[1*]C(=O)C 
 53    #      c1ccccc1NC(=O)C -> c1ccccc1[16*].[2*]N[2*].[1*]C(=O)C 
 54    #    This difference just didn't make sense to us. By switching to 
 55    #    the unified definition we end up with: 
 56    #      C1CCCCC1NC(=O)C -> C1CCCCC1[15*].[5*]N[5*].[1*]C(=O)C 
 57    #      c1ccccc1NC(=O)C -> c1ccccc1[16*].[5*]N[5*].[1*]C(=O)C 
 58    # 
 59    #'L2':'[N;!R;!D1;!$(N=*)]-;!@[#0,#6]', 
 60    # this one turned out to be too tricky to define above, so we set it off 
 61    # in its own definition: 
 62    #'L2a':'[N;D3;R;$(N(@[C;!$(C=*)])@[C;!$(C=*)])]', 
 63    'L3':'[O;D2]-;!@[#0,#6,#1]', 
 64    'L4':'[C;!D1;!$(C=*)]-;!@[#6]', 
 65    #'L5':'[N;!D1;!$(N*!-*);!$(N=*);!$(N-[!C;!#0])]-[#0,C]', 
 66    'L5':'[N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]', 
 67    'L6':'[C;D3;!R](=O)-;!@[#0,#6,#7,#8]', 
 68    'L7a':'[C;D2,D3]-[#6]', 
 69    'L7b':'[C;D2,D3]-[#6]', 
 70    '#L8':'[C;!R;!D1]-;!@[#6]', 
 71    'L8':'[C;!R;!D1;!$(C!-*)]', 
 72    'L9':'[n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]', 
 73    'L10':'[N;R;$(N(@C(=O))@[C,N,O,S])]', 
 74    'L11':'[S;D2](-;!@[#0,#6])', 
 75    'L12':'[S;D4]([#6,#0])(=O)(=O)', 
 76    'L13':'[C;$(C(-;@[C,N,O,S])-;@[N,O,S])]', 
 77    'L14':'[c;$(c(:[c,n,o,s]):[n,o,s])]', 
 78    'L14b':'[c;$(c(:[c,n,o,s]):[n,o,s])]', 
 79    'L15':'[C;$(C(-;@C)-;@C)]', 
 80    'L16':'[c;$(c(:c):c)]', 
 81    'L16b':'[c;$(c(:c):c)]', 
 82    } 
 83  reactionDefs = ( 
 84    # L1 
 85    [ 
 86      ('1','3','-'), 
 87      ('1','5','-'), 
 88      ('1','10','-'), 
 89    ], 
 90   
 91     # L3  
 92     [ 
 93      ('3','4','-'), 
 94      ('3','13','-'), 
 95      ('3','14','-'), 
 96      ('3','15','-'), 
 97      ('3','16','-'), 
 98     ], 
 99     
100     # L4 
101     [ 
102      ('4','5','-'), 
103      ('4','11','-'), 
104     ], 
105   
106     # L5 
107     [ 
108      ('5','12','-'), 
109      ('5','14','-'), 
110      ('5','16','-'), 
111      ('5','13','-'), 
112      ('5','15','-'), 
113     ], 
114     
115      # L6 
116      [ 
117      ('6','13','-'), 
118      ('6','14','-'), 
119      ('6','15','-'), 
120      ('6','16','-'), 
121      ], 
122     
123      # L7 
124      [ 
125      ('7a','7b','='), 
126      ], 
127   
128      # L8 
129      [ 
130      ('8','9','-'), 
131      ('8','10','-'), 
132      ('8','13','-'), 
133      ('8','14','-'), 
134      ('8','15','-'), 
135      ('8','16','-'), 
136      ], 
137     
138      # L9 
139      [ 
140      ('9','13','-'),# not in original paper 
141      ('9','14','-'),# not in original paper 
142      ('9','15','-'), 
143      ('9','16','-'), 
144      ], 
145     
146      # L10 
147      [ 
148      ('10','13','-'), 
149      ('10','14','-'), 
150      ('10','15','-'), 
151      ('10','16','-'), 
152      ], 
153     
154      # L11 
155      [ 
156      ('11','13','-'), 
157      ('11','14','-'), 
158      ('11','15','-'), 
159      ('11','16','-'), 
160      ], 
161     
162      # L12 
163      # none left 
164   
165      # L13 
166      [ 
167      ('13','14','-'), 
168      ('13','15','-'), 
169      ('13','16','-'), 
170      ], 
171     
172      # L14 
173      [ 
174      ('14','14','-'),# not in original paper 
175      ('14','15','-'), 
176      ('14','16','-'), 
177      ], 
178     
179      # L15 
180      [ 
181      ('15','16','-'), 
182      ], 
183   
184      # L16 
185      [ 
186      ('16','16','-'), # not in original paper 
187      ], 
188    ) 
189  import copy 
190  smartsGps=copy.deepcopy(reactionDefs) 
191  for gp in smartsGps: 
192    for j,defn in enumerate(gp): 
193      g1,g2,bnd = defn 
194      r1=environs['L'+g1] 
195      r2=environs['L'+g2] 
196      g1 = re.sub('[a-z,A-Z]','',g1) 
197      g2 = re.sub('[a-z,A-Z]','',g2) 
198      sma='[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]'%(r1,bnd,r2,g1,g2) 
199      gp[j] =sma 
200   
201  for gp in smartsGps: 
202    for defn in gp: 
203      try: 
204        t=Reactions.ReactionFromSmarts(defn) 
205        t.Initialize() 
206      except: 
207        print defn 
208        raise 
209   
210  environMatchers={} 
211  for env,sma in environs.iteritems(): 
212    environMatchers[env]=Chem.MolFromSmarts(sma) 
213     
214  bondMatchers=[] 
215  for i,compats in enumerate(reactionDefs): 
216    tmp=[] 
217    for i1,i2,bType in compats: 
218        e1 = environs['L%s'%i1] 
219        e2 = environs['L%s'%i2] 
220        patt = '[$(%s)]%s;!@[$(%s)]'%(e1,bType,e2) 
221        patt = Chem.MolFromSmarts(patt) 
222        tmp.append((i1,i2,bType,patt)) 
223    bondMatchers.append(tmp) 
224       
225  reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps]) 
226  reverseReactions = [] 
227  for i,rxnSet in enumerate(smartsGps): 
228    for j,sma in enumerate(rxnSet): 
229      rs,ps = sma.split('>>') 
230      sma = '%s>>%s'%(ps,rs) 
231      rxn = Reactions.ReactionFromSmarts(sma) 
232      labels = re.findall(r'\[([0-9]+?)\*\]',ps) 
233      rxn._matchers=[Chem.MolFromSmiles('[%s*]'%x) for x in labels] 
234      reverseReactions.append(rxn) 
235   
236 -def FindBRICSBonds(mol,randomizeOrder=False,silent=True):
237 """ returns the bonds in a molecule that BRICS would cleave 238 239 >>> from rdkit import Chem 240 >>> m = Chem.MolFromSmiles('CCCOCC') 241 >>> res = list(FindBRICSBonds(m)) 242 >>> res 243 [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))] 244 245 a more complicated case: 246 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1') 247 >>> res = list(FindBRICSBonds(m)) 248 >>> res 249 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))] 250 251 we can also randomize the order of the results: 252 >>> random.seed(23) 253 >>> res = list(FindBRICSBonds(m,randomizeOrder=True)) 254 >>> sorted(res) 255 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))] 256 257 Note that this is a generator function : 258 >>> res = FindBRICSBonds(m) 259 >>> res 260 <generator object ...> 261 >>> res.next() 262 ((3, 2), ('3', '4')) 263 264 >>> m = Chem.MolFromSmiles('CC=CC') 265 >>> res = list(FindBRICSBonds(m)) 266 >>> sorted(res) 267 [((1, 2), ('7', '7'))] 268 269 make sure we don't match ring bonds: 270 >>> m = Chem.MolFromSmiles('O=C1NCCC1') 271 >>> list(FindBRICSBonds(m)) 272 [] 273 274 another nice one, make sure environment 8 doesn't match something connected 275 to a ring atom: 276 >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1') 277 >>> list(FindBRICSBonds(m)) 278 [] 279 280 """ 281 letter = re.compile('[a-z,A-Z]') 282 indices = range(len(bondMatchers)) 283 bondsDone=set() 284 if randomizeOrder: random.shuffle(indices) 285 286 envMatches={} 287 for env,patt in environMatchers.iteritems(): 288 envMatches[env]=mol.HasSubstructMatch(patt) 289 for gpIdx in indices: 290 if randomizeOrder: 291 compats =bondMatchers[gpIdx][:] 292 random.shuffle(compats) 293 else: 294 compats = bondMatchers[gpIdx] 295 for i1,i2,bType,patt in compats: 296 if not envMatches['L'+i1] or not envMatches['L'+i2]: continue 297 matches = mol.GetSubstructMatches(patt) 298 i1 = letter.sub('',i1) 299 i2 = letter.sub('',i2) 300 for match in matches: 301 if match not in bondsDone and (match[1],match[0]) not in bondsDone: 302 bondsDone.add(match) 303 yield(((match[0],match[1]),(i1,i2)))
304
305 -def BreakBRICSBonds(mol,bonds=None,sanitize=True,silent=True):
306 """ breaks the BRICS bonds in a molecule and returns the results 307 308 >>> from rdkit import Chem 309 >>> m = Chem.MolFromSmiles('CCCOCC') 310 >>> m2=BreakBRICSBonds(m) 311 >>> Chem.MolToSmiles(m2,True) 312 '[4*]CC.[4*]CCC.[3*]O[3*]' 313 314 a more complicated case: 315 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1') 316 >>> m2=BreakBRICSBonds(m) 317 >>> Chem.MolToSmiles(m2,True) 318 '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[6*]C(=O)CC[4*]' 319 320 can also specify a limited set of bonds to work with: 321 >>> m = Chem.MolFromSmiles('CCCOCC') 322 >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))]) 323 >>> Chem.MolToSmiles(m2,True) 324 '[3*]OCC.[4*]CCC' 325 326 this can be used as an alternate approach for doing a BRICS decomposition by 327 following BreakBRICSBonds with a call to Chem.GetMolFrags: 328 >>> m = Chem.MolFromSmiles('CCCOCC') 329 >>> m2=BreakBRICSBonds(m) 330 >>> frags = Chem.GetMolFrags(m2,asMols=True) 331 >>> [Chem.MolToSmiles(x,True) for x in frags] 332 ['[4*]CCC', '[3*]O[3*]', '[4*]CC'] 333 334 """ 335 if not bonds: 336 bonds = FindBRICSBonds(mol) 337 if not bonds: 338 return Chem.Mol(mol.ToBinary()) 339 eMol = Chem.EditableMol(mol) 340 nAts = mol.GetNumAtoms() 341 342 dummyPositions=[] 343 for indices,dummyTypes in bonds: 344 ia,ib = indices 345 obond = mol.GetBondBetweenAtoms(ia,ib) 346 bondType=obond.GetBondType() 347 eMol.RemoveBond(ia,ib) 348 349 da,db = dummyTypes 350 atoma = Chem.Atom(0) 351 atoma.SetMass(int(da)) 352 atoma.SetNoImplicit(True) 353 idxa = nAts 354 nAts+=1 355 eMol.AddAtom(atoma) 356 eMol.AddBond(ia,idxa,bondType) 357 358 atomb = Chem.Atom(0) 359 atomb.SetMass(int(db)) 360 atomb.SetNoImplicit(True) 361 idxb = nAts 362 nAts+=1 363 eMol.AddAtom(atomb) 364 eMol.AddBond(ib,idxb,bondType) 365 if mol.GetNumConformers(): 366 dummyPositions.append((idxa,ib)) 367 dummyPositions.append((idxb,ia)) 368 res = eMol.GetMol() 369 if sanitize: 370 Chem.SanitizeMol(res) 371 if mol.GetNumConformers(): 372 for conf in mol.GetConformers(): 373 resConf = res.GetConformer(conf.GetId()) 374 for ia,pa in dummyPositions: 375 resConf.SetAtomPosition(ia,conf.GetAtomPosition(pa)) 376 return res
377
378 -def BRICSDecompose(mol,allNodes=None,minFragmentSize=1,onlyUseReactions=None, 379 silent=True,keepNonLeafNodes=False,singlePass=False,returnMols=False):
380 """ returns the BRICS decomposition for a molecule 381 382 >>> from rdkit import Chem 383 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1') 384 >>> res = list(BRICSDecompose(m)) 385 >>> sorted(res) 386 ['[14*]c1ncccc1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[8*]C[4*]'] 387 388 >>> res = BRICSDecompose(m,returnMols=True) 389 >>> res[0] 390 <rdkit.Chem.rdchem.Mol object ...> 391 >>> smis = [Chem.MolToSmiles(x,True) for x in res] 392 >>> sorted(smis) 393 ['[14*]c1ncccc1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[8*]C[4*]'] 394 395 nexavar, an example from the paper (corrected): 396 >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1') 397 >>> res = list(BRICSDecompose(m)) 398 >>> sorted(res) 399 ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1nccc([16*])c1', '[16*]c1cc([16*])c(Cl)cc1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F'] 400 401 it's also possible to keep pieces that haven't been fully decomposed: 402 >>> m = Chem.MolFromSmiles('CCCOCC') 403 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True)) 404 >>> sorted(res) 405 ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC'] 406 407 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1') 408 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True)) 409 >>> sorted(res) 410 ['CCCOCc1cc(-c2ncccc2)ccc1', '[14*]c1ncccc1', '[16*]c1cc(-c2ncccc2)ccc1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc(CO[3*])c1', '[16*]c1cccc(C[4*])c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OCc1cc(-c2ncccc2)ccc1', '[3*]O[3*]', '[4*]CCC', '[4*]Cc1cc(-c2ncccc2)ccc1', '[8*]COCCC', '[8*]CO[3*]', '[8*]C[4*]'] 411 412 or to only do a single pass of decomposition: 413 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1') 414 >>> res = list(BRICSDecompose(m,singlePass=True)) 415 >>> sorted(res) 416 ['CCCOCc1cc(-c2ncccc2)ccc1', '[14*]c1ncccc1', '[16*]c1cc(-c2ncccc2)ccc1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cc(-c2ncccc2)ccc1', '[4*]CCC', '[4*]Cc1cc(-c2ncccc2)ccc1', '[8*]COCCC'] 417 418 419 """ 420 global reactions 421 mSmi = Chem.MolToSmiles(mol,1) 422 423 if allNodes is None: 424 allNodes=set() 425 426 if mSmi in allNodes: 427 return set() 428 429 activePool={mSmi:mol} 430 allNodes.add(mSmi) 431 foundMols={mSmi:mol} 432 for gpIdx,reactionGp in enumerate(reactions): 433 newPool = {} 434 while activePool: 435 matched=False 436 nSmi = activePool.keys()[0] 437 mol = activePool.pop(nSmi) 438 for rxnIdx,reaction in enumerate(reactionGp): 439 if onlyUseReactions and (gpIdx,rxnIdx) not in onlyUseReactions: 440 continue 441 if not silent: 442 print '--------' 443 print smartsGps[gpIdx][rxnIdx] 444 ps = reaction.RunReactants((mol,)) 445 if ps: 446 if not silent: print nSmi,'->',len(ps),'products' 447 for prodSeq in ps: 448 seqOk=True 449 # we want to disqualify small fragments, so sort the product sequence by size 450 prodSeq = [(prod.GetNumAtoms(onlyHeavy=True),prod) for prod in prodSeq] 451 prodSeq.sort() 452 for nats,prod in prodSeq: 453 try: 454 Chem.SanitizeMol(prod) 455 except: 456 continue 457 pSmi = Chem.MolToSmiles(prod,1) 458 if minFragmentSize>0: 459 nDummies = pSmi.count('*') 460 if nats-nDummies<minFragmentSize: 461 seqOk=False 462 break 463 prod.pSmi = pSmi 464 if seqOk: 465 matched=True 466 for nats,prod in prodSeq: 467 pSmi = prod.pSmi 468 #print '\t',nats,pSmi 469 if pSmi not in allNodes: 470 if not singlePass: 471 activePool[pSmi] = prod 472 allNodes.add(pSmi) 473 foundMols[pSmi]=prod 474 if singlePass or keepNonLeafNodes or not matched: 475 newPool[nSmi]=mol 476 activePool = newPool 477 if not (singlePass or keepNonLeafNodes): 478 if not returnMols: 479 res = set(activePool.keys()) 480 else: 481 res = activePool.values() 482 else: 483 if not returnMols: 484 res = allNodes 485 else: 486 res = foundMols.values() 487 return res
488 489 490 import random 491 dummyPattern=Chem.MolFromSmiles('[*]')
492 -def BRICSBuild(fragments,onlyCompleteMols=True,seeds=None,uniquify=True, 493 scrambleReagents=True,maxDepth=3):
494 seen = set() 495 if not seeds: 496 seeds = list(fragments) 497 if scrambleReagents: 498 seeds = list(seeds) 499 random.shuffle(seeds) 500 if scrambleReagents: 501 tempReactions = list(reverseReactions) 502 random.shuffle(tempReactions) 503 else: 504 tempReactions=reverseReactions 505 for seed in seeds: 506 seedIsR1=False 507 seedIsR2=False 508 nextSteps=[] 509 for rxn in tempReactions: 510 if seed.HasSubstructMatch(rxn._matchers[0]): 511 seedIsR1=True 512 if seed.HasSubstructMatch(rxn._matchers[1]): 513 seedIsR2=True 514 for fragment in fragments: 515 ps = None 516 if fragment.HasSubstructMatch(rxn._matchers[0]): 517 if seedIsR2: 518 ps = rxn.RunReactants((fragment,seed)) 519 if fragment.HasSubstructMatch(rxn._matchers[1]): 520 if seedIsR1: 521 ps = rxn.RunReactants((seed,fragment)) 522 if ps: 523 for p in ps: 524 if uniquify: 525 pSmi =Chem.MolToSmiles(p[0],True) 526 if pSmi in seen: 527 continue 528 else: 529 seen.add(pSmi) 530 if p[0].HasSubstructMatch(dummyPattern): 531 nextSteps.append(p[0]) 532 if not onlyCompleteMols: 533 yield p[0] 534 else: 535 yield p[0] 536 if nextSteps and maxDepth>0: 537 for p in BRICSBuild(fragments,onlyCompleteMols=onlyCompleteMols, 538 seeds=nextSteps,uniquify=uniquify, 539 maxDepth=maxDepth-1): 540 if uniquify: 541 pSmi =Chem.MolToSmiles(p,True) 542 if pSmi in seen: 543 continue 544 else: 545 seen.add(pSmi) 546 yield p
547 548 549 550 # ------- ------- ------- ------- ------- ------- ------- ------- 551 # Begin testing code 552 553 554 #------------------------------------ 555 # 556 # doctest boilerplate 557 #
558 -def _test():
559 import doctest,sys 560 return doctest.testmod(sys.modules["__main__"], 561 optionflags=doctest.ELLIPSIS+doctest.NORMALIZE_WHITESPACE)
562 563 if __name__=='__main__': 564 import unittest
565 - class TestCase(unittest.TestCase):
566 - def test1(self):
567 m = Chem.MolFromSmiles('CC(=O)OC') 568 res = BRICSDecompose(m) 569 self.failUnless(res) 570 self.failUnless(len(res)==2) 571 572 m = Chem.MolFromSmiles('CC(=O)N1CCC1=O') 573 res = BRICSDecompose(m) 574 self.failUnless(res) 575 self.failUnless(len(res)==2,res) 576 577 m = Chem.MolFromSmiles('c1ccccc1N(C)C') 578 res = BRICSDecompose(m) 579 self.failUnless(res) 580 self.failUnless(len(res)==2,res) 581 582 m = Chem.MolFromSmiles('c1cccnc1N(C)C') 583 res = BRICSDecompose(m) 584 self.failUnless(res) 585 self.failUnless(len(res)==2,res) 586 587 m = Chem.MolFromSmiles('o1ccnc1N(C)C') 588 res = BRICSDecompose(m) 589 self.failUnless(res) 590 self.failUnless(len(res)==2) 591 592 m = Chem.MolFromSmiles('c1ccccc1OC') 593 res = BRICSDecompose(m) 594 self.failUnless(res) 595 self.failUnless(len(res)==2) 596 597 m = Chem.MolFromSmiles('o1ccnc1OC') 598 res = BRICSDecompose(m) 599 self.failUnless(res) 600 self.failUnless(len(res)==2) 601 602 m = Chem.MolFromSmiles('O1CCNC1OC') 603 res = BRICSDecompose(m) 604 self.failUnless(res) 605 self.failUnless(len(res)==2) 606 607 m = Chem.MolFromSmiles('CCCSCC') 608 res = BRICSDecompose(m) 609 self.failUnless(res) 610 self.failUnless(len(res)==3,res) 611 self.failUnless('[11*]S[11*]' in res,res) 612 613 m = Chem.MolFromSmiles('CCNC(=O)C1CC1') 614 res = BRICSDecompose(m) 615 self.failUnless(res) 616 self.failUnless(len(res)==4,res) 617 self.failUnless('[5*]N[5*]' in res,res)
618
619 - def test2(self):
620 # example from the paper, nexavar: 621 m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1') 622 res = BRICSDecompose(m) 623 self.failUnless(res) 624 self.failUnless(len(res)==9,res)
625
626 - def test3(self):
627 m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1') 628 res = BRICSDecompose(m) 629 self.failUnless(res) 630 self.failUnless(len(res)==5,res) 631 self.failUnless('[5*]N[5*]' in res,res) 632 self.failUnless('[16*]c1ccccc1' in res,res) 633 self.failUnless('[8*]C(F)(F)F' in res,res)
634 635
636 - def test4(self):
637 allNodes = set() 638 m = Chem.MolFromSmiles('c1ccccc1OCCC') 639 res = BRICSDecompose(m,allNodes=allNodes) 640 self.failUnless(res) 641 leaves=res 642 self.failUnless(len(leaves)==3,leaves) 643 self.failUnless(len(allNodes)==6,allNodes) 644 res = BRICSDecompose(m,allNodes=allNodes) 645 self.failIf(res) 646 self.failUnless(len(allNodes)==6,allNodes) 647 648 m = Chem.MolFromSmiles('c1ccccc1OCCCC') 649 res = BRICSDecompose(m,allNodes=allNodes) 650 self.failUnless(res) 651 leaves.update(res) 652 self.failUnless(len(allNodes)==9,allNodes) 653 self.failUnless(len(leaves)==4,leaves) 654 655 656 m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC') 657 res = BRICSDecompose(m,allNodes=allNodes) 658 self.failUnless(res) 659 leaves.update(res) 660 self.failUnless(len(leaves)==8,leaves) 661 self.failUnless(len(allNodes)==18,allNodes)
662
663 - def test5(self):
664 allNodes = set() 665 frags = [ 666 '[14*]c1ncncn1', 667 '[16*]c1ccccc1', 668 '[14*]c1ncccc1', 669 ] 670 frags = [Chem.MolFromSmiles(x) for x in frags] 671 res = BRICSBuild(frags) 672 self.failUnless(res) 673 res= list(res) 674 self.failUnless(len(res)==6) 675 smis = [Chem.MolToSmiles(x,True) for x in res] 676 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis) 677 self.failUnless('c1ccc(-c2ncccc2)cc1' in smis)
678
679 - def test5a(self):
680 allNodes = set() 681 frags = [ 682 '[3*]O[3*]', 683 '[16*]c1ccccc1', 684 ] 685 frags = [Chem.MolFromSmiles(x) for x in frags] 686 res = BRICSBuild(frags) 687 self.failUnless(res) 688 res=list(res) 689 smis = [Chem.MolToSmiles(x,True) for x in res] 690 self.failUnless(len(smis)==2,smis) 691 self.failUnless('c1ccc(Oc2ccccc2)cc1' in smis) 692 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis)
693 694 695
696 - def test6(self):
697 allNodes = set() 698 frags = [ 699 '[16*]c1ccccc1', 700 '[3*]OC', 701 '[9*]n1cccc1', 702 ] 703 frags = [Chem.MolFromSmiles(x) for x in frags] 704 res = BRICSBuild(frags) 705 self.failUnless(res) 706 res= list(res) 707 self.failUnless(len(res)==3) 708 smis = [Chem.MolToSmiles(x,True) for x in res] 709 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis) 710 self.failUnless('COc1ccccc1' in smis) 711 self.failUnless('c1ccc(-n2cccc2)cc1' in smis)
712
713 - def test7(self):
714 allNodes = set() 715 frags = [ 716 '[16*]c1ccccc1', 717 '[3*]OC', 718 '[3*]OCC(=O)[6*]', 719 ] 720 frags = [Chem.MolFromSmiles(x) for x in frags] 721 res = BRICSBuild(frags) 722 self.failUnless(res) 723 res= list(res) 724 smis = [Chem.MolToSmiles(x,True) for x in res] 725 self.failUnless(len(res)==3) 726 self.failUnless('c1ccc(-c2ccccc2)cc1' in smis) 727 self.failUnless('COc1ccccc1' in smis) 728 self.failUnless('O=C(COc1ccccc1)c1ccccc1' in smis)
729
730 - def test8(self):
731 random.seed(23) 732 base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1") 733 catalog = BRICSDecompose(base) 734 self.failUnless(len(catalog)==5,catalog) 735 736 catalog = [Chem.MolFromSmiles(x) for x in catalog] 737 ms = [Chem.MolToSmiles(x) for x in BRICSBuild(catalog,maxDepth=4)] 738 self.failUnless(len(ms)==36,ms) 739 740 741 ts = ['n1cnc(C2CNC2)nc1','n1cnc(-c2ncncn2)nc1','C(OC1CNC1)C(C1CC1)OC1CNC1', 742 'n1cnc(OC(COC2CNC2)C2CC2)nc1','n1cnc(OCC(OC2CNC2)C2CNC2)nc1'] 743 ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x),True) for x in ts] 744 for t in ts: 745 self.failUnless(t in ms,(t,ms))
746
747 - def test9(self):
748 m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1') 749 res=BRICSDecompose(m) 750 self.failUnlessEqual(len(res),7) 751 self.failUnless('[3*]O[3*]' in res) 752 self.failIf('[14*]c1ncnc(NCCCC)n1' in res) 753 res = BRICSDecompose(m,singlePass=True) 754 self.failUnlessEqual(len(res),13) 755 self.failUnless('[3*]OCC' in res) 756 self.failUnless('[14*]c1ncnc(NCCCC)n1' in res)
757
758 - def test10(self):
759 m = Chem.MolFromSmiles('C1CCCCN1c1ccccc1') 760 res=BRICSDecompose(m) 761 self.failUnlessEqual(len(res),2,res)
762
763 - def test11(self):
764 # test coordinate preservation: 765 molblock=""" 766 RDKit 3D 767 768 13 14 0 0 0 0 0 0 0 0999 V2000 769 -1.2004 0.5900 0.6110 C 0 0 0 0 0 0 0 0 0 0 0 0 770 -2.2328 1.3173 0.0343 C 0 0 0 0 0 0 0 0 0 0 0 0 771 -3.4299 0.6533 -0.1500 C 0 0 0 0 0 0 0 0 0 0 0 0 772 -3.3633 -0.7217 -0.3299 C 0 0 0 0 0 0 0 0 0 0 0 0 773 -2.1552 -1.3791 -0.2207 C 0 0 0 0 0 0 0 0 0 0 0 0 774 -1.1425 -0.7969 0.5335 C 0 0 0 0 0 0 0 0 0 0 0 0 775 0.1458 -1.4244 0.4108 O 0 0 0 0 0 0 0 0 0 0 0 0 776 1.2976 -0.7398 -0.1026 C 0 0 0 0 0 0 0 0 0 0 0 0 777 2.4889 -0.7939 0.5501 N 0 0 0 0 0 0 0 0 0 0 0 0 778 3.4615 0.1460 0.3535 C 0 0 0 0 0 0 0 0 0 0 0 0 779 3.0116 1.4034 -0.0296 C 0 0 0 0 0 0 0 0 0 0 0 0 780 1.9786 1.4264 -0.9435 C 0 0 0 0 0 0 0 0 0 0 0 0 781 1.1399 0.3193 -0.9885 C 0 0 0 0 0 0 0 0 0 0 0 0 782 1 2 2 0 783 2 3 1 0 784 3 4 2 0 785 4 5 1 0 786 5 6 2 0 787 6 7 1 0 788 7 8 1 0 789 8 9 2 0 790 9 10 1 0 791 10 11 2 0 792 11 12 1 0 793 12 13 2 0 794 6 1 1 0 795 13 8 1 0 796 M END 797 """ 798 m = Chem.MolFromMolBlock(molblock) 799 pieces = BreakBRICSBonds(m) 800 frags = Chem.GetMolFrags(pieces,asMols=True) 801 self.failUnlessEqual(len(frags),3) 802 self.failUnlessEqual(frags[0].GetNumAtoms(),7) 803 self.failUnlessEqual(frags[1].GetNumAtoms(),3) 804 self.failUnlessEqual(frags[2].GetNumAtoms(),7) 805 806 c1 = m.GetConformer() 807 c2 = frags[0].GetConformer() 808 for i in range(6): 809 p1 = c1.GetAtomPosition(i) 810 p2 = c2.GetAtomPosition(i) 811 self.failUnlessEqual((p1-p2).Length(),0.0) 812 p1 = c1.GetAtomPosition(6) 813 p2 = c2.GetAtomPosition(6) 814 self.failUnlessEqual((p1-p2).Length(),0.0) 815 816 c2 = frags[2].GetConformer() 817 for i in range(6): 818 p1 = c1.GetAtomPosition(i+7) 819 p2 = c2.GetAtomPosition(i) 820 self.failUnlessEqual((p1-p2).Length(),0.0) 821 p1 = c1.GetAtomPosition(6) 822 p2 = c2.GetAtomPosition(6) 823 self.failUnlessEqual((p1-p2).Length(),0.0) 824 825 c2 = frags[1].GetConformer() 826 for i in range(1): 827 p1 = c1.GetAtomPosition(i+6) 828 p2 = c2.GetAtomPosition(i) 829 self.failUnlessEqual((p1-p2).Length(),0.0) 830 p1 = c1.GetAtomPosition(7) 831 p2 = c2.GetAtomPosition(1) 832 self.failUnlessEqual((p1-p2).Length(),0.0) 833 p1 = c1.GetAtomPosition(5) 834 p2 = c2.GetAtomPosition(2) 835 self.failUnlessEqual((p1-p2).Length(),0.0) 836 837 838 # make sure multiple conformations (include 2D) also work: 839 molblock=""" 840 RDKit 2D 841 842 13 14 0 0 0 0 0 0 0 0999 V2000 843 -1.2990 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 844 -2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 845 -3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 846 -3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 847 -2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 848 -1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 849 -0.0000 1.3846 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 850 1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 851 1.2990 -0.8654 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 852 2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 853 3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 854 3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 855 2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 856 1 2 2 0 857 2 3 1 0 858 3 4 2 0 859 4 5 1 0 860 5 6 2 0 861 6 7 1 0 862 7 8 1 0 863 8 9 2 0 864 9 10 1 0 865 10 11 2 0 866 11 12 1 0 867 12 13 2 0 868 6 1 1 0 869 13 8 1 0 870 M END 871 """ 872 m2 = Chem.MolFromMolBlock(molblock) 873 m.AddConformer(m2.GetConformer(),assignId=True) 874 self.failUnlessEqual(m.GetNumConformers(),2) 875 876 pieces = BreakBRICSBonds(m) 877 frags = Chem.GetMolFrags(pieces,asMols=True) 878 self.failUnlessEqual(len(frags),3) 879 self.failUnlessEqual(frags[0].GetNumAtoms(),7) 880 self.failUnlessEqual(frags[1].GetNumAtoms(),3) 881 self.failUnlessEqual(frags[2].GetNumAtoms(),7) 882 self.failUnlessEqual(frags[0].GetNumConformers(),2) 883 self.failUnlessEqual(frags[1].GetNumConformers(),2) 884 self.failUnlessEqual(frags[2].GetNumConformers(),2) 885 886 c1 = m.GetConformer(0) 887 c2 = frags[0].GetConformer(0) 888 for i in range(6): 889 p1 = c1.GetAtomPosition(i) 890 p2 = c2.GetAtomPosition(i) 891 self.failUnlessEqual((p1-p2).Length(),0.0) 892 p1 = c1.GetAtomPosition(6) 893 p2 = c2.GetAtomPosition(6) 894 self.failUnlessEqual((p1-p2).Length(),0.0) 895 896 c2 = frags[2].GetConformer(0) 897 for i in range(6): 898 p1 = c1.GetAtomPosition(i+7) 899 p2 = c2.GetAtomPosition(i) 900 self.failUnlessEqual((p1-p2).Length(),0.0) 901 p1 = c1.GetAtomPosition(6) 902 p2 = c2.GetAtomPosition(6) 903 self.failUnlessEqual((p1-p2).Length(),0.0) 904 905 c2 = frags[1].GetConformer(0) 906 for i in range(1): 907 p1 = c1.GetAtomPosition(i+6) 908 p2 = c2.GetAtomPosition(i) 909 self.failUnlessEqual((p1-p2).Length(),0.0) 910 p1 = c1.GetAtomPosition(7) 911 p2 = c2.GetAtomPosition(1) 912 self.failUnlessEqual((p1-p2).Length(),0.0) 913 p1 = c1.GetAtomPosition(5) 914 p2 = c2.GetAtomPosition(2) 915 self.failUnlessEqual((p1-p2).Length(),0.0) 916 917 c1 = m.GetConformer(1) 918 c2 = frags[0].GetConformer(1) 919 for i in range(6): 920 p1 = c1.GetAtomPosition(i) 921 p2 = c2.GetAtomPosition(i) 922 self.failUnlessEqual((p1-p2).Length(),0.0) 923 p1 = c1.GetAtomPosition(6) 924 p2 = c2.GetAtomPosition(6) 925 self.failUnlessEqual((p1-p2).Length(),0.0) 926 927 c2 = frags[2].GetConformer(1) 928 for i in range(6): 929 p1 = c1.GetAtomPosition(i+7) 930 p2 = c2.GetAtomPosition(i) 931 self.failUnlessEqual((p1-p2).Length(),0.0) 932 p1 = c1.GetAtomPosition(6) 933 p2 = c2.GetAtomPosition(6) 934 self.failUnlessEqual((p1-p2).Length(),0.0) 935 936 c2 = frags[1].GetConformer(1) 937 for i in range(1): 938 p1 = c1.GetAtomPosition(i+6) 939 p2 = c2.GetAtomPosition(i) 940 self.failUnlessEqual((p1-p2).Length(),0.0) 941 p1 = c1.GetAtomPosition(7) 942 p2 = c2.GetAtomPosition(1) 943 self.failUnlessEqual((p1-p2).Length(),0.0) 944 p1 = c1.GetAtomPosition(5) 945 p2 = c2.GetAtomPosition(2) 946 self.failUnlessEqual((p1-p2).Length(),0.0)
947
948 - def test12(self):
949 m = Chem.MolFromSmiles('CCS(=O)(=O)NCC') 950 res=list(FindBRICSBonds(m)) 951 self.failUnlessEqual(len(res),2,res) 952 atIds = [x[0] for x in res] 953 atIds.sort() 954 self.failUnlessEqual(atIds,[(5,2), (6,5)])
955 956 957 958 959 960 failed,tried = _test() 961 if failed: 962 sys.exit(failed) 963 964 unittest.main() 965