Package rdkit :: Package Chem :: Package MolDb :: Module Loader_orig
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.MolDb.Loader_orig

  1  # $Id: Loader_orig.py 1528 2010-09-26 17:04:37Z glandrum $ 
  2  # 
  3  #  Copyright (C) 2007-2008 Greg Landrum 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  from rdkit import Chem 
 11  from rdkit.Chem import AllChem 
 12  from rdkit.Chem import Lipinski,Descriptors,Crippen 
 13  from rdkit.Dbase.DbConnection import DbConnect 
 14  from rdkit.Dbase import DbModule 
 15  import re 
 16   
 17  #set up the logger: 
 18  import rdkit.RDLogger as logging 
 19  logger = logging.logger() 
 20  logger.setLevel(logging.INFO) 
 21   
22 -def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id', 23 redraw=False,keepHs=False, 24 skipProps=False,addComputedProps=False, 25 skipSmiles=False, 26 uniqNames=None,namesSeen=None):
27 if not mol: 28 raise ValueError,'no molecule' 29 if keepHs: 30 Chem.SanitizeMol(mol) 31 try: 32 nm = mol.GetProp(nameProp) 33 except KeyError: 34 nm = None 35 if not nm: 36 nm = 'Mol_%d'%nDone 37 if uniqNames and nm in namesSeen: 38 logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm) 39 return None 40 namesSeen.add(nm) 41 row = [nm] 42 if not skipProps: 43 if addComputedProps: 44 nHD=Lipinski.NumHDonors(mol) 45 mol.SetProp('DonorCount',str(nHD)) 46 nHA=Lipinski.NumHAcceptors(mol) 47 mol.SetProp('AcceptorCount',str(nHA)) 48 nRot=Lipinski.NumRotatableBonds(mol) 49 mol.SetProp('RotatableBondCount',str(nRot)) 50 MW=Descriptors.MolWt(mol) 51 mol.SetProp('AMW',str(MW)) 52 logp=Crippen.MolLogP(mol) 53 mol.SetProp('MolLogP',str(logp)) 54 55 pns = list(mol.GetPropNames()) 56 pD={} 57 for pi,pn in enumerate(pns): 58 if pn.lower()==nameCol.lower(): continue 59 pv = mol.GetProp(pn).strip() 60 if pv.find('>')<0 and pv.find('<')<0: 61 colTyp = globalProps.get(pn,2) 62 while colTyp>0: 63 try: 64 tpi = typeConversions[colTyp][1](pv) 65 except: 66 colTyp-=1 67 else: 68 break 69 globalProps[pn]=colTyp 70 pD[pn]=typeConversions[colTyp][1](pv) 71 else: 72 pD[pn]=pv 73 else: 74 pD={} 75 if redraw: 76 AllChem.Compute2DCoords(m) 77 if not skipSmiles: 78 row.append(Chem.MolToSmiles(mol,True)) 79 row.append(DbModule.binaryHolder(mol.ToBinary())) 80 row.append(pD) 81 return row
82
83 -def ConvertRows(rows,globalProps,defaultVal,skipSmiles):
84 for i,row in enumerate(rows): 85 newRow = [row[0],row[1]] 86 pD=row[-1] 87 for pn in globalProps: 88 pv = pD.get(pn,defaultVal) 89 newRow.append(pv) 90 newRow.append(row[2]) 91 if not skipSmiles: 92 newRow.append(row[3]) 93 rows[i] = newRow
94
95 -def LoadDb(suppl,dbName,nameProp='_Name',nameCol='compound_id',silent=False, 96 redraw=False,errorsTo=None,keepHs=False,defaultVal='N/A',skipProps=False, 97 regName='molecules',skipSmiles=False,maxRowsCached=-1, 98 uniqNames=False,addComputedProps=False,lazySupplier=False):
99 if not lazySupplier: 100 nMols = len(suppl) 101 else: 102 nMols=-1 103 if not silent: 104 logger.info("Generating molecular database in file %s"%dbName) 105 if not lazySupplier: 106 logger.info(" Processing %d molecules"%nMols) 107 rows = [] 108 globalProps = {} 109 namesSeen = set() 110 nDone = 0 111 typeConversions={0:('varchar',str),1:('float',float),2:('int',int)} 112 for m in suppl: 113 nDone +=1 114 if not m: 115 if errorsTo: 116 if hasattr(suppl,'GetItemText'): 117 d = suppl.GetItemText(nDone-1) 118 errorsTo.write(d) 119 else: 120 logger.warning('full error file support not complete') 121 continue 122 123 row=ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp, 124 nameCol=nameCol,redraw=redraw, 125 keepHs=keepHs,skipProps=skipProps, 126 addComputedProps=addComputedProps,skipSmiles=skipSmiles, 127 uniqNames=uniqNames,namesSeen=namesSeen) 128 if row is None: continue 129 rows.append([nDone]+row) 130 if not silent and not nDone%100: 131 logger.info(' done %d'%nDone) 132 if len(rows)==maxRowsCached: 133 break 134 135 nameDef='%s varchar not null'%nameCol 136 if uniqNames: 137 nameDef += ' unique' 138 typs = ['guid integer not null primary key',nameDef] 139 pns = [] 140 for pn,v in globalProps.iteritems(): 141 addNm = re.sub(r'[\W]','_',pn) 142 typs.append('%s %s'%(addNm,typeConversions[v][0])) 143 pns.append(pn.lower()) 144 145 if not skipSmiles: 146 if 'smiles' not in pns: 147 typs.append('smiles varchar') 148 else: 149 typs.append('cansmiles varchar') 150 typs.append('molpkl %s'%(DbModule.binaryTypeName)) 151 conn = DbConnect(dbName) 152 curs = conn.GetCursor() 153 try: 154 curs.execute('drop table %s'%regName) 155 except: 156 pass 157 curs.execute('create table %s (%s)'%(regName,','.join(typs))) 158 qs = ','.join([DbModule.placeHolder for x in typs]) 159 160 161 ConvertRows(rows,globalProps,defaultVal,skipSmiles) 162 curs.executemany('insert into %s values (%s)'%(regName,qs),rows) 163 conn.Commit() 164 165 rows = [] 166 while 1: 167 nDone +=1 168 try: 169 m = suppl.next() 170 except StopIteration: 171 break 172 if not m: 173 if errorsTo: 174 if hasattr(suppl,'GetItemText'): 175 d = suppl.GetItemText(nDone-1) 176 errorsTo.write(d) 177 else: 178 logger.warning('full error file support not complete') 179 continue 180 tmpProps={} 181 row=ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp, 182 nameCol=nameCol,redraw=redraw, 183 keepHs=keepHs,skipProps=skipProps, 184 addComputedProps=addComputedProps,skipSmiles=skipSmiles, 185 uniqNames=uniqNames,namesSeen=namesSeen) 186 if not row: continue 187 rows.append([nDone]+row) 188 if not silent and not nDone%100: 189 logger.info(' done %d'%nDone) 190 if len(rows)==maxRowsCached: 191 ConvertRows(rows,globalProps,defaultVal,skipSmiles) 192 curs.executemany('insert into %s values (%s)'%(regName,qs),rows) 193 conn.Commit() 194 rows = [] 195 if len(rows): 196 ConvertRows(rows,globalProps,defaultVal,skipSmiles) 197 curs.executemany('insert into %s values (%s)'%(regName,qs),rows) 198 conn.Commit()
199