1
2
3
4
5
6
7
8
9
10 from rdkit import Chem
11 from rdkit.Chem import AllChem
12 from rdkit.Chem import Lipinski,Descriptors,Crippen
13 from rdkit.Dbase.DbConnection import DbConnect
14 from rdkit.Dbase import DbModule
15 import re
16
17
18 import rdkit.RDLogger as logging
19 logger = logging.logger()
20 logger.setLevel(logging.INFO)
21
22 -def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
23 redraw=False,keepHs=False,
24 skipProps=False,addComputedProps=False,
25 skipSmiles=False,
26 uniqNames=None,namesSeen=None):
82
83 -def ConvertRows(rows,globalProps,defaultVal,skipSmiles):
84 for i,row in enumerate(rows):
85 newRow = [row[0],row[1]]
86 pD=row[-1]
87 for pn in globalProps:
88 pv = pD.get(pn,defaultVal)
89 newRow.append(pv)
90 newRow.append(row[2])
91 if not skipSmiles:
92 newRow.append(row[3])
93 rows[i] = newRow
94
95 -def LoadDb(suppl,dbName,nameProp='_Name',nameCol='compound_id',silent=False,
96 redraw=False,errorsTo=None,keepHs=False,defaultVal='N/A',skipProps=False,
97 regName='molecules',skipSmiles=False,maxRowsCached=-1,
98 uniqNames=False,addComputedProps=False,lazySupplier=False):
99 if not lazySupplier:
100 nMols = len(suppl)
101 else:
102 nMols=-1
103 if not silent:
104 logger.info("Generating molecular database in file %s"%dbName)
105 if not lazySupplier:
106 logger.info(" Processing %d molecules"%nMols)
107 rows = []
108 globalProps = {}
109 namesSeen = set()
110 nDone = 0
111 typeConversions={0:('varchar',str),1:('float',float),2:('int',int)}
112 for m in suppl:
113 nDone +=1
114 if not m:
115 if errorsTo:
116 if hasattr(suppl,'GetItemText'):
117 d = suppl.GetItemText(nDone-1)
118 errorsTo.write(d)
119 else:
120 logger.warning('full error file support not complete')
121 continue
122
123 row=ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp,
124 nameCol=nameCol,redraw=redraw,
125 keepHs=keepHs,skipProps=skipProps,
126 addComputedProps=addComputedProps,skipSmiles=skipSmiles,
127 uniqNames=uniqNames,namesSeen=namesSeen)
128 if row is None: continue
129 rows.append([nDone]+row)
130 if not silent and not nDone%100:
131 logger.info(' done %d'%nDone)
132 if len(rows)==maxRowsCached:
133 break
134
135 nameDef='%s varchar not null'%nameCol
136 if uniqNames:
137 nameDef += ' unique'
138 typs = ['guid integer not null primary key',nameDef]
139 pns = []
140 for pn,v in globalProps.iteritems():
141 addNm = re.sub(r'[\W]','_',pn)
142 typs.append('%s %s'%(addNm,typeConversions[v][0]))
143 pns.append(pn.lower())
144
145 if not skipSmiles:
146 if 'smiles' not in pns:
147 typs.append('smiles varchar')
148 else:
149 typs.append('cansmiles varchar')
150 typs.append('molpkl %s'%(DbModule.binaryTypeName))
151 conn = DbConnect(dbName)
152 curs = conn.GetCursor()
153 try:
154 curs.execute('drop table %s'%regName)
155 except:
156 pass
157 curs.execute('create table %s (%s)'%(regName,','.join(typs)))
158 qs = ','.join([DbModule.placeHolder for x in typs])
159
160
161 ConvertRows(rows,globalProps,defaultVal,skipSmiles)
162 curs.executemany('insert into %s values (%s)'%(regName,qs),rows)
163 conn.Commit()
164
165 rows = []
166 while 1:
167 nDone +=1
168 try:
169 m = suppl.next()
170 except StopIteration:
171 break
172 if not m:
173 if errorsTo:
174 if hasattr(suppl,'GetItemText'):
175 d = suppl.GetItemText(nDone-1)
176 errorsTo.write(d)
177 else:
178 logger.warning('full error file support not complete')
179 continue
180 tmpProps={}
181 row=ProcessMol(m,typeConversions,globalProps,nDone,nameProp=nameProp,
182 nameCol=nameCol,redraw=redraw,
183 keepHs=keepHs,skipProps=skipProps,
184 addComputedProps=addComputedProps,skipSmiles=skipSmiles,
185 uniqNames=uniqNames,namesSeen=namesSeen)
186 if not row: continue
187 rows.append([nDone]+row)
188 if not silent and not nDone%100:
189 logger.info(' done %d'%nDone)
190 if len(rows)==maxRowsCached:
191 ConvertRows(rows,globalProps,defaultVal,skipSmiles)
192 curs.executemany('insert into %s values (%s)'%(regName,qs),rows)
193 conn.Commit()
194 rows = []
195 if len(rows):
196 ConvertRows(rows,globalProps,defaultVal,skipSmiles)
197 curs.executemany('insert into %s values (%s)'%(regName,qs),rows)
198 conn.Commit()
199