1
2
3
4
5
6
7
8
9
10 import sqlalchemy
11
12 from rdkit import Chem
13 from rdkit.Chem import AllChem
14 from rdkit.Chem import Lipinski,Descriptors,Crippen
15 from rdkit.Dbase.DbConnection import DbConnect
16 from rdkit.Dbase import DbModule
17 import os
18
19 from sqlalchemy.ext.declarative import declarative_base
20 from sqlalchemy import Table,Column,MetaData
21 from sqlalchemy import Integer,Text,String,ForeignKey,Binary,DateTime,Float
22 from sqlalchemy.orm import relation,mapper,sessionmaker,backref
23 from sqlalchemy import create_engine
24
25 decBase = declarative_base()
26
31
32
34 engine = create_engine(dbUrl,echo=echo)
35 decBase.metadata.create_all(engine)
36 maker = sessionmaker(bind=engine)
37 return maker
38 ConnectToSchema=RegisterSchema
39
40
41 import rdkit.RDLogger as logging
42 logger = logging.logger()
43 logger.setLevel(logging.INFO)
44
45 -def ProcessMol(session,mol,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
46 redraw=False,keepHs=False,
47 skipProps=False,addComputedProps=False,
48 skipSmiles=False):
85
86 -def LoadDb(suppl,dbName,nameProp='_Name',nameCol='compound_id',silent=False,
87 redraw=False,errorsTo=None,keepHs=False,defaultVal='N/A',skipProps=False,
88 regName='molecules',skipSmiles=False,maxRowsCached=-1,
89 uniqNames=False,addComputedProps=False,lazySupplier=False,
90 numForPropScan=10,startAnew=True):
91 if not lazySupplier:
92 nMols = len(suppl)
93 else:
94 nMols=-1
95 if not silent:
96 logger.info("Generating molecular database in file %s"%dbName)
97 if not lazySupplier:
98 logger.info(" Processing %d molecules"%nMols)
99
100 globalProps = {}
101 if startAnew:
102 if os.path.exists(dbName):
103 os.unlink(dbName)
104 sIter=iter(suppl)
105 setattr(Compound,nameCol.lower(),Column(nameCol.lower(),String,default=defaultVal,unique=uniqNames))
106 if not skipSmiles:
107 Compound.smiles = Column(Text,unique=True)
108 if not skipProps:
109 while numForPropScan>0:
110 try:
111 m = sIter.next()
112 except StopIteration:
113 numForPropScan=0
114 break
115 if not m: continue
116 for pn in m.GetPropNames():
117 if pn.lower()==nameCol.lower(): continue
118 if not globalProps.has_key(pn):
119 globalProps[pn]=1
120 setattr(Compound,pn.lower(),Column(pn.lower(),String,default=defaultVal))
121 numForPropScan-=1
122 if addComputedProps:
123 Compound.DonorCount=Column(Integer)
124 Compound.AcceptorCount=Column(Integer)
125 Compound.RotatableBondCount=Column(Integer)
126 Compound.AMW=Column(Float)
127 Compound.MolLogP=Column(Float)
128 session=RegisterSchema('sqlite:///%s'%(dbName))()
129 else:
130 raise NotImplementedError,'updating existing databases is not yet supported'
131
132
133 nDone = 0
134 cache=[]
135 for m in suppl:
136 nDone +=1
137 if not m:
138 if errorsTo:
139 if hasattr(suppl,'GetItemText'):
140 d = suppl.GetItemText(nDone-1)
141 errorsTo.write(d)
142 else:
143 logger.warning('full error file support not complete')
144 continue
145
146 cmpd=ProcessMol(session,m,globalProps,nDone,nameProp=nameProp,
147 nameCol=nameCol,redraw=redraw,
148 keepHs=keepHs,skipProps=skipProps,
149 addComputedProps=addComputedProps,skipSmiles=skipSmiles)
150 if cmpd is not None:
151 cache.append(cmpd)
152
153 if not silent and not nDone%100:
154 logger.info(' done %d'%nDone)
155 try:
156 session.commit()
157 except:
158 session.rollback()
159 for cmpd in cache:
160 try:
161 session.add(cmpd)
162 session.commit()
163 except:
164 session.rollback()
165 cache=[]
166
167
168 try:
169 session.commit()
170 except:
171 session.rollback()
172 for cmpd in cache:
173 try:
174 session.add(cmpd)
175 session.commit()
176 except:
177 session.rollback()
178
179 if __name__=='__main__':
180 import sys
181 sdf =Chem.SDMolSupplier(sys.argv[1])
182 db =sys.argv[2]
183 LoadDb(sdf,db,addComputedProps=False)
184 session = RegisterSchema('sqlite:///%s'%(db))()
185 print '>>>>', len(session.query(Compound).all())
186