55 std::string &compressionFormat) {
60 if (boost::algorithm::iends_with(path,
".maegz")) {
62 compressionFormat =
"gz";
64 }
else if (boost::algorithm::iends_with(path,
".sdfgz")) {
66 compressionFormat =
"gz";
68 }
else if (boost::algorithm::iends_with(path,
".gz")) {
69 compressionFormat =
"gz";
70 basename = path.substr(0, path.size() - 3);
71 }
else if (boost::algorithm::iends_with(path,
".zst") ||
72 boost::algorithm::iends_with(path,
".bz2") ||
73 boost::algorithm::iends_with(path,
".7z")) {
75 "Unsupported compression extension (.zst, .bz2, .7z) given path: " +
79 compressionFormat =
"";
82 if (boost::algorithm::iends_with(basename,
"." + suffix)) {
88 "Unsupported structure or compression extension given path: " + path);
100 std::string fileFormat =
"";
101 std::string compressionFormat =
"";
106 if (compressionFormat.empty()) {
107 strm =
new std::ifstream(path.c_str(), std::ios::in | std::ios::binary);
109#ifdef RDK_USE_BOOST_IOSTREAMS
110 strm =
new gzstream(path);
113 "compressed files are only supported if the RDKit is built with boost::iostreams support");
117 if ((!(*strm)) || strm->bad()) {
118 std::ostringstream errout;
119 errout <<
"Bad input file " << path;
124 if (strm->bad() || strm->eof()) {
125 std::ostringstream errout;
126 errout <<
"Invalid input file " << path;
131#ifdef RDK_BUILD_THREADSAFE_SSS
132 FileParsers::MultithreadedMolSupplier::Parameters params;
136 if (fileFormat ==
"sdf") {
137 FileParsers::MolFileParserParams parseParams;
138 parseParams.sanitize = opt.
sanitize;
139 parseParams.removeHs = opt.
removeHs;
141#ifdef RDK_BUILD_THREADSAFE_SSS
142 if (params.numWriterThreads > 1) {
143 return std::make_unique<FileParsers::MultithreadedSDMolSupplier>(
144 strm,
true, params, parseParams);
147 return std::make_unique<FileParsers::ForwardSDMolSupplier>(strm,
true,
151 else if (fileFormat ==
"smi" || fileFormat ==
"csv" || fileFormat ==
"txt" ||
152 fileFormat ==
"tsv") {
153 FileParsers::SmilesMolSupplierParams parseParams;
158 parseParams.parseParameters.sanitize = opt.
sanitize;
159#ifdef RDK_BUILD_THREADSAFE_SSS
160 if (params.numWriterThreads > 1) {
161 return std::make_unique<FileParsers::MultithreadedSmilesMolSupplier>(
162 strm,
true, params, parseParams);
165 return std::make_unique<FileParsers::SmilesMolSupplier>(strm,
true,
168#ifdef RDK_BUILD_MAEPARSER_SUPPORT
169 else if (fileFormat ==
"mae") {
170 FileParsers::MaeMolSupplierParams parseParams;
171 parseParams.sanitize = opt.
sanitize;
172 parseParams.removeHs = opt.
removeHs;
173 return std::make_unique<FileParsers::MaeMolSupplier>(strm,
true,
177 else if (fileFormat ==
"tdt") {
178 FileParsers::TDTMolSupplierParams parseParams;
180 parseParams.confId2D = opt.
confId2D;
181 parseParams.confId3D = opt.
confId3D;
182 parseParams.parseParameters.sanitize = opt.
sanitize;
183 return std::make_unique<FileParsers::TDTMolSupplier>(strm,
true,