10#ifndef GENERAL_FILE_READER_H
11#define GENERAL_FILE_READER_H
13#include <RDStreams/streams.h>
15#include <boost/algorithm/string.hpp>
26namespace GeneralMolSupplier {
46 "sdf",
"mae",
"maegz",
"sdfgz",
"smi",
"csv",
"txt",
"tsv",
"tdt"};
60 if (boost::algorithm::iends_with(
path,
".maegz")) {
64 }
else if (boost::algorithm::iends_with(
path,
".sdfgz")) {
68 }
else if (boost::algorithm::iends_with(
path,
".gz")) {
71 }
else if (boost::algorithm::iends_with(
path,
".zst") ||
72 boost::algorithm::iends_with(
path,
".bz2") ||
73 boost::algorithm::iends_with(
path,
".7z")) {
75 "Unsupported compression extension (.zst, .bz2, .7z) given path: " +
88 "Unsupported structure or compression extension given path: " +
path);
107 strm =
new std::ifstream(
path.c_str(), std::ios::in | std::ios::binary);
109#ifdef RDK_USE_BOOST_IOSTREAMS
113 "compressed files are only supported if the RDKit is built with boost::iostreams support");
119#ifdef RDK_BUILD_THREADSAFE_SSS
120 if (
opt.numWriterThreads > 0) {
123 opt.numWriterThreads);
124 std::unique_ptr<MolSupplier> p(
sdsup);
130 std::unique_ptr<MolSupplier> p(
sdsup);
136#ifdef RDK_BUILD_THREADSAFE_SSS
137 if (
opt.numWriterThreads > 0) {
141 opt.titleLine,
opt.sanitize,
opt.numWriterThreads);
142 std::unique_ptr<MolSupplier> p(
smsup);
148 opt.nameColumn,
opt.titleLine,
opt.sanitize);
149 std::unique_ptr<MolSupplier> p(
smsup);
152#ifdef RDK_BUILD_MAEPARSER_SUPPORT
156 std::unique_ptr<MolSupplier> p(
maesup);
163 std::unique_ptr<MolSupplier> p(
tdtsup);
used by various file parsing classes to indicate a bad file
lazy file parser for Smiles tables
lazy file parser for TDT files
const std::vector< std::string > supportedCompressionFormats
current supported compression formats
const std::vector< std::string > supportedFileFormats
current supported file formats
void determineFormat(const std::string path, std::string &fileFormat, std::string &compressionFormat)
std::unique_ptr< MolSupplier > getSupplier(const std::string &path, const struct SupplierOptions &opt)
bool rdvalue_is(const RDValue_cast_t)
unsigned int numWriterThreads