11#ifndef RD_FILEPARSERUTILS_H
12#define RD_FILEPARSERUTILS_H
17#include <boost/lexical_cast.hpp>
18#include <boost/algorithm/string.hpp>
19#include <boost/format.hpp>
30 std::string_view orig, std::string stripChars =
" \t\r\n") {
31 std::string_view res = orig;
32 auto start = res.find_first_not_of(stripChars);
33 if (start != std::string_view::npos) {
34 auto end = res.find_last_not_of(stripChars) + 1;
35 res = res.substr(start, end - start);
44 auto trimmed =
strip(input,
" ");
45 if (acceptSpaces && trimmed.empty()) {
48 return boost::lexical_cast<T>(trimmed);
56 bool acceptSpaces =
true);
58 bool acceptSpaces =
true);
60 bool acceptSpaces =
true);
62 bool acceptSpaces =
true);
64 bool acceptSpaces =
true);
66 bool acceptSpaces =
true);
70 const ROMol &tmol,
const boost::dynamic_bitset<> &wasAromatic,
71 int confId = -1,
unsigned int precision = 6);
74 unsigned int precision = 6) {
75 boost::dynamic_bitset<> wasAromatic(tmol.
getNumBonds());
76 return getV3000CTAB(tmol, wasAromatic, confId, precision);
84 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
85 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
86 bool strictParsing =
true,
bool expectMEND =
true,
87 bool expectMacroAtoms =
false);
91 std::istream *inStream,
unsigned int &line,
RWMol *mol,
Conformer *&conf,
92 bool &chiralityPossible,
unsigned int &nAtoms,
unsigned int &nBonds,
93 bool strictParsing =
true);
98 RWMol *res,
bool chiralityPossible,
102 bool sanitize,
bool removeHs) {
113template <
typename T,
typename U>
115 const std::string &prefix,
116 const std::string &missingValueMarker,
size_t nItems,
118 std::string itempn = pn.substr(prefix.size());
119 std::string strVect = mol.
getProp<std::string>(pn);
120 std::vector<std::string> tokens;
121 boost::split(tokens, strVect, boost::is_any_of(
" \t\n"),
122 boost::token_compress_on);
123 std::string mv = missingValueMarker;
124 size_t first_token = 0;
125 if (tokens.size() == nItems + 1 && tokens[0].front() ==
'[' &&
126 tokens[0].back() ==
']') {
127 mv = std::string(tokens[0].begin() + 1, tokens[0].end() - 1);
132 <<
" is empty." << std::endl;
134 if(tokens.size() - first_token != nItems) {
136 << tokens.size() <<
" elements found; expecting "
137 << nItems <<
". Ignoring it." << std::endl;
140 for (
size_t i = first_token; i < tokens.size(); ++i) {
141 if (tokens[i] != mv) {
142 unsigned int itemid = i - first_token;
144 T apv = boost::lexical_cast<T>(tokens[i]);
145 getter(itemid)->setProp(itempn, apv);
146 }
catch (
const boost::bad_lexical_cast &) {
148 <<
"Value " << tokens[i] <<
" for property " << pn <<
" of item "
149 << itemid <<
" can not be parsed. Ignoring it." << std::endl;
157[[deprecated(
"use applyMolListProp instead")]]
159 const std::string &prefix,
160 const std::string &missingValueMarker =
"n/a") {
161 auto getter = [&mol](
size_t which) {
return mol.
getAtomWithIdx(which); };
166template <
typename T,
typename U>
168 U getter,
const std::string missingValueMarker =
"n/a") {
170 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
179[[deprecated(
"use applyMolListProps instead")]]
181 const std::string missingValueMarker =
"n/a") {
182 auto getter = [&mol](
size_t which) {
return mol.
getAtomWithIdx(which); };
185 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
201 ROMol &mol,
const std::string &pn,
202 const std::string &missingValueMarker =
"n/a") {
203 auto propSetter = [&](
const std::string &propPrefix,
auto getter,
205 std::string prefix = propPrefix +
"prop.";
206 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
210 prefix = propPrefix +
"iprop.";
211 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
215 prefix = propPrefix +
"dprop.";
216 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
220 prefix = propPrefix +
"bprop.";
221 if (pn.find(prefix) == 0 && pn.length() > prefix.length()) {
246 ROMol &mol,
const std::string &missingValueMarker =
"n/a") {
254template <
typename T,
typename U>
256 std::string missingValueMarker =
"",
260 if (!missingValueMarker.empty()) {
261 propVal += boost::str(boost::format(
"[%s] ") % missingValueMarker);
263 missingValueMarker =
"n/a";
265 for (
const auto item : getter()) {
266 std::string apVal = missingValueMarker;
268 if (item->getPropIfPresent(propName, tVal)) {
269 apVal = boost::lexical_cast<std::string>(tVal);
271 if (propVal.length() + apVal.length() + 1 >= lineSize) {
274 res += propVal +
"\n";
277 propVal += apVal +
" ";
279 if (!propVal.empty()) {
288[[deprecated(
"use getPropertyList() instead")]]
290 std::string missingValueMarker =
"",
293 missingValueMarker, lineSize);
296template <
typename T,
typename U>
298 const std::string &typeMarker,
299 const std::string &propName,
300 const std::string &missingValueMarker =
"",
302 std::string molPropName = prefix +
"." + typeMarker +
"." + propName;
304 missingValueMarker, lineSize));
308 ROMol &mol,
const std::string &atomPropName,
309 const std::string &missingValueMarker =
"",
312 mol, [&mol]() {
return mol.
atoms(); },
"atom",
"iprop", atomPropName,
313 missingValueMarker, lineSize);
316 ROMol &mol,
const std::string &atomPropName,
317 const std::string &missingValueMarker =
"",
320 mol, [&mol]() {
return mol.
atoms(); },
"atom",
"dprop", atomPropName,
321 missingValueMarker, lineSize);
324 ROMol &mol,
const std::string &atomPropName,
325 const std::string &missingValueMarker =
"",
328 mol, [&mol]() {
return mol.
atoms(); },
"atom",
"bprop", atomPropName,
329 missingValueMarker, lineSize);
332 ROMol &mol,
const std::string &atomPropName,
333 const std::string &missingValueMarker =
"",
336 mol, [&mol]() {
return mol.
atoms(); },
"atom",
"prop", atomPropName,
337 missingValueMarker, lineSize);
341 ROMol &mol,
const std::string &bondPropName,
342 const std::string &missingValueMarker =
"",
345 mol, [&mol]() {
return mol.
bonds(); },
"bond",
"iprop", bondPropName,
346 missingValueMarker, lineSize);
349 ROMol &mol,
const std::string &bondPropName,
350 const std::string &missingValueMarker =
"",
353 mol, [&mol]() {
return mol.
bonds(); },
"bond",
"dprop", bondPropName,
354 missingValueMarker, lineSize);
357 ROMol &mol,
const std::string &bondPropName,
358 const std::string &missingValueMarker =
"",
361 mol, [&mol]() {
return mol.
bonds(); },
"bond",
"bprop", bondPropName,
362 missingValueMarker, lineSize);
365 ROMol &mol,
const std::string &bondPropName,
366 const std::string &missingValueMarker =
"",
369 mol, [&mol]() {
return mol.
bonds(); },
"bond",
"prop", bondPropName,
370 missingValueMarker, lineSize);
#define BOOST_LOG(__arg__)
RDKIT_RDGENERAL_EXPORT RDLogger rdWarningLog
The class for representing atoms.
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
unsigned int getNumBonds(bool onlyHeavy=1) const
returns our number of Bonds
Atom * getAtomWithIdx(unsigned int idx)
returns a pointer to a particular Atom
unsigned int getNumAtoms() const
returns our number of atoms
CXXAtomIterator< MolGraph, Atom * > atoms()
C++11 Range iterator.
CXXBondIterator< MolGraph, Bond * > bonds()
Bond * getBondWithIdx(unsigned int idx)
returns a pointer to a particular Bond
RWMol is a molecule class that is intended to be edited.
#define RDKIT_FILEPARSERS_EXPORT
RDKIT_FILEPARSERS_EXPORT void moveAdditionalPropertiesToSGroups(RWMol &mol)
void createAtomDoublePropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
RDKIT_FILEPARSERS_EXPORT bool ParseV3000CTAB(std::istream *inStream, unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible, unsigned int &nAtoms, unsigned int &nBonds, bool strictParsing=true, bool expectMEND=true, bool expectMacroAtoms=false)
RDKIT_FILEPARSERS_EXPORT std::string getV3000CTAB(const ROMol &tmol, const boost::dynamic_bitset<> &wasAromatic, int confId=-1, unsigned int precision=6)
RDKIT_FILEPARSERS_EXPORT double toDouble(const std::string &input, bool acceptSpaces=true)
RDKIT_FILEPARSERS_EXPORT int toInt(const std::string &input, bool acceptSpaces=true)
void createBondDoublePropertyList(ROMol &mol, const std::string &bondPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
void createPropertyList(ROMol &mol, U getter, const std::string &prefix, const std::string &typeMarker, const std::string &propName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
static constexpr std::string_view bondPropPrefixView
RDKIT_FILEPARSERS_EXPORT Atom * replaceAtomWithQueryAtom(RWMol *mol, Atom *atom)
Deprecated, please use QueryOps::replaceAtomWithQueryAtom instead.
static constexpr unsigned int DEFAULT_LINESIZE
T stripSpacesAndCast(std::string_view input, bool acceptSpaces=false)
void applyMolListPropToAtoms(ROMol &mol, const std::string &pn, const std::string &prefix, const std::string &missingValueMarker="n/a")
applies a particular property to the atoms as an atom property list
void createAtomStringPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
void createAtomBoolPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
void applyMolListProp(ROMol &mol, const std::string &pn, const std::string &prefix, const std::string &missingValueMarker, size_t nItems, U getter)
applies a particular property to the atoms as an atom property list
static constexpr size_t bondPropPrefixLength
std::string getAtomPropertyList(ROMol &mol, const std::string &atomPropName, std::string missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
void createAtomIntPropertyList(ROMol &mol, const std::string &atomPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
static const std::string bondPropPrefix
RDKIT_FILEPARSERS_EXPORT void finishMolProcessing(RWMol *res, bool chiralityPossible, const v2::FileParsers::MolFileParserParams &ps)
RDKIT_FILEPARSERS_EXPORT std::string_view strip(std::string_view orig, std::string stripChars=" \t\r\n")
void createBondBoolPropertyList(ROMol &mol, const std::string &bondPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
void applyMolListPropsToAtoms(ROMol &mol, const std::string &prefix, const std::string missingValueMarker="n/a")
void createBondIntPropertyList(ROMol &mol, const std::string &bondPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
static constexpr std::string_view atomPropPrefixView
void processMolPropertyLists(ROMol &mol, const std::string &missingValueMarker="n/a")
RDKIT_FILEPARSERS_EXPORT bool ParseV2000CTAB(std::istream *inStream, unsigned int &line, RWMol *mol, Conformer *&conf, bool &chiralityPossible, unsigned int &nAtoms, unsigned int &nBonds, bool strictParsing=true)
static const std::string atomPropPrefix
void processMolPropertyList(ROMol &mol, const std::string &pn, const std::string &missingValueMarker="n/a")
static constexpr size_t atomPropPrefixLength
std::string getPropertyList(U getter, const std::string &propName, std::string missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)
RDKIT_FILEPARSERS_EXPORT std::string getV3000Line(std::istream *inStream, unsigned int &line)
RDKIT_FILEPARSERS_EXPORT unsigned int toUnsigned(const std::string &input, bool acceptSpaces=true)
void applyMolListProps(ROMol &mol, const std::string &prefix, size_t nItems, U getter, const std::string missingValueMarker="n/a")
void createBondStringPropertyList(ROMol &mol, const std::string &bondPropName, const std::string &missingValueMarker="", unsigned int lineSize=DEFAULT_LINESIZE)