RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
FileParsers.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2024 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_FILEPARSERS_H
12#define RD_FILEPARSERS_H
13
14#include <RDGeneral/types.h>
15#include <GraphMol/RDKitBase.h>
17#include "CDXMLParser.h"
18#include <string>
19#include <string_view>
20#include <iostream>
21#include <vector>
22#include <exception>
23
24#include <boost/shared_ptr.hpp>
25
26namespace RDKit {
27
28RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
29
30namespace v2 {
31namespace FileParsers {
33 : public std::exception {
34 public:
35 //! construct with an error message
36 explicit MolFileUnhandledFeatureException(const char *msg) : _msg(msg) {}
37 //! construct with an error message
38 explicit MolFileUnhandledFeatureException(const std::string msg)
39 : _msg(msg) {}
40 //! get the error message
41 const char *what() const noexcept override { return _msg.c_str(); }
42 ~MolFileUnhandledFeatureException() noexcept override = default;
43
44 private:
45 std::string _msg;
46};
47
49 bool sanitize = true; /**< sanitize the molecule after building it */
50 bool removeHs = true; /**< remove Hs after constructing the molecule */
51 bool strictParsing = true; /**< if set to false, the parser is more lax about
52 correctness of the contents. */
53 bool expandAttachmentPoints =
54 false; /**< toggle conversion of attachment points into dummy atoms */
55 bool parsingSCSRMol = false; /**< if true, we are parsing a SCSR mol file */
56};
58 AsEntered, //!< use the name of the template as entered in the SCSR Mol
59 UseFirstName, //!< use the first name in the template
60 //!< def (for AA, the 3 letter code)
61 UseSecondName //!< use the second name in the template def
62 //!< (for AA, the 1 letter code)
63};
64
66 Ignore, //!< do not include base Hbonds in expanded output
67 UseSapAll, //!< use all hbonds defined in SAPs;
68 //!< can be more than one per base
69 UseSapOne, //!< use only one SAP hbond per base.
70 //!< If multiple SAPs are defined, use the first
71 //!< even if it is not the best
72 //!< (this just maintains the relationship between
73 //!< the two base pairs)
74 Auto //!< for bases that are C,G,A,T,U,In (and
75 //!< derivatives) use the standard Watson-Crick
76 //!< Hbonding. No SAPs need to be defined, and if
77 //!< defined, they are ignored.
78};
79
81 bool includeLeavingGroups =
82 true; /**< when true, leaving groups on atoms that are not exo-bonded are
83 retained. When false, no leaving groups are retained */
84 SCSRTemplateNames scsrTemplateNames = SCSRTemplateNames::AsEntered;
85
86 SCSRBaseHbondOptions scsrBaseHbondOptions = SCSRBaseHbondOptions::UseSapAll;
87};
89 std::istream &inStream, unsigned int &line,
92 const std::string &molBlock,
95 const std::string &fName,
97
98RDKIT_FILEPARSERS_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSCSRDataStream(
99 std::istream &inStream, unsigned int &line,
100 const MolFileParserParams &molFileParserParams = MolFileParserParams(),
101 const MolFromSCSRParams &molFromSCSRParams = MolFromSCSRParams());
102RDKIT_FILEPARSERS_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSCSRBlock(
103 const std::string &molBlock,
104 const MolFileParserParams &molFileParserParams = MolFileParserParams(),
105 const MolFromSCSRParams &molFromSCSRParams = MolFromSCSRParams());
106RDKIT_FILEPARSERS_EXPORT std::unique_ptr<RDKit::RWMol> MolFromSCSRFile(
107 const std::string &fName,
108 const MolFileParserParams &molFileParserParams = MolFileParserParams(),
109 const MolFromSCSRParams &molFromSCSRParams = MolFromSCSRParams());
110
111} // namespace FileParsers
112} // namespace v2
113
114inline namespace v1 {
116//-----
117// mol files
118//-----
119// \brief construct a molecule from MDL mol data in a stream
120/*!
121 * \param inStream - stream containing the data
122 * \param line - current line number (used for error reporting)
123 * \param sanitize - toggles sanitization and stereochemistry
124 * perception of the molecule
125 * \param removeHs - toggles removal of Hs from the molecule. H removal
126 * is only done if the molecule is sanitized
128 * \param strictParsing - if set to false, the parser is more lax about
129 * correctness of the contents.
130 *
131 */
132inline RWMol *MolDataStreamToMol(std::istream *inStream, unsigned int &line,
133 bool sanitize = true, bool removeHs = true,
134 bool strictParsing = true) {
136 ps.sanitize = sanitize;
137 ps.removeHs = removeHs;
138 ps.strictParsing = strictParsing;
139 return v2::FileParsers::MolFromMolDataStream(*inStream, line, ps).release();
140};
141// \overload
142inline RWMol *MolDataStreamToMol(std::istream &inStream, unsigned int &line,
143 bool sanitize = true, bool removeHs = true,
144 bool strictParsing = true) {
145 return MolDataStreamToMol(&inStream, line, sanitize, removeHs, strictParsing);
146};
147// \brief construct a molecule from an MDL mol block
148/*!
149 * \param molBlock - string containing the mol block
150 * \param sanitize - toggles sanitization and stereochemistry
151 * perception of the molecule
152 * \param removeHs - toggles removal of Hs from the molecule. H removal
153 * is only done if the molecule is sanitized
154 * \param strictParsing - if set to false, the parser is more lax about
155 * correctness of the contents.
156 */
157inline RWMol *MolBlockToMol(const std::string &molBlock, bool sanitize = true,
158 bool removeHs = true, bool strictParsing = true) {
160 ps.sanitize = sanitize;
161 ps.removeHs = removeHs;
162 ps.strictParsing = strictParsing;
163 return v2::FileParsers::MolFromMolBlock(molBlock, ps).release();
164};
165
166// \brief construct a molecule from an MDL mol file
167/*!
168 * \param fName - string containing the file name
169 * \param sanitize - toggles sanitization and stereochemistry
170 * perception of the molecule
171 * \param removeHs - toggles removal of Hs from the molecule. H removal
172 * is only done if the molecule is sanitized
173 * \param strictParsing - if set to false, the parser is more lax about
174 * correctness of the contents.
175 */
176inline RWMol *MolFileToMol(const std::string &fName, bool sanitize = true,
177 bool removeHs = true, bool strictParsing = true) {
179 ps.sanitize = sanitize;
180 ps.removeHs = removeHs;
181 ps.strictParsing = strictParsing;
182 return v2::FileParsers::MolFromMolFile(fName, ps).release();
183};
184} // namespace v1
185
186//-----
187// TPL handling:
188//-----
189
190namespace v2 {
191namespace FileParsers {
193 bool sanitize = true; /**< sanitize the molecule after building it */
194 bool skipFirstConf =
195 false; /**< if set to true, the first conformer will be skipped */
196};
198 std::istream &inStream, unsigned int &line,
199 const TPLParserParams &params = TPLParserParams());
201 const std::string &fName,
202 const TPLParserParams &params = TPLParserParams());
203
204} // namespace FileParsers
205} // namespace v2
206
207inline namespace v1 {
208//! \brief translate TPL data (BioCad format) into a multi-conf molecule
209/*!
210 \param inStream: the stream from which to read
211 \param line: used to track the line number of errors
212 \param sanitize: toggles sanitization and stereochemistry
213 perception of the molecule
214 \param skipFirstConf: according to the TPL format description, the atomic
215 coords in the atom-information block describe the first
216 conformation and the first conf block describes second
217 conformation. The CombiCode, on the other hand, writes
218 the first conformation data both to the atom-information
219 block and to the first conf block. We want to be able to
220 read CombiCode-style tpls, so we'll allow this
221 mis-feature
222 to be parsed when this flag is set.
223*/
224inline RWMol *TPLDataStreamToMol(std::istream *inStream, unsigned int &line,
225 bool sanitize = true,
226 bool skipFirstConf = false) {
228 ps.sanitize = sanitize;
229 ps.skipFirstConf = skipFirstConf;
230 return v2::FileParsers::MolFromTPLDataStream(*inStream, line, ps).release();
231}
232
233//! \brief construct a multi-conf molecule from a TPL (BioCad format) file
234/*!
235 \param fName: the name of the file from which to read
236 \param sanitize: toggles sanitization and stereochemistry
237 perception of the molecule
238 \param skipFirstConf: according to the TPL format description, the atomic
239 coords in the atom-information block describe the first
240 conformation and the first conf block describes second
241 conformation. The CombiCode, on the other hand, writes
242 the first conformation data both to the atom-information
243 block and to the first conf block. We want to be able to
244 read CombiCode-style tpls, so we'll allow this
245 mis-feature
246 to be parsed when this flag is set.
247*/
248inline RWMol *TPLFileToMol(const std::string &fName, bool sanitize = true,
249 bool skipFirstConf = false) {
251 ps.sanitize = sanitize;
252 ps.skipFirstConf = skipFirstConf;
253 return v2::FileParsers::MolFromTPLFile(fName, ps).release();
254}
255} // namespace v1
256
257namespace v2 {
258namespace FileParsers {
259
260//-----
261// MOL2 handling
262//-----
263
264typedef enum {
265 CORINA = 0 //!< supports output from Corina and some dbtranslate output
266} Mol2Type;
267
269 bool sanitize = true; /**< sanitize the molecule after building it */
270 bool removeHs = true; /**< remove Hs after constructing the molecule */
271 Mol2Type variant = Mol2Type::CORINA; /**< the atom type definitions to use */
272 bool cleanupSubstructures =
273 true; /**< toggles recognition and cleanup of common substructures */
274};
275
277 std::istream &inStream,
278 const Mol2ParserParams &params = Mol2ParserParams());
280 const std::string &molBlock,
281 const Mol2ParserParams &params = Mol2ParserParams());
283 const std::string &fName,
284 const Mol2ParserParams &params = Mol2ParserParams());
285
286} // namespace FileParsers
287} // namespace v2
288
289inline namespace v1 {
291
292// \brief construct a molecule from a Tripos mol2 file
293/*!
294 *
295 * \param fName - string containing the file name
296 * \param sanitize - toggles sanitization of the molecule
297 * \param removeHs - toggles removal of Hs from the molecule. H removal
298 * is only done if the molecule is sanitized
299 * \param variant - the atom type definitions to use
300 * \param cleanupSubstructures - toggles recognition and cleanup of common
301 * substructures
302 */
303inline RWMol *Mol2FileToMol(const std::string &fName, bool sanitize = true,
304 bool removeHs = true,
305 Mol2Type variant = Mol2Type::CORINA,
306 bool cleanupSubstructures = true) {
308 ps.sanitize = sanitize;
309 ps.removeHs = removeHs;
310 ps.variant = variant;
311 ps.cleanupSubstructures = cleanupSubstructures;
312 return v2::FileParsers::MolFromMol2File(fName, ps).release();
313}
314
315// \brief construct a molecule from Tripos mol2 data in a stream
316/*!
317 * \param inStream - stream containing the data
318 * \param sanitize - toggles sanitization of the molecule
319 * \param removeHs - toggles removal of Hs from the molecule. H removal
320 * is only done if the molecule is sanitized
321 * \param variant - the atom type definitions to use
322 * \param cleanupSubstructures - toggles recognition and cleanup of common
323 * substructures
324 */
325inline RWMol *Mol2DataStreamToMol(std::istream &inStream, bool sanitize = true,
326 bool removeHs = true,
327 Mol2Type variant = Mol2Type::CORINA,
328 bool cleanupSubstructures = true) {
330 ps.sanitize = sanitize;
331 ps.removeHs = removeHs;
332 ps.variant = variant;
333 ps.cleanupSubstructures = cleanupSubstructures;
334 return v2::FileParsers::MolFromMol2DataStream(inStream, ps).release();
335}
336// \overload
337inline RWMol *Mol2DataStreamToMol(std::istream *inStream, bool sanitize = true,
338 bool removeHs = true,
339 Mol2Type variant = Mol2Type::CORINA,
340 bool cleanupSubstructures = true) {
341 return Mol2DataStreamToMol(*inStream, sanitize, removeHs, variant,
342 cleanupSubstructures);
343}
344
345// \brief construct a molecule from a Tripos mol2 block
346/*!
347 * \param molBlock - string containing the mol block
348 * \param sanitize - toggles sanitization of the molecule
349 * \param removeHs - toggles removal of Hs from the molecule. H removal
350 * is only done if the molecule is sanitized
351 * \param variant - the atom type definitions to use
352 * \param cleanupSubstructures - toggles recognition and cleanup of common
353 * substructures
354 */
355inline RWMol *Mol2BlockToMol(const std::string &molBlock, bool sanitize = true,
356 bool removeHs = true,
357 Mol2Type variant = Mol2Type::CORINA,
358 bool cleanupSubstructures = true) {
360 ps.sanitize = sanitize;
361 ps.removeHs = removeHs;
362 ps.variant = variant;
363 ps.cleanupSubstructures = cleanupSubstructures;
364 return v2::FileParsers::MolFromMol2Block(molBlock, ps).release();
365}
366} // namespace v1
367
368namespace v2 {
369namespace FileParsers {
370
372 std::istream &inStream);
373// \brief construct a molecule from an xyz block
374/*!
375 * \param xyzBlock - string containing the xyz block
376 */
378 const std::string &xyzBlock);
379// \brief construct a molecule from an xyz file
380/*!
381 * \param fName - string containing the file name
382 */
384 const std::string &fName);
385} // namespace FileParsers
386} // namespace v2
387inline namespace v1 {
388inline RWMol *XYZDataStreamToMol(std::istream &inStream) {
389 return v2::FileParsers::MolFromXYZDataStream(inStream).release();
390}
391// \brief construct a molecule from an xyz block
392/*!
393 * \param xyzBlock - string containing the xyz block
394 */
395inline RWMol *XYZBlockToMol(const std::string &xyzBlock) {
396 return v2::FileParsers::MolFromXYZBlock(xyzBlock).release();
397}
398// \brief construct a molecule from an xyz file
399/*!
400 * \param fName - string containing the file name
401 */
402inline RWMol *XYZFileToMol(const std::string &fName) {
403 return v2::FileParsers::MolFromXYZFile(fName).release();
404}
405
406} // namespace v1
407
408namespace v2 {
409namespace FileParsers {
411 bool sanitize = true; /**< sanitize the molecule after building it */
412 bool removeHs = true; /**< remove Hs after constructing the molecule */
413 bool proximityBonding = true; /**< if set to true, proximity bonding will be
414 performed */
415 unsigned int flavor = 0; /**< flavor to use */
416};
417
419 std::istream &inStream, const PDBParserParams &params = PDBParserParams());
421 const std::string &fname,
422 const PDBParserParams &params = PDBParserParams());
424 const std::string &str, const PDBParserParams &params = PDBParserParams());
425} // namespace FileParsers
426} // namespace v2
427
428inline namespace v1 {
430inline RWMol *PDBBlockToMol(const std::string &str, bool sanitize = true,
431 bool removeHs = true, unsigned int flavor = 0,
432 bool proximityBonding = true) {
434 ps.sanitize = sanitize;
435 ps.removeHs = removeHs;
436 ps.flavor = flavor;
437 ps.proximityBonding = proximityBonding;
438 return v2::FileParsers::MolFromPDBBlock(str, ps).release();
439}
440inline RWMol *PDBBlockToMol(const char *str, bool sanitize = true,
441 bool removeHs = true, unsigned int flavor = 0,
442 bool proximityBonding = true) {
443 return PDBBlockToMol(std::string(str), sanitize, removeHs, flavor,
444 proximityBonding);
445}
446inline RWMol *PDBFileToMol(const std::string &fname, bool sanitize = true,
447 bool removeHs = true, unsigned int flavor = 0,
448 bool proximityBonding = true) {
450 ps.sanitize = sanitize;
451 ps.removeHs = removeHs;
452 ps.flavor = flavor;
453 ps.proximityBonding = proximityBonding;
454 return v2::FileParsers::MolFromPDBFile(fname, ps).release();
455}
456inline RWMol *PDBDataStreamToMol(std::istream &inStream, bool sanitize = true,
457 bool removeHs = true, unsigned int flavor = 0,
458 bool proximityBonding = true) {
460 ps.sanitize = sanitize;
461 ps.removeHs = removeHs;
462 ps.flavor = flavor;
463 ps.proximityBonding = proximityBonding;
464 return v2::FileParsers::MolFromPDBDataStream(inStream, ps).release();
465}
466inline RWMol *PDBDataStreamToMol(std::istream *inStream, bool sanitize = true,
467 bool removeHs = true, unsigned int flavor = 0,
468 bool proximityBonding = true) {
469 return PDBDataStreamToMol(*inStream, sanitize, removeHs, flavor,
470 proximityBonding);
471}
472} // namespace v1
473
474// \brief reads a molecule from the metadata in an RDKit-generated SVG file
475/*!
476 * \param svg - string containing the SVG
477 * \param sanitize - toggles sanitization of the molecule
478 * \param removeHs - toggles removal of Hs from the molecule. H removal
479 * is only done if the molecule is sanitized
480 *
481 * **NOTE** This functionality should be considered beta.
482 */
484 bool sanitize = true,
485 bool removeHs = true);
486/*! \overload
487 */
489 bool sanitize = true,
490 bool removeHs = true);
491
492inline std::unique_ptr<RDKit::RWMol> operator"" _ctab(const char *text,
493 size_t len) {
494 std::string data(text, len);
495 try {
497 } catch (const RDKit::MolSanitizeException &) {
498 return nullptr;
499 }
500}
501inline std::unique_ptr<RDKit::RWMol> operator"" _mol2(const char *text,
502 size_t len) {
503 std::string data(text, len);
504 try {
506 } catch (const RDKit::MolSanitizeException &) {
507 return nullptr;
508 }
509}
510
511inline std::unique_ptr<RDKit::RWMol> operator"" _pdb(const char *text,
512 size_t len) {
513 std::string data(text, len);
514 try {
516 } catch (const RDKit::MolSanitizeException &) {
517 return nullptr;
518 }
519}
520
521} // namespace RDKit
522
523#endif
pulls in the core RDKit functionality
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
const char * what() const noexcept override
get the error message
Definition FileParsers.h:41
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition FileParsers.h:38
~MolFileUnhandledFeatureException() noexcept override=default
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition FileParsers.h:36
#define RDKIT_FILEPARSERS_EXPORT
Definition export.h:161
RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=Mol2Type::CORINA, bool cleanupSubstructures=true)
RWMol * XYZFileToMol(const std::string &fName)
RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=Mol2Type::CORINA, bool cleanupSubstructures=true)
RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RWMol * Mol2DataStreamToMol(std::istream &inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=Mol2Type::CORINA, bool cleanupSubstructures=true)
RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RWMol * PDBDataStreamToMol(std::istream &inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RWMol * XYZDataStreamToMol(std::istream &inStream)
RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RWMol * XYZBlockToMol(const std::string &xyzBlock)
RWMol * PDBBlockToMol(const std::string &str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromTPLFile(const std::string &fName, const TPLParserParams &params=TPLParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromPDBFile(const std::string &fname, const PDBParserParams &params=PDBParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromPDBDataStream(std::istream &inStream, const PDBParserParams &params=PDBParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RDKit::RWMol > MolFromSCSRDataStream(std::istream &inStream, unsigned int &line, const MolFileParserParams &molFileParserParams=MolFileParserParams(), const MolFromSCSRParams &molFromSCSRParams=MolFromSCSRParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromXYZFile(const std::string &fName)
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromXYZBlock(const std::string &xyzBlock)
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromMolFile(const std::string &fName, const MolFileParserParams &params=MolFileParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromMol2DataStream(std::istream &inStream, const Mol2ParserParams &params=Mol2ParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromPDBBlock(const std::string &str, const PDBParserParams &params=PDBParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromMol2File(const std::string &fName, const Mol2ParserParams &params=Mol2ParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromXYZDataStream(std::istream &inStream)
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RDKit::RWMol > MolFromSCSRFile(const std::string &fName, const MolFileParserParams &molFileParserParams=MolFileParserParams(), const MolFromSCSRParams &molFromSCSRParams=MolFromSCSRParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RDKit::RWMol > MolFromSCSRBlock(const std::string &molBlock, const MolFileParserParams &molFileParserParams=MolFileParserParams(), const MolFromSCSRParams &molFromSCSRParams=MolFromSCSRParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromMolBlock(const std::string &molBlock, const MolFileParserParams &params=MolFileParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromMol2Block(const std::string &molBlock, const Mol2ParserParams &params=Mol2ParserParams())
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromMolDataStream(std::istream &inStream, unsigned int &line, const MolFileParserParams &params=MolFileParserParams())
@ CORINA
supports output from Corina and some dbtranslate output
RDKIT_FILEPARSERS_EXPORT std::unique_ptr< RWMol > MolFromTPLDataStream(std::istream &inStream, unsigned int &line, const TPLParserParams &params=TPLParserParams())
Std stuff.
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)