RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
StreamOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10//
11#include <RDGeneral/export.h>
12#ifndef _RD_STREAMOPS_H
13#define _RD_STREAMOPS_H
14
15#include "types.h"
16#include "Invariant.h"
17#include "RDProps.h"
18#include <string>
19#include <sstream>
20#include <unordered_set>
21#include <boost/cstdint.hpp>
22#include <boost/predef.h>
23
24namespace RDKit {
25// this code block for handling endian problems is adapted from :
26// http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
27enum EEndian {
30#if defined(BOOST_ENDIAN_LITTLE_BYTE) || defined(BOOST_ENDIAN_LITTLE_WORD)
31 HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
32#elif defined(BOOST_ENDIAN_BIG_BYTE)
33 HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
34#elif defined(BOOST_ENDIAN_BIG_WORD)
35#error "Cannot compile on word-swapped big-endian systems"
36#else
37#error "Failed to determine the system endian value"
38#endif
39};
40
//! Returns \c value with the order of its first \c size bytes reversed.
/*!
  \tparam T     type of the value to swap (must be trivially copyable)
  \tparam size  number of bytes to reverse; EndianSwapBytes() passes sizeof(T)

  The bytes are reversed in place in the by-value copy through a char
  pointer. Writing one union member and reading another, as a union-based
  swap would, is undefined behaviour in C++.
*/
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  static_assert(size <= sizeof(T),
                "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) {
    // zero or one byte: nothing to reverse
    return value;
  }

  char *bytes = reinterpret_cast<char *>(&value);
  for (unsigned int lo = 0, hi = size - 1; lo < hi; ++lo, --hi) {
    const char tmp = bytes[lo];
    bytes[lo] = bytes[hi];
    bytes[hi] = tmp;
  }
  return value;
}
62
63// Here is the function you will use. Again there is two compile-time assertion
64// that use the boost libraries. You could probably comment them out, but if you
65// do be cautious not to use this function for anything else than integers
66// types. This function need to be called like this :
67//
68// int x = someValue;
69// int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
70//
71template <EEndian from, EEndian to, class T>
72inline T EndianSwapBytes(T value) {
73 // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
74 BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
75 sizeof(T) == 8);
76 if (sizeof(T) == 1) {
77 return value;
78 }
79
80 // A : La donnée à swapper est d'un type arithmetic
81 // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
82
83 // Si from et to sont du même type on ne swap pas.
84 if (from == to) {
85 return value;
86 }
87
88 return SwapBytes<T, sizeof(T)>(value);
89}
90
91template <EEndian from, EEndian to>
92inline char EndianSwapBytes(char value) {
93 return value;
94}
95
96template <EEndian from, EEndian to>
97inline unsigned char EndianSwapBytes(unsigned char value) {
98 return value;
99}
100
101template <EEndian from, EEndian to>
102inline signed char EndianSwapBytes(signed char value) {
103 return value;
104}
105
106// --------------------------------------
107
108//! Packs an integer and outputs it to a stream
109inline void appendPackedIntToStream(std::stringstream &ss,
110 boost::uint32_t num) {
111 int nbytes, bix;
112 unsigned int val, res;
113 char tc;
114
115 res = num;
116 while (1) {
117 if (res < (1 << 7)) {
118 val = (res << 1);
119 nbytes = 1;
120 break;
121 }
122 res -= (1 << 7);
123 if (res < (1 << 14)) {
124 val = ((res << 2) | 1);
125 nbytes = 2;
126 break;
127 }
128 res -= (1 << 14);
129 if (res < (1 << 21)) {
130 val = ((res << 3) | 3);
131 nbytes = 3;
132 break;
133 }
134 res -= (1 << 21);
135 if (res < (1 << 29)) {
136 val = ((res << 3) | 7);
137 nbytes = 4;
138 break;
139 } else {
140 CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
141 }
142 }
143 // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
144
145 for (bix = 0; bix < nbytes; bix++) {
146 tc = (char)(val & 255);
147 ss.write(&tc, 1);
148 val >>= 8;
149 }
150}
151
152//! Reads an integer from a stream in packed format and returns the result.
153inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
154 boost::uint32_t val, num;
155 int shift, offset;
156 char tmp;
157 ss.read(&tmp, sizeof(tmp));
158 if (ss.fail()) {
159 throw std::runtime_error("failed to read from stream");
160 }
161
162 val = UCHAR(tmp);
163 offset = 0;
164 if ((val & 1) == 0) {
165 shift = 1;
166 } else if ((val & 3) == 1) {
167 ss.read((char *)&tmp, sizeof(tmp));
168 if (ss.fail()) {
169 throw std::runtime_error("failed to read from stream");
170 }
171
172 val |= (UCHAR(tmp) << 8);
173 shift = 2;
174 offset = (1 << 7);
175 } else if ((val & 7) == 3) {
176 ss.read((char *)&tmp, sizeof(tmp));
177 if (ss.fail()) {
178 throw std::runtime_error("failed to read from stream");
179 }
180
181 val |= (UCHAR(tmp) << 8);
182 ss.read((char *)&tmp, sizeof(tmp));
183 if (ss.fail()) {
184 throw std::runtime_error("failed to read from stream");
185 }
186
187 val |= (UCHAR(tmp) << 16);
188 shift = 3;
189 offset = (1 << 7) + (1 << 14);
190 } else {
191 ss.read((char *)&tmp, sizeof(tmp));
192 if (ss.fail()) {
193 throw std::runtime_error("failed to read from stream");
194 }
195
196 val |= (UCHAR(tmp) << 8);
197 ss.read((char *)&tmp, sizeof(tmp));
198 if (ss.fail()) {
199 throw std::runtime_error("failed to read from stream");
200 }
201
202 val |= (UCHAR(tmp) << 16);
203 ss.read((char *)&tmp, sizeof(tmp));
204 if (ss.fail()) {
205 throw std::runtime_error("failed to read from stream");
206 }
207
208 val |= (UCHAR(tmp) << 24);
209 shift = 3;
210 offset = (1 << 7) + (1 << 14) + (1 << 21);
211 }
212 num = (val >> shift) + offset;
213 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
214 return num;
215}
216
217//! Reads an integer from a char * in packed format and returns the result.
218//! The argument is advanced
219inline boost::uint32_t pullPackedIntFromString(const char *&text) {
220 boost::uint32_t val, num;
221 int shift, offset;
222 char tmp;
223 tmp = *text;
224 text++;
225 val = UCHAR(tmp);
226 offset = 0;
227 if ((val & 1) == 0) {
228 shift = 1;
229 } else if ((val & 3) == 1) {
230 tmp = *text;
231 text++;
232 val |= (UCHAR(tmp) << 8);
233 shift = 2;
234 offset = (1 << 7);
235 } else if ((val & 7) == 3) {
236 tmp = *text;
237 text++;
238 val |= (UCHAR(tmp) << 8);
239 tmp = *text;
240 text++;
241 val |= (UCHAR(tmp) << 16);
242 shift = 3;
243 offset = (1 << 7) + (1 << 14);
244 } else {
245 tmp = *text;
246 text++;
247 val |= (UCHAR(tmp) << 8);
248 tmp = *text;
249 text++;
250 val |= (UCHAR(tmp) << 16);
251 tmp = *text;
252 text++;
253 val |= (UCHAR(tmp) << 24);
254 shift = 3;
255 offset = (1 << 7) + (1 << 14) + (1 << 21);
256 }
257 num = (val >> shift) + offset;
258 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
259 return num;
260}
261
262//! does a binary write of an object to a stream
263template <typename T>
264void streamWrite(std::ostream &ss, const T &val) {
266 ss.write((const char *)&tval, sizeof(T));
267}
268
269//! special case for string
270inline void streamWrite(std::ostream &ss, const std::string &what) {
271 unsigned int l = static_cast<unsigned int>(what.length());
272 streamWrite(ss, l);
273 ss.write(what.c_str(), sizeof(char) * l);
274};
275
276template <typename T>
277void streamWriteVec(std::ostream &ss, const T &val) {
278 streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
279 for (size_t i = 0; i < val.size(); ++i) {
280 streamWrite(ss, val[i]);
281 }
282}
283
284//! does a binary read of an object from a stream
285template <typename T>
286void streamRead(std::istream &ss, T &loc) {
287 T tloc;
288 ss.read((char *)&tloc, sizeof(T));
289 if (ss.fail()) {
290 throw std::runtime_error("failed to read from stream");
291 }
293}
294
295//! special case for string
296template <class T>
297void streamRead(std::istream &ss, T &obj, int version) {
298 RDUNUSED_PARAM(version);
299 streamRead(ss, obj);
300}
301
302inline void streamRead(std::istream &ss, std::string &what, int version) {
303 RDUNUSED_PARAM(version);
304 unsigned int l;
305 streamRead(ss, l);
306 auto buff = std::make_unique<char[]>(l);
307 ss.read(buff.get(), sizeof(char) * l);
308 if (ss.fail()) {
309 throw std::runtime_error("failed to read from stream");
310 }
311 what = std::string(buff.get(), l);
312};
313
314template <class T>
315void streamReadVec(std::istream &ss, T &val) {
316 boost::uint64_t size;
317 streamRead(ss, size);
318 val.resize(boost::numeric_cast<size_t>(size));
319
320 for (size_t i = 0; i < size; ++i) {
321 streamRead(ss, val[i]);
322 }
323}
324
325inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
326 int version) {
327 boost::uint64_t size;
328 streamRead(ss, size);
329 val.resize(size);
330
331 for (size_t i = 0; i < size; ++i) {
332 streamRead(ss, val[i], version);
333 }
334}
335
//! grabs the next line from an instream and returns it.
/*!
  The line is read with std::getline(); a single trailing carriage return is
  removed if present.
*/
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  const bool endsWithCR = !line.empty() && line.back() == '\r';
  if (endsWithCR) {
    line.pop_back();
  }
  return line;
}
345
346//! grabs the next line from an instream and returns it.
347inline std::string getLine(std::istream &inStream) {
348 return getLine(&inStream);
349}
350
// n.b. We can't use RDTypeTag directly, they are implementation
//  specific
//! One-byte type codes stored in the serialized property stream;
//! streamReadProp() reads one of these and switches on it to decide how to
//! decode the value that follows.
namespace DTags {
constexpr unsigned char StringTag = 0;
constexpr unsigned char IntTag = 1;
constexpr unsigned char UnsignedIntTag = 2;
constexpr unsigned char BoolTag = 3;
constexpr unsigned char FloatTag = 4;
constexpr unsigned char DoubleTag = 5;
constexpr unsigned char VecStringTag = 6;
constexpr unsigned char VecIntTag = 7;
constexpr unsigned char VecUIntTag = 8;
constexpr unsigned char VecBoolTag = 9;
constexpr unsigned char VecFloatTag = 10;
constexpr unsigned char VecDoubleTag = 11;

constexpr unsigned char CustomTag = 0xFE;  // custom data
constexpr unsigned char EndTag = 0xFF;
}  // namespace DTags
370
// Interface for serializing property values that have no built-in encoding.
// Handlers are passed around as a CustomPropHandlerVec and tried in order.
// NOTE(review): listing lines 371 and 373 are absent from this view, so the
// class head and whatever is declared on line 373 cannot be seen here.
372 public:
374
// Name identifying this handler's serialized form. streamWriteProp() writes
// it ahead of the handler's payload and streamReadProp() compares the stored
// name against it to pick the handler that reads the payload.
375 virtual const char *getPropName() const = 0;
376
// Returns true if this handler is able to serialize `value`; asked by
// isSerializable() and streamWriteProp().
377 virtual bool canSerialize(const RDValue &value) const = 0;
378
// Decodes this handler's payload from `ss` into `value`. streamReadProp()
// calls it once the stored name has matched getPropName().
// NOTE(review): the meaning of the bool result is not visible here; the
// caller in this file does not inspect it.
379 virtual bool read(std::istream &ss, RDValue &value) const = 0;
380
// Encodes `value` onto `ss`. streamWriteProp() calls it right after writing
// getPropName().
// NOTE(review): the meaning of the bool result is not visible here; the
// caller in this file does not inspect it.
381 virtual bool write(std::ostream &ss, const RDValue &value) const = 0;
382
// Returns a pointer to a handler of the same kind.
// NOTE(review): presumably a newly allocated copy owned by the caller --
// confirm against the implementations.
383 virtual CustomPropHandler *clone() const = 0;
384};
385
386typedef std::vector<std::shared_ptr<const CustomPropHandler>>
388
// Reports whether the value held in `pair` can be serialized.
// The decision is a switch on the value's type tag:
//  - one group of tags is always serializable;
//  - for another tag the custom `handlers` are asked in order, and the value
//    is serializable if any handler's canSerialize() accepts it;
//  - every other tag is not serializable.
// NOTE(review): the `case` labels (listing lines 392-397, 399-403 and 405)
// are absent from this view, so which tags belong to which group cannot be
// confirmed here.
389inline bool isSerializable(const Dict::Pair &pair,
390 const CustomPropHandlerVec &handlers = {}) {
391 switch (pair.val.getTag()) {
398
// tags with a built-in encoding
404 return true;
// otherwise serializable only if some custom handler accepts the value
406 for (auto &handler : handlers) {
407 if (handler->canSerialize(pair.val)) {
408 return true;
409 }
410 }
411 return false;
412 default:
413 return false;
414 }
415}
416
// Writes one property (key plus value) to `ss`.
// Returns false, with nothing written, when isSerializable() rejects the
// pair. Otherwise the key is written first and the value is then written
// according to its type tag: scalars with streamWrite(), vectors with
// streamWriteVec(), and anything else through the first custom handler whose
// canSerialize() accepts the value.
// NOTE(review): many listing lines between 425 and 477 are absent from this
// view -- the `case` labels and presumably the writes of the type tag that
// streamReadProp() reads back -- so the tag/encoding pairing cannot be
// confirmed here.
417inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair,
418 const CustomPropHandlerVec &handlers = {}) {
419 if (!isSerializable(pair, handlers)) {
420 return false;
421 }
422
423 streamWrite(ss, pair.key);
424 switch (pair.val.getTag()) {
428 break;
431 streamWrite(ss, rdvalue_cast<int>(pair.val));
432 break;
436 break;
439 streamWrite(ss, rdvalue_cast<bool>(pair.val));
440 break;
443 streamWrite(ss, rdvalue_cast<float>(pair.val));
444 break;
447 streamWrite(ss, rdvalue_cast<double>(pair.val));
448 break;
449
452 streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(pair.val));
453 break;
456 streamWriteVec(ss, rdvalue_cast<std::vector<double>>(pair.val));
457 break;
460 streamWriteVec(ss, rdvalue_cast<std::vector<float>>(pair.val));
461 break;
464 streamWriteVec(ss, rdvalue_cast<std::vector<int>>(pair.val));
465 break;
468 streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(pair.val));
469 break;
470 default:
// no built-in encoding: hand the value to the first handler that accepts it
471 for (auto &handler : handlers) {
472 if (handler->canSerialize(pair.val)) {
473 // The form of a custom tag is
474 // CustomTag
475 // customPropName (must be unique)
476 // custom serialization
478 streamWrite(ss, std::string(handler->getPropName()));
// NOTE(review): the bool returned by write() is not checked
479 handler->write(ss, pair.val);
480 return true;
481 }
482 }
483
// NOTE(review): reaching this point means the key was already written above
484 return false;
485 }
486 return true;
487}
488
489template <typename COUNT_TYPE = unsigned int>
491 std::ostream &ss, const RDProps &props, bool savePrivate = false,
492 bool saveComputed = false, const CustomPropHandlerVec &handlers = {},
493 const std::unordered_set<std::string> &ignore = {}) {
494 STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
495 std::unordered_set<std::string> propnames;
496 for (const auto &pn : propsToSave) {
497 if (ignore.empty() || ignore.find(pn) == ignore.end()) {
498 propnames.insert(pn);
499 }
500 }
501
502 const Dict &dict = props.getDict();
503 COUNT_TYPE count = 0;
504 for (const auto &elem : dict.getData()) {
505 if (propnames.find(elem.key) != propnames.end()) {
506 if (isSerializable(elem, handlers)) {
507 count++;
508 }
509 }
510 }
511 streamWrite(ss, count); // packed int?
512 if (!count) {
513 return false;
514 }
515
516 COUNT_TYPE writtenCount = 0;
517 for (const auto &elem : dict.getData()) {
518 if (propnames.find(elem.key) != propnames.end()) {
519 if (isSerializable(elem, handlers)) {
520 // note - not all properties are serializable, this may be
521 // a null op
522 if (streamWriteProp(ss, elem, handlers)) {
523 writtenCount++;
524 }
525 }
526 }
527 }
528 POSTCONDITION(count == writtenCount,
529 "Estimated property count not equal to written");
530 return true;
531}
532
533template <class T>
534void readRDValue(std::istream &ss, RDValue &value) {
535 T v;
536 streamRead(ss, v);
537 value = v;
538}
539
540template <class T>
541void readRDVecValue(std::istream &ss, RDValue &value) {
542 std::vector<T> v;
543 streamReadVec(ss, v);
544 value = v;
545}
546
547inline void readRDValueString(std::istream &ss, RDValue &value) {
548 std::string v;
549 int version = 0;
550 streamRead(ss, v, version);
551 value = v;
552}
553
554inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
555 std::vector<std::string> v;
556 int version = 0;
557 streamReadStringVec(ss, v, version);
558 value = v;
559}
560
// Reads one property (key plus value) from `ss` into `pair`.
// The key is read as a string, then a one-byte type tag, then the value in
// the encoding the tag selects. `dictHasNonPOD` is set to true whenever the
// value read is a string, a vector, or handler-decoded custom data; it is
// never cleared here.
// Returns true when a value was read; false for an unrecognised tag or for
// custom data whose stored name matches none of `handlers`.
// NOTE(review): several `case` labels (listing lines 573-574, 590, 598-599,
// 602 and 606) are absent from this view.
561inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
562 bool &dictHasNonPOD,
563 const CustomPropHandlerVec &handlers = {}) {
564 int version = 0;
565 streamRead(ss, pair.key, version);
566
567 unsigned char type;
568 streamRead(ss, type);
569 switch (type) {
570 case DTags::IntTag:
571 readRDValue<int>(ss, pair.val);
572 break;
575 break;
576 case DTags::BoolTag:
577 readRDValue<bool>(ss, pair.val);
578 break;
579 case DTags::FloatTag:
580 readRDValue<float>(ss, pair.val);
581 break;
582 case DTags::DoubleTag:
583 readRDValue<double>(ss, pair.val);
584 break;
585
586 case DTags::StringTag:
587 readRDValueString(ss, pair.val);
588 dictHasNonPOD = true;
589 break;
591 readRDStringVecValue(ss, pair.val);
592 dictHasNonPOD = true;
593 break;
594 case DTags::VecIntTag:
595 readRDVecValue<int>(ss, pair.val);
596 dictHasNonPOD = true;
597 break;
600 dictHasNonPOD = true;
601 break;
603 readRDVecValue<float>(ss, pair.val);
604 dictHasNonPOD = true;
605 break;
607 readRDVecValue<double>(ss, pair.val);
608 dictHasNonPOD = true;
609 break;
610 case DTags::CustomTag: {
// custom data: the handler's name is stored ahead of its payload
611 std::string propType;
// NOTE(review): this shadows the `version` declared at the top of the
// function
612 int version = 0;
613 streamRead(ss, propType, version);
614 for (auto &handler : handlers) {
615 if (propType == handler->getPropName()) {
// NOTE(review): the bool returned by read() is not checked
616 handler->read(ss, pair.val);
617 dictHasNonPOD = true;
618 return true;
619 }
620 }
// no handler registered under the stored name
621 return false;
622 }
623
624 default:
625 return false;
626 }
627 return true;
628}
629
630template <typename COUNT_TYPE = unsigned int>
631inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
632 const CustomPropHandlerVec &handlers = {},
633 bool reset = true) {
634 COUNT_TYPE count;
635 streamRead(ss, count);
636
637 Dict &dict = props.getDict();
638 if (reset) {
639 dict.reset(); // Clear data before repopulating
640 }
641 auto startSz = dict.getData().size();
642 dict.getData().resize(startSz + count);
643 for (unsigned index = 0; index < count; ++index) {
644 CHECK_INVARIANT(streamReadProp(ss, dict.getData()[startSz + index],
645 dict.getNonPODStatus(), handlers),
646 "Corrupted property serialization detected");
647 }
648
649 return static_cast<unsigned int>(count);
650}
651} // namespace RDKit
652
653#endif
#define CHECK_INVARIANT(expr, mess)
Definition Invariant.h:100
#define POSTCONDITION(expr, mess)
Definition Invariant.h:116
#define RDUNUSED_PARAM(x)
Definition Invariant.h:196
virtual bool read(std::istream &ss, RDValue &value) const =0
virtual bool write(std::ostream &ss, const RDValue &value) const =0
virtual const char * getPropName() const =0
virtual CustomPropHandler * clone() const =0
virtual bool canSerialize(const RDValue &value) const =0
The Dict class can be used to store objects of arbitrary type keyed by strings.
Definition Dict.h:36
const DataType & getData() const
Access to the underlying data.
Definition Dict.h:142
const Dict & getDict() const
gets the underlying Dictionary
Definition RDProps.h:36
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
Definition RDProps.h:45
const unsigned char IntTag
Definition StreamOps.h:355
const unsigned char VecUIntTag
Definition StreamOps.h:362
const unsigned char VecBoolTag
Definition StreamOps.h:363
const unsigned char VecIntTag
Definition StreamOps.h:361
const unsigned char CustomTag
Definition StreamOps.h:367
const unsigned char StringTag
Definition StreamOps.h:354
const unsigned char VecFloatTag
Definition StreamOps.h:364
const unsigned char DoubleTag
Definition StreamOps.h:359
const unsigned char VecStringTag
Definition StreamOps.h:360
const unsigned char EndTag
Definition StreamOps.h:368
const unsigned char BoolTag
Definition StreamOps.h:357
const unsigned char VecDoubleTag
Definition StreamOps.h:365
const unsigned char FloatTag
Definition StreamOps.h:358
const unsigned char UnsignedIntTag
Definition StreamOps.h:356
static const boost::uint64_t UnsignedIntTag
static const boost::uint64_t StringTag
static const boost::uint64_t VecStringTag
static const boost::uint64_t VecIntTag
static const boost::uint64_t FloatTag
static const boost::uint64_t VecUnsignedIntTag
static const boost::uint64_t DoubleTag
static const boost::uint64_t IntTag
static const boost::uint64_t AnyTag
static const boost::uint64_t VecFloatTag
static const boost::uint64_t VecDoubleTag
static const boost::uint64_t BoolTag
Std stuff.
std::vector< std::string > STR_VECT
Definition Dict.h:29
unsigned char UCHAR
Definition types.h:301
boost::uint32_t pullPackedIntFromString(const char *&text)
Definition StreamOps.h:219
@ LITTLE_ENDIAN_ORDER
Definition StreamOps.h:28
@ BIG_ENDIAN_ORDER
Definition StreamOps.h:29
T SwapBytes(T value)
Definition StreamOps.h:44
void readRDStringVecValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:554
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition StreamOps.h:286
std::string getLine(std::istream *inStream)
grabs the next line from an instream and returns it.
Definition StreamOps.h:337
void readRDValueString(std::istream &ss, RDValue &value)
Definition StreamOps.h:547
boost::uint32_t readPackedIntFromStream(std::stringstream &ss)
Reads an integer from a stream in packed format and returns the result.
Definition StreamOps.h:153
bool isSerializable(const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:389
void streamReadStringVec(std::istream &ss, std::vector< std::string > &val, int version)
Definition StreamOps.h:325
void readRDVecValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:541
void streamWriteVec(std::ostream &ss, const T &val)
Definition StreamOps.h:277
T rdvalue_cast(RDValue_cast_t v)
void streamReadVec(std::istream &ss, T &val)
Definition StreamOps.h:315
void readRDValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:534
T EndianSwapBytes(T value)
Definition StreamOps.h:72
bool streamWriteProps(std::ostream &ss, const RDProps &props, bool savePrivate=false, bool saveComputed=false, const CustomPropHandlerVec &handlers={}, const std::unordered_set< std::string > &ignore={})
Definition StreamOps.h:490
bool streamReadProp(std::istream &ss, Dict::Pair &pair, bool &dictHasNonPOD, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:561
bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:417
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition StreamOps.h:264
void appendPackedIntToStream(std::stringstream &ss, boost::uint32_t num)
Packs an integer and outputs it to a stream.
Definition StreamOps.h:109
std::vector< std::shared_ptr< const CustomPropHandler > > CustomPropHandlerVec
Definition StreamOps.h:387
unsigned int streamReadProps(std::istream &ss, RDProps &props, const CustomPropHandlerVec &handlers={}, bool reset=true)
Definition StreamOps.h:631
std::string key
Definition Dict.h:39
RDValue val
Definition Dict.h:40
boost::uint64_t getTag() const