RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
Vector.h
Go to the documentation of this file.
1//
2// Copyright (C) 2004-2025 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_VECTOR_H
12#define RD_VECTOR_H
13
#include <RDGeneral/Invariant.h>
#include <RDGeneral/utils.h>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iomanip>
#include <memory>
#include <stdexcept>
#include <utility>
#include <boost/random.hpp>
23
//! Threshold below which an L2 norm is treated as zero
//! (used by RDNumeric::Vector::normalize()).
static constexpr double zero_tolerance{1.e-16};
25
26namespace RDNumeric {
27
28//! A class to represent vectors of numbers.
29template <class TYPE>
30class Vector {
31 public:
32 typedef std::shared_ptr<TYPE[]> DATA_SPTR;
33
34 //! Initialize with only a size.
35 constexpr explicit Vector(unsigned int N) {
36 d_size = N;
37 TYPE *data = new TYPE[N];
38 memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
39 d_data.reset(data);
40 }
41
42 //! Initialize with a size and default value.
43 constexpr Vector(unsigned int N, TYPE val) {
44 d_size = N;
45 d_data.reset(new TYPE[N]);
46 for (auto i = 0u; i < N; i++) {
47 d_data[i] = val;
48 }
49 }
50
51 //! Initialize from a smart pointer.
52 /*!
53 <b>NOTE:</b> the data is not copied in this case
54 */
55 constexpr Vector(unsigned int N, DATA_SPTR data) {
56 d_size = N;
57 d_data = data;
58 }
59
60 //! copy constructor
61 /*! We make a copy of the other vector's data.
62 */
63 constexpr Vector(const Vector &other) {
64 d_size = other.size();
65 const TYPE *otherData = other.getData();
66 d_data.reset(new TYPE[d_size]);
67
68 memcpy(static_cast<void *>(d_data.get()),
69 static_cast<const void *>(otherData), d_size * sizeof(TYPE));
70 }
71
72 constexpr ~Vector() = default;
73
74 //! return the size (dimension) of the vector
75 constexpr unsigned int size() const { return d_size; }
76
77 //! returns the value at a particular index
78 TYPE getVal(unsigned int i) const {
79 PRECONDITION(i < d_size, "bad index");
80 return d_data[i];
81 }
82
83 //! sets the index at a particular value
84 void setVal(unsigned int i, TYPE val) {
85 PRECONDITION(i < d_size, "bad index");
86 d_data[i] = val;
87 }
88
89 TYPE operator[](unsigned int i) const {
90 PRECONDITION(i < d_size, "bad index");
91 return d_data[i];
92 }
93
94 TYPE &operator[](unsigned int i) {
95 PRECONDITION(i < d_size, "bad index");
96 return d_data[i];
97 }
98
99 //! returns a pointer to our data array
100 constexpr TYPE *getData() { return d_data.get(); }
101
102 //! returns a const pointer to our data array
103 constexpr const TYPE *getData() const {
104 // return dp_data;
105 return d_data.get();
106 }
107
108 //! Copy operator.
109 /*! We make a copy of the other Vector's data.
110 */
111
113 PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
114 const TYPE *otherData = other.getData();
115 memcpy(static_cast<void *>(d_data.get()),
116 static_cast<const void *>(otherData), d_size * sizeof(TYPE));
117 return *this;
118 }
119
120 //! elementwise addition, vectors must be the same size.
122 PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
123 const TYPE *otherData = other.getData();
124 TYPE *data = d_data.get();
125 unsigned int i;
126 for (i = 0; i < d_size; i++) {
127 data[i] += otherData[i];
128 }
129 return *this;
130 }
131
132 //! elementwise subtraction, vectors must be the same size.
134 PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
135 const TYPE *otherData = other.getData();
136 TYPE *data = d_data.get();
137 unsigned int i;
138 for (i = 0; i < d_size; i++) {
139 data[i] -= otherData[i];
140 }
141 return *this;
142 }
143
144 //! multiplication by a scalar
145 constexpr Vector<TYPE> &operator*=(TYPE scale) {
146 unsigned int i;
147 for (i = 0; i < d_size; i++) {
148 d_data[i] *= scale;
149 }
150 return *this;
151 }
152
153 //! division by a scalar
154 constexpr Vector<TYPE> &operator/=(TYPE scale) {
155 unsigned int i;
156 for (i = 0; i < d_size; i++) {
157 d_data[i] /= scale;
158 }
159 return *this;
160 }
161
162 //! L2 norm squared
163 constexpr inline TYPE normL2Sq() const {
164 TYPE res = (TYPE)0.0;
165 unsigned int i;
166 TYPE *data = d_data.get();
167 for (i = 0; i < d_size; i++) {
168 res += data[i] * data[i];
169 }
170 return res;
171 }
172
173 //! L2 norm
174 constexpr TYPE normL2() const { return sqrt(this->normL2Sq()); }
175
176 //! L1 norm
177 constexpr TYPE normL1() const {
178 TYPE res = (TYPE)0.0;
179 unsigned int i;
180 TYPE *data = d_data.get();
181 for (i = 0; i < d_size; i++) {
182 res += fabs(data[i]);
183 }
184 return res;
185 }
186
187 //! L-infinity norm
188 constexpr TYPE normLinfinity() const {
189 TYPE res = (TYPE)(-1.0);
190 unsigned int i;
191 TYPE *data = d_data.get();
192 for (i = 0; i < d_size; i++) {
193 if (fabs(data[i]) > res) {
194 res = fabs(data[i]);
195 }
196 }
197 return res;
198 }
199
200 //! \brief Gets the ID of the entry that has the largest absolute value
201 //! i.e. the entry being used for the L-infinity norm
202 constexpr unsigned int largestAbsValId() const {
203 TYPE res = (TYPE)(-1.0);
204 unsigned int i, id = d_size;
205 TYPE *data = d_data.get();
206 for (i = 0; i < d_size; i++) {
207 if (fabs(data[i]) > res) {
208 res = fabs(data[i]);
209 id = i;
210 }
211 }
212 return id;
213 }
214
215 //! \brief Gets the ID of the entry that has the largest value
216 constexpr unsigned int largestValId() const {
217 TYPE res = (TYPE)(-1.e8);
218 unsigned int i, id = d_size;
219 TYPE *data = d_data.get();
220 for (i = 0; i < d_size; i++) {
221 if (data[i] > res) {
222 res = data[i];
223 id = i;
224 }
225 }
226 return id;
227 }
228
229 //! \brief Gets the ID of the entry that has the smallest value
230 constexpr unsigned int smallestValId() const {
231 TYPE res = (TYPE)(1.e8);
232 unsigned int i, id = d_size;
233 TYPE *data = d_data.get();
234 for (i = 0; i < d_size; i++) {
235 if (data[i] < res) {
236 res = data[i];
237 id = i;
238 }
239 }
240 return id;
241 }
242
243 //! returns the dot product between two Vectors
244 TYPE dotProduct(const Vector<TYPE> other) const {
245 PRECONDITION(d_size == other.size(),
246 "Size mismatch in vector doct product");
247 const TYPE *oData = other.getData();
248 unsigned int i;
249 TYPE res = (TYPE)(0.0);
250 TYPE *data = d_data.get();
251 for (i = 0; i < d_size; i++) {
252 res += (data[i] * oData[i]);
253 }
254 return res;
255 }
256
257 //! Normalize the vector using the L2 norm
258 constexpr void normalize() {
259 TYPE val = this->normL2();
260 if (val < zero_tolerance) {
261 throw std::runtime_error("Cannot normalize a zero length vector");
262 }
263 (*this) /= val;
264 }
265
266 //! Set to a random unit vector
267 void setToRandom(unsigned int seed = 0) {
268 // we want to get our own RNG here instead of using the global
269 // one. This is related to Issue285.
270 RDKit::rng_type generator(42u);
271 RDKit::uniform_double dist(0, 1.0);
272 RDKit::double_source_type randSource(generator, dist);
273 if (seed > 0) {
274 generator.seed(seed);
275 } else {
276 // we can't initialize using only clock(), because it's possible
277 // that we'll get here fast enough that clock() will return 0
278 // and generator.seed(0) is an error:
279 generator.seed(clock() + 1);
280 }
281
282 unsigned int i;
283 TYPE *data = d_data.get();
284 for (i = 0; i < d_size; i++) {
285 data[i] = randSource();
286 }
287 this->normalize();
288 }
289
290 private:
291 unsigned int d_size; //!< our length
292 DATA_SPTR d_data;
293 Vector<TYPE> &operator=(const Vector<TYPE> &other);
294};
295
297
298//! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
299template <typename T>
300constexpr double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
301 double numer = v1.dotProduct(v2);
302 if (numer == 0.0) {
303 return 0.0;
304 }
305 double denom = v1.normL2Sq() + v2.normL2Sq() - numer;
306 if (denom == 0.0) {
307 return 0.0;
308 }
309 return numer / denom;
310}
311} // end of namespace RDNumeric
312
313//! ostream operator for Vectors
314template <typename TYPE>
315constexpr std::ostream &operator<<(std::ostream &target,
316 const RDNumeric::Vector<TYPE> &vec) {
317 unsigned int siz = vec.size();
318 target << "Size: " << siz << " [";
319 unsigned int i;
320 for (i = 0; i < siz; i++) {
321 target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
322 }
323 target << "]\n";
324 return target;
325}
326
327#endif
#define PRECONDITION(expr, mess)
Definition Invariant.h:108
static constexpr double zero_tolerance
Definition Vector.h:24
constexpr std::ostream & operator<<(std::ostream &target, const RDNumeric::Vector< TYPE > &vec)
ostream operator for Vectors
Definition Vector.h:315
A class to represent vectors of numbers.
Definition Vector.h:30
constexpr void normalize()
Normalize the vector using the L2 norm.
Definition Vector.h:258
constexpr TYPE * getData()
returns a pointer to our data array
Definition Vector.h:100
Vector< TYPE > & assign(const Vector< TYPE > &other)
Copy operator.
Definition Vector.h:112
TYPE & operator[](unsigned int i)
Definition Vector.h:94
std::shared_ptr< TYPE[]> DATA_SPTR
Definition Vector.h:32
constexpr ~Vector()=default
void setVal(unsigned int i, TYPE val)
sets the value at a particular index
Definition Vector.h:84
constexpr Vector(unsigned int N, TYPE val)
Initialize with a size and default value.
Definition Vector.h:43
constexpr TYPE normL1() const
L1 norm.
Definition Vector.h:177
constexpr const TYPE * getData() const
returns a const pointer to our data array
Definition Vector.h:103
constexpr Vector(unsigned int N, DATA_SPTR data)
Initialize from a smart pointer.
Definition Vector.h:55
constexpr unsigned int largestAbsValId() const
Gets the ID of the entry that has the largest absolute value i.e. the entry being used for the L-infi...
Definition Vector.h:202
Vector< TYPE > & operator-=(const Vector< TYPE > &other)
elementwise subtraction, vectors must be the same size.
Definition Vector.h:133
constexpr TYPE normL2Sq() const
L2 norm squared.
Definition Vector.h:163
constexpr TYPE normLinfinity() const
L-infinity norm.
Definition Vector.h:188
Vector< TYPE > & operator+=(const Vector< TYPE > &other)
elementwise addition, vectors must be the same size.
Definition Vector.h:121
void setToRandom(unsigned int seed=0)
Set to a random unit vector.
Definition Vector.h:267
constexpr unsigned int largestValId() const
Gets the ID of the entry that has the largest value.
Definition Vector.h:216
constexpr unsigned int size() const
return the size (dimension) of the vector
Definition Vector.h:75
constexpr unsigned int smallestValId() const
Gets the ID of the entry that has the smallest value.
Definition Vector.h:230
constexpr Vector< TYPE > & operator*=(TYPE scale)
multiplication by a scalar
Definition Vector.h:145
TYPE dotProduct(const Vector< TYPE > other) const
returns the dot product between two Vectors
Definition Vector.h:244
TYPE getVal(unsigned int i) const
returns the value at a particular index
Definition Vector.h:78
TYPE operator[](unsigned int i) const
Definition Vector.h:89
constexpr Vector(unsigned int N)
Initialize with only a size.
Definition Vector.h:35
constexpr Vector(const Vector &other)
copy constructor
Definition Vector.h:63
constexpr TYPE normL2() const
L2 norm.
Definition Vector.h:174
constexpr Vector< TYPE > & operator/=(TYPE scale)
division by a scalar
Definition Vector.h:154
boost::minstd_rand rng_type
Definition utils.h:36
boost::variate_generator< rng_type &, uniform_double > double_source_type
Definition utils.h:40
boost::uniform_real uniform_double
Definition utils.h:38
Vector< double > DoubleVector
Definition Vector.h:296
constexpr double TanimotoSimilarity(const Vector< T > &v1, const Vector< T > &v2)
returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
Definition Vector.h:300