RDKit
Open-source cheminformatics and machine learning.
BFGSOpt.h
//
//  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include <RDGeneral/export.h>
#include <cmath>
#include <RDGeneral/Invariant.h>
#include <GraphMol/Trajectory/Snapshot.h>
#include <cstring>
#include <vector>
#include <algorithm>

namespace BFGSOpt {
RDKIT_OPTIMIZER_EXPORT extern int REALLY_A_HEADER_ONLY_LIBRARY;
RDKIT_OPTIMIZER_EXPORT extern int HEAD_ONLY_LIBRARY;
const double FUNCTOL =
    1e-4;  //!< Default tolerance for function convergence in the minimizer
const double MOVETOL =
    1e-7;  //!< Default tolerance for x changes in the minimizer
const int MAXITS = 200;   //!< Default maximum number of iterations
const double EPS = 3e-8;  //!< Default gradient tolerance in the minimizer
const double TOLX =
    4. * EPS;  //!< Default direction vector tolerance in the minimizer
const double MAXSTEP = 100.0;  //!< Default maximum step size in the minimizer

//! Do a Quasi-Newton minimization along a line.
/*!
  See Numerical Recipes in C, Section 9.7 for a description of the algorithm.

  \param dim the dimensionality of the space.
  \param oldPt the current position, as an array.
  \param oldVal the current function value.
  \param grad the value of the function gradient at oldPt.
  \param dir the minimization direction.
  \param newPt used to return the final position.
  \param newVal used to return the final function value.
  \param func the function to minimize.
  \param maxStep the maximum allowable step size.
  \param resCode used to return the result of the search.

  Possible values for resCode on return are:
   - 0: success
   - 1: the step size got too small. This probably indicates success.
   - -1: the direction is bad (orthogonal to the gradient)
*/
template <typename EnergyFunctor>
void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad,
                  double *dir, double *newPt, double &newVal,
                  EnergyFunctor func, double maxStep, int &resCode) {
  PRECONDITION(oldPt, "bad input array");
  PRECONDITION(grad, "bad input array");
  PRECONDITION(dir, "bad input array");
  PRECONDITION(newPt, "bad input array");

  const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
  double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
  double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;

  resCode = -1;

  // get the length of the direction vector:
  sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    sum += dir[i] * dir[i];
  }
  sum = sqrt(sum);

  // rescale if we're trying to move too far:
  if (sum > maxStep) {
    for (unsigned int i = 0; i < dim; i++) {
      dir[i] *= maxStep / sum;
    }
  }

  // make sure our direction has at least some component along
  // -grad
  slope = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    slope += dir[i] * grad[i];
  }
  if (slope >= 0.0) {
    return;
  }

  test = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
    if (temp > test) {
      test = temp;
    }
  }

  lambdaMin = MOVETOL / test;
  lambda = 1.0;
  unsigned int it = 0;
  while (it < MAX_ITER_LINEAR_SEARCH) {
    // std::cerr << "\t" << it << " : " << lambda << " " << lambdaMin << std::endl;
    if (lambda < lambdaMin) {
      // the position change is too small
      resCode = 1;
      break;
    }
    for (unsigned int i = 0; i < dim; i++) {
      newPt[i] = oldPt[i] + lambda * dir[i];
    }
    newVal = func(newPt);

    if (newVal - oldVal <= FUNCTOL * lambda * slope) {
      // we're converged on the function:
      resCode = 0;
      return;
    }
    // if we made it this far, we need to backtrack:
    if (it == 0) {
      // it's the first step:
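      // minimize the quadratic model g(lambda) = (newVal - oldVal - slope) *
      //   lambda^2 + slope * lambda + oldVal, which interpolates
      //   g(0) = oldVal, g'(0) = slope, and g(1) = newVal: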
      tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
    } else {
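      // fit the cubic model g(lambda) = a*lambda^3 + b*lambda^2 +
      //   slope*lambda + oldVal through the two most recent
      //   (lambda, value) pairs and step to its local minimum: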
      double rhs1 = newVal - oldVal - lambda * slope;
      double rhs2 = val2 - oldVal - lambda2 * slope;
      double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      double b = (-lambda2 * rhs1 / (lambda * lambda) +
                  lambda * rhs2 / (lambda2 * lambda2)) /
                 (lambda - lambda2);
      if (a == 0.0) {
        tmpLambda = -slope / (2.0 * b);
      } else {
        double disc = b * b - 3 * a * slope;
        if (disc < 0.0) {
          tmpLambda = 0.5 * lambda;
        } else if (b <= 0.0) {
          tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
        } else {
          tmpLambda = -slope / (b + sqrt(disc));
        }
      }
      if (tmpLambda > 0.5 * lambda) {
        tmpLambda = 0.5 * lambda;
      }
    }
    lambda2 = lambda;
    val2 = newVal;
    lambda = std::max(tmpLambda, 0.1 * lambda);
    ++it;
  }
  // nothing was done
  // std::cerr << " RETURN AT END: " << it << " " << resCode << std::endl;
  for (unsigned int i = 0; i < dim; i++) {
    newPt[i] = oldPt[i];
  }
}
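
// Illustrative sketch (not part of the original header): calling linearSearch
// directly on a 1-D quadratic. The BFGSOptExamples namespace and the names in
// it are hypothetical; normal code reaches linearSearch through minimize().
namespace BFGSOptExamples {
inline int lineSearchSketch() {
  auto func = [](double *p) { return (p[0] - 3.0) * (p[0] - 3.0); };
  double oldPt[1] = {0.0};
  double oldVal = func(oldPt);                // 9.0
  double grad[1] = {2.0 * (oldPt[0] - 3.0)};  // df/dx at oldPt: -6.0
  double dir[1] = {-grad[0]};                 // descend along -grad
  double newPt[1];
  double newVal = 0.0;
  int resCode;
  linearSearch(1, oldPt, oldVal, grad, dir, newPt, newVal, func, MAXSTEP,
               resCode);
  // resCode: 0 = converged on the function value, 1 = step size underflow
  // (usually still usable), -1 = dir had no component along -grad
  return resCode;
}
}  // namespace BFGSOptExamples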

//! Do a BFGS minimization of a function.
/*!
  See Numerical Recipes in C, Section 10.7 for a description of the algorithm.

  \param dim the dimensionality of the space.
  \param pos the starting position, as an array.
  \param gradTol tolerance for gradient convergence.
  \param numIters used to return the number of iterations required.
  \param funcVal used to return the final function value.
  \param func the function to minimize.
  \param gradFunc calculates the gradient of func.
  \param snapshotFreq a snapshot of the minimization trajectory will be
                      stored every snapshotFreq steps; defaults to 0 (no
                      snapshots stored).
  \param snapshotVect pointer to a std::vector<Snapshot> object that will
                      receive the coordinates and energies every
                      snapshotFreq steps; defaults to NULL (no snapshots
                      stored).
  \param funcTol tolerance for changes in the function value for
                 convergence (currently unused).
  \param maxIts maximum number of iterations allowed.

  \return a flag indicating success (or type of failure). Possible values are:
   - 0: success
   - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, unsigned int snapshotFreq,
             RDKit::SnapshotVect *snapshotVect, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  RDUNUSED_PARAM(funcTol);
  PRECONDITION(pos, "bad input array");
  PRECONDITION(gradTol > 0, "bad tolerance");

  std::vector<double> grad(dim);
  std::vector<double> dGrad(dim);
  std::vector<double> hessDGrad(dim);
  std::vector<double> xi(dim);
  std::vector<double> invHessian(dim * dim, 0);
  std::unique_ptr<double[]> newPos(new double[dim]);
  snapshotFreq = std::min(snapshotFreq, maxIts);

  // evaluate the function and gradient in our current position:
  double fp = func(pos);
  gradFunc(pos, grad.data());

  double sum = 0.0;
  for (unsigned int i = 0; i < dim; i++) {
    unsigned int itab = i * dim;
    // initialize the inverse hessian to be identity:
    invHessian[itab + i] = 1.0;
    // the first line dir is -grad:
    xi[i] = -grad[i];
    sum += pos[i] * pos[i];
  }
  // pick a max step size:
  double maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));

  for (unsigned int iter = 1; iter <= maxIts; ++iter) {
    numIters = iter;
    int status = -1;

    // do the line search:
    linearSearch(dim, pos, fp, grad.data(), xi.data(), newPos.get(), funcVal,
                 func, maxStep, status);
    CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");

    // save the function value for the next search:
    fp = funcVal;

    // set the direction of this line and save the gradient:
    double test = 0.0;
    for (unsigned int i = 0; i < dim; i++) {
      xi[i] = newPos[i] - pos[i];
      pos[i] = newPos[i];
      double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
      if (temp > test) {
        test = temp;
      }
      dGrad[i] = grad[i];
    }
    // std::cerr << " iter: " << iter << " " << fp << " " << test << " " << TOLX << std::endl;
    if (test < TOLX) {
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos.release()), fp);
        snapshotVect->push_back(s);
      }
      return 0;
    }

    // update the gradient:
    double gradScale = gradFunc(pos, grad.data());

    // is the gradient converged?
    test = 0.0;
    double term = std::max(funcVal * gradScale, 1.0);
    for (unsigned int i = 0; i < dim; i++) {
      double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
      test = std::max(test, temp);
      dGrad[i] = grad[i] - dGrad[i];
    }
    test /= term;
    // std::cerr << "  " << gradScale << " " << test << " " << gradTol << std::endl;
    if (test < gradTol) {
      if (snapshotVect && snapshotFreq) {
        RDKit::Snapshot s(boost::shared_array<double>(newPos.release()), fp);
        snapshotVect->push_back(s);
      }
      return 0;
    }

    // for(unsigned int i=0;i<dim;i++){
    // figure out how much the gradient changed:
    //}

    // compute hessian*dGrad:
    double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
    for (unsigned int i = 0; i < dim; i++) {
      double *ivh = &(invHessian[i * dim]);
      double &hdgradi = hessDGrad[i];
      double *dgj = dGrad.data();
      hdgradi = 0.0;
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++dgj) {
        hdgradi += *ivh * *dgj;
      }
      fac += dGrad[i] * xi[i];
      fae += dGrad[i] * hessDGrad[i];
      sumDGrad += dGrad[i] * dGrad[i];
      sumXi += xi[i] * xi[i];
    }
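    // Skip the update unless s^T y is sufficiently positive; otherwise apply
    // the BFGS inverse-Hessian update (Numerical Recipes dfpmin):
    //   H += s s^T / (s^T y) - (H y)(H y)^T / (y^T H y) + (y^T H y) u u^T
    // with s = xi (the step), y = dGrad (the gradient change),
    // H y = hessDGrad, fac = s^T y, fae = y^T H y, and
    // u = s / (s^T y) - (H y) / (y^T H y) (written into dGrad below).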
    if (fac > sqrt(EPS * sumDGrad * sumXi)) {
      fac = 1.0 / fac;
      double fad = 1.0 / fae;
      for (unsigned int i = 0; i < dim; i++) {
        dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
      }

      for (unsigned int i = 0; i < dim; i++) {
        unsigned int itab = i * dim;
        double pxi = fac * xi[i], hdgi = fad * hessDGrad[i],
               dgi = fae * dGrad[i];
        double *pxj = &(xi[i]), *hdgj = &(hessDGrad[i]), *dgj = &(dGrad[i]);
        for (unsigned int j = i; j < dim; ++j, ++pxj, ++hdgj, ++dgj) {
          invHessian[itab + j] += pxi * *pxj - hdgi * *hdgj + dgi * *dgj;
          invHessian[j * dim + i] = invHessian[itab + j];
        }
      }
    }
    // generate the next direction to move:
    for (unsigned int i = 0; i < dim; i++) {
      unsigned int itab = i * dim;
      xi[i] = 0.0;
      double &pxi = xi[i];
      double *ivh = &(invHessian[itab]);
      double *gj = grad.data();
      for (unsigned int j = 0; j < dim; ++j, ++ivh, ++gj) {
        pxi -= *ivh * *gj;
      }
    }
    if (snapshotVect && snapshotFreq && !(iter % snapshotFreq)) {
      RDKit::Snapshot s(boost::shared_array<double>(newPos.release()), fp);
      snapshotVect->push_back(s);
      newPos.reset(new double[dim]);
    }
  }
  return 1;
}
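
// Illustrative sketch (not part of the original header): minimizing the 2-D
// quadratic f(x, y) = (x - 1)^2 + (y - 2)^2 without snapshots. The
// BFGSOptExamples namespace and the names in it are hypothetical.
namespace BFGSOptExamples {
inline int minimizeSketch() {
  auto func = [](double *p) {
    return (p[0] - 1.0) * (p[0] - 1.0) + (p[1] - 2.0) * (p[1] - 2.0);
  };
  // the gradient functor fills g and returns the scale factor used in the
  // gradient convergence test (1.0 when the gradient is unscaled):
  auto gradFunc = [](double *p, double *g) {
    g[0] = 2.0 * (p[0] - 1.0);
    g[1] = 2.0 * (p[1] - 2.0);
    return 1.0;
  };
  double pos[2] = {0.0, 0.0};
  unsigned int numIters = 0;
  double funcVal = 0.0;
  int res = minimize(2, pos, 1e-4, numIters, funcVal, func, gradFunc,
                     0, nullptr);  // snapshotFreq = 0, snapshotVect = nullptr
  return res;  // 0 on success; pos now holds approximately (1, 2)
}
}  // namespace BFGSOptExamples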

//! Do a BFGS minimization of a function.
/*!
  \param dim the dimensionality of the space.
  \param pos the starting position, as an array.
  \param gradTol tolerance for gradient convergence.
  \param numIters used to return the number of iterations required.
  \param funcVal used to return the final function value.
  \param func the function to minimize.
  \param gradFunc calculates the gradient of func.
  \param funcTol tolerance for changes in the function value for
                 convergence (currently unused).
  \param maxIts maximum number of iterations allowed.

  \return a flag indicating success (or type of failure). Possible values are:
   - 0: success
   - 1: too many iterations were required
*/
template <typename EnergyFunctor, typename GradientFunctor>
int minimize(unsigned int dim, double *pos, double gradTol,
             unsigned int &numIters, double &funcVal, EnergyFunctor func,
             GradientFunctor gradFunc, double funcTol = TOLX,
             unsigned int maxIts = MAXITS) {
  return minimize(dim, pos, gradTol, numIters, funcVal, func, gradFunc, 0,
                  nullptr, funcTol, maxIts);
}
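
// With the convenience overload above, the hypothetical minimizeSketch()
// call simplifies to:
//   int res = minimize(2, pos, 1e-4, numIters, funcVal, func, gradFunc);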

}  // namespace BFGSOpt