iterative-solver 0.0
OptimizeBFGS.h
1#ifndef LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
2#define LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
3#include <molpro/linalg/itsolv/CastOptions.h>
4#include <molpro/linalg/itsolv/DSpaceResetter.h>
5#include <molpro/linalg/itsolv/Interpolate.h>
6#include <molpro/linalg/itsolv/IterativeSolverTemplate.h>
7#include <molpro/linalg/itsolv/propose_rspace.h>
8#include <molpro/linalg/itsolv/subspace/SubspaceSolverOptBFGS.h>
9#include <molpro/linalg/itsolv/subspace/XSpace.h>
10#include <cmath>
11#include <map>
12#include <memory>
13
14namespace molpro::linalg::itsolv {
23template <class R, class Q = R, class P = std::map<size_t, typename R::value_type>>
24class OptimizeBFGS : public IterativeSolverTemplate<Optimize, R, Q, P> {
25public:
28 using typename SolverTemplate::scalar_type;
29 using typename SolverTemplate::value_type;
30 using typename SolverTemplate::value_type_abs;
32
33 explicit OptimizeBFGS(const std::shared_ptr<ArrayHandlers<R, Q, P>>& handlers,
34 const std::shared_ptr<Logger>& logger_ = std::make_shared<Logger>())
35 : SolverTemplate(std::make_shared<subspace::XSpace<R, Q, P>>(handlers, logger_),
36 std::static_pointer_cast<subspace::ISubspaceSolver<R, Q, P>>(
37 std::make_shared<subspace::SubspaceSolverOptBFGS<R, Q, P>>(logger_)),
38 handlers, std::make_shared<Statistics>(), logger_),
39 logger(logger_) {}
40
41 bool nonlinear() const override { return true; }
42
43 int add_vector(R& parameters, R& residual, value_type value) override {
44 auto prof = this->profiler()->push("itsolv::add_vector");
45 using namespace subspace;
46 auto& xspace = this->m_xspace;
47 auto& xdata = xspace->data;
48 const auto& H = xdata[EqnData::H];
49 const auto& S = xdata[EqnData::S];
50 auto& Value = xdata[EqnData::value];
51 // std::cout << "updated value to" << as_string(Value) << std::endl;
52
53 // std::cout << "H "<<as_string(H)<<std::endl;
54 // std::cout << "Value "<<as_string(Value)<<std::endl;
55 while (xspace->size() >= size_t(this->m_max_size_qspace)) {
56 // std::cout << "delete Q" << std::endl;
57 xspace->eraseq(xspace->size() - 1);
58 }
59 // std::cout << "H after delete Q "<<as_string(H)<<std::endl;
60 // std::cout << "Value after delete Q "<<as_string(Value)<<std::endl;
61
62 // augment space with current point
63 auto oldValue = Value;
64 Value.resize({xspace->size() + 1, 1});
65 if (xspace->size() > 0)
66 Value.slice({1, 0}, {xspace->size() + 1, 1}) = oldValue.slice();
67 Value(0, 0) = value;
68 auto nwork = IterativeSolverTemplate<Optimize, R, Q, P>::add_vector(parameters, residual);
69 // std::cout << "H after add_vector "<<as_string(H)<<std::endl;
70 // std::cout << "Value after add_vector "<<as_string(Value)<<std::endl;
71
72 if (xspace->size() > 1) { // see whether a line search is needed
73 auto fprev = Value(1, 0); // the previous point
74 auto fcurrent = Value(0, 0); // the current point
75 auto gprev = H(0, 1) - H(1, 1);
76 auto gcurrent = H(0, 0) - H(1, 0);
77 bool Wolfe_1 = fcurrent <= fprev + m_Wolfe_1 * gprev;
78 bool Wolfe_2 = m_strong_Wolfe ? gcurrent >= m_Wolfe_2 * gprev : std::abs(gcurrent) <= m_Wolfe_2 * std::abs(gprev);
79 auto step = S(0, 0) - S(1, 0) - S(0, 1) + S(1, 1);
80 if (false) {
81 molpro::cout << "Size of Q=" << xspace->size() << std::endl;
82 molpro::cout << "step=" << step << std::endl;
83 molpro::cout << "fprev=" << fprev << std::endl;
84 molpro::cout << "fcurrent=" << fcurrent << std::endl;
85 molpro::cout << " m_Wolfe_1 =" << m_Wolfe_1 << std::endl;
86 molpro::cout << " m_Wolfe_1 * gprev=" << m_Wolfe_1 * gprev << std::endl;
87 molpro::cout << "fprev + m_Wolfe_1 * gprev=" << fprev + m_Wolfe_1 * gprev << std::endl;
88 molpro::cout << "gprev=" << gprev << std::endl;
89 molpro::cout << "gcurrent=" << gcurrent << std::endl;
90 molpro::cout << "m_convergence_threshold=" << this->m_convergence_threshold << std::endl;
91 molpro::cout << "Wolfe conditions: " << Wolfe_1 << Wolfe_2 << std::endl;
92 }
93 if (
94 // std::abs(gcurrent) < this->m_convergence_threshold or
95 (Wolfe_1 && Wolfe_2))
96 goto accept;
97 // molpro::cout << "evaluating line search" << std::endl;
98 Interpolate inter({-1, fprev, gprev}, {0, fcurrent, gcurrent});
99 auto [x, f, g, h] = inter.minimize(-1 - this->m_linesearch_grow_factor, this->m_linesearch_grow_factor);
100 // molpro::cout << "interpolation" << x << " f=" << f << " g=" << g << " h=" << h << std::endl;
101 if (std::isnan(x))
102 throw std::runtime_error("NaN in line search");
103 if (std::abs(x) > m_linesearch_tolerance) {
104 // molpro::cout << "taking line search" << std::endl;
105 this->m_logger->msg("Line search step taken", Logger::Info);
106 this->m_handlers->rr().scal(1 + x, parameters);
107 this->m_handlers->rq().axpy(-x, xspace->paramsq()[1], parameters);
108 auto erased = fprev < fcurrent ? 0 : 1;
109 // std::cout << "Value before erasure: "<<as_string(Value)<<std::endl;
110 // std::cout << "erased="<<erased<<"; removing point with value "<<Value(erased,0)<<std::endl;
111 xspace->eraseq(erased);
112 // std::cout << "Value after erasure: "<<as_string(Value)<<std::endl;
113 m_linesearch = true;
114 if (false) {
115 while (xspace->size() >= 2) {
116 std::cout << "delete Q because line searching" << std::endl;
117 xspace->eraseq(xspace->size() - 1);
118 }
119 }
120 return -1;
121 }
122 }
123
124 accept:
125 m_linesearch = false;
126 this->m_logger->msg("Quasi-Newton step taken", Logger::Info);
127 // this->m_errors.front() = std::sqrt(this->m_handlers->rr().dot(residual,residual));
128 for (size_t a = 0; a < xspace->size() - 1; a++) {
129 if (std::abs(H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1)) <
130 std::max(5e-14 * std::abs(H(a, a)), 1e-15)) {
131 xspace->eraseq(a + 1);
132 this->m_logger->msg("Erase redundant Q", Logger::Info);
133 goto accept;
134 }
135 }
136 BFGS_update_1(residual, xspace, H);
137 return nwork;
138 }
139
140 void BFGS_update_1(R& residual, std::shared_ptr<const subspace::IXSpace<R, Q, P>> xspace, const Matrix<double>& H) {
141 m_BFGS_update_alpha.resize(xspace->size() - 1);
142 const auto& q = xspace->paramsq();
143 const auto& u = xspace->actionsq();
144 for (size_t a = 0; a < m_BFGS_update_alpha.size(); a++) {
146 (this->m_handlers->rq().dot(residual, q[a]) - this->m_handlers->rq().dot(residual, q[a + 1])) /
147 (H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1));
148 this->m_handlers->rq().axpy(-m_BFGS_update_alpha[a], u[a], residual);
149 this->m_handlers->rq().axpy(m_BFGS_update_alpha[a], u[a + 1], residual);
150 }
151 }
152
153 void BFGS_update_2(R& z, std::shared_ptr<const subspace::IXSpace<R, Q, P>> xspace, const Matrix<double>& H) {
154 const auto& q = xspace->paramsq();
155 const auto& u = xspace->actionsq();
156 for (int a = m_BFGS_update_alpha.size() - 1; a >= 0; a--) {
157 auto beta = (this->m_handlers->rq().dot(z, u[a]) - this->m_handlers->rq().dot(z, u[a + 1])) /
158 (H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1));
159 this->m_handlers->rq().axpy(+m_BFGS_update_alpha[a] - beta, q[a], z);
160 this->m_handlers->rq().axpy(-m_BFGS_update_alpha[a] + beta, q[a + 1], z);
161 }
162 }
163
164 size_t end_iteration(const VecRef<R>& parameters, const VecRef<R>& action) override {
165 auto prof = this->profiler()->push("itsolv::end_iteration");
166 this->m_working_set = {0};
167 this->m_end_iteration_needed = false;
168 if (not m_linesearch) { // action is expected to hold the preconditioned residual
170 this->solution_params(this->m_working_set, parameters);
171 if (this->m_errors.front() < this->m_convergence_threshold) {
172 this->m_working_set.clear();
173 return 0;
174 }
175 this->m_working_set.assign(1, 0);
176 auto& z = action.front();
177 const auto& xspace = this->m_xspace;
178 auto& xdata = xspace->data;
179 const auto& H = xdata[subspace::EqnData::H];
180 BFGS_update_2(z, xspace, H);
181 auto len_z = std::sqrt(this->m_handlers->rr().dot(z,z));
182 if (len_z > this->m_quasinewton_maximum_step)
183 this->m_handlers->rr().scal(this->m_quasinewton_maximum_step / len_z, z);
184 this->m_handlers->rr().axpy(-1, z, parameters.front());
185 } else {
186 this->m_stats->line_search_steps++;
188 this->m_stats->line_searches++;
189 m_last_iteration_linesearching = true;
190 }
191 this->m_stats->iterations++;
192 return this->errors().front() < this->m_convergence_threshold ? 0 : 1;
193 }
194
195 size_t end_iteration(std::vector<R>& parameters, std::vector<R>& action) override {
196 auto result = end_iteration(wrap(parameters), wrap(action));
197 return result;
198 }
199
200 size_t end_iteration(R& parameters, R& actions) override {
201 auto wparams = std::vector<std::reference_wrapper<R>>{std::ref(parameters)};
202 auto wactions = std::vector<std::reference_wrapper<R>>{std::ref(actions)};
203 return end_iteration(wparams, wactions);
204 }
205
206 void set_value_errors() override {
207 auto& Value = this->m_xspace->data[subspace::EqnData::value];
208 this->m_value_errors.assign(1, std::numeric_limits<double>::max());
209 if (this->m_xspace->size() > 1 and Value(0, 0) < Value(1, 0))
210 this->m_value_errors.front() = Value(1, 0) - Value(0, 0);
211 }
212
217
218 void set_options(const Options& options) override {
220 if (auto opt2 = dynamic_cast<const molpro::linalg::itsolv::OptimizeBFGSOptions*>(&options)) {
222 if (opt.max_size_qspace)
223 set_max_size_qspace(opt.max_size_qspace.value());
224 if (opt.strong_Wolfe)
225 m_strong_Wolfe = opt.strong_Wolfe.value();
226 if (opt.Wolfe_1)
227 m_Wolfe_1 = opt.Wolfe_1.value();
228 if (opt.Wolfe_2)
229 m_Wolfe_2 = opt.Wolfe_2.value();
230 if (opt.linesearch_tolerance)
231 m_linesearch_tolerance = opt.linesearch_tolerance.value();
232 if (opt.linesearch_grow_factor)
233 m_linesearch_grow_factor = opt.linesearch_grow_factor.value();
234 if (opt.quasinewton_maximum_step)
235 m_quasinewton_maximum_step = opt.quasinewton_maximum_step.value();
236 }
237 }
238
239 std::shared_ptr<Options> get_options() const override {
240 auto opt = std::make_shared<OptimizeBFGSOptions>();
241 opt->copy(*SolverTemplate::get_options());
242 opt->max_size_qspace = get_max_size_qspace();
243 opt->strong_Wolfe = m_strong_Wolfe;
244 opt->Wolfe_1 = m_strong_Wolfe;
245 opt->Wolfe_2 = m_Wolfe_2;
246 opt->linesearch_tolerance = m_linesearch_tolerance;
247 opt->linesearch_grow_factor = m_linesearch_grow_factor;
248 opt->quasinewton_maximum_step = m_quasinewton_maximum_step;
249 return opt;
250 }
251
252 void report(std::ostream& cout, bool endl = true) const override {
253 SolverTemplate::report(cout, false);
254 cout << ", value " << this->value() << (m_linesearch ? ", line-searching" : ", quasi-Newton step");
255 if (endl)
256 cout << std::endl;
257 }
258 std::shared_ptr<Logger> logger;
259
260protected:
261 std::vector<double> m_BFGS_update_alpha;
264
265protected:
266 // for non-linear problems, actions already contains the residual
267 void construct_residual(const std::vector<int>& roots, const CVecRef<R>& params, const VecRef<R>& actions) override {}
268
269 int m_max_size_qspace = std::numeric_limits<int>::max();
270 bool m_strong_Wolfe = true;
271 double m_Wolfe_1 = 1e-4;
272 double m_Wolfe_2 = 0.9;
276 2;
277 double m_quasinewton_maximum_step = std::numeric_limits<double>::max();
278};
279
280} // namespace molpro::linalg::itsolv
281#endif // LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
Class, containing a collection of array handlers used in IterativeSolver Provides a Builder sub-class...
Definition: ArrayHandlers.h:25
Definition: Interpolate.h:13
Interpolate::point minimize(double xa, double xb, size_t bracket_grid=100, size_t max_bracket_grid=100000, bool analytic=true) const
Find the minimum of the interpolant within a range.
Definition: Interpolate.cpp:139
Implements IterativeSolver interface that is common to all solvers.
Definition: IterativeSolverTemplate.h:127
void solution_params(const std::vector< int > &roots, std::vector< R > &parameters) override
Definition: IterativeSolverTemplate.h:224
std::vector< int > m_working_set
indices of roots in the working set
Definition: IterativeSolverTemplate.h:584
scalar_type value() const override
Report the function value for the current optimum solution.
Definition: IterativeSolverTemplate.h:312
std::shared_ptr< subspace::IXSpace< R, R, std::map< size_t, typename R::value_type > > > m_xspace
manages the subspace and associated data
Definition: IterativeSolverTemplate.h:580
std::shared_ptr< ArrayHandlers< R, R, std::map< size_t, typename R::value_type > > > m_handlers
Array handlers.
Definition: IterativeSolverTemplate.h:579
std::shared_ptr< Options > get_options() const override
Definition: IterativeSolverTemplate.h:262
const std::shared_ptr< molpro::profiler::Profiler > & profiler() const override
Definition: IterativeSolverTemplate.h:320
bool m_end_iteration_needed
whether end_iteration should be called after any preconditioner
Definition: IterativeSolverTemplate.h:601
int add_vector(const VecRef< R > &parameters, const VecRef< R > &actions) override
Definition: IterativeSolverTemplate.h:140
const std::vector< scalar_type > & errors() const override
Definition: IterativeSolverTemplate.h:269
void report() const override
Definition: IterativeSolverTemplate.h:287
double m_convergence_threshold
residual norms less than this mark a converged solution
Definition: IterativeSolverTemplate.h:586
std::shared_ptr< Logger > m_logger
logger
Definition: IterativeSolverTemplate.h:590
void set_options(const Options &options) override
Definition: IterativeSolverTemplate.h:253
std::vector< double > m_errors
errors from the most recent solution
Definition: IterativeSolverTemplate.h:582
std::shared_ptr< Statistics > m_stats
accumulates statistics of operations performed by the solver
Definition: IterativeSolverTemplate.h:589
std::vector< double > m_value_errors
value errors from the most recent solution
Definition: IterativeSolverTemplate.h:583
typename R::value_type value_type
The underlying type of elements of vectors.
Definition: IterativeSolver.h:208
A class that optimises a function using a Quasi-Newton or other method.
Definition: OptimizeBFGS.h:24
int add_vector(R &parameters, R &residual, value_type value) override
Definition: OptimizeBFGS.h:43
bool m_strong_Wolfe
Whether to use strong or weak Wolfe conditions.
Definition: OptimizeBFGS.h:270
size_t end_iteration(R &parameters, R &actions) override
Definition: OptimizeBFGS.h:200
double m_Wolfe_2
Acceptance parameter for function gradient; recommended value Nocedal and Wright p142.
Definition: OptimizeBFGS.h:272
double m_Wolfe_1
Acceptance parameter for function value; recommended value Nocedal and Wright p142.
Definition: OptimizeBFGS.h:271
void BFGS_update_1(R &residual, std::shared_ptr< const subspace::IXSpace< R, Q, P > > xspace, const Matrix< double > &H)
Definition: OptimizeBFGS.h:140
void report(std::ostream &cout, bool endl=true) const override
Writes a report to cout output stream.
Definition: OptimizeBFGS.h:252
double m_linesearch_tolerance
Definition: OptimizeBFGS.h:273
std::shared_ptr< Options > get_options() const override
Definition: OptimizeBFGS.h:239
bool nonlinear() const override
Report whether the class is a non-linear solver.
Definition: OptimizeBFGS.h:41
bool m_linesearch
Definition: OptimizeBFGS.h:262
std::shared_ptr< Logger > logger
Definition: OptimizeBFGS.h:258
int m_max_size_qspace
maximum size of Q space
Definition: OptimizeBFGS.h:269
double m_quasinewton_maximum_step
Definition: OptimizeBFGS.h:277
size_t end_iteration(std::vector< R > &parameters, std::vector< R > &action) override
Definition: OptimizeBFGS.h:195
void set_options(const Options &options) override
Definition: OptimizeBFGS.h:218
void BFGS_update_2(R &z, std::shared_ptr< const subspace::IXSpace< R, Q, P > > xspace, const Matrix< double > &H)
Definition: OptimizeBFGS.h:153
std::vector< double > m_BFGS_update_alpha
Definition: OptimizeBFGS.h:261
void set_value_errors() override
Implementation class should overload this to set errors in the current values (e.g....
Definition: OptimizeBFGS.h:206
void set_max_size_qspace(int n)
Definition: OptimizeBFGS.h:215
size_t end_iteration(const VecRef< R > &parameters, const VecRef< R > &action) override
Behaviour depends on the solver.
Definition: OptimizeBFGS.h:164
int get_max_size_qspace() const
Definition: OptimizeBFGS.h:216
void construct_residual(const std::vector< int > &roots, const CVecRef< R > &params, const VecRef< R > &actions) override
Constructs residual for given roots provided their parameters and actions.
Definition: OptimizeBFGS.h:267
void report() const override
Writes a report to std::cout.
Definition: IterativeSolverTemplate.h:287
bool m_last_iteration_linesearching
Definition: OptimizeBFGS.h:263
OptimizeBFGS(const std::shared_ptr< ArrayHandlers< R, Q, P > > &handlers, const std::shared_ptr< Logger > &logger_=std::make_shared< Logger >())
Definition: OptimizeBFGS.h:33
double m_linesearch_grow_factor
If the predicted line search step is extrapolation, limit the step to this factor times the current s...
Definition: OptimizeBFGS.h:275
Matrix container that allows simple data access, slicing, copying and resizing without loosing data.
Definition: Matrix.h:28
Solves subspace problem for minimisation using the Steepest Descent algorithm.
Definition: SubspaceSolverOptBFGS.h:13
4-parameter interpolation of a 1-dimensional function given two points for which function values and ...
Definition: helper.h:10
auto wrap(ForwardIt begin, ForwardIt end)
Takes a begin and end iterators and returns a vector of references to each element.
Definition: wrap.h:32
std::vector< std::reference_wrapper< const A > > CVecRef
Definition: wrap.h:14
std::vector< std::reference_wrapper< A > > VecRef
Definition: wrap.h:11
const std::shared_ptr< const molpro::Options > options()
Get the Options object associated with iterative-solver.
Definition: options.cpp:4
static std::shared_ptr< OptimizeBFGSOptions > OptimizeBFGS(const std::shared_ptr< Options > &options)
Definition: CastOptions.h:79
@ Info
Definition: Logger.h:49
Allows setting and getting of options for OptimizeBFGS instance via IterativeSolver base class.
Definition: OptimizeBFGSOptions.h:9
Access point for different options in iterative solvers.
Definition: Options.h:20
Information about performance of IterativeSolver instance.
Definition: Statistics.h:10