iterative-solver 0.0
OptimizeBFGS.h
1#ifndef LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
2#define LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
3#include <molpro/linalg/itsolv/CastOptions.h>
4#include <molpro/linalg/itsolv/DSpaceResetter.h>
5#include <molpro/linalg/itsolv/Interpolate.h>
6#include <molpro/linalg/itsolv/IterativeSolverTemplate.h>
7#include <molpro/linalg/itsolv/propose_rspace.h>
8#include <molpro/linalg/itsolv/subspace/SubspaceSolverOptBFGS.h>
9#include <molpro/linalg/itsolv/subspace/XSpace.h>
10
11namespace molpro::linalg::itsolv {
20template <class R, class Q = R, class P = std::map<size_t, typename R::value_type>>
21class OptimizeBFGS : public IterativeSolverTemplate<Optimize, R, Q, P> {
22public:
25 using typename SolverTemplate::scalar_type;
26 using typename SolverTemplate::value_type;
27 using typename SolverTemplate::value_type_abs;
29
30 explicit OptimizeBFGS(const std::shared_ptr<ArrayHandlers<R, Q, P>>& handlers,
31 const std::shared_ptr<Logger>& logger_ = std::make_shared<Logger>())
32 : SolverTemplate(std::make_shared<subspace::XSpace<R, Q, P>>(handlers, logger_),
33 std::static_pointer_cast<subspace::ISubspaceSolver<R, Q, P>>(
34 std::make_shared<subspace::SubspaceSolverOptBFGS<R, Q, P>>(logger_)),
35 handlers, std::make_shared<Statistics>(), logger_),
36 logger(logger_) {}
37
38 bool nonlinear() const override { return true; }
39
40 int add_vector(R& parameters, R& residual, value_type value) override {
41 auto prof = this->profiler()->push("itsolv::add_vector");
42 using namespace subspace;
43 auto& xspace = this->m_xspace;
44 auto& xdata = xspace->data;
45 const auto& H = xdata[EqnData::H];
46 const auto& S = xdata[EqnData::S];
47 auto& Value = xdata[EqnData::value];
48 // std::cout << "updated value to" << as_string(Value) << std::endl;
49
50 // std::cout << "H "<<as_string(H)<<std::endl;
51 // std::cout << "Value "<<as_string(Value)<<std::endl;
52 while (xspace->size() >= size_t(this->m_max_size_qspace)) {
53 // std::cout << "delete Q" << std::endl;
54 xspace->eraseq(xspace->size() - 1);
55 }
56 // std::cout << "H after delete Q "<<as_string(H)<<std::endl;
57 // std::cout << "Value after delete Q "<<as_string(Value)<<std::endl;
58
59 // augment space with current point
60 auto oldValue = Value;
61 Value.resize({xspace->size() + 1, 1});
62 if (xspace->size() > 0)
63 Value.slice({1, 0}, {xspace->size() + 1, 1}) = oldValue.slice();
64 Value(0, 0) = value;
65 auto nwork = IterativeSolverTemplate<Optimize, R, Q, P>::add_vector(parameters, residual);
66 // std::cout << "H after add_vector "<<as_string(H)<<std::endl;
67 // std::cout << "Value after add_vector "<<as_string(Value)<<std::endl;
68
69 if (xspace->size() > 1) { // see whether a line search is needed
70 auto fprev = Value(1, 0); // the previous point
71 auto fcurrent = Value(0, 0); // the current point
72 auto gprev = H(0, 1) - H(1, 1);
73 auto gcurrent = H(0, 0) - H(1, 0);
74 bool Wolfe_1 = fcurrent <= fprev + m_Wolfe_1 * gprev;
75 bool Wolfe_2 = m_strong_Wolfe ? gcurrent >= m_Wolfe_2 * gprev : std::abs(gcurrent) <= m_Wolfe_2 * std::abs(gprev);
76 auto step = S(0, 0) - S(1, 0) - S(0, 1) + S(1, 1);
77 if (false) {
78 molpro::cout << "Size of Q=" << xspace->size() << std::endl;
79 molpro::cout << "step=" << step << std::endl;
80 molpro::cout << "fprev=" << fprev << std::endl;
81 molpro::cout << "fcurrent=" << fcurrent << std::endl;
82 molpro::cout << " m_Wolfe_1 =" << m_Wolfe_1 << std::endl;
83 molpro::cout << " m_Wolfe_1 * gprev=" << m_Wolfe_1 * gprev << std::endl;
84 molpro::cout << "fprev + m_Wolfe_1 * gprev=" << fprev + m_Wolfe_1 * gprev << std::endl;
85 molpro::cout << "gprev=" << gprev << std::endl;
86 molpro::cout << "gcurrent=" << gcurrent << std::endl;
87 molpro::cout << "m_convergence_threshold=" << this->m_convergence_threshold << std::endl;
88 molpro::cout << "Wolfe conditions: " << Wolfe_1 << Wolfe_2 << std::endl;
89 }
90 if (
91 // std::abs(gcurrent) < this->m_convergence_threshold or
92 (Wolfe_1 && Wolfe_2))
93 goto accept;
94 // molpro::cout << "evaluating line search" << std::endl;
95 Interpolate inter({-1, fprev, gprev}, {0, fcurrent, gcurrent});
96 auto [x, f, g, h] = inter.minimize(-1 - this->m_linesearch_grow_factor, this->m_linesearch_grow_factor);
97 // molpro::cout << "interpolation" << x << " f=" << f << " g=" << g << " h=" << h << std::endl;
98 if (std::abs(x) > m_linesearch_tolerance) {
99 // molpro::cout << "taking line search" << std::endl;
100 this->m_logger->msg("Line search step taken", Logger::Info);
101 this->m_handlers->rr().scal(1 + x, parameters);
102 this->m_handlers->rq().axpy(-x, xspace->paramsq()[1], parameters);
103 auto erased = fprev < fcurrent ? 0 : 1;
104 // std::cout << "Value before erasure: "<<as_string(Value)<<std::endl;
105 // std::cout << "erased="<<erased<<"; removing point with value "<<Value(erased,0)<<std::endl;
106 xspace->eraseq(erased);
107 // std::cout << "Value after erasure: "<<as_string(Value)<<std::endl;
108 m_linesearch = true;
109 if (false) {
110 while (xspace->size() >= 2) {
111 std::cout << "delete Q because line searching" << std::endl;
112 xspace->eraseq(xspace->size() - 1);
113 }
114 }
115 return -1;
116 }
117 }
118
119 accept:
120 m_linesearch = false;
121 this->m_logger->msg("Quasi-Newton step taken", Logger::Info);
122 // this->m_errors.front() = std::sqrt(this->m_handlers->rr().dot(residual,residual));
123 for (size_t a = 0; a < m_BFGS_update_alpha.size(); a++) {
124 if (std::abs(H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1)) <
125 std::max(5e-14 * std::abs(H(a, a)), 1e-15)) {
126 xspace->eraseq(a + 1);
127 this->m_logger->msg("Erase redundant Q", Logger::Info);
128 goto accept;
129 }
130 }
131 BFGS_update_1(residual, xspace, H);
132 return nwork;
133 }
134
135 void BFGS_update_1(R& residual, std::shared_ptr<const subspace::IXSpace<R, Q, P>> xspace, const Matrix<double>& H) {
136 m_BFGS_update_alpha.resize(xspace->size() - 1);
137 const auto& q = xspace->paramsq();
138 const auto& u = xspace->actionsq();
139 for (size_t a = 0; a < m_BFGS_update_alpha.size(); a++) {
141 (this->m_handlers->rq().dot(residual, q[a]) - this->m_handlers->rq().dot(residual, q[a + 1])) /
142 (H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1));
143 this->m_handlers->rq().axpy(-m_BFGS_update_alpha[a], u[a], residual);
144 this->m_handlers->rq().axpy(m_BFGS_update_alpha[a], u[a + 1], residual);
145 }
146 }
147
148 void BFGS_update_2(R& z, std::shared_ptr<const subspace::IXSpace<R, Q, P>> xspace, const Matrix<double>& H) {
149 const auto& q = xspace->paramsq();
150 const auto& u = xspace->actionsq();
151 for (int a = m_BFGS_update_alpha.size() - 1; a >= 0; a--) {
152 auto beta = (this->m_handlers->rq().dot(z, u[a]) - this->m_handlers->rq().dot(z, u[a + 1])) /
153 (H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1));
154 this->m_handlers->rq().axpy(+m_BFGS_update_alpha[a] - beta, q[a], z);
155 this->m_handlers->rq().axpy(-m_BFGS_update_alpha[a] + beta, q[a + 1], z);
156 }
157 }
158
159 size_t end_iteration(const VecRef<R>& parameters, const VecRef<R>& action) override {
160 auto prof = this->profiler()->push("itsolv::end_iteration");
161 this->m_working_set = {0};
162 this->m_end_iteration_needed = false;
163 if (not m_linesearch) { // action is expected to hold the preconditioned residual
165 this->solution_params(this->m_working_set, parameters);
166 if (this->m_errors.front() < this->m_convergence_threshold) {
167 this->m_working_set.clear();
168 return 0;
169 }
170 this->m_working_set.assign(1, 0);
171 auto& z = action.front();
172 const auto& xspace = this->m_xspace;
173 auto& xdata = xspace->data;
174 const auto& H = xdata[subspace::EqnData::H];
175 BFGS_update_2(z, xspace, H);
176 auto len_z = std::sqrt(this->m_handlers->rr().dot(z,z));
177 if (len_z > this->m_quasinewton_maximum_step)
178 this->m_handlers->rr().scal(this->m_quasinewton_maximum_step / len_z, z);
179 this->m_handlers->rr().axpy(-1, z, parameters.front());
180 } else {
181 this->m_stats->line_search_steps++;
183 this->m_stats->line_searches++;
184 m_last_iteration_linesearching = true;
185 }
186 this->m_stats->iterations++;
187 return this->errors().front() < this->m_convergence_threshold ? 0 : 1;
188 }
189
190 size_t end_iteration(std::vector<R>& parameters, std::vector<R>& action) override {
191 auto result = end_iteration(wrap(parameters), wrap(action));
192 return result;
193 }
194
195 size_t end_iteration(R& parameters, R& actions) override {
196 auto wparams = std::vector<std::reference_wrapper<R>>{std::ref(parameters)};
197 auto wactions = std::vector<std::reference_wrapper<R>>{std::ref(actions)};
198 return end_iteration(wparams, wactions);
199 }
200
201 void set_value_errors() override {
202 auto& Value = this->m_xspace->data[subspace::EqnData::value];
203 this->m_value_errors.assign(1, std::numeric_limits<double>::max());
204 if (this->m_xspace->size() > 1 and Value(0, 0) < Value(1, 0))
205 this->m_value_errors.front() = Value(1, 0) - Value(0, 0);
206 }
207
212
213 void set_options(const Options& options) override {
215 if (auto opt2 = dynamic_cast<const molpro::linalg::itsolv::OptimizeBFGSOptions*>(&options)) {
217 if (opt.max_size_qspace)
218 set_max_size_qspace(opt.max_size_qspace.value());
219 if (opt.strong_Wolfe)
220 m_strong_Wolfe = opt.strong_Wolfe.value();
221 if (opt.Wolfe_1)
222 m_Wolfe_1 = opt.Wolfe_1.value();
223 if (opt.Wolfe_2)
224 m_Wolfe_2 = opt.Wolfe_2.value();
225 if (opt.linesearch_tolerance)
226 m_linesearch_tolerance = opt.linesearch_tolerance.value();
227 if (opt.linesearch_grow_factor)
228 m_linesearch_grow_factor = opt.linesearch_grow_factor.value();
229 if (opt.quasinewton_maximum_step)
230 m_quasinewton_maximum_step = opt.quasinewton_maximum_step.value();
231 }
232 }
233
234 std::shared_ptr<Options> get_options() const override {
235 auto opt = std::make_shared<OptimizeBFGSOptions>();
236 opt->copy(*SolverTemplate::get_options());
237 opt->max_size_qspace = get_max_size_qspace();
238 opt->strong_Wolfe = m_strong_Wolfe;
239 opt->Wolfe_1 = m_strong_Wolfe;
240 opt->Wolfe_2 = m_Wolfe_2;
241 opt->linesearch_tolerance = m_linesearch_tolerance;
242 opt->linesearch_grow_factor = m_linesearch_grow_factor;
243 opt->quasinewton_maximum_step = m_quasinewton_maximum_step;
244 return opt;
245 }
246
247 void report(std::ostream& cout, bool endl = true) const override {
248 SolverTemplate::report(cout, false);
249 cout << ", value " << this->value() << (m_linesearch ? ", line-searching" : ", quasi-Newton step");
250 if (endl)
251 cout << std::endl;
252 }
253 std::shared_ptr<Logger> logger;
254
255protected:
256 std::vector<double> m_BFGS_update_alpha;
259
260protected:
261 // for non-linear problems, actions already contains the residual
262 void construct_residual(const std::vector<int>& roots, const CVecRef<R>& params, const VecRef<R>& actions) override {}
263
264 int m_max_size_qspace = std::numeric_limits<int>::max();
265 bool m_strong_Wolfe = true;
266 double m_Wolfe_1 = 1e-4;
267 double m_Wolfe_2 = 0.9;
271 2;
272 double m_quasinewton_maximum_step = std::numeric_limits<double>::max();
273};
274
275} // namespace molpro::linalg::itsolv
276#endif // LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
Class, containing a collection of array handlers used in IterativeSolver Provides a Builder sub-class...
Definition: ArrayHandlers.h:25
Definition: Interpolate.h:13
Interpolate::point minimize(double xa, double xb, size_t bracket_grid=100, size_t max_bracket_grid=100000, bool analytic=true) const
Find the minimum of the interpolant within a range.
Definition: Interpolate.cpp:136
Implements IterativeSolver interface that is common to all solvers.
Definition: IterativeSolverTemplate.h:127
void solution_params(const std::vector< int > &roots, std::vector< R > &parameters) override
Definition: IterativeSolverTemplate.h:224
std::vector< int > m_working_set
indices of roots in the working set
Definition: IterativeSolverTemplate.h:583
scalar_type value() const override
Report the function value for the current optimum solution.
Definition: IterativeSolverTemplate.h:312
std::shared_ptr< subspace::IXSpace< R, R, std::map< size_t, typename R::value_type > > > m_xspace
manages the subspace and associated data
Definition: IterativeSolverTemplate.h:579
std::shared_ptr< ArrayHandlers< R, R, std::map< size_t, typename R::value_type > > > m_handlers
Array handlers.
Definition: IterativeSolverTemplate.h:578
std::shared_ptr< Options > get_options() const override
Definition: IterativeSolverTemplate.h:262
const std::shared_ptr< molpro::profiler::Profiler > & profiler() const override
Definition: IterativeSolverTemplate.h:320
bool m_end_iteration_needed
whether end_iteration should be called after any preconditioner
Definition: IterativeSolverTemplate.h:600
int add_vector(const VecRef< R > &parameters, const VecRef< R > &actions) override
Definition: IterativeSolverTemplate.h:140
const std::vector< scalar_type > & errors() const override
Definition: IterativeSolverTemplate.h:269
void report() const override
Definition: IterativeSolverTemplate.h:287
double m_convergence_threshold
residual norms less than this mark a converged solution
Definition: IterativeSolverTemplate.h:585
std::shared_ptr< Logger > m_logger
logger
Definition: IterativeSolverTemplate.h:589
void set_options(const Options &options) override
Definition: IterativeSolverTemplate.h:253
std::vector< double > m_errors
errors from the most recent solution
Definition: IterativeSolverTemplate.h:581
std::shared_ptr< Statistics > m_stats
accumulates statistics of operations performed by the solver
Definition: IterativeSolverTemplate.h:588
std::vector< double > m_value_errors
value errors from the most recent solution
Definition: IterativeSolverTemplate.h:582
typename R::value_type value_type
The underlying type of elements of vectors.
Definition: IterativeSolver.h:202
A class that optimises a function using a Quasi-Newton or other method.
Definition: OptimizeBFGS.h:21
int add_vector(R &parameters, R &residual, value_type value) override
Definition: OptimizeBFGS.h:40
bool m_strong_Wolfe
Whether to use strong or weak Wolfe conditions.
Definition: OptimizeBFGS.h:265
size_t end_iteration(R &parameters, R &actions) override
Definition: OptimizeBFGS.h:195
double m_Wolfe_2
Acceptance parameter for function gradient; recommended value Nocedal and Wright p142.
Definition: OptimizeBFGS.h:267
double m_Wolfe_1
Acceptance parameter for function value; recommended value Nocedal and Wright p142.
Definition: OptimizeBFGS.h:266
void BFGS_update_1(R &residual, std::shared_ptr< const subspace::IXSpace< R, Q, P > > xspace, const Matrix< double > &H)
Definition: OptimizeBFGS.h:135
void report(std::ostream &cout, bool endl=true) const override
Writes a report to cout output stream.
Definition: OptimizeBFGS.h:247
double m_linesearch_tolerance
Definition: OptimizeBFGS.h:268
std::shared_ptr< Options > get_options() const override
Definition: OptimizeBFGS.h:234
bool nonlinear() const override
Report whether the class is a non-linear solver.
Definition: OptimizeBFGS.h:38
bool m_linesearch
Definition: OptimizeBFGS.h:257
std::shared_ptr< Logger > logger
Definition: OptimizeBFGS.h:253
int m_max_size_qspace
maximum size of Q space
Definition: OptimizeBFGS.h:264
double m_quasinewton_maximum_step
Definition: OptimizeBFGS.h:272
size_t end_iteration(std::vector< R > &parameters, std::vector< R > &action) override
Definition: OptimizeBFGS.h:190
void set_options(const Options &options) override
Definition: OptimizeBFGS.h:213
void BFGS_update_2(R &z, std::shared_ptr< const subspace::IXSpace< R, Q, P > > xspace, const Matrix< double > &H)
Definition: OptimizeBFGS.h:148
std::vector< double > m_BFGS_update_alpha
Definition: OptimizeBFGS.h:256
void set_value_errors() override
Implementation class should overload this to set errors in the current values (e.g....
Definition: OptimizeBFGS.h:201
void set_max_size_qspace(int n)
Definition: OptimizeBFGS.h:210
size_t end_iteration(const VecRef< R > &parameters, const VecRef< R > &action) override
Behaviour depends on the solver.
Definition: OptimizeBFGS.h:159
int get_max_size_qspace() const
Definition: OptimizeBFGS.h:211
void construct_residual(const std::vector< int > &roots, const CVecRef< R > &params, const VecRef< R > &actions) override
Constructs residual for given roots provided their parameters and actions.
Definition: OptimizeBFGS.h:262
void report() const override
Writes a report to std::cout.
Definition: IterativeSolverTemplate.h:287
bool m_last_iteration_linesearching
Definition: OptimizeBFGS.h:258
OptimizeBFGS(const std::shared_ptr< ArrayHandlers< R, Q, P > > &handlers, const std::shared_ptr< Logger > &logger_=std::make_shared< Logger >())
Definition: OptimizeBFGS.h:30
double m_linesearch_grow_factor
If the predicted line search step is extrapolation, limit the step to this factor times the current s...
Definition: OptimizeBFGS.h:270
Matrix container that allows simple data access, slicing, copying and resizing without loosing data.
Definition: Matrix.h:28
Solves subspace problem for minimisation using the Steepest Descent algorithm.
Definition: SubspaceSolverOptBFGS.h:13
4-parameter interpolation of a 1-dimensional function given two points for which function values and ...
Definition: helper.h:10
auto wrap(ForwardIt begin, ForwardIt end)
Takes a begin and end iterators and returns a vector of references to each element.
Definition: wrap.h:32
std::vector< std::reference_wrapper< const A > > CVecRef
Definition: wrap.h:14
std::vector< std::reference_wrapper< A > > VecRef
Definition: wrap.h:11
const std::shared_ptr< const molpro::Options > options()
Get the Options object associated with iterative-solver.
Definition: options.cpp:4
static std::shared_ptr< OptimizeBFGSOptions > OptimizeBFGS(const std::shared_ptr< Options > &options)
Definition: CastOptions.h:79
@ Info
Definition: Logger.h:49
Allows setting and getting of options for OptimizeBFGS instance via IterativeSolver base class.
Definition: OptimizeBFGSOptions.h:9
Access point for different options in iterative solvers.
Definition: Options.h:20
Information about performance of IterativeSolver instance.
Definition: Statistics.h:10