iterative-solver 0.0
OptimizeBFGS.h
1#ifndef LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
2#define LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
3#include <molpro/linalg/itsolv/CastOptions.h>
4#include <molpro/linalg/itsolv/DSpaceResetter.h>
5#include <molpro/linalg/itsolv/Interpolate.h>
6#include <molpro/linalg/itsolv/IterativeSolverTemplate.h>
7#include <molpro/linalg/itsolv/propose_rspace.h>
8#include <molpro/linalg/itsolv/subspace/SubspaceSolverOptBFGS.h>
9#include <molpro/linalg/itsolv/subspace/XSpace.h>
10
11#include <cmath>
12#include <map>
13#include <memory>
14
15namespace molpro::linalg::itsolv {
24template <class R, class Q = R, class P = std::map<size_t, typename R::value_type>>
25class OptimizeBFGS : public IterativeSolverTemplate<Optimize, R, Q, P> {
26public:
29 using typename SolverTemplate::scalar_type;
30 using typename SolverTemplate::value_type;
31 using typename SolverTemplate::value_type_abs;
33
34 explicit OptimizeBFGS(const std::shared_ptr<ArrayHandlers<R, Q, P>>& handlers,
35 const std::shared_ptr<Logger>& logger_ = std::make_shared<Logger>())
36 : SolverTemplate(std::make_shared<subspace::XSpace<R, Q, P>>(handlers, logger_),
37 std::static_pointer_cast<subspace::ISubspaceSolver<R, Q, P>>(
38 std::make_shared<subspace::SubspaceSolverOptBFGS<R, Q, P>>(logger_)),
39 handlers, std::make_shared<Statistics>(), logger_),
40 logger(logger_) {}
41
42 bool nonlinear() const override { return true; }
43
44 int add_vector(R& parameters, R& residual, value_type value) override {
45 auto prof = this->profiler()->push("itsolv::add_vector");
46 using namespace subspace;
47 auto& xspace = this->m_xspace;
48 auto& xdata = xspace->data;
49 const auto& H = xdata[EqnData::H];
50 const auto& S = xdata[EqnData::S];
51 auto& Value = xdata[EqnData::value];
52 // std::cout << "updated value to" << as_string(Value) << std::endl;
53
54 // std::cout << "H "<<as_string(H)<<std::endl;
55 // std::cout << "Value "<<as_string(Value)<<std::endl;
56 while (xspace->size() >= size_t(this->m_max_size_qspace)) {
57 // std::cout << "delete Q" << std::endl;
58 xspace->eraseq(xspace->size() - 1);
59 }
60 // std::cout << "H after delete Q "<<as_string(H)<<std::endl;
61 // std::cout << "Value after delete Q "<<as_string(Value)<<std::endl;
62
63 // augment space with current point
64 auto oldValue = Value;
65 Value.resize({xspace->size() + 1, 1});
66 if (xspace->size() > 0)
67 Value.slice({1, 0}, {xspace->size() + 1, 1}) = oldValue.slice();
68 Value(0, 0) = value;
69 auto nwork = IterativeSolverTemplate<Optimize, R, Q, P>::add_vector(parameters, residual);
70 // std::cout << "H after add_vector "<<as_string(H)<<std::endl;
71 // std::cout << "Value after add_vector "<<as_string(Value)<<std::endl;
72
73 if (xspace->size() > 1) { // see whether a line search is needed
74 auto fprev = Value(1, 0); // the previous point
75 auto fcurrent = Value(0, 0); // the current point
76 auto gprev = H(0, 1) - H(1, 1);
77 auto gcurrent = H(0, 0) - H(1, 0);
78 bool Wolfe_1 = fcurrent <= fprev + m_Wolfe_1 * gprev;
79 bool Wolfe_2 = m_strong_Wolfe ? gcurrent >= m_Wolfe_2 * gprev : std::abs(gcurrent) <= m_Wolfe_2 * std::abs(gprev);
80 auto step = S(0, 0) - S(1, 0) - S(0, 1) + S(1, 1);
81 if (false) {
82 molpro::cout << "Size of Q=" << xspace->size() << std::endl;
83 molpro::cout << "step=" << step << std::endl;
84 molpro::cout << "fprev=" << fprev << std::endl;
85 molpro::cout << "fcurrent=" << fcurrent << std::endl;
86 molpro::cout << " m_Wolfe_1 =" << m_Wolfe_1 << std::endl;
87 molpro::cout << " m_Wolfe_1 * gprev=" << m_Wolfe_1 * gprev << std::endl;
88 molpro::cout << "fprev + m_Wolfe_1 * gprev=" << fprev + m_Wolfe_1 * gprev << std::endl;
89 molpro::cout << "gprev=" << gprev << std::endl;
90 molpro::cout << "gcurrent=" << gcurrent << std::endl;
91 molpro::cout << "m_convergence_threshold=" << this->m_convergence_threshold << std::endl;
92 molpro::cout << "Wolfe conditions: " << Wolfe_1 << Wolfe_2 << std::endl;
93 }
94 if (
95 // std::abs(gcurrent) < this->m_convergence_threshold or
96 (Wolfe_1 && Wolfe_2))
97 goto accept;
98 // molpro::cout << "evaluating line search" << std::endl;
99 Interpolate inter({-1, fprev, gprev}, {0, fcurrent, gcurrent});
100 auto [x, f, g, h] = inter.minimize(-1 - this->m_linesearch_grow_factor, this->m_linesearch_grow_factor);
101 // molpro::cout << "interpolation" << x << " f=" << f << " g=" << g << " h=" << h << std::endl;
102 if (std::isnan(x))
103 throw std::runtime_error("NaN in line search");
104 if (std::abs(x) > m_linesearch_tolerance) {
105 // molpro::cout << "taking line search" << std::endl;
106 this->m_logger->info("Line search step taken");
107 this->m_handlers->rr().scal(1 + x, parameters);
108 this->m_handlers->rq().axpy(-x, xspace->paramsq()[1], parameters);
109 auto erased = fprev < fcurrent ? 0 : 1;
110 // std::cout << "Value before erasure: "<<as_string(Value)<<std::endl;
111 // std::cout << "erased="<<erased<<"; removing point with value "<<Value(erased,0)<<std::endl;
112 xspace->eraseq(erased);
113 // std::cout << "Value after erasure: "<<as_string(Value)<<std::endl;
114 m_linesearch = true;
115 if (false) {
116 while (xspace->size() >= 2) {
117 std::cout << "delete Q because line searching" << std::endl;
118 xspace->eraseq(xspace->size() - 1);
119 }
120 }
121 return -1;
122 }
123 }
124
125 accept:
126 m_linesearch = false;
127 this->m_logger->info("Quasi-Newton step taken");
128 // this->m_errors.front() = std::sqrt(this->m_handlers->rr().dot(residual,residual));
129 for (size_t a = 0; a < xspace->size() - 1; a++) {
130 if (std::abs(H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1)) <
131 std::max(5e-14 * std::abs(H(a, a)), 1e-15)) {
132 xspace->eraseq(a + 1);
133 this->m_logger->info("Erase redundant Q");
134 goto accept;
135 }
136 }
137 BFGS_update_1(residual, xspace, H);
138 return nwork;
139 }
140
141 void BFGS_update_1(R& residual, std::shared_ptr<const subspace::IXSpace<R, Q, P>> xspace,
142 const subspace::Matrix<double>& H) {
143 m_BFGS_update_alpha.resize(xspace->size() - 1);
144 const auto& q = xspace->paramsq();
145 const auto& u = xspace->actionsq();
146 for (size_t a = 0; a < m_BFGS_update_alpha.size(); a++) {
148 (this->m_handlers->rq().dot(residual, q[a]) - this->m_handlers->rq().dot(residual, q[a + 1])) /
149 (H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1));
150 this->m_handlers->rq().axpy(-m_BFGS_update_alpha[a], u[a], residual);
151 this->m_handlers->rq().axpy(m_BFGS_update_alpha[a], u[a + 1], residual);
152 }
153 }
154
155 void BFGS_update_2(R& z, std::shared_ptr<const subspace::IXSpace<R, Q, P>> xspace,
156 const subspace::Matrix<double>& H) {
157 const auto& q = xspace->paramsq();
158 const auto& u = xspace->actionsq();
159 for (int a = m_BFGS_update_alpha.size() - 1; a >= 0; a--) {
160 auto beta = (this->m_handlers->rq().dot(z, u[a]) - this->m_handlers->rq().dot(z, u[a + 1])) /
161 (H(a, a) - H(a, a + 1) - H(a + 1, a) + H(a + 1, a + 1));
162 this->m_handlers->rq().axpy(+m_BFGS_update_alpha[a] - beta, q[a], z);
163 this->m_handlers->rq().axpy(-m_BFGS_update_alpha[a] + beta, q[a + 1], z);
164 }
165 }
166
167 size_t end_iteration(const VecRef<R>& parameters, const VecRef<R>& action) override {
168 auto prof = this->profiler()->push("itsolv::end_iteration");
169 this->m_working_set = {0};
170 this->m_end_iteration_needed = false;
171 if (not m_linesearch) { // action is expected to hold the preconditioned residual
173 this->solution_params(this->m_working_set, parameters);
174 if (this->m_errors.front() < this->m_convergence_threshold) {
175 this->m_working_set.clear();
176 return 0;
177 }
178 this->m_working_set.assign(1, 0);
179 auto& z = action.front();
180 const auto& xspace = this->m_xspace;
181 auto& xdata = xspace->data;
182 const auto& H = xdata[subspace::EqnData::H];
183 BFGS_update_2(z, xspace, H);
184 auto len_z = std::sqrt(this->m_handlers->rr().dot(z,z));
185 if (len_z > this->m_quasinewton_maximum_step)
186 this->m_handlers->rr().scal(this->m_quasinewton_maximum_step / len_z, z);
187 this->m_handlers->rr().axpy(-1, z, parameters.front());
188 } else {
189 this->m_stats->line_search_steps++;
191 this->m_stats->line_searches++;
192 m_last_iteration_linesearching = true;
193 }
194 this->m_stats->iterations++;
195 return this->errors().front() < this->m_convergence_threshold ? 0 : 1;
196 }
197
198 size_t end_iteration(std::vector<R>& parameters, std::vector<R>& action) override {
199 auto result = end_iteration(wrap(parameters), wrap(action));
200 return result;
201 }
202
203 size_t end_iteration(R& parameters, R& actions) override {
204 auto wparams = std::vector<std::reference_wrapper<R>>{std::ref(parameters)};
205 auto wactions = std::vector<std::reference_wrapper<R>>{std::ref(actions)};
206 return end_iteration(wparams, wactions);
207 }
208
209 void set_value_errors() override {
210 auto& Value = this->m_xspace->data[subspace::EqnData::value];
211 this->m_value_errors.assign(1, std::numeric_limits<double>::max());
212 if (this->m_xspace->size() > 1 and Value(0, 0) < Value(1, 0))
213 this->m_value_errors.front() = Value(1, 0) - Value(0, 0);
214 }
215
220
221 void set_options(const Options& options) override {
223 if (auto opt2 = dynamic_cast<const molpro::linalg::itsolv::OptimizeBFGSOptions*>(&options)) {
225 if (opt.max_size_qspace)
226 set_max_size_qspace(opt.max_size_qspace.value());
227 if (opt.strong_Wolfe)
228 m_strong_Wolfe = opt.strong_Wolfe.value();
229 if (opt.Wolfe_1)
230 m_Wolfe_1 = opt.Wolfe_1.value();
231 if (opt.Wolfe_2)
232 m_Wolfe_2 = opt.Wolfe_2.value();
233 if (opt.linesearch_tolerance)
234 m_linesearch_tolerance = opt.linesearch_tolerance.value();
235 if (opt.linesearch_grow_factor)
236 m_linesearch_grow_factor = opt.linesearch_grow_factor.value();
237 if (opt.quasinewton_maximum_step)
238 m_quasinewton_maximum_step = opt.quasinewton_maximum_step.value();
239 }
240 }
241
242 std::shared_ptr<Options> get_options() const override {
243 auto opt = std::make_shared<OptimizeBFGSOptions>();
244 opt->copy(*SolverTemplate::get_options());
245 opt->max_size_qspace = get_max_size_qspace();
246 opt->strong_Wolfe = m_strong_Wolfe;
247 opt->Wolfe_1 = m_Wolfe_1;
248 opt->Wolfe_2 = m_Wolfe_2;
249 opt->linesearch_tolerance = m_linesearch_tolerance;
250 opt->linesearch_grow_factor = m_linesearch_grow_factor;
251 opt->quasinewton_maximum_step = m_quasinewton_maximum_step;
252 return opt;
253 }
254
255 void report(std::ostream& cout, bool endl = true) const override {
256 SolverTemplate::report(cout, false);
257 cout << ", value " << this->value() << (m_linesearch ? ", line-searching" : ", quasi-Newton step");
258 if (endl)
259 cout << std::endl;
260 }
261 std::shared_ptr<Logger> logger;
262
263protected:
264 std::vector<double> m_BFGS_update_alpha;
267
268protected:
269 // for non-linear problems, actions already contains the residual
270 void construct_residual(const std::vector<int>& roots, const CVecRef<R>& params, const VecRef<R>& actions) override {}
271
272 int m_max_size_qspace = std::numeric_limits<int>::max();
273 bool m_strong_Wolfe = true;
274 double m_Wolfe_1 = 1e-4;
275 double m_Wolfe_2 = 0.9;
279 2;
280 double m_quasinewton_maximum_step = std::numeric_limits<double>::max();
281};
282
283} // namespace molpro::linalg::itsolv
284#endif // LINEARALGEBRA_SRC_MOLPRO_LINALG_ITSOLV_OPTIMIZEBFGS_H
Class, containing a collection of array handlers used in IterativeSolver Provides a Builder sub-class...
Definition: ArrayHandlers.h:25
Definition: Interpolate.h:13
Interpolate::point minimize(double xa, double xb, size_t bracket_grid=100, size_t max_bracket_grid=100000, bool analytic=true) const
Find the minimum of the interpolant within a range.
Definition: Interpolate.cpp:139
Implements IterativeSolver interface that is common to all solvers.
Definition: IterativeSolverTemplate.h:161
void solution_params(const std::vector< int > &roots, std::vector< R > &parameters) override
Definition: IterativeSolverTemplate.h:264
std::vector< int > m_working_set
indices of roots in the working set
Definition: IterativeSolverTemplate.h:676
scalar_type value() const override
Report the function value for the current optimum solution.
Definition: IterativeSolverTemplate.h:386
std::shared_ptr< subspace::IXSpace< R, R, std::map< size_t, typename R::value_type > > > m_xspace
manages the subspace and associated data
Definition: IterativeSolverTemplate.h:672
std::shared_ptr< ArrayHandlers< R, R, std::map< size_t, typename R::value_type > > > m_handlers
Array handlers.
Definition: IterativeSolverTemplate.h:671
std::shared_ptr< Options > get_options() const override
Definition: IterativeSolverTemplate.h:308
const std::shared_ptr< molpro::profiler::Profiler > & profiler() const override
Definition: IterativeSolverTemplate.h:394
bool m_end_iteration_needed
whether end_iteration should be called after any preconditioner
Definition: IterativeSolverTemplate.h:693
int add_vector(const VecRef< R > &parameters, const VecRef< R > &actions) override
Definition: IterativeSolverTemplate.h:183
const std::vector< scalar_type > & errors() const override
Definition: IterativeSolverTemplate.h:319
void report() const override
Definition: IterativeSolverTemplate.h:349
double m_convergence_threshold
residual norms less than this mark a converged solution
Definition: IterativeSolverTemplate.h:678
std::shared_ptr< Logger > m_logger
logger
Definition: IterativeSolverTemplate.h:682
void set_options(const Options &options) override
Definition: IterativeSolverTemplate.h:293
std::vector< double > m_errors
errors from the most recent solution
Definition: IterativeSolverTemplate.h:674
std::shared_ptr< Statistics > m_stats
accumulates statistics of operations performed by the solver
Definition: IterativeSolverTemplate.h:681
std::vector< double > m_value_errors
value errors from the most recent solution
Definition: IterativeSolverTemplate.h:675
typename R::value_type value_type
The underlying type of elements of vectors.
Definition: IterativeSolver.h:204
A class that optimises a function using a Quasi-Newton or other method.
Definition: OptimizeBFGS.h:25
int add_vector(R &parameters, R &residual, value_type value) override
Definition: OptimizeBFGS.h:44
bool m_strong_Wolfe
Whether to use strong or weak Wolfe conditions.
Definition: OptimizeBFGS.h:273
size_t end_iteration(R &parameters, R &actions) override
Definition: OptimizeBFGS.h:203
double m_Wolfe_2
Acceptance parameter for function gradient; recommended value Nocedal and Wright p142.
Definition: OptimizeBFGS.h:275
double m_Wolfe_1
Acceptance parameter for function value; recommended value Nocedal and Wright p142.
Definition: OptimizeBFGS.h:274
void report(std::ostream &cout, bool endl=true) const override
Writes a report to cout output stream.
Definition: OptimizeBFGS.h:255
double m_linesearch_tolerance
Definition: OptimizeBFGS.h:276
std::shared_ptr< Options > get_options() const override
Definition: OptimizeBFGS.h:242
bool nonlinear() const override
Report whether the class is a non-linear solver.
Definition: OptimizeBFGS.h:42
bool m_linesearch
Definition: OptimizeBFGS.h:265
std::shared_ptr< Logger > logger
Definition: OptimizeBFGS.h:261
int m_max_size_qspace
maximum size of Q space
Definition: OptimizeBFGS.h:272
double m_quasinewton_maximum_step
Definition: OptimizeBFGS.h:280
size_t end_iteration(std::vector< R > &parameters, std::vector< R > &action) override
Definition: OptimizeBFGS.h:198
void BFGS_update_1(R &residual, std::shared_ptr< const subspace::IXSpace< R, Q, P > > xspace, const subspace::Matrix< double > &H)
Definition: OptimizeBFGS.h:141
void set_options(const Options &options) override
Definition: OptimizeBFGS.h:221
std::vector< double > m_BFGS_update_alpha
Definition: OptimizeBFGS.h:264
void set_value_errors() override
Implementation class should overload this to set errors in the current values (e.g....
Definition: OptimizeBFGS.h:209
void set_max_size_qspace(int n)
Definition: OptimizeBFGS.h:218
size_t end_iteration(const VecRef< R > &parameters, const VecRef< R > &action) override
Behaviour depends on the solver.
Definition: OptimizeBFGS.h:167
int get_max_size_qspace() const
Definition: OptimizeBFGS.h:219
void construct_residual(const std::vector< int > &roots, const CVecRef< R > &params, const VecRef< R > &actions) override
Constructs residual for given roots provided their parameters and actions.
Definition: OptimizeBFGS.h:270
void report() const override
Writes a report to std::cout.
Definition: IterativeSolverTemplate.h:349
bool m_last_iteration_linesearching
Definition: OptimizeBFGS.h:266
void BFGS_update_2(R &z, std::shared_ptr< const subspace::IXSpace< R, Q, P > > xspace, const subspace::Matrix< double > &H)
Definition: OptimizeBFGS.h:155
OptimizeBFGS(const std::shared_ptr< ArrayHandlers< R, Q, P > > &handlers, const std::shared_ptr< Logger > &logger_=std::make_shared< Logger >())
Definition: OptimizeBFGS.h:34
double m_linesearch_grow_factor
If the predicted line search step is extrapolation, limit the step to this factor times the current s...
Definition: OptimizeBFGS.h:278
Matrix container that allows simple data access, slicing, copying and resizing without loosing data.
Definition: Matrix.h:30
Solves subspace problem for minimisation using the Steepest Descent algorithm.
Definition: SubspaceSolverOptBFGS.h:16
4-parameter interpolation of a 1-dimensional function given two points for which function values and ...
Definition: helper.h:11
auto wrap(ForwardIt begin, ForwardIt end)
Takes a begin and end iterators and returns a vector of references to each element.
Definition: wrap.h:32
std::vector< std::reference_wrapper< const A > > CVecRef
Definition: wrap.h:14
std::vector< std::reference_wrapper< A > > VecRef
Definition: wrap.h:11
const std::shared_ptr< const molpro::Options > options()
Get the Options object associated with iterative-solver.
Definition: options.cpp:4
static std::shared_ptr< OptimizeBFGSOptions > OptimizeBFGS(const std::shared_ptr< Options > &options)
Definition: CastOptions.h:79
Allows setting and getting of options for OptimizeBFGS instance via IterativeSolver base class.
Definition: OptimizeBFGSOptions.h:9
Access point for different options in iterative solvers.
Definition: Options.h:20
Information about performance of IterativeSolver instance.
Definition: Statistics.h:10