iterative-solver 0.0
gemm.h
#ifndef LINEARALGEBRA_SRC_MOLPRO_LINALG_ARRAY_UTIL_GEMM_H
#define LINEARALGEBRA_SRC_MOLPRO_LINALG_ARRAY_UTIL_GEMM_H
#ifdef HAVE_MPI_H
#include <mpi.h>
#endif
#include "BufferManager.h"
#include <future>
#include <iostream>
#include <molpro/Options.h>
#include <molpro/Profiler.h>
#include <molpro/cblas.h>
#include <molpro/linalg/array/DistrArrayFile.h>
#include <molpro/linalg/array/type_traits.h>
#include <molpro/linalg/itsolv/subspace/Matrix.h>
#include <molpro/linalg/itsolv/wrap.h>
#include <molpro/linalg/itsolv/wrap_util.h>
#include <molpro/linalg/options.h>
#include <numeric>
#include <vector>

using molpro::linalg::itsolv::CVecRef;
using molpro::linalg::itsolv::VecRef;
using molpro::linalg::itsolv::subspace::Matrix;

namespace molpro::linalg::array::util {

enum class gemm_type { inner, outer };

// Buffered
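
// Buffered path: the file-backed operands (DistrArrayFile) are streamed through a
// BufferManager in fixed-size chunks, so the complete file data never has to be held in
// memory at once. gemm_inner_distr_distr returns alphas(i,j) = <yy_i, xx_j>; each rank's
// partial sums over its local buffers are combined with MPI_Allreduce.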
template <class AL>
Matrix<typename array::mapped_or_value_type_t<AL>> gemm_inner_distr_distr(const CVecRef<AL>& yy,
                                                                          const CVecRef<DistrArrayFile>& xx) {
  auto prof = molpro::Profiler::single()->push("gemm_inner_distr_distr (buffered)");
  if (not yy.empty())
    prof += xx.size() * yy.size() * yy[0].get().local_buffer()->size() * 2;
  using value_type = typename array::mapped_or_value_type_t<AL>;
  auto alphas = Matrix<value_type>({yy.size(), xx.size()});
  alphas.fill(0);
  auto non_const_yy = molpro::linalg::itsolv::const_cast_wrap(yy);
  auto alphadata = const_cast<value_type*>(alphas.data().data());
  gemm_distr_distr(alphadata, xx, non_const_yy, gemm_type::inner);
#ifdef HAVE_MPI_H
  MPI_Allreduce(MPI_IN_PLACE, alphadata, alphas.size(), MPI_DOUBLE, MPI_SUM, molpro::mpi::comm_global());
#endif
  return alphas;
}
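
// Overload for the reversed argument order (file-backed xx first). It computes the
// transpose via the kernel above and copies it back, so result(i,j) = <xx_i, yy_j>.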
template <class AL, typename = std::enable_if_t<!std::is_same_v<std::decay_t<AL>, DistrArrayFile>>>
Matrix<typename array::mapped_or_value_type_t<AL>> gemm_inner_distr_distr(const CVecRef<DistrArrayFile>& xx,
                                                                          const CVecRef<AL>& yy) {
  auto result_transpose = gemm_inner_distr_distr(yy, xx);
  Matrix<typename array::mapped_or_value_type_t<AL>> result({result_transpose.cols(), result_transpose.rows()});
  transpose_copy(result, result_transpose);
  return result;
}
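
// Buffered outer product: accumulates yy_j += sum_i alphas(i,j) * xx_i, after checking
// that the dimensions of alphas match xx (rows) and yy (columns).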
template <class AL>
void gemm_outer_distr_distr(const Matrix<typename array::mapped_or_value_type_t<AL>> alphas,
                            const CVecRef<DistrArrayFile>& xx, const VecRef<AL>& yy) {
  if (yy.empty() or xx.empty())
    return;
  auto prof = molpro::Profiler::single()->push("gemm_outer_distr_distr (buffered)");
  if (not yy.empty())
    prof += xx.size() * yy.size() * yy[0].get().local_buffer()->size() * 2;
  if (alphas.rows() != xx.size())
    throw std::out_of_range(std::string{"gemm_outer_distr_distr: dimensions of xx and alphas are different: "} +
                            std::to_string(alphas.rows()) + " " + std::to_string(xx.size()));
  if (alphas.cols() != yy.size())
    throw std::out_of_range(std::string{"gemm_outer_distr_distr: dimensions of yy and alphas are different: "} +
                            std::to_string(alphas.cols()) + " " + std::to_string(yy.size()));
  gemm_distr_distr(const_cast<typename array::mapped_or_value_type_t<AL>*>(alphas.data().data()), xx, yy,
                   gemm_type::outer);
}
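
// Shared buffered kernel for both the inner and the outer product. Each chunk of xx read
// by the BufferManager (which, per its documentation, offers single-buffered or
// asynchronous double-buffered access, allowing file reads to overlap computation) is
// handed to a single cblas_dgemm when the yy local buffers are evenly spaced in memory,
// and to one cblas_dgemv per yy vector otherwise.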
template <class AL>
void gemm_distr_distr(array::mapped_or_value_type_t<AL>* alphadata, const CVecRef<DistrArrayFile>& xx,
                      const VecRef<AL>& yy, gemm_type gemm_type) {

  auto prof = molpro::Profiler::single()->push("gemm_distr_distr");
  if (xx.size() == 0 || yy.size() == 0) {
    return;
  }

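  // Detect whether the local buffers of yy are laid out with a constant positive stride,
  // i.e. form one contiguous column-major matrix; only then can a single dgemm be used.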
  bool yy_constant_stride = true;
  int previous_stride = 0;
  int yy_stride = yy.front().get().local_buffer()->size();
  for (size_t j = 0; j < std::max((size_t)1, yy.size()) - 1; ++j) {
    auto unique_ptr_j = yy.at(j).get().local_buffer()->data();
    auto unique_ptr_jp1 = yy.at(j + 1).get().local_buffer()->data();
    yy_stride = unique_ptr_jp1 - unique_ptr_j;
    // std::cout << "j="<<j<<" yy_stride="<<yy_stride<<std::endl;
    if (j > 0)
      yy_constant_stride = yy_constant_stride && (yy_stride == previous_stride);
    previous_stride = yy_stride;
  }
  yy_constant_stride = yy_constant_stride && (yy_stride > 0);

  auto options = molpro::linalg::options();
  auto number_of_buffers = options->parameter("GEMM_BUFFERS", 2);
  const int buf_size =
      std::min(int(yy.front().get().local_buffer()->size()), options->parameter("GEMM_PAGESIZE", 8192)) *
      number_of_buffers;
  // std::cout << "buf_size=" << buf_size << " number_of_buffers=" << number_of_buffers << std::endl;

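  // Loop over the chunks delivered by the BufferManager: buffer_offset() gives the current
  // chunk's position within each array, buffer_size() its length (the last chunk may be
  // short), and buffer_stride() the spacing between consecutive xx vectors in the buffer.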
  molpro::Profiler::single()->start("gemm: buffer setup");
  BufferManager buffer(xx, buf_size, number_of_buffers);
  molpro::Profiler::single()->stop("gemm: buffer setup");
  for (auto buffer_iterator = buffer.begin(); buffer_iterator != buffer.end(); ++buffer_iterator) {
    auto container_offset = buffer.buffer_offset();
    int current_buf_size = buffer.buffer_size();
    // std::cout << "container_offset="<<container_offset<<", current_buf_size="<<current_buf_size<<std::endl;
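    // Outer: each yy chunk accumulates buffer times the coefficients; inner: alphas
    // accumulates buffer^T times the yy chunks. The dgemm form requires the constant
    // yy stride established above.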
    if (gemm_type == gemm_type::outer) {
      if (yy_constant_stride and not yy.empty()) {
        auto prof =
            molpro::Profiler::single()->push("gemm_outer: cblas_dgemm dimensions " + std::to_string(xx.size()) + ", " +
                                             std::to_string(yy.size()) + ", " + std::to_string(current_buf_size));
        // std::cout << "outer dgemm container_offset="<<container_offset<<std::endl;
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, current_buf_size, yy.size(), xx.size(), 1,
                    buffer_iterator->data(), buffer.buffer_stride(), alphadata, yy.size(), 1,
                    yy[0].get().local_buffer()->data() + container_offset, yy_stride);
      } else { // non-uniform stride:
        auto prof =
            molpro::Profiler::single()->push("gemm_outer: cblas_dgemv dimensions " + std::to_string(xx.size()) + ", " +
                                             std::to_string(yy.size()) + ", " + std::to_string(current_buf_size));
        // std::cout << "outer dgemv"<<std::endl;
        for (size_t i = 0; i < yy.size(); ++i) {
          cblas_dgemv(CblasColMajor, CblasNoTrans, current_buf_size, xx.size(), 1, buffer_iterator->data(),
                      buffer.buffer_stride(), alphadata + i, yy.size(), 1,
                      yy[i].get().local_buffer()->data() + container_offset, 1);
        }
      }
    } else if (gemm_type == gemm_type::inner) {
      if (yy_constant_stride and not yy.empty()) {
        auto prof =
            molpro::Profiler::single()->push("gemm_inner: cblas_dgemm dimensions " + std::to_string(xx.size()) + ", " +
                                             std::to_string(yy.size()) + ", " + std::to_string(current_buf_size));
        // std::cout << "inner dgemm"<<std::endl;
        cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, xx.size(), yy.size(), current_buf_size, 1,
                    buffer_iterator->data(), buffer.buffer_stride(),
                    yy[0].get().local_buffer()->data() + container_offset, yy_stride, 1, alphadata, xx.size());
      } else { // non-uniform stride:
        auto prof =
            molpro::Profiler::single()->push("gemm_inner: cblas_dgemv dimensions " + std::to_string(xx.size()) + ", " +
                                             std::to_string(yy.size()) + ", " + std::to_string(current_buf_size));
        // std::cout << "inner dgemv"<<std::endl;
        for (size_t k = 0; k < yy.size(); ++k) {
          cblas_dgemv(CblasColMajor, CblasTrans, current_buf_size, xx.size(), 1, buffer_iterator->data(),
                      buffer.buffer_stride(), yy[k].get().local_buffer()->data() + container_offset, 1, 1,
                      alphadata + k * xx.size(), 1);
        }
      }
    }
  }
}

// Without buffers

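// Unbuffered inner product over two sets of in-memory distributed arrays: an explicit
// std::inner_product over each pair of local buffers, followed by an MPI reduction over
// the communicator of xx.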
template <class AL, class AR = AL>
Matrix<typename array::mapped_or_value_type_t<AL>> gemm_inner_distr_distr(const CVecRef<AL>& xx,
                                                                          const CVecRef<AR>& yy) {
  if (std::is_same<AL, DistrArrayFile>::value) {
    throw std::runtime_error("gemm_inner_distr_distr (unbuffered) called with DistrArrayFile (should never happen!)");
  }
  // const size_t spacing = 1;
  using value_type = typename array::mapped_or_value_type_t<AL>;
  auto mat = Matrix<value_type>({xx.size(), yy.size()});
  if (xx.size() == 0 || yy.size() == 0)
    return mat;
  auto prof = molpro::Profiler::single()->push("gemm_inner_distr_distr (unbuffered)");
  if (not xx.empty())
    prof += mat.cols() * mat.rows() * xx.at(0).get().local_buffer()->size() * 2;
  for (size_t j = 0; j < mat.cols(); ++j) {
    auto loc_y = yy.at(j).get().local_buffer();
    for (size_t i = 0; i < mat.rows(); ++i) {
      auto loc_x = xx.at(i).get().local_buffer();
      mat(i, j) = std::inner_product(begin(*loc_x), end(*loc_x), begin(*loc_y), (value_type)0);
      // mat(i,j) += cblas_ddot(end(*loc_x) - begin(*loc_x), begin(*loc_x), spacing, begin(*loc_y), spacing);
    }
  }
#ifdef HAVE_MPI_H
  MPI_Allreduce(MPI_IN_PLACE, const_cast<value_type*>(mat.data().data()), mat.size(), MPI_DOUBLE, MPI_SUM,
                xx.at(0).get().communicator());
#endif
  return mat;
}
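
// Unbuffered outer product: a plain triple loop accumulating each local buffer of yy with
// the matching coefficients and local buffers of xx.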
template <class AL, class AR = AL>
void gemm_outer_distr_distr(const Matrix<typename array::mapped_or_value_type_t<AL>> alphas, const CVecRef<AR>& xx,
                            const VecRef<AL>& yy) {
  if (std::is_same<AL, DistrArrayFile>::value) {
    throw std::runtime_error("gemm_outer_distr_distr (unbuffered) called with DistrArrayFile (should never happen!)");
  }
  auto prof = molpro::Profiler::single()->push("gemm_outer_distr_distr (unbuffered)");
  if (not yy.empty())
    prof += alphas.rows() * alphas.cols() * yy[0].get().local_buffer()->size() * 2;
  for (size_t ii = 0; ii < alphas.rows(); ++ii) {
    auto loc_x = xx.at(ii).get().local_buffer();
    for (size_t jj = 0; jj < alphas.cols(); ++jj) {
      auto loc_y = yy[jj].get().local_buffer();
      for (size_t i = 0; i < loc_y->size(); ++i)
        (*loc_y)[i] += alphas(ii, jj) * (*loc_x)[i];
    }
  }
}

// Sparse

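// Outer product with sparse xx (index -> value maps): only the xx entries whose indices
// fall inside each yy local buffer's range [start, start + size) are visited.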
template <class AL, class AR = AL>
void gemm_outer_distr_sparse(const Matrix<typename array::mapped_or_value_type_t<AL>> alphas, const CVecRef<AR>& xx,
                             const VecRef<AL>& yy) {
  for (size_t ii = 0; ii < alphas.cols(); ++ii) {
    auto loc_y = yy[ii].get().local_buffer();
    for (size_t jj = 0; jj < alphas.rows(); ++jj) {
      if (loc_y->size() > 0) {
        size_t i;
        typename array::mapped_or_value_type_t<AL> v;
        for (auto it = xx.at(jj).get().lower_bound(loc_y->start());
             it != xx.at(jj).get().upper_bound(loc_y->start() + loc_y->size() - 1); ++it) {
          std::tie(i, v) = *it;
          (*loc_y)[i - loc_y->start()] += alphas(jj, ii) * v;
        }
      }
    }
  }
}
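
// Inner product between distributed xx and sparse yy, likewise restricted to the locally
// held index range and reduced over the communicator of xx.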
template <class AL, class AR = AL>
Matrix<typename array::mapped_or_value_type_t<AL>> gemm_inner_distr_sparse(const CVecRef<AL>& xx,
                                                                           const CVecRef<AR>& yy) {
  using value_type = typename array::mapped_or_value_type_t<AL>;
  auto mat = Matrix<value_type>({xx.size(), yy.size()});
  if (xx.size() == 0 || yy.size() == 0)
    return mat;
  for (size_t i = 0; i < mat.rows(); ++i) {
    auto loc_x = xx.at(i).get().local_buffer();
    for (size_t j = 0; j < mat.cols(); ++j) {
      mat(i, j) = 0;
      if (loc_x->size() > 0) {
        size_t k;
        value_type v;
        for (auto it = yy.at(j).get().lower_bound(loc_x->start());
             it != yy.at(j).get().upper_bound(loc_x->start() + loc_x->size() - 1); ++it) {
          std::tie(k, v) = *it;
          mat(i, j) += (*loc_x)[k - loc_x->start()] * v;
        }
      }
    }
  }
#ifdef HAVE_MPI_H
  MPI_Allreduce(MPI_IN_PLACE, const_cast<value_type*>(mat.data().data()), mat.size(), MPI_DOUBLE, MPI_SUM,
                xx.at(0).get().communicator());
#endif
  return mat;
}

// Handlers

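// Generic fallbacks in terms of an ArrayHandler: the outer product is expressed as
// repeated handler.axpy calls, the inner product as pairwise handler.dot. No layout
// assumptions are made beyond what the handler supports.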
template <class Handler, class AL, class AR = AL>
void gemm_outer_default(Handler& handler, const Matrix<typename Handler::value_type> alphas, const CVecRef<AR>& xx,
                        const VecRef<AL>& yy) {
  for (size_t ii = 0; ii < alphas.rows(); ++ii) {
    for (size_t jj = 0; jj < alphas.cols(); ++jj) {
      handler.axpy(alphas(ii, jj), xx.at(ii).get(), yy[jj].get());
    }
  }
}

template <class Handler, class AL, class AR = AL>
Matrix<typename Handler::value_type> gemm_inner_default(Handler& handler, const CVecRef<AL>& xx,
                                                        const CVecRef<AR>& yy) {
  auto mat = Matrix<typename Handler::value_type>({xx.size(), yy.size()});
  if (xx.size() == 0 || yy.size() == 0)
    return mat;
  for (size_t ii = 0; ii < mat.rows(); ++ii) {
    for (size_t jj = 0; jj < mat.cols(); ++jj) {
      mat(ii, jj) = handler.dot(xx.at(ii).get(), yy.at(jj).get());
    }
  }
  return mat;
}

} // namespace molpro::linalg::array::util

#endif // LINEARALGEBRA_SRC_MOLPRO_LINALG_ARRAY_UTIL_GEMM_H