26 #ifndef TILEDARRAY_MATH_PARTIAL_REDUCE_H__INCLUDED 27 #define TILEDARRAY_MATH_PARTIAL_REDUCE_H__INCLUDED 38 template <std::
size_t N>
47 template <
typename Left,
typename Right,
typename Result,
typename Op>
48 static TILEDARRAY_FORCE_INLINE
void 49 row_reduce(
const Left* MADNESS_RESTRICT
const left,
const std::size_t,
50 const Right* MADNESS_RESTRICT
const right, Result* MADNESS_RESTRICT
const result,
60 template <
typename Arg,
typename Result,
typename Op>
61 static TILEDARRAY_FORCE_INLINE
void 62 row_reduce(
const Arg* MADNESS_RESTRICT
const arg,
const std::size_t,
63 Result* MADNESS_RESTRICT
const result,
const Op& op)
72 template <
typename Left,
typename Right,
typename Result,
typename Op>
73 static TILEDARRAY_FORCE_INLINE
void 74 col_reduce(
const Left* MADNESS_RESTRICT
const left,
const std::size_t ,
75 const Right* MADNESS_RESTRICT
const right, Result* MADNESS_RESTRICT
const result,
79 const Right right_j = right[
offset];
85 for_each_block([right_j,&op] (Result& result_ij,
const Left left_i)
86 { op(result_ij, left_i, right_j); }, result, left_block);
90 template <
typename Arg,
typename Result,
typename Op>
91 static TILEDARRAY_FORCE_INLINE
void 92 col_reduce(
const Arg* MADNESS_RESTRICT
const arg,
const std::size_t ,
93 Result* MADNESS_RESTRICT
const result,
const Op& op)
105 template <std::
size_t N>
106 class PartialReduceUnwind :
public PartialReduceUnwind<N - 1> {
113 template <
typename Left,
typename Right,
typename Result,
typename Op>
114 static TILEDARRAY_FORCE_INLINE
void 115 row_reduce(
const Left* MADNESS_RESTRICT
const left,
const std::size_t stride,
116 const Right* MADNESS_RESTRICT
const right, Result* MADNESS_RESTRICT
const result,
131 template <
typename Arg,
typename Result,
typename Op>
132 static TILEDARRAY_FORCE_INLINE
void 133 row_reduce(
const Arg* MADNESS_RESTRICT
const arg,
const std::size_t stride,
134 Result* MADNESS_RESTRICT
const result,
const Op& op)
148 template <
typename Left,
typename Right,
typename Result,
typename Op>
149 static TILEDARRAY_FORCE_INLINE
void 150 col_reduce(
const Left* MADNESS_RESTRICT
const left,
const std::size_t stride,
151 const Right* MADNESS_RESTRICT
const right, Result* MADNESS_RESTRICT
const result,
156 const Right right_j = right[
offset];
162 for_each_block([right_j,&op] (Result& result_ij,
const Left left_i)
163 { op(result_ij, left_i, right_j); }, result, left_block);
169 template <
typename Arg,
typename Result,
typename Op>
170 static TILEDARRAY_FORCE_INLINE
void 171 col_reduce(
const Arg* MADNESS_RESTRICT
const arg,
const std::size_t stride,
172 Result* MADNESS_RESTRICT
const result,
const Op& op)
203 template <
typename Left,
typename Right,
typename Result,
typename Op>
205 const Left* MADNESS_RESTRICT
const left,
const Right* MADNESS_RESTRICT
const right,
206 Result* MADNESS_RESTRICT
const result,
const Op& op)
211 const std::size_t mx = m & index_mask::value;
212 const std::size_t nx = n & index_mask::value;
221 const Left* MADNESS_RESTRICT
const left_i = left + (i * n);
238 const Right right_j = right[j];
243 for_each_block([right_j,&op] (Result& result_ij,
const Left left_i)
244 { op(result_ij, left_i, right_j); }, result_block, left_block);
255 Result result_block = result[i];
257 result[i] = result_block;
273 template <
typename Arg,
typename Result,
typename Op>
275 const Arg* MADNESS_RESTRICT
const arg, Result* MADNESS_RESTRICT
const result,
const Op& op)
280 const std::size_t mx = m & index_mask::value;
281 const std::size_t nx = n & index_mask::value;
290 const Arg* MADNESS_RESTRICT
const arg_i = arg + (i * n);
316 Result result_block = result[i];
318 result[i] = result_block;
335 template <
typename Left,
typename Right,
typename Result,
typename Op>
337 const Left* MADNESS_RESTRICT
const left,
const Right* MADNESS_RESTRICT
const right,
338 Result* MADNESS_RESTRICT
const result,
const Op& op)
343 const std::size_t mx = m & index_mask::value;
344 const std::size_t nx = n & index_mask::value;
353 const Left* MADNESS_RESTRICT
const left_i = left + (i * n);
372 Result result_block = result[j];
377 reduce_block(op, result_block, left_block, right_block);
379 result[j] = result_block;
387 const Right right_i = right[i];
391 op(result_j, left_ij, right_i);
392 }, n, result, left + (i * n));
407 template <
typename Arg,
typename Result,
typename Op>
409 const Arg* MADNESS_RESTRICT
const arg, Result* MADNESS_RESTRICT
const result,
const Op& op)
414 const std::size_t mx = m & index_mask::value;
415 const std::size_t nx = n & index_mask::value;
420 const Arg* MADNESS_RESTRICT
const arg_i = arg + (i * n);
439 Result result_block = result[j];
446 result[j] = result_block;
462 #endif // TILEDARRAY_MATH_PARTIAL_REDUCE_H__INCLUDED PartialReduceUnwind< N - 1 > PartialReduceUnwindN1
void row_reduce(const std::size_t m, const std::size_t n, const Left *MADNESS_RESTRICT const left, const Right *MADNESS_RESTRICT const right, Result *MADNESS_RESTRICT const result, const Op &op)
Reduce the rows of a matrix.
static TILEDARRAY_FORCE_INLINE void col_reduce(const Left *MADNESS_RESTRICT const left, const std::size_t stride, const Right *MADNESS_RESTRICT const right, Result *MADNESS_RESTRICT const result, const Op &op)
TILEDARRAY_FORCE_INLINE void for_each_block(Op &&op, Result *const result, const Args *const ... args)
static TILEDARRAY_FORCE_INLINE void row_reduce(const Arg *MADNESS_RESTRICT const arg, const std::size_t, Result *MADNESS_RESTRICT const result, const Op &op)
TILEDARRAY_FORCE_INLINE void gather_block(Result *const result, const Arg *const arg, const std::size_t stride)
static TILEDARRAY_FORCE_INLINE void row_reduce(const Arg *MADNESS_RESTRICT const arg, const std::size_t stride, Result *MADNESS_RESTRICT const result, const Op &op)
static const std::size_t offset
static TILEDARRAY_FORCE_INLINE void row_reduce(const Left *MADNESS_RESTRICT const left, const std::size_t stride, const Right *MADNESS_RESTRICT const right, Result *MADNESS_RESTRICT const result, const Op &op)
static TILEDARRAY_FORCE_INLINE void col_reduce(const Arg *MADNESS_RESTRICT const arg, const std::size_t, Result *MADNESS_RESTRICT const result, const Op &op)
TILEDARRAY_FORCE_INLINE void copy_block(Result *const result, const Arg *const arg)
#define TILEDARRAY_LOOP_UNWIND
static TILEDARRAY_FORCE_INLINE void col_reduce(const Arg *MADNESS_RESTRICT const arg, const std::size_t stride, Result *MADNESS_RESTRICT const result, const Op &op)
static TILEDARRAY_FORCE_INLINE void col_reduce(const Left *MADNESS_RESTRICT const left, const std::size_t, const Right *MADNESS_RESTRICT const right, Result *MADNESS_RESTRICT const result, const Op &op)
void inplace_vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
static TILEDARRAY_FORCE_INLINE void row_reduce(const Left *MADNESS_RESTRICT const left, const std::size_t, const Right *MADNESS_RESTRICT const right, Result *MADNESS_RESTRICT const result, const Op &op)
PartialReduceUnwind< TILEDARRAY_LOOP_UNWIND - 1 > PartialReduceUnwindN
Partial reduce algorithm automatic loop unwinding.
void col_reduce(const std::size_t m, const std::size_t n, const Left *MADNESS_RESTRICT const left, const Right *MADNESS_RESTRICT const right, Result *MADNESS_RESTRICT const result, const Op &op)
Reduce the columns of a matrix.
void reduce_op_serial(Op &&op, const std::size_t n, Result &result, const Args *const ... args)
TILEDARRAY_FORCE_INLINE void reduce_block(Op &&op, Result &result, const Args *const ... args)