26 #ifndef TILEDARRAY_MATH_OUTER_H__INCLUDED 27 #define TILEDARRAY_MATH_OUTER_H__INCLUDED 38 template <std::
size_t N>
// outer(): function template declared static with TILEDARRAY_FORCE_INLINE.
// It takes an x block, a y block, a result pointer, an unnamed std::size_t
// and an operation object passed by const reference.
// NOTE(review): this listing elides the braces and most of the body (the
// embedded line numbers jump 50 -> 55 -> 57), so the declarations of `offset`
// and `result_block` and the start of the final call are not visible here -
// check the full header before relying on these notes.
47 template <
typename X,
typename Y,
typename Result,
typename Op>
48 static TILEDARRAY_FORCE_INLINE
void 49 outer(
const X*
const x_block,
const Y*
const y_block,
50 Result*
const result,
// Unnamed parameter: whatever is passed here cannot be referenced in the body.
const std::size_t ,
const Op& op)
// Copy the single x element found at index `offset` of the x block.
55 const X x = x_block[
offset];
// Tail of a call whose beginning is elided; result_block and y_block are its
// last two arguments.
57 result_block, y_block);
// fill() overload with an initializer block: function template declared static
// with TILEDARRAY_FORCE_INLINE. Besides the x and y blocks it receives `init`
// (pointer to const Init), a result pointer, an unnamed std::size_t and an
// operation object passed by const reference.
// NOTE(review): only the signature and one statement survive in this listing;
// how `init` and `result` are used is not visible here - confirm in the full
// header.
62 template <
typename X,
typename Y,
typename Init,
typename Result,
typename Op>
63 static TILEDARRAY_FORCE_INLINE
void 64 fill(
const X*
const x_block,
const Y*
const y_block,
65 const Init*
const init, Result*
const result,
// Unnamed parameter: whatever is passed here cannot be referenced in the body.
66 const std::size_t ,
const Op& op)
// Copy the single x element found at index `offset` of the x block.
71 const X x = x_block[
offset];
// fill() overload without an initializer block: function template declared
// static with TILEDARRAY_FORCE_INLINE. It takes an x block, a y block, a
// result pointer, an unnamed std::size_t and an operation object.
// NOTE(review): the braces and the declaration of `result_block` are elided
// from this listing (embedded line numbers jump 81 -> 85 -> 87).
78 template <
typename X,
typename Y,
typename Result,
typename Op>
79 static TILEDARRAY_FORCE_INLINE
void 80 fill(
const X*
const x_block,
const Y*
const y_block,
81 Result*
const result,
// Unnamed parameter: whatever is passed here cannot be referenced in the body.
const std::size_t ,
const Op& op)
// Copy the single x element found at index `offset` of the x block.
85 const X x = x_block[
offset];
// The lambda holds a copy of x and a reference to op; for each (res, y) pair
// it is given, it overwrites res with op(x, y). It is handed to for_each_block
// together with result_block and y_block.
// NOTE(review): presumably for_each_block applies it element-wise across the
// two blocks - confirm against for_each_block's definition.
87 for_each_block([x,&op] (Result& res,
const Y y) { res = op(x, y); },
88 result_block, y_block);
// OuterVectorOpUnwind<N>: one level of a class template that publicly inherits
// from OuterVectorOpUnwind<N - 1>, so instantiating level N also instantiates
// every lower level.
// NOTE(review): presumably a separate specialization ends the recursion - it
// is not visible in this span. This listing also elides the closing brace of
// the class and the braces and most statements of every member (the embedded
// line numbers have gaps), so `offset`, `result_block` and `init_block` are
// used below without a visible declaration.
95 template <std::
size_t N>
96 class OuterVectorOpUnwind :
public OuterVectorOpUnwind<N - 1> {
// outer(): takes an x block, a y block, a result pointer, a named `stride`
// and an operation object passed by const reference.
103 template <
typename X,
typename Y,
typename Result,
typename Op>
104 static TILEDARRAY_FORCE_INLINE
void 105 outer(
const X*
const x_block,
const Y*
const y_block,
106 Result*
const result,
const std::size_t stride,
const Op& op)
// Copy the single x element found at index `offset` of the x block.
112 const X x = x_block[
offset];
// Tail of a call whose beginning is elided; result_block and y_block are its
// last two arguments.
114 result_block, y_block);
// fill() overload with an initializer block: additionally receives `init`
// (pointer to const Init).
123 template <
typename X,
typename Y,
typename Init,
typename Result,
typename Op>
124 static TILEDARRAY_FORCE_INLINE
void 125 fill(
const X*
const x_block,
const Y*
const y_block,
126 const Init*
const init, Result*
const result,
127 const std::size_t stride,
const Op& op)
// Copy the single x element found at index `offset` of the x block.
133 const X x = x_block[
offset];
// Tail of a call whose beginning is elided; init_block and y_block are its
// last two arguments.
135 init_block, y_block);
// fill() overload without an initializer block. Unlike the other members
// visible in this class, `result` carries the MADNESS_RESTRICT qualifier here.
// NOTE(review): presumably a no-alias promise - confirm the macro definition.
143 template <
typename X,
typename Y,
typename Result,
typename Op>
144 static TILEDARRAY_FORCE_INLINE
void 145 fill(
const X*
const x_block,
const Y*
const y_block,
146 Result* MADNESS_RESTRICT
const result,
const std::size_t stride,
const Op& op)
// Copy the single x element found at index `offset` of the x block.
151 const X x = x_block[
offset];
// The lambda holds a copy of x and a reference to op; for each (res, y) pair
// it is given, it overwrites res with op(x, y). It is handed to for_each_block
// together with result_block and y_block.
153 for_each_block([x,&op] (Result& res,
const Y y) { res = op(x, y); },
154 result_block, y_block);
// Function template over X, Y, A and Op that receives const x and y pointers,
// a mutable `a` pointer and an operation object.
// NOTE(review): the line carrying the function name and the m/n parameters
// (embedded line 180) is missing from this listing. The `a_ij = op(x_i, y_j)`
// store below matches the outer_fill entry in the index at the end of the
// listing - confirm against the full header.
179 template <
typename X,
typename Y,
typename A,
typename Op>
181 const X*
const x,
const Y*
const y, A* a,
const Op& op)
// mx and nx keep only the bits of m and n selected by index_mask::value.
// NOTE(review): presumably this rounds each extent down to a whole number of
// unrolled blocks - confirm against the definition of index_mask.
186 const std::size_t mx = m & index_mask::value;
187 const std::size_t nx = n & index_mask::value;
// Despite its name, this lambda fixes y_j (the second argument of op, captured
// by value) and varies x_i; each call overwrites a_ij with op(x_i, y_j).
216 const auto bind_first_op = [y_j,&op] (A& a_ij,
const X x_i) { a_ij = op(x_i, y_j); };
// Loop without an init clause: continues from the current i up to m and moves
// `a` forward by n elements on every iteration.
224 for(; i < m; ++i, a += n) {
// Tail of a call whose head is elided: a callable body returning
// op(x_i, y_j), followed by the arguments n, a and y.
228 {
return op(x_i, y_j); }, n, a, y);
// outer(m, n, x, y, a, op): function template returning void that receives
// two extents, const x and y pointers, a mutable `a` pointer and an operation
// object passed by const reference.
// NOTE(review): the braces and most statements are elided from this listing
// (the embedded line numbers have gaps), so the declarations of i, j, x_i and
// y_j are not visible here.
247 template <
typename X,
typename Y,
typename A,
typename Op>
248 void outer(
const std::size_t m,
const std::size_t n,
249 const X*
const x,
const Y*
const y, A* a,
const Op& op)
// mx and nx keep only the bits of m and n selected by index_mask::value.
// NOTE(review): presumably this rounds each extent down to a whole number of
// unrolled blocks - confirm against the definition of index_mask.
254 const std::size_t mx = m & index_mask::value;
255 const std::size_t nx = n & index_mask::value;
// a_ij points j elements past the current value of `a`.
279 A*
const a_ij = a + j;
// Tail of a callable whose head is elided; op is invoked with three arguments
// here: a_ij, x_i and y_j.
288 {
return op(a_ij, x_i, y_j); },
// Loop without an init clause: continues from the current i up to m and moves
// `a` forward by n elements on every iteration.
297 for(; i < m; ++i, a += n) {
// Same three-argument op call as above, inside this loop.
300 {
return op(a_ij, x_i, y_j); },
// Function template over X, Y, A, B and Op. x, y and a are pointers to const,
// b is a pointer to non-const, so of these parameters only b can be written
// through. All four pointers carry the MADNESS_RESTRICT qualifier.
// NOTE(review): the line carrying the function name and the m/n parameters
// (embedded line 329) is missing from this listing; presumably this is the
// overload that reads `a` and stores into `b` - confirm in the full header.
// NOTE(review): MADNESS_RESTRICT is presumably a no-alias promise - confirm
// the macro definition.
328 template <
typename X,
typename Y,
typename A,
typename B,
typename Op>
330 const X* MADNESS_RESTRICT
const x,
const Y* MADNESS_RESTRICT
const y,
331 const A* MADNESS_RESTRICT a, B* MADNESS_RESTRICT b,
const Op& op)
// mx and nx keep only the bits of m and n selected by index_mask::value.
// NOTE(review): presumably this rounds each extent down to a whole number of
// unrolled blocks - confirm against the definition of index_mask.
336 const std::size_t mx = m & index_mask::value;
337 const std::size_t nx = n & index_mask::value;
// Tail of a callable whose head is elided; op is invoked with three arguments
// here: a_ij, x_i and y_j.
369 {
return op(a_ij, x_i, y_j); },
// Loop without an init clause: continues from the current i up to m and moves
// both `a` and `b` forward by n elements on every iteration, keeping the two
// pointers in step.
377 for(; i < m; ++i, a += n, b += n) {
// Same three-argument op call as above, inside this loop.
396 {
return op(a_ij, x_i, y_j); },
417 #endif // TILEDARRAY_MATH_OUTER_H__INCLUDED
TILEDARRAY_FORCE_INLINE void scatter_block(Result *const result, const std::size_t stride, const Arg *const arg)
TILEDARRAY_FORCE_INLINE void for_each_block(Op &&op, Result *const result, const Args *const ... args)
TILEDARRAY_FORCE_INLINE void gather_block(Result *const result, const Arg *const arg, const std::size_t stride)
void outer(const std::size_t m, const std::size_t n, const X *const x, const Y *const y, A *a, const Op &op)
Compute the outer operation of x and y and use it to modify a in place.
TILEDARRAY_FORCE_INLINE void copy_block(Result *const result, const Arg *const arg)
#define TILEDARRAY_LOOP_UNWIND
void outer_fill(const std::size_t m, const std::size_t n, const X *const x, const Y *const y, A *a, const Op &op)
Compute the outer operation of x and y and store the result in a.
void vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
static TILEDARRAY_FORCE_INLINE void fill(const X *const x_block, const Y *const y_block, const Init *const init, Result *const result, const std::size_t stride, const Op &op)
Automatic loop unwinding for the outer algorithm.
void inplace_vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
static TILEDARRAY_FORCE_INLINE void fill(const X *const x_block, const Y *const y_block, Result *const result, const std::size_t, const Op &op)
static TILEDARRAY_FORCE_INLINE void outer(const X *const x_block, const Y *const y_block, Result *const result, const std::size_t stride, const Op &op)
OuterVectorOpUnwind< TILEDARRAY_LOOP_UNWIND - 1 > OuterVectorOpUnwindN
static TILEDARRAY_FORCE_INLINE void fill(const X *const x_block, const Y *const y_block, Result *MADNESS_RESTRICT const result, const std::size_t stride, const Op &op)
static TILEDARRAY_FORCE_INLINE void fill(const X *const x_block, const Y *const y_block, const Init *const init, Result *const result, const std::size_t, const Op &op)
static TILEDARRAY_FORCE_INLINE void outer(const X *const x_block, const Y *const y_block, Result *const result, const std::size_t, const Op &op)
OuterVectorOpUnwind< N - 1 > OuterVectorOpUnwindN1
static const std::size_t offset