26 #ifndef TILEDARRAY_MATH_VECTOR_OP_H__INCLUDED 27 #define TILEDARRAY_MATH_VECTOR_OP_H__INCLUDED 31 #include <TiledArray/config.h> 33 #define TILEDARRAY_LOOP_UNWIND ::TiledArray::math::LoopUnwind::value 43 typedef std::integral_constant<std::size_t, TILEDARRAY_CACHELINE_SIZE /
sizeof(double)>
LoopUnwind;
57 template <
typename Op,
typename Result,
typename... Args>
58 static TILEDARRAY_FORCE_INLINE
void 59 for_each(Op&& op, Result* MADNESS_RESTRICT
const result,
const Args* MADNESS_RESTRICT
const ...args) {
63 template <
typename Op,
typename Result,
typename... Args>
64 static TILEDARRAY_FORCE_INLINE
void 65 for_each_ptr(Op&& op, Result* MADNESS_RESTRICT
const result,
const Args* MADNESS_RESTRICT
const ...args) {
69 template <
typename Op,
typename Result,
typename... Args>
70 static TILEDARRAY_FORCE_INLINE
void 71 reduce(Op&& op, Result& MADNESS_RESTRICT result,
const Args* MADNESS_RESTRICT
const ...args) {
72 op(result, args[
offset]...);
75 template <
typename Result,
typename Arg>
76 static TILEDARRAY_FORCE_INLINE
void 77 scatter(Result* MADNESS_RESTRICT
const result,
const Arg* MADNESS_RESTRICT
const arg,
83 template <
typename Result,
typename Arg>
84 static TILEDARRAY_FORCE_INLINE
void 85 gather(Result* MADNESS_RESTRICT
const result,
const Arg* MADNESS_RESTRICT
const arg,
97 template <std::
size_t N>
98 struct VectorOpUnwind :
public VectorOpUnwind<N - 1ul> {
105 template <
typename Op,
typename Result,
typename... Args>
106 static TILEDARRAY_FORCE_INLINE
void 107 for_each(Op&& op, Result* MADNESS_RESTRICT
const result,
const Args* MADNESS_RESTRICT
const ...args) {
112 template <
typename Op,
typename Result,
typename... Args>
113 static TILEDARRAY_FORCE_INLINE
void 114 for_each_ptr(Op&& op, Result* MADNESS_RESTRICT
const result,
const Args* MADNESS_RESTRICT
const ...args) {
119 template <
typename Op,
typename Result,
typename... Args>
120 static TILEDARRAY_FORCE_INLINE
void 121 reduce(Op&& op, Result& MADNESS_RESTRICT result,
const Args* MADNESS_RESTRICT
const ...args) {
122 op(result, args[
offset]...);
126 template <
typename Result,
typename Arg>
127 static TILEDARRAY_FORCE_INLINE
void 128 scatter(Result* MADNESS_RESTRICT
const result,
const Arg* MADNESS_RESTRICT
const arg,
129 const std::size_t result_stride)
135 template <
typename Result,
typename Arg>
136 static TILEDARRAY_FORCE_INLINE
void 137 gather(Result* MADNESS_RESTRICT
const result,
const Arg* MADNESS_RESTRICT
const arg,
138 std::size_t arg_stride)
150 template <
typename Op,
typename Result,
typename... Args>
151 TILEDARRAY_FORCE_INLINE
void 153 const Args*
const... args)
158 template <
typename Op,
typename Result,
typename... Args>
159 TILEDARRAY_FORCE_INLINE
void 164 template <
typename Op,
typename Result,
typename... Args>
165 TILEDARRAY_FORCE_INLINE
void 167 const Args* MADNESS_RESTRICT
const... args)
169 for(std::size_t i = 0ul; i < n; ++i)
170 op(result[i], args[i]...);
173 template <
typename Op,
typename Result,
typename... Args>
174 TILEDARRAY_FORCE_INLINE
typename std::enable_if<(
sizeof...(Args) >= 0)>::type
176 const Args*
const... args)
182 template <
typename Op,
typename Result,
typename... Args>
183 TILEDARRAY_FORCE_INLINE
typename std::enable_if<(
sizeof...(Args) > 0)>::type
188 template <
typename Op,
typename Result,
typename... Args>
189 TILEDARRAY_FORCE_INLINE
void 191 const Args* MADNESS_RESTRICT
const... args)
193 for(std::size_t i = 0ul; i < n; ++i)
194 op(result + i, args[i]...);
197 template <
typename Op,
typename Result,
typename... Args>
198 TILEDARRAY_FORCE_INLINE
199 void reduce_block(Op&& op, Result& result,
const Args*
const... args) {
203 template <
typename Op,
typename Result,
typename... Args>
204 TILEDARRAY_FORCE_INLINE
209 template <
typename Op,
typename Result,
typename... Args>
210 TILEDARRAY_FORCE_INLINE
211 void reduce_block_n(Op&& op,
const std::size_t n, Result& MADNESS_RESTRICT result,
212 const Args* MADNESS_RESTRICT
const... args)
214 for(std::size_t i = 0ul; i < n; ++i)
215 op(result, args[i]...);
218 template <
typename Result,
typename Arg>
219 TILEDARRAY_FORCE_INLINE
void 221 for_each_block([] (Result& lhs, param_type<Arg> rhs) { lhs = rhs; },
225 template <
typename Arg,
typename Result>
226 TILEDARRAY_FORCE_INLINE
void 227 copy_block_n(std::size_t n, Result*
const result,
const Arg*
const arg) {
232 template <
typename Arg,
typename Result>
233 TILEDARRAY_FORCE_INLINE
void 234 scatter_block(Result*
const result,
const std::size_t stride,
const Arg*
const arg) {
239 template <
typename Result,
typename Arg>
240 TILEDARRAY_FORCE_INLINE
void 242 const std::size_t stride,
const Arg*
const arg)
244 for(std::size_t i = 0; i < n; ++i, result += stride)
248 template <
typename Result,
typename Arg>
249 TILEDARRAY_FORCE_INLINE
void 250 gather_block(Result*
const result,
const Arg*
const arg,
const std::size_t stride) {
254 template <
typename Arg,
typename Result>
255 TILEDARRAY_FORCE_INLINE
void 257 const std::size_t stride)
259 for(std::size_t i = 0; i < n; ++i, arg += stride)
263 template <
typename T>
285 const T*
data()
const {
return block_; }
289 #ifdef HAVE_INTEL_TBB 297 static constexpr std::size_t GRAIN_SIZE = 8ul;
302 SizeTRange(
const size_t start,
const size_t end)
303 : lower(start), upper(end) { }
308 SizeTRange() =
default;
309 SizeTRange(
const SizeTRange &r) =
default;
317 bool empty()
const {
return lower > upper; }
319 bool is_divisible()
const {
return size() >= 2 * GRAIN_SIZE; }
321 size_t begin()
const {
return lower; }
323 size_t end()
const {
return upper; }
325 size_t size()
const {
return upper - lower; }
327 SizeTRange(SizeTRange &r, tbb::split) {
328 size_t nblock = (r.upper - r.lower) / block_size;
329 nblock = (nblock + 1) / 2;
330 lower = r.lower + nblock * block_size;
341 template <
typename Op,
typename Result,
typename... Args,
342 typename std::enable_if<std::is_void<
typename std::result_of<Op(Result&,
343 Args...)>::type>::value>::type* =
nullptr>
345 const Args*
const... args)
356 result_block.store(result + i);
362 #ifdef HAVE_INTEL_TBB 364 template<
typename Op,
typename Result,
typename... Args>
365 class ApplyInplaceVectorOp{
368 ApplyInplaceVectorOp(Op& op, Result*
const result,
const Args*
const... args)
369 :op_(op), result_(result), args_(args...){}
371 ~ApplyInplaceVectorOp(){}
373 template<std::size_t... Is>
374 void helper(SizeTRange& range,
const std::index_sequence<Is...>& )
const {
375 std::size_t offset = range.begin();
376 std::size_t n_range = range.size();
380 void operator()(SizeTRange& range)
const {
381 helper(range, std::make_index_sequence<
sizeof...(Args)>());
387 Result*
const result_;
388 std::tuple<
const Args *
const ...> args_;
394 template <
typename Op,
typename Result,
typename... Args,
395 typename std::enable_if<std::is_void<
typename std::result_of<Op(Result&,
396 Args...)>::type>::value>::type* =
nullptr>
398 const Args*
const... args)
400 #ifdef HAVE_INTEL_TBB 402 SizeTRange range(0, n);
411 auto apply_inplace_vector_op = ApplyInplaceVectorOp<Op, Result, Args...>(op, result, args...);
413 tbb::parallel_for(range, apply_inplace_vector_op, tbb::auto_partitioner());
419 template <
typename Op,
typename Result,
typename... Args,
420 typename std::enable_if<! std::is_void<
typename std::result_of<Op(
421 Args...)>::type>::value>::type* =
nullptr>
423 const Args*
const... args)
425 auto wrapper_op = [&op] (Result& res, param_type<Args>... a)
437 result_block.store(result + i);
443 #ifdef HAVE_INTEL_TBB 445 template<
typename Op,
typename Result,
typename... Args>
449 ApplyVectorOp(Op& op, Result*
const result,
const Args*
const... args)
450 :op_(op), result_(result), args_(args...){}
454 template<std::size_t... Is>
455 void helper(SizeTRange& range,
const std::index_sequence<Is...>& )
const {
456 std::size_t offset = range.begin();
457 std::size_t n_range = range.size();
458 vector_op_serial(op_, n_range, result_+offset, (std::get<Is>(args_)+offset)...);
461 void operator()(SizeTRange& range)
const {
462 helper(range, std::make_index_sequence<
sizeof...(Args)>());
468 Result*
const result_;
469 std::tuple<
const Args *
const ...> args_;
475 template <
typename Op,
typename Result,
typename... Args,
476 typename std::enable_if<! std::is_void<
typename std::result_of<Op(
477 Args...)>::type>::value>::type* =
nullptr>
478 void vector_op(Op&& op,
const std::size_t n, Result*
const result,
479 const Args*
const... args)
481 #ifdef HAVE_INTEL_TBB 483 SizeTRange range(0, n);
492 auto apply_vector_op = ApplyVectorOp<Op,Result,Args...>(op, result, args...);
494 tbb::parallel_for(range, apply_vector_op, tbb::auto_partitioner());
500 template <
typename Op,
typename Result,
typename... Args>
502 const Args*
const... args)
515 #ifdef HAVE_INTEL_TBB 516 template<
typename Op,
typename Result,
typename... Args>
517 class ApplyVectorPtrOp{
520 ApplyVectorPtrOp(Op& op, Result*
const result,
const Args*
const... args)
521 :op_(op), result_(result), args_(args...){}
523 ~ApplyVectorPtrOp(){}
525 template<std::size_t... Is>
526 void helper(SizeTRange& range,
const std::index_sequence<Is...>& )
const {
527 std::size_t offset = range.begin();
528 std::size_t n_range = range.size();
532 void operator()(SizeTRange& range)
const {
533 helper(range, std::make_index_sequence<
sizeof...(Args)>());
539 Result*
const result_;
540 std::tuple<
const Args *
const ...> args_;
545 template <
typename Op,
typename Result,
typename... Args>
547 const Args*
const... args){
548 #ifdef HAVE_INTEL_TBB 550 SizeTRange range(0, n);
559 auto apply_vector_ptr_op = ApplyVectorPtrOp<Op,Result,Args...>(op, result, args...);
560 tbb::parallel_for(range, apply_vector_ptr_op,tbb::auto_partitioner());
566 template <
typename Op,
typename Result,
typename... Args>
568 const Args*
const... args)
577 Result temp = result;
585 #ifdef HAVE_INTEL_TBB 586 template<
typename ReduceOp,
typename JoinOp,
typename Result,
typename... Args>
592 ApplyReduceOp(ReduceOp&
reduce_op, JoinOp& join_op,
const Result&
identity,
const Result& result,
const Args*
const... args)
599 ApplyReduceOp(ApplyReduceOp& rhs, tbb::split) :
600 reduce_op_(rhs.reduce_op_),
601 join_op_(rhs.join_op_),
602 identity_(rhs.identity_),
603 result_(rhs.identity_),
609 template<std::size_t... Is>
610 void helper(SizeTRange& range,
const std::index_sequence<Is...>& ) {
611 std::size_t offset = range.begin();
612 std::size_t n_range = range.size();
613 reduce_op_serial(reduce_op_, n_range, result_, (std::get<Is>(args_)+offset)...);
616 void operator()(SizeTRange& range) {
617 helper(range, std::make_index_sequence<
sizeof...(Args)>());
620 void join(
const ApplyReduceOp& rhs){
621 join_op_(result_, rhs.result_);
624 const Result result()
const{
630 ReduceOp& reduce_op_;
632 const Result identity_;
634 std::tuple<
const Args *
const ...> args_;
639 template <
typename ReduceOp,
typename JoinOp,
typename Result,
typename... Args>
641 const Args*
const... args)
644 #ifdef HAVE_INTEL_TBB 645 SizeTRange range(0, n);
647 auto apply_reduce_op = ApplyReduceOp<ReduceOp,JoinOp,Result,Args...>(
reduce_op, join_op,
identity, result, args...);
649 tbb::parallel_reduce(range, apply_reduce_op, tbb::auto_partitioner());
651 result = apply_reduce_op.result();
657 template <
typename Arg,
typename Result>
658 typename std::enable_if<! (std::is_same<Arg, Result>::value && std::is_scalar<Arg>::value)>::type
660 Result*
const result)
673 template <
typename T>
674 inline typename std::enable_if<std::is_scalar<T>::value>::type
675 copy_vector(
const std::size_t n,
const T*
const arg, T*
const result) {
676 std::memcpy(result, arg, n *
sizeof(T));
679 template <
typename Arg,
typename Result>
680 void fill_vector(
const std::size_t n,
const Arg& arg, Result*
const result) {
681 auto fill_op = [arg] (Result& res) { res = arg; };
686 template <
typename Arg,
typename Result>
687 typename std::enable_if<! (std::is_scalar<Arg>::value && std::is_scalar<Result>::value)>::type
689 Result*
const result)
691 auto op = [] (Result*
const res, param_type<Arg> a) {
new(res) Result(a); };
695 template <
typename Arg,
typename Result>
696 inline typename std::enable_if<std::is_scalar<Arg>::value && std::is_scalar<Result>::value>::type
701 template <
typename Arg,
typename Result>
702 typename std::enable_if<! (std::is_scalar<Arg>::value && std::is_scalar<Result>::value)>::type
704 Result*
const result)
706 auto op = [arg] (Result*
const res) {
new(res) Result(arg); };
711 template <
typename Arg,
typename Result>
712 inline typename std::enable_if<std::is_scalar<Arg>::value && std::is_scalar<Result>::value>::type
714 Result*
const result)
718 template <
typename Arg>
719 typename std::enable_if<! std::is_scalar<Arg>::value>::type
721 auto op = [] (Arg*
const a) { a->~Arg(); };
725 template <
typename Arg>
726 inline typename std::enable_if<std::is_scalar<Arg>::value>::type
730 template <
typename Arg,
typename Result,
typename Op>
731 typename std::enable_if<! (std::is_scalar<Arg>::value && std::is_scalar<Result>::value)>::type
733 Result*
const result, Op&& op)
736 [&op] (Result*
const res, param_type<Arg> a) {
new(res) Result(op(a)); };
740 template <
typename Arg,
typename Result,
typename Op>
741 inline typename std::enable_if<std::is_scalar<Arg>::value && std::is_scalar<Result>::value>::type
743 Result*
const result, Op&& op)
749 template <
typename Left,
typename Right,
typename Result,
typename Op>
750 typename std::enable_if<! (std::is_scalar<Left>::value &&
751 std::is_scalar<Right>::value && std::is_scalar<Result>::value)>::type
753 const Right*
const right, Result*
const result, Op&& op)
755 auto wrapper_op = [&op] (Result*
const res, param_type<Left> l,
756 param_type<Right> r) {
new(res) Result(op(l, r)); };
761 template <
typename Left,
typename Right,
typename Result,
typename Op>
762 typename std::enable_if<std::is_scalar<Left>::value &&
763 std::is_scalar<Right>::value && std::is_scalar<Result>::value>::type
765 const Right*
const right, Result*
const result, Op&& op)
773 #endif // TILEDARRAY_MATH_VECTOR_OP_H__INCLUDED
void inplace_vector_op_serial(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
void scatter_to(T *const data, std::size_t stride) const
std::enable_if<!(std::is_same< Arg, Result >::value &&std::is_scalar< Arg >::value)>::type copy_vector(const std::size_t n, const Arg *const arg, Result *const result)
TILEDARRAY_FORCE_INLINE void scatter_block(Result *const result, const std::size_t stride, const Arg *const arg)
void store(T *const data) const
TILEDARRAY_FORCE_INLINE void for_each_block(Op &&op, Result *const result, const Args *const ... args)
std::enable_if<!(std::is_scalar< Left >::value &&std::is_scalar< Right >::value &&std::is_scalar< Result >::value)>::type uninitialized_binary_vector_op(const std::size_t n, const Left *const left, const Right *const right, Result *const result, Op &&op)
TILEDARRAY_FORCE_INLINE void gather_block(Result *const result, const Arg *const arg, const std::size_t stride)
static TILEDARRAY_FORCE_INLINE void gather(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, std::size_t)
std::enable_if<!(std::is_scalar< Arg >::value &&std::is_scalar< Result >::value)>::type uninitialized_copy_vector(const std::size_t n, const Arg *const arg, Result *const result)
VectorOpUnwind< TILEDARRAY_LOOP_UNWIND - 1 > VecOpUnwindN
void vector_op_serial(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
static TILEDARRAY_FORCE_INLINE void for_each_ptr(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
std::integral_constant< std::size_t, ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul)> index_mask
static TILEDARRAY_FORCE_INLINE void reduce(Op &&op, Result &MADNESS_RESTRICT result, const Args *MADNESS_RESTRICT const ...args)
static TILEDARRAY_FORCE_INLINE void gather(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, std::size_t arg_stride)
static TILEDARRAY_FORCE_INLINE void reduce(Op &&op, Result &MADNESS_RESTRICT result, const Args *MADNESS_RESTRICT const ...args)
Block< T > & gather(const T *const data, const std::size_t stride)
TILEDARRAY_FORCE_INLINE void copy_block(Result *const result, const Arg *const arg)
void fill_vector(const std::size_t n, const Arg &arg, Result *const result)
size_t size(const DistArray< Tile, Policy > &a)
std::enable_if<! std::is_scalar< Arg >::value >::type destroy_vector(const std::size_t n, Arg *const arg)
static TILEDARRAY_FORCE_INLINE void for_each(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
TILEDARRAY_FORCE_INLINE void for_each_block_n(Op &&op, const std::size_t n, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ... args)
void vector_ptr_op_serial(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
#define TILEDARRAY_LOOP_UNWIND
TILEDARRAY_FORCE_INLINE void for_each_block_ptr_n(Op &&op, const std::size_t n, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ... args)
static TILEDARRAY_FORCE_INLINE void for_each(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
typename param< U >::type param_type
void vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Vector loop unwind helper class.
void load(const T *const data)
T identity()
identity for group of objects of type T
TILEDARRAY_FORCE_INLINE void scatter_block_n(const std::size_t n, Result *result, const std::size_t stride, const Arg *const arg)
TILEDARRAY_FORCE_INLINE void gather_block_n(const std::size_t n, Result *const result, const Arg *const arg, const std::size_t stride)
void reduce_op(ReduceOp &&reduce_op, JoinOp &&join_op, const Result &identity, const std::size_t n, Result &result, const Args *const ... args)
void inplace_vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
VectorOpUnwind< N - 1ul > VectorOpUnwindN1
bool empty(const Tile< Arg > &arg)
Check that arg is empty (no data)
static constexpr std::size_t offset
std::integral_constant< std::size_t, TILEDARRAY_CACHELINE_SIZE/sizeof(double)> LoopUnwind
TILEDARRAY_FORCE_INLINE std::enable_if<(sizeof...(Args) >=0)>::type for_each_block_ptr(Op &&op, Result *const result, const Args *const ... args)
TILEDARRAY_FORCE_INLINE void copy_block_n(std::size_t n, Result *const result, const Arg *const arg)
void vector_ptr_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
std::enable_if<!(std::is_scalar< Arg >::value &&std::is_scalar< Result >::value)>::type uninitialized_unary_vector_op(const std::size_t n, const Arg *const arg, Result *const result, Op &&op)
static TILEDARRAY_FORCE_INLINE void for_each_ptr(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
void reduce_op_serial(Op &&op, const std::size_t n, Result &result, const Args *const ... args)
TILEDARRAY_FORCE_INLINE void reduce_block(Op &&op, Result &result, const Args *const ... args)
std::enable_if<!(std::is_scalar< Arg >::value &&std::is_scalar< Result >::value)>::type uninitialized_fill_vector(const std::size_t n, const Arg &arg, Result *const result)
TILEDARRAY_FORCE_INLINE void reduce_block_n(Op &&op, const std::size_t n, Result &MADNESS_RESTRICT result, const Args *MADNESS_RESTRICT const ... args)
static TILEDARRAY_FORCE_INLINE void scatter(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, const std::size_t result_stride)
static TILEDARRAY_FORCE_INLINE void scatter(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, const std::size_t)
Block(const T *const data)