TiledArray  0.7.0
vector_op.h
Go to the documentation of this file.
1 /*
2  * This file is a part of TiledArray.
3  * Copyright (C) 2013 Virginia Tech
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * justus
19  * Department of Chemistry, Virginia Tech
20  *
21  * vector_op.h
22  * Nov 17, 2013
23  *
24  */
25 
26 #ifndef TILEDARRAY_MATH_VECTOR_OP_H__INCLUDED
27 #define TILEDARRAY_MATH_VECTOR_OP_H__INCLUDED
28 
29 #include <TiledArray/type_traits.h>
30 #include <TiledArray/madness.h>
31 #include <TiledArray/config.h>
32 
33 #define TILEDARRAY_LOOP_UNWIND ::TiledArray::math::LoopUnwind::value
34 
35 
36 namespace TiledArray {
37  namespace math {
38 
39  // Import param_type into
41 
42  // Define compile time constant for loop unwinding.
43  typedef std::integral_constant<std::size_t, TILEDARRAY_CACHELINE_SIZE / sizeof(double)> LoopUnwind;
44  typedef std::integral_constant<std::size_t, ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul)> index_mask;
45 
46  template <std::size_t> struct VectorOpUnwind;
47 
49 
/// Vector loop unwind helper class
///
/// Terminating specialization of the recursive unwind chain; it operates on
/// the final element (offset TILEDARRAY_LOOP_UNWIND - 1) of a block.
template <>
struct VectorOpUnwind<0ul> {

  /// Index of the element this level of the chain touches
  static constexpr std::size_t offset = TILEDARRAY_LOOP_UNWIND - 1ul;

  /// Apply \c op to result[offset] (by reference) and args[offset]...
  template <typename Op, typename Result, typename... Args>
  static TILEDARRAY_FORCE_INLINE void
  for_each(Op&& op, Result* MADNESS_RESTRICT const result, const Args* MADNESS_RESTRICT const ...args) {
    op(result[offset], args[offset]...);
  }

  /// Apply \c op to result + offset (by pointer) and args[offset]...
  template <typename Op, typename Result, typename... Args>
  static TILEDARRAY_FORCE_INLINE void
  for_each_ptr(Op&& op, Result* MADNESS_RESTRICT const result, const Args* MADNESS_RESTRICT const ...args) {
    op(result + offset, args[offset]...);
  }

  /// Fold args[offset]... into the scalar accumulator \c result
  template <typename Op, typename Result, typename... Args>
  static TILEDARRAY_FORCE_INLINE void
  reduce(Op&& op, Result& MADNESS_RESTRICT result, const Args* MADNESS_RESTRICT const ...args) {
    op(result, args[offset]...);
  }

  /// Copy arg[offset] to the current (already strided) result position
  template <typename Result, typename Arg>
  static TILEDARRAY_FORCE_INLINE void
  scatter(Result* MADNESS_RESTRICT const result, const Arg* MADNESS_RESTRICT const arg,
      const std::size_t /*result_stride*/)
  {
    *result = arg[offset];
  }

  /// Copy the current (already strided) arg element to result[offset]
  template <typename Result, typename Arg>
  static TILEDARRAY_FORCE_INLINE void
  gather(Result* MADNESS_RESTRICT const result, const Arg* MADNESS_RESTRICT const arg,
      std::size_t /*arg_stride*/)
  {
    result[offset] = *arg;
  }

}; // struct VectorOpUnwind
92 
94 
97  template <std::size_t N>
98  struct VectorOpUnwind : public VectorOpUnwind<N - 1ul> {
99 
101 
102  static constexpr std::size_t offset = TILEDARRAY_LOOP_UNWIND - N - 1ul;
103 
104 
105  template <typename Op, typename Result, typename... Args>
106  static TILEDARRAY_FORCE_INLINE void
107  for_each(Op&& op, Result* MADNESS_RESTRICT const result, const Args* MADNESS_RESTRICT const ...args) {
108  op(result[offset], args[offset]...);
109  VectorOpUnwindN1::for_each(op, result, args...);
110  }
111 
112  template <typename Op, typename Result, typename... Args>
113  static TILEDARRAY_FORCE_INLINE void
114  for_each_ptr(Op&& op, Result* MADNESS_RESTRICT const result, const Args* MADNESS_RESTRICT const ...args) {
115  op(result + offset, args[offset]...);
116  VectorOpUnwindN1::for_each_ptr(op, result, args...);
117  }
118 
119  template <typename Op, typename Result, typename... Args>
120  static TILEDARRAY_FORCE_INLINE void
121  reduce(Op&& op, Result& MADNESS_RESTRICT result, const Args* MADNESS_RESTRICT const ...args) {
122  op(result, args[offset]...);
123  VectorOpUnwindN1::reduce(op, result, args...);
124  }
125 
126  template <typename Result, typename Arg>
127  static TILEDARRAY_FORCE_INLINE void
128  scatter(Result* MADNESS_RESTRICT const result, const Arg* MADNESS_RESTRICT const arg,
129  const std::size_t result_stride)
130  {
131  *result = arg[offset];
132  VectorOpUnwindN1::scatter(result + result_stride, arg, result_stride);
133  }
134 
135  template <typename Result, typename Arg>
136  static TILEDARRAY_FORCE_INLINE void
137  gather(Result* MADNESS_RESTRICT const result, const Arg* MADNESS_RESTRICT const arg,
138  std::size_t arg_stride)
139  {
140  result[offset] = *arg;
141  VectorOpUnwindN1::gather(result, arg + arg_stride, arg_stride);
142  }
143 
144  }; // struct VectorOpUnwind
145 
146 
148  template <typename> class Block;
149 
150  template <typename Op, typename Result, typename... Args>
151  TILEDARRAY_FORCE_INLINE void
152  for_each_block(Op&& op, Result* const result,
153  const Args* const... args)
154  {
155  VecOpUnwindN::for_each(op, result, args...);
156  }
157 
158  template <typename Op, typename Result, typename... Args>
159  TILEDARRAY_FORCE_INLINE void
160  for_each_block(Op&& op, Block<Result>& result, Block<Args>&&... args) {
161  VecOpUnwindN::for_each(op, result.data(), args.data()...);
162  }
163 
164  template <typename Op, typename Result, typename... Args>
165  TILEDARRAY_FORCE_INLINE void
166  for_each_block_n(Op&& op, const std::size_t n, Result* MADNESS_RESTRICT const result,
167  const Args* MADNESS_RESTRICT const... args)
168  {
169  for(std::size_t i = 0ul; i < n; ++i)
170  op(result[i], args[i]...);
171  }
172 
/// Apply op(result + i, args[i]...) for every element of one unwound block
/// (result passed by pointer so op can placement-new into it).
/// The vacuous sizeof...(Args) >= 0 condition appears to exist only to keep
/// this overload SFINAE-comparable with the Block overload below.
template <typename Op, typename Result, typename... Args>
TILEDARRAY_FORCE_INLINE typename std::enable_if<(sizeof...(Args) >= 0)>::type
for_each_block_ptr(Op&& op, Result* const result,
    const Args* const... args)
{
  VecOpUnwindN::for_each_ptr(op, result, args...);
}


/// Overload taking argument Block buffers; disabled for zero argument packs
/// to avoid ambiguity with the pointer overload above.
template <typename Op, typename Result, typename... Args>
TILEDARRAY_FORCE_INLINE typename std::enable_if<(sizeof...(Args) > 0)>::type
for_each_block_ptr(Op&& op, Result* const result, Block<Args>&&... args) {
  VecOpUnwindN::for_each_ptr(op, result, args.data()...);
}
187 
188  template <typename Op, typename Result, typename... Args>
189  TILEDARRAY_FORCE_INLINE void
190  for_each_block_ptr_n(Op&& op, const std::size_t n, Result* MADNESS_RESTRICT const result,
191  const Args* MADNESS_RESTRICT const... args)
192  {
193  for(std::size_t i = 0ul; i < n; ++i)
194  op(result + i, args[i]...);
195  }
196 
/// Fold one unwound block of args... into the accumulator \c result
template <typename Op, typename Result, typename... Args>
TILEDARRAY_FORCE_INLINE
void reduce_block(Op&& op, Result& result, const Args* const... args) {
  VecOpUnwindN::reduce(op, result, args...);
}

/// Overload folding from Block staging buffers instead of raw pointers
template <typename Op, typename Result, typename... Args>
TILEDARRAY_FORCE_INLINE
void reduce_block(Op&& op, Result& result, Block<Args>&&... args) {
  VecOpUnwindN::reduce(op, result, args.data()...);
}
208 
209  template <typename Op, typename Result, typename... Args>
210  TILEDARRAY_FORCE_INLINE
211  void reduce_block_n(Op&& op, const std::size_t n, Result& MADNESS_RESTRICT result,
212  const Args* MADNESS_RESTRICT const... args)
213  {
214  for(std::size_t i = 0ul; i < n; ++i)
215  op(result, args[i]...);
216  }
217 
/// Copy one unwound block from \c arg to \c result (element-wise assignment)
template <typename Result, typename Arg>
TILEDARRAY_FORCE_INLINE void
copy_block(Result* const result, const Arg* const arg) {
  for_each_block([] (Result& lhs, param_type<Arg> rhs) { lhs = rhs; },
      result, arg);
}

/// Copy the first \c n elements from \c arg to \c result (tail case)
template <typename Arg, typename Result>
TILEDARRAY_FORCE_INLINE void
copy_block_n(std::size_t n, Result* const result, const Arg* const arg) {
  for_each_block_n([] (Result& lhs, param_type<Arg> rhs) { lhs = rhs; },
      n, result, arg);
}
231 
/// Scatter one unwound block: contiguous \c arg elements are written to
/// \c result at intervals of \c stride
template <typename Arg, typename Result>
TILEDARRAY_FORCE_INLINE void
scatter_block(Result* const result, const std::size_t stride, const Arg* const arg) {
  VecOpUnwindN::scatter(result, arg, stride);
}


/// Scatter the first \c n contiguous elements of \c arg to strided \c result
/// positions (tail case)
template <typename Result, typename Arg>
TILEDARRAY_FORCE_INLINE void
scatter_block_n(const std::size_t n, Result* result,
    const std::size_t stride, const Arg* const arg)
{
  for(std::size_t i = 0; i < n; ++i, result += stride)
    *result = arg[i];
}
247 
/// Gather one unwound block: \c arg elements at intervals of \c stride are
/// written to contiguous \c result positions
template <typename Result, typename Arg>
TILEDARRAY_FORCE_INLINE void
gather_block(Result* const result, const Arg* const arg, const std::size_t stride) {
  VecOpUnwindN::gather(result, arg, stride);
}

/// Gather the first \c n strided \c arg elements to contiguous \c result
/// positions (tail case)
template <typename Arg, typename Result>
TILEDARRAY_FORCE_INLINE void
gather_block_n(const std::size_t n, Result* const result, const Arg* const arg,
    const std::size_t stride)
{
  for(std::size_t i = 0; i < n; ++i, arg += stride)
    result[i] = *arg;
}
262 
/// Fixed-size, aligned buffer of TILEDARRAY_LOOP_UNWIND elements
///
/// Acts as a register-like staging area for the unwound kernels: load or
/// gather a block from memory, apply an operation, then store or scatter
/// it back.
template <typename T>
class Block {
  TILEDARRAY_ALIGNED_STORAGE T block_[TILEDARRAY_LOOP_UNWIND];

public:
  // Default constructor leaves elements uninitialized; fill via load()/gather()
  Block() { }
  explicit Block(const T* const data) { load(data); }

  /// Copy TILEDARRAY_LOOP_UNWIND contiguous elements from \c data into the block
  void load(const T* const data) { copy_block(block_, data); }

  /// Copy the block to TILEDARRAY_LOOP_UNWIND contiguous elements at \c data
  void store(T* const data) const { copy_block(data, block_); }

  /// Fill the block from strided memory (one element every \c stride)
  Block<T>& gather(const T* const data, const std::size_t stride) {
    gather_block(block_, data, stride);
    return *this;
  }

  /// Write the block to strided memory (one element every \c stride)
  void scatter_to(T* const data, std::size_t stride) const {
    scatter_block(data, stride, block_);
  }

  T* data() { return block_; }
  const T* data() const { return block_; }

}; // class Block
288 
289 #ifdef HAVE_INTEL_TBB
290 
/// TBB Range over a [lower, upper) index interval
///
/// Splits are rounded to TILEDARRAY_LOOP_UNWIND (block_size) boundaries so
/// that subranges handed to the unwound kernels stay block-aligned.
struct SizeTRange {

  static constexpr std::size_t block_size = TILEDARRAY_LOOP_UNWIND;

  // GRAIN_SIZE is set to 8 to trigger TiledArray Unit Test
  // in reality, partition is controlled by tbb::auto_partitioner instead of GRAIN_SIZE
  static constexpr std::size_t GRAIN_SIZE = 8ul;

  size_t lower;
  size_t upper;

  SizeTRange(const size_t start, const size_t end)
    : lower(start), upper(end) { }

  // SizeTRange(const size_t n, const size_t g_size)
  // : lower(0), upper(n - 1), grain_size(g_size) { }

  SizeTRange() = default;
  SizeTRange(const SizeTRange &r) = default;

  ~SizeTRange() { }

  // void set_grain_size(std::size_t grain_size){
  //  GRAIN_SIZE = grain_size;
  // }

  // NOTE(review): a zero-length range (lower == upper) reports non-empty
  // here; the serial kernels tolerate n == 0, so this looks benign —
  // confirm against the TBB Range concept requirements.
  bool empty() const { return lower > upper; }

  bool is_divisible() const { return size() >= 2 * GRAIN_SIZE; }

  size_t begin() const { return lower; }

  size_t end() const { return upper; }

  size_t size() const { return upper - lower; }

  /// Splitting constructor (required by TBB): this range takes the upper
  /// half of \c r, rounded to whole unwind blocks; \c r keeps the rest.
  SizeTRange(SizeTRange &r, tbb::split) {
    size_t nblock = (r.upper - r.lower) / block_size;
    nblock = (nblock + 1) / 2;
    lower = r.lower + nblock * block_size;
    upper = r.upper;
    r.upper = lower;
  }

};
336 
337 // SizeTRange::set_grain_size(1024ul);
338 
339 #endif
340 
/// Serially apply op(result[i], args[i]...) for i in [0, n), mutating result
/// in place. Full unwind-sized blocks are staged through aligned Block
/// buffers; the remaining tail is processed element-wise. Enabled only when
/// Op returns void (pure in-place mutation).
template <typename Op, typename Result, typename... Args,
    typename std::enable_if<std::is_void<typename std::result_of<Op(Result&,
    Args...)>::type>::value>::type* = nullptr>
void inplace_vector_op_serial(Op&& op, const std::size_t n, Result* const result,
    const Args* const... args)
{
  std::size_t i = 0ul;

  // Compute block iteration limit (n rounded down to a whole unwind block)
  constexpr std::size_t index_mask = ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul);
  const std::size_t nx = n & index_mask;

  for(; i < nx; i += TILEDARRAY_LOOP_UNWIND) {
    // Stage the result block, apply the unwound kernel, write it back
    Block<Result> result_block(result + i);
    for_each_block(op, result_block, Block<Args>(args + i)...);
    result_block.store(result + i);
  }

  // Tail: remaining n - i (< TILEDARRAY_LOOP_UNWIND) elements
  for_each_block_n(op, n - i, result + i, (args + i)...);
}
361 
362 #ifdef HAVE_INTEL_TBB
363 
/// TBB body functor that applies inplace_vector_op_serial to each subrange.
/// Exists as a named class because variadic lambda captures are unavailable
/// here (see the commented-out lambda in inplace_vector_op).
template<typename Op, typename Result, typename... Args>
class ApplyInplaceVectorOp{

public:
  ApplyInplaceVectorOp(Op& op, Result* const result, const Args* const... args)
    :op_(op), result_(result), args_(args...){}

  ~ApplyInplaceVectorOp(){}

  /// Expand the argument-pointer tuple and run the serial kernel on the subrange
  template<std::size_t... Is>
  void helper(SizeTRange& range, const std::index_sequence<Is...>& ) const {
    std::size_t offset = range.begin();
    std::size_t n_range = range.size();
    inplace_vector_op_serial(op_, n_range, result_+offset, (std::get<Is>(args_)+offset)...);
  }

  void operator()(SizeTRange& range) const {
    helper(range, std::make_index_sequence<sizeof...(Args)>());
  }

private:

  Op& op_;                                   // held by reference: must outlive the parallel_for
  Result* const result_;
  std::tuple<const Args * const ...> args_;  // pointers to the argument vectors

};
391 
392 #endif
393 
/// Apply op(result[i], args[i]...) for i in [0, n), mutating result in
/// place. Uses TBB parallel_for over block-aligned subranges when
/// HAVE_INTEL_TBB is defined, otherwise falls back to the serial kernel.
/// Enabled only when Op returns void.
template <typename Op, typename Result, typename... Args,
    typename std::enable_if<std::is_void<typename std::result_of<Op(Result&,
    Args...)>::type>::value>::type* = nullptr>
void inplace_vector_op(Op&& op, const std::size_t n, Result* const result,
    const Args* const... args)
{
#ifdef HAVE_INTEL_TBB
//  std::cout << "INPLACE_TBB_VECTOR_OP" << std::endl;
  SizeTRange range(0, n);

  // if support lambda variadic
//  auto apply_inplace_vector_op = [op, result, args...](SizeTRange &range) {
//    size_t offset = range.begin();
//    size_t n_range = range.size();
//    inplace_vector_op_serial(op, n_range, result + offset, (args + offset)...);
//  };
  // else
  auto apply_inplace_vector_op = ApplyInplaceVectorOp<Op, Result, Args...>(op, result, args...);

  tbb::parallel_for(range, apply_inplace_vector_op, tbb::auto_partitioner());
#else
  inplace_vector_op_serial(op, n, result, args...);
#endif
}
418 
/// Serially compute result[i] = op(args[i]...) for i in [0, n).
/// Enabled only when Op returns non-void; the returned value is assigned
/// into the result element via a wrapping lambda. Full blocks go through an
/// uninitialized Block staging buffer (every element is overwritten by the
/// wrapper before the store); the tail is processed element-wise.
template <typename Op, typename Result, typename... Args,
    typename std::enable_if<! std::is_void<typename std::result_of<Op(
    Args...)>::type>::value>::type* = nullptr>
void vector_op_serial(Op&& op, const std::size_t n, Result* const result,
    const Args* const... args)
{
  // Adapt the value-returning op to the in-place kernel interface
  auto wrapper_op = [&op] (Result& res, param_type<Args>... a)
      { res = op(a...); };

  std::size_t i = 0ul;

  // Compute block iteration limit (n rounded down to a whole unwind block)
  constexpr std::size_t index_mask = ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul);
  const std::size_t nx = n & index_mask;

  for(; i < nx; i += TILEDARRAY_LOOP_UNWIND) {
    Block<Result> result_block;
    for_each_block(wrapper_op, result_block, Block<Args>(args + i)...);
    result_block.store(result + i);
  }

  // Tail: remaining n - i (< TILEDARRAY_LOOP_UNWIND) elements
  for_each_block_n(wrapper_op, n - i, result + i, (args + i)...);
}
442 
443 #ifdef HAVE_INTEL_TBB
444 
/// TBB body functor that applies vector_op_serial to each subrange.
/// Named class standing in for a variadic-capture lambda (see the
/// commented-out alternative in vector_op).
template<typename Op, typename Result, typename... Args>
class ApplyVectorOp{

public:
  ApplyVectorOp(Op& op, Result* const result, const Args* const... args)
    :op_(op), result_(result), args_(args...){}

  ~ApplyVectorOp(){}

  /// Expand the argument-pointer tuple and run the serial kernel on the subrange
  template<std::size_t... Is>
  void helper(SizeTRange& range, const std::index_sequence<Is...>& ) const {
    std::size_t offset = range.begin();
    std::size_t n_range = range.size();
    vector_op_serial(op_, n_range, result_+offset, (std::get<Is>(args_)+offset)...);
  }

  void operator()(SizeTRange& range) const {
    helper(range, std::make_index_sequence<sizeof...(Args)>());
  }

private:

  Op& op_;                                   // held by reference: must outlive the parallel_for
  Result* const result_;
  std::tuple<const Args * const ...> args_;  // pointers to the argument vectors

};
472 
473 #endif
474 
/// Compute result[i] = op(args[i]...) for i in [0, n).
/// Uses TBB parallel_for over block-aligned subranges when HAVE_INTEL_TBB
/// is defined, otherwise falls back to the serial kernel. Enabled only when
/// Op returns non-void.
template <typename Op, typename Result, typename... Args,
    typename std::enable_if<! std::is_void<typename std::result_of<Op(
    Args...)>::type>::value>::type* = nullptr>
void vector_op(Op&& op, const std::size_t n, Result* const result,
    const Args* const... args)
{
#ifdef HAVE_INTEL_TBB
//  std::cout << "TBB_VECTOR_OP" << std::endl;
  SizeTRange range(0, n);

  // if support lambda variadic
//  auto apply_vector_op = [op, result, args...](SizeTRange &range) {
//    size_t offset = range.begin();
//    size_t n_range = range.size();
//    vector_op_serial(op, n_range, result + offset, (args + offset)...);
//  };
  // else
  auto apply_vector_op = ApplyVectorOp<Op,Result,Args...>(op, result, args...);

  tbb::parallel_for(range, apply_vector_op, tbb::auto_partitioner());
#else
  vector_op_serial(op, n, result, args...);
#endif
}
499 
/// Serially apply op(result + i, args[i]...) for i in [0, n); the result
/// element is passed by pointer so op can placement-new into uninitialized
/// memory. Full unwind blocks stage the arguments (not the result) through
/// Block buffers; the tail is processed element-wise.
template <typename Op, typename Result, typename... Args>
void vector_ptr_op_serial(Op&& op, const std::size_t n, Result* const result,
    const Args* const... args)
{
  std::size_t i = 0ul;

  // Compute block iteration limit (n rounded down to a whole unwind block)
  constexpr std::size_t index_mask = ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul);
  const std::size_t nx = n & index_mask;

  for(; i < nx; i += TILEDARRAY_LOOP_UNWIND)
    for_each_block_ptr(op, result + i, Block<Args>(args + i)...);
  for_each_block_ptr_n(op, n - i, result + i, (args + i)...);
}
514 
515 #ifdef HAVE_INTEL_TBB
/// TBB body functor that applies vector_ptr_op_serial to each subrange.
/// Named class standing in for a variadic-capture lambda (see the
/// commented-out alternative in vector_ptr_op).
template<typename Op, typename Result, typename... Args>
class ApplyVectorPtrOp{

public:
  ApplyVectorPtrOp(Op& op, Result* const result, const Args* const... args)
    :op_(op), result_(result), args_(args...){}

  ~ApplyVectorPtrOp(){}

  /// Expand the argument-pointer tuple and run the serial kernel on the subrange
  template<std::size_t... Is>
  void helper(SizeTRange& range, const std::index_sequence<Is...>& ) const {
    std::size_t offset = range.begin();
    std::size_t n_range = range.size();
    vector_ptr_op_serial(op_, n_range, result_+offset, (std::get<Is>(args_)+offset)...);
  }

  void operator()(SizeTRange& range) const {
    helper(range, std::make_index_sequence<sizeof...(Args)>());
  }

private:

  Op& op_;                                   // held by reference: must outlive the parallel_for
  Result* const result_;
  std::tuple<const Args * const ...> args_;  // pointers to the argument vectors

};
543 #endif
544 
/// Apply op(result + i, args[i]...) for i in [0, n), with the result
/// element passed by pointer (for placement-new into uninitialized memory).
/// Uses TBB parallel_for when HAVE_INTEL_TBB is defined, otherwise the
/// serial kernel.
template <typename Op, typename Result, typename... Args>
void vector_ptr_op(Op&& op, const std::size_t n, Result* const result,
    const Args* const... args){
#ifdef HAVE_INTEL_TBB
//  std::cout << "TBB_VECTOR_PTR_OP" << std::endl;
  SizeTRange range(0, n);

  // if support lambda variadic
//  auto apply_vector_ptr_op = [op, result, args...](SizeTRange &range) {
//    size_t offset = range.begin();
//    size_t n_range = range.size();
//    vector_ptr_op_serial(op, n_range, result + offset, (args + offset)...);
//  };
  // else
  auto apply_vector_ptr_op = ApplyVectorPtrOp<Op,Result,Args...>(op, result, args...);
  tbb::parallel_for(range, apply_vector_ptr_op,tbb::auto_partitioner());
#else
  vector_ptr_op_serial(op,n,result,args...);
#endif
}
565 
/// Serially fold args[i]... into \c result for i in [0, n) using op.
/// Full unwind blocks accumulate into a local temporary before writing
/// back — presumably so the accumulator can stay in a register despite
/// potential aliasing through the result reference (TODO confirm intent).
/// The tail is folded element-wise.
template <typename Op, typename Result, typename... Args>
void reduce_op_serial(Op&& op, const std::size_t n, Result& result,
    const Args* const... args)
{
  std::size_t i = 0ul;

  // Compute block iteration limit (n rounded down to a whole unwind block)
  constexpr std::size_t index_mask = ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul);
  const std::size_t nx = n & index_mask;

  for(; i < nx; i += TILEDARRAY_LOOP_UNWIND) {
    Result temp = result;
    reduce_block(op, temp, Block<Args>(args + i)...);
    result = temp;
  }

  // Tail: remaining n - i (< TILEDARRAY_LOOP_UNWIND) elements
  reduce_block_n(op, n - i, result, (args + i)...);
}
584 
585 #ifdef HAVE_INTEL_TBB
/// TBB parallel_reduce body: folds each subrange into a per-body
/// accumulator with \c reduce_op, and merges accumulators of split bodies
/// with \c join_op. Split copies start from \c identity.
template<typename ReduceOp, typename JoinOp, typename Result, typename... Args>
class ApplyReduceOp{

public:
  ApplyReduceOp(ReduceOp& reduce_op, JoinOp& join_op, const Result& identity, const Result& result, const Args* const... args)
    :reduce_op_(reduce_op),
     join_op_(join_op),
     identity_(identity),
     result_(result),
     args_(args...){}

  /// Splitting constructor (required by parallel_reduce): the new body's
  /// accumulator starts from the identity, not the sibling's partial result
  ApplyReduceOp(ApplyReduceOp& rhs, tbb::split) :
    reduce_op_(rhs.reduce_op_),
    join_op_(rhs.join_op_),
    identity_(rhs.identity_),
    result_(rhs.identity_),
    args_(rhs.args_)
  { }

  ~ApplyReduceOp(){}

  /// Expand the argument-pointer tuple and fold the subrange into result_
  template<std::size_t... Is>
  void helper(SizeTRange& range, const std::index_sequence<Is...>& ) {
    std::size_t offset = range.begin();
    std::size_t n_range = range.size();
    reduce_op_serial(reduce_op_, n_range, result_, (std::get<Is>(args_)+offset)...);
  }

  void operator()(SizeTRange& range) {
    helper(range, std::make_index_sequence<sizeof...(Args)>());
  }

  /// Merge another body's partial result into this one (required by parallel_reduce)
  void join(const ApplyReduceOp& rhs){
    join_op_(result_, rhs.result_);
  }

  // NOTE(review): top-level const on a by-value return is meaningless here
  const Result result() const{
    return result_;
  }

private:

  ReduceOp& reduce_op_;                      // held by reference: must outlive the reduce
  JoinOp& join_op_;
  const Result identity_;                    // seed value for split bodies
  Result result_;                            // this body's partial accumulation
  std::tuple<const Args * const ...> args_;  // pointers to the argument vectors

};
637 #endif
638 
/// Fold args[i]... into \c result for i in [0, n).
/// With HAVE_INTEL_TBB, a tbb::parallel_reduce combines per-subrange
/// partial results (each seeded with \c identity) via \c join_op; otherwise
/// the serial kernel folds directly into \c result and \c identity is unused.
template <typename ReduceOp, typename JoinOp, typename Result, typename... Args>
void reduce_op(ReduceOp&& reduce_op, JoinOp&& join_op, const Result& identity, const std::size_t n, Result& result,
    const Args* const... args)
{
#ifdef HAVE_INTEL_TBB
  SizeTRange range(0, n);

  auto apply_reduce_op = ApplyReduceOp<ReduceOp,JoinOp,Result,Args...>(reduce_op, join_op, identity, result, args...);

  tbb::parallel_reduce(range, apply_reduce_op, tbb::auto_partitioner());

  result = apply_reduce_op.result();
#else
  reduce_op_serial(reduce_op,n,result,args...);
#endif
}
656 
/// Copy \c n elements from \c arg to \c result via element-wise assignment.
/// This overload handles non-trivial cases (differing types, or class
/// types); the scalar same-type case is dispatched to memcpy below.
template <typename Arg, typename Result>
typename std::enable_if<! (std::is_same<Arg, Result>::value && std::is_scalar<Arg>::value)>::type
copy_vector(const std::size_t n, const Arg* const arg,
    Result* const result)
{
  std::size_t i = 0ul;

  // Compute block iteration limit (n rounded down to a whole unwind block)
  constexpr std::size_t index_mask = ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul);
  const std::size_t nx = n & index_mask;

  for(; i < nx; i += TILEDARRAY_LOOP_UNWIND)
    copy_block(result + i, arg + i);
  copy_block_n(n - i, result + i, arg + i);
}
672 
/// Copy \c n elements from \c arg to \c result. Fast path for scalar
/// element types of identical type: a single bitwise memcpy replaces the
/// element-wise loop.
template <typename T>
inline typename std::enable_if<std::is_scalar<T>::value>::type
copy_vector(const std::size_t n, const T* const arg, T* const result) {
  const std::size_t num_bytes = n * sizeof(T);
  std::memcpy(result, arg, num_bytes);
}
678 
/// Assign the value \c arg to every one of the \c n elements at \c result
/// (arg is captured by copy so the fill value is stable across TBB tasks)
template <typename Arg, typename Result>
void fill_vector(const std::size_t n, const Arg& arg, Result* const result) {
  auto fill_op = [arg] (Result& res) { res = arg; };
  vector_op(fill_op, n, result);
}
684 
685 
/// Copy-construct \c n elements at uninitialized \c result from \c arg via
/// placement new (non-scalar case)
template <typename Arg, typename Result>
typename std::enable_if<! (std::is_scalar<Arg>::value && std::is_scalar<Result>::value)>::type
uninitialized_copy_vector(const std::size_t n, const Arg* const arg,
    Result* const result)
{
  auto op = [] (Result* const res, param_type<Arg> a) { new(res) Result(a); };
  vector_ptr_op(op, n, result, arg);
}

/// Scalar fast path: no construction needed, plain copy suffices
template <typename Arg, typename Result>
inline typename std::enable_if<std::is_scalar<Arg>::value && std::is_scalar<Result>::value>::type
uninitialized_copy_vector(const std::size_t n, const Arg* const arg, Result* const result) {
  copy_vector(n, arg, result);
}
700 
/// Construct \c n copies of \c arg at uninitialized \c result via placement
/// new (non-scalar case; arg captured by copy for stability across tasks)
template <typename Arg, typename Result>
typename std::enable_if<! (std::is_scalar<Arg>::value && std::is_scalar<Result>::value)>::type
uninitialized_fill_vector(const std::size_t n, const Arg& arg,
    Result* const result)
{
  auto op = [arg] (Result* const res) { new(res) Result(arg); };
  vector_ptr_op(op, n, result);
}


/// Scalar fast path: no construction needed, plain fill suffices
template <typename Arg, typename Result>
inline typename std::enable_if<std::is_scalar<Arg>::value && std::is_scalar<Result>::value>::type
uninitialized_fill_vector(const std::size_t n, const Arg& arg,
    Result* const result)
{ fill_vector(n, arg, result); }
716 
717 
/// Invoke the destructor of each of the \c n elements at \c arg
/// (non-scalar case)
template <typename Arg>
typename std::enable_if<! std::is_scalar<Arg>::value>::type
destroy_vector(const std::size_t n, Arg* const arg) {
  auto op = [] (Arg* const a) { a->~Arg(); };
  vector_ptr_op(op, n, arg);
}

/// Scalar fast path: scalars are trivially destructible, nothing to do
template <typename Arg>
inline typename std::enable_if<std::is_scalar<Arg>::value>::type
destroy_vector(const std::size_t, const Arg* const) { }
728 
729 
/// Construct result[i] = op(arg[i]) for i in [0, n) into uninitialized
/// memory via placement new (non-scalar case)
template <typename Arg, typename Result, typename Op>
typename std::enable_if<! (std::is_scalar<Arg>::value && std::is_scalar<Result>::value)>::type
uninitialized_unary_vector_op(const std::size_t n, const Arg* const arg,
    Result* const result, Op&& op)
{
  auto wrapper_op =
      [&op] (Result* const res, param_type<Arg> a) { new(res) Result(op(a)); };
  vector_ptr_op(wrapper_op, n, result, arg);
}

/// Scalar fast path: no construction needed, plain assignment via vector_op
template <typename Arg, typename Result, typename Op>
inline typename std::enable_if<std::is_scalar<Arg>::value && std::is_scalar<Result>::value>::type
uninitialized_unary_vector_op(const std::size_t n, const Arg* const arg,
    Result* const result, Op&& op)
{
  vector_op(op, n, result, arg);
}
747 
748 
749  template <typename Left, typename Right, typename Result, typename Op>
750  typename std::enable_if<! (std::is_scalar<Left>::value &&
751  std::is_scalar<Right>::value && std::is_scalar<Result>::value)>::type
752  uninitialized_binary_vector_op(const std::size_t n, const Left* const left,
753  const Right* const right, Result* const result, Op&& op)
754  {
755  auto wrapper_op = [&op] (Result* const res, param_type<Left> l,
756  param_type<Right> r) { new(res) Result(op(l, r)); };
757 
758  vector_ptr_op(op, n, result, left, right);
759  }
760 
/// Scalar fast path for the binary construction above: no placement new
/// needed, plain assignment via vector_op
template <typename Left, typename Right, typename Result, typename Op>
typename std::enable_if<std::is_scalar<Left>::value &&
    std::is_scalar<Right>::value && std::is_scalar<Result>::value>::type
uninitialized_binary_vector_op(const std::size_t n, const Left* const left,
    const Right* const right, Result* const result, Op&& op)
{
  vector_op(op, n, result, left, right);
}
769 
770  } // namespace math
771 } // namespace TiledArray
772 
773 #endif // TILEDARRAY_MATH_VECTOR_OP_H__INCLUDED
const T * data() const
Definition: vector_op.h:285
void inplace_vector_op_serial(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:344
void scatter_to(T *const data, std::size_t stride) const
Definition: vector_op.h:280
std::enable_if<!(std::is_same< Arg, Result >::value &&std::is_scalar< Arg >::value)>::type copy_vector(const std::size_t n, const Arg *const arg, Result *const result)
Definition: vector_op.h:659
TILEDARRAY_FORCE_INLINE void scatter_block(Result *const result, const std::size_t stride, const Arg *const arg)
Definition: vector_op.h:234
void store(T *const data) const
Definition: vector_op.h:273
TILEDARRAY_FORCE_INLINE void for_each_block(Op &&op, Result *const result, const Args *const ... args)
Definition: vector_op.h:152
std::enable_if<!(std::is_scalar< Left >::value &&std::is_scalar< Right >::value &&std::is_scalar< Result >::value)>::type uninitialized_binary_vector_op(const std::size_t n, const Left *const left, const Right *const right, Result *const result, Op &&op)
Definition: vector_op.h:752
TILEDARRAY_FORCE_INLINE void gather_block(Result *const result, const Arg *const arg, const std::size_t stride)
Definition: vector_op.h:250
static TILEDARRAY_FORCE_INLINE void gather(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, std::size_t)
Definition: vector_op.h:85
std::enable_if<!(std::is_scalar< Arg >::value &&std::is_scalar< Result >::value)>::type uninitialized_copy_vector(const std::size_t n, const Arg *const arg, Result *const result)
Definition: vector_op.h:688
VectorOpUnwind< TILEDARRAY_LOOP_UNWIND - 1 > VecOpUnwindN
Definition: vector_op.h:147
STL namespace.
void vector_op_serial(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:422
static TILEDARRAY_FORCE_INLINE void for_each_ptr(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
Definition: vector_op.h:65
std::integral_constant< std::size_t, ~std::size_t(TILEDARRAY_LOOP_UNWIND - 1ul)> index_mask
Definition: vector_op.h:44
static TILEDARRAY_FORCE_INLINE void reduce(Op &&op, Result &MADNESS_RESTRICT result, const Args *MADNESS_RESTRICT const ...args)
Definition: vector_op.h:121
static TILEDARRAY_FORCE_INLINE void gather(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, std::size_t arg_stride)
Definition: vector_op.h:137
static TILEDARRAY_FORCE_INLINE void reduce(Op &&op, Result &MADNESS_RESTRICT result, const Args *MADNESS_RESTRICT const ...args)
Definition: vector_op.h:71
Block< T > & gather(const T *const data, const std::size_t stride)
Definition: vector_op.h:275
TILEDARRAY_FORCE_INLINE void copy_block(Result *const result, const Arg *const arg)
Definition: vector_op.h:220
void fill_vector(const std::size_t n, const Arg &arg, Result *const result)
Definition: vector_op.h:680
size_t size(const DistArray< Tile, Policy > &a)
Definition: utils.h:49
std::enable_if<! std::is_scalar< Arg >::value >::type destroy_vector(const std::size_t n, Arg *const arg)
Definition: vector_op.h:720
static TILEDARRAY_FORCE_INLINE void for_each(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
Definition: vector_op.h:107
TILEDARRAY_FORCE_INLINE void for_each_block_n(Op &&op, const std::size_t n, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ... args)
Definition: vector_op.h:166
void vector_ptr_op_serial(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:501
#define TILEDARRAY_LOOP_UNWIND
Definition: vector_op.h:33
TILEDARRAY_FORCE_INLINE void for_each_block_ptr_n(Op &&op, const std::size_t n, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ... args)
Definition: vector_op.h:190
static TILEDARRAY_FORCE_INLINE void for_each(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
Definition: vector_op.h:59
typename param< U >::type param_type
Definition: type_traits.h:677
void vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:478
Vector loop unwind helper class.
Definition: vector_op.h:46
void load(const T *const data)
Definition: vector_op.h:271
T identity()
identity for group of objects of type T
TILEDARRAY_FORCE_INLINE void scatter_block_n(const std::size_t n, Result *result, const std::size_t stride, const Arg *const arg)
Definition: vector_op.h:241
TILEDARRAY_FORCE_INLINE void gather_block_n(const std::size_t n, Result *const result, const Arg *const arg, const std::size_t stride)
Definition: vector_op.h:256
void reduce_op(ReduceOp &&reduce_op, JoinOp &&join_op, const Result &identity, const std::size_t n, Result &result, const Args *const ... args)
Definition: vector_op.h:640
void inplace_vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:397
VectorOpUnwind< N - 1ul > VectorOpUnwindN1
Definition: vector_op.h:100
bool empty(const Tile< Arg > &arg)
Check that arg is empty (no data)
Definition: tile.h:305
static constexpr std::size_t offset
Definition: vector_op.h:102
std::integral_constant< std::size_t, TILEDARRAY_CACHELINE_SIZE/sizeof(double)> LoopUnwind
Definition: vector_op.h:43
TILEDARRAY_FORCE_INLINE std::enable_if<(sizeof...(Args) >=0)>::type for_each_block_ptr(Op &&op, Result *const result, const Args *const ... args)
Definition: vector_op.h:175
TILEDARRAY_FORCE_INLINE void copy_block_n(std::size_t n, Result *const result, const Arg *const arg)
Definition: vector_op.h:227
void vector_ptr_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:546
std::enable_if<!(std::is_scalar< Arg >::value &&std::is_scalar< Result >::value)>::type uninitialized_unary_vector_op(const std::size_t n, const Arg *const arg, Result *const result, Op &&op)
Definition: vector_op.h:732
static TILEDARRAY_FORCE_INLINE void for_each_ptr(Op &&op, Result *MADNESS_RESTRICT const result, const Args *MADNESS_RESTRICT const ...args)
Definition: vector_op.h:114
void reduce_op_serial(Op &&op, const std::size_t n, Result &result, const Args *const ... args)
Definition: vector_op.h:567
TILEDARRAY_FORCE_INLINE void reduce_block(Op &&op, Result &result, const Args *const ... args)
Definition: vector_op.h:199
std::enable_if<!(std::is_scalar< Arg >::value &&std::is_scalar< Result >::value)>::type uninitialized_fill_vector(const std::size_t n, const Arg &arg, Result *const result)
Definition: vector_op.h:703
TILEDARRAY_FORCE_INLINE void reduce_block_n(Op &&op, const std::size_t n, Result &MADNESS_RESTRICT result, const Args *MADNESS_RESTRICT const ... args)
Definition: vector_op.h:211
static TILEDARRAY_FORCE_INLINE void scatter(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, const std::size_t result_stride)
Definition: vector_op.h:128
static TILEDARRAY_FORCE_INLINE void scatter(Result *MADNESS_RESTRICT const result, const Arg *MADNESS_RESTRICT const arg, const std::size_t)
Definition: vector_op.h:77
Block(const T *const data)
Definition: vector_op.h:269