26 #ifndef TILEDARRAY_TENSOR_KENERLS_H__INCLUDED 27 #define TILEDARRAY_TENSOR_KENERLS_H__INCLUDED 35 template <
typename,
typename>
class Tensor;
57 template <
typename TR,
typename Op,
typename T1,
typename... Ts,
58 typename std::enable_if<
is_tensor<TR, T1, Ts...>::value
60 inline TR
tensor_op(Op&& op,
const T1& tensor1,
const Ts&... tensors) {
78 template <
typename TR,
typename Op,
typename T1,
typename... Ts,
79 typename std::enable_if<(is_tensor<T1, Ts...>::value
80 || is_tensor_of_tensor<TR, T1, Ts...>::value)
81 && is_contiguous_tensor<T1, Ts...>::value>::type* =
nullptr>
92 template <
typename Op,
typename Tensor,
typename ... Tensors>
97 const auto& range = tensor.range();
99 this->operator()(result, std::forward<Op>(op), std::forward<Tensor>(tensor), std::forward<Tensors>(tensors)...);
105 template <
typename Op,
typename Tensor,
typename ... Tensors>
110 const auto& range = result.range();
111 for (
auto&& i : range)
112 result[std::forward<decltype(i)>(i)] = std::forward<Op>(op)(
113 std::forward<Tensor>(tensor)[std::forward<decltype(i)>(i)],
114 std::forward<Tensors>(tensors)[std::forward<decltype(i)>(i)]...);
117 template <
typename Op,
typename Tensor,
typename ... Tensors>
125 const auto& range = tensor.range();
126 T result(perm ^ range);
127 this->operator()(result, std::forward<Op>(op), perm, std::forward<Tensor>(tensor), std::forward<Tensors>(tensors)...);
131 template <
typename Op,
typename Tensor,
typename ... Tensors>
139 const auto& range = tensor.range();
140 for (
auto&& i : range)
141 result[perm ^ std::forward<decltype(i)>(i)] = std::forward<Op>(op)(
142 std::forward<Tensor>(tensor)[std::forward<decltype(i)>(i)],
143 std::forward<Tensors>(tensors)[std::forward<decltype(i)>(i)]...);
160 template <
typename Op,
typename TR,
typename... Ts,
161 typename std::enable_if<is_tensor<TR, Ts...>::value
162 && is_contiguous_tensor<TR, Ts...>::value>::type* =
nullptr>
167 const auto volume = result.range().volume();
183 template <
typename Op,
typename TR,
typename... Ts,
184 typename std::enable_if<is_tensor_of_tensor<TR, Ts...>::value
185 && is_contiguous_tensor<TR, Ts...>::value>::type* =
nullptr>
190 const auto volume = result.range().volume();
192 for(decltype(result.range().volume()) i = 0ul; i < volume; ++i) {
222 template <
typename InputOp,
typename OutputOp,
typename TR,
typename T1,
typename... Ts,
223 typename std::enable_if<is_tensor<TR, T1, Ts...>::value
224 && is_contiguous_tensor<TR, T1, Ts...>::value>::type* =
nullptr>
226 const Permutation& perm, TR& result,
const T1& tensor1,
227 const Ts&... tensors)
235 permute(input_op, output_op, result, perm, tensor1, tensors...);
264 template <
typename InputOp,
typename OutputOp,
typename TR,
typename T1,
typename... Ts,
265 typename std::enable_if<is_tensor_of_tensor<TR, T1, Ts...>::value
266 && is_contiguous_tensor<TR, T1, Ts...>::value>::type* =
nullptr>
268 const Permutation& perm, TR& result,
const T1& tensor1,
269 const Ts&... tensors)
277 auto wrapper_input_op = [=] (
typename T1::const_reference MADNESS_RESTRICT value1,
278 typename Ts::const_reference MADNESS_RESTRICT... values) ->
279 typename T1::value_type
280 {
return tensor_op<TR::value_type>(input_op, value1, values...); };
282 auto wrapper_output_op = [=] (
typename T1::pointer MADNESS_RESTRICT
const result_value,
283 const typename TR::value_type value)
286 permute(wrapper_input_op, wrapper_output_op, result, perm, tensor1,
300 template <
typename Op,
typename TR,
typename... Ts,
301 typename std::enable_if<is_tensor<TR, Ts...>::value
302 && ! (is_contiguous_tensor<TR, Ts...>::value)>::type* =
nullptr>
307 const auto stride =
inner_size(result, tensors...);
308 const auto volume = result.range().volume();
310 for(decltype(result.range().volume()) i = 0ul; i < volume; i += stride)
312 (tensors.data() + tensors.range().ordinal(i))...);
325 template <
typename Op,
typename TR,
typename... Ts,
326 typename std::enable_if<is_tensor_of_tensor<TR, Ts...>::value
327 && ! (is_contiguous_tensor<TR, Ts...>::value)>::type* =
nullptr>
332 const auto stride =
inner_size(result, tensors...);
333 const auto volume = result.range().volume();
335 auto inplace_tensor_range =
336 [=] (
typename TR::pointer MADNESS_RESTRICT
const result_data,
337 typename Ts::const_pointer MADNESS_RESTRICT
const... tensors_data)
339 for(decltype(result.range().volume()) i = 0ul; i < stride; ++i)
343 for(decltype(result.range().volume()) i = 0ul; i < volume; i += stride)
344 inplace_tensor_range(result.data() + result.range().ordinal(i),
345 (tensors.data() + tensors.range().ordinal(i))...);
363 template <
typename Op,
typename TR,
typename... Ts,
364 typename std::enable_if<is_tensor<TR, Ts...>::value
365 && is_contiguous_tensor<TR, Ts...>::value>::type* =
nullptr>
366 inline void tensor_init(Op&& op, TR& result,
const Ts&... tensors) {
370 const auto volume = result.range().volume();
372 auto wrapper_op = [=] (
typename TR::pointer MADNESS_RESTRICT result,
373 typename Ts::const_reference MADNESS_RESTRICT... ts)
374 {
new(result)
typename TR::value_type(op(ts...)); };
390 template <
typename Op,
typename TR,
typename... Ts,
391 typename std::enable_if<is_tensor_of_tensor<TR, Ts...>::value
392 && is_contiguous_tensor<TR, Ts...>::value>::type* =
nullptr>
393 inline void tensor_init(Op&& op, TR& result,
const Ts&... tensors) {
397 const auto volume = result.range().volume();
399 for(decltype(result.range().volume()) i = 0ul; i < volume; ++i) {
400 new(result.data() + i)
401 typename TR::value_type(tensor_op<typename TR::value_type>(op, tensors[i]...));
420 template <
typename Op,
typename TR,
typename T1,
typename... Ts,
421 typename std::enable_if<is_tensor<TR, T1, Ts...>::value
422 && is_contiguous_tensor<TR, T1, Ts...>::value>::type* =
nullptr>
424 const T1& tensor1,
const Ts&... tensors)
431 auto output_op = [=] (
typename TR::pointer MADNESS_RESTRICT result,
432 typename TR::const_reference MADNESS_RESTRICT temp)
433 {
new(result)
typename TR::value_type(temp); };
435 permute(op, output_op, result, perm, tensor1, tensors...);
453 template <
typename Op,
typename TR,
typename T1,
typename... Ts,
454 typename std::enable_if<is_tensor_of_tensor<TR, T1, Ts...>::value
455 && is_contiguous_tensor<TR, T1, Ts...>::value>::type* =
nullptr>
457 const T1& tensor1,
const Ts&... tensors)
464 auto output_op = [=] (
typename TR::pointer MADNESS_RESTRICT result,
465 typename TR::const_reference MADNESS_RESTRICT temp)
466 {
new(result)
typename TR::value_type(temp); };
467 auto tensor_input_op = [=] (
typename T1::const_reference MADNESS_RESTRICT value1,
468 typename Ts::const_reference MADNESS_RESTRICT... values) ->
469 typename TR::value_type
470 {
return tensor_op<typename TR::value_type>(op, value1, values...); };
472 permute(tensor_input_op, output_op, result, perm, tensor1, tensors...);
488 template <
typename Op,
typename TR,
typename T1,
typename... Ts,
489 typename std::enable_if<is_tensor<TR, T1, Ts...>::value
490 && is_contiguous_tensor<TR>::value
491 && ! is_contiguous_tensor<T1, Ts...>::value>::type* =
nullptr>
493 const Ts&... tensors)
498 const auto stride =
inner_size(tensor1, tensors...);
499 const auto volume = tensor1.range().volume();
501 auto wrapper_op = [=] (
typename TR::pointer MADNESS_RESTRICT result_ptr,
502 const typename T1::value_type value1,
503 const typename Ts::value_type... values)
504 {
new(result_ptr)
typename T1::value_type(op(value1, values...)); };
506 for(decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride)
508 (tensor1.data() + tensor1.range().ordinal(i)),
509 (tensors.data() + tensors.range().ordinal(i))...);
524 template <
typename Op,
typename TR,
typename T1,
typename... Ts,
525 typename std::enable_if<is_tensor_of_tensor<TR, T1, Ts...>::value
526 && is_contiguous_tensor<TR>::value
527 && ! is_contiguous_tensor<T1, Ts...>::value>::type* =
nullptr>
528 inline void tensor_init(Op&& op, TR& result,
const T1& tensor1,
529 const Ts&... tensors)
534 const auto stride =
inner_size(tensor1, tensors...);
535 const auto volume = tensor1.range().volume();
538 auto inplace_tensor_range =
539 [=] (
typename TR::pointer MADNESS_RESTRICT
const result_data,
540 typename T1::const_pointer MADNESS_RESTRICT
const tensor1_data,
541 typename Ts::const_pointer MADNESS_RESTRICT
const... tensors_data)
543 for(decltype(result.range().volume()) i = 0ul; i < stride; ++i)
545 typename TR::value_type(tensor_op<typename TR::value_type>(op,
546 tensor1_data[i], tensors_data[i]...));
549 for(decltype(volume) i = 0ul; i < volume; i += stride)
550 inplace_tensor_range(result.data() + i,
551 (tensor1.data() + tensor1.range().ordinal(i)),
552 (tensors.data() + tensors.range().ordinal(i))...);
576 template <
typename ReduceOp,
typename JoinOp,
typename Scalar,
typename T1,
typename... Ts,
577 typename std::enable_if_t<is_tensor<T1, Ts...>::value
578 && is_contiguous_tensor<T1, Ts...>::value>* =
nullptr>
580 Scalar
identity,
const T1& tensor1,
const Ts&... tensors)
585 const auto volume = tensor1.range().volume();
588 tensor1.data(), tensors.data()...);
611 template <
typename ReduceOp,
typename JoinOp,
typename Scalar,
typename T1,
typename... Ts,
612 typename std::enable_if<is_tensor_of_tensor<T1, Ts...>::value
613 && is_contiguous_tensor<T1, Ts...>::value>::type* =
nullptr>
615 Scalar
identity,
const T1& tensor1,
const Ts&... tensors)
620 const auto volume = tensor1.range().volume();
623 for(decltype(tensor1.range().volume()) i = 0ul; i < volume; ++i) {
626 join_op(result, temp);
650 template <
typename ReduceOp,
typename JoinOp,
typename Scalar,
typename T1,
typename... Ts,
651 typename std::enable_if<is_tensor<T1, Ts...>::value
652 && ! is_contiguous_tensor<T1, Ts...>::value>::type* =
nullptr>
654 const Scalar
identity,
const T1& tensor1,
const Ts&... tensors)
659 const auto stride =
inner_size(tensor1, tensors...);
660 const auto volume = tensor1.range().volume();
663 for(decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride) {
666 tensor1.data() + tensor1.range().ordinal(i),
667 (tensors.data() + tensors.range().ordinal(i))...);
668 join_op(result, temp);
692 template <
typename ReduceOp,
typename JoinOp,
typename Scalar,
typename T1,
typename... Ts,
693 typename std::enable_if<
694 is_tensor_of_tensor<T1, Ts...>::value
695 && ! is_contiguous_tensor<T1, Ts...>::value>::type* =
nullptr>
697 const Scalar
identity,
const T1& tensor1,
const Ts&... tensors)
702 const auto stride =
inner_size(tensor1, tensors...);
703 const auto volume = tensor1.range().volume();
705 auto tensor_reduce_range =
706 [=] (Scalar& MADNESS_RESTRICT result,
707 typename T1::const_pointer MADNESS_RESTRICT
const tensor1_data,
708 typename Ts::const_pointer MADNESS_RESTRICT
const... tensors_data)
710 for(decltype(result.range().volume()) i = 0ul; i < stride; ++i) {
712 tensor1_data[i], tensors_data[i]...);
713 join_op(result, temp);
718 for(decltype(tensor1.range().volume()) i = 0ul; i < volume; i += stride) {
719 Scalar temp = tensor_reduce_range(result,
720 tensor1.data() + tensor1.range().ordinal(i),
721 (tensors.data() + tensors.range().ordinal(i))...);
722 join_op(result, temp);
731 #endif // TILEDARRAY_TENSOR_KENERLS_H__INCLUDED
void inplace_tensor_op(Op &&op, TR &result, const Ts &... tensors)
In-place tensor operations with contiguous data.
void permute(InputOp &&input_op, OutputOp &&output_op, Result &result, const Permutation &perm, const Arg0 &arg0, const Args &... args)
Construct a permuted tensor copy.
constexpr bool is_range_set_congruent(const Permutation &perm, const T &tensor)
Test that the ranges of a permuted tensor is congruent with itself.
bool is_range_congruent(const Left &left, const ShiftWrapper< Right > &right)
Check for congruent range objects with a shifted tensor.
An N-dimensional tensor object.
void tensor_init(Op &&op, TR &result, const Ts &... tensors)
Initialize tensor with contiguous tensor arguments.
T1::size_type inner_size(const T1 &tensor1, const T2 &)
Get the inner size of two tensors.
index_type dim() const
Domain size accessor.
constexpr bool empty()
Test for empty tensors in an empty list.
T identity()
identity for group of objects of type T
void reduce_op(ReduceOp &&reduce_op, JoinOp &&join_op, const Result &identity, const std::size_t n, Result &result, const Args *const ... args)
void inplace_vector_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Permutation of a sequence of objects indexed by base-0 indices.
TR tensor_op(Op &&op, const T1 &tensor1, const Ts &... tensors)
Tensor operations with contiguous data.
Scalar tensor_reduce(ReduceOp &&reduce_op, JoinOp &&join_op, Scalar identity, const T1 &tensor1, const Ts &... tensors)
Reduction operation for contiguous tensors.
void vector_ptr_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)