TiledArray  0.7.0
permute.h
Go to the documentation of this file.
1 /*
2  * This file is a part of TiledArray.
3  * Copyright (C) 2015 Virginia Tech
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Justus Calvin
19  * Department of Chemistry, Virginia Tech
20  *
21  * permute.h
22  * May 31, 2015
23  *
24  */
25 
26 #ifndef TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED
27 #define TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED
28 
29 #include <TiledArray/perm_index.h>
31 
32 namespace TiledArray {
33  namespace detail {
34 
35 
37 
50  template <typename SizeType>
51  inline void fuse_dimensions(SizeType * MADNESS_RESTRICT const fused_size,
52  SizeType * MADNESS_RESTRICT const fused_weight,
53  const SizeType * MADNESS_RESTRICT const size, const Permutation& perm)
54  {
55  const unsigned int ndim1 = perm.dim() - 1u;
56 
57  int i = ndim1;
58  fused_size[3] = size[i--];
59  while((i >= 0) && (perm[i + 1u] == (perm[i] + 1u)))
60  fused_size[3] *= size[i--];
61  fused_weight[3] = 1u;
62 
63 
64  if((i >= 0) && (perm[i] != ndim1)) {
65  fused_size[2] = size[i--];
66  while((i >= 0) && (perm[i] != ndim1))
67  fused_size[2] *= size[i--];
68 
69  fused_weight[2] = fused_size[3];
70 
71  fused_size[1] = size[i--];
72  while((i >= 0) && (perm[i + 1] == (perm[i] + 1u)))
73  fused_size[1] *= size[i--];
74 
75  fused_weight[1] = fused_size[2] * fused_weight[2];
76  } else {
77  fused_size[2] = 1ul;
78  fused_weight[2] = 0ul;
79 
80  fused_size[1] = size[i--];
81  while((i >= 0) && (perm[i + 1] == (perm[i] + 1u)))
82  fused_size[1] *= size[i--];
83 
84  fused_weight[1] = fused_size[3];
85  }
86 
87  if(i >= 0) {
88  fused_size[0] = size[i--];
89  while(i >= 0)
90  fused_size[0] *= size[i--];
91 
92  fused_weight[0] = fused_size[1] * fused_weight[1];
93  } else {
94  fused_size[0] = 1ul;
95  fused_weight[0] = 0ul;
96  }
97  }
98 
100 
120  template <typename InputOp, typename OutputOp, typename Result,
121  typename Arg0, typename... Args>
122  inline void permute(InputOp&& input_op, OutputOp&& output_op, Result& result,
123  const Permutation& perm, const Arg0& arg0, const Args&... args)
124  {
125  detail::PermIndex perm_index_op(arg0.range(), perm);
126 
127  // Cache constants
128  const unsigned int ndim = arg0.range().rank();
129  const unsigned int ndim1 = ndim - 1;
130  const typename Result::size_type volume = arg0.range().volume();
131 
132  // Get pointer to arg extent
133  const auto* MADNESS_RESTRICT const arg0_extent = arg0.range().extent_data();
134 
135  if(perm[ndim1] == ndim1) {
136  // This is the simple case where the last dimension is not permuted.
137  // Therefore, it can be shuffled in chunks.
138 
139  // Determine which dimensions can be permuted with the least significant
140  // dimension.
141  typename Result::size_type block_size = arg0_extent[ndim1];
142  for(int i = int(ndim1) - 1 ; i >= 0; --i) {
143  if(int(perm[i]) != i)
144  break;
145  block_size *= arg0_extent[i];
146  }
147 
148  // Combine the input and output operations
149  auto op = [=] (typename Result::pointer result,
150  typename Arg0::const_reference a0, typename Args::const_reference... as)
151  { output_op(result, input_op(a0, as...)); };
152 
153  // Permute the data
154  for(typename Result::size_type index = 0ul; index < volume; index += block_size) {
155  const typename Result::size_type perm_index = perm_index_op(index);
156 
157  // Copy the block
158  math::vector_ptr_op(op, block_size, result.data() + perm_index,
159  arg0.data() + index, (args.data() + index)...);
160  }
161 
162  } else {
163  // This is the more complicated case. Here we permute in terms of matrix
164  // transposes. The data layout of the input and output matrices are
165  // chosen such that they both contain stride one dimensions.
166 
167  // Here we partition the n dimensional index space, I, of the permute
168  // tensor with up to four parts
169  // {I_1, ..., I_i, I_i+1, ..., I_j, I_j+1, ..., I_k, I_k+1, ..., I_n}
170  // where the subrange {I_k+1, ..., I_n} is the (fused) inner dimension
171  // in the input tensor, and the subrange {I_i+1, ..., I_j} is the
172  // (fused) inner dimension in the output tensor that has been mapped to
173  // the input tensor. These ranges are used to form a set of matrices in
174  // the input tensor that are transposed and copied to the output tensor.
175  // The remaining (fused) index ranges {I_1, ..., I_i} and
176  // {I_j+1, ..., I_k} are used to form the outer loop around the matrix
177  // transpose operations. These outer ranges may or may not be zero size.
178  typename Result::size_type other_fused_size[4];
179  typename Result::size_type other_fused_weight[4];
180  fuse_dimensions(other_fused_size, other_fused_weight,
181  arg0.range().extent_data(), perm);
182 
183  // Compute the fused stride for the result matrix transpose.
184  const auto* MADNESS_RESTRICT const result_extent = result.range().extent_data();
185  typename Result::size_type result_outer_stride = 1ul;
186  for(unsigned int i = perm[ndim1] + 1u; i < ndim; ++i)
187  result_outer_stride *= result_extent[i];
188 
189  // Copy data from the input to the output matrix via a series of matrix
190  // transposes.
191  for(typename Result::size_type i = 0ul; i < other_fused_size[0]; ++i) {
192  typename Result::size_type index = i * other_fused_weight[0];
193  for(typename Result::size_type j = 0ul; j < other_fused_size[2]; ++j, index += other_fused_weight[2]) {
194  // Compute the ordinal index of the input and output matrices.
195  typename Result::size_type perm_index = perm_index_op(index);
196 
197  math::transpose(input_op, output_op,
198  other_fused_size[1], other_fused_size[3],
199  result_outer_stride, result.data() + perm_index,
200  other_fused_weight[1], arg0.data() + index, (args.data() + index)...);
201  }
202  }
203  }
204  }
205 
206 
207  } // namespace detail
208 } // namespace TiledArray
209 
210 #endif // TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED
A functor that permutes ordinal indices.
Definition: perm_index.h:39
void permute(InputOp &&input_op, OutputOp &&output_op, Result &result, const Permutation &perm, const Arg0 &arg0, const Args &... args)
Construct a permuted tensor copy.
Definition: permute.h:122
index_type dim() const
Domain size accessor.
Definition: permutation.h:206
constexpr std::size_t size(T(&)[N])
Array size accessor.
Definition: utility.h:47
void fuse_dimensions(SizeType *MADNESS_RESTRICT const fused_size, SizeType *MADNESS_RESTRICT const fused_weight, const SizeType *MADNESS_RESTRICT const size, const Permutation &perm)
Compute the fused dimensions for permutation.
Definition: permute.h:51
Permutation of a sequence of objects indexed by base-0 indices.
Definition: permutation.h:119
void transpose(InputOp &&input_op, OutputOp &&output_op, const std::size_t m, const std::size_t n, const std::size_t result_stride, Result *result, const std::size_t arg_stride, const Args *const ... args)
Matrix transpose and initialization.
Definition: transpose.h:176
void vector_ptr_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:546