doc/html/tensor_2permute_8h_source.html

 /*
  *  This file is a part of TiledArray.
  *  Copyright (C) 2015  Virginia Tech
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation, either version 3 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  *  Justus Calvin
  *  Department of Chemistry, Virginia Tech
  *
  *  permute.h
  *  May 31, 2015
  *
  */

 #ifndef TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED
 #define TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED

 #include <TiledArray/perm_index.h>
 #include <TiledArray/math/transpose.h>

 namespace TiledArray {
   namespace detail {


     template <typename SizeType>
     inline void fuse_dimensions(SizeType * MADNESS_RESTRICT const fused_size,
         SizeType * MADNESS_RESTRICT const fused_weight,
         const SizeType * MADNESS_RESTRICT const size, const Permutation& perm)
     {
       const unsigned int ndim1 = perm.dim() - 1u;

       int i = ndim1;
       fused_size[3] = size[i--];
       while((i >= 0) && (perm[i + 1u] == (perm[i] + 1u)))
         fused_size[3] *= size[i--];
       fused_weight[3] = 1u;


       if((i >= 0) && (perm[i] != ndim1)) {
         fused_size[2] = size[i--];
         while((i >= 0) && (perm[i] != ndim1))
           fused_size[2] *= size[i--];

         fused_weight[2] = fused_size[3];

         fused_size[1] = size[i--];
         while((i >= 0) && (perm[i + 1] == (perm[i] + 1u)))
           fused_size[1] *= size[i--];

         fused_weight[1] = fused_size[2] * fused_weight[2];
       } else {
         fused_size[2] = 1ul;
         fused_weight[2] = 0ul;

         fused_size[1] = size[i--];
         while((i >= 0) && (perm[i + 1] == (perm[i] + 1u)))
           fused_size[1] *= size[i--];

         fused_weight[1] = fused_size[3];
       }

       if(i >= 0) {
         fused_size[0] = size[i--];
         while(i >= 0)
           fused_size[0] *= size[i--];

         fused_weight[0] = fused_size[1] * fused_weight[1];
       } else {
         fused_size[0] = 1ul;
         fused_weight[0] = 0ul;
       }
     }


     template <typename InputOp, typename OutputOp, typename Result,
         typename Arg0, typename... Args>
     inline void permute(InputOp&& input_op, OutputOp&& output_op, Result& result,
         const Permutation& perm, const Arg0& arg0, const Args&... args)
     {
       detail::PermIndex perm_index_op(arg0.range(), perm);

       // Cache constants
       const unsigned int ndim = arg0.range().rank();
       const unsigned int ndim1 = ndim - 1;
       const typename Result::size_type volume = arg0.range().volume();

       // Get pointer to arg extent
       const auto* MADNESS_RESTRICT const arg0_extent = arg0.range().extent_data();

       if(perm[ndim1] == ndim1) {
         // This is the simple case where the last dimension is not permuted.
         // Therefore, it can be shuffled in chunks.

         // Determine which dimensions can be permuted with the least significant
         // dimension.
         typename Result::size_type block_size = arg0_extent[ndim1];
         for(int i = int(ndim1) - 1 ; i >= 0; --i) {
           if(int(perm[i]) != i)
             break;
           block_size *= arg0_extent[i];
         }

         // Combine the input and output operations
         auto op = [=] (typename Result::pointer result,
             typename Arg0::const_reference a0, typename Args::const_reference... as)
         { output_op(result, input_op(a0, as...)); };

         // Permute the data
         for(typename Result::size_type index = 0ul; index < volume; index += block_size) {
           const typename Result::size_type perm_index = perm_index_op(index);

           // Copy the block
           math::vector_ptr_op(op, block_size, result.data() + perm_index,
               arg0.data() + index, (args.data() + index)...);
         }

       } else {
         // This is the more complicated case. Here we permute in terms of matrix
         // transposes. The data layout of the input and output matrices are
         // chosen such that they both contain stride one dimensions.

         // Here we partition the n dimensional index space, I, of the permute
         // tensor with up to four parts
         // {I_1, ..., I_i, I_i+1, ..., I_j, I_j+1, ..., I_k, I_k+1, ..., I_n}
         // where the subrange {I_k+1, ..., I_n} is the (fused) inner dimension
         // in the input tensor, and the subrange {I_i+1, ..., I_j} is the
         // (fused) inner dimension in the output tensor that has been mapped to
         // the input tensor. These ranges are used to form a set of matrices in
         // the input tensor that are transposed and copied to the output tensor.
         // The remaining (fused) index ranges {I_1, ..., I_i} and
         // {I_j+1, ..., I_k} are used to form the outer loop around the matrix
         // transpose operations. These outer ranges may or may not be zero size.
         typename Result::size_type other_fused_size[4];
         typename Result::size_type other_fused_weight[4];
         fuse_dimensions(other_fused_size, other_fused_weight,
             arg0.range().extent_data(), perm);

         // Compute the fused stride for the result matrix transpose.
         const auto* MADNESS_RESTRICT const result_extent = result.range().extent_data();
         typename Result::size_type  result_outer_stride = 1ul;
         for(unsigned int i = perm[ndim1] + 1u; i < ndim; ++i)
           result_outer_stride *= result_extent[i];

         // Copy data from the input to the output matrix via a series of matrix
         // transposes.
         for(typename Result::size_type i = 0ul; i < other_fused_size[0]; ++i) {
           typename Result::size_type index = i * other_fused_weight[0];
           for(typename Result::size_type j = 0ul; j < other_fused_size[2]; ++j, index += other_fused_weight[2]) {
             // Compute the ordinal index of the input and output matrices.
             typename Result::size_type perm_index = perm_index_op(index);

             math::transpose(input_op, output_op,
                 other_fused_size[1], other_fused_size[3],
                 result_outer_stride, result.data() + perm_index,
                 other_fused_weight[1], arg0.data() + index, (args.data() + index)...);
           }
         }
       }
     }


   }  // namespace detail
 } // namespace TiledArray

 #endif // TILEDARRAY_TENSOR_PERMUTE_H__INCLUDED
TiledArray::detail::PermIndex
A functor that permutes ordinal indices.
Definition: perm_index.h:39

TiledArray::detail::permute
void permute(InputOp &&input_op, OutputOp &&output_op, Result &result, const Permutation &perm, const Arg0 &arg0, const Args &... args)
Construct a permuted tensor copy.
Definition: permute.h:122

transpose.h

perm_index.h

TiledArray
Definition: conjgrad.h:34

TiledArray::Permutation::dim
index_type dim() const
Domain size accessor.
Definition: permutation.h:206

TiledArray::detail::size
constexpr std::size_t size(T(&)[N])
Array size accessor.
Definition: utility.h:47

TiledArray::detail::fuse_dimensions
void fuse_dimensions(SizeType *MADNESS_RESTRICT const fused_size, SizeType *MADNESS_RESTRICT const fused_weight, const SizeType *MADNESS_RESTRICT const size, const Permutation &perm)
Compute the fused dimensions for permutation.
Definition: permute.h:51

TiledArray::Permutation
Permutation of a sequence of objects indexed by base-0 indices.
Definition: permutation.h:119

TiledArray::math::transpose
void transpose(InputOp &&input_op, OutputOp &&output_op, const std::size_t m, const std::size_t n, const std::size_t result_stride, Result *result, const std::size_t arg_stride, const Args *const ... args)
Matrix transpose and initialization.
Definition: transpose.h:176

TiledArray::math::vector_ptr_op
void vector_ptr_op(Op &&op, const std::size_t n, Result *const result, const Args *const ... args)
Definition: vector_op.h:546