/*
 * This file is a part of TiledArray.
 * Copyright (C) 2013 Virginia Tech
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Justus Calvin
 * Department of Chemistry, Virginia Tech
 *
 * sparse_shape.h
 * Jul 9, 2013
 *
 */

#ifndef TILEDARRAY_SPARSE_SHAPE_H__INCLUDED
#define TILEDARRAY_SPARSE_SHAPE_H__INCLUDED

#include <TiledArray/tensor.h>
#include <TiledArray/tiled_range.h>
#include <TiledArray/val_array.h>
#include <TiledArray/tensor/shift_wrapper.h>  // detail::ShiftWrapper / shift(), used by block()
#include <typeinfo>

namespace TiledArray {

  /// Arbitrary sparse shape

  /// Stores a scaled norm for each tile of a tensor. Each stored value is
  /// the tile norm divided by the tile volume (i.e. an average per-element
  /// magnitude); tiles whose scaled norm falls below threshold() are
  /// treated as zero.
  template <typename T>
  class SparseShape {
  public:
    typedef SparseShape<T> SparseShape_; ///< This object type
    typedef T value_type; ///< The norm value type
    static_assert(TiledArray::detail::is_numeric<T>::value,
        "SparseShape<T> only supports scalar numeric types for T");
    typedef typename Tensor<value_type>::size_type size_type; ///< Size type

  private:

    // T must be a floating point type
    static_assert(std::is_floating_point<T>::value,
        "SparseShape template type T must be a floating point type");

    // Internal typedefs
    typedef ValArray<value_type> vector_type;

    Tensor<value_type> tile_norms_; ///< The scaled tile norms
    std::shared_ptr<vector_type> size_vectors_; ///< Tile extents along each dimension
    size_type zero_tile_count_; ///< Number of zero tiles
    static value_type threshold_; ///< The zero threshold

    // Recursively compute the outer product of a set of size vectors, after
    // applying op to each vector. The dimensions are split in half at each
    // level to minimize temporary storage.
    template <typename Op>
    static vector_type
    recursive_outer_product(const vector_type* const size_vectors,
        const unsigned int dim, const Op& op)
    {
      vector_type result;

      if(dim == 1u) {
        // Construct a modified copy of size_vector[0]
        result = op(*size_vectors);
      } else {
        // Split the dimensions and recursively compute the outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, op);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, op);

        // Compute the outer product of left and right
        result = vector_type(left.size() * right.size());
        result.outer_fill(left, right,
            [] (const value_type left, const value_type right) { return left * right; });
      }

      return result;
    }

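    // Illustration only (not part of the original source): for a rank-3
    // shape with per-dimension tile-extent vectors s0 = {2,3}, s1 = {4},
    // s2 = {5,6}, the per-tile volumes form the outer product s0 x s1 x s2.
    // With op = identity the recursion computes
    //   left   = s0 x s1      ->  {8, 12}
    //   right  = s2           ->  {5, 6}
    //   result = left x right ->  {40, 48, 60, 72}
    // so only O(sqrt(volume))-sized temporaries are ever materialized.
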
    /// Normalize the tile norms

    /// Divide each tile norm by the tile volume and zero out norms that
    /// fall below the threshold; the number of zeroed tiles is accumulated
    /// in zero_tile_count_.
    void normalize() {
      const value_type threshold = threshold_;
      const unsigned int dim = tile_norms_.range().rank();
      const vector_type* MADNESS_RESTRICT const size_vectors = size_vectors_.get();
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      if(dim == 1u) {
        auto normalize_op = [threshold, &zero_tile_count] (value_type& norm, const value_type size) {
          TA_ASSERT(norm >= value_type(0));
          norm /= size;
          if(norm < threshold) {
            norm = value_type(0);
            ++zero_tile_count;
          }
        };

        // This is the easy case where the data is a vector and can be
        // normalized directly.
        math::inplace_vector_op(normalize_op, size_vectors[0].size(), tile_norms_.data(),
            size_vectors[0].data());

      } else {
        // Here the normalization constants are computed and multiplied by the
        // norm data using a recursive, outer-product algorithm. This is done to
        // minimize temporary memory requirements, memory bandwidth, and work.

        auto inv_vec_op = [] (const vector_type& size_vector) {
          return vector_type(size_vector,
              [] (const value_type size) { return value_type(1) / size; });
        };

        // Compute the left and right outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, inv_vec_op);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, inv_vec_op);

        auto normalize_op = [threshold, &zero_tile_count] (value_type& norm,
            const value_type x, const value_type y)
        {
          TA_ASSERT(norm >= value_type(0));
          norm *= x * y;
          if(norm < threshold) {
            norm = value_type(0);
            ++zero_tile_count;
          }
        };

        math::outer(left.size(), right.size(), left.data(), right.data(),
            tile_norms_.data(), normalize_op);
      }

      zero_tile_count_ = zero_tile_count;
    }

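    // Worked example (illustration only): with threshold_ = 1e-7, a 10x10
    // tile whose norm is 1e-6 stores 1e-6 / 100 = 1e-8 after normalization,
    // which is below the threshold, so the tile is marked zero and
    // zero_tile_count_ is incremented.
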
    static std::shared_ptr<vector_type>
    initialize_size_vectors(const TiledRange& trange) {
      // Allocate memory for size vectors
      const unsigned int dim = trange.tiles_range().rank();
      std::shared_ptr<vector_type> size_vectors(new vector_type[dim],
          std::default_delete<vector_type[]>());

      // Initialize the size vectors
      for(unsigned int i = 0u; i != dim; ++i) {
        const size_type n = trange.data()[i].tiles_range().second - trange.data()[i].tiles_range().first;

        size_vectors.get()[i] = vector_type(n, & (* trange.data()[i].begin()),
            [] (const TiledRange1::range_type& tile)
            { return value_type(tile.second - tile.first); });
      }

      return size_vectors;
    }

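    // Illustration only: for TiledRange{{0,2,5},{0,4}} the size vectors are
    //   dim 0: {2, 3}  (tile boundaries 0-2 and 2-5)
    //   dim 1: {4}
    // i.e. one entry per tile along each dimension, holding the tile extent.
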
    std::shared_ptr<vector_type> perm_size_vectors(const Permutation& perm) const {
      const unsigned int n = tile_norms_.range().rank();

      // Allocate memory for the permuted size vectors
      std::shared_ptr<vector_type> result_size_vectors(new vector_type[n],
          std::default_delete<vector_type[]>());

      // Initialize the size vectors
      for(unsigned int i = 0u; i < n; ++i) {
        const unsigned int perm_i = perm[i];
        result_size_vectors.get()[perm_i] = size_vectors_.get()[i];
      }

      return result_size_vectors;
    }

    SparseShape(const Tensor<T>& tile_norms, const std::shared_ptr<vector_type>& size_vectors,
        const size_type zero_tile_count) :
      tile_norms_(tile_norms), size_vectors_(size_vectors),
      zero_tile_count_(zero_tile_count)
    { }

  public:

    /// Default constructor

    /// Construct a shape with no data.
    SparseShape() : tile_norms_(), size_vectors_(), zero_tile_count_(0ul) { }

    /// Constructor

    /// \param tile_norms The norm of each tile
    /// \param trange The tiled range of the tensor
    /// \note The norms are scaled by the inverse tile volumes, and scaled
    /// norms that fall below the threshold are set to zero.
    SparseShape(const Tensor<value_type>& tile_norms, const TiledRange& trange) :
      tile_norms_(tile_norms.clone()), size_vectors_(initialize_size_vectors(trange)),
      zero_tile_count_(0ul)
    {
      TA_ASSERT(! tile_norms_.empty());
      TA_ASSERT(tile_norms_.range() == trange.tiles_range());

      normalize();
    }

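    // Usage sketch (illustration only; names are hypothetical):
    //
    //   TiledRange trange{{0, 10, 20}, {0, 10, 20}};      // 2x2 tiles
    //   Tensor<float> norms(trange.tiles_range(), 0.0f);  // tile-norm tensor
    //   norms(0, 0) = 10.0f;                              // one nonzero tile
    //   SparseShape<float> shape(norms, trange);
    //   // shape.sparsity() == 0.75f  (3 of 4 tiles are zero)
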
    /// "Sparse" constructor

    /// Construct a shape from a sequence of {index, norm} pairs; tiles that
    /// do not appear in \c tile_norms are treated as zero.
    /// \tparam SparseNormSequence A sequence of {index, norm} pairs
    /// \param tile_norms The {index, norm} pairs for the nonzero tiles
    /// \param trange The tiled range of the tensor
    template<typename SparseNormSequence>
    SparseShape(const SparseNormSequence& tile_norms,
        const TiledRange& trange) :
      tile_norms_(trange.tiles_range(), value_type(0)), size_vectors_(initialize_size_vectors(trange)),
      zero_tile_count_(trange.tiles_range().volume())
    {
      const auto dim = tile_norms_.range().rank();
      for(const auto& pair_idx_norm: tile_norms) {
        auto compute_tile_volume = [dim,this,pair_idx_norm]() -> uint64_t {
          uint64_t tile_volume = 1;
          for(size_t d = 0; d != dim; ++d)
            tile_volume *= size_vectors_.get()[d].at(pair_idx_norm.first[d]);
          return tile_volume;
        };
        auto norm_per_element = pair_idx_norm.second / compute_tile_volume();
        if (norm_per_element >= threshold()) {
          tile_norms_[pair_idx_norm.first] = norm_per_element;
          --zero_tile_count_;
        }
      }
    }

    /// Collective "dense" constructor

    /// The tile norms are summed across all processes in \c world before
    /// normalization, so each process may contribute a partial norm tensor.
    /// \param world The world where the shape will live
    /// \param tile_norms The norm of each tile
    /// \param trange The tiled range of the tensor
    SparseShape(World& world, const Tensor<value_type>& tile_norms,
        const TiledRange& trange) :
      tile_norms_(tile_norms.clone()), size_vectors_(initialize_size_vectors(trange)),
      zero_tile_count_(0ul)
    {
      TA_ASSERT(! tile_norms_.empty());
      TA_ASSERT(tile_norms_.range() == trange.tiles_range());

      // Reduce norm data from all processes
      world.gop.sum(tile_norms_.data(), tile_norms_.size());

      normalize();
    }

    /// Collective "sparse" constructor

    /// Each process contributes a sequence of {index, norm} pairs; the
    /// resulting norm data are then summed across all processes in \c world.
    /// \param world The world where the shape will live
    /// \param tile_norms The {index, norm} pairs contributed by this process
    /// \param trange The tiled range of the tensor
    template<typename SparseNormSequence>
    SparseShape(World& world,
        const SparseNormSequence& tile_norms,
        const TiledRange& trange) : SparseShape(tile_norms, trange)
    {
      world.gop.sum(tile_norms_.data(), tile_norms_.size());
    }

    /// Copy constructor

    /// \param other The shape to be copied
    SparseShape(const SparseShape<T>& other) :
      tile_norms_(other.tile_norms_), size_vectors_(other.size_vectors_),
      zero_tile_count_(other.zero_tile_count_)
    { }

    /// Copy assignment operator

    /// \param other The shape to be copied
    /// \return A reference to this object
    SparseShape<T>& operator=(const SparseShape<T>& other) {
      tile_norms_ = other.tile_norms_;
      size_vectors_ = other.size_vectors_;
      zero_tile_count_ = other.zero_tile_count_;
      return *this;
    }

    /// Validate shape range

    /// \return \c true when this shape has been initialized and its range
    /// matches \c range, otherwise \c false
    bool validate(const Range& range) const {
      if(tile_norms_.empty())
        return false;
      return (range == tile_norms_.range());
    }

    /// Check that a tile is zero

    /// \tparam Index The tile index type
    /// \param i The tile index
    /// \return \c true when the scaled norm of tile \c i is below the threshold
    template <typename Index>
    bool is_zero(const Index& i) const {
      TA_ASSERT(! tile_norms_.empty());
      return tile_norms_[i] < threshold_;
    }

    /// Check density

    /// \return \c false (a SparseShape is never dense)
    static constexpr bool is_dense() { return false; }

    /// Sparsity of the shape

    /// \return The fraction of tiles that are zero
    float sparsity() const {
      TA_ASSERT(! tile_norms_.empty());
      return float(zero_tile_count_) / float(tile_norms_.size());
    }

    /// Threshold accessor

    /// \return The current zero threshold
    static value_type threshold() { return threshold_; }

    /// Set threshold to \c thresh

    /// \param thresh The new zero threshold; it is shared by all shapes
    /// with the same \c T
    static void threshold(const value_type thresh) { threshold_ = thresh; }

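    // Usage sketch (illustration only): treat tiles whose average element
    // magnitude is below 1e-10 as zero in all subsequent shape operations:
    //
    //   SparseShape<float>::threshold(1e-10f);
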
    /// Tile norm accessor

    /// \tparam Index The tile index type
    /// \param index The tile index
    /// \return The scaled norm of the tile at \c index
    template <typename Index>
    value_type operator[](const Index& index) const {
      TA_ASSERT(! tile_norms_.empty());
      return tile_norms_[index];
    }

    /// Transform the norm tensor with an operation

    /// \tparam Op The transform operation type
    /// \param op An operation that takes the norm tensor and returns a new
    /// norm tensor
    /// \return A new shape with norms \c op(data()); norms below the
    /// threshold are set to zero
    /// \note \c op is applied to the already-scaled norms; the result is
    /// not renormalized.
    template<typename Op>
    SparseShape_ transform(Op &&op) const {

      Tensor<T> new_norms = op(tile_norms_);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      const value_type threshold = threshold_;
      auto apply_threshold = [threshold, &zero_tile_count](value_type &norm){
        TA_ASSERT(norm >= value_type(0));
        if(norm < threshold){
          norm = value_type(0);
          ++zero_tile_count;
        }
      };

      math::inplace_vector_op(apply_threshold, new_norms.range().volume(),
          new_norms.data());

      return SparseShape_(std::move(new_norms), size_vectors_,
          zero_tile_count);
    }

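    // Usage sketch (illustration only): double every tile norm, with the
    // zero threshold re-applied to the result:
    //
    //   auto doubled = shape.transform([](const Tensor<float>& t) {
    //     return t.scale(2.0f);
    //   });
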
    /// Data accessor

    /// \return A const reference to the scaled tile norm tensor
    const Tensor<value_type>& data() const { return tile_norms_; }

    /// Initialization check

    /// \return \c true when this shape has not been initialized
    bool empty() const { return tile_norms_.empty(); }

    /// Mask this shape with another

    /// Zero out the tiles of this shape that are zero in \c mask_shape;
    /// the surviving norms are unchanged.
    /// \param mask_shape The shape to be used as a mask
    /// \return A new, masked shape
    SparseShape_ mask(const SparseShape_ &mask_shape) const {
      TA_ASSERT(!tile_norms_.empty());
      TA_ASSERT(!mask_shape.empty());
      TA_ASSERT(tile_norms_.range() == mask_shape.tile_norms_.range());

      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = zero_tile_count_;
      auto op = [threshold, &zero_tile_count] (value_type left,
          const value_type right)
      {
        if(left >= threshold && right < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }

        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(mask_shape.tile_norms_, op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Update sub-block of shape

    /// Replace the tile norms in the sub-block bounded by \c lower_bound and
    /// \c upper_bound with the norms in \c other, adjusting the zero-tile
    /// count accordingly.
    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param other The shape that will replace the sub-block
    /// \return A new shape with the updated sub-block
    template <typename Index>
    SparseShape update_block(const Index& lower_bound, const Index& upper_bound,
        const SparseShape& other) const
    {
      Tensor<value_type> result_tile_norms = tile_norms_.clone();

      auto result_tile_norms_blk = result_tile_norms.block(lower_bound, upper_bound);
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = zero_tile_count_;
      result_tile_norms_blk.inplace_binary(other.tile_norms_,
          [threshold,&zero_tile_count] (value_type& l, const value_type r) {
            // Update the zero tile count for the result: a zero tile that
            // becomes nonzero decrements the count, and vice versa.
            if((l < threshold) && (r >= threshold))
              --zero_tile_count;
            else if((l >= threshold) && (r < threshold))
              ++zero_tile_count;

            // Update the tile norm value
            l = r;
          });

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

  private:

    /// Compute the size vectors for a sub-block

    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \return Size vectors restricted to the given tile-index range
    template <typename Index>
    std::shared_ptr<vector_type>
    block_range(const Index& lower_bound, const Index& upper_bound) const {
      TA_ASSERT(detail::size(lower_bound) == tile_norms_.range().rank());
      TA_ASSERT(detail::size(upper_bound) == tile_norms_.range().rank());

      // Get the number of dimensions of the shape
      const auto rank = detail::size(lower_bound);
      const auto* MADNESS_RESTRICT const lower = detail::data(lower_bound);
      const auto* MADNESS_RESTRICT const upper = detail::data(upper_bound);

      std::shared_ptr<vector_type> size_vectors(new vector_type[rank],
          std::default_delete<vector_type[]>());

      for(auto i = 0ul; i < rank; ++i) {
        // Get the new range size
        const auto lower_i = lower[i];
        const auto upper_i = upper[i];
        const auto extent_i = upper_i - lower_i;

        // Check that the input indices are in range
        TA_ASSERT(lower_i < upper_i);
        TA_ASSERT(upper_i <= tile_norms_.range().upbound(i));

        // Construct the size vector for rank i
        size_vectors.get()[i] = vector_type(extent_i,
            size_vectors_.get()[i].data() + lower_i);
      }

      return size_vectors;
    }

  public:

    /// Create a copy of a sub-block of the shape

    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \return The shape of the sub-block
    template <typename Index>
    SparseShape block(const Index& lower_bound, const Index& upper_bound) const {
      std::shared_ptr<vector_type> size_vectors =
          block_range(lower_bound, upper_bound);

      // Copy the data from arg to result
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto copy_op = [threshold,&zero_tile_count] (value_type& MADNESS_RESTRICT result,
          const value_type arg)
      {
        result = arg;
        if(arg < threshold)
          ++zero_tile_count;
      };

      // Construct the result norms tensor
      TensorConstView<value_type> block_view =
          tile_norms_.block(lower_bound, upper_bound);
      Tensor<value_type> result_norms((Range(block_view.range().extent())));
      result_norms.inplace_binary(shift(block_view), copy_op);

      return SparseShape(result_norms, size_vectors, zero_tile_count);
    }

    /// Create a scaled sub-block of the shape

    /// \tparam Index The bound index type
    /// \tparam Factor The scaling factor type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param factor The scaling factor (its absolute value is used)
    /// \return The scaled shape of the sub-block
    template <typename Index, typename Factor>
    SparseShape block(const Index& lower_bound, const Index& upper_bound,
        const Factor factor) const
    {
      const value_type abs_factor = to_abs_factor(factor);
      std::shared_ptr<vector_type> size_vectors =
          block_range(lower_bound, upper_bound);

      // Copy and scale the data from arg to result
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto copy_op = [abs_factor,threshold,&zero_tile_count] (value_type& MADNESS_RESTRICT result,
          const value_type arg)
      {
        result = arg * abs_factor;
        if(result < threshold) {
          ++zero_tile_count;
          result = value_type(0);
        }
      };

      // Construct the result norms tensor
      TensorConstView<value_type> block_view =
          tile_norms_.block(lower_bound, upper_bound);
      Tensor<value_type> result_norms((Range(block_view.range().extent())));
      result_norms.inplace_binary(shift(block_view), copy_op);

      return SparseShape(result_norms, size_vectors, zero_tile_count);
    }

    /// Create a permuted sub-block of the shape

    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param perm The permutation to be applied
    /// \return The permuted shape of the sub-block
    template <typename Index>
    SparseShape block(const Index& lower_bound, const Index& upper_bound,
        const Permutation& perm) const
    {
      return block(lower_bound, upper_bound).perm(perm);
    }

    /// Create a scaled and permuted sub-block of the shape

    /// \tparam Index The bound index type
    /// \tparam Factor The scaling factor type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return The scaled and permuted shape of the sub-block
    template <typename Index, typename Factor>
    SparseShape block(const Index& lower_bound, const Index& upper_bound,
        const Factor factor, const Permutation& perm) const
    {
      return block(lower_bound, upper_bound, factor).perm(perm);
    }

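    // Usage sketch (illustration only): the shape of the 2x2 block of tiles
    // with tile indices {1,1} .. {2,2}, scaled by 0.5:
    //
    //   std::vector<std::size_t> lo{1, 1}, up{3, 3};
    //   SparseShape<float> sub = shape.block(lo, up, 0.5f);
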
    /// Create a permuted shape of this shape

    /// \param perm The permutation to be applied
    /// \return A new, permuted shape
    SparseShape_ perm(const Permutation& perm) const {
      return SparseShape_(tile_norms_.permute(perm), perm_size_vectors(perm),
          zero_tile_count_);
    }

    /// Scale shape

    /// Construct a new scaled shape as: result_norms = abs(factor) * norms
    /// \tparam Factor The scaling factor type
    /// \param factor The scaling factor (its absolute value is used)
    /// \return A new, scaled shape
    template <typename Factor>
    SparseShape_ scale(const Factor factor) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor] (value_type value) {
        value *= abs_factor;
        if(value < threshold) {
          value = value_type(0);
          ++zero_tile_count;
        }
        return value;
      };

      Tensor<value_type> result_tile_norms = tile_norms_.unary(op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Scale and permute shape

    /// Construct a new scaled shape as:
    /// result_norms = perm(abs(factor) * norms)
    /// \tparam Factor The scaling factor type
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return A new, scaled and permuted shape
    template <typename Factor>
    SparseShape_ scale(const Factor factor, const Permutation& perm) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor] (value_type value) {
        value *= abs_factor;
        if(value < threshold) {
          value = value_type(0);
          ++zero_tile_count;
        }
        return value;
      };

      Tensor<value_type> result_tile_norms = tile_norms_.unary(op, perm);

      return SparseShape_(result_tile_norms, perm_size_vectors(perm),
          zero_tile_count);
    }

    /// Add shapes

    /// Construct a new sum shape as: result_norms = norms + other_norms
    /// (an upper bound on the norm of the sum, by the triangle inequality).
    /// \param other The shape to be added to this shape
    /// \return A new sum shape
    SparseShape_ add(const SparseShape_& other) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count] (value_type left,
          const value_type right)
      {
        left += right;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Add and permute shapes

    /// Construct a new, permuted sum shape as:
    /// result_norms = perm(norms + other_norms)
    /// \param other The shape to be added to this shape
    /// \param perm The permutation to be applied
    /// \return A new, permuted sum shape
    SparseShape_ add(const SparseShape_& other, const Permutation& perm) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count] (value_type left,
          const value_type right)
      {
        left += right;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op, perm);

      return SparseShape_(result_tile_norms, perm_size_vectors(perm),
          zero_tile_count);
    }

    /// Add and scale shapes

    /// Construct a new scaled-sum shape as:
    /// result_norms = abs(factor) * (norms + other_norms)
    /// \tparam Factor The scaling factor type
    /// \param other The shape to be added to this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \return A new scaled-sum shape
    template <typename Factor>
    SparseShape_ add(const SparseShape_& other, const Factor factor) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor] (value_type left,
          const value_type right)
      {
        left += right;
        left *= abs_factor;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Add, scale, and permute shapes

    /// Construct a new scaled-sum shape as:
    /// result_norms = perm(abs(factor) * (norms + other_norms))
    /// \tparam Factor The scaling factor type
    /// \param other The shape to be added to this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return A new, scaled and permuted sum shape
    template <typename Factor>
    SparseShape_ add(const SparseShape_& other, const Factor factor,
        const Permutation& perm) const
    {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor]
          (value_type left, const value_type right)
      {
        left += right;
        left *= abs_factor;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op, perm);

      return SparseShape_(result_tile_norms, perm_size_vectors(perm),
          zero_tile_count);
    }

    /// Add a constant to the shape

    /// Bound the norms of (tensor + value) as:
    /// result_norms = norms + abs(value) / sqrt(tile_volume)
    /// \param value The constant to be added to each element
    /// \return A new shape bounding (tensor + value)
    SparseShape_ add(value_type value) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      Tensor<T> result_tile_norms(tile_norms_.range());

      value = std::abs(value);
      const unsigned int dim = tile_norms_.range().rank();
      const vector_type* MADNESS_RESTRICT const size_vectors = size_vectors_.get();

      if(dim == 1u) {
        auto add_const_op = [threshold, &zero_tile_count, value] (value_type norm,
            const value_type size)
        {
          norm += value / std::sqrt(size);
          if(norm < threshold) {
            norm = 0;
            ++zero_tile_count;
          }
          return norm;
        };

        // This is the easy case where the data is a vector and can be
        // updated directly.
        math::vector_op(add_const_op, size_vectors[0].size(), result_tile_norms.data(),
            tile_norms_.data(), size_vectors[0].data());

      } else {
        // Here the normalization constants are computed and multiplied by the
        // norm data using a recursive, outer-product algorithm. This is done to
        // minimize temporary memory requirements, memory bandwidth, and work.

        auto inv_sqrt_vec_op = [] (const vector_type& size_vector) {
          return vector_type(size_vector,
              [] (const value_type size) { return value_type(1) / std::sqrt(size); });
        };

        // Compute the left and right outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, inv_sqrt_vec_op);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, inv_sqrt_vec_op);

        math::outer_fill(left.size(), right.size(), left.data(), right.data(),
            tile_norms_.data(), result_tile_norms.data(),
            [threshold, &zero_tile_count, value] (value_type& norm,
                const value_type x, const value_type y)
            {
              norm += value * x * y;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
            });
      }

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Add a constant to the shape and permute the result

    /// \param value The constant to be added to each element
    /// \param perm The permutation to be applied
    /// \return A new, permuted shape bounding (tensor + value)
    SparseShape_ add(const value_type value, const Permutation& perm) const {
      // TODO: Optimize this function so it does the permutation at the same
      // time as the addition.
      return add(value).perm(perm);
    }

    SparseShape_ subt(const SparseShape_& other) const {
      return add(other);
    }

    SparseShape_ subt(const SparseShape_& other, const Permutation& perm) const {
      return add(other, perm);
    }

    template <typename Factor>
    SparseShape_ subt(const SparseShape_& other, const Factor factor) const {
      return add(other, factor);
    }

    template <typename Factor>
    SparseShape_ subt(const SparseShape_& other, const Factor factor,
        const Permutation& perm) const
    {
      return add(other, factor, perm);
    }

    SparseShape_ subt(const value_type value) const {
      return add(value);
    }

    SparseShape_ subt(const value_type value, const Permutation& perm) const {
      return add(value, perm);
    }
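
    // The subt() overloads forward to add(): the stored values are norm
    // upper bounds, and by the triangle inequality
    // ||A - B|| <= ||A|| + ||B||, so a difference has the same bound as a
    // sum.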

  private:

    // Scale the given tile norms by the tile volumes, zeroing norms that
    // fall below the threshold; returns the number of zero tiles.
    static size_type scale_by_size(Tensor<T>& tile_norms,
        const vector_type* MADNESS_RESTRICT const size_vectors)
    {
      const unsigned int dim = tile_norms.range().rank();
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      if(dim == 1u) {
        // This is the easy case where the data is a vector and can be
        // scaled directly.
        math::inplace_vector_op(
            [threshold, &zero_tile_count] (value_type& norm, const value_type size) {
              norm *= size;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
            },
            size_vectors[0].size(), tile_norms.data(), size_vectors[0].data());
      } else {
        // Here the scaling constants are computed and multiplied by the
        // norm data using a recursive, outer-product algorithm. This is done
        // to minimize temporary memory requirements, memory bandwidth, and
        // work.

        auto noop = [](const vector_type& size_vector) -> const vector_type& {
          return size_vector;
        };

        // Compute the left and right outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, noop);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, noop);

        math::outer(left.size(), right.size(), left.data(), right.data(), tile_norms.data(),
            [threshold, &zero_tile_count] (value_type& norm, const value_type x,
                const value_type y)
            {
              norm *= x * y;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
            });
      }

      return zero_tile_count;
    }

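    // Why mult() scales by the tile volume (a derivation from the stored
    // scaling, not part of the original source): the shape stores
    // a = ||A||/v and b = ||B||/v for tile volume v. For the element-wise
    // product, ||A * B|| <= ||A|| * ||B||, so the stored result should be
    // ||A||*||B||/v = (a*v)*(b*v)/v = a*b*v; hence mult() forms a*b and
    // then calls scale_by_size() to multiply by v.
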
  public:

    /// Multiply shapes

    /// Construct a new product shape as:
    /// result_norms = (norms * other_norms) * tile_volume
    /// \param other The shape to be multiplied by this shape
    /// \return A new product shape
    SparseShape_ mult(const SparseShape_& other) const {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, size_vectors_.get());

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Multiply and permute shapes

    /// \param other The shape to be multiplied by this shape
    /// \param perm The permutation to be applied
    /// \return A new, permuted product shape
    SparseShape_ mult(const SparseShape_& other, const Permutation& perm) const {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_, perm);
      std::shared_ptr<vector_type> result_size_vector = perm_size_vectors(perm);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, result_size_vector.get());

      return SparseShape_(result_tile_norms, result_size_vector, zero_tile_count);
    }

    /// Multiply and scale shapes

    /// \tparam Factor The scaling factor type
    /// \param other The shape to be multiplied by this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \return A new, scaled product shape
    template <typename Factor>
    SparseShape_ mult(const SparseShape_& other, const Factor factor) const {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      const value_type abs_factor = to_abs_factor(factor);
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_, abs_factor);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, size_vectors_.get());

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Multiply, scale, and permute shapes

    /// \tparam Factor The scaling factor type
    /// \param other The shape to be multiplied by this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return A new, scaled and permuted product shape
    template <typename Factor>
    SparseShape_ mult(const SparseShape_& other, const Factor factor,
        const Permutation& perm) const
    {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      const value_type abs_factor = to_abs_factor(factor);
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_, abs_factor, perm);
      std::shared_ptr<vector_type> result_size_vector = perm_size_vectors(perm);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, result_size_vector.get());

      return SparseShape_(result_tile_norms, result_size_vector, zero_tile_count);
    }


    /// Contract shapes (GEMM)

    /// Compute the shape of the contraction of this shape with \c other, as
    /// described by \c gemm_helper, scaled by abs(factor).
    /// \tparam Factor The scaling factor type
    /// \param other The right-hand shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param gemm_helper The *GEMM parameters for the contraction
    /// \return A new contraction shape
    template <typename Factor>
    SparseShape_ gemm(const SparseShape_& other, const Factor factor,
        const math::GemmHelper& gemm_helper) const
    {
      TA_ASSERT(! tile_norms_.empty());

      const value_type abs_factor = to_abs_factor(factor);
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      integer M = 0, N = 0, K = 0;
      gemm_helper.compute_matrix_sizes(M, N, K, tile_norms_.range(), other.tile_norms_.range());

      // Allocate memory for the contracted size vectors
      std::shared_ptr<vector_type> result_size_vectors(new vector_type[gemm_helper.result_rank()],
          std::default_delete<vector_type[]>());

      // Initialize the result size vectors
      unsigned int x = 0u;
      for(unsigned int i = gemm_helper.left_outer_begin(); i < gemm_helper.left_outer_end(); ++i, ++x)
        result_size_vectors.get()[x] = size_vectors_.get()[i];
      for(unsigned int i = gemm_helper.right_outer_begin(); i < gemm_helper.right_outer_end(); ++i, ++x)
        result_size_vectors.get()[x] = other.size_vectors_.get()[i];

      // Compute the number of inner (contracted) ranks
      const unsigned int k_rank = gemm_helper.left_inner_end() - gemm_helper.left_inner_begin();

      // Construct the result norm tensor
      Tensor<value_type> result_norms(gemm_helper.make_result_range<typename Tensor<T>::range_type>(
          tile_norms_.range(), other.tile_norms_.range()), 0);

      if(k_rank > 0u) {

        // Compute the volumes of the contracted (inner) tiles
        const vector_type k_sizes =
            recursive_outer_product(size_vectors_.get() + gemm_helper.left_inner_begin(),
                k_rank, [] (const vector_type& size_vector) -> const vector_type&
                { return size_vector; });

        // TODO: Make this faster. It can be done without using temporaries
        // for the arguments, but requires a custom matrix multiply.

        // Scale the columns of the left norms by the inner tile sizes
        Tensor<value_type> left(tile_norms_.range());
        const size_type mk = M * K;
        auto left_op = [] (const value_type left, const value_type right)
            { return left * right; };
        for(size_type i = 0ul; i < mk; i += K)
          math::vector_op(left_op, K, left.data() + i,
              tile_norms_.data() + i, k_sizes.data());

        // Scale the rows of the right norms by the inner tile sizes
        Tensor<value_type> right(other.tile_norms_.range());
        for(integer i = 0ul, k = 0; k < K; i += N, ++k) {
          const value_type k_size = k_sizes[k];
          auto right_op = [=] (const value_type arg) { return arg * k_size; };
          math::vector_op(right_op, N, right.data() + i, other.tile_norms_.data() + i);
        }

        result_norms = left.gemm(right, abs_factor, gemm_helper);

        // Hard zero tiles that are below the zero threshold.
        result_norms.inplace_unary(
            [threshold, &zero_tile_count] (value_type& value) {
              if(value < threshold) {
                value = value_type(0);
                ++zero_tile_count;
              }
            });

      } else {

        // This is an outer product, so the inputs can be used directly
        math::outer_fill(M, N, tile_norms_.data(), other.tile_norms_.data(), result_norms.data(),
            [threshold, &zero_tile_count, abs_factor] (const value_type left,
                const value_type right)
            {
              value_type norm = left * right * abs_factor;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
              return norm;
            });
      }

      return SparseShape_(result_norms, result_size_vectors, zero_tile_count);
    }
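
    // Why both arguments are scaled by k_sizes (a derivation from the
    // stored scaling, not part of the original source): for C = A*B the
    // norms satisfy
    //   ||C(i,j)|| <= sum_k ||A(i,k)|| * ||B(k,j)||.
    // The stored values are norms divided by tile volumes, so with
    // a = ||A||/(m*k) and b = ||B||/(k*n), the stored result
    //   c = ||C||/(m*n) <= sum_k a * b * k^2
    // requires one factor of the inner tile size k for each argument.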

    /// Contract and permute shapes (GEMM)

    /// \tparam Factor The scaling factor type
    /// \param other The right-hand shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param gemm_helper The *GEMM parameters for the contraction
    /// \param perm The permutation to be applied to the result
    /// \return A new, permuted contraction shape
    template <typename Factor>
    SparseShape_ gemm(const SparseShape_& other, const Factor factor,
        const math::GemmHelper& gemm_helper, const Permutation& perm) const
    {
      return gemm(other, factor, gemm_helper).perm(perm);
    }

  private:
    template <typename Factor>
    static value_type to_abs_factor(const Factor factor) {
      using std::abs;
      const auto cast_abs_factor = static_cast<value_type>(abs(factor));
      TA_ASSERT(std::isfinite(cast_abs_factor));
      return cast_abs_factor;
    }

  }; // class SparseShape

  // Static member initialization
  template <typename T>
  typename SparseShape<T>::value_type SparseShape<T>::threshold_ = std::numeric_limits<T>::epsilon();

  /// Add the shape to an output stream

  /// \tparam T The numeric type of the shape
  /// \param os The output stream
  /// \param shape The shape to be printed
  /// \return A reference to the output stream
  template <typename T>
  inline std::ostream& operator<<(std::ostream& os, const SparseShape<T>& shape) {
    os << "SparseShape<" << typeid(T).name() << ">:" << std::endl
       << shape.data() << std::endl;
    return os;
  }


#ifndef TILEDARRAY_HEADER_ONLY

  extern template class SparseShape<float>;

#endif // TILEDARRAY_HEADER_ONLY

} // namespace TiledArray

#endif // TILEDARRAY_SPARSE_SHAPE_H__INCLUDED