tiledarray/dox-master/tensor_2tensor_8h_source.html

/*

 *  This file is a part of TiledArray.

 *  Copyright (C) 2013  Virginia Tech

 *

 *  This program is free software: you can redistribute it and/or modify

 *  it under the terms of the GNU General Public License as published by

 *  the Free Software Foundation, either version 3 of the License, or

 *  (at your option) any later version.

 *

 *  This program is distributed in the hope that it will be useful,

 *  but WITHOUT ANY WARRANTY; without even the implied warranty of

 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 *  GNU General Public License for more details.

 *

 *  You should have received a copy of the GNU General Public License

 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.

 *

 */


#ifndef TILEDARRAY_TENSOR_TENSOR_H__INCLUDED

#define TILEDARRAY_TENSOR_TENSOR_H__INCLUDED


#include "TiledArray/math/blas.h"

#include "TiledArray/math/gemm_helper.h"

#include "TiledArray/tensor/complex.h"

#include "TiledArray/tensor/kernels.h"

#include "TiledArray/tile_interface/clone.h"

#include "TiledArray/tile_interface/permute.h"

#include "TiledArray/tile_interface/trace.h"

#include "TiledArray/util/logger.h"

namespace TiledArray {


// Forward declaration of Tensor so that the type-trait specialization below
// can name Tensor<T, A> before the class template itself is defined.

template <typename T, typename A>

class Tensor;


namespace detail {


// Specializes TraceIsDefined to std::true_type for Tensor<T, A> whenever
// enable_if_numeric_t<T> is well-formed (presumably: when T is a numeric
// type -- confirm against the definition of enable_if_numeric_t).
template <typename T, typename A>

struct TraceIsDefined<Tensor<T, A>, enable_if_numeric_t<T>> : std::true_type {};


}  // namespace detail


template <typename T, typename A>

class Tensor {

  // Reject non-assignable element types (e.g. const T) up front with a
  // readable diagnostic; background:
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48101
  static_assert(
      std::is_assignable_v<std::add_lvalue_reference_t<T>, T>,
      "Tensor<T>: T must be an assignable type (e.g. cannot be const)");


 public:

  typedef Tensor<T, A> Tensor_;

  typedef Range range_type;

  typedef typename range_type::index1_type index1_type;

  typedef typename range_type::ordinal_type ordinal_type;

  typedef typename range_type::ordinal_type

      size_type;

  typedef A allocator_type;

  typedef

      typename allocator_type::value_type value_type;

  typedef

      typename allocator_type::reference reference;

  typedef typename allocator_type::const_reference

      const_reference;

  typedef typename allocator_type::pointer pointer;

  typedef typename allocator_type::const_pointer

      const_pointer;

  typedef typename allocator_type::difference_type

      difference_type;

  typedef pointer iterator;

  typedef const_pointer const_iterator;

  typedef typename TiledArray::detail::numeric_type<T>::type

      numeric_type;

  typedef typename TiledArray::detail::scalar_type<T>::type

      scalar_type;


 private:

  // Shorthand for the numeric type that detail::numeric_type reports for X;
  // used below to spell the argument types of element-wise lambdas.
  template <typename X>

  using numeric_t = typename TiledArray::detail::numeric_type<X>::type;


  class Impl : public allocator_type {

   public:


    Impl() : allocator_type(), range_(), data_(NULL) {}


    explicit Impl(const range_type& range)

        : allocator_type(), range_(range), data_(NULL) {

      data_ = allocator_type::allocate(range.volume());

    }


    explicit Impl(range_type&& range)

        : allocator_type(), range_(range), data_(NULL) {

      data_ = allocator_type::allocate(range.volume());

    }


    ~Impl() {

      math::destroy_vector(range_.volume(), data_);

      allocator_type::deallocate(data_, range_.volume());

      data_ = NULL;

    }


    range_type range_;

    pointer data_;

  };                    // class Impl


  // Class-local trait used by the SFINAE constraints below: true when the
  // given types satisfy either detail::is_tensor or
  // detail::is_tensor_of_tensor.
  template <typename... Ts>

  struct is_tensor {

    static constexpr bool value = detail::is_tensor<Ts...>::value ||

                                  detail::is_tensor_of_tensor<Ts...>::value;

  };


  /// Overload chosen when detail::is_scalar_v<U> holds: does nothing, so
  /// scalar elements are left as allocated.
  template <typename U, std::enable_if_t<detail::is_scalar_v<U>>* = nullptr>
  static void default_init(index1_type, U*) {}


  template <typename U,

            typename std::enable_if<!detail::is_scalar_v<U>>::type* = nullptr>

  static void default_init(index1_type n, U* u) {

    math::uninitialized_fill_vector(n, U(), u);

  }


  // Shared implementation object (range + element buffer); null for an empty
  // tensor. Copies of a Tensor share this object.
  std::shared_ptr<Impl> pimpl_;

  // Range returned by the const range() accessor when pimpl_ is null.
  static const range_type empty_range_;


 public:

  // Compiler generated functions

  /// Creates an empty tensor (null implementation pointer, no storage)
  Tensor() : pimpl_(nullptr) {}

  /// Shallow copy: the new tensor shares \p other's implementation object
  Tensor(const Tensor_& other) : pimpl_{other.pimpl_} {}

  /// Move constructor: takes over \p other's implementation pointer.
  /// Moving a std::shared_ptr cannot throw, so this is declared noexcept;
  /// that lets standard containers move rather than copy tensors.
  Tensor(Tensor_&& other) noexcept : pimpl_(std::move(other.pimpl_)) {}

  /// Releases this tensor's reference to the shared implementation object
  ~Tensor() = default;

  /// Shallow copy assignment: afterwards this tensor shares \p other's
  /// implementation object.
  /// \return A reference to this tensor
  Tensor_& operator=(const Tensor_& other) {
    this->pimpl_ = other.pimpl_;
    return *this;
  }

  /// Move assignment: takes over \p other's implementation pointer.
  /// Declared noexcept because moving a std::shared_ptr cannot throw.
  /// \return A reference to this tensor
  Tensor_& operator=(Tensor_&& other) noexcept {
    pimpl_ = std::move(other.pimpl_);
    return *this;
  }


  /// Allocates storage for \p range and runs default_init() over it
  /// (a no-op for scalar element types).
  /// \param range The range of the new tensor
  explicit Tensor(const range_type& range)
      : pimpl_(std::make_shared<Impl>(range)) {
    const auto volume = range.volume();
    default_init(volume, pimpl_->data_);
  }


  template <

      typename Value,

      typename std::enable_if<std::is_same<Value, value_type>::value &&

                              detail::is_tensor<Value>::value>::type* = nullptr>

  Tensor(const range_type& range, const Value& value)

      : pimpl_(std::make_shared<Impl>(range)) {

    const auto n = pimpl_->range_.volume();

    pointer MADNESS_RESTRICT const data = pimpl_->data_;

    Clone<Value, Value> cloner;

    for (size_type i = 0ul; i < n; ++i)

      new (data + i) value_type(cloner(value));

  }


  /// Constructs a tensor over \p range and initializes it through
  /// detail::tensor_init with a generator that always yields \p value.
  /// Enabled only for numeric Value types.
  /// \param range The range of the new tensor
  /// \param value The fill value
  template <typename Value,
            std::enable_if_t<detail::is_numeric_v<Value>>* = nullptr>
  Tensor(const range_type& range, const Value& value)
      : pimpl_(std::make_shared<Impl>(range)) {
    detail::tensor_init([value]() -> Value { return value; }, *this);
  }


  template <typename InIter,

            typename std::enable_if<

                TiledArray::detail::is_input_iterator<InIter>::value &&

                !std::is_pointer<InIter>::value>::type* = nullptr>

  Tensor(const range_type& range, InIter it)

      : pimpl_(std::make_shared<Impl>(range)) {

    auto n = range.volume();

    pointer MADNESS_RESTRICT const data = pimpl_->data_;

    for (size_type i = 0ul; i < n; ++i, ++it) data[i] = *it;

  }


  /// Constructs a tensor over \p range from the array pointed to by \p u,
  /// via math::uninitialized_copy_vector over `range.volume()` elements.
  /// \param range The range of the new tensor
  /// \param u Pointer to the first source element
  template <typename U>
  Tensor(const Range& range, const U* u)
      : pimpl_(std::make_shared<Impl>(range)) {
    const auto count = range.volume();
    math::uninitialized_copy_vector(count, u, pimpl_->data_);
  }


  // Constructs a tensor over `range` from an initializer list by delegating
  // to another constructor with il.begin() (a const T*; presumably this
  // selects the pointer overload, since the iterator overload excludes
  // pointers).
  // NOTE(review): il.size() is not compared against range.volume() here --
  // TODO confirm callers always pass a list with at least volume() elements.
  Tensor(const Range& range, std::initializer_list<T> il)

      : Tensor(range, il.begin()) {}


  template <

      typename T1,

      typename std::enable_if<

          is_tensor<T1>::value && !std::is_same<T1, Tensor_>::value &&

          !detail::has_conversion_operator_v<T1, Tensor_>>::type* = nullptr>

  explicit Tensor(const T1& other)

      : pimpl_(std::make_shared<Impl>(detail::clone_range(other))) {

    auto op = [](const numeric_t<T1> arg) -> numeric_t<T1> { return arg; };


    detail::tensor_init(op, *this, other);

  }


  template <

      typename T1, typename Perm,

      typename std::enable_if<is_tensor<T1>::value &&

                              detail::is_permutation_v<Perm>>::type* = nullptr>

  Tensor(const T1& other, const Perm& perm)

      : pimpl_(std::make_shared<Impl>(outer(perm) * other.range())) {

    auto op = [](const numeric_t<T1> arg) -> numeric_t<T1> { return arg; };


    detail::tensor_init(op, outer(perm), *this, other);


    // If we actually have a ToT the inner permutation was not applied above so

    // we do that now

    constexpr bool is_tot = detail::is_tensor_of_tensor_v<Tensor_>;

    constexpr bool is_bperm = detail::is_bipartite_permutation_v<Perm>;

    // tile ops pass bipartite permutations here even if this is a plain tensor

    // static_assert(is_tot || (!is_tot && !is_bperm), "Permutation type does

    // not match Tensor_");

    if constexpr (is_tot && is_bperm) {

      if (inner_size(perm) != 0) {

        auto inner_perm = inner(perm);

        Permute<value_type, value_type> p;

        for (auto& x : *this) x = p(x, inner_perm);

      }

    }

  }


  /// Constructs a tensor from \p other transformed by \p op.
  /// The new tensor gets a clone of \p other's range; the elements are
  /// produced by `detail::tensor_init(op, *this, other)`. Disabled when Op
  /// is a permutation type so the (tensor, permutation) overload is chosen.
  /// \param other The source tensor
  /// \param op The operation passed to tensor_init
  template <typename T1, typename Op,
            std::enable_if_t<is_tensor<T1>::value &&
                             !detail::is_permutation_v<std::decay_t<Op>>>* =
                nullptr>
  Tensor(const T1& other, Op&& op)
      : pimpl_(std::make_shared<Impl>(detail::clone_range(other))) {
    detail::tensor_init(op, *this, other);
  }


  /// Constructs a tensor from \p other transformed by \p op and permuted.
  /// The range is `outer(perm) * other.range()`; the elements are produced by
  /// `detail::tensor_init(op, outer(perm), *this, other)`.
  /// \param other The source tensor
  /// \param op The operation passed to tensor_init
  /// \param perm The permutation to apply
  template <typename T1, typename Op, typename Perm,
            std::enable_if_t<is_tensor<T1>::value &&
                             detail::is_permutation_v<Perm>>* = nullptr>
  Tensor(const T1& other, Op&& op, const Perm& perm)
      : pimpl_(std::make_shared<Impl>(outer(perm) * other.range())) {
    detail::tensor_init(op, outer(perm), *this, other);

    // Only the outer permutation has been applied so far. For a
    // tensor-of-tensors given a bipartite permutation, permute each inner
    // tensor as well. Tile ops may pass a bipartite permutation even for a
    // plain tensor, which is why a mismatch is not rejected statically.
    if constexpr (detail::is_tensor_of_tensor_v<Tensor_> &&
                  detail::is_bipartite_permutation_v<Perm>) {
      if (inner_size(perm) != 0) {
        auto inner_perm = inner(perm);
        Permute<value_type, value_type> permuter;
        for (auto& element : *this) element = permuter(element, inner_perm);
      }
    }
  }


  template <typename T1, typename T2, typename Op,

            typename std::enable_if<is_tensor<T1, T2>::value>::type* = nullptr>

  Tensor(const T1& left, const T2& right, Op&& op)

      : pimpl_(std::make_shared<Impl>(detail::clone_range(left))) {

    detail::tensor_init(op, *this, left, right);

  }


  template <

      typename T1, typename T2, typename Op, typename Perm,

      typename std::enable_if<is_tensor<T1, T2>::value &&

                              detail::is_permutation_v<Perm>>::type* = nullptr>

  Tensor(const T1& left, const T2& right, Op&& op, const Perm& perm)

      : pimpl_(std::make_shared<Impl>(outer(perm) * left.range())) {

    detail::tensor_init(op, outer(perm), *this, left, right);

    // If we actually have a ToT the inner permutation was not applied above so

    // we do that now

    constexpr bool is_tot = detail::is_tensor_of_tensor_v<Tensor_>;

    constexpr bool is_bperm = detail::is_bipartite_permutation_v<Perm>;

    // tile ops pass bipartite permutations here even if this is a plain tensor

    // static_assert(is_tot || (!is_tot && !is_bperm), "Permutation type does

    // not match Tensor_");

    if constexpr (is_tot && is_bperm) {

      if (inner_size(perm) != 0) {

        auto inner_perm = inner(perm);

        Permute<value_type, value_type> p;

        for (auto& x : *this) x = p(x, inner_perm);

      }

    }

  }


  /// Produces a deep copy of this tensor.
  /// An empty tensor yields an empty tensor; otherwise the copy is built by
  /// detail::tensor_op with an identity operation.
  /// \return The copy
  Tensor_ clone() const {
    if (!pimpl_) return Tensor_();
    return detail::tensor_op<Tensor_>(
        [](const numeric_type value) -> numeric_type { return value; },
        *this);
  }


  /// Assigns the contents of another tensor-like object to this tensor.
  ///
  /// New storage with a clone of \p other's range is filled element by
  /// element from \p other and only then installed in this tensor. Building
  /// the copy first keeps the old buffer alive while \p other is read, which
  /// matters if \p other is a view into this tensor's own data (presumably
  /// possible for the views returned by block()). It also leaves this tensor
  /// unchanged if the copy throws.
  /// \param other The tensor to copy
  /// \return A reference to this tensor
  template <typename T1,
            typename std::enable_if<is_tensor<T1>::value>::type* = nullptr>
  Tensor_& operator=(const T1& other) {
    Tensor_ result;
    result.pimpl_ = std::make_shared<Impl>(detail::clone_range(other));
    detail::inplace_tensor_op(
        [](reference MADNESS_RESTRICT tr,
           typename T1::const_reference MADNESS_RESTRICT t1) { tr = t1; },
        result, other);

    // Install the new storage; the old one is released here, after the copy.
    pimpl_ = std::move(result.pimpl_);
    return *this;
  }


  /// \return The tensor's range, or the shared empty range if this tensor
  /// is empty
  const range_type& range() const {
    if (pimpl_) return pimpl_->range_;
    return empty_range_;
  }


  /// \return A mutable reference to the tensor's range
  /// \note Asserts that the tensor is not empty
  range_type& range() {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    return impl.range_;
  }


  /// \return The volume of the tensor's range, or 0 if the tensor is empty
  ordinal_type size() const {
    if (!pimpl_) return 0ul;
    return pimpl_->range_.volume();
  }


  /// Const element access by ordinal index.
  /// \param ord The ordinal index; asserted to be included in the range
  /// \return A const reference to the element at \p ord
  /// \note Asserts that the tensor is not empty
  template <typename Ordinal,
            std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
  const_reference operator[](const Ordinal ord) const {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(ord));
    return impl.data_[ord];
  }


  /// Element access by ordinal index.
  /// \param ord The ordinal index; asserted to be included in the range
  /// \return A reference to the element at \p ord
  /// \note Asserts that the tensor is not empty
  template <typename Ordinal,
            std::enable_if_t<std::is_integral<Ordinal>::value>* = nullptr>
  reference operator[](const Ordinal ord) {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(ord));
    return impl.data_[ord];
  }


  /// Const element access by coordinate index.
  /// \param i An integral range holding the coordinate; asserted to be
  /// included in the tensor's range
  /// \return A const reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Index,
            std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
  const_reference operator[](const Index& i) const {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Element access by coordinate index.
  /// \param i An integral range holding the coordinate; asserted to be
  /// included in the tensor's range
  /// \return A reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Index,
            std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
  reference operator[](const Index& i) {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Const element access by coordinate index given as an initializer list.
  /// \param i The coordinate; asserted to be included in the tensor's range
  /// \return A const reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Integer,
            std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
  const_reference operator[](const std::initializer_list<Integer>& i) const {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Element access by coordinate index given as an initializer list.
  /// \param i The coordinate; asserted to be included in the tensor's range
  /// \return A reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Integer,
            std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
  reference operator[](const std::initializer_list<Integer>& i) {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Const element access by coordinate index (call syntax).
  /// \param i An integral range holding the coordinate; asserted to be
  /// included in the tensor's range
  /// \return A const reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Index,
            std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
  const_reference operator()(const Index& i) const {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Element access by coordinate index (call syntax).
  /// \param i An integral range holding the coordinate; asserted to be
  /// included in the tensor's range
  /// \return A reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Index,
            std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
  reference operator()(const Index& i) {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Const element access by coordinate index given as an initializer list
  /// (call syntax).
  /// \param i The coordinate; asserted to be included in the tensor's range
  /// \return A const reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Integer,
            std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
  const_reference operator()(const std::initializer_list<Integer>& i) const {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Element access by coordinate index given as an initializer list
  /// (call syntax).
  /// \param i The coordinate; asserted to be included in the tensor's range
  /// \return A reference to the element at `range().ordinal(i)`
  /// \note Asserts that the tensor is not empty
  template <typename Integer,
            std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
  reference operator()(const std::initializer_list<Integer>& i) {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i));
    return impl.data_[impl.range_.ordinal(i)];
  }


  /// Const element access by a coordinate given as separate integral
  /// arguments.
  /// \param i The coordinate components; asserted to be included in the
  /// tensor's range
  /// \return A const reference to the element at `range().ordinal(i...)`
  /// \note Asserts that the tensor is not empty
  template <typename... Index,
            std::enable_if_t<detail::is_integral_list<Index...>::value>* =
                nullptr>
  const_reference operator()(const Index&... i) const {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i...));
    return impl.data_[impl.range_.ordinal(i...)];
  }


  /// Element access by a coordinate given as separate integral arguments.
  /// \param i The coordinate components; asserted to be included in the
  /// tensor's range
  /// \return A reference to the element at `range().ordinal(i...)`
  /// \note Asserts that the tensor is not empty
  template <typename... Index,
            std::enable_if_t<detail::is_integral_list<Index...>::value>* =
                nullptr>
  reference operator()(const Index&... i) {
    TA_ASSERT(pimpl_);
    Impl& impl = *pimpl_;
    TA_ASSERT(impl.range_.includes(i...));
    return impl.data_[impl.range_.ordinal(i...)];
  }


  /// \return A const iterator to the first element, or null if the tensor
  /// is empty
  const_iterator begin() const {
    if (!pimpl_) return nullptr;
    return pimpl_->data_;
  }


  /// \return An iterator to the first element, or null if the tensor is
  /// empty
  iterator begin() {
    if (!pimpl_) return nullptr;
    return pimpl_->data_;
  }


  /// \return A const iterator one past the last element (data pointer plus
  /// the range volume), or null if the tensor is empty
  const_iterator end() const {
    if (!pimpl_) return nullptr;
    Impl& impl = *pimpl_;
    return impl.data_ + impl.range_.volume();
  }


  /// \return An iterator one past the last element (data pointer plus the
  /// range volume), or null if the tensor is empty
  iterator end() {
    if (!pimpl_) return nullptr;
    Impl& impl = *pimpl_;
    return impl.data_ + impl.range_.volume();
  }


  /// \return A const pointer to the element buffer, or null if the tensor
  /// is empty
  const_pointer data() const {
    if (!pimpl_) return nullptr;
    return pimpl_->data_;
  }


  /// \return A pointer to the element buffer, or null if the tensor is empty
  pointer data() {
    if (!pimpl_) return nullptr;
    return pimpl_->data_;
  }


  /// \return `true` if this tensor has no implementation object (i.e. it was
  /// default constructed, moved from, or reset), `false` otherwise
  bool empty() const { return pimpl_ == nullptr; }


  /// Writes this tensor to an output archive.
  /// An empty tensor is stored as a single zero of ordinal_type. Otherwise
  /// the volume is written first, then the element buffer, then the range.
  /// \param ar The output archive
  template <typename Archive,
            std::enable_if_t<
                madness::archive::is_output_archive<Archive>::value>* = nullptr>
  void serialize(Archive& ar) {
    if (!pimpl_) {
      // A zero element count marks an empty tensor.
      ar& ordinal_type(0ul);
      return;
    }
    ar & pimpl_->range_.volume();
    ar& madness::archive::wrap(pimpl_->data_, pimpl_->range_.volume());
    ar & pimpl_->range_;
  }


  template <typename Archive,

            typename std::enable_if<madness::archive::is_input_archive<

                Archive>::value>::type* = nullptr>

  void serialize(Archive& ar) {

    ordinal_type n = 0ul;

    ar& n;

    if (n) {

      std::shared_ptr<Impl> temp = std::make_shared<Impl>();

      temp->data_ = temp->allocate(n);

      try {

        // need to construct elements of data_ using placement new in case its

        // default ctor is not trivial N.B. for fundamental types and standard

        // alloc this incurs no overhead (Eigen::aligned_alloc OK also)

        auto* data_ptr = temp->data_;

        for (ordinal_type i = 0; i != n; ++i, ++data_ptr)

          new (static_cast<void*>(data_ptr)) value_type;


        ar& madness::archive::wrap(temp->data_, n);

        ar & temp->range_;

      } catch (...) {

        temp->deallocate(temp->data_, n);

        throw;

      }


      pimpl_ = temp;

    } else {

      pimpl_.reset();

    }

  }


  /// Exchanges the implementation pointers of this tensor and \p other.
  /// Swapping std::shared_ptr objects cannot throw, so this is noexcept.
  /// \param other The tensor to swap with
  void swap(Tensor_& other) noexcept { pimpl_.swap(other.pimpl_); }


  // clang-format off


  // clang-format on

  /// Creates a mutable view of a sub-block of this tensor.
  /// The view is a TensorInterface over
  /// `BlockRange(range(), lower_bound, upper_bound)` and this tensor's data
  /// pointer.
  /// \param lower_bound Integral range passed as the block's lower bound
  /// \param upper_bound Integral range passed as the block's upper bound
  /// \note Asserts that the tensor is not empty
  template <typename Index1, typename Index2,
            typename = std::enable_if_t<detail::is_integral_range_v<Index1> &&
                                        detail::is_integral_range_v<Index2>>>
  detail::TensorInterface<T, BlockRange> block(const Index1& lower_bound,
                                               const Index2& upper_bound) {
    using view_type = detail::TensorInterface<T, BlockRange>;
    TA_ASSERT(pimpl_);
    return view_type(BlockRange(pimpl_->range_, lower_bound, upper_bound),
                     pimpl_->data_);
  }


  /// Creates a read-only view of a sub-block of this tensor.
  /// The view is a TensorInterface over
  /// `BlockRange(range(), lower_bound, upper_bound)` and this tensor's data
  /// pointer.
  /// \param lower_bound Integral range passed as the block's lower bound
  /// \param upper_bound Integral range passed as the block's upper bound
  /// \note Asserts that the tensor is not empty
  template <typename Index1, typename Index2,
            typename = std::enable_if_t<detail::is_integral_range_v<Index1> &&
                                        detail::is_integral_range_v<Index2>>>
  detail::TensorInterface<const T, BlockRange> block(
      const Index1& lower_bound, const Index2& upper_bound) const {
    using view_type = detail::TensorInterface<const T, BlockRange>;
    TA_ASSERT(pimpl_);
    return view_type(BlockRange(pimpl_->range_, lower_bound, upper_bound),
                     pimpl_->data_);
  }


  // clang-format off


  // clang-format on

  /// Creates a mutable view of a sub-block of this tensor, with the bounds
  /// given as initializer lists.
  /// \param lower_bound List passed as the block's lower bound
  /// \param upper_bound List passed as the block's upper bound
  /// \note Asserts that the tensor is not empty
  template <typename Index1, typename Index2,
            typename = std::enable_if_t<std::is_integral_v<Index1> &&
                                        std::is_integral_v<Index2>>>
  detail::TensorInterface<T, BlockRange> block(
      const std::initializer_list<Index1>& lower_bound,
      const std::initializer_list<Index2>& upper_bound) {
    using view_type = detail::TensorInterface<T, BlockRange>;
    TA_ASSERT(pimpl_);
    return view_type(BlockRange(pimpl_->range_, lower_bound, upper_bound),
                     pimpl_->data_);
  }


  /// Creates a read-only view of a sub-block of this tensor, with the bounds
  /// given as initializer lists.
  /// \param lower_bound List passed as the block's lower bound
  /// \param upper_bound List passed as the block's upper bound
  /// \note Asserts that the tensor is not empty
  template <typename Index1, typename Index2,
            typename = std::enable_if_t<std::is_integral_v<Index1> &&
                                        std::is_integral_v<Index2>>>
  detail::TensorInterface<const T, BlockRange> block(
      const std::initializer_list<Index1>& lower_bound,
      const std::initializer_list<Index2>& upper_bound) const {
    using view_type = detail::TensorInterface<const T, BlockRange>;
    TA_ASSERT(pimpl_);
    return view_type(BlockRange(pimpl_->range_, lower_bound, upper_bound),
                     pimpl_->data_);
  }


  // clang-format off


  // clang-format on

  /// Creates a read-only view of a sub-block of this tensor, with the bounds
  /// given as a range of pairs.
  /// The view is a TensorInterface over `BlockRange(range(), bounds)` and
  /// this tensor's data pointer.
  /// \param bounds The block bounds, passed through to BlockRange
  /// \note Asserts that the tensor is not empty, as the other block()
  /// overloads do; previously an empty tensor dereferenced a null pointer.
  template <typename PairRange,
            typename = std::enable_if_t<detail::is_gpair_range_v<PairRange>>>
  detail::TensorInterface<const T, BlockRange> block(
      const PairRange& bounds) const {
    TA_ASSERT(pimpl_);
    return detail::TensorInterface<const T, BlockRange>(
        BlockRange(pimpl_->range_, bounds), pimpl_->data_);
  }


  /// Creates a mutable view of a sub-block of this tensor, with the bounds
  /// given as a range of pairs.
  /// The view is a TensorInterface over `BlockRange(range(), bounds)` and
  /// this tensor's data pointer.
  /// \param bounds The block bounds, passed through to BlockRange
  /// \note Asserts that the tensor is not empty, as the other block()
  /// overloads do; previously an empty tensor dereferenced a null pointer.
  template <typename PairRange,
            typename = std::enable_if_t<detail::is_gpair_range_v<PairRange>>>
  detail::TensorInterface<T, BlockRange> block(const PairRange& bounds) {
    TA_ASSERT(pimpl_);
    return detail::TensorInterface<T, BlockRange>(
        BlockRange(pimpl_->range_, bounds), pimpl_->data_);
  }


  // clang-format off


  // clang-format on

  /// Creates a read-only view of a sub-block of this tensor, with the bounds
  /// given as a nested initializer list.
  /// The view is a TensorInterface over `BlockRange(range(), bounds)` and
  /// this tensor's data pointer.
  /// \param bounds The block bounds, passed through to BlockRange
  /// \note Asserts that the tensor is not empty, as the other block()
  /// overloads do; previously an empty tensor dereferenced a null pointer.
  template <typename Index,
            typename = std::enable_if_t<std::is_integral_v<Index>>>
  detail::TensorInterface<const T, BlockRange> block(
      const std::initializer_list<std::initializer_list<Index>>& bounds) const {
    TA_ASSERT(pimpl_);
    return detail::TensorInterface<const T, BlockRange>(
        BlockRange(pimpl_->range_, bounds), pimpl_->data_);
  }


  /// Creates a mutable view of a sub-block of this tensor, with the bounds
  /// given as a nested initializer list.
  /// The view is a TensorInterface over `BlockRange(range(), bounds)` and
  /// this tensor's data pointer.
  /// \param bounds The block bounds, passed through to BlockRange
  /// \note Asserts that the tensor is not empty, as the other block()
  /// overloads do; previously an empty tensor dereferenced a null pointer.
  template <typename Index,
            typename = std::enable_if_t<std::is_integral_v<Index>>>
  detail::TensorInterface<T, BlockRange> block(
      const std::initializer_list<std::initializer_list<Index>>& bounds) {
    TA_ASSERT(pimpl_);
    return detail::TensorInterface<T, BlockRange>(
        BlockRange(pimpl_->range_, bounds), pimpl_->data_);
  }


  /// Creates a permuted copy of this tensor.
  ///
  /// For a plain tensor the copy is built with the permuting constructor; a
  /// bipartite permutation is accepted only if its inner part is empty
  /// (asserted), and its outer part is used. For a tensor-of-tensors the
  /// outer modes are permuted first and then, for a bipartite permutation
  /// with a non-empty inner part, every inner tensor is permuted too.
  /// \param perm The permutation to apply
  /// \return The permuted copy
  template <typename Perm,
            typename = std::enable_if_t<detail::is_permutation_v<Perm>>>
  Tensor_ permute(const Perm& perm) const {
    constexpr bool is_tot = detail::is_tensor_of_tensor_v<Tensor_>;
    [[maybe_unused]] constexpr bool is_bperm =
        detail::is_bipartite_permutation_v<Perm>;
    // Tile ops pass bipartite permutations here even for a plain tensor, so
    // a permutation/tensor mismatch cannot be rejected with a static_assert.
    if constexpr (is_tot) {
      // Step 1: outer modes, exactly as in the plain-tensor case.
      Tensor_ permuted(*this, outer(perm));
      // Step 2: inner modes, only when the permutation carries them.
      if constexpr (is_bperm) {
        if (inner_size(perm) != 0) {
          auto inner_perm = inner(perm);
          Permute<value_type, value_type> permuter;
          for (auto& element : permuted)
            element = permuter(element, inner_perm);
        }
      }
      return permuted;
    } else if constexpr (is_bperm) {
      TA_ASSERT(inner_size(perm) == 0);  // ensure this is a plain permutation
      return Tensor_(*this, outer(perm));
    } else {
      return Tensor_(*this, perm);
    }
    abort();  // unreachable
  }


  /// Shifts this tensor's range in place by \p bound_shift.
  /// \param bound_shift Integral range passed to `Range::inplace_shift`
  /// \return A reference to this tensor
  /// \note Asserts that the tensor is not empty
  template <typename Index,
            std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
  Tensor_& shift_to(const Index& bound_shift) {
    TA_ASSERT(pimpl_);
    range_type& rng = pimpl_->range_;
    rng.inplace_shift(bound_shift);
    return *this;
  }


  /// Shifts this tensor's range in place by \p bound_shift, given as an
  /// initializer list.
  /// \param bound_shift List passed to `Range::inplace_shift`
  /// \return A reference to this tensor
  /// \note Asserts that the tensor is not empty
  template <typename Integer,
            std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
  Tensor_& shift_to(const std::initializer_list<Integer>& bound_shift) {
    using shift_list = std::initializer_list<Integer>;
    TA_ASSERT(pimpl_);
    range_type& rng = pimpl_->range_;
    rng.template inplace_shift<shift_list>(bound_shift);
    return *this;
  }


  /// Returns a deep copy of this tensor whose range is shifted by
  /// \p bound_shift; this tensor is left unchanged.
  /// \param bound_shift Integral range passed on to shift_to()
  /// \return The shifted copy
  /// \note Asserts that the tensor is not empty
  template <typename Index,
            std::enable_if_t<detail::is_integral_range_v<Index>>* = nullptr>
  Tensor_ shift(const Index& bound_shift) const {
    TA_ASSERT(pimpl_);
    Tensor_ shifted = clone();
    shifted.shift_to(bound_shift);
    return shifted;
  }


  /// Returns a deep copy of this tensor whose range is shifted by
  /// \p bound_shift (given as an initializer list); this tensor is left
  /// unchanged.
  /// \param bound_shift List passed on to shift_to()
  /// \return The shifted copy
  /// \note Asserts that the tensor is not empty
  template <typename Integer,
            std::enable_if_t<std::is_integral_v<Integer>>* = nullptr>
  Tensor_ shift(const std::initializer_list<Integer>& bound_shift) const {
    using shift_list = std::initializer_list<Integer>;
    TA_ASSERT(pimpl_);
    Tensor_ shifted = clone();
    shifted.template shift_to<shift_list>(bound_shift);
    return shifted;
  }


  // Generic vector operations


  template <typename Right, typename Op,

            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>

  Tensor_ binary(const Right& right, Op&& op) const {

    return Tensor_(*this, right, op);

  }


  template <

      typename Right, typename Op, typename Perm,

      typename std::enable_if<is_tensor<Right>::value &&

                              detail::is_permutation_v<Perm>>::type* = nullptr>

  Tensor_ binary(const Right& right, Op&& op, const Perm& perm) const {

    constexpr bool is_tot = detail::is_tensor_of_tensor_v<Tensor_>;

    [[maybe_unused]] constexpr bool is_bperm =

        detail::is_bipartite_permutation_v<Perm>;

    // tile ops pass bipartite permutations here even if this is a plain tensor

    // static_assert(is_tot || (!is_tot && !is_bperm), "Permutation type does

    // not match Tensor_");

    if constexpr (!is_tot) {

      if constexpr (is_bperm) {

        TA_ASSERT(inner_size(perm) == 0);  // ensure this is a plain permutation

        return Tensor_(*this, right, op, outer(perm));

      } else

        return Tensor_(*this, right, op, perm);

    } else {

      // AFAIK the other branch fundamentally relies on raw pointer arithmetic,

      // which won't work for ToTs.

      auto temp = binary(right, std::forward<Op>(op));

      Permute<Tensor_, Tensor_> p;

      return p(temp, perm);

    }

    abort();  // unreachable

  }


  template <typename Right, typename Op,

            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>

  Tensor_& inplace_binary(const Right& right, Op&& op) {

    detail::inplace_tensor_op(op, *this, right);

    return *this;

  }


  /// Builds a new tensor from this tensor transformed by \p op, using the
  /// (tensor, op) constructor.
  /// \param op The unary operation
  /// \return The resulting tensor
  template <typename Op>
  Tensor_ unary(Op&& op) const {
    Tensor_ result(*this, op);
    return result;
  }


  /// Builds a new, permuted tensor from this tensor transformed by \p op.
  ///
  /// For a plain tensor the permuting (tensor, op, perm) constructor is
  /// used; a bipartite permutation must have an empty inner part (asserted)
  /// and only its outer part is applied. For a tensor-of-tensors the
  /// unpermuted result is computed first and then permuted with Permute.
  /// \param op The unary operation
  /// \param perm The permutation to apply to the result
  /// \return The resulting tensor
  template <typename Op, typename Perm,
            typename = std::enable_if_t<detail::is_permutation_v<Perm>>>
  Tensor_ unary(Op&& op, const Perm& perm) const {
    constexpr bool is_tot = detail::is_tensor_of_tensor_v<Tensor_>;
    [[maybe_unused]] constexpr bool is_bperm =
        detail::is_bipartite_permutation_v<Perm>;
    // Tile ops pass bipartite permutations here even for a plain tensor, so
    // a permutation/tensor mismatch cannot be rejected with a static_assert.
    if constexpr (is_tot) {
      auto unpermuted = unary(std::forward<Op>(op));
      Permute<Tensor_, Tensor_> permuter;
      return permuter(unpermuted, perm);
    } else if constexpr (is_bperm) {
      TA_ASSERT(inner_size(perm) == 0);  // ensure this is a plain permutation
      return Tensor_(*this, op, outer(perm));
    } else {
      return Tensor_(*this, op, perm);
    }
    abort();  // unreachable
  }


  /// Updates this tensor in place by running
  /// `detail::inplace_tensor_op(op, *this)`.
  /// \param op The in-place unary operation
  /// \return A reference to this tensor
  template <typename Op>
  Tensor_& inplace_unary(Op&& op) {
    Tensor_& self = *this;
    detail::inplace_tensor_op(op, self);
    return self;
  }


  // Scale operation


  /// Builds a new tensor via unary(), with `a * factor` as the operation.
  /// \param factor The scaling factor
  /// \return The scaled tensor
  template <typename Scalar,
            std::enable_if_t<detail::is_numeric_v<Scalar>>* = nullptr>
  Tensor_ scale(const Scalar factor) const {
    return unary([factor](const numeric_type elem) -> numeric_type {
      return elem * factor;
    });
  }


  /// Builds a new, permuted tensor via unary(), with `a * factor` as the
  /// operation.
  /// \param factor The scaling factor
  /// \param perm The permutation to apply to the result
  /// \return The scaled and permuted tensor
  template <typename Scalar, typename Perm,
            typename = std::enable_if_t<detail::is_numeric_v<Scalar> &&
                                        detail::is_permutation_v<Perm>>>
  Tensor_ scale(const Scalar factor, const Perm& perm) const {
    return unary(
        [factor](const numeric_type elem) -> numeric_type {
          return elem * factor;
        },
        perm);
  }


  /// Updates this tensor in place via inplace_unary(), with `res *= factor`
  /// as the operation.
  /// \param factor The scaling factor
  /// \return A reference to this tensor
  template <typename Scalar,
            std::enable_if_t<detail::is_numeric_v<Scalar>>* = nullptr>
  Tensor_& scale_to(const Scalar factor) {
    return inplace_unary(
        [factor](numeric_type& MADNESS_RESTRICT elem) { elem *= factor; });
  }


  // Addition operations


  template <typename Right,

            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>

  Tensor_ add(const Right& right) const {

    return binary(

        right,

        [](const numeric_type l, const numeric_t<Right> r) -> numeric_type {

          return l + r;

        });

  }


  template <

      typename Right, typename Perm,

      typename std::enable_if<is_tensor<Right>::value &&

                              detail::is_permutation_v<Perm>>::type* = nullptr>

  Tensor_ add(const Right& right, const Perm& perm) const {

    return binary(

        right,

        [](const numeric_type l, const numeric_t<Right> r) -> numeric_type {

          return l + r;

        },

        perm);

  }


  template <

      typename Right, typename Scalar,

      typename std::enable_if<is_tensor<Right>::value &&

                              detail::is_numeric_v<Scalar>>::type* = nullptr>

  Tensor_ add(const Right& right, const Scalar factor) const {

    return binary(right,

                  [factor](const numeric_type l, const numeric_t<Right> r)

                      -> numeric_type { return (l + r) * factor; });

  }


  template <typename Right, typename Scalar, typename Perm,

            typename std::enable_if<

                is_tensor<Right>::value && detail::is_numeric_v<Scalar> &&

                detail::is_permutation_v<Perm>>::type* = nullptr>

  Tensor_ add(const Right& right, const Scalar factor, const Perm& perm) const {

    return binary(

        right,

        [factor](const numeric_type l, const numeric_t<Right> r)

            -> numeric_type { return (l + r) * factor; },

        perm);

  }


  /// Builds a new tensor via unary(), with `a + value` as the operation.
  /// \param value The constant to add
  /// \return The resulting tensor
  Tensor_ add(const numeric_type value) const {
    return unary([value](const numeric_type elem) -> numeric_type {
      return elem + value;
    });
  }


  /// Builds a new, permuted tensor via unary(), with `a + value` as the
  /// operation.
  /// \param value The constant to add
  /// \param perm The permutation to apply to the result
  /// \return The resulting tensor
  template <typename Perm,
            typename = std::enable_if_t<detail::is_permutation_v<Perm>>>
  Tensor_ add(const numeric_type value, const Perm& perm) const {
    return unary(
        [value](const numeric_type elem) -> numeric_type {
          return elem + value;
        },
        perm);
  }


  template <typename Right,

            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>

  Tensor_& add_to(const Right& right) {

    return inplace_binary(right, [](numeric_type& MADNESS_RESTRICT l,

                                    const numeric_t<Right> r) { l += r; });

  }


  template <

      typename Right, typename Scalar,

      typename std::enable_if<is_tensor<Right>::value &&

                              detail::is_numeric_v<Scalar>>::type* = nullptr>

  Tensor_& add_to(const Right& right, const Scalar factor) {

    return inplace_binary(

        right, [factor](numeric_type& MADNESS_RESTRICT l,

                        const numeric_t<Right> r) { (l += r) *= factor; });

  }


  /// Updates this tensor in place via inplace_unary(), with `res += value`
  /// as the operation.
  /// \param value The constant to add
  /// \return A reference to this tensor
  Tensor_& add_to(const numeric_type value) {
    return inplace_unary(
        [value](numeric_type& MADNESS_RESTRICT elem) { elem += value; });
  }


  // Subtraction operations


  template <typename Right,

            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>

  Tensor_ subt(const Right& right) const {

    return binary(

        right,

        [](const numeric_type l, const numeric_t<Right> r) -> numeric_type {

          return l - r;

        });

  }


  template <

      typename Right, typename Perm,

      typename std::enable_if<is_tensor<Right>::value &&

                              detail::is_permutation_v<Perm>>::type* = nullptr>

  Tensor_ subt(const Right& right, const Perm& perm) const {

    return binary(

        right,

        [](const numeric_type l, const numeric_t<Right> r) -> numeric_type {

          return l - r;

        },

        perm);

  }


  template <

      typename Right, typename Scalar,

      typename std::enable_if<is_tensor<Right>::value &&

                              detail::is_numeric_v<Scalar>>::type* = nullptr>

  Tensor_ subt(const Right& right, const Scalar factor) const {

    return binary(right,

                  [factor](const numeric_type l, const numeric_t<Right> r)

                      -> numeric_type { return (l - r) * factor; });

  }


  template <typename Right, typename Scalar, typename Perm,

            typename std::enable_if<

                is_tensor<Right>::value && detail::is_numeric_v<Scalar> &&

                detail::is_permutation_v<Perm>>::type* = nullptr>

  Tensor_ subt(const Right& right, const Scalar factor,

               const Perm& perm) const {

    return binary(

        right,

        [factor](const numeric_type l, const numeric_t<Right> r)

            -> numeric_type { return (l - r) * factor; },

        perm);

  }


  /// Subtracts a constant by delegating to add() with the negated value.
  /// \param value The constant to subtract
  /// \return The resulting tensor
  Tensor_ subt(const numeric_type value) const {
    return add(-value);
  }


  /// Subtracts a constant and permutes the result, by delegating to
  /// add() with the negated value and \p perm.
  /// \param value The constant to subtract
  /// \param perm The permutation to apply to the result
  /// \return The resulting tensor
  template <typename Perm,
            typename = std::enable_if_t<detail::is_permutation_v<Perm>>>
  Tensor_ subt(const numeric_type value, const Perm& perm) const {
    return this->add(-value, perm);
  }


  /// Subtract \c right from this tensor in place

  /// The functor given to inplace_binary() performs <tt>l -= r</tt>.
  /// \tparam Right The right-hand tensor type
  /// \param right The tensor to be subtracted
  /// \return The reference returned by inplace_binary()
  template <typename Right,
            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>
  Tensor_& subt_to(const Right& right) {
    return this->inplace_binary(
        right, [](numeric_type& MADNESS_RESTRICT target,
                  const numeric_t<Right> subtrahend) { target -= subtrahend; });
  }


  /// Subtract \c right from this tensor in place, then scale

  /// The functor given to inplace_binary() subtracts \c r from \c l and then
  /// multiplies \c l by \c factor.
  /// \tparam Right The right-hand tensor type
  /// \tparam Scalar A numeric type
  /// \param right The tensor to be subtracted
  /// \param factor The scaling factor applied after the subtraction
  /// \return The reference returned by inplace_binary()
  template <
      typename Right, typename Scalar,
      typename std::enable_if<is_tensor<Right>::value &&
                              detail::is_numeric_v<Scalar>>::type* = nullptr>
  Tensor_& subt_to(const Right& right, const Scalar factor) {
    return this->inplace_binary(
        right, [factor](numeric_type& MADNESS_RESTRICT target,
                        const numeric_t<Right> subtrahend) {
          target -= subtrahend;
          target *= factor;
        });
  }


  /// Subtract a constant from this tensor in place

  /// Implemented as add_to() of the negated constant.
  /// \param value The constant to be subtracted
  /// \return The reference returned by <tt>add_to(-value)</tt>
  Tensor_& subt_to(const numeric_type value) {
    return this->add_to(-value);
  }


  // Multiplication operations


  /// Multiply this tensor by \c right

  /// Hands binary() a functor that maps a pair <tt>(l, r)</tt> to
  /// <tt>l * r</tt>.
  /// \tparam Right The right-hand tensor type
  /// \param right The tensor to multiply by
  /// \return The tensor produced by binary()
  template <typename Right,
            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>
  Tensor_ mult(const Right& right) const {
    return this->binary(right,
                        [](const numeric_type lhs,
                           const numeric_t<Right> rhs) -> numeric_type {
                          return lhs * rhs;
                        });
  }


  /// Multiply this tensor by \c right, forwarding \c perm to binary()

  /// \tparam Right The right-hand tensor type
  /// \tparam Perm A permutation type
  /// \param right The tensor to multiply by
  /// \param perm The permutation passed on to binary()
  /// \return The tensor produced by binary()
  template <
      typename Right, typename Perm,
      typename std::enable_if<is_tensor<Right>::value &&
                              detail::is_permutation_v<Perm>>::type* = nullptr>
  Tensor_ mult(const Right& right, const Perm& perm) const {
    return this->binary(right,
                        [](const numeric_type lhs,
                           const numeric_t<Right> rhs) -> numeric_type {
                          return lhs * rhs;
                        },
                        perm);
  }


  /// Multiply this tensor by \c right and scale the product

  /// The functor given to binary() evaluates <tt>(l * r) * factor</tt>.
  /// \tparam Right The right-hand tensor type
  /// \tparam Scalar A numeric type
  /// \param right The tensor to multiply by
  /// \param factor The scaling factor applied to each product
  /// \return The tensor produced by binary()
  template <
      typename Right, typename Scalar,
      typename std::enable_if<is_tensor<Right>::value &&
                              detail::is_numeric_v<Scalar>>::type* = nullptr>
  Tensor_ mult(const Right& right, const Scalar factor) const {
    return this->binary(
        right,
        [factor](const numeric_type lhs,
                 const numeric_t<Right> rhs) -> numeric_type {
          return (lhs * rhs) * factor;
        });
  }


  /// Multiply this tensor by \c right, scale the product, and forward
  /// \c perm to binary()

  /// The functor given to binary() evaluates <tt>(l * r) * factor</tt>.
  /// \tparam Right The right-hand tensor type
  /// \tparam Scalar A numeric type
  /// \tparam Perm A permutation type
  /// \param right The tensor to multiply by
  /// \param factor The scaling factor applied to each product
  /// \param perm The permutation passed on to binary()
  /// \return The tensor produced by binary()
  template <typename Right, typename Scalar, typename Perm,
            typename std::enable_if<
                is_tensor<Right>::value && detail::is_numeric_v<Scalar> &&
                detail::is_permutation_v<Perm>>::type* = nullptr>
  Tensor_ mult(const Right& right, const Scalar factor,
               const Perm& perm) const {
    return this->binary(
        right,
        [factor](const numeric_type lhs,
                 const numeric_t<Right> rhs) -> numeric_type {
          return (lhs * rhs) * factor;
        },
        perm);
  }


  /// Multiply this tensor by \c right in place

  /// The functor given to inplace_binary() performs <tt>l *= r</tt>.
  /// \tparam Right The right-hand tensor type
  /// \param right The tensor to multiply by
  /// \return The reference returned by inplace_binary()
  template <typename Right,
            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>
  Tensor_& mult_to(const Right& right) {
    return this->inplace_binary(
        right, [](numeric_type& MADNESS_RESTRICT target,
                  const numeric_t<Right> multiplier) { target *= multiplier; });
  }


  /// Multiply this tensor by \c right in place, then scale

  /// The functor given to inplace_binary() multiplies \c l by \c r and then
  /// by \c factor.
  /// \tparam Right The right-hand tensor type
  /// \tparam Scalar A numeric type
  /// \param right The tensor to multiply by
  /// \param factor The scaling factor applied after the multiplication
  /// \return The reference returned by inplace_binary()
  template <
      typename Right, typename Scalar,
      typename std::enable_if<is_tensor<Right>::value &&
                              detail::is_numeric_v<Scalar>>::type* = nullptr>
  Tensor_& mult_to(const Right& right, const Scalar factor) {
    return this->inplace_binary(
        right, [factor](numeric_type& MADNESS_RESTRICT target,
                        const numeric_t<Right> multiplier) {
          target *= multiplier;
          target *= factor;
        });
  }


  // Negation operations


  /// Create a negated copy of this tensor

  /// Hands unary() a functor that returns the arithmetic negation of its
  /// argument.
  /// \return The tensor produced by unary()
  Tensor_ neg() const {
    return this->unary(
        [](const numeric_type element) -> numeric_type { return -element; });
  }


  /// Create a negated copy of this tensor, forwarding \c perm to unary()

  /// \tparam Perm A permutation type
  /// \param perm The permutation passed on to unary()
  /// \return The tensor produced by unary()
  template <typename Perm,
            typename = std::enable_if_t<detail::is_permutation_v<Perm>>>
  Tensor_ neg(const Perm& perm) const {
    return this->unary(
        [](const numeric_type element) -> numeric_type { return -element; },
        perm);
  }


  /// Negate this tensor in place

  /// The functor given to inplace_unary() overwrites its argument with its
  /// own negation.
  /// \return The reference returned by inplace_unary()
  Tensor_& neg_to() {
    return this->inplace_unary(
        [](numeric_type& MADNESS_RESTRICT element) { element = -element; });
  }


  /// Create a complex-conjugated copy of this tensor

  /// Requires a non-null \c pimpl_ ; the work is delegated to scale() with
  /// the object returned by <tt>detail::conj_op()</tt>.
  /// \return The tensor produced by scale()
  Tensor_ conj() const {
    TA_ASSERT(pimpl_);

    return this->scale(detail::conj_op());
  }


  /// Create a complex-conjugated and scaled copy of this tensor

  /// Requires a non-null \c pimpl_ ; the work is delegated to scale() with
  /// the object returned by <tt>detail::conj_op(factor)</tt>.
  /// \tparam Scalar A numeric type
  /// \param factor The scaling factor handed to <tt>detail::conj_op</tt>
  /// \return The tensor produced by scale()
  template <typename Scalar, typename std::enable_if<
                                 detail::is_numeric_v<Scalar>>::type* = nullptr>
  Tensor_ conj(const Scalar factor) const {
    TA_ASSERT(pimpl_);

    return this->scale(detail::conj_op(factor));
  }


  /// Create a complex-conjugated copy of this tensor, forwarding \c perm to
  /// scale()

  /// Requires a non-null \c pimpl_ .
  /// \tparam Perm A permutation type
  /// \param perm The permutation passed on to scale()
  /// \return The tensor produced by scale()
  template <typename Perm,
            typename = std::enable_if_t<detail::is_permutation_v<Perm>>>
  Tensor_ conj(const Perm& perm) const {
    TA_ASSERT(pimpl_);

    return this->scale(detail::conj_op(), perm);
  }


  /// Create a complex-conjugated and scaled copy of this tensor, forwarding
  /// \c perm to scale()

  /// Requires a non-null \c pimpl_ .
  /// \tparam Scalar A numeric type
  /// \tparam Perm A permutation type
  /// \param factor The scaling factor handed to <tt>detail::conj_op</tt>
  /// \param perm The permutation passed on to scale()
  /// \return The tensor produced by scale()
  template <
      typename Scalar, typename Perm,
      typename std::enable_if<detail::is_numeric_v<Scalar> &&
                              detail::is_permutation_v<Perm>>::type* = nullptr>
  Tensor_ conj(const Scalar factor, const Perm& perm) const {
    TA_ASSERT(pimpl_);

    return this->scale(detail::conj_op(factor), perm);
  }


  /// Complex-conjugate this tensor in place

  /// Requires a non-null \c pimpl_ ; the work is delegated to scale_to() with
  /// the object returned by <tt>detail::conj_op()</tt>.
  /// \return The reference returned by scale_to()
  Tensor_& conj_to() {
    TA_ASSERT(pimpl_);

    return this->scale_to(detail::conj_op());
  }


  /// Complex-conjugate and scale this tensor in place

  /// Requires a non-null \c pimpl_ ; the work is delegated to scale_to() with
  /// the object returned by <tt>detail::conj_op(factor)</tt>.
  /// \tparam Scalar A numeric type
  /// \param factor The scaling factor handed to <tt>detail::conj_op</tt>
  /// \return The reference returned by scale_to()
  template <typename Scalar, typename std::enable_if<
                                 detail::is_numeric_v<Scalar>>::type* = nullptr>
  Tensor_& conj_to(const Scalar factor) {
    TA_ASSERT(pimpl_);

    return this->scale_to(detail::conj_op(factor));
  }


  // GEMM operations


  /// Contract this tensor with \c other into a newly constructed tensor

  /// The result range, the fused matrix sizes (m, n, k) and the transpose
  /// flags are all obtained from \c gemm_helper . The product is evaluated by
  /// <tt>math::blas::gemm</tt> with \c factor as the scaling coefficient and a
  /// zero coefficient on the freshly constructed result.
  /// \tparam U The element type of \c other
  /// \tparam AU The allocator type of \c other
  /// \tparam V The type of the scaling factor
  /// \param other The right-hand tensor; must not be empty
  /// \param factor The scaling factor applied to the product
  /// \param gemm_helper Supplies ranks, result range, congruence checks and
  /// transpose flags
  /// \return The result tensor
  template <typename U, typename AU, typename V>
  Tensor_ gemm(const Tensor<U, AU>& other, const V factor,
               const math::GemmHelper& gemm_helper) const {
    static_assert(!detail::is_tensor_of_tensor_v<Tensor_, Tensor<U, AU>>,
                  "TA::Tensor<T>::gemm without custom element op is only "
                  "applicable to plain tensors");
    // Check that this tensor is not empty and has the correct rank
    TA_ASSERT(pimpl_);
    TA_ASSERT(pimpl_->range_.rank() == gemm_helper.left_rank());

    // Check that the arguments are not empty and have the correct ranks
    TA_ASSERT(!other.empty());
    TA_ASSERT(other.range().rank() == gemm_helper.right_rank());

    // Construct the result Tensor
    Tensor_ result(gemm_helper.make_result_range<range_type>(pimpl_->range_,
                                                             other.range()));

    // Check that the inner dimensions of left and right match
    TA_ASSERT(ignore_tile_position() ||
              gemm_helper.left_right_congruent(pimpl_->range_.lobound_data(),
                                               other.range().lobound_data()));
    TA_ASSERT(ignore_tile_position() ||
              gemm_helper.left_right_congruent(pimpl_->range_.upbound_data(),
                                               other.range().upbound_data()));
    TA_ASSERT(gemm_helper.left_right_congruent(pimpl_->range_.extent_data(),
                                               other.range().extent_data()));

    // Compute gemm dimensions
    using integer = TiledArray::math::blas::integer;
    integer m = 1, n = 1, k = 1;
    gemm_helper.compute_matrix_sizes(m, n, k, pimpl_->range_, other.range());

    // Get the leading dimension for left and right matrices.
    const integer lda =
        (gemm_helper.left_op() == TiledArray::math::blas::NoTranspose ? k : m);
    const integer ldb =
        (gemm_helper.right_op() == TiledArray::math::blas::NoTranspose ? n : k);

    // beta == 0: the contents of the new result are overwritten
    math::blas::gemm(gemm_helper.left_op(), gemm_helper.right_op(), m, n, k,
                     factor, pimpl_->data_, lda, other.data(), ldb,
                     numeric_type(0), result.data(), n);

#ifdef TA_ENABLE_TILE_OPS_LOGGING
    if (TiledArray::TileOpsLogger<T>::get_instance_ptr() != nullptr &&
        TiledArray::TileOpsLogger<T>::get_instance().gemm) {
      auto& logger = TiledArray::TileOpsLogger<T>::get_instance();
      // apply an optional range transform; pass the range through if unset
      auto apply = [](auto& fnptr, const Range& arg) {
        return fnptr ? fnptr(arg) : arg;
      };
      auto tformed_left_range =
          apply(logger.gemm_left_range_transform, pimpl_->range_);
      auto tformed_right_range =
          apply(logger.gemm_right_range_transform, other.range());
      auto tformed_result_range =
          apply(logger.gemm_result_range_transform, result.range());
      // each filter, when set, must accept its (transformed) range
      if ((!logger.gemm_result_range_filter ||
           logger.gemm_result_range_filter(tformed_result_range)) &&
          (!logger.gemm_left_range_filter ||
           logger.gemm_left_range_filter(tformed_left_range)) &&
          (!logger.gemm_right_range_filter ||
           logger.gemm_right_range_filter(tformed_right_range))) {
        logger << "TA::Tensor::gemm=: left=" << tformed_left_range
               << " right=" << tformed_right_range
               << " result=" << tformed_result_range << std::endl;
        if (logger.gemm_print_contributions) {
          if (!logger.gemm_printer) {
            // must use custom printer if result's range transformed
            if (!logger.gemm_result_range_transform)
              logger << result << std::endl;
            else
              logger << make_map(result.data(), tformed_result_range)
                     << std::endl;
          } else {
            // the last range/data pair describes the result, so it must be
            // the transformed *result* range (previously the right range was
            // passed here by mistake)
            logger.gemm_printer(*logger.log, tformed_left_range, this->data(),
                                tformed_right_range, other.data(),
                                tformed_result_range, result.data());
          }
        }
      }
    }
#endif  // TA_ENABLE_TILE_OPS_LOGGING

    return result;
  }


  /// Contract \c left with \c right and accumulate into this tensor

  /// If this tensor is empty it is assigned the result of
  /// <tt>left.gemm(right, factor, gemm_helper)</tt>. Otherwise the product is
  /// added to the existing data by calling <tt>math::blas::gemm</tt> with a
  /// unit coefficient on this tensor's data. When tile-op logging is compiled
  /// in and contribution printing is requested, the product is first written
  /// on its own (zero coefficient), logged, and the saved previous contents
  /// are then added back.
  /// \tparam U The element type of \c left
  /// \tparam AU The allocator type of \c left
  /// \tparam V The element type of \c right
  /// \tparam AV The allocator type of \c right
  /// \tparam W The type of the scaling factor
  /// \param left The left-hand tensor; must not be empty
  /// \param right The right-hand tensor; must not be empty
  /// \param factor The scaling factor applied to the product
  /// \param gemm_helper Supplies ranks, congruence checks, fused matrix sizes
  /// and transpose flags
  /// \return A reference to this tensor
  template <typename U, typename AU, typename V, typename AV, typename W>
  Tensor_& gemm(const Tensor<U, AU>& left, const Tensor<V, AV>& right,
                const W factor, const math::GemmHelper& gemm_helper) {
    static_assert(
        !detail::is_tensor_of_tensor_v<Tensor_, Tensor<U, AU>, Tensor<V, AV>>,
        "TA::Tensor<T>::gemm without custom element op is only applicable to "
        "plain tensors");
    if (this->empty()) {
      *this = left.gemm(right, factor, gemm_helper);
    } else {
      // Check that this tensor is not empty and has the correct rank
      TA_ASSERT(pimpl_);
      TA_ASSERT(pimpl_->range_.rank() == gemm_helper.result_rank());

      // Check that the arguments are not empty and have the correct ranks
      TA_ASSERT(!left.empty());
      TA_ASSERT(left.range().rank() == gemm_helper.left_rank());
      TA_ASSERT(!right.empty());
      TA_ASSERT(right.range().rank() == gemm_helper.right_rank());

      // Check that the outer dimensions of left match the corresponding
      // dimensions in result
      TA_ASSERT(ignore_tile_position() || gemm_helper.left_result_congruent(
                                              left.range().lobound_data(),
                                              pimpl_->range_.lobound_data()));
      TA_ASSERT(ignore_tile_position() || gemm_helper.left_result_congruent(
                                              left.range().upbound_data(),
                                              pimpl_->range_.upbound_data()));
      TA_ASSERT(gemm_helper.left_result_congruent(
          left.range().extent_data(), pimpl_->range_.extent_data()));

      // Check that the outer dimensions of right match the corresponding
      // dimensions in result
      TA_ASSERT(ignore_tile_position() || gemm_helper.right_result_congruent(
                                              right.range().lobound_data(),
                                              pimpl_->range_.lobound_data()));
      TA_ASSERT(ignore_tile_position() || gemm_helper.right_result_congruent(
                                              right.range().upbound_data(),
                                              pimpl_->range_.upbound_data()));
      TA_ASSERT(gemm_helper.right_result_congruent(
          right.range().extent_data(), pimpl_->range_.extent_data()));

      // Check that the inner dimensions of left and right match
      TA_ASSERT(ignore_tile_position() ||
                gemm_helper.left_right_congruent(left.range().lobound_data(),
                                                 right.range().lobound_data()));
      TA_ASSERT(ignore_tile_position() ||
                gemm_helper.left_right_congruent(left.range().upbound_data(),
                                                 right.range().upbound_data()));
      TA_ASSERT(gemm_helper.left_right_congruent(left.range().extent_data(),
                                                 right.range().extent_data()));

      // Compute gemm dimensions (initialized as in the const gemm overload)
      using integer = TiledArray::math::blas::integer;
      integer m = 1, n = 1, k = 1;
      gemm_helper.compute_matrix_sizes(m, n, k, left.range(), right.range());

      // Get the leading dimension for left and right matrices.
      const integer lda =
          (gemm_helper.left_op() == TiledArray::math::blas::NoTranspose ? k
                                                                        : m);
      const integer ldb =
          (gemm_helper.right_op() == TiledArray::math::blas::NoTranspose ? n
                                                                         : k);

      // may need to split gemm into multiply + accumulate for tracing purposes
#ifdef TA_ENABLE_TILE_OPS_LOGGING
      {
        const bool twostep =
            TiledArray::TileOpsLogger<T>::get_instance().gemm &&
            TiledArray::TileOpsLogger<T>::get_instance()
                .gemm_print_contributions;
        std::unique_ptr<T[]> data_copy;
        size_t tile_volume = 0;
        if (twostep) {
          // save the current contents; they are added back after logging
          tile_volume = range().volume();
          data_copy = std::make_unique<T[]>(tile_volume);
          std::copy(pimpl_->data_, pimpl_->data_ + tile_volume,
                    data_copy.get());
        }
        // same routine as the non-logging path below; only the coefficient on
        // the existing data differs (0 when the contribution is isolated)
        math::blas::gemm(gemm_helper.left_op(), gemm_helper.right_op(), m, n,
                         k, factor, left.data(), lda, right.data(), ldb,
                         twostep ? numeric_type(0) : numeric_type(1),
                         pimpl_->data_, n);

        if (TiledArray::TileOpsLogger<T>::get_instance_ptr() != nullptr &&
            TiledArray::TileOpsLogger<T>::get_instance().gemm) {
          auto& logger = TiledArray::TileOpsLogger<T>::get_instance();
          // apply an optional range transform; pass the range through if unset
          auto apply = [](auto& fnptr, const Range& arg) {
            return fnptr ? fnptr(arg) : arg;
          };
          auto tformed_left_range =
              apply(logger.gemm_left_range_transform, left.range());
          auto tformed_right_range =
              apply(logger.gemm_right_range_transform, right.range());
          auto tformed_result_range =
              apply(logger.gemm_result_range_transform, pimpl_->range_);
          if ((!logger.gemm_result_range_filter ||
               logger.gemm_result_range_filter(tformed_result_range)) &&
              (!logger.gemm_left_range_filter ||
               logger.gemm_left_range_filter(tformed_left_range)) &&
              (!logger.gemm_right_range_filter ||
               logger.gemm_right_range_filter(tformed_right_range))) {
            logger << "TA::Tensor::gemm+: left=" << tformed_left_range
                   << " right=" << tformed_right_range
                   << " result=" << tformed_result_range << std::endl;
            if (logger.gemm_print_contributions) {
              if (!logger.gemm_printer) {  // default printer
                // must use custom printer if result's range transformed
                if (!logger.gemm_result_range_transform)
                  logger << *this << std::endl;
                else
                  logger << make_map(pimpl_->data_, tformed_result_range)
                         << std::endl;
              } else {
                // the last range/data pair describes the result, so it must
                // be the transformed *result* range (previously the right
                // range was passed here by mistake)
                logger.gemm_printer(*logger.log, tformed_left_range,
                                    left.data(), tformed_right_range,
                                    right.data(), tformed_result_range,
                                    pimpl_->data_);
              }
            }
          }
        }

        if (twostep) {
          // restore the accumulate semantics: add the saved contents back
          for (size_t v = 0; v != tile_volume; ++v) {
            pimpl_->data_[v] += data_copy[v];
          }
        }
      }
#else   // TA_ENABLE_TILE_OPS_LOGGING
      math::blas::gemm(gemm_helper.left_op(), gemm_helper.right_op(), m, n, k,
                       factor, left.data(), lda, right.data(), ldb,
                       numeric_type(1), pimpl_->data_, n);
#endif  // TA_ENABLE_TILE_OPS_LOGGING
    }

    return *this;
  }


  /// Contract \c left with \c right into this tensor using a custom
  /// element-wise multiply-add operation

  /// If this tensor is empty it is first assigned a tensor constructed from
  /// the result range reported by \c gemm_helper . The contraction is then
  /// evaluated with an explicit triple loop over the fused dimensions
  /// (M, N, K), invoking
  /// <tt>elem_muladd_op(result[m*N + n], left[...], right[...])</tt> for every
  /// (m, n, k); the left/right offsets depend on the transpose flags.
  /// \tparam U The element type of \c left
  /// \tparam AU The allocator type of \c left
  /// \tparam V The element type of \c right
  /// \tparam AV The allocator type of \c right
  /// \tparam ElementMultiplyAddOp A callable invocable as
  /// <tt>void(value_type&, const U&, const V&)</tt>
  /// \param left The left-hand tensor; must not be empty
  /// \param right The right-hand tensor; must not be empty
  /// \param gemm_helper Supplies ranks, congruence checks, fused matrix sizes
  /// and transpose flags
  /// \param elem_muladd_op The element multiply-add operation
  /// \return A reference to this tensor
  template <typename U, typename AU, typename V, typename AV,
            typename ElementMultiplyAddOp,
            typename = std::enable_if_t<std::is_invocable_r_v<
                void, std::remove_reference_t<ElementMultiplyAddOp>,
                value_type&, const U&, const V&>>>
  Tensor_& gemm(const Tensor<U, AU>& left, const Tensor<V, AV>& right,
                const math::GemmHelper& gemm_helper,
                ElementMultiplyAddOp&& elem_muladd_op) {
    // Check that the arguments are not empty and have the correct ranks
    TA_ASSERT(!left.empty());
    TA_ASSERT(left.range().rank() == gemm_helper.left_rank());
    TA_ASSERT(!right.empty());
    TA_ASSERT(right.range().rank() == gemm_helper.right_rank());

    // Check that the inner dimensions of left and right match
    TA_ASSERT(ignore_tile_position() ||
              gemm_helper.left_right_congruent(left.range().lobound_data(),
                                               right.range().lobound_data()));
    TA_ASSERT(ignore_tile_position() ||
              gemm_helper.left_right_congruent(left.range().upbound_data(),
                                               right.range().upbound_data()));
    TA_ASSERT(gemm_helper.left_right_congruent(left.range().extent_data(),
                                               right.range().extent_data()));

    if (this->empty()) {  // initialize, if empty
      *this = Tensor_(gemm_helper.make_result_range<range_type>(left.range(),
                                                                right.range()));
    } else {
      // Check that this tensor has the correct rank (same check as in the
      // scalar-factor in-place gemm overload)
      TA_ASSERT(pimpl_->range_.rank() == gemm_helper.result_rank());

      // Check that the outer dimensions of left match the corresponding
      // dimensions in result
      TA_ASSERT(ignore_tile_position() || gemm_helper.left_result_congruent(
                                              left.range().lobound_data(),
                                              pimpl_->range_.lobound_data()));
      TA_ASSERT(ignore_tile_position() || gemm_helper.left_result_congruent(
                                              left.range().upbound_data(),
                                              pimpl_->range_.upbound_data()));
      TA_ASSERT(gemm_helper.left_result_congruent(
          left.range().extent_data(), pimpl_->range_.extent_data()));

      // Check that the outer dimensions of right match the corresponding
      // dimensions in result
      TA_ASSERT(ignore_tile_position() || gemm_helper.right_result_congruent(
                                              right.range().lobound_data(),
                                              pimpl_->range_.lobound_data()));
      TA_ASSERT(ignore_tile_position() || gemm_helper.right_result_congruent(
                                              right.range().upbound_data(),
                                              pimpl_->range_.upbound_data()));
      TA_ASSERT(gemm_helper.right_result_congruent(
          right.range().extent_data(), pimpl_->range_.extent_data()));
    }

    // Compute gemm dimensions
    using integer = TiledArray::math::blas::integer;
    integer M = 1, N = 1, K = 1;
    gemm_helper.compute_matrix_sizes(M, N, K, left.range(), right.range());

    // The transpose flags do not change inside the loops: evaluate them once
    const bool left_notrans =
        gemm_helper.left_op() == TiledArray::math::blas::NoTranspose;
    const bool right_notrans =
        gemm_helper.right_op() == TiledArray::math::blas::NoTranspose;

    // Get the leading dimension for left and right matrices.
    const integer lda = (left_notrans ? K : M);
    const integer ldb = (right_notrans ? N : K);

    for (integer m = 0; m != M; ++m) {
      for (integer n = 0; n != N; ++n) {
        // the result is addressed as an M x N row-major matrix
        auto c_offset = m * N + n;
        for (integer k = 0; k != K; ++k) {
          auto a_offset = left_notrans ? m * lda + k : k * lda + m;
          auto b_offset = right_notrans ? k * ldb + n : n * ldb + k;
          elem_muladd_op(*(pimpl_->data_ + c_offset), *(left.data() + a_offset),
                         *(right.data() + b_offset));
        }
      }
    }

    return *this;
  }


  // Reduction operations


  template <typename TileType = Tensor_,

            typename = detail::enable_if_trace_is_defined_t<TileType>>

  decltype(auto) trace() const {

    return TiledArray::trace(*this);

  }


  template <typename ReduceOp, typename JoinOp, typename Scalar>

  decltype(auto) reduce(ReduceOp&& reduce_op, JoinOp&& join_op,

                        Scalar identity) const {

    return detail::tensor_reduce(reduce_op, join_op, identity, *this);

  }


  /// Binary reduction over this tensor and \c other

  /// Forwards the operations, the identity, this tensor and \c other to
  /// <tt>detail::tensor_reduce</tt>.
  /// \tparam Right The right-hand tensor type
  /// \tparam ReduceOp The reduction operation type
  /// \tparam JoinOp The join operation type
  /// \tparam Scalar The type of the initial value
  /// \param other The right-hand tensor
  /// \param reduce_op The element-wise reduction operation
  /// \param join_op The operation used to join partial results
  /// \param identity The initial value of the reduction
  /// \return Whatever <tt>detail::tensor_reduce</tt> returns
  template <typename Right, typename ReduceOp, typename JoinOp, typename Scalar,
            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>
  decltype(auto) reduce(const Right& other, ReduceOp&& reduce_op,
                        JoinOp&& join_op, Scalar identity) const {
    const Tensor_& self = *this;
    return detail::tensor_reduce(reduce_op, join_op, identity, self, other);
  }


  /// Sum of elements

  /// Reduces with an accumulate-by-addition functor (used both for reducing
  /// and for joining), starting from <tt>numeric_type(0)</tt>.
  /// \return The value returned by reduce()
  numeric_type sum() const {
    auto accumulate = [](numeric_type& MADNESS_RESTRICT total,
                         const numeric_type element) { total += element; };
    return this->reduce(accumulate, accumulate, numeric_type(0));
  }


  /// Product of elements

  /// Reduces with an accumulate-by-multiplication functor (used both for
  /// reducing and for joining), starting from <tt>numeric_type(1)</tt>.
  /// \return The value returned by reduce()
  numeric_type product() const {
    auto accumulate = [](numeric_type& MADNESS_RESTRICT total,
                         const numeric_type element) { total *= element; };
    return this->reduce(accumulate, accumulate, numeric_type(1));
  }


  /// Squared norm of this tensor

  /// Each element contributes <tt>TiledArray::detail::norm(element)</tt>;
  /// partial results are joined by addition, starting from
  /// <tt>scalar_type(0)</tt>.
  /// \return The value returned by reduce()
  scalar_type squared_norm() const {
    auto join_partials = [](scalar_type& MADNESS_RESTRICT total,
                            const scalar_type partial) { total += partial; };
    auto add_element_norm = [](scalar_type& MADNESS_RESTRICT total,
                               const numeric_type element) {
      total += TiledArray::detail::norm(element);
    };
    return this->reduce(add_element_norm, join_partials, scalar_type(0));
  }


  template <typename ResultType = scalar_type>

  ResultType norm() const {

    return std::sqrt(static_cast<ResultType>(squared_norm()));

  }


  /// Minimum element

  /// Only enabled when \c Numeric is strictly ordered. Reduces with
  /// <tt>std::min</tt>, starting from
  /// <tt>std::numeric_limits<numeric_type>::max()</tt>.
  /// \tparam Numeric The type used for the ordering check (defaults to
  /// \c numeric_type )
  /// \return The value returned by reduce()
  template <typename Numeric = numeric_type>
  numeric_type min(
      typename std::enable_if<
          detail::is_strictly_ordered<Numeric>::value>::type* = nullptr) const {
    auto keep_smaller = [](numeric_type& MADNESS_RESTRICT smallest,
                           const numeric_type candidate) {
      smallest = std::min(smallest, candidate);
    };
    return this->reduce(keep_smaller, keep_smaller,
                        std::numeric_limits<numeric_type>::max());
  }


  /// Maximum element

  /// Only enabled when \c Numeric is strictly ordered. Reduces with
  /// <tt>std::max</tt>, starting from the lowest finite value of
  /// \c numeric_type .
  /// \note The seed must be <tt>lowest()</tt>, not <tt>min()</tt>: for
  /// floating-point types <tt>std::numeric_limits<>::min()</tt> is the
  /// smallest \em positive normal value, which would be returned instead of
  /// the true maximum for a tensor whose elements are all negative.
  /// \tparam Numeric The type used for the ordering check (defaults to
  /// \c numeric_type )
  /// \return The value returned by reduce()
  template <typename Numeric = numeric_type>
  numeric_type max(
      typename std::enable_if<
          detail::is_strictly_ordered<Numeric>::value>::type* = nullptr) const {
    auto max_op = [](numeric_type& MADNESS_RESTRICT res,
                     const numeric_type arg) { res = std::max(res, arg); };
    return reduce(max_op, max_op, std::numeric_limits<numeric_type>::lowest());
  }


  /// Minimum absolute value

  /// Each element contributes <tt>std::abs(element)</tt>; values are combined
  /// with <tt>std::min</tt>, starting from
  /// <tt>std::numeric_limits<scalar_type>::max()</tt>.
  /// \return The value returned by reduce()
  scalar_type abs_min() const {
    auto join_partials = [](scalar_type& MADNESS_RESTRICT smallest,
                            const scalar_type partial) {
      smallest = std::min(smallest, partial);
    };
    auto keep_smaller_abs = [](scalar_type& MADNESS_RESTRICT smallest,
                               const numeric_type element) {
      smallest = std::min(smallest, std::abs(element));
    };
    return this->reduce(keep_smaller_abs, join_partials,
                        std::numeric_limits<scalar_type>::max());
  }


  /// Maximum absolute value

  /// Each element contributes <tt>std::abs(element)</tt>; values are combined
  /// with <tt>std::max</tt>, starting from <tt>scalar_type(0)</tt>.
  /// \return The value returned by reduce()
  scalar_type abs_max() const {
    auto join_partials = [](scalar_type& MADNESS_RESTRICT largest,
                            const scalar_type partial) {
      largest = std::max(largest, partial);
    };
    auto keep_larger_abs = [](scalar_type& MADNESS_RESTRICT largest,
                              const numeric_type element) {
      largest = std::max(largest, std::abs(element));
    };
    return this->reduce(keep_larger_abs, join_partials, scalar_type(0));
  }


  /// Dot product of this tensor with \c other

  /// The reduction functor accumulates <tt>l * r</tt> (no conjugation);
  /// partial results are joined by addition, starting from
  /// <tt>numeric_type(0)</tt>.
  /// \tparam Right The right-hand tensor type
  /// \param other The right-hand tensor
  /// \return The value returned by reduce()
  template <typename Right,
            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>
  numeric_type dot(const Right& other) const {
    auto join_partials = [](numeric_type& MADNESS_RESTRICT total,
                            const numeric_type partial) { total += partial; };
    auto accumulate_product = [](numeric_type& total, const numeric_type lhs,
                                 const numeric_t<Right> rhs) {
      total += lhs * rhs;
    };
    return this->reduce(other, accumulate_product, join_partials,
                        numeric_type(0));
  }


  /// Inner product of this tensor with \c other

  /// The reduction functor accumulates
  /// <tt>TiledArray::detail::inner_product(l, r)</tt>; partial results are
  /// joined by addition, starting from <tt>numeric_type(0)</tt>.
  /// \tparam Right The right-hand tensor type
  /// \param other The right-hand tensor
  /// \return The value returned by reduce()
  template <typename Right,
            typename std::enable_if<is_tensor<Right>::value>::type* = nullptr>
  numeric_type inner_product(const Right& other) const {
    auto join_partials = [](numeric_type& MADNESS_RESTRICT total,
                            const numeric_type partial) { total += partial; };
    auto accumulate_inner = [](numeric_type& total, const numeric_type lhs,
                               const numeric_t<Right> rhs) {
      total += TiledArray::detail::inner_product(lhs, rhs);
    };
    return this->reduce(other, accumulate_inner, join_partials,
                        numeric_type(0));
  }


};  // class Tensor


// Out-of-class definition of the static data member Tensor<T, A>::empty_range_
// (default-constructed range; presumably what range() reports for an empty
// tensor -- confirm against the class declaration).
template <typename T, typename A>

const typename Tensor<T, A>::range_type Tensor<T, A>::empty_range_;


/// Tensor equality comparison

/// Two tensors compare equal when their ranges are equal and the
/// <tt>a.size()</tt> elements starting at <tt>a.data()</tt> match those
/// starting at <tt>b.data()</tt>.
/// \param a The left-hand tensor
/// \param b The right-hand tensor
/// \return \c true if ranges and elements are equal, \c false otherwise
template <typename T, typename A>
bool operator==(const Tensor<T, A>& a, const Tensor<T, A>& b) {
  // elements are only inspected once the ranges are known to match
  if (!(a.range() == b.range())) return false;
  return std::equal(a.data(), a.data() + a.size(), b.data());
}

/// Tensor inequality comparison

/// \param a The left-hand tensor
/// \param b The right-hand tensor
/// \return The negation of <tt>a == b</tt>
template <typename T, typename A>
bool operator!=(const Tensor<T, A>& a, const Tensor<T, A>& b) {
  const bool identical = (a == b);
  return !identical;
}


namespace detail {


/// Trace implementation for Tensor with numeric elements

/// Sums the elements whose coordinate is the same in every mode, i.e. the
/// elements on the (hyper)diagonal that falls inside the tensor's range.
template <typename T, typename A>
struct Trace<Tensor<T, A>, detail::enable_if_numeric_t<T>> {
  /// \param t The tensor whose trace is computed
  /// \return The sum of the diagonal elements of \c t ; zero when the range
  /// has rank 0 or contains no diagonal element
  decltype(auto) operator()(const Tensor<T, A>& t) const {
    using size_type = typename Tensor<T, A>::size_type;
    using value_type = typename Tensor<T, A>::value_type;
    const auto& range = t.range();

    value_type result = 0;

    // Get pointers to the range data
    const size_type n = range.rank();
    // A rank-0 range has no bounds to search (and no diagonal)
    if (n == 0ul) return result;
    const auto* MADNESS_RESTRICT const lower = range.lobound_data();
    const auto* MADNESS_RESTRICT const upper = range.upbound_data();
    const auto* MADNESS_RESTRICT const stride = range.stride_data();

    // Search for the largest lower bound and the smallest upper bound
    const size_type lower_max = *std::max_element(lower, lower + n);
    const size_type upper_min = *std::min_element(upper, upper + n);

    if (lower_max >= upper_min) return result;  // No diagonal element in tile

    // Compute the first and last ordinal index, both measured from the first
    // element of the tile (coordinates are shifted by the lower bound)
    size_type first = 0ul, last = 0ul, trace_stride = 0ul;
    for (size_type i = 0ul; i < n; ++i) {
      const size_type lower_i = lower[i];
      const size_type stride_i = stride[i];

      first += (lower_max - lower_i) * stride_i;
      last += (upper_min - lower_i) * stride_i;
      trace_stride += stride_i;
    }

    // Compute the trace. `first` is already an offset from the start of the
    // data, so the base pointer must be t.data(); starting from &t[first] and
    // then indexing with `first` again would apply the offset twice.
    const value_type* MADNESS_RESTRICT const data = t.data();
    for (; first < last; first += trace_stride) result += data[first];

    return result;
  }
};


/// Specialization of transform for Tensor

/// Every overload constructs a <tt>Tensor<T, A></tt> from the perfectly
/// forwarded argument tensor(s), the operation and, where given, the
/// permutation -- in the argument order expected by the Tensor constructors
/// (tensors first, then the operation, then the permutation).
template <typename T, typename A>
struct transform<Tensor<T, A>> {
  /// Unary transform: <tt>Tensor(t1, op)</tt>
  template <typename Op, typename T1>
  auto operator()(Op&& op, T1&& t1) const -> Tensor<T, A> {
    return Tensor<T, A>(std::forward<T1>(t1), std::forward<Op>(op));
  }

  /// Unary transform with permutation: <tt>Tensor(t1, op, perm)</tt>
  template <typename Op, typename Perm, typename T1,
            typename = std::enable_if_t<
                detail::is_permutation_v<std::remove_reference_t<Perm>>>>
  auto operator()(Op&& op, Perm&& perm, T1&& t1) const -> Tensor<T, A> {
    return Tensor<T, A>(std::forward<T1>(t1), std::forward<Op>(op),
                        std::forward<Perm>(perm));
  }

  /// Binary transform: <tt>Tensor(t1, t2, op)</tt>
  template <typename Op, typename T1, typename T2>
  auto operator()(Op&& op, T1&& t1, T2&& t2) const -> Tensor<T, A> {
    return Tensor<T, A>(std::forward<T1>(t1), std::forward<T2>(t2),
                        std::forward<Op>(op));
  }

  /// Binary transform with permutation: <tt>Tensor(t1, t2, op, perm)</tt>
  template <typename Op, typename Perm, typename T1, typename T2,
            typename = std::enable_if_t<
                detail::is_permutation_v<std::remove_reference_t<Perm>>>>
  auto operator()(Op&& op, Perm&& perm, T1&& t1, T2&& t2) const
      -> Tensor<T, A> {
    return Tensor<T, A>(std::forward<T1>(t1), std::forward<T2>(t2),
                        std::forward<Op>(op), std::forward<Perm>(perm));
  }
};

}  // namespace detail


// Explicit instantiation declarations for common element types. Unless
// TILEDARRAY_HEADER_ONLY is defined, translation units including this header
// do not implicitly instantiate these specializations (the matching explicit
// instantiation definitions are presumably provided by a library source
// file -- confirm).
#ifndef TILEDARRAY_HEADER_ONLY


extern template class Tensor<double, Eigen::aligned_allocator<double>>;

extern template class Tensor<float, Eigen::aligned_allocator<float>>;

extern template class Tensor<int, Eigen::aligned_allocator<int>>;

extern template class Tensor<long, Eigen::aligned_allocator<long>>;

// Complex specializations are currently not declared extern:

//  extern template class Tensor<

//      std::complex<double>, Eigen::aligned_allocator<std::complex<double>>>;

//  extern template class Tensor<

//      std::complex<float>, Eigen::aligned_allocator<std::complex<float>>>;


#endif  // TILEDARRAY_HEADER_ONLY


}  // namespace TiledArray


#endif  // TILEDARRAY_TENSOR_TENSOR_H__INCLUDED