/*
 * This file is a part of TiledArray.
 * Copyright (C) 2013 Virginia Tech
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Justus Calvin
 * Department of Chemistry, Virginia Tech
 *
 * sparse_shape.h
 * Jul 9, 2013
 *
 */

#ifndef TILEDARRAY_SPARSE_SHAPE_H__INCLUDED
#define TILEDARRAY_SPARSE_SHAPE_H__INCLUDED

#include <TiledArray/tensor.h>
#include <TiledArray/tiled_range.h>
#include <TiledArray/val_array.h>
#include <TiledArray/tensor/shift_wrapper.h>  // detail::ShiftWrapper / shift(), used by block()
#include <typeinfo>

namespace TiledArray {

  /// Arbitrary sparse shape

  /// Stores a scaled norm for each tile of a tensor. Each stored value is
  /// the tile norm divided by the tile volume (i.e. an average per-element
  /// magnitude); tiles whose scaled norm falls below threshold() are
  /// treated as zero.
  template <typename T>
  class SparseShape {
  public:
    typedef SparseShape<T> SparseShape_; ///< This object type
    typedef T value_type; ///< The norm value type
    static_assert(TiledArray::detail::is_numeric<T>::value,
        "SparseShape<T> only supports scalar numeric types for T");
    typedef typename Tensor<value_type>::size_type size_type; ///< Size type

  private:

    // T must be a floating point type
    static_assert(std::is_floating_point<T>::value,
        "SparseShape template type T must be a floating point type");

    // Internal typedefs
    typedef ValArray<value_type> vector_type;

    Tensor<value_type> tile_norms_; ///< The scaled tile norms
    std::shared_ptr<vector_type> size_vectors_; ///< Tile extents along each dimension
    size_type zero_tile_count_; ///< Number of zero tiles
    static value_type threshold_; ///< The zero threshold

    // Recursively compute the outer product of a set of size vectors, after
    // applying op to each vector. The dimensions are split in half at each
    // level to minimize temporary storage.
    template <typename Op>
    static vector_type
    recursive_outer_product(const vector_type* const size_vectors,
        const unsigned int dim, const Op& op)
    {
      vector_type result;

      if(dim == 1u) {
        // Construct a modified copy of size_vector[0]
        result = op(*size_vectors);
      } else {
        // Split the dimensions and recursively compute the outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, op);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, op);

        // Compute the outer product of left and right
        result = vector_type(left.size() * right.size());
        result.outer_fill(left, right,
            [] (const value_type left, const value_type right) { return left * right; });
      }

      return result;
    }

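    // Illustration only (not part of the original source): for a rank-3
    // shape with per-dimension tile-extent vectors s0 = {2,3}, s1 = {4},
    // s2 = {5,6}, the per-tile volumes form the outer product s0 x s1 x s2.
    // With op = identity the recursion computes
    //   left   = s0 x s1      ->  {8, 12}
    //   right  = s2           ->  {5, 6}
    //   result = left x right ->  {40, 48, 60, 72}
    // so only O(sqrt(volume))-sized temporaries are ever materialized.
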
    /// Normalize the tile norms

    /// Divide each tile norm by the tile volume and zero out norms that
    /// fall below the threshold; the number of zeroed tiles is accumulated
    /// in zero_tile_count_.
    void normalize() {
      const value_type threshold = threshold_;
      const unsigned int dim = tile_norms_.range().rank();
      const vector_type* MADNESS_RESTRICT const size_vectors = size_vectors_.get();
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      if(dim == 1u) {
        auto normalize_op = [threshold, &zero_tile_count] (value_type& norm, const value_type size) {
          TA_ASSERT(norm >= value_type(0));
          norm /= size;
          if(norm < threshold) {
            norm = value_type(0);
            ++zero_tile_count;
          }
        };

        // This is the easy case where the data is a vector and can be
        // normalized directly.
        math::inplace_vector_op(normalize_op, size_vectors[0].size(), tile_norms_.data(),
            size_vectors[0].data());

      } else {
        // Here the normalization constants are computed and multiplied by the
        // norm data using a recursive, outer-product algorithm. This is done to
        // minimize temporary memory requirements, memory bandwidth, and work.

        auto inv_vec_op = [] (const vector_type& size_vector) {
          return vector_type(size_vector,
              [] (const value_type size) { return value_type(1) / size; });
        };

        // Compute the left and right outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, inv_vec_op);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, inv_vec_op);

        auto normalize_op = [threshold, &zero_tile_count] (value_type& norm,
            const value_type x, const value_type y)
        {
          TA_ASSERT(norm >= value_type(0));
          norm *= x * y;
          if(norm < threshold) {
            norm = value_type(0);
            ++zero_tile_count;
          }
        };

        math::outer(left.size(), right.size(), left.data(), right.data(),
            tile_norms_.data(), normalize_op);
      }

      zero_tile_count_ = zero_tile_count;
    }

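    // Worked example (illustration only): with threshold_ = 1e-7, a 10x10
    // tile whose norm is 1e-6 stores 1e-6 / 100 = 1e-8 after normalization,
    // which is below the threshold, so the tile is marked zero and
    // zero_tile_count_ is incremented.
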
    static std::shared_ptr<vector_type>
    initialize_size_vectors(const TiledRange& trange) {
      // Allocate memory for size vectors
      const unsigned int dim = trange.tiles_range().rank();
      std::shared_ptr<vector_type> size_vectors(new vector_type[dim],
          std::default_delete<vector_type[]>());

      // Initialize the size vectors
      for(unsigned int i = 0u; i != dim; ++i) {
        const size_type n = trange.data()[i].tiles_range().second - trange.data()[i].tiles_range().first;

        size_vectors.get()[i] = vector_type(n, & (* trange.data()[i].begin()),
            [] (const TiledRange1::range_type& tile)
            { return value_type(tile.second - tile.first); });
      }

      return size_vectors;
    }

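    // Illustration only: for TiledRange{{0,2,5},{0,4}} the size vectors are
    //   dim 0: {2, 3}  (tile boundaries 0-2 and 2-5)
    //   dim 1: {4}
    // i.e. one entry per tile along each dimension, holding the tile extent.
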
    std::shared_ptr<vector_type> perm_size_vectors(const Permutation& perm) const {
      const unsigned int n = tile_norms_.range().rank();

      // Allocate memory for the permuted size vectors
      std::shared_ptr<vector_type> result_size_vectors(new vector_type[n],
          std::default_delete<vector_type[]>());

      // Initialize the size vectors
      for(unsigned int i = 0u; i < n; ++i) {
        const unsigned int perm_i = perm[i];
        result_size_vectors.get()[perm_i] = size_vectors_.get()[i];
      }

      return result_size_vectors;
    }

    SparseShape(const Tensor<T>& tile_norms, const std::shared_ptr<vector_type>& size_vectors,
        const size_type zero_tile_count) :
      tile_norms_(tile_norms), size_vectors_(size_vectors),
      zero_tile_count_(zero_tile_count)
    { }

  public:

    /// Default constructor

    /// Construct a shape with no data.
    SparseShape() : tile_norms_(), size_vectors_(), zero_tile_count_(0ul) { }

    /// Constructor

    /// \param tile_norms The norm of each tile
    /// \param trange The tiled range of the tensor
    /// \note The norms are scaled by the inverse tile volumes, and scaled
    /// norms that fall below the threshold are set to zero.
    SparseShape(const Tensor<value_type>& tile_norms, const TiledRange& trange) :
      tile_norms_(tile_norms.clone()), size_vectors_(initialize_size_vectors(trange)),
      zero_tile_count_(0ul)
    {
      TA_ASSERT(! tile_norms_.empty());
      TA_ASSERT(tile_norms_.range() == trange.tiles_range());

      normalize();
    }

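    // Usage sketch (illustration only; names are hypothetical):
    //
    //   TiledRange trange{{0, 10, 20}, {0, 10, 20}};      // 2x2 tiles
    //   Tensor<float> norms(trange.tiles_range(), 0.0f);  // tile-norm tensor
    //   norms(0, 0) = 10.0f;                              // one nonzero tile
    //   SparseShape<float> shape(norms, trange);
    //   // shape.sparsity() == 0.75f  (3 of 4 tiles are zero)
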
    /// "Sparse" constructor

    /// Construct a shape from a sequence of {index, norm} pairs; tiles that
    /// do not appear in \c tile_norms are treated as zero.
    /// \tparam SparseNormSequence A sequence of {index, norm} pairs
    /// \param tile_norms The {index, norm} pairs for the nonzero tiles
    /// \param trange The tiled range of the tensor
    template<typename SparseNormSequence>
    SparseShape(const SparseNormSequence& tile_norms,
        const TiledRange& trange) :
      tile_norms_(trange.tiles_range(), value_type(0)), size_vectors_(initialize_size_vectors(trange)),
      zero_tile_count_(trange.tiles_range().volume())
    {
      const auto dim = tile_norms_.range().rank();
      for(const auto& pair_idx_norm: tile_norms) {
        auto compute_tile_volume = [dim,this,pair_idx_norm]() -> uint64_t {
          uint64_t tile_volume = 1;
          for(size_t d = 0; d != dim; ++d)
            tile_volume *= size_vectors_.get()[d].at(pair_idx_norm.first[d]);
          return tile_volume;
        };
        auto norm_per_element = pair_idx_norm.second / compute_tile_volume();
        if (norm_per_element >= threshold()) {
          tile_norms_[pair_idx_norm.first] = norm_per_element;
          --zero_tile_count_;
        }
      }
    }

    /// Collective "dense" constructor

    /// The tile norms are summed across all processes in \c world before
    /// normalization, so each process may contribute a partial norm tensor.
    /// \param world The world where the shape will live
    /// \param tile_norms The norm of each tile
    /// \param trange The tiled range of the tensor
    SparseShape(World& world, const Tensor<value_type>& tile_norms,
        const TiledRange& trange) :
      tile_norms_(tile_norms.clone()), size_vectors_(initialize_size_vectors(trange)),
      zero_tile_count_(0ul)
    {
      TA_ASSERT(! tile_norms_.empty());
      TA_ASSERT(tile_norms_.range() == trange.tiles_range());

      // Reduce norm data from all processes
      world.gop.sum(tile_norms_.data(), tile_norms_.size());

      normalize();
    }

    /// Collective "sparse" constructor

    /// Each process contributes a sequence of {index, norm} pairs; the
    /// resulting norm data are then summed across all processes in \c world.
    /// \param world The world where the shape will live
    /// \param tile_norms The {index, norm} pairs contributed by this process
    /// \param trange The tiled range of the tensor
    template<typename SparseNormSequence>
    SparseShape(World& world,
        const SparseNormSequence& tile_norms,
        const TiledRange& trange) : SparseShape(tile_norms, trange)
    {
      world.gop.sum(tile_norms_.data(), tile_norms_.size());
    }

    /// Copy constructor

    /// \param other The shape to be copied
    SparseShape(const SparseShape<T>& other) :
      tile_norms_(other.tile_norms_), size_vectors_(other.size_vectors_),
      zero_tile_count_(other.zero_tile_count_)
    { }

    /// Copy assignment operator

    /// \param other The shape to be copied
    /// \return A reference to this object
    SparseShape<T>& operator=(const SparseShape<T>& other) {
      tile_norms_ = other.tile_norms_;
      size_vectors_ = other.size_vectors_;
      zero_tile_count_ = other.zero_tile_count_;
      return *this;
    }

    /// Validate shape range

    /// \return \c true when this shape has been initialized and its range
    /// matches \c range, otherwise \c false
    bool validate(const Range& range) const {
      if(tile_norms_.empty())
        return false;
      return (range == tile_norms_.range());
    }

    /// Check that a tile is zero

    /// \tparam Index The tile index type
    /// \param i The tile index
    /// \return \c true when the scaled norm of tile \c i is below the threshold
    template <typename Index>
    bool is_zero(const Index& i) const {
      TA_ASSERT(! tile_norms_.empty());
      return tile_norms_[i] < threshold_;
    }

    /// Check density

    /// \return \c false (a SparseShape is never dense)
    static constexpr bool is_dense() { return false; }

    /// Sparsity of the shape

    /// \return The fraction of tiles that are zero
    float sparsity() const {
      TA_ASSERT(! tile_norms_.empty());
      return float(zero_tile_count_) / float(tile_norms_.size());
    }

    /// Threshold accessor

    /// \return The current zero threshold
    static value_type threshold() { return threshold_; }

    /// Set threshold to \c thresh

    /// \param thresh The new zero threshold; it is shared by all shapes
    /// with the same \c T
    static void threshold(const value_type thresh) { threshold_ = thresh; }

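    // Usage sketch (illustration only): treat tiles whose average element
    // magnitude is below 1e-10 as zero in all subsequent shape operations:
    //
    //   SparseShape<float>::threshold(1e-10f);
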
    /// Tile norm accessor

    /// \tparam Index The tile index type
    /// \param index The tile index
    /// \return The scaled norm of the tile at \c index
    template <typename Index>
    value_type operator[](const Index& index) const {
      TA_ASSERT(! tile_norms_.empty());
      return tile_norms_[index];
    }

    /// Transform the norm tensor with an operation

    /// \tparam Op The transform operation type
    /// \param op An operation that takes the norm tensor and returns a new
    /// norm tensor
    /// \return A new shape with norms \c op(data()); norms below the
    /// threshold are set to zero
    /// \note \c op is applied to the already-scaled norms; the result is
    /// not renormalized.
    template<typename Op>
    SparseShape_ transform(Op &&op) const {

      Tensor<T> new_norms = op(tile_norms_);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      const value_type threshold = threshold_;
      auto apply_threshold = [threshold, &zero_tile_count](value_type &norm){
        TA_ASSERT(norm >= value_type(0));
        if(norm < threshold){
          norm = value_type(0);
          ++zero_tile_count;
        }
      };

      math::inplace_vector_op(apply_threshold, new_norms.range().volume(),
          new_norms.data());

      return SparseShape_(std::move(new_norms), size_vectors_,
          zero_tile_count);
    }

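    // Usage sketch (illustration only): double every tile norm, with the
    // zero threshold re-applied to the result:
    //
    //   auto doubled = shape.transform([](const Tensor<float>& t) {
    //     return t.scale(2.0f);
    //   });
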
    /// Data accessor

    /// \return A const reference to the scaled tile norm tensor
    const Tensor<value_type>& data() const { return tile_norms_; }

    /// Initialization check

    /// \return \c true when this shape has not been initialized
    bool empty() const { return tile_norms_.empty(); }

    /// Mask this shape with another

    /// Zero out the tiles of this shape that are zero in \c mask_shape;
    /// the surviving norms are unchanged.
    /// \param mask_shape The shape to be used as a mask
    /// \return A new, masked shape
    SparseShape_ mask(const SparseShape_ &mask_shape) const {
      TA_ASSERT(!tile_norms_.empty());
      TA_ASSERT(!mask_shape.empty());
      TA_ASSERT(tile_norms_.range() == mask_shape.tile_norms_.range());

      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = zero_tile_count_;
      auto op = [threshold, &zero_tile_count] (value_type left,
          const value_type right)
      {
        if(left >= threshold && right < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }

        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(mask_shape.tile_norms_, op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Update sub-block of shape

    /// Replace the tile norms in the sub-block bounded by \c lower_bound and
    /// \c upper_bound with the norms in \c other, adjusting the zero-tile
    /// count accordingly.
    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param other The shape that will replace the sub-block
    /// \return A new shape with the updated sub-block
    template <typename Index>
    SparseShape update_block(const Index& lower_bound, const Index& upper_bound,
        const SparseShape& other) const
    {
      Tensor<value_type> result_tile_norms = tile_norms_.clone();

      auto result_tile_norms_blk = result_tile_norms.block(lower_bound, upper_bound);
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = zero_tile_count_;
      result_tile_norms_blk.inplace_binary(other.tile_norms_,
          [threshold,&zero_tile_count] (value_type& l, const value_type r) {
            // Update the zero tile count for the result: a zero tile that
            // becomes nonzero decrements the count, and vice versa.
            if((l < threshold) && (r >= threshold))
              --zero_tile_count;
            else if((l >= threshold) && (r < threshold))
              ++zero_tile_count;

            // Update the tile norm value
            l = r;
          });

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

  private:

    /// Compute the size vectors for a sub-block

    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \return Size vectors restricted to the given tile-index range
    template <typename Index>
    std::shared_ptr<vector_type>
    block_range(const Index& lower_bound, const Index& upper_bound) const {
      TA_ASSERT(detail::size(lower_bound) == tile_norms_.range().rank());
      TA_ASSERT(detail::size(upper_bound) == tile_norms_.range().rank());

      // Get the number of dimensions of the shape
      const auto rank = detail::size(lower_bound);
      const auto* MADNESS_RESTRICT const lower = detail::data(lower_bound);
      const auto* MADNESS_RESTRICT const upper = detail::data(upper_bound);

      std::shared_ptr<vector_type> size_vectors(new vector_type[rank],
          std::default_delete<vector_type[]>());

      for(auto i = 0ul; i < rank; ++i) {
        // Get the new range size
        const auto lower_i = lower[i];
        const auto upper_i = upper[i];
        const auto extent_i = upper_i - lower_i;

        // Check that the input indices are in range
        TA_ASSERT(lower_i < upper_i);
        TA_ASSERT(upper_i <= tile_norms_.range().upbound(i));

        // Construct the size vector for rank i
        size_vectors.get()[i] = vector_type(extent_i,
            size_vectors_.get()[i].data() + lower_i);
      }

      return size_vectors;
    }

  public:

    /// Create a copy of a sub-block of the shape

    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \return The shape of the sub-block
    template <typename Index>
    SparseShape block(const Index& lower_bound, const Index& upper_bound) const {
      std::shared_ptr<vector_type> size_vectors =
          block_range(lower_bound, upper_bound);

      // Copy the data from arg to result
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto copy_op = [threshold,&zero_tile_count] (value_type& MADNESS_RESTRICT result,
          const value_type arg)
      {
        result = arg;
        if(arg < threshold)
          ++zero_tile_count;
      };

      // Construct the result norms tensor
      TensorConstView<value_type> block_view =
          tile_norms_.block(lower_bound, upper_bound);
      Tensor<value_type> result_norms((Range(block_view.range().extent())));
      result_norms.inplace_binary(shift(block_view), copy_op);

      return SparseShape(result_norms, size_vectors, zero_tile_count);
    }

    /// Create a scaled sub-block of the shape

    /// \tparam Index The bound index type
    /// \tparam Factor The scaling factor type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param factor The scaling factor (its absolute value is used)
    /// \return The scaled shape of the sub-block
    template <typename Index, typename Factor>
    SparseShape block(const Index& lower_bound, const Index& upper_bound,
        const Factor factor) const
    {
      const value_type abs_factor = to_abs_factor(factor);
      std::shared_ptr<vector_type> size_vectors =
          block_range(lower_bound, upper_bound);

      // Copy and scale the data from arg to result
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto copy_op = [abs_factor,threshold,&zero_tile_count] (value_type& MADNESS_RESTRICT result,
          const value_type arg)
      {
        result = arg * abs_factor;
        if(result < threshold) {
          ++zero_tile_count;
          result = value_type(0);
        }
      };

      // Construct the result norms tensor
      TensorConstView<value_type> block_view =
          tile_norms_.block(lower_bound, upper_bound);
      Tensor<value_type> result_norms((Range(block_view.range().extent())));
      result_norms.inplace_binary(shift(block_view), copy_op);

      return SparseShape(result_norms, size_vectors, zero_tile_count);
    }

    /// Create a permuted sub-block of the shape

    /// \tparam Index The bound index type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param perm The permutation to be applied
    /// \return The permuted shape of the sub-block
    template <typename Index>
    SparseShape block(const Index& lower_bound, const Index& upper_bound,
        const Permutation& perm) const
    {
      return block(lower_bound, upper_bound).perm(perm);
    }

    /// Create a scaled and permuted sub-block of the shape

    /// \tparam Index The bound index type
    /// \tparam Factor The scaling factor type
    /// \param lower_bound The lower bound of the sub-block
    /// \param upper_bound The upper bound of the sub-block
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return The scaled and permuted shape of the sub-block
    template <typename Index, typename Factor>
    SparseShape block(const Index& lower_bound, const Index& upper_bound,
        const Factor factor, const Permutation& perm) const
    {
      return block(lower_bound, upper_bound, factor).perm(perm);
    }

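    // Usage sketch (illustration only): the shape of the 2x2 block of tiles
    // with tile indices {1,1} .. {2,2}, scaled by 0.5:
    //
    //   std::vector<std::size_t> lo{1, 1}, up{3, 3};
    //   SparseShape<float> sub = shape.block(lo, up, 0.5f);
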
    /// Create a permuted shape of this shape

    /// \param perm The permutation to be applied
    /// \return A new, permuted shape
    SparseShape_ perm(const Permutation& perm) const {
      return SparseShape_(tile_norms_.permute(perm), perm_size_vectors(perm),
          zero_tile_count_);
    }

    /// Scale shape

    /// Construct a new scaled shape as: result_norms = abs(factor) * norms
    /// \tparam Factor The scaling factor type
    /// \param factor The scaling factor (its absolute value is used)
    /// \return A new, scaled shape
    template <typename Factor>
    SparseShape_ scale(const Factor factor) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor] (value_type value) {
        value *= abs_factor;
        if(value < threshold) {
          value = value_type(0);
          ++zero_tile_count;
        }
        return value;
      };

      Tensor<value_type> result_tile_norms = tile_norms_.unary(op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Scale and permute shape

    /// Construct a new scaled shape as:
    /// result_norms = perm(abs(factor) * norms)
    /// \tparam Factor The scaling factor type
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return A new, scaled and permuted shape
    template <typename Factor>
    SparseShape_ scale(const Factor factor, const Permutation& perm) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor] (value_type value) {
        value *= abs_factor;
        if(value < threshold) {
          value = value_type(0);
          ++zero_tile_count;
        }
        return value;
      };

      Tensor<value_type> result_tile_norms = tile_norms_.unary(op, perm);

      return SparseShape_(result_tile_norms, perm_size_vectors(perm),
          zero_tile_count);
    }

    /// Add shapes

    /// Construct a new sum shape as: result_norms = norms + other_norms
    /// (an upper bound on the norm of the sum, by the triangle inequality).
    /// \param other The shape to be added to this shape
    /// \return A new sum shape
    SparseShape_ add(const SparseShape_& other) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count] (value_type left,
          const value_type right)
      {
        left += right;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Add and permute shapes

    /// Construct a new, permuted sum shape as:
    /// result_norms = perm(norms + other_norms)
    /// \param other The shape to be added to this shape
    /// \param perm The permutation to be applied
    /// \return A new, permuted sum shape
    SparseShape_ add(const SparseShape_& other, const Permutation& perm) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count] (value_type left,
          const value_type right)
      {
        left += right;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op, perm);

      return SparseShape_(result_tile_norms, perm_size_vectors(perm),
          zero_tile_count);
    }

    /// Add and scale shapes

    /// Construct a new scaled-sum shape as:
    /// result_norms = abs(factor) * (norms + other_norms)
    /// \tparam Factor The scaling factor type
    /// \param other The shape to be added to this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \return A new scaled-sum shape
    template <typename Factor>
    SparseShape_ add(const SparseShape_& other, const Factor factor) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor] (value_type left,
          const value_type right)
      {
        left += right;
        left *= abs_factor;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op);

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Add, scale, and permute shapes

    /// Construct a new scaled-sum shape as:
    /// result_norms = perm(abs(factor) * (norms + other_norms))
    /// \tparam Factor The scaling factor type
    /// \param other The shape to be added to this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return A new, scaled and permuted sum shape
    template <typename Factor>
    SparseShape_ add(const SparseShape_& other, const Factor factor,
        const Permutation& perm) const
    {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      const value_type abs_factor = to_abs_factor(factor);
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      auto op = [threshold, &zero_tile_count, abs_factor]
          (value_type left, const value_type right)
      {
        left += right;
        left *= abs_factor;
        if(left < threshold) {
          left = value_type(0);
          ++zero_tile_count;
        }
        return left;
      };

      Tensor<value_type> result_tile_norms =
          tile_norms_.binary(other.tile_norms_, op, perm);

      return SparseShape_(result_tile_norms, perm_size_vectors(perm),
          zero_tile_count);
    }

    /// Add a constant to the shape

    /// Bound the norms of (tensor + value) as:
    /// result_norms = norms + abs(value) / sqrt(tile_volume)
    /// \param value The constant to be added to each element
    /// \return A new shape bounding (tensor + value)
    SparseShape_ add(value_type value) const {
      TA_ASSERT(! tile_norms_.empty());
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      Tensor<T> result_tile_norms(tile_norms_.range());

      value = std::abs(value);
      const unsigned int dim = tile_norms_.range().rank();
      const vector_type* MADNESS_RESTRICT const size_vectors = size_vectors_.get();

      if(dim == 1u) {
        auto add_const_op = [threshold, &zero_tile_count, value] (value_type norm,
            const value_type size)
        {
          norm += value / std::sqrt(size);
          if(norm < threshold) {
            norm = 0;
            ++zero_tile_count;
          }
          return norm;
        };

        // This is the easy case where the data is a vector and can be
        // updated directly.
        math::vector_op(add_const_op, size_vectors[0].size(), result_tile_norms.data(),
            tile_norms_.data(), size_vectors[0].data());

      } else {
        // Here the normalization constants are computed and multiplied by the
        // norm data using a recursive, outer-product algorithm. This is done to
        // minimize temporary memory requirements, memory bandwidth, and work.

        auto inv_sqrt_vec_op = [] (const vector_type& size_vector) {
          return vector_type(size_vector,
              [] (const value_type size) { return value_type(1) / std::sqrt(size); });
        };

        // Compute the left and right outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, inv_sqrt_vec_op);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, inv_sqrt_vec_op);

        math::outer_fill(left.size(), right.size(), left.data(), right.data(),
            tile_norms_.data(), result_tile_norms.data(),
            [threshold, &zero_tile_count, value] (value_type& norm,
                const value_type x, const value_type y)
            {
              norm += value * x * y;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
            });
      }

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Add a constant to the shape and permute the result

    /// \param value The constant to be added to each element
    /// \param perm The permutation to be applied
    /// \return A new, permuted shape bounding (tensor + value)
    SparseShape_ add(const value_type value, const Permutation& perm) const {
      // TODO: Optimize this function so it does the permutation at the same
      // time as the addition.
      return add(value).perm(perm);
    }

    SparseShape_ subt(const SparseShape_& other) const {
      return add(other);
    }

    SparseShape_ subt(const SparseShape_& other, const Permutation& perm) const {
      return add(other, perm);
    }

    template <typename Factor>
    SparseShape_ subt(const SparseShape_& other, const Factor factor) const {
      return add(other, factor);
    }

    template <typename Factor>
    SparseShape_ subt(const SparseShape_& other, const Factor factor,
        const Permutation& perm) const
    {
      return add(other, factor, perm);
    }

    SparseShape_ subt(const value_type value) const {
      return add(value);
    }

    SparseShape_ subt(const value_type value, const Permutation& perm) const {
      return add(value, perm);
    }
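
    // The subt() overloads forward to add(): the stored values are norm
    // upper bounds, and by the triangle inequality
    // ||A - B|| <= ||A|| + ||B||, so a difference has the same bound as a
    // sum.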

  private:

    // Scale the given tile norms by the tile volumes, zeroing norms that
    // fall below the threshold; returns the number of zero tiles.
    static size_type scale_by_size(Tensor<T>& tile_norms,
        const vector_type* MADNESS_RESTRICT const size_vectors)
    {
      const unsigned int dim = tile_norms.range().rank();
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;

      if(dim == 1u) {
        // This is the easy case where the data is a vector and can be
        // scaled directly.
        math::inplace_vector_op(
            [threshold, &zero_tile_count] (value_type& norm, const value_type size) {
              norm *= size;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
            },
            size_vectors[0].size(), tile_norms.data(), size_vectors[0].data());
      } else {
        // Here the scaling constants are computed and multiplied by the
        // norm data using a recursive, outer-product algorithm. This is done
        // to minimize temporary memory requirements, memory bandwidth, and
        // work.

        auto noop = [](const vector_type& size_vector) -> const vector_type& {
          return size_vector;
        };

        // Compute the left and right outer products
        const unsigned int middle = (dim >> 1u) + (dim & 1u);
        const vector_type left = recursive_outer_product(size_vectors, middle, noop);
        const vector_type right = recursive_outer_product(size_vectors + middle, dim - middle, noop);

        math::outer(left.size(), right.size(), left.data(), right.data(), tile_norms.data(),
            [threshold, &zero_tile_count] (value_type& norm, const value_type x,
                const value_type y)
            {
              norm *= x * y;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
            });
      }

      return zero_tile_count;
    }

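    // Why mult() scales by the tile volume (a derivation from the stored
    // scaling, not part of the original source): the shape stores
    // a = ||A||/v and b = ||B||/v for tile volume v. For the element-wise
    // product, ||A * B|| <= ||A|| * ||B||, so the stored result should be
    // ||A||*||B||/v = (a*v)*(b*v)/v = a*b*v; hence mult() forms a*b and
    // then calls scale_by_size() to multiply by v.
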
  public:

    /// Multiply shapes

    /// Construct a new product shape as:
    /// result_norms = (norms * other_norms) * tile_volume
    /// \param other The shape to be multiplied by this shape
    /// \return A new product shape
    SparseShape_ mult(const SparseShape_& other) const {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, size_vectors_.get());

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Multiply and permute shapes

    /// \param other The shape to be multiplied by this shape
    /// \param perm The permutation to be applied
    /// \return A new, permuted product shape
    SparseShape_ mult(const SparseShape_& other, const Permutation& perm) const {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_, perm);
      std::shared_ptr<vector_type> result_size_vector = perm_size_vectors(perm);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, result_size_vector.get());

      return SparseShape_(result_tile_norms, result_size_vector, zero_tile_count);
    }

    /// Multiply and scale shapes

    /// \tparam Factor The scaling factor type
    /// \param other The shape to be multiplied by this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \return A new, scaled product shape
    template <typename Factor>
    SparseShape_ mult(const SparseShape_& other, const Factor factor) const {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      const value_type abs_factor = to_abs_factor(factor);
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_, abs_factor);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, size_vectors_.get());

      return SparseShape_(result_tile_norms, size_vectors_, zero_tile_count);
    }

    /// Multiply, scale, and permute shapes

    /// \tparam Factor The scaling factor type
    /// \param other The shape to be multiplied by this shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param perm The permutation to be applied
    /// \return A new, scaled and permuted product shape
    template <typename Factor>
    SparseShape_ mult(const SparseShape_& other, const Factor factor,
        const Permutation& perm) const
    {
      // TODO: Optimize this function so that the tensor arithmetic and
      // scale_by_size operations are performed in one step instead of two.

      TA_ASSERT(! tile_norms_.empty());
      const value_type abs_factor = to_abs_factor(factor);
      Tensor<T> result_tile_norms = tile_norms_.mult(other.tile_norms_, abs_factor, perm);
      std::shared_ptr<vector_type> result_size_vector = perm_size_vectors(perm);
      const size_type zero_tile_count =
          scale_by_size(result_tile_norms, result_size_vector.get());

      return SparseShape_(result_tile_norms, result_size_vector, zero_tile_count);
    }


    /// Contract shapes (GEMM)

    /// Compute the shape of the contraction of this shape with \c other, as
    /// described by \c gemm_helper, scaled by abs(factor).
    /// \tparam Factor The scaling factor type
    /// \param other The right-hand shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param gemm_helper The *GEMM parameters for the contraction
    /// \return A new contraction shape
    template <typename Factor>
    SparseShape_ gemm(const SparseShape_& other, const Factor factor,
        const math::GemmHelper& gemm_helper) const
    {
      TA_ASSERT(! tile_norms_.empty());

      const value_type abs_factor = to_abs_factor(factor);
      const value_type threshold = threshold_;
      madness::AtomicInt zero_tile_count;
      zero_tile_count = 0;
      integer M = 0, N = 0, K = 0;
      gemm_helper.compute_matrix_sizes(M, N, K, tile_norms_.range(), other.tile_norms_.range());

      // Allocate memory for the contracted size vectors
      std::shared_ptr<vector_type> result_size_vectors(new vector_type[gemm_helper.result_rank()],
          std::default_delete<vector_type[]>());

      // Initialize the result size vectors
      unsigned int x = 0u;
      for(unsigned int i = gemm_helper.left_outer_begin(); i < gemm_helper.left_outer_end(); ++i, ++x)
        result_size_vectors.get()[x] = size_vectors_.get()[i];
      for(unsigned int i = gemm_helper.right_outer_begin(); i < gemm_helper.right_outer_end(); ++i, ++x)
        result_size_vectors.get()[x] = other.size_vectors_.get()[i];

      // Compute the number of inner (contracted) ranks
      const unsigned int k_rank = gemm_helper.left_inner_end() - gemm_helper.left_inner_begin();

      // Construct the result norm tensor
      Tensor<value_type> result_norms(gemm_helper.make_result_range<typename Tensor<T>::range_type>(
          tile_norms_.range(), other.tile_norms_.range()), 0);

      if(k_rank > 0u) {

        // Compute the volumes of the contracted (inner) tiles
        const vector_type k_sizes =
            recursive_outer_product(size_vectors_.get() + gemm_helper.left_inner_begin(),
                k_rank, [] (const vector_type& size_vector) -> const vector_type&
                { return size_vector; });

        // TODO: Make this faster. It can be done without using temporaries
        // for the arguments, but requires a custom matrix multiply.

        // Scale the columns of the left norms by the inner tile sizes
        Tensor<value_type> left(tile_norms_.range());
        const size_type mk = M * K;
        auto left_op = [] (const value_type left, const value_type right)
            { return left * right; };
        for(size_type i = 0ul; i < mk; i += K)
          math::vector_op(left_op, K, left.data() + i,
              tile_norms_.data() + i, k_sizes.data());

        // Scale the rows of the right norms by the inner tile sizes
        Tensor<value_type> right(other.tile_norms_.range());
        for(integer i = 0ul, k = 0; k < K; i += N, ++k) {
          const value_type k_size = k_sizes[k];
          auto right_op = [=] (const value_type arg) { return arg * k_size; };
          math::vector_op(right_op, N, right.data() + i, other.tile_norms_.data() + i);
        }

        result_norms = left.gemm(right, abs_factor, gemm_helper);

        // Hard zero tiles that are below the zero threshold.
        result_norms.inplace_unary(
            [threshold, &zero_tile_count] (value_type& value) {
              if(value < threshold) {
                value = value_type(0);
                ++zero_tile_count;
              }
            });

      } else {

        // This is an outer product, so the inputs can be used directly
        math::outer_fill(M, N, tile_norms_.data(), other.tile_norms_.data(), result_norms.data(),
            [threshold, &zero_tile_count, abs_factor] (const value_type left,
                const value_type right)
            {
              value_type norm = left * right * abs_factor;
              if(norm < threshold) {
                norm = value_type(0);
                ++zero_tile_count;
              }
              return norm;
            });
      }

      return SparseShape_(result_norms, result_size_vectors, zero_tile_count);
    }
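
    // Why both arguments are scaled by k_sizes (a derivation from the
    // stored scaling, not part of the original source): for C = A*B the
    // norms satisfy
    //   ||C(i,j)|| <= sum_k ||A(i,k)|| * ||B(k,j)||.
    // The stored values are norms divided by tile volumes, so with
    // a = ||A||/(m*k) and b = ||B||/(k*n), the stored result
    //   c = ||C||/(m*n) <= sum_k a * b * k^2
    // requires one factor of the inner tile size k for each argument.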

    /// Contract and permute shapes (GEMM)

    /// \tparam Factor The scaling factor type
    /// \param other The right-hand shape
    /// \param factor The scaling factor (its absolute value is used)
    /// \param gemm_helper The *GEMM parameters for the contraction
    /// \param perm The permutation to be applied to the result
    /// \return A new, permuted contraction shape
    template <typename Factor>
    SparseShape_ gemm(const SparseShape_& other, const Factor factor,
        const math::GemmHelper& gemm_helper, const Permutation& perm) const
    {
      return gemm(other, factor, gemm_helper).perm(perm);
    }

  private:
    template <typename Factor>
    static value_type to_abs_factor(const Factor factor) {
      using std::abs;
      const auto cast_abs_factor = static_cast<value_type>(abs(factor));
      TA_ASSERT(std::isfinite(cast_abs_factor));
      return cast_abs_factor;
    }

  }; // class SparseShape

  // Static member initialization
  template <typename T>
  typename SparseShape<T>::value_type SparseShape<T>::threshold_ = std::numeric_limits<T>::epsilon();

  /// Add the shape to an output stream

  /// \tparam T The numeric type of the shape
  /// \param os The output stream
  /// \param shape The shape to be printed
  /// \return A reference to the output stream
  template <typename T>
  inline std::ostream& operator<<(std::ostream& os, const SparseShape<T>& shape) {
    os << "SparseShape<" << typeid(T).name() << ">:" << std::endl
       << shape.data() << std::endl;
    return os;
  }


#ifndef TILEDARRAY_HEADER_ONLY

  extern template class SparseShape<float>;

#endif // TILEDARRAY_HEADER_ONLY

} // namespace TiledArray

#endif // TILEDARRAY_SPARSE_SHAPE_H__INCLUDED