5 #ifndef TILEDARRAY_CONVERSIONS_VECTOR_OF_ARRAYS_H_
6 #define TILEDARRAY_CONVERSIONS_VECTOR_OF_ARRAYS_H_
26 std::size_t block_size = 1) {
// Tile boundaries of the new leading dimension: 0, then block_size,
// 2 * block_size, ... for every multiple strictly below range_size, then
// range_size itself, so the last tile may be shorter than block_size.
// NOTE(review): with block_size == 0 and array_rank > 0 the loop index never
// advances and the loop below does not terminate - confirm callers always
// pass block_size >= 1.
30 std::vector<std::size_t> new_trange1_v;
31 auto range_size = array_rank;
32 new_trange1_v.push_back(0);
33 for (decltype(range_size) i = block_size; i < range_size; i += block_size) {
34 new_trange1_v.push_back(i);
36 new_trange1_v.push_back(range_size);
37 new_trange1 =
TA::TiledRange1(new_trange1_v.begin(), new_trange1_v.end());
// Insert the new dimension in front of the sequence returned by
// array_trange.data() and build new_trange from the extended sequence.
43 auto old_trange1s = array_trange.
data();
44 old_trange1s.insert(old_trange1s.begin(), new_trange1);
45 new_trange =
TA::TiledRange(old_trange1s.begin(), old_trange1s.end());
// Computes the entries of fused_tile_norms from the shapes of `arrays`: the
// arrays are walked in blocks of block_size, and for the blocks matched to
// this rank the per-tile shape values are combined into one value per fused
// tile.
70 template <
typename Tile>
72 madness::World& global_world,
74 const std::size_t array_rank,
const TA::TiledRange& fused_trange) {
// Empty input is special-cased before arrays[0] is touched further down.
75 if (arrays.size() == 0) {
// rank / size as reported by global_world.
80 const std::size_t
rank = global_world.rank();
81 auto size = global_world.size();
// block_size is the extent (second - first) of the first tile along mode 0
// of fused_trange.
82 auto first_tile_in_mode0 = *fused_trange.
dim(0).
begin();
83 const auto block_size =
84 first_tile_in_mode0.second - first_tile_in_mode0.first;
// Tile volumes are read from arrays[0] only and cached by tile ordinal.
// NOTE(review): presumably every array shares the tiling of arrays[0] -
// confirm.
86 std::size_t ntiles_per_array = arrays[0].trange().tiles_range().volume();
88 std::vector<size_t> tile_volumes(ntiles_per_array);
90 const auto& tiles_range = arrays[0].trange().tiles_range();
91 for (
auto&& tile_idx : tiles_range) {
92 const auto tile_ord = tiles_range.ordinal(tile_idx);
93 tile_volumes[tile_ord] =
94 arrays[0].trange().make_tile_range(tile_idx).volume();
// Walk the arrays in steps of block_size; fused_vidx counts the blocks and
// fused_tile_ord tracks the ordinal of the fused tile being written.
104 std::size_t narrays = array_rank;
105 size_t fused_tile_ord = 0;
// NOTE(review): `auto` deduces int from the literal 0 here, and the value is
// later multiplied with vblk_size.
106 auto element_offset_in_owner = 0;
107 for (
size_t vidx = 0, fused_vidx = 0; vidx < narrays;
108 vidx += block_size, ++fused_vidx) {
// Blocks are matched to ranks round-robin: block k goes with rank k % size.
109 bool have_rank = (
rank == fused_vidx % size);
// Number of arrays in this block; the last block may hold fewer than
// block_size.
113 const auto vblk_size =
114 (narrays - vidx) >= block_size ? block_size : narrays - vidx;
115 for (
size_t tile_ord = 0; tile_ord != ntiles_per_array;
116 ++tile_ord, ++fused_tile_ord) {
// Starting position inside `arrays` for this block.
117 auto array_ptr = arrays.begin() + element_offset_in_owner * vblk_size;
118 float unscaled_fused_tile_norm2 = 0;
119 const auto tile_volume = tile_volumes[tile_ord];
// NOTE(review): vv is incremented in the loop header; no other use of it
// appears next to this loop - confirm whether it is still needed.
120 for (
size_t v = 0, vv = vidx; v != vblk_size; ++v, ++vv) {
// Stored shape value times tile volume, accumulated as a sum of squares.
121 const auto unscaled_tile_norm =
122 (*(array_ptr)).shape().data()[tile_ord] * tile_volume;
123 unscaled_fused_tile_norm2 += unscaled_tile_norm * unscaled_tile_norm;
// Square root of the accumulated sum, divided by the fused tile volume
// (tile_volume * vblk_size), written at position fused_tile_ord.
126 const auto fused_tile_volume = tile_volume * vblk_size;
127 const auto fused_tile_norm =
128 std::sqrt(unscaled_fused_tile_norm2) / fused_tile_volume;
130 *(fused_tile_norms.
data() + fused_tile_ord) = fused_tile_norm;
132 element_offset_in_owner += 1;
// Advances fused_tile_ord past one whole block of tiles.
// NOTE(review): presumably the path taken when have_rank is false - confirm.
134 fused_tile_ord += ntiles_per_array;
158 template <
typename Tile>
162 const std::size_t array_rank,
const TA::TiledRange& fused_trange) {
183 const std::size_t split_ntiles,
const std::size_t tile_size) {
// Offset into the shape data: split_ntiles entries per value of tile_idx.
188 std::size_t offset = tile_idx * split_ntiles;
// Copy split_ntiles consecutive shape values starting at `offset` into
// split_tile_norms, multiplying each one by tile_size on the way.
194 const auto* split_tile_begin = shape.data().data() + offset;
195 std::transform(split_tile_begin, split_tile_begin + split_ntiles,
196 split_tile_norms.
data(),
197 [tile_size](
const float& elem) { return elem * tile_size; });
215 const std::size_t split_ntiles,
const std::size_t tile_size) {
// Class template whose base is madness::WorldObject parameterized on
// dist_subarray_vec<Array>; it keeps a reference to a vector of arrays and
// forwards tile lookups to it.
223 template <
typename Array>
225 :
public madness::WorldObject<dist_subarray_vec<Array>> {
236 const std::size_t
rank)
// Called through this-> because process_pending is a member of a base class
// that depends on the template parameter.
240 this->process_pending();
// Tile lookup: forwards to find(i) on the r-th array. std::vector::at throws
// std::out_of_range when r is not a valid position in split_array.
249 template <
typename Index>
251 return split_array.at(r).find(i);
// NOTE(review): named size() but returns rank_ - confirm what rank_ stores.
258 unsigned long size()
const {
return rank_; }
// Held by reference, so the referenced vector has to outlive this object.
261 const std::vector<Array>& split_array;
// Assembles the tiles of fused_array from the tiles of a vector of arrays:
// for every nonzero tile ordinal listed by fused_array's pmap, futures for the
// contributing tiles are requested from the owning rank and a make_tile task
// is scheduled on them.
288 template <
typename Tile,
typename Policy>
290 madness::World& global_world,
292 const std::size_t fused_dim_extent,
294 auto nproc = global_world.size();
308 global_world, arrays.
array_accessor(), fused_dim_extent, fused_trange);
316 const std::vector<madness::Future<Tile>>& tiles) {
// Concatenate the data of the input tiles, in the order of `tiles`, into
// `result`; result_volume counts the elements copied.
320 size_t result_volume = 0;
321 auto* result_ptr = result.
data();
322 for (
auto&& fut_of_tile : tiles) {
324 const auto& tile = fut_of_tile.get();
325 const auto* tile_data = tile.data();
326 const auto tile_volume = tile.size();
327 std::copy(tile_data, tile_data + tile_volume, result_ptr);
328 result_ptr += tile_volume;
329 result_volume += tile_volume;
// Visit the tile ordinals listed by fused_array's pmap, skipping zero tiles.
336 for (
auto&& fused_tile_ord : *fused_array.
pmap()) {
337 if (!fused_array.
is_zero(fused_tile_ord)) {
// Split the fused ordinal into (tile index along mode 0, tile ordinal within
// a single array). std::ldiv works on long, so both operands are converted
// to long.
340 const auto div0 = std::ldiv(fused_tile_ord, ntiles_per_array);
342 const auto tile_idx_mode0 = div0.quot;
344 const auto tile_ord_array = div0.rem;
// owner_rank = tile_idx_mode0 % nproc; tile_idx_on_owner = tile_idx_mode0 /
// nproc, and block_size arrays are counted per owner-side tile index.
346 const auto div1 = std::ldiv(tile_idx_mode0, nproc);
347 const auto tile_idx_on_owner = div1.quot;
348 const auto vector_idx_offset_on_owner = tile_idx_on_owner * block_size;
349 const auto owner_rank = div1.rem;
351 auto fused_tile_range =
352 fused_array.
trange().make_tile_range(fused_tile_ord);
// Collect one future per contributing array: at most block_size of them, and
// never past fused_dim_extent. Capacity is reserved from the extent of the
// fused tile along mode 0.
354 std::vector<madness::Future<Tile>> input_tiles;
355 input_tiles.reserve(fused_tile_range.extent(0));
356 for (
size_t v = 0, vidx = tile_idx_mode0 * block_size;
357 v != block_size && vidx < fused_dim_extent; ++v, ++vidx) {
358 using Index = decltype(tile_ord_array);
359 input_tiles.emplace_back(
360 arrays.task(owner_rank,
363 vector_idx_offset_on_owner + v, tile_ord_array));
// Schedule make_tile with the fused tile range and the collected futures;
// both are moved into the task.
367 global_world.taskq.add(
make_tile, std::move(fused_tile_range),
368 std::move(input_tiles)));
// Synchronization point: gop.fence() on global_world.
373 global_world.gop.fence();
// Splits tile tile_idx (along dimension 0) of fused_array into separate
// arrays appended to split_arrays, then fills their tiles from the matching
// slices of the fused tiles.
392 template <
typename Tile,
typename Policy>
395 std::size_t tile_idx,
// Size of split_arrays at this point; used below as the position of the first
// array written to.
399 auto arrays_size = split_arrays.size();
// tile_size is the extent (second - first) of tile tile_idx along dimension 0.
402 auto tile_range = fused_array.
trange().dim(0).tile(tile_idx);
403 auto tile_size = tile_range.second - tile_range.first;
405 auto& shape = fused_array.
shape();
// One iteration per element index covered by the tile along dimension 0.
408 for (
size_t i = tile_range.first; i < tile_range.second; ++i) {
410 split_trange, shape, tile_idx, split_ntiles, tile_size);
414 split_arrays.push_back(split_array);
419 const size_t i_offset_in_tile) {
// The slice for offset i_offset_in_tile starts i_offset_in_tile *
// split_tile_volume elements into the fused tile's data.
420 const auto split_tile_volume = range.
volume();
422 fused_tile.
data() + i_offset_in_tile * split_tile_volume);
// The loop is driven by the pmap of the first element of split_arrays, while
// the arrays written to start at position arrays_size.
// NOTE(review): presumably all arrays in split_arrays share one pmap - confirm.
426 auto split_array_ptr = split_arrays.data();
427 for (std::size_t index : *(*split_array_ptr).pmap()) {
// Ordinal of the corresponding tile in fused_array.
428 std::size_t fused_array_index = tile_idx * split_ntiles + index;
429 if (!fused_array.
is_zero(fused_array_index)) {
// For each offset tile_count within the fused tile, set tile `index` of the
// matching split array from a make_tile task on the fused tile.
430 for (std::size_t i = tile_range.first, tile_count = 0;
431 i < tile_range.second; ++i, ++tile_count) {
432 auto& array = *(split_array_ptr + arrays_size + tile_count);
433 array.set(index, local_world.taskq.add(
434 make_tile, array.trange().make_tile_range(index),
435 fused_array.
find(fused_array_index), tile_count));
444 #endif // TILEDARRAY_CONVERSIONS_VECTOR_OF_ARRAYS_H_