26 #ifndef TILEDARRAY_GRID_H__INCLUDED
27 #define TILEDARRAY_GRID_H__INCLUDED
/// Estimate a process-row count with a Newton-style iteration.
///
/// Starting from sqrt(nprocs), the estimate P is refined with the update
/// P <- P - f(P) / df(P). Expanding the expressions below:
///   f(P)  = 2*Nn*P^4 - Nn*P^3 + (nprocs*Mm)*P - 2*Mm*nprocs^2
///   df(P) = 8*Nn*P^3 - 3*Nn*P^2 + (nprocs*Mm)        (derivative of f)
/// \param nprocs Process count, taken as a double
/// \param Mm Factor that enters f and df only through the product nprocs * Mm
/// \return The final estimate plus 0.5, converted to \c size_type
/// NOTE(review): `Nn`, `r`, and the opening of the loop are not among the
/// lines shown here -- confirm them against the full definition.
86 static size_type optimal_proc_row(
const double nprocs,
const double Mm,
// Initial guess for the iteration.
90 double P_row_estimate = std::sqrt(nprocs);
// Loop-invariant products used by f and df.
97 const double PMm = nprocs * Mm;
98 const double two_P = nprocs + nprocs;
// Upper bound on the iteration counter, and the counter itself.
100 const unsigned int max_it = 21u;
101 unsigned int it = 0u;
// Powers of the current estimate shared by f and df.
105 const double P_row2 = P_row_estimate * P_row_estimate;
106 const double NnP_row2 = Nn * P_row2;
// f at the current estimate.
109 const double f = NnP_row2 * (2.0 * P_row2 - P_row_estimate) +
110 PMm * (P_row_estimate - two_P);
// Derivative of f at the current estimate.
111 const double df = NnP_row2 * (8.0 * P_row_estimate - 3.0) + PMm;
// Newton update.
114 const double P_row_n1 = P_row_estimate - (f / df);
// Residual: absolute change between successive estimates.
117 r =
std::abs(P_row_n1 - P_row_estimate);
120 P_row_estimate = P_row_n1;
122 }
// Keep iterating while the estimate still moved by more than 0.1 and the
// pre-incremented counter is below max_it.
while ((r > 0.1) && ((++it) < max_it));
// The 0.5 offset presumably rounds to nearest on conversion to size_type
// (assumes size_type is an integer type -- TODO confirm).
124 return P_row_estimate + 0.5;
// NOTE(review): the signature and the declarations of `unused`, `diff`,
// `test_x`, `test_diff` and `min_test_x` are not among the lines shown here;
// the comments below describe only the visible statements.
// Nothing to improve when the unused count is already zero.
143 if (unused == 0u)
return;
// Search width: log2(nprocs) converted to size_type, but never less than 1.
146 const size_type delta = std::max<size_type>(1ul, std::log2(nprocs));
// Lower limit: `delta` below x, but not below min_x. The left-hand side of
// this expression is not shown; it looks like it initializes min_test_x --
// confirm.
151 std::max<int_fast32_t>(min_x, int_fast32_t(x) - delta);
// Scan candidate values of test_x downward until min_test_x is passed.
154 for (; test_x >= min_test_x; --test_x) {
// Companion dimension for this candidate.
155 const size_type test_y = nprocs / test_x;
// Part of nprocs not covered by a test_x-by-test_y arrangement.
156 const size_type test_unused = nprocs - test_x * test_y;
// Accept the candidate if it leaves fewer unused, or ties on unused with a
// smaller test_diff.
159 if ((test_unused < unused) ||
160 ((test_unused == unused) && (test_diff < diff))) {
// Record the new best unused count.
163 unused = test_unused;
// NOTE(review): only part of this definition is shown; the leading parameters
// (the code below reads `rank` and `nprocs`) and several statements are not
// among the visible lines.
// row_size and col_size are forwarded to optimal_proc_row() below.
174 const std::size_t row_size,
const std::size_t col_size) {
192 }
// Branch taken when size_ does not exceed the process count.
else if (size_ <= nprocs) {
// Only ranks below proc_size_ receive grid coordinates.
199 if (
rank < proc_size_) {
// Row-major mapping of rank to (row, column) within the process grid.
201 rank_row_ =
rank / proc_cols_;
202 rank_col_ =
rank % proc_cols_;
// ceil(nprocs / cols_), but at least 1. The target of this expression is not
// shown; presumably min_proc_rows, which is used below -- confirm.
214 std::max<size_type>(((nprocs + cols_ - 1ul) / cols_), 1ul);
// No more process rows than processes, and no more than rows_.
215 const size_type max_proc_rows = std::min<size_type>(nprocs, rows_);
// Nested max/min applied to the optimal_proc_row() estimate; the remaining
// arguments of both calls are not shown.
219 proc_rows_ = std::max<size_type>(
221 std::min<size_type>(optimal_proc_row(nprocs, row_size, col_size),
// Column count by integer division of nprocs by the chosen row count.
223 proc_cols_ = nprocs / proc_rows_;
// Refine only when proc_rows_ lies strictly between the two bounds.
225 if ((proc_rows_ > min_proc_rows) && (proc_rows_ < max_proc_rows)) {
228 minimize_unused_procs(proc_rows_, proc_cols_, nprocs, min_proc_rows,
// Number of processes that actually belong to the grid.
232 proc_size_ = proc_rows_ * proc_cols_;
// Only ranks below proc_size_ receive grid coordinates and local extents.
234 if (
rank < proc_size_) {
// Row-major mapping of rank to (row, column) within the process grid.
236 rank_row_ =
rank / proc_cols_;
237 rank_col_ =
rank % proc_cols_;
// Local rows: the even share rows_ / proc_rows_, plus one extra for the first
// (rows_ % proc_rows_) process rows.
240 local_rows_ = (rows_ / proc_rows_) +
241 (
size_type(rank_row_) < (rows_ % proc_rows_) ? 1u : 0u);
// Local columns: same scheme using cols_ and proc_cols_.
242 local_cols_ = (cols_ / proc_cols_) +
243 (
size_type(rank_col_) < (cols_ % proc_cols_) ? 1u : 0u);
// Local element count is the product of the local extents.
244 local_size_ = local_rows_ * local_cols_;
// Constructor fragment (leading parameters and most of the initializer list
// are not among the lines shown here).
// row_size and col_size are passed straight through to init().
278 const std::size_t row_size,
const std::size_t col_size)
// Total size is the product of rows_ and cols_.
282 size_(rows_ * cols_),
// Configure the grid using this process's rank and the process count
// reported by world_.
297 init(world_->rank(), world_->size(), row_size, col_size);
// Compiled only when TILEDARRAY_ENABLE_TEST_PROC_GRID is defined.
300 #ifdef TILEDARRAY_ENABLE_TEST_PROC_GRID
// Constructor fragment (leading parameters and most of the initializer list
// are not among the lines shown here).
319 const std::size_t row_size,
const std::size_t col_size)
// Total size is the product of rows_ and cols_.
323 size_(rows_ * cols_),
// Unlike the call that queries world_, this one feeds init() with the
// caller-supplied test_rank and test_nprocs.
339 init(test_rank, test_nprocs, row_size, col_size);
341 #endif // TILEDARRAY_ENABLE_TEST_PROC_GRID
// Member-wise copy: each member listed here is initialized from the same
// member of `other`. (Some initializers are not among the lines shown here.)
350 : world_(other.world_),
// Process-grid shape and size.
354 proc_rows_(other.proc_rows_),
355 proc_cols_(other.proc_cols_),
356 proc_size_(other.proc_size_),
// Grid coordinates of the rank.
357 rank_row_(other.rank_row_),
358 rank_col_(other.rank_col_),
// Local extents and local element count.
359 local_rows_(other.local_rows_),
360 local_cols_(other.local_cols_),
361 local_size_(other.local_size_) {}
// Member-wise assignment: each member listed here is overwritten with the
// same member of `other`. (Some assignments are not among the lines shown
// here.)
367 world_ = other.world_;
// Process-grid shape and size.
371 proc_rows_ = other.proc_rows_;
372 proc_cols_ = other.proc_cols_;
373 proc_size_ = other.proc_size_;
// Grid coordinates of the rank.
374 rank_row_ = other.rank_row_;
375 rank_col_ = other.rank_col_;
// Local extents and local element count.
376 local_rows_ = other.local_rows_;
377 local_cols_ = other.local_cols_;
378 local_size_ = other.local_size_;
// Default-constructed group; it is replaced below only when local_size_ is
// non-zero.
446 madness::Group group;
448 if (local_size_ != 0u) {
// Rank list with capacity for one entry per process column.
450 std::vector<ProcessID> proc_list;
451 proc_list.reserve(proc_cols_);
// Append proc_cols_ consecutive ranks starting at the current value of p.
// NOTE(review): p is initialized outside the lines shown here -- presumably
// to the first rank of this process's row; confirm.
455 const size_type row_end = p + proc_cols_;
456 for (; p < row_end; ++p) proc_list.push_back(p);
// Build the group from the collected ranks; `did` is passed through as the
// third constructor argument.
459 group = madness::Group(*world_, proc_list, did);
// Default-constructed group; it is replaced below only when local_size_ is
// non-zero and at least one rank was collected.
472 madness::Group group;
474 if (local_size_ != 0u) {
// Rank list with capacity for one entry per process row.
476 std::vector<ProcessID> proc_list;
477 proc_list.reserve(proc_rows_);
// Collect ranks rank_col_, rank_col_ + proc_cols_, ... while below
// proc_size_, i.e. stepping by one full grid row each time.
480 for (
size_type p = rank_col_; p < proc_size_; p += proc_cols_)
481 proc_list.push_back(p);
// Skip group construction when the list is empty; `did` is passed through as
// the third constructor argument.
484 if (proc_list.size() != 0)
485 group = madness::Group(*world_, proc_list, did);
// Row-major rank of grid position (row, rank_col_): this object's own column
// at the given row.
498 return rank_col_ + row * proc_cols_;
// Row-major rank of grid position (rank_row_, col): this object's own row at
// the given column.
508 return rank_row_ * proc_cols_ + col;
// Shared CyclicPmap built from the stored rows_ and cols_ (the rest of the
// argument list is not among the lines shown here).
518 return std::make_shared<CyclicPmap>(*world_, rows_, cols_, proc_rows_,
// Same construction, but the row count comes from the local `rows` value
// instead of the rows_ member (the rest of the argument list is not among the
// lines shown here).
531 return std::make_shared<CyclicPmap>(*world_,
rows, cols_, proc_rows_,
// Same construction, but the column count comes from the local `cols` value
// instead of the cols_ member (the rest of the argument list is not among the
// lines shown here).
544 return std::make_shared<CyclicPmap>(*world_, rows_,
cols, proc_rows_,
552 #endif // TILEDARRAY_GRID_H__INCLUDED