26 #ifndef TILEDARRAY_GRID_H__INCLUDED 27 #define TILEDARRAY_GRID_H__INCLUDED 87 static size_type optimal_proc_row(
const double nprocs,
const double Mm,
92 double P_row_estimate = std::sqrt(nprocs);
99 const double PMm = nprocs * Mm;
100 const double two_P = nprocs + nprocs;
102 const unsigned int max_it = 21u;
103 unsigned int it = 0u;
107 const double P_row2 = P_row_estimate * P_row_estimate;
108 const double NnP_row2 = Nn * P_row2;
111 const double f = NnP_row2 * ( 2.0 * P_row2 - P_row_estimate)
112 + PMm * ( P_row_estimate - two_P);
113 const double df = NnP_row2 * ( 8.0 * P_row_estimate - 3.0) + PMm;
116 const double P_row_n1 = P_row_estimate - (f / df);
119 r =
std::abs(P_row_n1 - P_row_estimate);
122 P_row_estimate = P_row_n1;
124 }
while((r > 0.1) && ((++it) < max_it));
126 return P_row_estimate + 0.5;
150 const size_type delta = std::max<size_type>(1ul, std::log2(nprocs));
154 const size_type min_test_x = std::max<int_fast32_t>(min_x, int_fast32_t(x) - delta);
157 for(; test_x >= min_test_x; --test_x) {
158 const size_type test_y = nprocs / test_x;
159 const size_type test_unused = nprocs - test_x * test_y;
162 if((test_unused < unused) || ((test_unused == unused) && (test_diff < diff))) {
165 unused = test_unused;
176 const std::size_t row_size,
const std::size_t col_size)
195 }
else if(size_ <= nprocs) {
202 if(rank < proc_size_) {
204 rank_row_ = rank / proc_cols_;
205 rank_col_ = rank % proc_cols_;
217 std::max<size_type>(((nprocs + cols_ - 1ul) / cols_), 1ul);
218 const size_type max_proc_rows = std::min<size_type>(nprocs, rows_);
222 proc_rows_ = std::max<size_type>(min_proc_rows,
223 std::min<size_type>(optimal_proc_row(nprocs, row_size, col_size),
225 proc_cols_ = nprocs / proc_rows_;
227 if((proc_rows_ > min_proc_rows) && (proc_rows_ < max_proc_rows)) {
230 minimize_unused_procs(proc_rows_, proc_cols_, nprocs,
231 min_proc_rows, max_proc_rows);
234 proc_size_ = proc_rows_ * proc_cols_;
236 if(rank < proc_size_) {
238 rank_row_ = rank / proc_cols_;
239 rank_col_ = rank % proc_cols_;
242 local_rows_ = (rows_ / proc_rows_) + (
size_type(rank_row_) < (rows_ % proc_rows_) ? 1u : 0u);
243 local_cols_ = (cols_ / proc_cols_) + (
size_type(rank_col_) < (cols_ % proc_cols_) ? 1u : 0u);
244 local_size_ = local_rows_ * local_cols_;
254 world_(NULL), rows_(0u), cols_(0u), size_(0u), proc_rows_(0u),
255 proc_cols_(0u), proc_size_(0u), rank_row_(0), rank_col_(0),
256 local_rows_(0u), local_cols_(0u), local_size_(0u)
270 const std::size_t row_size,
const std::size_t col_size) :
271 world_(&world), rows_(
rows), cols_(
cols), size_(rows_ * cols_),
272 proc_rows_(0ul), proc_cols_(0ul), proc_size_(0ul),
273 rank_row_(-1), rank_col_(-1),
274 local_rows_(0ul), local_cols_(0ul), local_size_(0ul)
282 init(world_->rank(), world_->size(), row_size, col_size);
285 #ifdef TILEDARRAY_ENABLE_TEST_PROC_GRID 305 const std::size_t row_size,
const std::size_t col_size) :
306 world_(&world), rows_(
rows), cols_(
cols), size_(rows_ * cols_),
307 proc_rows_(0u), proc_cols_(0u), proc_size_(0u), rank_row_(-1),
308 rank_col_(-1), local_rows_(0u), local_cols_(0u), local_size_(0u)
317 init(test_rank, test_nprocs, row_size, col_size);
319 #endif // TILEDARRAY_ENABLE_TEST_PROC_GRID 328 world_(other.world_), rows_(other.rows_), cols_(other.cols_),
329 size_(other.size_), proc_rows_(other.proc_rows_),
330 proc_cols_(other.proc_cols_), proc_size_(other.proc_size_),
331 rank_row_(other.rank_row_), rank_col_(other.rank_col_),
332 local_rows_(other.local_rows_), local_cols_(other.local_cols_),
333 local_size_(other.local_size_)
340 world_ = other.world_;
344 proc_rows_ = other.proc_rows_;
345 proc_cols_ = other.proc_cols_;
346 proc_size_ = other.proc_size_;
347 rank_row_ = other.rank_row_;
348 rank_col_ = other.rank_col_;
349 local_rows_ = other.local_rows_;
350 local_cols_ = other.local_cols_;
351 local_size_ = other.local_size_;
420 madness::Group group;
422 if(local_size_ != 0u) {
424 std::vector<ProcessID> proc_list;
425 proc_list.reserve(proc_cols_);
429 const size_type row_end = p + proc_cols_;
430 for(; p < row_end; ++p)
431 proc_list.push_back(p);
434 group = madness::Group(*world_, proc_list, did);
447 madness::Group group;
449 if(local_size_ != 0u) {
451 std::vector<ProcessID> proc_list;
452 proc_list.reserve(proc_rows_);
455 for(
size_type p = rank_col_; p < proc_size_; p += proc_cols_)
456 proc_list.push_back(p);
459 if(proc_list.size() != 0)
460 group = madness::Group(*world_, proc_list, did);
472 return rank_col_ + row * proc_cols_;
481 return rank_row_ * proc_cols_ + col;
491 return std::make_shared<CyclicPmap>(*world_, rows_, cols_, proc_rows_, proc_cols_);
503 return std::make_shared<CyclicPmap>(*world_,
rows, cols_, proc_rows_, proc_cols_);
515 return std::make_shared<CyclicPmap>(*world_, rows_,
cols, proc_rows_, proc_cols_);
522 #endif // TILEDARRAY_GRID_H__INCLUDED ProcessID rank_row() const
Rank row accessor.
ProcessID rank_col() const
Rank column accessor.
madness::Group make_row_group(const madness::DistributedID &did) const
Construct a row group.
ProcessID map_col(const size_type col) const
Map a column to the process in this process's row.
ProcessID map_row(const size_type row) const
Map a row to the process in this process's column.
size_type cols() const
Element column count accessor.
auto abs(const ComplexConjugate< T > &a)
std::shared_ptr< Pmap > make_pmap() const
Construct a cyclic process map.
ProcGrid(const ProcGrid &other)
Copy constructor.
ProcGrid(World &world, const size_type rows, const size_type cols, const std::size_t row_size, const std::size_t col_size)
Construct a process grid.
ProcGrid & operator=(const ProcGrid &other)
Copy assignment operator.
madness::Group make_col_group(const madness::DistributedID &did) const
Construct a column group.
ProcGrid()
Default constructor.
size_type rows() const
Element row count accessor.
size_type proc_cols() const
Process column count accessor.
size_type local_rows() const
Local element row count accessor.
size_type size() const
Element count accessor.
size_type proc_rows() const
Process row count accessor.
size_type local_cols() const
Local element column count accessor.
size_type local_size() const
Local element count accessor.
std::shared_ptr< Pmap > make_row_phase_pmap(const size_type cols) const
Construct a row-phased cyclic process map.
std::shared_ptr< Pmap > make_col_phase_pmap(const size_type rows) const
Construct a column-phased cyclic process map.
size_type proc_size() const
Process grid size accessor.
KroneckerDeltaTile< _N >::numeric_type min(const KroneckerDeltaTile< _N > &arg)