MPQC  3.0.0-alpha
core.hpp
#ifndef MPQC_ARRAY_CORE_HPP
#define MPQC_ARRAY_CORE_HPP

#include "mpqc/array/forward.hpp"

#include "mpqc_config.h"
#include "mpqc/mpi.hpp"
#ifdef HAVE_MPI
#include "mpqc/array/parallel.hpp"
#ifdef HAVE_ARMCI
extern "C" {
#include <armci.h>
}
#endif
#endif // HAVE_MPI

#include "mpqc/range.hpp"
#include <boost/foreach.hpp>
#include "mpqc/utility/mutex.hpp"
#include "mpqc/utility/exception.hpp"

#include <boost/noncopyable.hpp>

namespace mpqc {
namespace detail {

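    // Tag type that selects the core storage backend: the array is kept in
    // process memory (or, when ARMCI is available, distributed across ranks).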
    struct array_core_driver {
    };

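    // In-core specialization of array_impl: all elements live in a single
    // std::vector<T> owned by this process.  Storage is column-major in the
    // sense that the first index varies fastest (stride 1), as built up in
    // apply() below.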
    template<typename T>
    struct array_impl<T, array_core_driver>
        : ArrayBase, boost::noncopyable
    {

        template<typename Extent>
        array_impl(const std::string &name,
                   const std::vector<Extent> &extents)
            : ArrayBase(name, extents)
        {
            data_.resize(this->size());
        }

        void sync() {}

    protected:

        void _put(const std::vector<range> &r, const void *buffer) {
            apply(putv, r, this->dims_, &this->data_[0], (const T*)buffer);
        }

        void _get(const std::vector<range> &r, void *buffer) const {
            apply(getv, r, this->dims_, &this->data_[0], (T*)buffer);
        }

    private:

        std::vector<T> data_;

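        // Contiguous-run kernels passed to apply(): each copies `size`
        // elements between the array storage and the user buffer and reports
        // how many elements it handled.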
        static size_t putv(size_t size, T *data, const T *buffer) {
            std::copy(buffer, buffer+size, data);
            return size;
        }

        static size_t getv(size_t size, const T *data, T *buffer) {
            std::copy(data, data+size, buffer);
            return size;
        }

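        // Map an N-d section r of an array with extents dims onto the minimal
        // set of (count, stride) levels needed by the recursive apply() below.
        // A dimension requested in full (dims[i] == r[i].size()) is contiguous
        // with the next one, so it is folded into the current count rather
        // than adding a level; begin accumulates the flat offset of the first
        // requested element.  E.g. for dims = {4,4} and r = {[0,4), [1,3)}
        // the whole section is one contiguous run: counts[0] = 8, begin = 4.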
        template<class F, typename data_ptr, typename buffer_ptr>
        static size_t apply(F f,
                            const std::vector<range> &r,
                            const std::vector<size_t> &dims,
                            data_ptr data, buffer_ptr buffer) {
            size_t begin = 0;
            std::vector<size_t> counts;
            std::vector<size_t> strides;
            counts.push_back(1);
            strides.push_back(1);
            for (size_t i = 0, stride = 1; i < dims.size(); ++i) {
                counts.back() *= r[i].size();
                begin += *r[i].begin()*stride;
                stride *= dims[i];
                if (dims[i] != r[i].size()) {
                    counts.push_back(1);
                    strides.push_back(stride);
                }
            }
            return apply(f, counts, strides, data+begin, buffer, strides.size());
        }

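        // Recursive driver: at level 1 the kernel f handles a contiguous run
        // of counts[0] elements; a higher level loops counts[level-1] times,
        // stepping the array pointer by strides[level-1] elements while the
        // dense buffer advances by however many elements the inner levels
        // processed.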
        template<class F, typename data_ptr, typename buffer_ptr>
        static size_t apply(F f,
                            const std::vector<size_t> &counts,
                            const std::vector<size_t> &strides,
                            data_ptr data, buffer_ptr buffer,
                            int level) {
            if (level == 0)
                return 0;
            if (level == 1) {
                return f(counts[0], data, buffer);
            }
            size_t size = 0;
            --level;
            for (size_t i = 0; i < counts[level]; ++i) {
                size_t n = apply(f, counts, strides, data, buffer+size, level);
                data += strides[level];
                size += n;
            }
            return size;
        }

    };

#ifdef HAVE_ARMCI

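    // Distributed variant of the core driver, backed by ARMCI one-sided
    // communication.  The last (slowest-varying) dimension is split into
    // near-equal contiguous blocks, one per MPI rank; all other dimensions
    // stay whole on every rank.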
    template<typename T>
    struct array_parallel_impl<T, array_core_driver>
        : ArrayBase, boost::noncopyable
    {

        array_parallel_impl(const std::string &name,
                            const std::vector<size_t> &dims,
                            const MPI::Comm &comm)
            : ArrayBase(name, dims)
        {
            initialize(ArrayBase::dims_, comm);
        }

    private:

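        // Collectively allocate one ARMCI memory segment per rank and record
        // a Tile (base pointer, owning rank, extents) for each, so that
        // _put/_get can route section requests to the right ranks.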
        void initialize(const std::vector<size_t> &dims, const MPI::Comm &comm) {

            // don't have code to use ARMCI groups yet
            MPQC_CHECK(comm == MPI::Comm::World());

            size_t size = 1;
            std::vector<range> extents;
            for (int i = 0; i < int(dims.size())-1; ++i) {
                size *= dims[i];
                extents.push_back(range(0, dims[i]));
            }
            // ceiling division: per-rank block of the last dimension
            size_t block = (dims.back() + comm.size() - 1)/comm.size();
            size *= block;

            check(ARMCI_Init(), "ARMCI_Init", comm);

            std::vector<void*> data(comm.size(), NULL);
            check(ARMCI_Malloc(&data[0], size*sizeof(T)), "ARMCI_Malloc", comm);
            data_ = data[comm.rank()];

            for (size_t i = 0; i < comm.size(); ++i) {
                Tile tile;
                tile.data = data[i];
                tile.proc = i;
                tile.local = (i == comm.rank());
                size_t begin = std::min<size_t>(dims.back(), i*block);
                size_t end = std::min<size_t>(dims.back(), begin+block);
                tile.extents = extents;
                tile.extents.push_back(range(begin, end));
                tiles_.push_back(tile);
            }

        }

        ~array_parallel_impl() {
            ARMCI_Free(this->data_);
        }

        void sync() {
            ARMCI_Barrier();
        }

        const MPI::Comm& comm() const {
            return this->comm_;
        }

    protected:

        void _put(const std::vector<range> &r, const void *buffer) {
            apply<PUT>(this->tiles_, r, (void*)buffer);
        }

        void _get(const std::vector<range> &r, void *buffer) const {
            apply<GET>(this->tiles_, r, buffer);
        }

    private:

        enum OP { PUT, GET };

        typedef array_tile<void*> Tile;
        void *data_;
        std::vector<Tile> tiles_;

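        // Throws on error; with a communicator argument the check is
        // collective, so every rank throws if the call failed on any rank.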
        static void check(int err, const std::string &func,
                          MPI::Comm comm = MPI::Comm::Self()) {
            if (comm.any(err != 0)) {
                throw std::runtime_error(func + " failed");
            }
        }

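        // Carry out a one-sided PUT or GET of section r against every tile
        // that overlaps it.  Each overlap becomes one ARMCI strided transfer:
        // count[0] is the contiguous run length in bytes, count[1..N-1] are
        // element counts of the higher dimensions, and the stride arrays give
        // byte distances between consecutive runs on the local (packed) and
        // remote (tile-extent) sides.  Because tiles partition only the last
        // dimension and are visited in order, successive overlaps occupy
        // successive runs of the packed user buffer.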
        template<OP op>
        static void apply(const std::vector<Tile> &tiles,
                          const std::vector<range> &r,
                          void *buffer) {

            T *local = (T*)buffer;

            BOOST_FOREACH (Tile t, tiles) {

                // intersection of the requested section with this tile
                std::vector<range> x = t.subset(r);
                if (x.empty()) continue;

                size_t total = 1;
                size_t N = x.size();
                std::vector<int> remote_strides(N);
                std::vector<int> local_strides(N);
                std::vector<int> count(N);

                for (size_t i = 0; i < N; ++i) {
                    total *= x[i].size();
                    count[i] = x[i].size();
                    // byte strides: the local side is packed to the section
                    // sizes, the remote side to the tile's full extents
                    local_strides[i] =
                        x[i].size()*((i > 0) ? local_strides[i-1] : (int)sizeof(T));
                    remote_strides[i] =
                        t.extents[i].size()*((i > 0) ? remote_strides[i-1] : (int)sizeof(T));
                }
                count[0] *= sizeof(T); // ARMCI counts the first dimension in bytes

                // offset of the section's origin within the remote tile
                T *remote = (T*)t.data;
                for (size_t i = 0, stride = 1; i < N; ++i) {
                    remote += (x[i].begin() - t.extents[i].begin())*stride;
                    stride *= t.extents[i].size();
                }

                // serialize ARMCI calls through the global mutex
                mutex::global::lock();
                if (op == GET) {
                    int err;
                    err = ARMCI_GetS(remote, &remote_strides[0],
                                     local, &local_strides[0],
                                     &count[0], N-1, t.proc);
                    check(err, "ARMCI_GetS");
                }
                if (op == PUT) {
                    int err;
                    err = ARMCI_PutS(local, &local_strides[0],
                                     remote, &remote_strides[0],
                                     &count[0], N-1, t.proc);
                    check(err, "ARMCI_PutS");
                }
                mutex::global::unlock();

                local += total;

            }
        }

    };
#endif // HAVE_ARMCI

} // namespace detail
} // namespace mpqc

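// Driver tag instance: pass ARRAY_CORE to an Array constructor to select the
// in-core (or, under ARMCI, distributed in-core) backend.  A hypothetical
// usage sketch; the Array<T> constructor shape is assumed from
// mpqc/array/forward.hpp and is not defined in this header:
//
//   std::vector<size_t> dims(2, 100);                    // 100 x 100
//   mpqc::Array<double> a("a", dims, mpqc::ARRAY_CORE);  // in-core storage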
namespace mpqc {
    static const detail::array_core_driver ARRAY_CORE;
}

#endif /* MPQC_ARRAY_CORE_HPP */