cpu_cuda_vector.h
Go to the documentation of this file.
1 
2 #ifndef TILEDARRAY_CUDA_CPU_CUDA_VECTOR_H__INCLUDED
3 #define TILEDARRAY_CUDA_CPU_CUDA_VECTOR_H__INCLUDED
4 
#include <btas/array_adaptor.h>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

10 namespace TiledArray {
11 
13 
17 template <typename T, typename HostAlloc = std::allocator<T>,
18  typename DeviceAlloc = thrust::device_allocator<T>>
20  public:
21  typedef T value_type;
22  typedef T& reference;
23  typedef const T& const_reference;
24  typedef typename thrust::host_vector<T, HostAlloc>::size_type size_type;
25  typedef typename thrust::host_vector<T, HostAlloc>::difference_type
27  typedef typename thrust::host_vector<T, HostAlloc>::iterator iterator;
28  typedef
29  typename thrust::host_vector<T, HostAlloc>::const_iterator const_iterator;
30 
31  enum class state { none = 0x00, host = 0x01, device = 0x10, all = 0x11 };
32 
34  cpu_cuda_vector() : state_(state::host) {}
38  : host_vec_(static_cast<int>(st) & static_cast<int>(state::host) ? size
39  : 0),
40  state_(st) {
41  if (static_cast<int>(st) & static_cast<int>(state::device))
42  thrust::resize(device_vec_, size);
43  }
48  : host_vec_(
49  static_cast<int>(st) & static_cast<int>(state::host) ? size : 0,
50  value),
51  device_vec_(
52  static_cast<int>(st) & static_cast<int>(state::device) ? size : 0,
53  value),
54  state_(st) {}
57  template <typename RandomAccessIterator>
58  cpu_cuda_vector(RandomAccessIterator begin, RandomAccessIterator end)
59  : host_vec_(begin, end), state_(state::host) {}
60 
61  size_type size() const {
62  if (on_host()) return host_vec_.size();
63  if (on_device()) return device_vec_.size();
64  }
65 
66  void resize(size_type new_size) {
67  if (on_host()) host_vec_.resize(new_size);
68  if (on_device()) {
69  // device_vec_.resize(new_size);
70  assert(false);
71  }
72  }
73 
75  void to_device() const {
76  assert(on_host());
77  device_vec_ = host_vec_;
78  state_ = state::all;
79  }
81  void to_host() const {
82  assert(on_device());
83  host_vec_ = device_vec_;
84  state_ = state::all;
85  }
86 
87  const T* host_data() const {
88  assert(on_host());
89  return host_vec_.data();
90  }
91  T* host_data() {
92  assert(on_host());
93  state_ = state::host;
94  return host_vec_.data();
95  }
96  const T* device_data() const {
97  assert(on_device());
98  return thrust::raw_pointer_cast(device_vec_.data());
99  }
100  T* device_data() {
101  assert(on_device());
102  state_ = state::device;
103  return thrust::raw_pointer_cast(device_vec_.data());
104  }
105 
106  const T* data() const { return host_data(); }
107  T* data() { return host_data(); }
108 
110  assert(on_host());
111  return std::begin(host_vec_);
112  }
114  assert(on_host());
115  return std::cbegin(host_vec_);
116  }
118  assert(on_host());
119  return std::cbegin(host_vec_);
120  }
122  assert(on_host());
123  return std::end(host_vec_);
124  }
125  const_iterator end() const {
126  assert(on_host());
127  return std::cend(host_vec_);
128  }
130  assert(on_host());
131  return std::cend(host_vec_);
132  }
133 
134  const_reference operator[](std::size_t i) const {
135  assert(on_host());
136  return host_vec_[i];
137  }
138 
139  reference operator[](std::size_t i) {
140  assert(on_host());
141  return host_vec_[i];
142  }
143 
144  bool on_host() const {
145  return static_cast<int>(state_) & static_cast<int>(state::host);
146  }
147  bool on_device() const {
148  return static_cast<int>(state_) & static_cast<int>(state::device);
149  }
150 
151  private:
152  mutable thrust::host_vector<T, HostAlloc> host_vec_;
153  mutable thrust::device_vector<T, DeviceAlloc> device_vec_;
154  mutable state state_;
155 };
156 
157 extern template class cpu_cuda_vector<double>;
158 extern template class cpu_cuda_vector<float>;
159 
160 template <MemorySpace Space, typename T, typename HostAlloc,
161  typename DeviceAlloc>
163  const cpu_cuda_vector<T, HostAlloc, DeviceAlloc>& vec) noexcept {
164  return (vec.on_host() && overlap(MemorySpace::CPU, Space)) ||
165  (vec.on_device() && overlap(MemorySpace::CUDA, Space));
166 }
167 
168 template <ExecutionSpace Space, typename T, typename HostAlloc,
169  typename DeviceAlloc>
171  cudaStream_t stream = 0) {
172  switch (Space) {
173  case ExecutionSpace::CPU: {
174  vec.to_host();
175  break;
176  }
177  case ExecutionSpace::CUDA: {
178  vec.to_device();
179  break;
180  }
181  default:
182  throw std::runtime_error("invalid execution space");
183  }
184 }
185 
186 template <typename T>
187 void make_device_storage(cpu_cuda_vector<T>& storage, std::size_t n,
188  cudaStream_t stream = 0) {
190 }
191 
192 template <typename T>
194  return storage.device_data();
195 }
196 
197 template <typename T>
198 const T* device_data(const cpu_cuda_vector<T>& storage) {
199  return storage.device_data();
200 }
201 
202 } // namespace TiledArray
203 
204 namespace madness {
205 namespace archive {
206 
207 // forward decls
208 template <class Archive, typename T>
210 template <class Archive, typename T>
212 
213 template <class Archive, typename T>
214 struct ArchiveLoadImpl<Archive, TiledArray::cpu_cuda_vector<T>> {
215  static inline void load(const Archive& ar,
218  ar& n;
219  x.resize(n);
220  for (auto& xi : x) ar& xi;
221  }
222 };
223 
224 template <class Archive, typename T>
225 struct ArchiveStoreImpl<Archive, TiledArray::cpu_cuda_vector<T>> {
226  static inline void store(const Archive& ar,
228  ar& x.size();
229  for (const auto& xi : x) ar& xi;
230  }
231 };
232 
233 } // namespace archive
234 } // namespace madness
235 
236 #endif // TILEDARRAY_CUDA_CPU_CUDA_VECTOR_H__INCLUDED
void to_device() const
copies the data from the host to the device (even if a device-side copy already exists)
ExecutionSpace
enumerates the execution spaces
Definition: platform.h:58
const_iterator begin() const
cpu_cuda_vector()
creates an empty vector
const_iterator cbegin() const
constexpr bool overlap(MemorySpace space1, MemorySpace space2)
Definition: platform.h:53
a vector that lives on either host or device side, or both
thrust::host_vector< T, HostAlloc >::iterator iterator
cpu_cuda_vector(size_type size, T value, state st=state::host)
thrust::host_vector< T, HostAlloc >::const_iterator const_iterator
const_reference operator[](std::size_t i) const
void make_device_storage(cpu_cuda_vector< T > &storage, std::size_t n, cudaStream_t stream=0)
const T * host_data() const
reference operator[](std::size_t i)
static void store(const Archive &ar, const TiledArray::cpu_cuda_vector< T > &x)
const_iterator cend() const
cpu_cuda_vector(size_type size, state st=state::host)
static void load(const Archive &ar, TiledArray::cpu_cuda_vector< T > &x)
T * device_data(cpu_cuda_vector< T > &storage)
const T * device_data() const
cpu_cuda_vector(RandomAccessIterator begin, RandomAccessIterator end)
void resize(size_type new_size)
void to_host() const
copies the data from the device to the host (even if a host-side copy already exists)
void to_execution_space(cpu_cuda_vector< T, HostAlloc, DeviceAlloc > &vec, cudaStream_t stream=0)
thrust::host_vector< T, HostAlloc >::size_type size_type
thrust::host_vector< T, HostAlloc >::difference_type difference_type
MemorySpace
enumerates the memory spaces
Definition: platform.h:30
const_iterator end() const
bool in_memory_space(const cpu_cuda_vector< T, HostAlloc, DeviceAlloc > &vec) noexcept