um_storage.h
/*
 * This file is a part of TiledArray.
 * Copyright (C) 2018 Virginia Tech
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Eduard Valeyev
 * Department of Chemistry, Virginia Tech
 * Feb 6, 2018
 *
 */

#ifndef TILEDARRAY_CUDA_UM_VECTOR_H__INCLUDED
#define TILEDARRAY_CUDA_UM_VECTOR_H__INCLUDED

#include <TiledArray/cuda/thrust.h>

#ifdef TILEDARRAY_HAS_CUDA

#include <btas/array_adaptor.h>
#include <btas/varray/varray.h>

#include <TiledArray/utility.h>

namespace TiledArray {

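/// a thrust::device_vector that allocates its elements in CUDA Unified Memory
/// via TiledArray::cuda_um_allocator, so the data is addressable from both
/// host and device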
template <typename T>
using cuda_um_thrust_vector =
    thrust::device_vector<T, TiledArray::cuda_um_allocator<T>>;

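/// @tparam Space the memory space to query
/// @param vec a unified-memory storage object (not inspected; the result
///        depends only on @p Space)
/// @return true if CUDA Unified Memory overlaps @p Space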
template <MemorySpace Space, typename Storage>
bool in_memory_space(const Storage& vec) noexcept {
  return overlap(MemorySpace::CUDA_UM, Space);
}

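/// Prefetches the contents of the unified-memory storage @p vec to the
/// execution space @p Space (host for CPU, the current device for CUDA)
/// using cudaMemPrefetchAsync on @p stream; this is a no-op if the device
/// does not support concurrent managed access.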
template <ExecutionSpace Space, typename Storage>
void to_execution_space(Storage& vec, cudaStream_t stream = 0) {
  switch (Space) {
    case ExecutionSpace::CPU: {
      using std::data;
      using std::size;
      using value_type = typename Storage::value_type;
      if (cudaEnv::instance()->concurrent_managed_access()) {
        CudaSafeCall(cudaMemPrefetchAsync(data(vec),
                                          size(vec) * sizeof(value_type),
                                          cudaCpuDeviceId, stream));
      }
      break;
    }
    case ExecutionSpace::CUDA: {
      using std::data;
      using std::size;
      using value_type = typename Storage::value_type;
      int device = -1;
      if (cudaEnv::instance()->concurrent_managed_access()) {
        CudaSafeCall(cudaGetDevice(&device));
        CudaSafeCall(cudaMemPrefetchAsync(
            data(vec), size(vec) * sizeof(value_type), device, stream));
      }
      break;
    }
    default:
      throw std::runtime_error("invalid execution space");
  }
}

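/// Constructs @p storage with @p n elements and prefetches it to the CUDA
/// execution space associated with @p stream.
///
/// A minimal usage sketch; the element type, size, and stream below are
/// illustrative assumptions, and cuda_um_btas_varray is the unified-memory
/// btas::varray alias referenced by the serialization code further down:
/// \code
/// cudaStream_t stream = 0;  // default stream, illustration only
/// TiledArray::cuda_um_btas_varray<double> storage;
/// TiledArray::make_device_storage(storage, 1024, stream);
/// // raw pointer usable in host code and CUDA kernels
/// double* ptr = TiledArray::device_data(storage);
/// \endcode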
template <typename Storage>
void make_device_storage(Storage& storage, std::size_t n,
                         const cudaStream_t& stream = 0) {
  storage = Storage(n);
  TiledArray::to_execution_space<TiledArray::ExecutionSpace::CUDA>(storage,
                                                                   stream);
}

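/// @return a raw pointer to the data of @p storage; for unified-memory
///         storage the pointer is dereferenceable on both host and device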
template <typename Storage>
typename Storage::value_type* device_data(Storage& storage) {
  return storage.data();
}

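/// @return a const raw pointer to the data of @p storage (const overload)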
template <typename Storage>
const typename Storage::value_type* device_data(const Storage& storage) {
  return storage.data();
}

}  // namespace TiledArray

namespace madness {
namespace archive {

// forward decls
template <class Archive, typename T>
struct ArchiveLoadImpl;
template <class Archive, typename T>
struct ArchiveStoreImpl;

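// MADNESS (de)serialization of unified-memory containers: the element count
// is archived first, followed by each element in order.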
template <class Archive, typename T>
struct ArchiveLoadImpl<Archive, TiledArray::cuda_um_thrust_vector<T>> {
  static inline void load(const Archive& ar,
                          TiledArray::cuda_um_thrust_vector<T>& x) {
    typename thrust::device_vector<
        T, TiledArray::cuda_um_allocator<T>>::size_type n(0);
    ar& n;
    x.resize(n);
    for (auto& xi : x) ar& xi;
  }
};

template <class Archive, typename T>
struct ArchiveStoreImpl<Archive, TiledArray::cuda_um_thrust_vector<T>> {
  static inline void store(const Archive& ar,
                           const TiledArray::cuda_um_thrust_vector<T>& x) {
    ar& x.size();
    for (const auto& xi : x) ar& xi;
  }
};

template <class Archive, typename T>
struct ArchiveLoadImpl<Archive, TiledArray::cuda_um_btas_varray<T>> {
  static inline void load(const Archive& ar,
                          TiledArray::cuda_um_btas_varray<T>& x) {
    typename TiledArray::cuda_um_btas_varray<T>::size_type n(0);
    ar& n;
    x.resize(n);
    for (auto& xi : x) ar& xi;
  }
};

template <class Archive, typename T>
struct ArchiveStoreImpl<Archive, TiledArray::cuda_um_btas_varray<T>> {
  static inline void store(const Archive& ar,
                           const TiledArray::cuda_um_btas_varray<T>& x) {
    ar& x.size();
    for (const auto& xi : x) ar& xi;
  }
};

}  // namespace archive
}  // namespace madness

#endif  // TILEDARRAY_HAS_CUDA

#endif  // TILEDARRAY_CUDA_UM_VECTOR_H__INCLUDED