MPQC  3.0.0-alpha
file.hpp
1 #ifndef MPQC_FILE_HPP
2 #define MPQC_FILE_HPP
3 
4 #include <set>
5 #include <string>
6 #include <cassert>
7 #include <fstream>
8 
9 #include <stdlib.h>
10 #include <hdf5.h>
11 
12 #include <memory>
13 
14 #include <boost/array.hpp>
15 #include <boost/utility/enable_if.hpp>
16 #include <boost/noncopyable.hpp>
17 #include <boost/type_traits.hpp>
18 #include <boost/assert.hpp>
19 
20 #include <boost/typeof/typeof.hpp>
21 
22 #include "mpqc/range.hpp"
23 #include "mpqc/range/operator.hpp"
24 
25 #include "mpqc/utility/check.hpp"
26 #include <boost/foreach.hpp>
27 #include "mpqc/utility/timer.hpp"
28 #include "mpqc/utility/mutex.hpp"
29 
30 #include <util/misc/exenv.h>
31 
32 namespace mpqc {
33 namespace detail {
34 
38 namespace File {
39 
41 #define MPQC_FILE_VERIFY(expr) MPQC_CHECK((expr) >= 0)
42 
43 #ifndef H5_HAVE_THREADSAFE
44 #warning "HDF5 NOT THREADSAFE, HDF5 will use global mutex"
45 #endif
46 
48  struct threadsafe : boost::noncopyable {
49  //#ifndef H5_HAVE_THREADSAFE
50  threadsafe() {
51  // use global lock since HDF5 may call MPI routines
52  lock();
53  }
54  ~threadsafe() {
55  unlock();
56  }
57  static void lock() { mutex::global::lock(); }
58  static void unlock() { mutex::global::unlock(); }
59  //#endif
60  };
61 
62 #define MPQC_FILE_THREADSAFE mpqc::detail::File::threadsafe _threadsafe
63 
67  template<typename T>
68  inline hid_t h5t() {
69  typedef typename boost::remove_const<T>::type U;
70  if (boost::is_same<U,int>::value) return H5T_NATIVE_INT;
71  if (boost::is_same<U,double>::value) return H5T_NATIVE_DOUBLE;
72  throw std::runtime_error("no mapping to HDF5 type");
73  return hid_t();
74  }
75 
76  struct Properties : boost::noncopyable {
77  explicit Properties(hid_t props) {
78  props_ = H5Pcreate(props);
79  }
80  ~Properties() {
81  H5Pclose(props_);
82  }
83  hid_t id() const {
84  return props_;
85  }
86  private:
87  hid_t props_;
88  };
89 
94  struct Object {
95 
97  Object() : id_(0) {}
98 
100  Object(const Object &o) {
101  *this = o;
102  }
103 
110  Object(const Object &parent, hid_t id, void (*close)(hid_t), bool increment) {
111  MPQC_ASSERT(id);
112  parent_.reset(new Object(parent));
113  update(id, close, increment);
114  }
115 
116  ~Object() {
117  if (!id_) return;
118  MPQC_ASSERT(close_);
119  MPQC_FILE_THREADSAFE;
120  close_(id_);
121  }
122 
123  void operator=(const Object &o) {
124  Object *parent = o.parent_.get();
125  parent_.reset(parent ? new Object(*parent) : NULL);
126  update(o.id_, o.close_, true);
127  }
128 
129  hid_t id() const {
130  return id_;
131  }
132 
133  const Object& parent() const {
134  return *parent_;
135  }
136 
137  hid_t file() const {
138  MPQC_FILE_THREADSAFE;
139  return H5Iget_file_id(id_);
140  }
141 
142  static std::string filename(hid_t id) {
143  MPQC_FILE_THREADSAFE;
144  std::vector<char> str(H5Fget_name(id, NULL, 0) + 1);
145  MPQC_FILE_VERIFY(H5Fget_name(id, &str[0], str.size()));
146  return std::string(&str[0]);
147  }
148 
149  operator bool() const {
150  return (this->id_);
151  }
152 
153  protected:
154 
155  std::auto_ptr<Object> parent_;
156  hid_t id_;
157  void (*close_)(hid_t);
158 
159  template<class F>
160  void update(hid_t id, F close, bool increment) {
161  if (id && increment) {
162  MPQC_FILE_THREADSAFE;
163  MPQC_FILE_VERIFY(H5Iinc_ref(id));
164  }
165  id_ = id;
166  close_ = close;
167  }
168  };
169 
170  struct Attribute: Object {
171  };
172 
173  template<class Container>
175  static Container data;
176  };
177 
178  template<class Container>
180 
181 } // namespace File
182 } // namespace detail
183 } // namespace mpqc
184 
185 namespace mpqc {
186 
197 
201 
202  struct Group;
203 
204  template<typename T>
205  struct Dataset;
206 
207  template<typename T>
208  struct Dataspace;
209 
211  struct Driver : boost::noncopyable {
212  hid_t fapl() const {
213  return fapl_;
214  }
215  protected:
216  Driver() {
217  this->fapl_ = H5Pcreate(H5P_FILE_ACCESS);
218  MPQC_FILE_VERIFY(this->fapl_);
219  }
220  hid_t fapl_;
221  };
222 
224  struct POSIXDriver : Driver {
225  POSIXDriver() : Driver() {
226  }
227  };
228 
229 #ifdef H5_HAVE_DIRECT
230  struct DirectDriver : Driver {
232  DirectDriver() : Driver()
233  {
234  MPQC_FILE_VERIFY(H5Pset_fapl_direct(Driver::fapl_, 1024, 4096, 8*4096));
235  }
236  };
237 #endif
238 
241  File() {}
242 
252  explicit File(const std::string &name, const Driver &driver = POSIXDriver()) {
253  initialize(name, driver);
254  }
255 
261  Group group(const std::string &name = "/");
262 
263  private:
264 
265  // opened files
267 
269  void initialize(const std::string &name, const Driver &driver) {
270  Object o = File::open(name, driver);
271  if (!o) {
272  o = File::create(name, driver);
273  }
274  Object::operator=(o);
275  }
276 
280  static void close(hid_t id) {
281  hid_t count, result;
282  MPQC_FILE_VERIFY((count = H5Iget_ref(id)));
283  MPQC_FILE_VERIFY(result = H5Fclose(id));
284  if (!(count-1)) {
285  files_::data.erase(id);
286  }
287  }
288 
289  explicit File(hid_t id, bool increment) :
290  Object(Object(), id, &File::close, increment)
291  {
292  MPQC_FILE_VERIFY(id);
293  }
294 
299  static std::string realpath(const std::string &name) {
300  char *str = ::realpath(name.c_str(), NULL);
301  //std::cout << "realpath: " << str << std::endl;
302  std::string path(str ? str : name);
303  free(str);
304  return path;
305  }
306 
314  static File open(const std::string &name, const Driver &driver) {
315  if (name.empty()) return File();
316  std::string path = realpath(name);
317  // find previously opened file of same realpath
318  BOOST_FOREACH (auto id, files_::data) {
319  if (path == realpath(Object::filename(id)))
320  return File(id, true);
321  }
322  hid_t fapl = driver.fapl();
323  hid_t id = H5Fcreate(name.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
324  MPQC_FILE_VERIFY(id);
325  files_::data.insert(id);
326  return File(id, false);
327  //return File(H5Fopen(path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT), false);
328  }
329 
336  static File create(const std::string &name, const Driver &driver) {
337  hid_t fapl = driver.fapl();
338  hid_t id = H5Fcreate(name.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, fapl);
339  MPQC_FILE_VERIFY(id);
340  files_::data.insert(id);
341  return File(id, false);
342  }
343 
344  };
345 
350  template<typename T_>
351  struct File::Dataspace {
352 
353  typedef typename boost::remove_const<T_>::type T;
354 
356  size_t size() const {
357  return size_;
358  }
359 
361  const std::vector<range>& extents() const {
362  return range_;
363  }
364 
367  std::vector<range> r = range_;
368  r[ndims_ - 1] = range(i, i + 1);
369  return Dataspace(parent_, base_, r, ndims_ - 1);
370  }
371 
373  Dataspace<T> operator()(const std::vector<range> &r) {
374  return Dataspace<T>(parent_, base_, extend(r), ndims_);
375  }
376 
377 #ifdef DOXYGEN
378 
385  template<class R, ...>
386  Dataspace<T> operator()(const R &r, ...);
387 #else
388  template<class S>
390  return this->operator()(std::vector<range>(t));
391  }
392  MPQC_RANGE_OPERATORS(4, Dataspace<T>, this->operator())
393 #endif
394 
400  void write(const T *buffer) {
401  timer t;
402  apply(&H5Dwrite, this->parent_.id(), rebase(range_), (T*) buffer);
403  // printf("File::write size=%lu bytes, %f mb/s\n",
404  // (sizeof(T)*size_), (sizeof(T)*size_)/(t*1e6));
405  }
406 
412  void read(T *buffer) const {
413  timer t;
414  apply(&H5Dread, this->parent_.id(), rebase(range_), buffer);
415  // printf("File::read size=%lu bytes, rate=%f mb/s\n",
416  // (sizeof(T)*size_), (sizeof(T)*size_)/(t*1e6));
417  }
418 
419  private:
420 
421  Object parent_;
422  size_t ndims_, size_;
423  std::vector<size_t> base_;
424  std::vector<mpqc::range> range_;
425  friend class Dataset<T> ;
426 
427  Dataspace(const Object &parent, const std::vector<size_t> &base,
428  const std::vector<mpqc::range> &r, size_t ndims) :
429  parent_(parent),
430  base_(base),
431  range_(r),
432  ndims_(ndims)
433  {
434  MPQC_ASSERT(ndims <= range_.size());
435  size_ = (r.size() ? 1 : 0);
436  for (int i = 0; i < range_.size(); ++i) {
437  size_ *= range_[i].size();
438  //std::cout << "range_[i] = " << range_[i] << std::endl;
439  }
440  }
441 
443  std::vector<range> extend(const std::vector<range> &r) const {
444  //std::cout << r.size() << " " << ndims_ << std::endl;
445  MPQC_ASSERT(r.size() == ndims_);
446  std::vector<range> x = r;
447  for (size_t i = ndims_; i < range_.size(); ++i) {
448  x.push_back(range_[i]);
449  }
450  return x;
451  }
452 
454  std::vector<range> rebase(const std::vector<range> &r) const {
455  MPQC_ASSERT(r.size() == base_.size());
456  std::vector<range> v;
457  for (int i = 0; i < base_.size(); ++i) {
458  auto begin = *r[i].begin() - base_[i];
459  v.push_back(range(begin, begin + r[i].size()));
460  }
461  return v;
462  }
463 
465  template<class F>
466  static void apply(F f, hid_t dset, const std::vector<range> &r, T *buffer) {
467  MPQC_FILE_THREADSAFE;
468  hid_t fspace = H5Dget_space(dset);
469  size_t size = select(fspace, r);
470  hsize_t mdims[] = { size };
471  hid_t mspace = H5Screate_simple(1, mdims, NULL);
472  MPQC_FILE_VERIFY(mspace);
473  MPQC_FILE_VERIFY(H5Sselect_all(mspace));
474  hid_t type = detail::File::h5t<T>();
475  timer t;
476  MPQC_FILE_VERIFY(f(dset, type, mspace, fspace, H5P_DEFAULT, buffer));
477  MPQC_FILE_VERIFY(H5Sclose(mspace));
478  MPQC_FILE_VERIFY(H5Sclose(fspace));
479  }
480 
482  static size_t select(hid_t space, const std::vector<range> &r) {
483  size_t N = H5Sget_simple_extent_ndims(space);
484  MPQC_FILE_VERIFY(N);
485  //printf("id = %i, N = %lu\n", space, N);
486  hsize_t fstart[N];
487  hsize_t fstride[N]; // Stride of hyperslab
488  hsize_t fcount[N]; // Block count
489  hsize_t fblock[N]; // Block sizes
490  size_t size = 1;
491  //printf("select [ ");
492  for (size_t i = 0, j = N - 1; i < N; ++i, --j) {
493  fstart[i] = *r[j].begin();
494  fcount[i] = r[j].size();
495  // std::cout << "r[j] = " << r[j]
496  // << " fstart=" << fstart[i]
497  // << ", fcount[i]=" << fcount[i] << std::endl;
498  fstride[i] = 1;
499  fblock[i] = 1;
500  size *= fcount[i];
501  }
502  //printf(" ], size = %i\n", size);
503  MPQC_FILE_VERIFY
504  (H5Sselect_hyperslab
505  (space, H5S_SELECT_SET, fstart, fstride, fcount, fblock));
506  return size;
507  }
508 
509  };
510 
514  template<typename T>
515  struct File::Dataset : File::Object {
516 
517  Dataset() {}
518 
526  template<typename Extents>
527  Dataset(const Object &parent, const std::string &name, const Extents &extents,
528  const File::Properties &dcpl = File::Properties(H5P_DATASET_CREATE))
529  : Object(Dataset::create(parent, name, extents, dcpl))
530  {
531  MPQC_ASSERT(id() > 0);
532  BOOST_FOREACH (auto e, extents) {
533  range r = extent(e);
534  //std::cout << "Dataset " << r << std::endl;
535  base_.push_back(*r.begin());
536  dims_.push_back(r.size());
537  }
538  }
539 
541  size_t rank() const {
542  return dims_.size();
543  }
544 
545  std::vector<range> extents() const {
546  std::vector<range> r;
547  for (size_t i = 0; i < this->rank(); ++i) {
548  r.push_back(range(base_[i], base_[i]+dims_[i]));
549  }
550  return r;
551  }
552 
558  void write(const T *buffer) {
559  this->operator()(this->extents()).write(buffer);
560  }
561 
567  void read(T *buffer) const {
568  this->operator()(this->extents()).read(buffer);
569  }
570 
572  Dataspace<T> operator[](size_t index) {
573  std::vector<range> r;
574  for (int i = 0; i < this->rank(); ++i) {
575  r.push_back(range(base_[i], base_[i] + dims_[i]));
576  }
577  return Dataspace<T>(*this, base_, r, r.size())[index];
578  }
579 
581  Dataspace<T> operator()(const std::vector<range> &r) {
582  //std::cout << this->extents_.size() << " " << r.size() << std::endl;
583  MPQC_ASSERT(this->rank() == r.size());
584  return Dataspace<T>(*this, base_, r, r.size());
585  }
586 
588  Dataspace<const T> operator()(const std::vector<range> &r) const {
589  MPQC_ASSERT(this->rank() == r.size());
590  return Dataspace<const T>(*this, base_, r, r.size());
591  }
592 
593 #ifdef DOXYGEN
594 
601  template<class R, ...>
602  Dataspace<T> operator()(const R &r, ...);
603 #else
604  template<class S>
605  Dataspace<T> operator()(const range::tie<S> &t) {
606  return this->operator()(std::vector<range>(t));
607  }
608  template<class S>
609  Dataspace<T> operator()(const range::tie<S> &t) const {
610  return this->operator()(std::vector<range>(t));
611  }
612  MPQC_RANGE_OPERATORS(4, Dataspace<T>, this->operator())
613  MPQC_RANGE_CONST_OPERATORS(4, Dataspace<const T>, this->operator())
614 #endif
615 
616  template<typename Extents>
617  static Object create(const Object &parent,
618  const std::string &name,
619  const Extents &extents,
620  const Properties &dcpl) {
621 
622  hid_t id;
623  // Object constructor may also obtain mutex
624  // make sure lock is released before constructing Object
625  {
626  MPQC_FILE_THREADSAFE;
627  std::vector<hsize_t> dims;
628  BOOST_FOREACH (auto e, extents) {
629  dims.push_back(extent(e).size());
630  //std::cout << dims.back() << std::endl;
631  }
632  std::reverse(dims.begin(), dims.end());
633 
634  hid_t fspace = H5Screate_simple(dims.size(), &dims[0], NULL);
635  hid_t type = detail::File::h5t<T>();
636  //printf("parent.id() = %i, name=%s\n", parent.id(), name.c_str());
637 #if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 8
638  id = H5Dcreate(parent.id(), name.c_str(), type, fspace, dcpl.id());
639 #else
640  id = H5Dcreate1(parent.id(), name.c_str(), type, fspace, dcpl.id());
641 #endif
642  MPQC_FILE_VERIFY(id);
643  }
644  return Object(parent, id, &Dataset::close, false);
645  }
646 
647  // static Dataset open(Object parent, const std::string &name) {
648  // return Dataset(H5Dopen1(parent.id(), name.c_str()));
649  // }
650 
651  protected:
652 
654  static range extent(const range &r) {
655  return r;
656  }
657 
659  template <typename E>
660  static range extent(const E &e) {
661  return range(0,e);
662  }
663 
664  private:
665 
666  std::vector<size_t> base_; // base index
667  std::vector<size_t> dims_; // dimensions
668 
672  static void close(hid_t id) {
673  // printf("H5Dclose(%i), ref=%i\n", id, H5Iget_ref(id));
674  MPQC_FILE_VERIFY(H5Dclose(id));
675  }
676 
677  };
678 
683 
690  static Group create(Object parent, const std::string &name) {
691 #if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 8
692  hid_t id = H5Gopen(parent.id(), name.c_str());
693 #else
694  hid_t id = H5Gopen1(parent.id(), name.c_str());
695 #endif
696  return Group(Object(parent, id, &Group::close, false));
697  }
698 
705  template<typename T, typename Dims>
706  Dataset<T> dataset(const std::string &name, const Dims &dims) {
707  return Dataset<T>(*this, name, dims);
708  }
709 
710  private:
711 
715  static void close(hid_t id) {
716  //printf("H5Gclose(%i), ref=%i\n", id, H5Iget_ref(id));
717  MPQC_FILE_VERIFY(H5Gclose(id));
718  }
719 
720  Group(Object o) : Object(o) {}
721  };
722 
723  inline File::Group File::group(const std::string &name) {
724  return Group::create(*this, name);
725  }
726 
734  template<typename T, class A>
735  void operator>>(File::Dataset<T> ds, A &a) {
736  ds.read(a.data());
737  }
738 
746  template<typename T, class A>
747  void operator<<(File::Dataset<T> ds, const A &a) {
748  ds.write(a.data());
749  }
750 
758  template<typename T, class A>
760  ds.read(a.data());
761  }
762 
770  template<typename T, class A>
771  void operator<<(File::Dataspace<T> ds, const A &a) {
772  ds.write(a.data());
773  }
774  // group File
776 
777 } // namespace mpqc
778 
779 #endif // MPQC_FILE_HPP
MPQC_RANGE_CONST_OPERATORS
#define MPQC_RANGE_CONST_OPERATORS(N, Type, Function)
Generates const version of MPQC_RANGE_OPERATORS.
Definition: operator.hpp:45
mpqc::File::File
File(const std::string &name, const Driver &driver=POSIXDriver())
Create or open File.
Definition: file.hpp:252
mpqc::detail::File::Object::Object
Object()
Default constructor with an invalid handle.
Definition: file.hpp:97
mpqc
Contains new MPQC code since version 3.
Definition: integralenginepool.hpp:37
mpqc::File::POSIXDriver
POSIX I/O file driver, the default.
Definition: file.hpp:224
MPQC_RANGE_OPERATORS
#define MPQC_RANGE_OPERATORS(N, Type, Function)
Generates operators()(const R1 r1, ...) of arity 1 to N.
Definition: operator.hpp:41
mpqc::File::Group::dataset
Dataset< T > dataset(const std::string &name, const Dims &dims)
Create or open Dataset.
Definition: file.hpp:706
mpqc::timer
Definition: timer.hpp:9
mpqc::range
Definition: range.hpp:23
mpqc::operator>>
void operator>>(Array< T > A, V &v)
Read from Array to a generic vector V.
Definition: array.hpp:204
mpqc::File::Group::create
static Group create(Object parent, const std::string &name)
Create a group.
Definition: file.hpp:690
mpqc::File::Dataspace::read
void read(T *buffer) const
Reads contiguous buffer from dataset.
Definition: file.hpp:412
mpqc::File::Dataset::operator()
Dataspace< T > operator()(const std::vector< range > &r)
Access dataspace of same rank.
Definition: file.hpp:581
mpqc::File::Driver
Base file driver.
Definition: file.hpp:211
mpqc::File::Dataset::read
void read(T *buffer) const
Reads contiguous buffer from dataset.
Definition: file.hpp:567
mpqc::detail::File::static_container
Definition: file.hpp:174
mpqc::File::Dataset::extent
static range extent(const E &e)
Return integral argument e as range(0,e)
Definition: file.hpp:660
mpqc::File::Dataset::Dataset
Dataset(const Object &parent, const std::string &name, const Extents &extents, const File::Properties &dcpl=File::Properties(H5P_DATASET_CREATE))
Create dataset belonging to some file object.
Definition: file.hpp:527
mpqc::File
Top-level file object that holds groups.
Definition: file.hpp:196
mpqc::detail::File::Attribute
Definition: file.hpp:170
mpqc::detail::File::Properties
Definition: file.hpp:76
mpqc::File::Dataspace
A subset of File::Dataset.
Definition: file.hpp:208
mpqc::File::Dataset
Array-like collection of data.
Definition: file.hpp:205
mpqc::File::Dataset::operator()
Dataspace< const T > operator()(const std::vector< range > &r) const
Access dataspace of same rank.
Definition: file.hpp:588
mpqc::File::Group
Directory-like container that holds datasets and other groups.
Definition: file.hpp:682
mpqc::File::Dataset::rank
size_t rank() const
Dataspace rank (number of dimensions)
Definition: file.hpp:541
mpqc::File::Dataspace::size
size_t size() const
Number of elements in the set.
Definition: file.hpp:356
mpqc::detail::File::Object
A reference-counted HDF5 handle object, superclass for eg File, Dataset, Attribute,...
Definition: file.hpp:94
mpqc::detail::File::Object::Object
Object(const Object &parent, hid_t id, void(*close)(hid_t), bool increment)
Definition: file.hpp:110
mpqc::File::Dataset::write
void write(const T *buffer)
Writes contiguous buffer into dataset.
Definition: file.hpp:558
mpqc::File::Dataset::extent
static range extent(const range &r)
Return range as is.
Definition: file.hpp:654
mpqc::range::tie
boost::tuple tie wrapper
Definition: tie.hpp:15
mpqc::File::File
File()
Constructs a null file object.
Definition: file.hpp:241
mpqc::detail::File::Object::Object
Object(const Object &o)
Copy constructor.
Definition: file.hpp:100
mpqc::operator<<
void operator<<(Array< T > A, const V &v)
Write to Array from a generic vector V.
Definition: array.hpp:191
mpqc::File::Dataspace::operator()
Dataspace< T > operator()(const std::vector< range > &r)
Access sub-dataspace of the same rank.
Definition: file.hpp:373
mpqc::File::group
Group group(const std::string &name="/")
Creates or opens a file group.
Definition: file.hpp:723
mpqc::File::Dataset::operator[]
Dataspace< T > operator[](size_t index)
Access dataspace of rank-1.
Definition: file.hpp:572
mpqc::detail::File::threadsafe
HDF5 may not be threadsafe, in that case mpqc::mutex::global is used.
Definition: file.hpp:48
mpqc::detail::File::h5t
hid_t h5t()
Translate C type into corresponding HDF5 type.
Definition: file.hpp:68
mpqc::File::Dataspace::operator[]
Dataspace< T > operator[](size_t i)
Access sub-dataspace of rank-1.
Definition: file.hpp:366
mpqc::File::Dataspace::extents
const std::vector< range > & extents() const
The extents of the set in terms of ranges.
Definition: file.hpp:361
mpqc::File::Dataspace::write
void write(const T *buffer)
Writes contiguous buffer into dataset.
Definition: file.hpp:400

Generated at Sun Jan 26 2020 23:24:01 for MPQC 3.0.0-alpha using the documentation package Doxygen 1.8.16.