28 #ifndef _ccr12_mtensor_h
29 #define _ccr12_mtensor_h
32 #include <chemistry/qc/ccr12/tensor.h>
33 #include <chemistry/qc/ccr12/ccr12_info.h>
34 #include <math/scmat/matrix.h>
37 void print_tile(
double* data,
int n01,
int n23) {
50 template <
typename I>
inline std::pair<I,I>
intersect(
const std::pair<I,I>& range1,
const std::pair<I,I>& range2) {
51 I start_max = std::max(range1.first, range2.first);
52 I fence_min = std::min(range1.second, range2.second);
53 if (start_max < fence_min)
return make_pair(start_max, fence_min);
54 return make_pair(I(0),I(0));
58 template <
typename I>
inline bool in(
const std::pair<I,I>& r,
const std::pair<I,I>&
range) {
59 return r.first >=
range.first && r.second <=
range.second;
62 template <
typename I>
inline bool in(I i,
const std::pair<I,I>&
range) {
69 template <
size_t NDIM>
73 typedef long tile_index;
74 typedef long element_index;
75 typedef std::pair<tile_index, tile_index> tile_range;
76 typedef std::vector<tile_range> tile_ranges;
77 typedef std::pair<element_index, element_index> element_range;
78 typedef std::vector<element_range> element_ranges;
79 typedef std::vector<element_index> element_index_map;
80 static const size_t ndim = NDIM;
87 const tile_ranges&
range) :
88 tensor_(tensor), info_(info), range_(
range)
90 MPQC_ASSERT(
range.size() == NDIM);
105 const element_index_map& eimap0,
106 const element_index_map& eimap1,
107 const element_index_map& eimap2,
108 const element_index_map& eimap3,
109 element_ranges
const* mapped_element_ranges = 0,
110 bool src_is_2301 =
false);
121 template <
typename SrcType>
void convert(
const SrcType& src,
124 const element_index_map& eimap0,
125 const element_index_map& eimap1,
126 const element_index_map& eimap2,
127 const element_index_map& eimap3,
128 element_ranges
const* mapped_element_ranges = 0) {
130 MPQC_ASSERT(NDIM == 4);
131 MPQC_ASSERT(mapped_element_ranges != 0);
134 const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]);
135 const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]);
136 const std::vector<bool> tile2_in_erange = tiles_in_erange(range_[2], eimap2, (*mapped_element_ranges)[2]);
137 const std::vector<bool> tile3_in_erange = tiles_in_erange(range_[3], eimap3, (*mapped_element_ranges)[3]);
140 const int nproc_with_ints = info_->mem()->n();
141 const int me = info_->mem()->me();
143 const size_t ntiles0 = range_[0].second - range_[0].first;
144 const size_t ntiles1 = range_[1].second - range_[1].first;
145 const size_t ntiles2 = range_[2].second - range_[2].first;
146 const size_t ntiles3 = range_[3].second - range_[3].first;
151 const bool espace0_eq_espace1 = ((*mapped_element_ranges)[0] == (*mapped_element_ranges)[1]);
152 const bool espace2_eq_espace3 = ((*mapped_element_ranges)[2] == (*mapped_element_ranges)[3]);
153 const bool can_always_antisymmetrize = (espace0_eq_espace1 || espace2_eq_espace3);
154 MPQC_ASSERT(can_always_antisymmetrize ==
true);
163 const size_t maxsize1 = info_->maxtilesize();
164 const size_t maxtilesize = maxsize1 * maxsize1 * maxsize1 * maxsize1;
165 double* data = info_->mem()->malloc_local_double(maxtilesize);
167 for (
long t0 = range_[0].first; t0 < range_[0].second; ++t0) {
168 const long size0 = info_->get_range(t0);
169 const long offset0 = info_->get_offset(t0);
170 const long spin0 = info_->get_spin(t0);
171 const bool in_erange0 = tile0_in_erange[t0];
173 for (
long t1 = std::max(range_[1].first, t0); t1 < range_[1].second; ++t1) {
174 const long size1 = info_->get_range(t1);
175 const long offset1 = info_->get_offset(t1);
176 const long spin1 = info_->get_spin(t1);
177 const bool in_erange1 = tile1_in_erange[t1];
179 const bool aaaa = (spin0 == spin1);
181 for (
long t2 = range_[2].first; t2 < range_[2].second; ++t2) {
182 const long size2 = info_->get_range(t2);
183 const long offset2 = info_->get_offset(t2);
184 const long spin2 = info_->get_spin(t2);
185 const bool in_erange2 = tile2_in_erange[t2];
187 const bool abab = ((spin0 != spin1) && (spin0 == spin2));
188 const bool abba = ((spin0 != spin1) && (spin0 != spin2));
190 for (
long t3 = std::max(range_[3].first, t2); t3 < range_[3].second; ++t3) {
191 const long size3 = info_->get_range(t3);
192 const long offset3 = info_->get_offset(t3);
193 const long spin3 = info_->get_spin(t3);
194 const bool in_erange3 = tile3_in_erange[t3];
197 const long tile_key = (t3-range_[3].first) +
198 ntiles3 * ( (t2-range_[2].first) +
199 ntiles2 * ( (t1-range_[1].first) +
200 ntiles1 * (t0-range_[0].first)
207 if ((!in_erange0 || !in_erange1 || !in_erange2 || !in_erange3))
212 bool antisymmetrize01 = espace0_eq_espace1;
213 bool antisymmetrize23 = espace2_eq_espace3;
215 if (antisymmetrize23) antisymmetrize01 =
false;
217 long size = size0 * size1 * size2 * size3;
218 std::fill(data, data+size, 0.0);
221 for (
int i0 = 0; i0 < size0; ++i0) {
222 const int ii0 = eimap0[offset0 + i0];
223 for (
int i1 = 0; i1 < size1; ++i1) {
224 const int ii1 = eimap1[offset1 + i1];
230 if (antisymmetrize01) {
231 const long iii0 = std::max(ii0, ii1);
232 const long iii1 = std::min(ii0, ii1);
233 i01 = iii0 * (iii0+1)/2 + iii1;
235 i01 = ii0 * n1 + ii1;
237 const long i01_proc = i01 % nproc_with_ints;
241 if (antisymmetrize23) {
243 for (
int i2 = 0; i2 < size2; ++i2) {
244 const int ii2 = eimap2[offset2 + i2];
245 for (
int i3 = 0; i3 < size3; ++i3, ++i0123) {
246 const int ii3 = eimap3[offset3 + i3];
248 const int i23 = ii2 * n3 + ii3;
249 const int i32 = ii3 * n3 + ii2;
252 const double integral_0123 = src(i01,i23);
253 data[i0123] = integral_0123;
255 const double integral_0132 = src(i01,i32);
256 data[i0123] = -integral_0132;
258 const double integral_0123 = src(i01,i23);
259 const double integral_0132 = src(i01,i32);
260 data[i0123] = integral_0123 - integral_0132;
268 else if (antisymmetrize01) {
270 const int i10 = i1 * n1 + i0;
272 for (
int i2 = 0; i2 < size2; ++i2) {
273 const int ii2 = eimap2[offset2 + i2];
274 for (
int i3 = 0; i3 < size3; ++i3, ++i0123) {
275 const int ii3 = eimap3[offset3 + i3];
277 const int i23 = ii2 * n3 + ii3;
280 const double integral_0123 = src(i01,i23);
281 data[i0123] = integral_0123;
283 const double integral_1023 = src(i10,i23);
284 data[i0123] = -integral_1023;
286 const double integral_0123 = src(i01,i23);
287 const double integral_1023 = src(i10,i23);
288 data[i0123] = integral_0123 - integral_1023;
306 const double sum = std::accumulate(data, data+size, 0.0);
307 ExEnv::out0() <<
"tiles = (" << t0 <<
"," << t1 <<
"," << t2 <<
"," << t3
308 <<
") key = " << tile_key <<
" sum = " << sum << std::endl;
310 for(
int i=0; i<size; ++i) {
311 ExEnv::out0() <<
"data[" << i <<
"] = " << data[i] << std::endl;
321 info_->mem()->free_local_double(data);
336 template <
typename SrcType>
void convert(
const SrcType& src0,
const SrcType& src1,
341 const element_index_map& eimap0,
342 const element_index_map& eimap1,
343 const element_index_map& eimap2,
344 const element_index_map& eimap3,
345 element_ranges
const* mapped_element_ranges = 0,
346 bool src1_is_1023 =
false) {
348 MPQC_ASSERT(NDIM == 4);
349 MPQC_ASSERT(mapped_element_ranges != 0);
352 const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]);
353 const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]);
354 const std::vector<bool> tile2_in_erange = tiles_in_erange(range_[2], eimap2, (*mapped_element_ranges)[2]);
355 const std::vector<bool> tile3_in_erange = tiles_in_erange(range_[3], eimap3, (*mapped_element_ranges)[3]);
358 const int nproc_with_ints = info_->mem()->n();
359 const int me = info_->mem()->me();
361 const size_t ntiles0 = range_[0].second - range_[0].first;
362 const size_t ntiles1 = range_[1].second - range_[1].first;
363 const size_t ntiles2 = range_[2].second - range_[2].first;
364 const size_t ntiles3 = range_[3].second - range_[3].first;
369 const bool espace0_eq_espace1 = ((*mapped_element_ranges)[0] == (*mapped_element_ranges)[1]);
370 const bool espace2_eq_espace3 = ((*mapped_element_ranges)[2] == (*mapped_element_ranges)[3]);
371 const bool can_always_antisymmetrize = (espace0_eq_espace1 || espace2_eq_espace3);
372 MPQC_ASSERT(can_always_antisymmetrize ==
true);
381 const size_t maxsize1 = info_->maxtilesize();
382 const size_t maxtilesize = maxsize1 * maxsize1 * maxsize1 * maxsize1;
383 double* data = info_->mem()->malloc_local_double(maxtilesize);
385 for (
long t0 = range_[0].first; t0 < range_[0].second; ++t0) {
386 const long size0 = info_->get_range(t0);
387 const long offset0 = info_->get_offset(t0);
388 const long spin0 = info_->get_spin(t0);
389 const bool in_erange0 = tile0_in_erange[t0];
391 for (
long t1 = std::max(range_[1].first, t0); t1 < range_[1].second; ++t1) {
392 const long size1 = info_->get_range(t1);
393 const long offset1 = info_->get_offset(t1);
394 const long spin1 = info_->get_spin(t1);
395 const bool in_erange1 = tile1_in_erange[t1];
397 const bool aaaa = (spin0 == spin1);
399 for (
long t2 = range_[2].first; t2 < range_[2].second; ++t2) {
400 const long size2 = info_->get_range(t2);
401 const long offset2 = info_->get_offset(t2);
402 const long spin2 = info_->get_spin(t2);
403 const bool in_erange2 = tile2_in_erange[t2];
405 const bool abab = ((spin0 != spin1) && (spin0 == spin2));
406 const bool abba = ((spin0 != spin1) && (spin0 != spin2));
408 for (
long t3 = std::max(range_[3].first, t2); t3 < range_[3].second; ++t3) {
409 const long size3 = info_->get_range(t3);
410 const long offset3 = info_->get_offset(t3);
411 const long spin3 = info_->get_spin(t3);
412 const bool in_erange3 = tile3_in_erange[t3];
415 const long tile_key = (t3-range_[3].first) +
416 ntiles3 * ( (t2-range_[2].first) +
417 ntiles2 * ( (t1-range_[1].first) +
418 ntiles1 * (t0-range_[0].first)
425 if ((!in_erange0 || !in_erange1 || !in_erange2 || !in_erange3))
430 bool antisymmetrize01 = espace0_eq_espace1;
431 bool antisymmetrize23 = espace2_eq_espace3;
433 if (antisymmetrize23) antisymmetrize01 =
false;
435 long size = size0 * size1 * size2 * size3;
436 std::fill(data, data+size, 0.0);
439 for (
int i0 = 0; i0 < size0; ++i0) {
440 const int ii0 = eimap0[offset0 + i0];
441 for (
int i1 = 0; i1 < size1; ++i1) {
442 const int ii1 = eimap1[offset1 + i1];
448 if (antisymmetrize01) {
449 const long iii0 = std::max(ii0, ii1);
450 const long iii1 = std::min(ii0, ii1);
451 i01 = iii0 * (iii0+1)/2 + iii1;
453 i01 = ii0 * n1 + ii1;
455 const long i01_proc = i01 % nproc_with_ints;
459 if (antisymmetrize23) {
461 for (
int i2 = 0; i2 < size2; ++i2) {
462 const int ii2 = eimap2[offset2 + i2];
463 for (
int i3 = 0; i3 < size3; ++i3, ++i0123) {
464 const int ii3 = eimap3[offset3 + i3];
466 const int i23 = ii2 * n3 + ii3;
467 const int i32 = ii3 * n2 + ii2;
470 const double integral_0123 = src0(i01,i23);
471 data[i0123] = integral_0123;
473 const double integral_0132 = src1(i01,i32);
474 data[i0123] = -integral_0132;
476 const double integral_0123 = src0(i01,i23);
477 const double integral_0132 = src1(i01,i32);
478 data[i0123] = integral_0123 - integral_0132;
486 else if (antisymmetrize01) {
488 const int i10 = i1 * n0 + i0;
490 for (
int i2 = 0; i2 < size2; ++i2) {
491 const int ii2 = eimap2[offset2 + i2];
492 for (
int i3 = 0; i3 < size3; ++i3, ++i0123) {
493 const int ii3 = eimap3[offset3 + i3];
495 const int i23 = ii2 * n3 + ii3;
498 const double integral_0123 = src0(i01,i23);
499 data[i0123] = integral_0123;
501 const double integral_1023 = src1(i10,i23);
502 data[i0123] = -integral_1023;
504 const double integral_0123 = src0(i01,i23);
505 const double integral_1023 = src1(i10,i23);
506 data[i0123] = integral_0123 - integral_1023;
524 const double sum = std::accumulate(data, data+size, 0.0);
525 ExEnv::out0() <<
"tiles = (" << t0 <<
"," << t1 <<
"," << t2 <<
"," << t3
526 <<
") key = " << tile_key <<
" sum = " << sum << std::endl;
528 for(
int i=0; i<size; ++i) {
529 ExEnv::out0() <<
"data[" << i <<
"] = " << data[i] << std::endl;
539 info_->mem()->free_local_double(data);
551 template <
typename SrcType>
void convert(
const SrcType& src,
552 const element_index_map& eimap0,
553 const element_index_map& eimap1,
554 element_ranges
const* mapped_element_ranges = 0) {
556 MPQC_ASSERT(NDIM == 2);
559 const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]);
560 const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]);
563 const int nproc_with_ints = info_->mem()->n();
564 const int me = info_->mem()->me();
566 const size_t ntiles0 = range_[0].second - range_[0].first;
567 const size_t ntiles1 = range_[1].second - range_[1].first;
570 #if 0 // assume that the user knows what she's doing
571 element_range src_range[2];
572 src_range[0] = this->map_range(range_[0], eimap0);
573 src_range[1] = this->map_range(range_[1], eimap1);
574 MPQC_ASSERT(src_range[0].second <= src->ni());
575 MPQC_ASSERT(src_range[1].second <= src->nj());
584 const size_t maxsize1 = info_->maxtilesize();
585 const size_t maxtilesize = maxsize1 * maxsize1;
586 double* data = info_->mem()->malloc_local_double(maxtilesize);
588 for (
long t0 = range_[0].first; t0 < range_[0].second; ++t0) {
589 const long size0 = info_->get_range(t0);
590 const long offset0 = info_->get_offset(t0);
591 const long spin0 = info_->get_spin(t0);
592 const bool in_erange0 = tile0_in_erange[t0];
594 for (
long t1 = range_[1].first; t1 < range_[1].second; ++t1) {
595 const long size1 = info_->get_range(t1);
596 const long offset1 = info_->get_offset(t1);
597 const long spin1 = info_->get_spin(t1);
598 const bool in_erange1 = tile1_in_erange[t1];
601 const long tile_key = (t1-range_[1].first) + ntiles1 * (t0-range_[0].first);
606 if ((!in_erange0 || !in_erange1))
609 long size = size0 * size1;
610 std::fill(data, data+size, 0.0);
613 for (
int i0 = 0; i0 < size0; ++i0) {
614 const int ii0 = eimap0[offset0 + i0];
615 for (
int i1 = 0; i1 < size1; ++i1, ++i01) {
616 const int ii1 = eimap1[offset1 + i1];
618 data[i01] = src(ii0, ii1);
625 const double sum = std::accumulate(data, data+size, 0.0);
627 <<
") key = " << tile_key <<
" sum = " << sum << endl;
634 info_->mem()->free_local_double(data);
640 void print(
const std::string& label, std::ostream& os =
ExEnv::out0())
const {
641 tensor_->
print(label, os);
650 element_range map_range(
const element_range& rng,
651 const element_index_map& eimap) {
652 element_range result(eimap[rng.first],
654 for(element_index i=rng.first; i != rng.second; ++i) {
655 const element_index ii = eimap[i];
656 result.first = std::min(result.first, ii);
657 result.second = std::max(result.second, ii);
665 tiles_in_erange(
const tile_range& range,
666 const element_index_map& eimap,
667 const element_range& erange) {
669 std::vector<bool> result(range.second,
false);
670 for(
long t=range.first; t!=range.second; ++t) {
671 const long start = info_->get_offset(t);
672 const long size = info_->get_range(t);
673 const long fence = start + size;
674 const element_range tile(start, fence);
676 const element_range mapped_tile = map_range(tile, eimap);
677 result[t] =
in(mapped_tile,erange);
686 #include <chemistry/qc/ccr12/mtensor.timpl.h>
688 #endif // end of header guard