30 #ifndef _chemistry_qc_scf_cadfclhf_h
31 #define _chemistry_qc_scf_cadfclhf_h
39 #include <type_traits>
40 #include <unordered_set>
41 #include <unordered_map>
44 #include <Eigen/Dense>
45 #include <Eigen/Sparse>
48 #include <chemistry/qc/scf/clhf.h>
49 #include <chemistry/qc/lcao/wfnworld.h>
50 #include <util/container/conc_cache_fwd.h>
51 #include <util/container/thread_wrap.h>
52 #include <util/misc/hash.h>
55 #include "ordered_shells.h"
56 #include "treemat_fwd.h"
59 template<
typename... Types>
60 using shared_ptr = std::shared_ptr<Types...>;
61 using std::make_shared;
63 typedef typename boost::integer_range<int> int_range;
64 typedef unsigned long long ull;
66 #define USE_INTEGRAL_CACHE 0
73 class ApproximatePairWriter;
76 namespace assignments {
84 do_threaded(
int nthread,
const std::function<
void(
int)>& f){
85 boost::thread_group compute_threads;
87 for(
int ithr = 0; ithr < nthread; ++ithr) {
89 compute_threads.create_thread([&, ithr](){
95 compute_threads.join_all();
102 typedef Eigen::Matrix<uli, Eigen::Dynamic, Eigen::Dynamic> matrix_t;
107 double interval_h_, interval_v_;
109 int nbins_h_, nbins_v_;
114 double min_h_, max_h_, min_v_, max_v_;
117 int nbins_h,
double min_h,
double max_h,
118 int nbins_v,
double min_v,
double max_v,
119 bool log_h=
false,
bool log_v=
false,
120 bool clip_edges=
false
123 const matrix_t&
matrix()
const {
return hist_mat_; }
125 void insert_value(
double value_h,
double value_v);
128 hist_mat_ +=
other.hist_mat_;
151 void ao_fock(
double accuracy);
152 void reset_density();
153 double new_density();
160 typedef Eigen::Map<Eigen::VectorXd, Eigen::Unaligned, Eigen::Stride<1, Eigen::Dynamic>> CoefView;
161 typedef shared_ptr<CoefView> CoefContainer;
162 typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> TwoCenterIntContainer;
163 typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> ThreeCenterIntContainer;
164 typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> FourCenterIntContainer;
165 typedef shared_ptr<TwoCenterIntContainer> TwoCenterIntContainerPtr;
166 typedef shared_ptr<ThreeCenterIntContainer> ThreeCenterIntContainerPtr;
167 typedef shared_ptr<FourCenterIntContainer> FourCenterIntContainerPtr;
168 typedef std::unordered_map<
170 std::pair<CoefContainer, CoefContainer>,
173 typedef Eigen::HouseholderQR<Eigen::MatrixXd> Decomposition;
174 typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> RowMatrix;
175 typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> ColMatrix;
176 typedef Eigen::Map<RowMatrix, Eigen::Unaligned, Eigen::OuterStride<>> StridedRowMap;
177 template<
typename T1,
typename T2>
178 using fast_map = std::unordered_map<T1, T2, sc::hash<T1>>;
208 typedef std::atomic_uint_fast64_t accumulate_t;
209 typedef decltype(accumulate_t().load()) count_t;
211 bool histogram_mode =
false;
220 : int_screening_values(
other.int_screening_values),
221 int_actual_values(
other.int_actual_values),
222 int_distance_factors(
other.int_distance_factors),
223 int_distances(
other.int_distances),
224 int_indices(
other.int_indices),
225 int_ams(
other.int_ams)
227 K_3c_needed.store(
other.K_3c_needed.load());
228 K_3c_needed_fxn.store(
other.K_3c_needed_fxn.load());
229 K_3c_dist_screened.store(
other.K_3c_dist_screened.load());
230 K_3c_dist_screened_fxn.store(
other.K_3c_dist_screened_fxn.load());
231 K_3c_underestimated.store(
other.K_3c_underestimated.load());
232 K_3c_underestimated_fxn.store(
other.K_3c_underestimated_fxn.load());
233 K_3c_perfect.store(
other.K_3c_perfect.load());
234 K_3c_perfect_fxn.store(
other.K_3c_perfect_fxn.load());
235 K_3c_contract_fxn.store(
other.K_3c_contract_fxn.load());
236 K_2c_contract_fxn.store(
other.K_2c_contract_fxn.load());
237 Kt_contract1_fxn.store(
other.Kt_contract1_fxn.load());
238 Kt_contract2_fxn.store(
other.Kt_contract2_fxn.load());
239 L3_build_compares.store(
other.L3_build_compares.load());
241 parent =
other.parent;
242 is_first =
other.is_first;
245 accumulate_t K_3c_needed = { 0 };
246 accumulate_t K_3c_needed_fxn = { 0 };
247 accumulate_t K_3c_dist_screened = { 0 };
248 accumulate_t K_3c_dist_screened_fxn = { 0 };
249 accumulate_t K_3c_underestimated = { 0 };
250 accumulate_t K_3c_underestimated_fxn = { 0 };
251 accumulate_t K_3c_perfect = { 0 };
252 accumulate_t K_3c_perfect_fxn = { 0 };
253 accumulate_t K_3c_contract_fxn = { 0 };
254 accumulate_t K_2c_contract_fxn = { 0 };
255 accumulate_t Kt_contract1_fxn = { 0 };
256 accumulate_t Kt_contract2_fxn = { 0 };
257 accumulate_t L3_build_compares = { 0 };
267 std::vector<cadf::Histogram2d> values_hists;
268 std::vector<cadf::Histogram2d> distance_hists;
269 std::vector<cadf::Histogram2d> distance_noschwarz_hists;
270 std::vector<cadf::Histogram2d> exponent_ratio_hists;
271 std::vector<ull> int_am_counts;
272 std::vector<double> int_am_ratio_sums;
273 std::vector<double> int_am_ratio_log_sums;
275 void set_nthread(
int nthr) {
276 int_screening_values.set_nthread(nthr);
277 int_actual_values.set_nthread(nthr);
278 int_distance_factors.set_nthread(nthr);
279 int_distances.set_nthread(nthr);
280 int_indices.set_nthread(nthr);
281 int_ams.set_nthread(nthr);
285 auto accum_all = [&msg](accumulate_t& val) {
287 long tmp = (long)val.load();
292 accum_all(K_3c_needed);
293 accum_all(K_3c_needed_fxn);
294 accum_all(K_3c_dist_screened);
295 accum_all(K_3c_dist_screened_fxn);
296 accum_all(K_3c_underestimated);
297 accum_all(K_3c_underestimated_fxn);
298 accum_all(K_3c_perfect);
299 accum_all(K_3c_perfect_fxn);
300 accum_all(K_3c_contract_fxn);
301 accum_all(K_2c_contract_fxn);
302 accum_all(Kt_contract1_fxn);
303 accum_all(Kt_contract2_fxn);
304 accum_all(L3_build_compares);
307 bool is_first =
false;
311 mutable accumulate_t sig_pairs = { 0 };
312 mutable accumulate_t sig_pairs_fxn = { 0 };
314 mutable bool xml_stats_saved =
false;
317 auto accum_all = [&msg](accumulate_t& val) {
318 long tmp = val.load();
322 accum_all(sig_pairs);
323 accum_all(sig_pairs_fxn);
326 std::vector<Iteration> iterations;
328 ScreeningStatistics()
333 Iteration& next_iteration() {
334 iterations.emplace_back();
335 iterations.back().is_first = iterations.size() == 1;
336 iterations.back().parent =
this;
337 return iterations.back();
340 void print_summary(std::ostream& out,
341 const Ref<GaussianBasisSet>&
basis,
342 const Ref<GaussianBasisSet>& dfbs,
343 const CADFCLHF* parent,
344 int print_level = -1,
345 bool new_exchange =
false
353 SignificantPairs = 1,
363 double screening_thresh_ = 1e-7;
365 double pair_screening_thresh_;
367 double full_screening_thresh_;
369 double coef_screening_thresh_;
374 double distance_screening_thresh_;
376 bool do_linK_ =
true;
380 bool linK_block_rho_;
382 bool linK_use_distance_ =
true;
384 int print_screening_stats_;
388 double distance_damping_factor_ = 1.0;
393 bool print_iteration_timings_ =
false;
398 bool use_norms_nu_ =
true;
405 bool use_norms_sigma_ =
false;
412 bool sigma_norms_chunk_by_atoms_ =
false;
416 bool xml_debug_ =
false;
420 bool xml_screening_data_ =
false;
425 bool use_extents_ =
true;
431 bool use_max_extents_ =
true;
436 bool subtract_extents_ =
false;;
440 double well_separated_thresh_ = 1e-1;
446 bool exact_diagonal_J_ =
false;
452 bool exact_diagonal_K_ =
false;
457 bool cadf_K_only_ =
false;
463 bool B_use_buffer_ =
false;
465 size_t B_buffer_size_;
472 bool scale_screening_thresh_ =
true;
478 double full_screening_thresh_min_;
485 double full_screening_expon_ = 1.0;
489 bool screen_B_ =
true;
496 bool distribute_coefficients_ =
false;
498 double screen_B_use_distance_ =
true;
502 double B_screening_thresh_;
507 bool store_coefs_transpose_ =
false;
512 bool thread_4c_ints_ =
false;
517 double d_over_screening_thresh_;
522 double d_under_screening_thresh_;
528 bool use_norms_B_ =
false;
532 bool shuffle_L_3_keys_ =
true;
536 bool shuffle_J_assignments_ =
true;
540 bool match_orbitals_ =
true;
544 double match_orbitals_max_diff_ = 1e-4;
548 double match_orbitals_max_homo_offset_ = 0.05;
552 bool match_orbitals_use_svd_ =
true;
555 bool debug_coulomb_energy_ =
false;
558 bool debug_exchange_energy_ =
false;
562 bool new_exchange_algorithm_ =
true;
567 int min_atoms_per_node_ = 3;
570 bool count_ints_only_ =
false;
571 bool count_ints_use_norms_ =
true;
572 double count_ints_exclude_thresh_ = 1e-16;
573 bool count_ints_histogram_ =
false;
574 int count_ints_hist_distance_bins_ = 25;
575 int count_ints_hist_ratio_bins_ = 300;
576 int count_ints_hist_bins_ = 200;
577 double count_ints_hist_min_ratio_ = 1e-8;
578 double count_ints_hist_max_ratio_ = 1e8;
579 double count_ints_hist_max_int_ = 1e3;
580 double count_ints_max_distance_ = -1.0;
581 bool count_ints_hist_clip_edges_ =
false;
582 bool count_ints_exclude_two_center_ =
false;
583 bool qqr_only_ =
false;
586 bool safe_extents_ =
false;
587 bool dist_factor_use_overlap_ =
false;
588 int dist_factor_overlap_max_am_diff_ = 99;
589 bool dist_factor_overlap_exclude_contracted_ =
false;
590 double dist_factor_overlap_schwarz_ratio_cutoff_ = 1e-2;
591 int count_ints_n_integral_extrema_ = 1;
595 int n_iter_only_ = -1;
598 std::shared_ptr<ScreeningStatistics> stats_;
599 ScreeningStatistics::Iteration* iter_stats_;
601 double prev_density_frob_;
602 double prev_epsilon_;
603 double prev_epsilon_dist_;
604 double prev_epsilon_B_;
605 double prev_epsilon_d_over_;
606 double prev_epsilon_d_under_;
608 RefDiagSCMatrix prev_evals_;
609 RefSCMatrix prev_evecs_;
610 std::vector<int> prev_occ_;
612 int max_fxn_obs_ = 0;
613 int max_fxn_obs_j_ish_ = 0;
614 int max_fxn_obs_j_jsh_ = 0;
615 int max_fxn_obs_todo_ = 0;
616 int max_fxn_atom_obs_ = 0;
617 int max_fxn_dfbs_ = 0;
618 int max_fxn_dfbs_todo_ = 0;
619 int max_fxn_atom_dfbs_ = 0;
620 int max_obs_atom_fxn_on_dfbs_center_todo_ = 0;
621 int max_fxn_atom_dfbs_todo_ = 0;
627 Ref<WavefunctionWorld> world_;
629 TwoCenterIntContainerPtr g2_full_ptr_;
631 RefDiagSCMatrix core_evals_;
632 RefSCMatrix core_evecs_;
634 std::vector<int> orb_to_hcore_orb_;
639 RowMatrix dipole_frob_;
642 std::atomic<size_t> memory_used_;
644 class TrackedMemoryHolder {
645 std::atomic<size_t>& used_ref_;
649 std::atomic<size_t>& accum,
651 ) : used_ref_(accum), size_(size)
655 ~TrackedMemoryHolder() { used_ref_ -= size_; }
657 void consume_memory(
size_t size) {
658 memory_used_ += size;
660 void release_memory(
size_t size) {
661 memory_used_ -= size;
663 TrackedMemoryHolder hold_memory(
size_t size) {
664 return TrackedMemoryHolder(memory_used_, size);
674 RefSCMatrix compute_J();
675 RefSCMatrix compute_J_cadf();
676 RefSCMatrix compute_J_df();
678 RefSCMatrix compute_K();
682 RefSCMatrix new_compute_K();
684 double get_distance_factor(
694 bool ignore_extents =
false
697 void compute_coefficients();
699 template<
template<
typename...>
class container,
typename map_type>
700 void get_coefs_ish_jsh(
704 container<map_type>& coefsA,
705 container<map_type>& coefsB
710 void init_significant_pairs();
716 void print(std::ostream& o)
const;
719 TwoCenterIntContainerPtr ints_to_eigen(
721 Ref<TwoBodyTwoCenterInt>& ints,
723 const GaussianBasisSet* bs1=0,
724 const GaussianBasisSet* bs2=0
727 template <
typename ShellRange>
728 TwoCenterIntContainerPtr ints_to_eigen(
731 Ref<TwoBodyTwoCenterInt>& ints,
735 template <
typename ShellRange>
736 TwoCenterIntContainerPtr ints_to_eigen_threaded(
739 std::vector<Ref<TwoBodyTwoCenterInt>>& ints_for_thread,
743 template <
typename ShellRange>
744 Eigen::Map<TwoCenterIntContainer>
745 ints_to_eigen_map_threaded(
748 std::vector<Ref<TwoBodyTwoCenterInt>>& ints_for_thread,
753 template <
typename ShellRange>
754 ThreeCenterIntContainerPtr ints_to_eigen(
756 Ref<TwoBodyTwoCenterInt>& ints,
761 ThreeCenterIntContainerPtr ints_to_eigen(
762 int ish,
int jsh,
int ksh,
763 Ref<TwoBodyThreeCenterInt>& ints,
765 GaussianBasisSet* bs1 = 0,
766 GaussianBasisSet* bs2 = 0,
767 GaussianBasisSet* bs3 = 0
771 template <
typename ShellRange>
772 ThreeCenterIntContainerPtr ints_to_eigen(
775 Ref<TwoBodyThreeCenterInt>& ints,
780 template <
typename ShellRange1,
typename ShellRange2>
781 ThreeCenterIntContainerPtr ints_to_eigen(
785 Ref<TwoBodyThreeCenterInt>& ints,
790 template <
typename ShellRange>
791 ThreeCenterIntContainerPtr ints_to_eigen(
795 Ref<TwoBodyThreeCenterInt>& ints,
799 template <
typename ShellRange>
800 Eigen::Map<ThreeCenterIntContainer> ints_to_eigen_map(
804 Ref<TwoBodyThreeCenterInt>& ints,
806 double* __restrict__ buffer
809 template <
typename MapType>
810 void ints_to_eigen_map(
814 Ref<TwoBodyThreeCenterInt>& ints,
819 template <
typename ShellRange>
820 Eigen::Map<ThreeCenterIntContainer> ints_to_eigen_map(
824 Ref<TwoBodyThreeCenterInt>& ints,
829 template <
typename ShellRange1,
typename ShellRange2>
830 Eigen::Map<ThreeCenterIntContainer> ints_to_eigen_map(
834 Ref<TwoBodyThreeCenterInt>& ints,
840 int ish,
int jsh,
int ksh,
841 int nbfi,
int nbfj,
int nbfk,
842 Ref<TwoBodyThreeCenterInt>& ints,
844 double* buffer,
int stride=-1
848 template <
typename ShellRange>
849 ThreeCenterIntContainerPtr ints_to_eigen(
853 Ref<TwoBodyThreeCenterInt>& ints,
857 FourCenterIntContainerPtr ints_to_eigen(
863 shared_ptr<Decomposition> get_decomposition(
int ish,
int jsh, Ref<TwoBodyTwoCenterInt> ints);
867 RefSymmSCMatrix gmat_;
873 bool density_reset_ =
true;
879 bool have_coefficients_;
882 Ref<GaussianBasisSet> dfbs_ = 0;
885 std::map<PairSet, std::map<std::pair<int, int>,
int>> pair_assignments_;
888 std::map<std::pair<int, ShellBlockSkeleton<>>,
int> pair_assignments_k_;
890 shared_ptr<cadf::AssignmentGrid> atom_pair_assignments_k_;
891 shared_ptr<cadf::assignments::Assignments> assignments_new_k_;
894 std::vector<std::pair<int, int>> local_pairs_all_;
895 std::vector<std::pair<int, int>> local_pairs_sig_;
898 std::vector<std::pair<int, ShellBlockSkeleton<>>> local_pairs_k_;
902 std::unordered_map<int, std::vector<int>> linK_local_map_;
903 std::unordered_map<int, std::vector<int>> linK_local_map_ish_Xsh_;
906 std::vector<std::pair<int, int>> sig_pairs_;
909 std::vector<std::set<int>> sig_partners_;
912 std::vector<ContiguousShellBlockList> sig_partner_blocks_;
915 Eigen::VectorXd schwarz_df_;
917 Eigen::VectorXd schwarz_df_mine_;
920 std::atomic<int> local_pairs_spot_;
923 Eigen::MatrixXd schwarz_frob_;
926 Eigen::MatrixXd C_bar_;
927 Eigen::MatrixXd C_bar_mine_;
928 Eigen::MatrixXd C_underbar_;
929 shared_ptr<cadf::TreeMatrix<>> C_dfsame_;
932 std::vector<Ref<TwoBodyThreeCenterInt>> eris_3c_;
933 std::vector<Ref<TwoBodyThreeCenterInt>> metric_ints_3c_;
936 std::vector<Ref<TwoBodyTwoCenterInt>> eris_2c_;
937 std::vector<Ref<TwoBodyTwoCenterInt>> metric_ints_2c_;
940 std::atomic<long> ints_computed_locally_;
943 std::vector<Eigen::Vector3d> centers_;
950 fast_map<std::pair<int, int>, Eigen::Vector3d> pair_centers_;
953 fast_map<std::pair<int, int>,
double> pair_extents_;
954 fast_map<std::pair<int, int>,
double> effective_pair_exponents_;
955 std::vector<double> df_extents_;
956 std::vector<double> effective_df_exponents_;
959 double* __restrict__ coefficients_data_ = 0;
960 double* __restrict__ dist_coefs_data_ = 0;
961 double* __restrict__ dist_coefs_data_df_ = 0;
963 fast_map<int, Eigen::Map<RowMatrix>> coefs_X_nu;
964 fast_map<int, Eigen::Map<RowMatrix>> coefs_X_nu_other;
965 fast_map<int, Eigen::Map<RowMatrix>> coefs_mu_X;
966 fast_map<int, Eigen::Map<RowMatrix>> coefs_mu_X_other;
971 std::vector<RowMatrix> coefs_blocked_;
972 std::vector<std::vector<int>> coef_block_offsets_;
974 std::vector<Eigen::Map<RowMatrix>> coefs_transpose_blocked_;
975 std::vector<Eigen::Map<RowMatrix>> coefs_transpose_blocked_other_;
977 std::vector<Eigen::Map<RowMatrix>> coefs_transpose_;
979 shared_ptr<DecompositionCache> decomps_;
981 bool threads_initialized_ =
false;
983 static ClassDesc cd_;
985 typedef typename decltype(sig_partners_)::value_type::iterator sig_partners_iter_t;
986 typedef range_of<ShellData, sig_partners_iter_t> sig_partners_range_t;
989 iter_significant_partners(
const ShellData& ish){
990 const auto& sig_parts = sig_partners_[ish];
991 return boost::make_iterator_range(
992 basis_element_iterator<ShellData, sig_partners_iter_t>(
993 ish.basis, ish.dfbasis, sig_parts.begin()
995 basis_element_iterator<ShellData, sig_partners_iter_t>(
996 ish.basis, ish.dfbasis, sig_parts.end()
1001 range_of_shell_blocks<sig_partners_iter_t>
1002 iter_significant_partners_blocked(
1004 int requirements = Contiguous,
1005 int target_size = DEFAULT_TARGET_BLOCK_SIZE
1007 const auto& sig_parts = sig_partners_[ish];
1008 return boost::make_iterator_range(
1009 shell_block_iterator<sig_partners_iter_t>(
1010 sig_parts.begin(), sig_parts.end(),
1011 ish.basis, ish.dfbasis, requirements, target_size
1013 shell_block_iterator<sig_partners_iter_t>(
1014 sig_parts.end(), sig_parts.end(),
1015 ish.basis, ish.dfbasis, requirements, target_size
1020 bool is_sig_pair(
int ish,
int jsh)
const {
1021 const auto& parts = sig_partners_[ish];
1022 return parts.find(jsh) != parts.end();
1026 typedef madness::ConcurrentHashMap<int, OrderedShellList> IndexListMap;
1027 typedef madness::ConcurrentHashMap<
1028 std::pair<int, int>,
1032 typedef madness::ConcurrentHashMap<
1033 std::pair<int, int>,
1034 std::vector<std::pair<uint64_t, uint64_t>>,
1038 IndexListMap L_schwarz;
1040 IndexListMap L_C_under;
1042 IndexListMap2 L_3_star;
1044 IndexListMap2 L_d_over;
1045 LinKRangeMap2 L_d_under_ranges;
1047 friend class ApproximatePairWriter;
1049 Ref<ApproximatePairWriter> pair_writer_;
1057 get_split_range_part(
1058 const Ref<MessageGrp>& msg,
1059 int full_begin,
int full_end,
1060 int& out_begin,
int& out_size
1064 get_split_range_part(
1066 int full_begin,
int full_end,
1067 int& out_begin,
int& out_size
1070 boost::property_tree::ptree&
1072 const CADFCLHF::ScreeningStatistics& obj,
1073 boost::property_tree::ptree& parent,
1074 const XMLWriter& writer
1077 boost::property_tree::ptree&
1079 const CADFCLHF::ScreeningStatistics::Iteration& obj,
1080 boost::property_tree::ptree& parent,
1081 const XMLWriter& writer
1085 inline std::string data_size_to_string(
size_t t) {
1090 double thousand_m = 1;
1091 double thousand_mp1 = 1024;
1092 while (t >= thousand_mp1) {
1094 thousand_m = thousand_mp1;
1095 thousand_mp1 *= 1024;
1101 case 0: unit =
"B";
break;
1102 case 1: unit =
"kB";
break;
1103 case 2: unit =
"MB";
break;
1104 case 3: unit =
"GB";
break;
1105 case 4: unit =
"TB";
break;
1106 case 5: unit =
"PB";
break;
1107 case 6: unit =
"EB";
break;
1108 case 7: unit =
"ZB";
break;
1109 case 8: unit =
"YB";
break;
1110 default: MPQC_ASSERT(
false);
break;
1114 std::ostringstream oss;
1115 oss.precision(prec);
1116 oss << (double)t/(
double)thousand_m << unit;
1122 #include "get_ints_impl.h"
1123 #include "coefs_impl.h"