21#ifndef _libint2_src_lib_libint_boys_h_
22#define _libint2_src_lib_libint_boys_h_
24#if defined(__cplusplus)
26#include <libint2/util/vector.h>
41#define posix_memalign(p, a, s) \
42 (((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
43#define posix_memfree(p) ((_aligned_free((p))))
45#define posix_memfree(p) ((free((p))))
49#include <libint2/util/cxxstd.h>
50#if LIBINT2_CPLUSPLUS_STD < 2011
51#error "Libint2 C++ API requires C++11 support"
54#include <libint2/boys_fwd.h>
55#include <libint2/initialize.h>
56#include <libint2/numeric.h>
59extern "C" void dgesv_(
const int* n,
const int* nrhs,
double* A,
const int* lda,
60 int* ipiv,
double* b,
const int* ldb,
int* info);
66template <
typename Real>
73 for (
int i = 1; i <= ifac; i++) {
74 fac[i] = i * fac[i - 1];
82 if (idf >= 1) df[1] = 1.0;
83 if (idf >= 2) df[2] = 1.0;
84 for (
int i = 3; i <= idf; i++) {
85 df[i] = (i - 1) * df[i - 2];
90 bc_.resize((ibc + 1) * (ibc + 1));
91 std::fill(bc_.begin(), bc_.end(), Real(0));
94 for (
int i = 1; i <= ibc; ++i) bc[i] = bc[i - 1] + (ibc + 1);
96 for (
int i = 0; i <= ibc; i++) bc[i][0] = 1.0;
97 for (
int i = 0; i <= ibc; i++)
98 for (
int j = 1; j <= i; ++j)
99 bc[i][j] = bc[i][j - 1] * Real(i - j + 1) / Real(j);
102 for (
int i = 0; i < 128; i++) {
103 twoi1[i] = 1.0 / (Real(2.0) * i + Real(1.0));
104 ihalf[i] = Real(i) - Real(0.5);
110 std::vector<Real> fac;
111 std::vector<Real> df;
112 std::vector<Real*> bc;
121 std::vector<Real> bc_;
143template <
typename Real>
145 static std::shared_ptr<const FmEval_Reference> instance(
148 static auto instance_ = std::make_shared<const FmEval_Reference>();
155 static Real
eval(Real T,
size_t m) {
157 static const Real T_crit =
158 std::numeric_limits<Real>::is_bounded
159 ? -log(std::numeric_limits<Real>::min() * 100.5 / 2.)
161 if (std::numeric_limits<Real>::is_bounded && T > T_crit)
162 throw std::overflow_error(
163 "FmEval_Reference<Real>::eval: Real lacks precision for the given "
164 "value of argument T");
165 static const Real half = Real(1) / 2;
166 Real denom = (m + half);
168 Real term = exp(-T) / (2 * denom);
171 const Real epsilon = get_epsilon(T);
172 const Real epsilon_divided_10 = epsilon / 10;
176 term = old_term * T / denom;
181 }
while (term > sum * epsilon_divided_10 || old_term < term);
192 static void eval(Real* Fm, Real T,
size_t mmax) {
196 for (
size_t m = 0; m <= mmax; ++m) Fm[m] =
eval(T, m);
213template <
typename Real>
215 static std::shared_ptr<const FmEval_Reference2> instance(
218 static auto instance_ = std::make_shared<const FmEval_Reference2>();
229 static void eval(Real* Fm, Real t,
size_t mmax) {
233 const Real two_over_sqrt_pi{
234 1.128379167095512573896158903121545171688101258657997713688171443421284936882986828973487320404214727};
235 const Real K = 1 / two_over_sqrt_pi;
239 auto sqrt_t = sqrt(t);
240 Fm[0] = K * erf(sqrt_t) / sqrt_t;
242 for (
size_t m = 0; m <= mmax - 1; m++) {
243 Fm[m + 1] = ((2 * m + 1) * Fm[m] - et) / (t2);
252template <
typename Real =
double>
254#include <libint2/boys_cheb7.h>
256 static_assert(std::is_same<Real, double>::value,
257 "FmEval_Chebyshev7 only supports double as the real type");
259 static constexpr int ORDER = interpolation_order;
260 static constexpr int ORDERp1 = ORDER + 1;
262 static constexpr Real T_crit =
265 static constexpr Real delta = cheb_table_delta;
266 static constexpr Real one_over_delta = 1 / delta;
281 double precision = std::numeric_limits<double>::epsilon())
282 : mmax(m_max), numbers_(14) {
283#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
285#if !defined(__AVX__) && defined(NDEBUG)
287 static bool printed_performance_warning =
false;
288 if (!printed_performance_warning) {
290 <<
"libint2::FmEval_Chebyshev7 on x86(-64) platforms needs AVX "
291 "support for best performance"
293 printed_performance_warning =
true;
298 if (precision < std::numeric_limits<double>::epsilon())
299 throw std::invalid_argument(
301 "FmEval_Chebyshev7 does not support precision smaller than ") +
302 std::to_string(std::numeric_limits<double>::epsilon()));
303 if (mmax > cheb_table_mmax)
304 throw std::invalid_argument(
305 "FmEval_Chebyshev7::init() : requested mmax exceeds the "
307 if (m_max >= 0) init_table();
317 static std::shared_ptr<const FmEval_Chebyshev7>
instance(
int m_max,
321 static auto instance_ = std::make_shared<const FmEval_Chebyshev7>(m_max);
323 while (instance_->max_m() < m_max) {
324 static std::mutex mtx;
325 std::lock_guard<std::mutex> lck(mtx);
326 if (instance_->max_m() < m_max) {
327 auto new_instance = std::make_shared<const FmEval_Chebyshev7>(m_max);
328 instance_ = new_instance;
345 inline void eval(Real* Fm, Real x,
int m_max)
const {
349 const double one_over_x = 1 / x;
350 Fm[0] = 0.88622692545275801365 *
352 if (m_max == 0)
return;
355 for (
int i = 1; i <= m_max; i++)
356 Fm[i] = Fm[i - 1] * numbers_.ihalf[i] * one_over_x;
365 const Real x_over_delta = x * one_over_delta;
366 const int iv = int(x_over_delta);
368 x_over_delta - (Real)iv - 0.5;
372 const auto x2 = xd * xd;
373 const auto x3 = x2 * xd;
374 const auto x4 = x2 * x2;
375 const auto x5 = x2 * x3;
376 const auto x6 = x3 * x3;
377 const auto x7 = x3 * x4;
383 c + (ORDERp1) * (iv * (mmax + 1) +
387 if (m_max - m >= 3) {
388 const int unroll_size = 4;
389 const int m_fence = (m_max + 2 - unroll_size);
390 for (; m < m_fence; m += unroll_size, d += ORDERp1 * unroll_size) {
406 horizontal_add(fm0, fm1, fm2, fm3);
410 if (m_max - m >= 1) {
411 const int unroll_size = 2;
412 const int m_fence = (m_max + 2 - unroll_size);
413 for (; m < m_fence; m += unroll_size, d += ORDERp1 * unroll_size) {
426 for (; m <= m_max; ++m, d += ORDERp1) {
430 Fm[m] = horizontal_add(d0v * x0vec + d1v * x1vec);
434 for (m = m_min; m <= m_max; ++m, d += ORDERp1) {
441 xd * (d[5] + xd * (d[6] + xd * (d[7])))))));
460 int status = posix_memalign(
461 &result, ORDERp1 *
sizeof(Real),
462 (mmax + 1) * cheb_table_nintervals * ORDERp1 *
sizeof(Real));
464 if (status == EINVAL)
465 throw std::logic_error(
466 "FmEval_Chebyshev7::init() : posix_memalign failed, alignment must "
467 "be a power of 2 at least as large as sizeof(void *)");
468 if (status == ENOMEM)
throw std::bad_alloc();
471 c =
static_cast<Real*
>(result);
475 for (std::size_t iv = 0; iv < cheb_table_nintervals; ++iv) {
477 std::copy(cheb_table[iv], cheb_table[iv] + (mmax + 1) * ORDERp1,
478 c + (iv * (mmax + 1)) * ORDERp1);
484#if LIBINT2_CONSTEXPR_STATICS
485template <
typename Real>
486constexpr double FmEval_Chebyshev7<
487 Real>::cheb_table[FmEval_Chebyshev7<Real>::cheb_table_nintervals]
488 [(FmEval_Chebyshev7<Real>::cheb_table_mmax + 1) *
489 (FmEval_Chebyshev7<Real>::interpolation_order + 1)];
493template <
typename Real>
494double FmEval_Chebyshev7<
495 Real>::cheb_table[FmEval_Chebyshev7<Real>::cheb_table_nintervals]
496 [(FmEval_Chebyshev7<Real>::cheb_table_mmax + 1) *
497 (FmEval_Chebyshev7<Real>::interpolation_order + 1)];
504const double oon[] = {0.0, 1.0, 1.0 / 2.0, 1.0 / 3.0,
505 1.0 / 4.0, 1.0 / 5.0, 1.0 / 6.0, 1.0 / 7.0,
506 1.0 / 8.0, 1.0 / 9.0, 1.0 / 10.0, 1.0 / 11.0};
519template <
typename Real =
double,
int INTERPOLATION_ORDER = 7>
522 static_assert(std::is_same<Real, double>::value,
523 "FmEval_Taylor only supports double as the real type");
525 static const int max_interp_order = 8;
526 static const bool INTERPOLATION_AND_RECURSION =
529 const double soft_zero_;
534 Real
precision = std::numeric_limits<Real>::epsilon())
537 numbers_(INTERPOLATION_ORDER + 1, 2 * (mmax + INTERPOLATION_ORDER)) {
538#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
540#if !defined(__AVX__) && defined(NDEBUG)
542 static bool printed_performance_warning =
false;
543 if (!printed_performance_warning) {
545 <<
"libint2::FmEval_Taylor on x86(-64) platforms needs AVX support "
546 "for best performance"
548 printed_performance_warning =
true;
556 const double sqrt_pi = std::sqrt(M_PI);
563 delT_ = 2.0 * std::pow(cutoff_ * numbers_.fac[INTERPOLATION_ORDER + 1],
564 1.0 / INTERPOLATION_ORDER);
565 oodelT_ = 1.0 / delT_;
566 max_m_ = mmax + INTERPOLATION_ORDER;
568 T_crit_ =
new Real[max_m_ + 1];
571 for (
int m = max_m_; m >= 0; --m) {
578 double T = -log(cutoff_);
579 const double egamma =
580 cutoff_ * sqrt_pi * numbers_.df[2 * m] / std::pow(2.0, m);
584 const double damping_factor = 0.2;
587 func = std::pow(T, m - 0.5) * std::exp(-T) - egamma;
588 const double dfuncdT =
589 ((m - 0.5) * std::pow(T, m - 1.5) - std::pow(T, m - 0.5)) *
597 double deltaT = -func / dfuncdT;
598 const double sign_deltaT = (deltaT > 0.0) ? 1.0 : -1.0;
599 const double max_deltaT = damping_factor * T;
600 if (std::fabs(deltaT) > max_deltaT) deltaT = sign_deltaT * max_deltaT;
606 }
while (std::fabs(func / egamma) >= soft_zero_);
608 const int T_idx = (int)std::floor(T_new / delT_);
609 max_T_ = std::max(max_T_, T_idx);
614 const int nrow = max_T_ + 1;
615 const int ncol = max_m_ + 1;
616 grid_ =
new Real*[nrow];
617 grid_[0] =
new Real[nrow * ncol];
620 for (
int r = 1; r < nrow; ++r) grid_[r] = grid_[r - 1] + ncol;
629 for (
int T_idx = max_T_; T_idx >= 0; --T_idx) {
630 const double T = T_idx * delT_;
643 static std::shared_ptr<const FmEval_Taylor>
instance(
645 Real
precision = std::numeric_limits<Real>::epsilon()) {
649 static auto instance_ =
650 std::make_shared<const FmEval_Taylor>(mmax,
precision);
652 while (instance_->max_m() < mmax || instance_->precision() >
precision) {
653 static std::mutex mtx;
654 std::lock_guard<std::mutex> lck(mtx);
655 if (instance_->max_m() < mmax || instance_->precision() >
precision) {
657 std::make_shared<const FmEval_Taylor>(mmax,
precision);
658 instance_ = new_instance;
667 int max_m()
const {
return max_m_ - INTERPOLATION_ORDER + 1; }
679 void eval(Real* Fm, Real T,
int mmax)
const {
680 const double sqrt_pio2 = 1.2533141373155002512;
681 const double two_T = 2.0 * T;
684 const int mmin = INTERPOLATION_AND_RECURSION ? mmax : 0;
688 const bool use_upward_recursion =
true;
689 if (use_upward_recursion) {
691 if (T > T_crit_[0]) {
692 const double one_over_x = 1.0 / T;
694 0.88622692545275801365 *
696 if (mmax == 0)
return;
699 for (
int i = 1; i <= mmax; i++)
701 Fm[i - 1] * numbers_.ihalf[i] * one_over_x;
708 if (T > T_crit_[mmax]) {
709 double pow_two_T_to_minusjp05 = std::pow(two_T, -mmax - 0.5);
710 for (
int m = mmax; m >= mmin; --m) {
712 Fm[m] = numbers_.df[2 * m] * sqrt_pio2 * pow_two_T_to_minusjp05;
713 pow_two_T_to_minusjp05 *= two_T;
716 const int T_ind = (int)(0.5 + T * oodelT_);
717 const Real h = T_ind * delT_ - T;
718 const Real* F_row = grid_[T_ind] + mmin;
722 if (INTERPOLATION_ORDER == 3 || INTERPOLATION_ORDER == 7) {
723 const double h2 = h * h * oon[2];
724 const double h3 = h2 * h * oon[3];
726 if (INTERPOLATION_ORDER == 7) {
727 const double h4 = h3 * h * oon[4];
728 const double h5 = h4 * h * oon[5];
729 const double h6 = h5 * h * oon[6];
730 const double h7 = h6 * h * oon[7];
740 const int unroll_size = 2;
741 const int m_fence = (mmax + 2 - unroll_size);
742 for (; m < m_fence; m += unroll_size, F_row += unroll_size) {
744 if (INTERPOLATION_ORDER == 3 || INTERPOLATION_ORDER == 7) {
746 fr0_0123.
load(F_row);
748 fr1_0123.
load(F_row + 1);
750 horizontal_add(fr0_0123 * h0123, fr1_0123 * h0123);
751 if (INTERPOLATION_ORDER == 7) {
753 fr0_4567.
load(F_row + 4);
755 fr1_4567.
load(F_row + 5);
756 fm01 += horizontal_add(fr0_4567 * h4567, fr1_4567 * h4567);
761 Real total0 = 0.0, total1 = 0.0;
762 for (
int i = INTERPOLATION_ORDER; i >= 1; --i) {
763 total0 = oon[i] * h * (F_row[i] + total0);
764 total1 = oon[i] * h * (F_row[i + 1] + total1);
766 Fm[m] = F_row[0] + total0;
767 Fm[m + 1] = F_row[1] + total1;
775 if (INTERPOLATION_ORDER == 3 || INTERPOLATION_ORDER == 7) {
778 if (INTERPOLATION_ORDER == 7) {
780 fr4567.
load(F_row + 4);
784 Fm[m] = horizontal_add(fr0123 * h0123 + fr4567 * h4567);
786 Fm[m] = horizontal_add(fr0123 * h0123);
791 for (
int i = INTERPOLATION_ORDER; i >= 1; --i) {
792 total = oon[i] * h * (F_row[i] + total);
794 Fm[m] = F_row[0] + total;
815 if (INTERPOLATION_AND_RECURSION && mmin > 0) {
816 const Real exp_mT = std::exp(-T);
817 for (
int m = mmin - 1; m >= 0; --m) {
818 Fm[m] = (exp_mT + two_T * Fm[m + 1]) * numbers_.twoi1[m];
849 static double truncation_error(
unsigned int m,
double T) {
850 const double m2 = m * m;
851 const double m3 = m2 * m;
852 const double m4 = m2 * m2;
853 const double T2 = T * T;
854 const double T3 = T2 * T;
855 const double T4 = T2 * T2;
856 const double T5 = T2 * T3;
858 const double result =
860 (105 + 16 * m4 + 16 * m3 * (T - 8) - 30 * T + 12 * T2 - 8 * T3 +
861 16 * T4 + 8 * m2 * (43 - 9 * T + 2 * T2) +
862 4 * m * (-88 + 23 * T - 8 * T2 + 4 * T3)) /
875 static double truncation_error(
double T) {
876 const double result = exp(-T) / (2 * T);
882constexpr double pow10(
long exp) {
883 return (exp == 0) ? 1.
884 : ((exp > 0) ? 10. * pow10(exp - 1) : 0.1 * pow10(exp + 1));
900template <
typename Real =
double>
903#include <libint2/tenno_cheb.h>
905 static_assert(std::is_same<Real, double>::value,
906 "TennoGmEval only supports double as the real type");
908 static const int mmin_ = -1;
909 static constexpr Real Tmax =
910 (1 << cheb_table_tmaxlog2);
912 static constexpr Real Umax = detail::pow10(
913 cheb_table_umaxlog10);
914 static constexpr Real Umin = detail::pow10(
915 cheb_table_uminlog10);
916 static constexpr Real one_over_Umin =
918 static constexpr std::size_t ORDERp1 = interpolation_order + 1;
919 static constexpr Real maxabsprecision =
934 : mmax_(mmax), precision_(
precision), numbers_() {
935#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
937#if !defined(__AVX__) && defined(NDEBUG)
939 static bool printed_performance_warning =
false;
940 if (!printed_performance_warning) {
942 <<
"libint2::TennoGmEval on x86(-64) platforms needs AVX support "
943 "for best performance"
945 printed_performance_warning =
true;
951 if (precision_ < maxabsprecision)
952 throw std::invalid_argument(
953 std::string(
"TennoGmEval does not support precision smaller than ") +
954 std::to_string(maxabsprecision));
956 if (mmax > cheb_table_mmax)
957 throw std::invalid_argument(
958 "TennoGmEval::init() : requested mmax exceeds the "
964 if (c_ !=
nullptr) posix_memfree(c_);
969 static std::shared_ptr<const TennoGmEval>
instance(
int m_max,
double = 0) {
972 static auto instance_ = std::make_shared<const TennoGmEval>(m_max);
974 while (instance_->max_m() < m_max) {
975 static std::mutex mtx;
976 std::lock_guard<std::mutex> lck(mtx);
977 if (instance_->max_m() < m_max) {
978 auto new_instance = std::make_shared<const TennoGmEval>(m_max);
979 instance_ = new_instance;
986 unsigned int max_m()
const {
return mmax_; }
1003 assert(mmax <= mmax_);
1005 const auto U = 0.25 * zeta * zeta * one_over_rho;
1007 if (T > Tmax || U < Umin) {
1008 eval_urr<false>(Gm, T, U, 0, mmax);
1010 interpolate_Gm<false>(Gm, T, U, 0, mmax);
1027 assert(mmax <= mmax_);
1029 const auto U = 0.25 * zeta * zeta * one_over_rho;
1031 const auto zeta_over_two_rho = 0.5 * zeta * one_over_rho;
1033 eval_urr<true>(Gm, T, U, zeta_over_two_rho, mmax);
1035 interpolate_Gm<true>(Gm, T, U, zeta_over_two_rho, mmax);
1045 template <
bool compute_Gm1>
1046 static inline std::tuple<Real, Real> eval_G0_and_maybe_Gm1(Real T, Real U) {
1049 const Real sqrtPi(1.7724538509055160272981674833411451827975494561224);
1055 assert(!compute_Gm1);
1056 if (T < std::numeric_limits<Real>::epsilon()) {
1059 const Real sqrt_T = sqrt(T);
1060 const Real sqrtPi_over_2 = sqrtPi * 0.5;
1061 G_0 = sqrtPi_over_2 * erf(sqrt_T) / sqrt_T;
1063 }
else if (T > std::numeric_limits<Real>::epsilon()) {
1064 const Real sqrt_U = sqrt(U);
1065 const Real sqrt_T = sqrt(T);
1066 const Real oo_sqrt_T = 1 / sqrt_T;
1067 const Real oo_sqrt_U = compute_Gm1 ? (1 / sqrt_U) : 0;
1068 const Real kappa = sqrt_U - sqrt_T;
1069 const Real lambda = sqrt_U + sqrt_T;
1070 const Real sqrtPi_over_4 = sqrtPi * 0.25;
1071 const Real pfac = sqrtPi_over_4;
1072 const Real erfc_k = exp(kappa * kappa - T) * erfc(kappa);
1075 if (lambda * lambda - T > 709)
1076 throw std::invalid_argument(
1077 "TennoGmEval::eval_G0_and_maybe_Gm1(): for large T and/or U "
1078 "current implementation overflows, need to implement asymptotic "
1080 const Real erfc_l = exp(lambda * lambda - T) * erfc(lambda);
1082 G_m1 = compute_Gm1 ? pfac * (erfc_k + erfc_l) * oo_sqrt_U : 0.0;
1083 G_0 = pfac * (erfc_k - erfc_l) * oo_sqrt_T;
1086 const Real exp_U = exp(U);
1087 const Real sqrt_U = sqrt(U);
1089 const Real sqrtPi_over_2 = sqrtPi * 0.5;
1090 const Real oo_sqrt_U = compute_Gm1 ? (1 / sqrt_U) : 0;
1092 G_m1 = exp_U * sqrtPi_over_2 * erfc(sqrt_U) * oo_sqrt_U;
1094 G_0 = 1 - exp_U * sqrtPi * erfc(sqrt_U) * sqrt_U;
1096 const Real oo_U = Real{1} / U;
1097 const Real x = oo_U;
1098 const Real x2 = x * x;
1099 const Real x3 = x2 * x;
1100 const Real x4 = x2 * x2;
1101 const Real x5 = x3 * x2;
1102 const Real x6 = x3 * x3;
1107 const Real cm1[] = {1. / 2, -1. / 4, 3. / 8,
1108 -15. / 16, 105. / 32, -945. / 64};
1109 G_m1 = cm1[0] * x + cm1[1] * x2 + cm1[2] * x3 + cm1[3] * x4 +
1110 cm1[4] * x5 + cm1[5] * x6;
1115 const Real c0[] = {1. / 2, -3. / 4, 15. / 8,
1116 -105. / 16, 945. / 32, -10395. / 64};
1117 G_0 = c0[0] * x + c0[1] * x2 + c0[2] * x3 + c0[3] * x4 + c0[4] * x5 +
1122 return std::make_tuple(G_0, G_m1);
1141 static void eval_urr(Real* Gm_vec, Real T, Real U, Real zeta_over_two_rho,
1146 const Real sqrt_U = sqrt(U);
1147 const Real sqrt_T = sqrt(T);
1148 const Real oo_sqrt_T = 1 / sqrt_T;
1149 const Real oo_sqrt_U = 1 / sqrt_U;
1150 const Real kappa = sqrt_U - sqrt_T;
1151 const Real lambda = sqrt_U + sqrt_T;
1152 const Real sqrtPi_over_4(
1153 0.44311346272637900682454187083528629569938736403060);
1154 const Real pfac = sqrtPi_over_4;
1155 const Real erfc_k = exp(kappa * kappa - T) * erfc(kappa);
1156 const Real erfc_l = exp(lambda * lambda - T) * erfc(lambda);
1161 Gmm1 = pfac * (erfc_k + erfc_l) * oo_sqrt_U;
1162 Gm = pfac * (erfc_k - erfc_l) * oo_sqrt_T;
1164#if __cplusplus >= 201703L
1170 Gm_vec[0] = (Gmm1 - Gm) * zeta_over_two_rho;
1175 const Real oo_two_T = 0.5 / T;
1176 const Real two_U = 2.0 * U;
1177 const Real exp_mT = exp(-T);
1179 for (
unsigned int m = 0, two_m_minus_1 = 1; m < mmax;
1180 ++m, two_m_minus_1 += 2) {
1181 Gmp1 = oo_two_T * (two_m_minus_1 * Gm + two_U * Gmm1 - exp_mT);
1183#if __cplusplus >= 201703L
1187 Gm_vec[m + 1] = Gmp1;
1189 Gm_vec[m + 1] = (Gm - Gmp1) * zeta_over_two_rho;
1212 void interpolate_Gm(Real* Gm_vec, Real T, Real U, Real zeta_over_two_rho,
1216 throw std::invalid_argument(
1217 "TennoGmEval::eval_{yukawa,slater}() : arguments out of range, "
1218 "zeta*zeta*one_over_rho/4=" +
1219 std::to_string(U) +
" cannot exceed " + std::to_string(Umax));
1221 assert(U >= Umin && U <= Umax);
1224 auto linear_map_02 = [](Real x) {
1225 assert(x >= 0 && x <= 2);
1226 return (x - 1) * 0.5;
1229 auto log2_map = [](Real x, Real one_over_a) {
1230 return std::log2(x * one_over_a) - 0.5;
1233 auto log10_map = [](Real x, Real one_over_a) {
1234 return std::log10(x * one_over_a) - 0.5;
1240 : int(std::floor(std::log2(T) *
1241 (1. - std::numeric_limits<double>::epsilon())));
1242 assert(Tint >= 0 && Tint < cheb_table_tmaxlog2 - cheb_table_tminlog2 + 1);
1244 constexpr Real one_over_2K[] = {1, 0.5, 0.25, 0.125,
1245 0.0625, 0.03125, 0.015625, 0.0078125,
1246 0.00390625, .001953125};
1248 const auto log10_U_over_Umin =
1249 std::log10(U * one_over_Umin) *
1250 (1. - std::numeric_limits<double>::epsilon());
1251 const int Uint = int(std::floor(log10_U_over_Umin));
1252 assert(Uint >= 0 && Uint < cheb_table_umaxlog10 - cheb_table_uminlog10);
1254 constexpr Real one_over_10K[] = {
1255 1. / detail::pow10(cheb_table_uminlog10),
1256 1. / detail::pow10(cheb_table_uminlog10 + 1),
1257 1. / detail::pow10(cheb_table_uminlog10 + 2),
1258 1. / detail::pow10(cheb_table_uminlog10 + 3),
1259 1. / detail::pow10(cheb_table_uminlog10 + 4),
1260 1. / detail::pow10(cheb_table_uminlog10 + 5),
1261 1. / detail::pow10(cheb_table_uminlog10 + 6),
1262 1. / detail::pow10(cheb_table_uminlog10 + 7),
1263 1. / detail::pow10(cheb_table_uminlog10 + 8),
1264 1. / detail::pow10(cheb_table_uminlog10 + 9)};
1268 : log2_map(T, one_over_2K[Tint]);
1270 log10_map(U, one_over_10K[Uint]);
1272 const int interval =
1273 Tint * (cheb_table_umaxlog10 - cheb_table_uminlog10) + Uint;
1276 assert(ORDERp1 == 16);
1277 const auto t2 = t * t;
1278 const auto t3 = t2 * t;
1279 const auto t4 = t2 * t2;
1280 const auto t5 = t2 * t3;
1281 const auto t6 = t3 * t3;
1282 const auto t7 = t3 * t4;
1283 const auto t8 = t4 * t4;
1284 const auto t9 = t4 * t5;
1285 const auto t10 = t5 * t5;
1286 const auto t11 = t5 * t6;
1287 const auto t12 = t6 * t6;
1288 const auto t13 = t6 * t7;
1289 const auto t14 = t7 * t7;
1290 const auto t15 = t7 * t8;
1291 const auto u2 = u * u;
1292 const auto u3 = u2 * u;
1293 const auto u4 = u2 * u2;
1294 const auto u5 = u2 * u3;
1295 const auto u6 = u3 * u3;
1296 const auto u7 = u3 * u4;
1297 const auto u8 = u4 * u4;
1298 const auto u9 = u4 * u5;
1299 const auto u10 = u5 * u5;
1300 const auto u11 = u5 * u6;
1301 const auto u12 = u6 * u6;
1302 const auto u13 = u6 * u7;
1303 const auto u14 = u7 * u7;
1304 const auto u15 = u7 * u8;
1313 constexpr bool compute_Gmm10 =
true;
1315 if (compute_Gmm10) {
1319 std::tie(Exp ? G0 : Gm_vec[0], Gmm1) = eval_G0_and_maybe_Gm1<Exp>(T, U);
1321#if __cplusplus >= 201703L
1325 Gm_vec[0] = (Gmm1 - G0) * zeta_over_two_rho;
1330 mmin_interp = Exp ? -1 : 0;
1333 for (
long m = mmin_interp; m <= mmax; ++m) {
1334 const Real* c_tuint =
1335 c_ + (ORDERp1) * (ORDERp1) *
1336 (interval * (mmax_ - mmin_ + 1) +
1340 c13v, c20v, c21v, c22v, c23v, c30v, c31v, c32v, c33v, c40v, c41v,
1341 c42v, c43v, c50v, c51v, c52v, c53v, c60v, c61v, c62v, c63v, c70v,
1344 c93v, ca0v, ca1v, ca2v, ca3v, cb0v, cb1v, cb2v, cb3v, cc0v, cc1v,
1345 cc2v, cc3v, cd0v, cd1v, cd2v, cd3v, ce0v, ce1v, ce2v, ce3v, cf0v,
1353 t0vec * (c00v * u0vec + c01v * u1vec + c02v * u2vec + c03v * u3vec);
1360 t1vec * (c10v * u0vec + c11v * u1vec + c12v * u2vec + c13v * u3vec);
1367 t2vec * (c20v * u0vec + c21v * u1vec + c22v * u2vec + c23v * u3vec);
1374 t3vec * (c30v * u0vec + c31v * u1vec + c32v * u2vec + c33v * u3vec);
1383 t4vec * (c40v * u0vec + c41v * u1vec + c42v * u2vec + c43v * u3vec);
1390 t5vec * (c50v * u0vec + c51v * u1vec + c52v * u2vec + c53v * u3vec);
1397 t6vec * (c60v * u0vec + c61v * u1vec + c62v * u2vec + c63v * u3vec);
1404 t7vec * (c70v * u0vec + c71v * u1vec + c72v * u2vec + c73v * u3vec);
1413 t8vec * (c80v * u0vec + c81v * u1vec + c82v * u2vec + c83v * u3vec);
1420 t9vec * (c90v * u0vec + c91v * u1vec + c92v * u2vec + c93v * u3vec);
1427 tavec * (ca0v * u0vec + ca1v * u1vec + ca2v * u2vec + ca3v * u3vec);
1434 tbvec * (cb0v * u0vec + cb1v * u1vec + cb2v * u2vec + cb3v * u3vec);
1443 tcvec * (cc0v * u0vec + cc1v * u1vec + cc2v * u2vec + cc3v * u3vec);
1450 tdvec * (cd0v * u0vec + cd1v * u1vec + cd2v * u2vec + cd3v * u3vec);
1457 tevec * (ce0v * u0vec + ce1v * u1vec + ce2v * u2vec + ce3v * u3vec);
1464 tfvec * (cf0v * u0vec + cf1v * u1vec + cf2v * u2vec + cf3v * u3vec);
1475 for (
int i = 1; i != 16; ++i) {
1476 uvec[i] = uvec[i - 1] * u;
1477 tvec[i] = tvec[i - 1] * t;
1480 for (
int i = 0, ij = 0; i != 16; ++i) {
1481 for (
int j = 0; j != 16; ++j, ++ij) {
1482 Gm += c_tuint[ij] * tvec[i] * uvec[j];
1488#if __cplusplus >= 201703L
1495 Gm_vec[m] = (Gmm1 - Gm) * zeta_over_two_rho;
1507 ExpensiveNumbers<Real> numbers_;
1513 int status = posix_memalign(&result, std::max(
sizeof(Real), (
size_t)32),
1514 (mmax_ - mmin_ + 1) * cheb_table_nintervals *
1515 ORDERp1 * ORDERp1 *
sizeof(Real));
1517 if (status == EINVAL)
1518 throw std::logic_error(
1519 "TennoGmEval::init() : posix_memalign failed, alignment must be a "
1520 "power of 2 at least as large as sizeof(void *)");
1521 if (status == ENOMEM)
throw std::bad_alloc();
1524 c_ =
static_cast<Real*
>(result);
1528 for (std::size_t iv = 0; iv < cheb_table_nintervals; ++iv) {
1530 std::copy(cheb_table[iv],
1531 cheb_table[iv] + ((mmax_ - mmin_) + 1) * ORDERp1 * ORDERp1,
1532 c_ + (iv * ((mmax_ - mmin_) + 1)) * ORDERp1 * ORDERp1);
1538#if LIBINT2_CONSTEXPR_STATICS
1539template <
typename Real>
1541 TennoGmEval<Real>::cheb_table[TennoGmEval<Real>::cheb_table_nintervals]
1542 [(TennoGmEval<Real>::cheb_table_mmax + 2) *
1543 (TennoGmEval<Real>::interpolation_order + 1) *
1544 (TennoGmEval<Real>::interpolation_order + 1)];
1548template <
typename Real>
1550 TennoGmEval<Real>::cheb_table[TennoGmEval<Real>::cheb_table_nintervals]
1551 [(TennoGmEval<Real>::cheb_table_mmax + 2) *
1552 (TennoGmEval<Real>::interpolation_order + 1) *
1553 (TennoGmEval<Real>::interpolation_order + 1)];
1557template <
typename Real,
int k>
1563template <
typename CoreEval>
1570template <
typename Real>
1572 std::vector<Real> Fm_;
1573 std::vector<Real> g_i;
1574 std::vector<Real> r_i;
1575 std::vector<Real> oorhog_i;
1581 void init(
int mmax) {
1582 Fm_.resize(mmax + 1);
1583 g_i.resize(mmax + 1);
1584 r_i.resize(mmax + 1);
1585 oorhog_i.resize(mmax + 1);
1616template <
typename Real,
int k>
1619 GaussianGmEval<Real, k>>
1621#ifndef LIBINT_USER_DEFINED_REAL
1634 precision_(precision),
1636 numbers_(-1, -1, mmax) {
1637 assert(k == -1 || k == 0 || k == 2);
1640 fm_eval_ = FmEvalType::instance(mmax_, precision_);
1644 ~GaussianGmEval() {}
1648 static std::shared_ptr<GaussianGmEval> instance(
1650 Real precision = std::numeric_limits<Real>::epsilon()) {
1652 assert(precision >= 0);
1654 static auto instance_ = std::make_shared<GaussianGmEval>(mmax, precision);
1656 while (instance_->max_m() < mmax || instance_->precision() > precision) {
1657 static std::mutex mtx;
1658 std::lock_guard<std::mutex> lck(mtx);
1659 if (instance_->max_m() < mmax || instance_->precision() > precision) {
1660 auto new_instance = std::make_shared<GaussianGmEval>(mmax, precision);
1661 instance_ = new_instance;
1670 int max_m()
const {
return mmax_; }
1672 Real precision()
const {
return precision_; }
1693 template <
typename AnyReal>
1694 void eval(Real* Gm, Real rho, Real T,
size_t mmax,
1695 const std::vector<std::pair<AnyReal, AnyReal>>& geminal,
1697 std::fill(Gm, Gm + mmax + 1, Real(0));
1699 const auto sqrt_rho = sqrt(rho);
1700 const auto oo_sqrt_rho = 1 / sqrt_rho;
1702 void* _scr = (scr == 0) ?
this : scr;
1704 reinterpret_cast<detail::CoreEvalScratch<GaussianGmEval<Real, -1
>>*>(
1706 for (
int i = 1; i <= mmax; i++) {
1707 scratch.r_i[i] = scratch.r_i[i - 1] * rho;
1712 typename std::vector<std::pair<AnyReal, AnyReal>>::const_iterator citer;
1713 const citer gend = geminal.end();
1714 for (citer i = geminal.begin(); i != gend; ++i) {
1715 const auto gamma = i->first;
1716 const auto gcoef = i->second;
1717 const auto rhog = rho + gamma;
1718 const auto oorhog = 1 / rhog;
1720 const auto gorg = gamma * oorhog;
1721 const auto rorg = rho * oorhog;
1722 const auto sqrt_rho_org = sqrt_rho * oorhog;
1723 const auto sqrt_rhog = sqrt(rhog);
1724 const auto sqrt_rorg = sqrt_rho_org * sqrt_rhog;
1727 constexpr Real const_SQRTPI_2(
1728 0.88622692545275801364908374167057259139877472806119);
1730 const auto SS_K0G12_SS = gcoef * oo_sqrt_rho * const_SQRTPI_2 * rorg *
1731 sqrt_rorg * exp(-gorg * T);
1734 void* _scr = (scr == 0) ?
this : scr;
1737 detail::CoreEvalScratch<GaussianGmEval<Real, -1
>>*>(_scr));
1739 const auto rorgT = rorg * T;
1740 fm_eval_->eval(&scratch.Fm_[0], rorgT, mmax);
1743 constexpr Real const_2_SQRTPI(
1744 1.12837916709551257389615890312154517);
1745 const auto pfac = const_2_SQRTPI * sqrt_rhog * SS_K0G12_SS;
1746 scratch.oorhog_i[0] = pfac;
1747 for (
int i = 1; i <= mmax; i++) {
1748 scratch.g_i[i] = scratch.g_i[i - 1] * gamma;
1749 scratch.oorhog_i[i] = scratch.oorhog_i[i - 1] * oorhog;
1751 for (
int m = 0; m <= mmax; m++) {
1753 Real* bcm = numbers_.bc[m];
1754 for (
int n = 0; n <= m; n++) {
1756 bcm[n] * scratch.r_i[n] * scratch.g_i[m - n] * scratch.Fm_[n];
1758 Gm[m] += ssss * scratch.oorhog_i[m];
1764 auto ss_oper_ss_m = SS_K0G12_SS;
1765 Gm[0] += ss_oper_ss_m;
1766 for (
int m = 1; m <= mmax; ++m) {
1767 ss_oper_ss_m *= gorg;
1768 Gm[m] += ss_oper_ss_m;
1774 const auto rorgT = rorg * T;
1775 const auto SS_K2G12_SS_0 = (1.5 + rorgT) * (SS_K0G12_SS * oorhog);
1776 const auto SS_K2G12_SS_m1 = rorg * (SS_K0G12_SS * oorhog);
1778 auto SS_K2G12_SS_gorg_m = SS_K2G12_SS_0;
1779 auto SS_K2G12_SS_gorg_m1 = SS_K2G12_SS_m1;
1780 Gm[0] += SS_K2G12_SS_gorg_m;
1781 for (
int m = 1; m <= mmax; ++m) {
1782 SS_K2G12_SS_gorg_m *= gorg;
1783 Gm[m] += SS_K2G12_SS_gorg_m - m * SS_K2G12_SS_gorg_m1;
1784 SS_K2G12_SS_gorg_m1 *= gorg;
1793 std::shared_ptr<const FmEvalType> fm_eval_;
1795 ExpensiveNumbers<Real> numbers_;
1798template <
typename GmEvalFunction>
1799struct GenericGmEval :
private GmEvalFunction {
1800 typedef typename GmEvalFunction::value_type Real;
1802 GenericGmEval(
int mmax, Real precision)
1803 : GmEvalFunction(mmax, precision), mmax_(mmax), precision_(precision) {}
1805 static std::shared_ptr<const GenericGmEval> instance(
1806 int mmax, Real precision = std::numeric_limits<Real>::epsilon()) {
1807 return std::make_shared<const GenericGmEval>(mmax, precision);
1810 template <
typename Real,
typename... ExtraArgs>
1811 void eval(Real* Gm, Real rho, Real T,
int mmax, ExtraArgs... args)
const {
1812 assert(mmax <= mmax_);
1813 (GmEvalFunction(*
this))(Gm, rho, T, mmax, std::forward<ExtraArgs>(args)...);
1818 int max_m()
const {
return mmax_; }
1820 Real precision()
const {
return precision_; }
1828namespace os_core_ints {
1829template <
typename Real,
int K>
1830struct r12_xx_K_gm_eval;
1831template <
typename Real>
1832struct erfc_coulomb_gm_eval;
1837template <
typename Real>
1838struct CoreEvalScratch<os_core_ints::r12_xx_K_gm_eval<Real, 1>> {
1839 std::vector<Real> Fm_;
1840 CoreEvalScratch(
const CoreEvalScratch&) =
default;
1841 CoreEvalScratch(CoreEvalScratch&&) =
default;
1843 explicit CoreEvalScratch(
int mmax) { Fm_.resize(mmax + 2); }
1846template <
typename Real>
1847struct CoreEvalScratch<os_core_ints::erfc_coulomb_gm_eval<Real>> {
1848 std::vector<Real> Fm_;
1849 CoreEvalScratch(
const CoreEvalScratch&) =
default;
1850 CoreEvalScratch(CoreEvalScratch&&) =
default;
1852 explicit CoreEvalScratch(
int mmax) { Fm_.resize(mmax + 1); }
1857namespace os_core_ints {
1860template <
typename Real>
1861struct delta_gm_eval {
1862 typedef Real value_type;
1864 delta_gm_eval(
unsigned int, Real) {}
1865 void operator()(Real* Gm, Real rho, Real T,
int mmax)
const {
1866 constexpr static auto one_over_two_pi = 1.0 / (2.0 * M_PI);
1867 const auto G0 = exp(-T) * rho * one_over_two_pi;
1868 std::fill(Gm, Gm + mmax + 1, G0);
1877template <
typename Real,
int K>
1878struct r12_xx_K_gm_eval;
1880template <
typename Real>
1881struct r12_xx_K_gm_eval<Real, 1>
1882 :
private detail::CoreEvalScratch<r12_xx_K_gm_eval<Real, 1>> {
1883 typedef detail::CoreEvalScratch<r12_xx_K_gm_eval<Real, 1>> base_type;
1884 typedef Real value_type;
1886#ifndef LIBINT_USER_DEFINED_REAL
1892 r12_xx_K_gm_eval(
unsigned int mmax, Real precision) : base_type(mmax) {
1893 fm_eval_ = FmEvalType::instance(mmax + 1, precision);
1895 void operator()(Real* Gm, Real rho, Real T,
int mmax) {
1896 fm_eval_->eval(&base_type::Fm_[0], T, mmax + 1);
1897 auto T_plus_m_plus_one = T + 1.0;
1898 Gm[0] = T_plus_m_plus_one * base_type::Fm_[0] - T * base_type::Fm_[1];
1899 auto minus_m = -1.0;
1900 T_plus_m_plus_one += 1.0;
1901 for (
auto m = 1; m <= mmax; ++m, minus_m -= 1.0, T_plus_m_plus_one += 1.0) {
1902 Gm[m] = minus_m * base_type::Fm_[m - 1] +
1903 T_plus_m_plus_one * base_type::Fm_[m] - T * base_type::Fm_[m + 1];
1908 std::shared_ptr<const FmEvalType> fm_eval_;
1912template <
typename Real>
1913struct erf_coulomb_gm_eval {
1914 typedef Real value_type;
1916#ifndef LIBINT_USER_DEFINED_REAL
1922 erf_coulomb_gm_eval(
unsigned int mmax, Real precision) {
1923 fm_eval_ = FmEvalType::instance(mmax, precision);
1925 void operator()(Real* Gm, Real rho, Real T,
int mmax, Real omega)
const {
1927 auto omega2 = omega * omega;
1928 auto omega2_over_omega2_plus_rho = omega2 / (omega2 + rho);
1929 fm_eval_->eval(Gm, T * omega2_over_omega2_plus_rho, mmax);
1932 auto ooversqrto2prho_exp_2mplus1 = sqrt(omega2_over_omega2_plus_rho);
1933 for (
auto m = 0; m <= mmax;
1934 ++m, ooversqrto2prho_exp_2mplus1 *= omega2_over_omega2_plus_rho) {
1935 Gm[m] *= ooversqrto2prho_exp_2mplus1;
1938 std::fill(Gm, Gm + mmax + 1, Real{0});
1943 std::shared_ptr<const FmEvalType> fm_eval_;
1949template <
typename Real>
1950struct erfc_coulomb_gm_eval
1951 :
private detail::CoreEvalScratch<erfc_coulomb_gm_eval<Real>> {
1952 typedef detail::CoreEvalScratch<erfc_coulomb_gm_eval<Real>> base_type;
1953 typedef Real value_type;
1955#ifndef LIBINT_USER_DEFINED_REAL
1961 erfc_coulomb_gm_eval(
unsigned int mmax, Real precision) : base_type(mmax) {
1962 fm_eval_ = FmEvalType::instance(mmax, precision);
1964 void operator()(Real* Gm, Real rho, Real T,
int mmax, Real omega) {
1965 fm_eval_->eval(&base_type::Fm_[0], T, mmax);
1966 std::copy(base_type::Fm_.cbegin(), base_type::Fm_.cbegin() + mmax + 1, Gm);
1968 auto omega2 = omega * omega;
1969 auto omega2_over_omega2_plus_rho = omega2 / (omega2 + rho);
1970 fm_eval_->eval(&base_type::Fm_[0], T * omega2_over_omega2_plus_rho, mmax);
1973 auto ooversqrto2prho_exp_2mplus1 = sqrt(omega2_over_omega2_plus_rho);
1974 for (
auto m = 0; m <= mmax;
1975 ++m, ooversqrto2prho_exp_2mplus1 *= omega2_over_omega2_plus_rho) {
1976 Gm[m] -= ooversqrto2prho_exp_2mplus1 * base_type::Fm_[m];
1982 std::shared_ptr<const FmEvalType> fm_eval_;
1996template <
typename Real>
1997Real fstg(Real zeta, Real x) {
1998 return -std::exp(-zeta * x) / zeta;
2001template <
typename Real>
2002Real fngtg(
const std::vector<Real>& cc,
const std::vector<Real>& aa, Real x) {
2004 const Real x2 = x * x;
2005 const unsigned int n = cc.size();
2006 for (
unsigned int i = 0; i < n; ++i) value += cc[i] * std::exp(-aa[i] * x2);
2013template <
typename Real>
2014Real wwtewklopper(Real x) {
2015 const Real x2 = x * x;
2016 return x2 * std::exp(-2 * x2);
2018template <
typename Real>
2019Real wwcusp(Real x) {
2020 const Real x2 = x * x;
2021 const Real x6 = x2 * x2 * x2;
2022 return std::exp(-0.005 * x6);
2025template <
typename Real>
2031template <
typename Real>
2032Real norm(
const std::vector<Real>& vec) {
2034 const unsigned int n = vec.size();
2035 for (
unsigned int i = 0; i < n; ++i) value += vec[i] * vec[i];
2039template <
typename Real>
2040void LinearSolveDamped(
const std::vector<Real>& A,
const std::vector<Real>& b,
2041 Real lambda, std::vector<Real>& x) {
2042 const size_t n = b.size();
2043 std::vector<Real> Acopy(A);
2044 for (
size_t m = 0; m < n; ++m) Acopy[m * n + m] *= (1 + lambda);
2045 std::vector<Real> e(b);
2050 std::vector<int> ipiv(n);
2054 dgesv_(&n, &one, &Acopy[0], &n, &ipiv[0], &e[0], &n, &info);
2072template <
typename Real>
2073void stg_ng_fit(
unsigned int n, Real zeta,
2074 std::vector<std::pair<Real, Real>>& geminal, Real xmin = 0.0,
2075 Real xmax = 10.0,
unsigned int npts = 1001) {
2077 std::vector<Real> cc(n, 1.0);
2078 std::vector<Real> aa(n);
2079 for (
unsigned int i = 0; i < n; ++i)
2080 aa[i] = std::pow(3.0, (i + 2 - (n + 1) / 2.0));
2083 Real ffnormfac = 0.0;
2085 for (
unsigned int i = 0; i < n; ++i)
2086 for (
unsigned int j = 0; j < n; ++j)
2087 ffnormfac += cc[i] * cc[j] / sqrt(aa[i] + aa[j]);
2088 const Real Nf = sqrt(2.0 * zeta) * zeta;
2089 const Real Nff = sqrt(2.0) / (sqrt(ffnormfac) * sqrt(sqrt(M_PI)));
2090 for (
unsigned int i = 0; i < n; ++i) cc[i] *= -Nff / Nf;
2092 Real lambda0 = 1000;
2094 const Real nu = 3.0;
2095 const Real epsilon = 1e-15;
2096 const unsigned int maxniter = 200;
2099 std::vector<Real> xi(npts);
2100 for (
unsigned int i = 0; i < npts; ++i)
2101 xi[i] = xmin + (xmax - xmin) * i / (npts - 1);
2103 std::vector<Real> err(npts);
2105 const size_t nparams =
2107 std::vector<Real> J(npts * nparams);
2108 std::vector<Real> delta(nparams);
2115 Real errnormIm1 = 1e3;
2116 bool converged =
false;
2117 unsigned int iter = 0;
2118 while (!converged && iter < maxniter) {
2122 for (
unsigned int i = 0; i < npts; ++i) {
2123 const Real x = xi[i];
2124 err[i] = (fstg(zeta, x) - fngtg(cc, aa, x)) * sqrt(ww(x));
2126 errnormI = norm(err) / sqrt((Real)npts);
2129 converged = std::abs((errnormI - errnormIm1) / errnormIm1) <= epsilon;
2130 if (converged)
break;
2131 errnormIm1 = errnormI;
2133 for (
unsigned int i = 0; i < npts; ++i) {
2134 const Real x2 = xi[i] * xi[i];
2135 const Real sqrt_ww_x = sqrt(ww(xi[i]));
2136 const unsigned int ioffset = i * nparams;
2137 for (
unsigned int j = 0; j < n; ++j)
2138 J[ioffset + j] = (std::exp(-aa[j] * x2)) * sqrt_ww_x;
2139 const unsigned int ioffsetn = ioffset + n;
2140 for (
unsigned int j = 0; j < n; ++j)
2141 J[ioffsetn + j] = -sqrt_ww_x * x2 * cc[j] * std::exp(-aa[j] * x2);
2144 std::vector<Real> A(nparams * nparams);
2145 for (
size_t r = 0, rc = 0; r < nparams; ++r) {
2146 for (
size_t c = 0; c < nparams; ++c, ++rc) {
2148 for (
size_t i = 0, ir = r, ic = c; i < npts;
2149 ++i, ir += nparams, ic += nparams)
2150 Arc += J[ir] * J[ic];
2155 std::vector<Real> b(nparams);
2156 for (
size_t r = 0; r < nparams; ++r) {
2158 for (
size_t i = 0, ir = r; i < npts; ++i, ir += nparams)
2159 br += J[ir] * err[i];
2167 for (
int l = -1; l < 1000; ++l) {
2168 LinearSolveDamped(A, b, lambda0, delta);
2170 std::vector<double> cc_0(cc);
2171 for (
unsigned int i = 0; i < n; ++i) cc_0[i] += delta[i];
2172 std::vector<double> aa_0(aa);
2173 for (
unsigned int i = 0; i < n; ++i) aa_0[i] += delta[i + n];
2177 bool step_too_large =
false;
2178 for (
unsigned int i = 0; i < n; ++i)
2179 if (aa_0[i] < 0.0) {
2180 step_too_large =
true;
2183 if (!step_too_large) {
2184 std::vector<double> err_0(npts);
2185 for (
unsigned int i = 0; i < npts; ++i) {
2186 const double x = xi[i];
2187 err_0[i] = (fstg(zeta, x) - fngtg(cc_0, aa_0, x)) * sqrt(ww(x));
2189 const double errnorm_0 = norm(err_0) / sqrt((
double)npts);
2190 if (errnorm_0 < errnormI) {
2204 assert(not(iter == maxniter && errnormI > 1e-10));
2206 for (
unsigned int i = 0; i < n; ++i)
2207 geminal[i] = std::make_pair(aa[i], cc[i]);
holds tables of expensive quantities
Definition boys.h:67
Computes the Boys function, $ F_m (T) = \int_0^1 u^{2m} \exp(-T u^2) \, {\rm d}u $,...
Definition boys.h:253
static std::shared_ptr< const FmEval_Chebyshev7 > instance(int m_max, double=0.0)
Singleton interface allows to manage the lone instance; adjusts max m values as needed in thread-safe...
Definition boys.h:317
void eval(Real *Fm, Real x, int m_max) const
fills in Fm with computed Boys function values for m in [0,mmax]
Definition boys.h:345
FmEval_Chebyshev7(int m_max, double precision=std::numeric_limits< double >::epsilon())
Definition boys.h:280
int max_m() const
Definition boys.h:337
Computes the Boys function, $ F_m (T) = \int_0^1 u^{2m} \exp(-T u^2) \, {\rm d}u $,...
Definition boys.h:520
static std::shared_ptr< const FmEval_Taylor > instance(unsigned int mmax, Real precision=std::numeric_limits< Real >::epsilon())
Singleton interface allows to manage the lone instance; adjusts max m and precision values as needed ...
Definition boys.h:643
void eval(Real *Fm, Real T, int mmax) const
computes Boys function values with m index in range [0,mmax]
Definition boys.h:679
int max_m() const
Definition boys.h:667
Real precision() const
Definition boys.h:669
FmEval_Taylor(unsigned int mmax, Real precision=std::numeric_limits< Real >::epsilon())
Constructs the object to be able to compute Boys funcion for m in [0,mmax], with relative precision.
Definition boys.h:533
double horizontal_add(VectorSSEDouble const &a)
Horizontal add.
Definition vector_x86.h:220
Defaults definitions for various parameters assumed by Libint.
Definition algebra.cc:24
std::ostream & verbose_stream()
Accessor for the disgnostics stream.
Definition initialize.h:128
bool verbose()
Accessor for the verbose flag.
Definition initialize.h:135
Computes the Boys function, $ F_m (T) = \int_0^1 u^{2m} \exp(-T u^2) \, {\rm d}u $,...
Definition boys.h:214
static void eval(Real *Fm, Real t, size_t mmax)
fills up an array of Fm(T) for m in [0,mmax]
Definition boys.h:229
Computes the Boys function, , using single algorithm (asymptotic expansion).
Definition boys.h:144
static void eval(Real *Fm, Real T, size_t mmax)
fills up an array of Fm(T) for m in [0,mmax]
Definition boys.h:192
static Real eval(Real T, size_t m)
computes a single value of using MacLaurin series to full precision of Real
Definition boys.h:155
core integral for Yukawa and exponential interactions
Definition boys.h:901
TennoGmEval(unsigned int mmax, Real precision=maxabsprecision)
Definition boys.h:933
void eval_slater(Real *Gm, Real one_over_rho, Real T, size_t mmax, Real zeta) const
Definition boys.h:1025
static std::shared_ptr< const TennoGmEval > instance(int m_max, double=0)
Singleton interface allows to manage the lone instance; adjusts max m values as needed in thread-safe...
Definition boys.h:969
Real precision() const
Definition boys.h:988
void eval_yukawa(Real *Gm, Real one_over_rho, Real T, size_t mmax, Real zeta) const
Definition boys.h:1001
some evaluators need thread-local scratch, but most don't
Definition boys.h:1564
SIMD vector of 4 double-precision floating-point real numbers, operations on which use AVX instructio...
Definition vector_x86.h:630
void convert(T *a) const
writes this to a
Definition vector_x86.h:707
void load(T const *a)
loads a to this
Definition vector_x86.h:702
void load_aligned(T const *a)
loads a to this
Definition vector_x86.h:705