LIBINT 2.7.2
engine.impl.h
1/*
2 * Copyright (C) 2004-2021 Edward F. Valeev
3 *
4 * This file is part of Libint.
5 *
6 * Libint is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef _libint2_src_lib_libint_engineimpl_h_
22#define _libint2_src_lib_libint_engineimpl_h_
23
24#include "./engine.h"
25#include "./deriv_map.h"
26
27#include <iterator>
28
29#pragma GCC diagnostic push
30#pragma GCC system_header
31#include <Eigen/Core>
32#pragma GCC diagnostic pop
33
34#include <libint2/boys.h>
35#if LIBINT_HAS_SYSTEM_BOOST_PREPROCESSOR_VARIADICS
36# include <boost/preprocessor.hpp>
37# include <boost/preprocessor/facilities/is_1.hpp>
38#else // use bundled boost
39# include <libint2/boost/preprocessor.hpp>
40# include <libint2/boost/preprocessor/facilities/is_1.hpp>
41#endif
42
43// extra PP macros
44
45#define BOOST_PP_MAKE_TUPLE_INTERNAL(z, i, last) \
46 i BOOST_PP_COMMA_IF(BOOST_PP_NOT_EQUAL(i, last))
48#define BOOST_PP_MAKE_TUPLE(n) \
49 (BOOST_PP_REPEAT(n, BOOST_PP_MAKE_TUPLE_INTERNAL, BOOST_PP_DEC(n)))
50
51// the engine will be profiled by default if library was configured with
52// --enable-profile
53#ifdef LIBINT2_PROFILE
54#define LIBINT2_ENGINE_TIMERS
55// uncomment if want to profile each integral class
56#define LIBINT2_ENGINE_PROFILE_CLASS
57#endif
58// uncomment if want to profile the engine even if library was configured
59// without --enable-profile
60//# define LIBINT2_ENGINE_TIMERS
61
62namespace libint2 {
63
64template <typename T, unsigned N>
65typename std::remove_all_extents<T>::type* to_ptr1(T (&a)[N]) {
66 return reinterpret_cast<typename std::remove_all_extents<T>::type*>(&a);
67}
68
73#define BOOST_PP_NBODY_OPERATOR_LIST \
74 (overlap, \
75 (kinetic, \
76 (elecpot, \
77 (elecpot, \
78 (elecpot, \
79 (1emultipole, \
80 (2emultipole, \
81 (3emultipole, \
82 (sphemultipole, \
83 (eri, (eri, (eri, (eri, (eri, (eri, (eri, (eri, (eri, (eri, BOOST_PP_NIL)))))))))))))))))))
84
85#define BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE \
86 BOOST_PP_MAKE_TUPLE(BOOST_PP_LIST_SIZE(BOOST_PP_NBODY_OPERATOR_LIST))
87#define BOOST_PP_NBODY_OPERATOR_INDEX_LIST \
88 BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE)
89#define BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX \
90 8 // sphemultipole, the 9th member of BOOST_PP_NBODY_OPERATOR_LIST, is the last
91 // 1-body operator
92
93// make list of braket indices for n-body ints
94#define BOOST_PP_NBODY_BRAKET_INDEX_TUPLE \
95 BOOST_PP_MAKE_TUPLE(BOOST_PP_INC(BOOST_PP_NBODY_BRAKET_MAX_INDEX))
96#define BOOST_PP_NBODY_BRAKET_INDEX_LIST \
97 BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_BRAKET_INDEX_TUPLE)
98#define BOOST_PP_NBODY_BRAKET_RANK_TUPLE (2, 3, 4)
99#define BOOST_PP_NBODY_BRAKET_RANK_LIST \
100 BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_BRAKET_RANK_TUPLE)
101
102// make list of derivative orders for n-body ints
103#define BOOST_PP_NBODY_DERIV_ORDER_TUPLE \
104 BOOST_PP_MAKE_TUPLE(BOOST_PP_INC(LIBINT2_MAX_DERIV_ORDER))
105#define BOOST_PP_NBODY_DERIV_ORDER_LIST \
106 BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_DERIV_ORDER_TUPLE)
107
108
110__libint2_engine_inline libint2::any
111default_params(const Operator& oper) {
112 switch (static_cast<int>(oper)) {
113#define BOOST_PP_NBODYENGINE_MCR1(r, data, i, elem) \
114 case i: \
115 return operator_traits<static_cast<Operator>(i)>::default_params();
116 BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR1, _,
117 BOOST_PP_NBODY_OPERATOR_LIST)
118 default:
119 break;
120 }
121 assert(false && "missing case in switch"); // unreachable
122 abort();
123}
124
126
134template <typename... ShellPack>
135__libint2_engine_inline const Engine::target_ptr_vec& Engine::compute(
136 const libint2::Shell& first_shell, const ShellPack&... rest_of_shells) {
137 constexpr auto nargs = 1 + sizeof...(rest_of_shells);
138 assert(nargs == braket_rank() && "# of arguments to compute() does not match the braket type");
139
141 first_shell, rest_of_shells...}};
142
143 if (operator_rank() == 1) {
144 if (nargs == 2) return compute1(shells[0], shells[1]);
145 } else if (operator_rank() == 2) {
146 auto compute_ptr_idx = ((static_cast<int>(oper_) -
147 static_cast<int>(Operator::first_2body_oper)) *
148 nbrakets_2body +
149 (static_cast<int>(braket_) -
150 static_cast<int>(BraKet::first_2body_braket))) *
151 nderivorders_2body +
152 deriv_order_;
153 assert(compute_ptr_idx >= 0 && compute_ptr_idx < compute2_ptrs().size());
154 auto compute_ptr = compute2_ptrs()[compute_ptr_idx];
155 assert(compute_ptr != nullptr && "2-body compute function not found");
156 if (nargs == 2)
157 return (this->*compute_ptr)(shells[0], Shell::unit(), shells[1],
158 Shell::unit(), nullptr, nullptr);
159 if (nargs == 3)
160 return (this->*compute_ptr)(shells[0], Shell::unit(), shells[1],
161 shells[2], nullptr, nullptr);
162 if (nargs == 4)
163 return (this->*compute_ptr)(shells[0], shells[1], shells[2], shells[3], nullptr, nullptr);
164 }
165
166 assert(false && "missing feature"); // only reached if missing a feature
167 abort();
168}
169
174__libint2_engine_inline const Engine::target_ptr_vec& Engine::compute1(
175 const libint2::Shell& s1, const libint2::Shell& s2) {
176 // can only handle 1 contraction at a time
177 assert((s1.ncontr() == 1 && s2.ncontr() == 1) &&
178 "generally-contracted shells not yet supported");
179
180 const auto oper_is_nuclear =
181 (oper_ == Operator::nuclear || oper_ == Operator::erf_nuclear ||
182 oper_ == Operator::erfc_nuclear);
183
184 const auto l1 = s1.contr[0].l;
185 const auto l2 = s2.contr[0].l;
186 assert(l1 <= lmax_ && "the angular momentum limit is exceeded");
187 assert(l2 <= lmax_ && "the angular momentum limit is exceeded");
188
189 // if want nuclear, make sure there is at least one nucleus .. otherwise the
190 // user likely forgot to call set_params
191 if (oper_is_nuclear && nparams() == 0)
192 throw std::logic_error(
193 "Engine<*nuclear>, but no charges found; forgot to call "
194 "set_params()?");
195
196 const auto n1 = s1.size();
197 const auto n2 = s2.size();
198 const auto n12 = n1 * n2;
199 const auto ncart1 = s1.cartesian_size();
200 const auto ncart2 = s2.cartesian_size();
201 const auto ncart12 = ncart1 * ncart2;
202
203 // assert # of primitive pairs
204 const auto nprim1 = s1.nprim();
205 const auto nprim2 = s2.nprim();
206 const auto nprimpairs = nprim1 * nprim2;
207 assert(nprimpairs <= primdata_.size() && "the max number of primitive pairs exceeded");
208
209 auto nparam_sets = nparams();
210
211 // keep track if need to set targets_ explicitly
212 bool set_targets = set_targets_;
213
214 // # of targets computed by libint
215 const auto ntargets = nopers() * num_geometrical_derivatives(2, deriv_order_);
216
217 // Libint computes derivatives with respect to basis functions only, must
218 // must use translational invariance to recover derivatives w.r.t. operator
219 // degrees of freedom
220 // will compute derivs w.r.t. 2 Gaussian centers + (if nuclear) nparam_sets
221 // operator centers
222 const auto nderivcenters_shset = 2 + (oper_is_nuclear ? nparam_sets : 0);
223 const auto nderivcoord = 3 * nderivcenters_shset;
224 const auto num_shellsets_computed =
225 nopers() * num_geometrical_derivatives(nderivcenters_shset, deriv_order_);
226
227 // will use scratch_ if:
228 // - Coulomb ints are computed 1 charge at a time, contributions are
229 // accumulated in scratch_ (unless la==lb==0)
230 // - derivatives on the missing center need to be reconstructed (no need to
231 // accumulate into scratch though)
232 // NB ints in scratch are packed in order
233 const auto accumulate_ints_in_scratch = oper_is_nuclear;
234
235 // adjust max angular momentum, if needed
236 const auto lmax = std::max(l1, l2);
237 assert(lmax <= lmax_ && "the angular momentum limit is exceeded");
238
239 // N.B. for l=0 no need to transform to solid harmonics
240 // this is a workaround for the corner case of oper_ == Operator::*nuclear,
241 // and solid harmonics (s|s) integral ... beware the integral storage state
242 // machine
243 const auto tform_to_solids =
244 (s1.contr[0].pure || s2.contr[0].pure) && lmax != 0;
245
246 // simple (s|s) ints will be computed directly and accumulated in the first
247 // element of stack
248 const auto compute_directly =
249 lmax == 0 && deriv_order_ == 0 &&
250 (oper_ == Operator::overlap || oper_is_nuclear);
251 if (compute_directly) {
252 primdata_[0].stack[0] = 0;
253 targets_[0] = primdata_[0].stack;
254 }
255
256 if (accumulate_ints_in_scratch)
257 std::fill(std::begin(scratch_),
258 std::begin(scratch_) + num_shellsets_computed * ncart12, 0.0);
259
260 // loop over accumulation batches
261 for (auto pset = 0u; pset != nparam_sets; ++pset) {
262 if (!oper_is_nuclear)
263 assert(nparam_sets == 1 && "unexpected number of operator parameters");
264
265 auto p12 = 0;
266 for (auto p1 = 0; p1 != nprim1; ++p1) {
267 for (auto p2 = 0; p2 != nprim2; ++p2, ++p12) {
268 compute_primdata(primdata_[p12], s1, s2, p1, p2, pset);
269 }
270 }
271 primdata_[0].contrdepth = p12;
272
273 if (compute_directly) {
274 auto& result = primdata_[0].stack[0];
275 switch (oper_) {
276 case Operator::overlap:
277 for (auto p12 = 0; p12 != primdata_[0].contrdepth; ++p12)
278 result += primdata_[p12]._0_Overlap_0_x[0] *
279 primdata_[p12]._0_Overlap_0_y[0] *
280 primdata_[p12]._0_Overlap_0_z[0];
281 break;
282 case Operator::nuclear:
283 case Operator::erf_nuclear:
284 case Operator::erfc_nuclear:
285 for (auto p12 = 0; p12 != primdata_[0].contrdepth; ++p12)
286 result += primdata_[p12].LIBINT_T_S_ELECPOT_S(0)[0];
287 break;
288 default:
289 assert(false && "missing case in switch");
290 }
291 primdata_[0].targets[0] = &result;
292 } else {
293 const auto buildfnidx = s1.contr[0].l * hard_lmax_ + s2.contr[0].l;
294 assert(buildfnptrs_[buildfnidx] && "null build function ptr");
295 buildfnptrs_[buildfnidx](&primdata_[0]);
296
297 if (accumulate_ints_in_scratch) {
298 set_targets = true;
299 // - for non-derivative ints and first derivative ints the target
300 // ints computed by libint will appear at the front of targets_
301 // - for second and higher derivs need to re-index targets, hence
302 // will accumulate later, when computing operator derivatives via
303 // transinv
304 if (deriv_order_ <= 1) {
305 // accumulate targets computed by libint for this pset into the
306 // accumulated targets in scratch
307 auto s_target = &scratch_[0];
308 for (auto s = 0; s != ntargets; ++s, s_target += ncart12)
309 if (pset != 0)
310 std::transform(primdata_[0].targets[s],
311 primdata_[0].targets[s] + ncart12, s_target,
312 s_target, std::plus<value_type>());
313 else
314 std::copy(primdata_[0].targets[s],
315 primdata_[0].targets[s] + ncart12, s_target);
316 }
317
318 // 2. reconstruct derivatives of nuclear ints for each nucleus
319 // using translational invariance
320 // NB this is done in cartesian basis, otherwise would have to tform
321 // to solids contributions from every atom, rather than the running
322 // total at the end
323 if (deriv_order_ > 0) {
324 switch (deriv_order_) {
325 case 1: {
326 // first 6 shellsets are derivatives with respect to Gaussian
327 // positions
328 // following them are derivs with respect to nuclear coordinates
329 // (3 per nucleus)
330 assert(ntargets == 6 && "unexpected # of targets");
331 auto dest = &scratch_[0] + (6 + pset * 3) * ncart12;
332 for (auto s = 0; s != 3; ++s, dest += ncart12) {
333 auto src = primdata_[0].targets[s];
334 for (auto i = 0; i != ncart12; ++i) {
335 dest[i] = -src[i];
336 }
337 }
338 dest -= 3 * ncart12;
339 for (auto s = 3; s != 6; ++s, dest += ncart12) {
340 auto src = primdata_[0].targets[s];
341 for (auto i = 0; i != ncart12; ++i) {
342 dest[i] -= src[i];
343 }
344 }
345 } break;
346
347 case 2: {
348 // computes upper triangle index
349 // n2 = matrix size times 2
350 // i,j = indices, i<j
351 auto upper_triangle_index_ord = [](int n2, int i, int j) {
352 return i * (n2 - i -1) / 2 + j;
353 };
354 // same as above, but orders i and j
355 auto upper_triangle_index = [&](int n2, int i, int j) {
356 return upper_triangle_index_ord(n2, std::min(i, j), std::max(i, j));
357 };
358
359 // accumulate ints for this pset to scratch in locations
360 // remapped to overall deriv index
361 const auto ncoords_times_two = nderivcoord * 2;
362 for (auto d0 = 0, d01 = 0; d0 != 6; ++d0) {
363 for (auto d1 = d0; d1 != 6; ++d1, ++d01) {
364 const auto d01_full =
365 upper_triangle_index_ord(ncoords_times_two, d0, d1);
366 auto tgt = &scratch_[d01_full * ncart12];
367 if (pset != 0)
368 std::transform(primdata_[0].targets[d01],
369 primdata_[0].targets[d01] + ncart12, tgt,
370 tgt, std::plus<value_type>());
371 else
372 std::copy(primdata_[0].targets[d01],
373 primdata_[0].targets[d01] + ncart12, tgt);
374 }
375 }
376
377 // use translational invariance to build derivatives w.r.t.
378 // operator centers
379 {
380 // mixed derivatives: first deriv w.r.t. Gaussian, second
381 // w.r.t. operator coord pset
382 const auto c1 = 2 + pset;
383 for (auto c0 = 0; c0 != 2; ++c0) {
384 for (auto xyz0 = 0; xyz0 != 3; ++xyz0) {
385 const auto coord0 = c0 * 3 + xyz0;
386 for (auto xyz1 = 0; xyz1 != 3; ++xyz1) {
387 const auto coord1 = c1 * 3 + xyz1; // coord1 > coord0
388
389 const auto coord01_abs = upper_triangle_index_ord(
390 ncoords_times_two, coord0, coord1);
391 auto tgt = &scratch_[coord01_abs * ncart12];
392
393 // d2 / dAi dOj = - d2 / dAi dAj
394 {
395 auto coord1_A = xyz1;
396 const auto coord01_A =
397 upper_triangle_index(12, coord0, coord1_A);
398 const auto src = primdata_[0].targets[coord01_A];
399 for (auto i = 0; i != ncart12; ++i) tgt[i] = -src[i];
400 }
401
402 // d2 / dAi dOj -= d2 / dAi dBj
403 {
404 auto coord1_B = 3 + xyz1;
405 const auto coord01_B =
406 upper_triangle_index(12, coord0, coord1_B);
407 const auto src = primdata_[0].targets[coord01_B];
408 for (auto i = 0; i != ncart12; ++i) tgt[i] -= src[i];
409 }
410 }
411 }
412 }
413 } // mixed derivs
414 {
415 // operator derivs
416 const auto c0 = 2 + pset;
417 const auto c1 = c0;
418 for (auto xyz0 = 0; xyz0 != 3; ++xyz0) {
419 const auto coord0 = c0 * 3 + xyz0;
420 for (auto xyz1 = xyz0; xyz1 != 3; ++xyz1) {
421 const auto coord1 = c1 * 3 + xyz1; // coord1 > coord0
422
423 const auto coord01_abs = upper_triangle_index_ord(
424 ncoords_times_two, coord0, coord1);
425 auto tgt = &scratch_[coord01_abs * ncart12];
426
427 // d2 / dOi dOj = d2 / dAi dAj
428 {
429 auto coord0_A = xyz0;
430 auto coord1_A = xyz1;
431 const auto coord01_AA =
432 upper_triangle_index_ord(12, coord0_A, coord1_A);
433 const auto src = primdata_[0].targets[coord01_AA];
434 for (auto i = 0; i != ncart12; ++i) tgt[i] = src[i];
435 }
436
437 // d2 / dOi dOj += d2 / dAi dBj
438 {
439 auto coord0_A = xyz0;
440 auto coord1_B = 3 + xyz1;
441 const auto coord01_AB =
442 upper_triangle_index_ord(12, coord0_A, coord1_B);
443 const auto src = primdata_[0].targets[coord01_AB];
444 for (auto i = 0; i != ncart12; ++i) tgt[i] += src[i];
445 }
446
447 // d2 / dOi dOj += d2 / dBi dAj
448 {
449 auto coord0_B = 3 + xyz0;
450 auto coord1_A = xyz1;
451 const auto coord01_BA =
452 upper_triangle_index_ord(12, coord1_A, coord0_B);
453 const auto src = primdata_[0].targets[coord01_BA];
454 for (auto i = 0; i != ncart12; ++i) tgt[i] += src[i];
455 }
456
457 // d2 / dOi dOj += d2 / dBi dBj
458 {
459 auto coord0_B = 3 + xyz0;
460 auto coord1_B = 3 + xyz1;
461 const auto coord01_BB =
462 upper_triangle_index_ord(12, coord0_B, coord1_B);
463 const auto src = primdata_[0].targets[coord01_BB];
464 for (auto i = 0; i != ncart12; ++i) tgt[i] += src[i];
465 }
466 }
467 }
468 } // operator derivs
469
470 } break;
471
472 default: {
473 assert(deriv_order_ <= 2 && "feature not implemented");
474
475 // 1. since # of derivatives changes, remap derivatives computed
476 // by libint; targets_ will hold the "remapped" pointers to
477 // the data
478 using ShellSetDerivIterator =
480 std::vector<unsigned int>>;
481 ShellSetDerivIterator shellset_gaussian_diter(deriv_order_, 2);
482 ShellSetDerivIterator shellset_full_diter(deriv_order_,
483 nderivcenters_shset);
484 std::vector<unsigned int> full_deriv(3 * nderivcenters_shset, 0);
485 std::size_t s = 0;
486 while (shellset_gaussian_diter) { // loop over derivs computed
487 // by libint
488 const auto& s1s2_deriv = *shellset_gaussian_diter;
489 std::copy(std::begin(s1s2_deriv), std::end(s1s2_deriv),
490 std::begin(full_deriv));
491 const auto full_rank = ShellSetDerivIterator::rank(full_deriv);
492 targets_[full_rank] = primdata_[0].targets[s];
493 }
494 // use translational invariance to build derivatives w.r.t.
495 // operator centers
496 }
497
498 } // deriv_order_ switch
499 } // reconstruct derivatives
500 }
501 } // ltot != 0
502 } // pset (accumulation batches)
503
504 if (tform_to_solids) {
505 set_targets = false;
506 // where do spherical ints go?
507 auto* spherical_ints =
508 (accumulate_ints_in_scratch) ? scratch2_ : &scratch_[0];
509
510 // transform to solid harmonics, one shell set at a time:
511 // for each computed shell set ...
512 for (auto s = 0ul; s != num_shellsets_computed;
513 ++s, spherical_ints += n12) {
514 auto cartesian_ints = accumulate_ints_in_scratch
515 ? &scratch_[s * ncart12]
516 : primdata_[0].targets[s];
517 // transform
518 if (s1.contr[0].pure && s2.contr[0].pure) {
519 libint2::solidharmonics::tform(l1, l2, cartesian_ints, spherical_ints);
520 } else {
521 if (s1.contr[0].pure)
522 libint2::solidharmonics::tform_rows(l1, n2, cartesian_ints,
523 spherical_ints);
524 else
525 libint2::solidharmonics::tform_cols(n1, l2, cartesian_ints,
526 spherical_ints);
527 }
528 // .. and compute the destination
529 targets_[s] = spherical_ints;
530 } // loop cartesian shell set
531 } // tform to solids
532
533 if (set_targets) {
534 for (auto s = 0ul; s != num_shellsets_computed; ++s) {
535 auto cartesian_ints = accumulate_ints_in_scratch
536 ? &scratch_[s * ncart12]
537 : primdata_[0].targets[s];
538 targets_[s] = cartesian_ints;
539 }
540 }
541
542 if (cartesian_shell_normalization() == CartesianShellNormalization::uniform) {
544 for (auto s = 0ul; s != num_shellsets_computed; ++s) {
545 uniform_normalize_cartesian_shells(const_cast<value_type*>(targets_[s]), shells);
546 }
547 }
548
549 return targets_;
550}
551
552// generic _initializer
553__libint2_engine_inline void Engine::_initialize() {
554#define BOOST_PP_NBODYENGINE_MCR3_ncenter(product) \
555 BOOST_PP_TUPLE_ELEM(3, 1, product)
556
557#define BOOST_PP_NBODYENGINE_MCR3_default_ncenter(product) \
558 BOOST_PP_IIF(BOOST_PP_GREATER(BOOST_PP_TUPLE_ELEM(3, 0, product), \
559 BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX), \
560 4, 2)
561
562#define BOOST_PP_NBODYENGINE_MCR3_NCENTER(product) \
563 BOOST_PP_IIF( \
564 BOOST_PP_NOT_EQUAL(BOOST_PP_NBODYENGINE_MCR3_ncenter(product), \
565 BOOST_PP_NBODYENGINE_MCR3_default_ncenter(product)), \
566 BOOST_PP_NBODYENGINE_MCR3_ncenter(product), BOOST_PP_EMPTY())
567
568#define BOOST_PP_NBODYENGINE_MCR3_OPER(product) \
569 BOOST_PP_LIST_AT(BOOST_PP_NBODY_OPERATOR_LIST, \
570 BOOST_PP_TUPLE_ELEM(3, 0, product))
571
572#define BOOST_PP_NBODYENGINE_MCR3_DERIV(product) \
573 BOOST_PP_IIF(BOOST_PP_GREATER(BOOST_PP_TUPLE_ELEM(3, 2, product), 0), \
574 BOOST_PP_TUPLE_ELEM(3, 2, product), BOOST_PP_EMPTY())
575
576#define BOOST_PP_NBODYENGINE_MCR3_task(product) \
577 BOOST_PP_CAT(BOOST_PP_CAT(BOOST_PP_NBODYENGINE_MCR3_ncenter(product), \
578 BOOST_PP_NBODYENGINE_MCR3_OPER(product)), \
579 BOOST_PP_NBODYENGINE_MCR3_DERIV(product))
580
581#define BOOST_PP_NBODYENGINE_MCR3_TASK(product) \
582 BOOST_PP_IIF( \
583 BOOST_PP_CAT(LIBINT2_TASK_EXISTS_, \
584 BOOST_PP_NBODYENGINE_MCR3_task(product)), \
585 BOOST_PP_CAT(BOOST_PP_CAT(BOOST_PP_NBODYENGINE_MCR3_NCENTER(product), \
586 BOOST_PP_NBODYENGINE_MCR3_OPER(product)), \
587 BOOST_PP_NBODYENGINE_MCR3_DERIV(product)), \
588 default)
589
590#define BOOST_PP_NBODYENGINE_MCR3(r, product) \
591 if (static_cast<int>(oper_) == BOOST_PP_TUPLE_ELEM(3, 0, product) && \
592 static_cast<int>(rank(braket_)) == BOOST_PP_TUPLE_ELEM(3, 1, product) && \
593 deriv_order_ == BOOST_PP_TUPLE_ELEM(3, 2, product)) { \
594 hard_lmax_ = BOOST_PP_CAT(LIBINT2_MAX_AM_, \
595 BOOST_PP_NBODYENGINE_MCR3_TASK(product)) + \
596 1; \
597 hard_default_lmax_ = \
598 BOOST_PP_IF(BOOST_PP_IS_1(BOOST_PP_CAT(LIBINT2_CENTER_DEPENDENT_MAX_AM_, \
599 BOOST_PP_NBODYENGINE_MCR3_task(product))), \
600 BOOST_PP_CAT(LIBINT2_MAX_AM_, \
601 BOOST_PP_CAT(default, \
602 BOOST_PP_NBODYENGINE_MCR3_DERIV(product) \
603 ) \
604 ) + 1, std::numeric_limits<int>::max()); \
605 const auto lmax = \
606 BOOST_PP_IF(BOOST_PP_IS_1(BOOST_PP_CAT(LIBINT2_CENTER_DEPENDENT_MAX_AM_, \
607 BOOST_PP_NBODYENGINE_MCR3_task(product))), \
608 std::max(hard_lmax_,hard_default_lmax_), hard_lmax_); \
609 if (lmax_ >= lmax) { \
610 throw Engine::lmax_exceeded( \
611 BOOST_PP_STRINGIZE(BOOST_PP_NBODYENGINE_MCR3_TASK(product)), \
612 lmax, lmax_); \
613 } \
614 if (stack_size_ > 0) \
615 libint2_cleanup_default(&primdata_[0]); \
616 stack_size_ = LIBINT2_PREFIXED_NAME(BOOST_PP_CAT( \
617 libint2_need_memory_, BOOST_PP_NBODYENGINE_MCR3_TASK(product)))( \
618 lmax_); \
619 LIBINT2_PREFIXED_NAME( \
620 BOOST_PP_CAT(libint2_init_, BOOST_PP_NBODYENGINE_MCR3_TASK(product))) \
621 (&primdata_[0], lmax_, 0); \
622 BOOST_PP_IF(BOOST_PP_IS_1(LIBINT2_FLOP_COUNT), \
623 LIBINT2_PREFIXED_NAME(libint2_init_flopcounter) \
624 (&primdata_[0], primdata_.size()), BOOST_PP_EMPTY()); \
625 buildfnptrs_ = to_ptr1(LIBINT2_PREFIXED_NAME(BOOST_PP_CAT( \
626 libint2_build_, BOOST_PP_NBODYENGINE_MCR3_TASK(product)))); \
627 reset_scratch(); \
628 return; \
629 }
630
631 BOOST_PP_LIST_FOR_EACH_PRODUCT(
632 BOOST_PP_NBODYENGINE_MCR3, 3,
633 (BOOST_PP_NBODY_OPERATOR_INDEX_LIST, BOOST_PP_NBODY_BRAKET_RANK_LIST,
634 BOOST_PP_NBODY_DERIV_ORDER_LIST))
635
636 assert(
637 false &&
638 "missing case in switch"); // either deriv_order_ or oper_ is wrong
639 abort();
640} // _initialize<R>()
641
642__libint2_engine_inline void Engine::initialize(size_t max_nprim) {
643 assert(libint2::initialized() && "libint is not initialized");
644 assert(deriv_order_ <= LIBINT2_MAX_DERIV_ORDER &&
645 "exceeded the max derivative order of the library");
646
647 // validate braket
648#ifndef INCLUDE_ONEBODY
649 assert(braket_ != BraKet::x_x &&
650 "this braket type not supported by the library; give --enable-1body to configure");
651#endif
652#ifndef INCLUDE_ERI
653 assert(braket_ != BraKet::xx_xx &&
654 "this braket type not supported by the library; give --enable-eri to configure");
655#endif
656#ifndef INCLUDE_ERI3
657 assert((braket_ != BraKet::xs_xx && braket_ != BraKet::xx_xs) &&
658 "this braket type not supported by the library; give --enable-eri3 to configure");
659#endif
660#ifndef INCLUDE_ERI2
661 assert(braket_ != BraKet::xs_xs &&
662 "this braket type not supported by the library; give --enable-eri2 to configure");
663#endif
664
665 // make sure it's no default initialized
666 if (lmax_ < 0)
667 throw using_default_initialized();
668
669 // initialize braket, if needed
670 if (braket_ == BraKet::invalid) braket_ = default_braket(oper_);
671
672 if (max_nprim != 0) primdata_.resize(std::pow(max_nprim, braket_rank()));
673
674 // initialize targets
675 {
676 decltype(targets_)::allocator_type alloc(primdata_[0].targets);
677 targets_ = decltype(targets_)(alloc);
678 // in some cases extra memory use can be avoided if targets_ manages its own
679 // memory
680 // the only instance is where we permute derivative integrals, this calls
681 // for permuting
682 // target indices.
683 const auto permutable_targets =
684 deriv_order_ > 0 &&
685 (braket_ == BraKet::xx_xx || braket_ == BraKet::xs_xx ||
686 braket_ == BraKet::xx_xs);
687 if (permutable_targets)
688 targets_.reserve(max_ntargets + 1);
689 else
690 targets_.reserve(max_ntargets);
691 // will be resized to appropriate size in reset_scratch via _initialize
692 }
693
694#ifdef LIBINT2_ENGINE_TIMERS
695 timers.set_now_overhead(25);
696#endif
697#ifdef LIBINT2_PROFILE
698 primdata_[0].timers->set_now_overhead(25);
699#endif
700
701 _initialize();
702}
703
704namespace detail {
705__libint2_engine_inline std::vector<Engine::compute2_ptr_type>
706init_compute2_ptrs() {
707 auto max_ncompute2_ptrs = nopers_2body * nbrakets_2body * nderivorders_2body;
708 std::vector<Engine::compute2_ptr_type> result(max_ncompute2_ptrs, nullptr);
709
710#define BOOST_PP_NBODYENGINE_MCR7(r, product) \
711 if (BOOST_PP_TUPLE_ELEM(3, 0, product) >= \
712 static_cast<int>(Operator::first_2body_oper) && \
713 BOOST_PP_TUPLE_ELEM(3, 0, product) <= \
714 static_cast<int>(Operator::last_2body_oper) && \
715 BOOST_PP_TUPLE_ELEM(3, 1, product) >= \
716 static_cast<int>(BraKet::first_2body_braket) && \
717 BOOST_PP_TUPLE_ELEM(3, 1, product) <= \
718 static_cast<int>(BraKet::last_2body_braket)) { \
719 auto compute_ptr_idx = ((BOOST_PP_TUPLE_ELEM(3, 0, product) - \
720 static_cast<int>(Operator::first_2body_oper)) * \
721 nbrakets_2body + \
722 (BOOST_PP_TUPLE_ELEM(3, 1, product) - \
723 static_cast<int>(BraKet::first_2body_braket))) * \
724 nderivorders_2body + \
725 BOOST_PP_TUPLE_ELEM(3, 2, product); \
726 result.at(compute_ptr_idx) = &Engine::compute2< \
727 static_cast<Operator>(BOOST_PP_TUPLE_ELEM(3, 0, product)), \
728 static_cast<BraKet>(BOOST_PP_TUPLE_ELEM(3, 1, product)), \
729 BOOST_PP_TUPLE_ELEM(3, 2, product)>; \
730 }
731
732 BOOST_PP_LIST_FOR_EACH_PRODUCT(
733 BOOST_PP_NBODYENGINE_MCR7, 3,
734 (BOOST_PP_NBODY_OPERATOR_INDEX_LIST, BOOST_PP_NBODY_BRAKET_INDEX_LIST,
735 BOOST_PP_NBODY_DERIV_ORDER_LIST))
736
737 return result;
738}
739} // namespace detail
740
741__libint2_engine_inline const std::vector<Engine::compute2_ptr_type>&
742Engine::compute2_ptrs() const {
743 static std::vector<compute2_ptr_type> compute2_ptrs_ =
744 detail::init_compute2_ptrs();
745 return compute2_ptrs_;
746}
747
748__libint2_engine_inline unsigned int Engine::nparams() const {
749 switch (oper_) {
750 case Operator::nuclear:
751 return any_cast<const operator_traits<Operator::nuclear>::oper_params_type&>(params_)
752 .size();
753 case Operator::erf_nuclear:
754 case Operator::erfc_nuclear:
755 return std::get<1>(any_cast<const operator_traits<Operator::erfc_nuclear>::oper_params_type&>(params_))
756 .size();
757 default:
758 return 1;
759 }
760 return 1;
761}
762__libint2_engine_inline unsigned int Engine::nopers() const {
763 switch (static_cast<int>(oper_)) {
764#define BOOST_PP_NBODYENGINE_MCR4(r, data, i, elem) \
765 case i: \
766 return operator_traits<static_cast<Operator>(i)>::nopers;
767 BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR4, _,
768 BOOST_PP_NBODY_OPERATOR_LIST)
769 default:
770 break;
771 }
772 assert(false && "missing case in switch"); // unreachable
773 abort();
774}
775
776template <>
777__libint2_engine_inline any Engine::enforce_params_type<any>(
778 Operator oper, const any& params, bool throw_if_wrong_type) {
779 any result;
780 switch (static_cast<int>(oper)) {
781#define BOOST_PP_NBODYENGINE_MCR5A(r, data, i, elem) \
782 case i: \
783 if (any_cast<operator_traits<static_cast<Operator>(i)>::oper_params_type>( \
784 &params) != nullptr) { \
785 result = params; \
786 } else { \
787 if (throw_if_wrong_type) throw bad_any_cast(); \
788 result = operator_traits<static_cast<Operator>(i)>::default_params(); \
789 } \
790 break;
791
792 BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR5A, _,
793 BOOST_PP_NBODY_OPERATOR_LIST)
794
795 default:
796 assert(false && "missing case in switch"); // missed a case?
797 abort();
798 }
799 return result;
800}
801
802template <typename Params>
803__libint2_engine_inline any Engine::enforce_params_type(
804 Operator oper, const Params& params, bool throw_if_wrong_type) {
805 any result;
806 switch (static_cast<int>(oper)) {
807#define BOOST_PP_NBODYENGINE_MCR5B(r, data, i, elem) \
808 case i: \
809 if (std::is_same<Params, operator_traits<static_cast<Operator>( \
810 i)>::oper_params_type>::value) { \
811 result = params; \
812 } else { \
813 if (throw_if_wrong_type) throw std::bad_cast(); \
814 result = operator_traits<static_cast<Operator>(i)>::default_params(); \
815 } \
816 break;
817
818 BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR5B, _,
819 BOOST_PP_NBODY_OPERATOR_LIST)
820
821 default:
822 assert(false && "missing case in switch"); // missed a case?
823 abort();
824 }
825 return result;
826}
827
828__libint2_engine_inline any Engine::make_core_eval_pack(Operator oper) const {
829 any result;
830 switch (static_cast<int>(oper)) {
831#define BOOST_PP_NBODYENGINE_MCR6(r, data, i, elem) \
832 case i: \
833 result = libint2::detail::make_compressed_pair( \
834 operator_traits<static_cast<Operator>(i)>::core_eval_type::instance( \
835 braket_rank() * lmax_ + deriv_order_, \
836 std::numeric_limits<scalar_type>::epsilon()), \
837 libint2::detail::CoreEvalScratch< \
838 operator_traits<static_cast<Operator>(i)>::core_eval_type>( \
839 braket_rank() * lmax_ + deriv_order_)); \
840 assert(any_cast<detail::core_eval_pack_type<static_cast<Operator>(i)>>( \
841 &result) != nullptr); \
842 break;
843
844 BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR6, _,
845 BOOST_PP_NBODY_OPERATOR_LIST)
846
847 default:
848 assert(false && "missing case in switch"); // missed a case?
849 abort();
850 }
851 return result;
852}
853
854__libint2_engine_inline void Engine::init_core_ints_params(const any& params) {
855 if (oper_ == Operator::delcgtg2) {
856 // [g12,[- \Del^2, g12] = 2 (\Del g12) \cdot (\Del g12)
857 // (\Del exp(-a r_12^2) \cdot (\Del exp(-b r_12^2) = 4 a b (r_{12}^2 exp(-
858 // (a+b) r_{12}^2) )
859 // i.e. need to scale each coefficient by 4 a b
860 const auto& oparams =
861 any_cast<const operator_traits<Operator::delcgtg2>::oper_params_type&>(params);
862 const auto ng = oparams.size();
863 operator_traits<Operator::delcgtg2>::oper_params_type core_ints_params;
864 core_ints_params.reserve(ng * (ng + 1) / 2);
865 for (size_t b = 0; b < ng; ++b)
866 for (size_t k = 0; k <= b; ++k) {
867 const auto gexp = oparams[b].first + oparams[k].first;
868 const auto gcoeff = oparams[b].second * oparams[k].second *
869 (b == k ? 1 : 2); // if a != b include ab and ba
870 const auto gcoeff_rescaled =
871 4 * oparams[b].first * oparams[k].first * gcoeff;
872 core_ints_params.push_back(std::make_pair(gexp, gcoeff_rescaled));
873 }
874 core_ints_params_ = core_ints_params;
875 } else {
876 core_ints_params_ = params;
877 }
878}
879
880__libint2_engine_inline void Engine::compute_primdata(Libint_t& primdata, const Shell& s1,
881 const Shell& s2, size_t p1, size_t p2,
882 size_t oset) {
883 const auto& A = s1.O;
884 const auto& B = s2.O;
885
886 const auto alpha1 = s1.alpha[p1];
887 const auto alpha2 = s2.alpha[p2];
888
889 const auto c1 = s1.contr[0].coeff[p1];
890 const auto c2 = s2.contr[0].coeff[p2];
891
892 const auto gammap = alpha1 + alpha2;
893 const auto oogammap = 1 / gammap;
894 const auto rhop_over_alpha1 = alpha2 * oogammap;
895 const auto rhop = alpha1 * rhop_over_alpha1;
896 const auto Px = (alpha1 * A[0] + alpha2 * B[0]) * oogammap;
897 const auto Py = (alpha1 * A[1] + alpha2 * B[1]) * oogammap;
898 const auto Pz = (alpha1 * A[2] + alpha2 * B[2]) * oogammap;
899 const auto AB_x = A[0] - B[0];
900 const auto AB_y = A[1] - B[1];
901 const auto AB_z = A[2] - B[2];
902 const auto AB2_x = AB_x * AB_x;
903 const auto AB2_y = AB_y * AB_y;
904 const auto AB2_z = AB_z * AB_z;
905
906 assert(LIBINT2_SHELLQUARTET_SET == LIBINT2_SHELLQUARTET_SET_STANDARD && "non-standard shell ordering");
907
908 const auto oper_is_nuclear =
909 (oper_ == Operator::nuclear || oper_ == Operator::erf_nuclear ||
910 oper_ == Operator::erfc_nuclear);
911
912 // need to use HRR? see strategy.cc
913 const auto l1 = s1.contr[0].l;
914 const auto l2 = s2.contr[0].l;
915 const bool use_hrr = (oper_is_nuclear || oper_ == Operator::sphemultipole) && l1 > 0 && l2 > 0;
916 // unlike the 2-body ints, can go both ways, determine which way to go (the logic must match TwoCenter_OS_Tactic)
917 const bool hrr_ket_to_bra = l1 >= l2;
918 if (use_hrr) {
919 if (hrr_ket_to_bra) {
920#if LIBINT2_DEFINED(eri, AB_x)
921 primdata.AB_x[0] = AB_x;
922#endif
923#if LIBINT2_DEFINED(eri, AB_y)
924 primdata.AB_y[0] = AB_y;
925#endif
926#if LIBINT2_DEFINED(eri, AB_z)
927 primdata.AB_z[0] = AB_z;
928#endif
929 }
930 else {
931#if LIBINT2_DEFINED(eri, BA_x)
932 primdata.BA_x[0] = - AB_x;
933#endif
934#if LIBINT2_DEFINED(eri, BA_y)
935 primdata.BA_y[0] = - AB_y;
936#endif
937#if LIBINT2_DEFINED(eri, BA_z)
938 primdata.BA_z[0] = - AB_z;
939#endif
940 }
941 }
942
943 // figure out whether will do VRR on center A and/or B
944// if ((!use_hrr && l1 > 0) || hrr_ket_to_bra) {
945#if LIBINT2_DEFINED(eri, PA_x)
946 primdata.PA_x[0] = Px - A[0];
947#endif
948#if LIBINT2_DEFINED(eri, PA_y)
949 primdata.PA_y[0] = Py - A[1];
950#endif
951#if LIBINT2_DEFINED(eri, PA_z)
952 primdata.PA_z[0] = Pz - A[2];
953#endif
954// }
955//
956// if ((!use_hrr && l2 > 0) || !hrr_ket_to_bra) {
957#if LIBINT2_DEFINED(eri, PB_x)
958 primdata.PB_x[0] = Px - B[0];
959#endif
960#if LIBINT2_DEFINED(eri, PB_y)
961 primdata.PB_y[0] = Py - B[1];
962#endif
963#if LIBINT2_DEFINED(eri, PB_z)
964 primdata.PB_z[0] = Pz - B[2];
965#endif
966// }
967
968 if (oper_ == Operator::emultipole1 || oper_ == Operator::emultipole2 ||
969 oper_ == Operator::emultipole3) {
970 const auto& O = any_cast<const operator_traits<
971 Operator::emultipole1>::oper_params_type&>(params_); // same as emultipoleX
972#if LIBINT2_DEFINED(eri, BO_x)
973 primdata.BO_x[0] = B[0] - O[0];
974#endif
975#if LIBINT2_DEFINED(eri, BO_y)
976 primdata.BO_y[0] = B[1] - O[1];
977#endif
978#if LIBINT2_DEFINED(eri, BO_z)
979 primdata.BO_z[0] = B[2] - O[2];
980#endif
981 }
982 if (oper_ == Operator::sphemultipole) {
983 const auto& O = any_cast<const operator_traits<
984 Operator::emultipole1>::oper_params_type&>(params_);
985#if LIBINT2_DEFINED(eri, PO_x)
986 primdata.PO_x[0] = Px - O[0];
987#endif
988#if LIBINT2_DEFINED(eri, PO_y)
989 primdata.PO_y[0] = Py - O[1];
990#endif
991#if LIBINT2_DEFINED(eri, PO_z)
992 primdata.PO_z[0] = Pz - O[2];
993#endif
994#if LIBINT2_DEFINED(eri, PO2)
995 primdata.PO2[0] = (Px - O[0])*(Px - O[0]) + (Py - O[1])*(Py - O[1]) + (Pz - O[2])*(Pz - O[2]);
996#endif
997 }
998
999#if LIBINT2_DEFINED(eri, oo2z)
1000 primdata.oo2z[0] = 0.5 * oogammap;
1001#endif
1002
1003 decltype(c1) sqrt_PI(1.77245385090551602729816748334);
1004 const auto xyz_pfac = sqrt_PI * sqrt(oogammap);
1005 const auto ovlp_ss_x = exp(-rhop * AB2_x) * xyz_pfac * c1 * c2 * scale_;
1006 const auto ovlp_ss_y = exp(-rhop * AB2_y) * xyz_pfac;
1007 const auto ovlp_ss_z = exp(-rhop * AB2_z) * xyz_pfac;
1008
1009 primdata._0_Overlap_0_x[0] = ovlp_ss_x;
1010 primdata._0_Overlap_0_y[0] = ovlp_ss_y;
1011 primdata._0_Overlap_0_z[0] = ovlp_ss_z;
1012
1013 if (oper_ == Operator::kinetic || (deriv_order_ > 0)) {
1014#if LIBINT2_DEFINED(eri, two_alpha0_bra)
1015 primdata.two_alpha0_bra[0] = 2.0 * alpha1;
1016#endif
1017#if LIBINT2_DEFINED(eri, two_alpha0_ket)
1018 primdata.two_alpha0_ket[0] = 2.0 * alpha2;
1019#endif
1020 }
1021
1022 if (oper_is_nuclear) {
1023
1024 const auto& params = (oper_ == Operator::nuclear) ?
1025 any_cast<const operator_traits<Operator::nuclear>::oper_params_type&>(params_) :
1026 std::get<1>(any_cast<const operator_traits<Operator::erfc_nuclear>::oper_params_type&>(params_));
1027
1028 const auto& C = params[oset].second;
1029 const auto& q = params[oset].first;
1030#if LIBINT2_DEFINED(eri, PC_x)
1031 primdata.PC_x[0] = Px - C[0];
1032#endif
1033#if LIBINT2_DEFINED(eri, PC_y)
1034 primdata.PC_y[0] = Py - C[1];
1035#endif
1036#if LIBINT2_DEFINED(eri, PC_z)
1037 primdata.PC_z[0] = Pz - C[2];
1038#endif
1039
1040#if LIBINT2_DEFINED(eri, rho12_over_alpha1) || \
1041 LIBINT2_DEFINED(eri, rho12_over_alpha2)
1042 if (deriv_order_ > 0) {
1043#if LIBINT2_DEFINED(eri, rho12_over_alpha1)
1044 primdata.rho12_over_alpha1[0] = rhop_over_alpha1;
1045#endif
1046#if LIBINT2_DEFINED(eri, rho12_over_alpha2)
1047 primdata.rho12_over_alpha2[0] = alpha1 * oogammap;
1048#endif
1049 }
1050#endif
1051#if LIBINT2_DEFINED(eri, PC_x) && LIBINT2_DEFINED(eri, PC_y) && \
1052 LIBINT2_DEFINED(eri, PC_z)
1053 const auto PC2 = primdata.PC_x[0] * primdata.PC_x[0] +
1054 primdata.PC_y[0] * primdata.PC_y[0] +
1055 primdata.PC_z[0] * primdata.PC_z[0];
1056 const scalar_type U = gammap * PC2;
1057 const scalar_type rho = rhop;
1058 const auto mmax = s1.contr[0].l + s2.contr[0].l + deriv_order_;
1059 auto* fm_ptr = &(primdata.LIBINT_T_S_ELECPOT_S(0)[0]);
1060 if (oper_ == Operator::nuclear) {
1061 const auto& fm_engine_ptr =
1062 any_cast<const detail::core_eval_pack_type<Operator::nuclear>&>(core_eval_pack_)
1063 .first();
1064 fm_engine_ptr->eval(fm_ptr, U, mmax);
1065 } else if (oper_ == Operator::erf_nuclear) {
1066 const auto& core_eval_ptr =
1067 any_cast<const detail::core_eval_pack_type<Operator::erf_nuclear>&>(core_eval_pack_)
1068 .first();
1069 const auto& core_ints_params =
1070 std::get<0>(any_cast<const typename operator_traits<
1071 Operator::erf_nuclear>::oper_params_type&>(core_ints_params_));
1072 core_eval_ptr->eval(fm_ptr, rho, U, mmax, core_ints_params);
1073 } else if (oper_ == Operator::erfc_nuclear) {
1074 const auto& core_eval_ptr =
1075 any_cast<const detail::core_eval_pack_type<Operator::erfc_nuclear>&>(core_eval_pack_)
1076 .first();
1077 const auto& core_ints_params =
1078 std::get<0>(any_cast<const typename operator_traits<
1079 Operator::erfc_nuclear>::oper_params_type&>(core_ints_params_));
1080 core_eval_ptr->eval(fm_ptr, rho, U, mmax, core_ints_params);
1081 }
1082
1083 decltype(U) two_o_sqrt_PI(1.12837916709551257389615890312);
1084 const auto pfac =
1085 -q * sqrt(gammap) * two_o_sqrt_PI * ovlp_ss_x * ovlp_ss_y * ovlp_ss_z;
1086 const auto m_fence = mmax + 1;
1087 for (auto m = 0; m != m_fence; ++m) {
1088 fm_ptr[m] *= pfac;
1089 }
1090#endif
1091 }
1092} // Engine::compute_primdata()
1093
1098template <Operator op, BraKet bk, size_t der>
1099__libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2(
1100 const libint2::Shell& tbra1, const libint2::Shell& tbra2,
1101 const libint2::Shell& tket1, const libint2::Shell& tket2,
1102 const ShellPair* tspbra, const ShellPair* tspket) {
1103 assert(op == oper_ && "Engine::compute2 -- operator mismatch");
1104 assert(bk == braket_ && "Engine::compute2 -- braket mismatch");
1105 assert(der == deriv_order_ &&
1106 "Engine::compute2 -- deriv_order mismatch");
1107 assert(((tspbra == nullptr && tspket == nullptr) || (tspbra != nullptr && tspket != nullptr)) &&
1108 "Engine::compute2 -- expects zero or two ShellPair objects");
1109 assert(screening_method_ != ScreeningMethod::Invalid);
1110
1111 //
1112 // i.e. bra and ket refer to chemists bra and ket
1113 //
1114
1115 // can only handle 1 contraction at a time
1116 assert((tbra1.ncontr() == 1 && tbra2.ncontr() == 1 && tket1.ncontr() == 1 &&
1117 tket2.ncontr() == 1) && "generally-contracted shells are not yet supported");
1118
1119 // angular momentum limit obeyed?
1120 assert(tbra1.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1121 assert(tbra2.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1122 assert(tket1.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1123 assert(tket2.contr[0].l <= lmax_ && "the angular momentum limit is exceeded");
1124
1125#if LIBINT2_SHELLQUARTET_SET == \
1126 LIBINT2_SHELLQUARTET_SET_STANDARD // standard angular momentum ordering
1127 const auto swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l);
1128 const auto swap_tket = (tket1.contr[0].l < tket2.contr[0].l);
1129 const auto swap_braket =
1130 ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l >
1131 tket1.contr[0].l + tket2.contr[0].l)) ||
1132 braket_ == BraKet::xx_xs;
1133#else // orca angular momentum ordering
1134 const auto swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l);
1135 const auto swap_tket = (tket1.contr[0].l > tket2.contr[0].l);
1136 const auto swap_braket =
1137 ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l <
1138 tket1.contr[0].l + tket2.contr[0].l)) ||
1139 braket_ == BraKet::xx_xs;
1140 assert(false && "feature not implemented");
1141 abort();
1142#endif
1143 const auto& bra1 =
1144 swap_braket ? (swap_tket ? tket2 : tket1) : (swap_tbra ? tbra2 : tbra1);
1145 const auto& bra2 =
1146 swap_braket ? (swap_tket ? tket1 : tket2) : (swap_tbra ? tbra1 : tbra2);
1147 const auto& ket1 =
1148 swap_braket ? (swap_tbra ? tbra2 : tbra1) : (swap_tket ? tket2 : tket1);
1149 const auto& ket2 =
1150 swap_braket ? (swap_tbra ? tbra1 : tbra2) : (swap_tket ? tket1 : tket2);
1151 const auto swap_bra = swap_braket ? swap_tket : swap_tbra;
1152 const auto swap_ket = swap_braket ? swap_tbra : swap_tket;
1153 // "permute" also the user-provided shell pair data
1154 const auto* spbra_precomputed = swap_braket ? tspket : tspbra;
1155 const auto* spket_precomputed = swap_braket ? tspbra : tspket;
1156 assert(((spbra_precomputed && spket_precomputed) || (screening_method_ == ScreeningMethod::Original || screening_method_ == ScreeningMethod::Conservative)) && "Engine::compute2: without precomputed shell pair data can only use original or conservative screening methods");
1157
1158 const auto tform = bra1.contr[0].pure || bra2.contr[0].pure ||
1159 ket1.contr[0].pure || ket2.contr[0].pure;
1160 const auto permute = swap_braket || swap_tbra || swap_tket;
1161 const auto use_scratch = permute || tform;
1162
1163 // assert # of primitive pairs
1164 auto nprim_bra1 = bra1.nprim();
1165 auto nprim_bra2 = bra2.nprim();
1166 auto nprim_ket1 = ket1.nprim();
1167 auto nprim_ket2 = ket2.nprim();
1168
1169 // adjust max angular momentum, if needed
1170 auto lmax = std::max(std::max(bra1.contr[0].l, bra2.contr[0].l),
1171 std::max(ket1.contr[0].l, ket2.contr[0].l));
1172 assert(lmax <= lmax_ && "the angular momentum limit is exceeded");
1173 const auto lmax_bra = std::max(bra1.contr[0].l, bra2.contr[0].l);
1174 const auto lmax_ket = std::max(ket1.contr[0].l, ket2.contr[0].l);
1175
1176#ifdef LIBINT2_ENGINE_PROFILE_CLASS
1177 class_id id(bra1.contr[0].l, bra2.contr[0].l, ket1.contr[0].l,
1178 ket2.contr[0].l);
1179 if (class_profiles.find(id) == class_profiles.end()) {
1180 class_profile dummy;
1181 class_profiles[id] = dummy;
1182 }
1183#endif
1184
1185// compute primitive data
1186#ifdef LIBINT2_ENGINE_TIMERS
1187 timers.start(0);
1188#endif
1189 {
1190 auto p = 0;
1191 // initialize shell pairs, if not given ...
1192 // since screening primitive pairs for bra is not possible without knowing the worst-case primitive data in ket (and vice versa)
1193 // using ln_precision_ is not safe, but should work fine for moderate basis sets ... precompute shell pair data
1194 // yourself to guarantee proper screening of primitive pairs (see Engine::set_precision() for details of how to screen
1195 // primitives for a given target precision of the integrals)
1196 const auto target_shellpair_ln_precision = ln_precision_;
1197 const auto recompute_spbra = !spbra_precomputed || spbra_precomputed->ln_prec > target_shellpair_ln_precision;
1198 const auto recompute_spket = !spket_precomputed || spket_precomputed->ln_prec > target_shellpair_ln_precision;
1199 const ShellPair& spbra = recompute_spbra ? (spbra_.init(bra1, bra2, target_shellpair_ln_precision, screening_method_), spbra_) : *spbra_precomputed;
1200 const ShellPair& spket = recompute_spket ? (spket_.init(ket1, ket2, target_shellpair_ln_precision, screening_method_), spket_) : *spket_precomputed;
1201 assert(spbra.screening_method_ == screening_method_ && spket.screening_method_ == screening_method_ && "Engine::compute2: received ShellPair initialized for an incompatible screening method");
1202 // determine whether shell pair data refers to the actual ({bra1,bra2}) or swapped ({bra2,bra1}) pairs
1203 // if computed the shell pair data here then it's always in actual order, otherwise check swap_bra/swap_ket
1204 const auto spbra_is_swapped = recompute_spbra ? false : swap_bra;
1205 const auto spket_is_swapped = recompute_spket ? false : swap_ket;
1206
1207 using real_t = Shell::real_t;
1208 // swapping bra turns AB into BA = -AB
1209 real_t BA[3];
1210 if (spbra_is_swapped) {
1211 for(auto xyz=0; xyz!=3; ++xyz)
1212 BA[xyz] = - spbra_precomputed->AB[xyz];
1213 }
1214 const auto& AB = spbra_is_swapped ? BA : spbra.AB;
1215 // swapping ket turns CD into DC = -CD
1216 real_t DC[3];
1217 if (spket_is_swapped) {
1218 for(auto xyz=0; xyz!=3; ++xyz)
1219 DC[xyz] = - spket_precomputed->AB[xyz];
1220 }
1221 const auto& CD = spket_is_swapped ? DC : spket.AB;
1222
1223 const auto& A = bra1.O;
1224 const auto& B = bra2.O;
1225 const auto& C = ket1.O;
1226 const auto& D = ket2.O;
1227
1228 // compute all primitive quartet data
1229 const auto npbra = spbra.primpairs.size();
1230 const auto npket = spket.primpairs.size();
1231 const scalar_type npbraket = npbra*npket;
1232 for (auto pb = 0; pb != npbra; ++pb) {
1233 for (auto pk = 0; pk != npket; ++pk) {
1234 // primitive quartet coarse screening:
1235 if (spbra.primpairs[pb].ln_scr + spket.primpairs[pk].ln_scr >
1236 ln_precision_) {
1237 Libint_t& primdata = primdata_[p];
1238 const auto& sbra1 = bra1;
1239 const auto& sbra2 = bra2;
1240 const auto& sket1 = ket1;
1241 const auto& sket2 = ket2;
1242 auto pbra = pb;
1243 auto pket = pk;
1244
1245 const auto& spbrapp = spbra.primpairs[pbra];
1246 const auto& spketpp = spket.primpairs[pket];
1247 // if shell-pair data given by user
1248 const auto& pbra1 = spbra_is_swapped ? spbrapp.p2 : spbrapp.p1;
1249 const auto& pbra2 = spbra_is_swapped ? spbrapp.p1 : spbrapp.p2;
1250 const auto& pket1 = spket_is_swapped ? spketpp.p2 : spketpp.p1;
1251 const auto& pket2 = spket_is_swapped ? spketpp.p1 : spketpp.p2;
1252
1253 const auto alpha0 = sbra1.alpha[pbra1];
1254 const auto alpha1 = sbra2.alpha[pbra2];
1255 const auto alpha2 = sket1.alpha[pket1];
1256 const auto alpha3 = sket2.alpha[pket2];
1257
1258 const auto c0 = sbra1.contr[0].coeff[pbra1];
1259 const auto c1 = sbra2.contr[0].coeff[pbra2];
1260 const auto c2 = sket1.contr[0].coeff[pket1];
1261 const auto c3 = sket2.contr[0].coeff[pket2];
1262
1263 const auto l0 = sbra1.contr[0].l;
1264 const auto l1 = sbra2.contr[0].l;
1265 const auto l2 = sket1.contr[0].l;
1266 const auto l3 = sket2.contr[0].l;
1267 const auto l = l0 + l1 + l2 + l3;
1268
1269 const auto gammap = alpha0 + alpha1;
1270 const auto oogammap = spbrapp.one_over_gamma;
1271 const auto rhop = alpha0 * alpha1 * oogammap;
1272
1273 const auto gammaq = alpha2 + alpha3;
1274 const auto oogammaq = spketpp.one_over_gamma;
1275 const auto rhoq = alpha2 * alpha3 * oogammaq;
1276
1277 const auto& P = spbrapp.P;
1278 const auto& Q = spketpp.P;
1279 const auto PQx = P[0] - Q[0];
1280 const auto PQy = P[1] - Q[1];
1281 const auto PQz = P[2] - Q[2];
1282 const auto PQ2 = PQx * PQx + PQy * PQy + PQz * PQz;
1283
1284 const auto K12 = spbrapp.K * spketpp.K;
1285 const auto gammapq = gammap + gammaq;
1286 const auto sqrt_gammapq = sqrt(gammapq);
1287 const auto oogammapq = 1.0 / (gammapq);
1288 auto pfac = K12 * sqrt_gammapq * oogammapq;
1289 pfac *= c0 * c1 * c2 * c3 * scale_;
1290
1291 // original and conservative methods: screen primitive integral using actual pfac
1292 if (static_cast<int>(screening_method_) & (static_cast<int>(ScreeningMethod::Original) | static_cast<int>(ScreeningMethod::Conservative))) {
1293 scalar_type magnitude_estimate = std::abs(pfac);
1294 if (screening_method_ == ScreeningMethod::Conservative) {
1295 // magnitude of primitive (ab|cd) integral for nonzero L differs from that of (00|00) geometric and exponent-dependent factor,
1296 // some of which only depend on bra or ket, and some are bra-ket dependent ... here we account only for bra-only and ket-only
1297 // nonspherical factors
1298 const auto nonspherical_pfac_magnitude = std::max(
1299 1., spbrapp.nonsph_screen_fac * spketpp.nonsph_screen_fac);
1300 magnitude_estimate *= nonspherical_pfac_magnitude * npbraket;
1301 }
1302 if (magnitude_estimate < precision_)
1303 continue;
1304 }
1305
1306 {
1307 const scalar_type rho = gammap * gammaq * oogammapq;
1308 const scalar_type T = PQ2 * rho;
1309 auto* gm_ptr = &(primdata.LIBINT_T_SS_EREP_SS(0)[0]);
1310 const auto mmax = l + deriv_order_;
1311
1312 if (!skip_core_ints) {
1313 switch (oper_) {
1314 case Operator::coulomb: {
1315 const auto& core_eval_ptr =
1316 any_cast<const detail::core_eval_pack_type<Operator::coulomb>&>(core_eval_pack_)
1317 .first();
1318 core_eval_ptr->eval(gm_ptr, T, mmax);
1319 } break;
1320 case Operator::cgtg_x_coulomb: {
1321 const auto& core_eval_ptr =
1322 any_cast<const detail::core_eval_pack_type<
1323 Operator::cgtg_x_coulomb>&>(core_eval_pack_)
1324 .first();
1325 auto& core_eval_scratch = any_cast<detail::core_eval_pack_type<
1326 Operator::cgtg_x_coulomb>&>(core_eval_pack_)
1327 .second();
1328 const auto& core_ints_params =
1329 any_cast<const typename operator_traits<
1330 Operator::cgtg>::oper_params_type&>(core_ints_params_);
1331 core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params,
1332 &core_eval_scratch);
1333 } break;
1334 case Operator::cgtg: {
1335 const auto& core_eval_ptr =
1336 any_cast<const detail::core_eval_pack_type<Operator::cgtg>&>(core_eval_pack_)
1337 .first();
1338 const auto& core_ints_params =
1339 any_cast<const typename operator_traits<
1340 Operator::cgtg>::oper_params_type&>(core_ints_params_);
1341 core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1342 } break;
1343 case Operator::delcgtg2: {
1344 const auto& core_eval_ptr =
1345 any_cast<const detail::core_eval_pack_type<Operator::delcgtg2>&>(core_eval_pack_)
1346 .first();
1347 const auto& core_ints_params =
1348 any_cast<const typename operator_traits<
1349 Operator::cgtg>::oper_params_type&>(core_ints_params_);
1350 core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1351 } break;
1352 case Operator::delta: {
1353 const auto& core_eval_ptr =
1354 any_cast<const detail::core_eval_pack_type<Operator::delta>&>(core_eval_pack_)
1355 .first();
1356 core_eval_ptr->eval(gm_ptr, rho, T, mmax);
1357 } break;
1358 case Operator::r12: {
1359 const auto& core_eval_ptr =
1360 any_cast<const detail::core_eval_pack_type<Operator::r12>&>(core_eval_pack_)
1361 .first();
1362 core_eval_ptr->eval(gm_ptr, rho, T, mmax);
1363 } break;
1364 case Operator::erf_coulomb: {
1365 const auto& core_eval_ptr =
1366 any_cast<const detail::core_eval_pack_type<Operator::erf_coulomb>&>(core_eval_pack_)
1367 .first();
1368 const auto& core_ints_params =
1369 any_cast<const typename operator_traits<
1370 Operator::erf_coulomb>::oper_params_type&>(core_ints_params_);
1371 core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1372 } break;
1373 case Operator::erfc_coulomb: {
1374 const auto& core_eval_ptr =
1375 any_cast<const detail::core_eval_pack_type<Operator::erfc_coulomb>&>(core_eval_pack_)
1376 .first();
1377 const auto& core_ints_params =
1378 any_cast<const typename operator_traits<
1379 Operator::erfc_coulomb>::oper_params_type&>(core_ints_params_);
1380 core_eval_ptr->eval(gm_ptr, rho, T, mmax, core_ints_params);
1381 } break;
1382 case Operator::stg: {
1383 const auto& core_eval_ptr =
1384 any_cast<const detail::core_eval_pack_type<Operator::stg>&>(core_eval_pack_)
1385 .first();
1386 auto zeta =
1387 any_cast<const typename operator_traits<
1388 Operator::stg>::oper_params_type&>(core_ints_params_);
1389 const auto one_over_rho = gammapq * oogammap * oogammaq;
1390 core_eval_ptr->eval_slater(gm_ptr, one_over_rho, T, mmax, zeta);
1391 } break;
1392 case Operator::stg_x_coulomb: {
1393 const auto& core_eval_ptr =
1394 any_cast<const detail::core_eval_pack_type<Operator::stg>&>(core_eval_pack_)
1395 .first();
1396 auto zeta =
1397 any_cast<const typename operator_traits<
1398 Operator::stg>::oper_params_type&>(core_ints_params_);
1399 const auto one_over_rho = gammapq * oogammap * oogammaq;
1400 core_eval_ptr->eval_yukawa(gm_ptr, one_over_rho, T, mmax, zeta);
1401 } break;
1402 default:
1403 assert(false && "missing case in a switch"); // unreachable
1404 abort();
1405 }
1406 }
1407
1408 for (auto m = 0; m != mmax + 1; ++m) {
1409 gm_ptr[m] *= pfac;
1410 }
1411
1412 if (mmax != 0) {
1413 if (braket_ == BraKet::xx_xx) {
1414#if LIBINT2_DEFINED(eri, PA_x)
1415 primdata.PA_x[0] = P[0] - A[0];
1416#endif
1417#if LIBINT2_DEFINED(eri, PA_y)
1418 primdata.PA_y[0] = P[1] - A[1];
1419#endif
1420#if LIBINT2_DEFINED(eri, PA_z)
1421 primdata.PA_z[0] = P[2] - A[2];
1422#endif
1423#if LIBINT2_DEFINED(eri, PB_x)
1424 primdata.PB_x[0] = P[0] - B[0];
1425#endif
1426#if LIBINT2_DEFINED(eri, PB_y)
1427 primdata.PB_y[0] = P[1] - B[1];
1428#endif
1429#if LIBINT2_DEFINED(eri, PB_z)
1430 primdata.PB_z[0] = P[2] - B[2];
1431#endif
1432 }
1433
1434 if (braket_ != BraKet::xs_xs) {
1435#if LIBINT2_DEFINED(eri, QC_x)
1436 primdata.QC_x[0] = Q[0] - C[0];
1437#endif
1438#if LIBINT2_DEFINED(eri, QC_y)
1439 primdata.QC_y[0] = Q[1] - C[1];
1440#endif
1441#if LIBINT2_DEFINED(eri, QC_z)
1442 primdata.QC_z[0] = Q[2] - C[2];
1443#endif
1444#if LIBINT2_DEFINED(eri, QD_x)
1445 primdata.QD_x[0] = Q[0] - D[0];
1446#endif
1447#if LIBINT2_DEFINED(eri, QD_y)
1448 primdata.QD_y[0] = Q[1] - D[1];
1449#endif
1450#if LIBINT2_DEFINED(eri, QD_z)
1451 primdata.QD_z[0] = Q[2] - D[2];
1452#endif
1453 }
1454
1455 if (braket_ == BraKet::xx_xx) {
1456#if LIBINT2_DEFINED(eri, AB_x)
1457 primdata.AB_x[0] = AB[0];
1458#endif
1459#if LIBINT2_DEFINED(eri, AB_y)
1460 primdata.AB_y[0] = AB[1];
1461#endif
1462#if LIBINT2_DEFINED(eri, AB_z)
1463 primdata.AB_z[0] = AB[2];
1464#endif
1465#if LIBINT2_DEFINED(eri, BA_x)
1466 primdata.BA_x[0] = -AB[0];
1467#endif
1468#if LIBINT2_DEFINED(eri, BA_y)
1469 primdata.BA_y[0] = -AB[1];
1470#endif
1471#if LIBINT2_DEFINED(eri, BA_z)
1472 primdata.BA_z[0] = -AB[2];
1473#endif
1474 }
1475
1476 if (braket_ != BraKet::xs_xs) {
1477#if LIBINT2_DEFINED(eri, CD_x)
1478 primdata.CD_x[0] = CD[0];
1479#endif
1480#if LIBINT2_DEFINED(eri, CD_y)
1481 primdata.CD_y[0] = CD[1];
1482#endif
1483#if LIBINT2_DEFINED(eri, CD_z)
1484 primdata.CD_z[0] = CD[2];
1485#endif
1486#if LIBINT2_DEFINED(eri, DC_x)
1487 primdata.DC_x[0] = -CD[0];
1488#endif
1489#if LIBINT2_DEFINED(eri, DC_y)
1490 primdata.DC_y[0] = -CD[1];
1491#endif
1492#if LIBINT2_DEFINED(eri, DC_z)
1493 primdata.DC_z[0] = -CD[2];
1494#endif
1495 }
1496
1497 const auto gammap_o_gammapgammaq = oogammapq * gammap;
1498 const auto gammaq_o_gammapgammaq = oogammapq * gammaq;
1499
1500 const auto Wx =
1501 (gammap_o_gammapgammaq * P[0] + gammaq_o_gammapgammaq * Q[0]);
1502 const auto Wy =
1503 (gammap_o_gammapgammaq * P[1] + gammaq_o_gammapgammaq * Q[1]);
1504 const auto Wz =
1505 (gammap_o_gammapgammaq * P[2] + gammaq_o_gammapgammaq * Q[2]);
1506
1507 if (deriv_order_ > 0 || lmax_bra > 0) {
1508#if LIBINT2_DEFINED(eri, WP_x)
1509 primdata.WP_x[0] = Wx - P[0];
1510#endif
1511#if LIBINT2_DEFINED(eri, WP_y)
1512 primdata.WP_y[0] = Wy - P[1];
1513#endif
1514#if LIBINT2_DEFINED(eri, WP_z)
1515 primdata.WP_z[0] = Wz - P[2];
1516#endif
1517 }
1518 if (deriv_order_ > 0 || lmax_ket > 0) {
1519#if LIBINT2_DEFINED(eri, WQ_x)
1520 primdata.WQ_x[0] = Wx - Q[0];
1521#endif
1522#if LIBINT2_DEFINED(eri, WQ_y)
1523 primdata.WQ_y[0] = Wy - Q[1];
1524#endif
1525#if LIBINT2_DEFINED(eri, WQ_z)
1526 primdata.WQ_z[0] = Wz - Q[2];
1527#endif
1528 }
1529#if LIBINT2_DEFINED(eri, oo2z)
1530 primdata.oo2z[0] = 0.5 * oogammap;
1531#endif
1532#if LIBINT2_DEFINED(eri, oo2e)
1533 primdata.oo2e[0] = 0.5 * oogammaq;
1534#endif
1535#if LIBINT2_DEFINED(eri, oo2ze)
1536 primdata.oo2ze[0] = 0.5 * oogammapq;
1537#endif
1538#if LIBINT2_DEFINED(eri, roz)
1539 primdata.roz[0] = rho * oogammap;
1540#endif
1541#if LIBINT2_DEFINED(eri, roe)
1542 primdata.roe[0] = rho * oogammaq;
1543#endif
1544
1545// using ITR?
1546#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_0_x)
1547 primdata.TwoPRepITR_pfac0_0_0_x[0] =
1548 -(alpha1 * AB[0] + alpha3 * CD[0]) * oogammap;
1549#endif
1550#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_0_y)
1551 primdata.TwoPRepITR_pfac0_0_0_y[0] =
1552 -(alpha1 * AB[1] + alpha3 * CD[1]) * oogammap;
1553#endif
1554#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_0_z)
1555 primdata.TwoPRepITR_pfac0_0_0_z[0] =
1556 -(alpha1 * AB[2] + alpha3 * CD[2]) * oogammap;
1557#endif
1558#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_0_x)
1559 primdata.TwoPRepITR_pfac0_1_0_x[0] =
1560 -(alpha1 * AB[0] + alpha3 * CD[0]) * oogammaq;
1561#endif
1562#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_0_y)
1563 primdata.TwoPRepITR_pfac0_1_0_y[0] =
1564 -(alpha1 * AB[1] + alpha3 * CD[1]) * oogammaq;
1565#endif
1566#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_0_z)
1567 primdata.TwoPRepITR_pfac0_1_0_z[0] =
1568 -(alpha1 * AB[2] + alpha3 * CD[2]) * oogammaq;
1569#endif
1570#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_1_x)
1571 primdata.TwoPRepITR_pfac0_0_1_x[0] =
1572 (alpha0 * AB[0] + alpha2 * CD[0]) * oogammap;
1573#endif
1574#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_1_y)
1575 primdata.TwoPRepITR_pfac0_0_1_y[0] =
1576 (alpha0 * AB[1] + alpha2 * CD[1]) * oogammap;
1577#endif
1578#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_0_1_z)
1579 primdata.TwoPRepITR_pfac0_0_1_z[0] =
1580 (alpha0 * AB[2] + alpha2 * CD[2]) * oogammap;
1581#endif
1582#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_1_x)
1583 primdata.TwoPRepITR_pfac0_1_1_x[0] =
1584 (alpha0 * AB[0] + alpha2 * CD[0]) * oogammaq;
1585#endif
1586#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_1_y)
1587 primdata.TwoPRepITR_pfac0_1_1_y[0] =
1588 (alpha0 * AB[1] + alpha2 * CD[1]) * oogammaq;
1589#endif
1590#if LIBINT2_DEFINED(eri, TwoPRepITR_pfac0_1_1_z)
1591 primdata.TwoPRepITR_pfac0_1_1_z[0] =
1592 (alpha0 * AB[2] + alpha2 * CD[2]) * oogammaq;
1593#endif
1594#if LIBINT2_DEFINED(eri, eoz)
1595 primdata.eoz[0] = gammaq * oogammap;
1596#endif
1597#if LIBINT2_DEFINED(eri, zoe)
1598 primdata.zoe[0] = gammap * oogammaq;
1599#endif
1600
1601 // prefactors for derivative ERI relations
1602 if (deriv_order_ > 0) {
1603#if LIBINT2_DEFINED(eri, alpha1_rho_over_zeta2)
1604 primdata.alpha1_rho_over_zeta2[0] =
1605 alpha0 * (oogammap * gammaq_o_gammapgammaq);
1606#endif
1607#if LIBINT2_DEFINED(eri, alpha2_rho_over_zeta2)
1608 primdata.alpha2_rho_over_zeta2[0] =
1609 alpha1 * (oogammap * gammaq_o_gammapgammaq);
1610#endif
1611#if LIBINT2_DEFINED(eri, alpha3_rho_over_eta2)
1612 primdata.alpha3_rho_over_eta2[0] =
1613 alpha2 * (oogammaq * gammap_o_gammapgammaq);
1614#endif
1615#if LIBINT2_DEFINED(eri, alpha4_rho_over_eta2)
1616 primdata.alpha4_rho_over_eta2[0] =
1617 alpha3 * (oogammaq * gammap_o_gammapgammaq);
1618#endif
1619#if LIBINT2_DEFINED(eri, alpha1_over_zetapluseta)
1620 primdata.alpha1_over_zetapluseta[0] = alpha0 * oogammapq;
1621#endif
1622#if LIBINT2_DEFINED(eri, alpha2_over_zetapluseta)
1623 primdata.alpha2_over_zetapluseta[0] = alpha1 * oogammapq;
1624#endif
1625#if LIBINT2_DEFINED(eri, alpha3_over_zetapluseta)
1626 primdata.alpha3_over_zetapluseta[0] = alpha2 * oogammapq;
1627#endif
1628#if LIBINT2_DEFINED(eri, alpha4_over_zetapluseta)
1629 primdata.alpha4_over_zetapluseta[0] = alpha3 * oogammapq;
1630#endif
1631#if LIBINT2_DEFINED(eri, rho12_over_alpha1)
1632 primdata.rho12_over_alpha1[0] = alpha1 * oogammap;
1633#endif
1634#if LIBINT2_DEFINED(eri, rho12_over_alpha2)
1635 primdata.rho12_over_alpha2[0] = alpha0 * oogammap;
1636#endif
1637#if LIBINT2_DEFINED(eri, rho34_over_alpha3)
1638 primdata.rho34_over_alpha3[0] = alpha3 * oogammaq;
1639#endif
1640#if LIBINT2_DEFINED(eri, rho34_over_alpha4)
1641 primdata.rho34_over_alpha4[0] = alpha2 * oogammaq;
1642#endif
1643#if LIBINT2_DEFINED(eri, two_alpha0_bra)
1644 primdata.two_alpha0_bra[0] = 2.0 * alpha0;
1645#endif
1646#if LIBINT2_DEFINED(eri, two_alpha0_ket)
1647 primdata.two_alpha0_ket[0] = 2.0 * alpha1;
1648#endif
1649#if LIBINT2_DEFINED(eri, two_alpha1_bra)
1650 primdata.two_alpha1_bra[0] = 2.0 * alpha2;
1651#endif
1652#if LIBINT2_DEFINED(eri, two_alpha1_ket)
1653 primdata.two_alpha1_ket[0] = 2.0 * alpha3;
1654#endif
1655 }
1656 } // m != 0
1657
1658 ++p;
1659 } // prefac-based prim quartet screen
1660
1661 } // rough prim quartet screen based on pair values
1662 } // ket prim pair
1663 } // bra prim pair
1664 primdata_[0].contrdepth = p;
1665 }
1666
1667#ifdef LIBINT2_ENGINE_TIMERS
1668 const auto t0 = timers.stop(0);
1669#ifdef LIBINT2_ENGINE_PROFILE_CLASS
1670 class_profiles[id].prereqs += t0.count();
1671 if (primdata_[0].contrdepth != 0) {
1672 class_profiles[id].nshellset += 1;
1673 class_profiles[id].nprimset += primdata_[0].contrdepth;
1674 }
1675#endif
1676#endif
1677
1678 // all primitive combinations screened out? set 1st target ptr to nullptr
1679 if (primdata_[0].contrdepth == 0) {
1680 targets_[0] = nullptr;
1681 return targets_;
1682 }
1683
1684 // compute directly (ss|ss)
1685 const auto compute_directly = lmax == 0 && deriv_order_ == 0;
1686
1687 if (compute_directly) {
1688#ifdef LIBINT2_ENGINE_TIMERS
1689 timers.start(1);
1690#endif
1691 auto& stack = primdata_[0].stack[0];
1692 stack = 0;
1693 for (auto p = 0; p != primdata_[0].contrdepth; ++p)
1694 stack += primdata_[p].LIBINT_T_SS_EREP_SS(0)[0];
1695 primdata_[0].targets[0] = primdata_[0].stack;
1696#ifdef LIBINT2_ENGINE_TIMERS
1697 const auto t1 = timers.stop(1);
1698#ifdef LIBINT2_ENGINE_PROFILE_CLASS
1699 class_profiles[id].build_vrr += t1.count();
1700#endif
1701#endif
1702 } // compute directly
1703 else { // call libint
1704#ifdef LIBINT2_ENGINE_TIMERS
1705#ifdef LIBINT2_PROFILE
1706 const auto t1_hrr_start = primdata_[0].timers->read(0);
1707 const auto t1_vrr_start = primdata_[0].timers->read(1);
1708#endif
1709 timers.start(1);
1710#endif
1711
1712 size_t buildfnidx;
1713 switch (braket_) {
1714 case BraKet::xx_xx:
1715 buildfnidx =
1716 ((bra1.contr[0].l * hard_lmax_ + bra2.contr[0].l) * hard_lmax_ +
1717 ket1.contr[0].l) *
1718 hard_lmax_ +
1719 ket2.contr[0].l;
1720 break;
1721
1722 case BraKet::xx_xs: assert(false && "this braket is not supported"); abort(); break;
1723 case BraKet::xs_xx: {
1725 int ket_lmax = hard_lmax_;
1726 switch (deriv_order_) {
1727#define BOOST_PP_NBODYENGINE_MCR8(r, data, i, elem) \
1728 case i: \
1729 BOOST_PP_IF(BOOST_PP_IS_1(BOOST_PP_CAT(LIBINT2_CENTER_DEPENDENT_MAX_AM_3eri, \
1730 BOOST_PP_IIF(BOOST_PP_GREATER(i, 0), \
1731 i, \
1732 BOOST_PP_EMPTY()) \
1733 ) \
1734 ), \
1735 ket_lmax = hard_default_lmax_, BOOST_PP_EMPTY()); \
1736 break;
1737
1738 BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_NBODYENGINE_MCR8, _,
1739 BOOST_PP_NBODY_DERIV_ORDER_LIST)
1740
1741 default: assert(false && "missing case in switch"); abort();
1742 }
1743 buildfnidx =
1744 (bra1.contr[0].l * ket_lmax + ket1.contr[0].l) * ket_lmax +
1745 ket2.contr[0].l;
1746#ifdef ERI3_PURE_SH
1747 if (bra1.contr[0].l > 1)
1748 assert(bra1.contr[0].pure &&
1749 "library assumes a solid harmonics shell in bra of a 3-center "
1750 "2-body int, but a cartesian shell given");
1751#endif
1752 } break;
1753
1754 case BraKet::xs_xs:
1755 buildfnidx = bra1.contr[0].l * hard_lmax_ + ket1.contr[0].l;
1756#ifdef ERI2_PURE_SH
1757 if (bra1.contr[0].l > 1)
1758 assert(bra1.contr[0].pure &&
1759 "library assumes solid harmonics shells in a 2-center "
1760 "2-body int, but a cartesian shell given in bra");
1761 if (ket1.contr[0].l > 1)
1762 assert(ket1.contr[0].pure &&
1763 "library assumes solid harmonics shells in a 2-center "
1764 "2-body int, but a cartesian shell given in bra");
1765#endif
1766 break;
1767
1768 default:
1769 assert(false && "invalid braket");
1770 abort();
1771 }
1772
1773 assert(buildfnptrs_[buildfnidx] && "null build function ptr");
1774 buildfnptrs_[buildfnidx](&primdata_[0]);
1775
1776#ifdef LIBINT2_ENGINE_TIMERS
1777 const auto t1 = timers.stop(1);
1778#ifdef LIBINT2_ENGINE_PROFILE_CLASS
1779#ifndef LIBINT2_PROFILE
1780 class_profiles[id].build_vrr += t1.count();
1781#else
1782 class_profiles[id].build_hrr += primdata_[0].timers->read(0) - t1_hrr_start;
1783 class_profiles[id].build_vrr += primdata_[0].timers->read(1) - t1_vrr_start;
1784#endif
1785#endif
1786#endif
1787
1788#ifdef LIBINT2_ENGINE_TIMERS
1789 timers.start(2);
1790#endif
1791
1792 const auto ntargets = nshellsets();
1793
1794 // if needed, permute and transform
1795 if (use_scratch) {
1796 constexpr auto using_scalar_real = sizeof(value_type) == sizeof(scalar_type);
1797 static_assert(using_scalar_real,
1798 "Libint2 C++11 API only supports scalar real types");
1799 typedef Eigen::Matrix<scalar_type, Eigen::Dynamic, Eigen::Dynamic,
1800 Eigen::RowMajor>
1801 Matrix;
1802
1803 // a 2-d view of the 4-d source tensor
1804 const auto nr1_cart = bra1.cartesian_size();
1805 const auto nr2_cart = bra2.cartesian_size();
1806 const auto nc1_cart = ket1.cartesian_size();
1807 const auto nc2_cart = ket2.cartesian_size();
1808 const auto ncol_cart = nc1_cart * nc2_cart;
1809 const auto n1234_cart = nr1_cart * nr2_cart * ncol_cart;
1810 const auto nr1 = bra1.size();
1811 const auto nr2 = bra2.size();
1812 const auto nc1 = ket1.size();
1813 const auto nc2 = ket2.size();
1814 const auto nrow = nr1 * nr2;
1815 const auto ncol = nc1 * nc2;
1816
1817 // a 2-d view of the 4-d target tensor
1818 const auto nr1_tgt = tbra1.size();
1819 const auto nr2_tgt = tbra2.size();
1820 const auto nc1_tgt = tket1.size();
1821 const auto nc2_tgt = tket2.size();
1822 const auto ncol_tgt = nc1_tgt * nc2_tgt;
1823 const auto n_tgt = nr1_tgt * nr2_tgt * ncol_tgt;
1824
1825 auto hotscr = &scratch_[0]; // points to the hot scratch
1826
1827 // transform to solid harmonics first, then unpermute, if necessary
1828 for (auto s = 0; s != ntargets; ++s) {
1829 // when permuting derivatives may need to permute shellsets also, not
1830 // just integrals
1831 // within shellsets; this will point to where source shellset s should end up
1832 auto s_target = s;
1833
1834 auto source =
1835 primdata_[0].targets[s]; // points to the most recent result
1836 auto target = hotscr;
1837
1838 if (bra1.contr[0].pure) {
1839 libint2::solidharmonics::transform_first(
1840 bra1.contr[0].l, nr2_cart * ncol_cart, source, target);
1841 std::swap(source, target);
1842 }
1843 if (bra2.contr[0].pure) {
1844 libint2::solidharmonics::transform_inner(bra1.size(), bra2.contr[0].l,
1845 ncol_cart, source, target);
1846 std::swap(source, target);
1847 }
1848 if (ket1.contr[0].pure) {
1849 libint2::solidharmonics::transform_inner(nrow, ket1.contr[0].l,
1850 nc2_cart, source, target);
1851 std::swap(source, target);
1852 }
1853 if (ket2.contr[0].pure) {
1854 libint2::solidharmonics::transform_last(
1855 bra1.size() * bra2.size() * ket1.size(), ket2.contr[0].l, source,
1856 target);
1857 std::swap(source, target);
1858 }
1859
1860 // need to permute?
1861 if (permute) {
1862 // loop over rows of the source matrix
1863 const auto* src_row_ptr = source;
1864 auto tgt_ptr = target;
1865
1866 // if permuting derivatives ints must update their derivative index
1867 // Additional BraKet types would require adding support to DerivMapGenerator::generate_deriv_index_map
1868 if (deriv_order_){
1869 Tensor<size_t>& mapDerivIndex = libint2::DerivMapGenerator::instance(deriv_order_, braket_);
1870 switch(braket_) {
1871 case BraKet::xx_xx: {
1872 s_target = mapDerivIndex((size_t)swap_braket,(size_t)swap_tbra,(size_t)swap_tket,(size_t)s);
1873 }
1874 break;
1875 case BraKet::xs_xx: {
1876 assert(!swap_bra);
1877 assert(!swap_braket);
1878 s_target = mapDerivIndex((size_t)0,(size_t)0,(size_t)swap_tket,(size_t)s);
1879 }
1880 break;
1881 case BraKet::xs_xs: {
1882 assert(!swap_bra);
1883 assert(!swap_ket);
1884 assert(!swap_braket);
1885 s_target = s;
1886 }
1887 break;
1888
1889 default:
1890 assert(false && "This braket type not yet supported for geometric derivatives");
1891 abort();
1892 }
1893 }
1894
1895 for (auto r1 = 0; r1 != nr1; ++r1) {
1896 for (auto r2 = 0; r2 != nr2; ++r2, src_row_ptr += ncol) {
1897 typedef Eigen::Map<const Matrix> ConstMap;
1898 typedef Eigen::Map<Matrix> Map;
1899 typedef Eigen::Map<Matrix, Eigen::Unaligned,
1900 Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>>
1901 StridedMap;
1902
1903 // represent this source row as a matrix
1904 ConstMap src_blk_mat(src_row_ptr, nc1, nc2);
1905
1906 // and copy to the block of the target matrix
1907 if (swap_braket) {
1908 // if swapped bra and ket, a row of source becomes a column
1909 // of
1910 // target
1911 // source row {r1,r2} is mapped to target column {r1,r2} if
1912 // !swap_tket, else to {r2,r1}
1913 const auto tgt_col_idx =
1914 !swap_tket ? r1 * nr2 + r2 : r2 * nr1 + r1;
1915 StridedMap tgt_blk_mat(
1916 tgt_ptr + tgt_col_idx, nr1_tgt, nr2_tgt,
1917 Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>(
1918 nr2_tgt * ncol_tgt, ncol_tgt));
1919 if (swap_tbra)
1920 tgt_blk_mat = src_blk_mat.transpose();
1921 else
1922 tgt_blk_mat = src_blk_mat;
1923 } else {
1924 // source row {r1,r2} is mapped to target row {r1,r2} if
1925 // !swap_tbra, else to {r2,r1}
1926 const auto tgt_row_idx =
1927 !swap_tbra ? r1 * nr2 + r2 : r2 * nr1 + r1;
1928 Map tgt_blk_mat(tgt_ptr + tgt_row_idx * ncol, nc1_tgt, nc2_tgt);
1929 if (swap_tket)
1930 tgt_blk_mat = src_blk_mat.transpose();
1931 else
1932 tgt_blk_mat = src_blk_mat;
1933 }
1934 } // end of loop
1935 } // over rows of source
1936 std::swap(source, target);
1937 } // need to permute?
1938
1939 // if the integrals ended up in scratch_, keep them there, update the
1940 // hot buffer
1941 // to the next available scratch space, and update targets_
1942 if (source != primdata_[0].targets[s]) {
1943 hotscr += n1234_cart;
1944 if (s != s_target)
1945 assert(set_targets_ && "logic error"); // mess if targets_ points
1946 // to primdata_[0].targets
1947 targets_[s_target] = source;
1948 } else {
1949 // only needed if permuted derivs or set_targets_ is true
1950 // for simplicity always set targets_
1951 if (s != s_target)
1952 assert(set_targets_ && "logic error"); // mess if targets_ points
1953 // to primdata_[0].targets
1954 targets_[s_target] = source;
1955 }
1956 } // loop over shellsets
1957 } // if need_scratch => needed to transpose and/or tform
1958 else { // did not use scratch? may still need to update targets_
1959 if (set_targets_) {
1960 for (auto s = 0; s != ntargets; ++s)
1961 targets_[s] = primdata_[0].targets[s];
1962 }
1963 }
1964
1965#ifdef LIBINT2_ENGINE_TIMERS
1966 const auto t2 = timers.stop(2);
1967#ifdef LIBINT2_ENGINE_PROFILE_CLASS
1968 class_profiles[id].tform += t2.count();
1969#endif
1970#endif
1971 } // not (ss|ss)
1972
1973 if (cartesian_shell_normalization() == CartesianShellNormalization::uniform) {
1974 std::array<std::reference_wrapper<const Shell>, 4> shells{tbra1, tbra2, tket1, tket2};
1975 for (auto s = 0ul; s != targets_.size(); ++s) {
1976 uniform_normalize_cartesian_shells(const_cast<value_type*>(targets_[s]), shells);
1977 }
1978 }
1979
1980 return targets_;
1981}
1982
1983#undef BOOST_PP_NBODY_OPERATOR_LIST
1984#undef BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE
1985#undef BOOST_PP_NBODY_OPERATOR_INDEX_LIST
1986#undef BOOST_PP_NBODY_BRAKET_INDEX_TUPLE
1987#undef BOOST_PP_NBODY_BRAKET_INDEX_LIST
1988#undef BOOST_PP_NBODY_DERIV_ORDER_TUPLE
1989#undef BOOST_PP_NBODY_DERIV_ORDER_LIST
1990#undef BOOST_PP_NBODYENGINE_MCR3
1991#undef BOOST_PP_NBODYENGINE_MCR3_ncenter
1992#undef BOOST_PP_NBODYENGINE_MCR3_default_ncenter
1993#undef BOOST_PP_NBODYENGINE_MCR3_NCENTER
1994#undef BOOST_PP_NBODYENGINE_MCR3_OPER
1995#undef BOOST_PP_NBODYENGINE_MCR3_DERIV
1996#undef BOOST_PP_NBODYENGINE_MCR3_task
1997#undef BOOST_PP_NBODYENGINE_MCR3_TASK
1998#undef BOOST_PP_NBODYENGINE_MCR4
1999#undef BOOST_PP_NBODYENGINE_MCR5
2000#undef BOOST_PP_NBODYENGINE_MCR6
2001#undef BOOST_PP_NBODYENGINE_MCR7
2002
2003#ifdef LIBINT2_DOES_NOT_INLINE_ENGINE
2004template any Engine::enforce_params_type<Engine::empty_pod>(
2005 Operator oper, const Engine::empty_pod& params, bool throw_if_wrong_type);
2006
2007template const Engine::target_ptr_vec& Engine::compute<Shell>(
2008 const Shell& first_shell, const Shell&);
2009
2010template const Engine::target_ptr_vec& Engine::compute<Shell, Shell>(
2011 const Shell& first_shell, const Shell&, const Shell&);
2012
2013template const Engine::target_ptr_vec& Engine::compute<Shell, Shell, Shell>(
2014 const Shell& first_shell, const Shell&, const Shell&, const Shell&);
2015#endif
2016
2017} // namespace libint2
2018
2019#endif /* _libint2_src_lib_libint_engineimpl_h_ */
a partial C++17 std::any implementation (and less efficient than can be)
Definition: any.h:67
Array idential to C++0X arrays.
Definition: stdarray_bits.h:14
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
@ Original
standard screening method:
@ Conservative
conservative screening:
bool initialized()
checks if the libint has been initialized.
Definition: initialize.h:63
__libint2_engine_inline libint2::any default_params(const Operator &oper)
the runtime version of operator_traits<oper>::default_params()
Definition: engine.impl.h:111
void uniform_normalize_cartesian_shells(Real *intset, std::array< std::reference_wrapper< const Shell >, N > shells)
rescales cartesian Gaussians to convert from standard to uniform-normalized convention
Definition: cartesian.h:95
Iterates over all partitions of a non-negative integer into nonnegative integers in reverse lexicog...
Definition: intpart_iter.h:74
generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
Definition: shell.h:81
svector< Contraction > contr
contractions
Definition: shell.h:105
static const Shell & unit()
Definition: shell.h:224