LIBINT 2.9.0
1body.h
1/*
2 * Copyright (C) 2004-2024 Edward F. Valeev
3 *
4 * This file is part of Libint library.
5 *
6 * Libint library is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint library. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21// standard C++ headers
22#include <atomic>
23#include <chrono>
24#include <cmath>
25#include <fstream>
26#include <iomanip>
27#include <iostream>
28#include <iterator>
29#include <memory>
30#include <mutex>
31#include <sstream>
32#include <thread>
33#include <unordered_map>
34#include <vector>
35
36// have BTAS library?
37#ifdef LIBINT2_HAVE_BTAS
38#include <btas/btas.h>
39#else // LIBINT2_HAVE_BTAS
40#error "libint2::lcao requires BTAS"
41#endif
42
43// Libint Gaussian integrals library
44#include <libint2/chemistry/sto3g_atomic_density.h>
45#include <libint2/diis.h>
46#include <libint2/util/intpart_iter.h>
47
48#include <libint2.hpp>
49
50#if defined(_OPENMP)
51#include <omp.h>
52#endif
53
54typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2;
55typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3;
56typedef btas::RangeNd<CblasRowMajor, std::array<long, 4>> Range4;
57typedef btas::Tensor<double, Range2> Tensor2d;
58typedef btas::Tensor<double, Range3> Tensor3d;
59typedef btas::Tensor<double, Range3> Tensor4d;
60
61typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
62 Matrix; // import dense, dynamically sized Matrix type from Eigen;
63 // this is a matrix with row-major storage
64 // (http://en.wikipedia.org/wiki/Row-major_order)
65// to meet the layout of the integrals returned by the Libint integral library
66typedef Eigen::DiagonalMatrix<double, Eigen::Dynamic, Eigen::Dynamic>
67 DiagonalMatrix;
68
69using libint2::BasisSet;
70using libint2::BraKet;
71using libint2::Operator;
72using libint2::Shell;
73using libint2::libint2::Atom;
74
75std::vector<libint2::Atom> read_geometry(const std::string& filename);
76Matrix compute_soad(const std::vector<libint2::Atom>& atoms);
77// computes norm of shell-blocks of A
78Matrix compute_shellblock_norm(const BasisSet& obs, const Matrix& A);
79
80template <Operator obtype>
81std::array<Matrix, libint2::operator_traits<obtype>::nopers> compute_1body_ints(
82 const BasisSet& obs,
83 const std::vector<libint2::Atom>& atoms = std::vector<libint2::Atom>());
84
85#if LIBINT2_DERIV_ONEBODY_ORDER
86template <Operator obtype>
87std::vector<Matrix> compute_1body_ints_deriv(
88 unsigned deriv_order, const BasisSet& obs,
89 const std::vector<libint2::Atom>& atoms);
90#endif // LIBINT2_DERIV_ONEBODY_ORDER
91
92template <libint2::Operator Kernel = libint2::Operator::coulomb>
93Matrix compute_schwarz_ints(
94 const BasisSet& bs1, const BasisSet& bs2 = BasisSet(),
95 bool use_2norm = false, // use infty norm by default
96 typename libint2::operator_traits<Kernel>::oper_params_type params =
97 libint2::operator_traits<Kernel>::default_params());
98Matrix compute_do_ints(const BasisSet& bs1, const BasisSet& bs2 = BasisSet(),
99 bool use_2norm = false // use infty norm by default
100);
101
102using shellpair_list_t = std::unordered_map<size_t, std::vector<size_t>>;
103shellpair_list_t obs_shellpair_list; // shellpair list for OBS
104
109shellpair_list_t compute_shellpair_list(const BasisSet& bs1,
110 const BasisSet& bs2 = BasisSet(),
111 double threshold = 1e-12);
112
113Matrix compute_2body_fock(
114 const BasisSet& obs, const Matrix& D,
115 double precision = std::numeric_limits<
116 double>::epsilon(), // discard contributions smaller than this
117 const Matrix& Schwarz = Matrix() // K_ij = sqrt(||(ij|ij)||_\infty); if
118 // empty, do not Schwarz screen
119);
120// an Fock builder that can accept densities expressed a separate basis
121Matrix compute_2body_fock_general(
122 const BasisSet& obs, const Matrix& D, const BasisSet& D_bs,
123 bool D_is_sheldiagonal = false, // set D_is_shelldiagonal if doing SOAD
124 double precision = std::numeric_limits<
125 double>::epsilon() // discard contributions smaller than this
126);
127
128#if LIBINT2_DERIV_ERI_ORDER
129template <unsigned deriv_order>
130std::vector<Matrix> compute_2body_fock_deriv(
131 const BasisSet& obs, const std::vector<libint2::Atom>& atoms,
132 const Matrix& D,
133 double precision = std::numeric_limits<
134 double>::epsilon(), // discard contributions smaller than this
135 const Matrix& Schwarz = Matrix() // K_ij = sqrt(||(ij|ij)||_\infty); if
136 // empty, do not Schwarz screen
137);
138#endif // LIBINT2_DERIV_ERI_ORDER
139
140// returns {X,X^{-1},S_condition_number_after_conditioning}, where
141// X is the generalized square-root-inverse such that X.transpose() * S * X = I
142// columns of Xinv is the basis conditioned such that
143// the condition number of its metric (Xinv.transpose . Xinv) <
144// S_condition_number_threshold
145std::tuple<Matrix, Matrix, double> conditioning_orthogonalizer(
146 const Matrix& S, double S_condition_number_threshold);
147
148#ifdef LIBINT2_HAVE_BTAS
149#define HAVE_DENSITY_FITTING 1
151 const BasisSet& obs;
152 const BasisSet& dfbs;
153 DFFockEngine(const BasisSet& _obs, const BasisSet& _dfbs)
154 : obs(_obs), dfbs(_dfbs) {}
155
156 typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3d;
157 typedef btas::Tensor<double, Range3d> Tensor3d;
158 Tensor3d xyK;
159
160 // a DF-based builder, using coefficients of occupied MOs
161 Matrix compute_2body_fock_dfC(const Matrix& Cocc);
162};
163#endif // HAVE_DENSITY_FITTING
164
165namespace libint2 {
166int nthreads;
167
169template <typename Lambda>
170void parallel_do(Lambda& lambda) {
171#ifdef _OPENMP
172#pragma omp parallel
173 {
174 auto thread_id = omp_get_thread_num();
175 lambda(thread_id);
176 }
177#else // use C++11 threads
178 std::vector<std::thread> threads;
179 for (int thread_id = 0; thread_id != libint2::nthreads; ++thread_id) {
180 if (thread_id != nthreads - 1)
181 threads.push_back(std::thread(lambda, thread_id));
182 else
183 lambda(thread_id);
184 } // threads_id
185 for (int thread_id = 0; thread_id < nthreads - 1; ++thread_id)
186 threads[thread_id].join();
187#endif
188}
189} // namespace libint2
190
191int main(int argc, char* argv[]) {
192 using std::cerr;
193 using std::cout;
194 using std::endl;
195
196 try {
197 /*** =========================== ***/
198 /*** initialize molecule ***/
199 /*** =========================== ***/
200
201 // read geometry from a file; by default read from h2o.xyz, else take
202 // filename (.xyz) from the command line
203 const auto filename = (argc > 1) ? argv[1] : "h2o.xyz";
204 const auto basisname = (argc > 2) ? argv[2] : "aug-cc-pVDZ";
205 bool do_density_fitting = false;
206#ifdef HAVE_DENSITY_FITTING
207 do_density_fitting = (argc > 3);
208 const auto dfbasisname = do_density_fitting ? argv[3] : "";
209#endif
210 std::vector<libint2::Atom> atoms = read_geometry(filename);
211
212 // set up thread pool
213 {
214 using libint2::nthreads;
215 auto nthreads_cstr = getenv("LIBINT_NUM_THREADS");
216 nthreads = 1;
217 if (nthreads_cstr && strcmp(nthreads_cstr, "")) {
218 std::istringstream iss(nthreads_cstr);
219 iss >> nthreads;
220 if (nthreads > 1 << 16 || nthreads <= 0) nthreads = 1;
221 }
222#if defined(_OPENMP)
223 omp_set_num_threads(nthreads);
224#endif
225 std::cout << "Will scale over " << nthreads
226#if defined(_OPENMP)
227 << " OpenMP"
228#else
229 << " C++11"
230#endif
231 << " threads" << std::endl;
232 }
233
234 // count the number of electrons
235 auto nelectron = 0;
236 for (auto i = 0; i < atoms.size(); ++i) nelectron += atoms[i].atomic_number;
237 const auto ndocc = nelectron / 2;
238 cout << "# of electrons = " << nelectron << endl;
239
240 // compute the nuclear repulsion energy
241 auto enuc = 0.0;
242 for (auto i = 0; i < atoms.size(); i++)
243 for (auto j = i + 1; j < atoms.size(); j++) {
244 auto xij = atoms[i].x - atoms[j].x;
245 auto yij = atoms[i].y - atoms[j].y;
246 auto zij = atoms[i].z - atoms[j].z;
247 auto r2 = xij * xij + yij * yij + zij * zij;
248 auto r = sqrt(r2);
249 enuc += atoms[i].atomic_number * atoms[j].atomic_number / r;
250 }
251 cout << "Nuclear repulsion energy = " << std::setprecision(15) << enuc
252 << endl;
253
255
256 cout << "libint2::Atomic Cartesian coordinates (a.u.):" << endl;
257 for (const auto& a : atoms)
258 std::cout << a.atomic_number << " " << a.x << " " << a.y << " " << a.z
259 << std::endl;
260
261 BasisSet obs(basisname, atoms);
262 cout << "orbital basis set rank = " << obs.nbf() << endl;
263
264#ifdef HAVE_DENSITY_FITTING
265 BasisSet dfbs;
266 if (do_density_fitting) {
267 dfbs = BasisSet(dfbasisname, atoms);
268 cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;
269 }
270#endif // HAVE_DENSITY_FITTING
271
272 /*** =========================== ***/
273 /*** compute 1-e integrals ***/
274 /*** =========================== ***/
275
276 // initializes the Libint integrals library ... now ready to compute
278
279 // compute OBS non-negligible shell-pair list
280 {
281 obs_shellpair_list = compute_shellpair_list(obs);
282 size_t nsp = 0;
283 for (auto& sp : obs_shellpair_list) {
284 nsp += sp.second.size();
285 }
286 std::cout << "# of {all,non-negligible} shell-pairs = {"
287 << obs.size() * (obs.size() + 1) / 2 << "," << nsp << "}"
288 << std::endl;
289 }
290
291 // compute one-body integrals
292 auto S = compute_1body_ints<Operator::overlap>(obs)[0];
293 auto T = compute_1body_ints<Operator::kinetic>(obs)[0];
294 auto V = compute_1body_ints<Operator::nuclear>(
295 obs, libint2::make_point_charges(atoms))[0];
296 Matrix H = T + V;
297 T.resize(0, 0);
298 V.resize(0, 0);
299
300 // compute orthogonalizer X such that X.transpose() . S . X = I
301 Matrix X, Xinv;
302 double XtX_condition_number; // condition number of "re-conditioned"
303 // overlap obtained as Xinv.transpose() . Xinv
304 // one should think of columns of Xinv as the conditioned basis
305 // Re: name ... cond # (Xinv.transpose() . Xinv) = cond # (X.transpose() .
306 // X)
307 // by default assume can manage to compute with condition number of S <=
308 // 1/eps
309 // this is probably too optimistic, but in well-behaved cases even 10^11 is
310 // OK
311 double S_condition_number_threshold =
312 1.0 / std::numeric_limits<double>::epsilon();
313 std::tie(X, Xinv, XtX_condition_number) =
314 conditioning_orthogonalizer(S, S_condition_number_threshold);
315
316 Matrix D;
317 Matrix C_occ;
318 Matrix evals;
319 { // use SOAD as the guess density
320 const auto tstart = std::chrono::high_resolution_clock::now();
321
322 auto D_minbs = compute_soad(atoms); // compute guess in minimal basis
323 BasisSet minbs("STO-3G", atoms);
324 if (minbs == obs)
325 D = D_minbs;
326 else { // if basis != minimal basis, map non-representable SOAD guess
327 // into the AO basis
328 // by diagonalizing a Fock matrix
329 std::cout << "projecting SOAD into AO basis ... ";
330 auto F = H;
331 F += compute_2body_fock_general(
332 obs, D_minbs, minbs, true /* SOAD_D_is_shelldiagonal */,
333 std::numeric_limits<double>::epsilon() // this is cheap, no reason
334 // to be cheaper
335 );
336
337 // solve F C = e S C by (conditioned) transformation to F' C' = e C',
338 // where
339 // F' = X.transpose() . F . X; the original C is obtained as C = X . C'
340 Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(X.transpose() * F * X);
341 auto C = X * eig_solver.eigenvectors();
342
343 // compute density, D = C(occ) . C(occ)T
344 C_occ = C.leftCols(ndocc);
345 D = C_occ * C_occ.transpose();
346
347 const auto tstop = std::chrono::high_resolution_clock::now();
348 const std::chrono::duration<double> time_elapsed = tstop - tstart;
349 std::cout << "done (" << time_elapsed.count() << " s)" << std::endl;
350 }
351 }
352
353 // pre-compute data for Schwarz bounds
354 auto K = compute_schwarz_ints<>(obs, BasisSet(), true);
355
356// prepare for density fitting
357#ifdef HAVE_DENSITY_FITTING
358 std::unique_ptr<DFFockEngine> dffockengine(
359 do_density_fitting ? new DFFockEngine(obs, dfbs) : nullptr);
360#endif // HAVE_DENSITY_FITTING
361
362 /*** =========================== ***/
363 /*** SCF loop ***/
364 /*** =========================== ***/
365
366 const auto maxiter = 100;
367 const auto conv = 1e-12;
368 auto iter = 0;
369 auto rms_error = 1.0;
370 auto ediff_rel = 0.0;
371 auto ehf = 0.0;
372 auto n2 = D.cols() * D.rows();
373 libint2::DIIS<Matrix> diis(2); // start DIIS on second iteration
374
375 // prepare for incremental Fock build ...
376 Matrix D_diff = D;
377 Matrix F = H;
378 bool reset_incremental_fock_formation = false;
379 bool incremental_Fbuild_started = false;
380 double start_incremental_F_threshold = 1e-5;
381 double next_reset_threshold = 0.0;
382 size_t last_reset_iteration = 0;
383 // ... unless doing DF, then use MO coefficients, hence not "incremental"
384 if (do_density_fitting) start_incremental_F_threshold = 0.0;
385
386 do {
387 const auto tstart = std::chrono::high_resolution_clock::now();
388 ++iter;
389
390 // Last iteration's energy and density
391 auto ehf_last = ehf;
392 Matrix D_last = D;
393
394 if (not incremental_Fbuild_started &&
395 rms_error < start_incremental_F_threshold) {
396 incremental_Fbuild_started = true;
397 reset_incremental_fock_formation = false;
398 last_reset_iteration = iter - 1;
399 next_reset_threshold = rms_error / 1e1;
400 std::cout << "== started incremental fock build" << std::endl;
401 }
402 if (reset_incremental_fock_formation || not incremental_Fbuild_started) {
403 F = H;
404 D_diff = D;
405 }
406 if (reset_incremental_fock_formation && incremental_Fbuild_started) {
407 reset_incremental_fock_formation = false;
408 last_reset_iteration = iter;
409 next_reset_threshold = rms_error / 1e1;
410 std::cout << "== reset incremental fock build" << std::endl;
411 }
412
413 // build a new Fock matrix
414 if (not do_density_fitting) {
415 // totally empirical precision variation, involves the condition number
416 const auto precision_F = std::min(
417 std::min(1e-3 / XtX_condition_number, 1e-7),
418 std::max(rms_error / 1e4, std::numeric_limits<double>::epsilon()));
419 F += compute_2body_fock(obs, D_diff, precision_F, K);
420 }
421#if HAVE_DENSITY_FITTING
422 else { // do DF
423 F = H + dffockengine->compute_2body_fock_dfC(C_occ);
424 }
425#else
426 else {
427 assert(false);
428 } // do_density_fitting is true but HAVE_DENSITY_FITTING is not defined!
429 // should not happen
430#endif // HAVE_DENSITY_FITTING
431
432 // compute HF energy with the non-extrapolated Fock matrix
433 ehf = D.cwiseProduct(H + F).sum();
434 ediff_rel = std::abs((ehf - ehf_last) / ehf);
435
436 // compute SCF error
437 Matrix FD_comm = F * D * S - S * D * F;
438 rms_error = FD_comm.norm() / n2;
439 if (rms_error < next_reset_threshold || iter - last_reset_iteration >= 8)
440 reset_incremental_fock_formation = true;
441
442 // DIIS extrapolate F
443 Matrix F_diis = F; // extrapolated F cannot be used in incremental Fock
444 // build; only used to produce the density
445 // make a copy of the unextrapolated matrix
446 diis.extrapolate(F_diis, FD_comm);
447
448 // solve F C = e S C by (conditioned) transformation to F' C' = e C',
449 // where
450 // F' = X.transpose() . F . X; the original C is obtained as C = X . C'
451 Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(X.transpose() * F_diis *
452 X);
453 evals = eig_solver.eigenvalues();
454 auto C = X * eig_solver.eigenvectors();
455
456 // compute density, D = C(occ) . C(occ)T
457 C_occ = C.leftCols(ndocc);
458 D = C_occ * C_occ.transpose();
459 D_diff = D - D_last;
460
461 const auto tstop = std::chrono::high_resolution_clock::now();
462 const std::chrono::duration<double> time_elapsed = tstop - tstart;
463
464 if (iter == 1)
465 std::cout << "\n\nIter E(HF) D(E)/E "
466 "RMS([F,D])/nn Time(s)\n";
467 printf(" %02d %20.12f %20.12e %20.12e %10.5lf\n", iter, ehf + enuc,
468 ediff_rel, rms_error, time_elapsed.count());
469
470 } while (((ediff_rel > conv) || (rms_error > conv)) && (iter < maxiter));
471
472 printf("** Hartree-Fock energy = %20.12f\n", ehf + enuc);
473
474 auto Mu = compute_1body_ints<Operator::emultipole2>(obs);
475 std::array<double, 3> mu;
476 std::array<double, 6> qu;
477 for (int xyz = 0; xyz != 3; ++xyz)
478 mu[xyz] = -2 * D.cwiseProduct(Mu[xyz + 1])
479 .sum(); // 2 = alpha + beta, -1 = electron charge
480 for (int k = 0; k != 6; ++k)
481 qu[k] = -2 * D.cwiseProduct(Mu[k + 4])
482 .sum(); // 2 = alpha + beta, -1 = electron charge
483 std::cout << "** edipole = ";
484 std::copy(mu.begin(), mu.end(),
485 std::ostream_iterator<double>(std::cout, " "));
486 std::cout << std::endl;
487 std::cout << "** equadrupole = ";
488 std::copy(qu.begin(), qu.end(),
489 std::ostream_iterator<double>(std::cout, " "));
490 std::cout << std::endl;
491
492 { // compute force
493#if LIBINT2_DERIV_ONEBODY_ORDER
494 // compute 1-e forces
495 Matrix F1 = Matrix::Zero(atoms.size(), 3);
496 Matrix F_Pulay = Matrix::Zero(atoms.size(), 3);
498 // one-body contributions to the forces
500 auto T1 = compute_1body_ints_deriv<Operator::kinetic>(1, obs, atoms);
501 auto V1 = compute_1body_ints_deriv<Operator::nuclear>(1, obs, atoms);
502 for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
503 for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
504 auto force = 2 * (T1[i] + V1[i]).cwiseProduct(D).sum();
505 F1(atom, xyz) += force;
506 }
507 }
508
510 // Pulay force
512 // orbital energy density
513 DiagonalMatrix evals_occ(evals.topRows(ndocc));
514 Matrix W = C_occ * evals_occ * C_occ.transpose();
515 auto S1 = compute_1body_ints_deriv<Operator::overlap>(1, obs, atoms);
516 for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
517 for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
518 auto force = 2 * S1[i].cwiseProduct(W).sum();
519 F_Pulay(atom, xyz) -= force;
520 }
521 }
522
523 std::cout << "** 1-body forces = ";
524 for (int atom = 0; atom != atoms.size(); ++atom)
525 for (int xyz = 0; xyz != 3; ++xyz) std::cout << F1(atom, xyz) << " ";
526 std::cout << std::endl;
527 std::cout << "** Pulay forces = ";
528 for (int atom = 0; atom != atoms.size(); ++atom)
529 for (int xyz = 0; xyz != 3; ++xyz)
530 std::cout << F_Pulay(atom, xyz) << " ";
531 std::cout << std::endl;
532#endif // LIBINT2_DERIV_ONEBODY_ORDER
533
534#if LIBINT2_DERIV_ERI_ORDER
535 // compute 2-e forces
536 Matrix F2 = Matrix::Zero(atoms.size(), 3);
537
539 // two-body contributions to the forces
541 auto G1 = compute_2body_fock_deriv<1>(obs, atoms, D);
542 for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
543 for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
544 // identity prefactor since E(HF) = trace(H + F, D) = trace(2H + G, D)
545 auto force = G1[i].cwiseProduct(D).sum();
546 F2(atom, xyz) += force;
547 }
548 }
549
550 std::cout << "** 2-body forces = ";
551 for (int atom = 0; atom != atoms.size(); ++atom)
552 for (int xyz = 0; xyz != 3; ++xyz) std::cout << F2(atom, xyz) << " ";
553 std::cout << std::endl;
554#endif
555
556// if support 1-e and 2-e derivatives compute nuclear repulsion force and the
557// total force
558#if LIBINT2_DERIV_ONEBODY_ORDER && LIBINT2_DERIV_ERI_ORDER
559 // compute nuclear repulsion forces
560 Matrix FN = Matrix::Zero(atoms.size(), 3);
562 // nuclear repulsion contribution to the forces
564 for (auto a1 = 1; a1 != atoms.size(); ++a1) {
565 const auto& atom1 = atoms[a1];
566 for (auto a2 = 0; a2 < a1; ++a2) {
567 const auto& atom2 = atoms[a2];
568
569 auto x12 = atom1.x - atom2.x;
570 auto y12 = atom1.y - atom2.y;
571 auto z12 = atom1.z - atom2.z;
572 auto r12_2 = x12 * x12 + y12 * y12 + z12 * z12;
573 auto r12 = sqrt(r12_2);
574 auto r12_3 = r12 * r12_2;
575
576 auto z1z2_over_r12_3 =
577 atom1.atomic_number * atom2.atomic_number / r12_3;
578
579 auto fx = -x12 * z1z2_over_r12_3;
580 auto fy = -y12 * z1z2_over_r12_3;
581 auto fz = -z12 * z1z2_over_r12_3;
582 FN(a1, 0) += fx;
583 FN(a1, 1) += fy;
584 FN(a1, 2) += fz;
585 FN(a2, 0) -= fx;
586 FN(a2, 1) -= fy;
587 FN(a2, 2) -= fz;
588 }
589 }
590
591 std::cout << "** nuclear repulsion forces = ";
592 for (int atom = 0; atom != atoms.size(); ++atom)
593 for (int xyz = 0; xyz != 3; ++xyz) std::cout << FN(atom, xyz) << " ";
594 std::cout << std::endl;
595
596 auto F = F1 + F_Pulay + F2 + FN;
597 std::cout << "** Hartree-Fock forces = ";
598 for (int atom = 0; atom != atoms.size(); ++atom)
599 for (int xyz = 0; xyz != 3; ++xyz) std::cout << F(atom, xyz) << " ";
600 std::cout << std::endl;
601#endif
602 }
603
604 { // compute hessian
605 const auto ncoords = 3 * atoms.size();
606 // # of elems in upper triangle
607 const auto nelem = ncoords * (ncoords + 1) / 2;
608#if LIBINT2_DERIV_ONEBODY_ORDER > 1
609 // compute 1-e hessian
610 Matrix H1 = Matrix::Zero(ncoords, ncoords);
611 Matrix H_Pulay = Matrix::Zero(ncoords, ncoords);
613 // one-body contributions to the hessian
615 auto T2 = compute_1body_ints_deriv<Operator::kinetic>(2, obs, atoms);
616 auto V2 = compute_1body_ints_deriv<Operator::nuclear>(2, obs, atoms);
617 for (auto row = 0, i = 0; row != ncoords; ++row) {
618 for (auto col = row; col != ncoords; ++col, ++i) {
619 auto hess = 2 * (T2[i] + V2[i]).cwiseProduct(D).sum();
620 H1(row, col) += hess;
621 }
622 }
623
625 // Pulay hessian
627 // orbital energy density
628 DiagonalMatrix evals_occ(evals.topRows(ndocc));
629 Matrix W = C_occ * evals_occ * C_occ.transpose();
630 auto S2 = compute_1body_ints_deriv<Operator::overlap>(2, obs, atoms);
631 for (auto row = 0, i = 0; row != ncoords; ++row) {
632 for (auto col = row; col != ncoords; ++col, ++i) {
633 auto hess = 2 * S2[i].cwiseProduct(W).sum();
634 H_Pulay(row, col) -= hess;
635 }
636 }
637
638 std::cout << "** 1-body hessian = ";
639 for (auto row = 0, i = 0; row != ncoords; ++row) {
640 for (auto col = row; col != ncoords; ++col) {
641 std::cout << H1(row, col) << " ";
642 }
643 }
644 std::cout << std::endl;
645
646 std::cout << "** Pulay hessian = ";
647 for (auto row = 0, i = 0; row != ncoords; ++row) {
648 for (auto col = row; col != ncoords; ++col) {
649 std::cout << H_Pulay(row, col) << " ";
650 }
651 }
652 std::cout << std::endl;
653#endif // LIBINT2_DERIV_ONEBODY_ORDER > 1
654
655#if LIBINT2_DERIV_ERI_ORDER > 1
656 // compute 2-e forces
657 Matrix H2 = Matrix::Zero(ncoords, ncoords);
658
660 // two-body contributions to the forces
662 auto G2 = compute_2body_fock_deriv<2>(obs, atoms, D);
663 for (auto row = 0, i = 0; row != ncoords; ++row) {
664 for (auto col = row; col != ncoords; ++col, ++i) {
665 // identity prefactor since E(HF) = trace(H + F, D) = trace(2H + G, D)
666 auto hess = G2[i].cwiseProduct(D).sum();
667 H2(row, col) += hess;
668 }
669 }
670
671 std::cout << "** 2-body hessian = ";
672 for (auto row = 0, i = 0; row != ncoords; ++row) {
673 for (auto col = row; col != ncoords; ++col) {
674 std::cout << H2(row, col) << " ";
675 }
676 }
677 std::cout << std::endl;
678#endif
679
680// if support 1-e and 2-e 2nd derivatives compute nuclear repulsion hessian and
681// the total hessian
682#if LIBINT2_DERIV_ONEBODY_ORDER > 1 && LIBINT2_DERIV_ERI_ORDER > 1
683 // compute nuclear repulsion hessian
684 // NB only the upper triangle is computed!!!
685 Matrix HN = Matrix::Zero(ncoords, ncoords);
686
688 // nuclear repulsion contribution to the hessian
690 for (auto a1 = 1; a1 != atoms.size(); ++a1) {
691 const auto& atom1 = atoms[a1];
692 for (auto a2 = 0; a2 < a1; ++a2) {
693 const auto& atom2 = atoms[a2];
694
695 auto x12 = atom1.x - atom2.x;
696 auto y12 = atom1.y - atom2.y;
697 auto z12 = atom1.z - atom2.z;
698 auto x12_2 = x12 * x12;
699 auto y12_2 = y12 * y12;
700 auto z12_2 = z12 * z12;
701 auto r12_2 = x12 * x12 + y12 * y12 + z12 * z12;
702 auto r12 = sqrt(r12_2);
703 auto r12_5 = r12 * r12_2 * r12_2;
704
705 auto z1z2_over_r12_5 =
706 atom1.atomic_number * atom2.atomic_number / r12_5;
707
708 HN(3 * a1 + 0, 3 * a1 + 0) += z1z2_over_r12_5 * (3 * x12_2 - r12_2);
709 HN(3 * a1 + 1, 3 * a1 + 1) += z1z2_over_r12_5 * (3 * y12_2 - r12_2);
710 HN(3 * a1 + 2, 3 * a1 + 2) += z1z2_over_r12_5 * (3 * z12_2 - r12_2);
711 HN(3 * a1 + 0, 3 * a1 + 1) += z1z2_over_r12_5 * (3 * x12 * y12);
712 HN(3 * a1 + 0, 3 * a1 + 2) += z1z2_over_r12_5 * (3 * x12 * z12);
713 HN(3 * a1 + 1, 3 * a1 + 2) += z1z2_over_r12_5 * (3 * y12 * z12);
714
715 HN(3 * a2 + 0, 3 * a2 + 0) += z1z2_over_r12_5 * (3 * x12_2 - r12_2);
716 HN(3 * a2 + 1, 3 * a2 + 1) += z1z2_over_r12_5 * (3 * y12_2 - r12_2);
717 HN(3 * a2 + 2, 3 * a2 + 2) += z1z2_over_r12_5 * (3 * z12_2 - r12_2);
718 HN(3 * a2 + 0, 3 * a2 + 1) += z1z2_over_r12_5 * (3 * x12 * y12);
719 HN(3 * a2 + 0, 3 * a2 + 2) += z1z2_over_r12_5 * (3 * x12 * z12);
720 HN(3 * a2 + 1, 3 * a2 + 2) += z1z2_over_r12_5 * (3 * y12 * z12);
721
722 HN(3 * a2 + 0, 3 * a1 + 0) -= z1z2_over_r12_5 * (3 * x12_2 - r12_2);
723 HN(3 * a2 + 1, 3 * a1 + 1) -= z1z2_over_r12_5 * (3 * y12_2 - r12_2);
724 HN(3 * a2 + 2, 3 * a1 + 2) -= z1z2_over_r12_5 * (3 * z12_2 - r12_2);
725 HN(3 * a2 + 1, 3 * a1 + 0) -= z1z2_over_r12_5 * (3 * y12 * x12);
726 HN(3 * a2 + 2, 3 * a1 + 0) -= z1z2_over_r12_5 * (3 * z12 * x12);
727 HN(3 * a2 + 2, 3 * a1 + 1) -= z1z2_over_r12_5 * (3 * z12 * y12);
728 HN(3 * a2 + 0, 3 * a1 + 1) -= z1z2_over_r12_5 * (3 * x12 * y12);
729 HN(3 * a2 + 0, 3 * a1 + 2) -= z1z2_over_r12_5 * (3 * x12 * z12);
730 HN(3 * a2 + 1, 3 * a1 + 2) -= z1z2_over_r12_5 * (3 * y12 * z12);
731 }
732 }
733
734 std::cout << "** nuclear repulsion hessian = ";
735 for (auto row = 0, i = 0; row != ncoords; ++row) {
736 for (auto col = row; col != ncoords; ++col) {
737 std::cout << HN(row, col) << " ";
738 }
739 }
740 std::cout << std::endl;
741
742 auto H = H1 + H_Pulay + H2 + HN;
743 std::cout << "** Hartree-Fock hessian = ";
744 for (auto row = 0, i = 0; row != ncoords; ++row) {
745 for (auto col = row; col != ncoords; ++col) {
746 std::cout << H(row, col) << " ";
747 }
748 }
749 std::cout << std::endl;
750#endif
751 }
752
753 libint2::finalize(); // done with libint
754
755 } // end of try block; if any exceptions occurred, report them and exit
756 // cleanly
757
758 catch (const char* ex) {
759 cerr << "caught exception: " << ex << endl;
760 return 1;
761 } catch (std::string& ex) {
762 cerr << "caught exception: " << ex << endl;
763 return 1;
764 } catch (std::exception& ex) {
765 cerr << ex.what() << endl;
766 return 1;
767 } catch (...) {
768 cerr << "caught unknown exception\n";
769 return 1;
770 }
771
772 return 0;
773}
774
775std::vector<libint2::Atom> read_geometry(const std::string& filename) {
776 std::cout << "Will read geometry from " << filename << std::endl;
777 std::ifstream is(filename);
778 if (not is.good()) {
779 char errmsg[256] = "Could not open file ";
780 strncpy(errmsg + 20, filename.c_str(), 235);
781 errmsg[255] = '\0';
782 throw std::ios_base::failure(errmsg);
783 }
784
785 // to prepare for MPI parallelization, we will read the entire file into a
786 // string that can be
787 // broadcast to everyone, then converted to an std::istringstream object that
788 // can be used just like std::ifstream
789 std::ostringstream oss;
790 oss << is.rdbuf();
791 // use ss.str() to get the entire contents of the file as an std::string
792 // broadcast
793 // then make an std::istringstream in each process
794 std::istringstream iss(oss.str());
795
796 // check the extension: if .xyz, assume the standard XYZ format, otherwise
797 // throw an exception
798 if (filename.rfind(".xyz") != std::string::npos)
799 return libint2::read_dotxyz(iss);
800 else
801 throw std::invalid_argument{"only .xyz files are accepted"};
802}
803
804// computes Superposition-Of-libint2::Atomic-Densities guess for the molecular
805// density matrix in minimal basis; occupies subshells by smearing electrons
806// evenly over the orbitals
807Matrix compute_soad(const std::vector<libint2::Atom>& atoms) {
808 // compute number of atomic orbitals
809 size_t nao = 0;
810 for (const auto& atom : atoms) {
811 const auto Z = atom.atomic_number;
812 nao += libint2::sto3g_num_ao(Z);
813 }
814
815 // compute the minimal basis density
816 Matrix D = Matrix::Zero(nao, nao);
817 size_t ao_offset = 0; // first AO of this atom
818 for (const auto& atom : atoms) {
819 const auto Z = atom.atomic_number;
820 const auto& occvec = libint2::sto3g_ao_occupation_vector(Z);
821 for (const auto& occ : occvec) {
822 D(ao_offset, ao_offset) = occ;
823 ++ao_offset;
824 }
825 }
826
827 return D * 0.5; // we use densities normalized to # of electrons/2
828}
829
830Matrix compute_shellblock_norm(const BasisSet& obs, const Matrix& A) {
831 const auto nsh = obs.size();
832 Matrix Ash(nsh, nsh);
833
834 auto shell2bf = obs.shell2bf();
835 for (size_t s1 = 0; s1 != nsh; ++s1) {
836 const auto& s1_first = shell2bf[s1];
837 const auto& s1_size = obs[s1].size();
838 for (size_t s2 = 0; s2 != nsh; ++s2) {
839 const auto& s2_first = shell2bf[s2];
840 const auto& s2_size = obs[s2].size();
841
842 Ash(s1, s2) = A.block(s1_first, s2_first, s1_size, s2_size)
843 .lpNorm<Eigen::Infinity>();
844 }
845 }
846
847 return Ash;
848}
849
850template <Operator obtype, typename OperatorParams>
851std::array<Matrix, libint2::operator_traits<obtype>::nopers> compute_1body_ints(
852 const BasisSet& obs, OperatorParams params) {
853 const auto n = obs.nbf();
854 const auto nshells = obs.size();
855 using libint2::nthreads;
856 typedef std::array<Matrix, libint2::operator_traits<obtype>::nopers>
857 result_type;
858 const unsigned int nopers = libint2::operator_traits<obtype>::nopers;
859 result_type result;
860 for (auto& r : result) r = Matrix::Zero(n, n);
861
862 // construct the 1-body integrals engine
863 std::vector<libint2::Engine> engines(nthreads);
864 engines[0] = libint2::Engine(obtype, obs.max_nprim(), obs.max_l(), 0);
865 // nuclear attraction ints engine needs to know where the charges sit ...
866 // the nuclei are charges in this case; in QM/MM there will also be classical
867 // charges
868 if (obtype == Operator::nuclear || obtype == Operator::erf_nuclear ||
869 obtype == Operator::erfc_nuclear) {
870 engines[0].set_params(params);
871 }
872 for (size_t i = 1; i != nthreads; ++i) {
873 engines[i] = engines[0];
874 }
875
876 auto shell2bf = obs.shell2bf();
877
878 auto compute = [&](int thread_id) {
879 const auto& buf = engines[thread_id].results();
880
881 // loop over unique shell pairs, {s1,s2} such that s1 >= s2
882 // this is due to the permutational symmetry of the real integrals over
883 // Hermitian operators: (1|2) = (2|1)
884 for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
885 auto bf1 = shell2bf[s1]; // first basis function in this shell
886 auto n1 = obs[s1].size();
887 auto s1_offset = s1 * (s1 + 1) / 2;
888
889 for (auto s2 : obs_shellpair_list[s1]) {
890 auto s12 = s1_offset + s2;
891 if (s12 % nthreads != thread_id) continue;
892
893 auto bf2 = shell2bf[s2];
894 auto n2 = obs[s2].size();
895
896 auto n12 = n1 * n2;
897
898 // compute shell pair; return is the pointer to the buffer
899 engines[thread_id].compute(obs[s1], obs[s2]);
900
901 for (unsigned int op = 0; op != nopers; ++op) {
902 // "map" buffer to a const Eigen Matrix, and copy it to the
903 // corresponding blocks of the result
904 Eigen::Map<const Matrix> buf_mat(buf[op], n1, n2);
905 result[op].block(bf1, bf2, n1, n2) = buf_mat;
906 if (s1 != s2) // if s1 >= s2, copy {s1,s2} to the corresponding
907 // {s2,s1} block, note the transpose!
908 result[op].block(bf2, bf1, n2, n1) = buf_mat.transpose();
909 }
910 }
911 }
912 }; // compute lambda
913
914 libint2::parallel_do(compute);
915
916 return result;
917}
918
919#if LIBINT2_DERIV_ONEBODY_ORDER
920template <Operator obtype>
921std::vector<Matrix> compute_1body_ints_deriv(
922 unsigned deriv_order, const BasisSet& obs,
923 const std::vector<libint2::Atom>& atoms) {
924 using libint2::nthreads;
925 const auto n = obs.nbf();
926 const auto nshells = obs.size();
927 constexpr auto nopers = libint2::operator_traits<obtype>::nopers;
928 const auto nresults =
929 nopers * libint2::num_geometrical_derivatives(atoms.size(), deriv_order);
930 typedef std::vector<Matrix> result_type;
931 result_type result(nresults);
932 for (auto& r : result) r = Matrix::Zero(n, n);
933
934 // construct the 1-body integrals engine
935 std::vector<libint2::Engine> engines(nthreads);
936 engines[0] =
937 libint2::Engine(obtype, obs.max_nprim(), obs.max_l(), deriv_order);
938 // nuclear attraction ints engine needs to know where the charges sit ...
939 // the nuclei are charges in this case; in QM/MM there will also be classical
940 // charges
941 if (obtype == Operator::nuclear) {
942 std::vector<std::pair<double, std::array<double, 3>>> q;
943 for (const auto& atom : atoms) {
944 q.push_back({static_cast<double>(atom.atomic_number),
945 {{atom.x, atom.y, atom.z}}});
946 }
947 engines[0].set_params(q);
948 }
949 for (size_t i = 1; i != nthreads; ++i) {
950 engines[i] = engines[0];
951 }
952
953 auto shell2bf = obs.shell2bf();
954 auto shell2atom = obs.shell2atom(atoms);
955
956 const auto natoms = atoms.size();
957 const auto two_times_ncoords = 6 * natoms;
958 const auto nderivcenters_shset =
959 2 + ((obtype == Operator::nuclear) ? natoms : 0);
960
961 auto compute = [&](int thread_id) {
962 const auto& buf = engines[thread_id].results();
963
964 // loop over unique shell pairs, {s1,s2} such that s1 >= s2
965 // this is due to the permutational symmetry of the real integrals over
966 // Hermitian operators: (1|2) = (2|1)
967 for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
968 auto bf1 = shell2bf[s1]; // first basis function in this shell
969 auto n1 = obs[s1].size();
970 auto atom1 = shell2atom[s1];
971 assert(atom1 != -1);
972
973 auto s1_offset = s1 * (s1 + 1) / 2;
974 for (auto s2 : obs_shellpair_list[s1]) {
975 auto s12 = s1_offset + s2;
976 if (s12 % nthreads != thread_id) continue;
977
978 auto bf2 = shell2bf[s2];
979 auto n2 = obs[s2].size();
980 auto atom2 = shell2atom[s2];
981
982 auto n12 = n1 * n2;
983
984 // compute shell pair; return is the pointer to the buffer
985 engines[thread_id].compute(obs[s1], obs[s2]);
986
987 // "copy" lambda copies shell set \c idx to the operator matrix with
988 // index \c op
989 auto add_shellset_to_dest = [&](std::size_t op, std::size_t idx,
990 double scale = 1.0) {
991 // "map" buffer to a const Eigen Matrix, and copy it to the
992 // corresponding blocks of the result
993 Eigen::Map<const Matrix> buf_mat(buf[idx], n1, n2);
994 if (scale == 1.0)
995 result[op].block(bf1, bf2, n1, n2) += buf_mat;
996 else
997 result[op].block(bf1, bf2, n1, n2) += scale * buf_mat;
998 if (s1 != s2) { // if s1 >= s2, copy {s1,s2} to the corresponding
999 // {s2,s1} block, note the transpose!
1000 if (scale == 1.0)
1001 result[op].block(bf2, bf1, n2, n1) += buf_mat.transpose();
1002 else
1003 result[op].block(bf2, bf1, n2, n1) += scale * buf_mat.transpose();
1004 }
1005 };
1006
1007 switch (deriv_order) {
1008 case 0:
1009 for (std::size_t op = 0; op != nopers; ++op) {
1010 add_shellset_to_dest(op, op);
1011 }
1012 break;
1013
1014 // map deriv quanta for this shell pair to the overall deriv quanta
1015 //
1016 // easiest to explain with example:
1017 // in sto-3g water shells 0 1 2 sit on atom 0, shells 3 and 4 on
1018 // atoms 1 and 2 respectively each call to engine::compute for
1019 // nuclear ints will return derivatives with respect to 15
1020 // coordinates, obtained as 3 (x,y,z) times 2 + 3 = 5 centers (2
1021 // centers on which shells sit + 3 nuclear charges) (for overlap,
1022 // kinetic, and emultipole ints we there are only 6 coordinates
1023 // since the operator is coordinate-independent, or derivatives
1024 // with respect to the operator coordinates are not computed)
1025 //
1026
1027 case 1: {
1028 std::size_t shellset_idx = 0;
1029 for (auto c = 0; c != nderivcenters_shset; ++c) {
1030 auto atom = (c == 0) ? atom1 : ((c == 1) ? atom2 : c - 2);
1031 auto op_start = 3 * atom * nopers;
1032 auto op_fence = op_start + nopers;
1033 for (auto xyz = 0; xyz != 3;
1034 ++xyz, op_start += nopers, op_fence += nopers) {
1035 for (unsigned int op = op_start; op != op_fence;
1036 ++op, ++shellset_idx) {
1037 add_shellset_to_dest(op, shellset_idx);
1038 }
1039 }
1040 }
1041 } break;
1042
1043 case 2: {
1044 //
1045 // must pay attention to symmetry when computing 2nd and
1046 // higher-order derivs e.g. d2 (s1|s2) / dX dY involves several
1047 // cases:
1048 // 1. only s1 (or only s2) depends on X AND Y (i.e. X and Y refer to
1049 // same atom) =>
1050 // d2 (s1|s2) / dX dY = (d2 s1 / dX dY | s2)
1051 // 2. s1 depends on X only, s2 depends on Y only (or vice versa) =>
1052 // d2 (s1|s2) / dX dY = (d s1 / dX | d s2 / dY)
1053 // 3. s1 AND s2 depend on X AND Y (i.e. X and Y refer to same atom)
1054 // =>
1055 // case A: X != Y
1056 // d2 (s1|s2) / dX dY = (d2 s1 / dX dY | s2) + (d s1 / dX | d s2
1057 // / dY)
1058 // + (d s1 / dY | d s2 / dX) + (s1| d2 s2 / dX dY )
1059 // case B: X == Y
1060 // d2 (s1|s2) / dX2 = (d2 s1 / dX2 | s2) + 2 (d s1 / dX | d s2 /
1061 // dX)
1062 // + (s1| d2 s2 / dX2 )
1063
1064 // computes upper triangle index
1065 // n2 = matrix size times 2
1066 // i,j = (unordered) indices
1067 auto upper_triangle_index = [](size_t n2, long i, long j) {
1068 return std::min(i, j) * (n2 - std::min(i, j) - 1) / 2 +
1069 std::max(i, j);
1070 };
1071
1072 // look over shellsets in the order in which they appear
1073 std::size_t shellset_idx = 0;
1074 for (auto c1 = 0; c1 != nderivcenters_shset; ++c1) {
1075 auto a1 = (c1 == 0) ? atom1 : ((c1 == 1) ? atom2 : c1 - 2);
1076 auto coord1 = 3 * a1;
1077 for (auto xyz1 = 0; xyz1 != 3; ++xyz1, ++coord1) {
1078 for (auto c2 = c1; c2 != nderivcenters_shset; ++c2) {
1079 auto a2 = (c2 == 0) ? atom1 : ((c2 == 1) ? atom2 : c2 - 2);
1080 auto xyz2_start = (c1 == c2) ? xyz1 : 0;
1081 auto coord2 = 3 * a2 + xyz2_start;
1082 for (auto xyz2 = xyz2_start; xyz2 != 3; ++xyz2, ++coord2) {
1083 double scale = (coord1 == coord2 && c1 != c2) ? 2.0 : 1.0;
1084
1085 const auto coord12 =
1086 upper_triangle_index(two_times_ncoords, coord1, coord2);
1087 auto op_start = coord12 * nopers;
1088 auto op_fence = op_start + nopers;
1089 for (auto op = op_start; op != op_fence;
1090 ++op, ++shellset_idx) {
1091 add_shellset_to_dest(op, shellset_idx, scale);
1092 }
1093 }
1094 }
1095 }
1096 }
1097 } break;
1098
1099 default: {
1100 assert(false && "not yet implemented");
1101
1102 using ShellSetDerivIterator =
1104 std::vector<unsigned int>>;
1105 ShellSetDerivIterator shellset_diter(deriv_order,
1106 nderivcenters_shset);
1107 while (shellset_diter) {
1108 const auto& deriv = *shellset_diter;
1109 }
1110 }
1111 } // copy shell block switch
1112
1113 } // s2 <= s1
1114 } // s1
1115 }; // compute lambda
1116
1117 libint2::parallel_do(compute);
1118
1119 return result;
1120}
1121#endif
1122
1123template <libint2::Operator Kernel>
1124Matrix compute_schwarz_ints(
1125 const BasisSet& bs1, const BasisSet& _bs2, bool use_2norm,
1126 typename libint2::operator_traits<Kernel>::oper_params_type params) {
1127 const BasisSet& bs2 = (_bs2.empty() ? bs1 : _bs2);
1128 const auto nsh1 = bs1.size();
1129 const auto nsh2 = bs2.size();
1130 const auto bs1_equiv_bs2 = (&bs1 == &bs2);
1131
1132 Matrix K = Matrix::Zero(nsh1, nsh2);
1133
1134 // construct the 2-electron repulsion integrals engine
1135 using libint2::Engine;
1136 using libint2::nthreads;
1137 std::vector<Engine> engines(nthreads);
1138
1139 // !!! very important: cannot screen primitives in Schwarz computation !!!
1140 auto epsilon = 0.;
1141 engines[0] = Engine(Kernel, std::max(bs1.max_nprim(), bs2.max_nprim()),
1142 std::max(bs1.max_l(), bs2.max_l()), 0, epsilon, params);
1143 for (size_t i = 1; i != nthreads; ++i) {
1144 engines[i] = engines[0];
1145 }
1146
1147 std::cout << "computing Schwarz bound prerequisites (kernel=" << (int)Kernel
1148 << ") ... ";
1149
1150 libint2::Timers<1> timer;
1151 timer.set_now_overhead(25);
1152 timer.start(0);
1153
1154 auto compute = [&](int thread_id) {
1155 const auto& buf = engines[thread_id].results();
1156
1157 // loop over permutationally-unique set of shells
1158 for (auto s1 = 0l, s12 = 0l; s1 != nsh1; ++s1) {
1159 auto n1 = bs1[s1].size(); // number of basis functions in this shell
1160
1161 auto s2_max = bs1_equiv_bs2 ? s1 : nsh2 - 1;
1162 for (auto s2 = 0; s2 <= s2_max; ++s2, ++s12) {
1163 if (s12 % nthreads != thread_id) continue;
1164
1165 auto n2 = bs2[s2].size();
1166 auto n12 = n1 * n2;
1167
1168 engines[thread_id].compute2<Kernel, BraKet::xx_xx, 0>(bs1[s1], bs2[s2],
1169 bs1[s1], bs2[s2]);
1170 assert(buf[0] != nullptr &&
1171 "to compute Schwarz ints turn off primitive screening");
1172
1173 // the diagonal elements are the Schwarz ints ... use Map.diagonal()
1174 Eigen::Map<const Matrix> buf_mat(buf[0], n12, n12);
1175 auto norm2 =
1176 use_2norm ? buf_mat.norm() : buf_mat.lpNorm<Eigen::Infinity>();
1177 K(s1, s2) = std::sqrt(norm2);
1178 if (bs1_equiv_bs2) K(s2, s1) = K(s1, s2);
1179 }
1180 }
1181 }; // thread lambda
1182
1183 libint2::parallel_do(compute);
1184
1185 timer.stop(0);
1186 std::cout << "done (" << timer.read(0) << " s)" << std::endl;
1187
1188 return K;
1189}
1190
1191Matrix compute_do_ints(const BasisSet& bs1, const BasisSet& bs2,
1192 bool use_2norm) {
1193 return compute_schwarz_ints<libint2::Operator::delta>(bs1, bs2, use_2norm);
1194}
1195
1196shellpair_list_t compute_shellpair_list(const BasisSet& bs1,
1197 const BasisSet& _bs2,
1198 const double threshold) {
1199 const BasisSet& bs2 = (_bs2.empty() ? bs1 : _bs2);
1200 const auto nsh1 = bs1.size();
1201 const auto nsh2 = bs2.size();
1202 const auto bs1_equiv_bs2 = (&bs1 == &bs2);
1203
1204 using libint2::nthreads;
1205
1206 // construct the 2-electron repulsion integrals engine
1207 using libint2::Engine;
1208 std::vector<Engine> engines;
1209 engines.reserve(nthreads);
1210 engines.emplace_back(Operator::overlap,
1211 std::max(bs1.max_nprim(), bs2.max_nprim()),
1212 std::max(bs1.max_l(), bs2.max_l()), 0);
1213 for (size_t i = 1; i != nthreads; ++i) {
1214 engines.push_back(engines[0]);
1215 }
1216
1217 std::cout << "computing non-negligible shell-pair list ... ";
1218
1219 libint2::Timers<1> timer;
1220 timer.set_now_overhead(25);
1221 timer.start(0);
1222
1223 shellpair_list_t result;
1224
1225 std::mutex mx;
1226
1227 auto compute = [&](int thread_id) {
1228 auto& engine = engines[thread_id];
1229 const auto& buf = engine.results();
1230
1231 // loop over permutationally-unique set of shells
1232 for (auto s1 = 0l, s12 = 0l; s1 != nsh1; ++s1) {
1233 mx.lock();
1234 if (result.find(s1) == result.end())
1235 result.insert(std::make_pair(s1, std::vector<size_t>()));
1236 mx.unlock();
1237
1238 auto n1 = bs1[s1].size(); // number of basis functions in this shell
1239
1240 auto s2_max = bs1_equiv_bs2 ? s1 : nsh2 - 1;
1241 for (auto s2 = 0; s2 <= s2_max; ++s2, ++s12) {
1242 if (s12 % nthreads != thread_id) continue;
1243
1244 auto on_same_center = (bs1[s1].O == bs2[s2].O);
1245 bool significant = on_same_center;
1246 if (not on_same_center) {
1247 auto n2 = bs2[s2].size();
1248 engines[thread_id].compute(bs1[s1], bs2[s2]);
1249 Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
1250 auto norm = buf_mat.norm();
1251 significant = (norm >= threshold);
1252 }
1253
1254 if (significant) {
1255 mx.lock();
1256 result[s1].emplace_back(s2);
1257 mx.unlock();
1258 }
1259 }
1260 }
1261 }; // end of compute
1262
1263 libint2::parallel_do(compute);
1264
1265 // resort shell list in increasing order, i.e. result[s][s1] < result[s][s2]
1266 // if s1 < s2
1267 auto sort = [&](int thread_id) {
1268 for (auto s1 = 0l; s1 != nsh1; ++s1) {
1269 if (s1 % nthreads == thread_id) {
1270 auto& list = result[s1];
1271 std::sort(list.begin(), list.end());
1272 }
1273 }
1274 }; // end of sort
1275
1277
1278 timer.stop(0);
1279 std::cout << "done (" << timer.read(0) << " s)" << std::endl;
1280
1281 return result;
1282}
1283
1284// returns {X,X^{-1},rank,A_condition_number,result_A_condition_number}, where
1285// X is the generalized square-root-inverse such that X.transpose() * A * X = I
1286//
1287// if symmetric is true, produce "symmetric" sqrtinv: X = U . A_evals_sqrtinv .
1288// U.transpose()),
1289// else produce "canonical" sqrtinv: X = U . A_evals_sqrtinv
1290// where U are eigenvectors of A
1291// rows and cols of symmetric X are equivalent; for canonical X the rows are
1292// original basis (AO),
1293// cols are transformed basis ("orthogonal" AO)
1294//
1295// A is conditioned to max_condition_number
1296std::tuple<Matrix, Matrix, size_t, double, double> gensqrtinv(
1297 const Matrix& S, bool symmetric = false,
1298 double max_condition_number = 1e8) {
1299 Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(S);
1300 auto U = eig_solver.eigenvectors();
1301 auto s = eig_solver.eigenvalues();
1302 auto s_max = s.maxCoeff();
1303 auto condition_number = std::min(
1304 s_max / std::max(s.minCoeff(), std::numeric_limits<double>::min()),
1305 1.0 / std::numeric_limits<double>::epsilon());
1306 auto threshold = s_max / max_condition_number;
1307 long n = s.rows();
1308 long n_cond = 0;
1309 for (long i = n - 1; i >= 0; --i) {
1310 if (s(i) >= threshold) {
1311 ++n_cond;
1312 } else
1313 i = 0; // skip rest since eigenvalues are in ascending order
1314 }
1315
1316 auto sigma = s.bottomRows(n_cond);
1317 auto result_condition_number = sigma.maxCoeff() / sigma.minCoeff();
1318 auto sigma_sqrt = sigma.array().sqrt().matrix().asDiagonal();
1319 auto sigma_invsqrt = sigma.array().sqrt().inverse().matrix().asDiagonal();
1320
1321 // make canonical X/Xinv
1322 auto U_cond = U.block(0, n - n_cond, n, n_cond);
1323 Matrix X = U_cond * sigma_invsqrt;
1324 Matrix Xinv = U_cond * sigma_sqrt;
1325 // convert to symmetric, if needed
1326 if (symmetric) {
1327 X = X * U_cond.transpose();
1328 Xinv = Xinv * U_cond.transpose();
1329 }
1330 return std::make_tuple(X, Xinv, size_t(n_cond), condition_number,
1331 result_condition_number);
1332}
1333
1334std::tuple<Matrix, Matrix, double> conditioning_orthogonalizer(
1335 const Matrix& S, double S_condition_number_threshold) {
1336 size_t obs_rank;
1337 double S_condition_number;
1338 double XtX_condition_number;
1339 Matrix X, Xinv;
1340
1341 assert(S.rows() == S.cols());
1342
1343 std::tie(X, Xinv, obs_rank, S_condition_number, XtX_condition_number) =
1344 gensqrtinv(S, false, S_condition_number_threshold);
1345 auto obs_nbf_omitted = (long)S.rows() - (long)obs_rank;
1346 std::cout << "overlap condition number = " << S_condition_number;
1347 if (obs_nbf_omitted > 0)
1348 std::cout << " (dropped " << obs_nbf_omitted << " "
1349 << (obs_nbf_omitted > 1 ? "fns" : "fn") << " to reduce to "
1350 << XtX_condition_number << ")";
1351 std::cout << std::endl;
1352
1353 if (obs_nbf_omitted > 0) {
1354 Matrix should_be_I = X.transpose() * S * X;
1355 Matrix I = Matrix::Identity(should_be_I.rows(), should_be_I.cols());
1356 std::cout << "||X^t * S * X - I||_2 = " << (should_be_I - I).norm()
1357 << " (should be 0)" << std::endl;
1358 }
1359
1360 return std::make_tuple(X, Xinv, XtX_condition_number);
1361}
1362
1363Matrix compute_2body_2index_ints(const BasisSet& bs) {
1364 using libint2::nthreads;
1365 const auto n = bs.nbf();
1366 const auto nshells = bs.size();
1367 Matrix result = Matrix::Zero(n, n);
1368
1369 // build engines for each thread
1370 using libint2::Engine;
1371 std::vector<Engine> engines(nthreads);
1372 engines[0] =
1373 Engine(libint2::Operator::coulomb, bs.max_nprim(), bs.max_l(), 0);
1374 engines[0].set(BraKet::xs_xs);
1375 for (size_t i = 1; i != nthreads; ++i) {
1376 engines[i] = engines[0];
1377 }
1378
1379 auto shell2bf = bs.shell2bf();
1380 auto unitshell = Shell::unit();
1381
1382 auto compute = [&](int thread_id) {
1383 auto& engine = engines[thread_id];
1384 const auto& buf = engine.results();
1385
1386 // loop over unique shell pairs, {s1,s2} such that s1 >= s2
1387 // this is due to the permutational symmetry of the real integrals over
1388 // Hermitian operators: (1|2) = (2|1)
1389 for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
1390 auto bf1 = shell2bf[s1]; // first basis function in this shell
1391 auto n1 = bs[s1].size();
1392
1393 for (auto s2 = 0; s2 <= s1; ++s2, ++s12) {
1394 if (s12 % nthreads != thread_id) continue;
1395
1396 auto bf2 = shell2bf[s2];
1397 auto n2 = bs[s2].size();
1398
1399 // compute shell pair; return is the pointer to the buffer
1400 engine.compute(bs[s1], bs[s2]);
1401 if (buf[0] == nullptr)
1402 continue; // if all integrals screened out, skip to next shell set
1403
1404 // "map" buffer to a const Eigen Matrix, and copy it to the
1405 // corresponding blocks of the result
1406 Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
1407 result.block(bf1, bf2, n1, n2) = buf_mat;
1408 if (s1 != s2) // if s1 >= s2, copy {s1,s2} to the corresponding {s2,s1}
1409 // block, note the transpose!
1410 result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
1411 }
1412 }
1413 }; // compute lambda
1414
1415 libint2::parallel_do(compute);
1416
1417 return result;
1418}
1419
1420Matrix compute_2body_fock(const BasisSet& obs, const Matrix& D,
1421 double precision, const Matrix& Schwarz) {
1422 const auto n = obs.nbf();
1423 const auto nshells = obs.size();
1424 using libint2::nthreads;
1425 std::vector<Matrix> G(nthreads, Matrix::Zero(n, n));
1426
1427 const auto do_schwarz_screen = Schwarz.cols() != 0 && Schwarz.rows() != 0;
1428 Matrix D_shblk_norm =
1429 compute_shellblock_norm(obs, D); // matrix of infty-norms of shell blocks
1430
1431 auto fock_precision = precision;
1432 // engine precision controls primitive truncation, assume worst-case scenario
1433 // (all primitive combinations add up constructively)
1434 auto max_nprim = obs.max_nprim();
1435 auto max_nprim4 = max_nprim * max_nprim * max_nprim * max_nprim;
1436 auto engine_precision = std::min(fock_precision / D_shblk_norm.maxCoeff(),
1437 std::numeric_limits<double>::epsilon()) /
1438 max_nprim4;
1439
1440 // construct the 2-electron repulsion integrals engine pool
1441 using libint2::Engine;
1442 std::vector<Engine> engines(nthreads);
1443 engines[0] = Engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 0);
1444 engines[0].set_precision(engine_precision); // shellset-dependent precision
1445 // control will likely break
1446 // positive definiteness
1447 // stick with this simple recipe
1448 std::cout << "compute_2body_fock:precision = " << precision << std::endl;
1449 std::cout << "Engine::precision = " << engines[0].precision() << std::endl;
1450 for (size_t i = 1; i != nthreads; ++i) {
1451 engines[i] = engines[0];
1452 }
1453 std::atomic<size_t> num_ints_computed{0};
1454
1455#if defined(REPORT_INTEGRAL_TIMINGS)
1456 std::vector<libint2::Timers<1>> timers(nthreads);
1457#endif
1458
1459 auto shell2bf = obs.shell2bf();
1460
1461 auto lambda = [&](int thread_id) {
1462 auto& engine = engines[thread_id];
1463 auto& g = G[thread_id];
1464 const auto& buf = engine.results();
1465
1466#if defined(REPORT_INTEGRAL_TIMINGS)
1467 auto& timer = timers[thread_id];
1468 timer.clear();
1469 timer.set_now_overhead(25);
1470#endif
1471
1472 // loop over permutationally-unique set of shells
1473 for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1474 auto bf1_first = shell2bf[s1]; // first basis function in this shell
1475 auto n1 = obs[s1].size(); // number of basis functions in this shell
1476
1477 for (const auto& s2 : obs_shellpair_list[s1]) {
1478 auto bf2_first = shell2bf[s2];
1479 auto n2 = obs[s2].size();
1480
1481 const auto Dnorm12 = do_schwarz_screen ? D_shblk_norm(s1, s2) : 0.;
1482
1483 for (auto s3 = 0; s3 <= s1; ++s3) {
1484 auto bf3_first = shell2bf[s3];
1485 auto n3 = obs[s3].size();
1486
1487 const auto Dnorm123 =
1488 do_schwarz_screen
1489 ? std::max(D_shblk_norm(s1, s3),
1490 std::max(D_shblk_norm(s2, s3), Dnorm12))
1491 : 0.;
1492
1493 const auto s4_max = (s1 == s3) ? s2 : s3;
1494 for (const auto& s4 : obs_shellpair_list[s3]) {
1495 if (s4 > s4_max)
1496 break; // for each s3, s4 are stored in monotonically increasing
1497 // order
1498
1499 if ((s1234++) % nthreads != thread_id) continue;
1500
1501 const auto Dnorm1234 =
1502 do_schwarz_screen
1503 ? std::max(
1504 D_shblk_norm(s1, s4),
1505 std::max(D_shblk_norm(s2, s4),
1506 std::max(D_shblk_norm(s3, s4), Dnorm123)))
1507 : 0.;
1508
1509 if (do_schwarz_screen &&
1510 Dnorm1234 * Schwarz(s1, s2) * Schwarz(s3, s4) < fock_precision)
1511 continue;
1512
1513 auto bf4_first = shell2bf[s4];
1514 auto n4 = obs[s4].size();
1515
1516 num_ints_computed += n1 * n2 * n3 * n4;
1517
1518 // compute the permutational degeneracy (i.e. # of equivalents) of
1519 // the given shell set
1520 auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1521 auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1522 auto s12_34_deg = (s1 == s3) ? (s2 == s4 ? 1.0 : 2.0) : 2.0;
1523 auto s1234_deg = s12_deg * s34_deg * s12_34_deg;
1524
1525#if defined(REPORT_INTEGRAL_TIMINGS)
1526 timer.start(0);
1527#endif
1528
1529 engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1530 obs[s1], obs[s2], obs[s3], obs[s4]);
1531 const auto* buf_1234 = buf[0];
1532 if (buf_1234 == nullptr)
1533 continue; // if all integrals screened out, skip to next quartet
1534
1535#if defined(REPORT_INTEGRAL_TIMINGS)
1536 timer.stop(0);
1537#endif
1538
1539 Eigen::Map<MatrixXd> buf_1234_map(buf_1234, n12, n34);
1540 assert(buf_1234_map.norm() < Schwarz(s1, s2) * Schwarz(s3, s4));
1541
1542 // 1) each shell set of integrals contributes up to 6 shell sets of
1543 // the Fock matrix:
1544 // F(a,b) += (ab|cd) * D(c,d)
1545 // F(c,d) += (ab|cd) * D(a,b)
1546 // F(b,d) -= 1/4 * (ab|cd) * D(a,c)
1547 // F(b,c) -= 1/4 * (ab|cd) * D(a,d)
1548 // F(a,c) -= 1/4 * (ab|cd) * D(b,d)
1549 // F(a,d) -= 1/4 * (ab|cd) * D(b,c)
1550 // 2) each permutationally-unique integral (shell set) must be
1551 // scaled by its degeneracy,
1552 // i.e. the number of the integrals/sets equivalent to it
1553 // 3) the end result must be symmetrized
1554 for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1555 const auto bf1 = f1 + bf1_first;
1556 for (auto f2 = 0; f2 != n2; ++f2) {
1557 const auto bf2 = f2 + bf2_first;
1558 for (auto f3 = 0; f3 != n3; ++f3) {
1559 const auto bf3 = f3 + bf3_first;
1560 for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1561 const auto bf4 = f4 + bf4_first;
1562
1563 const auto value = buf_1234[f1234];
1564
1565 const auto value_scal_by_deg = value * s1234_deg;
1566
1567 g(bf1, bf2) += D(bf3, bf4) * value_scal_by_deg;
1568 g(bf3, bf4) += D(bf1, bf2) * value_scal_by_deg;
1569 g(bf1, bf3) -= 0.25 * D(bf2, bf4) * value_scal_by_deg;
1570 g(bf2, bf4) -= 0.25 * D(bf1, bf3) * value_scal_by_deg;
1571 g(bf1, bf4) -= 0.25 * D(bf2, bf3) * value_scal_by_deg;
1572 g(bf2, bf3) -= 0.25 * D(bf1, bf4) * value_scal_by_deg;
1573 }
1574 }
1575 }
1576 }
1577 }
1578 }
1579 }
1580 }
1581 }; // end of lambda
1582
1583 libint2::parallel_do(lambda);
1584
1585 // accumulate contributions from all threads
1586 for (size_t i = 1; i != nthreads; ++i) {
1587 G[0] += G[i];
1588 }
1589
1590#if defined(REPORT_INTEGRAL_TIMINGS)
1591 double time_for_ints = 0.0;
1592 for (auto& t : timers) {
1593 time_for_ints += t.read(0);
1594 }
1595 std::cout << "time for integrals = " << time_for_ints << std::endl;
1596 for (int t = 0; t != nthreads; ++t) engines[t].print_timers();
1597#endif
1598
1599 Matrix GG = 0.5 * (G[0] + G[0].transpose());
1600
1601 std::cout << "# of integrals = " << num_ints_computed << std::endl;
1602
1603 // symmetrize the result and return
1604 return GG;
1605}
1606
1607#if LIBINT2_DERIV_ERI_ORDER
1608template <unsigned deriv_order>
1609std::vector<Matrix> compute_2body_fock_deriv(
1610 const BasisSet& obs, const std::vector<libint2::Atom>& atoms,
1611 const Matrix& D, double precision, const Matrix& Schwarz) {
1612 const auto n = obs.nbf();
1613 const auto nshells = obs.size();
1614 const auto nderiv_shellset = libint2::num_geometrical_derivatives(
1615 4, deriv_order); // # of derivs for each shell quartet
1616 const auto nderiv = libint2::num_geometrical_derivatives(
1617 atoms.size(), deriv_order); // total # of derivs
1618 const auto ncoords_times_two = (atoms.size() * 3) * 2;
1619 using libint2::nthreads;
1620 std::vector<Matrix> G(nthreads * nderiv, Matrix::Zero(n, n));
1621
1622 const auto do_schwarz_screen = Schwarz.cols() != 0 && Schwarz.rows() != 0;
1623 Matrix D_shblk_norm =
1624 compute_shellblock_norm(obs, D); // matrix of infty-norms of shell blocks
1625
1626 auto fock_precision = precision;
1627 // engine precision controls primitive truncation, assume worst-case scenario
1628 // (all primitive combinations add up constructively)
1629 auto max_nprim = obs.max_nprim();
1630 auto max_nprim4 = max_nprim * max_nprim * max_nprim * max_nprim;
1631 auto engine_precision = std::min(fock_precision / D_shblk_norm.maxCoeff(),
1632 std::numeric_limits<double>::epsilon()) /
1633 max_nprim4;
1634
1635 // construct the 2-electron repulsion integrals engine pool
1636 using libint2::Engine;
1637 std::vector<Engine> engines(nthreads);
1638 engines[0] =
1639 Engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), deriv_order);
1640 engines[0].set_precision(engine_precision); // shellset-dependent precision
1641 // control will likely break
1642 // positive definiteness
1643 // stick with this simple recipe
1644 std::cout << "compute_2body_fock:precision = " << precision << std::endl;
1645 std::cout << "Engine::precision = " << engines[0].precision() << std::endl;
1646 for (size_t i = 1; i != nthreads; ++i) {
1647 engines[i] = engines[0];
1648 }
1649 std::atomic<size_t> num_ints_computed{0};
1650
1651#if defined(REPORT_INTEGRAL_TIMINGS)
1652 std::vector<libint2::Timers<1>> timers(nthreads);
1653#endif
1654
1655 auto shell2bf = obs.shell2bf();
1656 auto shell2atom = obs.shell2atom(atoms);
1657
1658 auto lambda = [&](int thread_id) {
1659 auto& engine = engines[thread_id];
1660 const auto& buf = engine.results();
1661
1662#if defined(REPORT_INTEGRAL_TIMINGS)
1663 auto& timer = timers[thread_id];
1664 timer.clear();
1665 timer.set_now_overhead(25);
1666#endif
1667
1668 size_t shell_atoms[4];
1669
1670 // loop over permutationally-unique set of shells
1671 for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1672 auto bf1_first = shell2bf[s1]; // first basis function in this shell
1673 auto n1 = obs[s1].size(); // number of basis functions in this shell
1674 shell_atoms[0] = shell2atom[s1];
1675
1676 for (const auto& s2 : obs_shellpair_list[s1]) {
1677 auto bf2_first = shell2bf[s2];
1678 auto n2 = obs[s2].size();
1679 shell_atoms[1] = shell2atom[s2];
1680
1681 const auto Dnorm12 = do_schwarz_screen ? D_shblk_norm(s1, s2) : 0.;
1682
1683 for (auto s3 = 0; s3 <= s1; ++s3) {
1684 auto bf3_first = shell2bf[s3];
1685 auto n3 = obs[s3].size();
1686 shell_atoms[2] = shell2atom[s3];
1687
1688 const auto Dnorm123 =
1689 do_schwarz_screen
1690 ? std::max(D_shblk_norm(s1, s3),
1691 std::max(D_shblk_norm(s2, s3), Dnorm12))
1692 : 0.;
1693
1694 const auto s4_max = (s1 == s3) ? s2 : s3;
1695 for (const auto& s4 : obs_shellpair_list[s3]) {
1696 if (s4 > s4_max)
1697 break; // for each s3, s4 are stored in monotonically increasing
1698 // order
1699
1700 if ((s1234++) % nthreads != thread_id) continue;
1701
1702 const auto Dnorm1234 =
1703 do_schwarz_screen
1704 ? std::max(
1705 D_shblk_norm(s1, s4),
1706 std::max(D_shblk_norm(s2, s4),
1707 std::max(D_shblk_norm(s3, s4), Dnorm123)))
1708 : 0.;
1709
1710 if (do_schwarz_screen &&
1711 Dnorm1234 * Schwarz(s1, s2) * Schwarz(s3, s4) < fock_precision)
1712 continue;
1713
1714 auto bf4_first = shell2bf[s4];
1715 auto n4 = obs[s4].size();
1716 shell_atoms[3] = shell2atom[s4];
1717
1718 const auto n1234 = n1 * n2 * n3 * n4;
1719
1720 // compute the permutational degeneracy (i.e. # of equivalents) of
1721 // the given shell set
1722 auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1723 auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1724 auto s12_34_deg = (s1 == s3) ? (s2 == s4 ? 1.0 : 2.0) : 2.0;
1725 auto s1234_deg = s12_deg * s34_deg * s12_34_deg;
1726
1727 // computes contribution from shell set \c idx to the operator
1728 // matrix with index \c op
1729 auto add_shellset_to_dest = [&](std::size_t op, std::size_t idx,
1730 int coord1, int coord2,
1731 double scale = 1.0) {
1732 auto& g = G[op];
1733 auto shset = buf[idx];
1734 const auto weight = scale * s1234_deg;
1735
1736 for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1737 const auto bf1 = f1 + bf1_first;
1738 for (auto f2 = 0; f2 != n2; ++f2) {
1739 const auto bf2 = f2 + bf2_first;
1740 for (auto f3 = 0; f3 != n3; ++f3) {
1741 const auto bf3 = f3 + bf3_first;
1742 for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1743 const auto bf4 = f4 + bf4_first;
1744
1745 const auto value = shset[f1234];
1746 const auto wvalue = value * weight;
1747
1748 g(bf1, bf2) += D(bf3, bf4) * wvalue;
1749 g(bf3, bf4) += D(bf1, bf2) * wvalue;
1750 g(bf1, bf3) -= 0.25 * D(bf2, bf4) * wvalue;
1751 g(bf2, bf4) -= 0.25 * D(bf1, bf3) * wvalue;
1752 g(bf1, bf4) -= 0.25 * D(bf2, bf3) * wvalue;
1753 g(bf2, bf3) -= 0.25 * D(bf1, bf4) * wvalue;
1754 }
1755 }
1756 }
1757 }
1758 };
1759
1760#if defined(REPORT_INTEGRAL_TIMINGS)
1761 timer.start(0);
1762#endif
1763
1764 engine.compute2<Operator::coulomb, BraKet::xx_xx, deriv_order>(
1765 obs[s1], obs[s2], obs[s3], obs[s4]);
1766 if (buf[0] == nullptr)
1767 continue; // if all integrals screened out, skip to next quartet
1768 num_ints_computed += nderiv_shellset * n1234;
1769
1770#if defined(REPORT_INTEGRAL_TIMINGS)
1771 timer.stop(0);
1772#endif
1773
1774 switch (deriv_order) {
1775 case 0: {
1776 int coord1 = 0, coord2 = 0;
1777 add_shellset_to_dest(thread_id, 0, coord1, coord2);
1778 } break;
1779
1780 case 1: {
1781 for (auto d = 0; d != 12; ++d) {
1782 const int a = d / 3;
1783 const int xyz = d % 3;
1784
1785 auto coord = shell_atoms[a] * 3 + xyz;
1786 auto& g = G[thread_id * nderiv + coord];
1787
1788 int coord1 = 0, coord2 = 0;
1789
1790 add_shellset_to_dest(thread_id * nderiv + coord, d, coord1,
1791 coord2);
1792
1793 } // d \in [0,12)
1794 } break;
1795
1796 case 2: {
1797 // computes upper triangle index
1798 // n2 = matrix size times 2
1799 // i,j = (unordered) indices
1800 auto upper_triangle_index = [](size_t n2, size_t i, size_t j) {
1801 return std::min(i, j) * (n2 - std::min(i, j) - 1) / 2 +
1802 std::max(i, j);
1803 };
1804 // look over shellsets in the order in which they appear
1805 std::size_t shellset_idx = 0;
1806 for (auto c1 = 0; c1 != 4; ++c1) {
1807 auto a1 = shell_atoms[c1];
1808 auto coord1 = 3 * a1;
1809 for (auto xyz1 = 0; xyz1 != 3; ++xyz1, ++coord1) {
1810 for (auto c2 = c1; c2 != 4; ++c2) {
1811 auto a2 = shell_atoms[c2];
1812 auto xyz2_start = (c1 == c2) ? xyz1 : 0;
1813 auto coord2 = 3 * a2 + xyz2_start;
1814 for (auto xyz2 = xyz2_start; xyz2 != 3;
1815 ++xyz2, ++coord2) {
1816 double scale =
1817 (coord1 == coord2 && c1 != c2) ? 2.0 : 1.0;
1818
1819 const auto coord12 = upper_triangle_index(
1820 ncoords_times_two, coord1, coord2);
1821 auto op = thread_id * nderiv + coord12;
1822 add_shellset_to_dest(op, shellset_idx, coord1, coord2,
1823 scale);
1824 ++shellset_idx;
1825 }
1826 }
1827 }
1828 }
1829 } break;
1830
1831 default:
1832 assert(deriv_order <= 2 &&
1833 "support for 3rd and higher derivatives of the Fock "
1834 "matrix not yet implemented");
1835 }
1836 }
1837 }
1838 }
1839 }
1840 }; // end of lambda
1841
1842 libint2::parallel_do(lambda);
1843
1844 // accumulate contributions from all threads
1845 for (size_t t = 1; t != nthreads; ++t) {
1846 for (auto d = 0; d != nderiv; ++d) {
1847 G[d] += G[t * nderiv + d];
1848 }
1849 }
1850
1851#if defined(REPORT_INTEGRAL_TIMINGS)
1852 double time_for_ints = 0.0;
1853 for (auto& t : timers) {
1854 time_for_ints += t.read(0);
1855 }
1856 std::cout << "time for integrals = " << time_for_ints << std::endl;
1857 for (int t = 0; t != nthreads; ++t) engines[t].print_timers();
1858#endif
1859
1860 std::vector<Matrix> GG(nderiv);
1861 for (auto d = 0; d != nderiv; ++d) {
1862 GG[d] = 0.5 * (G[d] + G[d].transpose());
1863 }
1864
1865 std::cout << "# of integrals = " << num_ints_computed << std::endl;
1866
1867 // symmetrize the result and return
1868 return GG;
1869}
1870
1871#endif
1872
1873Matrix compute_2body_fock_general(const BasisSet& obs, const Matrix& D,
1874 const BasisSet& D_bs, bool D_is_shelldiagonal,
1875 double precision) {
1876 const auto n = obs.nbf();
1877 const auto nshells = obs.size();
1878 const auto n_D = D_bs.nbf();
1879 assert(D.cols() == D.rows() && D.cols() == n_D);
1880
1881 using libint2::nthreads;
1882 std::vector<Matrix> G(nthreads, Matrix::Zero(n, n));
1883
1884 // construct the 2-electron repulsion integrals engine
1885 using libint2::Engine;
1886 std::vector<Engine> engines(nthreads);
1887 engines[0] = Engine(libint2::Operator::coulomb,
1888 std::max(obs.max_nprim(), D_bs.max_nprim()),
1889 std::max(obs.max_l(), D_bs.max_l()), 0);
1890 engines[0].set_precision(precision); // shellset-dependent precision control
1891 // will likely break positive
1892 // definiteness
1893 // stick with this simple recipe
1894 for (size_t i = 1; i != nthreads; ++i) {
1895 engines[i] = engines[0];
1896 }
1897 auto shell2bf = obs.shell2bf();
1898 auto shell2bf_D = D_bs.shell2bf();
1899
1900 auto lambda = [&](int thread_id) {
1901 auto& engine = engines[thread_id];
1902 auto& g = G[thread_id];
1903 const auto& buf = engine.results();
1904
1905 // loop over permutationally-unique set of shells
1906 for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1907 auto bf1_first = shell2bf[s1]; // first basis function in this shell
1908 auto n1 = obs[s1].size(); // number of basis functions in this shell
1909
1910 for (auto s2 = 0; s2 <= s1; ++s2) {
1911 auto bf2_first = shell2bf[s2];
1912 auto n2 = obs[s2].size();
1913
1914 for (auto s3 = 0; s3 < D_bs.size(); ++s3) {
1915 auto bf3_first = shell2bf_D[s3];
1916 auto n3 = D_bs[s3].size();
1917
1918 auto s4_begin = D_is_shelldiagonal ? s3 : 0;
1919 auto s4_fence = D_is_shelldiagonal ? s3 + 1 : D_bs.size();
1920
1921 for (auto s4 = s4_begin; s4 != s4_fence; ++s4, ++s1234) {
1922 if (s1234 % nthreads != thread_id) continue;
1923
1924 auto bf4_first = shell2bf_D[s4];
1925 auto n4 = D_bs[s4].size();
1926
1927 // compute the permutational degeneracy (i.e. # of equivalents) of
1928 // the given shell set
1929 auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1930
1931 if (s3 >= s4) {
1932 auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1933 auto s1234_deg = s12_deg * s34_deg;
1934 // auto s1234_deg = s12_deg;
1935 engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1936 obs[s1], obs[s2], D_bs[s3], D_bs[s4]);
1937 const auto* buf_1234 = buf[0];
1938 if (buf_1234 != nullptr) {
1939 for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1940 const auto bf1 = f1 + bf1_first;
1941 for (auto f2 = 0; f2 != n2; ++f2) {
1942 const auto bf2 = f2 + bf2_first;
1943 for (auto f3 = 0; f3 != n3; ++f3) {
1944 const auto bf3 = f3 + bf3_first;
1945 for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1946 const auto bf4 = f4 + bf4_first;
1947
1948 const auto value = buf_1234[f1234];
1949 const auto value_scal_by_deg = value * s1234_deg;
1950 g(bf1, bf2) += 2.0 * D(bf3, bf4) * value_scal_by_deg;
1951 }
1952 }
1953 }
1954 }
1955 }
1956 }
1957
1958 engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1959 obs[s1], D_bs[s3], obs[s2], D_bs[s4]);
1960 const auto* buf_1324 = buf[0];
1961 if (buf_1324 == nullptr)
1962 continue; // if all integrals screened out, skip to next quartet
1963
1964 for (auto f1 = 0, f1324 = 0; f1 != n1; ++f1) {
1965 const auto bf1 = f1 + bf1_first;
1966 for (auto f3 = 0; f3 != n3; ++f3) {
1967 const auto bf3 = f3 + bf3_first;
1968 for (auto f2 = 0; f2 != n2; ++f2) {
1969 const auto bf2 = f2 + bf2_first;
1970 for (auto f4 = 0; f4 != n4; ++f4, ++f1324) {
1971 const auto bf4 = f4 + bf4_first;
1972
1973 const auto value = buf_1324[f1324];
1974 const auto value_scal_by_deg = value * s12_deg;
1975 g(bf1, bf2) -= D(bf3, bf4) * value_scal_by_deg;
1976 }
1977 }
1978 }
1979 }
1980 }
1981 }
1982 }
1983 }
1984 }; // thread lambda
1985
1986 libint2::parallel_do(lambda);
1987
1988 // accumulate contributions from all threads
1989 for (size_t i = 1; i != nthreads; ++i) {
1990 G[0] += G[i];
1991 }
1992
1993 // symmetrize the result and return
1994 return 0.5 * (G[0] + G[0].transpose());
1995}
1996
1997#ifdef HAVE_DENSITY_FITTING
1998
1999Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
2000 using libint2::nthreads;
2001
2002 const auto n = obs.nbf();
2003 const auto ndf = dfbs.nbf();
2004
2005 libint2::Timers<1> wall_timer;
2006 wall_timer.set_now_overhead(25);
2007 std::vector<libint2::Timers<5>> timers(nthreads);
2008 for (auto& timer : timers) timer.set_now_overhead(25);
2009
2010 typedef btas::RangeNd<CblasRowMajor, std::array<long, 1>> Range1d;
2011 typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2d;
2012 typedef btas::Tensor<double, Range1d> Tensor1d;
2013 typedef btas::Tensor<double, Range2d> Tensor2d;
2014
2015 // using first time? compute 3-center ints and transform to inv sqrt
2016 // representation
2017 if (xyK.size() == 0) {
2018 wall_timer.start(0);
2019
2020 const auto nshells = obs.size();
2021 const auto nshells_df = dfbs.size();
2022 const auto& unitshell = libint2::Shell::unit();
2023
2024 // construct the 2-electron 3-center repulsion integrals engine
2025 // since the code assumes (xx|xs) braket, and Engine/libint only produces
2026 // (xs|xx), use 4-center engine
2027 std::vector<libint2::Engine> engines(nthreads);
2028 engines[0] = libint2::Engine(libint2::Operator::coulomb,
2029 std::max(obs.max_nprim(), dfbs.max_nprim()),
2030 std::max(obs.max_l(), dfbs.max_l()), 0);
2031 engines[0].set(BraKet::xs_xx);
2032 for (size_t i = 1; i != nthreads; ++i) {
2033 engines[i] = engines[0];
2034 }
2035
2036 auto shell2bf = obs.shell2bf();
2037 auto shell2bf_df = dfbs.shell2bf();
2038
2039 Tensor3d Zxy{ndf, n, n};
2040
2041 auto lambda = [&](int thread_id) {
2042 auto& engine = engines[thread_id];
2043 auto& timer = timers[thread_id];
2044 const auto& results = engine.results();
2045
2046 // loop over permutationally-unique set of shells
2047 for (auto s1 = 0l, s123 = 0l; s1 != nshells_df; ++s1) {
2048 auto bf1_first = shell2bf_df[s1]; // first basis function in this shell
2049 auto n1 = dfbs[s1].size(); // number of basis functions in this shell
2050
2051 for (auto s2 = 0; s2 != nshells; ++s2) {
2052 auto bf2_first = shell2bf[s2];
2053 auto n2 = obs[s2].size();
2054 const auto n12 = n1 * n2;
2055
2056 for (auto s3 = 0; s3 != nshells; ++s3, ++s123) {
2057 if (s123 % nthreads != thread_id) continue;
2058
2059 auto bf3_first = shell2bf[s3];
2060 auto n3 = obs[s3].size();
2061 const auto n123 = n12 * n3;
2062
2063 timer.start(0);
2064
2065 engine.compute2<Operator::coulomb, BraKet::xs_xx, 0>(
2066 dfbs[s1], unitshell, obs[s2], obs[s3]);
2067 const auto* buf = results[0];
2068 if (buf == nullptr) continue;
2069
2070 timer.stop(0);
2071 timer.start(1);
2072
2073 auto lower_bound = {bf1_first, bf2_first, bf3_first};
2074 auto upper_bound = {bf1_first + n1, bf2_first + n2, bf3_first + n3};
2075 auto view = btas::make_view(
2076 Zxy.range().slice(lower_bound, upper_bound), Zxy.storage());
2077 std::copy(buf, buf + n123, view.begin());
2078
2079 timer.stop(1);
2080 } // s3
2081 } // s2
2082 } // s1
2083 }; // lambda
2084
2085 libint2::parallel_do(lambda);
2086
2087 wall_timer.stop(0);
2088
2089 double ints_time = 0;
2090 for (const auto& timer : timers) ints_time += timer.read(0);
2091 std::cout << "time for Zxy integrals = " << ints_time
2092 << " (total from all threads)" << std::endl;
2093 double copy_time = 0;
2094 for (const auto& timer : timers) copy_time += timer.read(1);
2095 std::cout << "time for copying into BTAS = " << copy_time
2096 << " (total from all threads)" << std::endl;
2097 std::cout << "wall time for Zxy integrals + copy = " << wall_timer.read(0)
2098 << std::endl;
2099
2100 timers[0].start(2);
2101
2102 Matrix V = compute_2body_2index_ints(dfbs);
2103 Eigen::LLT<Matrix> V_LLt(V);
2104 Matrix I = Matrix::Identity(ndf, ndf);
2105 auto L = V_LLt.matrixL();
2106 Matrix V_L = L;
2107 Matrix Linv = L.solve(I).transpose();
2108 // check
2109 // std::cout << "||V - L L^t|| = " << (V - V_L * V_L.transpose()).norm() <<
2110 // std::endl;
2111 // std::cout << "||I - L L^-1^t|| = " << (I - V_L *
2112 // Linv.transpose()).norm() << std::endl;
2113 // std::cout << "||V^-1 - L^-1 L^-1^t|| = " << (V.inverse() - Linv *
2114 // Linv.transpose()).norm() << std::endl;
2115
2116 Tensor2d K{ndf, ndf};
2117 std::copy(Linv.data(), Linv.data() + ndf * ndf, K.begin());
2118
2119 xyK = Tensor3d{n, n, ndf};
2120 btas::contract(1.0, Zxy, {1, 2, 3}, K, {1, 4}, 0.0, xyK, {2, 3, 4});
2121 Zxy = Tensor3d{0, 0, 0}; // release memory
2122
2123 timers[0].stop(2);
2124 std::cout << "time for integrals metric tform = " << timers[0].read(2)
2125 << std::endl;
2126 } // if (xyK.size() == 0)
2127
2128 // compute exchange
2129 timers[0].start(3);
2130
2131 const auto nocc = Cocc.cols();
2132 Tensor2d Co{n, nocc};
2133 std::copy(Cocc.data(), Cocc.data() + n * nocc, Co.begin());
2134 Tensor3d xiK{n, nocc, ndf};
2135 btas::contract(1.0, xyK, {1, 2, 3}, Co, {2, 4}, 0.0, xiK, {1, 4, 3});
2136
2137 Tensor2d G{n, n};
2138 btas::contract(1.0, xiK, {1, 2, 3}, xiK, {4, 2, 3}, 0.0, G, {1, 4});
2139
2140 timers[0].stop(3);
2141 std::cout << "time for exchange = " << timers[0].read(3) << std::endl;
2142
2143 // compute Coulomb
2144 timers[0].start(4);
2145
2146 Tensor1d Jtmp{ndf};
2147 btas::contract(1.0, xiK, {1, 2, 3}, Co, {1, 2}, 0.0, Jtmp, {3});
2148 xiK = Tensor3d{0, 0, 0};
2149 btas::contract(2.0, xyK, {1, 2, 3}, Jtmp, {3}, -1.0, G, {1, 2});
2150
2151 timers[0].stop(4);
2152 std::cout << "time for coulomb = " << timers[0].read(4) << std::endl;
2153
2154 // copy result to an Eigen::Matrix
2155 Matrix result(n, n);
2156 std::copy(G.cbegin(), G.cend(), result.data());
2157 return result;
2158}
2159#endif // HAVE_DENSITY_FITTING
2160
2161// should be a unit test somewhere
2162void api_basic_compile_test(const BasisSet& obs) {
2163 using namespace libint2;
2164 Engine onebody_engine(
2165 Operator::overlap, // will compute overlap ints
2166 obs.max_nprim(), // max # of primitives in shells this engine will
2167 // accept
2168 obs.max_l() // max angular momentum of shells this engine will accept
2169 );
2170 auto shell2bf = obs.shell2bf();
2171 const auto& results = onebody_engine.results();
2172 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2173 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2174 std::cout << "compute shell set {" << s1 << "," << s2 << "} ... ";
2175 onebody_engine.compute(obs[s1], obs[s2]);
2176 const auto* ints_shellset = results[0];
2177 std::cout << "done" << std::endl;
2178
2179 auto bf1 = shell2bf[s1]; // first basis function in first shell
2180 auto n1 = obs[s1].size(); // number of basis functions in first shell
2181 auto bf2 = shell2bf[s2]; // first basis function in second shell
2182 auto n2 = obs[s2].size(); // number of basis functions in second shell
2183
2184 // this iterates over integrals in the order they are packed in array
2185 // ints_shellset
2186 for (auto f1 = 0; f1 != n1; ++f1)
2187 for (auto f2 = 0; f2 != n2; ++f2)
2188 std::cout << " " << bf1 + f1 << " " << bf2 + f2 << " "
2189 << ints_shellset[f1 * n2 + f2] << std::endl;
2190 }
2191 }
2192
2193 using libint2::Operator;
2194
2195 std::vector<std::pair<double, double>> cgtg_params{
2196 {0.1, 0.2}, {0.3, 0.4}, {0.5, 0.6}};
2197 {
2198 auto K = compute_schwarz_ints<Operator::cgtg>(obs, obs, false, cgtg_params);
2199 std::cout << "cGTG Schwarz ints\n" << K << std::endl;
2200 }
2201 {
2202 auto K = compute_schwarz_ints<Operator::cgtg_x_coulomb>(obs, obs, false,
2203 cgtg_params);
2204 std::cout << "cGTG/r12 Schwarz ints\n" << K << std::endl;
2205 }
2206 {
2207 auto K =
2208 compute_schwarz_ints<Operator::delcgtg2>(obs, obs, false, cgtg_params);
2209 std::cout << "||Del.cGTG||^2 Schwarz ints\n" << K << std::endl;
2210 }
2211
2212 { // test 2-index ints
2213 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l());
2214 Engine eri2_engine = eri4_engine;
2215 eri2_engine.set(BraKet::xs_xs);
2216 auto shell2bf = obs.shell2bf();
2217 const auto& results4 = eri4_engine.results();
2218 const auto& results2 = eri2_engine.results();
2219 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2220 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2221 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2222 eri2_engine.compute(obs[s1], obs[s2]);
2223
2224 auto bf1 = shell2bf[s1]; // first basis function in first shell
2225 auto n1 = obs[s1].size(); // number of basis functions in first shell
2226 auto bf2 = shell2bf[s2]; // first basis function in second shell
2227 auto n2 = obs[s2].size(); // number of basis functions in second shell
2228
2229 const auto* buf4 = results4[0];
2230 const auto* buf2 = results2[0];
2231
2232 // this iterates over integrals in the order they are packed in array
2233 // ints_shellset
2234 for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2235 for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2236 assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2237 "2-center ints test failed");
2238 }
2239 }
2240 }
2241 { // test 3-index ints
2242 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l());
2243 Engine eri3_engine = eri4_engine;
2244 eri3_engine.set(BraKet::xs_xx);
2245 auto shell2bf = obs.shell2bf();
2246 const auto& results4 = eri4_engine.results();
2247 const auto& results3 = eri3_engine.results();
2248 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2249 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2250 for (auto s3 = 0; s3 != obs.size(); ++s3) {
2251 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], obs[s3]);
2252 eri3_engine.compute(obs[s1], obs[s2], obs[s3]);
2253
2254 auto bf1 = shell2bf[s1]; // first basis function in first shell
2255 auto n1 = obs[s1].size(); // number of basis functions in first shell
2256 auto bf2 = shell2bf[s2]; // first basis function in second shell
2257 auto n2 =
2258 obs[s2].size(); // number of basis functions in second shell
2259 auto bf3 = shell2bf[s3]; // first basis function in third shell
2260 auto n3 = obs[s3].size(); // number of basis functions in third shell
2261
2262 const auto* buf4 = results4[0];
2263 const auto* buf3 = results3[0];
2264
2265 // this iterates over integrals in the order they are packed in array
2266 // ints_shellset
2267 for (auto f1 = 0, f123 = 0; f1 != n1; ++f1)
2268 for (auto f2 = 0; f2 != n2; ++f2)
2269 for (auto f3 = 0; f3 != n3; ++f3, ++f123)
2270 assert(std::abs(buf4[f123] - buf3[f123]) < 1e-12 &&
2271 "3-center ints test failed");
2272 }
2273 }
2274 }
2275 }
2276
2277#if LIBINT2_DERIV_ERI_ORDER
2278 { // test deriv 2-index ints
2279 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 1);
2280 Engine eri2_engine = eri4_engine;
2281 eri2_engine.set(BraKet::xs_xs);
2282 auto shell2bf = obs.shell2bf();
2283 const auto& results4 = eri4_engine.results();
2284 const auto& results2 = eri2_engine.results();
2285 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2286 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2287 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2288 eri2_engine.compute(obs[s1], obs[s2]);
2289
2290 auto bf1 = shell2bf[s1]; // first basis function in first shell
2291 auto n1 = obs[s1].size(); // number of basis functions in first shell
2292 auto bf2 = shell2bf[s2]; // first basis function in second shell
2293 auto n2 = obs[s2].size(); // number of basis functions in second shell
2294
2295 // loop over derivative shell sets
2296 for (auto d = 0; d != 6; ++d) {
2297 const auto* buf4 = results4[d < 3 ? d : d + 3];
2298 const auto* buf2 = results2[d];
2299
2300 // this iterates over integrals in the order they are packed in array
2301 // ints_shellset
2302 for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2303 for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2304 assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2305 "deriv 2-center ints test failed");
2306 }
2307 }
2308 }
2309 }
2310#endif
2311
2312#if LIBINT2_DERIV_ERI_ORDER > 1
2313 { // test 2nd deriv 2-index ints
2314 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 2);
2315 Engine eri2_engine = eri4_engine;
2316 eri2_engine.set(BraKet::xs_xs);
2317 auto shell2bf = obs.shell2bf();
2318 const auto& results4 = eri4_engine.results();
2319 const auto& results2 = eri2_engine.results();
2320 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2321 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2322 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2323 eri2_engine.compute(obs[s1], obs[s2]);
2324
2325 auto bf1 = shell2bf[s1]; // first basis function in first shell
2326 auto n1 = obs[s1].size(); // number of basis functions in first shell
2327 auto bf2 = shell2bf[s2]; // first basis function in second shell
2328 auto n2 = obs[s2].size(); // number of basis functions in second shell
2329
2330 // loop over derivative shell sets
2331 for (auto d1 = 0, d12 = 0; d1 != 6; ++d1) {
2332 const auto dd1 = d1 < 3 ? d1 : d1 + 3;
2333 for (auto d2 = d1; d2 != 6; ++d2, ++d12) {
2334 const auto dd2 = d2 < 3 ? d2 : d2 + 3;
2335 const auto dd12 = dd1 * (24 - dd1 - 1) / 2 + dd2;
2336 const auto* buf4 = results4[dd12];
2337 const auto* buf2 = results2[d12];
2338
2339 // this iterates over integrals in the order they are packed in
2340 // array
2341 // ints_shellset
2342 for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2343 for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2344 assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2345 "2nd deriv 2-center ints test failed");
2346 }
2347 }
2348 }
2349 }
2350 }
2351#endif
2352}
DIIS (`‘direct inversion of iterative subspace’') extrapolation.
Definition diis.h:140
Timers aggregates N C++11 "timers"; used to high-resolution profile stages of integral computation.
Definition timer.h:39
double read(size_t t) const
reads value (in seconds) of timer t , converted to double
Definition timer.h:72
dur_t stop(size_t t)
stops timer t
Definition timer.h:65
void clear()
resets timers to zero
Definition timer.h:74
void set_now_overhead(size_t ns)
use this to report the overhead of now() call; if set, the reported timings will be adjusted for this...
Definition timer.h:58
void start(size_t t)
starts timer t
Definition timer.h:61
Defaults definitions for various parameters assumed by Libint.
Definition algebra.cc:24
void parallel_do(Lambda &lambda)
fires off nthreads instances of lambda in parallel
Definition 1body.h:170
@ Schwarz
Schwarz screening method using Frobenius norm:
void initialize(bool verbose=false)
initializes the libint library if not currently initialized, no-op otherwise
Definition initialize.h:76
std::vector< std::pair< double, std::array< double, 3 > > > make_point_charges(const std::vector< libint2::Atom > &atoms)
converts a vector of Atoms to a vector of point charges
Definition atom.h:241
BraKet
types of shell sets supported by Engine, in chemist notation (i.e.
Definition include/libint2/braket.h:36
std::vector< Atom > read_dotxyz(std::istream &is, const double bohr_to_angstrom=constants::codata_2018::bohr_to_angstrom)
reads the list of atoms from a file in the standard XYZ format supported by most chemistry software (...
Definition atom.h:209
void finalize()
finalizes the libint library.
Definition initialize.h:105
const std::vector< Real > & sto3g_ao_occupation_vector(size_t Z)
computes average orbital occupancies in the ground state of a neutral atoms
Definition sto3g_atomic_density.h:86
size_t sto3g_num_ao(size_t Z)
Definition sto3g_atomic_density.h:57
Definition 1body.h:150
Iterates over all partitions of a non-negative integer into nonnegative integers in reverse lexicog...
Definition intpart_iter.h:74
generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
Definition shell.h:720
static bool do_enforce_unit_normalization(defaultable_boolean flag=defaultable_boolean())
sets and/or reports whether the auto-renormalization to unity is set if called without arguments,...
Definition shell.h:895
static const Shell & unit()
Definition shell.h:906