LIBINT 2.7.2
1body.h
1/*
2 * Copyright (C) 2004-2021 Edward F. Valeev
3 *
4 * This file is part of Libint.
5 *
6 * Libint is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21// standard C++ headers
22#include <atomic>
23#include <chrono>
24#include <cmath>
25#include <fstream>
26#include <iomanip>
27#include <iostream>
28#include <iterator>
29#include <memory>
30#include <mutex>
31#include <sstream>
32#include <thread>
33#include <unordered_map>
34#include <vector>
35
36// have BTAS library?
37#ifdef LIBINT2_HAVE_BTAS
38# include <btas/btas.h>
39#else // LIBINT2_HAVE_BTAS
40# error "libint2::lcao requires BTAS"
41#endif
42
43// Libint Gaussian integrals library
44#include <libint2/diis.h>
45#include <libint2/util/intpart_iter.h>
46#include <libint2/chemistry/sto3g_atomic_density.h>
47#include <libint2.hpp>
48
49#if defined(_OPENMP)
50#include <omp.h>
51#endif
52
53typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2;
54typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3;
55typedef btas::RangeNd<CblasRowMajor, std::array<long, 4>> Range4;
56typedef btas::Tensor<double, Range2> Tensor2d;
57typedef btas::Tensor<double, Range3> Tensor3d;
58typedef btas::Tensor<double, Range3> Tensor4d;
59
60typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
61 Matrix; // import dense, dynamically sized Matrix type from Eigen;
62 // this is a matrix with row-major storage
63 // (http://en.wikipedia.org/wiki/Row-major_order)
64// to meet the layout of the integrals returned by the Libint integral library
65typedef Eigen::DiagonalMatrix<double, Eigen::Dynamic, Eigen::Dynamic>
66 DiagonalMatrix;
67
68using libint2::Shell;
69using libint2::libint2::Atom;
70using libint2::BasisSet;
71using libint2::Operator;
72using libint2::BraKet;
73
74std::vector<libint2::Atom> read_geometry(const std::string& filename);
75Matrix compute_soad(const std::vector<libint2::Atom>& atoms);
76// computes norm of shell-blocks of A
77Matrix compute_shellblock_norm(const BasisSet& obs, const Matrix& A);
78
79template <Operator obtype>
81 const BasisSet& obs, const std::vector<libint2::Atom>& atoms = std::vector<libint2::Atom>());
82
83#if LIBINT2_DERIV_ONEBODY_ORDER
84template <Operator obtype>
85std::vector<Matrix> compute_1body_ints_deriv(unsigned deriv_order,
86 const BasisSet& obs,
87 const std::vector<libint2::Atom>& atoms);
88#endif // LIBINT2_DERIV_ONEBODY_ORDER
89
90template <libint2::Operator Kernel = libint2::Operator::coulomb>
91Matrix compute_schwarz_ints(
92 const BasisSet& bs1, const BasisSet& bs2 = BasisSet(),
93 bool use_2norm = false, // use infty norm by default
94 typename libint2::operator_traits<Kernel>::oper_params_type params =
95 libint2::operator_traits<Kernel>::default_params());
96Matrix compute_do_ints(const BasisSet& bs1, const BasisSet& bs2 = BasisSet(),
97 bool use_2norm = false // use infty norm by default
98 );
99
100using shellpair_list_t = std::unordered_map<size_t, std::vector<size_t>>;
101shellpair_list_t obs_shellpair_list; // shellpair list for OBS
102
107shellpair_list_t compute_shellpair_list(const BasisSet& bs1,
108 const BasisSet& bs2 = BasisSet(),
109 double threshold = 1e-12);
110
111Matrix compute_2body_fock(
112 const BasisSet& obs, const Matrix& D,
113 double precision = std::numeric_limits<
114 double>::epsilon(), // discard contributions smaller than this
115 const Matrix& Schwarz = Matrix() // K_ij = sqrt(||(ij|ij)||_\infty); if
116 // empty, do not Schwarz screen
117 );
118// an Fock builder that can accept densities expressed a separate basis
119Matrix compute_2body_fock_general(
120 const BasisSet& obs, const Matrix& D, const BasisSet& D_bs,
121 bool D_is_sheldiagonal = false, // set D_is_shelldiagonal if doing SOAD
122 double precision = std::numeric_limits<
123 double>::epsilon() // discard contributions smaller than this
124 );
125
126#if LIBINT2_DERIV_ERI_ORDER
127template <unsigned deriv_order>
128std::vector<Matrix> compute_2body_fock_deriv(
129 const BasisSet& obs, const std::vector<libint2::Atom>& atoms,
130 const Matrix& D,
131 double precision = std::numeric_limits<
132 double>::epsilon(), // discard contributions smaller than this
133 const Matrix& Schwarz = Matrix() // K_ij = sqrt(||(ij|ij)||_\infty); if
134 // empty, do not Schwarz screen
135 );
136#endif // LIBINT2_DERIV_ERI_ORDER
137
138// returns {X,X^{-1},S_condition_number_after_conditioning}, where
139// X is the generalized square-root-inverse such that X.transpose() * S * X = I
140// columns of Xinv is the basis conditioned such that
141// the condition number of its metric (Xinv.transpose . Xinv) <
142// S_condition_number_threshold
143std::tuple<Matrix, Matrix, double> conditioning_orthogonalizer(
144 const Matrix& S, double S_condition_number_threshold);
145
146#ifdef LIBINT2_HAVE_BTAS
147#define HAVE_DENSITY_FITTING 1
149 const BasisSet& obs;
150 const BasisSet& dfbs;
151 DFFockEngine(const BasisSet& _obs, const BasisSet& _dfbs)
152 : obs(_obs), dfbs(_dfbs) {}
153
154 typedef btas::RangeNd<CblasRowMajor, std::array<long, 3>> Range3d;
155 typedef btas::Tensor<double, Range3d> Tensor3d;
156 Tensor3d xyK;
157
158 // a DF-based builder, using coefficients of occupied MOs
159 Matrix compute_2body_fock_dfC(const Matrix& Cocc);
160};
161#endif // HAVE_DENSITY_FITTING
162
163namespace libint2 {
164int nthreads;
165
167template <typename Lambda>
168void parallel_do(Lambda& lambda) {
169#ifdef _OPENMP
170#pragma omp parallel
171 {
172 auto thread_id = omp_get_thread_num();
173 lambda(thread_id);
174 }
175#else // use C++11 threads
176 std::vector<std::thread> threads;
177 for (int thread_id = 0; thread_id != libint2::nthreads; ++thread_id) {
178 if (thread_id != nthreads - 1)
179 threads.push_back(std::thread(lambda, thread_id));
180 else
181 lambda(thread_id);
182 } // threads_id
183 for (int thread_id = 0; thread_id < nthreads - 1; ++thread_id)
184 threads[thread_id].join();
185#endif
186}
187}
188
189int main(int argc, char* argv[]) {
190 using std::cout;
191 using std::cerr;
192 using std::endl;
193
194 try {
195 /*** =========================== ***/
196 /*** initialize molecule ***/
197 /*** =========================== ***/
198
199 // read geometry from a file; by default read from h2o.xyz, else take
200 // filename (.xyz) from the command line
201 const auto filename = (argc > 1) ? argv[1] : "h2o.xyz";
202 const auto basisname = (argc > 2) ? argv[2] : "aug-cc-pVDZ";
203 bool do_density_fitting = false;
204#ifdef HAVE_DENSITY_FITTING
205 do_density_fitting = (argc > 3);
206 const auto dfbasisname = do_density_fitting ? argv[3] : "";
207#endif
208 std::vector<libint2::Atom> atoms = read_geometry(filename);
209
210 // set up thread pool
211 {
212 using libint2::nthreads;
213 auto nthreads_cstr = getenv("LIBINT_NUM_THREADS");
214 nthreads = 1;
215 if (nthreads_cstr && strcmp(nthreads_cstr, "")) {
216 std::istringstream iss(nthreads_cstr);
217 iss >> nthreads;
218 if (nthreads > 1 << 16 || nthreads <= 0) nthreads = 1;
219 }
220#if defined(_OPENMP)
221 omp_set_num_threads(nthreads);
222#endif
223 std::cout << "Will scale over " << nthreads
224#if defined(_OPENMP)
225 << " OpenMP"
226#else
227 << " C++11"
228#endif
229 << " threads" << std::endl;
230 }
231
232 // count the number of electrons
233 auto nelectron = 0;
234 for (auto i = 0; i < atoms.size(); ++i) nelectron += atoms[i].atomic_number;
235 const auto ndocc = nelectron / 2;
236 cout << "# of electrons = " << nelectron << endl;
237
238 // compute the nuclear repulsion energy
239 auto enuc = 0.0;
240 for (auto i = 0; i < atoms.size(); i++)
241 for (auto j = i + 1; j < atoms.size(); j++) {
242 auto xij = atoms[i].x - atoms[j].x;
243 auto yij = atoms[i].y - atoms[j].y;
244 auto zij = atoms[i].z - atoms[j].z;
245 auto r2 = xij * xij + yij * yij + zij * zij;
246 auto r = sqrt(r2);
247 enuc += atoms[i].atomic_number * atoms[j].atomic_number / r;
248 }
249 cout << "Nuclear repulsion energy = " << std::setprecision(15) << enuc
250 << endl;
251
253
254 cout << "libint2::Atomic Cartesian coordinates (a.u.):" << endl;
255 for (const auto& a : atoms)
256 std::cout << a.atomic_number << " " << a.x << " " << a.y << " " << a.z
257 << std::endl;
258
259 BasisSet obs(basisname, atoms);
260 cout << "orbital basis set rank = " << obs.nbf() << endl;
261
262#ifdef HAVE_DENSITY_FITTING
263 BasisSet dfbs;
264 if (do_density_fitting) {
265 dfbs = BasisSet(dfbasisname, atoms);
266 cout << "density-fitting basis set rank = " << dfbs.nbf() << endl;
267 }
268#endif // HAVE_DENSITY_FITTING
269
270 /*** =========================== ***/
271 /*** compute 1-e integrals ***/
272 /*** =========================== ***/
273
274 // initializes the Libint integrals library ... now ready to compute
276
277 // compute OBS non-negligible shell-pair list
278 {
279 obs_shellpair_list = compute_shellpair_list(obs);
280 size_t nsp = 0;
281 for (auto& sp : obs_shellpair_list) {
282 nsp += sp.second.size();
283 }
284 std::cout << "# of {all,non-negligible} shell-pairs = {"
285 << obs.size() * (obs.size() + 1) / 2 << "," << nsp << "}"
286 << std::endl;
287 }
288
289 // compute one-body integrals
290 auto S = compute_1body_ints<Operator::overlap>(obs)[0];
291 auto T = compute_1body_ints<Operator::kinetic>(obs)[0];
292 auto V = compute_1body_ints<Operator::nuclear>(obs, libint2::make_point_charges(atoms))[0];
293 Matrix H = T + V;
294 T.resize(0, 0);
295 V.resize(0, 0);
296
297 // compute orthogonalizer X such that X.transpose() . S . X = I
298 Matrix X, Xinv;
299 double XtX_condition_number; // condition number of "re-conditioned"
300 // overlap obtained as Xinv.transpose() . Xinv
301 // one should think of columns of Xinv as the conditioned basis
302 // Re: name ... cond # (Xinv.transpose() . Xinv) = cond # (X.transpose() .
303 // X)
304 // by default assume can manage to compute with condition number of S <=
305 // 1/eps
306 // this is probably too optimistic, but in well-behaved cases even 10^11 is
307 // OK
308 double S_condition_number_threshold =
309 1.0 / std::numeric_limits<double>::epsilon();
310 std::tie(X, Xinv, XtX_condition_number) =
311 conditioning_orthogonalizer(S, S_condition_number_threshold);
312
313 Matrix D;
314 Matrix C_occ;
315 Matrix evals;
316 { // use SOAD as the guess density
317 const auto tstart = std::chrono::high_resolution_clock::now();
318
319 auto D_minbs = compute_soad(atoms); // compute guess in minimal basis
320 BasisSet minbs("STO-3G", atoms);
321 if (minbs == obs)
322 D = D_minbs;
323 else { // if basis != minimal basis, map non-representable SOAD guess
324 // into the AO basis
325 // by diagonalizing a Fock matrix
326 std::cout << "projecting SOAD into AO basis ... ";
327 auto F = H;
328 F += compute_2body_fock_general(
329 obs, D_minbs, minbs, true /* SOAD_D_is_shelldiagonal */,
330 std::numeric_limits<double>::epsilon() // this is cheap, no reason
331 // to be cheaper
332 );
333
334 // solve F C = e S C by (conditioned) transformation to F' C' = e C',
335 // where
336 // F' = X.transpose() . F . X; the original C is obtained as C = X . C'
337 Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(X.transpose() * F * X);
338 auto C = X * eig_solver.eigenvectors();
339
340 // compute density, D = C(occ) . C(occ)T
341 C_occ = C.leftCols(ndocc);
342 D = C_occ * C_occ.transpose();
343
344 const auto tstop = std::chrono::high_resolution_clock::now();
345 const std::chrono::duration<double> time_elapsed = tstop - tstart;
346 std::cout << "done (" << time_elapsed.count() << " s)" << std::endl;
347 }
348 }
349
350 // pre-compute data for Schwarz bounds
351 auto K = compute_schwarz_ints<>(obs, BasisSet(), true);
352
353// prepare for density fitting
354#ifdef HAVE_DENSITY_FITTING
355 std::unique_ptr<DFFockEngine> dffockengine(
356 do_density_fitting ? new DFFockEngine(obs, dfbs) : nullptr);
357#endif // HAVE_DENSITY_FITTING
358
359 /*** =========================== ***/
360 /*** SCF loop ***/
361 /*** =========================== ***/
362
363 const auto maxiter = 100;
364 const auto conv = 1e-12;
365 auto iter = 0;
366 auto rms_error = 1.0;
367 auto ediff_rel = 0.0;
368 auto ehf = 0.0;
369 auto n2 = D.cols() * D.rows();
370 libint2::DIIS<Matrix> diis(2); // start DIIS on second iteration
371
372 // prepare for incremental Fock build ...
373 Matrix D_diff = D;
374 Matrix F = H;
375 bool reset_incremental_fock_formation = false;
376 bool incremental_Fbuild_started = false;
377 double start_incremental_F_threshold = 1e-5;
378 double next_reset_threshold = 0.0;
379 size_t last_reset_iteration = 0;
380 // ... unless doing DF, then use MO coefficients, hence not "incremental"
381 if (do_density_fitting) start_incremental_F_threshold = 0.0;
382
383 do {
384 const auto tstart = std::chrono::high_resolution_clock::now();
385 ++iter;
386
387 // Last iteration's energy and density
388 auto ehf_last = ehf;
389 Matrix D_last = D;
390
391 if (not incremental_Fbuild_started &&
392 rms_error < start_incremental_F_threshold) {
393 incremental_Fbuild_started = true;
394 reset_incremental_fock_formation = false;
395 last_reset_iteration = iter - 1;
396 next_reset_threshold = rms_error / 1e1;
397 std::cout << "== started incremental fock build" << std::endl;
398 }
399 if (reset_incremental_fock_formation || not incremental_Fbuild_started) {
400 F = H;
401 D_diff = D;
402 }
403 if (reset_incremental_fock_formation && incremental_Fbuild_started) {
404 reset_incremental_fock_formation = false;
405 last_reset_iteration = iter;
406 next_reset_threshold = rms_error / 1e1;
407 std::cout << "== reset incremental fock build" << std::endl;
408 }
409
410 // build a new Fock matrix
411 if (not do_density_fitting) {
412 // totally empirical precision variation, involves the condition number
413 const auto precision_F = std::min(
414 std::min(1e-3 / XtX_condition_number, 1e-7),
415 std::max(rms_error / 1e4, std::numeric_limits<double>::epsilon()));
416 F += compute_2body_fock(obs, D_diff, precision_F, K);
417 }
418#if HAVE_DENSITY_FITTING
419 else { // do DF
420 F = H + dffockengine->compute_2body_fock_dfC(C_occ);
421 }
422#else
423 else {
424 assert(false);
425 } // do_density_fitting is true but HAVE_DENSITY_FITTING is not defined!
426 // should not happen
427#endif // HAVE_DENSITY_FITTING
428
429 // compute HF energy with the non-extrapolated Fock matrix
430 ehf = D.cwiseProduct(H + F).sum();
431 ediff_rel = std::abs((ehf - ehf_last) / ehf);
432
433 // compute SCF error
434 Matrix FD_comm = F * D * S - S * D * F;
435 rms_error = FD_comm.norm() / n2;
436 if (rms_error < next_reset_threshold || iter - last_reset_iteration >= 8)
437 reset_incremental_fock_formation = true;
438
439 // DIIS extrapolate F
440 Matrix F_diis = F; // extrapolated F cannot be used in incremental Fock
441 // build; only used to produce the density
442 // make a copy of the unextrapolated matrix
443 diis.extrapolate(F_diis, FD_comm);
444
445 // solve F C = e S C by (conditioned) transformation to F' C' = e C',
446 // where
447 // F' = X.transpose() . F . X; the original C is obtained as C = X . C'
448 Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(X.transpose() * F_diis *
449 X);
450 evals = eig_solver.eigenvalues();
451 auto C = X * eig_solver.eigenvectors();
452
453 // compute density, D = C(occ) . C(occ)T
454 C_occ = C.leftCols(ndocc);
455 D = C_occ * C_occ.transpose();
456 D_diff = D - D_last;
457
458 const auto tstop = std::chrono::high_resolution_clock::now();
459 const std::chrono::duration<double> time_elapsed = tstop - tstart;
460
461 if (iter == 1)
462 std::cout << "\n\nIter E(HF) D(E)/E "
463 "RMS([F,D])/nn Time(s)\n";
464 printf(" %02d %20.12f %20.12e %20.12e %10.5lf\n", iter, ehf + enuc,
465 ediff_rel, rms_error, time_elapsed.count());
466
467 } while (((ediff_rel > conv) || (rms_error > conv)) && (iter < maxiter));
468
469 printf("** Hartree-Fock energy = %20.12f\n", ehf + enuc);
470
471 auto Mu = compute_1body_ints<Operator::emultipole2>(obs);
474 for (int xyz = 0; xyz != 3; ++xyz)
475 mu[xyz] = -2 *
476 D.cwiseProduct(Mu[xyz + 1])
477 .sum(); // 2 = alpha + beta, -1 = electron charge
478 for (int k = 0; k != 6; ++k)
479 qu[k] = -2 *
480 D.cwiseProduct(Mu[k + 4])
481 .sum(); // 2 = alpha + beta, -1 = electron charge
482 std::cout << "** edipole = ";
483 std::copy(mu.begin(), mu.end(),
484 std::ostream_iterator<double>(std::cout, " "));
485 std::cout << std::endl;
486 std::cout << "** equadrupole = ";
487 std::copy(qu.begin(), qu.end(),
488 std::ostream_iterator<double>(std::cout, " "));
489 std::cout << std::endl;
490
491 { // compute force
492#if LIBINT2_DERIV_ONEBODY_ORDER
493 // compute 1-e forces
494 Matrix F1 = Matrix::Zero(atoms.size(), 3);
495 Matrix F_Pulay = Matrix::Zero(atoms.size(), 3);
497 // one-body contributions to the forces
499 auto T1 = compute_1body_ints_deriv<Operator::kinetic>(1, obs, atoms);
500 auto V1 = compute_1body_ints_deriv<Operator::nuclear>(1, obs, atoms);
501 for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
502 for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
503 auto force = 2 * (T1[i] + V1[i]).cwiseProduct(D).sum();
504 F1(atom, xyz) += force;
505 }
506 }
507
509 // Pulay force
511 // orbital energy density
512 DiagonalMatrix evals_occ(evals.topRows(ndocc));
513 Matrix W = C_occ * evals_occ * C_occ.transpose();
514 auto S1 = compute_1body_ints_deriv<Operator::overlap>(1, obs, atoms);
515 for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
516 for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
517 auto force = 2 * S1[i].cwiseProduct(W).sum();
518 F_Pulay(atom, xyz) -= force;
519 }
520 }
521
522 std::cout << "** 1-body forces = ";
523 for (int atom = 0; atom != atoms.size(); ++atom)
524 for (int xyz = 0; xyz != 3; ++xyz) std::cout << F1(atom, xyz) << " ";
525 std::cout << std::endl;
526 std::cout << "** Pulay forces = ";
527 for (int atom = 0; atom != atoms.size(); ++atom)
528 for (int xyz = 0; xyz != 3; ++xyz)
529 std::cout << F_Pulay(atom, xyz) << " ";
530 std::cout << std::endl;
531#endif // LIBINT2_DERIV_ONEBODY_ORDER
532
533#if LIBINT2_DERIV_ERI_ORDER
534 // compute 2-e forces
535 Matrix F2 = Matrix::Zero(atoms.size(), 3);
536
538 // two-body contributions to the forces
540 auto G1 = compute_2body_fock_deriv<1>(obs, atoms, D);
541 for (auto atom = 0, i = 0; atom != atoms.size(); ++atom) {
542 for (auto xyz = 0; xyz != 3; ++xyz, ++i) {
543 // identity prefactor since E(HF) = trace(H + F, D) = trace(2H + G, D)
544 auto force = G1[i].cwiseProduct(D).sum();
545 F2(atom, xyz) += force;
546 }
547 }
548
549 std::cout << "** 2-body forces = ";
550 for (int atom = 0; atom != atoms.size(); ++atom)
551 for (int xyz = 0; xyz != 3; ++xyz) std::cout << F2(atom, xyz) << " ";
552 std::cout << std::endl;
553#endif
554
555// if support 1-e and 2-e derivatives compute nuclear repulsion force and the
556// total force
557#if LIBINT2_DERIV_ONEBODY_ORDER && LIBINT2_DERIV_ERI_ORDER
558 // compute nuclear repulsion forces
559 Matrix FN = Matrix::Zero(atoms.size(), 3);
561 // nuclear repulsion contribution to the forces
563 for (auto a1 = 1; a1 != atoms.size(); ++a1) {
564 const auto& atom1 = atoms[a1];
565 for (auto a2 = 0; a2 < a1; ++a2) {
566 const auto& atom2 = atoms[a2];
567
568 auto x12 = atom1.x - atom2.x;
569 auto y12 = atom1.y - atom2.y;
570 auto z12 = atom1.z - atom2.z;
571 auto r12_2 = x12 * x12 + y12 * y12 + z12 * z12;
572 auto r12 = sqrt(r12_2);
573 auto r12_3 = r12 * r12_2;
574
575 auto z1z2_over_r12_3 =
576 atom1.atomic_number * atom2.atomic_number / r12_3;
577
578 auto fx = -x12 * z1z2_over_r12_3;
579 auto fy = -y12 * z1z2_over_r12_3;
580 auto fz = -z12 * z1z2_over_r12_3;
581 FN(a1, 0) += fx;
582 FN(a1, 1) += fy;
583 FN(a1, 2) += fz;
584 FN(a2, 0) -= fx;
585 FN(a2, 1) -= fy;
586 FN(a2, 2) -= fz;
587 }
588 }
589
590 std::cout << "** nuclear repulsion forces = ";
591 for (int atom = 0; atom != atoms.size(); ++atom)
592 for (int xyz = 0; xyz != 3; ++xyz) std::cout << FN(atom, xyz) << " ";
593 std::cout << std::endl;
594
595 auto F = F1 + F_Pulay + F2 + FN;
596 std::cout << "** Hartree-Fock forces = ";
597 for (int atom = 0; atom != atoms.size(); ++atom)
598 for (int xyz = 0; xyz != 3; ++xyz) std::cout << F(atom, xyz) << " ";
599 std::cout << std::endl;
600#endif
601 }
602
603 { // compute hessian
604 const auto ncoords = 3 * atoms.size();
605 // # of elems in upper triangle
606 const auto nelem = ncoords * (ncoords+1) / 2;
607#if LIBINT2_DERIV_ONEBODY_ORDER > 1
608 // compute 1-e hessian
609 Matrix H1 = Matrix::Zero(ncoords, ncoords);
610 Matrix H_Pulay = Matrix::Zero(ncoords, ncoords);
612 // one-body contributions to the hessian
614 auto T2 = compute_1body_ints_deriv<Operator::kinetic>(2, obs, atoms);
615 auto V2 = compute_1body_ints_deriv<Operator::nuclear>(2, obs, atoms);
616 for (auto row = 0, i = 0; row != ncoords; ++row) {
617 for (auto col = row; col != ncoords; ++col, ++i) {
618 auto hess = 2 * (T2[i] + V2[i]).cwiseProduct(D).sum();
619 H1(row, col) += hess;
620 }
621 }
622
624 // Pulay hessian
626 // orbital energy density
627 DiagonalMatrix evals_occ(evals.topRows(ndocc));
628 Matrix W = C_occ * evals_occ * C_occ.transpose();
629 auto S2 = compute_1body_ints_deriv<Operator::overlap>(2, obs, atoms);
630 for (auto row = 0, i = 0; row != ncoords; ++row) {
631 for (auto col = row; col != ncoords; ++col, ++i) {
632 auto hess = 2 * S2[i].cwiseProduct(W).sum();
633 H_Pulay(row, col) -= hess;
634 }
635 }
636
637 std::cout << "** 1-body hessian = ";
638 for (auto row = 0, i = 0; row != ncoords; ++row) {
639 for (auto col = row; col != ncoords; ++col) {
640 std::cout << H1(row, col) << " ";
641 }
642 }
643 std::cout << std::endl;
644
645 std::cout << "** Pulay hessian = ";
646 for (auto row = 0, i = 0; row != ncoords; ++row) {
647 for (auto col = row; col != ncoords; ++col) {
648 std::cout << H_Pulay(row, col) << " ";
649 }
650 }
651 std::cout << std::endl;
652#endif // LIBINT2_DERIV_ONEBODY_ORDER > 1
653
654#if LIBINT2_DERIV_ERI_ORDER > 1
655 // compute 2-e forces
656 Matrix H2 = Matrix::Zero(ncoords, ncoords);
657
659 // two-body contributions to the forces
661 auto G2 = compute_2body_fock_deriv<2>(obs, atoms, D);
662 for (auto row = 0, i = 0; row != ncoords; ++row) {
663 for (auto col = row; col != ncoords; ++col, ++i) {
664 // identity prefactor since E(HF) = trace(H + F, D) = trace(2H + G, D)
665 auto hess = G2[i].cwiseProduct(D).sum();
666 H2(row, col) += hess;
667 }
668 }
669
670 std::cout << "** 2-body hessian = ";
671 for (auto row = 0, i = 0; row != ncoords; ++row) {
672 for (auto col = row; col != ncoords; ++col) {
673 std::cout << H2(row, col) << " ";
674 }
675 }
676 std::cout << std::endl;
677#endif
678
679// if support 1-e and 2-e 2nd derivatives compute nuclear repulsion hessian and
680// the total hessian
681#if LIBINT2_DERIV_ONEBODY_ORDER > 1 && LIBINT2_DERIV_ERI_ORDER > 1
682 // compute nuclear repulsion hessian
683 // NB only the upper triangle is computed!!!
684 Matrix HN = Matrix::Zero(ncoords, ncoords);
685
687 // nuclear repulsion contribution to the hessian
689 for (auto a1 = 1; a1 != atoms.size(); ++a1) {
690 const auto& atom1 = atoms[a1];
691 for (auto a2 = 0; a2 < a1; ++a2) {
692 const auto& atom2 = atoms[a2];
693
694 auto x12 = atom1.x - atom2.x;
695 auto y12 = atom1.y - atom2.y;
696 auto z12 = atom1.z - atom2.z;
697 auto x12_2 = x12 * x12;
698 auto y12_2 = y12 * y12;
699 auto z12_2 = z12 * z12;
700 auto r12_2 = x12 * x12 + y12 * y12 + z12 * z12;
701 auto r12 = sqrt(r12_2);
702 auto r12_5 = r12 * r12_2 * r12_2;
703
704 auto z1z2_over_r12_5 =
705 atom1.atomic_number * atom2.atomic_number / r12_5;
706
707 HN(3*a1 + 0, 3*a1 + 0) += z1z2_over_r12_5 * (3*x12_2 - r12_2);
708 HN(3*a1 + 1, 3*a1 + 1) += z1z2_over_r12_5 * (3*y12_2 - r12_2);
709 HN(3*a1 + 2, 3*a1 + 2) += z1z2_over_r12_5 * (3*z12_2 - r12_2);
710 HN(3*a1 + 0, 3*a1 + 1) += z1z2_over_r12_5 * (3*x12*y12);
711 HN(3*a1 + 0, 3*a1 + 2) += z1z2_over_r12_5 * (3*x12*z12);
712 HN(3*a1 + 1, 3*a1 + 2) += z1z2_over_r12_5 * (3*y12*z12);
713
714 HN(3*a2 + 0, 3*a2 + 0) += z1z2_over_r12_5 * (3*x12_2 - r12_2);
715 HN(3*a2 + 1, 3*a2 + 1) += z1z2_over_r12_5 * (3*y12_2 - r12_2);
716 HN(3*a2 + 2, 3*a2 + 2) += z1z2_over_r12_5 * (3*z12_2 - r12_2);
717 HN(3*a2 + 0, 3*a2 + 1) += z1z2_over_r12_5 * (3*x12*y12);
718 HN(3*a2 + 0, 3*a2 + 2) += z1z2_over_r12_5 * (3*x12*z12);
719 HN(3*a2 + 1, 3*a2 + 2) += z1z2_over_r12_5 * (3*y12*z12);
720
721 HN(3*a2 + 0, 3*a1 + 0) -= z1z2_over_r12_5 * (3*x12_2 - r12_2);
722 HN(3*a2 + 1, 3*a1 + 1) -= z1z2_over_r12_5 * (3*y12_2 - r12_2);
723 HN(3*a2 + 2, 3*a1 + 2) -= z1z2_over_r12_5 * (3*z12_2 - r12_2);
724 HN(3*a2 + 1, 3*a1 + 0) -= z1z2_over_r12_5 * (3*y12*x12);
725 HN(3*a2 + 2, 3*a1 + 0) -= z1z2_over_r12_5 * (3*z12*x12);
726 HN(3*a2 + 2, 3*a1 + 1) -= z1z2_over_r12_5 * (3*z12*y12);
727 HN(3*a2 + 0, 3*a1 + 1) -= z1z2_over_r12_5 * (3*x12*y12);
728 HN(3*a2 + 0, 3*a1 + 2) -= z1z2_over_r12_5 * (3*x12*z12);
729 HN(3*a2 + 1, 3*a1 + 2) -= z1z2_over_r12_5 * (3*y12*z12);
730 }
731 }
732
733 std::cout << "** nuclear repulsion hessian = ";
734 for (auto row = 0, i = 0; row != ncoords; ++row) {
735 for (auto col = row; col != ncoords; ++col) {
736 std::cout << HN(row, col) << " ";
737 }
738 }
739 std::cout << std::endl;
740
741 auto H = H1 + H_Pulay + H2 + HN;
742 std::cout << "** Hartree-Fock hessian = ";
743 for (auto row = 0, i = 0; row != ncoords; ++row) {
744 for (auto col = row; col != ncoords; ++col) {
745 std::cout << H(row, col) << " ";
746 }
747 }
748 std::cout << std::endl;
749#endif
750 }
751
752 libint2::finalize(); // done with libint
753
754 } // end of try block; if any exceptions occurred, report them and exit
755 // cleanly
756
757 catch (const char* ex) {
758 cerr << "caught exception: " << ex << endl;
759 return 1;
760 } catch (std::string& ex) {
761 cerr << "caught exception: " << ex << endl;
762 return 1;
763 } catch (std::exception& ex) {
764 cerr << ex.what() << endl;
765 return 1;
766 } catch (...) {
767 cerr << "caught unknown exception\n";
768 return 1;
769 }
770
771 return 0;
772}
773
774std::vector<libint2::Atom> read_geometry(const std::string& filename) {
775 std::cout << "Will read geometry from " << filename << std::endl;
776 std::ifstream is(filename);
777 if (not is.good()) {
778 char errmsg[256] = "Could not open file ";
779 strncpy(errmsg + 20, filename.c_str(), 235);
780 errmsg[255] = '\0';
781 throw std::ios_base::failure(errmsg);
782 }
783
784 // to prepare for MPI parallelization, we will read the entire file into a
785 // string that can be
786 // broadcast to everyone, then converted to an std::istringstream object that
787 // can be used just like std::ifstream
788 std::ostringstream oss;
789 oss << is.rdbuf();
790 // use ss.str() to get the entire contents of the file as an std::string
791 // broadcast
792 // then make an std::istringstream in each process
793 std::istringstream iss(oss.str());
794
795 // check the extension: if .xyz, assume the standard XYZ format, otherwise
796 // throw an exception
797 if (filename.rfind(".xyz") != std::string::npos)
798 return libint2::read_dotxyz(iss);
799 else
800 throw "only .xyz files are accepted";
801}
802
803// computes Superposition-Of-libint2::Atomic-Densities guess for the molecular density
804// matrix
805// in minimal basis; occupies subshells by smearing electrons evenly over the
806// orbitals
807Matrix compute_soad(const std::vector<libint2::Atom>& atoms) {
808 // compute number of atomic orbitals
809 size_t nao = 0;
810 for (const auto& atom : atoms) {
811 const auto Z = atom.atomic_number;
812 nao += libint2::sto3g_num_ao(Z);
813 }
814
815 // compute the minimal basis density
816 Matrix D = Matrix::Zero(nao, nao);
817 size_t ao_offset = 0; // first AO of this atom
818 for (const auto& atom : atoms) {
819 const auto Z = atom.atomic_number;
820 const auto& occvec = libint2::sto3g_ao_occupation_vector(Z);
821 for(const auto& occ: occvec) {
822 D(ao_offset, ao_offset) = occ;
823 ++ao_offset;
824 }
825 }
826
827 return D * 0.5; // we use densities normalized to # of electrons/2
828}
829
830Matrix compute_shellblock_norm(const BasisSet& obs, const Matrix& A) {
831 const auto nsh = obs.size();
832 Matrix Ash(nsh, nsh);
833
834 auto shell2bf = obs.shell2bf();
835 for (size_t s1 = 0; s1 != nsh; ++s1) {
836 const auto& s1_first = shell2bf[s1];
837 const auto& s1_size = obs[s1].size();
838 for (size_t s2 = 0; s2 != nsh; ++s2) {
839 const auto& s2_first = shell2bf[s2];
840 const auto& s2_size = obs[s2].size();
841
842 Ash(s1, s2) = A.block(s1_first, s2_first, s1_size, s2_size)
843 .lpNorm<Eigen::Infinity>();
844 }
845 }
846
847 return Ash;
848}
849
850template <Operator obtype, typename OperatorParams>
852 const BasisSet& obs, OperatorParams params) {
853 const auto n = obs.nbf();
854 const auto nshells = obs.size();
855 using libint2::nthreads;
857 result_type;
858 const unsigned int nopers = libint2::operator_traits<obtype>::nopers;
859 result_type result;
860 for (auto& r : result) r = Matrix::Zero(n, n);
861
862 // construct the 1-body integrals engine
863 std::vector<libint2::Engine> engines(nthreads);
864 engines[0] = libint2::Engine(obtype, obs.max_nprim(), obs.max_l(), 0);
865 // nuclear attraction ints engine needs to know where the charges sit ...
866 // the nuclei are charges in this case; in QM/MM there will also be classical
867 // charges
868 if (obtype == Operator::nuclear || obtype == Operator::erf_nuclear ||
869 obtype == Operator::erfc_nuclear) {
870 engines[0].set_params(params);
871 }
872 for (size_t i = 1; i != nthreads; ++i) {
873 engines[i] = engines[0];
874 }
875
876 auto shell2bf = obs.shell2bf();
877
878 auto compute = [&](int thread_id) {
879
880 const auto& buf = engines[thread_id].results();
881
882 // loop over unique shell pairs, {s1,s2} such that s1 >= s2
883 // this is due to the permutational symmetry of the real integrals over
884 // Hermitian operators: (1|2) = (2|1)
885 for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
886 auto bf1 = shell2bf[s1]; // first basis function in this shell
887 auto n1 = obs[s1].size();
888 auto s1_offset = s1 * (s1+1) / 2;
889
890 for(auto s2: obs_shellpair_list[s1]) {
891 auto s12 = s1_offset + s2;
892 if (s12 % nthreads != thread_id) continue;
893
894 auto bf2 = shell2bf[s2];
895 auto n2 = obs[s2].size();
896
897 auto n12 = n1 * n2;
898
899 // compute shell pair; return is the pointer to the buffer
900 engines[thread_id].compute(obs[s1], obs[s2]);
901
902 for (unsigned int op = 0; op != nopers; ++op) {
903 // "map" buffer to a const Eigen Matrix, and copy it to the
904 // corresponding blocks of the result
905 Eigen::Map<const Matrix> buf_mat(buf[op], n1, n2);
906 result[op].block(bf1, bf2, n1, n2) = buf_mat;
907 if (s1 != s2) // if s1 >= s2, copy {s1,s2} to the corresponding
908 // {s2,s1} block, note the transpose!
909 result[op].block(bf2, bf1, n2, n1) = buf_mat.transpose();
910 }
911 }
912 }
913 }; // compute lambda
914
915 libint2::parallel_do(compute);
916
917 return result;
918}
919
920#if LIBINT2_DERIV_ONEBODY_ORDER
921template <Operator obtype>
922std::vector<Matrix> compute_1body_ints_deriv(unsigned deriv_order,
923 const BasisSet& obs,
924 const std::vector<libint2::Atom>& atoms) {
925 using libint2::nthreads;
926 const auto n = obs.nbf();
927 const auto nshells = obs.size();
928 constexpr auto nopers = libint2::operator_traits<obtype>::nopers;
929 const auto nresults =
930 nopers * libint2::num_geometrical_derivatives(atoms.size(), deriv_order);
931 typedef std::vector<Matrix> result_type;
932 result_type result(nresults);
933 for (auto& r : result) r = Matrix::Zero(n, n);
934
935 // construct the 1-body integrals engine
936 std::vector<libint2::Engine> engines(nthreads);
937 engines[0] =
938 libint2::Engine(obtype, obs.max_nprim(), obs.max_l(), deriv_order);
939 // nuclear attraction ints engine needs to know where the charges sit ...
940 // the nuclei are charges in this case; in QM/MM there will also be classical
941 // charges
942 if (obtype == Operator::nuclear) {
943 std::vector<std::pair<double, std::array<double, 3>>> q;
944 for (const auto& atom : atoms) {
945 q.push_back({static_cast<double>(atom.atomic_number),
946 {{atom.x, atom.y, atom.z}}});
947 }
948 engines[0].set_params(q);
949 }
950 for (size_t i = 1; i != nthreads; ++i) {
951 engines[i] = engines[0];
952 }
953
954 auto shell2bf = obs.shell2bf();
955 auto shell2atom = obs.shell2atom(atoms);
956
957 const auto natoms = atoms.size();
958 const auto two_times_ncoords = 6*natoms;
959 const auto nderivcenters_shset =
960 2 + ((obtype == Operator::nuclear) ? natoms : 0);
961
962 auto compute = [&](int thread_id) {
963
964 const auto& buf = engines[thread_id].results();
965
966 // loop over unique shell pairs, {s1,s2} such that s1 >= s2
967 // this is due to the permutational symmetry of the real integrals over
968 // Hermitian operators: (1|2) = (2|1)
969 for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
970 auto bf1 = shell2bf[s1]; // first basis function in this shell
971 auto n1 = obs[s1].size();
972 auto atom1 = shell2atom[s1];
973 assert(atom1 != -1);
974
975 auto s1_offset = s1 * (s1+1) / 2;
976 for(auto s2: obs_shellpair_list[s1]) {
977 auto s12 = s1_offset + s2;
978 if (s12 % nthreads != thread_id) continue;
979
980 auto bf2 = shell2bf[s2];
981 auto n2 = obs[s2].size();
982 auto atom2 = shell2atom[s2];
983
984 auto n12 = n1 * n2;
985
986 // compute shell pair; return is the pointer to the buffer
987 engines[thread_id].compute(obs[s1], obs[s2]);
988
989 // "copy" lambda copies shell set \c idx to the operator matrix with
990 // index \c op
991 auto add_shellset_to_dest = [&](std::size_t op, std::size_t idx,
992 double scale = 1.0) {
993 // "map" buffer to a const Eigen Matrix, and copy it to the
994 // corresponding blocks of the result
995 Eigen::Map<const Matrix> buf_mat(buf[idx], n1, n2);
996 if (scale == 1.0)
997 result[op].block(bf1, bf2, n1, n2) += buf_mat;
998 else
999 result[op].block(bf1, bf2, n1, n2) += scale * buf_mat;
1000 if (s1 != s2) { // if s1 >= s2, copy {s1,s2} to the corresponding
1001 // {s2,s1} block, note the transpose!
1002 if (scale == 1.0)
1003 result[op].block(bf2, bf1, n2, n1) += buf_mat.transpose();
1004 else
1005 result[op].block(bf2, bf1, n2, n1) += scale * buf_mat.transpose();
1006 }
1007 };
1008
1009 switch (deriv_order) {
1010 case 0:
1011 for (std::size_t op = 0; op != nopers; ++op) {
1012 add_shellset_to_dest(op, op);
1013 }
1014 break;
1015
1016 // map deriv quanta for this shell pair to the overall deriv quanta
1017 //
1018 // easiest to explain with example:
1019 // in sto-3g water shells 0 1 2 sit on atom 0, shells 3 and 4 on atoms
1020 // 1 and 2 respectively
1021 // each call to engine::compute for nuclear ints will return
1022 // derivatives
1023 // with respect to 15 coordinates, obtained as 3 (x,y,z) times 2 + 3 =
1024 // 5 centers
1025 // (2 centers on which shells sit + 3 nuclear charges)
1026 // (for overlap, kinetic, and emultipole ints we there are only 6
1027 // coordinates
1028 // since the operator is coordinate-independent, or derivatives with
1029 // respect to
1030 // the operator coordinates are not computed)
1031 //
1032
1033 case 1: {
1034 std::size_t shellset_idx = 0;
1035 for (auto c = 0; c != nderivcenters_shset; ++c) {
1036 auto atom = (c == 0) ? atom1 : ((c == 1) ? atom2 : c - 2);
1037 auto op_start = 3 * atom * nopers;
1038 auto op_fence = op_start + nopers;
1039 for (auto xyz = 0; xyz != 3;
1040 ++xyz, op_start += nopers, op_fence += nopers) {
1041 for (unsigned int op = op_start; op != op_fence;
1042 ++op, ++shellset_idx) {
1043 add_shellset_to_dest(op, shellset_idx);
1044 }
1045 }
1046 }
1047 } break;
1048
1049 case 2: {
1050 //
1051 // must pay attention to symmetry when computing 2nd and higher-order derivs
1052 // e.g. d2 (s1|s2) / dX dY involves several cases:
1053 // 1. only s1 (or only s2) depends on X AND Y (i.e. X and Y refer to same atom) =>
1054 // d2 (s1|s2) / dX dY = (d2 s1 / dX dY | s2)
1055 // 2. s1 depends on X only, s2 depends on Y only (or vice versa) =>
1056 // d2 (s1|s2) / dX dY = (d s1 / dX | d s2 / dY)
1057 // 3. s1 AND s2 depend on X AND Y (i.e. X and Y refer to same atom) =>
1058 // case A: X != Y
1059 // d2 (s1|s2) / dX dY = (d2 s1 / dX dY | s2) + (d s1 / dX | d s2 / dY)
1060 // + (d s1 / dY | d s2 / dX) + (s1| d2 s2 / dX dY )
1061 // case B: X == Y
1062 // d2 (s1|s2) / dX2 = (d2 s1 / dX2 | s2) + 2 (d s1 / dX | d s2 / dX)
1063 // + (s1| d2 s2 / dX2 )
1064
1065 // computes upper triangle index
1066 // n2 = matrix size times 2
1067 // i,j = (unordered) indices
1068 auto upper_triangle_index = [](size_t n2, long i, long j) {
1069 return std::min(i, j) * (n2 - std::min(i, j) - 1) / 2 + std::max(i, j);
1070 };
1071
1072 // look over shellsets in the order in which they appear
1073 std::size_t shellset_idx = 0;
1074 for (auto c1 = 0; c1 != nderivcenters_shset; ++c1) {
1075 auto a1 = (c1 == 0) ? atom1 : ((c1 == 1) ? atom2 : c1 - 2);
1076 auto coord1 = 3 * a1;
1077 for (auto xyz1 = 0; xyz1 != 3; ++xyz1, ++coord1) {
1078
1079 for (auto c2 = c1; c2 != nderivcenters_shset; ++c2) {
1080 auto a2 = (c2 == 0) ? atom1 : ((c2 == 1) ? atom2 : c2 - 2);
1081 auto xyz2_start = (c1 == c2) ? xyz1 : 0;
1082 auto coord2 = 3 * a2 + xyz2_start;
1083 for (auto xyz2 = xyz2_start; xyz2 != 3;
1084 ++xyz2, ++coord2) {
1085
1086 double scale = (coord1 == coord2 && c1 != c2) ? 2.0 : 1.0;
1087
1088 const auto coord12 =
1089 upper_triangle_index(two_times_ncoords, coord1, coord2);
1090 auto op_start = coord12 * nopers;
1091 auto op_fence = op_start + nopers;
1092 for (auto op = op_start; op != op_fence;
1093 ++op, ++shellset_idx) {
1094 add_shellset_to_dest(op, shellset_idx, scale);
1095 }
1096 }
1097 }
1098 }
1099 }
1100 } break;
1101
1102 default: {
1103 assert(false && "not yet implemented");
1104
1105 using ShellSetDerivIterator =
1107 std::vector<unsigned int>>;
1108 ShellSetDerivIterator shellset_diter(deriv_order,
1109 nderivcenters_shset);
1110 while (shellset_diter) {
1111 const auto& deriv = *shellset_diter;
1112 }
1113 }
1114 } // copy shell block switch
1115
1116 } // s2 <= s1
1117 } // s1
1118 }; // compute lambda
1119
1120 libint2::parallel_do(compute);
1121
1122 return result;
1123}
1124#endif
1125
1126template <libint2::Operator Kernel>
1127Matrix compute_schwarz_ints(
1128 const BasisSet& bs1, const BasisSet& _bs2, bool use_2norm,
1129 typename libint2::operator_traits<Kernel>::oper_params_type params) {
1130 const BasisSet& bs2 = (_bs2.empty() ? bs1 : _bs2);
1131 const auto nsh1 = bs1.size();
1132 const auto nsh2 = bs2.size();
1133 const auto bs1_equiv_bs2 = (&bs1 == &bs2);
1134
1135 Matrix K = Matrix::Zero(nsh1, nsh2);
1136
1137 // construct the 2-electron repulsion integrals engine
1138 using libint2::Engine;
1139 using libint2::nthreads;
1140 std::vector<Engine> engines(nthreads);
1141
1142 // !!! very important: cannot screen primitives in Schwarz computation !!!
1143 auto epsilon = 0.;
1144 engines[0] = Engine(Kernel, std::max(bs1.max_nprim(), bs2.max_nprim()),
1145 std::max(bs1.max_l(), bs2.max_l()), 0, epsilon, params);
1146 for (size_t i = 1; i != nthreads; ++i) {
1147 engines[i] = engines[0];
1148 }
1149
1150 std::cout << "computing Schwarz bound prerequisites (kernel=" << (int)Kernel
1151 << ") ... ";
1152
1153 libint2::Timers<1> timer;
1154 timer.set_now_overhead(25);
1155 timer.start(0);
1156
1157 auto compute = [&](int thread_id) {
1158
1159 const auto& buf = engines[thread_id].results();
1160
1161 // loop over permutationally-unique set of shells
1162 for (auto s1 = 0l, s12 = 0l; s1 != nsh1; ++s1) {
1163 auto n1 = bs1[s1].size(); // number of basis functions in this shell
1164
1165 auto s2_max = bs1_equiv_bs2 ? s1 : nsh2 - 1;
1166 for (auto s2 = 0; s2 <= s2_max; ++s2, ++s12) {
1167 if (s12 % nthreads != thread_id) continue;
1168
1169 auto n2 = bs2[s2].size();
1170 auto n12 = n1 * n2;
1171
1172 engines[thread_id].compute2<Kernel, BraKet::xx_xx, 0>(bs1[s1], bs2[s2],
1173 bs1[s1], bs2[s2]);
1174 assert(buf[0] != nullptr &&
1175 "to compute Schwarz ints turn off primitive screening");
1176
1177 // the diagonal elements are the Schwarz ints ... use Map.diagonal()
1178 Eigen::Map<const Matrix> buf_mat(buf[0], n12, n12);
1179 auto norm2 = use_2norm ? buf_mat.norm()
1180 : buf_mat.lpNorm<Eigen::Infinity>();
1181 K(s1, s2) = std::sqrt(norm2);
1182 if (bs1_equiv_bs2) K(s2, s1) = K(s1, s2);
1183 }
1184 }
1185 }; // thread lambda
1186
1187 libint2::parallel_do(compute);
1188
1189 timer.stop(0);
1190 std::cout << "done (" << timer.read(0) << " s)" << std::endl;
1191
1192 return K;
1193}
1194
1195Matrix compute_do_ints(const BasisSet& bs1, const BasisSet& bs2,
1196 bool use_2norm) {
1197 return compute_schwarz_ints<libint2::Operator::delta>(bs1, bs2, use_2norm);
1198}
1199
1200shellpair_list_t compute_shellpair_list(const BasisSet& bs1,
1201 const BasisSet& _bs2,
1202 const double threshold) {
1203 const BasisSet& bs2 = (_bs2.empty() ? bs1 : _bs2);
1204 const auto nsh1 = bs1.size();
1205 const auto nsh2 = bs2.size();
1206 const auto bs1_equiv_bs2 = (&bs1 == &bs2);
1207
1208 using libint2::nthreads;
1209
1210 // construct the 2-electron repulsion integrals engine
1211 using libint2::Engine;
1212 std::vector<Engine> engines;
1213 engines.reserve(nthreads);
1214 engines.emplace_back(Operator::overlap,
1215 std::max(bs1.max_nprim(), bs2.max_nprim()),
1216 std::max(bs1.max_l(), bs2.max_l()), 0);
1217 for (size_t i = 1; i != nthreads; ++i) {
1218 engines.push_back(engines[0]);
1219 }
1220
1221 std::cout << "computing non-negligible shell-pair list ... ";
1222
1223 libint2::Timers<1> timer;
1224 timer.set_now_overhead(25);
1225 timer.start(0);
1226
1227 shellpair_list_t result;
1228
1229 std::mutex mx;
1230
1231 auto compute = [&](int thread_id) {
1232
1233 auto& engine = engines[thread_id];
1234 const auto& buf = engine.results();
1235
1236 // loop over permutationally-unique set of shells
1237 for (auto s1 = 0l, s12 = 0l; s1 != nsh1; ++s1) {
1238 mx.lock();
1239 if (result.find(s1) == result.end())
1240 result.insert(std::make_pair(s1, std::vector<size_t>()));
1241 mx.unlock();
1242
1243 auto n1 = bs1[s1].size(); // number of basis functions in this shell
1244
1245 auto s2_max = bs1_equiv_bs2 ? s1 : nsh2 - 1;
1246 for (auto s2 = 0; s2 <= s2_max; ++s2, ++s12) {
1247 if (s12 % nthreads != thread_id) continue;
1248
1249 auto on_same_center = (bs1[s1].O == bs2[s2].O);
1250 bool significant = on_same_center;
1251 if (not on_same_center) {
1252 auto n2 = bs2[s2].size();
1253 engines[thread_id].compute(bs1[s1], bs2[s2]);
1254 Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
1255 auto norm = buf_mat.norm();
1256 significant = (norm >= threshold);
1257 }
1258
1259 if (significant) {
1260 mx.lock();
1261 result[s1].emplace_back(s2);
1262 mx.unlock();
1263 }
1264 }
1265 }
1266 }; // end of compute
1267
1268 libint2::parallel_do(compute);
1269
1270 // resort shell list in increasing order, i.e. result[s][s1] < result[s][s2]
1271 // if s1 < s2
1272 auto sort = [&](int thread_id) {
1273 for (auto s1 = 0l; s1 != nsh1; ++s1) {
1274 if (s1 % nthreads == thread_id) {
1275 auto& list = result[s1];
1276 std::sort(list.begin(), list.end());
1277 }
1278 }
1279 }; // end of sort
1280
1282
1283 timer.stop(0);
1284 std::cout << "done (" << timer.read(0) << " s)" << std::endl;
1285
1286 return result;
1287}
1288
1289// returns {X,X^{-1},rank,A_condition_number,result_A_condition_number}, where
1290// X is the generalized square-root-inverse such that X.transpose() * A * X = I
1291//
1292// if symmetric is true, produce "symmetric" sqrtinv: X = U . A_evals_sqrtinv .
1293// U.transpose()),
1294// else produce "canonical" sqrtinv: X = U . A_evals_sqrtinv
1295// where U are eigenvectors of A
1296// rows and cols of symmetric X are equivalent; for canonical X the rows are
1297// original basis (AO),
1298// cols are transformed basis ("orthogonal" AO)
1299//
1300// A is conditioned to max_condition_number
1301std::tuple<Matrix, Matrix, size_t, double, double> gensqrtinv(
1302 const Matrix& S, bool symmetric = false,
1303 double max_condition_number = 1e8) {
1304 Eigen::SelfAdjointEigenSolver<Matrix> eig_solver(S);
1305 auto U = eig_solver.eigenvectors();
1306 auto s = eig_solver.eigenvalues();
1307 auto s_max = s.maxCoeff();
1308 auto condition_number = std::min(
1309 s_max / std::max(s.minCoeff(), std::numeric_limits<double>::min()),
1310 1.0 / std::numeric_limits<double>::epsilon());
1311 auto threshold = s_max / max_condition_number;
1312 long n = s.rows();
1313 long n_cond = 0;
1314 for (long i = n - 1; i >= 0; --i) {
1315 if (s(i) >= threshold) {
1316 ++n_cond;
1317 } else
1318 i = 0; // skip rest since eigenvalues are in ascending order
1319 }
1320
1321 auto sigma = s.bottomRows(n_cond);
1322 auto result_condition_number = sigma.maxCoeff() / sigma.minCoeff();
1323 auto sigma_sqrt = sigma.array().sqrt().matrix().asDiagonal();
1324 auto sigma_invsqrt = sigma.array().sqrt().inverse().matrix().asDiagonal();
1325
1326 // make canonical X/Xinv
1327 auto U_cond = U.block(0, n - n_cond, n, n_cond);
1328 Matrix X = U_cond * sigma_invsqrt;
1329 Matrix Xinv = U_cond * sigma_sqrt;
1330 // convert to symmetric, if needed
1331 if (symmetric) {
1332 X = X * U_cond.transpose();
1333 Xinv = Xinv * U_cond.transpose();
1334 }
1335 return std::make_tuple(X, Xinv, size_t(n_cond), condition_number,
1336 result_condition_number);
1337}
1338
1339std::tuple<Matrix, Matrix, double> conditioning_orthogonalizer(
1340 const Matrix& S, double S_condition_number_threshold) {
1341 size_t obs_rank;
1342 double S_condition_number;
1343 double XtX_condition_number;
1344 Matrix X, Xinv;
1345
1346 assert(S.rows() == S.cols());
1347
1348 std::tie(X, Xinv, obs_rank, S_condition_number, XtX_condition_number) =
1349 gensqrtinv(S, false, S_condition_number_threshold);
1350 auto obs_nbf_omitted = (long)S.rows() - (long)obs_rank;
1351 std::cout << "overlap condition number = " << S_condition_number;
1352 if (obs_nbf_omitted > 0)
1353 std::cout << " (dropped " << obs_nbf_omitted << " "
1354 << (obs_nbf_omitted > 1 ? "fns" : "fn") << " to reduce to "
1355 << XtX_condition_number << ")";
1356 std::cout << std::endl;
1357
1358 if (obs_nbf_omitted > 0) {
1359 Matrix should_be_I = X.transpose() * S * X;
1360 Matrix I = Matrix::Identity(should_be_I.rows(), should_be_I.cols());
1361 std::cout << "||X^t * S * X - I||_2 = " << (should_be_I - I).norm()
1362 << " (should be 0)" << std::endl;
1363 }
1364
1365 return std::make_tuple(X, Xinv, XtX_condition_number);
1366}
1367
1368Matrix compute_2body_2index_ints(const BasisSet& bs) {
1369 using libint2::nthreads;
1370 const auto n = bs.nbf();
1371 const auto nshells = bs.size();
1372 Matrix result = Matrix::Zero(n, n);
1373
1374 // build engines for each thread
1375 using libint2::Engine;
1376 std::vector<Engine> engines(nthreads);
1377 engines[0] =
1378 Engine(libint2::Operator::coulomb, bs.max_nprim(), bs.max_l(), 0);
1379 engines[0].set(BraKet::xs_xs);
1380 for (size_t i = 1; i != nthreads; ++i) {
1381 engines[i] = engines[0];
1382 }
1383
1384 auto shell2bf = bs.shell2bf();
1385 auto unitshell = Shell::unit();
1386
1387 auto compute = [&](int thread_id) {
1388
1389 auto& engine = engines[thread_id];
1390 const auto& buf = engine.results();
1391
1392 // loop over unique shell pairs, {s1,s2} such that s1 >= s2
1393 // this is due to the permutational symmetry of the real integrals over
1394 // Hermitian operators: (1|2) = (2|1)
1395 for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) {
1396 auto bf1 = shell2bf[s1]; // first basis function in this shell
1397 auto n1 = bs[s1].size();
1398
1399 for (auto s2 = 0; s2 <= s1; ++s2, ++s12) {
1400 if (s12 % nthreads != thread_id) continue;
1401
1402 auto bf2 = shell2bf[s2];
1403 auto n2 = bs[s2].size();
1404
1405 // compute shell pair; return is the pointer to the buffer
1406 engine.compute(bs[s1], bs[s2]);
1407 if (buf[0] == nullptr)
1408 continue; // if all integrals screened out, skip to next shell set
1409
1410 // "map" buffer to a const Eigen Matrix, and copy it to the
1411 // corresponding blocks of the result
1412 Eigen::Map<const Matrix> buf_mat(buf[0], n1, n2);
1413 result.block(bf1, bf2, n1, n2) = buf_mat;
1414 if (s1 != s2) // if s1 >= s2, copy {s1,s2} to the corresponding {s2,s1}
1415 // block, note the transpose!
1416 result.block(bf2, bf1, n2, n1) = buf_mat.transpose();
1417 }
1418 }
1419 }; // compute lambda
1420
1421 libint2::parallel_do(compute);
1422
1423 return result;
1424}
1425
1426Matrix compute_2body_fock(const BasisSet& obs, const Matrix& D,
1427 double precision, const Matrix& Schwarz) {
1428 const auto n = obs.nbf();
1429 const auto nshells = obs.size();
1430 using libint2::nthreads;
1431 std::vector<Matrix> G(nthreads, Matrix::Zero(n, n));
1432
1433 const auto do_schwarz_screen = Schwarz.cols() != 0 && Schwarz.rows() != 0;
1434 Matrix D_shblk_norm =
1435 compute_shellblock_norm(obs, D); // matrix of infty-norms of shell blocks
1436
1437 auto fock_precision = precision;
1438 // engine precision controls primitive truncation, assume worst-case scenario
1439 // (all primitive combinations add up constructively)
1440 auto max_nprim = obs.max_nprim();
1441 auto max_nprim4 = max_nprim * max_nprim * max_nprim * max_nprim;
1442 auto engine_precision = std::min(fock_precision / D_shblk_norm.maxCoeff(),
1443 std::numeric_limits<double>::epsilon()) /
1444 max_nprim4;
1445
1446 // construct the 2-electron repulsion integrals engine pool
1447 using libint2::Engine;
1448 std::vector<Engine> engines(nthreads);
1449 engines[0] = Engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 0);
1450 engines[0].set_precision(engine_precision); // shellset-dependent precision
1451 // control will likely break
1452 // positive definiteness
1453 // stick with this simple recipe
1454 std::cout << "compute_2body_fock:precision = " << precision << std::endl;
1455 std::cout << "Engine::precision = " << engines[0].precision() << std::endl;
1456 for (size_t i = 1; i != nthreads; ++i) {
1457 engines[i] = engines[0];
1458 }
1459 std::atomic<size_t> num_ints_computed{0};
1460
1461#if defined(REPORT_INTEGRAL_TIMINGS)
1462 std::vector<libint2::Timers<1>> timers(nthreads);
1463#endif
1464
1465 auto shell2bf = obs.shell2bf();
1466
1467 auto lambda = [&](int thread_id) {
1468
1469 auto& engine = engines[thread_id];
1470 auto& g = G[thread_id];
1471 const auto& buf = engine.results();
1472
1473#if defined(REPORT_INTEGRAL_TIMINGS)
1474 auto& timer = timers[thread_id];
1475 timer.clear();
1476 timer.set_now_overhead(25);
1477#endif
1478
1479 // loop over permutationally-unique set of shells
1480 for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1481 auto bf1_first = shell2bf[s1]; // first basis function in this shell
1482 auto n1 = obs[s1].size(); // number of basis functions in this shell
1483
1484 for (const auto& s2 : obs_shellpair_list[s1]) {
1485 auto bf2_first = shell2bf[s2];
1486 auto n2 = obs[s2].size();
1487
1488 const auto Dnorm12 = do_schwarz_screen ? D_shblk_norm(s1, s2) : 0.;
1489
1490 for (auto s3 = 0; s3 <= s1; ++s3) {
1491 auto bf3_first = shell2bf[s3];
1492 auto n3 = obs[s3].size();
1493
1494 const auto Dnorm123 =
1495 do_schwarz_screen
1496 ? std::max(D_shblk_norm(s1, s3),
1497 std::max(D_shblk_norm(s2, s3), Dnorm12))
1498 : 0.;
1499
1500 const auto s4_max = (s1 == s3) ? s2 : s3;
1501 for (const auto& s4 : obs_shellpair_list[s3]) {
1502 if (s4 > s4_max)
1503 break; // for each s3, s4 are stored in monotonically increasing
1504 // order
1505
1506 if ((s1234++) % nthreads != thread_id) continue;
1507
1508 const auto Dnorm1234 =
1509 do_schwarz_screen
1510 ? std::max(
1511 D_shblk_norm(s1, s4),
1512 std::max(D_shblk_norm(s2, s4),
1513 std::max(D_shblk_norm(s3, s4), Dnorm123)))
1514 : 0.;
1515
1516 if (do_schwarz_screen &&
1517 Dnorm1234 * Schwarz(s1, s2) * Schwarz(s3, s4) <
1518 fock_precision)
1519 continue;
1520
1521 auto bf4_first = shell2bf[s4];
1522 auto n4 = obs[s4].size();
1523
1524 num_ints_computed += n1 * n2 * n3 * n4;
1525
1526 // compute the permutational degeneracy (i.e. # of equivalents) of
1527 // the given shell set
1528 auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1529 auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1530 auto s12_34_deg = (s1 == s3) ? (s2 == s4 ? 1.0 : 2.0) : 2.0;
1531 auto s1234_deg = s12_deg * s34_deg * s12_34_deg;
1532
1533#if defined(REPORT_INTEGRAL_TIMINGS)
1534 timer.start(0);
1535#endif
1536
1537 engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1538 obs[s1], obs[s2], obs[s3], obs[s4]);
1539 const auto* buf_1234 = buf[0];
1540 if (buf_1234 == nullptr)
1541 continue; // if all integrals screened out, skip to next quartet
1542
1543#if defined(REPORT_INTEGRAL_TIMINGS)
1544 timer.stop(0);
1545#endif
1546
1547 Eigen::Map<MatrixXd> buf_1234_map(buf_1234, n12, n34);
1548 assert(buf_1234_map.norm() < Schwarz(s1, s2) * Schwarz(s3, s4));
1549
1550 // 1) each shell set of integrals contributes up to 6 shell sets of
1551 // the Fock matrix:
1552 // F(a,b) += (ab|cd) * D(c,d)
1553 // F(c,d) += (ab|cd) * D(a,b)
1554 // F(b,d) -= 1/4 * (ab|cd) * D(a,c)
1555 // F(b,c) -= 1/4 * (ab|cd) * D(a,d)
1556 // F(a,c) -= 1/4 * (ab|cd) * D(b,d)
1557 // F(a,d) -= 1/4 * (ab|cd) * D(b,c)
1558 // 2) each permutationally-unique integral (shell set) must be
1559 // scaled by its degeneracy,
1560 // i.e. the number of the integrals/sets equivalent to it
1561 // 3) the end result must be symmetrized
1562 for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1563 const auto bf1 = f1 + bf1_first;
1564 for (auto f2 = 0; f2 != n2; ++f2) {
1565 const auto bf2 = f2 + bf2_first;
1566 for (auto f3 = 0; f3 != n3; ++f3) {
1567 const auto bf3 = f3 + bf3_first;
1568 for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1569 const auto bf4 = f4 + bf4_first;
1570
1571 const auto value = buf_1234[f1234];
1572
1573 const auto value_scal_by_deg = value * s1234_deg;
1574
1575 g(bf1, bf2) += D(bf3, bf4) * value_scal_by_deg;
1576 g(bf3, bf4) += D(bf1, bf2) * value_scal_by_deg;
1577 g(bf1, bf3) -= 0.25 * D(bf2, bf4) * value_scal_by_deg;
1578 g(bf2, bf4) -= 0.25 * D(bf1, bf3) * value_scal_by_deg;
1579 g(bf1, bf4) -= 0.25 * D(bf2, bf3) * value_scal_by_deg;
1580 g(bf2, bf3) -= 0.25 * D(bf1, bf4) * value_scal_by_deg;
1581 }
1582 }
1583 }
1584 }
1585 }
1586 }
1587 }
1588 }
1589
1590 }; // end of lambda
1591
1592 libint2::parallel_do(lambda);
1593
1594 // accumulate contributions from all threads
1595 for (size_t i = 1; i != nthreads; ++i) {
1596 G[0] += G[i];
1597 }
1598
1599#if defined(REPORT_INTEGRAL_TIMINGS)
1600 double time_for_ints = 0.0;
1601 for (auto& t : timers) {
1602 time_for_ints += t.read(0);
1603 }
1604 std::cout << "time for integrals = " << time_for_ints << std::endl;
1605 for (int t = 0; t != nthreads; ++t) engines[t].print_timers();
1606#endif
1607
1608 Matrix GG = 0.5 * (G[0] + G[0].transpose());
1609
1610 std::cout << "# of integrals = " << num_ints_computed << std::endl;
1611
1612 // symmetrize the result and return
1613 return GG;
1614}
1615
1616#if LIBINT2_DERIV_ERI_ORDER
1617template <unsigned deriv_order>
1618std::vector<Matrix> compute_2body_fock_deriv(const BasisSet& obs,
1619 const std::vector<libint2::Atom>& atoms,
1620 const Matrix& D, double precision,
1621 const Matrix& Schwarz) {
1622 const auto n = obs.nbf();
1623 const auto nshells = obs.size();
1624 const auto nderiv_shellset =
1625 libint2::num_geometrical_derivatives(4, deriv_order); // # of derivs for each shell quartet
1626 const auto nderiv = libint2::num_geometrical_derivatives(
1627 atoms.size(), deriv_order); // total # of derivs
1628 const auto ncoords_times_two = (atoms.size() * 3) * 2;
1629 using libint2::nthreads;
1630 std::vector<Matrix> G(nthreads * nderiv, Matrix::Zero(n, n));
1631
1632 const auto do_schwarz_screen = Schwarz.cols() != 0 && Schwarz.rows() != 0;
1633 Matrix D_shblk_norm =
1634 compute_shellblock_norm(obs, D); // matrix of infty-norms of shell blocks
1635
1636 auto fock_precision = precision;
1637 // engine precision controls primitive truncation, assume worst-case scenario
1638 // (all primitive combinations add up constructively)
1639 auto max_nprim = obs.max_nprim();
1640 auto max_nprim4 = max_nprim * max_nprim * max_nprim * max_nprim;
1641 auto engine_precision = std::min(fock_precision / D_shblk_norm.maxCoeff(),
1642 std::numeric_limits<double>::epsilon()) /
1643 max_nprim4;
1644
1645 // construct the 2-electron repulsion integrals engine pool
1646 using libint2::Engine;
1647 std::vector<Engine> engines(nthreads);
1648 engines[0] =
1649 Engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), deriv_order);
1650 engines[0].set_precision(engine_precision); // shellset-dependent precision
1651 // control will likely break
1652 // positive definiteness
1653 // stick with this simple recipe
1654 std::cout << "compute_2body_fock:precision = " << precision << std::endl;
1655 std::cout << "Engine::precision = " << engines[0].precision() << std::endl;
1656 for (size_t i = 1; i != nthreads; ++i) {
1657 engines[i] = engines[0];
1658 }
1659 std::atomic<size_t> num_ints_computed{0};
1660
1661#if defined(REPORT_INTEGRAL_TIMINGS)
1662 std::vector<libint2::Timers<1>> timers(nthreads);
1663#endif
1664
1665 auto shell2bf = obs.shell2bf();
1666 auto shell2atom = obs.shell2atom(atoms);
1667
1668 auto lambda = [&](int thread_id) {
1669
1670 auto& engine = engines[thread_id];
1671 const auto& buf = engine.results();
1672
1673#if defined(REPORT_INTEGRAL_TIMINGS)
1674 auto& timer = timers[thread_id];
1675 timer.clear();
1676 timer.set_now_overhead(25);
1677#endif
1678
1679 size_t shell_atoms[4];
1680
1681 // loop over permutationally-unique set of shells
1682 for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1683 auto bf1_first = shell2bf[s1]; // first basis function in this shell
1684 auto n1 = obs[s1].size(); // number of basis functions in this shell
1685 shell_atoms[0] = shell2atom[s1];
1686
1687 for (const auto& s2 : obs_shellpair_list[s1]) {
1688 auto bf2_first = shell2bf[s2];
1689 auto n2 = obs[s2].size();
1690 shell_atoms[1] = shell2atom[s2];
1691
1692 const auto Dnorm12 = do_schwarz_screen ? D_shblk_norm(s1, s2) : 0.;
1693
1694 for (auto s3 = 0; s3 <= s1; ++s3) {
1695 auto bf3_first = shell2bf[s3];
1696 auto n3 = obs[s3].size();
1697 shell_atoms[2] = shell2atom[s3];
1698
1699 const auto Dnorm123 =
1700 do_schwarz_screen
1701 ? std::max(D_shblk_norm(s1, s3),
1702 std::max(D_shblk_norm(s2, s3), Dnorm12))
1703 : 0.;
1704
1705 const auto s4_max = (s1 == s3) ? s2 : s3;
1706 for (const auto& s4 : obs_shellpair_list[s3]) {
1707 if (s4 > s4_max)
1708 break; // for each s3, s4 are stored in monotonically increasing
1709 // order
1710
1711 if ((s1234++) % nthreads != thread_id) continue;
1712
1713 const auto Dnorm1234 =
1714 do_schwarz_screen
1715 ? std::max(
1716 D_shblk_norm(s1, s4),
1717 std::max(D_shblk_norm(s2, s4),
1718 std::max(D_shblk_norm(s3, s4), Dnorm123)))
1719 : 0.;
1720
1721 if (do_schwarz_screen &&
1722 Dnorm1234 * Schwarz(s1, s2) * Schwarz(s3, s4) <
1723 fock_precision)
1724 continue;
1725
1726 auto bf4_first = shell2bf[s4];
1727 auto n4 = obs[s4].size();
1728 shell_atoms[3] = shell2atom[s4];
1729
1730 const auto n1234 = n1 * n2 * n3 * n4;
1731
1732 // compute the permutational degeneracy (i.e. # of equivalents) of
1733 // the given shell set
1734 auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1735 auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1736 auto s12_34_deg = (s1 == s3) ? (s2 == s4 ? 1.0 : 2.0) : 2.0;
1737 auto s1234_deg = s12_deg * s34_deg * s12_34_deg;
1738
1739 // computes contribution from shell set \c idx to the operator matrix with
1740 // index \c op
1741 auto add_shellset_to_dest = [&](
1742 std::size_t op, std::size_t idx, int coord1, int coord2, double scale = 1.0) {
1743 auto& g = G[op];
1744 auto shset = buf[idx];
1745 const auto weight = scale * s1234_deg;
1746
1747 for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1748 const auto bf1 = f1 + bf1_first;
1749 for (auto f2 = 0; f2 != n2; ++f2) {
1750 const auto bf2 = f2 + bf2_first;
1751 for (auto f3 = 0; f3 != n3; ++f3) {
1752 const auto bf3 = f3 + bf3_first;
1753 for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1754 const auto bf4 = f4 + bf4_first;
1755
1756 const auto value = shset[f1234];
1757 const auto wvalue = value * weight;
1758
1759 g(bf1, bf2) += D(bf3, bf4) * wvalue;
1760 g(bf3, bf4) += D(bf1, bf2) * wvalue;
1761 g(bf1, bf3) -= 0.25 * D(bf2, bf4) * wvalue;
1762 g(bf2, bf4) -= 0.25 * D(bf1, bf3) * wvalue;
1763 g(bf1, bf4) -= 0.25 * D(bf2, bf3) * wvalue;
1764 g(bf2, bf3) -= 0.25 * D(bf1, bf4) * wvalue;
1765 }
1766 }
1767 }
1768 }
1769 };
1770
1771#if defined(REPORT_INTEGRAL_TIMINGS)
1772 timer.start(0);
1773#endif
1774
1775 engine.compute2<Operator::coulomb, BraKet::xx_xx, deriv_order>(
1776 obs[s1], obs[s2], obs[s3], obs[s4]);
1777 if (buf[0] == nullptr)
1778 continue; // if all integrals screened out, skip to next quartet
1779 num_ints_computed += nderiv_shellset * n1234;
1780
1781#if defined(REPORT_INTEGRAL_TIMINGS)
1782 timer.stop(0);
1783#endif
1784
1785 switch (deriv_order) {
1786 case 0: {
1787 int coord1 = 0, coord2 = 0;
1788 add_shellset_to_dest(thread_id, 0, coord1, coord2);
1789 } break;
1790
1791 case 1: {
1792 for (auto d = 0; d != 12; ++d) {
1793 const int a = d / 3;
1794 const int xyz = d % 3;
1795
1796 auto coord = shell_atoms[a] * 3 + xyz;
1797 auto& g = G[thread_id * nderiv + coord];
1798
1799 int coord1 = 0, coord2 = 0;
1800
1801 add_shellset_to_dest(thread_id * nderiv + coord, d, coord1, coord2);
1802
1803 } // d \in [0,12)
1804 } break;
1805
1806 case 2: {
1807 // computes upper triangle index
1808 // n2 = matrix size times 2
1809 // i,j = (unordered) indices
1810 auto upper_triangle_index = [](size_t n2, size_t i, size_t j) {
1811 return std::min(i, j) * (n2 - std::min(i, j) - 1) / 2 + std::max(i, j);
1812 };
1813 // look over shellsets in the order in which they appear
1814 std::size_t shellset_idx = 0;
1815 for (auto c1 = 0; c1 != 4; ++c1) {
1816 auto a1 = shell_atoms[c1];
1817 auto coord1 = 3 * a1;
1818 for (auto xyz1 = 0; xyz1 != 3; ++xyz1, ++coord1) {
1819 for (auto c2 = c1; c2 != 4; ++c2) {
1820 auto a2 = shell_atoms[c2];
1821 auto xyz2_start = (c1 == c2) ? xyz1 : 0;
1822 auto coord2 = 3 * a2 + xyz2_start;
1823 for (auto xyz2 = xyz2_start; xyz2 != 3;
1824 ++xyz2, ++coord2) {
1825 double scale =
1826 (coord1 == coord2 && c1 != c2) ? 2.0 : 1.0;
1827
1828 const auto coord12 = upper_triangle_index(
1829 ncoords_times_two, coord1, coord2);
1830 auto op = thread_id * nderiv + coord12;
1831 add_shellset_to_dest(op, shellset_idx, coord1, coord2, scale);
1832 ++shellset_idx;
1833 }
1834 }
1835 }
1836 }
1837 } break;
1838
1839 default:
1840 assert(deriv_order <= 2 &&
1841 "support for 3rd and higher derivatives of the Fock "
1842 "matrix not yet implemented");
1843 }
1844 }
1845 }
1846 }
1847 }
1848
1849 }; // end of lambda
1850
1851 libint2::parallel_do(lambda);
1852
1853 // accumulate contributions from all threads
1854 for (size_t t = 1; t != nthreads; ++t) {
1855 for (auto d = 0; d != nderiv; ++d) {
1856 G[d] += G[t * nderiv + d];
1857 }
1858 }
1859
1860#if defined(REPORT_INTEGRAL_TIMINGS)
1861 double time_for_ints = 0.0;
1862 for (auto& t : timers) {
1863 time_for_ints += t.read(0);
1864 }
1865 std::cout << "time for integrals = " << time_for_ints << std::endl;
1866 for (int t = 0; t != nthreads; ++t) engines[t].print_timers();
1867#endif
1868
1869 std::vector<Matrix> GG(nderiv);
1870 for (auto d = 0; d != nderiv; ++d) {
1871 GG[d] = 0.5 * (G[d] + G[d].transpose());
1872 }
1873
1874 std::cout << "# of integrals = " << num_ints_computed << std::endl;
1875
1876 // symmetrize the result and return
1877 return GG;
1878}
1879
1880#endif
1881
1882Matrix compute_2body_fock_general(const BasisSet& obs, const Matrix& D,
1883 const BasisSet& D_bs, bool D_is_shelldiagonal,
1884 double precision) {
1885 const auto n = obs.nbf();
1886 const auto nshells = obs.size();
1887 const auto n_D = D_bs.nbf();
1888 assert(D.cols() == D.rows() && D.cols() == n_D);
1889
1890 using libint2::nthreads;
1891 std::vector<Matrix> G(nthreads, Matrix::Zero(n, n));
1892
1893 // construct the 2-electron repulsion integrals engine
1894 using libint2::Engine;
1895 std::vector<Engine> engines(nthreads);
1896 engines[0] = Engine(libint2::Operator::coulomb,
1897 std::max(obs.max_nprim(), D_bs.max_nprim()),
1898 std::max(obs.max_l(), D_bs.max_l()), 0);
1899 engines[0].set_precision(precision); // shellset-dependent precision control
1900 // will likely break positive
1901 // definiteness
1902 // stick with this simple recipe
1903 for (size_t i = 1; i != nthreads; ++i) {
1904 engines[i] = engines[0];
1905 }
1906 auto shell2bf = obs.shell2bf();
1907 auto shell2bf_D = D_bs.shell2bf();
1908
1909 auto lambda = [&](int thread_id) {
1910
1911 auto& engine = engines[thread_id];
1912 auto& g = G[thread_id];
1913 const auto& buf = engine.results();
1914
1915 // loop over permutationally-unique set of shells
1916 for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) {
1917 auto bf1_first = shell2bf[s1]; // first basis function in this shell
1918 auto n1 = obs[s1].size(); // number of basis functions in this shell
1919
1920 for (auto s2 = 0; s2 <= s1; ++s2) {
1921 auto bf2_first = shell2bf[s2];
1922 auto n2 = obs[s2].size();
1923
1924 for (auto s3 = 0; s3 < D_bs.size(); ++s3) {
1925 auto bf3_first = shell2bf_D[s3];
1926 auto n3 = D_bs[s3].size();
1927
1928 auto s4_begin = D_is_shelldiagonal ? s3 : 0;
1929 auto s4_fence = D_is_shelldiagonal ? s3 + 1 : D_bs.size();
1930
1931 for (auto s4 = s4_begin; s4 != s4_fence; ++s4, ++s1234) {
1932 if (s1234 % nthreads != thread_id) continue;
1933
1934 auto bf4_first = shell2bf_D[s4];
1935 auto n4 = D_bs[s4].size();
1936
1937 // compute the permutational degeneracy (i.e. # of equivalents) of
1938 // the given shell set
1939 auto s12_deg = (s1 == s2) ? 1.0 : 2.0;
1940
1941 if (s3 >= s4) {
1942 auto s34_deg = (s3 == s4) ? 1.0 : 2.0;
1943 auto s1234_deg = s12_deg * s34_deg;
1944 // auto s1234_deg = s12_deg;
1945 engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1946 obs[s1], obs[s2], D_bs[s3], D_bs[s4]);
1947 const auto* buf_1234 = buf[0];
1948 if (buf_1234 != nullptr) {
1949 for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) {
1950 const auto bf1 = f1 + bf1_first;
1951 for (auto f2 = 0; f2 != n2; ++f2) {
1952 const auto bf2 = f2 + bf2_first;
1953 for (auto f3 = 0; f3 != n3; ++f3) {
1954 const auto bf3 = f3 + bf3_first;
1955 for (auto f4 = 0; f4 != n4; ++f4, ++f1234) {
1956 const auto bf4 = f4 + bf4_first;
1957
1958 const auto value = buf_1234[f1234];
1959 const auto value_scal_by_deg = value * s1234_deg;
1960 g(bf1, bf2) += 2.0 * D(bf3, bf4) * value_scal_by_deg;
1961 }
1962 }
1963 }
1964 }
1965 }
1966 }
1967
1968 engine.compute2<Operator::coulomb, BraKet::xx_xx, 0>(
1969 obs[s1], D_bs[s3], obs[s2], D_bs[s4]);
1970 const auto* buf_1324 = buf[0];
1971 if (buf_1324 == nullptr)
1972 continue; // if all integrals screened out, skip to next quartet
1973
1974 for (auto f1 = 0, f1324 = 0; f1 != n1; ++f1) {
1975 const auto bf1 = f1 + bf1_first;
1976 for (auto f3 = 0; f3 != n3; ++f3) {
1977 const auto bf3 = f3 + bf3_first;
1978 for (auto f2 = 0; f2 != n2; ++f2) {
1979 const auto bf2 = f2 + bf2_first;
1980 for (auto f4 = 0; f4 != n4; ++f4, ++f1324) {
1981 const auto bf4 = f4 + bf4_first;
1982
1983 const auto value = buf_1324[f1324];
1984 const auto value_scal_by_deg = value * s12_deg;
1985 g(bf1, bf2) -= D(bf3, bf4) * value_scal_by_deg;
1986 }
1987 }
1988 }
1989 }
1990 }
1991 }
1992 }
1993 }
1994
1995 }; // thread lambda
1996
1997 libint2::parallel_do(lambda);
1998
1999 // accumulate contributions from all threads
2000 for (size_t i = 1; i != nthreads; ++i) {
2001 G[0] += G[i];
2002 }
2003
2004 // symmetrize the result and return
2005 return 0.5 * (G[0] + G[0].transpose());
2006}
2007
2008#ifdef HAVE_DENSITY_FITTING
2009
2010Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) {
2011
2012 using libint2::nthreads;
2013
2014 const auto n = obs.nbf();
2015 const auto ndf = dfbs.nbf();
2016
2017 libint2::Timers<1> wall_timer;
2018 wall_timer.set_now_overhead(25);
2019 std::vector<libint2::Timers<5>> timers(nthreads);
2020 for(auto& timer: timers) timer.set_now_overhead(25);
2021
2022 typedef btas::RangeNd<CblasRowMajor, std::array<long, 1>> Range1d;
2023 typedef btas::RangeNd<CblasRowMajor, std::array<long, 2>> Range2d;
2024 typedef btas::Tensor<double, Range1d> Tensor1d;
2025 typedef btas::Tensor<double, Range2d> Tensor2d;
2026
2027 // using first time? compute 3-center ints and transform to inv sqrt
2028 // representation
2029 if (xyK.size() == 0) {
2030
2031 wall_timer.start(0);
2032
2033 const auto nshells = obs.size();
2034 const auto nshells_df = dfbs.size();
2035 const auto& unitshell = libint2::Shell::unit();
2036
2037 // construct the 2-electron 3-center repulsion integrals engine
2038 // since the code assumes (xx|xs) braket, and Engine/libint only produces
2039 // (xs|xx), use 4-center engine
2040 std::vector<libint2::Engine> engines(nthreads);
2041 engines[0] = libint2::Engine(libint2::Operator::coulomb,
2042 std::max(obs.max_nprim(), dfbs.max_nprim()),
2043 std::max(obs.max_l(), dfbs.max_l()), 0);
2044 engines[0].set(BraKet::xs_xx);
2045 for (size_t i = 1; i != nthreads; ++i) {
2046 engines[i] = engines[0];
2047 }
2048
2049 auto shell2bf = obs.shell2bf();
2050 auto shell2bf_df = dfbs.shell2bf();
2051
2052 Tensor3d Zxy{ndf, n, n};
2053
2054 auto lambda = [&](int thread_id) {
2055
2056 auto& engine = engines[thread_id];
2057 auto& timer = timers[thread_id];
2058 const auto& results = engine.results();
2059
2060 // loop over permutationally-unique set of shells
2061 for (auto s1 = 0l, s123 = 0l; s1 != nshells_df; ++s1) {
2062 auto bf1_first = shell2bf_df[s1]; // first basis function in this shell
2063 auto n1 = dfbs[s1].size(); // number of basis functions in this shell
2064
2065 for (auto s2 = 0; s2 != nshells; ++s2) {
2066 auto bf2_first = shell2bf[s2];
2067 auto n2 = obs[s2].size();
2068 const auto n12 = n1 * n2;
2069
2070 for (auto s3 = 0; s3 != nshells; ++s3, ++s123) {
2071 if (s123 % nthreads != thread_id) continue;
2072
2073 auto bf3_first = shell2bf[s3];
2074 auto n3 = obs[s3].size();
2075 const auto n123 = n12 * n3;
2076
2077 timer.start(0);
2078
2079 engine.compute2<Operator::coulomb, BraKet::xs_xx, 0>(
2080 dfbs[s1], unitshell, obs[s2], obs[s3]);
2081 const auto* buf = results[0];
2082 if (buf == nullptr)
2083 continue;
2084
2085 timer.stop(0);
2086 timer.start(1);
2087
2088 auto lower_bound = {bf1_first, bf2_first, bf3_first};
2089 auto upper_bound = {bf1_first + n1, bf2_first + n2, bf3_first + n3};
2090 auto view = btas::make_view(
2091 Zxy.range().slice(lower_bound, upper_bound), Zxy.storage());
2092 std::copy(buf, buf + n123, view.begin());
2093
2094 timer.stop(1);
2095 } // s3
2096 } // s2
2097 } // s1
2098
2099 }; // lambda
2100
2101 libint2::parallel_do(lambda);
2102
2103 wall_timer.stop(0);
2104
2105 double ints_time = 0;
2106 for(const auto& timer: timers) ints_time += timer.read(0);
2107 std::cout << "time for Zxy integrals = " << ints_time << " (total from all threads)" << std::endl;
2108 double copy_time = 0;
2109 for(const auto& timer: timers) copy_time += timer.read(1);
2110 std::cout << "time for copying into BTAS = " << copy_time << " (total from all threads)"<< std::endl;
2111 std::cout << "wall time for Zxy integrals + copy = " << wall_timer.read(0) << std::endl;
2112
2113 timers[0].start(2);
2114
2115 Matrix V = compute_2body_2index_ints(dfbs);
2116 Eigen::LLT<Matrix> V_LLt(V);
2117 Matrix I = Matrix::Identity(ndf, ndf);
2118 auto L = V_LLt.matrixL();
2119 Matrix V_L = L;
2120 Matrix Linv = L.solve(I).transpose();
2121 // check
2122 // std::cout << "||V - L L^t|| = " << (V - V_L * V_L.transpose()).norm() <<
2123 // std::endl;
2124 // std::cout << "||I - L L^-1^t|| = " << (I - V_L *
2125 // Linv.transpose()).norm() << std::endl;
2126 // std::cout << "||V^-1 - L^-1 L^-1^t|| = " << (V.inverse() - Linv *
2127 // Linv.transpose()).norm() << std::endl;
2128
2129 Tensor2d K{ndf, ndf};
2130 std::copy(Linv.data(), Linv.data() + ndf * ndf, K.begin());
2131
2132 xyK = Tensor3d{n, n, ndf};
2133 btas::contract(1.0, Zxy, {1, 2, 3}, K, {1, 4}, 0.0, xyK, {2, 3, 4});
2134 Zxy = Tensor3d{0, 0, 0}; // release memory
2135
2136 timers[0].stop(2);
2137 std::cout << "time for integrals metric tform = " << timers[0].read(2)
2138 << std::endl;
2139 } // if (xyK.size() == 0)
2140
2141 // compute exchange
2142 timers[0].start(3);
2143
2144 const auto nocc = Cocc.cols();
2145 Tensor2d Co{n, nocc};
2146 std::copy(Cocc.data(), Cocc.data() + n * nocc, Co.begin());
2147 Tensor3d xiK{n, nocc, ndf};
2148 btas::contract(1.0, xyK, {1, 2, 3}, Co, {2, 4}, 0.0, xiK, {1, 4, 3});
2149
2150 Tensor2d G{n, n};
2151 btas::contract(1.0, xiK, {1, 2, 3}, xiK, {4, 2, 3}, 0.0, G, {1, 4});
2152
2153 timers[0].stop(3);
2154 std::cout << "time for exchange = " << timers[0].read(3) << std::endl;
2155
2156 // compute Coulomb
2157 timers[0].start(4);
2158
2159 Tensor1d Jtmp{ndf};
2160 btas::contract(1.0, xiK, {1, 2, 3}, Co, {1, 2}, 0.0, Jtmp, {3});
2161 xiK = Tensor3d{0, 0, 0};
2162 btas::contract(2.0, xyK, {1, 2, 3}, Jtmp, {3}, -1.0, G, {1, 2});
2163
2164 timers[0].stop(4);
2165 std::cout << "time for coulomb = " << timers[0].read(4) << std::endl;
2166
2167 // copy result to an Eigen::Matrix
2168 Matrix result(n, n);
2169 std::copy(G.cbegin(), G.cend(), result.data());
2170 return result;
2171}
2172#endif // HAVE_DENSITY_FITTING
2173
2174// should be a unit test somewhere
2175void api_basic_compile_test(const BasisSet& obs) {
2176 using namespace libint2;
2177 Engine onebody_engine(
2178 Operator::overlap, // will compute overlap ints
2179 obs.max_nprim(), // max # of primitives in shells this engine will
2180 // accept
2181 obs.max_l() // max angular momentum of shells this engine will accept
2182 );
2183 auto shell2bf = obs.shell2bf();
2184 const auto& results = onebody_engine.results();
2185 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2186 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2187 std::cout << "compute shell set {" << s1 << "," << s2 << "} ... ";
2188 onebody_engine.compute(obs[s1], obs[s2]);
2189 const auto* ints_shellset = results[0];
2190 std::cout << "done" << std::endl;
2191
2192 auto bf1 = shell2bf[s1]; // first basis function in first shell
2193 auto n1 = obs[s1].size(); // number of basis functions in first shell
2194 auto bf2 = shell2bf[s2]; // first basis function in second shell
2195 auto n2 = obs[s2].size(); // number of basis functions in second shell
2196
2197 // this iterates over integrals in the order they are packed in array
2198 // ints_shellset
2199 for (auto f1 = 0; f1 != n1; ++f1)
2200 for (auto f2 = 0; f2 != n2; ++f2)
2201 std::cout << " " << bf1 + f1 << " " << bf2 + f2 << " "
2202 << ints_shellset[f1 * n2 + f2] << std::endl;
2203 }
2204 }
2205
2206 using libint2::Operator;
2207
2208 std::vector<std::pair<double, double>> cgtg_params{
2209 {0.1, 0.2}, {0.3, 0.4}, {0.5, 0.6}};
2210 {
2211 auto K =
2212 compute_schwarz_ints<Operator::cgtg>(obs, obs, false, cgtg_params);
2213 std::cout << "cGTG Schwarz ints\n" << K << std::endl;
2214 }
2215 {
2216 auto K = compute_schwarz_ints<Operator::cgtg_x_coulomb>(obs, obs, false,
2217 cgtg_params);
2218 std::cout << "cGTG/r12 Schwarz ints\n" << K << std::endl;
2219 }
2220 {
2221 auto K =
2222 compute_schwarz_ints<Operator::delcgtg2>(obs, obs, false, cgtg_params);
2223 std::cout << "||Del.cGTG||^2 Schwarz ints\n" << K << std::endl;
2224 }
2225
2226 { // test 2-index ints
2227 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l());
2228 Engine eri2_engine = eri4_engine;
2229 eri2_engine.set(BraKet::xs_xs);
2230 auto shell2bf = obs.shell2bf();
2231 const auto& results4 = eri4_engine.results();
2232 const auto& results2 = eri2_engine.results();
2233 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2234 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2235 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2236 eri2_engine.compute(obs[s1], obs[s2]);
2237
2238 auto bf1 = shell2bf[s1]; // first basis function in first shell
2239 auto n1 = obs[s1].size(); // number of basis functions in first shell
2240 auto bf2 = shell2bf[s2]; // first basis function in second shell
2241 auto n2 = obs[s2].size(); // number of basis functions in second shell
2242
2243 const auto* buf4 = results4[0];
2244 const auto* buf2 = results2[0];
2245
2246 // this iterates over integrals in the order they are packed in array
2247 // ints_shellset
2248 for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2249 for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2250 assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2251 "2-center ints test failed");
2252 }
2253 }
2254 }
2255 { // test 3-index ints
2256 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l());
2257 Engine eri3_engine = eri4_engine;
2258 eri3_engine.set(BraKet::xs_xx);
2259 auto shell2bf = obs.shell2bf();
2260 const auto& results4 = eri4_engine.results();
2261 const auto& results3 = eri3_engine.results();
2262 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2263 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2264 for (auto s3 = 0; s3 != obs.size(); ++s3) {
2265 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], obs[s3]);
2266 eri3_engine.compute(obs[s1], obs[s2], obs[s3]);
2267
2268 auto bf1 = shell2bf[s1]; // first basis function in first shell
2269 auto n1 = obs[s1].size(); // number of basis functions in first shell
2270 auto bf2 = shell2bf[s2]; // first basis function in second shell
2271 auto n2 =
2272 obs[s2].size(); // number of basis functions in second shell
2273 auto bf3 = shell2bf[s3]; // first basis function in third shell
2274 auto n3 = obs[s3].size(); // number of basis functions in third shell
2275
2276 const auto* buf4 = results4[0];
2277 const auto* buf3 = results3[0];
2278
2279 // this iterates over integrals in the order they are packed in array
2280 // ints_shellset
2281 for (auto f1 = 0, f123 = 0; f1 != n1; ++f1)
2282 for (auto f2 = 0; f2 != n2; ++f2)
2283 for (auto f3 = 0; f3 != n3; ++f3, ++f123)
2284 assert(std::abs(buf4[f123] - buf3[f123]) < 1e-12 &&
2285 "3-center ints test failed");
2286 }
2287 }
2288 }
2289 }
2290
2291#if LIBINT2_DERIV_ERI_ORDER
2292 { // test deriv 2-index ints
2293 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 1);
2294 Engine eri2_engine = eri4_engine;
2295 eri2_engine.set(BraKet::xs_xs);
2296 auto shell2bf = obs.shell2bf();
2297 const auto& results4 = eri4_engine.results();
2298 const auto& results2 = eri2_engine.results();
2299 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2300 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2301 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2302 eri2_engine.compute(obs[s1], obs[s2]);
2303
2304 auto bf1 = shell2bf[s1]; // first basis function in first shell
2305 auto n1 = obs[s1].size(); // number of basis functions in first shell
2306 auto bf2 = shell2bf[s2]; // first basis function in second shell
2307 auto n2 = obs[s2].size(); // number of basis functions in second shell
2308
2309 // loop over derivative shell sets
2310 for(auto d=0; d!=6; ++d) {
2311 const auto* buf4 = results4[d<3 ? d : d+3];
2312 const auto* buf2 = results2[d];
2313
2314 // this iterates over integrals in the order they are packed in array
2315 // ints_shellset
2316 for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2317 for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2318 assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2319 "deriv 2-center ints test failed");
2320 }
2321
2322 }
2323 }
2324 }
2325#endif
2326
2327#if LIBINT2_DERIV_ERI_ORDER > 1
2328 { // test 2nd deriv 2-index ints
2329 Engine eri4_engine(Operator::coulomb, obs.max_nprim(), obs.max_l(), 2);
2330 Engine eri2_engine = eri4_engine;
2331 eri2_engine.set(BraKet::xs_xs);
2332 auto shell2bf = obs.shell2bf();
2333 const auto& results4 = eri4_engine.results();
2334 const auto& results2 = eri2_engine.results();
2335 for (auto s1 = 0; s1 != obs.size(); ++s1) {
2336 for (auto s2 = 0; s2 != obs.size(); ++s2) {
2337 eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit());
2338 eri2_engine.compute(obs[s1], obs[s2]);
2339
2340 auto bf1 = shell2bf[s1]; // first basis function in first shell
2341 auto n1 = obs[s1].size(); // number of basis functions in first shell
2342 auto bf2 = shell2bf[s2]; // first basis function in second shell
2343 auto n2 = obs[s2].size(); // number of basis functions in second shell
2344
2345 // loop over derivative shell sets
2346 for (auto d1 = 0, d12 = 0; d1 != 6; ++d1) {
2347 const auto dd1 = d1 < 3 ? d1 : d1 + 3;
2348 for (auto d2 = d1; d2 != 6; ++d2, ++d12) {
2349 const auto dd2 = d2 < 3 ? d2 : d2 + 3;
2350 const auto dd12 = dd1 * (24 - dd1 - 1) / 2 + dd2;
2351 const auto* buf4 = results4[dd12];
2352 const auto* buf2 = results2[d12];
2353
2354 // this iterates over integrals in the order they are packed in
2355 // array
2356 // ints_shellset
2357 for (auto f1 = 0, f12 = 0; f1 != n1; ++f1)
2358 for (auto f2 = 0; f2 != n2; ++f2, ++f12)
2359 assert(std::abs(buf4[f12] - buf2[f12]) < 1e-12 &&
2360 "2nd deriv 2-center ints test failed");
2361 }
2362 }
2363 }
2364 }
2365 }
2366#endif
2367
2368}
DIIS (`‘direct inversion of iterative subspace’') extrapolation.
Definition: diis.h:132
Timers aggregates N C++11 "timers"; used to high-resolution profile stages of integral computation.
Definition: timer.h:38
double read(size_t t) const
reads value (in seconds) of timer t , converted to double
Definition: timer.h:74
dur_t stop(size_t t)
stops timer t
Definition: timer.h:67
void clear()
resets timers to zero
Definition: timer.h:78
void set_now_overhead(size_t ns)
use this to report the overhead of now() call; if set, the reported timings will be adjusted for this...
Definition: timer.h:57
void start(size_t t)
starts timer t
Definition: timer.h:62
Array idential to C++0X arrays.
Definition: stdarray_bits.h:14
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
void parallel_do(Lambda &lambda)
fires off nthreads instances of lambda in parallel
Definition: 1body.h:168
@ Schwarz
Schwarz screening method using Frobenius norm:
void initialize(bool verbose=false)
initializes the libint library if not currently initialized, no-op otherwise
Definition: initialize.h:70
std::vector< std::pair< double, std::array< double, 3 > > > make_point_charges(const std::vector< libint2::Atom > &atoms)
converts a vector of Atoms to a vector of point charges
Definition: atom.h:242
BraKet
types of shell sets supported by Engine, in chemist notation (i.e.
Definition: include/libint2/braket.h:36
std::vector< Atom > read_dotxyz(std::istream &is, const double bohr_to_angstrom=constants::codata_2018::bohr_to_angstrom)
reads the list of atoms from a file in the standard XYZ format supported by most chemistry software (...
Definition: atom.h:210
void finalize()
finalizes the libint library.
Definition: initialize.h:90
const std::vector< Real > & sto3g_ao_occupation_vector(size_t Z)
computes average orbital occupancies in the ground state of a neutral atoms
Definition: sto3g_atomic_density.h:84
size_t sto3g_num_ao(size_t Z)
Definition: sto3g_atomic_density.h:56
Definition: 1body.h:148
Iterates over all partitions of a non-negative integer into nonnegative integers in reverse lexicog...
Definition: intpart_iter.h:74
generally-contracted Solid-Harmonic/Cartesion Gaussian Shell
Definition: shell.h:81
static bool do_enforce_unit_normalization(defaultable_boolean flag=defaultable_boolean())
sets and/or reports whether the auto-renormalization to unity is set if called without arguments,...
Definition: shell.h:215
static const Shell & unit()
Definition: shell.h:224