21#ifndef _libint2_include_solidharmonics_h_
22#define _libint2_include_solidharmonics_h_
24#include <libint2/util/cxxstd.h>
25#if LIBINT2_CPLUSPLUS_STD < 2011
26# error "The simple Libint API requires C++11 support"
34#ifndef LIBINT2_REALTYPE
35# define LIBINT2_REALTYPE double
37#include <libint2/shell.h>
38#include <libint2/cgshell_ordering.h>
// Sign helper used by the coefficient formula: its result is multiplied into partial sums and
// compared against a +/-1 "component" flag in coeff().
// NOTE(review): the body is not visible in this listing; presumably it returns +1 for even i
// and -1 for odd i -- confirm against the full source.
41 template <
typename Int>
42 signed char parity(Int i) {
49 namespace solidharmonics {
55 template <
typename Real>
56 class SolidHarmonicsCoefficients {
58 typedef ::libint2::value_type real_t;
// Default constructor: sets l_ to -1 and leaves the coefficient tables empty.
// NOTE(review): -1 presumably acts as a "not yet initialized" marker -- body not visible, confirm.
60 SolidHarmonicsCoefficients() : l_(-1) {
// Constructs the object for quantum number l.
// The assertion guards the narrowing of l: the table builder iterates with `signed char`
// indices (including m = -l_), so l must fit into a signed char.
// NOTE(review): the rest of the body is not visible here; presumably it builds the tables -- confirm.
62 SolidHarmonicsCoefficients(
unsigned char l) : l_(l) {
63 assert(l <= std::numeric_limits<signed char>::max());
// Move constructor: takes over the three storage vectors (values, row offsets, column indices)
// of `other` without copying their contents.
// NOTE(review): the remainder of the initializer list (l_) and the body are not visible in this listing.
67 SolidHarmonicsCoefficients(SolidHarmonicsCoefficients&& other) :
68 values_(std::move(other.values_)),
69 row_offset_(std::move(other.row_offset_)),
70 colidx_(std::move(other.colidx_)),
// Copy constructor: compiler-generated member-wise copy. It must be declared explicitly
// because the user-declared move constructor would otherwise suppress it.
74 SolidHarmonicsCoefficients(
const SolidHarmonicsCoefficients& other) =
default;
// (Re)initializes the object for quantum number l.
// As in the constructor, l must be representable as a signed char.
// NOTE(review): the statements following the assertion are not visible in this listing.
76 void init(
unsigned char l) {
77 assert(l <= std::numeric_limits<signed char>::max());
// Accessor for a shared, lazily built set of coefficient tables.
// The function-local static vector is constructed once from the iterator range
// [CtorHelperIter(0), CtorHelperIter(11)); dereferencing CtorHelperIter(k) yields an object
// constructed for l = k, so the vector holds tables for l = 0..10.
// NOTE(review): the range check on l and the return statement are not visible in this listing;
// callers must not request l > 10.
82 static const SolidHarmonicsCoefficients& instance(
unsigned int l) {
83 static std::vector<SolidHarmonicsCoefficients> shg_coefs(SolidHarmonicsCoefficients::CtorHelperIter(0),
84 SolidHarmonicsCoefficients::CtorHelperIter(11));
// Returns a pointer to the first stored (nonzero) coefficient of row r.
// nnz(r) consecutive elements starting at this pointer belong to the row.
// r must be a valid row index (no bounds checking is performed).
90 const Real* row_values(
size_t r)
const {
91 return &values_[0] + row_offset_[r];
// Returns a pointer to the column indices of the stored coefficients of row r.
// Entry k of this array is the column of entry k of row_values(r).
// r must be a valid row index (no bounds checking is performed).
94 const unsigned char* row_idx(
size_t r)
const {
95 return &colidx_[0] + row_offset_[r];
// Returns the number of stored (nonzero) coefficients in row r, computed as the difference
// of consecutive row offsets.
// NOTE(review): the unsigned short difference is narrowed to unsigned char; this is lossless
// only while a row holds at most 255 entries.
98 unsigned char nnz(
size_t r)
const {
99 return row_offset_[r+1] - row_offset_[r];
// Computes one entry of the dense transformation table: the weight with which the Cartesian
// component (lx, ly, lz) contributes to the solid-harmonic component (l, m). The table
// builder calls this for every (m, lx, ly, lz) combination.
//
// NOTE(review): several statements of this function (early returns, declarations of i, i_min,
// sum and sum1, loop closings, the final return) are not visible in this listing; the comments
// below describe only what the visible lines establish.
107 static Real coeff(
int l,
int m,
int lx,
int ly,
int lz) {
// Lookup helpers from libint2::math: fac and df_Kminus1 are indexed like arrays, bc is called
// like a function of two arguments.
108 using libint2::math::fac;
109 using libint2::math::df_Kminus1;
110 using libint2::math::bc;
112 auto abs_m = std::abs(m);
// Parity test on lx + ly - |m|; the statement guarded by this `if` is not visible
// (presumably an early return of zero -- confirm).
113 if ((lx + ly - abs_m)%2)
116 auto j = (lx + ly - abs_m)/2;
// comp is +1 for m >= 0 and -1 for m < 0; it is compared with a parity below.
123 auto comp = (m >= 0) ? 1 : -1;
// NOTE(review): unqualified abs() here versus std::abs() above -- make sure the integer overload is selected.
126 if (comp != parity(abs(i)))
// Prefactor: square root of a ratio of factorials of the Cartesian and harmonic quantum numbers.
// The factorial table is indexed up to 2*l, which bounds the largest usable l by the table size.
130 Real pfac = sqrt( ((Real(fac[2*lx])*Real(fac[2*ly])*Real(fac[2*lz]))/fac[2*l]) *
131 ((Real(fac[l-abs_m]))/(fac[l])) *
132 (Real(1)/fac[l+abs_m]) *
133 (Real(1)/(fac[lx]*fac[ly]*fac[lz]))
// Sign adjustment of the prefactor (the condition guarding this line is not visible).
138 pfac *= parity((i-1)/2);
143 auto i_max = (l-abs_m)/2;
// Outer summation over i in [i_min, i_max].
145 for(
auto i=i_min;i<=i_max;i++) {
146 Real pfac1 = bc(l,i)*bc(i,j);
147 pfac1 *= (Real(parity(i)*fac[2*(l-i)])/fac[l-abs_m-2*i]);
// Inner summation over k, clamped so that both binomial arguments stay in range.
149 const int k_min = std::max((lx-abs_m)/2,0);
150 const int k_max = std::min(j,lx/2);
151 for(
int k=k_min;k<=k_max;k++) {
153 sum1 += bc(j,k)*bc(abs_m,lx-2*k)*parity(k);
// Rescale by a square root of a ratio of double-factorial table entries.
157 sum *= sqrt(Real(df_Kminus1[2*l])/(df_Kminus1[2*lx]*df_Kminus1[2*ly]*df_Kminus1[2*lz]));
// Components with m != 0 carry an extra factor sqrt(2).
// NOTE(review): M_SQRT2 is a POSIX extension of <cmath>, not standard C++ (MSVC needs _USE_MATH_DEFINES).
159 Real result = (m == 0) ? pfac*sum : M_SQRT2*pfac*sum;
// Sparse row-compressed storage of the transformation table:
//   values_     -- the nonzero coefficients, stored row after row;
//   row_offset_ -- for each row r the position of its first entry in values_/colidx_;
//                  one extra trailing element holds the total count, so
//                  row_offset_[r+1] - row_offset_[r] is the number of entries in row r;
//   colidx_     -- column index of each stored coefficient (parallel to values_).
164 std::vector<Real> values_;
165 std::vector<unsigned short> row_offset_;
166 std::vector<unsigned char> colidx_;
// Body of the table builder: evaluates the dense (npure x ncart) coefficient table for l_ and
// compresses it into values_ / row_offset_ (and, presumably, colidx_).
// NOTE(review): the enclosing function header and several statements (declaration of nnz,
// resizing of values_/colidx_, the colidx_ assignment, ++cnt, closing braces, #else/#endif)
// are not visible in this listing.
//
// Row count: 2*l_+1 harmonic components; column count: (l_+1)(l_+2)/2 Cartesian components.
170 const unsigned short npure = 2*l_ + 1;
171 const unsigned short ncart = (l_ + 1) * (l_ + 2) / 2;
// Dense row-major scratch table; entry [pure_idx * ncart + cart_idx].
172 std::vector<Real> full_coeff(npure * ncart);
// Row order depends on the configured ordering of the harmonic components:
//   STANDARD: m = -l_, -l_+1, ..., +l_
//   GAUSSIAN: m = 0, 1, -1, 2, -2, ... (the update maps m>0 to -m and m<=0 to 1-m)
174#if LIBINT_SHGSHELL_ORDERING == LIBINT_SHGSHELL_ORDERING_STANDARD
175 for(
signed char pure_idx=0, m=-l_; pure_idx!=npure; ++pure_idx, ++m) {
176#elif LIBINT_SHGSHELL_ORDERING == LIBINT_SHGSHELL_ORDERING_GAUSSIAN
177 for(
signed char pure_idx=0, m=0; pure_idx!=npure; ++pure_idx, m=(m>0?-m:1-m)) {
179# error "unknown value of macro LIBINT_SHGSHELL_ORDERING"
// Column order is whatever the FOR_CART macro enumerates for total quantum number l_.
181 signed char cart_idx = 0;
182 signed char lx, ly, lz;
183 FOR_CART(lx, ly, lz, l_)
184 full_coeff[pure_idx * ncart + cart_idx] = coeff(l_, m, lx, ly, lz);
// Count the nonzero entries. The comparison with exactly 0.0 relies on coeff() producing an
// exact zero for vanishing combinations rather than a rounded small number.
193 for(
size_t i=0; i!=full_coeff.size(); ++i)
194 nnz += full_coeff[i] == 0.0 ? 0 : 1;
// One offset per row plus a trailing end marker.
198 row_offset_.resize(npure+1);
// pc walks the dense table linearly; cnt is the running number of stored entries.
201 unsigned short pc = 0;
202 unsigned short cnt = 0;
203 for(
unsigned short p=0; p!=npure; ++p) {
204 row_offset_[p] = cnt;
205 for(
unsigned short c=0; c!=ncart; ++c, ++pc) {
206 if (full_coeff[pc] != 0.0) {
207 values_[cnt] = full_coeff[pc];
// End marker: total number of stored entries.
213 row_offset_[npure] = cnt;
// Minimal input-iterator that "counts" over quantum numbers: dereferencing it constructs a
// SolidHarmonicsCoefficients object for the current value of l_. It exists so that a
// std::vector of coefficient tables can be built with the iterator-range constructor
// (see instance()).
// NOTE(review): std::iterator is deprecated since C++17; the nested typedefs could be declared
// directly instead. The l_ data member, the header of the dereference operator and the body of
// operator== are not visible in this listing.
219 struct CtorHelperIter :
public std::iterator<std::input_iterator_tag, SolidHarmonicsCoefficients> {
// Re-export value_type (= SolidHarmonicsCoefficients) from the dependent base class.
221 using typename std::iterator<std::input_iterator_tag, SolidHarmonicsCoefficients>::value_type;
223 CtorHelperIter() =
default;
// Starts the iteration at quantum number l.
224 CtorHelperIter(
unsigned int l) : l_(l) {}
225 CtorHelperIter(
const CtorHelperIter&) =
default;
226 CtorHelperIter& operator=(
const CtorHelperIter& rhs) { l_ = rhs.l_;
return *
this; }
// Pre-increment / pre-decrement move to the next / previous quantum number;
// decrementing below zero is a programming error (asserted).
228 CtorHelperIter& operator++() { ++l_;
return *
this; }
229 CtorHelperIter& operator--() { assert(l_ > 0); --l_;
return *
this; }
// Dereference: builds a fresh coefficient object for the current l_ (returned by value).
232 return value_type(l_);
234 bool operator==(
const CtorHelperIter& rhs)
const {
// Inequality is defined as the negation of operator==.
237 bool operator!=(
const CtorHelperIter& rhs)
const {
238 return not (*
this == rhs);
246 template <
typename Real>
247 void transform_first(
size_t l,
size_t n2,
const Real *src, Real *tgt)
249 const auto& coefs = SolidHarmonicsCoefficients<Real>::instance(l);
251 const auto n = 2*l+1;
252 std::fill(tgt, tgt + n * n2, 0);
255 for(
size_t s=0; s!=n; ++s) {
256 const auto nc_s = coefs.nnz(s);
257 const auto* c_idxs = coefs.row_idx(s);
258 const auto* c_vals = coefs.row_values(s);
260 const auto tgt_blk_s_offset = s * n2;
262 for(
size_t ic=0; ic!=nc_s; ++ic) {
263 const auto c = c_idxs[ic];
264 const auto s_c_coeff = c_vals[ic];
266 auto src_blk_s = src + c * n2;
267 auto tgt_blk_s = tgt + tgt_blk_s_offset;
270 for(
size_t i2=0; i2!=n2; ++i2, ++src_blk_s, ++tgt_blk_s) {
271 *tgt_blk_s += s_c_coeff * *src_blk_s;
279 template <
typename Real>
280 void transform_first2(
int l1,
int l2,
size_t inner_dim,
const Real* source_blk, Real* target_blk) {
281 const auto& coefs1 = SolidHarmonicsCoefficients<Real>::instance(l1);
282 const auto& coefs2 = SolidHarmonicsCoefficients<Real>::instance(l2);
284 const auto ncart2 = (l2+1)*(l2+2)/2;
285 const auto npure1 = 2*l1+1;
286 const auto npure2 = 2*l2+1;
287 const auto ncart2inner = ncart2 * inner_dim;
288 const auto npure2inner = npure2 * inner_dim;
289 std::fill(target_blk, target_blk + npure1 * npure2inner, 0);
292 const size_t inner_blk_size = 8;
293 const size_t nblks = (inner_dim+inner_blk_size-1)/inner_blk_size;
294 for(
size_t blk=0; blk!=nblks; ++blk) {
295 const auto blk_begin = blk * inner_blk_size;
296 const auto blk_end = std::min(blk_begin + inner_blk_size,inner_dim);
297 const auto blk_size = blk_end - blk_begin;
300 for(
size_t s1=0; s1!=npure1; ++s1) {
301 const auto nc1 = coefs1.nnz(s1);
302 const auto* c1_idxs = coefs1.row_idx(s1);
303 const auto* c1_vals = coefs1.row_values(s1);
305 auto target_blk_s1 = target_blk + s1 * npure2inner + blk_begin;
308 for(
size_t s2=0; s2!=npure2; ++s2) {
309 const auto nc2 = coefs2.nnz(s2);
310 const auto* c2_idxs = coefs2.row_idx(s2);
311 const auto* c2_vals = coefs2.row_values(s2);
312 const auto s2inner = s2 * inner_dim;
313 const auto target_blk_s1_blk_begin = target_blk_s1 + s2inner;
315 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
316 auto c1 = c1_idxs[ic1];
317 auto s1_c1_coeff = c1_vals[ic1];
319 auto source_blk_c1 = source_blk + c1 * ncart2inner + blk_begin;
321 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
322 auto c2 = c2_idxs[ic2];
323 auto s2_c2_coeff = c2_vals[ic2];
324 const auto c2inner = c2 * inner_dim;
326 const auto coeff = s1_c1_coeff * s2_c2_coeff;
327 const auto source_blk_c1_blk_begin = source_blk_c1 + c2inner;
328 for(
auto b=0; b<blk_size; ++b)
329 target_blk_s1_blk_begin[b] += source_blk_c1_blk_begin[b] * coeff;
343 template <
typename Real>
344 void transform_inner(
size_t n1,
size_t l,
size_t n2,
const Real *src, Real *tgt)
346 const auto& coefs = SolidHarmonicsCoefficients<Real>::instance(l);
348 const auto nc = (l+1)*(l+2)/2;
349 const auto n = 2*l+1;
350 const auto nc_n2 = nc * n2;
351 const auto n_n2 = n * n2;
352 std::fill(tgt, tgt + n1 * n_n2, 0);
355 for(
size_t s=0; s!=n; ++s) {
356 const auto nc_s = coefs.nnz(s);
357 const auto* c_idxs = coefs.row_idx(s);
358 const auto* c_vals = coefs.row_values(s);
360 const auto tgt_blk_s_offset = s * n2;
362 for(
size_t ic=0; ic!=nc_s; ++ic) {
363 const auto c = c_idxs[ic];
364 const auto s_c_coeff = c_vals[ic];
366 auto src_blk_s = src + c * n2;
367 auto tgt_blk_s = tgt + tgt_blk_s_offset;
370 for(
size_t i1=0; i1!=n1; ++i1, src_blk_s+=nc_n2, tgt_blk_s+=n_n2) {
371 for(
size_t i2=0; i2!=n2; ++i2) {
372 tgt_blk_s[i2] += s_c_coeff * src_blk_s[i2];
381 template <
typename Real>
382 void transform_last(
size_t n1,
size_t l,
const Real *src, Real *tgt)
384 const auto& coefs = SolidHarmonicsCoefficients<Real>::instance(l);
386 const auto nc = (l+1)*(l+2)/2;
387 const auto n = 2*l+1;
388 std::fill(tgt, tgt + n1 * n, 0);
391 for(
size_t s=0; s!=n; ++s) {
392 const auto nc_s = coefs.nnz(s);
393 const auto* c_idxs = coefs.row_idx(s);
394 const auto* c_vals = coefs.row_values(s);
396 const auto tgt_blk_s_offset = s;
398 for(
size_t ic=0; ic!=nc_s; ++ic) {
399 const auto c = c_idxs[ic];
400 const auto s_c_coeff = c_vals[ic];
402 auto src_blk_s = src + c;
403 auto tgt_blk_s = tgt + tgt_blk_s_offset;
406 for(
size_t i1=0; i1!=n1; ++i1, src_blk_s+=nc, tgt_blk_s+=n) {
407 *tgt_blk_s += s_c_coeff * *src_blk_s;
415 template <
typename Real>
416 void tform_last2(
size_t n1,
int l_row,
int l_col,
const Real* source_blk, Real* target_blk) {
417 const auto& coefs_row = SolidHarmonicsCoefficients<Real>::instance(l_row);
418 const auto& coefs_col = SolidHarmonicsCoefficients<Real>::instance(l_col);
420 const auto ncart_row = (l_row+1)*(l_row+2)/2;
421 const auto ncart_col = (l_col+1)*(l_col+2)/2;
422 const auto ncart = ncart_row * ncart_col;
423 const auto npure_row = 2*l_row+1;
424 const auto npure_col = 2*l_col+1;
425 const auto npure = npure_row * npure_col;
426 std::fill(target_blk, target_blk + n1 * npure, 0);
428 for(
size_t i1=0; i1!=n1; ++i1, source_blk+=ncart, target_blk+=npure) {
430 for(
size_t s1=0; s1!=npure_row; ++s1) {
431 const auto nc1 = coefs_row.nnz(s1);
432 const auto* c1_idxs = coefs_row.row_idx(s1);
433 const auto* c1_vals = coefs_row.row_values(s1);
435 auto target_blk_s1 = target_blk + s1 * npure_col;
438 for(
size_t s2=0; s2!=npure_col; ++s2) {
439 const auto nc2 = coefs_col.nnz(s2);
440 const auto* c2_idxs = coefs_col.row_idx(s2);
441 const auto* c2_vals = coefs_col.row_values(s2);
443 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
444 auto c1 = c1_idxs[ic1];
445 auto s1_c1_coeff = c1_vals[ic1];
447 auto source_blk_c1 = source_blk + c1 * ncart_col;
449 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
450 auto c2 = c2_idxs[ic2];
451 auto s2_c2_coeff = c2_vals[ic2];
453 target_blk_s1[s2] += source_blk_c1[c2] * s1_c1_coeff * s2_c2_coeff;
465 template <
typename Real>
466 void tform(
int l_row,
int l_col,
const Real* source_blk, Real* target_blk) {
467 const auto& coefs_row = SolidHarmonicsCoefficients<Real>::instance(l_row);
468 const auto& coefs_col = SolidHarmonicsCoefficients<Real>::instance(l_col);
470 const auto ncart_col = (l_col+1)*(l_col+2)/2;
471 const auto npure_row = 2*l_row+1;
472 const auto npure_col = 2*l_col+1;
473 std::fill(target_blk, target_blk + npure_row * npure_col, 0);
476 for(
auto s1=0; s1!=npure_row; ++s1) {
477 const auto nc1 = coefs_row.nnz(s1);
478 const auto* c1_idxs = coefs_row.row_idx(s1);
479 const auto* c1_vals = coefs_row.row_values(s1);
481 auto target_blk_s1 = target_blk + s1 * npure_col;
484 for(
auto s2=0; s2!=npure_col; ++s2) {
485 const auto nc2 = coefs_col.nnz(s2);
486 const auto* c2_idxs = coefs_col.row_idx(s2);
487 const auto* c2_vals = coefs_col.row_values(s2);
489 for(
size_t ic1=0; ic1!=nc1; ++ic1) {
490 auto c1 = c1_idxs[ic1];
491 auto s1_c1_coeff = c1_vals[ic1];
493 auto source_blk_c1 = source_blk + c1 * ncart_col;
495 for(
size_t ic2=0; ic2!=nc2; ++ic2) {
496 auto c2 = c2_idxs[ic2];
497 auto s2_c2_coeff = c2_vals[ic2];
499 target_blk_s1[s2] += source_blk_c1[c2] * s1_c1_coeff * s2_c2_coeff;
/// Transforms the column (contiguous) index of a matrix from the Cartesian to the
/// solid-harmonic basis.
///
/// @param nrow        number of rows, left untouched
/// @param l_col       quantum number of the column index
/// @param source_blk  input, laid out as [nrow][Cartesian col]
/// @param target_blk  output, laid out as [nrow][2*l_col+1]; fully overwritten
///
/// Thin wrapper around transform_last(). The hand-written loop nest that used to follow the
/// forwarding call could never execute and has been removed.
template <typename Real>
void tform_cols(size_t nrow, int l_col, const Real* source_blk, Real* target_blk) {
  transform_last(nrow, l_col, source_blk, target_blk);
}
/// Transforms the row (slowest-varying) index of a matrix from the Cartesian to the
/// solid-harmonic basis.
///
/// @param l_row       quantum number of the row index
/// @param ncol        number of columns, left untouched
/// @param source_blk  input, laid out as [Cartesian row][ncol]
/// @param target_blk  output, laid out as [2*l_row+1][ncol]; fully overwritten
///
/// Thin wrapper around transform_first(). The hand-written loop nest that used to follow the
/// forwarding call could never execute and has been removed.
template <typename Real>
void tform_rows(int l_row, size_t ncol, const Real* source_blk, Real* target_blk) {
  transform_first(l_row, ncol, source_blk, target_blk);
}
/// Transforms a matrix block from the Cartesian basis to the basis requested by the two
/// shells: each index is converted to solid harmonics only if the corresponding shell is
/// flagged as pure.
///
/// @tparam Shell      any type exposing `pure`, `l`, `size()` and `cartesian_size()`
/// @param shell_row   shell describing the row index
/// @param shell_col   shell describing the column index
/// @param source_blk  input, laid out as [Cartesian row][Cartesian col]
/// @param target_blk  output; its extent depends on which indices are transformed
///
/// When both indices are pure the columns are transformed first into a scratch buffer and the
/// rows second. The scratch buffer lives on the stack when it fits into 500 elements and is
/// allocated dynamically otherwise. A fixed 500-element array alone is too small for higher
/// quantum numbers (e.g. l = 7 on both sides needs 36 * 15 = 540 elements).
///
/// As in the other branches, the case "neither shell is pure" is not handled specially:
/// the final branch always transforms the columns.
template <typename Real, typename Shell>
void tform(const Shell& shell_row, const Shell& shell_col, const Real* source_blk, Real* target_blk) {
  const auto trow = shell_row.pure;
  const auto tcol = shell_col.pure;
  if (trow) {
    if (tcol) {
      // Elements written by the column transform: one row per Cartesian row component,
      // 2*l+1 harmonic columns each.
      const std::size_t scratch_size =
          static_cast<std::size_t>(shell_row.cartesian_size()) *
          static_cast<std::size_t>(2 * shell_col.l + 1);

      constexpr std::size_t stack_capacity = 500;
      Real stack_scratch[stack_capacity];
      std::vector<Real> heap_scratch;   // only used when the stack buffer is too small
      Real* scratch = &stack_scratch[0];
      if (scratch_size > stack_capacity) {
        heap_scratch.resize(scratch_size);
        scratch = heap_scratch.data();
      }

      tform_cols(shell_row.cartesian_size(), shell_col.l, source_blk, scratch);
      tform_rows(shell_row.l, shell_col.size(), scratch, target_blk);
    }
    else
      tform_rows(shell_row.l, shell_col.cartesian_size(), source_blk, target_blk);
  }
  else
    tform_cols(shell_row.cartesian_size(), shell_col.l, source_blk, target_blk);
}
Default definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
SafePtr< CTimeEntity< typename ProductType< T, U >::result > > operator*(const SafePtr< CTimeEntity< T > > &A, const SafePtr< CTimeEntity< U > > &B)
Creates product A*B.
Definition: entity.h:280