/**
 * compute() -- five-source variant.
 *
 * Builds per-element values from src0..src3 and then adds a src4-based
 * contribution into `target`, for every Cartesian triple (bx, by, bz)
 * produced by FOR_CART for Lb.
 *
 * inteval : supplies veclen, the PB_x/PB_y/PB_z arrays, the oo2z, roz and
 *           oo2ze prefactor arrays, and the nflops counter read/updated below.
 * target  : output buffer; the visible code accumulates into it via `tptr`.
 * src0..src4 : read-only input buffers, indexed with the offsets computed below.
 *
 * NOTE(review): the leading integer on each line looks like a line number
 * carried over from a source listing, and the numbering has gaps. Statements
 * that fall in those gaps (the declarations of b[], xyz and dv, the
 * assignment of WP, the store of `value`, the branch that chooses between
 * the two loop nests, closing braces and #endif lines) are not visible here.
 */
55 static void compute(
const Libint_t* inteval, LIBINT2_REALTYPE* target,
56 const LIBINT2_REALTYPE* src0,
57 const LIBINT2_REALTYPE* src1,
58 const LIBINT2_REALTYPE* src2,
59 const LIBINT2_REALTYPE* src3,
60 const LIBINT2_REALTYPE* src4) {
// Precondition: this variant requires Lb >= 2 and Ld >= 1.
62 assert(not(Lb < 2 || Ld < 1));
// Inner vector length: taken from inteval only when `vectorize` is set, else 1.
64 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// Nd comes from INT_NCART(Ld); NdV is Nd scaled by the vector length and is
// used below as the stride between consecutive b-indexed rows.
66 const unsigned int Nd = INT_NCART(Ld);
67 const unsigned int NdV = Nd * veclen;
// Iterate over the (bx, by, bz) triples generated by FOR_CART for Lb.
70 FOR_CART(bx, by, bz, Lb)
// Symbolic names for the three axis indices.
77 enum XYZ { x = 0, y = 1, z = 2 };
// Per-axis prefactor arrays. PB is only assigned when unit_a is false, and
// each assignment is compiled in only if the matching PB_* quantity is defined.
// NOTE(review): the conditions that pick one axis, and the assignment of WP,
// sit on lines not visible in this excerpt.
85 const LIBINT2_REALTYPE *PB, *WP;
88#if LIBINT2_DEFINED(eri, PB_x)
89 if (not unit_a) PB = inteval->PB_x;
94#if LIBINT2_DEFINED(eri, PB_y)
95 if (not unit_a) PB = inteval->PB_y;
100#if LIBINT2_DEFINED(eri, PB_z)
101 if (not unit_a) PB = inteval->PB_z;
// Locate the (Lb - 1) row inside src0 and src1. src0_ptr is a null pointer when
// unit_a is set; every visible read of it is guarded by `not unit_a`.
107 const unsigned int ibm1 = INT_CARTINDEX(Lb - 1, b[0], b[1]);
108 const unsigned int bm10d0_offset = ibm1 * NdV;
109 const LIBINT2_REALTYPE* src0_ptr = unit_a ? 0 : src0 + bm10d0_offset;
110 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
// Locate the (Lb - 2) row inside src2 and src3.
115 const unsigned int ibm2 = INT_CARTINDEX(Lb - 2, b[0], b[1]);
116 const unsigned int bm20d0_offset = ibm2 * NdV;
118 const LIBINT2_REALTYPE* src2_ptr = src2 + bm20d0_offset;
119 const LIBINT2_REALTYPE* src3_ptr = src3 + bm20d0_offset;
// b component along the selected axis, converted to the real type.
120 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
// First loop nest: value = WP*src1 + bxyz*oo2z*(src2 - roz*src3), plus PB*src0
// unless unit_a. dv is advanced together with v, so it runs over d and v jointly.
// NOTE(review): where `value` is stored is not visible in this excerpt.
123 for (
unsigned int d = 0; d < Nd; ++d) {
124 for (
unsigned int v = 0; v < veclen; ++v, ++dv) {
125 LIBINT2_REALTYPE value =
126 WP[v] * src1_ptr[dv] +
127 bxyz * inteval->oo2z[v] *
128 (src2_ptr[dv] - inteval->roz[v] * src3_ptr[dv]);
129 if (not unit_a) value += PB[v] * src0_ptr[dv];
// Optional flop accounting for the loop nest above: 6 (unit_a) or 8 per element.
133#if LIBINT2_FLOP_COUNT
134 inteval->nflops[0] += (unit_a ? 6 : 8) * NdV;
// Second loop nest: same as above but without the src2/src3 term.
// NOTE(review): presumably the alternative branch of a conditional whose test
// is on an elided line -- confirm against the full file.
139 for (
unsigned int d = 0; d < Nd; ++d) {
140 for (
unsigned int v = 0; v < veclen; ++v, ++dv) {
141 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv];
142 if (not unit_a) value += PB[v] * src0_ptr[dv];
// Optional flop accounting for the shorter form: 1 (unit_a) or 3 per element.
146#if LIBINT2_FLOP_COUNT
147 inteval->nflops[0] += (unit_a ? 1 : 3) * NdV;
// Locate the (Lb - 1) row inside src4, whose rows have Ndm1 * veclen entries.
152 const unsigned int Ndm1 = INT_NCART(Ld - 1);
153 const unsigned int Ndm1V = Ndm1 * veclen;
154 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
155 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
// Iterate over the (dx, dy, dz) triples generated by FOR_CART for Ld - 1.
159 FOR_CART(dx, dy, dz, Ld - 1)
// Map d[] to its index at Ld and accumulate dxyz * oo2ze * src4 into target there.
// NOTE(review): the declaration of d[] and any change made to it before this
// point are on lines not visible here.
167 const unsigned int dc = INT_CARTINDEX(Ld, d[0], d[1]);
168 const unsigned int dc_offset = dc * veclen;
169 LIBINT2_REALTYPE* tptr = target + dc_offset;
170 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
171 for (
unsigned int v = 0; v < veclen; ++v) {
172 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
// Optional flop accounting: 3 per vector element for the accumulation above.
174#if LIBINT2_FLOP_COUNT
175 inteval->nflops[0] += 3 * veclen;
/**
 * Partial specialisation of OSVRR_sx_sx with its first template argument
 * fixed to 1; Lb, Ld and vectorize remain template parameters.
 *
 * compute() fills `target` from src0..src3 for every Cartesian triple
 * (dx, dy, dz) produced by FOR_CART for Ld, then adds a src4-based
 * contribution for every triple produced by FOR_CART for Lb - 1.
 *
 * NOTE(review): the leading integer on each line looks like a line number
 * carried over from a source listing, and the numbering has gaps. The body of
 * the `if (Lb < 1 || Ld < 2)` guard, the declarations of xyz and id, the
 * assignments of QD and WQ, any pointer advancement inside the loops, the
 * branch that chooses between the two loop nests, closing braces and #endif
 * lines are not visible here.
 */
196 template <
int Lb,
int Ld,
bool vectorize>
struct OSVRR_sx_sx<1,Lb,Ld,vectorize> {
198 static void compute(
const Libint_t* inteval,
199 LIBINT2_REALTYPE* target,
200 const LIBINT2_REALTYPE* src0,
201 const LIBINT2_REALTYPE* src1,
202 const LIBINT2_REALTYPE* src2,
203 const LIBINT2_REALTYPE* src3,
204 const LIBINT2_REALTYPE* src4) {
// Guard on the template arguments; what happens when it fires is on elided lines.
207 if (Lb < 1 || Ld < 2)
// Inner vector length: taken from inteval only when `vectorize` is set, else 1.
223 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// Counts from INT_NCART for Lb, Ld, Ld-1 and Ld-2, plus the d-related counts
// scaled by the vector length.
225 const unsigned int Nb = INT_NCART(Lb);
226 const unsigned int Nd = INT_NCART(Ld);
227 const unsigned int Ndv = Nd * veclen;
228 const unsigned int Ndm1 = INT_NCART(Ld-1);
229 const unsigned int Ndm1v = Ndm1 * veclen;
230 const unsigned int Ndm2 = INT_NCART(Ld-2);
231 const unsigned int Ndm2v = Ndm2 * veclen;
// Iterate over the (dx, dy, dz) triples generated by FOR_CART for Ld.
235 FOR_CART(dx, dy, dz, Ld)
// Copy the loop triple into an indexable array.
237 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
// Symbolic names for the three axis indices.
239 enum XYZ {x=0, y=1, z=2};
// Axis selection: y if dy is non-zero, overridden by x if dx is non-zero.
// NOTE(review): the declaration and initial value of xyz are not visible here.
242 if (dy != 0) xyz = y;
243 if (dx != 0) xyz = x;
// Per-axis prefactor arrays; their assignments are on elided lines.
247 const LIBINT2_REALTYPE *QD, *WQ;
// Offsets of the current d entry (d0_offset) and of the (Ld - 1) entry
// (dm10_offset), both scaled by veclen.
// NOTE(review): `id` is defined on a line not visible in this excerpt.
263 const unsigned int idm1 = INT_CARTINDEX(Ld-1,d[0],d[1]);
264 const unsigned int d0_offset =
id * veclen;
265 const unsigned int dm10_offset = idm1 * veclen;
266 LIBINT2_REALTYPE* target_ptr = target + d0_offset;
267 const LIBINT2_REALTYPE* src0_ptr = src0 + dm10_offset;
268 const LIBINT2_REALTYPE* src1_ptr = src1 + dm10_offset;
// Offset of the (Ld - 2) entry inside src2 and src3.
273 const unsigned int idm2 = INT_CARTINDEX(Ld-2,d[0],d[1]);
274 const unsigned int dm20_offset = idm2 * veclen;
276 const LIBINT2_REALTYPE* src2_ptr = src2 + dm20_offset;
277 const LIBINT2_REALTYPE* src3_ptr = src3 + dm20_offset;
// d component along the selected axis, converted to the real type.
278 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
// First loop nest over b and v:
//   target = QD*src0 + WQ*src1 + dxyz*oo2e*(src2 - roe*src3).
// NOTE(review): the pointers are indexed only by v here; presumably they are
// advanced per b iteration on elided lines -- confirm against the full file.
280 for(
unsigned int b = 0; b < Nb; ++b) {
281 for(
unsigned int v=0; v<veclen; ++v) {
282 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v]
283 + dxyz * inteval->oo2e[v] * (src2_ptr[v] - inteval->roe[v] * src3_ptr[v]);
// Optional flop accounting for the loop nest above: 8 per element.
291#if LIBINT2_FLOP_COUNT
292 inteval->nflops[0] += 8 * Nb * veclen;
// Second loop nest: same as above but without the src2/src3 term.
// NOTE(review): presumably the alternative branch of a conditional whose test
// is on an elided line.
297 for(
unsigned int b = 0; b < Nb; ++b) {
298 for(
unsigned int v=0; v<veclen; ++v) {
299 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v];
// Optional flop accounting for the shorter form: 3 per element.
305#if LIBINT2_FLOP_COUNT
306 inteval->nflops[0] += 3 * Nb * veclen;
// src4 is addressed with the same (Ld - 1) offset as src0 and src1.
311 const LIBINT2_REALTYPE* src4_ptr = src4 + dm10_offset;
// Iterate over the (bx, by, bz) triples generated by FOR_CART for Lb - 1.
315 FOR_CART(bx, by, bz, Lb-1)
// Copy the loop triple into an indexable array.
317 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
// Map b[] to its index at Lb, combine it with the current d offset, and
// accumulate bxyz * oo2ze * src4 into target at that position.
// NOTE(review): any change made to b[] between the copy above and this lookup
// is on lines not visible here.
320 const unsigned int ib = INT_CARTINDEX(Lb,b[0],b[1]);
321 const unsigned int b0d0_offset = ib * Ndv + d0_offset;
322 LIBINT2_REALTYPE* target_ptr = target + b0d0_offset;
323 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
324 for(
unsigned int v=0; v<veclen; ++v) {
325 target_ptr[v] += bxyz * inteval->oo2ze[v] * src4_ptr[v];
// Optional flop accounting: 3 per vector element for the accumulation above.
327#if LIBINT2_FLOP_COUNT
328 inteval->nflops[0] += 3 * veclen;
/**
 * compute() -- two-source variant (src1 and src4 only).
 *
 * For every Cartesian triple (bx, by, bz) produced by FOR_CART for Lb, writes
 * WP * src1 into `target` and then adds a src4-based contribution for every
 * triple produced by FOR_CART for Ld - 1.
 *
 * NOTE(review): the leading integer on each line looks like a line number
 * carried over from a source listing, and the numbering has gaps. The
 * declarations of b[], d[], xyz and dv, the assignment of WP, closing braces
 * and #endif lines are not visible here, and the block continues past the
 * last visible line.
 */
357 static void compute(
const Libint_t* inteval, LIBINT2_REALTYPE* target,
358 const LIBINT2_REALTYPE* src1,
359 const LIBINT2_REALTYPE* src4) {
// Precondition: this variant requires Lb >= 2 and Ld >= 1.
361 assert(not(Lb < 2 || Ld < 1));
// Inner vector length: taken from inteval only when `vectorize` is set, else 1.
363 const unsigned int veclen = vectorize ? inteval->veclen : 1;
// Nd comes from INT_NCART(Ld); NdV is Nd scaled by the vector length.
365 const unsigned int Nd = INT_NCART(Ld);
366 const unsigned int NdV = Nd * veclen;
// Iterate over the (bx, by, bz) triples generated by FOR_CART for Lb.
369 FOR_CART(bx, by, bz, Lb)
// Symbolic names for the three axis indices.
376 enum XYZ { x = 0, y = 1, z = 2 };
// Axis selection: y if by is non-zero, overridden by x if bx is non-zero.
// NOTE(review): the declaration and initial value of xyz are not visible here.
379 if (by != 0) xyz = y;
380 if (bx != 0) xyz = x;
// Per-axis prefactor array; its assignment is on elided lines.
384 const LIBINT2_REALTYPE* WP;
// Locate the (Lb - 1) row inside src1.
397 const unsigned int ibm1 = INT_CARTINDEX(Lb - 1, b[0], b[1]);
398 const unsigned int bm10d0_offset = ibm1 * NdV;
399 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
// target[dv] = WP * src1 over all d and v; dv is advanced together with v.
// NOTE(review): `target` is indexed by dv alone here; how it moves between b
// iterations is not visible in this excerpt.
403 for (
unsigned int d = 0; d < Nd; ++d) {
404 for (
unsigned int v = 0; v < veclen; ++v, ++dv) {
405 target[dv] = WP[v] * src1_ptr[dv];
// Optional flop accounting: one per element of the loop nest above.
408#if LIBINT2_FLOP_COUNT
409 inteval->nflops[0] += NdV;
// Locate the (Lb - 1) row inside src4, whose rows have Ndm1 * veclen entries.
414 const unsigned int Ndm1 = INT_NCART(Ld - 1);
415 const unsigned int Ndm1V = Ndm1 * veclen;
416 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
417 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
// Iterate over the (dx, dy, dz) triples generated by FOR_CART for Ld - 1.
421 FOR_CART(dx, dy, dz, Ld - 1)
// Map d[] to its index at Ld and accumulate dxyz * oo2ze * src4 into target there.
429 const unsigned int dc = INT_CARTINDEX(Ld, d[0], d[1]);
430 const unsigned int dc_offset = dc * veclen;
431 LIBINT2_REALTYPE* tptr = target + dc_offset;
432 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
433 for (
unsigned int v = 0; v < veclen; ++v) {
434 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
// Optional flop accounting: 3 per vector element for the accumulation above.
436#if LIBINT2_FLOP_COUNT
437 inteval->nflops[0] += 3 * veclen;