LIBINT 2.9.0
OSVRR_sx_sx.h
1/*
2 * Copyright (C) 2004-2024 Edward F. Valeev
3 *
4 * This file is part of Libint library.
5 *
6 * Libint library is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint library. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef _libint2_src_lib_libint_osvrrsxsx_h_
22#define _libint2_src_lib_libint_osvrrsxsx_h_
23
24#include <libint2.h>
25#include <libint2/cgshell_ordering.h>
26#include <util_types.h>
27
28#include <cstdlib>
29
30#ifdef __GNUC__
31#pragma implementation
32#endif
33
34namespace libint2 {
35
36template <int part, int Lb, int Ld, bool unit_a, bool vectorize>
38 static void compute(const Libint_t* inteval, LIBINT2_REALTYPE* target,
39 const LIBINT2_REALTYPE* src1,
40 const LIBINT2_REALTYPE* src0,
41 const LIBINT2_REALTYPE* src2,
42 const LIBINT2_REALTYPE* src3,
43 const LIBINT2_REALTYPE* src4);
44};
45
53template <int Lb, int Ld, bool unit_a, bool vectorize>
54struct OSVRR_sx_sx<0, Lb, Ld, unit_a, vectorize> {
55 static void compute(const Libint_t* inteval, LIBINT2_REALTYPE* target,
56 const LIBINT2_REALTYPE* src0,
57 const LIBINT2_REALTYPE* src1,
58 const LIBINT2_REALTYPE* src2,
59 const LIBINT2_REALTYPE* src3,
60 const LIBINT2_REALTYPE* src4) {
61 // works for (sd|sp) and higher
62 assert(not(Lb < 2 || Ld < 1));
63
64 const unsigned int veclen = vectorize ? inteval->veclen : 1;
65
66 const unsigned int Nd = INT_NCART(Ld);
67 const unsigned int NdV = Nd * veclen;
68
69 int bx, by, bz;
70 FOR_CART(bx, by, bz, Lb)
71
72 int b[3];
73 b[0] = bx;
74 b[1] = by;
75 b[2] = bz;
76
77 enum XYZ { x = 0, y = 1, z = 2 };
78 // Build along x, if possible
79 XYZ xyz = z;
80 if (by != 0) xyz = y;
81 if (bx != 0) xyz = x;
82 --b[xyz];
83
84 // redirect
85 const LIBINT2_REALTYPE *PB, *WP;
86 switch (xyz) {
87 case x:
88#if LIBINT2_DEFINED(eri, PB_x)
89 if (not unit_a) PB = inteval->PB_x;
90#endif
91 WP = inteval->WP_x;
92 break;
93 case y:
94#if LIBINT2_DEFINED(eri, PB_y)
95 if (not unit_a) PB = inteval->PB_y;
96#endif
97 WP = inteval->WP_y;
98 break;
99 case z:
100#if LIBINT2_DEFINED(eri, PB_z)
101 if (not unit_a) PB = inteval->PB_z;
102#endif
103 WP = inteval->WP_z;
104 break;
105 }
106
107 const unsigned int ibm1 = INT_CARTINDEX(Lb - 1, b[0], b[1]);
108 const unsigned int bm10d0_offset = ibm1 * NdV;
109 const LIBINT2_REALTYPE* src0_ptr = unit_a ? 0 : src0 + bm10d0_offset;
110 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
111
112 // if b-2_xyz exists, include (0 b-2_xyz | 0 d)
113 if (b[xyz] > 0) {
114 --b[xyz];
115 const unsigned int ibm2 = INT_CARTINDEX(Lb - 2, b[0], b[1]);
116 const unsigned int bm20d0_offset = ibm2 * NdV;
117 ++b[xyz];
118 const LIBINT2_REALTYPE* src2_ptr = src2 + bm20d0_offset;
119 const LIBINT2_REALTYPE* src3_ptr = src3 + bm20d0_offset;
120 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
121
122 unsigned int dv = 0;
123 for (unsigned int d = 0; d < Nd; ++d) {
124 for (unsigned int v = 0; v < veclen; ++v, ++dv) {
125 LIBINT2_REALTYPE value =
126 WP[v] * src1_ptr[dv] +
127 bxyz * inteval->oo2z[v] *
128 (src2_ptr[dv] - inteval->roz[v] * src3_ptr[dv]);
129 if (not unit_a) value += PB[v] * src0_ptr[dv];
130 target[dv] = value;
131 }
132 }
133#if LIBINT2_FLOP_COUNT
134 inteval->nflops[0] += (unit_a ? 6 : 8) * NdV;
135#endif
136
137 } else {
138 unsigned int dv = 0;
139 for (unsigned int d = 0; d < Nd; ++d) {
140 for (unsigned int v = 0; v < veclen; ++v, ++dv) {
141 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv];
142 if (not unit_a) value += PB[v] * src0_ptr[dv];
143 target[dv] = value;
144 }
145 }
146#if LIBINT2_FLOP_COUNT
147 inteval->nflops[0] += (unit_a ? 1 : 3) * NdV;
148#endif
149 }
150
151 {
152 const unsigned int Ndm1 = INT_NCART(Ld - 1);
153 const unsigned int Ndm1V = Ndm1 * veclen;
154 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
155 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
156
157 // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
158 int dx, dy, dz;
159 FOR_CART(dx, dy, dz, Ld - 1)
160
161 int d[3];
162 d[0] = dx;
163 d[1] = dy;
164 d[2] = dz;
165 ++d[xyz];
166
167 const unsigned int dc = INT_CARTINDEX(Ld, d[0], d[1]);
168 const unsigned int dc_offset = dc * veclen;
169 LIBINT2_REALTYPE* tptr = target + dc_offset;
170 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
171 for (unsigned int v = 0; v < veclen; ++v) {
172 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
173 }
174#if LIBINT2_FLOP_COUNT
175 inteval->nflops[0] += 3 * veclen;
176#endif
177 src4_ptr += veclen;
178
179 END_FOR_CART
180 }
181
182 target += NdV;
183
184 END_FOR_CART
185 }
186};
187
188#if 0
196 template <int Lb, int Ld, bool vectorize> struct OSVRR_sx_sx<1,Lb,Ld,vectorize> {
197
198 static void compute(const Libint_t* inteval,
199 LIBINT2_REALTYPE* target,
200 const LIBINT2_REALTYPE* src0,
201 const LIBINT2_REALTYPE* src1,
202 const LIBINT2_REALTYPE* src2,
203 const LIBINT2_REALTYPE* src3,
204 const LIBINT2_REALTYPE* src4) {
205
206 // works for (sp|sd) and higher
207 if (Lb < 1 || Ld < 2)
208 abort();
209
210 // ALGORITHM
211 // loop over functions in d
212 // decide in which direction can build (x, y, or z)
213 // decide whether d-2 exists
214 // loop over functions in b
215 // include contributions from (0 b | 0 d-1)^(m), (0 b | 0 d-1)^(m+1),
216 // and possibly (0 b | 0 d-2)^(m), (0 b | 0 d-2)^(m+1) for each b
217 // end of loop over b
218 // loop over b-1
219 // include contribution from (0 b-1 | 0 d-1)^(m+1)
220 // end of loop over b-1
221 // end of loop over d
222
223 const unsigned int veclen = vectorize ? inteval->veclen : 1;
224
225 const unsigned int Nb = INT_NCART(Lb);
226 const unsigned int Nd = INT_NCART(Ld);
227 const unsigned int Ndv = Nd * veclen;
228 const unsigned int Ndm1 = INT_NCART(Ld-1);
229 const unsigned int Ndm1v = Ndm1 * veclen;
230 const unsigned int Ndm2 = INT_NCART(Ld-2);
231 const unsigned int Ndm2v = Ndm2 * veclen;
232
233 int dx, dy, dz;
234 int id = 0;
235 FOR_CART(dx, dy, dz, Ld)
236
237 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
238
239 enum XYZ {x=0, y=1, z=2};
240 // Build along x, if possible
241 XYZ xyz = z;
242 if (dy != 0) xyz = y;
243 if (dx != 0) xyz = x;
244 --d[xyz];
245
246 // redirect
247 const LIBINT2_REALTYPE *QD, *WQ;
248 switch(xyz) {
249 case x:
250 QD = inteval->QD_x;
251 WQ = inteval->WQ_x;
252 break;
253 case y:
254 QD = inteval->QD_y;
255 WQ = inteval->WQ_y;
256 break;
257 case z:
258 QD = inteval->QD_z;
259 WQ = inteval->WQ_z;
260 break;
261 }
262
263 const unsigned int idm1 = INT_CARTINDEX(Ld-1,d[0],d[1]);
264 const unsigned int d0_offset = id * veclen;
265 const unsigned int dm10_offset = idm1 * veclen;
266 LIBINT2_REALTYPE* target_ptr = target + d0_offset;
267 const LIBINT2_REALTYPE* src0_ptr = src0 + dm10_offset;
268 const LIBINT2_REALTYPE* src1_ptr = src1 + dm10_offset;
269
270 // if d-2_xyz exists, include (0 b | 0 d-2_xyz)
271 if (d[xyz] > 0) {
272 --d[xyz];
273 const unsigned int idm2 = INT_CARTINDEX(Ld-2,d[0],d[1]);
274 const unsigned int dm20_offset = idm2 * veclen;
275 ++d[xyz];
276 const LIBINT2_REALTYPE* src2_ptr = src2 + dm20_offset;
277 const LIBINT2_REALTYPE* src3_ptr = src3 + dm20_offset;
278 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
279
280 for(unsigned int b = 0; b < Nb; ++b) {
281 for(unsigned int v=0; v<veclen; ++v) {
282 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v]
283 + dxyz * inteval->oo2e[v] * (src2_ptr[v] - inteval->roe[v] * src3_ptr[v]);
284 }
285 target_ptr += Ndv;
286 src0_ptr += Ndm1v;
287 src1_ptr += Ndm1v;
288 src2_ptr += Ndm2v;
289 src3_ptr += Ndm2v;
290 }
291#if LIBINT2_FLOP_COUNT
292 inteval->nflops[0] += 8 * Nb * veclen;
293#endif
294
295 }
296 else {
297 for(unsigned int b = 0; b < Nb; ++b) {
298 for(unsigned int v=0; v<veclen; ++v) {
299 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v];
300 }
301 target_ptr += Ndv;
302 src0_ptr += Ndm1v;
303 src1_ptr += Ndm1v;
304 }
305#if LIBINT2_FLOP_COUNT
306 inteval->nflops[0] += 3 * Nb * veclen;
307#endif
308 }
309
310 {
311 const LIBINT2_REALTYPE* src4_ptr = src4 + dm10_offset;
312
313 // loop over b-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
314 int bx, by, bz;
315 FOR_CART(bx, by, bz, Lb-1)
316
317 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
318 ++b[xyz];
319
320 const unsigned int ib = INT_CARTINDEX(Lb,b[0],b[1]);
321 const unsigned int b0d0_offset = ib * Ndv + d0_offset;
322 LIBINT2_REALTYPE* target_ptr = target + b0d0_offset;
323 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
324 for(unsigned int v=0; v<veclen; ++v) {
325 target_ptr[v] += bxyz * inteval->oo2ze[v] * src4_ptr[v];
326 }
327#if LIBINT2_FLOP_COUNT
328 inteval->nflops[0] += 3 * veclen;
329#endif
330 src4_ptr += Ndm1v;
331
332 END_FOR_CART // end of loop over b
333 }
334
335 ++id;
336
337 END_FOR_CART // end of loop over d
338
339 }
340
341 };
342#endif
343
344template <int part, int Lb, int Ld, bool vectorize>
346 static void compute(const Libint_t* inteval, LIBINT2_REALTYPE* target,
347 const LIBINT2_REALTYPE* src1,
348 const LIBINT2_REALTYPE* src4);
349};
350
355template <int Lb, int Ld, bool vectorize>
356struct OSAVRR_sx_sx<0, Lb, Ld, vectorize> {
357 static void compute(const Libint_t* inteval, LIBINT2_REALTYPE* target,
358 const LIBINT2_REALTYPE* src1,
359 const LIBINT2_REALTYPE* src4) {
360 // works for (sd|sp) and higher
361 assert(not(Lb < 2 || Ld < 1));
362
363 const unsigned int veclen = vectorize ? inteval->veclen : 1;
364
365 const unsigned int Nd = INT_NCART(Ld);
366 const unsigned int NdV = Nd * veclen;
367
368 int bx, by, bz;
369 FOR_CART(bx, by, bz, Lb)
370
371 int b[3];
372 b[0] = bx;
373 b[1] = by;
374 b[2] = bz;
375
376 enum XYZ { x = 0, y = 1, z = 2 };
377 // Build along x, if possible
378 XYZ xyz = z;
379 if (by != 0) xyz = y;
380 if (bx != 0) xyz = x;
381 --b[xyz];
382
383 // redirect
384 const LIBINT2_REALTYPE* WP;
385 switch (xyz) {
386 case x:
387 WP = inteval->WP_x;
388 break;
389 case y:
390 WP = inteval->WP_y;
391 break;
392 case z:
393 WP = inteval->WP_z;
394 break;
395 }
396
397 const unsigned int ibm1 = INT_CARTINDEX(Lb - 1, b[0], b[1]);
398 const unsigned int bm10d0_offset = ibm1 * NdV;
399 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
400
401 {
402 unsigned int dv = 0;
403 for (unsigned int d = 0; d < Nd; ++d) {
404 for (unsigned int v = 0; v < veclen; ++v, ++dv) {
405 target[dv] = WP[v] * src1_ptr[dv];
406 }
407 }
408#if LIBINT2_FLOP_COUNT
409 inteval->nflops[0] += NdV;
410#endif
411 }
412
413 {
414 const unsigned int Ndm1 = INT_NCART(Ld - 1);
415 const unsigned int Ndm1V = Ndm1 * veclen;
416 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
417 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
418
419 // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
420 int dx, dy, dz;
421 FOR_CART(dx, dy, dz, Ld - 1)
422
423 int d[3];
424 d[0] = dx;
425 d[1] = dy;
426 d[2] = dz;
427 ++d[xyz];
428
429 const unsigned int dc = INT_CARTINDEX(Ld, d[0], d[1]);
430 const unsigned int dc_offset = dc * veclen;
431 LIBINT2_REALTYPE* tptr = target + dc_offset;
432 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
433 for (unsigned int v = 0; v < veclen; ++v) {
434 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
435 }
436#if LIBINT2_FLOP_COUNT
437 inteval->nflops[0] += 3 * veclen;
438#endif
439 src4_ptr += veclen;
440
441 END_FOR_CART
442 }
443
444 target += NdV;
445
446 END_FOR_CART
447 }
448};
449
450}; // namespace libint2
451
452#endif // header guard
Defaults definitions for various parameters assumed by Libint.
Definition algebra.cc:24
Definition OSVRR_sx_sx.h:345
Definition OSVRR_sx_sx.h:37