LIBINT 2.7.2
OSVRR_sx_sx.h
1/*
2 * Copyright (C) 2004-2021 Edward F. Valeev
3 *
4 * This file is part of Libint.
5 *
6 * Libint is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef _libint2_src_lib_libint_osvrrsxsx_h_
22#define _libint2_src_lib_libint_osvrrsxsx_h_
23
24#include <cstdlib>
25#include <libint2.h>
26#include <util_types.h>
27#include <libint2/cgshell_ordering.h>
28
29#ifdef __GNUC__
30#pragma implementation
31#endif
32
33namespace libint2 {
34
35 template <int part, int Lb, int Ld, bool unit_a, bool vectorize> struct OSVRR_sx_sx {
36 static void compute(const Libint_t* inteval,
37 LIBINT2_REALTYPE* target,
38 const LIBINT2_REALTYPE* src1,
39 const LIBINT2_REALTYPE* src0,
40 const LIBINT2_REALTYPE* src2,
41 const LIBINT2_REALTYPE* src3,
42 const LIBINT2_REALTYPE* src4);
43 };
44
52 template <int Lb, int Ld,
53 bool unit_a,
54 bool vectorize> struct OSVRR_sx_sx<0,Lb,Ld,unit_a,vectorize> {
55
56 static void compute(const Libint_t* inteval,
57 LIBINT2_REALTYPE* target,
58 const LIBINT2_REALTYPE* src0,
59 const LIBINT2_REALTYPE* src1,
60 const LIBINT2_REALTYPE* src2,
61 const LIBINT2_REALTYPE* src3,
62 const LIBINT2_REALTYPE* src4) {
63
64 // works for (sd|sp) and higher
65 assert(not (Lb < 2 || Ld < 1));
66
67 const unsigned int veclen = vectorize ? inteval->veclen : 1;
68
69 const unsigned int Nd = INT_NCART(Ld);
70 const unsigned int NdV = Nd * veclen;
71
72 int bx, by, bz;
73 FOR_CART(bx, by, bz, Lb)
74
75 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
76
77 enum XYZ {x=0, y=1, z=2};
78 // Build along x, if possible
79 XYZ xyz = z;
80 if (by != 0) xyz = y;
81 if (bx != 0) xyz = x;
82 --b[xyz];
83
84 // redirect
85 const LIBINT2_REALTYPE *PB, *WP;
86 switch(xyz) {
87 case x:
88#if LIBINT2_DEFINED(eri,PB_x)
89 if (not unit_a) PB = inteval->PB_x;
90#endif
91 WP = inteval->WP_x;
92 break;
93 case y:
94#if LIBINT2_DEFINED(eri,PB_y)
95 if (not unit_a) PB = inteval->PB_y;
96#endif
97 WP = inteval->WP_y;
98 break;
99 case z:
100#if LIBINT2_DEFINED(eri,PB_z)
101 if (not unit_a) PB = inteval->PB_z;
102#endif
103 WP = inteval->WP_z;
104 break;
105 }
106
107 const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
108 const unsigned int bm10d0_offset = ibm1 * NdV;
109 const LIBINT2_REALTYPE* src0_ptr = unit_a ? 0 : src0 + bm10d0_offset;
110 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
111
112 // if b-2_xyz exists, include (0 b-2_xyz | 0 d)
113 if (b[xyz] > 0) {
114 --b[xyz];
115 const unsigned int ibm2 = INT_CARTINDEX(Lb-2,b[0],b[1]);
116 const unsigned int bm20d0_offset = ibm2 * NdV;
117 ++b[xyz];
118 const LIBINT2_REALTYPE* src2_ptr = src2 + bm20d0_offset;
119 const LIBINT2_REALTYPE* src3_ptr = src3 + bm20d0_offset;
120 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
121
122 unsigned int dv = 0;
123 for(unsigned int d = 0; d < Nd; ++d) {
124 for(unsigned int v=0; v<veclen; ++v, ++dv) {
125 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv] + bxyz * inteval->oo2z[v] * (src2_ptr[dv] - inteval->roz[v] * src3_ptr[dv]);
126 if (not unit_a) value += PB[v] * src0_ptr[dv];
127 target[dv] = value;
128 }
129 }
130#if LIBINT2_FLOP_COUNT
131 inteval->nflops[0] += (unit_a ? 6 : 8) * NdV;
132#endif
133
134 }
135 else {
136 unsigned int dv = 0;
137 for(unsigned int d = 0; d < Nd; ++d) {
138 for(unsigned int v=0; v<veclen; ++v, ++dv) {
139 LIBINT2_REALTYPE value = WP[v] * src1_ptr[dv];
140 if (not unit_a) value += PB[v] * src0_ptr[dv];
141 target[dv] = value;
142 }
143 }
144#if LIBINT2_FLOP_COUNT
145 inteval->nflops[0] += (unit_a ? 1 : 3) * NdV;
146#endif
147 }
148
149 {
150 const unsigned int Ndm1 = INT_NCART(Ld-1);
151 const unsigned int Ndm1V = Ndm1 * veclen;
152 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
153 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
154
155 // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
156 int dx, dy, dz;
157 FOR_CART(dx, dy, dz, Ld-1)
158
159 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
160 ++d[xyz];
161
162 const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
163 const unsigned int dc_offset = dc * veclen;
164 LIBINT2_REALTYPE* tptr = target + dc_offset;
165 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
166 for(unsigned int v=0; v<veclen; ++v) {
167 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
168 }
169#if LIBINT2_FLOP_COUNT
170 inteval->nflops[0] += 3 * veclen;
171#endif
172 src4_ptr += veclen;
173
174 END_FOR_CART
175 }
176
177 target += NdV;
178
179 END_FOR_CART
180
181 }
182
183 };
184
185#if 0
193 template <int Lb, int Ld, bool vectorize> struct OSVRR_sx_sx<1,Lb,Ld,vectorize> {
194
195 static void compute(const Libint_t* inteval,
196 LIBINT2_REALTYPE* target,
197 const LIBINT2_REALTYPE* src0,
198 const LIBINT2_REALTYPE* src1,
199 const LIBINT2_REALTYPE* src2,
200 const LIBINT2_REALTYPE* src3,
201 const LIBINT2_REALTYPE* src4) {
202
203 // works for (sp|sd) and higher
204 if (Lb < 1 || Ld < 2)
205 abort();
206
207 // ALGORITHM
208 // loop over functions in d
209 // decide in which direction can build (x, y, or z)
210 // decide whether d-2 exists
211 // loop over functions in b
212 // include contributions from (0 b | 0 d-1)^(m), (0 b | 0 d-1)^(m+1),
213 // and possibly (0 b | 0 d-2)^(m), (0 b | 0 d-2)^(m+1) for each b
214 // end of loop over b
215 // loop over b-1
216 // include contribution from (0 b-1 | 0 d-1)^(m+1)
217 // end of loop over b-1
218 // end of loop over d
219
220 const unsigned int veclen = vectorize ? inteval->veclen : 1;
221
222 const unsigned int Nb = INT_NCART(Lb);
223 const unsigned int Nd = INT_NCART(Ld);
224 const unsigned int Ndv = Nd * veclen;
225 const unsigned int Ndm1 = INT_NCART(Ld-1);
226 const unsigned int Ndm1v = Ndm1 * veclen;
227 const unsigned int Ndm2 = INT_NCART(Ld-2);
228 const unsigned int Ndm2v = Ndm2 * veclen;
229
230 int dx, dy, dz;
231 int id = 0;
232 FOR_CART(dx, dy, dz, Ld)
233
234 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
235
236 enum XYZ {x=0, y=1, z=2};
237 // Build along x, if possible
238 XYZ xyz = z;
239 if (dy != 0) xyz = y;
240 if (dx != 0) xyz = x;
241 --d[xyz];
242
243 // redirect
244 const LIBINT2_REALTYPE *QD, *WQ;
245 switch(xyz) {
246 case x:
247 QD = inteval->QD_x;
248 WQ = inteval->WQ_x;
249 break;
250 case y:
251 QD = inteval->QD_y;
252 WQ = inteval->WQ_y;
253 break;
254 case z:
255 QD = inteval->QD_z;
256 WQ = inteval->WQ_z;
257 break;
258 }
259
260 const unsigned int idm1 = INT_CARTINDEX(Ld-1,d[0],d[1]);
261 const unsigned int d0_offset = id * veclen;
262 const unsigned int dm10_offset = idm1 * veclen;
263 LIBINT2_REALTYPE* target_ptr = target + d0_offset;
264 const LIBINT2_REALTYPE* src0_ptr = src0 + dm10_offset;
265 const LIBINT2_REALTYPE* src1_ptr = src1 + dm10_offset;
266
267 // if d-2_xyz exists, include (0 b | 0 d-2_xyz)
268 if (d[xyz] > 0) {
269 --d[xyz];
270 const unsigned int idm2 = INT_CARTINDEX(Ld-2,d[0],d[1]);
271 const unsigned int dm20_offset = idm2 * veclen;
272 ++d[xyz];
273 const LIBINT2_REALTYPE* src2_ptr = src2 + dm20_offset;
274 const LIBINT2_REALTYPE* src3_ptr = src3 + dm20_offset;
275 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
276
277 for(unsigned int b = 0; b < Nb; ++b) {
278 for(unsigned int v=0; v<veclen; ++v) {
279 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v]
280 + dxyz * inteval->oo2e[v] * (src2_ptr[v] - inteval->roe[v] * src3_ptr[v]);
281 }
282 target_ptr += Ndv;
283 src0_ptr += Ndm1v;
284 src1_ptr += Ndm1v;
285 src2_ptr += Ndm2v;
286 src3_ptr += Ndm2v;
287 }
288#if LIBINT2_FLOP_COUNT
289 inteval->nflops[0] += 8 * Nb * veclen;
290#endif
291
292 }
293 else {
294 for(unsigned int b = 0; b < Nb; ++b) {
295 for(unsigned int v=0; v<veclen; ++v) {
296 target_ptr[v] = QD[v] * src0_ptr[v] + WQ[v] * src1_ptr[v];
297 }
298 target_ptr += Ndv;
299 src0_ptr += Ndm1v;
300 src1_ptr += Ndm1v;
301 }
302#if LIBINT2_FLOP_COUNT
303 inteval->nflops[0] += 3 * Nb * veclen;
304#endif
305 }
306
307 {
308 const LIBINT2_REALTYPE* src4_ptr = src4 + dm10_offset;
309
310 // loop over b-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
311 int bx, by, bz;
312 FOR_CART(bx, by, bz, Lb-1)
313
314 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
315 ++b[xyz];
316
317 const unsigned int ib = INT_CARTINDEX(Lb,b[0],b[1]);
318 const unsigned int b0d0_offset = ib * Ndv + d0_offset;
319 LIBINT2_REALTYPE* target_ptr = target + b0d0_offset;
320 const LIBINT2_REALTYPE bxyz = (LIBINT2_REALTYPE)b[xyz];
321 for(unsigned int v=0; v<veclen; ++v) {
322 target_ptr[v] += bxyz * inteval->oo2ze[v] * src4_ptr[v];
323 }
324#if LIBINT2_FLOP_COUNT
325 inteval->nflops[0] += 3 * veclen;
326#endif
327 src4_ptr += Ndm1v;
328
329 END_FOR_CART // end of loop over b
330 }
331
332 ++id;
333
334 END_FOR_CART // end of loop over d
335
336 }
337
338 };
339#endif
340
341 template <int part, int Lb, int Ld, bool vectorize> struct OSAVRR_sx_sx {
342 static void compute(const Libint_t* inteval,
343 LIBINT2_REALTYPE* target,
344 const LIBINT2_REALTYPE* src1,
345 const LIBINT2_REALTYPE* src4);
346 };
347
352 template <int Lb, int Ld,
353 bool vectorize> struct OSAVRR_sx_sx<0,Lb,Ld,vectorize> {
354
355 static void compute(const Libint_t* inteval,
356 LIBINT2_REALTYPE* target,
357 const LIBINT2_REALTYPE* src1,
358 const LIBINT2_REALTYPE* src4) {
359
360 // works for (sd|sp) and higher
361 assert(not (Lb < 2 || Ld < 1));
362
363 const unsigned int veclen = vectorize ? inteval->veclen : 1;
364
365 const unsigned int Nd = INT_NCART(Ld);
366 const unsigned int NdV = Nd * veclen;
367
368 int bx, by, bz;
369 FOR_CART(bx, by, bz, Lb)
370
371 int b[3]; b[0] = bx; b[1] = by; b[2] = bz;
372
373 enum XYZ {x=0, y=1, z=2};
374 // Build along x, if possible
375 XYZ xyz = z;
376 if (by != 0) xyz = y;
377 if (bx != 0) xyz = x;
378 --b[xyz];
379
380 // redirect
381 const LIBINT2_REALTYPE *WP;
382 switch(xyz) {
383 case x:
384 WP = inteval->WP_x;
385 break;
386 case y:
387 WP = inteval->WP_y;
388 break;
389 case z:
390 WP = inteval->WP_z;
391 break;
392 }
393
394 const unsigned int ibm1 = INT_CARTINDEX(Lb-1,b[0],b[1]);
395 const unsigned int bm10d0_offset = ibm1 * NdV;
396 const LIBINT2_REALTYPE* src1_ptr = src1 + bm10d0_offset;
397
398 {
399 unsigned int dv = 0;
400 for(unsigned int d = 0; d < Nd; ++d) {
401 for(unsigned int v=0; v<veclen; ++v, ++dv) {
402 target[dv] = WP[v] * src1_ptr[dv];
403 }
404 }
405#if LIBINT2_FLOP_COUNT
406 inteval->nflops[0] += NdV;
407#endif
408 }
409
410 {
411 const unsigned int Ndm1 = INT_NCART(Ld-1);
412 const unsigned int Ndm1V = Ndm1 * veclen;
413 const unsigned int bm10dm10_offset = ibm1 * Ndm1V;
414 const LIBINT2_REALTYPE* src4_ptr = src4 + bm10dm10_offset;
415
416 // loop over d-1 shell and include (0 b-1_xyz | 0 d-1_xyz) to (0 b | 0 d)
417 int dx, dy, dz;
418 FOR_CART(dx, dy, dz, Ld-1)
419
420 int d[3]; d[0] = dx; d[1] = dy; d[2] = dz;
421 ++d[xyz];
422
423 const unsigned int dc = INT_CARTINDEX(Ld,d[0],d[1]);
424 const unsigned int dc_offset = dc * veclen;
425 LIBINT2_REALTYPE* tptr = target + dc_offset;
426 const LIBINT2_REALTYPE dxyz = (LIBINT2_REALTYPE)d[xyz];
427 for(unsigned int v=0; v<veclen; ++v) {
428 tptr[v] += dxyz * inteval->oo2ze[v] * src4_ptr[v];
429 }
430#if LIBINT2_FLOP_COUNT
431 inteval->nflops[0] += 3 * veclen;
432#endif
433 src4_ptr += veclen;
434
435 END_FOR_CART
436 }
437
438 target += NdV;
439
440 END_FOR_CART
441
442 }
443
444 };
445
446
447};
448
449#endif // header guard
450
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
Definition: OSVRR_sx_sx.h:341
Definition: OSVRR_sx_sx.h:35