LIBINT 2.7.2
ITR_xs_xs.h
1/*
2 * Copyright (C) 2004-2021 Edward F. Valeev
3 *
4 * This file is part of Libint.
5 *
6 * Libint is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef _libint2_src_lib_libint_itrxsxs_h_
22#define _libint2_src_lib_libint_itrxsxs_h_
23
24#include <cstdlib>
25#include <libint2.h>
26#include <util_types.h>
27#include <libint2/cgshell_ordering.h>
28
29namespace libint2 {
30
31 template <int part, int La, int Lc, bool InBra, bool vectorize> struct ITR_xs_xs {
32 static void compute(const Libint_t* inteval,
33 LIBINT2_REALTYPE* target,
34 const LIBINT2_REALTYPE* src0,
35 const LIBINT2_REALTYPE* src1,
36 const LIBINT2_REALTYPE* src2,
37 const LIBINT2_REALTYPE* src3);
38 };
39
46 template <int La, int Lc, bool InBra, bool vectorize> struct ITR_xs_xs<0,La,Lc,InBra,vectorize> {
47
48 static void compute(const Libint_t* inteval,
49 LIBINT2_REALTYPE* target,
50 const LIBINT2_REALTYPE* src0,
51 const LIBINT2_REALTYPE* src1,
52 const LIBINT2_REALTYPE* src2,
53 const LIBINT2_REALTYPE* src3) {
54
55 // works for (ds|ps) and higher
56 if (La < 2 || Lc < 1)
57 abort();
58
59 const unsigned int veclen = vectorize ? inteval->veclen : 1;
60
61 const unsigned int Nc = INT_NCART(Lc);
62 const unsigned int NcV = Nc * veclen;
63
64 int ax, ay, az;
65 FOR_CART(ax, ay, az, La)
66
67 int a[3]; a[0] = ax; a[1] = ay; a[2] = az;
68
69 enum XYZ {x=0, y=1, z=2};
70 // Build along x, if possible
71 XYZ xyz = z;
72 if (ay != 0) xyz = y;
73 if (ax != 0) xyz = x;
74 --a[xyz];
75
76 // redirect
77 const LIBINT2_REALTYPE *pfac0;
78 switch(xyz) {
79 case x:
80 pfac0 = InBra ?
81#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_0_0_x)
82 inteval->TwoPRepITR_pfac0_0_0_x
83#else
84 0
85#endif
86 :
87#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_0_1_x)
88 inteval->TwoPRepITR_pfac0_0_1_x;
89#else
90 0
91#endif
92 ;
93 break;
94 case y:
95 pfac0 = InBra ?
96#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_0_0_y)
97 inteval->TwoPRepITR_pfac0_0_0_y
98#else
99 0
100#endif
101 :
102#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_0_1_y)
103 inteval->TwoPRepITR_pfac0_0_1_y;
104#else
105 0
106#endif
107 ;
108 break;
109 case z:
110 pfac0 = InBra ?
111#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_0_0_z)
112 inteval->TwoPRepITR_pfac0_0_0_z
113#else
114 0
115#endif
116 :
117#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_0_1_z)
118 inteval->TwoPRepITR_pfac0_0_1_z;
119#else
120 0
121#endif
122 ;
123 break;
124 }
125
126 const unsigned int iam1 = INT_CARTINDEX(La-1,a[0],a[1]);
127 const unsigned int am10c0_offset = iam1 * NcV;
128 const LIBINT2_REALTYPE* src0_ptr = src0 + am10c0_offset;
129
130 // if a-2_xyz exists, include (a-2_xyz 0 | c 0)
131 if (a[xyz] > 0) {
132 --a[xyz];
133 const unsigned int iam2 = INT_CARTINDEX(La-2,a[0],a[1]);
134 const unsigned int am20c0_offset = iam2 * NcV;
135 ++a[xyz];
136 const LIBINT2_REALTYPE* src2_ptr = src2 + am20c0_offset;
137 const LIBINT2_REALTYPE axyz = (LIBINT2_REALTYPE)a[xyz];
138
139 unsigned int cv = 0;
140 for(unsigned int c = 0; c < Nc; ++c) {
141 for(unsigned int v=0; v<veclen; ++v, ++cv) {
142 target[cv] = pfac0[v] * src0_ptr[cv] + axyz * inteval->oo2z[v] * src2_ptr[cv];
143 }
144 }
145#if LIBINT2_FLOP_COUNT
146 inteval->nflops[0] += 4 * NcV;
147#endif
148
149 }
150 else {
151 unsigned int cv = 0;
152 for(unsigned int c = 0; c < Nc; ++c) {
153 for(unsigned int v=0; v<veclen; ++v, ++cv) {
154 target[cv] = pfac0[v] * src0_ptr[cv];
155 }
156 }
157#if LIBINT2_FLOP_COUNT
158 inteval->nflops[0] += 1 * NcV;
159#endif
160 }
161
162 {
163 const unsigned int Ncp1 = INT_NCART(Lc+1);
164 const unsigned int Ncp1V = Ncp1 * veclen;
165 const unsigned int am10cp10_offset = iam1 * Ncp1V;
166 const LIBINT2_REALTYPE* src1_ptr = src1 + am10cp10_offset;
167
168 // loop over c shell and include (a-1_xyz 0 | c+1_xyz 0) to (a 0 | c 0)
169 int cx, cy, cz;
170 int cv = 0;
171 FOR_CART(cx, cy, cz, Lc)
172
173 int c[3]; c[0] = cx; c[1] = cy; c[2] = cz;
174 ++c[xyz];
175
176 const unsigned int cp1 = INT_CARTINDEX(Lc+1,c[0],c[1]);
177 const unsigned int cp1_offset = cp1 * veclen;
178 const LIBINT2_REALTYPE* sptr = src1_ptr + cp1_offset;
179 const LIBINT2_REALTYPE cxyz = (LIBINT2_REALTYPE)c[xyz];
180 for(unsigned int v=0; v<veclen; ++v, ++cv) {
181 target[cv] -= inteval->eoz[v] * sptr[v];
182 }
183#if LIBINT2_FLOP_COUNT
184 inteval->nflops[0] += 2 * veclen;
185#endif
186
187 END_FOR_CART
188 }
189
190
191 {
192 const unsigned int Ncm1 = INT_NCART(Lc-1);
193 const unsigned int Ncm1V = Ncm1 * veclen;
194 const unsigned int am10cm10_offset = iam1 * Ncm1V;
195 const LIBINT2_REALTYPE* src3_ptr = src3 + am10cm10_offset;
196
197 // loop over c-1 shell and include (a-1_xyz 0 | c-1_xyz 0) to (a 0 | c 0)
198 int cx, cy, cz;
199 FOR_CART(cx, cy, cz, Lc-1)
200
201 int c[3]; c[0] = cx; c[1] = cy; c[2] = cz;
202 ++c[xyz];
203
204 const unsigned int cc = INT_CARTINDEX(Lc,c[0],c[1]);
205 const unsigned int cc_offset = cc * veclen;
206 LIBINT2_REALTYPE* tptr = target + cc_offset;
207 const LIBINT2_REALTYPE cxyz = (LIBINT2_REALTYPE)c[xyz];
208 for(unsigned int v=0; v<veclen; ++v) {
209 tptr[v] += cxyz * inteval->oo2z[v] * src3_ptr[v];
210 }
211#if LIBINT2_FLOP_COUNT
212 inteval->nflops[0] += 3 * veclen;
213#endif
214 src3_ptr += veclen;
215
216 END_FOR_CART
217 }
218
219 target += NcV;
220
221 END_FOR_CART
222
223 }
224
225 };
226
227#if 0
234 template <int La, int Lc, bool InBra, bool vectorize> struct ITR_xs_xs<1,La,Lc,InBra,vectorize> {
235
236 static void compute(const Libint_t* inteval,
237 LIBINT2_REALTYPE* target,
238 const LIBINT2_REALTYPE* src0,
239 const LIBINT2_REALTYPE* src1,
240 const LIBINT2_REALTYPE* src2,
241 const LIBINT2_REALTYPE* src3) {
242
243 // works for (ps|ds) and higher
244 if (La < 1 || Lc < 2)
245 abort();
246
247 const unsigned int veclen = vectorize ? inteval->veclen : 1;
248
249 const unsigned int Na = INT_NCART(La);
250 const unsigned int NaV = Na * veclen;
251
252 int cx, cy, cz;
253 FOR_CART(cx, cy, cz, Lc)
254
255 int c[3]; c[0] = cx; c[1] = cy; c[2] = cz;
256
257 enum XYZ {x=0, y=1, z=2};
258 // Build along x, if possible
259 XYZ xyz = z;
260 if (cy != 0) xyz = y;
261 if (cx != 0) xyz = x;
262 --c[xyz];
263
264 // redirect
265 const LIBINT2_REALTYPE *pfac0;
266 switch(xyz) {
267 case x:
268 pfac0 = InBra ?
269#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_1_0_x)
270 inteval->TwoPRepITR_pfac0_1_0_x
271#else
272 0
273#endif
274 :
275#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_0_1_x)
276 inteval->TwoPRepITR_pfac0_0_1_x;
277#else
278 0
279#endif
280 ;
281 break;
282 case y:
283 pfac0 = InBra ?
284#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_1_0_y)
285 inteval->TwoPRepITR_pfac0_1_0_y
286#else
287 0
288#endif
289 :
290#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_1_1_y)
291 inteval->TwoPRepITR_pfac0_1_1_y;
292#else
293 0
294#endif
295 ;
296 break;
297 case z:
298 pfac0 = InBra ?
299#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_1_0_z)
300 inteval->TwoPRepITR_pfac0_1_0_z
301#else
302 0
303#endif
304 :
305#if LIBINT2_DEFINED(eri,TwoPRepITR_pfac0_1_1_z)
306 inteval->TwoPRepITR_pfac0_1_1_z;
307#else
308 0
309#endif
310 ;
311 break;
312 }
313
314 const unsigned int icm1 = INT_CARTINDEX(Lc-1,c[0],c[1]);
315 const unsigned int a0cm10_offset = icm1 * NaV;
316 const LIBINT2_REALTYPE* src0_ptr = src0 + a0cm10_offset;
317
318 // if c-2_xyz exists, include (a 0 | c-2_xyz 0)
319 if (c[xyz] > 0) {
320 --c[xyz];
321 const unsigned int icm2 = INT_CARTINDEX(Lc-2,c[0],c[1]);
322 const unsigned int a0cm20_offset = icm2 * NaV;
323 ++c[xyz];
324 const LIBINT2_REALTYPE* src2_ptr = src2 + am20c0_offset;
325 const LIBINT2_REALTYPE axyz = (LIBINT2_REALTYPE)c[xyz];
326
327 unsigned int cv = 0;
328 for(unsigned int c = 0; c < Nc; ++c) {
329 for(unsigned int v=0; v<veclen; ++v, ++cv) {
330 target[cv] = pfac0[v] * src0_ptr[cv] + axyz * inteval->oo2z[v] * src2_ptr[cv];
331 }
332 }
333#if LIBINT2_FLOP_COUNT
334 inteval->nflops[0] += 4 * NcV;
335#endif
336
337 }
338 else {
339 unsigned int cv = 0;
340 for(unsigned int c = 0; c < Nc; ++c) {
341 for(unsigned int v=0; v<veclen; ++v, ++cv) {
342 target[cv] = pfac0[v] * src0_ptr[cv];
343 }
344 }
345#if LIBINT2_FLOP_COUNT
346 inteval->nflops[0] += 1 * NcV;
347#endif
348 }
349
350 {
351 const unsigned int Ncp1 = INT_NCART(Lc+1);
352 const unsigned int Ncp1V = Ncp1 * veclen;
353 const unsigned int am10cp10_offset = iam1 * Ncp1V;
354 const LIBINT2_REALTYPE* src1_ptr = src1 + am10cp10_offset;
355
356 // loop over c shell and include (c-1_xyz 0 | c+1_xyz 0) to (c 0 | c 0)
357 int cx, cy, cz;
358 int cv = 0;
359 FOR_CART(cx, cy, cz, Lc)
360
361 int c[3]; c[0] = cx; c[1] = cy; c[2] = cz;
362 ++c[xyz];
363
364 const unsigned int cp1 = INT_CARTINDEX(Lc+1,c[0],c[1]);
365 const unsigned int cp1_offset = cp1 * veclen;
366 const LIBINT2_REALTYPE* sptr = src1_ptr + cp1_offset;
367 const LIBINT2_REALTYPE cxyz = (LIBINT2_REALTYPE)c[xyz];
368 for(unsigned int v=0; v<veclen; ++v, ++cv) {
369 target[cv] -= inteval->eoz[v] * sptr[v];
370 }
371#if LIBINT2_FLOP_COUNT
372 inteval->nflops[0] += 2 * veclen;
373#endif
374
375 END_FOR_CART
376 }
377
378
379 {
380 const unsigned int Ncm1 = INT_NCART(Lc-1);
381 const unsigned int Ncm1V = Ncm1 * veclen;
382 const unsigned int am10cm10_offset = iam1 * Ncm1V;
383 const LIBINT2_REALTYPE* src3_ptr = src3 + am10cm10_offset;
384
385 // loop over c-1 shell and include (c-1_xyz 0 | c-1_xyz 0) to (c 0 | c 0)
386 int cx, cy, cz;
387 FOR_CART(cx, cy, cz, Lc-1)
388
389 int c[3]; c[0] = cx; c[1] = cy; c[2] = cz;
390 ++c[xyz];
391
392 const unsigned int cc = INT_CARTINDEX(Lc,c[0],c[1]);
393 const unsigned int cc_offset = cc * veclen;
394 LIBINT2_REALTYPE* tptr = target + cc_offset;
395 const LIBINT2_REALTYPE cxyz = (LIBINT2_REALTYPE)c[xyz];
396 for(unsigned int v=0; v<veclen; ++v) {
397 tptr[v] += cxyz * inteval->oo2z[v] * src3_ptr[v];
398 }
399#if LIBINT2_FLOP_COUNT
400 inteval->nflops[0] += 3 * veclen;
401#endif
402 src3_ptr += veclen;
403
404 END_FOR_CART
405 }
406
407 target += NcV;
408
409 END_FOR_CART
410
411 }
412
413 };
414#endif
415
416};
417
418#endif // header guard
419
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
Definition: ITR_xs_xs.h:31