LIBINT 2.7.2
OSVRR_xs_xs.h
1/*
2 * Copyright (C) 2004-2021 Edward F. Valeev
3 *
4 * This file is part of Libint.
5 *
6 * Libint is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef _libint2_src_lib_libint_osvrrxsxs_h_
22#define _libint2_src_lib_libint_osvrrxsxs_h_
23
24#include <cstdlib>
25#include <libint2.h>
26#include <util_types.h>
27#include <libint2/cgshell_ordering.h>
28
29namespace libint2 {
30
31 template <int part, int La, int Lc, bool unit_b, bool vectorize> struct OSVRR_xs_xs {
32 static void compute(const Libint_t* inteval,
33 LIBINT2_REALTYPE* target,
34 const LIBINT2_REALTYPE* src0,
35 const LIBINT2_REALTYPE* src1,
36 const LIBINT2_REALTYPE* src2,
37 const LIBINT2_REALTYPE* src3,
38 const LIBINT2_REALTYPE* src4);
39 };
40
48 template <int La, int Lc,
49 bool unit_b,
50 bool vectorize> struct OSVRR_xs_xs<0,La,Lc,unit_b,vectorize> {
51
52 static void compute(const Libint_t* inteval,
53 LIBINT2_REALTYPE* target,
54 const LIBINT2_REALTYPE* src0,
55 const LIBINT2_REALTYPE* src1,
56 const LIBINT2_REALTYPE* src2,
57 const LIBINT2_REALTYPE* src3,
58 const LIBINT2_REALTYPE* src4) {
59
60 // works for (ds|ps) and higher
61 assert(not (La < 2 || Lc < 1));
62
63 const unsigned int veclen = vectorize ? inteval->veclen : 1;
64
65 const unsigned int Nc = INT_NCART(Lc);
66 const unsigned int NcV = Nc * veclen;
67
68 int ax, ay, az;
69 FOR_CART(ax, ay, az, La)
70
71 int a[3]; a[0] = ax; a[1] = ay; a[2] = az;
72
73 enum XYZ {x=0, y=1, z=2};
74 // Build along x, if possible
75 XYZ xyz = z;
76 if (ay != 0) xyz = y;
77 if (ax != 0) xyz = x;
78 --a[xyz];
79
80 // redirect
81 const LIBINT2_REALTYPE *PA, *WP;
82 switch(xyz) {
83 case x:
84#if LIBINT2_DEFINED(eri,PA_x)
85 if (not unit_b) PA = inteval->PA_x;
86#endif
87 WP = inteval->WP_x;
88 break;
89 case y:
90#if LIBINT2_DEFINED(eri,PA_y)
91 if (not unit_b) PA = inteval->PA_y;
92#endif
93 WP = inteval->WP_y;
94 break;
95 case z:
96#if LIBINT2_DEFINED(eri,PA_z)
97 if (not unit_b) PA = inteval->PA_z;
98#endif
99 WP = inteval->WP_z;
100 break;
101 }
102
103 const unsigned int iam1 = INT_CARTINDEX(La-1,a[0],a[1]);
104 const unsigned int am10c0_offset = iam1 * NcV;
105 const LIBINT2_REALTYPE* src0_ptr = unit_b ? 0 : src0 + am10c0_offset;
106 const LIBINT2_REALTYPE* src1_ptr = src1 + am10c0_offset;
107
108 // if a-2_xyz exists, include (a-2_xyz 0 | c 0)
109 if (a[xyz] > 0) {
110 --a[xyz];
111 const unsigned int iam2 = INT_CARTINDEX(La-2,a[0],a[1]);
112 const unsigned int am20c0_offset = iam2 * NcV;
113 ++a[xyz];
114 const LIBINT2_REALTYPE* src2_ptr = src2 + am20c0_offset;
115 const LIBINT2_REALTYPE* src3_ptr = src3 + am20c0_offset;
116 const LIBINT2_REALTYPE axyz = (LIBINT2_REALTYPE)a[xyz];
117
118 unsigned int cv = 0;
119 for(unsigned int c = 0; c < Nc; ++c) {
120 for(unsigned int v=0; v<veclen; ++v, ++cv) {
121 LIBINT2_REALTYPE value = WP[v] * src1_ptr[cv] + axyz * inteval->oo2z[v] * (src2_ptr[cv] - inteval->roz[v] * src3_ptr[cv]);
122 if (not unit_b) value += PA[v] * src0_ptr[cv];
123 target[cv] = value;
124 }
125 }
126#if LIBINT2_FLOP_COUNT
127 inteval->nflops[0] += (unit_b ? 6 : 8) * NcV;
128#endif
129
130 }
131 else {
132 unsigned int cv = 0;
133 for(unsigned int c = 0; c < Nc; ++c) {
134 for(unsigned int v=0; v<veclen; ++v, ++cv) {
135 LIBINT2_REALTYPE value = WP[v] * src1_ptr[cv];
136 if (not unit_b) value += PA[v] * src0_ptr[cv];
137 target[cv] = value;
138 }
139 }
140#if LIBINT2_FLOP_COUNT
141 inteval->nflops[0] += (unit_b ? 1 : 3) * NcV;
142#endif
143 }
144
145 {
146 const unsigned int Ncm1 = INT_NCART(Lc-1);
147 const unsigned int Ncm1V = Ncm1 * veclen;
148 const unsigned int am10cm10_offset = iam1 * Ncm1V;
149 const LIBINT2_REALTYPE* src4_ptr = src4 + am10cm10_offset;
150
151 // loop over c-1 shell and include (a-1_xyz 0 | c-1_xyz 0) to (a 0 | c 0)
152 int cx, cy, cz;
153 FOR_CART(cx, cy, cz, Lc-1)
154
155 int c[3]; c[0] = cx; c[1] = cy; c[2] = cz;
156 ++c[xyz];
157
158 const unsigned int cc = INT_CARTINDEX(Lc,c[0],c[1]);
159 const unsigned int cc_offset = cc * veclen;
160 LIBINT2_REALTYPE* tptr = target + cc_offset;
161 const LIBINT2_REALTYPE cxyz = (LIBINT2_REALTYPE)c[xyz];
162 for(unsigned int v=0; v<veclen; ++v) {
163 tptr[v] += cxyz * inteval->oo2ze[v] * src4_ptr[v];
164 }
165#if LIBINT2_FLOP_COUNT
166 inteval->nflops[0] += 3 * veclen;
167#endif
168 src4_ptr += veclen;
169
170 END_FOR_CART
171 }
172
173 target += NcV;
174
175 END_FOR_CART
176
178 //inteval->nflops[0] = inteval->nflops[0] + 222 * 1 * 1 * veclen;
179
180 }
181
182 };
183
184 // Ahlrichs' extension of OS VRR
185 template <int part, int La, int Lc, bool vectorize> struct OSAVRR_xs_xs {
186 static void compute(const Libint_t* inteval,
187 LIBINT2_REALTYPE* target,
188 const LIBINT2_REALTYPE* src1,
189 const LIBINT2_REALTYPE* src4);
190 };
191
196 template <int La, int Lc, bool vectorize> struct OSAVRR_xs_xs<0,La,Lc,vectorize> {
197
198 static void compute(const Libint_t* inteval,
199 LIBINT2_REALTYPE* target,
200 const LIBINT2_REALTYPE* src1,
201 const LIBINT2_REALTYPE* src4) {
202
203 // works for (ps|ps) and higher
204 assert(not (La < 1 || Lc < 1));
205
206 const unsigned int veclen = vectorize ? inteval->veclen : 1;
207
208 const unsigned int Nc = INT_NCART(Lc);
209 const unsigned int NcV = Nc * veclen;
210
211 int ax, ay, az;
212 FOR_CART(ax, ay, az, La)
213
214 int a[3]; a[0] = ax; a[1] = ay; a[2] = az;
215
216 enum XYZ {x=0, y=1, z=2};
217 // Build along x, if possible
218 XYZ xyz = z;
219 if (ay != 0) xyz = y;
220 if (ax != 0) xyz = x;
221 --a[xyz];
222
223 // redirect
224 const LIBINT2_REALTYPE *WP;
225 switch(xyz) {
226 case x:
227 WP = inteval->WP_x;
228 break;
229 case y:
230 WP = inteval->WP_y;
231 break;
232 case z:
233 WP = inteval->WP_z;
234 break;
235 }
236
237 const unsigned int iam1 = INT_CARTINDEX(La-1,a[0],a[1]);
238 const unsigned int am10c0_offset = iam1 * NcV;
239 const LIBINT2_REALTYPE* src1_ptr = src1 + am10c0_offset;
240
241 {
242 unsigned int cv = 0;
243 for(unsigned int c = 0; c < Nc; ++c) {
244 for(unsigned int v=0; v<veclen; ++v, ++cv) {
245 target[cv] = WP[v] * src1_ptr[cv];
246 }
247 }
248#if LIBINT2_FLOP_COUNT
249 inteval->nflops[0] += NcV;
250#endif
251 }
252
253 {
254 const unsigned int Ncm1 = INT_NCART(Lc-1);
255 const unsigned int Ncm1V = Ncm1 * veclen;
256 const unsigned int am10cm10_offset = iam1 * Ncm1V;
257 const LIBINT2_REALTYPE* src4_ptr = src4 + am10cm10_offset;
258
259 // loop over c-1 shell and include (a-1_xyz 0 | c-1_xyz 0) to (a 0 | c 0)
260 int cx, cy, cz;
261 FOR_CART(cx, cy, cz, Lc-1)
262
263 int c[3]; c[0] = cx; c[1] = cy; c[2] = cz;
264 ++c[xyz];
265
266 const unsigned int cc = INT_CARTINDEX(Lc,c[0],c[1]);
267 const unsigned int cc_offset = cc * veclen;
268 LIBINT2_REALTYPE* tptr = target + cc_offset;
269 const LIBINT2_REALTYPE cxyz = (LIBINT2_REALTYPE)c[xyz];
270 for(unsigned int v=0; v<veclen; ++v) {
271 tptr[v] += cxyz * inteval->oo2ze[v] * src4_ptr[v];
272 }
273#if LIBINT2_FLOP_COUNT
274 inteval->nflops[0] += 3 * veclen;
275#endif
276 src4_ptr += veclen;
277
278 END_FOR_CART
279 }
280
281 target += NcV;
282
283 END_FOR_CART
284
286 //inteval->nflops[0] = inteval->nflops[0] + 222 * 1 * 1 * veclen;
287
288 }
289
290 };
291
292};
293
294#endif // header guard
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24
static void compute(const Libint_t *inteval, LIBINT2_REALTYPE *target, const LIBINT2_REALTYPE *src1, const LIBINT2_REALTYPE *src4)
Definition: OSVRR_xs_xs.h:198
Definition: OSVRR_xs_xs.h:185
static void compute(const Libint_t *inteval, LIBINT2_REALTYPE *target, const LIBINT2_REALTYPE *src0, const LIBINT2_REALTYPE *src1, const LIBINT2_REALTYPE *src2, const LIBINT2_REALTYPE *src3, const LIBINT2_REALTYPE *src4)
Definition: OSVRR_xs_xs.h:52
Definition: OSVRR_xs_xs.h:31