LIBINT 2.9.0
VRR_GTG_1d_xx_xx_vec.h
1/*
2 * Copyright (C) 2004-2024 Edward F. Valeev
3 *
4 * This file is part of Libint library.
5 *
6 * Libint library is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with Libint library. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef _libint2_src_lib_libint_vrrgtg1dxxxx_h_
22#define _libint2_src_lib_libint_vrrgtg1dxxxx_h_
23// #include "../../../../SIMD_wrapped_vector/template/simd_wrapped_vector.hpp"
24#include <libint2.h>
25#include <util_types.h>
26
27#include <cassert>
28#include <cstdlib>
29
30namespace libint2 {
31
42template <unsigned int CartesianAxis, int La, int Lb, int Lc, int Ld,
43 bool vectorize>
44struct VRR_GTG_1d_xx_xx {
45 static void compute(const Libint_t* inteval, VectorSIMD<double, npts>* target,
46 VectorSIMD<double, npts>* src0) {
47 enum XYZ { x = 0, y = 1, z = 2 };
48 assert(CartesianAxis == x || CartesianAxis == y || CartesianAxis == z);
49 // assert(vectorize == false);
50
51 const unsigned int veclen = vectorize ? inteval->veclen : 1;
52
53 // corner case: (00|00)
54 if (La == 0 && Lb == 0 && Lc == 0 && Ld == 0) {
55 for (unsigned int v = 0; v != veclen; ++v) target[v] = src0[v];
56 return;
57 }
58
59 //---------------------------------------------
60 // Part (1): build (a+b 0|c+d 0)
61 //---------------------------------------------
62
63 VectorSIMD<double, npts> apb_0_GTG_cpd_0[La + Lb + 1][Lc + Ld + 1];
64 apb_0_GTG_cpd_0[0][0] = src0[0];
65
66 const VectorSIMD<double, npts>*pfac0_0, *pfac0_1;
67 const VectorSIMD<double, npts>* pfac1_0 = inteval->R12kG12_pfac1_0;
68 const VectorSIMD<double, npts>* pfac1_1 = inteval->R12kG12_pfac1_1;
69 const VectorSIMD<double, npts>* pfac2 = inteval->R12kG12_pfac2;
70 switch (CartesianAxis) {
71 case x:
72 pfac0_0 = inteval->R12kG12_pfac0_0_x;
73 pfac0_1 = inteval->R12kG12_pfac0_1_x;
74 break;
75 case y:
76 pfac0_0 = inteval->R12kG12_pfac0_0_y;
77 pfac0_1 = inteval->R12kG12_pfac0_1_y;
78 break;
79 case z:
80 pfac0_0 = inteval->R12kG12_pfac0_0_z;
81 pfac0_1 = inteval->R12kG12_pfac0_1_z;
82 break;
83 default:
84 assert(false);
85 }
86
87 // build (0 0|1 0)
88 if (Lc + Ld > 0) {
89 apb_0_GTG_cpd_0[0][1] = pfac0_1[0] * apb_0_GTG_cpd_0[0][0];
90#if LIBiINT2_FLOP_COUNT
91 inteval->nflops[0] += 1;
92#endif
93 }
94
95 // build (0 0|c+d 0)
96 if (Lc + Ld > 1) {
97 for (int c_plus_d = 1; c_plus_d != Lc + Ld; ++c_plus_d) {
98 apb_0_GTG_cpd_0[0][c_plus_d + 1] =
99 pfac0_1[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
100 c_plus_d * pfac1_1[0] * apb_0_GTG_cpd_0[0][c_plus_d - 1];
101 }
102#if LIBINT2_FLOP_COUNT
103 inteval->nflops[0] += 4 * (Lc + Ld - 1);
104#endif
105 }
106
107 // build (1 0|0 0)
108 if (La + Lb > 0) {
109 apb_0_GTG_cpd_0[1][0] = pfac0_0[0] * apb_0_GTG_cpd_0[0][0];
110#if LIBINT2_FLOP_COUNT
111 inteval->nflops[0] += 1;
112#endif
113 }
114
115 // build (a+b 0|0 0)
116 if (La + Lb > 1) {
117 for (int a_plus_b = 1; a_plus_b != La + Lb; ++a_plus_b) {
118 apb_0_GTG_cpd_0[a_plus_b + 1][0] =
119 pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][0] +
120 a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b - 1][0];
121 }
122#if LIBINT2_FLOP_COUNT
123 inteval->nflops[0] += 4 * (La + Lb - 1);
124#endif
125 }
126
127 // build (1 0|c+d 0)
128 if (La + Lb > 0 && Lc + Ld > 0) {
129 for (int c_plus_d = 1; c_plus_d <= Lc + Ld; ++c_plus_d) {
130 apb_0_GTG_cpd_0[1][c_plus_d] =
131 pfac0_0[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
132 c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[0][c_plus_d - 1];
133 }
134#if LIBINT2_FLOP_COUNT
135 inteval->nflops[0] += 4 * (Lc + Ld - 1);
136#endif
137 }
138
139 // build (a+b 0|c+d 0)
140 if (La + Lb > 1 && Lc + Ld > 0) {
141 for (int a_plus_b = 1; a_plus_b != La + Lb; ++a_plus_b) {
142 for (int c_plus_d = 1; c_plus_d <= Lc + Ld; ++c_plus_d) {
143 apb_0_GTG_cpd_0[a_plus_b + 1][c_plus_d] =
144 pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d] +
145 a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b - 1][c_plus_d] +
146 c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d - 1];
147 }
148 }
149#if LIBINT2_FLOP_COUNT
150 inteval->nflops[0] += 7 * (La + Lb - 1) * (Lc + Ld - 1);
151#endif
152 }
153
154 //---------------------------------------------
155 // Part (2): build (a b|c+d 0)
156 //---------------------------------------------
157
158 double AB[1];
159 switch (CartesianAxis) {
160 case x:
161 std::cout << "printing before segfault" << std::endl;
162 AB[0] = inteval->AB_x[0];
163 break;
164 case y:
165 AB[0] = inteval->AB_y[0];
166 break;
167 case z:
168 AB[0] = inteval->AB_z[0];
169 break;
170 default:
171 assert(false);
172 }
173
174 VectorSIMD<double, npts> a_b_GTG_cpd_0[La + 1][Lb + 1][Lc + Ld + 1];
175 for (int c_plus_d = 0; c_plus_d <= Lc + Ld; ++c_plus_d) {
176 // copy (a+b 0| to a local 0,a+b buffer
177 VectorSIMD<double, npts> b_a_GTG[La + Lb + 1][La + Lb + 1];
178 for (int a_plus_b = 0; a_plus_b <= La + Lb; ++a_plus_b) {
179 b_a_GTG[0][a_plus_b] = apb_0_GTG_cpd_0[a_plus_b][c_plus_d];
180 }
181 // use HRR to compute b,a
182 for (int b = 1; b <= Lb; ++b) {
183 for (int a = 0; a <= La + Lb - b; ++a) {
184 b_a_GTG[b][a] = b_a_GTG[b - 1][a + 1] + AB[0] * b_a_GTG[b - 1][a];
185 }
186#if LIBINT2_FLOP_COUNT
187 inteval->nflops[0] += 2 * (La + Lb - b + 1);
188#endif
189 }
190 // copy b,a to (a b|
191 for (int b = 0; b <= Lb; ++b) {
192 for (int a = 0; a <= La; ++a) {
193 a_b_GTG_cpd_0[a][b][c_plus_d] = b_a_GTG[b][a];
194 }
195 }
196 }
197
198 //---------------------------------------------
199 // Part (3): build (a b|c d)
200 //---------------------------------------------
201
202 double CD[1];
203 switch (CartesianAxis) {
204 case x:
205 CD[0] = inteval->CD_x[0];
206 break;
207 case y:
208 CD[0] = inteval->CD_y[0];
209 break;
210 case z:
211 CD[0] = inteval->CD_z[0];
212 break;
213 default:
214 assert(false);
215 }
216
217 VectorSIMD<double, npts>* target_a_b_blk_ptr = target;
218 const int Nd = (Ld + 1);
219 const int Ncd = (Lc + 1) * Nd;
220 for (int a = 0; a <= La; ++a) {
221 for (int b = 0; b <= Lb; ++b, target_a_b_blk_ptr += Ncd) {
222 // copy |c+d 0) to a local 0,c+d buffer
223 VectorSIMD<double, npts> d_c_GTG[Lc + Ld + 1][Lc + Ld + 1];
224 for (int c_plus_d = 0; c_plus_d <= Lc + Ld; ++c_plus_d) {
225 d_c_GTG[0][c_plus_d] = a_b_GTG_cpd_0[a][b][c_plus_d];
226 }
227 // use HRR to compute d,c
228 for (int d = 1; d <= Ld; ++d) {
229 for (int c = 0; c <= Lc + Ld - d; ++c) {
230 d_c_GTG[d][c] = d_c_GTG[d - 1][c + 1] + CD[0] * d_c_GTG[d - 1][c];
231 }
232#if LIBINT2_FLOP_COUNT
233 inteval->nflops[0] += 2 * (Lc + Ld - d + 1);
234#endif
235 }
236 // copy d,c to |c d)
237 for (int d = 0; d <= Ld; ++d) {
238 for (int c = 0, cd = d; c <= Lc; ++c, cd += Nd) {
239 target_a_b_blk_ptr[cd] = d_c_GTG[d][c];
240 }
241 }
242 }
243 }
244
245 // done
246 }
247};
248
249}; // namespace libint2
250
251#endif // header guard
Defaults definitions for various parameters assumed by Libint.
Definition algebra.cc:24