21#ifndef _libint2_src_lib_libint_vrrgtg1dxxxx_h_
22#define _libint2_src_lib_libint_vrrgtg1dxxxx_h_
25#include <util_types.h>
42template <
unsigned int CartesianAxis,
int La,
int Lb,
int Lc,
int Ld,
44struct VRR_GTG_1d_xx_xx {
45 static void compute(
const Libint_t* inteval, VectorSIMD<double, npts>* target,
46 VectorSIMD<double, npts>* src0) {
47 enum XYZ { x = 0, y = 1, z = 2 };
48 assert(CartesianAxis == x || CartesianAxis == y || CartesianAxis == z);
51 const unsigned int veclen = vectorize ? inteval->veclen : 1;
54 if (La == 0 && Lb == 0 && Lc == 0 && Ld == 0) {
55 for (
unsigned int v = 0; v != veclen; ++v) target[v] = src0[v];
63 VectorSIMD<double, npts> apb_0_GTG_cpd_0[La + Lb + 1][Lc + Ld + 1];
64 apb_0_GTG_cpd_0[0][0] = src0[0];
66 const VectorSIMD<double, npts>*pfac0_0, *pfac0_1;
67 const VectorSIMD<double, npts>* pfac1_0 = inteval->R12kG12_pfac1_0;
68 const VectorSIMD<double, npts>* pfac1_1 = inteval->R12kG12_pfac1_1;
69 const VectorSIMD<double, npts>* pfac2 = inteval->R12kG12_pfac2;
70 switch (CartesianAxis) {
72 pfac0_0 = inteval->R12kG12_pfac0_0_x;
73 pfac0_1 = inteval->R12kG12_pfac0_1_x;
76 pfac0_0 = inteval->R12kG12_pfac0_0_y;
77 pfac0_1 = inteval->R12kG12_pfac0_1_y;
80 pfac0_0 = inteval->R12kG12_pfac0_0_z;
81 pfac0_1 = inteval->R12kG12_pfac0_1_z;
89 apb_0_GTG_cpd_0[0][1] = pfac0_1[0] * apb_0_GTG_cpd_0[0][0];
90#if LIBiINT2_FLOP_COUNT
91 inteval->nflops[0] += 1;
97 for (
int c_plus_d = 1; c_plus_d != Lc + Ld; ++c_plus_d) {
98 apb_0_GTG_cpd_0[0][c_plus_d + 1] =
99 pfac0_1[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
100 c_plus_d * pfac1_1[0] * apb_0_GTG_cpd_0[0][c_plus_d - 1];
102#if LIBINT2_FLOP_COUNT
103 inteval->nflops[0] += 4 * (Lc + Ld - 1);
109 apb_0_GTG_cpd_0[1][0] = pfac0_0[0] * apb_0_GTG_cpd_0[0][0];
110#if LIBINT2_FLOP_COUNT
111 inteval->nflops[0] += 1;
117 for (
int a_plus_b = 1; a_plus_b != La + Lb; ++a_plus_b) {
118 apb_0_GTG_cpd_0[a_plus_b + 1][0] =
119 pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][0] +
120 a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b - 1][0];
122#if LIBINT2_FLOP_COUNT
123 inteval->nflops[0] += 4 * (La + Lb - 1);
128 if (La + Lb > 0 && Lc + Ld > 0) {
129 for (
int c_plus_d = 1; c_plus_d <= Lc + Ld; ++c_plus_d) {
130 apb_0_GTG_cpd_0[1][c_plus_d] =
131 pfac0_0[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
132 c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[0][c_plus_d - 1];
134#if LIBINT2_FLOP_COUNT
135 inteval->nflops[0] += 4 * (Lc + Ld - 1);
140 if (La + Lb > 1 && Lc + Ld > 0) {
141 for (
int a_plus_b = 1; a_plus_b != La + Lb; ++a_plus_b) {
142 for (
int c_plus_d = 1; c_plus_d <= Lc + Ld; ++c_plus_d) {
143 apb_0_GTG_cpd_0[a_plus_b + 1][c_plus_d] =
144 pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d] +
145 a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b - 1][c_plus_d] +
146 c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d - 1];
149#if LIBINT2_FLOP_COUNT
150 inteval->nflops[0] += 7 * (La + Lb - 1) * (Lc + Ld - 1);
159 switch (CartesianAxis) {
161 std::cout <<
"printing before segfault" << std::endl;
162 AB[0] = inteval->AB_x[0];
165 AB[0] = inteval->AB_y[0];
168 AB[0] = inteval->AB_z[0];
174 VectorSIMD<double, npts> a_b_GTG_cpd_0[La + 1][Lb + 1][Lc + Ld + 1];
175 for (
int c_plus_d = 0; c_plus_d <= Lc + Ld; ++c_plus_d) {
177 VectorSIMD<double, npts> b_a_GTG[La + Lb + 1][La + Lb + 1];
178 for (
int a_plus_b = 0; a_plus_b <= La + Lb; ++a_plus_b) {
179 b_a_GTG[0][a_plus_b] = apb_0_GTG_cpd_0[a_plus_b][c_plus_d];
182 for (
int b = 1; b <= Lb; ++b) {
183 for (
int a = 0; a <= La + Lb - b; ++a) {
184 b_a_GTG[b][a] = b_a_GTG[b - 1][a + 1] + AB[0] * b_a_GTG[b - 1][a];
186#if LIBINT2_FLOP_COUNT
187 inteval->nflops[0] += 2 * (La + Lb - b + 1);
191 for (
int b = 0; b <= Lb; ++b) {
192 for (
int a = 0; a <= La; ++a) {
193 a_b_GTG_cpd_0[a][b][c_plus_d] = b_a_GTG[b][a];
203 switch (CartesianAxis) {
205 CD[0] = inteval->CD_x[0];
208 CD[0] = inteval->CD_y[0];
211 CD[0] = inteval->CD_z[0];
217 VectorSIMD<double, npts>* target_a_b_blk_ptr = target;
218 const int Nd = (Ld + 1);
219 const int Ncd = (Lc + 1) * Nd;
220 for (
int a = 0; a <= La; ++a) {
221 for (
int b = 0; b <= Lb; ++b, target_a_b_blk_ptr += Ncd) {
223 VectorSIMD<double, npts> d_c_GTG[Lc + Ld + 1][Lc + Ld + 1];
224 for (
int c_plus_d = 0; c_plus_d <= Lc + Ld; ++c_plus_d) {
225 d_c_GTG[0][c_plus_d] = a_b_GTG_cpd_0[a][b][c_plus_d];
228 for (
int d = 1; d <= Ld; ++d) {
229 for (
int c = 0; c <= Lc + Ld - d; ++c) {
230 d_c_GTG[d][c] = d_c_GTG[d - 1][c + 1] + CD[0] * d_c_GTG[d - 1][c];
232#if LIBINT2_FLOP_COUNT
233 inteval->nflops[0] += 2 * (Lc + Ld - d + 1);
237 for (
int d = 0; d <= Ld; ++d) {
238 for (
int c = 0, cd = d; c <= Lc; ++c, cd += Nd) {
239 target_a_b_blk_ptr[cd] = d_c_GTG[d][c];
Default definitions for various parameters assumed by Libint.
Definition algebra.cc:24