11#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12#define EIGEN_CONFIGURE_VECTORIZATION_H
// Choose the compiler-specific spellings of EIGEN_ALIGN_TO_BOUNDARY(n)
// (alignment specifier) and EIGEN_ALIGNOF(x) (alignment query).
// NOTE(review): some directive lines of this chain (later #elif/#else/#endif)
// are not visible in this listing -- confirm against the full header.
36#if (defined EIGEN_CUDACC)
// CUDA compiler: __align__(n) / __alignof(x).
37 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
38 #define EIGEN_ALIGNOF(x) __alignof(x)
39#elif EIGEN_HAS_ALIGNAS
// The alignas/alignof keywords are available: use them directly.
40 #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
41 #define EIGEN_ALIGNOF(x) alignof(x)
42#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
// GNU-style attribute syntax.
43 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
44 #define EIGEN_ALIGNOF(x) __alignof(x)
// __declspec(align(n)) form; the directive guarding this pair is not visible here.
46 #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
47 #define EIGEN_ALIGNOF(x) __alignof(x)
// Attribute form again; the directive guarding this pair is not visible here.
50 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
51 #define EIGEN_ALIGNOF(x) __alignof(x)
// Presumably the fallback branch: no known spelling, so the build is stopped.
53 #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
// Pick EIGEN_IDEAL_MAX_ALIGN_BYTES from the build configuration.
// NOTE(review): the #else/#elif/#endif lines separating some of the branches
// below are not visible in this listing.
57#if defined(EIGEN_DONT_VECTORIZE)
58 #if defined(EIGEN_GPUCC)
// Vectorization is disabled but a GPU compiler is in use: 16 bytes.
61 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
// Presumably the non-GPU branch: no alignment requested.
63 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
65#elif defined(__AVX512F__)
// AVX-512F enabled by the compiler: 64 bytes.
67 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
// 32-byte branch; its guarding directive is not visible here.
70 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
// 16-byte branch; presumably the default case.
72 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
// EIGEN_MIN_ALIGN_BYTES is fixed at 16 regardless of the branches above.
77#define EIGEN_MIN_ALIGN_BYTES 16
// Reject contradictory settings: static alignment disabled through
// EIGEN_DONT_ALIGN or EIGEN_DONT_ALIGN_STATICALLY while
// EIGEN_MAX_STATIC_ALIGN_BYTES is defined with a positive value.
83#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
84#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
// When static alignment is disabled, force EIGEN_MAX_STATIC_ALIGN_BYTES to 0,
// dropping any earlier definition first.
89#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
90 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
91 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
93 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// If no value has been provided, derive one from the compiler and architecture.
96#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT is set to 1 for GNU-compatible
// compilers on architectures outside the listed set, and for strict GCC up to
// 4.6 on ARM/ARM64. The final define sets it to 0 (its #else is not visible here).
106 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
107 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
108 #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
112 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
114 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
// EIGEN_ARCH_WANTS_STACK_ALIGNMENT combines the flag above with compiler exclusions.
// NOTE(review): the condition ends with a line continuation, so at least one
// further term appears to be missing from this listing -- confirm against the full header.
118 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
119 && !EIGEN_GCC3_OR_OLDER \
120 && !EIGEN_COMP_SUNCC \
122 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
124 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
// Use the ideal alignment when stack alignment is wanted; the second define
// (0) is presumably the #else branch.
127 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
128 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
130 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// A user-provided EIGEN_MAX_ALIGN_BYTES smaller than the static value caps it.
136#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
137#undef EIGEN_MAX_STATIC_ALIGN_BYTES
138#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// With static alignment at 0, EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT is defined
// unless the user already defined it.
141#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
142 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
// Shorthand alignment specifiers for fixed 8/16/32/64-byte boundaries.
152#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
153#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
154#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
155#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX aligns to EIGEN_MAX_STATIC_ALIGN_BYTES when that is positive;
// the second define expands to nothing (presumably the #else branch, whose
// directive is not visible in this listing).
156#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
157#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
159#define EIGEN_ALIGN_MAX
// Reject contradictory settings: EIGEN_DONT_ALIGN together with a positive
// user-provided EIGEN_MAX_ALIGN_BYTES.
165#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
166#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
// EIGEN_DONT_ALIGN forces EIGEN_MAX_ALIGN_BYTES to 0 (replacing any earlier
// definition); otherwise an unset EIGEN_MAX_ALIGN_BYTES defaults to the ideal value.
// NOTE(review): the #endif lines of these conditionals are not visible in this listing.
169#ifdef EIGEN_DONT_ALIGN
170 #ifdef EIGEN_MAX_ALIGN_BYTES
171 #undef EIGEN_MAX_ALIGN_BYTES
173 #define EIGEN_MAX_ALIGN_BYTES 0
174#elif !defined(EIGEN_MAX_ALIGN_BYTES)
175 #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
// EIGEN_DEFAULT_ALIGN_BYTES is the smaller of the ideal alignment for the
// enabled instruction set (EIGEN_IDEAL_MAX_ALIGN_BYTES) and the configured
// cap (EIGEN_MAX_ALIGN_BYTES). Taking the minimum guarantees the default never
// exceeds the cap, and in particular is 0 when EIGEN_MAX_ALIGN_BYTES is 0.
#if EIGEN_IDEAL_MAX_ALIGN_BYTES < EIGEN_MAX_ALIGN_BYTES
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif
// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless the user defined it beforehand.
185#ifndef EIGEN_UNALIGNED_VECTORIZE
186#define EIGEN_UNALIGNED_VECTORIZE 1
// When EIGEN_MAX_ALIGN_BYTES is 0, EIGEN_DONT_VECTORIZE is defined (if not
// already), which turns vectorization off for the detection logic that tests it.
// NOTE(review): the matching #endif lines are not visible in this listing.
193#if EIGEN_MAX_ALIGN_BYTES==0
194 #ifndef EIGEN_DONT_VECTORIZE
195 #define EIGEN_DONT_VECTORIZE
// SSE2 availability probes. NOTE(review): the directives that enclose and
// close these conditionals are not visible in this listing.
// MSVC (EIGEN_COMP_MSVC >= 1500): SSE2 is assumed when _M_IX86_FP >= 2 or when
// targeting x86-64.
204 #if (EIGEN_COMP_MSVC >= 1500)
206 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
207 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
// Other compilers: require __SSE2__, and for GNU-compatible compilers either
// ICC or GCC >= 4.2.
211 #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
212 #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
// x86 SIMD configuration: define the EIGEN_VECTORIZE_* feature macros and
// include the matching intrinsic headers. Skipped entirely when
// EIGEN_DONT_VECTORIZE or EIGEN_GPUCC is defined.
// NOTE(review): many guarding directives (#ifdef/#else/#endif) are not visible
// in this listing; the groupings described below are inferred from the macro
// names and must be confirmed against the full header.
216#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
218 #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
// Baseline: SSE and SSE2.
223 #define EIGEN_VECTORIZE
224 #define EIGEN_VECTORIZE_SSE
225 #define EIGEN_VECTORIZE_SSE2
// Individual SSE3 .. SSE4.2 levels; presumably each is guarded by its own
// compiler feature macro (guards not visible here).
232 #define EIGEN_VECTORIZE_SSE3
235 #define EIGEN_VECTORIZE_SSSE3
238 #define EIGEN_VECTORIZE_SSE4_1
243 #define EIGEN_VECTORIZE_SSE4_2
// AVX group: enables AVX together with every SSE level. The AVX define sits
// under #ifndef EIGEN_USE_SYCL (extent of that guard not visible here).
246 #ifndef EIGEN_USE_SYCL
247 #define EIGEN_VECTORIZE_AVX
249 #define EIGEN_VECTORIZE_SSE3
250 #define EIGEN_VECTORIZE_SSSE3
251 #define EIGEN_VECTORIZE_SSE4_1
252 #define EIGEN_VECTORIZE_SSE4_2
// AVX2 group: AVX2 + AVX (under #ifndef EIGEN_USE_SYCL) and every SSE level.
255 #ifndef EIGEN_USE_SYCL
256 #define EIGEN_VECTORIZE_AVX2
257 #define EIGEN_VECTORIZE_AVX
259 #define EIGEN_VECTORIZE_SSE3
260 #define EIGEN_VECTORIZE_SSSE3
261 #define EIGEN_VECTORIZE_SSE4_1
262 #define EIGEN_VECTORIZE_SSE4_2
// FMA: enabled by __FMA__, or on MSVC whenever __AVX2__ is defined.
264 #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
267 #define EIGEN_VECTORIZE_FMA
// AVX-512: FMA must already be enabled, otherwise the build is stopped. Two
// wordings of the error exist; the directive choosing between them is not visible.
269 #if defined(__AVX512F__)
270 #ifndef EIGEN_VECTORIZE_FMA
272 #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
274 #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
// AVX-512 group: AVX512 + AVX2 + AVX (under #ifndef EIGEN_USE_SYCL), FMA and
// every SSE level.
277 #ifndef EIGEN_USE_SYCL
278 #define EIGEN_VECTORIZE_AVX512
279 #define EIGEN_VECTORIZE_AVX2
280 #define EIGEN_VECTORIZE_AVX
282 #define EIGEN_VECTORIZE_FMA
283 #define EIGEN_VECTORIZE_SSE3
284 #define EIGEN_VECTORIZE_SSSE3
285 #define EIGEN_VECTORIZE_SSE4_1
286 #define EIGEN_VECTORIZE_SSE4_2
// Optional AVX-512 extensions (DQ, ER, BF16); only the BF16 guard is visible here.
287 #ifndef EIGEN_USE_SYCL
289 #define EIGEN_VECTORIZE_AVX512DQ
292 #define EIGEN_VECTORIZE_AVX512ER
294 #ifdef __AVX512BF16__
295 #define EIGEN_VECTORIZE_AVX512BF16
// Workaround for one specific Apple clang build (11000033) with a minimum
// macOS version of 10.15: AVX and everything layered on it is switched off again.
302 #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
305 #ifdef EIGEN_VECTORIZE_AVX
306 #undef EIGEN_VECTORIZE_AVX
307 #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
308 #ifdef EIGEN_VECTORIZE_AVX2
309 #undef EIGEN_VECTORIZE_AVX2
311 #ifdef EIGEN_VECTORIZE_FMA
312 #undef EIGEN_VECTORIZE_FMA
314 #ifdef EIGEN_VECTORIZE_AVX512
315 #undef EIGEN_VECTORIZE_AVX512
317 #ifdef EIGEN_VECTORIZE_AVX512DQ
318 #undef EIGEN_VECTORIZE_AVX512DQ
320 #ifdef EIGEN_VECTORIZE_AVX512ER
321 #undef EIGEN_VECTORIZE_AVX512ER
// Intrinsic headers: ICC >= 1110 pulls in <immintrin.h> alone; the remaining
// includes are presumably the alternative branch, adding one header per
// enabled feature level.
345 #if EIGEN_COMP_ICC >= 1110
346 #include <immintrin.h>
348 #include <mmintrin.h>
349 #include <emmintrin.h>
350 #include <xmmintrin.h>
351 #ifdef EIGEN_VECTORIZE_SSE3
352 #include <pmmintrin.h>
354 #ifdef EIGEN_VECTORIZE_SSSE3
355 #include <tmmintrin.h>
357 #ifdef EIGEN_VECTORIZE_SSE4_1
358 #include <smmintrin.h>
360 #ifdef EIGEN_VECTORIZE_SSE4_2
361 #include <nmmintrin.h>
363 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
364 #include <immintrin.h>
// Non-x86 branches of the vectorization chain. Each branch defines
// EIGEN_VECTORIZE plus one architecture-specific EIGEN_VECTORIZE_* macro.
// NOTE(review): some lines of these branches (additional includes, #else,
// #endif) are not visible in this listing.
// PowerPC VSX.
369 #elif defined __VSX__
371 #define EIGEN_VECTORIZE
372 #define EIGEN_VECTORIZE_VSX
// PowerPC AltiVec.
380 #elif defined __ALTIVEC__
382 #define EIGEN_VECTORIZE
383 #define EIGEN_VECTORIZE_ALTIVEC
// ARM NEON, used only when EIGEN_ARM64_USE_SVE is not defined.
391 #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
393 #define EIGEN_VECTORIZE
394 #define EIGEN_VECTORIZE_NEON
395 #include <arm_neon.h>
// ARM SVE, requires both compiler support and the EIGEN_ARM64_USE_SVE opt-in.
399 #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
401 #define EIGEN_VECTORIZE
402 #define EIGEN_VECTORIZE_SVE
// EIGEN_ARM64_SVE_VL is taken from __ARM_FEATURE_SVE_BITS when the compiler
// provides it; the #error is presumably the #else branch.
// NOTE(review): "lector" in the message below looks like a typo for "vector".
407 #if defined __ARM_FEATURE_SVE_BITS
408 #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
410#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
// IBM z (s390x) vector facility.
413#elif (defined __s390x__ && defined __VEC__)
415#define EIGEN_VECTORIZE
416#define EIGEN_VECTORIZE_ZVECTOR
417#include <vecintrin.h>
// MIPS MSA; the byte-order test below checks for a little-endian target
// (what follows that test is not fully visible here).
419#elif defined __mips_msa
423#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
429#define EIGEN_VECTORIZE
430#define EIGEN_VECTORIZE_MSA
// Half-precision and GPU configuration.
// NOTE(review): the #endif lines of the conditionals below are not visible in
// this listing.
// ARM64 scalar FP16 arithmetic: pull in the ARM FP16 header.
440#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
441 #include <arm_fp16.h>
// x86 F16C: define EIGEN_HAS_FP16_C for non-GPU builds, with a version test
// (>= 380) on EIGEN_COMP_CLANG.
444#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
446 #define EIGEN_HAS_FP16_C
// When EIGEN_COMP_CLANG is defined, <immintrin.h> is included here.
448 #if defined(EIGEN_COMP_CLANG)
452 #include <immintrin.h>
// CUDA: mark GPU vectorization, include the CUDA vector types, and enable
// CUDA FP16 when EIGEN_CUDA_SDK_VER is at least 70500.
456#if defined EIGEN_CUDACC
457 #define EIGEN_VECTORIZE_GPU
458 #include <vector_types.h>
459 #if EIGEN_CUDA_SDK_VER >= 70500
460 #define EIGEN_HAS_CUDA_FP16
// CUDA FP16 headers, included only when EIGEN_HAS_CUDA_FP16 is defined.
464#if defined(EIGEN_HAS_CUDA_FP16)
465 #include <cuda_runtime_api.h>
466 #include <cuda_fp16.h>
// HIP: mark GPU vectorization, include the HIP vector types, and enable HIP FP16.
469#if defined(EIGEN_HIPCC)
470 #define EIGEN_VECTORIZE_GPU
471 #include <hip/hip_vector_types.h>
472 #define EIGEN_HAS_HIP_FP16
473 #include <hip/hip_fp16.h>
480inline static const char *SimdInstructionSetsInUse(
void) {
481#if defined(EIGEN_VECTORIZE_AVX512)
482 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
483#elif defined(EIGEN_VECTORIZE_AVX)
484 return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
485#elif defined(EIGEN_VECTORIZE_SSE4_2)
486 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
487#elif defined(EIGEN_VECTORIZE_SSE4_1)
488 return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
489#elif defined(EIGEN_VECTORIZE_SSSE3)
490 return "SSE, SSE2, SSE3, SSSE3";
491#elif defined(EIGEN_VECTORIZE_SSE3)
492 return "SSE, SSE2, SSE3";
493#elif defined(EIGEN_VECTORIZE_SSE2)
495#elif defined(EIGEN_VECTORIZE_ALTIVEC)
497#elif defined(EIGEN_VECTORIZE_VSX)
499#elif defined(EIGEN_VECTORIZE_NEON)
501#elif defined(EIGEN_VECTORIZE_SVE)
503#elif defined(EIGEN_VECTORIZE_ZVECTOR)
504 return "S390X ZVECTOR";
505#elif defined(EIGEN_VECTORIZE_MSA)
Namespace containing all symbols from the Eigen library.
Definition Core:141