Please, help us to better know about our user community by answering the following short survey: https://forms.gle/wpyrxWi18ox9Z5ae9
Eigen  3.4.0
ConfigureVectorization.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2020, Arm Limited and Contributors
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12 #define EIGEN_CONFIGURE_VECTORIZATION_H
13 
14 //------------------------------------------------------------------------------------------
15 // Static and dynamic alignment control
16 //
17 // The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
18 // as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
19 // The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
20 // a default value is automatically computed based on architecture, compiler, and OS.
21 //
22 // This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
23 // to be used to declare statically aligned buffers.
24 //------------------------------------------------------------------------------------------
25 
26 
27 /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
28  * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
29  * so that vectorization doesn't affect binary compatibility.
30  *
31  * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
32  * vectorized and non-vectorized code.
33  *
34  * FIXME: this code can be cleaned up once we switch to proper C++11 only.
35  */
// Select the compiler-specific spelling of EIGEN_ALIGN_TO_BOUNDARY(n) (request n-byte
// alignment) and EIGEN_ALIGNOF(x) (query the alignment of x). Branches are tried in
// order: EIGEN_CUDACC, EIGEN_HAS_ALIGNAS (standard alignas/alignof), GCC-style
// __attribute__((aligned)), MSVC __declspec(align), then the Sun compiler. Any other
// compiler stops the build with the #error in the final branch.
36 #if (defined EIGEN_CUDACC)
37  #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
38  #define EIGEN_ALIGNOF(x) __alignof(x)
39 #elif EIGEN_HAS_ALIGNAS
40  #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
41  #define EIGEN_ALIGNOF(x) alignof(x)
42 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
43  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
44  #define EIGEN_ALIGNOF(x) __alignof(x)
45 #elif EIGEN_COMP_MSVC
46  #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
47  #define EIGEN_ALIGNOF(x) __alignof(x)
48 #elif EIGEN_COMP_SUNCC
49  // FIXME: unverified -- this reuses the GCC-style attribute for the Sun compiler:
50  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
51  #define EIGEN_ALIGNOF(x) __alignof(x)
52 #else
53  #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
54 #endif
55 
56 // If the user explicitly disables vectorization, then we also disable alignment
// (ideal alignment 0), except for GPU compilers, which keep 16 bytes. Otherwise the
// ideal alignment follows the widest x86 vector extension enabled at compile time:
// 64 bytes with __AVX512F__, 32 bytes with __AVX__, and 16 bytes in every other case.
57 #if defined(EIGEN_DONT_VECTORIZE)
58  #if defined(EIGEN_GPUCC)
59  // GPU code is always vectorized and requires memory alignment for
60  // statically allocated buffers.
61  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
62  #else
63  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
64  #endif
65 #elif defined(__AVX512F__)
66  // 64-byte static alignment is preferred only if really required
67  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
68 #elif defined(__AVX__)
69  // 32-byte static alignment is preferred only if really required
70  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
71 #else
72  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
73 #endif
74 
75 
76 // EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
77 #define EIGEN_MIN_ALIGN_BYTES 16
78 
79 // Defines the boundary (in bytes) on which the data needs to be aligned. Note
80 // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
81 // aligned at all regardless of the value of this #define.
82 
// Reject a contradictory configuration: one of the do-not-align switches together
// with a user-supplied EIGEN_MAX_STATIC_ALIGN_BYTES greater than zero.
83 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
84 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
85 #endif
86 
87 // EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated.
88 // They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0, so any previous definition (which can
// only be 0 at this point, given the check above) is replaced by an explicit 0.
89 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
90  #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
91  #undef EIGEN_MAX_STATIC_ALIGN_BYTES
92  #endif
93  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
94 #endif
95 
// Default for EIGEN_MAX_STATIC_ALIGN_BYTES, computed only when neither the user nor the
// deprecated do-not-align switches have set it already. The result is either
// EIGEN_IDEAL_MAX_ALIGN_BYTES or 0, depending on the compiler/architecture/OS checks below.
96 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
97 
98  // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
99 
100  // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
101  // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
102  // enable alignment, but it can be a cause of problems on some platforms, so we just disable it on
103  // certain common platforms (compiler+architecture combinations) to avoid these problems.
104  // Only static alignment is really problematic (relies on nonstandard compiler extensions),
105  // try to keep heap alignment even when we have to disable static alignment.
 // First case: a GNU-compatible compiler on an architecture outside the listed set.
106  #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
107  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
108  #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
109  // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
110  // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
111  // 4.8 and newer seem definitely unaffected.
112  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
113  #else
114  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
115  #endif
116 
117  // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
 // (and whenever the GCC/architecture check above opted out).
118  #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
119  && !EIGEN_GCC3_OR_OLDER \
120  && !EIGEN_COMP_SUNCC \
121  && !EIGEN_OS_QNX
122  #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
123  #else
124  #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
125  #endif
126 
 // Final default: the ideal alignment when the platform wants it, otherwise none.
127  #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
128  #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
129  #else
130  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
131  #endif
132 
133 #endif
134 
135 // If EIGEN_MAX_ALIGN_BYTES is defined by the user, it acts as an upper bound for
// EIGEN_MAX_STATIC_ALIGN_BYTES: a larger static value is lowered to EIGEN_MAX_ALIGN_BYTES.
136 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
137 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
138 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
139 #endif
140 
// With static alignment turned off, define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT unless it
// is already defined. NOTE(review): the consumer of this macro is not visible in this
// file; presumably it suppresses an alignment assertion on fixed-size arrays -- confirm.
141 #if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
142  #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
143 #endif
144 
145 // At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
146 // It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES)
147 // and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
148 // Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
149 
150 
151 // Shortcuts to EIGEN_ALIGN_TO_BOUNDARY for the fixed boundaries 8, 16, 32 and 64 bytes.
152 #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
153 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
154 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
155 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX aligns to EIGEN_MAX_STATIC_ALIGN_BYTES when static alignment is enabled
// and expands to nothing when it is disabled (EIGEN_MAX_STATIC_ALIGN_BYTES == 0).
156 #if EIGEN_MAX_STATIC_ALIGN_BYTES>0
157 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
158 #else
159 #define EIGEN_ALIGN_MAX
160 #endif
161 
162 
163 // Dynamic alignment control
164 
// Reject a contradictory configuration: EIGEN_DONT_ALIGN together with a user-supplied
// EIGEN_MAX_ALIGN_BYTES greater than zero.
165 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
166 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
167 #endif
168 
// EIGEN_DONT_ALIGN forces EIGEN_MAX_ALIGN_BYTES to 0; otherwise a user-supplied value is
// kept, and if there is none the ideal alignment is used.
169 #ifdef EIGEN_DONT_ALIGN
170  #ifdef EIGEN_MAX_ALIGN_BYTES
171  #undef EIGEN_MAX_ALIGN_BYTES
172  #endif
173  #define EIGEN_MAX_ALIGN_BYTES 0
174 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
175  #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
176 #endif
177 
// EIGEN_DEFAULT_ALIGN_BYTES is the larger of EIGEN_IDEAL_MAX_ALIGN_BYTES and
// EIGEN_MAX_ALIGN_BYTES.
178 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
179 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
180 #else
181 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
182 #endif
183 
184 
// Default EIGEN_UNALIGNED_VECTORIZE to 1 unless the user has already defined it.
// NOTE(review): the macro is not consumed in this file; presumably it enables
// vectorization of unaligned data elsewhere in Eigen -- confirm.
185 #ifndef EIGEN_UNALIGNED_VECTORIZE
186 #define EIGEN_UNALIGNED_VECTORIZE 1
187 #endif
188 
189 //----------------------------------------------------------------------
190 
191 // If alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
192 // account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks.
// NOTE(review): EIGEN_IDEAL_MAX_ALIGN_BYTES was derived from EIGEN_DONT_VECTORIZE earlier
// in this file, so defining EIGEN_DONT_VECTORIZE here does not feed back into it.
193 #if EIGEN_MAX_ALIGN_BYTES==0
194  #ifndef EIGEN_DONT_VECTORIZE
195  #define EIGEN_DONT_VECTORIZE
196  #endif
197 #endif
198 
199 
// Detect whether SSE2 is available. The result is one of two marker macros:
// EIGEN_SSE2_ON_MSVC_2008_OR_LATER (MSVC branch) or
// EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC (all other compilers).
200 // The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
201 // removed as gcc 4.1 and msvc 2008 are not supported anyways.
202 #if EIGEN_COMP_MSVC
203  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
204  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
205  // A user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP,
 // hence the additional EIGEN_ARCH_x86_64 alternative.
206  #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
207  #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
208  #endif
209  #endif
210 #else
 // Non-MSVC: require __SSE2__ and, for GNU compilers other than ICC, at least GCC 4.2.
211  #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
212  #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
213  #endif
214 #endif
215 
216 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
217 
218  #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
219 
220  // Defines symbols for compile-time detection of which instructions are
221  // used.
222  // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
223  #define EIGEN_VECTORIZE
224  #define EIGEN_VECTORIZE_SSE
225  #define EIGEN_VECTORIZE_SSE2
226 
227  // Detect sse3/ssse3/sse4:
228  // gcc and icc defines __SSE3__, ...
229  // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
230  // want to force the use of those instructions with msvc.
231  #ifdef __SSE3__
232  #define EIGEN_VECTORIZE_SSE3
233  #endif
234  #ifdef __SSSE3__
235  #define EIGEN_VECTORIZE_SSSE3
236  #endif
237  #ifdef __SSE4_1__
238  #define EIGEN_VECTORIZE_SSE4_1
239  #endif
240  // e2k has SSE up to 4.1, effectively (SSE4.2/AVX1 might be faster or not)
241  #ifndef __e2k__
242  #ifdef __SSE4_2__
243  #define EIGEN_VECTORIZE_SSE4_2
244  #endif
245  #ifdef __AVX__
246  #ifndef EIGEN_USE_SYCL
247  #define EIGEN_VECTORIZE_AVX
248  #endif
249  #define EIGEN_VECTORIZE_SSE3
250  #define EIGEN_VECTORIZE_SSSE3
251  #define EIGEN_VECTORIZE_SSE4_1
252  #define EIGEN_VECTORIZE_SSE4_2
253  #endif
254  #ifdef __AVX2__
255  #ifndef EIGEN_USE_SYCL
256  #define EIGEN_VECTORIZE_AVX2
257  #define EIGEN_VECTORIZE_AVX
258  #endif
259  #define EIGEN_VECTORIZE_SSE3
260  #define EIGEN_VECTORIZE_SSSE3
261  #define EIGEN_VECTORIZE_SSE4_1
262  #define EIGEN_VECTORIZE_SSE4_2
263  #endif
264  #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
265  // MSVC does not expose a switch dedicated for FMA
266  // For MSVC, AVX2 => FMA
267  #define EIGEN_VECTORIZE_FMA
268  #endif
269  #if defined(__AVX512F__)
270  #ifndef EIGEN_VECTORIZE_FMA
271  #if EIGEN_COMP_GNUC
272  #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
273  #else
274  #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
275  #endif
276  #endif
277  #ifndef EIGEN_USE_SYCL
278  #define EIGEN_VECTORIZE_AVX512
279  #define EIGEN_VECTORIZE_AVX2
280  #define EIGEN_VECTORIZE_AVX
281  #endif
282  #define EIGEN_VECTORIZE_FMA
283  #define EIGEN_VECTORIZE_SSE3
284  #define EIGEN_VECTORIZE_SSSE3
285  #define EIGEN_VECTORIZE_SSE4_1
286  #define EIGEN_VECTORIZE_SSE4_2
287  #ifndef EIGEN_USE_SYCL
288  #ifdef __AVX512DQ__
289  #define EIGEN_VECTORIZE_AVX512DQ
290  #endif
291  #ifdef __AVX512ER__
292  #define EIGEN_VECTORIZE_AVX512ER
293  #endif
294  #ifdef __AVX512BF16__
295  #define EIGEN_VECTORIZE_AVX512BF16
296  #endif
297  #endif
298  #endif
299  #endif // __e2k__
300 
301  // Disable AVX support on broken xcode versions
302  #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
303  // Works around a nasty bug in the clang compiler shipped with XCode 11.0 that shows up in a
304  // common situation: Mac deployment target macOS 10.15. See https://trac.macports.org/ticket/58776#no1
 // When AVX was selected above, drop it together with the AVX2, FMA and AVX512 macros and warn.
 // NOTE(review): EIGEN_VECTORIZE_AVX512BF16, which can be defined earlier in this file, is
 // not undefined here -- confirm whether that omission is intentional.
305  #ifdef EIGEN_VECTORIZE_AVX
306  #undef EIGEN_VECTORIZE_AVX
307  #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
308  #ifdef EIGEN_VECTORIZE_AVX2
309  #undef EIGEN_VECTORIZE_AVX2
310  #endif
311  #ifdef EIGEN_VECTORIZE_FMA
312  #undef EIGEN_VECTORIZE_FMA
313  #endif
314  #ifdef EIGEN_VECTORIZE_AVX512
315  #undef EIGEN_VECTORIZE_AVX512
316  #endif
317  #ifdef EIGEN_VECTORIZE_AVX512DQ
318  #undef EIGEN_VECTORIZE_AVX512DQ
319  #endif
320  #ifdef EIGEN_VECTORIZE_AVX512ER
321  #undef EIGEN_VECTORIZE_AVX512ER
322  #endif
323  #endif
324  // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX
325  // NOTE: using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produces core dumps in 3 tests
326  // NOTE: using -macosx-version-min=10.14 produces functioning and passing tests in all cases
327  // NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping tests
328  // with -macosx-version-min=10.15 and AVX
329  // NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
330  // -macosx-version-min=10.15 and AVX
331  #endif
332 
333  // include files
334 
335  // This extern "C" works around a MINGW-w64 compilation issue
336  // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
337  // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
338  // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
339  // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
340  // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
341  // notice that since these are C headers, the extern "C" is theoretically needed anyways.
342  extern "C" {
343  // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
344  // Doing so triggers some issues with ICC. However old gcc versions seem to not have this file, thus:
345  #if EIGEN_COMP_ICC >= 1110
346  #include <immintrin.h>
347  #else
348  #include <mmintrin.h>
349  #include <emmintrin.h>
350  #include <xmmintrin.h>
351  #ifdef EIGEN_VECTORIZE_SSE3
352  #include <pmmintrin.h>
353  #endif
354  #ifdef EIGEN_VECTORIZE_SSSE3
355  #include <tmmintrin.h>
356  #endif
357  #ifdef EIGEN_VECTORIZE_SSE4_1
358  #include <smmintrin.h>
359  #endif
360  #ifdef EIGEN_VECTORIZE_SSE4_2
361  #include <nmmintrin.h>
362  #endif
363  #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
364  #include <immintrin.h>
365  #endif
366  #endif
367  } // end extern "C"
368 
369  #elif defined __VSX__
370 
371  #define EIGEN_VECTORIZE
372  #define EIGEN_VECTORIZE_VSX
373  #include <altivec.h>
374  // We need to #undef all these ugly tokens defined in <altivec.h>
375  // => use __vector instead of vector
376  #undef bool
377  #undef vector
378  #undef pixel
379 
380  #elif defined __ALTIVEC__
381 
382  #define EIGEN_VECTORIZE
383  #define EIGEN_VECTORIZE_ALTIVEC
384  #include <altivec.h>
385  // We need to #undef all these ugly tokens defined in <altivec.h>
386  // => use __vector instead of vector
387  #undef bool
388  #undef vector
389  #undef pixel
390 
391  #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
392 
393  #define EIGEN_VECTORIZE
394  #define EIGEN_VECTORIZE_NEON
395  #include <arm_neon.h>
396 
397  // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
398  // will not select the backend automatically
399  #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
400 
401  #define EIGEN_VECTORIZE
402  #define EIGEN_VECTORIZE_SVE
403  #include <arm_sve.h>
404 
405  // Since we depend on knowing SVE vector lengths at compile-time, we need
406  // to ensure a fixed lengths is set
407  #if defined __ARM_FEATURE_SVE_BITS
408  #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
409  #else
410 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
411 #endif
412 
413 #elif (defined __s390x__ && defined __VEC__)
414 
415 #define EIGEN_VECTORIZE
416 #define EIGEN_VECTORIZE_ZVECTOR
417 #include <vecintrin.h>
418 
419 #elif defined __mips_msa
420 
421 // Limit MSA optimizations to little-endian CPUs for now.
422 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
423 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
424 #if defined(__LP64__)
425 #define EIGEN_MIPS_64
426 #else
427 #define EIGEN_MIPS_32
428 #endif
429 #define EIGEN_VECTORIZE
430 #define EIGEN_VECTORIZE_MSA
431 #include <msa.h>
432 #endif
433 
434 #endif
435 #endif
436 
437 // Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
438 // compilers seem to follow this. We therefore include it explicitly.
439 // See also: https://bugs.llvm.org/show_bug.cgi?id=47955
// Pull in the Arm scalar fp16 header when the fp16 scalar arithmetic feature macro is set.
440 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
441  #include <arm_fp16.h>
442 #endif
443 
// x86 F16C conversions: enabled for non-GPU compilation when __F16C__ is defined.
// NOTE(review): other EIGEN_COMP_* macros in this file are tested by value rather than
// with defined(); if EIGEN_COMP_CLANG is likewise always defined, then the
// !defined(EIGEN_COMP_CLANG) alternative never holds and the nested
// defined(EIGEN_COMP_CLANG) test is always true -- confirm against its definition.
444 #if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
445  // We can use the optimized fp16 to float and float to fp16 conversion routines
446  #define EIGEN_HAS_FP16_C
447 
448  #if defined(EIGEN_COMP_CLANG)
449  // Workaround for clang: The FP16C intrinsics for clang are included by
450  // immintrin.h, as opposed to emmintrin.h as suggested by Intel:
451  // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
452  #include <immintrin.h>
453  #endif
454 #endif
455 
// CUDA: mark GPU vectorization, include the CUDA vector types, and enable CUDA fp16
// when EIGEN_CUDA_SDK_VER is at least 70500 (presumably CUDA 7.5 -- confirm encoding).
456 #if defined EIGEN_CUDACC
457  #define EIGEN_VECTORIZE_GPU
458  #include <vector_types.h>
459  #if EIGEN_CUDA_SDK_VER >= 70500
460  #define EIGEN_HAS_CUDA_FP16
461  #endif
462 #endif
463 
// The CUDA fp16 headers are included whenever EIGEN_HAS_CUDA_FP16 is defined, whether
// by the block above or by an earlier definition.
464 #if defined(EIGEN_HAS_CUDA_FP16)
465  #include <cuda_runtime_api.h>
466  #include <cuda_fp16.h>
467 #endif
468 
// HIP: mark GPU vectorization and unconditionally enable HIP fp16 support.
469 #if defined(EIGEN_HIPCC)
470  #define EIGEN_VECTORIZE_GPU
471  #include <hip/hip_vector_types.h>
472  #define EIGEN_HAS_HIP_FP16
473  #include <hip/hip_fp16.h>
474 #endif
475 
476 
478 namespace Eigen {
479 
/**
 * Returns a static, human-readable, comma-separated list of the SIMD
 * instruction sets selected at compile time.
 *
 * The string is chosen by the first EIGEN_VECTORIZE_* macro that is defined,
 * tested in the order written below. There is no dedicated AVX2 or FMA
 * branch, so a build with those enabled but without AVX512 reports the AVX
 * string. "None" is returned when none of the tested macros is defined.
 *
 * The returned pointer refers to a string literal and must not be freed or
 * modified.
 */
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
  /* The separator after "AVX" was missing; every entry is comma-separated. */
  return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
  return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
  return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
  return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
  return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
  return "MIPS MSA";
#else
  return "None";
#endif
}
511 
512 } // end namespace Eigen
513 
514 
515 #endif // EIGEN_CONFIGURE_VECTORIZATION_H
Namespace containing all symbols from the Eigen library.
Definition: Core:141