clang API Documentation

avx512fintrin.h
Go to the documentation of this file.
00001 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
00002  *
00003  * Permission is hereby granted, free of charge, to any person obtaining a copy
00004  * of this software and associated documentation files (the "Software"), to deal
00005  * in the Software without restriction, including without limitation the rights
00006  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00007  * copies of the Software, and to permit persons to whom the Software is
00008  * furnished to do so, subject to the following conditions:
00009  *
00010  * The above copyright notice and this permission notice shall be included in
00011  * all copies or substantial portions of the Software.
00012  *
00013  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00014  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00015  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00016  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00017  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00018  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00019  * THE SOFTWARE.
00020  *
00021  *===-----------------------------------------------------------------------===
00022  */
00023 #ifndef __IMMINTRIN_H
00024 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
00025 #endif
00026 
00027 #ifndef __AVX512FINTRIN_H
00028 #define __AVX512FINTRIN_H
00029 
/* Element-typed 64-byte vector types used for the casts in this header. */
typedef double    __v8df  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));

/* 64-byte vector types that appear in the intrinsic signatures. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types; this header pairs __mmask8 with the 8-element vectors and
   __mmask16 with the 16-element vectors. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
00041 
/* Rounding mode selectors.  _MM_FROUND_CUR_DIRECTION is the value the
   wrappers in this header pass as the trailing rounding argument of the
   rounding-aware builtins.  The literal spellings are kept as-is so that an
   identical definition in another header remains a valid redefinition. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
00048 
00049 /* Create vectors with repeated elements */
00050 
00051 static  __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00052 _mm512_setzero_si512(void)
00053 {
00054   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
00055 }
00056 
00057 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00058 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
00059 {
00060   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
00061                  (__v16si)
00062                  _mm512_setzero_si512 (),
00063                  __M);
00064 }
00065 
00066 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00067 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
00068 {
00069 #ifdef __x86_64__
00070   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
00071                  (__v8di)
00072                  _mm512_setzero_si512 (),
00073                  __M);
00074 #else
00075   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
00076                  (__v8di)
00077                  _mm512_setzero_si512 (),
00078                  __M);
00079 #endif
00080 }
00081 
00082 static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00083 _mm512_setzero_ps(void)
00084 {
00085   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
00086                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
00087 }
00088 static  __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
00089 _mm512_setzero_pd(void)
00090 {
00091   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
00092 }
00093 
00094 static __inline __m512 __attribute__((__always_inline__, __nodebug__))
00095 _mm512_set1_ps(float __w)
00096 {
00097   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
00098                    __w, __w, __w, __w, __w, __w, __w, __w  };
00099 }
00100 
00101 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
00102 _mm512_set1_pd(double __w)
00103 {
00104   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
00105 }
00106 
00107 static __inline __m512i __attribute__((__always_inline__, __nodebug__))
00108 _mm512_set1_epi32(int __s)
00109 {
00110   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
00111                              __s, __s, __s, __s, __s, __s, __s, __s };
00112 }
00113 
00114 static __inline __m512i __attribute__((__always_inline__, __nodebug__))
00115 _mm512_set1_epi64(long long __d)
00116 {
00117   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
00118 }
00119 
00120 static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00121 _mm512_broadcastss_ps(__m128 __X)
00122 {
00123   float __f = __X[0];
00124   return (__v16sf){ __f, __f, __f, __f,
00125                     __f, __f, __f, __f,
00126                     __f, __f, __f, __f,
00127                     __f, __f, __f, __f };
00128 }
00129 
00130 static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00131 _mm512_broadcastsd_pd(__m128d __X)
00132 {
00133   double __d = __X[0];
00134   return (__v8df){ __d, __d, __d, __d,
00135                    __d, __d, __d, __d };
00136 }
00137 
00138 /* Cast between vector types */
00139 
00140 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
00141 _mm512_castpd256_pd512(__m256d __a)
00142 {
00143   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
00144 }
00145 
00146 static __inline __m512 __attribute__((__always_inline__, __nodebug__))
00147 _mm512_castps256_ps512(__m256 __a)
00148 {
00149   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
00150                                           -1, -1, -1, -1, -1, -1, -1, -1);
00151 }
00152 
00153 static __inline __m128d __attribute__((__always_inline__, __nodebug__))
00154 _mm512_castpd512_pd128(__m512d __a)
00155 {
00156   return __builtin_shufflevector(__a, __a, 0, 1);
00157 }
00158 
00159 static __inline __m128 __attribute__((__always_inline__, __nodebug__))
00160 _mm512_castps512_ps128(__m512 __a)
00161 {
00162   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
00163 }
00164 
00165 /* Arithmetic */
00166 
00167 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
00168 _mm512_add_pd(__m512d __a, __m512d __b)
00169 {
00170   return __a + __b;
00171 }
00172 
00173 static __inline __m512 __attribute__((__always_inline__, __nodebug__))
00174 _mm512_add_ps(__m512 __a, __m512 __b)
00175 {
00176   return __a + __b;
00177 }
00178 
00179 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
00180 _mm512_mul_pd(__m512d __a, __m512d __b)
00181 {
00182   return __a * __b;
00183 }
00184 
00185 static __inline __m512 __attribute__((__always_inline__, __nodebug__))
00186 _mm512_mul_ps(__m512 __a, __m512 __b)
00187 {
00188   return __a * __b;
00189 }
00190 
00191 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
00192 _mm512_sub_pd(__m512d __a, __m512d __b)
00193 {
00194   return __a - __b;
00195 }
00196 
00197 static __inline __m512 __attribute__((__always_inline__, __nodebug__))
00198 _mm512_sub_ps(__m512 __a, __m512 __b)
00199 {
00200   return __a - __b;
00201 }
00202 
00203 static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00204 _mm512_max_pd(__m512d __A, __m512d __B)
00205 {
00206   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
00207              (__v8df) __B,
00208              (__v8df)
00209              _mm512_setzero_pd (),
00210              (__mmask8) -1,
00211              _MM_FROUND_CUR_DIRECTION);
00212 }
00213 
00214 static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00215 _mm512_max_ps(__m512 __A, __m512 __B)
00216 {
00217   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
00218             (__v16sf) __B,
00219             (__v16sf)
00220             _mm512_setzero_ps (),
00221             (__mmask16) -1,
00222             _MM_FROUND_CUR_DIRECTION);
00223 }
00224 
00225 static __inline __m512i
00226 __attribute__ ((__always_inline__, __nodebug__))
00227 _mm512_max_epi32(__m512i __A, __m512i __B)
00228 {
00229   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
00230               (__v16si) __B,
00231               (__v16si)
00232               _mm512_setzero_si512 (),
00233               (__mmask16) -1);
00234 }
00235 
00236 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00237 _mm512_max_epu32(__m512i __A, __m512i __B)
00238 {
00239   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
00240               (__v16si) __B,
00241               (__v16si)
00242               _mm512_setzero_si512 (),
00243               (__mmask16) -1);
00244 }
00245 
00246 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00247 _mm512_max_epi64(__m512i __A, __m512i __B)
00248 {
00249   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
00250               (__v8di) __B,
00251               (__v8di)
00252               _mm512_setzero_si512 (),
00253               (__mmask8) -1);
00254 }
00255 
00256 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00257 _mm512_max_epu64(__m512i __A, __m512i __B)
00258 {
00259   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
00260               (__v8di) __B,
00261               (__v8di)
00262               _mm512_setzero_si512 (),
00263               (__mmask8) -1);
00264 }
00265 
00266 static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00267 _mm512_min_pd(__m512d __A, __m512d __B)
00268 {
00269   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
00270              (__v8df) __B,
00271              (__v8df)
00272              _mm512_setzero_pd (),
00273              (__mmask8) -1,
00274              _MM_FROUND_CUR_DIRECTION);
00275 }
00276 
00277 static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00278 _mm512_min_ps(__m512 __A, __m512 __B)
00279 {
00280   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
00281             (__v16sf) __B,
00282             (__v16sf)
00283             _mm512_setzero_ps (),
00284             (__mmask16) -1,
00285             _MM_FROUND_CUR_DIRECTION);
00286 }
00287 
00288 static __inline __m512i
00289 __attribute__ ((__always_inline__, __nodebug__))
00290 _mm512_min_epi32(__m512i __A, __m512i __B)
00291 {
00292   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
00293               (__v16si) __B,
00294               (__v16si)
00295               _mm512_setzero_si512 (),
00296               (__mmask16) -1);
00297 }
00298 
00299 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00300 _mm512_min_epu32(__m512i __A, __m512i __B)
00301 {
00302   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
00303               (__v16si) __B,
00304               (__v16si)
00305               _mm512_setzero_si512 (),
00306               (__mmask16) -1);
00307 }
00308 
00309 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00310 _mm512_min_epi64(__m512i __A, __m512i __B)
00311 {
00312   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
00313               (__v8di) __B,
00314               (__v8di)
00315               _mm512_setzero_si512 (),
00316               (__mmask8) -1);
00317 }
00318 
00319 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00320 _mm512_min_epu64(__m512i __A, __m512i __B)
00321 {
00322   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
00323               (__v8di) __B,
00324               (__v8di)
00325               _mm512_setzero_si512 (),
00326               (__mmask8) -1);
00327 }
00328 
00329 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00330 _mm512_mul_epi32(__m512i __X, __m512i __Y)
00331 {
00332   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
00333               (__v16si) __Y,
00334               (__v8di)
00335               _mm512_setzero_si512 (),
00336               (__mmask8) -1);
00337 }
00338 
00339 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00340 _mm512_mul_epu32(__m512i __X, __m512i __Y)
00341 {
00342   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
00343                (__v16si) __Y,
00344                (__v8di)
00345                _mm512_setzero_si512 (),
00346                (__mmask8) -1);
00347 }
00348 
00349 static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00350 _mm512_sqrt_pd(__m512d a)
00351 {
00352   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
00353                                                 (__v8df) _mm512_setzero_pd (),
00354                                                 (__mmask8) -1,
00355                                                 _MM_FROUND_CUR_DIRECTION);
00356 }
00357 
00358 static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00359 _mm512_sqrt_ps(__m512 a)
00360 {
00361   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
00362                                                (__v16sf) _mm512_setzero_ps (),
00363                                                (__mmask16) -1,
00364                                                _MM_FROUND_CUR_DIRECTION);
00365 }
00366 
00367 static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00368 _mm512_rsqrt14_pd(__m512d __A)
00369 {
00370   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
00371                  (__v8df)
00372                  _mm512_setzero_pd (),
00373                  (__mmask8) -1);}
00374 
00375 static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00376 _mm512_rsqrt14_ps(__m512 __A)
00377 {
00378   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
00379                 (__v16sf)
00380                 _mm512_setzero_ps (),
00381                 (__mmask16) -1);
00382 }
00383 
00384 static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00385 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
00386 {
00387   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
00388              (__v4sf) __B,
00389              (__v4sf)
00390              _mm_setzero_ps (),
00391              (__mmask8) -1);
00392 }
00393 
00394 static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00395 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
00396 {
00397   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
00398               (__v2df) __B,
00399               (__v2df)
00400               _mm_setzero_pd (),
00401               (__mmask8) -1);
00402 }
00403 
00404 static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00405 _mm512_rcp14_pd(__m512d __A)
00406 {
00407   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
00408                (__v8df)
00409                _mm512_setzero_pd (),
00410                (__mmask8) -1);
00411 }
00412 
00413 static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00414 _mm512_rcp14_ps(__m512 __A)
00415 {
00416   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
00417               (__v16sf)
00418               _mm512_setzero_ps (),
00419               (__mmask16) -1);
00420 }
00421 static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00422 _mm_rcp14_ss(__m128 __A, __m128 __B)
00423 {
00424   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
00425                  (__v4sf) __B,
00426                  (__v4sf)
00427                  _mm_setzero_ps (),
00428                  (__mmask8) -1);
00429 }
00430 
00431 static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00432 _mm_rcp14_sd(__m128d __A, __m128d __B)
00433 {
00434   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
00435             (__v2df) __B,
00436             (__v2df)
00437             _mm_setzero_pd (),
00438             (__mmask8) -1);
00439 }
00440 
00441 static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00442 _mm512_floor_ps(__m512 __A)
00443 {
00444   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
00445                                                   _MM_FROUND_FLOOR,
00446                                                   (__v16sf) __A, -1,
00447                                                   _MM_FROUND_CUR_DIRECTION);
00448 }
00449 
00450 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
00451 _mm512_floor_pd(__m512d __A)
00452 {
00453   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
00454                                                    _MM_FROUND_FLOOR,
00455                                                    (__v8df) __A, -1,
00456                                                    _MM_FROUND_CUR_DIRECTION);
00457 }
00458 
00459 static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00460 _mm512_ceil_ps(__m512 __A)
00461 {
00462   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
00463                                                   _MM_FROUND_CEIL,
00464                                                   (__v16sf) __A, -1,
00465                                                   _MM_FROUND_CUR_DIRECTION);
00466 }
00467 
00468 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
00469 _mm512_ceil_pd(__m512d __A)
00470 {
00471   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
00472                                                    _MM_FROUND_CEIL,
00473                                                    (__v8df) __A, -1,
00474                                                    _MM_FROUND_CUR_DIRECTION);
00475 }
00476 
00477 static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
00478 _mm512_abs_epi64(__m512i __A)
00479 {
00480   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
00481              (__v8di)
00482              _mm512_setzero_si512 (),
00483              (__mmask8) -1);
00484 }
00485 
00486 static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
00487 _mm512_abs_epi32(__m512i __A)
00488 {
00489   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
00490              (__v16si)
00491              _mm512_setzero_si512 (),
00492              (__mmask16) -1);
00493 }
00494 
00495 static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00496 _mm512_roundscale_ps(__m512 __A, const int __imm)
00497 {
00498   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
00499               (__v16sf) __A, -1,
00500               _MM_FROUND_CUR_DIRECTION);
00501 }
00502 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
00503 _mm512_roundscale_pd(__m512d __A, const int __imm)
00504 {
00505   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
00506                (__v8df) __A, -1,
00507                _MM_FROUND_CUR_DIRECTION);
00508 }
00509 
00510 static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00511 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
00512 {
00513   return (__m512d)
00514     __builtin_ia32_vfmaddpd512_mask(__A,
00515                                     __B,
00516                                     __C,
00517                                     (__mmask8) -1,
00518                                     _MM_FROUND_CUR_DIRECTION);
00519 }
00520 
00521 static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00522 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
00523 {
00524   return (__m512d)
00525     __builtin_ia32_vfmsubpd512_mask(__A,
00526                                     __B,
00527                                     __C,
00528                                     (__mmask8) -1,
00529                                     _MM_FROUND_CUR_DIRECTION);
00530 }
00531 
00532 static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
00533 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
00534 {
00535   return (__m512d)
00536     __builtin_ia32_vfnmaddpd512_mask(__A,
00537                                      __B,
00538                                      __C,
00539                                      (__mmask8) -1,
00540                                      _MM_FROUND_CUR_DIRECTION);
00541 }
00542 
00543 static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00544 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
00545 {
00546   return (__m512)
00547     __builtin_ia32_vfmaddps512_mask(__A,
00548                                     __B,
00549                                     __C,
00550                                     (__mmask16) -1,
00551                                     _MM_FROUND_CUR_DIRECTION);
00552 }
00553 
00554 static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00555 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
00556 {
00557   return (__m512)
00558     __builtin_ia32_vfmsubps512_mask(__A,
00559                                     __B,
00560                                     __C,
00561                                     (__mmask16) -1,
00562                                     _MM_FROUND_CUR_DIRECTION);
00563 }
00564 
00565 static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
00566 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
00567 {
00568   return (__m512)
00569     __builtin_ia32_vfnmaddps512_mask(__A,
00570                                      __B,
00571                                      __C,
00572                                      (__mmask16) -1,
00573                                      _MM_FROUND_CUR_DIRECTION);
00574 }
00575 
00576 /* Vector permutations */
00577 
00578 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00579 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
00580 {
00581   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
00582                                                        /* idx */ ,
00583                                                        (__v16si) __A,
00584                                                        (__v16si) __B,
00585                                                        (__mmask16) -1);
00586 }
00587 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00588 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
00589 {
00590   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
00591                                                        /* idx */ ,
00592                                                        (__v8di) __A,
00593                                                        (__v8di) __B,
00594                                                        (__mmask8) -1);
00595 }
00596 
00597 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
00598 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
00599 {
00600   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
00601                                                         /* idx */ ,
00602                                                         (__v8df) __A,
00603                                                         (__v8df) __B,
00604                                                         (__mmask8) -1);
00605 }
00606 static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00607 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
00608 {
00609   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
00610                                                        /* idx */ ,
00611                                                        (__v16sf) __A,
00612                                                        (__v16sf) __B,
00613                                                        (__mmask16) -1);
00614 }
00615 
00616 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00617 _mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
00618 {
00619   return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
00620                                                  (__v8di)__B,
00621                                                  __I,
00622                                                  (__v8di)_mm512_setzero_si512(),
00623                                                  (__mmask8) -1);
00624 }
00625 
00626 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00627 _mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
00628 {
00629   return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
00630                                                 (__v16si)__B,
00631                                                 __I,
00632                                                 (__v16si)_mm512_setzero_si512(),
00633                                                 (__mmask16) -1);
00634 }
00635 
00636 /* Vector Blend */
00637 
00638 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
00639 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
00640 {
00641   return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
00642                  (__v8df) __W,
00643                  (__mmask8) __U);
00644 }
00645 
00646 static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00647 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
00648 {
00649   return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
00650                 (__v16sf) __W,
00651                 (__mmask16) __U);
00652 }
00653 
00654 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00655 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
00656 {
00657   return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
00658                 (__v8di) __W,
00659                 (__mmask8) __U);
00660 }
00661 
00662 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00663 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
00664 {
00665   return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
00666                 (__v16si) __W,
00667                 (__mmask16) __U);
00668 }
00669 
00670 /* Compare */
00671 
00672 static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
00673 _mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
00674 {
00675   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
00676                (__v16sf) b, p, (__mmask16) -1,
00677                _MM_FROUND_CUR_DIRECTION);
00678 }
00679 
00680 static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__)) 
00681 _mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
00682 {
00683   return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
00684               (__v8df) __Y, __P,
00685               (__mmask8) -1,
00686               _MM_FROUND_CUR_DIRECTION);
00687 }
00688 
00689 /* Conversion */
00690 
00691 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00692 _mm512_cvttps_epu32(__m512 __A)
00693 {
00694   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
00695                   (__v16si)
00696                   _mm512_setzero_si512 (),
00697                   (__mmask16) -1,
00698                   _MM_FROUND_CUR_DIRECTION);
00699 }
00700 
00701 static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
00702 _mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
00703 {
00704   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
00705                (__v16sf)
00706                _mm512_setzero_ps (),
00707                (__mmask16) -1,
00708                __R);
00709 }
00710 
00711 static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
00712 _mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
00713 {
00714   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
00715                (__v16sf)
00716                _mm512_setzero_ps (),
00717                (__mmask16) -1,
00718                __R);
00719 }
00720 
00721 static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
00722 _mm512_cvtepi32_pd(__m256i __A)
00723 {
00724   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
00725                 (__v8df)
00726                 _mm512_setzero_pd (),
00727                 (__mmask8) -1);
00728 }
00729 
00730 static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
00731 _mm512_cvtepu32_pd(__m256i __A)
00732 {
00733   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
00734                 (__v8df)
00735                 _mm512_setzero_pd (),
00736                 (__mmask8) -1);
00737 }
00738 static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
00739 _mm512_cvt_roundpd_ps(__m512d __A, const int __R)
00740 {
00741   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
00742                (__v8sf)
00743                _mm256_setzero_ps (),
00744                (__mmask8) -1,
00745                __R);
00746 }
00747 
00748 static  __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
00749 _mm512_cvtps_ph(__m512 __A, const int __I)
00750 {
00751   return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
00752                  __I,
00753                  (__v16hi)
00754                  _mm256_setzero_si256 (),
00755                  -1);
00756 }
00757 
00758 static  __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00759 _mm512_cvtph_ps(__m256i __A)
00760 {
00761   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
00762                 (__v16sf)
00763                 _mm512_setzero_ps (),
00764                 (__mmask16) -1,
00765                 _MM_FROUND_CUR_DIRECTION);
00766 }
00767 
00768 static __inline __m512i __attribute__((__always_inline__, __nodebug__))
00769 _mm512_cvttps_epi32(__m512 a)
00770 {
00771   return (__m512i)
00772     __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
00773                                      (__v16si) _mm512_setzero_si512 (),
00774                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
00775 }
00776 
00777 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
00778 _mm512_cvttpd_epi32(__m512d a)
00779 {
00780   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
00781                                                    (__v8si)_mm256_setzero_si256(),
00782                                                    (__mmask8) -1,
00783                                                     _MM_FROUND_CUR_DIRECTION);
00784 }
00785 
00786 static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
00787 _mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
00788 {
00789   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
00790                  (__v8si)
00791                  _mm256_setzero_si256 (),
00792                  (__mmask8) -1,
00793                  __R);
00794 }
00795 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00796 _mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
00797 {
00798   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
00799                  (__v16si)
00800                  _mm512_setzero_si512 (),
00801                  (__mmask16) -1,
00802                  __R);
00803 }
00804 
00805 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00806 _mm512_cvt_roundps_epi32(__m512 __A, const int __R)
00807 {
00808   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
00809                 (__v16si)
00810                 _mm512_setzero_si512 (),
00811                 (__mmask16) -1,
00812                 __R);
00813 }
00814 static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
00815 _mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
00816 {
00817   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
00818                 (__v8si)
00819                 _mm256_setzero_si256 (),
00820                 (__mmask8) -1,
00821                 __R);
00822 }
00823 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00824 _mm512_cvt_roundps_epu32(__m512 __A, const int __R)
00825 {
00826   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
00827                 (__v16si)
00828                 _mm512_setzero_si512 (),
00829                 (__mmask16) -1,
00830                 __R);
00831 }
00832 static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
00833 _mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
00834 {
00835   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
00836                 (__v8si)
00837                 _mm256_setzero_si256 (),
00838                 (__mmask8) -1,
00839                 __R);
00840 }
00841 
00842 /* Bit Test */
00843 
00844 static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
00845 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
00846 {
00847   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
00848             (__v16si) __B,
00849             (__mmask16) -1);
00850 }
00851 
00852 static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
00853 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
00854 {
00855   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
00856                  (__v8di) __B,
00857                  (__mmask8) -1);
00858 }
00859 
00860 /* SIMD load ops */
00861 
00862 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00863 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
00864 {
00865   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
00866                                                      (__v16si)
00867                                                      _mm512_setzero_si512 (),
00868                                                      (__mmask16) __U);
00869 }
00870 
00871 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
00872 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
00873 {
00874   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
00875                                                      (__v8di)
00876                                                      _mm512_setzero_si512 (),
00877                                                      (__mmask8) __U);
00878 }
00879 
00880 static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
00881 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
00882 {
00883   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
00884                                                   (__v16sf)
00885                                                   _mm512_setzero_ps (),
00886                                                   (__mmask16) __U);
00887 }
00888 
00889 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
00890 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
00891 {
00892   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
00893                                                    (__v8df)
00894                                                    _mm512_setzero_pd (),
00895                                                    (__mmask8) __U);
00896 }
00897 
00898 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
00899 _mm512_loadu_pd(double const *__p)
00900 {
00901   struct __loadu_pd {
00902     __m512d __v;
00903   } __attribute__((packed, may_alias));
00904   return ((struct __loadu_pd*)__p)->__v;
00905 }
00906 
00907 static __inline __m512 __attribute__((__always_inline__, __nodebug__))
00908 _mm512_loadu_ps(float const *__p)
00909 {
00910   struct __loadu_ps {
00911     __m512 __v;
00912   } __attribute__((packed, may_alias));
00913   return ((struct __loadu_ps*)__p)->__v;
00914 }
00915 
00916 /* SIMD store ops */
00917 
00918 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00919 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
00920 {
00921   __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
00922                                      (__mmask8) __U);
00923 }
00924 
00925 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00926 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
00927 {
00928   __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
00929                                      (__mmask16) __U);
00930 }
00931 
00932 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00933 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
00934 {
00935   __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
00936 }
00937 
00938 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00939 _mm512_storeu_pd(void *__P, __m512d __A)
00940 {
00941   __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
00942 }
00943 
00944 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00945 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
00946 {
00947   __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
00948                                    (__mmask16) __U);
00949 }
00950 
00951 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00952 _mm512_storeu_ps(void *__P, __m512 __A)
00953 {
00954   __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
00955 }
00956 
00957 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00958 _mm512_store_ps(void *__P, __m512 __A)
00959 {
00960   *(__m512*)__P = __A;
00961 }
00962 
00963 static __inline void __attribute__ ((__always_inline__, __nodebug__))
00964 _mm512_store_pd(void *__P, __m512d __A)
00965 {
00966   *(__m512d*)__P = __A;
00967 }
00968 
00969 /* Mask ops */
00970 
00971 static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
00972 _mm512_knot(__mmask16 __M)
00973 {
00974   return __builtin_ia32_knothi(__M);
00975 }
00976 
00977 /* Integer compare */
00978 
00979 static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
00980 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
00981   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
00982                                                    (__mmask16)-1);
00983 }
00984 
00985 static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
00986 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
00987   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
00988                                                    __u);
00989 }
00990 
00991 static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
00992 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
00993   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
00994                                                   __u);
00995 }
00996 
00997 static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
00998 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
00999   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
01000                                                   (__mmask8)-1);
01001 }
01002 
01003 #endif // __AVX512FINTRIN_H