clang API Documentation
00001 /*===---- xopintrin.h - XOP intrinsics -------------------------------------=== 00002 * 00003 * Permission is hereby granted, free of charge, to any person obtaining a copy 00004 * of this software and associated documentation files (the "Software"), to deal 00005 * in the Software without restriction, including without limitation the rights 00006 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00007 * copies of the Software, and to permit persons to whom the Software is 00008 * furnished to do so, subject to the following conditions: 00009 * 00010 * The above copyright notice and this permission notice shall be included in 00011 * all copies or substantial portions of the Software. 00012 * 00013 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00014 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00015 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00016 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00017 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00018 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 00019 * THE SOFTWARE. 00020 * 00021 *===-----------------------------------------------------------------------=== 00022 */ 00023 00024 #ifndef __X86INTRIN_H 00025 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 00026 #endif 00027 00028 #ifndef __XOPINTRIN_H 00029 #define __XOPINTRIN_H 00030 00031 #ifndef __XOP__ 00032 # error "XOP instruction set is not enabled" 00033 #else 00034 00035 #include <fma4intrin.h> 00036 00037 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00038 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 00039 { 00040 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 00041 } 00042 00043 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00044 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 00045 { 00046 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 00047 } 00048 00049 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00050 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 00051 { 00052 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 00053 } 00054 00055 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00056 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 00057 { 00058 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 00059 } 00060 00061 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00062 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 00063 { 00064 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 00065 } 00066 00067 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00068 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 00069 { 00070 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 00071 } 00072 00073 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00074 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 00075 { 00076 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 00077 } 00078 00079 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00080 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 00081 { 00082 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 00083 } 00084 00085 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00086 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 00087 { 00088 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 00089 } 00090 00091 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00092 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 00093 { 00094 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 00095 } 00096 00097 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00098 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 00099 { 00100 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 00101 } 00102 00103 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00104 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 00105 { 00106 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 00107 } 00108 00109 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00110 _mm_haddw_epi8(__m128i __A) 00111 { 00112 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); 00113 } 00114 00115 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00116 _mm_haddd_epi8(__m128i __A) 00117 { 00118 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); 00119 } 00120 00121 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00122 _mm_haddq_epi8(__m128i __A) 00123 { 00124 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); 00125 } 00126 00127 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00128 _mm_haddd_epi16(__m128i __A) 00129 { 00130 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); 00131 } 00132 00133 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00134 _mm_haddq_epi16(__m128i __A) 00135 { 00136 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); 00137 } 00138 00139 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00140 _mm_haddq_epi32(__m128i __A) 00141 { 00142 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); 00143 } 00144 00145 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00146 _mm_haddw_epu8(__m128i __A) 00147 { 00148 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); 00149 } 00150 00151 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00152 _mm_haddd_epu8(__m128i __A) 00153 { 00154 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); 00155 } 00156 00157 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00158 _mm_haddq_epu8(__m128i __A) 00159 { 00160 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); 00161 } 00162 00163 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00164 _mm_haddd_epu16(__m128i __A) 00165 { 00166 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); 00167 } 00168 00169 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00170 _mm_haddq_epu16(__m128i __A) 00171 { 00172 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); 00173 } 00174 00175 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00176 _mm_haddq_epu32(__m128i __A) 00177 { 00178 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); 00179 } 00180 00181 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00182 _mm_hsubw_epi8(__m128i __A) 00183 { 00184 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); 00185 } 00186 00187 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00188 _mm_hsubd_epi16(__m128i __A) 00189 { 00190 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); 00191 } 00192 00193 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00194 _mm_hsubq_epi32(__m128i __A) 00195 { 00196 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); 00197 } 00198 00199 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00200 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 00201 { 00202 return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); 00203 } 00204 00205 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 00206 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) 00207 { 00208 return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); 00209 } 00210 00211 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00212 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 00213 { 00214 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 00215 } 00216 00217 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00218 _mm_rot_epi8(__m128i __A, __m128i __B) 00219 { 00220 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); 00221 } 00222 00223 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00224 _mm_rot_epi16(__m128i __A, __m128i __B) 00225 { 00226 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); 00227 } 00228 00229 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00230 _mm_rot_epi32(__m128i __A, __m128i __B) 00231 { 00232 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); 00233 } 00234 00235 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00236 _mm_rot_epi64(__m128i __A, __m128i __B) 00237 { 00238 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); 00239 } 00240 00241 #define _mm_roti_epi8(A, N) __extension__ ({ \ 00242 __m128i __A = (A); \ 00243 (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) 00244 00245 #define _mm_roti_epi16(A, N) __extension__ ({ \ 00246 __m128i __A = (A); \ 00247 (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) 00248 00249 #define _mm_roti_epi32(A, N) __extension__ ({ \ 00250 __m128i __A = (A); \ 00251 (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) 00252 00253 #define _mm_roti_epi64(A, N) __extension__ ({ \ 00254 __m128i __A = (A); \ 00255 (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) 00256 00257 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00258 _mm_shl_epi8(__m128i __A, __m128i __B) 00259 { 00260 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); 00261 } 00262 00263 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00264 _mm_shl_epi16(__m128i __A, __m128i __B) 00265 { 00266 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); 00267 } 00268 00269 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00270 _mm_shl_epi32(__m128i __A, __m128i __B) 00271 { 00272 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); 00273 } 00274 00275 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00276 _mm_shl_epi64(__m128i __A, __m128i __B) 00277 { 00278 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); 00279 } 00280 00281 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00282 _mm_sha_epi8(__m128i __A, __m128i __B) 00283 { 00284 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); 00285 } 00286 00287 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00288 _mm_sha_epi16(__m128i __A, __m128i __B) 00289 { 00290 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); 00291 } 00292 00293 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00294 _mm_sha_epi32(__m128i __A, __m128i __B) 00295 { 00296 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); 00297 } 00298 00299 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00300 _mm_sha_epi64(__m128i __A, __m128i __B) 00301 { 00302 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); 00303 } 00304 00305 #define _mm_com_epu8(A, B, N) __extension__ ({ \ 00306 __m128i __A = (A); \ 00307 __m128i __B = (B); \ 00308 (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) 00309 00310 #define _mm_com_epu16(A, B, N) __extension__ ({ \ 00311 __m128i __A = (A); \ 00312 __m128i __B = (B); \ 00313 (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) 00314 00315 #define _mm_com_epu32(A, B, N) __extension__ ({ \ 00316 __m128i __A = (A); \ 00317 __m128i __B = (B); \ 00318 (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) 00319 00320 #define _mm_com_epu64(A, B, N) __extension__ ({ \ 00321 __m128i __A = (A); \ 00322 __m128i __B = (B); \ 00323 (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) 00324 00325 #define _mm_com_epi8(A, B, N) __extension__ ({ \ 00326 __m128i __A = (A); \ 00327 __m128i __B = (B); \ 00328 (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) 00329 00330 #define _mm_com_epi16(A, B, N) __extension__ ({ \ 00331 __m128i __A = (A); \ 00332 __m128i __B = (B); \ 00333 (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) 00334 00335 #define _mm_com_epi32(A, B, N) __extension__ ({ \ 00336 __m128i __A = (A); \ 00337 __m128i __B = (B); \ 00338 (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) 00339 00340 #define _mm_com_epi64(A, B, N) __extension__ ({ \ 00341 __m128i __A = (A); \ 00342 __m128i __B = (B); \ 00343 (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) 00344 00345 #define _MM_PCOMCTRL_LT 0 00346 #define _MM_PCOMCTRL_LE 1 00347 #define _MM_PCOMCTRL_GT 2 00348 #define _MM_PCOMCTRL_GE 3 00349 #define _MM_PCOMCTRL_EQ 4 00350 #define _MM_PCOMCTRL_NEQ 5 00351 #define _MM_PCOMCTRL_FALSE 6 00352 #define _MM_PCOMCTRL_TRUE 7 00353 00354 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00355 _mm_comlt_epu8(__m128i __A, __m128i __B) 00356 { 00357 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); 00358 } 00359 00360 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00361 _mm_comle_epu8(__m128i __A, __m128i __B) 00362 { 00363 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); 00364 } 00365 00366 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00367 _mm_comgt_epu8(__m128i __A, __m128i __B) 00368 { 00369 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); 00370 } 00371 00372 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00373 _mm_comge_epu8(__m128i __A, __m128i __B) 00374 { 00375 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); 00376 } 00377 00378 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00379 _mm_comeq_epu8(__m128i __A, __m128i __B) 00380 { 00381 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); 00382 } 00383 00384 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00385 _mm_comneq_epu8(__m128i __A, __m128i __B) 00386 { 00387 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); 00388 } 00389 00390 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00391 _mm_comfalse_epu8(__m128i __A, __m128i __B) 00392 { 00393 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); 00394 } 00395 00396 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00397 _mm_comtrue_epu8(__m128i __A, __m128i __B) 00398 { 00399 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); 00400 } 00401 00402 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00403 _mm_comlt_epu16(__m128i __A, __m128i __B) 00404 { 00405 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); 00406 } 00407 00408 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00409 _mm_comle_epu16(__m128i __A, __m128i __B) 00410 { 00411 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); 00412 } 00413 00414 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00415 _mm_comgt_epu16(__m128i __A, __m128i __B) 00416 { 00417 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); 00418 } 00419 00420 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00421 _mm_comge_epu16(__m128i __A, __m128i __B) 00422 { 00423 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); 00424 } 00425 00426 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00427 _mm_comeq_epu16(__m128i __A, __m128i __B) 00428 { 00429 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); 00430 } 00431 00432 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00433 _mm_comneq_epu16(__m128i __A, __m128i __B) 00434 { 00435 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); 00436 } 00437 00438 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00439 _mm_comfalse_epu16(__m128i __A, __m128i __B) 00440 { 00441 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); 00442 } 00443 00444 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00445 _mm_comtrue_epu16(__m128i __A, __m128i __B) 00446 { 00447 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); 00448 } 00449 00450 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00451 _mm_comlt_epu32(__m128i __A, __m128i __B) 00452 { 00453 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); 00454 } 00455 00456 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00457 _mm_comle_epu32(__m128i __A, __m128i __B) 00458 { 00459 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); 00460 } 00461 00462 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00463 _mm_comgt_epu32(__m128i __A, __m128i __B) 00464 { 00465 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); 00466 } 00467 00468 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00469 _mm_comge_epu32(__m128i __A, __m128i __B) 00470 { 00471 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); 00472 } 00473 00474 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00475 _mm_comeq_epu32(__m128i __A, __m128i __B) 00476 { 00477 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); 00478 } 00479 00480 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00481 _mm_comneq_epu32(__m128i __A, __m128i __B) 00482 { 00483 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); 00484 } 00485 00486 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00487 _mm_comfalse_epu32(__m128i __A, __m128i __B) 00488 { 00489 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); 00490 } 00491 00492 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00493 _mm_comtrue_epu32(__m128i __A, __m128i __B) 00494 { 00495 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); 00496 } 00497 00498 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00499 _mm_comlt_epu64(__m128i __A, __m128i __B) 00500 { 00501 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); 00502 } 00503 00504 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00505 _mm_comle_epu64(__m128i __A, __m128i __B) 00506 { 00507 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); 00508 } 00509 00510 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00511 _mm_comgt_epu64(__m128i __A, __m128i __B) 00512 { 00513 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); 00514 } 00515 00516 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00517 _mm_comge_epu64(__m128i __A, __m128i __B) 00518 { 00519 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); 00520 } 00521 00522 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00523 _mm_comeq_epu64(__m128i __A, __m128i __B) 00524 { 00525 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); 00526 } 00527 00528 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00529 _mm_comneq_epu64(__m128i __A, __m128i __B) 00530 { 00531 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); 00532 } 00533 00534 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00535 _mm_comfalse_epu64(__m128i __A, __m128i __B) 00536 { 00537 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); 00538 } 00539 00540 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00541 _mm_comtrue_epu64(__m128i __A, __m128i __B) 00542 { 00543 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); 00544 } 00545 00546 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00547 _mm_comlt_epi8(__m128i __A, __m128i __B) 00548 { 00549 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); 00550 } 00551 00552 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00553 _mm_comle_epi8(__m128i __A, __m128i __B) 00554 { 00555 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); 00556 } 00557 00558 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00559 _mm_comgt_epi8(__m128i __A, __m128i __B) 00560 { 00561 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); 00562 } 00563 00564 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00565 _mm_comge_epi8(__m128i __A, __m128i __B) 00566 { 00567 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); 00568 } 00569 00570 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00571 _mm_comeq_epi8(__m128i __A, __m128i __B) 00572 { 00573 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); 00574 } 00575 00576 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00577 _mm_comneq_epi8(__m128i __A, __m128i __B) 00578 { 00579 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); 00580 } 00581 00582 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00583 _mm_comfalse_epi8(__m128i __A, __m128i __B) 00584 { 00585 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); 00586 } 00587 00588 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00589 _mm_comtrue_epi8(__m128i __A, __m128i __B) 00590 { 00591 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); 00592 } 00593 00594 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00595 _mm_comlt_epi16(__m128i __A, __m128i __B) 00596 { 00597 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); 00598 } 00599 00600 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00601 _mm_comle_epi16(__m128i __A, __m128i __B) 00602 { 00603 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); 00604 } 00605 00606 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00607 _mm_comgt_epi16(__m128i __A, __m128i __B) 00608 { 00609 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); 00610 } 00611 00612 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00613 _mm_comge_epi16(__m128i __A, __m128i __B) 00614 { 00615 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); 00616 } 00617 00618 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00619 _mm_comeq_epi16(__m128i __A, __m128i __B) 00620 { 00621 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); 00622 } 00623 00624 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00625 _mm_comneq_epi16(__m128i __A, __m128i __B) 00626 { 00627 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); 00628 } 00629 00630 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00631 _mm_comfalse_epi16(__m128i __A, __m128i __B) 00632 { 00633 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); 00634 } 00635 00636 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00637 _mm_comtrue_epi16(__m128i __A, __m128i __B) 00638 { 00639 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); 00640 } 00641 00642 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00643 _mm_comlt_epi32(__m128i __A, __m128i __B) 00644 { 00645 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); 00646 } 00647 00648 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00649 _mm_comle_epi32(__m128i __A, __m128i __B) 00650 { 00651 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); 00652 } 00653 00654 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00655 _mm_comgt_epi32(__m128i __A, __m128i __B) 00656 { 00657 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); 00658 } 00659 00660 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00661 _mm_comge_epi32(__m128i __A, __m128i __B) 00662 { 00663 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); 00664 } 00665 00666 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00667 _mm_comeq_epi32(__m128i __A, __m128i __B) 00668 { 00669 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); 00670 } 00671 00672 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00673 _mm_comneq_epi32(__m128i __A, __m128i __B) 00674 { 00675 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); 00676 } 00677 00678 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00679 _mm_comfalse_epi32(__m128i __A, __m128i __B) 00680 { 00681 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); 00682 } 00683 00684 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00685 _mm_comtrue_epi32(__m128i __A, __m128i __B) 00686 { 00687 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); 00688 } 00689 00690 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00691 _mm_comlt_epi64(__m128i __A, __m128i __B) 00692 { 00693 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); 00694 } 00695 00696 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00697 _mm_comle_epi64(__m128i __A, __m128i __B) 00698 { 00699 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); 00700 } 00701 00702 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00703 _mm_comgt_epi64(__m128i __A, __m128i __B) 00704 { 00705 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); 00706 } 00707 00708 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00709 _mm_comge_epi64(__m128i __A, __m128i __B) 00710 { 00711 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); 00712 } 00713 00714 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00715 _mm_comeq_epi64(__m128i __A, __m128i __B) 00716 { 00717 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); 00718 } 00719 00720 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00721 _mm_comneq_epi64(__m128i __A, __m128i __B) 00722 { 00723 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); 00724 } 00725 00726 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00727 _mm_comfalse_epi64(__m128i __A, __m128i __B) 00728 { 00729 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); 00730 } 00731 00732 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 00733 _mm_comtrue_epi64(__m128i __A, __m128i __B) 00734 { 00735 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); 00736 } 00737 00738 #define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ 00739 __m128d __X = (X); \ 00740 __m128d __Y = (Y); \ 00741 __m128i __C = (C); \ 00742 (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ 00743 (__v2di)__C, (I)); }) 00744 00745 #define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ 00746 __m256d __X = (X); \ 00747 __m256d __Y = (Y); \ 00748 __m256i __C = (C); \ 00749 (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ 00750 (__v4di)__C, (I)); }) 00751 00752 #define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ 00753 __m128 __X = (X); \ 00754 __m128 __Y = (Y); \ 00755 __m128i __C = (C); \ 00756 (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ 00757 (__v4si)__C, (I)); }) 00758 00759 #define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ 00760 __m256 __X = (X); \ 00761 __m256 __Y = (Y); \ 00762 __m256i __C = (C); \ 00763 (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ 00764 (__v8si)__C, (I)); }) 00765 00766 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00767 _mm_frcz_ss(__m128 __A) 00768 { 00769 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); 00770 } 00771 00772 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00773 _mm_frcz_sd(__m128d __A) 00774 { 00775 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); 00776 } 00777 00778 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00779 _mm_frcz_ps(__m128 __A) 00780 { 00781 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); 00782 } 00783 00784 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00785 _mm_frcz_pd(__m128d __A) 00786 { 00787 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); 00788 } 00789 00790 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 00791 _mm256_frcz_ps(__m256 __A) 00792 { 00793 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); 00794 } 00795 00796 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 00797 _mm256_frcz_pd(__m256d __A) 00798 { 00799 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); 00800 } 00801 00802 #endif /* __XOP__ */ 00803 00804 #endif /* __XOPINTRIN_H */