clang API Documentation
00001 /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 00002 * 00003 * Permission is hereby granted, free of charge, to any person obtaining a copy 00004 * of this software and associated documentation files (the "Software"), to deal 00005 * in the Software without restriction, including without limitation the rights 00006 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00007 * copies of the Software, and to permit persons to whom the Software is 00008 * furnished to do so, subject to the following conditions: 00009 * 00010 * The above copyright notice and this permission notice shall be included in 00011 * all copies or substantial portions of the Software. 00012 * 00013 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00014 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00015 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00016 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00017 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00018 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 00019 * THE SOFTWARE. 00020 * 00021 *===-----------------------------------------------------------------------=== 00022 */ 00023 00024 #ifndef __MMINTRIN_H 00025 #define __MMINTRIN_H 00026 00027 #ifndef __MMX__ 00028 #error "MMX instruction set not enabled" 00029 #else 00030 00031 typedef long long __m64 __attribute__((__vector_size__(8))); 00032 00033 typedef int __v2si __attribute__((__vector_size__(8))); 00034 typedef short __v4hi __attribute__((__vector_size__(8))); 00035 typedef char __v8qi __attribute__((__vector_size__(8))); 00036 00037 static __inline__ void __attribute__((__always_inline__, __nodebug__)) 00038 _mm_empty(void) 00039 { 00040 __builtin_ia32_emms(); 00041 } 00042 00043 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00044 _mm_cvtsi32_si64(int __i) 00045 { 00046 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 00047 } 00048 00049 static __inline__ int __attribute__((__always_inline__, __nodebug__)) 00050 _mm_cvtsi64_si32(__m64 __m) 00051 { 00052 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 00053 } 00054 00055 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00056 _mm_cvtsi64_m64(long long __i) 00057 { 00058 return (__m64)__i; 00059 } 00060 00061 static __inline__ long long __attribute__((__always_inline__, __nodebug__)) 00062 _mm_cvtm64_si64(__m64 __m) 00063 { 00064 return (long long)__m; 00065 } 00066 00067 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00068 _mm_packs_pi16(__m64 __m1, __m64 __m2) 00069 { 00070 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 00071 } 00072 00073 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00074 _mm_packs_pi32(__m64 __m1, __m64 __m2) 00075 { 00076 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 00077 } 00078 00079 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00080 _mm_packs_pu16(__m64 __m1, __m64 __m2) 00081 { 00082 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 00083 } 00084 00085 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00086 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 00087 { 00088 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 00089 } 00090 00091 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00092 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 00093 { 00094 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 00095 } 00096 00097 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00098 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 00099 { 00100 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 00101 } 00102 00103 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00104 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 00105 { 00106 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 00107 } 00108 00109 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00110 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 00111 { 00112 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 00113 } 00114 00115 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00116 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 00117 { 00118 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 00119 } 00120 00121 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00122 _mm_add_pi8(__m64 __m1, __m64 __m2) 00123 { 00124 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 00125 } 00126 00127 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00128 _mm_add_pi16(__m64 __m1, __m64 __m2) 00129 { 00130 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 00131 } 00132 00133 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00134 _mm_add_pi32(__m64 __m1, __m64 __m2) 00135 { 00136 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 00137 } 00138 00139 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00140 _mm_adds_pi8(__m64 __m1, __m64 __m2) 00141 { 00142 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 00143 } 00144 00145 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00146 _mm_adds_pi16(__m64 __m1, __m64 __m2) 00147 { 00148 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 00149 } 00150 00151 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00152 _mm_adds_pu8(__m64 __m1, __m64 __m2) 00153 { 00154 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 00155 } 00156 00157 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00158 _mm_adds_pu16(__m64 __m1, __m64 __m2) 00159 { 00160 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 00161 } 00162 00163 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00164 _mm_sub_pi8(__m64 __m1, __m64 __m2) 00165 { 00166 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 00167 } 00168 00169 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00170 _mm_sub_pi16(__m64 __m1, __m64 __m2) 00171 { 00172 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 00173 } 00174 00175 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00176 _mm_sub_pi32(__m64 __m1, __m64 __m2) 00177 { 00178 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 00179 } 00180 00181 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00182 _mm_subs_pi8(__m64 __m1, __m64 __m2) 00183 { 00184 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 00185 } 00186 00187 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00188 _mm_subs_pi16(__m64 __m1, __m64 __m2) 00189 { 00190 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 00191 } 00192 00193 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00194 _mm_subs_pu8(__m64 __m1, __m64 __m2) 00195 { 00196 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 00197 } 00198 00199 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00200 _mm_subs_pu16(__m64 __m1, __m64 __m2) 00201 { 00202 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 00203 } 00204 00205 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00206 _mm_madd_pi16(__m64 __m1, __m64 __m2) 00207 { 00208 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 00209 } 00210 00211 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00212 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 00213 { 00214 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 00215 } 00216 00217 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00218 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 00219 { 00220 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 00221 } 00222 00223 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00224 _mm_sll_pi16(__m64 __m, __m64 __count) 00225 { 00226 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 00227 } 00228 00229 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00230 _mm_slli_pi16(__m64 __m, int __count) 00231 { 00232 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 00233 } 00234 00235 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00236 _mm_sll_pi32(__m64 __m, __m64 __count) 00237 { 00238 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 00239 } 00240 00241 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00242 _mm_slli_pi32(__m64 __m, int __count) 00243 { 00244 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 00245 } 00246 00247 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00248 _mm_sll_si64(__m64 __m, __m64 __count) 00249 { 00250 return (__m64)__builtin_ia32_psllq(__m, __count); 00251 } 00252 00253 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00254 _mm_slli_si64(__m64 __m, int __count) 00255 { 00256 return (__m64)__builtin_ia32_psllqi(__m, __count); 00257 } 00258 00259 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00260 _mm_sra_pi16(__m64 __m, __m64 __count) 00261 { 00262 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 00263 } 00264 00265 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00266 _mm_srai_pi16(__m64 __m, int __count) 00267 { 00268 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 00269 } 00270 00271 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00272 _mm_sra_pi32(__m64 __m, __m64 __count) 00273 { 00274 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 00275 } 00276 00277 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00278 _mm_srai_pi32(__m64 __m, int __count) 00279 { 00280 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 00281 } 00282 00283 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00284 _mm_srl_pi16(__m64 __m, __m64 __count) 00285 { 00286 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 00287 } 00288 00289 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00290 _mm_srli_pi16(__m64 __m, int __count) 00291 { 00292 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 00293 } 00294 00295 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00296 _mm_srl_pi32(__m64 __m, __m64 __count) 00297 { 00298 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 00299 } 00300 00301 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00302 _mm_srli_pi32(__m64 __m, int __count) 00303 { 00304 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 00305 } 00306 00307 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00308 _mm_srl_si64(__m64 __m, __m64 __count) 00309 { 00310 return (__m64)__builtin_ia32_psrlq(__m, __count); 00311 } 00312 00313 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00314 _mm_srli_si64(__m64 __m, int __count) 00315 { 00316 return (__m64)__builtin_ia32_psrlqi(__m, __count); 00317 } 00318 00319 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00320 _mm_and_si64(__m64 __m1, __m64 __m2) 00321 { 00322 return __builtin_ia32_pand(__m1, __m2); 00323 } 00324 00325 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00326 _mm_andnot_si64(__m64 __m1, __m64 __m2) 00327 { 00328 return __builtin_ia32_pandn(__m1, __m2); 00329 } 00330 00331 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00332 _mm_or_si64(__m64 __m1, __m64 __m2) 00333 { 00334 return __builtin_ia32_por(__m1, __m2); 00335 } 00336 00337 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00338 _mm_xor_si64(__m64 __m1, __m64 __m2) 00339 { 00340 return __builtin_ia32_pxor(__m1, __m2); 00341 } 00342 00343 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00344 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 00345 { 00346 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 00347 } 00348 00349 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00350 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 00351 { 00352 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 00353 } 00354 00355 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00356 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 00357 { 00358 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 00359 } 00360 00361 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00362 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 00363 { 00364 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 00365 } 00366 00367 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00368 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 00369 { 00370 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 00371 } 00372 00373 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00374 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 00375 { 00376 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 00377 } 00378 00379 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00380 _mm_setzero_si64(void) 00381 { 00382 return (__m64){ 0LL }; 00383 } 00384 00385 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00386 _mm_set_pi32(int __i1, int __i0) 00387 { 00388 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 00389 } 00390 00391 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00392 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 00393 { 00394 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 00395 } 00396 00397 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00398 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 00399 char __b1, char __b0) 00400 { 00401 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 00402 __b4, __b5, __b6, __b7); 00403 } 00404 00405 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00406 _mm_set1_pi32(int __i) 00407 { 00408 return _mm_set_pi32(__i, __i); 00409 } 00410 00411 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00412 _mm_set1_pi16(short __w) 00413 { 00414 return _mm_set_pi16(__w, __w, __w, __w); 00415 } 00416 00417 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00418 _mm_set1_pi8(char __b) 00419 { 00420 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 00421 } 00422 00423 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00424 _mm_setr_pi32(int __i0, int __i1) 00425 { 00426 return _mm_set_pi32(__i1, __i0); 00427 } 00428 00429 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00430 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 00431 { 00432 return _mm_set_pi16(__w3, __w2, __w1, __w0); 00433 } 00434 00435 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 00436 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 00437 char __b6, char __b7) 00438 { 00439 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 00440 } 00441 00442 00443 /* Aliases for compatibility. */ 00444 #define _m_empty _mm_empty 00445 #define _m_from_int _mm_cvtsi32_si64 00446 #define _m_to_int _mm_cvtsi64_si32 00447 #define _m_packsswb _mm_packs_pi16 00448 #define _m_packssdw _mm_packs_pi32 00449 #define _m_packuswb _mm_packs_pu16 00450 #define _m_punpckhbw _mm_unpackhi_pi8 00451 #define _m_punpckhwd _mm_unpackhi_pi16 00452 #define _m_punpckhdq _mm_unpackhi_pi32 00453 #define _m_punpcklbw _mm_unpacklo_pi8 00454 #define _m_punpcklwd _mm_unpacklo_pi16 00455 #define _m_punpckldq _mm_unpacklo_pi32 00456 #define _m_paddb _mm_add_pi8 00457 #define _m_paddw _mm_add_pi16 00458 #define _m_paddd _mm_add_pi32 00459 #define _m_paddsb _mm_adds_pi8 00460 #define _m_paddsw _mm_adds_pi16 00461 #define _m_paddusb _mm_adds_pu8 00462 #define _m_paddusw _mm_adds_pu16 00463 #define _m_psubb _mm_sub_pi8 00464 #define _m_psubw _mm_sub_pi16 00465 #define _m_psubd _mm_sub_pi32 00466 #define _m_psubsb _mm_subs_pi8 00467 #define _m_psubsw _mm_subs_pi16 00468 #define _m_psubusb _mm_subs_pu8 00469 #define _m_psubusw _mm_subs_pu16 00470 #define _m_pmaddwd _mm_madd_pi16 00471 #define _m_pmulhw _mm_mulhi_pi16 00472 #define _m_pmullw _mm_mullo_pi16 00473 #define _m_psllw _mm_sll_pi16 00474 #define _m_psllwi _mm_slli_pi16 00475 #define _m_pslld _mm_sll_pi32 00476 #define _m_pslldi _mm_slli_pi32 00477 #define _m_psllq _mm_sll_si64 00478 #define _m_psllqi _mm_slli_si64 00479 #define _m_psraw _mm_sra_pi16 00480 #define _m_psrawi _mm_srai_pi16 00481 #define _m_psrad _mm_sra_pi32 00482 #define _m_psradi _mm_srai_pi32 00483 #define _m_psrlw _mm_srl_pi16 00484 #define _m_psrlwi _mm_srli_pi16 00485 #define _m_psrld _mm_srl_pi32 00486 #define _m_psrldi _mm_srli_pi32 00487 #define _m_psrlq _mm_srl_si64 00488 #define _m_psrlqi _mm_srli_si64 00489 #define _m_pand _mm_and_si64 00490 #define _m_pandn _mm_andnot_si64 00491 #define _m_por _mm_or_si64 00492 #define _m_pxor _mm_xor_si64 00493 #define _m_pcmpeqb _mm_cmpeq_pi8 00494 #define _m_pcmpeqw _mm_cmpeq_pi16 00495 #define _m_pcmpeqd _mm_cmpeq_pi32 00496 #define _m_pcmpgtb _mm_cmpgt_pi8 00497 #define _m_pcmpgtw _mm_cmpgt_pi16 00498 #define _m_pcmpgtd _mm_cmpgt_pi32 00499 00500 #endif /* __MMX__ */ 00501 00502 #endif /* __MMINTRIN_H */ 00503