clang API Documentation

mmintrin.h
Go to the documentation of this file.
00001 /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
00002  *
00003  * Permission is hereby granted, free of charge, to any person obtaining a copy
00004  * of this software and associated documentation files (the "Software"), to deal
00005  * in the Software without restriction, including without limitation the rights
00006  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00007  * copies of the Software, and to permit persons to whom the Software is
00008  * furnished to do so, subject to the following conditions:
00009  *
00010  * The above copyright notice and this permission notice shall be included in
00011  * all copies or substantial portions of the Software.
00012  *
00013  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00014  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00015  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00016  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00017  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00018  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00019  * THE SOFTWARE.
00020  *
00021  *===-----------------------------------------------------------------------===
00022  */
00023 
00024 #ifndef __MMINTRIN_H
00025 #define __MMINTRIN_H
00026 
00027 #ifndef __MMX__
00028 #error "MMX instruction set not enabled"
00029 #else
00030 
/* Public 8-byte MMX vector type; declared as a vector of long long. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/*
 * Internal 8-byte lane views used when casting __m64 operands for the
 * builtins: chars, shorts and ints respectively.
 */
typedef char __v8qi __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
00036 
/* Invokes the EMMS builtin; takes no arguments and yields no value. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_empty(void)
{
    __builtin_ia32_emms();
    return;
}
00042 
00043 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00044 _mm_cvtsi32_si64(int __i)
00045 {
00046     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
00047 }
00048 
00049 static __inline__ int __attribute__((__always_inline__, __nodebug__))
00050 _mm_cvtsi64_si32(__m64 __m)
00051 {
00052     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
00053 }
00054 
00055 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00056 _mm_cvtsi64_m64(long long __i)
00057 {
00058     return (__m64)__i;
00059 }
00060 
00061 static __inline__ long long __attribute__((__always_inline__, __nodebug__))
00062 _mm_cvtm64_si64(__m64 __m)
00063 {
00064     return (long long)__m;
00065 }
00066 
00067 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00068 _mm_packs_pi16(__m64 __m1, __m64 __m2)
00069 {
00070     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
00071 }
00072 
00073 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00074 _mm_packs_pi32(__m64 __m1, __m64 __m2)
00075 {
00076     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
00077 }
00078 
00079 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00080 _mm_packs_pu16(__m64 __m1, __m64 __m2)
00081 {
00082     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
00083 }
00084 
00085 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00086 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
00087 {
00088     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
00089 }
00090 
00091 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00092 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
00093 {
00094     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
00095 }
00096 
00097 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00098 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
00099 {
00100     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
00101 }
00102 
00103 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00104 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
00105 {
00106     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
00107 }
00108 
00109 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00110 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
00111 {
00112     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
00113 }
00114 
00115 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00116 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
00117 {
00118     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
00119 }
00120 
00121 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00122 _mm_add_pi8(__m64 __m1, __m64 __m2)
00123 {
00124     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
00125 }
00126 
00127 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00128 _mm_add_pi16(__m64 __m1, __m64 __m2)
00129 {
00130     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
00131 }
00132 
00133 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00134 _mm_add_pi32(__m64 __m1, __m64 __m2)
00135 {
00136     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
00137 }
00138 
00139 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00140 _mm_adds_pi8(__m64 __m1, __m64 __m2) 
00141 {
00142     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
00143 }
00144 
00145 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00146 _mm_adds_pi16(__m64 __m1, __m64 __m2)
00147 {
00148     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);    
00149 }
00150 
00151 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00152 _mm_adds_pu8(__m64 __m1, __m64 __m2) 
00153 {
00154     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
00155 }
00156  
00157 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00158 _mm_adds_pu16(__m64 __m1, __m64 __m2) 
00159 {
00160     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
00161 }
00162 
00163 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00164 _mm_sub_pi8(__m64 __m1, __m64 __m2)
00165 {
00166     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
00167 }
00168  
00169 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00170 _mm_sub_pi16(__m64 __m1, __m64 __m2)
00171 {
00172     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
00173 }
00174  
00175 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00176 _mm_sub_pi32(__m64 __m1, __m64 __m2)
00177 {
00178     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
00179 }
00180 
00181 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00182 _mm_subs_pi8(__m64 __m1, __m64 __m2)
00183 {
00184     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
00185 }
00186 
00187 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00188 _mm_subs_pi16(__m64 __m1, __m64 __m2)
00189 {
00190     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
00191 }
00192 
00193 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00194 _mm_subs_pu8(__m64 __m1, __m64 __m2)
00195 {
00196     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
00197 }
00198  
00199 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00200 _mm_subs_pu16(__m64 __m1, __m64 __m2)
00201 {
00202     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
00203 }
00204 
00205 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00206 _mm_madd_pi16(__m64 __m1, __m64 __m2)
00207 {
00208     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
00209 }
00210 
00211 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00212 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
00213 {
00214     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
00215 }
00216  
00217 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00218 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 
00219 {
00220     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
00221 }
00222 
00223 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00224 _mm_sll_pi16(__m64 __m, __m64 __count)
00225 {
00226     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
00227 }
00228 
00229 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00230 _mm_slli_pi16(__m64 __m, int __count)
00231 {
00232     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);    
00233 }
00234 
00235 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00236 _mm_sll_pi32(__m64 __m, __m64 __count)
00237 {
00238     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
00239 }
00240 
00241 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00242 _mm_slli_pi32(__m64 __m, int __count)
00243 {
00244     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
00245 }
00246 
00247 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00248 _mm_sll_si64(__m64 __m, __m64 __count)
00249 {
00250     return (__m64)__builtin_ia32_psllq(__m, __count);
00251 }
00252 
00253 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00254 _mm_slli_si64(__m64 __m, int __count)
00255 {
00256     return (__m64)__builtin_ia32_psllqi(__m, __count);    
00257 }
00258 
00259 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00260 _mm_sra_pi16(__m64 __m, __m64 __count)
00261 {
00262     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);    
00263 }
00264 
00265 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00266 _mm_srai_pi16(__m64 __m, int __count)
00267 {
00268     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
00269 }
00270 
00271 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00272 _mm_sra_pi32(__m64 __m, __m64 __count)
00273 {
00274     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);    
00275 }
00276 
00277 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00278 _mm_srai_pi32(__m64 __m, int __count)
00279 {
00280     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
00281 }
00282 
00283 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00284 _mm_srl_pi16(__m64 __m, __m64 __count)
00285 {
00286     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);    
00287 }
00288 
00289 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00290 _mm_srli_pi16(__m64 __m, int __count)
00291 {
00292     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);    
00293 }
00294 
00295 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00296 _mm_srl_pi32(__m64 __m, __m64 __count)
00297 {
00298     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);       
00299 }
00300 
00301 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00302 _mm_srli_pi32(__m64 __m, int __count)
00303 {
00304     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
00305 }
00306 
00307 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00308 _mm_srl_si64(__m64 __m, __m64 __count)
00309 {
00310     return (__m64)__builtin_ia32_psrlq(__m, __count);    
00311 }
00312 
00313 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00314 _mm_srli_si64(__m64 __m, int __count)
00315 {
00316     return (__m64)__builtin_ia32_psrlqi(__m, __count);    
00317 }
00318 
00319 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00320 _mm_and_si64(__m64 __m1, __m64 __m2)
00321 {
00322     return __builtin_ia32_pand(__m1, __m2);
00323 }
00324 
00325 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00326 _mm_andnot_si64(__m64 __m1, __m64 __m2)
00327 {
00328     return __builtin_ia32_pandn(__m1, __m2);
00329 }
00330 
00331 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00332 _mm_or_si64(__m64 __m1, __m64 __m2)
00333 {
00334     return __builtin_ia32_por(__m1, __m2);
00335 }
00336 
00337 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00338 _mm_xor_si64(__m64 __m1, __m64 __m2)
00339 {
00340     return __builtin_ia32_pxor(__m1, __m2);
00341 }
00342 
00343 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00344 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
00345 {
00346     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
00347 }
00348 
00349 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00350 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
00351 {
00352     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
00353 }
00354 
00355 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00356 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
00357 {
00358     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
00359 }
00360 
00361 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00362 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
00363 {
00364     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
00365 }
00366 
00367 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00368 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
00369 {
00370     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
00371 }
00372 
00373 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00374 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
00375 {
00376     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
00377 }
00378 
00379 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00380 _mm_setzero_si64(void)
00381 {
00382     return (__m64){ 0LL };
00383 }
00384 
00385 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00386 _mm_set_pi32(int __i1, int __i0)
00387 {
00388     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
00389 }
00390 
00391 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00392 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
00393 {
00394     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
00395 }
00396 
00397 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00398 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
00399             char __b1, char __b0)
00400 {
00401     return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
00402                                                __b4, __b5, __b6, __b7);
00403 }
00404 
00405 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00406 _mm_set1_pi32(int __i)
00407 {
00408     return _mm_set_pi32(__i, __i);
00409 }
00410 
00411 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00412 _mm_set1_pi16(short __w)
00413 {
00414     return _mm_set_pi16(__w, __w, __w, __w);
00415 }
00416 
00417 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00418 _mm_set1_pi8(char __b)
00419 {
00420     return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
00421 }
00422 
00423 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00424 _mm_setr_pi32(int __i0, int __i1)
00425 {
00426     return _mm_set_pi32(__i1, __i0);
00427 }
00428 
00429 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00430 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
00431 {
00432     return _mm_set_pi16(__w3, __w2, __w1, __w0);
00433 }
00434 
00435 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
00436 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
00437              char __b6, char __b7)
00438 {
00439     return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
00440 }
00441 
00442 
/*
 * Aliases for compatibility.
 *
 * Each legacy _m_* name expands to the _mm_* intrinsic of this header that
 * it is paired with below. The 64-bit conversion aliases (_m_from_int64,
 * _m_to_int64) are included so that every conversion intrinsic has a
 * legacy spelling, matching the 32-bit _m_from_int / _m_to_int pair.
 */
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int _mm_cvtsi64_si32
#define _m_to_int64 _mm_cvtm64_si64
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32
#define _m_paddb _mm_add_pi8
#define _m_paddw _mm_add_pi16
#define _m_paddd _mm_add_pi32
#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16
#define _m_pslld _mm_sll_pi32
#define _m_pslldi _mm_slli_pi32
#define _m_psllq _mm_sll_si64
#define _m_psllqi _mm_slli_si64
#define _m_psraw _mm_sra_pi16
#define _m_psrawi _mm_srai_pi16
#define _m_psrad _mm_sra_pi32
#define _m_psradi _mm_srai_pi32
#define _m_psrlw _mm_srl_pi16
#define _m_psrlwi _mm_srli_pi16
#define _m_psrld _mm_srl_pi32
#define _m_psrldi _mm_srli_pi32
#define _m_psrlq _mm_srl_si64
#define _m_psrlqi _mm_srli_si64
#define _m_pand _mm_and_si64
#define _m_pandn _mm_andnot_si64
#define _m_por _mm_or_si64
#define _m_pxor _mm_xor_si64
#define _m_pcmpeqb _mm_cmpeq_pi8
#define _m_pcmpeqw _mm_cmpeq_pi16
#define _m_pcmpeqd _mm_cmpeq_pi32
#define _m_pcmpgtb _mm_cmpgt_pi8
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32
00499 
00500 #endif /* __MMX__ */
00501 
00502 #endif /* __MMINTRIN_H */
00503