clang API Documentation

fma4intrin.h
Go to the documentation of this file.
00001 /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
00002  *
00003  * Permission is hereby granted, free of charge, to any person obtaining a copy
00004  * of this software and associated documentation files (the "Software"), to deal
00005  * in the Software without restriction, including without limitation the rights
00006  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00007  * copies of the Software, and to permit persons to whom the Software is
00008  * furnished to do so, subject to the following conditions:
00009  *
00010  * The above copyright notice and this permission notice shall be included in
00011  * all copies or substantial portions of the Software.
00012  *
00013  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00014  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00015  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00016  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00017  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00018  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00019  * THE SOFTWARE.
00020  *
00021  *===-----------------------------------------------------------------------===
00022  */
00023 
/* Reject direct inclusion: per the diagnostic below, this header is meant to
 * be reached through <x86intrin.h> rather than included on its own. */
00024 #ifndef __X86INTRIN_H
00025 #error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
00026 #endif
00027 
00028 #ifndef __FMA4INTRIN_H
00029 #define __FMA4INTRIN_H
00030 
00031 #ifndef __FMA4__
00032 # error "FMA4 instruction set is not enabled"
00033 #else
00034 
00035 #include <pmmintrin.h>
00036 
00037 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00038 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
00039 {
00040   return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
00041 }
00042 
00043 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00044 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
00045 {
00046   return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
00047 }
00048 
00049 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00050 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
00051 {
00052   return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
00053 }
00054 
00055 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00056 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
00057 {
00058   return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
00059 }
00060 
00061 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00062 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
00063 {
00064   return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
00065 }
00066 
00067 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00068 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
00069 {
00070   return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
00071 }
00072 
00073 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00074 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
00075 {
00076   return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
00077 }
00078 
00079 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00080 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
00081 {
00082   return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
00083 }
00084 
00085 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00086 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
00087 {
00088   return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
00089 }
00090 
00091 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00092 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
00093 {
00094   return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
00095 }
00096 
00097 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00098 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
00099 {
00100   return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
00101 }
00102 
00103 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00104 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
00105 {
00106   return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
00107 }
00108 
00109 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00110 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
00111 {
00112   return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
00113 }
00114 
00115 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00116 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
00117 {
00118   return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
00119 }
00120 
00121 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00122 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
00123 {
00124   return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
00125 }
00126 
00127 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00128 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
00129 {
00130   return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
00131 }
00132 
00133 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00134 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
00135 {
00136   return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
00137 }
00138 
00139 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00140 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
00141 {
00142   return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
00143 }
00144 
00145 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
00146 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
00147 {
00148   return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
00149 }
00150 
00151 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
00152 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
00153 {
00154   return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
00155 }
00156 
00157 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
00158 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
00159 {
00160   return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
00161 }
00162 
00163 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
00164 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
00165 {
00166   return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
00167 }
00168 
00169 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
00170 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
00171 {
00172   return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
00173 }
00174 
00175 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
00176 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
00177 {
00178   return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
00179 }
00180 
00181 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
00182 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
00183 {
00184   return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
00185 }
00186 
00187 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
00188 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
00189 {
00190   return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
00191 }
00192 
00193 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
00194 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
00195 {
00196   return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
00197 }
00198 
00199 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
00200 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
00201 {
00202   return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
00203 }
00204 
00205 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
00206 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
00207 {
00208   return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
00209 }
00210 
00211 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
00212 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
00213 {
00214   return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
00215 }
00216 
00217 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
00218 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
00219 {
00220   return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
00221 }
00222 
00223 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
00224 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
00225 {
00226   return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
00227 }
00228 
00229 #endif /* __FMA4__ */
00230 
00231 #endif /* __FMA4INTRIN_H */