clang API Documentation
00001 /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== 00002 * 00003 * Permission is hereby granted, free of charge, to any person obtaining a copy 00004 * of this software and associated documentation files (the "Software"), to deal 00005 * in the Software without restriction, including without limitation the rights 00006 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 00007 * copies of the Software, and to permit persons to whom the Software is 00008 * furnished to do so, subject to the following conditions: 00009 * 00010 * The above copyright notice and this permission notice shall be included in 00011 * all copies or substantial portions of the Software. 00012 * 00013 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00014 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00015 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00016 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00017 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 00018 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 00019 * THE SOFTWARE. 00020 * 00021 *===-----------------------------------------------------------------------=== 00022 */ 00023 00024 #ifndef __X86INTRIN_H 00025 #error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." 00026 #endif 00027 00028 #ifndef __FMA4INTRIN_H 00029 #define __FMA4INTRIN_H 00030 00031 #ifndef __FMA4__ 00032 # error "FMA4 instruction set is not enabled" 00033 #else 00034 00035 #include <pmmintrin.h> 00036 00037 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00038 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) 00039 { 00040 return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); 00041 } 00042 00043 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00044 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) 00045 { 00046 return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); 00047 } 00048 00049 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00050 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) 00051 { 00052 return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); 00053 } 00054 00055 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00056 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) 00057 { 00058 return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); 00059 } 00060 00061 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00062 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) 00063 { 00064 return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); 00065 } 00066 00067 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00068 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) 00069 { 00070 return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); 00071 } 00072 00073 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00074 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) 00075 { 00076 return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); 00077 } 00078 00079 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00080 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) 00081 { 00082 return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); 00083 } 00084 00085 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00086 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) 00087 { 00088 return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); 00089 } 00090 00091 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00092 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) 00093 { 00094 return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); 00095 } 00096 00097 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00098 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) 00099 { 00100 return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); 00101 } 00102 00103 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00104 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) 00105 { 00106 return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); 00107 } 00108 00109 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00110 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) 00111 { 00112 return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); 00113 } 00114 00115 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00116 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) 00117 { 00118 return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); 00119 } 00120 00121 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00122 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) 00123 { 00124 return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); 00125 } 00126 00127 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00128 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) 00129 { 00130 return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); 00131 } 00132 00133 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00134 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) 00135 { 00136 return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); 00137 } 00138 00139 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00140 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) 00141 { 00142 return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); 00143 } 00144 00145 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 00146 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) 00147 { 00148 return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); 00149 } 00150 00151 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 00152 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) 00153 { 00154 return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); 00155 } 00156 00157 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 00158 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) 00159 { 00160 return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); 00161 } 00162 00163 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 00164 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) 00165 { 00166 return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); 00167 } 00168 00169 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 00170 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) 00171 { 00172 return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); 00173 } 00174 00175 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 00176 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) 00177 { 00178 return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); 00179 } 00180 00181 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 00182 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) 00183 { 00184 return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); 00185 } 00186 00187 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 00188 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) 00189 { 00190 return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); 00191 } 00192 00193 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 00194 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) 00195 { 00196 return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); 00197 } 00198 00199 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 00200 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) 00201 { 00202 return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); 00203 } 00204 00205 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 00206 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) 00207 { 00208 return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); 00209 } 00210 00211 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 00212 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) 00213 { 00214 return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); 00215 } 00216 00217 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 00218 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) 00219 { 00220 return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); 00221 } 00222 00223 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 00224 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) 00225 { 00226 return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); 00227 } 00228 00229 #endif /* __FMA4__ */ 00230 00231 #endif /* __FMA4INTRIN_H */