LLVM API Documentation
00001 //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Define several functions to decode x86 specific shuffle semantics into a 00011 // generic vector mask. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "X86ShuffleDecode.h" 00016 #include "llvm/IR/Constants.h" 00017 #include "llvm/CodeGen/MachineValueType.h" 00018 00019 //===----------------------------------------------------------------------===// 00020 // Vector Mask Decoding 00021 //===----------------------------------------------------------------------===// 00022 00023 namespace llvm { 00024 00025 void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 00026 // Defaults the copying the dest value. 00027 ShuffleMask.push_back(0); 00028 ShuffleMask.push_back(1); 00029 ShuffleMask.push_back(2); 00030 ShuffleMask.push_back(3); 00031 00032 // Decode the immediate. 00033 unsigned ZMask = Imm & 15; 00034 unsigned CountD = (Imm >> 4) & 3; 00035 unsigned CountS = (Imm >> 6) & 3; 00036 00037 // CountS selects which input element to use. 00038 unsigned InVal = 4+CountS; 00039 // CountD specifies which element of destination to update. 00040 ShuffleMask[CountD] = InVal; 00041 // ZMask zaps values, potentially overriding the CountD elt. 00042 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 00043 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 00044 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 00045 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 00046 } 00047 00048 // <3,1> or <6,7,2,3> 00049 void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 00050 for (unsigned i = NElts/2; i != NElts; ++i) 00051 ShuffleMask.push_back(NElts+i); 00052 00053 for (unsigned i = NElts/2; i != NElts; ++i) 00054 ShuffleMask.push_back(i); 00055 } 00056 00057 // <0,2> or <0,1,4,5> 00058 void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 00059 for (unsigned i = 0; i != NElts/2; ++i) 00060 ShuffleMask.push_back(i); 00061 00062 for (unsigned i = 0; i != NElts/2; ++i) 00063 ShuffleMask.push_back(NElts+i); 00064 } 00065 00066 void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 00067 unsigned NumElts = VT.getVectorNumElements(); 00068 for (int i = 0, e = NumElts / 2; i < e; ++i) { 00069 ShuffleMask.push_back(2 * i); 00070 ShuffleMask.push_back(2 * i); 00071 } 00072 } 00073 00074 void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 00075 unsigned NumElts = VT.getVectorNumElements(); 00076 for (int i = 0, e = NumElts / 2; i < e; ++i) { 00077 ShuffleMask.push_back(2 * i + 1); 00078 ShuffleMask.push_back(2 * i + 1); 00079 } 00080 } 00081 00082 void DecodePALIGNRMask(MVT VT, unsigned Imm, 00083 SmallVectorImpl<int> &ShuffleMask) { 00084 unsigned NumElts = VT.getVectorNumElements(); 00085 unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); 00086 00087 unsigned NumLanes = VT.getSizeInBits() / 128; 00088 unsigned NumLaneElts = NumElts / NumLanes; 00089 00090 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 00091 for (unsigned i = 0; i != NumLaneElts; ++i) { 00092 unsigned Base = i + Offset; 00093 // if i+offset is out of this lane then we actually need the other source 00094 if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 00095 ShuffleMask.push_back(Base + l); 00096 } 00097 } 00098 } 00099 00100 /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 00101 /// VT indicates the type of the vector allowing it to handle different 00102 /// datatypes and vector widths. 00103 void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 00104 unsigned NumElts = VT.getVectorNumElements(); 00105 00106 unsigned NumLanes = VT.getSizeInBits() / 128; 00107 unsigned NumLaneElts = NumElts / NumLanes; 00108 00109 unsigned NewImm = Imm; 00110 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 00111 for (unsigned i = 0; i != NumLaneElts; ++i) { 00112 ShuffleMask.push_back(NewImm % NumLaneElts + l); 00113 NewImm /= NumLaneElts; 00114 } 00115 if (NumLaneElts == 4) NewImm = Imm; // reload imm 00116 } 00117 } 00118 00119 void DecodePSHUFHWMask(MVT VT, unsigned Imm, 00120 SmallVectorImpl<int> &ShuffleMask) { 00121 unsigned NumElts = VT.getVectorNumElements(); 00122 00123 for (unsigned l = 0; l != NumElts; l += 8) { 00124 unsigned NewImm = Imm; 00125 for (unsigned i = 0, e = 4; i != e; ++i) { 00126 ShuffleMask.push_back(l + i); 00127 } 00128 for (unsigned i = 4, e = 8; i != e; ++i) { 00129 ShuffleMask.push_back(l + 4 + (NewImm & 3)); 00130 NewImm >>= 2; 00131 } 00132 } 00133 } 00134 00135 void DecodePSHUFLWMask(MVT VT, unsigned Imm, 00136 SmallVectorImpl<int> &ShuffleMask) { 00137 unsigned NumElts = VT.getVectorNumElements(); 00138 00139 for (unsigned l = 0; l != NumElts; l += 8) { 00140 unsigned NewImm = Imm; 00141 for (unsigned i = 0, e = 4; i != e; ++i) { 00142 ShuffleMask.push_back(l + (NewImm & 3)); 00143 NewImm >>= 2; 00144 } 00145 for (unsigned i = 4, e = 8; i != e; ++i) { 00146 ShuffleMask.push_back(l + i); 00147 } 00148 } 00149 } 00150 00151 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 00152 /// the type of the vector allowing it to handle different datatypes and vector 00153 /// widths. 00154 void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 00155 unsigned NumElts = VT.getVectorNumElements(); 00156 00157 unsigned NumLanes = VT.getSizeInBits() / 128; 00158 unsigned NumLaneElts = NumElts / NumLanes; 00159 00160 unsigned NewImm = Imm; 00161 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 00162 // each half of a lane comes from different source 00163 for (unsigned s = 0; s != NumElts*2; s += NumElts) { 00164 for (unsigned i = 0; i != NumLaneElts/2; ++i) { 00165 ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 00166 NewImm /= NumLaneElts; 00167 } 00168 } 00169 if (NumLaneElts == 4) NewImm = Imm; // reload imm 00170 } 00171 } 00172 00173 /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 00174 /// and punpckh*. VT indicates the type of the vector allowing it to handle 00175 /// different datatypes and vector widths. 00176 void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 00177 unsigned NumElts = VT.getVectorNumElements(); 00178 00179 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 00180 // independently on 128-bit lanes. 00181 unsigned NumLanes = VT.getSizeInBits() / 128; 00182 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 00183 unsigned NumLaneElts = NumElts / NumLanes; 00184 00185 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 00186 for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 00187 ShuffleMask.push_back(i); // Reads from dest/src1 00188 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 00189 } 00190 } 00191 } 00192 00193 /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 00194 /// and punpckl*. VT indicates the type of the vector allowing it to handle 00195 /// different datatypes and vector widths. 00196 void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 00197 unsigned NumElts = VT.getVectorNumElements(); 00198 00199 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 00200 // independently on 128-bit lanes. 00201 unsigned NumLanes = VT.getSizeInBits() / 128; 00202 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 00203 unsigned NumLaneElts = NumElts / NumLanes; 00204 00205 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 00206 for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 00207 ShuffleMask.push_back(i); // Reads from dest/src1 00208 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 00209 } 00210 } 00211 } 00212 00213 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 00214 SmallVectorImpl<int> &ShuffleMask) { 00215 if (Imm & 0x88) 00216 return; // Not a shuffle 00217 00218 unsigned HalfSize = VT.getVectorNumElements()/2; 00219 00220 for (unsigned l = 0; l != 2; ++l) { 00221 unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 00222 for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 00223 ShuffleMask.push_back(i); 00224 } 00225 } 00226 00227 void DecodePSHUFBMask(const ConstantDataSequential *C, 00228 SmallVectorImpl<int> &ShuffleMask) { 00229 Type *MaskTy = C->getType(); 00230 assert(MaskTy->isVectorTy() && "Expected a vector constant mask!"); 00231 assert(MaskTy->getVectorElementType()->isIntegerTy(8) && 00232 "Expected i8 constant mask elements!"); 00233 int NumElements = MaskTy->getVectorNumElements(); 00234 // FIXME: Add support for AVX-512. 00235 assert((NumElements == 16 || NumElements == 32) && 00236 "Only 128-bit and 256-bit vectors supported!"); 00237 assert((unsigned)NumElements == C->getNumElements() && 00238 "Constant mask has a different number of elements!"); 00239 00240 ShuffleMask.reserve(NumElements); 00241 for (int i = 0; i < NumElements; ++i) { 00242 // For AVX vectors with 32 bytes the base of the shuffle is the half of the 00243 // vector we're inside. 00244 int Base = i < 16 ? 0 : 16; 00245 uint64_t Element = C->getElementAsInteger(i); 00246 // If the high bit (7) of the byte is set, the element is zeroed. 00247 if (Element & (1 << 7)) 00248 ShuffleMask.push_back(SM_SentinelZero); 00249 else { 00250 int Index = Base + Element; 00251 assert((Index >= 0 && Index < NumElements) && 00252 "Out of bounds shuffle index for pshub instruction!"); 00253 ShuffleMask.push_back(Index); 00254 } 00255 } 00256 } 00257 00258 void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, 00259 SmallVectorImpl<int> &ShuffleMask) { 00260 for (int i = 0, e = RawMask.size(); i < e; ++i) { 00261 uint64_t M = RawMask[i]; 00262 // For AVX vectors with 32 bytes the base of the shuffle is the half of 00263 // the vector we're inside. 00264 int Base = i < 16 ? 0 : 16; 00265 // If the high bit (7) of the byte is set, the element is zeroed. 00266 if (M & (1 << 7)) 00267 ShuffleMask.push_back(SM_SentinelZero); 00268 else { 00269 int Index = Base + M; 00270 assert((Index >= 0 && (unsigned)Index < RawMask.size()) && 00271 "Out of bounds shuffle index for pshub instruction!"); 00272 ShuffleMask.push_back(Index); 00273 } 00274 } 00275 } 00276 00277 void DecodeBLENDMask(MVT VT, unsigned Imm, 00278 SmallVectorImpl<int> &ShuffleMask) { 00279 int NumElements = VT.getVectorNumElements(); 00280 for (int i = 0; i < NumElements; ++i) 00281 ShuffleMask.push_back(((Imm >> i) & 1) ? NumElements + i : i); 00282 } 00283 00284 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 00285 /// No VT provided since it only works on 256-bit, 4 element vectors. 00286 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 00287 for (unsigned i = 0; i != 4; ++i) { 00288 ShuffleMask.push_back((Imm >> (2*i)) & 3); 00289 } 00290 } 00291 00292 } // llvm namespace