LLVM API Documentation
00001 //===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the SystemZSelectionDAGInfo class. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "SystemZTargetMachine.h" 00015 #include "llvm/CodeGen/SelectionDAG.h" 00016 00017 using namespace llvm; 00018 00019 #define DEBUG_TYPE "systemz-selectiondag-info" 00020 00021 SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL) 00022 : TargetSelectionDAGInfo(&DL) {} 00023 00024 SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { 00025 } 00026 00027 // Decide whether it is best to use a loop or straight-line code for 00028 // a block operation of Size bytes with source address Src and destination 00029 // address Dest. Sequence is the opcode to use for straight-line code 00030 // (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). 00031 // Return the chain for the completed operation. 00032 static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, 00033 unsigned Loop, SDValue Chain, SDValue Dst, 00034 SDValue Src, uint64_t Size) { 00035 EVT PtrVT = Src.getValueType(); 00036 // The heuristic we use is to prefer loops for anything that would 00037 // require 7 or more MVCs. With these kinds of sizes there isn't 00038 // much to choose between straight-line code and looping code, 00039 // since the time will be dominated by the MVCs themselves. 00040 // However, the loop has 4 or 5 instructions (depending on whether 00041 // the base addresses can be proved equal), so there doesn't seem 00042 // much point using a loop for 5 * 256 bytes or fewer. Anything in 00043 // the range (5 * 256, 6 * 256) will need another instruction after 00044 // the loop, so it doesn't seem worth using a loop then either. 00045 // The next value up, 6 * 256, can be implemented in the same 00046 // number of straight-line MVCs as 6 * 256 - 1. 00047 if (Size > 6 * 256) 00048 return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, 00049 DAG.getConstant(Size, PtrVT), 00050 DAG.getConstant(Size / 256, PtrVT)); 00051 return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, 00052 DAG.getConstant(Size, PtrVT)); 00053 } 00054 00055 SDValue SystemZSelectionDAGInfo:: 00056 EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00057 SDValue Dst, SDValue Src, SDValue Size, unsigned Align, 00058 bool IsVolatile, bool AlwaysInline, 00059 MachinePointerInfo DstPtrInfo, 00060 MachinePointerInfo SrcPtrInfo) const { 00061 if (IsVolatile) 00062 return SDValue(); 00063 00064 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) 00065 return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 00066 Chain, Dst, Src, CSize->getZExtValue()); 00067 return SDValue(); 00068 } 00069 00070 // Handle a memset of 1, 2, 4 or 8 bytes with the operands given by 00071 // Chain, Dst, ByteVal and Size. These cases are expected to use 00072 // MVI, MVHHI, MVHI and MVGHI respectively. 00073 static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00074 SDValue Dst, uint64_t ByteVal, uint64_t Size, 00075 unsigned Align, 00076 MachinePointerInfo DstPtrInfo) { 00077 uint64_t StoreVal = ByteVal; 00078 for (unsigned I = 1; I < Size; ++I) 00079 StoreVal |= ByteVal << (I * 8); 00080 return DAG.getStore(Chain, DL, 00081 DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), 00082 Dst, DstPtrInfo, false, false, Align); 00083 } 00084 00085 SDValue SystemZSelectionDAGInfo:: 00086 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00087 SDValue Dst, SDValue Byte, SDValue Size, 00088 unsigned Align, bool IsVolatile, 00089 MachinePointerInfo DstPtrInfo) const { 00090 EVT PtrVT = Dst.getValueType(); 00091 00092 if (IsVolatile) 00093 return SDValue(); 00094 00095 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { 00096 uint64_t Bytes = CSize->getZExtValue(); 00097 if (Bytes == 0) 00098 return SDValue(); 00099 if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) { 00100 // Handle cases that can be done using at most two of 00101 // MVI, MVHI, MVHHI and MVGHI. The latter two can only be 00102 // used if ByteVal is all zeros or all ones; in other casees, 00103 // we can move at most 2 halfwords. 00104 uint64_t ByteVal = CByte->getZExtValue(); 00105 if (ByteVal == 0 || ByteVal == 255 ? 00106 Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : 00107 Bytes <= 4) { 00108 unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); 00109 unsigned Size2 = Bytes - Size1; 00110 SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, 00111 Align, DstPtrInfo); 00112 if (Size2 == 0) 00113 return Chain1; 00114 Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 00115 DAG.getConstant(Size1, PtrVT)); 00116 DstPtrInfo = DstPtrInfo.getWithOffset(Size1); 00117 SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, 00118 std::min(Align, Size1), DstPtrInfo); 00119 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 00120 } 00121 } else { 00122 // Handle one and two bytes using STC. 00123 if (Bytes <= 2) { 00124 SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 00125 false, false, Align); 00126 if (Bytes == 1) 00127 return Chain1; 00128 SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 00129 DAG.getConstant(1, PtrVT)); 00130 SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, 00131 DstPtrInfo.getWithOffset(1), 00132 false, false, 1); 00133 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 00134 } 00135 } 00136 assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); 00137 00138 // Handle the special case of a memset of 0, which can use XC. 00139 auto *CByte = dyn_cast<ConstantSDNode>(Byte); 00140 if (CByte && CByte->getZExtValue() == 0) 00141 return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, 00142 Chain, Dst, Dst, Bytes); 00143 00144 // Copy the byte to the first location and then use MVC to copy 00145 // it to the rest. 00146 Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 00147 false, false, Align); 00148 SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 00149 DAG.getConstant(1, PtrVT)); 00150 return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 00151 Chain, DstPlus1, Dst, Bytes - 1); 00152 } 00153 return SDValue(); 00154 } 00155 00156 // Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), 00157 // deciding whether to use a loop or straight-line code. 00158 static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00159 SDValue Src1, SDValue Src2, uint64_t Size) { 00160 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 00161 EVT PtrVT = Src1.getValueType(); 00162 // A two-CLC sequence is a clear win over a loop, not least because it 00163 // needs only one branch. A three-CLC sequence needs the same number 00164 // of branches as a loop (i.e. 2), but is shorter. That brings us to 00165 // lengths greater than 768 bytes. It seems relatively likely that 00166 // a difference will be found within the first 768 bytes, so we just 00167 // optimize for the smallest number of branch instructions, in order 00168 // to avoid polluting the prediction buffer too much. A loop only ever 00169 // needs 2 branches, whereas a straight-line sequence would need 3 or more. 00170 if (Size > 3 * 256) 00171 return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, 00172 DAG.getConstant(Size, PtrVT), 00173 DAG.getConstant(Size / 256, PtrVT)); 00174 return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, 00175 DAG.getConstant(Size, PtrVT)); 00176 } 00177 00178 // Convert the current CC value into an integer that is 0 if CC == 0, 00179 // less than zero if CC == 1 and greater than zero if CC >= 2. 00180 // The sequence starts with IPM, which puts CC into bits 29 and 28 00181 // of an integer and clears bits 30 and 31. 00182 static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { 00183 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); 00184 SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 00185 DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); 00186 SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, 00187 DAG.getConstant(31, MVT::i32)); 00188 return ROTL; 00189 } 00190 00191 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 00192 EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00193 SDValue Src1, SDValue Src2, SDValue Size, 00194 MachinePointerInfo Op1PtrInfo, 00195 MachinePointerInfo Op2PtrInfo) const { 00196 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { 00197 uint64_t Bytes = CSize->getZExtValue(); 00198 assert(Bytes > 0 && "Caller should have handled 0-size case"); 00199 Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); 00200 SDValue Glue = Chain.getValue(1); 00201 return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 00202 } 00203 return std::make_pair(SDValue(), SDValue()); 00204 } 00205 00206 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 00207 EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00208 SDValue Src, SDValue Char, SDValue Length, 00209 MachinePointerInfo SrcPtrInfo) const { 00210 // Use SRST to find the character. End is its address on success. 00211 EVT PtrVT = Src.getValueType(); 00212 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 00213 Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); 00214 Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); 00215 Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, 00216 DAG.getConstant(255, MVT::i32)); 00217 SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); 00218 SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 00219 Limit, Src, Char); 00220 Chain = End.getValue(1); 00221 SDValue Glue = End.getValue(2); 00222 00223 // Now select between End and null, depending on whether the character 00224 // was found. 00225 SmallVector<SDValue, 5> Ops; 00226 Ops.push_back(End); 00227 Ops.push_back(DAG.getConstant(0, PtrVT)); 00228 Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32)); 00229 Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); 00230 Ops.push_back(Glue); 00231 VTs = DAG.getVTList(PtrVT, MVT::Glue); 00232 End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); 00233 return std::make_pair(End, Chain); 00234 } 00235 00236 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 00237 EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00238 SDValue Dest, SDValue Src, 00239 MachinePointerInfo DestPtrInfo, 00240 MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { 00241 SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); 00242 SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, 00243 DAG.getConstant(0, MVT::i32)); 00244 return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); 00245 } 00246 00247 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 00248 EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00249 SDValue Src1, SDValue Src2, 00250 MachinePointerInfo Op1PtrInfo, 00251 MachinePointerInfo Op2PtrInfo) const { 00252 SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); 00253 SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, 00254 DAG.getConstant(0, MVT::i32)); 00255 Chain = Unused.getValue(1); 00256 SDValue Glue = Chain.getValue(2); 00257 return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 00258 } 00259 00260 // Search from Src for a null character, stopping once Src reaches Limit. 00261 // Return a pair of values, the first being the number of nonnull characters 00262 // and the second being the out chain. 00263 // 00264 // This can be used for strlen by setting Limit to 0. 00265 static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, 00266 SDValue Chain, SDValue Src, 00267 SDValue Limit) { 00268 EVT PtrVT = Src.getValueType(); 00269 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 00270 SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 00271 Limit, Src, DAG.getConstant(0, MVT::i32)); 00272 Chain = End.getValue(1); 00273 SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); 00274 return std::make_pair(Len, Chain); 00275 } 00276 00277 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 00278 EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00279 SDValue Src, MachinePointerInfo SrcPtrInfo) const { 00280 EVT PtrVT = Src.getValueType(); 00281 return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT)); 00282 } 00283 00284 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: 00285 EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, 00286 SDValue Src, SDValue MaxLength, 00287 MachinePointerInfo SrcPtrInfo) const { 00288 EVT PtrVT = Src.getValueType(); 00289 MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); 00290 SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); 00291 return getBoundedStrlen(DAG, DL, Chain, Src, Limit); 00292 }