LLVM API Documentation
//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

namespace {
/// \brief Post-RA machine-function pass that rewrites AArch64 pseudo
/// instructions (MOVi32imm/MOVi64imm, LOADgot, MOVaddr*, register-register
/// ALU pseudos, RET_ReallyLR) into real target instructions.
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  AArch64ExpandPseudo() : MachineFunctionPass(ID) {}

  /// Target instruction info; cached once per function in
  /// runOnMachineFunction before any expansion happens.
  const AArch64InstrInfo *TII;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "AArch64 pseudo instruction expansion pass";
  }

private:
  /// Expand every pseudo instruction in \p MBB; returns true if the block
  /// was modified.
  bool expandMBB(MachineBasicBlock &MBB);
  /// Expand the instruction at \p MBBI if it is a recognized pseudo;
  /// returns true if an expansion was performed.
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  /// Expand a MOVi32imm or MOVi64imm pseudo; \p BitSize is 32 or 64.
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);
};
char AArch64ExpandPseudo::ID = 0;
}

/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
00050 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, 00051 MachineInstrBuilder &DefMI) { 00052 const MCInstrDesc &Desc = OldMI.getDesc(); 00053 for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; 00054 ++i) { 00055 const MachineOperand &MO = OldMI.getOperand(i); 00056 assert(MO.isReg() && MO.getReg()); 00057 if (MO.isUse()) 00058 UseMI.addOperand(MO); 00059 else 00060 DefMI.addOperand(MO); 00061 } 00062 } 00063 00064 /// \brief Helper function which extracts the specified 16-bit chunk from a 00065 /// 64-bit value. 00066 static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { 00067 assert(ChunkIdx < 4 && "Out of range chunk index specified!"); 00068 00069 return (Imm >> (ChunkIdx * 16)) & 0xFFFF; 00070 } 00071 00072 /// \brief Helper function which replicates a 16-bit chunk within a 64-bit 00073 /// value. Indices correspond to element numbers in a v4i16. 00074 static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { 00075 assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!"); 00076 const unsigned ShiftAmt = ToIdx * 16; 00077 00078 // Replicate the source chunk to the destination position. 00079 const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt; 00080 // Clear the destination chunk. 00081 Imm &= ~(0xFFFFLL << ShiftAmt); 00082 // Insert the replicated chunk. 00083 return Imm | Chunk; 00084 } 00085 00086 /// \brief Helper function which tries to materialize a 64-bit value with an 00087 /// ORR + MOVK instruction sequence. 
00088 static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, 00089 MachineBasicBlock &MBB, 00090 MachineBasicBlock::iterator &MBBI, 00091 const AArch64InstrInfo *TII, unsigned ChunkIdx) { 00092 assert(ChunkIdx < 4 && "Out of range chunk index specified!"); 00093 const unsigned ShiftAmt = ChunkIdx * 16; 00094 00095 uint64_t Encoding; 00096 if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { 00097 // Create the ORR-immediate instruction. 00098 MachineInstrBuilder MIB = 00099 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) 00100 .addOperand(MI.getOperand(0)) 00101 .addReg(AArch64::XZR) 00102 .addImm(Encoding); 00103 00104 // Create the MOVK instruction. 00105 const unsigned Imm16 = getChunk(UImm, ChunkIdx); 00106 const unsigned DstReg = MI.getOperand(0).getReg(); 00107 const bool DstIsDead = MI.getOperand(0).isDead(); 00108 MachineInstrBuilder MIB1 = 00109 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) 00110 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 00111 .addReg(DstReg) 00112 .addImm(Imm16) 00113 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); 00114 00115 transferImpOps(MI, MIB, MIB1); 00116 MI.eraseFromParent(); 00117 return true; 00118 } 00119 00120 return false; 00121 } 00122 00123 /// \brief Check whether the given 16-bit chunk replicated to full 64-bit width 00124 /// can be materialized with an ORR instruction. 00125 static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { 00126 Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; 00127 00128 return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding); 00129 } 00130 00131 /// \brief Check for identical 16-bit chunks within the constant and if so 00132 /// materialize them with a single ORR instruction. The remaining one or two 00133 /// 16-bit chunks will be materialized with MOVK instructions. 
00134 /// 00135 /// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order 00136 /// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with 00137 /// an ORR instruction. 00138 /// 00139 static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI, 00140 MachineBasicBlock &MBB, 00141 MachineBasicBlock::iterator &MBBI, 00142 const AArch64InstrInfo *TII) { 00143 typedef DenseMap<uint64_t, unsigned> CountMap; 00144 CountMap Counts; 00145 00146 // Scan the constant and count how often every chunk occurs. 00147 for (unsigned Idx = 0; Idx < 4; ++Idx) 00148 ++Counts[getChunk(UImm, Idx)]; 00149 00150 // Traverse the chunks to find one which occurs more than once. 00151 for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end(); 00152 Chunk != End; ++Chunk) { 00153 const uint64_t ChunkVal = Chunk->first; 00154 const unsigned Count = Chunk->second; 00155 00156 uint64_t Encoding = 0; 00157 00158 // We are looking for chunks which have two or three instances and can be 00159 // materialized with an ORR instruction. 00160 if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding)) 00161 continue; 00162 00163 const bool CountThree = Count == 3; 00164 // Create the ORR-immediate instruction. 00165 MachineInstrBuilder MIB = 00166 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) 00167 .addOperand(MI.getOperand(0)) 00168 .addReg(AArch64::XZR) 00169 .addImm(Encoding); 00170 00171 const unsigned DstReg = MI.getOperand(0).getReg(); 00172 const bool DstIsDead = MI.getOperand(0).isDead(); 00173 00174 unsigned ShiftAmt = 0; 00175 uint64_t Imm16 = 0; 00176 // Find the first chunk not materialized with the ORR instruction. 00177 for (; ShiftAmt < 64; ShiftAmt += 16) { 00178 Imm16 = (UImm >> ShiftAmt) & 0xFFFF; 00179 00180 if (Imm16 != ChunkVal) 00181 break; 00182 } 00183 00184 // Create the first MOVK instruction. 
00185 MachineInstrBuilder MIB1 = 00186 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) 00187 .addReg(DstReg, 00188 RegState::Define | getDeadRegState(DstIsDead && CountThree)) 00189 .addReg(DstReg) 00190 .addImm(Imm16) 00191 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); 00192 00193 // In case we have three instances the whole constant is now materialized 00194 // and we can exit. 00195 if (CountThree) { 00196 transferImpOps(MI, MIB, MIB1); 00197 MI.eraseFromParent(); 00198 return true; 00199 } 00200 00201 // Find the remaining chunk which needs to be materialized. 00202 for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { 00203 Imm16 = (UImm >> ShiftAmt) & 0xFFFF; 00204 00205 if (Imm16 != ChunkVal) 00206 break; 00207 } 00208 00209 // Create the second MOVK instruction. 00210 MachineInstrBuilder MIB2 = 00211 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) 00212 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 00213 .addReg(DstReg) 00214 .addImm(Imm16) 00215 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); 00216 00217 transferImpOps(MI, MIB, MIB2); 00218 MI.eraseFromParent(); 00219 return true; 00220 } 00221 00222 return false; 00223 } 00224 00225 /// \brief Check whether this chunk matches the pattern '1...0...'. This pattern 00226 /// starts a contiguous sequence of ones if we look at the bits from the LSB 00227 /// towards the MSB. 00228 static bool isStartChunk(uint64_t Chunk) { 00229 if (Chunk == 0 || Chunk == UINT64_MAX) 00230 return false; 00231 00232 return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64; 00233 } 00234 00235 /// \brief Check whether this chunk matches the pattern '0...1...' This pattern 00236 /// ends a contiguous sequence of ones if we look at the bits from the LSB 00237 /// towards the MSB. 
static bool isEndChunk(uint64_t Chunk) {
  // All-zero and all-one values neither start nor end a sequence.
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  // '0...1...': leading zeros plus trailing ones must cover all 64 bits.
  return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
}

/// \brief Clear or set all bits in the chunk at the given index.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}

/// \brief Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction.
/// Materialize the chunks which are either interrupting the sequence or outside
/// of the sequence with a MOVK instruction.
///
/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
/// which ends the sequence (0...1...). Then we are looking for constants which
/// contain at least one S and E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
///
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI,
                              const AArch64InstrInfo *TII) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    int64_t Chunk = getChunk(UImm, Idx);
    // Sign extend the 16-bit chunk to 64-bit, so isStartChunk/isEndChunk see
    // the chunk's top bit propagated through the upper 48 bits.
    Chunk = (Chunk << 48) >> 48;

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking a chunk which is part of the contiguous
      // sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction. OrrImm was patched above so it is
  // guaranteed to be a valid logical immediate here; the return value is
  // intentionally ignored.
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
          .addOperand(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .addImm(Encoding);

  const unsigned DstReg = MI.getOperand(0).getReg();
  const bool DstIsDead = MI.getOperand(0).isDead();

  const bool SingleMovk = SecondMovkIdx == NotSet;
  // Create the first MOVK instruction. It only carries the original
  // destination's dead flag if it is the last instruction of the expansion.
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg,
                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
          .addReg(DstReg)
          .addImm(getChunk(UImm, FirstMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk) {
    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  // Create the second MOVK instruction.
  MachineInstrBuilder MIB2 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg)
          .addImm(getChunk(UImm, SecondMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));

  transferImpOps(MI, MIB, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  uint64_t Imm = MI.getOperand(1).getImm();
  const unsigned Mask = 0xFFFF;

  // Try a MOVI instruction (aka ORR-immediate with the zero register).
  // Zero-extend the constant to exactly BitSize bits first.
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
            .addOperand(MI.getOperand(0))
            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
            .addImm(Encoding);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Since we can't materialize the constant with a single ORR instruction,
  // let's see whether we can materialize 3/4 of the constant with an ORR
  // instruction and use an additional MOVK instruction to materialize the
  // remaining 1/4.
  //
  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
  //
  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
  // we would create the following instruction sequence:
  //
  // ORR x0, xzr, |A|X|A|X|
  // MOVK x0, |B|, LSL #16
  //
  // Only look at 64-bit constants which can't be materialized with a single
  // instruction e.g. which have less than either three all zero or all one
  // chunks.
  //
  // Ignore 32-bit constants here, they always can be materialized with a
  // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
  // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair.
  // Thus we fall back to the default code below which in the best case creates
  // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
  //
  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
    // identical?
    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 3 into element 1.
      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 1 into element 3.
      OrrImm = replicateChunk(UImm, 1, 3);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
        return true;

      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
      // identical?
    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 2 into element 0.
      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 0 into element 2.
      OrrImm = replicateChunk(UImm, 0, 2);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
        return true;
    }
  }

  // Check for identical 16-bit chunks within the constant and if so materialize
  // them with a single ORR instruction. The remaining one or two 16-bit chunks
  // will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
    return true;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
    return true;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all one chunks
  // than all zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    // countLeadingZeros/countTrailingZeros are only called on non-zero input.
    unsigned LZ = countLeadingZeros(Imm);
    unsigned TZ = countTrailingZeros(Imm);
    Shift = ((63 - LZ) / 16) * 16;
    LastShift = (TZ / 16) * 16;
  }
  unsigned Imm16 = (Imm >> Shift) & Mask;
  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
          .addReg(DstReg, RegState::Define |
                              getDeadRegState(DstIsDead && Shift == LastShift))
          .addImm(Imm16)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  // Shift == LastShift means a single MOVZ/MOVN covered the constant.
  if (Shift == LastShift) {
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  MachineInstrBuilder MIB2;
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift != LastShift) {
    Shift -= 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.
    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
               .addReg(DstReg,
                       RegState::Define |
                           getDeadRegState(DstIsDead && Shift == LastShift))
               .addReg(DstReg)
               .addImm(Imm16)
               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
  }

  transferImpOps(MI, MIB1, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
00566 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, 00567 MachineBasicBlock::iterator MBBI) { 00568 MachineInstr &MI = *MBBI; 00569 unsigned Opcode = MI.getOpcode(); 00570 switch (Opcode) { 00571 default: 00572 break; 00573 00574 case AArch64::ADDWrr: 00575 case AArch64::SUBWrr: 00576 case AArch64::ADDXrr: 00577 case AArch64::SUBXrr: 00578 case AArch64::ADDSWrr: 00579 case AArch64::SUBSWrr: 00580 case AArch64::ADDSXrr: 00581 case AArch64::SUBSXrr: 00582 case AArch64::ANDWrr: 00583 case AArch64::ANDXrr: 00584 case AArch64::BICWrr: 00585 case AArch64::BICXrr: 00586 case AArch64::ANDSWrr: 00587 case AArch64::ANDSXrr: 00588 case AArch64::BICSWrr: 00589 case AArch64::BICSXrr: 00590 case AArch64::EONWrr: 00591 case AArch64::EONXrr: 00592 case AArch64::EORWrr: 00593 case AArch64::EORXrr: 00594 case AArch64::ORNWrr: 00595 case AArch64::ORNXrr: 00596 case AArch64::ORRWrr: 00597 case AArch64::ORRXrr: { 00598 unsigned Opcode; 00599 switch (MI.getOpcode()) { 00600 default: 00601 return false; 00602 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; 00603 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; 00604 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; 00605 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; 00606 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; 00607 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; 00608 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; 00609 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; 00610 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; 00611 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; 00612 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; 00613 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; 00614 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; 00615 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; 00616 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; 00617 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; 
break; 00618 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; 00619 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; 00620 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; 00621 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; 00622 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; 00623 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; 00624 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; 00625 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; 00626 } 00627 MachineInstrBuilder MIB1 = 00628 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode), 00629 MI.getOperand(0).getReg()) 00630 .addOperand(MI.getOperand(1)) 00631 .addOperand(MI.getOperand(2)) 00632 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 00633 transferImpOps(MI, MIB1, MIB1); 00634 MI.eraseFromParent(); 00635 return true; 00636 } 00637 00638 case AArch64::LOADgot: { 00639 // Expand into ADRP + LDR. 00640 unsigned DstReg = MI.getOperand(0).getReg(); 00641 const MachineOperand &MO1 = MI.getOperand(1); 00642 unsigned Flags = MO1.getTargetFlags(); 00643 MachineInstrBuilder MIB1 = 00644 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); 00645 MachineInstrBuilder MIB2 = 00646 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) 00647 .addOperand(MI.getOperand(0)) 00648 .addReg(DstReg); 00649 00650 if (MO1.isGlobal()) { 00651 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); 00652 MIB2.addGlobalAddress(MO1.getGlobal(), 0, 00653 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 00654 } else if (MO1.isSymbol()) { 00655 MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE); 00656 MIB2.addExternalSymbol(MO1.getSymbolName(), 00657 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 00658 } else { 00659 assert(MO1.isCPI() && 00660 "Only expect globals, externalsymbols, or constant pools"); 00661 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), 00662 Flags | AArch64II::MO_PAGE); 00663 
MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), 00664 Flags | AArch64II::MO_PAGEOFF | 00665 AArch64II::MO_NC); 00666 } 00667 00668 transferImpOps(MI, MIB1, MIB2); 00669 MI.eraseFromParent(); 00670 return true; 00671 } 00672 00673 case AArch64::MOVaddr: 00674 case AArch64::MOVaddrJT: 00675 case AArch64::MOVaddrCP: 00676 case AArch64::MOVaddrBA: 00677 case AArch64::MOVaddrTLS: 00678 case AArch64::MOVaddrEXT: { 00679 // Expand into ADRP + ADD. 00680 unsigned DstReg = MI.getOperand(0).getReg(); 00681 MachineInstrBuilder MIB1 = 00682 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) 00683 .addOperand(MI.getOperand(1)); 00684 00685 MachineInstrBuilder MIB2 = 00686 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) 00687 .addOperand(MI.getOperand(0)) 00688 .addReg(DstReg) 00689 .addOperand(MI.getOperand(2)) 00690 .addImm(0); 00691 00692 transferImpOps(MI, MIB1, MIB2); 00693 MI.eraseFromParent(); 00694 return true; 00695 } 00696 00697 case AArch64::MOVi32imm: 00698 return expandMOVImm(MBB, MBBI, 32); 00699 case AArch64::MOVi64imm: 00700 return expandMOVImm(MBB, MBBI, 64); 00701 case AArch64::RET_ReallyLR: 00702 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) 00703 .addReg(AArch64::LR); 00704 MI.eraseFromParent(); 00705 return true; 00706 } 00707 return false; 00708 } 00709 00710 /// \brief Iterate over the instructions in basic block MBB and expand any 00711 /// pseudo instructions. Return true if anything was modified. 
00712 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { 00713 bool Modified = false; 00714 00715 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 00716 while (MBBI != E) { 00717 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 00718 Modified |= expandMI(MBB, MBBI); 00719 MBBI = NMBBI; 00720 } 00721 00722 return Modified; 00723 } 00724 00725 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { 00726 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 00727 00728 bool Modified = false; 00729 for (auto &MBB : MF) 00730 Modified |= expandMBB(MBB); 00731 return Modified; 00732 } 00733 00734 /// \brief Returns an instance of the pseudo instruction expansion pass. 00735 FunctionPass *llvm::createAArch64ExpandPseudoPass() { 00736 return new AArch64ExpandPseudo(); 00737 }