LLVM API Documentation
//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
// pseudo registers into register stack instructions. This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
// The x87 hardware tracks liveness of the stack registers, so it is necessary
// to implement exact liveness tracking between basic blocks. The CFG edges are
// partitioned into bundles where the same FP registers must be live in
// identical stack positions. Instructions are inserted at the end of each basic
// block to rearrange the live registers to match the outgoing bundle.
//
// This approach avoids splitting critical edges at the potential cost of more
// live register shuffling instructions when critical edges are present.
00023 // 00024 //===----------------------------------------------------------------------===// 00025 00026 #include "X86.h" 00027 #include "X86InstrInfo.h" 00028 #include "llvm/ADT/DepthFirstIterator.h" 00029 #include "llvm/ADT/STLExtras.h" 00030 #include "llvm/ADT/SmallPtrSet.h" 00031 #include "llvm/ADT/SmallSet.h" 00032 #include "llvm/ADT/SmallVector.h" 00033 #include "llvm/ADT/Statistic.h" 00034 #include "llvm/CodeGen/EdgeBundles.h" 00035 #include "llvm/CodeGen/MachineFunctionPass.h" 00036 #include "llvm/CodeGen/MachineInstrBuilder.h" 00037 #include "llvm/CodeGen/MachineRegisterInfo.h" 00038 #include "llvm/CodeGen/LivePhysRegs.h" 00039 #include "llvm/CodeGen/Passes.h" 00040 #include "llvm/IR/InlineAsm.h" 00041 #include "llvm/Support/Debug.h" 00042 #include "llvm/Support/ErrorHandling.h" 00043 #include "llvm/Support/raw_ostream.h" 00044 #include "llvm/Target/TargetInstrInfo.h" 00045 #include "llvm/Target/TargetMachine.h" 00046 #include "llvm/Target/TargetSubtargetInfo.h" 00047 #include <algorithm> 00048 #include <bitset> 00049 using namespace llvm; 00050 00051 #define DEBUG_TYPE "x86-codegen" 00052 00053 STATISTIC(NumFXCH, "Number of fxch instructions inserted"); 00054 STATISTIC(NumFP , "Number of floating point instructions"); 00055 00056 namespace { 00057 const unsigned ScratchFPReg = 7; 00058 00059 struct FPS : public MachineFunctionPass { 00060 static char ID; 00061 FPS() : MachineFunctionPass(ID) { 00062 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); 00063 // This is really only to keep valgrind quiet. 00064 // The logic in isLive() is too much for it. 
00065 memset(Stack, 0, sizeof(Stack)); 00066 memset(RegMap, 0, sizeof(RegMap)); 00067 } 00068 00069 void getAnalysisUsage(AnalysisUsage &AU) const override { 00070 AU.setPreservesCFG(); 00071 AU.addRequired<EdgeBundles>(); 00072 AU.addPreservedID(MachineLoopInfoID); 00073 AU.addPreservedID(MachineDominatorsID); 00074 MachineFunctionPass::getAnalysisUsage(AU); 00075 } 00076 00077 bool runOnMachineFunction(MachineFunction &MF) override; 00078 00079 const char *getPassName() const override { return "X86 FP Stackifier"; } 00080 00081 private: 00082 const TargetInstrInfo *TII; // Machine instruction info. 00083 00084 // Two CFG edges are related if they leave the same block, or enter the same 00085 // block. The transitive closure of an edge under this relation is a 00086 // LiveBundle. It represents a set of CFG edges where the live FP stack 00087 // registers must be allocated identically in the x87 stack. 00088 // 00089 // A LiveBundle is usually all the edges leaving a block, or all the edges 00090 // entering a block, but it can contain more edges if critical edges are 00091 // present. 00092 // 00093 // The set of live FP registers in a LiveBundle is calculated by bundleCFG, 00094 // but the exact mapping of FP registers to stack slots is fixed later. 00095 struct LiveBundle { 00096 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. 00097 unsigned Mask; 00098 00099 // Number of pre-assigned live registers in FixStack. This is 0 when the 00100 // stack order has not yet been fixed. 00101 unsigned FixCount; 00102 00103 // Assigned stack order for live-in registers. 00104 // FixStack[i] == getStackEntry(i) for all i < FixCount. 00105 unsigned char FixStack[8]; 00106 00107 LiveBundle() : Mask(0), FixCount(0) {} 00108 00109 // Have the live registers been assigned a stack order yet? 00110 bool isFixed() const { return !Mask || FixCount; } 00111 }; 00112 00113 // Numbered LiveBundle structs. 
LiveBundles[0] is used for all CFG edges 00114 // with no live FP registers. 00115 SmallVector<LiveBundle, 8> LiveBundles; 00116 00117 // The edge bundle analysis provides indices into the LiveBundles vector. 00118 EdgeBundles *Bundles; 00119 00120 // Return a bitmask of FP registers in block's live-in list. 00121 static unsigned calcLiveInMask(MachineBasicBlock *MBB) { 00122 unsigned Mask = 0; 00123 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), 00124 E = MBB->livein_end(); I != E; ++I) { 00125 unsigned Reg = *I; 00126 if (Reg < X86::FP0 || Reg > X86::FP6) 00127 continue; 00128 Mask |= 1 << (Reg - X86::FP0); 00129 } 00130 return Mask; 00131 } 00132 00133 // Partition all the CFG edges into LiveBundles. 00134 void bundleCFG(MachineFunction &MF); 00135 00136 MachineBasicBlock *MBB; // Current basic block 00137 00138 // The hardware keeps track of how many FP registers are live, so we have 00139 // to model that exactly. Usually, each live register corresponds to an 00140 // FP<n> register, but when dealing with calls, returns, and inline 00141 // assembly, it is sometimes necessary to have live scratch registers. 00142 unsigned Stack[8]; // FP<n> Registers in each stack slot... 00143 unsigned StackTop; // The current top of the FP stack. 00144 00145 enum { 00146 NumFPRegs = 8 // Including scratch pseudo-registers. 00147 }; 00148 00149 // For each live FP<n> register, point to its Stack[] entry. 00150 // The first entries correspond to FP0-FP6, the rest are scratch registers 00151 // used when we need slightly different live registers than what the 00152 // register allocator thinks. 00153 unsigned RegMap[NumFPRegs]; 00154 00155 // Set up our stack model to match the incoming registers to MBB. 00156 void setupBlockStack(); 00157 00158 // Shuffle live registers to match the expectations of successor blocks. 
00159 void finishBlockStack(); 00160 00161 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 00162 void dumpStack() const { 00163 dbgs() << "Stack contents:"; 00164 for (unsigned i = 0; i != StackTop; ++i) { 00165 dbgs() << " FP" << Stack[i]; 00166 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 00167 } 00168 } 00169 #endif 00170 00171 /// getSlot - Return the stack slot number a particular register number is 00172 /// in. 00173 unsigned getSlot(unsigned RegNo) const { 00174 assert(RegNo < NumFPRegs && "Regno out of range!"); 00175 return RegMap[RegNo]; 00176 } 00177 00178 /// isLive - Is RegNo currently live in the stack? 00179 bool isLive(unsigned RegNo) const { 00180 unsigned Slot = getSlot(RegNo); 00181 return Slot < StackTop && Stack[Slot] == RegNo; 00182 } 00183 00184 /// getStackEntry - Return the X86::FP<n> register in register ST(i). 00185 unsigned getStackEntry(unsigned STi) const { 00186 if (STi >= StackTop) 00187 report_fatal_error("Access past stack top!"); 00188 return Stack[StackTop-1-STi]; 00189 } 00190 00191 /// getSTReg - Return the X86::ST(i) register which contains the specified 00192 /// FP<RegNo> register. 00193 unsigned getSTReg(unsigned RegNo) const { 00194 return StackTop - 1 - getSlot(RegNo) + X86::ST0; 00195 } 00196 00197 // pushReg - Push the specified FP<n> register onto the stack. 00198 void pushReg(unsigned Reg) { 00199 assert(Reg < NumFPRegs && "Register number out of range!"); 00200 if (StackTop >= 8) 00201 report_fatal_error("Stack overflow!"); 00202 Stack[StackTop] = Reg; 00203 RegMap[Reg] = StackTop++; 00204 } 00205 00206 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 00207 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { 00208 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 00209 if (isAtTop(RegNo)) return; 00210 00211 unsigned STReg = getSTReg(RegNo); 00212 unsigned RegOnTop = getStackEntry(0); 00213 00214 // Swap the slots the regs are in. 
00215 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 00216 00217 // Swap stack slot contents. 00218 if (RegMap[RegOnTop] >= StackTop) 00219 report_fatal_error("Access past stack top!"); 00220 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 00221 00222 // Emit an fxch to update the runtime processors version of the state. 00223 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); 00224 ++NumFXCH; 00225 } 00226 00227 void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { 00228 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 00229 unsigned STReg = getSTReg(RegNo); 00230 pushReg(AsReg); // New register on top of stack 00231 00232 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); 00233 } 00234 00235 /// popStackAfter - Pop the current value off of the top of the FP stack 00236 /// after the specified instruction. 00237 void popStackAfter(MachineBasicBlock::iterator &I); 00238 00239 /// freeStackSlotAfter - Free the specified register from the register 00240 /// stack, so that it is no longer in a register. If the register is 00241 /// currently at the top of the stack, we just pop the current instruction, 00242 /// otherwise we store the current top-of-stack into the specified slot, 00243 /// then pop the top of stack. 00244 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 00245 00246 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted 00247 /// instruction. 00248 MachineBasicBlock::iterator 00249 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); 00250 00251 /// Adjust the live registers to be the set in Mask. 00252 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); 00253 00254 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is 00255 /// st(0), FP reg FixStack[1] is st(1) etc. 
00256 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, 00257 MachineBasicBlock::iterator I); 00258 00259 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 00260 00261 void handleCall(MachineBasicBlock::iterator &I); 00262 void handleZeroArgFP(MachineBasicBlock::iterator &I); 00263 void handleOneArgFP(MachineBasicBlock::iterator &I); 00264 void handleOneArgFPRW(MachineBasicBlock::iterator &I); 00265 void handleTwoArgFP(MachineBasicBlock::iterator &I); 00266 void handleCompareFP(MachineBasicBlock::iterator &I); 00267 void handleCondMovFP(MachineBasicBlock::iterator &I); 00268 void handleSpecialFP(MachineBasicBlock::iterator &I); 00269 00270 // Check if a COPY instruction is using FP registers. 00271 static bool isFPCopy(MachineInstr *MI) { 00272 unsigned DstReg = MI->getOperand(0).getReg(); 00273 unsigned SrcReg = MI->getOperand(1).getReg(); 00274 00275 return X86::RFP80RegClass.contains(DstReg) || 00276 X86::RFP80RegClass.contains(SrcReg); 00277 } 00278 00279 void setKillFlags(MachineBasicBlock &MBB) const; 00280 }; 00281 char FPS::ID = 0; 00282 } 00283 00284 FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 00285 00286 /// getFPReg - Return the X86::FPx register number for the specified operand. 00287 /// For example, this returns 3 for X86::FP3. 00288 static unsigned getFPReg(const MachineOperand &MO) { 00289 assert(MO.isReg() && "Expected an FP register!"); 00290 unsigned Reg = MO.getReg(); 00291 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 00292 return Reg - X86::FP0; 00293 } 00294 00295 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 00296 /// register references into FP stack references. 00297 /// 00298 bool FPS::runOnMachineFunction(MachineFunction &MF) { 00299 // We only need to run this pass if there are any FP registers used in this 00300 // function. If it is all integer, there is nothing for us to do! 
00301 bool FPIsUsed = false; 00302 00303 assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); 00304 for (unsigned i = 0; i <= 6; ++i) 00305 if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { 00306 FPIsUsed = true; 00307 break; 00308 } 00309 00310 // Early exit. 00311 if (!FPIsUsed) return false; 00312 00313 Bundles = &getAnalysis<EdgeBundles>(); 00314 TII = MF.getSubtarget().getInstrInfo(); 00315 00316 // Prepare cross-MBB liveness. 00317 bundleCFG(MF); 00318 00319 StackTop = 0; 00320 00321 // Process the function in depth first order so that we process at least one 00322 // of the predecessors for every reachable block in the function. 00323 SmallPtrSet<MachineBasicBlock*, 8> Processed; 00324 MachineBasicBlock *Entry = MF.begin(); 00325 00326 bool Changed = false; 00327 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed)) 00328 Changed |= processBasicBlock(MF, *BB); 00329 00330 // Process any unreachable blocks in arbitrary order now. 00331 if (MF.size() != Processed.size()) 00332 for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) 00333 if (Processed.insert(BB)) 00334 Changed |= processBasicBlock(MF, *BB); 00335 00336 LiveBundles.clear(); 00337 00338 return Changed; 00339 } 00340 00341 /// bundleCFG - Scan all the basic blocks to determine consistent live-in and 00342 /// live-out sets for the FP registers. Consistent means that the set of 00343 /// registers live-out from a block is identical to the live-in set of all 00344 /// successors. This is not enforced by the normal live-in lists since 00345 /// registers may be implicitly defined, or not used by all successors. 00346 void FPS::bundleCFG(MachineFunction &MF) { 00347 assert(LiveBundles.empty() && "Stale data in LiveBundles"); 00348 LiveBundles.resize(Bundles->getNumBundles()); 00349 00350 // Gather the actual live-in masks for all MBBs. 
00351 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 00352 MachineBasicBlock *MBB = I; 00353 const unsigned Mask = calcLiveInMask(MBB); 00354 if (!Mask) 00355 continue; 00356 // Update MBB ingoing bundle mask. 00357 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask; 00358 } 00359 } 00360 00361 /// processBasicBlock - Loop over all of the instructions in the basic block, 00362 /// transforming FP instructions into their stack form. 00363 /// 00364 bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 00365 bool Changed = false; 00366 MBB = &BB; 00367 00368 setKillFlags(BB); 00369 setupBlockStack(); 00370 00371 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 00372 MachineInstr *MI = I; 00373 uint64_t Flags = MI->getDesc().TSFlags; 00374 00375 unsigned FPInstClass = Flags & X86II::FPTypeMask; 00376 if (MI->isInlineAsm()) 00377 FPInstClass = X86II::SpecialFP; 00378 00379 if (MI->isCopy() && isFPCopy(MI)) 00380 FPInstClass = X86II::SpecialFP; 00381 00382 if (MI->isImplicitDef() && 00383 X86::RFP80RegClass.contains(MI->getOperand(0).getReg())) 00384 FPInstClass = X86II::SpecialFP; 00385 00386 if (MI->isCall()) 00387 FPInstClass = X86II::SpecialFP; 00388 00389 if (FPInstClass == X86II::NotFP) 00390 continue; // Efficiently ignore non-fp insts! 00391 00392 MachineInstr *PrevMI = nullptr; 00393 if (I != BB.begin()) 00394 PrevMI = std::prev(I); 00395 00396 ++NumFP; // Keep track of # of pseudo instrs 00397 DEBUG(dbgs() << "\nFPInst:\t" << *MI); 00398 00399 // Get dead variables list now because the MI pointer may be deleted as part 00400 // of processing! 
00401 SmallVector<unsigned, 8> DeadRegs; 00402 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00403 const MachineOperand &MO = MI->getOperand(i); 00404 if (MO.isReg() && MO.isDead()) 00405 DeadRegs.push_back(MO.getReg()); 00406 } 00407 00408 switch (FPInstClass) { 00409 case X86II::ZeroArgFP: handleZeroArgFP(I); break; 00410 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 00411 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 00412 case X86II::TwoArgFP: handleTwoArgFP(I); break; 00413 case X86II::CompareFP: handleCompareFP(I); break; 00414 case X86II::CondMovFP: handleCondMovFP(I); break; 00415 case X86II::SpecialFP: handleSpecialFP(I); break; 00416 default: llvm_unreachable("Unknown FP Type!"); 00417 } 00418 00419 // Check to see if any of the values defined by this instruction are dead 00420 // after definition. If so, pop them. 00421 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { 00422 unsigned Reg = DeadRegs[i]; 00423 // Check if Reg is live on the stack. An inline-asm register operand that 00424 // is in the clobber list and marked dead might not be live on the stack. 00425 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) { 00426 DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); 00427 freeStackSlotAfter(I, Reg-X86::FP0); 00428 } 00429 } 00430 00431 // Print out all of the instructions expanded to if -debug 00432 DEBUG( 00433 MachineBasicBlock::iterator PrevI(PrevMI); 00434 if (I == PrevI) { 00435 dbgs() << "Just deleted pseudo instruction\n"; 00436 } else { 00437 MachineBasicBlock::iterator Start = I; 00438 // Rewind to first instruction newly inserted. 
00439 while (Start != BB.begin() && std::prev(Start) != PrevI) --Start; 00440 dbgs() << "Inserted instructions:\n\t"; 00441 Start->print(dbgs(), &MF.getTarget()); 00442 while (++Start != std::next(I)) {} 00443 } 00444 dumpStack(); 00445 ); 00446 (void)PrevMI; 00447 00448 Changed = true; 00449 } 00450 00451 finishBlockStack(); 00452 00453 return Changed; 00454 } 00455 00456 /// setupBlockStack - Use the live bundles to set up our model of the stack 00457 /// to match predecessors' live out stack. 00458 void FPS::setupBlockStack() { 00459 DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() 00460 << " derived from " << MBB->getName() << ".\n"); 00461 StackTop = 0; 00462 // Get the live-in bundle for MBB. 00463 const LiveBundle &Bundle = 00464 LiveBundles[Bundles->getBundle(MBB->getNumber(), false)]; 00465 00466 if (!Bundle.Mask) { 00467 DEBUG(dbgs() << "Block has no FP live-ins.\n"); 00468 return; 00469 } 00470 00471 // Depth-first iteration should ensure that we always have an assigned stack. 00472 assert(Bundle.isFixed() && "Reached block before any predecessors"); 00473 00474 // Push the fixed live-in registers. 00475 for (unsigned i = Bundle.FixCount; i > 0; --i) { 00476 MBB->addLiveIn(X86::ST0+i-1); 00477 DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" 00478 << unsigned(Bundle.FixStack[i-1]) << '\n'); 00479 pushReg(Bundle.FixStack[i-1]); 00480 } 00481 00482 // Kill off unwanted live-ins. This can happen with a critical edge. 00483 // FIXME: We could keep these live registers around as zombies. They may need 00484 // to be revived at the end of a short block. It might save a few instrs. 00485 adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); 00486 DEBUG(MBB->dump()); 00487 } 00488 00489 /// finishBlockStack - Revive live-outs that are implicitly defined out of 00490 /// MBB. Shuffle live registers to match the expected fixed stack of any 00491 /// predecessors, and ensure that all predecessors are expecting the same 00492 /// stack. 
00493 void FPS::finishBlockStack() { 00494 // The RET handling below takes care of return blocks for us. 00495 if (MBB->succ_empty()) 00496 return; 00497 00498 DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() 00499 << " derived from " << MBB->getName() << ".\n"); 00500 00501 // Get MBB's live-out bundle. 00502 unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true); 00503 LiveBundle &Bundle = LiveBundles[BundleIdx]; 00504 00505 // We may need to kill and define some registers to match successors. 00506 // FIXME: This can probably be combined with the shuffle below. 00507 MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); 00508 adjustLiveRegs(Bundle.Mask, Term); 00509 00510 if (!Bundle.Mask) { 00511 DEBUG(dbgs() << "No live-outs.\n"); 00512 return; 00513 } 00514 00515 // Has the stack order been fixed yet? 00516 DEBUG(dbgs() << "LB#" << BundleIdx << ": "); 00517 if (Bundle.isFixed()) { 00518 DEBUG(dbgs() << "Shuffling stack to match.\n"); 00519 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); 00520 } else { 00521 // Not fixed yet, we get to choose. 
00522 DEBUG(dbgs() << "Fixing stack order now.\n"); 00523 Bundle.FixCount = StackTop; 00524 for (unsigned i = 0; i < StackTop; ++i) 00525 Bundle.FixStack[i] = getStackEntry(i); 00526 } 00527 } 00528 00529 00530 //===----------------------------------------------------------------------===// 00531 // Efficient Lookup Table Support 00532 //===----------------------------------------------------------------------===// 00533 00534 namespace { 00535 struct TableEntry { 00536 uint16_t from; 00537 uint16_t to; 00538 bool operator<(const TableEntry &TE) const { return from < TE.from; } 00539 friend bool operator<(const TableEntry &TE, unsigned V) { 00540 return TE.from < V; 00541 } 00542 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V, 00543 const TableEntry &TE) { 00544 return V < TE.from; 00545 } 00546 }; 00547 } 00548 00549 #ifndef NDEBUG 00550 static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { 00551 for (unsigned i = 0; i != NumEntries-1; ++i) 00552 if (!(Table[i] < Table[i+1])) return false; 00553 return true; 00554 } 00555 #endif 00556 00557 static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { 00558 const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); 00559 if (I != Table+N && I->from == Opcode) 00560 return I->to; 00561 return -1; 00562 } 00563 00564 #ifdef NDEBUG 00565 #define ASSERT_SORTED(TABLE) 00566 #else 00567 #define ASSERT_SORTED(TABLE) \ 00568 { static bool TABLE##Checked = false; \ 00569 if (!TABLE##Checked) { \ 00570 assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ 00571 "All lookup tables must be sorted for efficient access!"); \ 00572 TABLE##Checked = true; \ 00573 } \ 00574 } 00575 #endif 00576 00577 //===----------------------------------------------------------------------===// 00578 // Register File -> Register Stack Mapping Methods 00579 //===----------------------------------------------------------------------===// 00580 00581 // OpcodeTable - Sorted map of register instructions 
// to their stack version.
// The first element is a register file pseudo instruction, the second is the
// concrete X86 instruction which uses the register stack.
// Entries must stay sorted by the first element: getConcreteOpcode checks
// this with ASSERT_SORTED and searches the table with Lookup.
//
static const TableEntry OpcodeTable[] = {
  { X86::ABS_Fp32 , X86::ABS_F },
  { X86::ABS_Fp64 , X86::ABS_F },
  { X86::ABS_Fp80 , X86::ABS_F },
  { X86::ADD_Fp32m , X86::ADD_F32m },
  { X86::ADD_Fp64m , X86::ADD_F64m },
  { X86::ADD_Fp64m32 , X86::ADD_F32m },
  { X86::ADD_Fp80m32 , X86::ADD_F32m },
  { X86::ADD_Fp80m64 , X86::ADD_F64m },
  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
  { X86::CHS_Fp32 , X86::CHS_F },
  { X86::CHS_Fp64 , X86::CHS_F },
  { X86::CHS_Fp80 , X86::CHS_F },
  { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
  { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
  { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
  { X86::CMOVB_Fp32 , X86::CMOVB_F },
  { X86::CMOVB_Fp64 , X86::CMOVB_F },
  { X86::CMOVB_Fp80 , X86::CMOVB_F },
  { X86::CMOVE_Fp32 , X86::CMOVE_F },
  { X86::CMOVE_Fp64 , X86::CMOVE_F },
  { X86::CMOVE_Fp80 , X86::CMOVE_F },
  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
  { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
  { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
  { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
  { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
  { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
  { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
  { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
  { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
  { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
  { X86::CMOVP_Fp32 , X86::CMOVP_F },
  { X86::CMOVP_Fp64 , X86::CMOVP_F },
  { X86::CMOVP_Fp80 , X86::CMOVP_F },
  { X86::COS_Fp32 , X86::COS_F },
  { X86::COS_Fp64 , X86::COS_F },
  { X86::COS_Fp80 , X86::COS_F },
  { X86::DIVR_Fp32m , X86::DIVR_F32m },
  { X86::DIVR_Fp64m , X86::DIVR_F64m },
  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
  { X86::DIV_Fp32m , X86::DIV_F32m },
  { X86::DIV_Fp64m , X86::DIV_F64m },
  { X86::DIV_Fp64m32 , X86::DIV_F32m },
  { X86::DIV_Fp80m32 , X86::DIV_F32m },
  { X86::DIV_Fp80m64 , X86::DIV_F64m },
  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
  { X86::ILD_Fp16m32 , X86::ILD_F16m },
  { X86::ILD_Fp16m64 , X86::ILD_F16m },
  { X86::ILD_Fp16m80 , X86::ILD_F16m },
  { X86::ILD_Fp32m32 , X86::ILD_F32m },
  { X86::ILD_Fp32m64 , X86::ILD_F32m },
  { X86::ILD_Fp32m80 , X86::ILD_F32m },
  { X86::ILD_Fp64m32 , X86::ILD_F64m },
  { X86::ILD_Fp64m64 , X86::ILD_F64m },
  { X86::ILD_Fp64m80 , X86::ILD_F64m },
  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
  { X86::IST_Fp16m32 , X86::IST_F16m },
  { X86::IST_Fp16m64 , X86::IST_F16m },
  { X86::IST_Fp16m80 , X86::IST_F16m },
  { X86::IST_Fp32m32 , X86::IST_F32m },
  { X86::IST_Fp32m64 , X86::IST_F32m },
  { X86::IST_Fp32m80 , X86::IST_F32m },
  { X86::IST_Fp64m32 , X86::IST_FP64m },
  { X86::IST_Fp64m64 , X86::IST_FP64m },
  { X86::IST_Fp64m80 , X86::IST_FP64m },
  { X86::LD_Fp032 , X86::LD_F0 },
  { X86::LD_Fp064 , X86::LD_F0 },
  { X86::LD_Fp080 , X86::LD_F0 },
  { X86::LD_Fp132 , X86::LD_F1 },
  { X86::LD_Fp164 , X86::LD_F1 },
  { X86::LD_Fp180 , X86::LD_F1 },
  { X86::LD_Fp32m , X86::LD_F32m },
  { X86::LD_Fp32m64 , X86::LD_F32m },
  { X86::LD_Fp32m80 , X86::LD_F32m },
  { X86::LD_Fp64m , X86::LD_F64m },
  { X86::LD_Fp64m80 , X86::LD_F64m },
  { X86::LD_Fp80m , X86::LD_F80m },
  { X86::MUL_Fp32m , X86::MUL_F32m },
  { X86::MUL_Fp64m , X86::MUL_F64m },
  { X86::MUL_Fp64m32 , X86::MUL_F32m },
  { X86::MUL_Fp80m32 , X86::MUL_F32m },
  { X86::MUL_Fp80m64 , X86::MUL_F64m },
  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
  { X86::SIN_Fp32 , X86::SIN_F },
  { X86::SIN_Fp64 , X86::SIN_F },
  { X86::SIN_Fp80 , X86::SIN_F },
  { X86::SQRT_Fp32 , X86::SQRT_F },
  { X86::SQRT_Fp64 , X86::SQRT_F },
  { X86::SQRT_Fp80 , X86::SQRT_F },
  { X86::ST_Fp32m , X86::ST_F32m },
  { X86::ST_Fp64m , X86::ST_F64m },
  { X86::ST_Fp64m32 , X86::ST_F32m },
  { X86::ST_Fp80m32 , X86::ST_F32m },
  { X86::ST_Fp80m64 , X86::ST_F64m },
  { X86::ST_FpP80m , X86::ST_FP80m },
  { X86::SUBR_Fp32m , X86::SUBR_F32m },
  { X86::SUBR_Fp64m , X86::SUBR_F64m },
  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
  { X86::SUB_Fp32m , X86::SUB_F32m },
  { X86::SUB_Fp64m , X86::SUB_F64m },
  { X86::SUB_Fp64m32 , X86::SUB_F32m },
  { X86::SUB_Fp80m32 , X86::SUB_F32m },
  { X86::SUB_Fp80m64 , X86::SUB_F64m },
  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
  { X86::TST_Fp32 , X86::TST_F },
  { X86::TST_Fp64 , X86::TST_F },
  { X86::TST_Fp80 , X86::TST_F },
  { X86::UCOM_FpIr32 , X86::UCOM_FIr },
  { X86::UCOM_FpIr64 , X86::UCOM_FIr },
  { X86::UCOM_FpIr80 , X86::UCOM_FIr },
  { X86::UCOM_Fpr32 , X86::UCOM_Fr },
  { X86::UCOM_Fpr64 , X86::UCOM_Fr },
  { X86::UCOM_Fpr80 , X86::UCOM_Fr },
};

/// getConcreteOpcode - Map a register-file pseudo opcode to the concrete
/// register-stack opcode listed for it in OpcodeTable. Asserts that the
/// opcode is present; with asserts disabled a missing opcode is not
/// diagnosed and the lookup's int result is returned converted to unsigned.
static unsigned getConcreteOpcode(unsigned Opcode) {
  ASSERT_SORTED(OpcodeTable);
  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
  return Opc;
}

//===----------------------------------------------------------------------===//
// Helper Methods
//===----------------------------------------------------------------------===//

// PopTable - Sorted map of instructions to their popping version. The first
// element is an instruction, the second is the version which pops.
00760 // 00761 static const TableEntry PopTable[] = { 00762 { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 00763 00764 { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 00765 { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 00766 00767 { X86::IST_F16m , X86::IST_FP16m }, 00768 { X86::IST_F32m , X86::IST_FP32m }, 00769 00770 { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 00771 00772 { X86::ST_F32m , X86::ST_FP32m }, 00773 { X86::ST_F64m , X86::ST_FP64m }, 00774 { X86::ST_Frr , X86::ST_FPrr }, 00775 00776 { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 00777 { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 00778 00779 { X86::UCOM_FIr , X86::UCOM_FIPr }, 00780 00781 { X86::UCOM_FPr , X86::UCOM_FPPr }, 00782 { X86::UCOM_Fr , X86::UCOM_FPr }, 00783 }; 00784 00785 /// popStackAfter - Pop the current value off of the top of the FP stack after 00786 /// the specified instruction. This attempts to be sneaky and combine the pop 00787 /// into the instruction itself if possible. The iterator is left pointing to 00788 /// the last instruction, be it a new pop instruction inserted, or the old 00789 /// instruction if it was modified in place. 00790 /// 00791 void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 00792 MachineInstr* MI = I; 00793 DebugLoc dl = MI->getDebugLoc(); 00794 ASSERT_SORTED(PopTable); 00795 if (StackTop == 0) 00796 report_fatal_error("Cannot pop empty stack!"); 00797 RegMap[Stack[--StackTop]] = ~0; // Update state 00798 00799 // Check to see if there is a popping version of this instruction... 00800 int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); 00801 if (Opcode != -1) { 00802 I->setDesc(TII->get(Opcode)); 00803 if (Opcode == X86::UCOM_FPPr) 00804 I->RemoveOperand(0); 00805 } else { // Insert an explicit pop 00806 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 00807 } 00808 } 00809 00810 /// freeStackSlotAfter - Free the specified register from the register stack, so 00811 /// that it is no longer in a register. 
If the register is currently at the top 00812 /// of the stack, we just pop the current instruction, otherwise we store the 00813 /// current top-of-stack into the specified slot, then pop the top of stack. 00814 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 00815 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 00816 popStackAfter(I); 00817 return; 00818 } 00819 00820 // Otherwise, store the top of stack into the dead slot, killing the operand 00821 // without having to add in an explicit xchg then pop. 00822 // 00823 I = freeStackSlotBefore(++I, FPRegNo); 00824 } 00825 00826 /// freeStackSlotBefore - Free the specified register without trying any 00827 /// folding. 00828 MachineBasicBlock::iterator 00829 FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { 00830 unsigned STReg = getSTReg(FPRegNo); 00831 unsigned OldSlot = getSlot(FPRegNo); 00832 unsigned TopReg = Stack[StackTop-1]; 00833 Stack[OldSlot] = TopReg; 00834 RegMap[TopReg] = OldSlot; 00835 RegMap[FPRegNo] = ~0; 00836 Stack[--StackTop] = ~0; 00837 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); 00838 } 00839 00840 /// adjustLiveRegs - Kill and revive registers such that exactly the FP 00841 /// registers with a bit in Mask are live. 00842 void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { 00843 unsigned Defs = Mask; 00844 unsigned Kills = 0; 00845 for (unsigned i = 0; i < StackTop; ++i) { 00846 unsigned RegNo = Stack[i]; 00847 if (!(Defs & (1 << RegNo))) 00848 // This register is live, but we don't want it. 00849 Kills |= (1 << RegNo); 00850 else 00851 // We don't need to imp-def this live register. 00852 Defs &= ~(1 << RegNo); 00853 } 00854 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); 00855 00856 // Produce implicit-defs for free by using killed registers. 
00857 while (Kills && Defs) { 00858 unsigned KReg = countTrailingZeros(Kills); 00859 unsigned DReg = countTrailingZeros(Defs); 00860 DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); 00861 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); 00862 std::swap(RegMap[KReg], RegMap[DReg]); 00863 Kills &= ~(1 << KReg); 00864 Defs &= ~(1 << DReg); 00865 } 00866 00867 // Kill registers by popping. 00868 if (Kills && I != MBB->begin()) { 00869 MachineBasicBlock::iterator I2 = std::prev(I); 00870 while (StackTop) { 00871 unsigned KReg = getStackEntry(0); 00872 if (!(Kills & (1 << KReg))) 00873 break; 00874 DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); 00875 popStackAfter(I2); 00876 Kills &= ~(1 << KReg); 00877 } 00878 } 00879 00880 // Manually kill the rest. 00881 while (Kills) { 00882 unsigned KReg = countTrailingZeros(Kills); 00883 DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); 00884 freeStackSlotBefore(I, KReg); 00885 Kills &= ~(1 << KReg); 00886 } 00887 00888 // Load zeros for all the imp-defs. 00889 while(Defs) { 00890 unsigned DReg = countTrailingZeros(Defs); 00891 DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); 00892 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); 00893 pushReg(DReg); 00894 Defs &= ~(1 << DReg); 00895 } 00896 00897 // Now we should have the correct registers live. 00898 DEBUG(dumpStack()); 00899 assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); 00900 } 00901 00902 /// shuffleStackTop - emit fxch instructions before I to shuffle the top 00903 /// FixCount entries into the order given by FixStack. 00904 /// FIXME: Is there a better algorithm than insertion sort? 00905 void FPS::shuffleStackTop(const unsigned char *FixStack, 00906 unsigned FixCount, 00907 MachineBasicBlock::iterator I) { 00908 // Move items into place, starting from the desired stack bottom. 00909 while (FixCount--) { 00910 // Old register at position FixCount. 
00911 unsigned OldReg = getStackEntry(FixCount); 00912 // Desired register at position FixCount. 00913 unsigned Reg = FixStack[FixCount]; 00914 if (Reg == OldReg) 00915 continue; 00916 // (Reg st0) (OldReg st0) = (Reg OldReg st0) 00917 moveToTop(Reg, I); 00918 if (FixCount > 0) 00919 moveToTop(OldReg, I); 00920 } 00921 DEBUG(dumpStack()); 00922 } 00923 00924 00925 //===----------------------------------------------------------------------===// 00926 // Instruction transformation implementation 00927 //===----------------------------------------------------------------------===// 00928 00929 void FPS::handleCall(MachineBasicBlock::iterator &I) { 00930 unsigned STReturns = 0; 00931 00932 for (const auto &MO : I->operands()) { 00933 if (!MO.isReg()) 00934 continue; 00935 00936 unsigned R = MO.getReg() - X86::FP0; 00937 00938 if (R < 8) { 00939 assert(MO.isDef() && MO.isImplicit()); 00940 STReturns |= 1 << R; 00941 } 00942 } 00943 00944 unsigned N = CountTrailingOnes_32(STReturns); 00945 00946 // FP registers used for function return must be consecutive starting at 00947 // FP0. 00948 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2)); 00949 00950 for (unsigned I = 0; I < N; ++I) 00951 pushReg(N - I - 1); 00952 } 00953 00954 /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 00955 /// 00956 void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 00957 MachineInstr *MI = I; 00958 unsigned DestReg = getFPReg(MI->getOperand(0)); 00959 00960 // Change from the pseudo instruction to the concrete instruction. 00961 MI->RemoveOperand(0); // Remove the explicit ST(0) operand 00962 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 00963 00964 // Result gets pushed on the stack. 
00965 pushReg(DestReg); 00966 } 00967 00968 /// handleOneArgFP - fst <mem>, ST(0) 00969 /// 00970 void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 00971 MachineInstr *MI = I; 00972 unsigned NumOps = MI->getDesc().getNumOperands(); 00973 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && 00974 "Can only handle fst* & ftst instructions!"); 00975 00976 // Is this the last use of the source register? 00977 unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); 00978 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 00979 00980 // FISTP64m is strange because there isn't a non-popping versions. 00981 // If we have one _and_ we don't want to pop the operand, duplicate the value 00982 // on the stack instead of moving it. This ensure that popping the value is 00983 // always ok. 00984 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. 00985 // 00986 if (!KillsSrc && 00987 (MI->getOpcode() == X86::IST_Fp64m32 || 00988 MI->getOpcode() == X86::ISTT_Fp16m32 || 00989 MI->getOpcode() == X86::ISTT_Fp32m32 || 00990 MI->getOpcode() == X86::ISTT_Fp64m32 || 00991 MI->getOpcode() == X86::IST_Fp64m64 || 00992 MI->getOpcode() == X86::ISTT_Fp16m64 || 00993 MI->getOpcode() == X86::ISTT_Fp32m64 || 00994 MI->getOpcode() == X86::ISTT_Fp64m64 || 00995 MI->getOpcode() == X86::IST_Fp64m80 || 00996 MI->getOpcode() == X86::ISTT_Fp16m80 || 00997 MI->getOpcode() == X86::ISTT_Fp32m80 || 00998 MI->getOpcode() == X86::ISTT_Fp64m80 || 00999 MI->getOpcode() == X86::ST_FpP80m)) { 01000 duplicateToTop(Reg, ScratchFPReg, I); 01001 } else { 01002 moveToTop(Reg, I); // Move to the top of the stack... 01003 } 01004 01005 // Convert from the pseudo instruction to the concrete instruction. 
01006 MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand 01007 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 01008 01009 if (MI->getOpcode() == X86::IST_FP64m || 01010 MI->getOpcode() == X86::ISTT_FP16m || 01011 MI->getOpcode() == X86::ISTT_FP32m || 01012 MI->getOpcode() == X86::ISTT_FP64m || 01013 MI->getOpcode() == X86::ST_FP80m) { 01014 if (StackTop == 0) 01015 report_fatal_error("Stack empty??"); 01016 --StackTop; 01017 } else if (KillsSrc) { // Last use of operand? 01018 popStackAfter(I); 01019 } 01020 } 01021 01022 01023 /// handleOneArgFPRW: Handle instructions that read from the top of stack and 01024 /// replace the value with a newly computed value. These instructions may have 01025 /// non-fp operands after their FP operands. 01026 /// 01027 /// Examples: 01028 /// R1 = fchs R2 01029 /// R1 = fadd R2, [mem] 01030 /// 01031 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 01032 MachineInstr *MI = I; 01033 #ifndef NDEBUG 01034 unsigned NumOps = MI->getDesc().getNumOperands(); 01035 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 01036 #endif 01037 01038 // Is this the last use of the source register? 01039 unsigned Reg = getFPReg(MI->getOperand(1)); 01040 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 01041 01042 if (KillsSrc) { 01043 // If this is the last use of the source register, just make sure it's on 01044 // the top of the stack. 01045 moveToTop(Reg, I); 01046 if (StackTop == 0) 01047 report_fatal_error("Stack cannot be empty!"); 01048 --StackTop; 01049 pushReg(getFPReg(MI->getOperand(0))); 01050 } else { 01051 // If this is not the last use of the source register, _copy_ it to the top 01052 // of the stack. 01053 duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); 01054 } 01055 01056 // Change from the pseudo instruction to the concrete instruction. 01057 MI->RemoveOperand(1); // Drop the source operand. 01058 MI->RemoveOperand(0); // Drop the destination operand. 
01059 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 01060 } 01061 01062 01063 //===----------------------------------------------------------------------===// 01064 // Define tables of various ways to map pseudo instructions 01065 // 01066 01067 // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 01068 static const TableEntry ForwardST0Table[] = { 01069 { X86::ADD_Fp32 , X86::ADD_FST0r }, 01070 { X86::ADD_Fp64 , X86::ADD_FST0r }, 01071 { X86::ADD_Fp80 , X86::ADD_FST0r }, 01072 { X86::DIV_Fp32 , X86::DIV_FST0r }, 01073 { X86::DIV_Fp64 , X86::DIV_FST0r }, 01074 { X86::DIV_Fp80 , X86::DIV_FST0r }, 01075 { X86::MUL_Fp32 , X86::MUL_FST0r }, 01076 { X86::MUL_Fp64 , X86::MUL_FST0r }, 01077 { X86::MUL_Fp80 , X86::MUL_FST0r }, 01078 { X86::SUB_Fp32 , X86::SUB_FST0r }, 01079 { X86::SUB_Fp64 , X86::SUB_FST0r }, 01080 { X86::SUB_Fp80 , X86::SUB_FST0r }, 01081 }; 01082 01083 // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 01084 static const TableEntry ReverseST0Table[] = { 01085 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative 01086 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative 01087 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative 01088 { X86::DIV_Fp32 , X86::DIVR_FST0r }, 01089 { X86::DIV_Fp64 , X86::DIVR_FST0r }, 01090 { X86::DIV_Fp80 , X86::DIVR_FST0r }, 01091 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative 01092 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative 01093 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative 01094 { X86::SUB_Fp32 , X86::SUBR_FST0r }, 01095 { X86::SUB_Fp64 , X86::SUBR_FST0r }, 01096 { X86::SUB_Fp80 , X86::SUBR_FST0r }, 01097 }; 01098 01099 // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 01100 static const TableEntry ForwardSTiTable[] = { 01101 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative 01102 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative 01103 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative 01104 { X86::DIV_Fp32 , X86::DIVR_FrST0 }, 01105 { X86::DIV_Fp64 , 
X86::DIVR_FrST0 }, 01106 { X86::DIV_Fp80 , X86::DIVR_FrST0 }, 01107 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative 01108 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative 01109 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative 01110 { X86::SUB_Fp32 , X86::SUBR_FrST0 }, 01111 { X86::SUB_Fp64 , X86::SUBR_FrST0 }, 01112 { X86::SUB_Fp80 , X86::SUBR_FrST0 }, 01113 }; 01114 01115 // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 01116 static const TableEntry ReverseSTiTable[] = { 01117 { X86::ADD_Fp32 , X86::ADD_FrST0 }, 01118 { X86::ADD_Fp64 , X86::ADD_FrST0 }, 01119 { X86::ADD_Fp80 , X86::ADD_FrST0 }, 01120 { X86::DIV_Fp32 , X86::DIV_FrST0 }, 01121 { X86::DIV_Fp64 , X86::DIV_FrST0 }, 01122 { X86::DIV_Fp80 , X86::DIV_FrST0 }, 01123 { X86::MUL_Fp32 , X86::MUL_FrST0 }, 01124 { X86::MUL_Fp64 , X86::MUL_FrST0 }, 01125 { X86::MUL_Fp80 , X86::MUL_FrST0 }, 01126 { X86::SUB_Fp32 , X86::SUB_FrST0 }, 01127 { X86::SUB_Fp64 , X86::SUB_FrST0 }, 01128 { X86::SUB_Fp80 , X86::SUB_FrST0 }, 01129 }; 01130 01131 01132 /// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 01133 /// instructions which need to be simplified and possibly transformed. 
01134 /// 01135 /// Result: ST(0) = fsub ST(0), ST(i) 01136 /// ST(i) = fsub ST(0), ST(i) 01137 /// ST(0) = fsubr ST(0), ST(i) 01138 /// ST(i) = fsubr ST(0), ST(i) 01139 /// 01140 void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 01141 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 01142 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 01143 MachineInstr *MI = I; 01144 01145 unsigned NumOperands = MI->getDesc().getNumOperands(); 01146 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 01147 unsigned Dest = getFPReg(MI->getOperand(0)); 01148 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 01149 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 01150 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 01151 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 01152 DebugLoc dl = MI->getDebugLoc(); 01153 01154 unsigned TOS = getStackEntry(0); 01155 01156 // One of our operands must be on the top of the stack. If neither is yet, we 01157 // need to move one. 01158 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 01159 // We can choose to move either operand to the top of the stack. If one of 01160 // the operands is killed by this instruction, we want that one so that we 01161 // can update right on top of the old version. 01162 if (KillsOp0) { 01163 moveToTop(Op0, I); // Move dead operand to TOS. 01164 TOS = Op0; 01165 } else if (KillsOp1) { 01166 moveToTop(Op1, I); 01167 TOS = Op1; 01168 } else { 01169 // All of the operands are live after this instruction executes, so we 01170 // cannot update on top of any operand. Because of this, we must 01171 // duplicate one of the stack elements to the top. It doesn't matter 01172 // which one we pick. 
01173 // 01174 duplicateToTop(Op0, Dest, I); 01175 Op0 = TOS = Dest; 01176 KillsOp0 = true; 01177 } 01178 } else if (!KillsOp0 && !KillsOp1) { 01179 // If we DO have one of our operands at the top of the stack, but we don't 01180 // have a dead operand, we must duplicate one of the operands to a new slot 01181 // on the stack. 01182 duplicateToTop(Op0, Dest, I); 01183 Op0 = TOS = Dest; 01184 KillsOp0 = true; 01185 } 01186 01187 // Now we know that one of our operands is on the top of the stack, and at 01188 // least one of our operands is killed by this instruction. 01189 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 01190 "Stack conditions not set up right!"); 01191 01192 // We decide which form to use based on what is on the top of the stack, and 01193 // which operand is killed by this instruction. 01194 const TableEntry *InstTable; 01195 bool isForward = TOS == Op0; 01196 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 01197 if (updateST0) { 01198 if (isForward) 01199 InstTable = ForwardST0Table; 01200 else 01201 InstTable = ReverseST0Table; 01202 } else { 01203 if (isForward) 01204 InstTable = ForwardSTiTable; 01205 else 01206 InstTable = ReverseSTiTable; 01207 } 01208 01209 int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), 01210 MI->getOpcode()); 01211 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 01212 01213 // NotTOS - The register which is not on the top of stack... 01214 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 01215 01216 // Replace the old instruction with a new instruction 01217 MBB->remove(I++); 01218 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 01219 01220 // If both operands are killed, pop one off of the stack in addition to 01221 // overwriting the other one. 
01222 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 01223 assert(!updateST0 && "Should have updated other operand!"); 01224 popStackAfter(I); // Pop the top of stack 01225 } 01226 01227 // Update stack information so that we know the destination register is now on 01228 // the stack. 01229 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 01230 assert(UpdatedSlot < StackTop && Dest < 7); 01231 Stack[UpdatedSlot] = Dest; 01232 RegMap[Dest] = UpdatedSlot; 01233 MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction 01234 } 01235 01236 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 01237 /// register arguments and no explicit destinations. 01238 /// 01239 void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 01240 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 01241 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 01242 MachineInstr *MI = I; 01243 01244 unsigned NumOperands = MI->getDesc().getNumOperands(); 01245 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 01246 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 01247 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 01248 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 01249 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 01250 01251 // Make sure the first operand is on the top of stack, the other one can be 01252 // anywhere. 01253 moveToTop(Op0, I); 01254 01255 // Change from the pseudo instruction to the concrete instruction. 01256 MI->getOperand(0).setReg(getSTReg(Op1)); 01257 MI->RemoveOperand(1); 01258 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 01259 01260 // If any of the operands are killed by this instruction, free them. 01261 if (KillsOp0) freeStackSlotAfter(I, Op0); 01262 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 01263 } 01264 01265 /// handleCondMovFP - Handle two address conditional move instructions. 
These 01266 /// instructions move a st(i) register to st(0) iff a condition is true. These 01267 /// instructions require that the first operand is at the top of the stack, but 01268 /// otherwise don't modify the stack at all. 01269 void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 01270 MachineInstr *MI = I; 01271 01272 unsigned Op0 = getFPReg(MI->getOperand(0)); 01273 unsigned Op1 = getFPReg(MI->getOperand(2)); 01274 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 01275 01276 // The first operand *must* be on the top of the stack. 01277 moveToTop(Op0, I); 01278 01279 // Change the second operand to the stack register that the operand is in. 01280 // Change from the pseudo instruction to the concrete instruction. 01281 MI->RemoveOperand(0); 01282 MI->RemoveOperand(1); 01283 MI->getOperand(0).setReg(getSTReg(Op1)); 01284 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 01285 01286 // If we kill the second operand, make sure to pop it from the stack. 01287 if (Op0 != Op1 && KillsOp1) { 01288 // Get this value off of the register stack. 01289 freeStackSlotAfter(I, Op1); 01290 } 01291 } 01292 01293 01294 /// handleSpecialFP - Handle special instructions which behave unlike other 01295 /// floating point instructions. This is primarily intended for use by pseudo 01296 /// instructions. 01297 /// 01298 void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { 01299 MachineInstr *MI = Inst; 01300 01301 if (MI->isCall()) { 01302 handleCall(Inst); 01303 return; 01304 } 01305 01306 switch (MI->getOpcode()) { 01307 default: llvm_unreachable("Unknown SpecialFP instruction!"); 01308 case TargetOpcode::COPY: { 01309 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. 01310 const MachineOperand &MO1 = MI->getOperand(1); 01311 const MachineOperand &MO0 = MI->getOperand(0); 01312 bool KillsSrc = MI->killsRegister(MO1.getReg()); 01313 01314 // FP <- FP copy. 
01315 unsigned DstFP = getFPReg(MO0); 01316 unsigned SrcFP = getFPReg(MO1); 01317 assert(isLive(SrcFP) && "Cannot copy dead register"); 01318 if (KillsSrc) { 01319 // If the input operand is killed, we can just change the owner of the 01320 // incoming stack slot into the result. 01321 unsigned Slot = getSlot(SrcFP); 01322 Stack[Slot] = DstFP; 01323 RegMap[DstFP] = Slot; 01324 } else { 01325 // For COPY we just duplicate the specified value to a new stack slot. 01326 // This could be made better, but would require substantial changes. 01327 duplicateToTop(SrcFP, DstFP, Inst); 01328 } 01329 break; 01330 } 01331 01332 case TargetOpcode::IMPLICIT_DEF: { 01333 // All FP registers must be explicitly defined, so load a 0 instead. 01334 unsigned Reg = MI->getOperand(0).getReg() - X86::FP0; 01335 DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n'); 01336 BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::LD_F0)); 01337 pushReg(Reg); 01338 break; 01339 } 01340 01341 case TargetOpcode::INLINEASM: { 01342 // The inline asm MachineInstr currently only *uses* FP registers for the 01343 // 'f' constraint. These should be turned into the current ST(x) register 01344 // in the machine instr. 01345 // 01346 // There are special rules for x87 inline assembly. The compiler must know 01347 // exactly how many registers are popped and pushed implicitly by the asm. 01348 // Otherwise it is not possible to restore the stack state after the inline 01349 // asm. 01350 // 01351 // There are 3 kinds of input operands: 01352 // 01353 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A 01354 // popped input operand must be in a fixed stack slot, and it is either 01355 // tied to an output operand, or in the clobber list. The MI has ST use 01356 // and def operands for these inputs. 01357 // 01358 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are 01359 // preserved by the inline asm. 
The fixed stack slots must be STn-STm 01360 // following the popped inputs. A fixed input operand cannot be tied to 01361 // an output or appear in the clobber list. The MI has ST use operands 01362 // and no defs for these inputs. 01363 // 01364 // 3. Preserved inputs. These inputs use the "f" constraint which is 01365 // represented as an FP register. The inline asm won't change these 01366 // stack slots. 01367 // 01368 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered 01369 // registers do not count as output operands. The inline asm changes the 01370 // stack as if it popped all the popped inputs and then pushed all the 01371 // output operands. 01372 01373 // Scan the assembly for ST registers used, defined and clobbered. We can 01374 // only tell clobbers from defs by looking at the asm descriptor. 01375 unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0; 01376 unsigned NumOps = 0; 01377 SmallSet<unsigned, 1> FRegIdx; 01378 unsigned RCID; 01379 01380 for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands(); 01381 i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) { 01382 unsigned Flags = MI->getOperand(i).getImm(); 01383 01384 NumOps = InlineAsm::getNumOperandRegisters(Flags); 01385 if (NumOps != 1) 01386 continue; 01387 const MachineOperand &MO = MI->getOperand(i + 1); 01388 if (!MO.isReg()) 01389 continue; 01390 unsigned STReg = MO.getReg() - X86::FP0; 01391 if (STReg >= 8) 01392 continue; 01393 01394 // If the flag has a register class constraint, this must be an operand 01395 // with constraint "f". Record its index and continue. 
01396 if (InlineAsm::hasRegClassConstraint(Flags, RCID)) { 01397 FRegIdx.insert(i + 1); 01398 continue; 01399 } 01400 01401 switch (InlineAsm::getKind(Flags)) { 01402 case InlineAsm::Kind_RegUse: 01403 STUses |= (1u << STReg); 01404 break; 01405 case InlineAsm::Kind_RegDef: 01406 case InlineAsm::Kind_RegDefEarlyClobber: 01407 STDefs |= (1u << STReg); 01408 if (MO.isDead()) 01409 STDeadDefs |= (1u << STReg); 01410 break; 01411 case InlineAsm::Kind_Clobber: 01412 STClobbers |= (1u << STReg); 01413 break; 01414 default: 01415 break; 01416 } 01417 } 01418 01419 if (STUses && !isMask_32(STUses)) 01420 MI->emitError("fixed input regs must be last on the x87 stack"); 01421 unsigned NumSTUses = CountTrailingOnes_32(STUses); 01422 01423 // Defs must be contiguous from the stack top. ST0-STn. 01424 if (STDefs && !isMask_32(STDefs)) { 01425 MI->emitError("output regs must be last on the x87 stack"); 01426 STDefs = NextPowerOf2(STDefs) - 1; 01427 } 01428 unsigned NumSTDefs = CountTrailingOnes_32(STDefs); 01429 01430 // So must the clobbered stack slots. ST0-STm, m >= n. 01431 if (STClobbers && !isMask_32(STDefs | STClobbers)) 01432 MI->emitError("clobbers must be last on the x87 stack"); 01433 01434 // Popped inputs are the ones that are also clobbered or defined. 01435 unsigned STPopped = STUses & (STDefs | STClobbers); 01436 if (STPopped && !isMask_32(STPopped)) 01437 MI->emitError("implicitly popped regs must be last on the x87 stack"); 01438 unsigned NumSTPopped = CountTrailingOnes_32(STPopped); 01439 01440 DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " 01441 << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n"); 01442 01443 #ifndef NDEBUG 01444 // If any input operand uses constraint "f", all output register 01445 // constraints must be early-clobber defs. 
01446 for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) 01447 if (FRegIdx.count(I)) { 01448 assert((1 << getFPReg(MI->getOperand(I)) & STDefs) == 0 && 01449 "Operands with constraint \"f\" cannot overlap with defs"); 01450 } 01451 #endif 01452 01453 // Collect all FP registers (register operands with constraints "t", "u", 01454 // and "f") to kill afer the instruction. 01455 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; 01456 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 01457 MachineOperand &Op = MI->getOperand(i); 01458 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 01459 continue; 01460 unsigned FPReg = getFPReg(Op); 01461 01462 // If we kill this operand, make sure to pop it from the stack after the 01463 // asm. We just remember it for now, and pop them all off at the end in 01464 // a batch. 01465 if (Op.isUse() && Op.isKill()) 01466 FPKills |= 1U << FPReg; 01467 } 01468 01469 // Do not include registers that are implicitly popped by defs/clobbers. 01470 FPKills &= ~(STDefs | STClobbers); 01471 01472 // Now we can rearrange the live registers to match what was requested. 01473 unsigned char STUsesArray[8]; 01474 01475 for (unsigned I = 0; I < NumSTUses; ++I) 01476 STUsesArray[I] = I; 01477 01478 shuffleStackTop(STUsesArray, NumSTUses, Inst); 01479 DEBUG({dbgs() << "Before asm: "; dumpStack();}); 01480 01481 // With the stack layout fixed, rewrite the FP registers. 01482 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 01483 MachineOperand &Op = MI->getOperand(i); 01484 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 01485 continue; 01486 01487 unsigned FPReg = getFPReg(Op); 01488 01489 if (FRegIdx.count(i)) 01490 // Operand with constraint "f". 01491 Op.setReg(getSTReg(FPReg)); 01492 else 01493 // Operand with a single register class constraint ("t" or "u"). 
01494 Op.setReg(X86::ST0 + FPReg); 01495 } 01496 01497 // Simulate the inline asm popping its inputs and pushing its outputs. 01498 StackTop -= NumSTPopped; 01499 01500 for (unsigned i = 0; i < NumSTDefs; ++i) 01501 pushReg(NumSTDefs - i - 1); 01502 01503 // If this asm kills any FP registers (is the last use of them) we must 01504 // explicitly emit pop instructions for them. Do this now after the asm has 01505 // executed so that the ST(x) numbers are not off (which would happen if we 01506 // did this inline with operand rewriting). 01507 // 01508 // Note: this might be a non-optimal pop sequence. We might be able to do 01509 // better by trying to pop in stack order or something. 01510 while (FPKills) { 01511 unsigned FPReg = countTrailingZeros(FPKills); 01512 if (isLive(FPReg)) 01513 freeStackSlotAfter(Inst, FPReg); 01514 FPKills &= ~(1U << FPReg); 01515 } 01516 01517 // Don't delete the inline asm! 01518 return; 01519 } 01520 01521 case X86::WIN_FTOL_32: 01522 case X86::WIN_FTOL_64: { 01523 // Push the operand into ST0. 01524 MachineOperand &Op = MI->getOperand(0); 01525 assert(Op.isUse() && Op.isReg() && 01526 Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6); 01527 unsigned FPReg = getFPReg(Op); 01528 if (Op.isKill()) 01529 moveToTop(FPReg, Inst); 01530 else 01531 duplicateToTop(FPReg, FPReg, Inst); 01532 01533 // Emit the call. This will pop the operand. 
01534 BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) 01535 .addExternalSymbol("_ftol2") 01536 .addReg(X86::ST0, RegState::ImplicitKill) 01537 .addReg(X86::ECX, RegState::ImplicitDefine) 01538 .addReg(X86::EAX, RegState::Define | RegState::Implicit) 01539 .addReg(X86::EDX, RegState::Define | RegState::Implicit) 01540 .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); 01541 --StackTop; 01542 01543 break; 01544 } 01545 01546 case X86::RETQ: 01547 case X86::RETL: 01548 case X86::RETIL: 01549 case X86::RETIQ: 01550 // If RET has an FP register use operand, pass the first one in ST(0) and 01551 // the second one in ST(1). 01552 01553 // Find the register operands. 01554 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; 01555 unsigned LiveMask = 0; 01556 01557 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 01558 MachineOperand &Op = MI->getOperand(i); 01559 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 01560 continue; 01561 // FP Register uses must be kills unless there are two uses of the same 01562 // register, in which case only one will be a kill. 01563 assert(Op.isUse() && 01564 (Op.isKill() || // Marked kill. 01565 getFPReg(Op) == FirstFPRegOp || // Second instance. 01566 MI->killsRegister(Op.getReg())) && // Later use is marked kill. 01567 "Ret only defs operands, and values aren't live beyond it"); 01568 01569 if (FirstFPRegOp == ~0U) 01570 FirstFPRegOp = getFPReg(Op); 01571 else { 01572 assert(SecondFPRegOp == ~0U && "More than two fp operands!"); 01573 SecondFPRegOp = getFPReg(Op); 01574 } 01575 LiveMask |= (1 << getFPReg(Op)); 01576 01577 // Remove the operand so that later passes don't see it. 01578 MI->RemoveOperand(i); 01579 --i, --e; 01580 } 01581 01582 // We may have been carrying spurious live-ins, so make sure only the returned 01583 // registers are left live. 01584 adjustLiveRegs(LiveMask, MI); 01585 if (!LiveMask) return; // Quick check to see if any are possible. 
01586 01587 // There are only four possibilities here: 01588 // 1) we are returning a single FP value. In this case, it has to be in 01589 // ST(0) already, so just declare success by removing the value from the 01590 // FP Stack. 01591 if (SecondFPRegOp == ~0U) { 01592 // Assert that the top of stack contains the right FP register. 01593 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && 01594 "Top of stack not the right register for RET!"); 01595 01596 // Ok, everything is good, mark the value as not being on the stack 01597 // anymore so that our assertion about the stack being empty at end of 01598 // block doesn't fire. 01599 StackTop = 0; 01600 return; 01601 } 01602 01603 // Otherwise, we are returning two values: 01604 // 2) If returning the same value for both, we only have one thing in the FP 01605 // stack. Consider: RET FP1, FP1 01606 if (StackTop == 1) { 01607 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& 01608 "Stack misconfiguration for RET!"); 01609 01610 // Duplicate the TOS so that we return it twice. Just pick some other FPx 01611 // register to hold it. 01612 unsigned NewReg = ScratchFPReg; 01613 duplicateToTop(FirstFPRegOp, NewReg, MI); 01614 FirstFPRegOp = NewReg; 01615 } 01616 01617 /// Okay we know we have two different FPx operands now: 01618 assert(StackTop == 2 && "Must have two values live!"); 01619 01620 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently 01621 /// in ST(1). In this case, emit an fxch. 01622 if (getStackEntry(0) == SecondFPRegOp) { 01623 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); 01624 moveToTop(FirstFPRegOp, MI); 01625 } 01626 01627 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in 01628 /// ST(1). Just remove both from our understanding of the stack and return. 
01629 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); 01630 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); 01631 StackTop = 0; 01632 return; 01633 } 01634 01635 Inst = MBB->erase(Inst); // Remove the pseudo instruction 01636 01637 // We want to leave I pointing to the previous instruction, but what if we 01638 // just erased the first instruction? 01639 if (Inst == MBB->begin()) { 01640 DEBUG(dbgs() << "Inserting dummy KILL\n"); 01641 Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL)); 01642 } else 01643 --Inst; 01644 } 01645 01646 void FPS::setKillFlags(MachineBasicBlock &MBB) const { 01647 const TargetRegisterInfo *TRI = 01648 MBB.getParent()->getSubtarget().getRegisterInfo(); 01649 LivePhysRegs LPR(TRI); 01650 01651 LPR.addLiveOuts(&MBB); 01652 01653 for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); 01654 I != E; ++I) { 01655 if (I->isDebugValue()) 01656 continue; 01657 01658 std::bitset<8> Defs; 01659 SmallVector<MachineOperand *, 2> Uses; 01660 MachineInstr &MI = *I; 01661 01662 for (auto &MO : I->operands()) { 01663 if (!MO.isReg()) 01664 continue; 01665 01666 unsigned Reg = MO.getReg() - X86::FP0; 01667 01668 if (Reg >= 8) 01669 continue; 01670 01671 if (MO.isDef()) { 01672 Defs.set(Reg); 01673 if (!LPR.contains(MO.getReg())) 01674 MO.setIsDead(); 01675 } else 01676 Uses.push_back(&MO); 01677 } 01678 01679 for (auto *MO : Uses) 01680 if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg())) 01681 MO->setIsKill(); 01682 01683 LPR.stepBackward(MI); 01684 } 01685 }