LLVM API Documentation

Namespaces | Defines | Functions | Variables
LoopVectorize.cpp File Reference
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
#include <tuple>

Go to the source code of this file.

Namespaces

namespace  llvm
 

List of target independent CodeGen pass IDs.


Defines

#define LV_NAME   "loop-vectorize"
#define DEBUG_TYPE   LV_NAME

Functions

 STATISTIC (LoopsVectorized,"Number of loops vectorized")
 STATISTIC (LoopsAnalyzed,"Number of loops analyzed for vectorization")
static ValuestripIntegerCast (Value *V)
static const SCEVreplaceSymbolicStrideSCEV (ScalarEvolution *SE, ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
 Replaces the symbolic stride in a pointer SCEV expression by one.
static unsigned getGEPInductionOperand (const DataLayout *DL, const GetElementPtrInst *Gep)
 Find the operand of the GEP that should be checked for consecutive stores. This ignores trailing indices that have no effect on the final pointer.
static InstructiongetFirstInst (Instruction *FirstInst, Value *V, Instruction *Loc)
static unsigned getReductionBinOp (LoopVectorizationLegality::ReductionKind Kind)
 This function translates the reduction kind to an LLVM binary operator.
ValuecreateMinMaxOp (IRBuilder<> &Builder, LoopVectorizationLegality::MinMaxReductionKind RK, Value *Left, Value *Right)
static bool isPredicatedBlock (unsigned BlockNum)
 Check whether this block is a predicated block. Due to if predication of stores we might create a sequence of "if(pred) a[i] = ...; " blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.
static void cse (SmallVector< BasicBlock *, 4 > &BBs)
 Perform cse of induction variable instructions.
static ValueaddFastMathFlag (Value *V)
 Adds a 'fast' flag to floating point operations.
static bool canIfConvertPHINodes (BasicBlock *BB)
 Check whether it is safe to if-convert this phi node.
static TypeconvertPointerToIntegerType (const DataLayout &DL, Type *Ty)
static TypegetWiderType (const DataLayout &DL, Type *Ty0, Type *Ty1)
static bool hasOutsideLoopUser (const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &Reductions)
 Check that the instruction has outside loop users and is not an identified reduction variable.
static ValuestripGetElementPtr (Value *Ptr, ScalarEvolution *SE, const DataLayout *DL, Loop *Lp)
 Remove GEPs whose indices but the last one are loop invariant and return the induction operand of the gep pointer.
static ValuegetUniqueCastUse (Value *Ptr, Loop *Lp, Type *Ty)
 Look for a cast use of the passed value.
static ValuegetStrideFromPointer (Value *Ptr, ScalarEvolution *SE, const DataLayout *DL, Loop *Lp)
 Get the stride of a pointer access in a loop. Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a pointer to the Value, or null otherwise.
static bool hasComputableBounds (ScalarEvolution *SE, ValueToValueMap &Strides, Value *Ptr)
 Check whether a pointer can participate in a runtime bounds check.
static int isStridedPtr (ScalarEvolution *SE, const DataLayout *DL, Value *Ptr, const Loop *Lp, ValueToValueMap &StridesMap)
 Check the stride of the pointer and ensure that it does not wrap in the address space.
static bool isInBoundsGep (Value *Ptr)
static bool hasMultipleUsesOf (Instruction *I, SmallPtrSetImpl< Instruction * > &Insts)
static bool areAllUsesIn (Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
static bool isLikelyComplexAddressComputation (Value *Ptr, LoopVectorizationLegality *Legal, ScalarEvolution *SE, const Loop *TheLoop)
 Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.
static bool isStrideMul (Instruction *I, LoopVectorizationLegality *Legal)
Passllvm::createLoopVectorizePass (bool NoUnrolling=false, bool AlwaysVectorize=true)

Variables

static cl::opt< unsignedVectorizationFactor ("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."))
static cl::opt< unsignedVectorizationInterleave ("force-vector-interleave", cl::init(0), cl::Hidden, cl::desc("Sets the vectorization interleave count. ""Zero is autoselect."))
static cl::opt< boolEnableIfConversion ("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
static cl::opt< unsignedTinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value."))
 We don't vectorize loops with a known constant trip count below this number.
static cl::opt< boolEnableMemAccessVersioning ("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning"))
static const unsigned TinyTripCountUnrollThreshold = 128
 We don't unroll loops with a known constant trip count below this number.
static const unsigned RuntimeMemoryCheckThreshold = 8
static const unsigned MaxVectorWidth = 64
 Maximum simd width.
static cl::opt< unsignedForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))
static cl::opt< unsignedForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))
static const unsigned MaxInterleaveFactor = 16
 Maximum vectorization interleave count.
static cl::opt< unsignedForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops."))
static cl::opt< unsignedForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops."))
static cl::opt< unsignedForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing."))
static cl::opt< unsignedSmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the unroller."))
static cl::opt< boolLoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions."))
static cl::opt< boolEnableLoadStoreRuntimeUnroll ("enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, cl::desc("Enable runtime unrolling until load/store ports are saturated"))
static cl::opt< unsignedNumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
 The number of stores in a loop that are allowed to need predication.
static cl::opt< boolEnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when unrolling"))
static cl::opt< boolEnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))
static cl::opt< unsignedMaxNestedScalarReductionUF ("max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden, cl::desc("The maximum unroll factor to use when unrolling a scalar ""reduction in a nested loop."))
static const char lv_name [] = "Loop Vectorization"

Define Documentation

#define DEBUG_TYPE   LV_NAME

Definition at line 101 of file LoopVectorize.cpp.

#define LV_NAME   "loop-vectorize"

Definition at line 100 of file LoopVectorize.cpp.


Function Documentation

static Value* addFastMathFlag ( Value V) [static]

Adds a 'fast' flag to floating point operations.

Definition at line 2732 of file LoopVectorize.cpp.

References llvm::FastMathFlags::setUnsafeAlgebra().

static bool areAllUsesIn ( Instruction I,
SmallPtrSetImpl< Instruction * > &  Set 
) [static]
static bool canIfConvertPHINodes ( BasicBlock BB) [static]

Check whether it is safe to if-convert this phi node.

Phi nodes with constant expressions that can trap are not safe to if convert.

Definition at line 3439 of file LoopVectorize.cpp.

References llvm::BasicBlock::begin(), llvm::dyn_cast(), llvm::BasicBlock::end(), llvm::PHINode::getIncomingValue(), llvm::PHINode::getNumIncomingValues(), and I.

static Type* convertPointerToIntegerType ( const DataLayout DL,
Type Ty 
) [static]
Value* createMinMaxOp ( IRBuilder<> &  Builder,
LoopVectorizationLegality::MinMaxReductionKind  RK,
Value Left,
Value Right 
)
static void cse ( SmallVector< BasicBlock *, 4 > &  BBs) [static]
static Instruction* getFirstInst ( Instruction FirstInst,
Value V,
Instruction Loc 
) [static]

Definition at line 2001 of file LoopVectorize.cpp.

References llvm::Instruction::getParent().

static unsigned getGEPInductionOperand ( const DataLayout DL,
const GetElementPtrInst Gep 
) [static]

Find the operand of the GEP that should be checked for consecutive stores. This ignores trailing indices that have no effect on the final pointer.

Definition at line 1601 of file LoopVectorize.cpp.

References advance(), llvm::gep_type_begin(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::Type::getScalarType(), llvm::GetElementPtrInst::getType(), llvm::DataLayout::getTypeAllocSize(), llvm::PatternMatch::m_Zero(), and llvm::PatternMatch::match().

Referenced by stripGetElementPtr().

static unsigned getReductionBinOp ( LoopVectorizationLegality::ReductionKind  Kind) [static]

This function translates the reduction kind to an LLVM binary operator.

Definition at line 2602 of file LoopVectorize.cpp.

References llvm::APIntOps::And(), llvm::FCmp, llvm_unreachable, llvm::APIntOps::Or(), and llvm::APIntOps::Xor().

static Value* getStrideFromPointer ( Value Ptr,
ScalarEvolution SE,
const DataLayout DL,
Loop Lp 
) [static]
static Value* getUniqueCastUse ( Value Ptr,
Loop Lp,
Type Ty 
) [static]

Look for a cast use of the passed value.

Definition at line 3842 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::Value::getType(), and llvm::Value::users().

Referenced by getStrideFromPointer().

static Type* getWiderType ( const DataLayout DL,
Type Ty0,
Type Ty1 
) [static]
static bool hasComputableBounds ( ScalarEvolution SE,
ValueToValueMap Strides,
Value Ptr 
) [static]

Check whether a pointer can participate in a runtime bounds check.

Definition at line 4077 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::SCEVAddRecExpr::isAffine(), and replaceSymbolicStrideSCEV().

static bool hasMultipleUsesOf ( Instruction I,
SmallPtrSetImpl< Instruction * > &  Insts 
) [static]
static bool hasOutsideLoopUser ( const Loop TheLoop,
Instruction Inst,
SmallPtrSetImpl< Value * > &  Reductions 
) [static]

Check that the instruction has outside loop users and is not an identified reduction variable.

Definition at line 3605 of file LoopVectorize.cpp.

References llvm::LoopBase< BlockT, LoopT >::contains(), llvm::SmallPtrSetImpl< PtrType >::count(), llvm::dbgs(), DEBUG, and llvm::Value::users().

static bool isInBoundsGep ( Value Ptr) [static]

Definition at line 4406 of file LoopVectorize.cpp.

Referenced by isStridedPtr().

static bool isLikelyComplexAddressComputation ( Value Ptr,
LoopVectorizationLegality *  Legal,
ScalarEvolution SE,
const Loop TheLoop 
) [static]

Check whether the address computation for a non-consecutive memory access looks like an unlikely candidate for being merged into the indexing mode.

We look for a GEP which has one index that is an induction variable and all other indices are loop invariant. If the stride of this access is also within a small bound we decide that this address computation can likely be merged into the addressing mode. In all other cases, we identify the address computation as complex.

Definition at line 5796 of file LoopVectorize.cpp.

References llvm::dyn_cast(), llvm::APInt::getBitWidth(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), llvm::APInt::getSExtValue(), llvm::SCEVAddRecExpr::getStepRecurrence(), llvm::SCEVConstant::getValue(), llvm::ConstantInt::getValue(), and llvm::ScalarEvolution::isLoopInvariant().

static bool isPredicatedBlock ( unsigned  BlockNum) [static]

Check whether this block is a predicated block. Due to if predication of stores we might create a sequence of "if(pred) a[i] = ...; " blocks. We start with one vectorized basic block. For every conditional block we split this vectorized block. Therefore, every second block will be a predicated one.

Definition at line 2697 of file LoopVectorize.cpp.

Referenced by cse().

static int isStridedPtr ( ScalarEvolution SE,
const DataLayout DL,
Value Ptr,
const Loop Lp,
ValueToValueMap StridesMap 
) [static]
static bool isStrideMul ( Instruction I,
LoopVectorizationLegality *  Legal 
) [static]

Definition at line 5840 of file LoopVectorize.cpp.

References llvm::User::getOperand().

static const SCEV* replaceSymbolicStrideSCEV ( ScalarEvolution SE,
ValueToValueMap PtrToStride,
Value Ptr,
Value OrigPtr = nullptr 
) [static]

Replaces the symbolic stride in a pointer SCEV expression by one.

If OrigPtr is not null, use it to look up the stride value instead of Ptr.

Definition at line 1508 of file LoopVectorize.cpp.

References llvm::dbgs(), DEBUG, llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT >::end(), llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT >::find(), llvm::ConstantInt::get(), llvm::ScalarEvolution::getSCEV(), llvm::Value::getType(), llvm::SCEVParameterRewriter::rewrite(), and stripIntegerCast().

Referenced by hasComputableBounds(), and isStridedPtr().

STATISTIC ( LoopsVectorized  ,
"Number of loops vectorized"   
)
STATISTIC ( LoopsAnalyzed  ,
"Number of loops analyzed for vectorization"   
)
static Value* stripGetElementPtr ( Value Ptr,
ScalarEvolution SE,
const DataLayout DL,
Loop Lp 
) [static]

Remove GEPs whose indices but the last one are loop invariant and return the induction operand of the gep pointer.

Definition at line 3824 of file LoopVectorize.cpp.

References llvm::dyn_cast(), getGEPInductionOperand(), llvm::User::getNumOperands(), llvm::User::getOperand(), llvm::ScalarEvolution::getSCEV(), and llvm::ScalarEvolution::isLoopInvariant().

Referenced by getStrideFromPointer().

static Value* stripIntegerCast ( Value V) [static]

Definition at line 1497 of file LoopVectorize.cpp.

Referenced by replaceSymbolicStrideSCEV().


Variable Documentation

cl::opt<bool> EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) [static]
cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")) [static]
cl::opt<bool> EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when unrolling")) [static]
cl::opt<bool> EnableLoadStoreRuntimeUnroll("enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, cl::desc("Enable runtime unrolling until load/store ports are saturated")) [static]
cl::opt<bool> EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning")) [static]

This enables versioning on the strides of symbolically striding memory accesses in code like the following. for (i = 0; i < N; ++i) A[i * Stride1] += B[i * Stride2] ...

Will be roughly translated to if (Stride1 == 1 && Stride2 == 1) { for (i = 0; i < N; i+=4) A[i:i+3] += ... } else ...

cl::opt<unsigned> ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for ""an instruction to a single constant value. Mostly ""useful for getting consistent testing.")) [static]
cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""scalar loops.")) [static]
cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for ""vectorized loops.")) [static]
cl::opt<unsigned> ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) [static]
cl::opt<unsigned> ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) [static]
cl::opt<bool> LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO ""heuristics minimizing code growth in cold regions and being more ""aggressive in hot regions.")) [static]
const char lv_name[] = "Loop Vectorization" [static]

Definition at line 6060 of file LoopVectorize.cpp.

Maximum vectorization interleave count.

Definition at line 161 of file LoopVectorize.cpp.

cl::opt<unsigned> MaxNestedScalarReductionUF("max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden, cl::desc("The maximum unroll factor to use when unrolling a scalar ""reduction in a nested loop.")) [static]
const unsigned MaxVectorWidth = 64 [static]

Maximum simd width.

Definition at line 150 of file LoopVectorize.cpp.

cl::opt<unsigned> NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) [static]

The number of stores in a loop that are allowed to need predication.

When performing memory disambiguation checks at runtime do not make more than this number of comparisons.

Definition at line 147 of file LoopVectorize.cpp.

cl::opt<unsigned> SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the unroller.")) [static]

We don't unroll loops with a known constant trip count below this number.

Definition at line 143 of file LoopVectorize.cpp.

cl::opt<unsigned> TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Don't vectorize loops with a constant ""trip count that is smaller than this ""value.")) [static]

We don't vectorize loops with a known constant trip count below this number.

cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect.")) [static]
cl::opt<unsigned> VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden, cl::desc("Sets the vectorization interleave count. ""Zero is autoselect.")) [static]