OpenCV
3.0.0-dev
Open Source Computer Vision
|
Modules | |
Private implementation helpers | |
Classes | |
struct | cv::v_reg< _Tp, n > |
Macros | |
#define | CV_SIMD128 0 |
Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled) More... | |
#define | CV_SIMD128_64F 0 |
Set to 1 if current intrinsics implementation supports 64-bit float vectors. More... | |
Typedefs | |
typedef v_reg< float, 4 > | cv::v_float32x4 |
Four 32-bit floating point values (single precision) More... | |
typedef v_reg< double, 2 > | cv::v_float64x2 |
Two 64-bit floating point values (double precision) More... | |
typedef v_reg< short, 8 > | cv::v_int16x8 |
Eight 16-bit signed integer values. More... | |
typedef v_reg< int, 4 > | cv::v_int32x4 |
Four 32-bit signed integer values. More... | |
typedef v_reg< int64, 2 > | cv::v_int64x2 |
Two 64-bit signed integer values. More... | |
typedef v_reg< schar, 16 > | cv::v_int8x16 |
Sixteen 8-bit signed integer values. More... | |
typedef v_reg< ushort, 8 > | cv::v_uint16x8 |
Eight 16-bit unsigned integer values. More... | |
typedef v_reg< unsigned, 4 > | cv::v_uint32x4 |
Four 32-bit unsigned integer values. More... | |
typedef v_reg< uint64, 2 > | cv::v_uint64x2 |
Two 64-bit unsigned integer values. More... | |
typedef v_reg< uchar, 16 > | cv::v_uint8x16 |
Sixteen 8-bit unsigned integer values. More... | |
Functions | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator!= (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Not equal comparison. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator& (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Bitwise AND. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > & | cv::operator&= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator* (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Multiply values. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > & | cv::operator*= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator+ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Add values. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > & | cv::operator+= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator- (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Subtract values. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > & | cv::operator-= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator/ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Divide values. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > & | cv::operator/= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator< (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Less-than comparison. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator<< (const v_reg< _Tp, n > &a, int imm) |
Bitwise shift left. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator<= (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Less-than or equal comparison. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator== (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Equal comparison. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator> (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Greater-than comparison. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator>= (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Greater-than or equal comparison. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator>> (const v_reg< _Tp, n > &a, int imm) |
Bitwise shift right. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator^ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Bitwise XOR. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > & | cv::operator^= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator| (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Bitwise OR. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > & | cv::operator|= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::operator~ (const v_reg< _Tp, n > &a) |
Bitwise NOT. More... | |
template<typename _Tp , int n> | |
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > | cv::v_abs (const v_reg< _Tp, n > &a) |
Absolute value of elements. More... | |
template<typename _Tp , int n> | |
v_reg< typename V_TypeTraits< _Tp >::abs_type, n > | cv::v_absdiff (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Absolute difference. More... | |
v_float32x4 | cv::v_absdiff (const v_float32x4 &a, const v_float32x4 &b) |
v_float64x2 | cv::v_absdiff (const v_float64x2 &a, const v_float64x2 &b) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_add_wrap (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Add values without saturation. More... | |
template<typename _Tp , int n> | |
v_reg< int, n > | cv::v_ceil (const v_reg< _Tp, n > &a) |
Ceil elements. More... | |
template<int n> | |
v_reg< int, n > | cv::v_ceil (const v_reg< float, n > &a) |
Ceil. More... | |
template<int n> | |
v_reg< int, n *2 > | cv::v_ceil (const v_reg< double, n > &a) |
template<typename _Tp , int n> | |
bool | cv::v_check_all (const v_reg< _Tp, n > &a) |
Check if all packed values are less than zero. More... | |
template<typename _Tp , int n> | |
bool | cv::v_check_any (const v_reg< _Tp, n > &a) |
Check if any of packed values is less than zero. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_combine_high (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Combine vector from last elements of two vectors. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_combine_low (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Combine vector from first elements of two vectors. More... | |
template<int n> | |
v_reg< float, n > | cv::v_cvt_f32 (const v_reg< int, n > &a) |
Convert to float. More... | |
template<int n> | |
v_reg< double, n > | cv::v_cvt_f64 (const v_reg< int, n *2 > &a) |
Convert to double. More... | |
template<int n> | |
v_reg< double, n > | cv::v_cvt_f64 (const v_reg< float, n *2 > &a) |
Convert to double. More... | |
template<typename _Tp , int n> | |
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > | cv::v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Dot product of elements. More... | |
template<typename _Tp , int n> | |
void | cv::v_expand (const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1) |
Expand values to the wider pack type. More... | |
template<int s, typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_extract (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Vector extract. More... | |
template<typename _Tp , int n> | |
v_reg< int, n > | cv::v_floor (const v_reg< _Tp, n > &a) |
Floor elements. More... | |
template<int n> | |
v_reg< int, n > | cv::v_floor (const v_reg< float, n > &a) |
Floor. More... | |
template<int n> | |
v_reg< int, n *2 > | cv::v_floor (const v_reg< double, n > &a) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_invsqrt (const v_reg< _Tp, n > &a) |
Inverse square root. More... | |
template<typename _Tp > | |
v_reg< _Tp, V_SIMD128Traits< _Tp >::nlanes > | cv::v_load (const _Tp *ptr) |
Load register contents from memory. More... | |
template<typename _Tp > | |
v_reg< _Tp, V_SIMD128Traits< _Tp >::nlanes > | cv::v_load_aligned (const _Tp *ptr) |
Load register contents from memory (aligned) More... | |
template<typename _Tp , int n> | |
void | cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c) |
Load and deinterleave (3 channels) More... | |
template<typename _Tp , int n> | |
void | cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c, v_reg< _Tp, n > &d) |
Load and deinterleave (4 channels) More... | |
template<typename _Tp > | |
v_reg< typename V_TypeTraits< _Tp >::w_type, V_SIMD128Traits< _Tp >::nlanes/2 > | cv::v_load_expand (const _Tp *ptr) |
Load register contents from memory with double expand. More... | |
template<typename _Tp > | |
v_reg< typename V_TypeTraits< _Tp >::q_type, V_SIMD128Traits< _Tp >::nlanes/4 > | cv::v_load_expand_q (const _Tp *ptr) |
Load register contents from memory with quad expand. More... | |
template<typename _Tp > | |
v_reg< _Tp, V_SIMD128Traits< _Tp >::nlanes > | cv::v_load_halves (const _Tp *loptr, const _Tp *hiptr) |
Load register contents from two memory blocks. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Magnitude. More... | |
v_float32x4 | cv::v_matmul (const v_float32x4 &v, const v_float32x4 &m0, const v_float32x4 &m1, const v_float32x4 &m2, const v_float32x4 &m3) |
Matrix multiplication. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_max (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Choose max values for each pair. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_min (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Choose min values for each pair. More... | |
template<typename _Tp , int n> | |
void | cv::v_mul_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d) |
Multiply and expand. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_muladd (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c) |
Multiply and add. More... | |
template<typename _Tp , int n> | |
void | cv::v_recombine (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high) |
Combine two vectors from lower and higher parts of two other vectors. More... | |
template<typename _Tp , int n> | |
_Tp | cv::v_reduce_max (const v_reg< _Tp, n > &a) |
Find one max value. More... | |
template<typename _Tp , int n> | |
_Tp | cv::v_reduce_min (const v_reg< _Tp, n > &a) |
Find one min value. More... | |
template<typename _Tp , int n> | |
V_TypeTraits< _Tp >::sum_type | cv::v_reduce_sum (const v_reg< _Tp, n > &a) |
Sum packed values. More... | |
template<typename _Tp , int n> | |
v_reg< int, n > | cv::v_round (const v_reg< _Tp, n > &a) |
Round elements. More... | |
template<int n> | |
v_reg< int, n > | cv::v_round (const v_reg< float, n > &a) |
Round. More... | |
template<int n> | |
v_reg< int, n *2 > | cv::v_round (const v_reg< double, n > &a) |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_select (const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Bitwise select. More... | |
template<typename _Tp , int n> | |
int | cv::v_signmask (const v_reg< _Tp, n > &a) |
Get negative values mask. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_sqr_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Square of the magnitude. More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_sqrt (const v_reg< _Tp, n > &a) |
Square root of elements. More... | |
template<typename _Tp , int n> | |
void | cv::v_store (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory. More... | |
template<typename _Tp , int n> | |
void | cv::v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory (aligned) More... | |
template<typename _Tp , int n> | |
void | cv::v_store_high (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory (higher half) More... | |
template<typename _Tp , int n> | |
void | cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c) |
Interleave and store (3 channels) More... | |
template<typename _Tp , int n> | |
void | cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, const v_reg< _Tp, n > &d) |
Interleave and store (4 channels) More... | |
template<typename _Tp , int n> | |
void | cv::v_store_low (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory (lower half) More... | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | cv::v_sub_wrap (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Subtract values without saturation. More... | |
template<typename _Tp > | |
void | cv::v_transpose4x4 (v_reg< _Tp, 4 > &a0, const v_reg< _Tp, 4 > &a1, const v_reg< _Tp, 4 > &a2, const v_reg< _Tp, 4 > &a3, v_reg< _Tp, 4 > &b0, v_reg< _Tp, 4 > &b1, v_reg< _Tp, 4 > &b2, v_reg< _Tp, 4 > &b3) |
Transpose 4x4 matrix. More... | |
template<typename _Tp , int n> | |
v_reg< int, n > | cv::v_trunc (const v_reg< _Tp, n > &a) |
Truncate elements. More... | |
template<int n> | |
v_reg< int, n > | cv::v_trunc (const v_reg< float, n > &a) |
Trunc. More... | |
template<int n> | |
v_reg< int, n *2 > | cv::v_trunc (const v_reg< double, n > &a) |
template<typename _Tp , int n> | |
void | cv::v_zip (const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1) |
Interleave two vectors. More... | |
Init with zero | |
v_uint8x16 | cv::v_setzero_u8 () |
v_int8x16 | cv::v_setzero_s8 () |
v_uint16x8 | cv::v_setzero_u16 () |
v_int16x8 | cv::v_setzero_s16 () |
v_uint32x4 | cv::v_setzero_u32 () |
v_int32x4 | cv::v_setzero_s32 () |
v_float32x4 | cv::v_setzero_f32 () |
v_float64x2 | cv::v_setzero_f64 () |
v_uint64x2 | cv::v_setzero_u64 () |
v_int64x2 | cv::v_setzero_s64 () |
Init with value | |
v_uint8x16 | cv::v_setall_u8 (uchar val) |
v_int8x16 | cv::v_setall_s8 (schar val) |
v_uint16x8 | cv::v_setall_u16 (ushort val) |
v_int16x8 | cv::v_setall_s16 (short val) |
v_uint32x4 | cv::v_setall_u32 (unsigned val) |
v_int32x4 | cv::v_setall_s32 (int val) |
v_float32x4 | cv::v_setall_f32 (float val) |
v_float64x2 | cv::v_setall_f64 (double val) |
v_uint64x2 | cv::v_setall_u64 (uint64 val) |
v_int64x2 | cv::v_setall_s64 (int64 val) |
Reinterpret | |
Convert vector to different type without modifying underlying data. | |
template<typename _Tp0 , int n0> | |
v_uint8x16 | cv::v_reinterpret_as_u8 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_int8x16 | cv::v_reinterpret_as_s8 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_uint16x8 | cv::v_reinterpret_as_u16 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_int16x8 | cv::v_reinterpret_as_s16 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_uint32x4 | cv::v_reinterpret_as_u32 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_int32x4 | cv::v_reinterpret_as_s32 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_float32x4 | cv::v_reinterpret_as_f32 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_float64x2 | cv::v_reinterpret_as_f64 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_uint64x2 | cv::v_reinterpret_as_u64 (const v_reg< _Tp0, n0 > &a) |
template<typename _Tp0 , int n0> | |
v_int64x2 | cv::v_reinterpret_as_s64 (const v_reg< _Tp0, n0 > &a) |
Left shift | |
template<int n> | |
v_uint16x8 | cv::v_shl (const v_uint16x8 &a) |
template<int n> | |
v_int16x8 | cv::v_shl (const v_int16x8 &a) |
template<int n> | |
v_uint32x4 | cv::v_shl (const v_uint32x4 &a) |
template<int n> | |
v_int32x4 | cv::v_shl (const v_int32x4 &a) |
template<int n> | |
v_uint64x2 | cv::v_shl (const v_uint64x2 &a) |
template<int n> | |
v_int64x2 | cv::v_shl (const v_int64x2 &a) |
Right shift | |
template<int n> | |
v_uint16x8 | cv::v_shr (const v_uint16x8 &a) |
template<int n> | |
v_int16x8 | cv::v_shr (const v_int16x8 &a) |
template<int n> | |
v_uint32x4 | cv::v_shr (const v_uint32x4 &a) |
template<int n> | |
v_int32x4 | cv::v_shr (const v_int32x4 &a) |
template<int n> | |
v_uint64x2 | cv::v_shr (const v_uint64x2 &a) |
template<int n> | |
v_int64x2 | cv::v_shr (const v_int64x2 &a) |
Rounding shift | |
template<int n> | |
v_uint16x8 | cv::v_rshr (const v_uint16x8 &a) |
template<int n> | |
v_int16x8 | cv::v_rshr (const v_int16x8 &a) |
template<int n> | |
v_uint32x4 | cv::v_rshr (const v_uint32x4 &a) |
template<int n> | |
v_int32x4 | cv::v_rshr (const v_int32x4 &a) |
template<int n> | |
v_uint64x2 | cv::v_rshr (const v_uint64x2 &a) |
template<int n> | |
v_int64x2 | cv::v_rshr (const v_int64x2 &a) |
Pack | |
Pack values from two vectors into one. The returned vector type has twice as many elements as the input vector types. The variant with the u suffix also converts to the corresponding unsigned type.
| |
v_uint8x16 | cv::v_pack (const v_uint16x8 &a, const v_uint16x8 &b) |
v_int8x16 | cv::v_pack (const v_int16x8 &a, const v_int16x8 &b) |
v_uint16x8 | cv::v_pack (const v_uint32x4 &a, const v_uint32x4 &b) |
v_int16x8 | cv::v_pack (const v_int32x4 &a, const v_int32x4 &b) |
v_uint32x4 | cv::v_pack (const v_uint64x2 &a, const v_uint64x2 &b) |
v_int32x4 | cv::v_pack (const v_int64x2 &a, const v_int64x2 &b) |
v_uint8x16 | cv::v_pack_u (const v_int16x8 &a, const v_int16x8 &b) |
v_uint16x8 | cv::v_pack_u (const v_int32x4 &a, const v_int32x4 &b) |
Pack with rounding shift | |
Pack values from two vectors into one with a rounding shift. Values from the input vectors will be shifted right by n bits with rounding, converted to a narrower type and returned in the result vector. The variant with the u suffix converts to an unsigned type.
| |
template<int n> | |
v_uint8x16 | cv::v_rshr_pack (const v_uint16x8 &a, const v_uint16x8 &b) |
template<int n> | |
v_int8x16 | cv::v_rshr_pack (const v_int16x8 &a, const v_int16x8 &b) |
template<int n> | |
v_uint16x8 | cv::v_rshr_pack (const v_uint32x4 &a, const v_uint32x4 &b) |
template<int n> | |
v_int16x8 | cv::v_rshr_pack (const v_int32x4 &a, const v_int32x4 &b) |
template<int n> | |
v_uint32x4 | cv::v_rshr_pack (const v_uint64x2 &a, const v_uint64x2 &b) |
template<int n> | |
v_int32x4 | cv::v_rshr_pack (const v_int64x2 &a, const v_int64x2 &b) |
template<int n> | |
v_uint8x16 | cv::v_rshr_pack_u (const v_int16x8 &a, const v_int16x8 &b) |
template<int n> | |
v_uint16x8 | cv::v_rshr_pack_u (const v_int32x4 &a, const v_int32x4 &b) |
Pack and store | |
Store values from the input vector into memory with packing. Values will be stored into memory with saturating conversion to a narrower type. The variant with the u suffix converts to the corresponding unsigned type.
| |
void | cv::v_pack_store (uchar *ptr, const v_uint16x8 &a) |
void | cv::v_pack_store (schar *ptr, const v_int16x8 &a) |
void | cv::v_pack_store (ushort *ptr, const v_uint32x4 &a) |
void | cv::v_pack_store (short *ptr, const v_int32x4 &a) |
void | cv::v_pack_store (unsigned *ptr, const v_uint64x2 &a) |
void | cv::v_pack_store (int *ptr, const v_int64x2 &a) |
void | cv::v_pack_u_store (uchar *ptr, const v_int16x8 &a) |
void | cv::v_pack_u_store (ushort *ptr, const v_int32x4 &a) |
Pack and store with rounding shift | |
Store values from the input vector into memory with packing. Values will be shifted n bits right with rounding, converted to a narrower type and stored into memory. The variant with the u suffix converts to an unsigned type.
| |
template<int n> | |
void | cv::v_rshr_pack_store (uchar *ptr, const v_uint16x8 &a) |
template<int n> | |
void | cv::v_rshr_pack_store (schar *ptr, const v_int16x8 &a) |
template<int n> | |
void | cv::v_rshr_pack_store (ushort *ptr, const v_uint32x4 &a) |
template<int n> | |
void | cv::v_rshr_pack_store (short *ptr, const v_int32x4 &a) |
template<int n> | |
void | cv::v_rshr_pack_store (unsigned *ptr, const v_uint64x2 &a) |
template<int n> | |
void | cv::v_rshr_pack_store (int *ptr, const v_int64x2 &a) |
template<int n> | |
void | cv::v_rshr_pack_u_store (uchar *ptr, const v_int16x8 &a) |
template<int n> | |
void | cv::v_rshr_pack_u_store (ushort *ptr, const v_int32x4 &a) |
"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on different platforms. Currently there are two supported SIMD extensions: SSE/SSE2 on x86 architectures and NEON on ARM architectures, both allow working with 128 bit registers containing packed values of different types. In case when there is no SIMD extension available during compilation, fallback C++ implementation of intrinsics will be chosen and code will work as expected although it could be slower.
There are several types representing a 128-bit register as a vector of packed values; each type is implemented as a structure based on one SIMD register.
These operations allow setting the contents of a register explicitly or by loading it from a memory block, and saving the contents of a register to a memory block.
These operations allow reordering or recombining elements in one or multiple vectors.
Element-wise binary and unary operations.
Most of these operations return only one value.
Different type conversions and casts:
In these operations vectors represent matrix rows/columns: v_dotprod, v_matmul, v_transpose4x4
Most operations are implemented only for some subset of the available types; the following matrices show the applicability of different operations to the types.
Regular integers:
Operations\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 |
---|---|---|---|---|---|---|
load, store | x | x | x | x | x | x |
interleave | x | x | x | x | x | x |
expand | x | x | x | x | x | x |
expand_q | x | x | | | | |
add, sub | x | x | x | x | x | x |
add_wrap, sub_wrap | x | x | x | x | | |
mul | | | x | x | x | x |
mul_expand | | | x | x | x | |
compare | x | x | x | x | x | x |
shift | | | x | x | x | x |
dotprod | | | | x | | |
logical | x | x | x | x | x | x |
min, max | x | x | x | x | x | x |
absdiff | x | x | x | x | x | x |
reduce | | | | | x | x |
mask | x | x | x | x | x | x |
pack | x | x | x | x | x | x |
pack_u | x | | x | | | |
unpack | x | x | x | x | x | x |
extract | x | x | x | x | x | x |
cvt_flt32 | | | | | | x |
cvt_flt64 | | | | | | x |
transpose4x4 | | | | | x | x |
Big integers:
Operations\Types | uint 64x2 | int 64x2 |
---|---|---|
load, store | x | x |
add, sub | x | x |
shift | x | x |
logical | x | x |
extract | x | x |
Floating point:
Operations\Types | float 32x4 | float 64x2 |
---|---|---|
load, store | x | x |
interleave | x | |
add, sub | x | x |
mul | x | x |
div | x | x |
compare | x | x |
min, max | x | x |
absdiff | x | x |
reduce | x | |
mask | x | x |
unpack | x | x |
cvt_flt32 | x | |
cvt_flt64 | x | |
sqrt, abs | x | x |
float math | x | x |
transpose4x4 | x |
#define CV_SIMD128 0 |
Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
#define CV_SIMD128_64F 0 |
Set to 1 if current intrinsics implementation supports 64-bit float vectors.
typedef v_reg<float, 4> cv::v_float32x4 |
Four 32-bit floating point values (single precision)
typedef v_reg<double, 2> cv::v_float64x2 |
Two 64-bit floating point values (double precision)
typedef v_reg<short, 8> cv::v_int16x8 |
Eight 16-bit signed integer values.
typedef v_reg<int, 4> cv::v_int32x4 |
Four 32-bit signed integer values.
typedef v_reg<int64, 2> cv::v_int64x2 |
Two 64-bit signed integer values.
typedef v_reg<schar, 16> cv::v_int8x16 |
Sixteen 8-bit signed integer values.
typedef v_reg<ushort, 8> cv::v_uint16x8 |
Eight 16-bit unsigned integer values.
typedef v_reg<unsigned, 4> cv::v_uint32x4 |
Four 32-bit unsigned integer values.
typedef v_reg<uint64, 2> cv::v_uint64x2 |
Two 64-bit unsigned integer values.
typedef v_reg<uchar, 16> cv::v_uint8x16 |
Sixteen 8-bit unsigned integer values.
|
inline |
Not equal comparison.
For all types except 64-bit integer values.
|
inline |
Bitwise AND.
Only for integer types.
|
inline |
|
inline |
Multiply values.
For 16- and 32-bit integer types and floating types.
|
inline |
|
inline |
Add values.
For all types.
|
inline |
|
inline |
Subtract values.
For all types.
|
inline |
|
inline |
Divide values.
For floating types only.
|
inline |
|
inline |
Less-than comparison.
For all types except 64-bit integer values.
|
inline |
Bitwise shift left.
For 16-, 32- and 64-bit integer values.
|
inline |
Less-than or equal comparison.
For all types except 64-bit integer values.
|
inline |
Equal comparison.
For all types except 64-bit integer values.
|
inline |
Greater-than comparison.
For all types except 64-bit integer values.
|
inline |
Greater-than or equal comparison.
For all types except 64-bit integer values.
|
inline |
Bitwise shift right.
For 16-, 32- and 64-bit integer values.
|
inline |
Bitwise XOR.
Only for integer types.
|
inline |
|
inline |
Bitwise OR.
Only for integer types.
|
inline |
|
inline |
Bitwise NOT.
Only for integer types.
|
inline |
Absolute value of elements.
Only for floating point types.
|
inline |
Absolute difference.
Returns \( |a - b| \) converted to corresponding unsigned type. Example:
For 8-, 16-, 32-bit integer source types.
|
inline |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
For 32-bit floating point values
|
inline |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
For 64-bit floating point values
|
inline |
Add values without saturation.
For 8- and 16-bit integer values.
|
inline |
Ceil elements.
Only for floating point types.
|
inline |
Ceil.
Ceil each value. Input type is float vector ==> output type is int vector.
|
inline |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
|
inline |
Check if all packed values are less than zero.
Unsigned values will be cast to signed: uchar 254 => char -2.
For all types except 64-bit.
|
inline |
Check if any of packed values is less than zero.
Unsigned values will be cast to signed: uchar 254 => char -2.
For all types except 64-bit.
|
inline |
Combine vector from last elements of two vectors.
Scheme:
For all types except 64-bit.
|
inline |
Combine vector from first elements of two vectors.
Scheme:
For all types except 64-bit.
|
inline |
Convert to float.
Supported input type is cv::v_int32x4.
|
inline |
Convert to double.
Supported input type is cv::v_int32x4.
|
inline |
Convert to double.
Supported input type is cv::v_float32x4.
|
inline |
Dot product of elements.
Multiply values in two registers and sum adjacent result pairs. Scheme:
Implemented only for 16-bit signed source type (v_int16x8).
|
inline |
Expand values to the wider pack type.
Copy contents of register to two registers with 2x wider pack type. Scheme:
|
inline |
Vector extract.
Scheme:
Restriction: 0 <= shift < nlanes
Usage:
For integer types only.
|
inline |
Floor elements.
Only for floating point types.
|
inline |
Floor.
Floor each value. Input type is float vector ==> output type is int vector.
|
inline |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
|
inline |
Inverse square root.
Returns \( 1/\sqrt{a} \). For floating point types only.
|
inline |
Load register contents from memory.
ptr | pointer to memory block with data |
|
inline |
Load register contents from memory (aligned)
Similar to cv::v_load, but the source memory block should be aligned (to a 16-byte boundary).
|
inline |
Load and deinterleave (4 channels)
Load data from memory deinterleave and store to 4 registers. Scheme:
For all types except 64-bit.
|
inline |
Load and deinterleave (3 channels)
Load data from memory deinterleave and store to 3 registers. Scheme:
For all types except 64-bit.
|
inline |
Load register contents from memory with double expand.
Same as cv::v_load, but result pack type will be 2x wider than memory type.
For 8-, 16-, 32-bit integer source types.
|
inline |
Load register contents from memory with quad expand.
Same as cv::v_load_expand, but result type is 4 times wider than source.
For 8-bit integer source types.
|
inline |
Load register contents from two memory blocks.
loptr | memory block containing data for first half (0..n/2) |
hiptr | memory block containing data for second half (n/2..n) |
|
inline |
Magnitude.
Returns \( \sqrt{a^2 + b^2} \). For floating point types only.
|
inline |
Matrix multiplication.
Scheme:
|
inline |
Multiply and expand.
Multiply values in two registers and store the results in two registers with a wider pack type. Scheme:
Example:
Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).
|
inline |
Multiply and add.
Returns \( a*b + c \). For floating point types only.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Combine two vectors from lower and higher parts of two other vectors.
|
inline |
Find one max value.
Scheme:
For 32-bit integer and 32-bit floating point types.
|
inline |
Find one min value.
Scheme:
For 32-bit integer and 32-bit floating point types.
|
inline |
Sum packed values.
Scheme:
For 32-bit integer and 32-bit floating point types.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Round elements.
Only for floating point types.
|
inline |
Round.
Rounds each value. Input type is float vector ==> output type is int vector.
|
inline |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Bitwise select.
The return value will be built by combining values a and b using the following scheme: if the i-th bit in mask is 1, select the i-th bit from a, else select the i-th bit from b.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Get negative values mask.
Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes. Example:
For all types except 64-bit.
|
inline |
Square of the magnitude.
Returns \( a^2 + b^2 \). For floating point types only.
|
inline |
Square root of elements.
Only for floating point types.
|
inline |
Store data to memory.
Store register contents to memory. Scheme:
Pointer can be unaligned.
|
inline |
Store data to memory (aligned)
Store register contents to memory. Scheme:
Pointer should be aligned to a 16-byte boundary.
|
inline |
Store data to memory (higher half)
Store higher half of register contents to memory. Scheme:
|
inline |
Interleave and store (3 channels)
Interleave and store data from 3 registers to memory. Scheme:
For all types except 64-bit.
|
inline |
Interleave and store (4 channels)
Interleave and store data from 4 registers to memory. Scheme:
For all types except 64-bit.
|
inline |
Store data to memory (lower half)
Store lower half of register contents to memory. Scheme:
|
inline |
Subtract values without saturation.
For 8- and 16-bit integer values.
|
inline |
Transpose 4x4 matrix.
Scheme:
|
inline |
Truncate elements.
Only for floating point types.
|
inline |
Trunc.
Truncate each value. Input type is float vector ==> output type is int vector.
|
inline |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.