41 #ifndef NEON_HPP_INCLUDED
42 #define NEON_HPP_INCLUDED
44 #if defined __GNUC__ && defined __ARM_NEON__
50 #include <boost/cstdint.hpp>
63 template<
class T,
unsigned S>
91 template<
class T,
unsigned S,
unsigned D>
103 template<
class Td,
unsigned S>
108 for(
unsigned i = 0;
i <
S; ++
i) {
117 return vdup_n<uint16_t, 8>(imm);
123 return vdup_n<uint8_t, 8>(imm);
128 template<
class Td,
class Tm,
unsigned S>
133 for(
unsigned i = 0;
i <
S; ++
i) {
142 return vshr<uint8_t, uint16_t, 8>(
m, imm);
147 template<
class Td,
class Tn,
class Tm,
unsigned S>
152 for(
unsigned i = 0;
i <
S; ++
i) {
161 return vadd<uint16_t, uint16_t, uint16_t, 8>(
n,
m);
166 template<
class Td,
class Ts,
unsigned S>
171 for(
unsigned i = 0;
i <
S; ++
i) {
181 return vmul<uint16_t, uint8_t, 8>(
n,
m);
186 template<
class Td,
unsigned S,
unsigned D>
191 for(
unsigned i = 0;
i <
S; ++
i) {
192 for(
unsigned j = 0; j < D; ++j) {
193 d.
val[j][
i] =
static_cast<Td
>(base[
i * D + j]);
202 return vld<uint8_t, 8, 4>(base);
205 template<
class Td,
unsigned S,
unsigned D>
209 for(
unsigned i = 0;
i <
S; ++
i) {
210 for(
unsigned j = 0; j < D; ++j) {
211 base[
i * D + j] = list.
val[j][
i];
219 vst<uint8_t, 8, 4>(base, list);
uint8x8_t vshrn_n_u16(uint16x8_t m, const unsigned imm)
tvector< Td, S > vadd(tvector< Tn, S > n, tvector< Tm, S > m)
tvector< uint16_t, 8 > uint16x8_t
const T & operator[](unsigned i) const
GLuint const GLfloat * val
uint16x8_t vdupq_n_u16(uint16_t imm)
GLint GLenum GLsizei GLint GLsizei const GLvoid * data
tvector< Td, S > vdup_n(Td imm)
tmatrix< uint8_t, 8, 4 > uint8x8x4_t
uint8x8_t vdup_n_u8(uint8_t imm)
tvector< Td, S > vshr(tvector< Tm, S > m, const unsigned imm)
T & operator[](unsigned i)
uint8x8x4_t vld4_u8(uint8_t *base)
tvector< uint8_t, 8 > uint8x8_t
void vst(Td *base, tmatrix< Td, S, D > list)
void vst4_u8(uint8_t *base, uint8x8x4_t list)
tmatrix< Td, S, D > vld(Td *base)
tvector< Td, S > vmul(tvector< Ts, S > n, tvector< Ts, S > m)
uint16x8_t vaddq_u16(uint16x8_t n, uint16x8_t m)
uint16x8_t vmull_u8(uint8x8_t n, uint8x8_t m)