The Battle for Wesnoth  1.13.4+dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
neon.hpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2012 - 2016 by Mark de Wever <[email protected]>
3  Part of the Battle for Wesnoth Project http://www.wesnoth.org/
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 2 of the License, or
8  (at your option) any later version.
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY.
11 
12  See the COPYING file for more details.
13 */
14 
15 /**
16  * @file
17  * Helper class for ARM NEON support.
18  *
19  * When using g++ on an ARM that supports NEON it uses the gcc intrinsics
20  * [1]; for all other platforms an emulation is used. The emulation is based on
21  * the RealView Compilation Tool Assembler Guide (ARM DUI 0204J (ID101213)) [2].
22  * The emulation follows the latter's conventions instead of the former's. The
23  * numbers in the section headers refer to the section numbers in [2].
24  *
25  * Not everything is implemented; only the functions that are used are.
26  *
27  * Common template parameters are:
28  * * Td type of the destination.
29  * * Tn type of the first operand.
30  * * Tm type of the second operand.
31  * * Ts type of the source (Tm and Tn).
32  * * S number of vector elements.
33  * * D number of matrix vectors.
34  *
35  * [1]
36  * http://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/ARM-NEON-Intrinsics.html
37  * [2]
38  * http://infocenter.arm.com/help/topic/com.arm.doc.dui0204i/DUI0204I_rvct_assembler_guide.pdf
39  */
40 
41 #ifndef NEON_HPP_INCLUDED
42 #define NEON_HPP_INCLUDED
43 
44 #if defined __GNUC__ && defined __ARM_NEON__
45 
46 #include <arm_neon.h>
47 
48 #else
49 
50 #include <boost/cstdint.hpp>
51 
52 /***** ***** ***** ***** types ***** ***** ***** *****/
53 
/**
 * Emulates a vector.
 *
 * Gcc also supports __attribute__ ((__vector_size__ (8))) but that only works
 * with gcc, _not_ with g++. It also is not portable.
 *
 * @tparam T                      The base type of the vector.
 * @tparam S                      The size of the vector.
 */
template<class T, unsigned S>
struct tvector
{
	/** Read-only access to element @p i; the index is not range checked. */
	const T&
	operator[](unsigned i) const
	{
		return data[i];
	}

	/** Mutable access to element @p i; the index is not range checked. */
	T&
	operator[](unsigned i)
	{
		return data[i];
	}

	/** The @p S elements of the vector. */
	T data[S];
};

/** Emulated vector of eight unsigned 8-bit elements. */
typedef tvector<uint8_t, 8> uint8x8_t;

/** Emulated vector of eight unsigned 16-bit elements. */
typedef tvector<uint16_t, 8> uint16x8_t;
83 
84 /**
85  * Emulates a matrix.
86  *
87  * The guide [2] doesn't use the term matrix, but uses various terms for it;
88  * e.g. table in the VTBL instructions (5.8.9) and lanes in the VLDn
89  * instructions (5.12).
90  */
91 template<class T, unsigned S, unsigned D>
92 struct tmatrix
93 {
95 };
96 
98 
99 
100 /***** ***** ***** ***** 5.8.3 VDUP ***** ***** ***** *****/
101 
102 /* The imm is actually the Rm. */
103 template<class Td, unsigned S>
104 inline tvector<Td, S>
105 vdup_n(Td imm)
106 {
108  for(unsigned i = 0; i < S; ++i) {
109  d[i] = imm;
110  }
111  return d;
112 }
113 
114 inline uint16x8_t
115 vdupq_n_u16(uint16_t imm)
116 {
117  return vdup_n<uint16_t, 8>(imm);
118 }
119 
120 inline uint8x8_t
121 vdup_n_u8(uint8_t imm)
122 {
123  return vdup_n<uint8_t, 8>(imm);
124 }
125 
126 /***** ***** ***** ***** 5.9.3 VSHR ***** ***** ***** *****/
127 
128 template<class Td, class Tm, unsigned S>
129 inline tvector<Td, S>
130 vshr(tvector<Tm, S> m, const unsigned imm)
131 {
133  for(unsigned i = 0; i < S; ++i) {
134  d[i] = m[i] >> imm;
135  }
136  return d;
137 }
138 
139 inline uint8x8_t
140 vshrn_n_u16(uint16x8_t m, const unsigned imm)
141 {
142  return vshr<uint8_t, uint16_t, 8>(m, imm);
143 }
144 
145 /***** ***** ***** ***** 5.10.3 VADD ***** ***** ***** *****/
146 
147 template<class Td, class Tn, class Tm, unsigned S>
148 inline tvector<Td, S>
150 {
152  for(unsigned i = 0; i < S; ++i) {
153  d[i] = n[i] + m[i];
154  }
155  return d;
156 }
157 
158 inline uint16x8_t
160 {
161  return vadd<uint16_t, uint16_t, uint16_t, 8>(n, m);
162 }
163 
164 /***** ***** ***** ***** 5.11.1 VMUL ***** ***** ***** *****/
165 
166 template<class Td, class Ts, unsigned S>
167 inline tvector<Td, S>
169 {
171  for(unsigned i = 0; i < S; ++i) {
172  d[i] = n[i] * m[i];
173  }
174  return d;
175 }
176 
177 
178 inline uint16x8_t
180 {
181  return vmul<uint16_t, uint8_t, 8>(n, m);
182 }
183 
184 /***** ***** ***** ***** 5.12.3 VLDn and VSTn ***** ***** ***** *****/
185 
186 template<class Td, unsigned S, unsigned D>
187 inline tmatrix<Td, S, D>
188 vld(Td* base)
189 {
191  for(unsigned i = 0; i < S; ++i) {
192  for(unsigned j = 0; j < D; ++j) {
193  d.val[j][i] = static_cast<Td>(base[i * D + j]);
194  }
195  }
196  return d;
197 }
198 
199 inline uint8x8x4_t
200 vld4_u8(uint8_t* base)
201 {
202  return vld<uint8_t, 8, 4>(base);
203 }
204 
205 template<class Td, unsigned S, unsigned D>
206 inline void
207 vst(Td* base, tmatrix<Td, S, D> list)
208 {
209  for(unsigned i = 0; i < S; ++i) {
210  for(unsigned j = 0; j < D; ++j) {
211  base[i * D + j] = list.val[j][i];
212  }
213  }
214 }
215 
216 inline void
217 vst4_u8(uint8_t* base, uint8x8x4_t list)
218 {
219  vst<uint8_t, 8, 4>(base, list);
220 }
221 
222 #endif
223 
224 #endif
uint8x8_t vshrn_n_u16(uint16x8_t m, const unsigned imm)
Definition: neon.hpp:140
tvector< Td, S > vadd(tvector< Tn, S > n, tvector< Tm, S > m)
Definition: neon.hpp:149
tvector< uint16_t, 8 > uint16x8_t
Definition: neon.hpp:82
const T & operator[](unsigned i) const
Definition: neon.hpp:67
tvector< T, S > val[D]
Definition: neon.hpp:94
GLuint const GLfloat * val
Definition: glew.h:2614
#define d
uint16x8_t vdupq_n_u16(uint16_t imm)
Definition: neon.hpp:115
GLint GLenum GLsizei GLint GLsizei const GLvoid * data
Definition: glew.h:1347
tvector< Td, S > vdup_n(Td imm)
Definition: neon.hpp:105
tmatrix< uint8_t, 8, 4 > uint8x8x4_t
Definition: neon.hpp:97
uint8x8_t vdup_n_u8(uint8_t imm)
Definition: neon.hpp:121
tvector< Td, S > vshr(tvector< Tm, S > m, const unsigned imm)
Definition: neon.hpp:130
T & operator[](unsigned i)
Definition: neon.hpp:73
uint8x8x4_t vld4_u8(uint8_t *base)
Definition: neon.hpp:200
tvector< uint8_t, 8 > uint8x8_t
Definition: neon.hpp:81
void vst(Td *base, tmatrix< Td, S, D > list)
Definition: neon.hpp:207
void vst4_u8(uint8_t *base, uint8x8x4_t list)
Definition: neon.hpp:217
tmatrix< Td, S, D > vld(Td *base)
Definition: neon.hpp:188
size_t i
Definition: function.cpp:1057
Emulates a vector.
Definition: neon.hpp:64
tvector< Td, S > vmul(tvector< Ts, S > n, tvector< Ts, S > m)
Definition: neon.hpp:168
GLclampd n
Definition: glew.h:5903
const GLdouble * m
Definition: glew.h:6968
uint16x8_t vaddq_u16(uint16x8_t n, uint16x8_t m)
Definition: neon.hpp:159
Emulates a matrix.
Definition: neon.hpp:92
uint16x8_t vmull_u8(uint8x8_t n, uint8x8_t m)
Definition: neon.hpp:179
#define S(x)
Definition: luac.cpp:374