Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vecemu.c
Go to the documentation of this file.
1 /*
2  * Routines to emulate some Altivec/VMX instructions, specifically
3  * those that can trap when given denormalized operands in Java mode.
4  */
5 #include <linux/kernel.h>
6 #include <linux/errno.h>
7 #include <linux/sched.h>
8 #include <asm/ptrace.h>
9 #include <asm/processor.h>
10 #include <asm/uaccess.h>
11 
12 /* Functions in vector.S */
13 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
14 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
15 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
16 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
17 extern void vrefp(vector128 *dst, vector128 *src);
18 extern void vrsqrtefp(vector128 *dst, vector128 *src);
19 extern void vexptep(vector128 *dst, vector128 *src);
20 
/*
 * 2^(k/8) for k = 0..7, stored as 24-bit mantissas scaled by 2^23
 * (so entry 0 is 1.0 and entry 4 is sqrt(2)).  Read-only.
 */
static const unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN            -> the same NaN with the quiet bit set
 *   |x| >= 256     -> +Inf for positive x, 0 for negative x
 *   |x| <  2^-23   -> 1.0
 *
 * Otherwise x is converted to signed 9.23 fixed point.  The integer
 * part becomes the result exponent, the top three fraction bits index
 * exp2s[], and the remaining 20 fraction bits are used for a linear
 * interpolation between table entries.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = (int)((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;		/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;	/* at most 24 + 7 bits, stays positive */
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * This relies on >> of a negative int being an arithmetic shift
	 * (floor division), which is what GCC implements.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[((unsigned int)pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction.
	 * Each step keeps the high 32 bits of an unsigned 32x32
	 * product; 0x172b83ff is (2^0.125 - 1) * 2^32.  The shift is
	 * done on the unsigned value so that discarding the upper
	 * bits is well defined even when pwr is negative.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
			       0x172b83ffu) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	/* mant still carries its leading 1, which bumps the exponent */
	if (exp >= 0)
		return mant + ((unsigned int)exp << 23);

	/* denormalized result: shift right by -exp, rounding half up */
	exp = -exp;
	mant += 1u << (exp - 1);
	return mant >> exp;
}
86 
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN          -> the same NaN with the quiet bit set
 *   +Inf         -> +Inf
 *   +0 or -0     -> -Inf
 *   x < 0, -Inf  -> QNaN (0x7fc00000), since the logarithm of a
 *                   negative number is undefined
 *
 * For positive finite x the mantissa is reduced to [1, 2^0.125) by
 * conditionally dividing out 2^0.5, 2^0.25 and 2^0.125, the remainder
 * is approximated linearly, and the signed 9.23 fixed-point sum is
 * converted back to floating point.
 */
static unsigned int elog2(unsigned int s)
{
	unsigned int mant, mag, sign;
	int exp, lz;

	mant = s & 0x7fffff;
	if ((s & 0x7f800000) == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if ((s & 0x7fffffff) == 0)		/* +0 or -0 */
		return 0xff800000;		/* return -Inf */
	if (s & 0x80000000)			/* negative, non-zero */
		return 0x7fc00000;		/* return QNaN */

	if ((s & 0x7f800000) == 0) {
		/* denormalized: mant != 0 here, so clz is well defined */
		lz = __builtin_clz(mant);
		mant <<= lz - 8;	/* bring leading 1 up to bit 23 */
		exp = -118 - lz;
	} else {
		mant |= 0x800000;
		exp = (int)(s >> 23) - 127;	/* sign bit is clear here */
	}
	/* integer part of the result, in 9.23 fixed point */
	exp *= 1 << 23;

	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp += 0x400000;			/* 0.5 * 2^23 */
		/* multiply by 2^-0.5 * 2^32, keep the high word */
		mant = (unsigned int)(((unsigned long long)mant *
				       0xb504f334u) >> 32);
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp += 0x200000;			/* 0.25 * 2^23 */
		/* multiply by 2^-0.25 * 2^32, keep the high word */
		mant = (unsigned int)(((unsigned long long)mant *
				       0xd744fccbu) >> 32);
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp += 0x100000;			/* 0.125 * 2^23 */
		/* multiply by 2^-0.125 * 2^32, keep the high word */
		mant = (unsigned int)(((unsigned long long)mant *
				       0xeac0c6e8u) >> 32);
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		exp += (int)(((unsigned long long)((mant - 0x800000) << 1) *
			      0xb0c7cd3au) >> 32);
	}

	/* convert the signed 9.23 fixed-point value to floating point */
	if (exp == 0)
		return 0;
	if (exp < 0) {
		sign = 0x80000000;
		mag = -(unsigned int)exp;
	} else {
		sign = 0;
		mag = exp;
	}
	/* normalize so that the leading 1 sits in bit 23 */
	lz = 8 - __builtin_clz(mag);
	if (lz > 0)
		mag >>= lz;
	else if (lz < 0)
		mag <<= -lz;
	/* the leading 1 of mag carries into the exponent field */
	return sign + ((unsigned int)(lz + 126) << 23) + mag;
}
151 
#define VSCR_SAT	1

/*
 * Convert the single-precision bit pattern `x', multiplied by
 * 2^scale, to a signed 32-bit integer, truncating towards zero.
 *
 * NaN converts to 0.  Values that do not fit saturate to 0x7fffffff
 * or 0x80000000 and set VSCR_SAT in *vscrp; a product of exactly
 * -2^31 is representable and therefore does not set the flag.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	int fraction = x & 0x7fffff;
	int power = (x >> 23) & 0xff;

	if (power == 255 && fraction != 0)
		return 0;		/* NaN -> 0 */

	/* unbiased exponent of the scaled value */
	power = power - 127 + scale;
	if (power < 0)
		return 0;		/* magnitude below 1: truncates to 0 */

	if (power < 31) {
		/* put the leading 1 at bit 30, then shift down into place */
		int magnitude = ((fraction | 0x800000) << 7) >> (30 - power);

		return negative ? -magnitude : magnitude;
	}

	/* out of range: saturate, flagging it unless the value is -2^31 */
	if (x + (scale << 23) != 0xcf000000)
		*vscrp |= VSCR_SAT;
	return negative ? 0x80000000 : 0x7fffffff;
}
175 
176 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
177 {
178  int exp;
179  unsigned int mant;
180 
181  exp = (x >> 23) & 0xff;
182  mant = x & 0x7fffff;
183  if (exp == 255 && mant != 0)
184  return 0; /* NaN -> 0 */
185  exp = exp - 127 + scale;
186  if (exp < 0)
187  return 0; /* round towards zero */
188  if (x & 0x80000000) {
189  /* negative => saturate to 0 */
190  *vscrp |= VSCR_SAT;
191  return 0;
192  }
193  if (exp >= 32) {
194  /* saturate */
195  *vscrp |= VSCR_SAT;
196  return 0xffffffff;
197  }
198  mant |= 0x800000;
199  mant = (mant << 8) >> (31 - exp);
200  return mant;
201 }
202 
/*
 * Round to floating integer, towards 0.
 *
 * Takes and returns a single-precision bit pattern.  NaNs come back
 * with the quiet bit set; everything else keeps its sign and has its
 * fraction bits cleared.
 */
static unsigned int rfiz(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;		/* NaN -> make it a QNaN */
	if (e >= 23)
		return x;			/* already integral (or Inf) */
	if (e >= 0)
		/* clear the mantissa bits that lie below the binary point */
		return x & ~(0x7fffffu >> e);
	return x & 0x80000000;			/* |x| < 1.0 rounds to 0 */
}
217 
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 *
 * Takes and returns a single-precision bit pattern.  NaNs come back
 * with the quiet bit set; zeros, infinities and values that are
 * already integral are returned unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int below_point;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;		/* NaN -> make it a QNaN */
	if (e >= 23 || (x & 0x7fffffff) == 0)
		return x;			/* integral, Inf, or +/-0 */
	if (e < 0)
		/* 0 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;

	/*
	 * Bump the magnitude past the next integer unless it already is
	 * one, then drop the fraction.  A carry out of the mantissa just
	 * increments the exponent; it cannot reach the sign bit.
	 */
	below_point = 0x7fffffu >> e;
	x += below_point;
	return x & ~below_point;
}
238 
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer (IEEE round-to-nearest).
 *
 * Takes and returns a single-precision bit pattern.  NaNs come back
 * with the quiet bit set; infinities and values that are already
 * integral are returned unchanged.  The sign of the input is always
 * preserved, so small negative values round to -0.
 */
static unsigned int rfin(unsigned int x)
{
	unsigned int half, mask, odd;
	int exp;

	exp = (int)((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1; 0 is even */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	half = 0x400000u >> exp;	/* weight of 0.5 in the mantissa */
	mask = 0x7fffffu >> exp;	/* all fraction bits */
	/*
	 * Lowest bit of the integer part.  For exp == 0 this reads bit 23,
	 * the low bit of the biased exponent 127, which is 1 just like
	 * the implicit leading 1 it stands in for.
	 */
	odd = (x >> (23 - exp)) & 1;

	/*
	 * Adding (half - 1 + odd) carries into the integer part when the
	 * fraction is above 0.5, or equal to 0.5 with an odd integer
	 * part.  A carry out of the mantissa just increments the
	 * exponent.  Then chop off the fraction bits.
	 */
	return (x + half - 1 + odd) & ~mask;
}
258 
260 {
261  unsigned int instr, i;
262  unsigned int va, vb, vc, vd;
263  vector128 *vrs;
264 
265  if (get_user(instr, (unsigned int __user *) regs->nip))
266  return -EFAULT;
267  if ((instr >> 26) != 4)
268  return -EINVAL; /* not an altivec instruction */
269  vd = (instr >> 21) & 0x1f;
270  va = (instr >> 16) & 0x1f;
271  vb = (instr >> 11) & 0x1f;
272  vc = (instr >> 6) & 0x1f;
273 
274  vrs = current->thread.vr;
275  switch (instr & 0x3f) {
276  case 10:
277  switch (vc) {
278  case 0: /* vaddfp */
279  vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
280  break;
281  case 1: /* vsubfp */
282  vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
283  break;
284  case 4: /* vrefp */
285  vrefp(&vrs[vd], &vrs[vb]);
286  break;
287  case 5: /* vrsqrtefp */
288  vrsqrtefp(&vrs[vd], &vrs[vb]);
289  break;
290  case 6: /* vexptefp */
291  for (i = 0; i < 4; ++i)
292  vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
293  break;
294  case 7: /* vlogefp */
295  for (i = 0; i < 4; ++i)
296  vrs[vd].u[i] = elog2(vrs[vb].u[i]);
297  break;
298  case 8: /* vrfin */
299  for (i = 0; i < 4; ++i)
300  vrs[vd].u[i] = rfin(vrs[vb].u[i]);
301  break;
302  case 9: /* vrfiz */
303  for (i = 0; i < 4; ++i)
304  vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
305  break;
306  case 10: /* vrfip */
307  for (i = 0; i < 4; ++i) {
308  u32 x = vrs[vb].u[i];
309  x = (x & 0x80000000)? rfiz(x): rfii(x);
310  vrs[vd].u[i] = x;
311  }
312  break;
313  case 11: /* vrfim */
314  for (i = 0; i < 4; ++i) {
315  u32 x = vrs[vb].u[i];
316  x = (x & 0x80000000)? rfii(x): rfiz(x);
317  vrs[vd].u[i] = x;
318  }
319  break;
320  case 14: /* vctuxs */
321  for (i = 0; i < 4; ++i)
322  vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
323  &current->thread.vscr.u[3]);
324  break;
325  case 15: /* vctsxs */
326  for (i = 0; i < 4; ++i)
327  vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
328  &current->thread.vscr.u[3]);
329  break;
330  default:
331  return -EINVAL;
332  }
333  break;
334  case 46: /* vmaddfp */
335  vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
336  break;
337  case 47: /* vnmsubfp */
338  vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
339  break;
340  default:
341  return -EINVAL;
342  }
343 
344  return 0;
345 }