Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
visemul.c
Go to the documentation of this file.
1 /* visemul.c: Emulation of VIS instructions.
2  *
3  * Copyright (C) 2006 David S. Miller ([email protected])
4  */
5 #include <linux/kernel.h>
6 #include <linux/errno.h>
7 #include <linux/thread_info.h>
8 #include <linux/perf_event.h>
9 
10 #include <asm/ptrace.h>
11 #include <asm/pstate.h>
12 #include <asm/fpumacro.h>
13 #include <asm/uaccess.h>
14 #include <asm/cacheflush.h>
15 
16 /* OPF field of various VIS instructions. */
17 
18 /* 000111011 - four 16-bit packs */
19 #define FPACK16_OPF 0x03b
20 
21 /* 000111010 - two 32-bit packs */
22 #define FPACK32_OPF 0x03a
23 
24 /* 000111101 - four 16-bit packs */
25 #define FPACKFIX_OPF 0x03d
26 
27 /* 001001101 - four 16-bit expands */
28 #define FEXPAND_OPF 0x04d
29 
30 /* 001001011 - two 32-bit merges */
31 #define FPMERGE_OPF 0x04b
32 
33 /* 000110001 - 8-by-16-bit partitoned product */
34 #define FMUL8x16_OPF 0x031
35 
36 /* 000110011 - 8-by-16-bit upper alpha partitioned product */
37 #define FMUL8x16AU_OPF 0x033
38 
39 /* 000110101 - 8-by-16-bit lower alpha partitioned product */
40 #define FMUL8x16AL_OPF 0x035
41 
42 /* 000110110 - upper 8-by-16-bit partitioned product */
43 #define FMUL8SUx16_OPF 0x036
44 
45 /* 000110111 - lower 8-by-16-bit partitioned product */
46 #define FMUL8ULx16_OPF 0x037
47 
48 /* 000111000 - upper 8-by-16-bit partitioned product */
49 #define FMULD8SUx16_OPF 0x038
50 
51 /* 000111001 - lower unsigned 8-by-16-bit partitioned product */
52 #define FMULD8ULx16_OPF 0x039
53 
54 /* 000101000 - four 16-bit compare; set rd if src1 > src2 */
55 #define FCMPGT16_OPF 0x028
56 
57 /* 000101100 - two 32-bit compare; set rd if src1 > src2 */
58 #define FCMPGT32_OPF 0x02c
59 
60 /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
61 #define FCMPLE16_OPF 0x020
62 
63 /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
64 #define FCMPLE32_OPF 0x024
65 
66 /* 000100010 - four 16-bit compare; set rd if src1 != src2 */
67 #define FCMPNE16_OPF 0x022
68 
69 /* 000100110 - two 32-bit compare; set rd if src1 != src2 */
70 #define FCMPNE32_OPF 0x026
71 
72 /* 000101010 - four 16-bit compare; set rd if src1 == src2 */
73 #define FCMPEQ16_OPF 0x02a
74 
75 /* 000101110 - two 32-bit compare; set rd if src1 == src2 */
76 #define FCMPEQ32_OPF 0x02e
77 
78 /* 000000000 - Eight 8-bit edge boundary processing */
79 #define EDGE8_OPF 0x000
80 
81 /* 000000001 - Eight 8-bit edge boundary processing, no CC */
82 #define EDGE8N_OPF 0x001
83 
84 /* 000000010 - Eight 8-bit edge boundary processing, little-endian */
85 #define EDGE8L_OPF 0x002
86 
87 /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
88 #define EDGE8LN_OPF 0x003
89 
90 /* 000000100 - Four 16-bit edge boundary processing */
91 #define EDGE16_OPF 0x004
92 
93 /* 000000101 - Four 16-bit edge boundary processing, no CC */
94 #define EDGE16N_OPF 0x005
95 
96 /* 000000110 - Four 16-bit edge boundary processing, little-endian */
97 #define EDGE16L_OPF 0x006
98 
99 /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
100 #define EDGE16LN_OPF 0x007
101 
102 /* 000001000 - Two 32-bit edge boundary processing */
103 #define EDGE32_OPF 0x008
104 
105 /* 000001001 - Two 32-bit edge boundary processing, no CC */
106 #define EDGE32N_OPF 0x009
107 
108 /* 000001010 - Two 32-bit edge boundary processing, little-endian */
109 #define EDGE32L_OPF 0x00a
110 
111 /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */
112 #define EDGE32LN_OPF 0x00b
113 
114 /* 000111110 - distance between 8 8-bit components */
115 #define PDIST_OPF 0x03e
116 
117 /* 000010000 - convert 8-bit 3-D address to blocked byte address */
118 #define ARRAY8_OPF 0x010
119 
120 /* 000010010 - convert 16-bit 3-D address to blocked byte address */
121 #define ARRAY16_OPF 0x012
122 
123 /* 000010100 - convert 32-bit 3-D address to blocked byte address */
124 #define ARRAY32_OPF 0x014
125 
126 /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
127 #define BMASK_OPF 0x019
128 
129 /* 001001100 - Permute bytes as specified by GSR.MASK */
130 #define BSHUFFLE_OPF 0x04c
131 
132 #define VIS_OPF_SHIFT 5
133 #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT)
134 
135 #define RS1(INSN) (((INSN) >> 14) & 0x1f)
136 #define RS2(INSN) (((INSN) >> 0) & 0x1f)
137 #define RD(INSN) (((INSN) >> 25) & 0x1f)
138 
139 static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
140  unsigned int rd, int from_kernel)
141 {
142  if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
143  if (from_kernel != 0)
144  __asm__ __volatile__("flushw");
145  else
146  flushw_user();
147  }
148 }
149 
150 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
151 {
152  unsigned long value, fp;
153 
154  if (reg < 16)
155  return (!reg ? 0 : regs->u_regs[reg]);
156 
157  fp = regs->u_regs[UREG_FP];
158 
159  if (regs->tstate & TSTATE_PRIV) {
160  struct reg_window *win;
161  win = (struct reg_window *)(fp + STACK_BIAS);
162  value = win->locals[reg - 16];
163  } else if (!test_thread_64bit_stack(fp)) {
164  struct reg_window32 __user *win32;
165  win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
166  get_user(value, &win32->locals[reg - 16]);
167  } else {
168  struct reg_window __user *win;
169  win = (struct reg_window __user *)(fp + STACK_BIAS);
170  get_user(value, &win->locals[reg - 16]);
171  }
172  return value;
173 }
174 
175 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
176  struct pt_regs *regs)
177 {
178  unsigned long fp = regs->u_regs[UREG_FP];
179 
180  BUG_ON(reg < 16);
181  BUG_ON(regs->tstate & TSTATE_PRIV);
182 
183  if (!test_thread_64bit_stack(fp)) {
184  struct reg_window32 __user *win32;
185  win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
186  return (unsigned long __user *)&win32->locals[reg - 16];
187  } else {
188  struct reg_window __user *win;
189  win = (struct reg_window __user *)(fp + STACK_BIAS);
190  return &win->locals[reg - 16];
191  }
192 }
193 
194 static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
195  struct pt_regs *regs)
196 {
197  BUG_ON(reg >= 16);
198  BUG_ON(regs->tstate & TSTATE_PRIV);
199 
200  return &regs->u_regs[reg];
201 }
202 
203 static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
204 {
205  if (rd < 16) {
206  unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
207 
208  *rd_kern = val;
209  } else {
210  unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
211 
212  if (!test_thread_64bit_stack(regs->u_regs[UREG_FP]))
213  __put_user((u32)val, (u32 __user *)rd_user);
214  else
215  __put_user(val, rd_user);
216  }
217 }
218 
219 static inline unsigned long fpd_regval(struct fpustate *f,
220  unsigned int insn_regnum)
221 {
222  insn_regnum = (((insn_regnum & 1) << 5) |
223  (insn_regnum & 0x1e));
224 
225  return *(unsigned long *) &f->regs[insn_regnum];
226 }
227 
228 static inline unsigned long *fpd_regaddr(struct fpustate *f,
229  unsigned int insn_regnum)
230 {
231  insn_regnum = (((insn_regnum & 1) << 5) |
232  (insn_regnum & 0x1e));
233 
234  return (unsigned long *) &f->regs[insn_regnum];
235 }
236 
237 static inline unsigned int fps_regval(struct fpustate *f,
238  unsigned int insn_regnum)
239 {
240  return f->regs[insn_regnum];
241 }
242 
243 static inline unsigned int *fps_regaddr(struct fpustate *f,
244  unsigned int insn_regnum)
245 {
246  return &f->regs[insn_regnum];
247 }
248 
249 struct edge_tab {
251 };
252 static struct edge_tab edge8_tab[8] = {
253  { 0xff, 0x80 },
254  { 0x7f, 0xc0 },
255  { 0x3f, 0xe0 },
256  { 0x1f, 0xf0 },
257  { 0x0f, 0xf8 },
258  { 0x07, 0xfc },
259  { 0x03, 0xfe },
260  { 0x01, 0xff },
261 };
262 static struct edge_tab edge8_tab_l[8] = {
263  { 0xff, 0x01 },
264  { 0xfe, 0x03 },
265  { 0xfc, 0x07 },
266  { 0xf8, 0x0f },
267  { 0xf0, 0x1f },
268  { 0xe0, 0x3f },
269  { 0xc0, 0x7f },
270  { 0x80, 0xff },
271 };
272 static struct edge_tab edge16_tab[4] = {
273  { 0xf, 0x8 },
274  { 0x7, 0xc },
275  { 0x3, 0xe },
276  { 0x1, 0xf },
277 };
278 static struct edge_tab edge16_tab_l[4] = {
279  { 0xf, 0x1 },
280  { 0xe, 0x3 },
281  { 0xc, 0x7 },
282  { 0x8, 0xf },
283 };
284 static struct edge_tab edge32_tab[2] = {
285  { 0x3, 0x2 },
286  { 0x1, 0x3 },
287 };
288 static struct edge_tab edge32_tab_l[2] = {
289  { 0x3, 0x1 },
290  { 0x2, 0x3 },
291 };
292 
293 static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
294 {
295  unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
296  u16 left, right;
297 
298  maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
299  orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
300  orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
301 
302  if (test_thread_flag(TIF_32BIT)) {
303  rs1 = rs1 & 0xffffffff;
304  rs2 = rs2 & 0xffffffff;
305  }
306  switch (opf) {
307  default:
308  case EDGE8_OPF:
309  case EDGE8N_OPF:
310  left = edge8_tab[rs1 & 0x7].left;
311  right = edge8_tab[rs2 & 0x7].right;
312  break;
313  case EDGE8L_OPF:
314  case EDGE8LN_OPF:
315  left = edge8_tab_l[rs1 & 0x7].left;
316  right = edge8_tab_l[rs2 & 0x7].right;
317  break;
318 
319  case EDGE16_OPF:
320  case EDGE16N_OPF:
321  left = edge16_tab[(rs1 >> 1) & 0x3].left;
322  right = edge16_tab[(rs2 >> 1) & 0x3].right;
323  break;
324 
325  case EDGE16L_OPF:
326  case EDGE16LN_OPF:
327  left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
328  right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
329  break;
330 
331  case EDGE32_OPF:
332  case EDGE32N_OPF:
333  left = edge32_tab[(rs1 >> 2) & 0x1].left;
334  right = edge32_tab[(rs2 >> 2) & 0x1].right;
335  break;
336 
337  case EDGE32L_OPF:
338  case EDGE32LN_OPF:
339  left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
340  right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
341  break;
342  }
343 
344  if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
345  rd_val = right & left;
346  else
347  rd_val = left;
348 
349  store_reg(regs, rd_val, RD(insn));
350 
351  switch (opf) {
352  case EDGE8_OPF:
353  case EDGE8L_OPF:
354  case EDGE16_OPF:
355  case EDGE16L_OPF:
356  case EDGE32_OPF:
357  case EDGE32L_OPF: {
358  unsigned long ccr, tstate;
359 
360  __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
361  "rd %%ccr, %0"
362  : "=r" (ccr)
363  : "r" (orig_rs1), "r" (orig_rs2)
364  : "cc");
365  tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
366  regs->tstate = tstate | (ccr << 32UL);
367  }
368  }
369 }
370 
371 static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
372 {
373  unsigned long rs1, rs2, rd_val;
374  unsigned int bits, bits_mask;
375 
376  maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
377  rs1 = fetch_reg(RS1(insn), regs);
378  rs2 = fetch_reg(RS2(insn), regs);
379 
380  bits = (rs2 > 5 ? 5 : rs2);
381  bits_mask = (1UL << bits) - 1UL;
382 
383  rd_val = ((((rs1 >> 11) & 0x3) << 0) |
384  (((rs1 >> 33) & 0x3) << 2) |
385  (((rs1 >> 55) & 0x1) << 4) |
386  (((rs1 >> 13) & 0xf) << 5) |
387  (((rs1 >> 35) & 0xf) << 9) |
388  (((rs1 >> 56) & 0xf) << 13) |
389  (((rs1 >> 17) & bits_mask) << 17) |
390  (((rs1 >> 39) & bits_mask) << (17 + bits)) |
391  (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
392 
393  switch (opf) {
394  case ARRAY16_OPF:
395  rd_val <<= 1;
396  break;
397 
398  case ARRAY32_OPF:
399  rd_val <<= 2;
400  }
401 
402  store_reg(regs, rd_val, RD(insn));
403 }
404 
405 static void bmask(struct pt_regs *regs, unsigned int insn)
406 {
407  unsigned long rs1, rs2, rd_val, gsr;
408 
409  maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
410  rs1 = fetch_reg(RS1(insn), regs);
411  rs2 = fetch_reg(RS2(insn), regs);
412  rd_val = rs1 + rs2;
413 
414  store_reg(regs, rd_val, RD(insn));
415 
416  gsr = current_thread_info()->gsr[0] & 0xffffffff;
417  gsr |= rd_val << 32UL;
418  current_thread_info()->gsr[0] = gsr;
419 }
420 
421 static void bshuffle(struct pt_regs *regs, unsigned int insn)
422 {
423  struct fpustate *f = FPUSTATE;
424  unsigned long rs1, rs2, rd_val;
425  unsigned long bmask, i;
426 
427  bmask = current_thread_info()->gsr[0] >> 32UL;
428 
429  rs1 = fpd_regval(f, RS1(insn));
430  rs2 = fpd_regval(f, RS2(insn));
431 
432  rd_val = 0UL;
433  for (i = 0; i < 8; i++) {
434  unsigned long which = (bmask >> (i * 4)) & 0xf;
435  unsigned long byte;
436 
437  if (which < 8)
438  byte = (rs1 >> (which * 8)) & 0xff;
439  else
440  byte = (rs2 >> ((which-8)*8)) & 0xff;
441  rd_val |= (byte << (i * 8));
442  }
443 
444  *fpd_regaddr(f, RD(insn)) = rd_val;
445 }
446 
447 static void pdist(struct pt_regs *regs, unsigned int insn)
448 {
449  struct fpustate *f = FPUSTATE;
450  unsigned long rs1, rs2, *rd, rd_val;
451  unsigned long i;
452 
453  rs1 = fpd_regval(f, RS1(insn));
454  rs2 = fpd_regval(f, RS2(insn));
455  rd = fpd_regaddr(f, RD(insn));
456 
457  rd_val = *rd;
458 
459  for (i = 0; i < 8; i++) {
460  s16 s1, s2;
461 
462  s1 = (rs1 >> (56 - (i * 8))) & 0xff;
463  s2 = (rs2 >> (56 - (i * 8))) & 0xff;
464 
465  /* Absolute value of difference. */
466  s1 -= s2;
467  if (s1 < 0)
468  s1 = ~s1 + 1;
469 
470  rd_val += s1;
471  }
472 
473  *rd = rd_val;
474 }
475 
476 static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
477 {
478  struct fpustate *f = FPUSTATE;
479  unsigned long rs1, rs2, gsr, scale, rd_val;
480 
481  gsr = current_thread_info()->gsr[0];
482  scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
483  switch (opf) {
484  case FPACK16_OPF: {
485  unsigned long byte;
486 
487  rs2 = fpd_regval(f, RS2(insn));
488  rd_val = 0;
489  for (byte = 0; byte < 4; byte++) {
490  unsigned int val;
491  s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
492  int scaled = src << scale;
493  int from_fixed = scaled >> 7;
494 
495  val = ((from_fixed < 0) ?
496  0 :
497  (from_fixed > 255) ?
498  255 : from_fixed);
499 
500  rd_val |= (val << (8 * byte));
501  }
502  *fps_regaddr(f, RD(insn)) = rd_val;
503  break;
504  }
505 
506  case FPACK32_OPF: {
507  unsigned long word;
508 
509  rs1 = fpd_regval(f, RS1(insn));
510  rs2 = fpd_regval(f, RS2(insn));
511  rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
512  for (word = 0; word < 2; word++) {
513  unsigned long val;
514  s32 src = (rs2 >> (word * 32UL));
515  s64 scaled = src << scale;
516  s64 from_fixed = scaled >> 23;
517 
518  val = ((from_fixed < 0) ?
519  0 :
520  (from_fixed > 255) ?
521  255 : from_fixed);
522 
523  rd_val |= (val << (32 * word));
524  }
525  *fpd_regaddr(f, RD(insn)) = rd_val;
526  break;
527  }
528 
529  case FPACKFIX_OPF: {
530  unsigned long word;
531 
532  rs2 = fpd_regval(f, RS2(insn));
533 
534  rd_val = 0;
535  for (word = 0; word < 2; word++) {
536  long val;
537  s32 src = (rs2 >> (word * 32UL));
538  s64 scaled = src << scale;
539  s64 from_fixed = scaled >> 16;
540 
541  val = ((from_fixed < -32768) ?
542  -32768 :
543  (from_fixed > 32767) ?
544  32767 : from_fixed);
545 
546  rd_val |= ((val & 0xffff) << (word * 16));
547  }
548  *fps_regaddr(f, RD(insn)) = rd_val;
549  break;
550  }
551 
552  case FEXPAND_OPF: {
553  unsigned long byte;
554 
555  rs2 = fps_regval(f, RS2(insn));
556 
557  rd_val = 0;
558  for (byte = 0; byte < 4; byte++) {
559  unsigned long val;
560  u8 src = (rs2 >> (byte * 8)) & 0xff;
561 
562  val = src << 4;
563 
564  rd_val |= (val << (byte * 16));
565  }
566  *fpd_regaddr(f, RD(insn)) = rd_val;
567  break;
568  }
569 
570  case FPMERGE_OPF: {
571  rs1 = fps_regval(f, RS1(insn));
572  rs2 = fps_regval(f, RS2(insn));
573 
574  rd_val = (((rs2 & 0x000000ff) << 0) |
575  ((rs1 & 0x000000ff) << 8) |
576  ((rs2 & 0x0000ff00) << 8) |
577  ((rs1 & 0x0000ff00) << 16) |
578  ((rs2 & 0x00ff0000) << 16) |
579  ((rs1 & 0x00ff0000) << 24) |
580  ((rs2 & 0xff000000) << 24) |
581  ((rs1 & 0xff000000) << 32));
582  *fpd_regaddr(f, RD(insn)) = rd_val;
583  break;
584  }
585  }
586 }
587 
588 static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
589 {
590  struct fpustate *f = FPUSTATE;
591  unsigned long rs1, rs2, rd_val;
592 
593  switch (opf) {
594  case FMUL8x16_OPF: {
595  unsigned long byte;
596 
597  rs1 = fps_regval(f, RS1(insn));
598  rs2 = fpd_regval(f, RS2(insn));
599 
600  rd_val = 0;
601  for (byte = 0; byte < 4; byte++) {
602  u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
603  s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
604  u32 prod = src1 * src2;
605  u16 scaled = ((prod & 0x00ffff00) >> 8);
606 
607  /* Round up. */
608  if (prod & 0x80)
609  scaled++;
610  rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
611  }
612 
613  *fpd_regaddr(f, RD(insn)) = rd_val;
614  break;
615  }
616 
617  case FMUL8x16AU_OPF:
618  case FMUL8x16AL_OPF: {
619  unsigned long byte;
620  s16 src2;
621 
622  rs1 = fps_regval(f, RS1(insn));
623  rs2 = fps_regval(f, RS2(insn));
624 
625  rd_val = 0;
626  src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0);
627  for (byte = 0; byte < 4; byte++) {
628  u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
629  u32 prod = src1 * src2;
630  u16 scaled = ((prod & 0x00ffff00) >> 8);
631 
632  /* Round up. */
633  if (prod & 0x80)
634  scaled++;
635  rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
636  }
637 
638  *fpd_regaddr(f, RD(insn)) = rd_val;
639  break;
640  }
641 
642  case FMUL8SUx16_OPF:
643  case FMUL8ULx16_OPF: {
644  unsigned long byte, ushift;
645 
646  rs1 = fpd_regval(f, RS1(insn));
647  rs2 = fpd_regval(f, RS2(insn));
648 
649  rd_val = 0;
650  ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
651  for (byte = 0; byte < 4; byte++) {
652  u16 src1;
653  s16 src2;
654  u32 prod;
655  u16 scaled;
656 
657  src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
658  src2 = ((rs2 >> (16 * byte)) & 0xffff);
659  prod = src1 * src2;
660  scaled = ((prod & 0x00ffff00) >> 8);
661 
662  /* Round up. */
663  if (prod & 0x80)
664  scaled++;
665  rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
666  }
667 
668  *fpd_regaddr(f, RD(insn)) = rd_val;
669  break;
670  }
671 
672  case FMULD8SUx16_OPF:
673  case FMULD8ULx16_OPF: {
674  unsigned long byte, ushift;
675 
676  rs1 = fps_regval(f, RS1(insn));
677  rs2 = fps_regval(f, RS2(insn));
678 
679  rd_val = 0;
680  ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
681  for (byte = 0; byte < 2; byte++) {
682  u16 src1;
683  s16 src2;
684  u32 prod;
685  u16 scaled;
686 
687  src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
688  src2 = ((rs2 >> (16 * byte)) & 0xffff);
689  prod = src1 * src2;
690  scaled = ((prod & 0x00ffff00) >> 8);
691 
692  /* Round up. */
693  if (prod & 0x80)
694  scaled++;
695  rd_val |= ((scaled & 0xffffUL) <<
696  ((byte * 32UL) + 7UL));
697  }
698  *fpd_regaddr(f, RD(insn)) = rd_val;
699  break;
700  }
701  }
702 }
703 
704 static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
705 {
706  struct fpustate *f = FPUSTATE;
707  unsigned long rs1, rs2, rd_val, i;
708 
709  rs1 = fpd_regval(f, RS1(insn));
710  rs2 = fpd_regval(f, RS2(insn));
711 
712  rd_val = 0;
713 
714  switch (opf) {
715  case FCMPGT16_OPF:
716  for (i = 0; i < 4; i++) {
717  s16 a = (rs1 >> (i * 16)) & 0xffff;
718  s16 b = (rs2 >> (i * 16)) & 0xffff;
719 
720  if (a > b)
721  rd_val |= 8 >> i;
722  }
723  break;
724 
725  case FCMPGT32_OPF:
726  for (i = 0; i < 2; i++) {
727  s32 a = (rs1 >> (i * 32)) & 0xffffffff;
728  s32 b = (rs2 >> (i * 32)) & 0xffffffff;
729 
730  if (a > b)
731  rd_val |= 2 >> i;
732  }
733  break;
734 
735  case FCMPLE16_OPF:
736  for (i = 0; i < 4; i++) {
737  s16 a = (rs1 >> (i * 16)) & 0xffff;
738  s16 b = (rs2 >> (i * 16)) & 0xffff;
739 
740  if (a <= b)
741  rd_val |= 8 >> i;
742  }
743  break;
744 
745  case FCMPLE32_OPF:
746  for (i = 0; i < 2; i++) {
747  s32 a = (rs1 >> (i * 32)) & 0xffffffff;
748  s32 b = (rs2 >> (i * 32)) & 0xffffffff;
749 
750  if (a <= b)
751  rd_val |= 2 >> i;
752  }
753  break;
754 
755  case FCMPNE16_OPF:
756  for (i = 0; i < 4; i++) {
757  s16 a = (rs1 >> (i * 16)) & 0xffff;
758  s16 b = (rs2 >> (i * 16)) & 0xffff;
759 
760  if (a != b)
761  rd_val |= 8 >> i;
762  }
763  break;
764 
765  case FCMPNE32_OPF:
766  for (i = 0; i < 2; i++) {
767  s32 a = (rs1 >> (i * 32)) & 0xffffffff;
768  s32 b = (rs2 >> (i * 32)) & 0xffffffff;
769 
770  if (a != b)
771  rd_val |= 2 >> i;
772  }
773  break;
774 
775  case FCMPEQ16_OPF:
776  for (i = 0; i < 4; i++) {
777  s16 a = (rs1 >> (i * 16)) & 0xffff;
778  s16 b = (rs2 >> (i * 16)) & 0xffff;
779 
780  if (a == b)
781  rd_val |= 8 >> i;
782  }
783  break;
784 
785  case FCMPEQ32_OPF:
786  for (i = 0; i < 2; i++) {
787  s32 a = (rs1 >> (i * 32)) & 0xffffffff;
788  s32 b = (rs2 >> (i * 32)) & 0xffffffff;
789 
790  if (a == b)
791  rd_val |= 2 >> i;
792  }
793  break;
794  }
795 
796  maybe_flush_windows(0, 0, RD(insn), 0);
797  store_reg(regs, rd_val, RD(insn));
798 }
799 
800 /* Emulate the VIS instructions which are not implemented in
801  * hardware on Niagara.
802  */
803 int vis_emul(struct pt_regs *regs, unsigned int insn)
804 {
805  unsigned long pc = regs->tpc;
806  unsigned int opf;
807 
808  BUG_ON(regs->tstate & TSTATE_PRIV);
809 
810  perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
811 
812  if (test_thread_flag(TIF_32BIT))
813  pc = (u32)pc;
814 
815  if (get_user(insn, (u32 __user *) pc))
816  return -EFAULT;
817 
818  save_and_clear_fpu();
819 
820  opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
821  switch (opf) {
822  default:
823  return -EINVAL;
824 
825  /* Pixel Formatting Instructions. */
826  case FPACK16_OPF:
827  case FPACK32_OPF:
828  case FPACKFIX_OPF:
829  case FEXPAND_OPF:
830  case FPMERGE_OPF:
831  pformat(regs, insn, opf);
832  break;
833 
834  /* Partitioned Multiply Instructions */
835  case FMUL8x16_OPF:
836  case FMUL8x16AU_OPF:
837  case FMUL8x16AL_OPF:
838  case FMUL8SUx16_OPF:
839  case FMUL8ULx16_OPF:
840  case FMULD8SUx16_OPF:
841  case FMULD8ULx16_OPF:
842  pmul(regs, insn, opf);
843  break;
844 
845  /* Pixel Compare Instructions */
846  case FCMPGT16_OPF:
847  case FCMPGT32_OPF:
848  case FCMPLE16_OPF:
849  case FCMPLE32_OPF:
850  case FCMPNE16_OPF:
851  case FCMPNE32_OPF:
852  case FCMPEQ16_OPF:
853  case FCMPEQ32_OPF:
854  pcmp(regs, insn, opf);
855  break;
856 
857  /* Edge Handling Instructions */
858  case EDGE8_OPF:
859  case EDGE8N_OPF:
860  case EDGE8L_OPF:
861  case EDGE8LN_OPF:
862  case EDGE16_OPF:
863  case EDGE16N_OPF:
864  case EDGE16L_OPF:
865  case EDGE16LN_OPF:
866  case EDGE32_OPF:
867  case EDGE32N_OPF:
868  case EDGE32L_OPF:
869  case EDGE32LN_OPF:
870  edge(regs, insn, opf);
871  break;
872 
873  /* Pixel Component Distance */
874  case PDIST_OPF:
875  pdist(regs, insn);
876  break;
877 
878  /* Three-Dimensional Array Addressing Instructions */
879  case ARRAY8_OPF:
880  case ARRAY16_OPF:
881  case ARRAY32_OPF:
882  array(regs, insn, opf);
883  break;
884 
885  /* Byte Mask and Shuffle Instructions */
886  case BMASK_OPF:
887  bmask(regs, insn);
888  break;
889 
890  case BSHUFFLE_OPF:
891  bshuffle(regs, insn);
892  break;
893  }
894 
895  regs->tpc = regs->tnpc;
896  regs->tnpc += 4;
897  return 0;
898 }