Linux Kernel  3.7.1
unaligned.c
1 /*
2  * Architecture-specific unaligned trap handling.
3  *
4  * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
5  * Stephane Eranian <[email protected]>
6  * David Mosberger-Tang <[email protected]>
7  *
8  * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
9  * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
10  * stacked register returns an undefined value; it does NOT trigger a
11  * "rsvd register fault").
12  * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
13  * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
14  * 2001/01/17 Add support for emulation of unaligned kernel accesses.
15  */
16 #include <linux/jiffies.h>
17 #include <linux/kernel.h>
18 #include <linux/sched.h>
19 #include <linux/tty.h>
20 #include <linux/ratelimit.h>
21 
22 #include <asm/intrinsics.h>
23 #include <asm/processor.h>
24 #include <asm/rse.h>
25 #include <asm/uaccess.h>
26 #include <asm/unaligned.h>
27 
28 extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
29 
30 #undef DEBUG_UNALIGNED_TRAP
31 
32 #ifdef DEBUG_UNALIGNED_TRAP
33 # define DPRINT(a...) do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
34 # define DDUMP(str,vp,len) dump(str, vp, len)
35 
36 static void
37 dump (const char *str, void *vp, size_t len)
38 {
39  unsigned char *cp = vp;
40  int i;
41 
42  printk("%s", str);
43  for (i = 0; i < len; ++i)
44  printk (" %02x", *cp++);
45  printk("\n");
46 }
47 #else
48 # define DPRINT(a...)
49 # define DDUMP(str,vp,len)
50 #endif
51 
52 #define IA64_FIRST_STACKED_GR 32
53 #define IA64_FIRST_ROTATING_FR 32
54 #define SIGN_EXT9 0xffffffffffffff00ul
55 
56 /*
57  * sysctl settable hook which tells the kernel whether to honor the
58  * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want
59  * to allow the super user to enable/disable this for security reasons
60  * (i.e. don't allow attacker to fill up logs with unaligned accesses).
61  */
62 int no_unaligned_warning;
63 int unaligned_dump_stack;
64 
65 /*
66  * For M-unit:
67  *
68  *  opcode |   m  |   x6    |
69  * --------|------|---------|
70  * [40-37] | [36] | [35:30] |
71  * --------|------|---------|
72  *    4    |   1  |    6    | = 11 bits
73  * --------------------------
74  * However bits [31:30] are not directly useful to distinguish between
75  * load/store so we can use [35:32] instead, which gives the following
76  * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
77  * checking the m-bit until later in the load/store emulation.
78  */
79 #define IA64_OPCODE_MASK 0x1ef
80 #define IA64_OPCODE_SHIFT 32
81 
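/*
 * Illustrative sketch, added for exposition (not part of the original
 * unaligned.c): the handler keys its dispatch on bits [40:32] of the
 * 41-bit instruction slot, with the m bit (36) masked out by the 0x1ef
 * mask as explained above.  The helper name is hypothetical; it is
 * equivalent to the expression used in ia64_handle_unaligned() below.
 */
static inline unsigned long
example_major_opcode (unsigned long slot)
{
	return (slot >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
}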
82 /*
83  * Table C-28 Integer Load/Store
84  *
85  * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
86  *
87  * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
88  * the address (bits [8:3]), so we must fail.
89  */
90 #define LD_OP 0x080
91 #define LDS_OP 0x081
92 #define LDA_OP 0x082
93 #define LDSA_OP 0x083
94 #define LDBIAS_OP 0x084
95 #define LDACQ_OP 0x085
96 /* 0x086, 0x087 are not relevant */
97 #define LDCCLR_OP 0x088
98 #define LDCNC_OP 0x089
99 #define LDCCLRACQ_OP 0x08a
100 #define ST_OP 0x08c
101 #define STREL_OP 0x08d
102 /* 0x08e,0x8f are not relevant */
103 
104 /*
105  * Table C-29 Integer Load +Reg
106  *
107  * we use the ld->m (bit [36:36]) field to determine whether or not we have
108  * a load/store of this form.
109  */
110 
111 /*
112  * Table C-30 Integer Load/Store +Imm
113  *
114  * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
115  *
116  * ld8.fill, st8.fill must be aligned because the NaT bits are based on
117  * the address, so we must fail and the program must be fixed.
118  */
119 #define LD_IMM_OP 0x0a0
120 #define LDS_IMM_OP 0x0a1
121 #define LDA_IMM_OP 0x0a2
122 #define LDSA_IMM_OP 0x0a3
123 #define LDBIAS_IMM_OP 0x0a4
124 #define LDACQ_IMM_OP 0x0a5
125 /* 0x0a6, 0xa7 are not relevant */
126 #define LDCCLR_IMM_OP 0x0a8
127 #define LDCNC_IMM_OP 0x0a9
128 #define LDCCLRACQ_IMM_OP 0x0aa
129 #define ST_IMM_OP 0x0ac
130 #define STREL_IMM_OP 0x0ad
131 /* 0x0ae,0xaf are not relevant */
132 
133 /*
134  * Table C-32 Floating-point Load/Store
135  */
136 #define LDF_OP 0x0c0
137 #define LDFS_OP 0x0c1
138 #define LDFA_OP 0x0c2
139 #define LDFSA_OP 0x0c3
140 /* 0x0c6 is irrelevant */
141 #define LDFCCLR_OP 0x0c8
142 #define LDFCNC_OP 0x0c9
143 /* 0x0cb is irrelevant */
144 #define STF_OP 0x0cc
145 
146 /*
147  * Table C-33 Floating-point Load +Reg
148  *
149  * we use the ld->m (bit [36:36]) field to determine whether or not we have
150  * a load/store of this form.
151  */
152 
153 /*
154  * Table C-34 Floating-point Load/Store +Imm
155  */
156 #define LDF_IMM_OP 0x0e0
157 #define LDFS_IMM_OP 0x0e1
158 #define LDFA_IMM_OP 0x0e2
159 #define LDFSA_IMM_OP 0x0e3
160 /* 0x0e6 is irrelevant */
161 #define LDFCCLR_IMM_OP 0x0e8
162 #define LDFCNC_IMM_OP 0x0e9
163 #define STF_IMM_OP 0x0ec
164 
165 typedef struct {
166  unsigned long qp:6; /* [0:5] */
167  unsigned long r1:7; /* [6:12] */
168  unsigned long imm:7; /* [13:19] */
169  unsigned long r3:7; /* [20:26] */
170  unsigned long x:1; /* [27:27] */
171  unsigned long hint:2; /* [28:29] */
172  unsigned long x6_sz:2; /* [30:31] */
173  unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
174  unsigned long m:1; /* [36:36] */
175  unsigned long op:4; /* [37:40] */
176  unsigned long pad:23; /* [41:63] */
177 } load_store_t;
178 
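/*
 * Illustrative sketch, added for exposition (not part of the original
 * unaligned.c): the handler obtains a load_store_t by overlaying the raw
 * slot through a union (see ia64_handle_unaligned()).  The hypothetical
 * helper below extracts the same fields with explicit shifts and masks,
 * following the bit ranges documented in the structure above.
 */
static inline load_store_t
example_decode_load_store (unsigned long slot)
{
	load_store_t insn;

	insn.qp    = (slot >>  0) & 0x3f;	/* [0:5]   qualifying predicate */
	insn.r1    = (slot >>  6) & 0x7f;	/* [6:12]  target register      */
	insn.imm   = (slot >> 13) & 0x7f;	/* [13:19] imm7b / r2 field     */
	insn.r3    = (slot >> 20) & 0x7f;	/* [20:26] base register        */
	insn.x     = (slot >> 27) & 0x1;	/* [27:27]                      */
	insn.hint  = (slot >> 28) & 0x3;	/* [28:29] locality hint        */
	insn.x6_sz = (slot >> 30) & 0x3;	/* [30:31] access size          */
	insn.x6_op = (slot >> 32) & 0xf;	/* [32:35] x6 opcode extension  */
	insn.m     = (slot >> 36) & 0x1;	/* [36:36]                      */
	insn.op    = (slot >> 37) & 0xf;	/* [37:40] major opcode         */
	insn.pad   = 0;
	return insn;
}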
179 
180 typedef enum {
181  UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
182  UPD_REG /* ldXZ r1=[r3],r2 */
183 } update_t;
184 
185 /*
186  * We use tables to keep track of the offsets of registers in the saved state.
187  * This way we save having big switch/case statements.
188  *
189  * We use bit 0 to indicate switch_stack or pt_regs.
190  * The offset is simply shifted by 1 bit.
191  * A 2-byte value should be enough to hold any kind of offset
192  *
193  * In case the calling convention changes (and thus pt_regs/switch_stack)
194  * simply use RSW instead of RPT or vice-versa.
195  */
196 
197 #define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
198 #define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
199 
200 #define RPT(x) (RPO(x) << 1)
201 #define RSW(x) (1| RSO(x)<<1)
202 
203 #define GR_OFFS(x) (gr_info[x]>>1)
204 #define GR_IN_SW(x) (gr_info[x] & 0x1)
205 
206 #define FR_OFFS(x) (fr_info[x]>>1)
207 #define FR_IN_SW(x) (fr_info[x] & 0x1)
208 
209 static u16 gr_info[32]={
210  0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
211 
212  RPT(r1), RPT(r2), RPT(r3),
213 
214  RSW(r4), RSW(r5), RSW(r6), RSW(r7),
215 
216  RPT(r8), RPT(r9), RPT(r10), RPT(r11),
217  RPT(r12), RPT(r13), RPT(r14), RPT(r15),
218 
219  RPT(r16), RPT(r17), RPT(r18), RPT(r19),
220  RPT(r20), RPT(r21), RPT(r22), RPT(r23),
221  RPT(r24), RPT(r25), RPT(r26), RPT(r27),
222  RPT(r28), RPT(r29), RPT(r30), RPT(r31)
223 };
224 
225 static u16 fr_info[32]={
226  0, /* constant : WE SHOULD NEVER GET THIS */
227  0, /* constant : WE SHOULD NEVER GET THIS */
228 
229  RSW(f2), RSW(f3), RSW(f4), RSW(f5),
230 
231  RPT(f6), RPT(f7), RPT(f8), RPT(f9),
232  RPT(f10), RPT(f11),
233 
234  RSW(f12), RSW(f13), RSW(f14),
235  RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
236  RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
237  RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
238  RSW(f30), RSW(f31)
239 };
240 
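/*
 * Worked example, added for exposition (not part of the original
 * unaligned.c): RPT()/RSW() pack two facts about a static register into
 * one u16 -- bit 0 records whether it is saved in switch_stack (1) or
 * pt_regs (0), and the remaining bits hold the byte offset of its save
 * slot shifted left by one.  GR_OFFS()/GR_IN_SW() (and the FR_* twins)
 * simply undo that packing.  Using the tables above:
 *
 *	gr_info[8] == RPT(r8):  GR_IN_SW(8) == 0 and GR_OFFS(8) is the
 *	                        offset of r8 within struct pt_regs
 *	gr_info[4] == RSW(r4):  GR_IN_SW(4) == 1 and GR_OFFS(4) is the
 *	                        offset of r4 within struct switch_stack
 */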
241 /* Invalidate ALAT entry for integer register REGNO. */
242 static void
243 invala_gr (int regno)
244 {
245 # define F(reg) case reg: ia64_invala_gr(reg); break
246 
247  switch (regno) {
248  F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
249  F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
250  F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
251  F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
252  F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
253  F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
254  F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
255  F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
256  F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
257  F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
258  F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
259  F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
260  F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
261  F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
262  F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
263  F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
264  }
265 # undef F
266 }
267 
268 /* Invalidate ALAT entry for floating-point register REGNO. */
269 static void
270 invala_fr (int regno)
271 {
272 # define F(reg) case reg: ia64_invala_fr(reg); break
273 
274  switch (regno) {
275  F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
276  F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
277  F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
278  F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
279  F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
280  F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
281  F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
282  F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
283  F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
284  F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
285  F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
286  F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
287  F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
288  F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
289  F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
290  F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
291  }
292 # undef F
293 }
294 
295 static inline unsigned long
296 rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
297 {
298  reg += rrb;
299  if (reg >= sor)
300  reg -= sor;
301  return reg;
302 }
303 
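/*
 * Worked example, added for exposition (not part of the original
 * unaligned.c): with a rotating region of sor = 16 stacked registers and
 * rrb.gr = 3,
 *
 *	rotate_reg(16, 3,  0) == 3
 *	rotate_reg(16, 3, 14) == 1	(14 + 3 wraps past sor)
 *
 * The integer-register callers below only rotate indices with ridx < sor;
 * fph_index() further down applies the same helper to the full 96-register
 * FP rotating region.
 */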
304 static void
305 set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
306 {
307  struct switch_stack *sw = (struct switch_stack *) regs - 1;
308  unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
309  unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
310  unsigned long rnats, nat_mask;
311  unsigned long on_kbs;
312  long sof = (regs->cr_ifs) & 0x7f;
313  long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
314  long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
315  long ridx = r1 - 32;
316 
317  if (ridx >= sof) {
318  /* this should never happen, as the "rsvd register fault" has higher priority */
319  DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
320  return;
321  }
322 
323  if (ridx < sor)
324  ridx = rotate_reg(sor, rrb_gr, ridx);
325 
326  DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
327  r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
328 
329  on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
330  addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
331  if (addr >= kbs) {
332  /* the register is on the kernel backing store: easy... */
333  rnat_addr = ia64_rse_rnat_addr(addr);
334  if ((unsigned long) rnat_addr >= sw->ar_bspstore)
335  rnat_addr = &sw->ar_rnat;
336  nat_mask = 1UL << ia64_rse_slot_num(addr);
337 
338  *addr = val;
339  if (nat)
340  *rnat_addr |= nat_mask;
341  else
342  *rnat_addr &= ~nat_mask;
343  return;
344  }
345 
346  if (!user_stack(current, regs)) {
347  DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
348  return;
349  }
350 
351  bspstore = (unsigned long *)regs->ar_bspstore;
352  ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
353  bsp = ia64_rse_skip_regs(ubs_end, -sof);
354  addr = ia64_rse_skip_regs(bsp, ridx);
355 
356  DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
357 
358  ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
359 
360  rnat_addr = ia64_rse_rnat_addr(addr);
361 
362  ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
363  DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
364  (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
365 
366  nat_mask = 1UL << ia64_rse_slot_num(addr);
367  if (nat)
368  rnats |= nat_mask;
369  else
370  rnats &= ~nat_mask;
371  ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
372 
373  DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
374 }
375 
376 
377 static void
378 get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
379 {
380  struct switch_stack *sw = (struct switch_stack *) regs - 1;
381  unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
382  unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
383  unsigned long rnats, nat_mask;
384  unsigned long on_kbs;
385  long sof = (regs->cr_ifs) & 0x7f;
386  long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
387  long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
388  long ridx = r1 - 32;
389 
390  if (ridx >= sof) {
391  /* read of out-of-frame register returns an undefined value; 0 in our case. */
392  DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
393  goto fail;
394  }
395 
396  if (ridx < sor)
397  ridx = rotate_reg(sor, rrb_gr, ridx);
398 
399  DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
400  r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
401 
402  on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
403  addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
404  if (addr >= kbs) {
405  /* the register is on the kernel backing store: easy... */
406  *val = *addr;
407  if (nat) {
408  rnat_addr = ia64_rse_rnat_addr(addr);
409  if ((unsigned long) rnat_addr >= sw->ar_bspstore)
410  rnat_addr = &sw->ar_rnat;
411  nat_mask = 1UL << ia64_rse_slot_num(addr);
412  *nat = (*rnat_addr & nat_mask) != 0;
413  }
414  return;
415  }
416 
417  if (!user_stack(current, regs)) {
418  DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
419  goto fail;
420  }
421 
422  bspstore = (unsigned long *)regs->ar_bspstore;
423  ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
424  bsp = ia64_rse_skip_regs(ubs_end, -sof);
425  addr = ia64_rse_skip_regs(bsp, ridx);
426 
427  DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
428 
429  ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
430 
431  if (nat) {
432  rnat_addr = ia64_rse_rnat_addr(addr);
433  nat_mask = 1UL << ia64_rse_slot_num(addr);
434 
435  ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
436 
437  DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
438  *nat = (rnats & nat_mask) != 0;
439  }
440  return;
441 
442  fail:
443  *val = 0;
444  if (nat)
445  *nat = 0;
446  return;
447 }
448 
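/*
 * Illustrative sketch, added for exposition (not part of the original
 * unaligned.c): set_rse_reg() and get_rse_reg() above read the current
 * frame marker fields directly out of cr_ifs.  The hypothetical helper
 * below only names the bit ranges they use.
 */
struct example_frame_info {
	unsigned long sof;	/* size of frame:            cr_ifs[6:0]       */
	unsigned long sol;	/* size of locals:           cr_ifs[13:7]      */
	unsigned long sor;	/* size of rotating portion: 8 * cr_ifs[17:14] */
	unsigned long rrb_gr;	/* GR register rename base:  cr_ifs[24:18]     */
	unsigned long rrb_fr;	/* FR register rename base:  cr_ifs[31:25]     */
};

static inline struct example_frame_info
example_decode_cfm (unsigned long cr_ifs)
{
	struct example_frame_info f;

	f.sof    = cr_ifs & 0x7f;
	f.sol    = (cr_ifs >> 7) & 0x7f;
	f.sor    = 8 * ((cr_ifs >> 14) & 0xf);
	f.rrb_gr = (cr_ifs >> 18) & 0x7f;
	f.rrb_fr = (cr_ifs >> 25) & 0x7f;
	return f;
}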
449 
450 static void
451 setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
452 {
453  struct switch_stack *sw = (struct switch_stack *) regs - 1;
454  unsigned long addr;
455  unsigned long bitmask;
456  unsigned long *unat;
457 
458  /*
459  * First takes care of stacked registers
460  */
461  if (regnum >= IA64_FIRST_STACKED_GR) {
462  set_rse_reg(regs, regnum, val, nat);
463  return;
464  }
465 
466  /*
467  * Using r0 as a target raises a General Exception fault which has higher priority
468  * than the Unaligned Reference fault.
469  */
470 
471  /*
472  * Now look at registers in [0-31] range and init correct UNAT
473  */
474  if (GR_IN_SW(regnum)) {
475  addr = (unsigned long)sw;
476  unat = &sw->ar_unat;
477  } else {
478  addr = (unsigned long)regs;
479  unat = &sw->caller_unat;
480  }
481  DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
482  addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
483  /*
484  * add offset from base of struct
485  * and do it !
486  */
487  addr += GR_OFFS(regnum);
488 
489  *(unsigned long *)addr = val;
490 
491  /*
492  * We need to clear the corresponding UNAT bit to fully emulate the load
493  * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
494  */
495  bitmask = 1UL << (addr >> 3 & 0x3f);
496  DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
497  if (nat) {
498  *unat |= bitmask;
499  } else {
500  *unat &= ~bitmask;
501  }
502  DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
503 }
504 
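/*
 * Illustrative sketch, added for exposition (not part of the original
 * unaligned.c): setreg() above picks the UNAT bit that shadows a spilled
 * register from bits {8:3} of the address the value is stored at.  The
 * helper name is hypothetical.
 */
static inline unsigned long
example_unat_bit_pos (unsigned long spill_addr)
{
	return (spill_addr >> 3) & 0x3f;	/* one of the 64 UNAT bits */
}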
505 /*
506  * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
507  * range from 32-127, result is in the range from 0-95).
508  */
509 static inline unsigned long
510 fph_index (struct pt_regs *regs, long regnum)
511 {
512  unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
513  return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
514 }
515 
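/*
 * Worked example, added for exposition (not part of the original
 * unaligned.c): with rrb.fr = 10, architectural f32 maps to
 * fph_index() == rotate_reg(96, 10, 0) == 10, i.e. thread.fph[10],
 * while f127 maps to rotate_reg(96, 10, 95) == 9, i.e. thread.fph[9].
 */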
516 static void
517 setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
518 {
519  struct switch_stack *sw = (struct switch_stack *)regs - 1;
520  unsigned long addr;
521 
522  /*
523  * From EAS-2.5: FPDisableFault has higher priority than Unaligned
524  * Fault. Thus, when we get here, we know the partition is enabled.
525  * To update f32-f127, there are three choices:
526  *
527  * (1) save f32-f127 to thread.fph and update the values there
528  * (2) use a gigantic switch statement to directly access the registers
529  * (3) generate code on the fly to update the desired register
530  *
531  * For now, we are using approach (1).
532  */
533  if (regnum >= IA64_FIRST_ROTATING_FR) {
534  ia64_sync_fph(current);
535  current->thread.fph[fph_index(regs, regnum)] = *fpval;
536  } else {
537  /*
538  * pt_regs or switch_stack ?
539  */
540  if (FR_IN_SW(regnum)) {
541  addr = (unsigned long)sw;
542  } else {
543  addr = (unsigned long)regs;
544  }
545 
546  DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
547 
548  addr += FR_OFFS(regnum);
549  *(struct ia64_fpreg *)addr = *fpval;
550 
551  /*
552  * mark the low partition as being used now
553  *
554  * It is highly unlikely that this bit is not already set, but
555  * let's do it for safety.
556  */
557  regs->cr_ipsr |= IA64_PSR_MFL;
558  }
559 }
560 
561 /*
562  * These two inline functions generate the spilled versions of the constant floating point
563  * registers, which can be used with stfX
564  */
565 static inline void
566 float_spill_f0 (struct ia64_fpreg *final)
567 {
568  ia64_stf_spill(final, 0);
569 }
570 
571 static inline void
572 float_spill_f1 (struct ia64_fpreg *final)
573 {
574  ia64_stf_spill(final, 1);
575 }
576 
577 static void
578 getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
579 {
580  struct switch_stack *sw = (struct switch_stack *) regs - 1;
581  unsigned long addr;
582 
583  /*
584  * From EAS-2.5: FPDisableFault has higher priority than
585  * Unaligned Fault. Thus, when we get here, we know the partition is
586  * enabled.
587  *
588  * When regnum > 31, the register is still live and we need to force a save
589  * to current->thread.fph to get access to it. See discussion in setfpreg()
590  * for reasons and other ways of doing this.
591  */
592  if (regnum >= IA64_FIRST_ROTATING_FR) {
593  ia64_flush_fph(current);
594  *fpval = current->thread.fph[fph_index(regs, regnum)];
595  } else {
596  /*
597  * f0 = 0.0, f1 = 1.0. Those registers are constant and are thus
598  * not saved, so we must generate their spilled form on the fly
599  */
600  switch(regnum) {
601  case 0:
602  float_spill_f0(fpval);
603  break;
604  case 1:
605  float_spill_f1(fpval);
606  break;
607  default:
608  /*
609  * pt_regs or switch_stack ?
610  */
611  addr = FR_IN_SW(regnum) ? (unsigned long)sw
612  : (unsigned long)regs;
613 
614  DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
615  FR_IN_SW(regnum), addr, FR_OFFS(regnum));
616 
617  addr += FR_OFFS(regnum);
618  *fpval = *(struct ia64_fpreg *)addr;
619  }
620  }
621 }
622 
623 
624 static void
625 getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
626 {
627  struct switch_stack *sw = (struct switch_stack *) regs - 1;
628  unsigned long addr, *unat;
629 
630  if (regnum >= IA64_FIRST_STACKED_GR) {
631  get_rse_reg(regs, regnum, val, nat);
632  return;
633  }
634 
635  /*
636  * take care of r0 (read-only, always evaluates to 0)
637  */
638  if (regnum == 0) {
639  *val = 0;
640  if (nat)
641  *nat = 0;
642  return;
643  }
644 
645  /*
646  * Now look at registers in [0-31] range and init correct UNAT
647  */
648  if (GR_IN_SW(regnum)) {
649  addr = (unsigned long)sw;
650  unat = &sw->ar_unat;
651  } else {
652  addr = (unsigned long)regs;
653  unat = &sw->caller_unat;
654  }
655 
656  DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
657 
658  addr += GR_OFFS(regnum);
659 
660  *val = *(unsigned long *)addr;
661 
662  /*
663  * do it only when requested
664  */
665  if (nat)
666  *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
667 }
668 
669 static void
670 emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
671 {
672  /*
673  * IMPORTANT:
674  * Given the way we handle unaligned speculative loads, we should
675  * not get to this point in the code but we keep this sanity check,
676  * just in case.
677  */
678  if (ld.x6_op == 1 || ld.x6_op == 3) {
679  printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
680  if (die_if_kernel("unaligned reference on speculative load with register update\n",
681  regs, 30))
682  return;
683  }
684 
685 
686  /*
687  * at this point, we know that the base register to update is valid i.e.,
688  * it's not r0
689  */
690  if (type == UPD_IMMEDIATE) {
691  unsigned long imm;
692 
693  /*
694  * Load +Imm: ldXZ r1=[r3],imm(9)
695  *
696  *
697  * form imm9: [13:19] contain the first 7 bits
698  */
699  imm = ld.x << 7 | ld.imm;
700 
701  /*
702  * sign extend (1+8bits) if m set
703  */
704  if (ld.m) imm |= SIGN_EXT9;
705 
706  /*
707  * ifa == r3 and we know that the NaT bit on r3 was clear so
708  * we can directly use ifa.
709  */
710  ifa += imm;
711 
712  setreg(ld.r3, ifa, 0, regs);
713 
714  DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
715 
716  } else if (ld.m) {
717  unsigned long r2;
718  int nat_r2;
719 
720  /*
721  * Load +Reg Opcode: ldXZ r1=[r3],r2
722  *
723  * Note that we update r3 even in the case of ldfX.a
724  * (where the load does not happen)
725  *
726  * The way the load algorithm works, we know that r3 does not
727  * have its NaT bit set (would have gotten NaT consumption
728  * before getting the unaligned fault). So we can use ifa
729  * which equals r3 at this point.
730  *
731  * IMPORTANT:
732  * The above statement holds ONLY because we know that we
733  * never reach this code when trying to do a ldX.s.
734  * If we ever make it to here on an ldfX.s then the NaT-clear assumption above no longer holds.
735  */
736  getreg(ld.imm, &r2, &nat_r2, regs);
737 
738  ifa += r2;
739 
740  /*
741  * propagate Nat r2 -> r3
742  */
743  setreg(ld.r3, ifa, nat_r2, regs);
744 
745  DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
746  }
747 }
748 
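/*
 * Illustrative sketch, added for exposition (not part of the original
 * unaligned.c): the 9-bit post-increment immediate is reassembled from
 * three instruction fields -- imm7b (ld.imm for loads, ld.r1 for stores),
 * the 'i' bit (ld.x), and the sign bit, which sits in the position the
 * load_store_t overlay calls 'm'.  The hypothetical helper mirrors the
 * inline computation in emulate_load_updates() and emulate_store_int().
 */
static inline long
example_imm9 (unsigned long imm7b, unsigned long i_bit, unsigned long sign_bit)
{
	unsigned long imm = i_bit << 7 | imm7b;		/* low 8 bits */

	if (sign_bit)
		imm |= SIGN_EXT9;			/* sign-extend to 64 bits */
	return (long) imm;
}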
749 
750 static int
751 emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
752 {
753  unsigned int len = 1 << ld.x6_sz;
754  unsigned long val = 0;
755 
756  /*
757  * r0, as target, doesn't need to be checked because Illegal Instruction
758  * faults have higher priority than unaligned faults.
759  *
760  * r0 cannot be found as the base as it would never generate an
761  * unaligned reference.
762  */
763 
764  /*
765  * For ldX.a we emulate the load and also invalidate the ALAT entry.
766  * See the comment below for an explanation of how we handle ldX.a
767  */
768 
769  if (len != 2 && len != 4 && len != 8) {
770  DPRINT("unknown size: x6=%d\n", ld.x6_sz);
771  return -1;
772  }
773  /* this assumes little-endian byte-order: */
774  if (copy_from_user(&val, (void __user *) ifa, len))
775  return -1;
776  setreg(ld.r1, val, 0, regs);
777 
778  /*
779  * check for updates on any kind of loads
780  */
781  if (ld.op == 0x5 || ld.m)
782  emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
783 
784  /*
785  * handling of various loads (based on EAS2.4):
786  *
787  * ldX.acq (ordered load):
788  * - acquire semantics would have been used, so force fence instead.
789  *
790  * ldX.c.clr (check load and clear):
791  * - if we get to this handler, it's because the entry was not in the ALAT.
792  * Therefore the operation reverts to a normal load
793  *
794  * ldX.c.nc (check load no clear):
795  * - same as previous one
796  *
797  * ldX.c.clr.acq (ordered check load and clear):
798  * - same as above for c.clr part. The load needs to have acquire semantics. So
799  * we use the fence semantics which is stronger and thus ensures correctness.
800  *
801  * ldX.a (advanced load):
802  * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
803  * address doesn't match requested size alignment. This means that we would
804  * possibly need more than one load to get the result.
805  *
806  * The load part can be handled just like a normal load, however the difficult
807  * part is to get the right thing into the ALAT. The critical piece of information
808  * is the base address of the load & its size. To do that, a ld.a must be executed,
809  * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
810  * if we use the same target register, we will be okay for the check.a instruction.
811  * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
812  * which would overlap within [r3,r3+X] (the size of the load was stored in the
813  * ALAT). If such an entry is found the entry is invalidated. But this is not good
814  * enough, take the following example:
815  * r3=3
816  * ld4.a r1=[r3]
817  *
818  * Could be emulated by doing:
819  * ld1.a r1=[r3],1
820  * store to temporary;
821  * ld1.a r1=[r3],1
822  * store & shift to temporary;
823  * ld1.a r1=[r3],1
824  * store & shift to temporary;
825  * ld1.a r1=[r3]
826  * store & shift to temporary;
827  * r1=temporary
828  *
829  * So in this case, you would get the right value in r1 but the wrong info in
830  * the ALAT. Notice that you could do it in reverse to finish with address 3
831  * but you would still get the size wrong. To get the size right, one needs to
832  * execute exactly the same kind of load. You could do it from an aligned
833  * temporary location, but you would get the address wrong.
834  *
835  * So no matter what, it is not possible to emulate an advanced load
836  * correctly. But is that really critical ?
837  *
838  * We will always convert ld.a into a normal load with ALAT invalidated. This
839  * will enable the compiler to do optimizations where certain code paths after ld.a
840  * are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
841  *
842  * If there is a store after the advanced load, one must either do a ld.c.* or
843  * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
844  * entry found in ALAT), and that's perfectly ok because:
845  *
846  * - ld.c.*, if the entry is not present a normal load is executed
847  * - chk.a.*, if the entry is not present, execution jumps to recovery code
848  *
849  * In either case, the load can be potentially retried in another form.
850  *
851  * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
852  * up a stale entry later). The register base update MUST also be performed.
853  */
854 
855  /*
856  * when the load has the .acq completer then
857  * use ordering fence.
858  */
859  if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
860  mb();
861 
862  /*
863  * invalidate ALAT entry in case of advanced load
864  */
865  if (ld.x6_op == 0x2)
866  invala_gr(ld.r1);
867 
868  return 0;
869 }
870 
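/*
 * Illustrative sketch, added for exposition (not part of the original
 * unaligned.c): emulate_load_int() copies 'len' bytes into the low end of
 * an unsigned long, which only yields the right value on a little-endian
 * configuration (hence the "assumes little-endian" comments).  A byte-wise
 * equivalent of that assembly step, operating on bytes already copied into
 * the kernel, would look like this (hypothetical helper):
 */
static inline unsigned long
example_le_assemble (const unsigned char *bytes, unsigned int len)
{
	unsigned long val = 0;
	unsigned int i;

	for (i = 0; i < len; i++)
		val |= (unsigned long) bytes[i] << (8 * i);	/* LSB first */
	return val;
}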
871 static int
872 emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
873 {
874  unsigned long r2;
875  unsigned int len = 1 << ld.x6_sz;
876 
877  /*
878  * if we get to this handler, Nat bits on both r3 and r2 have already
879  * been checked. so we don't need to do it
880  *
881  * extract the value to be stored
882  */
883  getreg(ld.imm, &r2, NULL, regs);
884 
885  /*
886  * we rely on the macros in unaligned.h for now i.e.,
887  * we let the compiler figure out how to read memory gracefully.
888  * We need this switch/case because of the way the inline function
889  * We need this switch/case because the way the inline function
890  * works. The code is optimized by the compiler and looks like
891  * a single switch/case.
892  */
893  DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
894 
895  if (len != 2 && len != 4 && len != 8) {
896  DPRINT("unknown size: x6=%d\n", ld.x6_sz);
897  return -1;
898  }
899 
900  /* this assumes little-endian byte-order: */
901  if (copy_to_user((void __user *) ifa, &r2, len))
902  return -1;
903 
904  /*
905  * stX [r3]=r2,imm(9)
906  *
907  * NOTE:
908  * ld.r3 can never be r0, because r0 would not generate an
909  * unaligned access.
910  */
911  if (ld.op == 0x5) {
912  unsigned long imm;
913 
914  /*
915  * form imm9: [12:6] contain first 7bits
916  */
917  imm = ld.x << 7 | ld.r1;
918  /*
919  * sign extend (8bits) if m set
920  */
921  if (ld.m) imm |= SIGN_EXT9;
922  /*
923  * ifa == r3 (NaT is necessarily cleared)
924  */
925  ifa += imm;
926 
927  DPRINT("imm=%lx r3=%lx\n", imm, ifa);
928 
929  setreg(ld.r3, ifa, 0, regs);
930  }
931  /*
932  * we don't have alat_invalidate_multiple() so we need
933  * to do the complete flush :-<<
934  */
935  ia64_invala();
936 
937  /*
938  * stX.rel: use fence instead of release
939  */
940  if (ld.x6_op == 0xd)
941  mb();
942 
943  return 0;
944 }
945 
946 /*
947  * floating point operations sizes in bytes
948  */
949 static const unsigned char float_fsz[4]={
950  10, /* extended precision (e) */
951  8, /* integer (8) */
952  4, /* single precision (s) */
953  8 /* double precision (d) */
954 };
955 
956 static inline void
957 mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
958 {
959  ia64_ldfe(6, init);
960  ia64_stop();
961  ia64_stf_spill(final, 6);
962 }
963 
964 static inline void
965 mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
966 {
967  ia64_ldf8(6, init);
968  ia64_stop();
969  ia64_stf_spill(final, 6);
970 }
971 
972 static inline void
973 mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
974 {
975  ia64_ldfs(6, init);
976  ia64_stop();
977  ia64_stf_spill(final, 6);
978 }
979 
980 static inline void
981 mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
982 {
983  ia64_ldfd(6, init);
984  ia64_stop();
985  ia64_stf_spill(final, 6);
986 }
987 
988 static inline void
989 float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
990 {
991  ia64_ldf_fill(6, init);
992  ia64_stop();
993  ia64_stfe(final, 6);
994 }
995 
996 static inline void
997 float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
998 {
999  ia64_ldf_fill(6, init);
1000  ia64_stop();
1001  ia64_stf8(final, 6);
1002 }
1003 
1004 static inline void
1005 float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
1006 {
1007  ia64_ldf_fill(6, init);
1008  ia64_stop();
1009  ia64_stfs(final, 6);
1010 }
1011 
1012 static inline void
1013 float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
1014 {
1015  ia64_ldf_fill(6, init);
1016  ia64_stop();
1017  ia64_stfd(final, 6);
1018 }
1019 
1020 static int
1021 emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1022 {
1023  struct ia64_fpreg fpr_init[2];
1024  struct ia64_fpreg fpr_final[2];
1025  unsigned long len = float_fsz[ld.x6_sz];
1026 
1027  /*
1028  * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1029  * higher priority than unaligned faults.
1030  *
1031  * r0 cannot be found as the base as it would never generate an unaligned
1032  * reference.
1033  */
1034 
1035  /*
1036  * make sure we get clean buffers
1037  */
1038  memset(&fpr_init, 0, sizeof(fpr_init));
1039  memset(&fpr_final, 0, sizeof(fpr_final));
1040 
1041  /*
1042  * ldfpX.a: we don't try to emulate anything but we must
1043  * invalidate the ALAT entry and execute updates, if any.
1044  */
1045  if (ld.x6_op != 0x2) {
1046  /*
1047  * This assumes little-endian byte-order. Note that there is no "ldfpe"
1048  * instruction:
1049  */
1050  if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
1051  || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
1052  return -1;
1053 
1054  DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1055  DDUMP("frp_init =", &fpr_init, 2*len);
1056  /*
1057  * XXX fixme
1058  * Could optimize inlines by using ldfpX & 2 spills
1059  */
1060  switch( ld.x6_sz ) {
1061  case 0:
1062  mem2float_extended(&fpr_init[0], &fpr_final[0]);
1063  mem2float_extended(&fpr_init[1], &fpr_final[1]);
1064  break;
1065  case 1:
1066  mem2float_integer(&fpr_init[0], &fpr_final[0]);
1067  mem2float_integer(&fpr_init[1], &fpr_final[1]);
1068  break;
1069  case 2:
1070  mem2float_single(&fpr_init[0], &fpr_final[0]);
1071  mem2float_single(&fpr_init[1], &fpr_final[1]);
1072  break;
1073  case 3:
1074  mem2float_double(&fpr_init[0], &fpr_final[0]);
1075  mem2float_double(&fpr_init[1], &fpr_final[1]);
1076  break;
1077  }
1078  DDUMP("fpr_final =", &fpr_final, 2*len);
1079  /*
1080  * XXX fixme
1081  *
1082  * A possible optimization would be to drop fpr_final and directly
1083  * use the storage from the saved context i.e., the actual final
1084  * destination (pt_regs, switch_stack or thread structure).
1085  */
1086  setfpreg(ld.r1, &fpr_final[0], regs);
1087  setfpreg(ld.imm, &fpr_final[1], regs);
1088  }
1089 
1090  /*
1091  * Check for updates: only immediate updates are available for this
1092  * instruction.
1093  */
1094  if (ld.m) {
1095  /*
1096  * the immediate is implicit given the ldsz of the operation:
1097  * single: 8 (2x4) and for all others it's 16 (2x8)
1098  */
1099  ifa += len<<1;
1100 
1101  /*
1102  * IMPORTANT:
1103  * the fact that we force the NaT of r3 to zero is ONLY valid
1104  * as long as we don't come here with a ldfpX.s.
1105  * For this reason we keep this sanity check
1106  */
1107  if (ld.x6_op == 1 || ld.x6_op == 3)
1108  printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1109  __func__);
1110 
1111  setreg(ld.r3, ifa, 0, regs);
1112  }
1113 
1114  /*
1115  * Invalidate ALAT entries, if any, for both registers.
1116  */
1117  if (ld.x6_op == 0x2) {
1118  invala_fr(ld.r1);
1119  invala_fr(ld.imm);
1120  }
1121  return 0;
1122 }
1123 
1124 
1125 static int
1126 emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1127 {
1128  struct ia64_fpreg fpr_init;
1129  struct ia64_fpreg fpr_final;
1130  unsigned long len = float_fsz[ld.x6_sz];
1131 
1132  /*
1133  * fr0 & fr1 don't need to be checked because Illegal Instruction
1134  * faults have higher priority than unaligned faults.
1135  *
1136  * r0 cannot be found as the base as it would never generate an
1137  * unaligned reference.
1138  */
1139 
1140  /*
1141  * make sure we get clean buffers
1142  */
1143  memset(&fpr_init,0, sizeof(fpr_init));
1144  memset(&fpr_final,0, sizeof(fpr_final));
1145 
1146  /*
1147  * ldfX.a we don't try to emulate anything but we must
1148  * invalidate the ALAT entry.
1149  * See comments in ldX for descriptions on how the various loads are handled.
1150  */
1151  if (ld.x6_op != 0x2) {
1152  if (copy_from_user(&fpr_init, (void __user *) ifa, len))
1153  return -1;
1154 
1155  DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1156  DDUMP("fpr_init =", &fpr_init, len);
1157  /*
1158  * we only do something for x6_op={0,8,9}
1159  */
1160  switch( ld.x6_sz ) {
1161  case 0:
1162  mem2float_extended(&fpr_init, &fpr_final);
1163  break;
1164  case 1:
1165  mem2float_integer(&fpr_init, &fpr_final);
1166  break;
1167  case 2:
1168  mem2float_single(&fpr_init, &fpr_final);
1169  break;
1170  case 3:
1171  mem2float_double(&fpr_init, &fpr_final);
1172  break;
1173  }
1174  DDUMP("fpr_final =", &fpr_final, len);
1175  /*
1176  * XXX fixme
1177  *
1178  * A possible optimization would be to drop fpr_final and directly
1179  * use the storage from the saved context i.e., the actual final
1180  * destination (pt_regs, switch_stack or thread structure).
1181  */
1182  setfpreg(ld.r1, &fpr_final, regs);
1183  }
1184 
1185  /*
1186  * check for updates on any loads
1187  */
1188  if (ld.op == 0x7 || ld.m)
1189  emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1190 
1191  /*
1192  * invalidate ALAT entry in case of advanced floating point loads
1193  */
1194  if (ld.x6_op == 0x2)
1195  invala_fr(ld.r1);
1196 
1197  return 0;
1198 }
1199 
1200 
1201 static int
1202 emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1203 {
1204  struct ia64_fpreg fpr_init;
1205  struct ia64_fpreg fpr_final;
1206  unsigned long len = float_fsz[ld.x6_sz];
1207 
1208  /*
1209  * make sure we get clean buffers
1210  */
1211  memset(&fpr_init,0, sizeof(fpr_init));
1212  memset(&fpr_final,0, sizeof(fpr_final));
1213 
1214  /*
1215  * if we get to this handler, Nat bits on both r3 and r2 have already
1216  * been checked. so we don't need to do it
1217  *
1218  * extract the value to be stored
1219  */
1220  getfpreg(ld.imm, &fpr_init, regs);
1221  /*
1222  * during this step, we extract the spilled registers from the saved
1223  * context i.e., we refill. Then we store (no spill) to temporary
1224  * aligned location
1225  */
1226  switch( ld.x6_sz ) {
1227  case 0:
1228  float2mem_extended(&fpr_init, &fpr_final);
1229  break;
1230  case 1:
1231  float2mem_integer(&fpr_init, &fpr_final);
1232  break;
1233  case 2:
1234  float2mem_single(&fpr_init, &fpr_final);
1235  break;
1236  case 3:
1237  float2mem_double(&fpr_init, &fpr_final);
1238  break;
1239  }
1240  DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1241  DDUMP("fpr_init =", &fpr_init, len);
1242  DDUMP("fpr_final =", &fpr_final, len);
1243 
1244  if (copy_to_user((void __user *) ifa, &fpr_final, len))
1245  return -1;
1246 
1247  /*
1248  * stfX [r3]=r2,imm(9)
1249  *
1250  * NOTE:
1251  * ld.r3 can never be r0, because r0 would not generate an
1252  * unaligned access.
1253  */
1254  if (ld.op == 0x7) {
1255  unsigned long imm;
1256 
1257  /*
1258  * form imm9: [12:6] contain first 7bits
1259  */
1260  imm = ld.x << 7 | ld.r1;
1261  /*
1262  * sign extend (8bits) if m set
1263  */
1264  if (ld.m)
1265  imm |= SIGN_EXT9;
1266  /*
1267  * ifa == r3 (NaT is necessarily cleared)
1268  */
1269  ifa += imm;
1270 
1271  DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1272 
1273  setreg(ld.r3, ifa, 0, regs);
1274  }
1275  /*
1276  * we don't have alat_invalidate_multiple() so we need
1277  * to do the complete flush :-<<
1278  */
1279  ia64_invala();
1280 
1281  return 0;
1282 }
1283 
1284 /*
1285  * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1286  * eventually fix the program. However, we don't want to do that for every access so we
1287  * pace it with jiffies.
1288  */
1289 static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);
1290 
1291 void
1292 ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1293 {
1294  struct ia64_psr *ipsr = ia64_psr(regs);
1295  mm_segment_t old_fs = get_fs();
1296  unsigned long bundle[2];
1297  unsigned long opcode;
1298  struct siginfo si;
1299  const struct exception_table_entry *eh = NULL;
1300  union {
1301  unsigned long l;
1302  load_store_t insn;
1303  } u;
1304  int ret = -1;
1305 
1306  if (ia64_psr(regs)->be) {
1307  /* we don't support big-endian accesses */
1308  if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
1309  return;
1310  goto force_sigbus;
1311  }
1312 
1313  /*
1314  * Treat kernel accesses for which there is an exception handler entry the same as
1315  * user-level unaligned accesses. Otherwise, a clever program could trick this
1316  * handler into reading arbitrary kernel addresses...
1317  */
1318  if (!user_mode(regs))
1319  eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
1320  if (user_mode(regs) || eh) {
1321  if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1322  goto force_sigbus;
1323 
1324  if (!no_unaligned_warning &&
1325  !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
1326  __ratelimit(&logging_rate_limit))
1327  {
1328  char buf[200]; /* comm[] is at most 16 bytes... */
1329  size_t len;
1330 
1331  len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1332  "ip=0x%016lx\n\r", current->comm,
1333  task_pid_nr(current),
1334  ifa, regs->cr_iip + ipsr->ri);
1335  /*
1336  * Don't call tty_write_message() if we're in the kernel; we might
1337  * be holding locks...
1338  */
1339  if (user_mode(regs))
1340  tty_write_message(current->signal->tty, buf);
1341  buf[len-1] = '\0'; /* drop '\r' */
1342  /* watch for command names containing %s */
1343  printk(KERN_WARNING "%s", buf);
1344  } else {
1345  if (no_unaligned_warning) {
1346  printk_once(KERN_WARNING "%s(%d) encountered an "
1347  "unaligned exception which required\n"
1348  "kernel assistance, which degrades "
1349  "the performance of the application.\n"
1350  "Unaligned exception warnings have "
1351  "been disabled by the system "
1352  "administrator\n"
1353  "echo 0 > /proc/sys/kernel/ignore-"
1354  "unaligned-usertrap to re-enable\n",
1355  current->comm, task_pid_nr(current));
1356  }
1357  }
1358  } else {
1359  if (__ratelimit(&logging_rate_limit)) {
1360  printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1361  ifa, regs->cr_iip + ipsr->ri);
1362  if (unaligned_dump_stack)
1363  dump_stack();
1364  }
1365  set_fs(KERNEL_DS);
1366  }
1367 
1368  DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1369  regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1370 
1371  if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
1372  goto failure;
1373 
1374  /*
1375  * extract the instruction from the bundle given the slot number
1376  */
1377  switch (ipsr->ri) {
1378  case 0: u.l = (bundle[0] >> 5); break;
1379  case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1380  case 2: u.l = (bundle[1] >> 23); break;
1381  }
1382  opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1383 
1384  DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1385  "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1386  u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1387 
1388  /*
1389  * IMPORTANT:
1390  * Notice that the switch statement DOES not cover all possible instructions
1391  * that DO generate unaligned references. This is made on purpose because for some
1392  * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1393  * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
1394  * the program will get a signal and die:
1395  *
1396  * load/store:
1397  * - ldX.spill
1398  * - stX.spill
1399  * Reason: RNATs are based on addresses
1400  * - ld16
1401  * - st16
1402  * Reason: ld16 and st16 are supposed to occur in a single
1403  * memory op
1404  *
1405  * synchronization:
1406  * - cmpxchg
1407  * - fetchadd
1408  * - xchg
1409  * Reason: ATOMIC operations cannot be emulated properly using multiple
1410  * instructions.
1411  *
1412  * speculative loads:
1413  * - ldX.sZ
1414  * Reason: side effects, code must be ready to deal with failure so simpler
1415  * to let the load fail.
1416  * ---------------------------------------------------------------------------------
1417  * XXX fixme
1418  *
1419  * I would like to get rid of this switch case and do something
1420  * more elegant.
1421  */
1422  switch (opcode) {
1423  case LDS_OP:
1424  case LDSA_OP:
1425  if (u.insn.x)
1426  /* oops, really a semaphore op (cmpxchg, etc) */
1427  goto failure;
1428  /* no break */
1429  case LDS_IMM_OP:
1430  case LDSA_IMM_OP:
1431  case LDFS_OP:
1432  case LDFSA_OP:
1433  case LDFS_IMM_OP:
1434  /*
1435  * The instruction will be retried with deferred exceptions turned on, and
1436  * we should get Nat bit installed
1437  *
1438  * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1439  * are actually executed even though the operation failed. So we don't
1440  * need to take care of this.
1441  */
1442  DPRINT("forcing PSR_ED\n");
1443  regs->cr_ipsr |= IA64_PSR_ED;
1444  goto done;
1445 
1446  case LD_OP:
1447  case LDA_OP:
1448  case LDBIAS_OP:
1449  case LDACQ_OP:
1450  case LDCCLR_OP:
1451  case LDCNC_OP:
1452  case LDCCLRACQ_OP:
1453  if (u.insn.x)
1454  /* oops, really a semaphore op (cmpxchg, etc) */
1455  goto failure;
1456  /* no break */
1457  case LD_IMM_OP:
1458  case LDA_IMM_OP:
1459  case LDBIAS_IMM_OP:
1460  case LDACQ_IMM_OP:
1461  case LDCCLR_IMM_OP:
1462  case LDCNC_IMM_OP:
1463  case LDCCLRACQ_IMM_OP:
1464  ret = emulate_load_int(ifa, u.insn, regs);
1465  break;
1466 
1467  case ST_OP:
1468  case STREL_OP:
1469  if (u.insn.x)
1470  /* oops, really a semaphore op (cmpxchg, etc) */
1471  goto failure;
1472  /* no break */
1473  case ST_IMM_OP:
1474  case STREL_IMM_OP:
1475  ret = emulate_store_int(ifa, u.insn, regs);
1476  break;
1477 
1478  case LDF_OP:
1479  case LDFA_OP:
1480  case LDFCCLR_OP:
1481  case LDFCNC_OP:
1482  if (u.insn.x)
1483  ret = emulate_load_floatpair(ifa, u.insn, regs);
1484  else
1485  ret = emulate_load_float(ifa, u.insn, regs);
1486  break;
1487 
1488  case LDF_IMM_OP:
1489  case LDFA_IMM_OP:
1490  case LDFCCLR_IMM_OP:
1491  case LDFCNC_IMM_OP:
1492  ret = emulate_load_float(ifa, u.insn, regs);
1493  break;
1494 
1495  case STF_OP:
1496  case STF_IMM_OP:
1497  ret = emulate_store_float(ifa, u.insn, regs);
1498  break;
1499 
1500  default:
1501  goto failure;
1502  }
1503  DPRINT("ret=%d\n", ret);
1504  if (ret)
1505  goto failure;
1506 
1507  if (ipsr->ri == 2)
1508  /*
1509  * given today's architecture this case is not likely to happen because a
1510  * memory access instruction (M) can never be in the last slot of a
1511  * bundle. But let's keep it for now.
1512  */
1513  regs->cr_iip += 16;
1514  ipsr->ri = (ipsr->ri + 1) & 0x3;
1515 
1516  DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1517  done:
1518  set_fs(old_fs); /* restore original address limit */
1519  return;
1520 
1521  failure:
1522  /* something went wrong... */
1523  if (!user_mode(regs)) {
1524  if (eh) {
1525  ia64_handle_exception(regs, eh);
1526  goto done;
1527  }
1528  if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
1529  return;
1530  /* NOT_REACHED */
1531  }
1532  force_sigbus:
1533  si.si_signo = SIGBUS;
1534  si.si_errno = 0;
1535  si.si_code = BUS_ADRALN;
1536  si.si_addr = (void __user *) ifa;
1537  si.si_flags = 0;
1538  si.si_isr = 0;
1539  si.si_imm = 0;
1540  force_sig_info(SIGBUS, &si, current);
1541  goto done;
1542 }
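/*
 * Illustrative sketch, added for exposition (not part of the original
 * unaligned.c): an IA-64 bundle is 128 bits -- a 5-bit template followed
 * by three 41-bit instruction slots.  The switch on ipsr->ri in
 * ia64_handle_unaligned() extracts the faulting slot from the two 64-bit
 * words of the bundle; the hypothetical helper below does the same and
 * additionally masks the result down to 41 bits (the handler does not
 * need to, since it only looks at bits [40:0] of the slot).
 */
static inline unsigned long
example_bundle_slot (const unsigned long bundle[2], unsigned int slot)
{
	unsigned long insn = 0;

	switch (slot) {
	case 0: insn = bundle[0] >> 5; break;
	case 1: insn = (bundle[0] >> 46) | (bundle[1] << 18); break;
	case 2: insn = bundle[1] >> 23; break;
	}
	return insn & ((1UL << 41) - 1);	/* keep the 41 instruction bits */
}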