Linux Kernel  3.7.1
usercopy.c
/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, 2003 Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup a long time ago.
 */

#include <asm/uaccess.h>

/* The asm statements have been tweaked (within the domain of correctness)
   to give satisfactory results for "gcc version 3.2.1 Axis release
   R53/1.53-v32".

   Check regularly...

   Note that for CRISv32, the PC saved at a bus-fault is the address
   *at* the faulting instruction, with a special case for instructions
   in delay slots: then it's the address of the branch.  Note also that
   in contrast to v10, a postincrement in the instruction is *not*
   performed at a bus-fault; the register is seen having the original
   value in fault handlers. */

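/* For reference, a minimal sketch (not part of the original file) of how the
   ".section __ex_table" directives in the asm blocks below are consumed.
   Each ".dword <insn>,<fixup>" pair records the address of an instruction
   that may fault on a bad user pointer together with the address of its
   fixup code.  Assuming the generic two-word entry layout and the generic
   search_exception_tables() helper, the architecture's bus-fault handler
   does roughly:

	struct exception_table_entry {
		unsigned long insn, fixup;
	};

	const struct exception_table_entry *fix;

	fix = search_exception_tables(instruction_pointer(regs));
	if (fix != NULL)
		instruction_pointer(regs) = fix->fixup;

   The exact handler lives in the architecture's mm/fault code; the point is
   only that a faulting access inside __copy_user and friends does not oops,
   but resumes at the fixup label recorded in the table. */
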

/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c". */

unsigned long
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
{
	/* We want the parameters put in special registers.
	   Make sure the compiler is able to make something useful of this.
	   As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

	   FIXME: Comment for old gcc version.  Check.
	   If gcc was alright, it really would need no temporaries, and no
	   stack space to save stuff on. */

	register char *dst __asm__ ("r13") = pdst;
	register const char *src __asm__ ("r11") = psrc;
	register int n __asm__ ("r12") = pn;
	register int retn __asm__ ("r10") = 0;

	/* When src is aligned but not dst, this costs a few extra needless
	   cycles.  I believe it would take as many to check that the
	   re-alignment was unnecessary. */
	if (((unsigned long) dst & 3) != 0
	    /* Don't align if we wouldn't copy more than a few bytes; so we
	       don't have to check further for overflows. */
	    && n >= 3)
	{
		if ((unsigned long) dst & 1)
		{
			__asm_copy_to_user_1 (dst, src, retn);
			n--;
		}

		if ((unsigned long) dst & 2)
		{
			__asm_copy_to_user_2 (dst, src, retn);
			n -= 2;
		}
	}

	/* Movem is dirt cheap.  The overhead is low enough to always use the
	   minimum possible block size as the threshold. */
	if (n >= 44)
	{
		/* For large copies we use 'movem'. */

		/* It is not optimal to tell the compiler about clobbering any
		   registers; that will move the saving/restoring of those
		   registers to the function prologue/epilogue, and make
		   non-movem sizes suboptimal. */
		__asm__ volatile ("\
		;; Check that the register asm declarations came out right. \n\
		;; The GCC manual explicitly says TRT will happen. \n\
		.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
		.err \n\
		.endif \n\
 \n\
		;; Save the registers we'll use in the movem process \n\
		;; on the stack. \n\
		subq 11*4,$sp \n\
		movem $r10,[$sp] \n\
 \n\
		;; Now we've got this: \n\
		;; r11 - src \n\
		;; r13 - dst \n\
		;; r12 - n \n\
 \n\
		;; Update n for the first loop \n\
		subq 44,$r12 \n\
0: \n\
		movem [$r11+],$r10 \n\
		subq 44,$r12 \n\
1:		bge 0b \n\
		movem $r10,[$r13+] \n\
3: \n\
		addq 44,$r12 ;; compensate for last loop underflowing n \n\
 \n\
		;; Restore registers from stack \n\
		movem [$sp+],$r10 \n\
2: \n\
		.section .fixup,\"ax\" \n\
4: \n\
;; When failing on any of the 1..44 bytes in a chunk, we adjust back the \n\
;; source pointer and just drop through to the by-16 and by-4 loops to \n\
;; get the correct number of failing bytes.  This necessarily means a \n\
;; few extra exceptions, but invalid user pointers shouldn't happen in \n\
;; time-critical code anyway. \n\
		jump 3b \n\
		subq 44,$r11 \n\
 \n\
		.previous \n\
		.section __ex_table,\"a\" \n\
		.dword 1b,4b \n\
		.previous"

		/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
		/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
	}

	while (n >= 16)
	{
		__asm_copy_to_user_16 (dst, src, retn);
		n -= 16;
	}

	/* Having a separate by-four loop cuts down on cache footprint.
	   FIXME: Test with and without; increasing switch to be 0..15. */
	while (n >= 4)
	{
		__asm_copy_to_user_4 (dst, src, retn);
		n -= 4;
	}

	switch (n)
	{
	case 0:
		break;
	case 1:
		__asm_copy_to_user_1 (dst, src, retn);
		break;
	case 2:
		__asm_copy_to_user_2 (dst, src, retn);
		break;
	case 3:
		__asm_copy_to_user_3 (dst, src, retn);
		break;
	}

	return retn;
}
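
/* Usage sketch, for illustration only (not part of the original file):
   __copy_user() returns the number of bytes that could NOT be written to
   userspace, so callers of the copy_to_user() wrapper built on top of it
   usually turn any non-zero return value into -EFAULT.  The struct and
   function names below are hypothetical:

	struct demo_stats { unsigned long reads, writes; };

	static long demo_get_stats(void __user *argp,
				   const struct demo_stats *stats)
	{
		if (copy_to_user(argp, stats, sizeof(*stats)))
			return -EFAULT;	;; some bytes were not copied
		return 0;
	}
*/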

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return value is the number of bytes that were
   inaccessible. */

unsigned long
__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
{
	/* We want the parameters put in special registers.
	   Make sure the compiler is able to make something useful of this.
	   As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

	   FIXME: Comment for old gcc version.  Check.
	   If gcc was alright, it really would need no temporaries, and no
	   stack space to save stuff on. */

	register char *dst __asm__ ("r13") = pdst;
	register const char *src __asm__ ("r11") = psrc;
	register int n __asm__ ("r12") = pn;
	register int retn __asm__ ("r10") = 0;

	/* The best reason to align src is that we then know that a read-fault
	   was for aligned bytes; there are no 1..3 remaining good bytes to
	   pickle. */
	if (((unsigned long) src & 3) != 0)
	{
		if (((unsigned long) src & 1) && n != 0)
		{
			__asm_copy_from_user_1 (dst, src, retn);
			n--;
		}

		if (((unsigned long) src & 2) && n >= 2)
		{
			__asm_copy_from_user_2 (dst, src, retn);
			n -= 2;
		}

		/* We only need one check after the unalignment-adjustments,
		   because if both adjustments were done, either both or
		   neither reference had an exception. */
		if (retn != 0)
			goto copy_exception_bytes;
	}

	/* Movem is dirt cheap.  The overhead is low enough to always use the
	   minimum possible block size as the threshold. */
	if (n >= 44)
	{
		/* It is not optimal to tell the compiler about clobbering any
		   registers; that will move the saving/restoring of those
		   registers to the function prologue/epilogue, and make
		   non-movem sizes suboptimal. */
		__asm__ volatile ("\
		.ifnc %0%1%2%3,$r13$r11$r12$r10 \n\
		.err \n\
		.endif \n\
 \n\
		;; Save the registers we'll use in the movem process \n\
		;; on the stack. \n\
		subq 11*4,$sp \n\
		movem $r10,[$sp] \n\
 \n\
		;; Now we've got this: \n\
		;; r11 - src \n\
		;; r13 - dst \n\
		;; r12 - n \n\
 \n\
		;; Update n for the first loop \n\
		subq 44,$r12 \n\
0: \n\
		movem [$r11+],$r10 \n\
 \n\
		subq 44,$r12 \n\
		bge 0b \n\
		movem $r10,[$r13+] \n\
 \n\
4: \n\
		addq 44,$r12 ;; compensate for last loop underflowing n \n\
 \n\
		;; Restore registers from stack \n\
		movem [$sp+],$r10 \n\
		.section .fixup,\"ax\" \n\
 \n\
;; Do not jump back into the loop if we fail.  For some uses, we get a \n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an \n\
;; accurate count; not just clear the whole line.  To do that, we fall \n\
;; down in the code below, proceeding with smaller amounts.  It should \n\
;; be kept in mind that we have to cater to code like what at one time \n\
;; was in fs/super.c: \n\
;;  i = size - copy_from_user((void *)page, data, size); \n\
;; which would cause repeated faults while clearing the remainder of \n\
;; the SIZE bytes at PAGE after the first fault. \n\
;; A caveat here is that we must not fall through from a failing page \n\
;; to a valid page. \n\
 \n\
3: \n\
		jump 4b ;; Fall through, pretending the fault didn't happen. \n\
		nop \n\
 \n\
		.previous \n\
		.section __ex_table,\"a\" \n\
		.dword 0b,3b \n\
		.previous"

		/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
		/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
	}

	/* Either we directly start copying here, using dword copying in a
	   loop, or we copy as much as possible with 'movem' and then the
	   last block (<44 bytes) is copied here.  This will work since
	   'movem' will have updated src, dst and n.  (Except with failing
	   src.)

	   Since we want to keep src accurate, we can't use
	   __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
	   retn, but not src (by design; its value is ignored elsewhere). */

	while (n >= 4)
	{
		__asm_copy_from_user_4 (dst, src, retn);
		n -= 4;

		if (retn)
			goto copy_exception_bytes;
	}

	/* If we get here, there were no memory read faults. */
	switch (n)
	{
		/* These copies are at least "naturally aligned" (so we don't
		   have to check each byte), due to the src alignment code
		   before the movem loop.  The *_3 case *will* get the
		   correct count for retn. */
	case 0:
		/* This case deliberately left in (if you have doubts check
		   the generated assembly code). */
		break;
	case 1:
		__asm_copy_from_user_1 (dst, src, retn);
		break;
	case 2:
		__asm_copy_from_user_2 (dst, src, retn);
		break;
	case 3:
		__asm_copy_from_user_3 (dst, src, retn);
		break;
	}

	/* If we get here, retn correctly reflects the number of failing
	   bytes. */
	return retn;

copy_exception_bytes:
	/* We already have "retn" bytes cleared, and need to clear the
	   remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
	   memset is preferred here, since this isn't speed-critical code and
	   we'd rather have this be a leaf function than call memset. */
	{
		char *endp;
		for (endp = dst + n; dst < endp; dst++)
			*dst = 0;
	}

	return retn + n;
}

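/* Usage sketch, for illustration only (not part of the original file):
   because __copy_user_zeroing() zero-fills whatever could not be read and
   returns the count of inaccessible bytes, the classic idiom quoted in the
   fixup comment above keeps working.  A minimal, hypothetical caller (the
   names 'page', 'data' and 'size' are made up):

	unsigned long copied;

	copied = size - copy_from_user(page, data, size);

   Here 'copied' is the number of bytes actually fetched from userspace, and
   the tail of the kernel buffer has already been zeroed on a partial fault.
   Callers that only care about success usually just test the return value
   for non-zero and return -EFAULT instead. */
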
/* Zero userspace. */

unsigned long
__do_clear_user (void __user *pto, unsigned long pn)
{
	/* We want the parameters put in special registers.
	   Make sure the compiler is able to make something useful of this.
	   As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

	   FIXME: Comment for old gcc version.  Check.
	   If gcc was alright, it really would need no temporaries, and no
	   stack space to save stuff on. */

	register char *dst __asm__ ("r13") = pto;
	register int n __asm__ ("r12") = pn;
	register int retn __asm__ ("r10") = 0;

	if (((unsigned long) dst & 3) != 0
	    /* Don't align if we wouldn't clear more than a few bytes. */
	    && n >= 3)
	{
		if ((unsigned long) dst & 1)
		{
			__asm_clear_1 (dst, retn);
			n--;
		}

		if ((unsigned long) dst & 2)
		{
			__asm_clear_2 (dst, retn);
			n -= 2;
		}
	}

	/* Decide which clearing method to use.
	   FIXME: This number is from the "ordinary" kernel memset. */
	if (n >= 48)
	{
		/* For large clears we use 'movem'. */

		/* It is not optimal to tell the compiler about clobbering any
		   call-saved registers; that will move the saving/restoring
		   of those registers to the function prologue/epilogue, and
		   make non-movem sizes suboptimal.

		   This method is not foolproof; it assumes that the "asm reg"
		   declarations at the beginning of the function really are
		   used here (beware: they may be moved to temporary
		   registers).  This way, we do not have to save/move the
		   registers around into temporaries; we can safely use them
		   straight away.

		   If you want to check that the allocation was right, then
		   check the equalities in the first comment.  It should say
		   something like "r13=r13, r11=r11, r12=r12". */
		__asm__ volatile ("\
		.ifnc %0%1%2,$r13$r12$r10 \n\
		.err \n\
		.endif \n\
 \n\
		;; Save the registers we'll clobber in the movem process \n\
		;; on the stack.  Don't mention them to gcc, it will only be \n\
		;; upset. \n\
		subq 11*4,$sp \n\
		movem $r10,[$sp] \n\
 \n\
		clear.d $r0 \n\
		clear.d $r1 \n\
		clear.d $r2 \n\
		clear.d $r3 \n\
		clear.d $r4 \n\
		clear.d $r5 \n\
		clear.d $r6 \n\
		clear.d $r7 \n\
		clear.d $r8 \n\
		clear.d $r9 \n\
		clear.d $r10 \n\
		clear.d $r11 \n\
 \n\
		;; Now we've got this: \n\
		;; r13 - dst \n\
		;; r12 - n \n\
 \n\
		;; Update n for the first loop \n\
		subq 12*4,$r12 \n\
0: \n\
		subq 12*4,$r12 \n\
1: \n\
		bge 0b \n\
		movem $r11,[$r13+] \n\
 \n\
		addq 12*4,$r12 ;; compensate for last loop underflowing n \n\
 \n\
		;; Restore registers from stack \n\
		movem [$sp+],$r10 \n\
2: \n\
		.section .fixup,\"ax\" \n\
3: \n\
		movem [$sp],$r10 \n\
		addq 12*4,$r10 \n\
		addq 12*4,$r13 \n\
		movem $r10,[$sp] \n\
		jump 0b \n\
		clear.d $r10 \n\
 \n\
		.previous \n\
		.section __ex_table,\"a\" \n\
		.dword 1b,3b \n\
		.previous"

		/* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
		/* Inputs */ : "0" (dst), "1" (n), "2" (retn)
		/* Clobber */ : "r11");
	}

	while (n >= 16)
	{
		__asm_clear_16 (dst, retn);
		n -= 16;
	}

	/* Having a separate by-four loop cuts down on cache footprint.
	   FIXME: Test with and without; increasing switch to be 0..15. */
	while (n >= 4)
	{
		__asm_clear_4 (dst, retn);
		n -= 4;
	}

	switch (n)
	{
	case 0:
		break;
	case 1:
		__asm_clear_1 (dst, retn);
		break;
	case 2:
		__asm_clear_2 (dst, retn);
		break;
	case 3:
		__asm_clear_3 (dst, retn);
		break;
	}

	return retn;
}
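
/* Usage sketch, for illustration only (not part of the original file):
   __do_clear_user() follows the same convention as the copy routines above
   and returns the number of bytes left unzeroed.  A hypothetical caller
   going through the clear_user() wrapper (all names are made up):

	static int demo_zero_tail(char __user *buf, size_t len, size_t used)
	{
		if (used < len && clear_user(buf + used, len - used))
			return -EFAULT;	;; part of the range was unwritable
		return 0;
	}
*/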