Linux Kernel 3.7.1
gup.c
/*
 * Lockless get_user_pages_fast for s390
 *
 * Copyright IBM Corp. 2010
 * Author(s): Martin Schwidefsky <[email protected]>
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask;
	pte_t *ptep, pte;
	struct page *page;

	mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;

	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
	do {
		pte = *ptep;
		barrier();
		if ((pte_val(pte) & mask) != 0)
			return 0;
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		if (!page_cache_get_speculative(page))
			return 0;
		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(page);
			return 0;
		}
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	return 1;
}
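/*
 * Illustrative sketch only, not part of the original file: the lockless
 * pattern used by gup_pte_range() above, pulled out into a hypothetical
 * helper for clarity.  The pte is snapshotted once, a speculative
 * reference is taken on the page, and the snapshot is re-validated; if
 * the entry changed underneath us, the reference is dropped and the
 * fast path gives up.
 */
static inline struct page *try_grab_pte_page(pte_t *ptep, pte_t pte)
{
	struct page *page = pte_page(pte);

	if (!page_cache_get_speculative(page))
		return NULL;			/* page is being freed */
	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		put_page(page);			/* lost the race, undo */
		return NULL;
	}
	return page;				/* caller holds one extra reference */
}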

static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask, result;
	struct page *head, *page, *tail;
	int refs;

	result = write ? 0 : _SEGMENT_ENTRY_RO;
	mask = result | _SEGMENT_ENTRY_INV;
	if ((pmd_val(pmd) & mask) != result)
		return 0;

	refs = 0;
	head = pmd_page(pmd);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	tail = page;
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	/*
	 * Any tail pages need their mapcount reference taken before we
	 * return.
	 */
	while (refs--) {
		if (PageTail(tail))
			get_huge_page_tail(tail);
		tail++;
	}

	return 1;
}

static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp, pmd;

	pmdp = (pmd_t *) pudp;
#ifdef CONFIG_64BIT
	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pmdp = (pmd_t *) pud_deref(pud);
	pmdp += pmd_index(addr);
#endif
	do {
		pmd = *pmdp;
		barrier();
		next = pmd_addr_end(addr, end);
		/*
		 * The pmd_trans_splitting() check below explains why
		 * pmdp_splitting_flush() has to serialize with
		 * smp_call_function() against our disabled IRQs, to stop
		 * this gup-fast code from running while we set the
		 * splitting bit in the pmd. Returning zero will take
		 * the slow path that will call wait_split_huge_page()
		 * if the pmd is still in splitting state.
		 */
		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
			return 0;
		if (unlikely(pmd_large(pmd))) {
			if (!gup_huge_pmd(pmdp, pmd, addr, next,
					  write, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmdp, pmd, addr, next,
					  write, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp, pud;

	pudp = (pud_t *) pgdp;
#ifdef CONFIG_64BIT
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pudp = (pud_t *) pgd_deref(pgd);
	pudp += pud_index(addr);
#endif
	do {
		pud = *pudp;
		barrier();
		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next, flags;
	pgd_t *pgdp, pgd;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if ((end < start) || (end > TASK_SIZE))
		return 0;

	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd = *pgdp;
		barrier();
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}
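
/*
 * Illustrative sketch only, not part of the original file: a hypothetical
 * caller that cannot sleep (for example, one running under a spinlock or
 * with IRQs disabled) can use __get_user_pages_fast(), since it never
 * falls back to the sleeping slow path.  A return of 0 means nothing was
 * pinned and the caller is expected to retry later via the regular,
 * sleeping GUP.
 */
static int try_pin_one_page_atomic(unsigned long uaddr, int write,
				   struct page **page)
{
	/* Returns the number of pages pinned; anything but 1 means retry. */
	if (__get_user_pages_fast(uaddr & PAGE_MASK, 1, write, page) != 1)
		return -EAGAIN;
	return 0;
}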

int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp, pgd;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if ((end < start) || (end > TASK_SIZE))
		goto slow_irqon;

	/*
	 * local_irq_disable() doesn't prevent pagetable teardown, but does
	 * prevent the pagetables from being freed on s390.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_disable();
	pgdp = pgd_offset(mm, addr);
	do {
		pgd = *pgdp;
		barrier();
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);
	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;
slow:
		local_irq_enable();
slow_irqon:
		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}
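
/*
 * Illustrative sketch only, not part of the original file: a hypothetical
 * helper showing how a driver might pin a user buffer through
 * get_user_pages_fast() and drop the page references again.  The name
 * pin_user_buffer() and the all-or-nothing policy are assumptions made
 * for the example, not an interface defined by this file.
 */
static int pin_user_buffer(unsigned long uaddr, size_t len, int write,
			   struct page **pages)
{
	int nr_pages = DIV_ROUND_UP(len + (uaddr & ~PAGE_MASK), PAGE_SIZE);
	int pinned;

	pinned = get_user_pages_fast(uaddr & PAGE_MASK, nr_pages, write, pages);
	if (pinned < 0)
		return pinned;			/* nothing pinned, -errno */
	if (pinned == nr_pages)
		return nr_pages;		/* each page carries one reference */

	/* Partial pin: drop what we got and let the caller retry or fail. */
	while (pinned-- > 0)
		put_page(pages[pinned]);
	return -EFAULT;
}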