Linux Kernel 3.7.1
shmem.c
1 /*
2  * Resizable virtual memory filesystem for Linux.
3  *
4  * Copyright (C) 2000 Linus Torvalds.
5  * 2000 Transmeta Corp.
6  * 2000-2001 Christoph Rohland
7  * 2000-2001 SAP AG
8  * 2002 Red Hat Inc.
9  * Copyright (C) 2002-2011 Hugh Dickins.
10  * Copyright (C) 2011 Google Inc.
11  * Copyright (C) 2002-2005 VERITAS Software Corporation.
12  * Copyright (C) 2004 Andi Kleen, SuSE Labs
13  *
14  * Extended attribute support for tmpfs:
15  * Copyright (c) 2004, Luke Kenneth Casson Leighton <[email protected]>
16  * Copyright (c) 2004 Red Hat, Inc., James Morris <[email protected]>
17  *
18  * tiny-shmem:
19  * Copyright (c) 2004, 2008 Matt Mackall <[email protected]>
20  *
21  * This file is released under the GPL.
22  */
23 
24 #include <linux/fs.h>
25 #include <linux/init.h>
26 #include <linux/vfs.h>
27 #include <linux/mount.h>
28 #include <linux/pagemap.h>
29 #include <linux/file.h>
30 #include <linux/mm.h>
31 #include <linux/export.h>
32 #include <linux/swap.h>
33 
34 static struct vfsmount *shm_mnt;
35 
36 #ifdef CONFIG_SHMEM
37 /*
38  * This virtual memory filesystem is heavily based on the ramfs. It
39  * extends ramfs by the ability to use swap and honor resource limits
40  * which makes it a completely usable filesystem.
41  */
42 
43 #include <linux/xattr.h>
44 #include <linux/exportfs.h>
45 #include <linux/posix_acl.h>
46 #include <linux/generic_acl.h>
47 #include <linux/mman.h>
48 #include <linux/string.h>
49 #include <linux/slab.h>
50 #include <linux/backing-dev.h>
51 #include <linux/shmem_fs.h>
52 #include <linux/writeback.h>
53 #include <linux/blkdev.h>
54 #include <linux/pagevec.h>
55 #include <linux/percpu_counter.h>
56 #include <linux/falloc.h>
57 #include <linux/splice.h>
58 #include <linux/security.h>
59 #include <linux/swapops.h>
60 #include <linux/mempolicy.h>
61 #include <linux/namei.h>
62 #include <linux/ctype.h>
63 #include <linux/migrate.h>
64 #include <linux/highmem.h>
65 #include <linux/seq_file.h>
66 #include <linux/magic.h>
67 
68 #include <asm/uaccess.h>
69 #include <asm/pgtable.h>
70 
71 #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
72 #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
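/*
 * Worked example (assuming 4KB pages, i.e. PAGE_CACHE_SIZE == 4096 and
 * PAGE_SHIFT == 12): BLOCKS_PER_PAGE is 8, so each page accounts for eight
 * 512-byte blocks in inode->i_blocks; VM_ACCT(10000) rounds 10000 bytes up
 * to 12288 and shifts down to 3 pages charged against the overcommit limit.
 */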
73 
74 /* Pretend that each entry is of this size in directory's i_size */
75 #define BOGO_DIRENT_SIZE 20
76 
77 /* Symlink up to this size is kmalloc'ed instead of using a swappable page */
78 #define SHORT_SYMLINK_LEN 128
79 
80 /*
81  * shmem_fallocate and shmem_writepage communicate via inode->i_private
82  * (with i_mutex making sure that it has only one user at a time):
83  * we would prefer not to enlarge the shmem inode just for that.
84  */
85 struct shmem_falloc {
86  pgoff_t start; /* start of range currently being fallocated */
87  pgoff_t next; /* the next page offset to be fallocated */
88  pgoff_t nr_falloced; /* how many new pages have been fallocated */
89  pgoff_t nr_unswapped; /* how often writepage refused to swap out */
90 };
91 
92 /* Flag allocation requirements to shmem_getpage */
93 enum sgp_type {
94  SGP_READ, /* don't exceed i_size, don't allocate page */
95  SGP_CACHE, /* don't exceed i_size, may allocate page */
96  SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
97  SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
98  SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
99 };
100 
101 #ifdef CONFIG_TMPFS
102 static unsigned long shmem_default_max_blocks(void)
103 {
104  return totalram_pages / 2;
105 }
106 
107 static unsigned long shmem_default_max_inodes(void)
108 {
109  return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
110 }
111 #endif
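/*
 * For instance, assuming 4KB pages and 2GB of RAM (524288 pages), these
 * defaults let a tmpfs mount grow to 262144 blocks (1GB) and to 262144
 * inodes; on highmem configurations the inode default is further capped
 * by the number of lowmem pages, since each inode pins lowmem.
 */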
112 
113 static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
 114 static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 115  struct shmem_inode_info *info, pgoff_t index);
116 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
117  struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
118 
119 static inline int shmem_getpage(struct inode *inode, pgoff_t index,
120  struct page **pagep, enum sgp_type sgp, int *fault_type)
121 {
122  return shmem_getpage_gfp(inode, index, pagep, sgp,
123  mapping_gfp_mask(inode->i_mapping), fault_type);
124 }
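/*
 * Typical call pattern, as used by shmem_fault() further down: pass the
 * file offset in pages and an SGP_* mode; on success *pagep holds a locked,
 * referenced page (or NULL when SGP_READ hits a hole within i_size).
 */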
125 
126 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
127 {
128  return sb->s_fs_info;
129 }
130 
131 /*
132  * shmem_file_setup pre-accounts the whole fixed size of a VM object,
133  * for shared memory and for shared anonymous (/dev/zero) mappings
134  * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
135  * consistent with the pre-accounting of private mappings ...
136  */
137 static inline int shmem_acct_size(unsigned long flags, loff_t size)
138 {
139  return (flags & VM_NORESERVE) ?
140  0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
141 }
142 
143 static inline void shmem_unacct_size(unsigned long flags, loff_t size)
144 {
145  if (!(flags & VM_NORESERVE))
146  vm_unacct_memory(VM_ACCT(size));
147 }
148 
149 /*
150  * ... whereas tmpfs objects are accounted incrementally as
151  * pages are allocated, in order to allow huge sparse files.
152  * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
153  * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
154  */
155 static inline int shmem_acct_block(unsigned long flags)
156 {
 157  return (flags & VM_NORESERVE) ?
 158  security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
159 }
160 
161 static inline void shmem_unacct_blocks(unsigned long flags, long pages)
162 {
163  if (flags & VM_NORESERVE)
164  vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
165 }
166 
167 static const struct super_operations shmem_ops;
168 static const struct address_space_operations shmem_aops;
169 static const struct file_operations shmem_file_operations;
170 static const struct inode_operations shmem_inode_operations;
171 static const struct inode_operations shmem_dir_inode_operations;
172 static const struct inode_operations shmem_special_inode_operations;
173 static const struct vm_operations_struct shmem_vm_ops;
174 
175 static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
 176  .ra_pages = 0, /* No readahead */
 177  .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
178 };
179 
180 static LIST_HEAD(shmem_swaplist);
181 static DEFINE_MUTEX(shmem_swaplist_mutex);
182 
183 static int shmem_reserve_inode(struct super_block *sb)
184 {
185  struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
186  if (sbinfo->max_inodes) {
187  spin_lock(&sbinfo->stat_lock);
188  if (!sbinfo->free_inodes) {
189  spin_unlock(&sbinfo->stat_lock);
190  return -ENOSPC;
191  }
192  sbinfo->free_inodes--;
193  spin_unlock(&sbinfo->stat_lock);
194  }
195  return 0;
196 }
197 
198 static void shmem_free_inode(struct super_block *sb)
199 {
200  struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
201  if (sbinfo->max_inodes) {
202  spin_lock(&sbinfo->stat_lock);
203  sbinfo->free_inodes++;
204  spin_unlock(&sbinfo->stat_lock);
205  }
206 }
207 
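/*
 * shmem_recalc_inode() rechecks the inode's block usage: the mm may have
 * dropped clean hole pages behind our back, so pages once alloced may no
 * longer be in the page cache or in swap. Any such difference is returned
 * to the superblock's used_blocks counter and to the overcommit accounting,
 * and inode->i_blocks is reduced to match. Callers hold info->lock.
 */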
220 static void shmem_recalc_inode(struct inode *inode)
221 {
222  struct shmem_inode_info *info = SHMEM_I(inode);
223  long freed;
224 
225  freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
226  if (freed > 0) {
227  struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
228  if (sbinfo->max_blocks)
229  percpu_counter_add(&sbinfo->used_blocks, -freed);
230  info->alloced -= freed;
231  inode->i_blocks -= freed * BLOCKS_PER_PAGE;
232  shmem_unacct_blocks(info->flags, freed);
233  }
234 }
235 
236 /*
237  * Replace item expected in radix tree by a new item, while holding tree lock.
238  */
239 static int shmem_radix_tree_replace(struct address_space *mapping,
240  pgoff_t index, void *expected, void *replacement)
241 {
242  void **pslot;
243  void *item = NULL;
244 
245  VM_BUG_ON(!expected);
246  pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
247  if (pslot)
248  item = radix_tree_deref_slot_protected(pslot,
249  &mapping->tree_lock);
250  if (item != expected)
251  return -ENOENT;
252  if (replacement)
253  radix_tree_replace_slot(pslot, replacement);
254  else
255  radix_tree_delete(&mapping->page_tree, index);
256  return 0;
257 }
258 
259 /*
260  * Sometimes, before we decide whether to proceed or to fail, we must check
261  * that an entry was not already brought back from swap by a racing thread.
262  *
263  * Checking page is not enough: by the time a SwapCache page is locked, it
264  * might be reused, and again be SwapCache, using the same swap as before.
265  */
266 static bool shmem_confirm_swap(struct address_space *mapping,
267  pgoff_t index, swp_entry_t swap)
268 {
269  void *item;
270 
271  rcu_read_lock();
272  item = radix_tree_lookup(&mapping->page_tree, index);
273  rcu_read_unlock();
274  return item == swp_to_radix_entry(swap);
275 }
276 
277 /*
278  * Like add_to_page_cache_locked, but error if expected item has gone.
279  */
280 static int shmem_add_to_page_cache(struct page *page,
281  struct address_space *mapping,
282  pgoff_t index, gfp_t gfp, void *expected)
283 {
284  int error;
285 
286  VM_BUG_ON(!PageLocked(page));
287  VM_BUG_ON(!PageSwapBacked(page));
288 
289  page_cache_get(page);
290  page->mapping = mapping;
291  page->index = index;
292 
293  spin_lock_irq(&mapping->tree_lock);
294  if (!expected)
295  error = radix_tree_insert(&mapping->page_tree, index, page);
296  else
297  error = shmem_radix_tree_replace(mapping, index, expected,
298  page);
299  if (!error) {
300  mapping->nrpages++;
301  __inc_zone_page_state(page, NR_FILE_PAGES);
302  __inc_zone_page_state(page, NR_SHMEM);
303  spin_unlock_irq(&mapping->tree_lock);
304  } else {
305  page->mapping = NULL;
306  spin_unlock_irq(&mapping->tree_lock);
307  page_cache_release(page);
308  }
309  return error;
310 }
311 
312 /*
313  * Like delete_from_page_cache, but substitutes swap for page.
314  */
315 static void shmem_delete_from_page_cache(struct page *page, void *radswap)
316 {
317  struct address_space *mapping = page->mapping;
318  int error;
319 
320  spin_lock_irq(&mapping->tree_lock);
321  error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
322  page->mapping = NULL;
323  mapping->nrpages--;
324  __dec_zone_page_state(page, NR_FILE_PAGES);
325  __dec_zone_page_state(page, NR_SHMEM);
326  spin_unlock_irq(&mapping->tree_lock);
327  page_cache_release(page);
328  BUG_ON(error);
329 }
330 
331 /*
332  * Like find_get_pages, but collecting swap entries as well as pages.
333  */
334 static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
335  pgoff_t start, unsigned int nr_pages,
336  struct page **pages, pgoff_t *indices)
337 {
338  unsigned int i;
339  unsigned int ret;
340  unsigned int nr_found;
341 
342  rcu_read_lock();
343 restart:
344  nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
345  (void ***)pages, indices, start, nr_pages);
346  ret = 0;
347  for (i = 0; i < nr_found; i++) {
348  struct page *page;
349 repeat:
350  page = radix_tree_deref_slot((void **)pages[i]);
351  if (unlikely(!page))
352  continue;
353  if (radix_tree_exception(page)) {
354  if (radix_tree_deref_retry(page))
355  goto restart;
356  /*
357  * Otherwise, we must be storing a swap entry
358  * here as an exceptional entry: so return it
359  * without attempting to raise page count.
360  */
361  goto export;
362  }
363  if (!page_cache_get_speculative(page))
364  goto repeat;
365 
366  /* Has the page moved? */
367  if (unlikely(page != *((void **)pages[i]))) {
368  page_cache_release(page);
369  goto repeat;
370  }
371 export:
372  indices[ret] = indices[i];
373  pages[ret] = page;
374  ret++;
375  }
376  if (unlikely(!ret && nr_found))
377  goto restart;
378  rcu_read_unlock();
379  return ret;
380 }
381 
382 /*
383  * Remove swap entry from radix tree, free the swap and its page cache.
384  */
385 static int shmem_free_swap(struct address_space *mapping,
386  pgoff_t index, void *radswap)
387 {
388  int error;
389 
390  spin_lock_irq(&mapping->tree_lock);
391  error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
392  spin_unlock_irq(&mapping->tree_lock);
393  if (!error)
394  free_swap_and_cache(radix_to_swp_entry(radswap));
395  return error;
396 }
397 
398 /*
399  * Pagevec may contain swap entries, so shuffle up pages before releasing.
400  */
401 static void shmem_deswap_pagevec(struct pagevec *pvec)
402 {
403  int i, j;
404 
405  for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
406  struct page *page = pvec->pages[i];
407  if (!radix_tree_exceptional_entry(page))
408  pvec->pages[j++] = page;
409  }
410  pvec->nr = j;
411 }
412 
413 /*
 414  * SysV IPC SHM_UNLOCK restores Unevictable pages to their evictable lists.
415  */
416 void shmem_unlock_mapping(struct address_space *mapping)
417 {
418  struct pagevec pvec;
419  pgoff_t indices[PAGEVEC_SIZE];
420  pgoff_t index = 0;
421 
422  pagevec_init(&pvec, 0);
423  /*
424  * Minor point, but we might as well stop if someone else SHM_LOCKs it.
425  */
426  while (!mapping_unevictable(mapping)) {
427  /*
428  * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
429  * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
430  */
431  pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
432  PAGEVEC_SIZE, pvec.pages, indices);
433  if (!pvec.nr)
434  break;
435  index = indices[pvec.nr - 1] + 1;
436  shmem_deswap_pagevec(&pvec);
437  check_move_unevictable_pages(pvec.pages, pvec.nr);
438  pagevec_release(&pvec);
439  cond_resched();
440  }
441 }
442 
443 /*
444  * Remove range of pages and swap entries from radix tree, and free them.
445  * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
446  */
447 static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
448  bool unfalloc)
449 {
450  struct address_space *mapping = inode->i_mapping;
451  struct shmem_inode_info *info = SHMEM_I(inode);
452  pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
453  pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
454  unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
455  unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
456  struct pagevec pvec;
457  pgoff_t indices[PAGEVEC_SIZE];
458  long nr_swaps_freed = 0;
459  pgoff_t index;
460  int i;
461 
462  if (lend == -1)
463  end = -1; /* unsigned, so actually very big */
464 
465  pagevec_init(&pvec, 0);
466  index = start;
467  while (index < end) {
468  pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
469  min(end - index, (pgoff_t)PAGEVEC_SIZE),
470  pvec.pages, indices);
471  if (!pvec.nr)
472  break;
 473  mem_cgroup_uncharge_start();
 474  for (i = 0; i < pagevec_count(&pvec); i++) {
475  struct page *page = pvec.pages[i];
476 
477  index = indices[i];
478  if (index >= end)
479  break;
480 
481  if (radix_tree_exceptional_entry(page)) {
482  if (unfalloc)
483  continue;
484  nr_swaps_freed += !shmem_free_swap(mapping,
485  index, page);
486  continue;
487  }
488 
489  if (!trylock_page(page))
490  continue;
491  if (!unfalloc || !PageUptodate(page)) {
492  if (page->mapping == mapping) {
493  VM_BUG_ON(PageWriteback(page));
494  truncate_inode_page(mapping, page);
495  }
496  }
497  unlock_page(page);
498  }
499  shmem_deswap_pagevec(&pvec);
500  pagevec_release(&pvec);
 501  mem_cgroup_uncharge_end();
 502  cond_resched();
503  index++;
504  }
505 
506  if (partial_start) {
507  struct page *page = NULL;
508  shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
509  if (page) {
510  unsigned int top = PAGE_CACHE_SIZE;
511  if (start > end) {
512  top = partial_end;
513  partial_end = 0;
514  }
515  zero_user_segment(page, partial_start, top);
516  set_page_dirty(page);
517  unlock_page(page);
518  page_cache_release(page);
519  }
520  }
521  if (partial_end) {
522  struct page *page = NULL;
523  shmem_getpage(inode, end, &page, SGP_READ, NULL);
524  if (page) {
525  zero_user_segment(page, 0, partial_end);
526  set_page_dirty(page);
527  unlock_page(page);
528  page_cache_release(page);
529  }
530  }
531  if (start >= end)
532  return;
533 
534  index = start;
535  for ( ; ; ) {
536  cond_resched();
537  pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
538  min(end - index, (pgoff_t)PAGEVEC_SIZE),
539  pvec.pages, indices);
540  if (!pvec.nr) {
541  if (index == start || unfalloc)
542  break;
543  index = start;
544  continue;
545  }
546  if ((index == start || unfalloc) && indices[0] >= end) {
547  shmem_deswap_pagevec(&pvec);
548  pagevec_release(&pvec);
549  break;
550  }
 551  mem_cgroup_uncharge_start();
 552  for (i = 0; i < pagevec_count(&pvec); i++) {
553  struct page *page = pvec.pages[i];
554 
555  index = indices[i];
556  if (index >= end)
557  break;
558 
559  if (radix_tree_exceptional_entry(page)) {
560  if (unfalloc)
561  continue;
562  nr_swaps_freed += !shmem_free_swap(mapping,
563  index, page);
564  continue;
565  }
566 
567  lock_page(page);
568  if (!unfalloc || !PageUptodate(page)) {
569  if (page->mapping == mapping) {
570  VM_BUG_ON(PageWriteback(page));
571  truncate_inode_page(mapping, page);
572  }
573  }
574  unlock_page(page);
575  }
576  shmem_deswap_pagevec(&pvec);
577  pagevec_release(&pvec);
 578  mem_cgroup_uncharge_end();
 579  index++;
580  }
581 
582  spin_lock(&info->lock);
583  info->swapped -= nr_swaps_freed;
584  shmem_recalc_inode(inode);
585  spin_unlock(&info->lock);
586 }
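/*
 * Note the two-pass structure above: the first pass uses trylock_page()
 * and simply skips pages it cannot lock, then any partial first/last pages
 * are zeroed; the second pass uses lock_page() and restarts from 'start'
 * until every remaining page and swap entry in the range is gone (when
 * undoing a failed fallocate, swap entries and already-Uptodate pages are
 * left alone and the loop does not restart).
 */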
587 
588 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
589 {
590  shmem_undo_range(inode, lstart, lend, false);
591  inode->i_ctime = inode->i_mtime = CURRENT_TIME;
592 }
 593 EXPORT_SYMBOL_GPL(shmem_truncate_range);
 594 
595 static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
596 {
597  struct inode *inode = dentry->d_inode;
598  int error;
599 
600  error = inode_change_ok(inode, attr);
601  if (error)
602  return error;
603 
604  if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
605  loff_t oldsize = inode->i_size;
606  loff_t newsize = attr->ia_size;
607 
608  if (newsize != oldsize) {
609  i_size_write(inode, newsize);
610  inode->i_ctime = inode->i_mtime = CURRENT_TIME;
611  }
612  if (newsize < oldsize) {
613  loff_t holebegin = round_up(newsize, PAGE_SIZE);
614  unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
615  shmem_truncate_range(inode, newsize, (loff_t)-1);
616  /* unmap again to remove racily COWed private pages */
617  unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
618  }
619  }
620 
621  setattr_copy(inode, attr);
622 #ifdef CONFIG_TMPFS_POSIX_ACL
623  if (attr->ia_valid & ATTR_MODE)
624  error = generic_acl_chmod(inode);
625 #endif
626  return error;
627 }
628 
629 static void shmem_evict_inode(struct inode *inode)
630 {
631  struct shmem_inode_info *info = SHMEM_I(inode);
632 
633  if (inode->i_mapping->a_ops == &shmem_aops) {
634  shmem_unacct_size(info->flags, inode->i_size);
635  inode->i_size = 0;
636  shmem_truncate_range(inode, 0, (loff_t)-1);
637  if (!list_empty(&info->swaplist)) {
638  mutex_lock(&shmem_swaplist_mutex);
639  list_del_init(&info->swaplist);
640  mutex_unlock(&shmem_swaplist_mutex);
641  }
642  } else
643  kfree(info->symlink);
644 
645  simple_xattrs_free(&info->xattrs);
646  WARN_ON(inode->i_blocks);
647  shmem_free_inode(inode->i_sb);
648  clear_inode(inode);
649 }
650 
651 /*
652  * If swap found in inode, free it and move page from swapcache to filecache.
653  */
654 static int shmem_unuse_inode(struct shmem_inode_info *info,
655  swp_entry_t swap, struct page **pagep)
656 {
657  struct address_space *mapping = info->vfs_inode.i_mapping;
658  void *radswap;
659  pgoff_t index;
660  gfp_t gfp;
661  int error = 0;
662 
663  radswap = swp_to_radix_entry(swap);
664  index = radix_tree_locate_item(&mapping->page_tree, radswap);
665  if (index == -1)
666  return 0;
667 
668  /*
669  * Move _head_ to start search for next from here.
670  * But be careful: shmem_evict_inode checks list_empty without taking
671  * mutex, and there's an instant in list_move_tail when info->swaplist
672  * would appear empty, if it were the only one on shmem_swaplist.
673  */
674  if (shmem_swaplist.next != &info->swaplist)
675  list_move_tail(&shmem_swaplist, &info->swaplist);
676 
677  gfp = mapping_gfp_mask(mapping);
678  if (shmem_should_replace_page(*pagep, gfp)) {
679  mutex_unlock(&shmem_swaplist_mutex);
680  error = shmem_replace_page(pagep, gfp, info, index);
681  mutex_lock(&shmem_swaplist_mutex);
682  /*
683  * We needed to drop mutex to make that restrictive page
684  * allocation, but the inode might have been freed while we
685  * dropped it: although a racing shmem_evict_inode() cannot
686  * complete without emptying the radix_tree, our page lock
687  * on this swapcache page is not enough to prevent that -
688  * free_swap_and_cache() of our swap entry will only
689  * trylock_page(), removing swap from radix_tree whatever.
690  *
691  * We must not proceed to shmem_add_to_page_cache() if the
692  * inode has been freed, but of course we cannot rely on
693  * inode or mapping or info to check that. However, we can
694  * safely check if our swap entry is still in use (and here
695  * it can't have got reused for another page): if it's still
696  * in use, then the inode cannot have been freed yet, and we
697  * can safely proceed (if it's no longer in use, that tells
698  * nothing about the inode, but we don't need to unuse swap).
699  */
700  if (!page_swapcount(*pagep))
701  error = -ENOENT;
702  }
703 
704  /*
705  * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
706  * but also to hold up shmem_evict_inode(): so inode cannot be freed
707  * beneath us (pagelock doesn't help until the page is in pagecache).
708  */
709  if (!error)
710  error = shmem_add_to_page_cache(*pagep, mapping, index,
711  GFP_NOWAIT, radswap);
712  if (error != -ENOMEM) {
713  /*
714  * Truncation and eviction use free_swap_and_cache(), which
715  * only does trylock page: if we raced, best clean up here.
716  */
717  delete_from_swap_cache(*pagep);
718  set_page_dirty(*pagep);
719  if (!error) {
720  spin_lock(&info->lock);
721  info->swapped--;
722  spin_unlock(&info->lock);
723  swap_free(swap);
724  }
725  error = 1; /* not an error, but entry was found */
726  }
727  return error;
728 }
729 
730 /*
731  * Search through swapped inodes to find and replace swap by page.
732  */
733 int shmem_unuse(swp_entry_t swap, struct page *page)
734 {
735  struct list_head *this, *next;
736  struct shmem_inode_info *info;
737  int found = 0;
738  int error = 0;
739 
740  /*
741  * There's a faint possibility that swap page was replaced before
742  * caller locked it: caller will come back later with the right page.
743  */
744  if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
745  goto out;
746 
747  /*
748  * Charge page using GFP_KERNEL while we can wait, before taking
749  * the shmem_swaplist_mutex which might hold up shmem_writepage().
750  * Charged back to the user (not to caller) when swap account is used.
751  */
752  error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
753  if (error)
754  goto out;
755  /* No radix_tree_preload: swap entry keeps a place for page in tree */
756 
757  mutex_lock(&shmem_swaplist_mutex);
758  list_for_each_safe(this, next, &shmem_swaplist) {
759  info = list_entry(this, struct shmem_inode_info, swaplist);
760  if (info->swapped)
761  found = shmem_unuse_inode(info, swap, &page);
762  else
763  list_del_init(&info->swaplist);
764  cond_resched();
765  if (found)
766  break;
767  }
768  mutex_unlock(&shmem_swaplist_mutex);
769 
770  if (found < 0)
771  error = found;
772 out:
773  unlock_page(page);
774  page_cache_release(page);
775  return error;
776 }
777 
778 /*
779  * Move the page from the page cache to the swap cache.
780  */
781 static int shmem_writepage(struct page *page, struct writeback_control *wbc)
782 {
783  struct shmem_inode_info *info;
784  struct address_space *mapping;
785  struct inode *inode;
 786  swp_entry_t swap;
 787  pgoff_t index;
788 
789  BUG_ON(!PageLocked(page));
790  mapping = page->mapping;
791  index = page->index;
792  inode = mapping->host;
793  info = SHMEM_I(inode);
794  if (info->flags & VM_LOCKED)
795  goto redirty;
796  if (!total_swap_pages)
797  goto redirty;
798 
799  /*
800  * shmem_backing_dev_info's capabilities prevent regular writeback or
801  * sync from ever calling shmem_writepage; but a stacking filesystem
802  * might use ->writepage of its underlying filesystem, in which case
803  * tmpfs should write out to swap only in response to memory pressure,
804  * and not for the writeback threads or sync.
805  */
806  if (!wbc->for_reclaim) {
807  WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
808  goto redirty;
809  }
810 
811  /*
812  * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
813  * value into swapfile.c, the only way we can correctly account for a
814  * fallocated page arriving here is now to initialize it and write it.
815  *
816  * That's okay for a page already fallocated earlier, but if we have
817  * not yet completed the fallocation, then (a) we want to keep track
818  * of this page in case we have to undo it, and (b) it may not be a
819  * good idea to continue anyway, once we're pushing into swap. So
820  * reactivate the page, and let shmem_fallocate() quit when too many.
821  */
822  if (!PageUptodate(page)) {
823  if (inode->i_private) {
824  struct shmem_falloc *shmem_falloc;
825  spin_lock(&inode->i_lock);
826  shmem_falloc = inode->i_private;
827  if (shmem_falloc &&
828  index >= shmem_falloc->start &&
829  index < shmem_falloc->next)
830  shmem_falloc->nr_unswapped++;
831  else
832  shmem_falloc = NULL;
833  spin_unlock(&inode->i_lock);
834  if (shmem_falloc)
835  goto redirty;
836  }
837  clear_highpage(page);
838  flush_dcache_page(page);
839  SetPageUptodate(page);
840  }
841 
842  swap = get_swap_page();
843  if (!swap.val)
844  goto redirty;
845 
846  /*
847  * Add inode to shmem_unuse()'s list of swapped-out inodes,
848  * if it's not already there. Do it now before the page is
849  * moved to swap cache, when its pagelock no longer protects
850  * the inode from eviction. But don't unlock the mutex until
851  * we've incremented swapped, because shmem_unuse_inode() will
852  * prune a !swapped inode from the swaplist under this mutex.
853  */
854  mutex_lock(&shmem_swaplist_mutex);
855  if (list_empty(&info->swaplist))
856  list_add_tail(&info->swaplist, &shmem_swaplist);
857 
858  if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
859  swap_shmem_alloc(swap);
860  shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
861 
862  spin_lock(&info->lock);
863  info->swapped++;
864  shmem_recalc_inode(inode);
865  spin_unlock(&info->lock);
866 
867  mutex_unlock(&shmem_swaplist_mutex);
868  BUG_ON(page_mapped(page));
869  swap_writepage(page, wbc);
870  return 0;
871  }
872 
873  mutex_unlock(&shmem_swaplist_mutex);
874  swapcache_free(swap, NULL);
875 redirty:
876  set_page_dirty(page);
877  if (wbc->for_reclaim)
878  return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */
879  unlock_page(page);
880  return 0;
881 }
882 
883 #ifdef CONFIG_NUMA
884 #ifdef CONFIG_TMPFS
885 static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
886 {
887  char buffer[64];
888 
889  if (!mpol || mpol->mode == MPOL_DEFAULT)
890  return; /* show nothing */
891 
892  mpol_to_str(buffer, sizeof(buffer), mpol, 1);
893 
894  seq_printf(seq, ",mpol=%s", buffer);
895 }
896 
897 static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
898 {
899  struct mempolicy *mpol = NULL;
900  if (sbinfo->mpol) {
901  spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
902  mpol = sbinfo->mpol;
903  mpol_get(mpol);
904  spin_unlock(&sbinfo->stat_lock);
905  }
906  return mpol;
907 }
908 #endif /* CONFIG_TMPFS */
909 
910 static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
911  struct shmem_inode_info *info, pgoff_t index)
912 {
913  struct vm_area_struct pvma;
914  struct page *page;
915 
916  /* Create a pseudo vma that just contains the policy */
917  pvma.vm_start = 0;
918  /* Bias interleave by inode number to distribute better across nodes */
919  pvma.vm_pgoff = index + info->vfs_inode.i_ino;
920  pvma.vm_ops = NULL;
921  pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
922 
923  page = swapin_readahead(swap, gfp, &pvma, 0);
924 
925  /* Drop reference taken by mpol_shared_policy_lookup() */
926  mpol_cond_put(pvma.vm_policy);
927 
928  return page;
929 }
930 
931 static struct page *shmem_alloc_page(gfp_t gfp,
932  struct shmem_inode_info *info, pgoff_t index)
933 {
934  struct vm_area_struct pvma;
935  struct page *page;
936 
937  /* Create a pseudo vma that just contains the policy */
938  pvma.vm_start = 0;
939  /* Bias interleave by inode number to distribute better across nodes */
940  pvma.vm_pgoff = index + info->vfs_inode.i_ino;
941  pvma.vm_ops = NULL;
942  pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
943 
944  page = alloc_page_vma(gfp, &pvma, 0);
945 
946  /* Drop reference taken by mpol_shared_policy_lookup() */
947  mpol_cond_put(pvma.vm_policy);
948 
949  return page;
950 }
951 #else /* !CONFIG_NUMA */
952 #ifdef CONFIG_TMPFS
953 static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
954 {
955 }
956 #endif /* CONFIG_TMPFS */
957 
958 static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
959  struct shmem_inode_info *info, pgoff_t index)
960 {
961  return swapin_readahead(swap, gfp, NULL, 0);
962 }
963 
964 static inline struct page *shmem_alloc_page(gfp_t gfp,
965  struct shmem_inode_info *info, pgoff_t index)
966 {
967  return alloc_page(gfp);
968 }
969 #endif /* CONFIG_NUMA */
970 
971 #if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
972 static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
973 {
974  return NULL;
975 }
976 #endif
977 
978 /*
979  * When a page is moved from swapcache to shmem filecache (either by the
980  * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
981  * shmem_unuse_inode()), it may have been read in earlier from swap, in
982  * ignorance of the mapping it belongs to. If that mapping has special
983  * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
984  * we may need to copy to a suitable page before moving to filecache.
985  *
986  * In a future release, this may well be extended to respect cpuset and
987  * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
988  * but for now it is a simple matter of zone.
989  */
990 static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
991 {
992  return page_zonenum(page) > gfp_zone(gfp);
993 }
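/*
 * For example, a swapcache page may have been allocated anywhere in RAM,
 * but a mapping such as the gma500 case above uses a gfp mask limited to
 * ZONE_DMA32: if page_zonenum(page) is higher than gfp_zone(gfp), the data
 * must be copied into a suitably placed page before entering the filecache.
 */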
994 
995 static int shmem_replace_page(struct page **pagep, gfp_t gfp,
996  struct shmem_inode_info *info, pgoff_t index)
997 {
998  struct page *oldpage, *newpage;
999  struct address_space *swap_mapping;
1000  pgoff_t swap_index;
1001  int error;
1002 
1003  oldpage = *pagep;
1004  swap_index = page_private(oldpage);
1005  swap_mapping = page_mapping(oldpage);
1006 
1007  /*
1008  * We have arrived here because our zones are constrained, so don't
1009  * limit chance of success by further cpuset and node constraints.
1010  */
1011  gfp &= ~GFP_CONSTRAINT_MASK;
1012  newpage = shmem_alloc_page(gfp, info, index);
1013  if (!newpage)
1014  return -ENOMEM;
1015 
1016  page_cache_get(newpage);
1017  copy_highpage(newpage, oldpage);
1018  flush_dcache_page(newpage);
1019 
1020  __set_page_locked(newpage);
1021  SetPageUptodate(newpage);
1022  SetPageSwapBacked(newpage);
1023  set_page_private(newpage, swap_index);
1024  SetPageSwapCache(newpage);
1025 
1026  /*
1027  * Our caller will very soon move newpage out of swapcache, but it's
1028  * a nice clean interface for us to replace oldpage by newpage there.
1029  */
1030  spin_lock_irq(&swap_mapping->tree_lock);
1031  error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
1032  newpage);
1033  if (!error) {
1034  __inc_zone_page_state(newpage, NR_FILE_PAGES);
1035  __dec_zone_page_state(oldpage, NR_FILE_PAGES);
1036  }
1037  spin_unlock_irq(&swap_mapping->tree_lock);
1038 
1039  if (unlikely(error)) {
1040  /*
1041  * Is this possible? I think not, now that our callers check
1042  * both PageSwapCache and page_private after getting page lock;
1043  * but be defensive. Reverse old to newpage for clear and free.
1044  */
1045  oldpage = newpage;
1046  } else {
1047  mem_cgroup_replace_page_cache(oldpage, newpage);
1048  lru_cache_add_anon(newpage);
1049  *pagep = newpage;
1050  }
1051 
1052  ClearPageSwapCache(oldpage);
1053  set_page_private(oldpage, 0);
1054 
1055  unlock_page(oldpage);
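 /*
  * The page being discarded (oldpage on success, newpage on error) holds
  * exactly two references here: on success, the caller's lookup reference
  * plus the swapcache reference taken over when oldpage was replaced in
  * the radix tree; on error, the allocation reference plus the
  * page_cache_get() above. Hence the double release below is intentional.
  */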
1056  page_cache_release(oldpage);
1057  page_cache_release(oldpage);
1058  return error;
1059 }
1060 
1061 /*
1062  * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
1063  *
1064  * If we allocate a new one we do not mark it dirty. That's up to the
1065  * vm. If we swap it in we mark it dirty since we also free the swap
 1066  * entry, because a page cannot live in both the swap and page cache.
1067  */
1068 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1069  struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
1070 {
1071  struct address_space *mapping = inode->i_mapping;
1072  struct shmem_inode_info *info;
1073  struct shmem_sb_info *sbinfo;
1074  struct page *page;
1075  swp_entry_t swap;
1076  int error;
1077  int once = 0;
1078  int alloced = 0;
1079 
1080  if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1081  return -EFBIG;
1082 repeat:
1083  swap.val = 0;
1084  page = find_lock_page(mapping, index);
1085  if (radix_tree_exceptional_entry(page)) {
1086  swap = radix_to_swp_entry(page);
1087  page = NULL;
1088  }
1089 
1090  if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1091  ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1092  error = -EINVAL;
1093  goto failed;
1094  }
1095 
1096  /* fallocated page? */
1097  if (page && !PageUptodate(page)) {
1098  if (sgp != SGP_READ)
1099  goto clear;
1100  unlock_page(page);
1101  page_cache_release(page);
1102  page = NULL;
1103  }
1104  if (page || (sgp == SGP_READ && !swap.val)) {
1105  *pagep = page;
1106  return 0;
1107  }
1108 
1109  /*
1110  * Fast cache lookup did not find it:
1111  * bring it back from swap or allocate.
1112  */
1113  info = SHMEM_I(inode);
1114  sbinfo = SHMEM_SB(inode->i_sb);
1115 
1116  if (swap.val) {
1117  /* Look it up and read it in.. */
1118  page = lookup_swap_cache(swap);
1119  if (!page) {
1120  /* here we actually do the io */
1121  if (fault_type)
1122  *fault_type |= VM_FAULT_MAJOR;
1123  page = shmem_swapin(swap, gfp, info, index);
1124  if (!page) {
1125  error = -ENOMEM;
1126  goto failed;
1127  }
1128  }
1129 
1130  /* We have to do this with page locked to prevent races */
1131  lock_page(page);
1132  if (!PageSwapCache(page) || page_private(page) != swap.val ||
1133  !shmem_confirm_swap(mapping, index, swap)) {
1134  error = -EEXIST; /* try again */
1135  goto unlock;
1136  }
1137  if (!PageUptodate(page)) {
1138  error = -EIO;
1139  goto failed;
1140  }
1141  wait_on_page_writeback(page);
1142 
1143  if (shmem_should_replace_page(page, gfp)) {
1144  error = shmem_replace_page(&page, gfp, info, index);
1145  if (error)
1146  goto failed;
1147  }
1148 
1149  error = mem_cgroup_cache_charge(page, current->mm,
1150  gfp & GFP_RECLAIM_MASK);
1151  if (!error) {
1152  error = shmem_add_to_page_cache(page, mapping, index,
1153  gfp, swp_to_radix_entry(swap));
1154  /*
1155  * We already confirmed swap under page lock, and make
1156  * no memory allocation here, so usually no possibility
1157  * of error; but free_swap_and_cache() only trylocks a
1158  * page, so it is just possible that the entry has been
1159  * truncated or holepunched since swap was confirmed.
1160  * shmem_undo_range() will have done some of the
1161  * unaccounting, now delete_from_swap_cache() will do
1162  * the rest (including mem_cgroup_uncharge_swapcache).
1163  * Reset swap.val? No, leave it so "failed" goes back to
1164  * "repeat": reading a hole and writing should succeed.
1165  */
1166  if (error)
1167  delete_from_swap_cache(page);
1168  }
1169  if (error)
1170  goto failed;
1171 
1172  spin_lock(&info->lock);
1173  info->swapped--;
1174  shmem_recalc_inode(inode);
1175  spin_unlock(&info->lock);
1176 
1177  delete_from_swap_cache(page);
1178  set_page_dirty(page);
1179  swap_free(swap);
1180 
1181  } else {
1182  if (shmem_acct_block(info->flags)) {
1183  error = -ENOSPC;
1184  goto failed;
1185  }
1186  if (sbinfo->max_blocks) {
1187  if (percpu_counter_compare(&sbinfo->used_blocks,
1188  sbinfo->max_blocks) >= 0) {
1189  error = -ENOSPC;
1190  goto unacct;
1191  }
1192  percpu_counter_inc(&sbinfo->used_blocks);
1193  }
1194 
1195  page = shmem_alloc_page(gfp, info, index);
1196  if (!page) {
1197  error = -ENOMEM;
1198  goto decused;
1199  }
1200 
1201  SetPageSwapBacked(page);
1202  __set_page_locked(page);
1203  error = mem_cgroup_cache_charge(page, current->mm,
1204  gfp & GFP_RECLAIM_MASK);
1205  if (error)
1206  goto decused;
1207  error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
1208  if (!error) {
1209  error = shmem_add_to_page_cache(page, mapping, index,
1210  gfp, NULL);
1211  radix_tree_preload_end();
1212  }
1213  if (error) {
 1214  mem_cgroup_uncharge_cache_page(page);
 1215  goto decused;
1216  }
1217  lru_cache_add_anon(page);
1218 
1219  spin_lock(&info->lock);
1220  info->alloced++;
1221  inode->i_blocks += BLOCKS_PER_PAGE;
1222  shmem_recalc_inode(inode);
1223  spin_unlock(&info->lock);
1224  alloced = true;
1225 
1226  /*
1227  * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
1228  */
1229  if (sgp == SGP_FALLOC)
1230  sgp = SGP_WRITE;
1231 clear:
1232  /*
1233  * Let SGP_WRITE caller clear ends if write does not fill page;
1234  * but SGP_FALLOC on a page fallocated earlier must initialize
1235  * it now, lest undo on failure cancel our earlier guarantee.
1236  */
1237  if (sgp != SGP_WRITE) {
1238  clear_highpage(page);
1239  flush_dcache_page(page);
1240  SetPageUptodate(page);
1241  }
1242  if (sgp == SGP_DIRTY)
1243  set_page_dirty(page);
1244  }
1245 
1246  /* Perhaps the file has been truncated since we checked */
1247  if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
1248  ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1249  error = -EINVAL;
1250  if (alloced)
1251  goto trunc;
1252  else
1253  goto failed;
1254  }
1255  *pagep = page;
1256  return 0;
1257 
1258  /*
1259  * Error recovery.
1260  */
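/*
 * Each label below undoes one stage of the allocation path above: trunc
 * removes the freshly added page from the cache and reverses the inode's
 * alloced/i_blocks counts, decused gives back the superblock block, and
 * unacct releases the overcommit charge; failed/unlock then report the
 * error, retrying from "repeat" on -EEXIST races or after reclaiming
 * stale accounting on a first -ENOSPC.
 */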
1261 trunc:
1262  info = SHMEM_I(inode);
1263  ClearPageDirty(page);
1264  delete_from_page_cache(page);
1265  spin_lock(&info->lock);
1266  info->alloced--;
1267  inode->i_blocks -= BLOCKS_PER_PAGE;
1268  spin_unlock(&info->lock);
1269 decused:
1270  sbinfo = SHMEM_SB(inode->i_sb);
1271  if (sbinfo->max_blocks)
1272  percpu_counter_add(&sbinfo->used_blocks, -1);
1273 unacct:
1274  shmem_unacct_blocks(info->flags, 1);
1275 failed:
1276  if (swap.val && error != -EINVAL &&
1277  !shmem_confirm_swap(mapping, index, swap))
1278  error = -EEXIST;
1279 unlock:
1280  if (page) {
1281  unlock_page(page);
1282  page_cache_release(page);
1283  }
1284  if (error == -ENOSPC && !once++) {
1285  info = SHMEM_I(inode);
1286  spin_lock(&info->lock);
1287  shmem_recalc_inode(inode);
1288  spin_unlock(&info->lock);
1289  goto repeat;
1290  }
1291  if (error == -EEXIST) /* from above or from radix_tree_insert */
1292  goto repeat;
1293  return error;
1294 }
1295 
1296 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1297 {
1298  struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1299  int error;
1300  int ret = VM_FAULT_LOCKED;
1301 
1302  error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
1303  if (error)
1304  return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
1305 
1306  if (ret & VM_FAULT_MAJOR) {
1307  count_vm_event(PGMAJFAULT);
 1308  mem_cgroup_count_vm_event(current->mm, PGMAJFAULT);
 1309  }
1310  return ret;
1311 }
1312 
1313 #ifdef CONFIG_NUMA
1314 static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1315 {
1316  struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1317  return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1318 }
1319 
1320 static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1321  unsigned long addr)
1322 {
1323  struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1324  pgoff_t index;
1325 
1326  index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1327  return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1328 }
1329 #endif
1330 
1331 int shmem_lock(struct file *file, int lock, struct user_struct *user)
1332 {
1333  struct inode *inode = file->f_path.dentry->d_inode;
1334  struct shmem_inode_info *info = SHMEM_I(inode);
1335  int retval = -ENOMEM;
1336 
1337  spin_lock(&info->lock);
1338  if (lock && !(info->flags & VM_LOCKED)) {
1339  if (!user_shm_lock(inode->i_size, user))
1340  goto out_nomem;
1341  info->flags |= VM_LOCKED;
1342  mapping_set_unevictable(file->f_mapping);
1343  }
1344  if (!lock && (info->flags & VM_LOCKED) && user) {
1345  user_shm_unlock(inode->i_size, user);
1346  info->flags &= ~VM_LOCKED;
1347  mapping_clear_unevictable(file->f_mapping);
1348  }
1349  retval = 0;
1350 
1351 out_nomem:
1352  spin_unlock(&info->lock);
1353  return retval;
1354 }
1355 
1356 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1357 {
1358  file_accessed(file);
1359  vma->vm_ops = &shmem_vm_ops;
1360  return 0;
1361 }
1362 
1363 static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir,
1364  umode_t mode, dev_t dev, unsigned long flags)
1365 {
1366  struct inode *inode;
1367  struct shmem_inode_info *info;
1368  struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1369 
1370  if (shmem_reserve_inode(sb))
1371  return NULL;
1372 
1373  inode = new_inode(sb);
1374  if (inode) {
1375  inode->i_ino = get_next_ino();
1376  inode_init_owner(inode, dir, mode);
1377  inode->i_blocks = 0;
1378  inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1379  inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1380  inode->i_generation = get_seconds();
1381  info = SHMEM_I(inode);
1382  memset(info, 0, (char *)inode - (char *)info);
1383  spin_lock_init(&info->lock);
1384  info->flags = flags & VM_NORESERVE;
1385  INIT_LIST_HEAD(&info->swaplist);
1386  simple_xattrs_init(&info->xattrs);
1387  cache_no_acl(inode);
1388 
1389  switch (mode & S_IFMT) {
1390  default:
1391  inode->i_op = &shmem_special_inode_operations;
1392  init_special_inode(inode, mode, dev);
1393  break;
1394  case S_IFREG:
1395  inode->i_mapping->a_ops = &shmem_aops;
1396  inode->i_op = &shmem_inode_operations;
1397  inode->i_fop = &shmem_file_operations;
 1398  mpol_shared_policy_init(&info->policy,
 1399  shmem_get_sbmpol(sbinfo));
1400  break;
1401  case S_IFDIR:
1402  inc_nlink(inode);
1403  /* Some things misbehave if size == 0 on a directory */
1404  inode->i_size = 2 * BOGO_DIRENT_SIZE;
1405  inode->i_op = &shmem_dir_inode_operations;
1406  inode->i_fop = &simple_dir_operations;
1407  break;
1408  case S_IFLNK:
1409  /*
1410  * Must not load anything in the rbtree,
1411  * mpol_free_shared_policy will not be called.
1412  */
1414  break;
1415  }
1416  } else
1417  shmem_free_inode(sb);
1418  return inode;
1419 }
1420 
1421 #ifdef CONFIG_TMPFS
1422 static const struct inode_operations shmem_symlink_inode_operations;
1423 static const struct inode_operations shmem_short_symlink_operations;
1424 
1425 #ifdef CONFIG_TMPFS_XATTR
1426 static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
1427 #else
1428 #define shmem_initxattrs NULL
1429 #endif
1430 
1431 static int
1432 shmem_write_begin(struct file *file, struct address_space *mapping,
1433  loff_t pos, unsigned len, unsigned flags,
1434  struct page **pagep, void **fsdata)
1435 {
1436  struct inode *inode = mapping->host;
1437  pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1438  return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
1439 }
1440 
1441 static int
1442 shmem_write_end(struct file *file, struct address_space *mapping,
1443  loff_t pos, unsigned len, unsigned copied,
1444  struct page *page, void *fsdata)
1445 {
1446  struct inode *inode = mapping->host;
1447 
1448  if (pos + copied > inode->i_size)
1449  i_size_write(inode, pos + copied);
1450 
1451  if (!PageUptodate(page)) {
1452  if (copied < PAGE_CACHE_SIZE) {
1453  unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1454  zero_user_segments(page, 0, from,
1455  from + copied, PAGE_CACHE_SIZE);
1456  }
1457  SetPageUptodate(page);
1458  }
1459  set_page_dirty(page);
1460  unlock_page(page);
1461  page_cache_release(page);
1462 
1463  return copied;
1464 }
1465 
1466 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1467 {
1468  struct inode *inode = filp->f_path.dentry->d_inode;
1469  struct address_space *mapping = inode->i_mapping;
1470  pgoff_t index;
1471  unsigned long offset;
1472  enum sgp_type sgp = SGP_READ;
1473 
1474  /*
1475  * Might this read be for a stacking filesystem? Then when reading
1476  * holes of a sparse file, we actually need to allocate those pages,
1477  * and even mark them dirty, so it cannot exceed the max_blocks limit.
1478  */
1479  if (segment_eq(get_fs(), KERNEL_DS))
1480  sgp = SGP_DIRTY;
1481 
1482  index = *ppos >> PAGE_CACHE_SHIFT;
1483  offset = *ppos & ~PAGE_CACHE_MASK;
1484 
1485  for (;;) {
1486  struct page *page = NULL;
1487  pgoff_t end_index;
1488  unsigned long nr, ret;
1489  loff_t i_size = i_size_read(inode);
1490 
1491  end_index = i_size >> PAGE_CACHE_SHIFT;
1492  if (index > end_index)
1493  break;
1494  if (index == end_index) {
1495  nr = i_size & ~PAGE_CACHE_MASK;
1496  if (nr <= offset)
1497  break;
1498  }
1499 
1500  desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
1501  if (desc->error) {
1502  if (desc->error == -EINVAL)
1503  desc->error = 0;
1504  break;
1505  }
1506  if (page)
1507  unlock_page(page);
1508 
1509  /*
1510  * We must evaluate after, since reads (unlike writes)
1511  * are called without i_mutex protection against truncate
1512  */
1513  nr = PAGE_CACHE_SIZE;
1514  i_size = i_size_read(inode);
1515  end_index = i_size >> PAGE_CACHE_SHIFT;
1516  if (index == end_index) {
1517  nr = i_size & ~PAGE_CACHE_MASK;
1518  if (nr <= offset) {
1519  if (page)
1520  page_cache_release(page);
1521  break;
1522  }
1523  }
1524  nr -= offset;
1525 
1526  if (page) {
1527  /*
1528  * If users can be writing to this page using arbitrary
1529  * virtual addresses, take care about potential aliasing
1530  * before reading the page on the kernel side.
1531  */
1532  if (mapping_writably_mapped(mapping))
1533  flush_dcache_page(page);
1534  /*
1535  * Mark the page accessed if we read the beginning.
1536  */
1537  if (!offset)
1538  mark_page_accessed(page);
1539  } else {
1540  page = ZERO_PAGE(0);
1541  page_cache_get(page);
1542  }
1543 
1544  /*
1545  * Ok, we have the page, and it's up-to-date, so
1546  * now we can copy it to user space...
1547  *
1548  * The actor routine returns how many bytes were actually used..
1549  * NOTE! This may not be the same as how much of a user buffer
1550  * we filled up (we may be padding etc), so we can only update
1551  * "pos" here (the actor routine has to update the user buffer
1552  * pointers and the remaining count).
1553  */
1554  ret = actor(desc, page, offset, nr);
1555  offset += ret;
1556  index += offset >> PAGE_CACHE_SHIFT;
1557  offset &= ~PAGE_CACHE_MASK;
1558 
1559  page_cache_release(page);
1560  if (ret != nr || !desc->count)
1561  break;
1562 
1563  cond_resched();
1564  }
1565 
1566  *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1567  file_accessed(filp);
1568 }
1569 
1570 static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1571  const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1572 {
1573  struct file *filp = iocb->ki_filp;
1574  ssize_t retval;
1575  unsigned long seg;
1576  size_t count;
1577  loff_t *ppos = &iocb->ki_pos;
1578 
1579  retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1580  if (retval)
1581  return retval;
1582 
1583  for (seg = 0; seg < nr_segs; seg++) {
1584  read_descriptor_t desc;
1585 
1586  desc.written = 0;
1587  desc.arg.buf = iov[seg].iov_base;
1588  desc.count = iov[seg].iov_len;
1589  if (desc.count == 0)
1590  continue;
1591  desc.error = 0;
1592  do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1593  retval += desc.written;
1594  if (desc.error) {
1595  retval = retval ?: desc.error;
1596  break;
1597  }
1598  if (desc.count > 0)
1599  break;
1600  }
1601  return retval;
1602 }
1603 
1604 static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1605  struct pipe_inode_info *pipe, size_t len,
1606  unsigned int flags)
1607 {
1608  struct address_space *mapping = in->f_mapping;
1609  struct inode *inode = mapping->host;
1610  unsigned int loff, nr_pages, req_pages;
1611  struct page *pages[PIPE_DEF_BUFFERS];
1612  struct partial_page partial[PIPE_DEF_BUFFERS];
1613  struct page *page;
1614  pgoff_t index, end_index;
1615  loff_t isize, left;
1616  int error, page_nr;
1617  struct splice_pipe_desc spd = {
1618  .pages = pages,
1619  .partial = partial,
1620  .nr_pages_max = PIPE_DEF_BUFFERS,
1621  .flags = flags,
1622  .ops = &page_cache_pipe_buf_ops,
1623  .spd_release = spd_release_page,
1624  };
1625 
1626  isize = i_size_read(inode);
1627  if (unlikely(*ppos >= isize))
1628  return 0;
1629 
1630  left = isize - *ppos;
1631  if (unlikely(left < len))
1632  len = left;
1633 
1634  if (splice_grow_spd(pipe, &spd))
1635  return -ENOMEM;
1636 
1637  index = *ppos >> PAGE_CACHE_SHIFT;
1638  loff = *ppos & ~PAGE_CACHE_MASK;
1639  req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1640  nr_pages = min(req_pages, pipe->buffers);
1641 
1642  spd.nr_pages = find_get_pages_contig(mapping, index,
1643  nr_pages, spd.pages);
1644  index += spd.nr_pages;
1645  error = 0;
1646 
1647  while (spd.nr_pages < nr_pages) {
1648  error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
1649  if (error)
1650  break;
1651  unlock_page(page);
1652  spd.pages[spd.nr_pages++] = page;
1653  index++;
1654  }
1655 
1656  index = *ppos >> PAGE_CACHE_SHIFT;
1657  nr_pages = spd.nr_pages;
1658  spd.nr_pages = 0;
1659 
1660  for (page_nr = 0; page_nr < nr_pages; page_nr++) {
1661  unsigned int this_len;
1662 
1663  if (!len)
1664  break;
1665 
1666  this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
1667  page = spd.pages[page_nr];
1668 
1669  if (!PageUptodate(page) || page->mapping != mapping) {
1670  error = shmem_getpage(inode, index, &page,
1671  SGP_CACHE, NULL);
1672  if (error)
1673  break;
1674  unlock_page(page);
1675  page_cache_release(spd.pages[page_nr]);
1676  spd.pages[page_nr] = page;
1677  }
1678 
1679  isize = i_size_read(inode);
1680  end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1681  if (unlikely(!isize || index > end_index))
1682  break;
1683 
1684  if (end_index == index) {
1685  unsigned int plen;
1686 
1687  plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
1688  if (plen <= loff)
1689  break;
1690 
1691  this_len = min(this_len, plen - loff);
1692  len = this_len;
1693  }
1694 
1695  spd.partial[page_nr].offset = loff;
1696  spd.partial[page_nr].len = this_len;
1697  len -= this_len;
1698  loff = 0;
1699  spd.nr_pages++;
1700  index++;
1701  }
1702 
1703  while (page_nr < nr_pages)
1704  page_cache_release(spd.pages[page_nr++]);
1705 
1706  if (spd.nr_pages)
1707  error = splice_to_pipe(pipe, &spd);
1708 
1709  splice_shrink_spd(&spd);
1710 
1711  if (error > 0) {
1712  *ppos += error;
1713  file_accessed(in);
1714  }
1715  return error;
1716 }
1717 
1718 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1719  loff_t len)
1720 {
1721  struct inode *inode = file->f_path.dentry->d_inode;
1722  struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
1723  struct shmem_falloc shmem_falloc;
1724  pgoff_t start, index, end;
1725  int error;
1726 
1727  mutex_lock(&inode->i_mutex);
1728 
1729  if (mode & FALLOC_FL_PUNCH_HOLE) {
1730  struct address_space *mapping = file->f_mapping;
1731  loff_t unmap_start = round_up(offset, PAGE_SIZE);
1732  loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
1733 
1734  if ((u64)unmap_end > (u64)unmap_start)
1735  unmap_mapping_range(mapping, unmap_start,
1736  1 + unmap_end - unmap_start, 0);
1737  shmem_truncate_range(inode, offset, offset + len - 1);
1738  /* No need to unmap again: hole-punching leaves COWed pages */
1739  error = 0;
1740  goto out;
1741  }
1742 
1743  /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
1744  error = inode_newsize_ok(inode, offset + len);
1745  if (error)
1746  goto out;
1747 
1748  start = offset >> PAGE_CACHE_SHIFT;
1749  end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1750  /* Try to avoid a swapstorm if len is impossible to satisfy */
1751  if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
1752  error = -ENOSPC;
1753  goto out;
1754  }
1755 
1756  shmem_falloc.start = start;
1757  shmem_falloc.next = start;
1758  shmem_falloc.nr_falloced = 0;
1759  shmem_falloc.nr_unswapped = 0;
1760  spin_lock(&inode->i_lock);
1761  inode->i_private = &shmem_falloc;
1762  spin_unlock(&inode->i_lock);
1763 
1764  for (index = start; index < end; index++) {
1765  struct page *page;
1766 
1767  /*
1768  * Good, the fallocate(2) manpage permits EINTR: we may have
1769  * been interrupted because we are using up too much memory.
1770  */
1771  if (signal_pending(current))
1772  error = -EINTR;
1773  else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
1774  error = -ENOMEM;
1775  else
1776  error = shmem_getpage(inode, index, &page, SGP_FALLOC,
1777  NULL);
1778  if (error) {
1779  /* Remove the !PageUptodate pages we added */
1780  shmem_undo_range(inode,
1781  (loff_t)start << PAGE_CACHE_SHIFT,
1782  (loff_t)index << PAGE_CACHE_SHIFT, true);
1783  goto undone;
1784  }
1785 
1786  /*
1787  * Inform shmem_writepage() how far we have reached.
1788  * No need for lock or barrier: we have the page lock.
1789  */
1790  shmem_falloc.next++;
1791  if (!PageUptodate(page))
1792  shmem_falloc.nr_falloced++;
1793 
1794  /*
1795  * If !PageUptodate, leave it that way so that freeable pages
1796  * can be recognized if we need to rollback on error later.
1797  * But set_page_dirty so that memory pressure will swap rather
1798  * than free the pages we are allocating (and SGP_CACHE pages
1799  * might still be clean: we now need to mark those dirty too).
1800  */
1801  set_page_dirty(page);
1802  unlock_page(page);
1803  page_cache_release(page);
1804  cond_resched();
1805  }
1806 
1807  if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
1808  i_size_write(inode, offset + len);
1809  inode->i_ctime = CURRENT_TIME;
1810 undone:
1811  spin_lock(&inode->i_lock);
1812  inode->i_private = NULL;
1813  spin_unlock(&inode->i_lock);
1814 out:
1815  mutex_unlock(&inode->i_mutex);
1816  return error;
1817 }
1818 
1819 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1820 {
1821  struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
1822 
1823  buf->f_type = TMPFS_MAGIC;
1824  buf->f_bsize = PAGE_CACHE_SIZE;
1825  buf->f_namelen = NAME_MAX;
1826  if (sbinfo->max_blocks) {
1827  buf->f_blocks = sbinfo->max_blocks;
1828  buf->f_bavail =
1829  buf->f_bfree = sbinfo->max_blocks -
1830  percpu_counter_sum(&sbinfo->used_blocks);
1831  }
1832  if (sbinfo->max_inodes) {
1833  buf->f_files = sbinfo->max_inodes;
1834  buf->f_ffree = sbinfo->free_inodes;
1835  }
1836  /* else leave those fields 0 like simple_statfs */
1837  return 0;
1838 }
1839 
1840 /*
1841  * File creation. Allocate an inode, and we're done..
1842  */
1843 static int
1844 shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
1845 {
1846  struct inode *inode;
1847  int error = -ENOSPC;
1848 
1849  inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
1850  if (inode) {
1851  error = security_inode_init_security(inode, dir,
1852  &dentry->d_name,
1853  shmem_initxattrs, NULL);
1854  if (error) {
1855  if (error != -EOPNOTSUPP) {
1856  iput(inode);
1857  return error;
1858  }
1859  }
1860 #ifdef CONFIG_TMPFS_POSIX_ACL
1861  error = generic_acl_init(inode, dir);
1862  if (error) {
1863  iput(inode);
1864  return error;
1865  }
1866 #else
1867  error = 0;
1868 #endif
1869  dir->i_size += BOGO_DIRENT_SIZE;
1870  dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1871  d_instantiate(dentry, inode);
1872  dget(dentry); /* Extra count - pin the dentry in core */
1873  }
1874  return error;
1875 }
1876 
1877 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1878 {
1879  int error;
1880 
1881  if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1882  return error;
1883  inc_nlink(dir);
1884  return 0;
1885 }
1886 
1887 static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1888  bool excl)
1889 {
1890  return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1891 }
1892 
1893 /*
1894  * Link a file..
1895  */
1896 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1897 {
1898  struct inode *inode = old_dentry->d_inode;
1899  int ret;
1900 
1901  /*
1902  * No ordinary (disk based) filesystem counts links as inodes;
1903  * but each new link needs a new dentry, pinning lowmem, and
1904  * tmpfs dentries cannot be pruned until they are unlinked.
1905  */
1906  ret = shmem_reserve_inode(inode->i_sb);
1907  if (ret)
1908  goto out;
1909 
1910  dir->i_size += BOGO_DIRENT_SIZE;
1911  inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1912  inc_nlink(inode);
1913  ihold(inode); /* New dentry reference */
1914  dget(dentry); /* Extra pinning count for the created dentry */
1915  d_instantiate(dentry, inode);
1916 out:
1917  return ret;
1918 }
1919 
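Editor's note: shmem_link() reserves an inode slot for the extra dentry, grows the directory by BOGO_DIRENT_SIZE and bumps the inode's link count. A userspace sketch observing that through stat(2) — the /dev/shm paths are assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	int fd = creat("/dev/shm/link-a", 0600);

	if (fd < 0 || link("/dev/shm/link-a", "/dev/shm/link-b") != 0) {
		perror("creat/link");
		return 1;
	}
	stat("/dev/shm/link-a", &st);
	printf("st_nlink after link(2): %lu\n", (unsigned long)st.st_nlink);
	close(fd);
	unlink("/dev/shm/link-b");
	unlink("/dev/shm/link-a");
	return 0;
}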
1920 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1921 {
1922  struct inode *inode = dentry->d_inode;
1923 
1924  if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
1925  shmem_free_inode(inode->i_sb);
1926 
1927  dir->i_size -= BOGO_DIRENT_SIZE;
1928  inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1929  drop_nlink(inode);
1930  dput(dentry); /* Undo the count from "create" - this does all the work */
1931  return 0;
1932 }
1933 
1934 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1935 {
1936  if (!simple_empty(dentry))
1937  return -ENOTEMPTY;
1938 
1939  drop_nlink(dentry->d_inode);
1940  drop_nlink(dir);
1941  return shmem_unlink(dir, dentry);
1942 }
1943 
1944 /*
1945  * The VFS layer already does all the dentry stuff for rename,
1946  * we just have to decrement the usage count for the target if
1947  * it exists so that the VFS layer correctly frees it when it
1948  * gets overwritten.
1949  */
1950 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1951 {
1952  struct inode *inode = old_dentry->d_inode;
1953  int they_are_dirs = S_ISDIR(inode->i_mode);
1954 
1955  if (!simple_empty(new_dentry))
1956  return -ENOTEMPTY;
1957 
1958  if (new_dentry->d_inode) {
1959  (void) shmem_unlink(new_dir, new_dentry);
1960  if (they_are_dirs)
1961  drop_nlink(old_dir);
1962  } else if (they_are_dirs) {
1963  drop_nlink(old_dir);
1964  inc_nlink(new_dir);
1965  }
1966 
1967  old_dir->i_size -= BOGO_DIRENT_SIZE;
1968  new_dir->i_size += BOGO_DIRENT_SIZE;
1969  old_dir->i_ctime = old_dir->i_mtime =
1970  new_dir->i_ctime = new_dir->i_mtime =
1971  inode->i_ctime = CURRENT_TIME;
1972  return 0;
1973 }
1974 
1975 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1976 {
1977  int error;
1978  int len;
1979  struct inode *inode;
1980  struct page *page;
1981  char *kaddr;
1982  struct shmem_inode_info *info;
1983 
1984  len = strlen(symname) + 1;
1985  if (len > PAGE_CACHE_SIZE)
1986  return -ENAMETOOLONG;
1987 
1988  inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
1989  if (!inode)
1990  return -ENOSPC;
1991 
1992  error = security_inode_init_security(inode, dir, &dentry->d_name,
1993  shmem_initxattrs, NULL);
1994  if (error) {
1995  if (error != -EOPNOTSUPP) {
1996  iput(inode);
1997  return error;
1998  }
1999  error = 0;
2000  }
2001 
2002  info = SHMEM_I(inode);
2003  inode->i_size = len-1;
2004  if (len <= SHORT_SYMLINK_LEN) {
2005  info->symlink = kmemdup(symname, len, GFP_KERNEL);
2006  if (!info->symlink) {
2007  iput(inode);
2008  return -ENOMEM;
2009  }
2010  inode->i_op = &shmem_short_symlink_operations;
2011  } else {
2012  error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2013  if (error) {
2014  iput(inode);
2015  return error;
2016  }
2017  inode->i_mapping->a_ops = &shmem_aops;
2018  inode->i_op = &shmem_symlink_inode_operations;
2019  kaddr = kmap_atomic(page);
2020  memcpy(kaddr, symname, len);
2021  kunmap_atomic(kaddr);
2022  SetPageUptodate(page);
2023  set_page_dirty(page);
2024  unlock_page(page);
2025  page_cache_release(page);
2026  }
2027  dir->i_size += BOGO_DIRENT_SIZE;
2028  dir->i_ctime = dir->i_mtime = CURRENT_TIME;
2029  d_instantiate(dentry, inode);
2030  dget(dentry);
2031  return 0;
2032 }
2033 
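Editor's note: shmem_symlink() keeps short targets (up to SHORT_SYMLINK_LEN) in a kmalloc'ed buffer and writes longer ones into a page, but either way readlink(2) just sees the target, whose length is also the symlink's i_size. A userspace sketch with one short and one deliberately long target (paths and lengths are assumptions):

#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char target[300], buf[512];
	ssize_t n;

	memset(target, 'x', sizeof(target) - 1);	/* well past 128 bytes */
	target[sizeof(target) - 1] = '\0';

	symlink("short-target", "/dev/shm/sym-short");
	symlink(target, "/dev/shm/sym-long");

	n = readlink("/dev/shm/sym-long", buf, sizeof(buf) - 1);
	if (n >= 0) {
		buf[n] = '\0';
		printf("long symlink target length: %zd\n", n);
	}
	unlink("/dev/shm/sym-short");
	unlink("/dev/shm/sym-long");
	return 0;
}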
2034 static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
2035 {
2036  nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
2037  return NULL;
2038 }
2039 
2040 static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2041 {
2042  struct page *page = NULL;
2043  int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
2044  nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
2045  if (page)
2046  unlock_page(page);
2047  return page;
2048 }
2049 
2050 static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
2051 {
2052  if (!IS_ERR(nd_get_link(nd))) {
2053  struct page *page = cookie;
2054  kunmap(page);
2055  mark_page_accessed(page);
2056  page_cache_release(page);
2057  }
2058 }
2059 
2060 #ifdef CONFIG_TMPFS_XATTR
2061 /*
2062  * Superblocks without xattr inode operations may get some security.* xattr
2063  * support from the LSM "for free". As soon as we have any other xattrs
2064  * like ACLs, we also need to implement the security.* handlers at
2065  * filesystem level, though.
2066  */
2067 
2068 /*
2069  * Callback for security_inode_init_security() for acquiring xattrs.
2070  */
2071 static int shmem_initxattrs(struct inode *inode,
2072  const struct xattr *xattr_array,
2073  void *fs_info)
2074 {
2075  struct shmem_inode_info *info = SHMEM_I(inode);
2076  const struct xattr *xattr;
2077  struct simple_xattr *new_xattr;
2078  size_t len;
2079 
2080  for (xattr = xattr_array; xattr->name != NULL; xattr++) {
2081  new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
2082  if (!new_xattr)
2083  return -ENOMEM;
2084 
2085  len = strlen(xattr->name) + 1;
2086  new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
2087  GFP_KERNEL);
2088  if (!new_xattr->name) {
2089  kfree(new_xattr);
2090  return -ENOMEM;
2091  }
2092 
2093  memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
2094  XATTR_SECURITY_PREFIX_LEN);
2095  memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
2096  xattr->name, len);
2097 
2098  simple_xattr_list_add(&info->xattrs, new_xattr);
2099  }
2100 
2101  return 0;
2102 }
2103 
2104 static const struct xattr_handler *shmem_xattr_handlers[] = {
2105 #ifdef CONFIG_TMPFS_POSIX_ACL
2106  &generic_acl_access_handler,
2107  &generic_acl_default_handler,
2108 #endif
2109  NULL
2110 };
2111 
2112 static int shmem_xattr_validate(const char *name)
2113 {
2114  struct { const char *prefix; size_t len; } arr[] = {
2115  { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
2116  { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
2117  };
2118  int i;
2119 
2120  for (i = 0; i < ARRAY_SIZE(arr); i++) {
2121  size_t preflen = arr[i].len;
2122  if (strncmp(name, arr[i].prefix, preflen) == 0) {
2123  if (!name[preflen])
2124  return -EINVAL;
2125  return 0;
2126  }
2127  }
2128  return -EOPNOTSUPP;
2129 }
2130 
2131 static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
2132  void *buffer, size_t size)
2133 {
2134  struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2135  int err;
2136 
2137  /*
2138  * If this is a request for a synthetic attribute in the system.*
2139  * namespace use the generic infrastructure to resolve a handler
2140  * for it via sb->s_xattr.
2141  */
2142  if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2143  return generic_getxattr(dentry, name, buffer, size);
2144 
2145  err = shmem_xattr_validate(name);
2146  if (err)
2147  return err;
2148 
2149  return simple_xattr_get(&info->xattrs, name, buffer, size);
2150 }
2151 
2152 static int shmem_setxattr(struct dentry *dentry, const char *name,
2153  const void *value, size_t size, int flags)
2154 {
2155  struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2156  int err;
2157 
2158  /*
2159  * If this is a request for a synthetic attribute in the system.*
2160  * namespace use the generic infrastructure to resolve a handler
2161  * for it via sb->s_xattr.
2162  */
2163  if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2164  return generic_setxattr(dentry, name, value, size, flags);
2165 
2166  err = shmem_xattr_validate(name);
2167  if (err)
2168  return err;
2169 
2170  return simple_xattr_set(&info->xattrs, name, value, size, flags);
2171 }
2172 
2173 static int shmem_removexattr(struct dentry *dentry, const char *name)
2174 {
2175  struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2176  int err;
2177 
2178  /*
2179  * If this is a request for a synthetic attribute in the system.*
2180  * namespace use the generic infrastructure to resolve a handler
2181  * for it via sb->s_xattr.
2182  */
2183  if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
2184  return generic_removexattr(dentry, name);
2185 
2186  err = shmem_xattr_validate(name);
2187  if (err)
2188  return err;
2189 
2190  return simple_xattr_remove(&info->xattrs, name);
2191 }
2192 
2193 static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2194 {
2195  struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
2196  return simple_xattr_list(&info->xattrs, buffer, size);
2197 }
2198 #endif /* CONFIG_TMPFS_XATTR */
2199 
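Editor's note: with the handlers above, system.* names are routed through sb->s_xattr, while shmem_xattr_validate() only admits security.* and trusted.* into the simple_xattr list, so other namespaces come back -EOPNOTSUPP on this kernel. A userspace sketch (the /dev/shm path is an assumption; trusted.* requires CAP_SYS_ADMIN):

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/dev/shm";

	if (setxattr(path, "user.demo", "1", 1, 0) != 0)
		printf("user.demo rejected: %s\n", strerror(errno));
	if (setxattr(path, "trusted.demo", "1", 1, 0) != 0)
		printf("trusted.demo failed: %s\n", strerror(errno));
	else
		printf("trusted.demo stored\n");
	return 0;
}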
2200 static const struct inode_operations shmem_short_symlink_operations = {
2201  .readlink = generic_readlink,
2202  .follow_link = shmem_follow_short_symlink,
2203 #ifdef CONFIG_TMPFS_XATTR
2204  .setxattr = shmem_setxattr,
2205  .getxattr = shmem_getxattr,
2206  .listxattr = shmem_listxattr,
2207  .removexattr = shmem_removexattr,
2208 #endif
2209 };
2210 
2211 static const struct inode_operations shmem_symlink_inode_operations = {
2212  .readlink = generic_readlink,
2213  .follow_link = shmem_follow_link,
2214  .put_link = shmem_put_link,
2215 #ifdef CONFIG_TMPFS_XATTR
2216  .setxattr = shmem_setxattr,
2217  .getxattr = shmem_getxattr,
2218  .listxattr = shmem_listxattr,
2219  .removexattr = shmem_removexattr,
2220 #endif
2221 };
2222 
2223 static struct dentry *shmem_get_parent(struct dentry *child)
2224 {
2225  return ERR_PTR(-ESTALE);
2226 }
2227 
2228 static int shmem_match(struct inode *ino, void *vfh)
2229 {
2230  __u32 *fh = vfh;
2231  __u64 inum = fh[2];
2232  inum = (inum << 32) | fh[1];
2233  return ino->i_ino == inum && fh[0] == ino->i_generation;
2234 }
2235 
2236 static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
2237  struct fid *fid, int fh_len, int fh_type)
2238 {
2239  struct inode *inode;
2240  struct dentry *dentry = NULL;
2241  u64 inum;
2242 
2243  if (fh_len < 3)
2244  return NULL;
2245 
2246  inum = fid->raw[2];
2247  inum = (inum << 32) | fid->raw[1];
2248 
2249  inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
2250  shmem_match, fid->raw);
2251  if (inode) {
2252  dentry = d_find_alias(inode);
2253  iput(inode);
2254  }
2255 
2256  return dentry;
2257 }
2258 
2259 static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
2260  struct inode *parent)
2261 {
2262  if (*len < 3) {
2263  *len = 3;
2264  return 255;
2265  }
2266 
2267  if (inode_unhashed(inode)) {
2268  /* Unfortunately insert_inode_hash is not idempotent,
2269  * so as we hash inodes here rather than at creation
2270  * time, we need a lock to ensure we only try
2271  * to do it once
2272  */
2273  static DEFINE_SPINLOCK(lock);
2274  spin_lock(&lock);
2275  if (inode_unhashed(inode))
2276  __insert_inode_hash(inode,
2277  inode->i_ino + inode->i_generation);
2278  spin_unlock(&lock);
2279  }
2280 
2281  fh[0] = inode->i_generation;
2282  fh[1] = inode->i_ino;
2283  fh[2] = ((__u64)inode->i_ino) >> 32;
2284 
2285  *len = 3;
2286  return 1;
2287 }
2288 
2289 static const struct export_operations shmem_export_ops = {
2290  .get_parent = shmem_get_parent,
2291  .encode_fh = shmem_encode_fh,
2292  .fh_to_dentry = shmem_fh_to_dentry,
2293 };
2294 
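Editor's note: the export operations above let NFS (or name_to_handle_at(2)) identify a tmpfs inode by a three-word handle: generation in fh[0] and the 64-bit inode number split across fh[1]/fh[2], with ino + generation used as the hash key. A standalone userspace illustration of that packing and unpacking — the sample numbers are made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ino = 0x123456789abcULL;	/* pretend inode number */
	uint32_t generation = 7;		/* pretend i_generation */
	uint32_t fh[3];

	/* encode: mirrors the fh[] layout written by shmem_encode_fh() */
	fh[0] = generation;
	fh[1] = (uint32_t)ino;
	fh[2] = (uint32_t)(ino >> 32);

	/* decode: mirrors how shmem_match() reassembles the inode number */
	uint64_t inum = ((uint64_t)fh[2] << 32) | fh[1];
	printf("decoded ino %#llx, gen %u, lookup key %#llx\n",
	       (unsigned long long)inum, fh[0],
	       (unsigned long long)(inum + fh[0]));
	return 0;
}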
2295 static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
2296  bool remount)
2297 {
2298  char *this_char, *value, *rest;
2299  uid_t uid;
2300  gid_t gid;
2301 
2302  while (options != NULL) {
2303  this_char = options;
2304  for (;;) {
2305  /*
2306  * NUL-terminate this option: unfortunately,
2307  * mount options form a comma-separated list,
2308  * but mpol's nodelist may also contain commas.
2309  */
2310  options = strchr(options, ',');
2311  if (options == NULL)
2312  break;
2313  options++;
2314  if (!isdigit(*options)) {
2315  options[-1] = '\0';
2316  break;
2317  }
2318  }
2319  if (!*this_char)
2320  continue;
2321  if ((value = strchr(this_char,'=')) != NULL) {
2322  *value++ = 0;
2323  } else {
2325  "tmpfs: No value for mount option '%s'\n",
2326  this_char);
2327  return 1;
2328  }
2329 
2330  if (!strcmp(this_char,"size")) {
2331  unsigned long long size;
2332  size = memparse(value,&rest);
2333  if (*rest == '%') {
2334  size <<= PAGE_SHIFT;
2335  size *= totalram_pages;
2336  do_div(size, 100);
2337  rest++;
2338  }
2339  if (*rest)
2340  goto bad_val;
2341  sbinfo->max_blocks =
2342  DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
2343  } else if (!strcmp(this_char,"nr_blocks")) {
2344  sbinfo->max_blocks = memparse(value, &rest);
2345  if (*rest)
2346  goto bad_val;
2347  } else if (!strcmp(this_char,"nr_inodes")) {
2348  sbinfo->max_inodes = memparse(value, &rest);
2349  if (*rest)
2350  goto bad_val;
2351  } else if (!strcmp(this_char,"mode")) {
2352  if (remount)
2353  continue;
2354  sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
2355  if (*rest)
2356  goto bad_val;
2357  } else if (!strcmp(this_char,"uid")) {
2358  if (remount)
2359  continue;
2360  uid = simple_strtoul(value, &rest, 0);
2361  if (*rest)
2362  goto bad_val;
2363  sbinfo->uid = make_kuid(current_user_ns(), uid);
2364  if (!uid_valid(sbinfo->uid))
2365  goto bad_val;
2366  } else if (!strcmp(this_char,"gid")) {
2367  if (remount)
2368  continue;
2369  gid = simple_strtoul(value, &rest, 0);
2370  if (*rest)
2371  goto bad_val;
2372  sbinfo->gid = make_kgid(current_user_ns(), gid);
2373  if (!gid_valid(sbinfo->gid))
2374  goto bad_val;
2375  } else if (!strcmp(this_char,"mpol")) {
2376  if (mpol_parse_str(value, &sbinfo->mpol, 1))
2377  goto bad_val;
2378  } else {
2379  printk(KERN_ERR "tmpfs: Bad mount option %s\n",
2380  this_char);
2381  return 1;
2382  }
2383  }
2384  return 0;
2385 
2386 bad_val:
2387  printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
2388  value, this_char);
2389  return 1;
2390 
2391 }
2392 
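Editor's note: the option string handed to shmem_parse_options() is the mount data; "size=50%" is scaled against totalram_pages as shown above, and uid/gid are converted into the caller's user namespace. A userspace sketch passing such a string via mount(2) — the /mnt/tmp mount point is an assumption and CAP_SYS_ADMIN is required:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("tmpfs", "/mnt/tmp", "tmpfs", 0,
		  "size=50%,nr_inodes=8192,mode=1777,uid=0,gid=0") != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}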
2393 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2394 {
2395  struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2396  struct shmem_sb_info config = *sbinfo;
2397  unsigned long inodes;
2398  int error = -EINVAL;
2399 
2400  if (shmem_parse_options(data, &config, true))
2401  return error;
2402 
2403  spin_lock(&sbinfo->stat_lock);
2404  inodes = sbinfo->max_inodes - sbinfo->free_inodes;
2405  if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
2406  goto out;
2407  if (config.max_inodes < inodes)
2408  goto out;
2409  /*
2410  * Those tests disallow limited->unlimited while any are in use;
2411  * but we must separately disallow unlimited->limited, because
2412  * in that case we have no record of how much is already in use.
2413  */
2414  if (config.max_blocks && !sbinfo->max_blocks)
2415  goto out;
2416  if (config.max_inodes && !sbinfo->max_inodes)
2417  goto out;
2418 
2419  error = 0;
2420  sbinfo->max_blocks = config.max_blocks;
2421  sbinfo->max_inodes = config.max_inodes;
2422  sbinfo->free_inodes = config.max_inodes - inodes;
2423 
2424  mpol_put(sbinfo->mpol);
2425  sbinfo->mpol = config.mpol; /* transfers initial ref */
2426 out:
2427  spin_unlock(&sbinfo->stat_lock);
2428  return error;
2429 }
2430 
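Editor's note: per the checks above, a remount that would shrink the limits below current usage, or turn an unlimited instance into a limited one, fails with -EINVAL. A userspace sketch of such a remount (assumes /mnt/tmp is already a tmpfs mount and the caller has CAP_SYS_ADMIN):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("tmpfs", "/mnt/tmp", "tmpfs", MS_REMOUNT, "size=64m") != 0)
		perror("remount");	/* EINVAL if 64m is below what is in use */
	return 0;
}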
2431 static int shmem_show_options(struct seq_file *seq, struct dentry *root)
2432 {
2433  struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
2434 
2435  if (sbinfo->max_blocks != shmem_default_max_blocks())
2436  seq_printf(seq, ",size=%luk",
2437  sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
2438  if (sbinfo->max_inodes != shmem_default_max_inodes())
2439  seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
2440  if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
2441  seq_printf(seq, ",mode=%03ho", sbinfo->mode);
2442  if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
2443  seq_printf(seq, ",uid=%u",
2444  from_kuid_munged(&init_user_ns, sbinfo->uid));
2445  if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
2446  seq_printf(seq, ",gid=%u",
2447  from_kgid_munged(&init_user_ns, sbinfo->gid));
2448  shmem_show_mpol(seq, sbinfo->mpol);
2449  return 0;
2450 }
2451 #endif /* CONFIG_TMPFS */
2452 
2453 static void shmem_put_super(struct super_block *sb)
2454 {
2455  struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
2456 
2456 
2457  percpu_counter_destroy(&sbinfo->used_blocks);
2458  kfree(sbinfo);
2459  sb->s_fs_info = NULL;
2460 }
2461 
2462 int shmem_fill_super(struct super_block *sb, void *data, int silent)
2463 {
2464  struct inode *inode;
2465  struct shmem_sb_info *sbinfo;
2466  int err = -ENOMEM;
2467 
2468  /* Round up to L1_CACHE_BYTES to resist false sharing */
2469  sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
2470  L1_CACHE_BYTES), GFP_KERNEL);
2471  if (!sbinfo)
2472  return -ENOMEM;
2473 
2474  sbinfo->mode = S_IRWXUGO | S_ISVTX;
2475  sbinfo->uid = current_fsuid();
2476  sbinfo->gid = current_fsgid();
2477  sb->s_fs_info = sbinfo;
2478 
2479 #ifdef CONFIG_TMPFS
2480  /*
2481  * Per default we only allow half of the physical ram per
2482  * tmpfs instance, limiting inodes to one per page of lowmem;
2483  * but the internal instance is left unlimited.
2484  */
2485  if (!(sb->s_flags & MS_NOUSER)) {
2486  sbinfo->max_blocks = shmem_default_max_blocks();
2487  sbinfo->max_inodes = shmem_default_max_inodes();
2488  if (shmem_parse_options(data, sbinfo, false)) {
2489  err = -EINVAL;
2490  goto failed;
2491  }
2492  }
2493  sb->s_export_op = &shmem_export_ops;
2494  sb->s_flags |= MS_NOSEC;
2495 #else
2496  sb->s_flags |= MS_NOUSER;
2497 #endif
2498 
2499  spin_lock_init(&sbinfo->stat_lock);
2500  if (percpu_counter_init(&sbinfo->used_blocks, 0))
2501  goto failed;
2502  sbinfo->free_inodes = sbinfo->max_inodes;
2503 
2504  sb->s_maxbytes = MAX_LFS_FILESIZE;
2505  sb->s_blocksize = PAGE_CACHE_SIZE;
2506  sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
2507  sb->s_magic = TMPFS_MAGIC;
2508  sb->s_op = &shmem_ops;
2509  sb->s_time_gran = 1;
2510 #ifdef CONFIG_TMPFS_XATTR
2511  sb->s_xattr = shmem_xattr_handlers;
2512 #endif
2513 #ifdef CONFIG_TMPFS_POSIX_ACL
2514  sb->s_flags |= MS_POSIXACL;
2515 #endif
2516 
2517  inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
2518  if (!inode)
2519  goto failed;
2520  inode->i_uid = sbinfo->uid;
2521  inode->i_gid = sbinfo->gid;
2522  sb->s_root = d_make_root(inode);
2523  if (!sb->s_root)
2524  goto failed;
2525  return 0;
2526 
2527 failed:
2528  shmem_put_super(sb);
2529  return err;
2530 }
2531 
2532 static struct kmem_cache *shmem_inode_cachep;
2533 
2534 static struct inode *shmem_alloc_inode(struct super_block *sb)
2535 {
2536  struct shmem_inode_info *info;
2537  info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
2538  if (!info)
2539  return NULL;
2540  return &info->vfs_inode;
2541 }
2542 
2543 static void shmem_destroy_callback(struct rcu_head *head)
2544 {
2545  struct inode *inode = container_of(head, struct inode, i_rcu);
2546  kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2547 }
2548 
2549 static void shmem_destroy_inode(struct inode *inode)
2550 {
2551  if (S_ISREG(inode->i_mode))
2552  mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2553  call_rcu(&inode->i_rcu, shmem_destroy_callback);
2554 }
2555 
2556 static void shmem_init_inode(void *foo)
2557 {
2558  struct shmem_inode_info *info = foo;
2559  inode_init_once(&info->vfs_inode);
2560 }
2561 
2562 static int shmem_init_inodecache(void)
2563 {
2564  shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
2565  sizeof(struct shmem_inode_info),
2566  0, SLAB_PANIC, shmem_init_inode);
2567  return 0;
2568 }
2569 
2570 static void shmem_destroy_inodecache(void)
2571 {
2572  kmem_cache_destroy(shmem_inode_cachep);
2573 }
2574 
2575 static const struct address_space_operations shmem_aops = {
2576  .writepage = shmem_writepage,
2577  .set_page_dirty = __set_page_dirty_no_writeback,
2578 #ifdef CONFIG_TMPFS
2579  .write_begin = shmem_write_begin,
2580  .write_end = shmem_write_end,
2581 #endif
2582  .migratepage = migrate_page,
2583  .error_remove_page = generic_error_remove_page,
2584 };
2585 
2586 static const struct file_operations shmem_file_operations = {
2587  .mmap = shmem_mmap,
2588 #ifdef CONFIG_TMPFS
2589  .llseek = generic_file_llseek,
2590  .read = do_sync_read,
2591  .write = do_sync_write,
2592  .aio_read = shmem_file_aio_read,
2593  .aio_write = generic_file_aio_write,
2594  .fsync = noop_fsync,
2595  .splice_read = shmem_file_splice_read,
2596  .splice_write = generic_file_splice_write,
2597  .fallocate = shmem_fallocate,
2598 #endif
2599 };
2600 
2601 static const struct inode_operations shmem_inode_operations = {
2602  .setattr = shmem_setattr,
2603 #ifdef CONFIG_TMPFS_XATTR
2604  .setxattr = shmem_setxattr,
2605  .getxattr = shmem_getxattr,
2606  .listxattr = shmem_listxattr,
2607  .removexattr = shmem_removexattr,
2608 #endif
2609 };
2610 
2611 static const struct inode_operations shmem_dir_inode_operations = {
2612 #ifdef CONFIG_TMPFS
2613  .create = shmem_create,
2614  .lookup = simple_lookup,
2615  .link = shmem_link,
2616  .unlink = shmem_unlink,
2617  .symlink = shmem_symlink,
2618  .mkdir = shmem_mkdir,
2619  .rmdir = shmem_rmdir,
2620  .mknod = shmem_mknod,
2621  .rename = shmem_rename,
2622 #endif
2623 #ifdef CONFIG_TMPFS_XATTR
2624  .setxattr = shmem_setxattr,
2625  .getxattr = shmem_getxattr,
2626  .listxattr = shmem_listxattr,
2627  .removexattr = shmem_removexattr,
2628 #endif
2629 #ifdef CONFIG_TMPFS_POSIX_ACL
2630  .setattr = shmem_setattr,
2631 #endif
2632 };
2633 
2634 static const struct inode_operations shmem_special_inode_operations = {
2635 #ifdef CONFIG_TMPFS_XATTR
2636  .setxattr = shmem_setxattr,
2637  .getxattr = shmem_getxattr,
2638  .listxattr = shmem_listxattr,
2639  .removexattr = shmem_removexattr,
2640 #endif
2641 #ifdef CONFIG_TMPFS_POSIX_ACL
2642  .setattr = shmem_setattr,
2643 #endif
2644 };
2645 
2646 static const struct super_operations shmem_ops = {
2647  .alloc_inode = shmem_alloc_inode,
2648  .destroy_inode = shmem_destroy_inode,
2649 #ifdef CONFIG_TMPFS
2650  .statfs = shmem_statfs,
2651  .remount_fs = shmem_remount_fs,
2652  .show_options = shmem_show_options,
2653 #endif
2654  .evict_inode = shmem_evict_inode,
2655  .drop_inode = generic_delete_inode,
2656  .put_super = shmem_put_super,
2657 };
2658 
2659 static const struct vm_operations_struct shmem_vm_ops = {
2660  .fault = shmem_fault,
2661 #ifdef CONFIG_NUMA
2662  .set_policy = shmem_set_policy,
2663  .get_policy = shmem_get_policy,
2664 #endif
2665  .remap_pages = generic_file_remap_pages,
2666 };
2667 
2668 static struct dentry *shmem_mount(struct file_system_type *fs_type,
2669  int flags, const char *dev_name, void *data)
2670 {
2671  return mount_nodev(fs_type, flags, data, shmem_fill_super);
2672 }
2673 
2674 static struct file_system_type shmem_fs_type = {
2675  .owner = THIS_MODULE,
2676  .name = "tmpfs",
2677  .mount = shmem_mount,
2678  .kill_sb = kill_litter_super,
2679 };
2680 
2681 int __init shmem_init(void)
2682 {
2683  int error;
2684 
2685  error = bdi_init(&shmem_backing_dev_info);
2686  if (error)
2687  goto out4;
2688 
2689  error = shmem_init_inodecache();
2690  if (error)
2691  goto out3;
2692 
2693  error = register_filesystem(&shmem_fs_type);
2694  if (error) {
2695  printk(KERN_ERR "Could not register tmpfs\n");
2696  goto out2;
2697  }
2698 
2699  shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER,
2700  shmem_fs_type.name, NULL);
2701  if (IS_ERR(shm_mnt)) {
2702  error = PTR_ERR(shm_mnt);
2703  printk(KERN_ERR "Could not kern_mount tmpfs\n");
2704  goto out1;
2705  }
2706  return 0;
2707 
2708 out1:
2709  unregister_filesystem(&shmem_fs_type);
2710 out2:
2711  shmem_destroy_inodecache();
2712 out3:
2713  bdi_destroy(&shmem_backing_dev_info);
2714 out4:
2715  shm_mnt = ERR_PTR(error);
2716  return error;
2717 }
2718 
2719 #else /* !CONFIG_SHMEM */
2720 
2721 /*
2722  * tiny-shmem: simple shmemfs and tmpfs using ramfs code
2723  *
2724  * This is intended for small systems where the benefits of the full
2725  * shmem code (swap-backed and resource-limited) are outweighed by
2726  * their complexity. On systems without swap this code should be
2727  * effectively equivalent, but much lighter weight.
2728  */
2729 
2730 #include <linux/ramfs.h>
2731 
2732 static struct file_system_type shmem_fs_type = {
2733  .name = "tmpfs",
2734  .mount = ramfs_mount,
2735  .kill_sb = kill_litter_super,
2736 };
2737 
2738 int __init shmem_init(void)
2739 {
2740  BUG_ON(register_filesystem(&shmem_fs_type) != 0);
2741 
2742  shm_mnt = kern_mount(&shmem_fs_type);
2743  BUG_ON(IS_ERR(shm_mnt));
2744 
2745  return 0;
2746 }
2747 
2748 int shmem_unuse(swp_entry_t swap, struct page *page)
2749 {
2750  return 0;
2751 }
2752 
2753 int shmem_lock(struct file *file, int lock, struct user_struct *user)
2754 {
2755  return 0;
2756 }
2757 
2758 void shmem_unlock_mapping(struct address_space *mapping)
2759 {
2760 }
2761 
2762 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
2763 {
2764  truncate_inode_pages_range(inode->i_mapping, lstart, lend);
2765 }
2766 EXPORT_SYMBOL_GPL(shmem_truncate_range);
2767 
2768 #define shmem_vm_ops generic_file_vm_ops
2769 #define shmem_file_operations ramfs_file_operations
2770 #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
2771 #define shmem_acct_size(flags, size) 0
2772 #define shmem_unacct_size(flags, size) do {} while (0)
2773 
2774 #endif /* CONFIG_SHMEM */
2775 
2776 /* common code */
2777 
2778 /**
2779  * shmem_file_setup - get an unlinked file living in tmpfs
2780  * @name: name for dentry (to be seen in /proc/<pid>/maps
2781  * @size: size to be set for the file
2782  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
2783  */
2784 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
2785 {
2786  int error;
2787  struct file *file;
2788  struct inode *inode;
2789  struct path path;
2790  struct dentry *root;
2791  struct qstr this;
2792 
2793  if (IS_ERR(shm_mnt))
2794  return (void *)shm_mnt;
2795 
2796  if (size < 0 || size > MAX_LFS_FILESIZE)
2797  return ERR_PTR(-EINVAL);
2798 
2799  if (shmem_acct_size(flags, size))
2800  return ERR_PTR(-ENOMEM);
2801 
2802  error = -ENOMEM;
2803  this.name = name;
2804  this.len = strlen(name);
2805  this.hash = 0; /* will go */
2806  root = shm_mnt->mnt_root;
2807  path.dentry = d_alloc(root, &this);
2808  if (!path.dentry)
2809  goto put_memory;
2810  path.mnt = mntget(shm_mnt);
2811 
2812  error = -ENOSPC;
2813  inode = shmem_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
2814  if (!inode)
2815  goto put_dentry;
2816 
2817  d_instantiate(path.dentry, inode);
2818  inode->i_size = size;
2819  clear_nlink(inode); /* It is unlinked */
2820 #ifndef CONFIG_MMU
2821  error = ramfs_nommu_expand_for_mapping(inode, size);
2822  if (error)
2823  goto put_dentry;
2824 #endif
2825 
2826  error = -ENFILE;
2827  file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
2828  &shmem_file_operations);
2829  if (!file)
2830  goto put_dentry;
2831 
2832  return file;
2833 
2834 put_dentry:
2835  path_put(&path);
2836 put_memory:
2837  shmem_unacct_size(flags, size);
2838  return ERR_PTR(error);
2839 }
2840 EXPORT_SYMBOL_GPL(shmem_file_setup);
2841 
2842 /**
2843  * shmem_zero_setup - setup a shared anonymous mapping
2844  * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
2845  */
2846 int shmem_zero_setup(struct vm_area_struct *vma)
2847 {
2848  struct file *file;
2849  loff_t size = vma->vm_end - vma->vm_start;
2850 
2851  file = shmem_file_setup("dev/zero", size, vma->vm_flags);
2852  if (IS_ERR(file))
2853  return PTR_ERR(file);
2854 
2855  if (vma->vm_file)
2856  fput(vma->vm_file);
2857  vma->vm_file = file;
2858  vma->vm_ops = &shmem_vm_ops;
2859  return 0;
2860 }
2861 
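Editor's note: shmem_zero_setup() is what backs a shared anonymous mapping with an unlinked "dev/zero" file on the internal tmpfs mount, so the pages can be shared across fork(). A userspace sketch of that case (buffer size and message are arbitrary):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char *shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (shared == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	if (fork() == 0) {
		strcpy(shared, "hello from the child");
		_exit(0);
	}
	wait(NULL);
	printf("parent sees: %s\n", shared);	/* same shmem-backed pages */
	munmap(shared, 4096);
	return 0;
}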
2877 struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
2878  pgoff_t index, gfp_t gfp)
2879 {
2880 #ifdef CONFIG_SHMEM
2881  struct inode *inode = mapping->host;
2882  struct page *page;
2883  int error;
2884 
2885  BUG_ON(mapping->a_ops != &shmem_aops);
2886  error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
2887  if (error)
2888  page = ERR_PTR(error);
2889  else
2890  unlock_page(page);
2891  return page;
2892 #else
2893  /*
2894  * The tiny !SHMEM case uses ramfs without swap
2895  */
2896  return read_cache_page_gfp(mapping, index, gfp);
2897 #endif
2898 }