Linux Kernel  3.7.1
zsmalloc-main.c
1 /*
2  * zsmalloc memory allocator
3  *
4  * Copyright (C) 2011 Nitin Gupta
5  *
6  * This code is released using a dual license strategy: BSD/GPL
7  * You can choose the license that better fits your requirements.
8  *
9  * Released under the terms of 3-clause BSD License
10  * Released under the terms of GNU General Public License Version 2.0
11  */
12 
13 
14 /*
15  * This allocator is designed for use with zcache and zram. Thus, the
16  * allocator is supposed to work well under low memory conditions. In
17  * particular, it never attempts higher order page allocation which is
18  * very likely to fail under memory pressure. On the other hand, if we
19  * just use single (0-order) pages, it would suffer from very high
20  * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
21  * an entire page. This was one of the major issues with its predecessor
22  * (xvmalloc).
23  *
24  * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
25  * and links them together using various 'struct page' fields. These linked
26  * pages act as a single higher-order page i.e. an object can span 0-order
27  * page boundaries. The code refers to these linked pages as a single entity
28  * called zspage.
29  *
30  * Following is how we use various fields and flags of underlying
31  * struct page(s) to form a zspage.
32  *
33  * Usage of struct page fields:
34  * page->first_page: points to the first component (0-order) page
35  * page->index (union with page->freelist): offset of the first object
36  * starting in this page. For the first page, this is
37  * always 0, so we use this field (aka freelist) to point
38  * to the first free object in zspage.
39  * page->lru: links together all component pages (except the first page)
40  * of a zspage
41  *
42  * For _first_ page only:
43  *
44  * page->private (union with page->first_page): refers to the
45  * component page after the first page
46  * page->freelist: points to the first free object in zspage.
47  * Free objects are linked together using in-place
48  * metadata.
49  * page->objects: maximum number of objects we can store in this
50  * zspage (class->zspage_order * PAGE_SIZE / class->size)
51  * page->lru: links together first pages of various zspages.
52  * Basically forming list of zspages in a fullness group.
53  * page->mapping: class index and fullness group of the zspage
54  *
55  * Usage of struct page flags:
56  * PG_private: identifies the first component page
57  * PG_private2: identifies the last component page
58  *
59  */
60 
61 #ifdef CONFIG_ZSMALLOC_DEBUG
62 #define DEBUG
63 #endif
64 
65 #include <linux/module.h>
66 #include <linux/kernel.h>
67 #include <linux/bitops.h>
68 #include <linux/errno.h>
69 #include <linux/highmem.h>
70 #include <linux/init.h>
71 #include <linux/string.h>
72 #include <linux/slab.h>
73 #include <asm/tlbflush.h>
74 #include <asm/pgtable.h>
75 #include <linux/cpumask.h>
76 #include <linux/cpu.h>
77 #include <linux/vmalloc.h>
78 #include <linux/hardirq.h>
79 #include <linux/spinlock.h>
80 #include <linux/types.h>
81 
82 #include "zsmalloc.h"
83 
84 /*
85  * This must be a power of 2 and greater than or equal to sizeof(link_free).
86  * These two conditions ensure that any 'struct link_free' itself doesn't
87  * span more than 1 page which avoids complex case of mapping 2 pages simply
88  * to restore link_free pointer values.
89  */
90 #define ZS_ALIGN 8
91 
92 /*
93  * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
94  * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
95  */
96 #define ZS_MAX_ZSPAGE_ORDER 2
97 #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
98 
99 /*
100  * Object location (<PFN>, <obj_idx>) is encoded as
101  * a single (void *) handle value.
102  *
103  * Note that object index <obj_idx> is relative to system
104  * page <PFN> it is stored in, so for each sub-page belonging
105  * to a zspage, obj_idx starts with 0.
106  *
107  * This is made more complicated by various memory models and PAE.
108  */
109 
110 #ifndef MAX_PHYSMEM_BITS
111 #ifdef CONFIG_HIGHMEM64G
112 #define MAX_PHYSMEM_BITS 36
113 #else /* !CONFIG_HIGHMEM64G */
114 /*
115  * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
116  * be PAGE_SHIFT
117  */
118 #define MAX_PHYSMEM_BITS BITS_PER_LONG
119 #endif
120 #endif
121 #define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
122 #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
123 #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
124 
125 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
126 /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
127 #define ZS_MIN_ALLOC_SIZE \
128  MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
129 #define ZS_MAX_ALLOC_SIZE PAGE_SIZE
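/*
 * Editorial worked example (not in the original source), assuming a common
 * 64-bit configuration where MAX_PHYSMEM_BITS falls back to BITS_PER_LONG = 64
 * and PAGE_SHIFT = 12:
 *
 *   _PFN_BITS         = 64 - 12 = 52
 *   OBJ_INDEX_BITS    = 64 - 52 = 12
 *   OBJ_INDEX_MASK    = (1UL << 12) - 1 = 0xfff
 *   ZS_MIN_ALLOC_SIZE = MAX(32, (4 << 12) >> 12) = MAX(32, 4) = 32
 *
 * i.e. a handle carries the PFN in its upper 52 bits and the object index in
 * its lower 12 bits, and allocations are never smaller than 32 bytes.
 */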
130 
131 /*
132  * On systems with 4K page size, this gives 254 size classes! There is a
133  * trade-off here:
134  * - Large number of size classes is potentially wasteful as free pages are
135  * spread across these classes
136  * - Small number of size classes causes large internal fragmentation
137  * - Probably it's better to use specific size classes (empirically
138  * determined). NOTE: all those class sizes must be set as multiple of
139  * ZS_ALIGN to make sure link_free itself never has to span 2 pages.
140  *
141  * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
142  * (reason above)
143  */
144 #define ZS_SIZE_CLASS_DELTA 16
145 #define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
146  ZS_SIZE_CLASS_DELTA + 1)
147 
148 /*
149  * We do not maintain any list for completely empty or full pages
150  */
151 enum fullness_group {
152  ZS_ALMOST_FULL,
153  ZS_ALMOST_EMPTY,
154  _ZS_NR_FULLNESS_GROUPS,
155 
156  ZS_EMPTY,
157  ZS_FULL
158 };
159 
160 /*
161  * We assign a page to ZS_ALMOST_EMPTY fullness group when:
162  * n <= N / f, where
163  * n = number of allocated objects
164  * N = total number of objects zspage can store
165  * f = 1/fullness_threshold_frac
166  *
167  * Similarly, we assign zspage to:
168  * ZS_ALMOST_FULL when n > N / f
169  * ZS_EMPTY when n == 0
170  * ZS_FULL when n == N
171  *
172  * (see: fix_fullness_group())
173  */
174 static const int fullness_threshold_frac = 4;
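/*
 * Editorial worked example (not in the original source): with
 * fullness_threshold_frac = 4, a zspage that can hold N = 128 objects is
 * ZS_ALMOST_EMPTY while n <= 128 / 4 = 32 objects are in use, ZS_ALMOST_FULL
 * for n = 33..127, ZS_FULL at exactly n = 128 and ZS_EMPTY at n = 0.
 */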
175 
176 struct size_class {
177  /*
178  * Size of objects stored in this class. Must be multiple
179  * of ZS_ALIGN.
180  */
181  int size;
182  unsigned int index;
183 
184  /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
185  int pages_per_zspage;
186 
187  spinlock_t lock;
188 
189  /* stats */
190  u64 pages_allocated;
191 
192  struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
193 };
194 
195 /*
196  * Placed within free objects to form a singly linked list.
197  * For every zspage, first_page->freelist gives head of this list.
198  *
199  * This must be a power of 2 and less than or equal to ZS_ALIGN
200  */
201 struct link_free {
202  /* Handle of next free chunk (encodes <PFN, obj_idx>) */
203  void *next;
204 };
205 
206 struct zs_pool {
207  struct size_class size_class[ZS_SIZE_CLASSES];
208 
209  gfp_t flags; /* allocation flags used when growing pool */
210  const char *name;
211 };
212 
213 /*
214  * A zspage's class index and fullness group
215  * are encoded in its (first)page->mapping
216  */
217 #define CLASS_IDX_BITS 28
218 #define FULLNESS_BITS 4
219 #define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1)
220 #define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1)
221 
222 /*
223  * By default, zsmalloc uses a copy-based object mapping method to access
224  * allocations that span two pages. However, if a particular architecture
225  * 1) Implements local_flush_tlb_kernel_range() and 2) Performs VM mapping
226  * faster than copying, then it should be added here so that
227  * USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use page table
228  * mapping rather than copying
229  * for object mapping.
230 */
231 #if defined(CONFIG_ARM)
232 #define USE_PGTABLE_MAPPING
233 #endif
234 
235 struct mapping_area {
236 #ifdef USE_PGTABLE_MAPPING
237  struct vm_struct *vm; /* vm area for mapping object that span pages */
238 #else
239  char *vm_buf; /* copy buffer for objects that span pages */
240 #endif
241  char *vm_addr; /* address of kmap_atomic()'ed pages */
242  enum zs_mapmode vm_mm; /* mapping mode */
243 };
244 
245 
246 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
247 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
248 
249 static int is_first_page(struct page *page)
250 {
251  return PagePrivate(page);
252 }
253 
254 static int is_last_page(struct page *page)
255 {
256  return PagePrivate2(page);
257 }
258 
259 static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
260  enum fullness_group *fullness)
261 {
262  unsigned long m;
263  BUG_ON(!is_first_page(page));
264 
265  m = (unsigned long)page->mapping;
266  *fullness = m & FULLNESS_MASK;
267  *class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK;
268 }
269 
270 static void set_zspage_mapping(struct page *page, unsigned int class_idx,
271  enum fullness_group fullness)
272 {
273  unsigned long m;
274  BUG_ON(!is_first_page(page));
275 
276  m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) |
277  (fullness & FULLNESS_MASK);
278  page->mapping = (struct address_space *)m;
279 }
280 
281 static int get_size_class_index(int size)
282 {
283  int idx = 0;
284 
285  if (likely(size > ZS_MIN_ALLOC_SIZE))
286  idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
287   ZS_SIZE_CLASS_DELTA);
288 
289  return idx;
290 }
291 
292 static enum fullness_group get_fullness_group(struct page *page)
293 {
294  int inuse, max_objects;
295  enum fullness_group fg;
296  BUG_ON(!is_first_page(page));
297 
298  inuse = page->inuse;
299  max_objects = page->objects;
300 
301  if (inuse == 0)
302  fg = ZS_EMPTY;
303  else if (inuse == max_objects)
304  fg = ZS_FULL;
305  else if (inuse <= max_objects / fullness_threshold_frac)
306  fg = ZS_ALMOST_EMPTY;
307  else
308  fg = ZS_ALMOST_FULL;
309 
310  return fg;
311 }
312 
313 static void insert_zspage(struct page *page, struct size_class *class,
314  enum fullness_group fullness)
315 {
316  struct page **head;
317 
318  BUG_ON(!is_first_page(page));
319 
320  if (fullness >= _ZS_NR_FULLNESS_GROUPS)
321  return;
322 
323  head = &class->fullness_list[fullness];
324  if (*head)
325  list_add_tail(&page->lru, &(*head)->lru);
326 
327  *head = page;
328 }
329 
330 static void remove_zspage(struct page *page, struct size_class *class,
331  enum fullness_group fullness)
332 {
333  struct page **head;
334 
335  BUG_ON(!is_first_page(page));
336 
337  if (fullness >= _ZS_NR_FULLNESS_GROUPS)
338  return;
339 
340  head = &class->fullness_list[fullness];
341  BUG_ON(!*head);
342  if (list_empty(&(*head)->lru))
343  *head = NULL;
344  else if (*head == page)
345  *head = (struct page *)list_entry((*head)->lru.next,
346  struct page, lru);
347 
348  list_del_init(&page->lru);
349 }
350 
351 static enum fullness_group fix_fullness_group(struct zs_pool *pool,
352  struct page *page)
353 {
354  int class_idx;
355  struct size_class *class;
356  enum fullness_group currfg, newfg;
357 
358  BUG_ON(!is_first_page(page));
359 
360  get_zspage_mapping(page, &class_idx, &currfg);
361  newfg = get_fullness_group(page);
362  if (newfg == currfg)
363  goto out;
364 
365  class = &pool->size_class[class_idx];
366  remove_zspage(page, class, currfg);
367  insert_zspage(page, class, newfg);
368  set_zspage_mapping(page, class_idx, newfg);
369 
370 out:
371  return newfg;
372 }
373 
374 /*
375  * We have to decide on how many pages to link together
376  * to form a zspage for each size class. This is important
377  * to reduce wastage due to unusable space left at end of
378  * each zspage which is given as:
379  * wastage = Zp - Zp % size_class
380  * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
381  *
382  * For example, for size class of 3/8 * PAGE_SIZE, we should
383  * link together 3 PAGE_SIZE sized pages to form a zspage
384  * since then we can perfectly fit in 8 such objects.
385  */
386 static int get_pages_per_zspage(int class_size)
387 {
388  int i, max_usedpc = 0;
389  /* zspage order which gives maximum used size per KB */
390  int max_usedpc_order = 1;
391 
392  for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
393  int zspage_size;
394  int waste, usedpc;
395 
396  zspage_size = i * PAGE_SIZE;
397  waste = zspage_size % class_size;
398  usedpc = (zspage_size - waste) * 100 / zspage_size;
399 
400  if (usedpc > max_usedpc) {
401  max_usedpc = usedpc;
402  max_usedpc_order = i;
403  }
404  }
405 
406  return max_usedpc_order;
407 }
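/*
 * Editorial worked example (not in the original source), assuming 4 KiB pages
 * and class_size = 1536 (the 3/8 * PAGE_SIZE case from the comment above):
 *
 *   i = 1:  4096 % 1536 = 1024 wasted -> usedpc = 75
 *   i = 2:  8192 % 1536 =  512 wasted -> usedpc = 93
 *   i = 3: 12288 % 1536 =    0 wasted -> usedpc = 100
 *   i = 4: 16384 % 1536 = 1024 wasted -> usedpc = 93
 *
 * so get_pages_per_zspage(1536) returns 3, i.e. three linked pages hold
 * exactly eight 1536-byte objects with no leftover space.
 */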
408 
409 /*
410  * A single 'zspage' is composed of many system pages which are
411  * linked together using fields in struct page. This function finds
412  * the first/head page, given any component page of a zspage.
413  */
414 static struct page *get_first_page(struct page *page)
415 {
416  if (is_first_page(page))
417  return page;
418  else
419  return page->first_page;
420 }
421 
422 static struct page *get_next_page(struct page *page)
423 {
424  struct page *next;
425 
426  if (is_last_page(page))
427  next = NULL;
428  else if (is_first_page(page))
429  next = (struct page *)page->private;
430  else
431  next = list_entry(page->lru.next, struct page, lru);
432 
433  return next;
434 }
435 
436 /* Encode <page, obj_idx> as a single handle value */
437 static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
438 {
439  unsigned long handle;
440 
441  if (!page) {
442  BUG_ON(obj_idx);
443  return NULL;
444  }
445 
446  handle = page_to_pfn(page) << OBJ_INDEX_BITS;
447  handle |= (obj_idx & OBJ_INDEX_MASK);
448 
449  return (void *)handle;
450 }
451 
452 /* Decode <page, obj_idx> pair from the given object handle */
453 static void obj_handle_to_location(unsigned long handle, struct page **page,
454  unsigned long *obj_idx)
455 {
456  *page = pfn_to_page(handle >> OBJ_INDEX_BITS);
457  *obj_idx = handle & OBJ_INDEX_MASK;
458 }
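/*
 * Editorial worked example (not in the original source), assuming
 * OBJ_INDEX_BITS = 12 (64-bit, 4 KiB pages): a page with PFN 0x1a2b3 and
 * obj_idx 5 encodes to the handle (0x1a2b3 << 12) | 5 = 0x1a2b3005;
 * obj_handle_to_location() recovers the PFN from handle >> 12 and the
 * index from handle & 0xfff.
 */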
459 
460 static unsigned long obj_idx_to_offset(struct page *page,
461  unsigned long obj_idx, int class_size)
462 {
463  unsigned long off = 0;
464 
465  if (!is_first_page(page))
466  off = page->index;
467 
468  return off + obj_idx * class_size;
469 }
470 
471 static void reset_page(struct page *page)
472 {
473  clear_bit(PG_private, &page->flags);
474  clear_bit(PG_private_2, &page->flags);
475  set_page_private(page, 0);
476  page->mapping = NULL;
477  page->freelist = NULL;
478  reset_page_mapcount(page);
479 }
480 
481 static void free_zspage(struct page *first_page)
482 {
483  struct page *nextp, *tmp, *head_extra;
484 
485  BUG_ON(!is_first_page(first_page));
486  BUG_ON(first_page->inuse);
487 
488  head_extra = (struct page *)page_private(first_page);
489 
490  reset_page(first_page);
491  __free_page(first_page);
492 
493  /* zspage with only 1 system page */
494  if (!head_extra)
495  return;
496 
497  list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) {
498  list_del(&nextp->lru);
499  reset_page(nextp);
500  __free_page(nextp);
501  }
502  reset_page(head_extra);
503  __free_page(head_extra);
504 }
505 
506 /* Initialize a newly allocated zspage */
507 static void init_zspage(struct page *first_page, struct size_class *class)
508 {
509  unsigned long off = 0;
510  struct page *page = first_page;
511 
512  BUG_ON(!is_first_page(first_page));
513  while (page) {
514  struct page *next_page;
515  struct link_free *link;
516  unsigned int i, objs_on_page;
517 
518  /*
519  * page->index stores offset of first object starting
520  * in the page. For the first page, this is always 0,
521  * so we use first_page->index (aka ->freelist) to store
522  * head of corresponding zspage's freelist.
523  */
524  if (page != first_page)
525  page->index = off;
526 
527  link = (struct link_free *)kmap_atomic(page) +
528  off / sizeof(*link);
529  objs_on_page = (PAGE_SIZE - off) / class->size;
530 
531  for (i = 1; i <= objs_on_page; i++) {
532  off += class->size;
533  if (off < PAGE_SIZE) {
534  link->next = obj_location_to_handle(page, i);
535  link += class->size / sizeof(*link);
536  }
537  }
538 
539  /*
540  * We now come to the last (full or partial) object on this
541  * page, which must point to the first object on the next
542  * page (if present)
543  */
544  next_page = get_next_page(page);
545  link->next = obj_location_to_handle(next_page, 0);
546  kunmap_atomic(link);
547  page = next_page;
548  off = (off + class->size) % PAGE_SIZE;
549  }
550 }
551 
552 /*
553  * Allocate a zspage for the given size class
554  */
555 static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
556 {
557  int i, error;
558  struct page *first_page = NULL, *uninitialized_var(prev_page);
559 
560  /*
561  * Allocate individual pages and link them together as:
562  * 1. first page->private = first sub-page
563  * 2. all sub-pages are linked together using page->lru
564  * 3. each sub-page is linked to the first page using page->first_page
565  *
566  * For each size class, First/Head pages are linked together using
567  * page->lru. Also, we set PG_private to identify the first page
568  * (i.e. no other sub-page has this flag set) and PG_private_2 to
569  * identify the last page.
570  */
571  error = -ENOMEM;
572  for (i = 0; i < class->pages_per_zspage; i++) {
573  struct page *page;
574 
575  page = alloc_page(flags);
576  if (!page)
577  goto cleanup;
578 
579  INIT_LIST_HEAD(&page->lru);
580  if (i == 0) { /* first page */
581  SetPagePrivate(page);
582  set_page_private(page, 0);
583  first_page = page;
584  first_page->inuse = 0;
585  }
586  if (i == 1)
587  first_page->private = (unsigned long)page;
588  if (i >= 1)
589  page->first_page = first_page;
590  if (i >= 2)
591  list_add(&page->lru, &prev_page->lru);
592  if (i == class->pages_per_zspage - 1) /* last page */
593  SetPagePrivate2(page);
594  prev_page = page;
595  }
596 
597  init_zspage(first_page, class);
598 
599  first_page->freelist = obj_location_to_handle(first_page, 0);
600  /* Maximum number of objects we can store in this zspage */
601  first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
602 
603  error = 0; /* Success */
604 
605 cleanup:
606  if (unlikely(error) && first_page) {
607  free_zspage(first_page);
608  first_page = NULL;
609  }
610 
611  return first_page;
612 }
613 
614 static struct page *find_get_zspage(struct size_class *class)
615 {
616  int i;
617  struct page *page;
618 
619  for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
620  page = class->fullness_list[i];
621  if (page)
622  break;
623  }
624 
625  return page;
626 }
627 
628 #ifdef USE_PGTABLE_MAPPING
629 static inline int __zs_cpu_up(struct mapping_area *area)
630 {
631  /*
632  * Make sure we don't leak memory if a cpu UP notification
633  * and zs_init() race and both call zs_cpu_up() on the same cpu
634  */
635  if (area->vm)
636  return 0;
637  area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
638  if (!area->vm)
639  return -ENOMEM;
640  return 0;
641 }
642 
643 static inline void __zs_cpu_down(struct mapping_area *area)
644 {
645  if (area->vm)
646  free_vm_area(area->vm);
647  area->vm = NULL;
648 }
649 
650 static inline void *__zs_map_object(struct mapping_area *area,
651  struct page *pages[2], int off, int size)
652 {
653  BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
654  area->vm_addr = area->vm->addr;
655  return area->vm_addr + off;
656 }
657 
658 static inline void __zs_unmap_object(struct mapping_area *area,
659  struct page *pages[2], int off, int size)
660 {
661  unsigned long addr = (unsigned long)area->vm_addr;
662  unsigned long end = addr + (PAGE_SIZE * 2);
663 
664  flush_cache_vunmap(addr, end);
665  unmap_kernel_range_noflush(addr, PAGE_SIZE * 2);
666  local_flush_tlb_kernel_range(addr, end);
667 }
668 
669 #else /* USE_PGTABLE_MAPPING */
670 
671 static inline int __zs_cpu_up(struct mapping_area *area)
672 {
673  /*
674  * Make sure we don't leak memory if a cpu UP notification
675  * and zs_init() race and both call zs_cpu_up() on the same cpu
676  */
677  if (area->vm_buf)
678  return 0;
679  area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
680  if (!area->vm_buf)
681  return -ENOMEM;
682  return 0;
683 }
684 
685 static inline void __zs_cpu_down(struct mapping_area *area)
686 {
687  if (area->vm_buf)
688  free_page((unsigned long)area->vm_buf);
689  area->vm_buf = NULL;
690 }
691 
692 static void *__zs_map_object(struct mapping_area *area,
693  struct page *pages[2], int off, int size)
694 {
695  int sizes[2];
696  void *addr;
697  char *buf = area->vm_buf;
698 
699  /* disable page faults to match kmap_atomic() return conditions */
700  pagefault_disable();
701 
702  /* no read fastpath */
703  if (area->vm_mm == ZS_MM_WO)
704  goto out;
705 
706  sizes[0] = PAGE_SIZE - off;
707  sizes[1] = size - sizes[0];
708 
709  /* copy object to per-cpu buffer */
710  addr = kmap_atomic(pages[0]);
711  memcpy(buf, addr + off, sizes[0]);
712  kunmap_atomic(addr);
713  addr = kmap_atomic(pages[1]);
714  memcpy(buf + sizes[0], addr, sizes[1]);
715  kunmap_atomic(addr);
716 out:
717  return area->vm_buf;
718 }
719 
720 static void __zs_unmap_object(struct mapping_area *area,
721  struct page *pages[2], int off, int size)
722 {
723  int sizes[2];
724  void *addr;
725  char *buf = area->vm_buf;
726 
727  /* no write fastpath */
728  if (area->vm_mm == ZS_MM_RO)
729  goto out;
730 
731  sizes[0] = PAGE_SIZE - off;
732  sizes[1] = size - sizes[0];
733 
734  /* copy per-cpu buffer to object */
735  addr = kmap_atomic(pages[0]);
736  memcpy(addr + off, buf, sizes[0]);
737  kunmap_atomic(addr);
738  addr = kmap_atomic(pages[1]);
739  memcpy(addr, buf + sizes[0], sizes[1]);
740  kunmap_atomic(addr);
741 
742 out:
743  /* enable page faults to match kunmap_atomic() return conditions */
744  pagefault_enable();
745 }
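/*
 * Editorial worked example (not in the original source): for a 400-byte
 * object starting at offset 3900 of a 4 KiB page, the copy-based path above
 * splits the transfer into sizes[0] = 4096 - 3900 = 196 bytes from/to the
 * first page and sizes[1] = 400 - 196 = 204 bytes from/to the second page.
 */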
746 
747 #endif /* USE_PGTABLE_MAPPING */
748 
749 static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
750  void *pcpu)
751 {
752  int ret, cpu = (long)pcpu;
753  struct mapping_area *area;
754 
755  switch (action) {
756  case CPU_UP_PREPARE:
757  area = &per_cpu(zs_map_area, cpu);
758  ret = __zs_cpu_up(area);
759  if (ret)
760  return notifier_from_errno(ret);
761  break;
762  case CPU_DEAD:
763  case CPU_UP_CANCELED:
764  area = &per_cpu(zs_map_area, cpu);
765  __zs_cpu_down(area);
766  break;
767  }
768 
769  return NOTIFY_OK;
770 }
771 
772 static struct notifier_block zs_cpu_nb = {
773  .notifier_call = zs_cpu_notifier
774 };
775 
776 static void zs_exit(void)
777 {
778  int cpu;
779 
780  for_each_online_cpu(cpu)
781   zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
782  unregister_cpu_notifier(&zs_cpu_nb);
783 }
784 
785 static int zs_init(void)
786 {
787  int cpu, ret;
788 
789  register_cpu_notifier(&zs_cpu_nb);
790  for_each_online_cpu(cpu) {
791  ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
792  if (notifier_to_errno(ret))
793  goto fail;
794  }
795  return 0;
796 fail:
797  zs_exit();
798  return notifier_to_errno(ret);
799 }
800 
801 struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
802 {
803  int i, ovhd_size;
804  struct zs_pool *pool;
805 
806  if (!name)
807  return NULL;
808 
809  ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
810  pool = kzalloc(ovhd_size, GFP_KERNEL);
811  if (!pool)
812  return NULL;
813 
814  for (i = 0; i < ZS_SIZE_CLASSES; i++) {
815  int size;
816  struct size_class *class;
817 
818  size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
819  if (size > ZS_MAX_ALLOC_SIZE)
820  size = ZS_MAX_ALLOC_SIZE;
821 
822  class = &pool->size_class[i];
823  class->size = size;
824  class->index = i;
825  spin_lock_init(&class->lock);
826  class->pages_per_zspage = get_pages_per_zspage(size);
827 
828  }
829 
830  pool->flags = flags;
831  pool->name = name;
832 
833  return pool;
834 }
835 EXPORT_SYMBOL_GPL(zs_create_pool);
836 
837 void zs_destroy_pool(struct zs_pool *pool)
838 {
839  int i;
840 
841  for (i = 0; i < ZS_SIZE_CLASSES; i++) {
842  int fg;
843  struct size_class *class = &pool->size_class[i];
844 
845  for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
846  if (class->fullness_list[fg]) {
847  pr_info("Freeing non-empty class with size "
848  "%db, fullness group %d\n",
849  class->size, fg);
850  }
851  }
852  }
853  kfree(pool);
854 }
855 EXPORT_SYMBOL_GPL(zs_destroy_pool);
856 
866 unsigned long zs_malloc(struct zs_pool *pool, size_t size)
867 {
868  unsigned long obj;
869  struct link_free *link;
870  int class_idx;
871  struct size_class *class;
872 
873  struct page *first_page, *m_page;
874  unsigned long m_objidx, m_offset;
875 
876  if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
877  return 0;
878 
879  class_idx = get_size_class_index(size);
880  class = &pool->size_class[class_idx];
881  BUG_ON(class_idx != class->index);
882 
883  spin_lock(&class->lock);
884  first_page = find_get_zspage(class);
885 
886  if (!first_page) {
887  spin_unlock(&class->lock);
888  first_page = alloc_zspage(class, pool->flags);
889  if (unlikely(!first_page))
890  return 0;
891 
892  set_zspage_mapping(first_page, class->index, ZS_EMPTY);
893  spin_lock(&class->lock);
894  class->pages_allocated += class->pages_per_zspage;
895  }
896 
897  obj = (unsigned long)first_page->freelist;
898  obj_handle_to_location(obj, &m_page, &m_objidx);
899  m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
900 
901  link = (struct link_free *)kmap_atomic(m_page) +
902  m_offset / sizeof(*link);
903  first_page->freelist = link->next;
904  memset(link, POISON_INUSE, sizeof(*link));
905  kunmap_atomic(link);
906 
907  first_page->inuse++;
908  /* Now move the zspage to another fullness group, if required */
909  fix_fullness_group(pool, first_page);
910  spin_unlock(&class->lock);
911 
912  return obj;
913 }
914 EXPORT_SYMBOL_GPL(zs_malloc);
915 
916 void zs_free(struct zs_pool *pool, unsigned long obj)
917 {
918  struct link_free *link;
919  struct page *first_page, *f_page;
920  unsigned long f_objidx, f_offset;
921 
922  int class_idx;
923  struct size_class *class;
924  enum fullness_group fullness;
925 
926  if (unlikely(!obj))
927  return;
928 
929  obj_handle_to_location(obj, &f_page, &f_objidx);
930  first_page = get_first_page(f_page);
931 
932  get_zspage_mapping(first_page, &class_idx, &fullness);
933  class = &pool->size_class[class_idx];
934  f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
935 
936  spin_lock(&class->lock);
937 
938  /* Insert this object in containing zspage's freelist */
939  link = (struct link_free *)((unsigned char *)kmap_atomic(f_page)
940  + f_offset);
941  link->next = first_page->freelist;
942  kunmap_atomic(link);
943  first_page->freelist = (void *)obj;
944 
945  first_page->inuse--;
946  fullness = fix_fullness_group(pool, first_page);
947 
948  if (fullness == ZS_EMPTY)
949  class->pages_allocated -= class->pages_per_zspage;
950 
951  spin_unlock(&class->lock);
952 
953  if (fullness == ZS_EMPTY)
954  free_zspage(first_page);
955 }
956 EXPORT_SYMBOL_GPL(zs_free);
957 
972 void *zs_map_object(struct zs_pool *pool, unsigned long handle,
973  enum zs_mapmode mm)
974 {
975  struct page *page;
976  unsigned long obj_idx, off;
977 
978  unsigned int class_idx;
979  enum fullness_group fg;
980  struct size_class *class;
981  struct mapping_area *area;
982  struct page *pages[2];
983 
984  BUG_ON(!handle);
985 
986  /*
987  * Because we use per-cpu mapping areas shared among the
988  * pools/users, we can't allow mapping in interrupt context
989  * because it can corrupt another user's mappings.
990  */
991  BUG_ON(in_interrupt());
992 
993  obj_handle_to_location(handle, &page, &obj_idx);
994  get_zspage_mapping(get_first_page(page), &class_idx, &fg);
995  class = &pool->size_class[class_idx];
996  off = obj_idx_to_offset(page, obj_idx, class->size);
997 
998  area = &get_cpu_var(zs_map_area);
999  area->vm_mm = mm;
1000  if (off + class->size <= PAGE_SIZE) {
1001  /* this object is contained entirely within a page */
1002  area->vm_addr = kmap_atomic(page);
1003  return area->vm_addr + off;
1004  }
1005 
1006  /* this object spans two pages */
1007  pages[0] = page;
1008  pages[1] = get_next_page(page);
1009  BUG_ON(!pages[1]);
1010 
1011  return __zs_map_object(area, pages, off, class->size);
1012 }
1013 EXPORT_SYMBOL_GPL(zs_map_object);
1014 
1015 void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1016 {
1017  struct page *page;
1018  unsigned long obj_idx, off;
1019 
1020  unsigned int class_idx;
1021  enum fullness_group fg;
1022  struct size_class *class;
1023  struct mapping_area *area;
1024 
1025  BUG_ON(!handle);
1026 
1027  obj_handle_to_location(handle, &page, &obj_idx);
1028  get_zspage_mapping(get_first_page(page), &class_idx, &fg);
1029  class = &pool->size_class[class_idx];
1030  off = obj_idx_to_offset(page, obj_idx, class->size);
1031 
1032  area = &__get_cpu_var(zs_map_area);
1033  if (off + class->size <= PAGE_SIZE)
1034  kunmap_atomic(area->vm_addr);
1035  else {
1036  struct page *pages[2];
1037 
1038  pages[0] = page;
1039  pages[1] = get_next_page(page);
1040  BUG_ON(!pages[1]);
1041 
1042  __zs_unmap_object(area, pages, off, class->size);
1043  }
1044  put_cpu_var(zs_map_area);
1045 }
1046 EXPORT_SYMBOL_GPL(zs_unmap_object);
1047 
1048 u64 zs_get_total_size_bytes(struct zs_pool *pool)
1049 {
1050  int i;
1051  u64 npages = 0;
1052 
1053  for (i = 0; i < ZS_SIZE_CLASSES; i++)
1054  npages += pool->size_class[i].pages_allocated;
1055 
1056  return npages << PAGE_SHIFT;
1057 }
1058 EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
1059 
1060 module_init(zs_init);
1061 module_exit(zs_exit);
1062 
1063 MODULE_LICENSE("Dual BSD/GPL");
1064 MODULE_AUTHOR("Nitin Gupta <[email protected]>");
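/*
 * Editorial usage sketch (not part of the original file): a minimal caller in
 * the style of zram/zcache showing the allocate -> map -> copy -> unmap ->
 * free sequence. The pool name "example" and the buffer contents are
 * illustrative only.
 */
static int __maybe_unused zsmalloc_usage_example(void)
{
	struct zs_pool *pool;
	unsigned long handle;
	char src[64] = "compressed page data";
	void *dst;

	pool = zs_create_pool("example", GFP_NOIO | __GFP_HIGHMEM);
	if (!pool)
		return -ENOMEM;

	handle = zs_malloc(pool, sizeof(src));
	if (!handle) {
		zs_destroy_pool(pool);
		return -ENOMEM;
	}

	/* A handle is not a pointer; map it to obtain a usable address. */
	dst = zs_map_object(pool, handle, ZS_MM_WO);
	memcpy(dst, src, sizeof(src));
	zs_unmap_object(pool, handle);

	pr_info("pool now holds %llu bytes\n",
		(unsigned long long)zs_get_total_size_bytes(pool));

	zs_free(pool, handle);
	zs_destroy_pool(pool);
	return 0;
}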