Linux Kernel 3.7.1
zcache-main.c
1 /*
2  * zcache-main.c
3  *
4  * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp.
5  * Copyright (c) 2010,2011, Nitin Gupta
6  *
7  * Zcache provides an in-kernel "host implementation" for transcendent memory
8  * and, thus indirectly, for cleancache and frontswap. Zcache includes two
9  * page-accessible memory [1] interfaces, both utilizing the crypto compression
10  * API:
11  * 1) "compression buddies" ("zbud") is used for ephemeral pages
12  * 2) zsmalloc is used for persistent pages.
13  * Zsmalloc has very low fragmentation
14  * so maximizes space efficiency, while zbud allows pairs (and potentially,
15  * in the future, more than a pair of) compressed pages to be closely linked
16  * so that reclaiming can be done via the kernel's physical-page-oriented
17  * "shrinker" interface.
18  *
19  * [1] For a definition of page-accessible memory (aka PAM), see:
20  * http://marc.info/?l=linux-mm&m=127811271605009
21  */
22 
23 #include <linux/module.h>
24 #include <linux/cpu.h>
25 #include <linux/highmem.h>
26 #include <linux/list.h>
27 #include <linux/slab.h>
28 #include <linux/spinlock.h>
29 #include <linux/types.h>
30 #include <linux/atomic.h>
31 #include <linux/math64.h>
32 #include <linux/crypto.h>
33 #include <linux/string.h>
34 #include <linux/idr.h>
35 #include "tmem.h"
36 
37 #include "../zsmalloc/zsmalloc.h"
38 
39 #ifdef CONFIG_CLEANCACHE
40 #include <linux/cleancache.h>
41 #endif
42 #ifdef CONFIG_FRONTSWAP
43 #include <linux/frontswap.h>
44 #endif
45 
46 #if 0
47 /* this is more aggressive but may cause other problems? */
48 #define ZCACHE_GFP_MASK (GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN)
49 #else
50 #define ZCACHE_GFP_MASK \
51  (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
52 #endif
53 
54 #define MAX_CLIENTS 16
55 #define LOCAL_CLIENT ((uint16_t)-1)
56 
57 MODULE_LICENSE("GPL");
58 
59 struct zcache_client {
60  struct idr tmem_pools;
61  struct zs_pool *zspool;
62  bool allocated;
63  atomic_t refcount;
64 };
65 
66 static struct zcache_client zcache_host;
67 static struct zcache_client zcache_clients[MAX_CLIENTS];
68 
69 static inline uint16_t get_client_id_from_client(struct zcache_client *cli)
70 {
71  BUG_ON(cli == NULL);
72  if (cli == &zcache_host)
73  return LOCAL_CLIENT;
74  return cli - &zcache_clients[0];
75 }
76 
77 static struct zcache_client *get_zcache_client(uint16_t cli_id)
78 {
79  if (cli_id == LOCAL_CLIENT)
80  return &zcache_host;
81 
82  if ((unsigned int)cli_id < MAX_CLIENTS)
83  return &zcache_clients[cli_id];
84 
85  return NULL;
86 }
87 
88 static inline bool is_local_client(struct zcache_client *cli)
89 {
90  return cli == &zcache_host;
91 }
92 
93 /* crypto API for zcache */
94 #define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME
95 static char zcache_comp_name[ZCACHE_COMP_NAME_SZ];
96 static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;
97 
98 enum comp_op {
99  ZCACHE_COMPOP_COMPRESS,
100  ZCACHE_COMPOP_DECOMPRESS
101 };
102 
103 static inline int zcache_comp_op(enum comp_op op,
104  const u8 *src, unsigned int slen,
105  u8 *dst, unsigned int *dlen)
106 {
107  struct crypto_comp *tfm;
108  int ret;
109 
110  BUG_ON(!zcache_comp_pcpu_tfms);
111  tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
112  BUG_ON(!tfm);
113  switch (op) {
114  case ZCACHE_COMPOP_COMPRESS:
115  ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
116  break;
117  case ZCACHE_COMPOP_DECOMPRESS:
118  ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
119  break;
120  default:
121  ret = -EINVAL;
122  }
123  put_cpu();
124  return ret;
125 }
126 
127 /**********
128  * Compression buddies ("zbud") provides for packing two (or, possibly
129  * in the future, more) compressed ephemeral pages into a single "raw"
130  * (physical) page and tracking them with data structures so that
131  * the raw pages can be easily reclaimed.
132  *
133  * A zbud page ("zbpg") is an aligned page containing a list_head,
134  * a lock, and two "zbud headers". The remainder of the physical
135  * page is divided up into aligned 64-byte "chunks" which contain
136  * the compressed data for zero, one, or two zbuds. Each zbpg
137  * resides on: (1) an "unused list" if it has no zbuds; (2) a
138  * "buddied" list if it is fully populated with two zbuds; or
139  * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks
140  * the one unbuddied zbud uses. The data inside a zbpg cannot be
141  * read or written unless the zbpg's lock is held.
142  */
143 
144 #define ZBH_SENTINEL 0x43214321
145 #define ZBPG_SENTINEL 0xdeadbeef
146 
147 #define ZBUD_MAX_BUDS 2
148 
149 struct zbud_hdr {
150  uint16_t client_id;
151  uint16_t pool_id;
152  struct tmem_oid oid;
153  uint32_t index;
154  uint16_t size; /* compressed size in bytes, zero means unused */
155  DECL_SENTINEL
156 };
157 
158 struct zbud_page {
159  struct list_head bud_list;
160  spinlock_t lock;
161  struct zbud_hdr buddy[ZBUD_MAX_BUDS];
162  DECL_SENTINEL
163  /* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */
164 };
165 
166 #define CHUNK_SHIFT 6
167 #define CHUNK_SIZE (1 << CHUNK_SHIFT)
168 #define CHUNK_MASK (~(CHUNK_SIZE-1))
169 #define NCHUNKS (((PAGE_SIZE - sizeof(struct zbud_page)) & \
170  CHUNK_MASK) >> CHUNK_SHIFT)
171 #define MAX_CHUNK (NCHUNKS-1)
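/*
 * Editor's illustrative sketch, not part of the original source: with the
 * common PAGE_SIZE of 4096 and the 64-byte CHUNK_SIZE above, a compressed
 * page of, say, 1000 bytes rounds up to (1000 + 63) >> 6 = 16 chunks, so
 * while it is the only buddy in its pageframe that pageframe sits on
 * zbud_unbuddied[16].list (declared just below). The hypothetical helper
 * here only restates that mapping using the macros defined above.
 */
static inline unsigned zbud_example_nchunks(unsigned compressed_size)
{
	/* same round-up the real zbud_size_to_chunks() performs below */
	return (compressed_size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}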
172 
173 static struct {
174  struct list_head list;
175  unsigned count;
176 } zbud_unbuddied[NCHUNKS];
177 /* list N contains pages with N chunks USED and NCHUNKS-N unused */
178 /* element 0 is never used but optimizing that isn't worth it */
179 static unsigned long zbud_cumul_chunk_counts[NCHUNKS];
180 
181 static struct list_head zbud_buddied_list;
182 static unsigned long zcache_zbud_buddied_count;
183 
184 /* protects the buddied list and all unbuddied lists */
185 static DEFINE_SPINLOCK(zbud_budlists_spinlock);
186 
187 static LIST_HEAD(zbpg_unused_list);
188 static unsigned long zcache_zbpg_unused_list_count;
189 
190 /* protects the unused page list */
191 static DEFINE_SPINLOCK(zbpg_unused_list_spinlock);
192 
193 static atomic_t zcache_zbud_curr_raw_pages;
194 static atomic_t zcache_zbud_curr_zpages;
195 static unsigned long zcache_zbud_curr_zbytes;
196 static unsigned long zcache_zbud_cumul_zpages;
197 static unsigned long zcache_zbud_cumul_zbytes;
198 static unsigned long zcache_compress_poor;
199 static unsigned long zcache_mean_compress_poor;
200 
201 /* forward references */
202 static void *zcache_get_free_page(void);
203 static void zcache_free_page(void *p);
204 
205 /*
206  * zbud helper functions
207  */
208 
209 static inline unsigned zbud_max_buddy_size(void)
210 {
211  return MAX_CHUNK << CHUNK_SHIFT;
212 }
213 
214 static inline unsigned zbud_size_to_chunks(unsigned size)
215 {
216  BUG_ON(size == 0 || size > zbud_max_buddy_size());
217  return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
218 }
219 
220 static inline int zbud_budnum(struct zbud_hdr *zh)
221 {
222  unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1);
223  struct zbud_page *zbpg = NULL;
224  unsigned budnum = -1U;
225  int i;
226 
227  for (i = 0; i < ZBUD_MAX_BUDS; i++)
228  if (offset == offsetof(typeof(*zbpg), buddy[i])) {
229  budnum = i;
230  break;
231  }
232  BUG_ON(budnum == -1U);
233  return budnum;
234 }
235 
236 static char *zbud_data(struct zbud_hdr *zh, unsigned size)
237 {
238  struct zbud_page *zbpg;
239  char *p;
240  unsigned budnum;
241 
242  ASSERT_SENTINEL(zh, ZBH);
243  budnum = zbud_budnum(zh);
244  BUG_ON(size == 0 || size > zbud_max_buddy_size());
245  zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
246  ASSERT_SPINLOCK(&zbpg->lock);
247  p = (char *)zbpg;
248  if (budnum == 0)
249  p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) &
250  CHUNK_MASK);
251  else if (budnum == 1)
252  p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK);
253  return p;
254 }
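/*
 * Editor's note, not part of the original source: the arithmetic above
 * means buddy 0's data begins at the first 64-byte chunk boundary past the
 * struct zbud_page header and grows upward, while buddy 1's data is placed
 * so that its chunk-rounded length ends exactly at the end of the
 * pageframe; e.g. a 1000-byte buddy 1 rounds to 1024 bytes and occupies the
 * final 16 chunks of the page.
 */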
255 
256 /*
257  * zbud raw page management
258  */
259 
260 static struct zbud_page *zbud_alloc_raw_page(void)
261 {
262  struct zbud_page *zbpg = NULL;
263  struct zbud_hdr *zh0, *zh1;
264  bool recycled = false;
265 
266  /* if any pages on the zbpg list, use one */
267  spin_lock(&zbpg_unused_list_spinlock);
268  if (!list_empty(&zbpg_unused_list)) {
269  zbpg = list_first_entry(&zbpg_unused_list,
270  struct zbud_page, bud_list);
271  list_del_init(&zbpg->bud_list);
272  zcache_zbpg_unused_list_count--;
273  recycled = true;
274  }
275  spin_unlock(&zbpg_unused_list_spinlock);
276  if (zbpg == NULL)
277  /* none on zbpg list, try to get a kernel page */
278  zbpg = zcache_get_free_page();
279  if (likely(zbpg != NULL)) {
280  INIT_LIST_HEAD(&zbpg->bud_list);
281  zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
282  spin_lock_init(&zbpg->lock);
283  if (recycled) {
284  ASSERT_INVERTED_SENTINEL(zbpg, ZBPG);
285  SET_SENTINEL(zbpg, ZBPG);
286  BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
287  BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
288  } else {
289  atomic_inc(&zcache_zbud_curr_raw_pages);
290  INIT_LIST_HEAD(&zbpg->bud_list);
291  SET_SENTINEL(zbpg, ZBPG);
292  zh0->size = 0; zh1->size = 0;
293  tmem_oid_set_invalid(&zh0->oid);
294  tmem_oid_set_invalid(&zh1->oid);
295  }
296  }
297  return zbpg;
298 }
299 
300 static void zbud_free_raw_page(struct zbud_page *zbpg)
301 {
302  struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1];
303 
304  ASSERT_SENTINEL(zbpg, ZBPG);
305  BUG_ON(!list_empty(&zbpg->bud_list));
306  ASSERT_SPINLOCK(&zbpg->lock);
307  BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
308  BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
309  INVERT_SENTINEL(zbpg, ZBPG);
310  spin_unlock(&zbpg->lock);
311  spin_lock(&zbpg_unused_list_spinlock);
312  list_add(&zbpg->bud_list, &zbpg_unused_list);
313  zcache_zbpg_unused_list_count++;
314  spin_unlock(&zbpg_unused_list_spinlock);
315 }
316 
317 /*
318  * core zbud handling routines
319  */
320 
321 static unsigned zbud_free(struct zbud_hdr *zh)
322 {
323  unsigned size;
324 
325  ASSERT_SENTINEL(zh, ZBH);
326  BUG_ON(!tmem_oid_valid(&zh->oid));
327  size = zh->size;
328  BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
329  zh->size = 0;
330  tmem_oid_set_invalid(&zh->oid);
331  INVERT_SENTINEL(zh, ZBH);
332  zcache_zbud_curr_zbytes -= size;
333  atomic_dec(&zcache_zbud_curr_zpages);
334  return size;
335 }
336 
337 static void zbud_free_and_delist(struct zbud_hdr *zh)
338 {
339  unsigned chunks;
340  struct zbud_hdr *zh_other;
341  unsigned budnum = zbud_budnum(zh), size;
342  struct zbud_page *zbpg =
343  container_of(zh, struct zbud_page, buddy[budnum]);
344 
345  spin_lock(&zbud_budlists_spinlock);
346  spin_lock(&zbpg->lock);
347  if (list_empty(&zbpg->bud_list)) {
348  /* ignore zombie page... see zbud_evict_pages() */
349  spin_unlock(&zbpg->lock);
350  spin_unlock(&zbud_budlists_spinlock);
351  return;
352  }
353  size = zbud_free(zh);
354  ASSERT_SPINLOCK(&zbpg->lock);
355  zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0];
356  if (zh_other->size == 0) { /* was unbuddied: unlist and free */
357  chunks = zbud_size_to_chunks(size);
358  BUG_ON(list_empty(&zbud_unbuddied[chunks].list));
359  list_del_init(&zbpg->bud_list);
360  zbud_unbuddied[chunks].count--;
361  spin_unlock(&zbud_budlists_spinlock);
362  zbud_free_raw_page(zbpg);
363  } else { /* was buddied: move remaining buddy to unbuddied list */
364  chunks = zbud_size_to_chunks(zh_other->size);
365  list_del_init(&zbpg->bud_list);
366  zcache_zbud_buddied_count--;
367  list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list);
368  zbud_unbuddied[chunks].count++;
369  spin_unlock(&zbud_budlists_spinlock);
370  spin_unlock(&zbpg->lock);
371  }
372 }
373 
374 static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id,
375  struct tmem_oid *oid,
376  uint32_t index, struct page *page,
377  void *cdata, unsigned size)
378 {
379  struct zbud_hdr *zh0, *zh1, *zh = NULL;
380  struct zbud_page *zbpg = NULL, *ztmp;
381  unsigned nchunks;
382  char *to;
383  int i, found_good_buddy = 0;
384 
385  nchunks = zbud_size_to_chunks(size);
386  for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
387  spin_lock(&zbud_budlists_spinlock);
388  if (!list_empty(&zbud_unbuddied[i].list)) {
389  list_for_each_entry_safe(zbpg, ztmp,
390  &zbud_unbuddied[i].list, bud_list) {
391  if (spin_trylock(&zbpg->lock)) {
392  found_good_buddy = i;
393  goto found_unbuddied;
394  }
395  }
396  }
397  spin_unlock(&zbud_budlists_spinlock);
398  }
399  /* didn't find a good buddy, try allocating a new page */
400  zbpg = zbud_alloc_raw_page();
401  if (unlikely(zbpg == NULL))
402  goto out;
403  /* ok, have a page; the data is already compressed, so take locks and add it */
404  spin_lock(&zbud_budlists_spinlock);
405  spin_lock(&zbpg->lock);
406  list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);
407  zbud_unbuddied[nchunks].count++;
408  zh = &zbpg->buddy[0];
409  goto init_zh;
410 
411 found_unbuddied:
412  ASSERT_SPINLOCK(&zbpg->lock);
413  zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
414  BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0)));
415  if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */
416  ASSERT_SENTINEL(zh0, ZBH);
417  zh = zh1;
418  } else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */
419  ASSERT_SENTINEL(zh1, ZBH);
420  zh = zh0;
421  } else
422  BUG();
423  list_del_init(&zbpg->bud_list);
424  zbud_unbuddied[found_good_buddy].count--;
425  list_add_tail(&zbpg->bud_list, &zbud_buddied_list);
426  zcache_zbud_buddied_count++;
427 
428 init_zh:
429  SET_SENTINEL(zh, ZBH);
430  zh->size = size;
431  zh->index = index;
432  zh->oid = *oid;
433  zh->pool_id = pool_id;
434  zh->client_id = client_id;
435  to = zbud_data(zh, size);
436  memcpy(to, cdata, size);
437  spin_unlock(&zbpg->lock);
438  spin_unlock(&zbud_budlists_spinlock);
439 
440  zbud_cumul_chunk_counts[nchunks]++;
441  atomic_inc(&zcache_zbud_curr_zpages);
442  zcache_zbud_cumul_zpages++;
443  zcache_zbud_curr_zbytes += size;
444  zcache_zbud_cumul_zbytes += size;
445 out:
446  return zh;
447 }
448 
449 static int zbud_decompress(struct page *page, struct zbud_hdr *zh)
450 {
451  struct zbud_page *zbpg;
452  unsigned budnum = zbud_budnum(zh);
453  unsigned int out_len = PAGE_SIZE;
454  char *to_va, *from_va;
455  unsigned size;
456  int ret = 0;
457 
458  zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
459  spin_lock(&zbpg->lock);
460  if (list_empty(&zbpg->bud_list)) {
461  /* ignore zombie page... see zbud_evict_pages() */
462  ret = -EINVAL;
463  goto out;
464  }
465  ASSERT_SENTINEL(zh, ZBH);
466  BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
467  to_va = kmap_atomic(page);
468  size = zh->size;
469  from_va = zbud_data(zh, size);
470  ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size,
471  to_va, &out_len);
472  BUG_ON(ret);
473  BUG_ON(out_len != PAGE_SIZE);
474  kunmap_atomic(to_va);
475 out:
476  spin_unlock(&zbpg->lock);
477  return ret;
478 }
479 
480 /*
481  * The following routines handle shrinking of ephemeral pages by evicting
482  * pages "least valuable" first.
483  */
484 
485 static unsigned long zcache_evicted_raw_pages;
486 static unsigned long zcache_evicted_buddied_pages;
487 static unsigned long zcache_evicted_unbuddied_pages;
488 
489 static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id,
490  uint16_t poolid);
491 static void zcache_put_pool(struct tmem_pool *pool);
492 
493 /*
494  * Flush and free all zbuds in a zbpg, then free the pageframe
495  */
496 static void zbud_evict_zbpg(struct zbud_page *zbpg)
497 {
498  struct zbud_hdr *zh;
499  int i, j;
500  uint32_t pool_id[ZBUD_MAX_BUDS], client_id[ZBUD_MAX_BUDS];
501  uint32_t index[ZBUD_MAX_BUDS];
502  struct tmem_oid oid[ZBUD_MAX_BUDS];
503  struct tmem_pool *pool;
504 
505  ASSERT_SPINLOCK(&zbpg->lock);
506  BUG_ON(!list_empty(&zbpg->bud_list));
507  for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) {
508  zh = &zbpg->buddy[i];
509  if (zh->size) {
510  client_id[j] = zh->client_id;
511  pool_id[j] = zh->pool_id;
512  oid[j] = zh->oid;
513  index[j] = zh->index;
514  j++;
515  zbud_free(zh);
516  }
517  }
518  spin_unlock(&zbpg->lock);
519  for (i = 0; i < j; i++) {
520  pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
521  if (pool != NULL) {
522  tmem_flush_page(pool, &oid[i], index[i]);
523  zcache_put_pool(pool);
524  }
525  }
526  ASSERT_SENTINEL(zbpg, ZBPG);
527  spin_lock(&zbpg->lock);
528  zbud_free_raw_page(zbpg);
529 }
530 
531 /*
532  * Free nr pages. This code is funky because we want to hold the locks
533  * protecting various lists for as short a time as possible, and in some
534  * circumstances the list may change asynchronously when the list lock is
535  * not held. In some cases we also trylock not only to avoid waiting on a
536  * page in use by another cpu, but also to avoid potential deadlock due to
537  * lock inversion.
538  */
539 static void zbud_evict_pages(int nr)
540 {
541  struct zbud_page *zbpg;
542  int i;
543 
544  /* first try freeing any pages on unused list */
545 retry_unused_list:
546  spin_lock_bh(&zbpg_unused_list_spinlock);
547  if (!list_empty(&zbpg_unused_list)) {
548  /* can't walk list here, since it may change when unlocked */
549  zbpg = list_first_entry(&zbpg_unused_list,
550  struct zbud_page, bud_list);
551  list_del_init(&zbpg->bud_list);
552  zcache_zbpg_unused_list_count--;
553  atomic_dec(&zcache_zbud_curr_raw_pages);
554  spin_unlock_bh(&zbpg_unused_list_spinlock);
555  zcache_free_page(zbpg);
556  zcache_evicted_raw_pages++;
557  if (--nr <= 0)
558  goto out;
559  goto retry_unused_list;
560  }
561  spin_unlock_bh(&zbpg_unused_list_spinlock);
562 
563  /* now try freeing unbuddied pages, starting with least space avail */
564  for (i = 0; i < MAX_CHUNK; i++) {
565 retry_unbud_list_i:
566  spin_lock_bh(&zbud_budlists_spinlock);
567  if (list_empty(&zbud_unbuddied[i].list)) {
568  spin_unlock_bh(&zbud_budlists_spinlock);
569  continue;
570  }
571  list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) {
572  if (unlikely(!spin_trylock(&zbpg->lock)))
573  continue;
574  list_del_init(&zbpg->bud_list);
575  zbud_unbuddied[i].count--;
576  spin_unlock(&zbud_budlists_spinlock);
577  zcache_evicted_unbuddied_pages++;
578  /* want budlists unlocked when doing zbpg eviction */
579  zbud_evict_zbpg(zbpg);
580  local_bh_enable();
581  if (--nr <= 0)
582  goto out;
583  goto retry_unbud_list_i;
584  }
585  spin_unlock_bh(&zbud_budlists_spinlock);
586  }
587 
588  /* as a last resort, free buddied pages */
589 retry_bud_list:
590  spin_lock_bh(&zbud_budlists_spinlock);
591  if (list_empty(&zbud_buddied_list)) {
592  spin_unlock_bh(&zbud_budlists_spinlock);
593  goto out;
594  }
595  list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) {
596  if (unlikely(!spin_trylock(&zbpg->lock)))
597  continue;
598  list_del_init(&zbpg->bud_list);
599  zcache_zbud_buddied_count--;
600  spin_unlock(&zbud_budlists_spinlock);
601  zcache_evicted_buddied_pages++;
602  /* want budlists unlocked when doing zbpg eviction */
603  zbud_evict_zbpg(zbpg);
604  local_bh_enable();
605  if (--nr <= 0)
606  goto out;
607  goto retry_bud_list;
608  }
609  spin_unlock_bh(&zbud_budlists_spinlock);
610 out:
611  return;
612 }
613 
614 static void __init zbud_init(void)
615 {
616  int i;
617 
618  INIT_LIST_HEAD(&zbud_buddied_list);
619 
620  for (i = 0; i < NCHUNKS; i++)
621  INIT_LIST_HEAD(&zbud_unbuddied[i].list);
622 }
623 
624 #ifdef CONFIG_SYSFS
625 /*
626  * These sysfs routines show a nice distribution of how many zbpg's are
627  * currently (and have ever been placed) in each unbuddied list. It's fun
628  * to watch but can probably go away before final merge.
629  */
630 static int zbud_show_unbuddied_list_counts(char *buf)
631 {
632  int i;
633  char *p = buf;
634 
635  for (i = 0; i < NCHUNKS; i++)
636  p += sprintf(p, "%u ", zbud_unbuddied[i].count);
637  return p - buf;
638 }
639 
640 static int zbud_show_cumul_chunk_counts(char *buf)
641 {
642  unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0;
643  unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0;
644  unsigned long total_chunks_lte_42 = 0;
645  char *p = buf;
646 
647  for (i = 0; i < NCHUNKS; i++) {
648  p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]);
649  chunks += zbud_cumul_chunk_counts[i];
650  total_chunks += zbud_cumul_chunk_counts[i];
651  sum_total_chunks += i * zbud_cumul_chunk_counts[i];
652  if (i == 21)
653  total_chunks_lte_21 = total_chunks;
654  if (i == 32)
655  total_chunks_lte_32 = total_chunks;
656  if (i == 42)
657  total_chunks_lte_42 = total_chunks;
658  }
659  p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n",
660  total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42,
661  chunks == 0 ? 0 : sum_total_chunks / chunks);
662  return p - buf;
663 }
664 #endif
665 
666 /**********
667  * This "zv" PAM implementation combines the slab-based zsmalloc
668  * with the crypto compression API to maximize the amount of data that can
669  * be packed into a physical page.
670  *
671  * Zv represents a PAM page with the index and object (plus a "size" value
672  * necessary for decompression) immediately preceding the compressed data.
673  */
674 
675 #define ZVH_SENTINEL 0x43214321
676 
677 struct zv_hdr {
678  uint32_t pool_id;
679  struct tmem_oid oid;
680  uint32_t index;
681  size_t size;
682  DECL_SENTINEL
683 };
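/*
 * Editor's illustrative sketch, not part of the original source: a zv
 * object is the header above followed immediately by the compressed bytes,
 * so zv_create() below asks zsmalloc for sizeof(struct zv_hdr) + clen bytes
 * and the payload lives at (char *)zv + sizeof(struct zv_hdr). The
 * hypothetical helper here only restates that size calculation.
 */
static inline size_t zv_example_total_size(unsigned clen)
{
	return sizeof(struct zv_hdr) + clen;
}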
684 
685 /* rudimentary policy limits */
686 /* total number of persistent pages may not exceed this percentage */
687 static unsigned int zv_page_count_policy_percent = 75;
688 /*
689  * byte count defining poor compression; pages with greater zsize will be
690  * rejected
691  */
692 static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
693 /*
694  * byte count defining poor *mean* compression; pages with greater zsize
695  * will be rejected until sufficient better-compressed pages are accepted
696  * driving the mean below this threshold
697  */
698 static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
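/*
 * Editor's note, not part of the original source: assuming the common
 * 4096-byte PAGE_SIZE, the defaults above evaluate to
 * zv_max_zsize = (4096 / 8) * 7 = 3584 bytes and
 * zv_max_mean_zsize = (4096 / 8) * 5 = 2560 bytes, i.e. a persistent page
 * is rejected outright if it compresses to more than 7/8 of a page, and is
 * also rejected whenever the running mean zsize exceeds 5/8 of a page.
 */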
699 
700 static atomic_t zv_curr_dist_counts[NCHUNKS];
701 static atomic_t zv_cumul_dist_counts[NCHUNKS];
702 
703 static unsigned long zv_create(struct zs_pool *pool, uint32_t pool_id,
704  struct tmem_oid *oid, uint32_t index,
705  void *cdata, unsigned clen)
706 {
707  struct zv_hdr *zv;
708  u32 size = clen + sizeof(struct zv_hdr);
709  int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
710  unsigned long handle = 0;
711 
712  BUG_ON(!irqs_disabled());
713  BUG_ON(chunks >= NCHUNKS);
714  handle = zs_malloc(pool, size);
715  if (!handle)
716  goto out;
717  atomic_inc(&zv_curr_dist_counts[chunks]);
718  atomic_inc(&zv_cumul_dist_counts[chunks]);
719  zv = zs_map_object(pool, handle, ZS_MM_WO);
720  zv->index = index;
721  zv->oid = *oid;
722  zv->pool_id = pool_id;
723  zv->size = clen;
724  SET_SENTINEL(zv, ZVH);
725  memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);
726  zs_unmap_object(pool, handle);
727 out:
728  return handle;
729 }
730 
731 static void zv_free(struct zs_pool *pool, unsigned long handle)
732 {
733  unsigned long flags;
734  struct zv_hdr *zv;
735  uint16_t size;
736  int chunks;
737 
738  zv = zs_map_object(pool, handle, ZS_MM_RW);
739  ASSERT_SENTINEL(zv, ZVH);
740  size = zv->size + sizeof(struct zv_hdr);
741  INVERT_SENTINEL(zv, ZVH);
742  zs_unmap_object(pool, handle);
743 
744  chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
745  BUG_ON(chunks >= NCHUNKS);
746  atomic_dec(&zv_curr_dist_counts[chunks]);
747 
748  local_irq_save(flags);
749  zs_free(pool, handle);
750  local_irq_restore(flags);
751 }
752 
753 static void zv_decompress(struct page *page, unsigned long handle)
754 {
755  unsigned int clen = PAGE_SIZE;
756  char *to_va;
757  int ret;
758  struct zv_hdr *zv;
759 
760  zv = zs_map_object(zcache_host.zspool, handle, ZS_MM_RO);
761  BUG_ON(zv->size == 0);
762  ASSERT_SENTINEL(zv, ZVH);
763  to_va = kmap_atomic(page);
764  ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, (char *)zv + sizeof(*zv),
765  zv->size, to_va, &clen);
766  kunmap_atomic(to_va);
767  zs_unmap_object(zcache_host.zspool, handle);
768  BUG_ON(ret);
769  BUG_ON(clen != PAGE_SIZE);
770 }
771 
772 #ifdef CONFIG_SYSFS
773 /*
774  * show a distribution of compression stats for zv pages.
775  */
776 
777 static int zv_curr_dist_counts_show(char *buf)
778 {
779  unsigned long i, n, chunks = 0, sum_total_chunks = 0;
780  char *p = buf;
781 
782  for (i = 0; i < NCHUNKS; i++) {
783  n = atomic_read(&zv_curr_dist_counts[i]);
784  p += sprintf(p, "%lu ", n);
785  chunks += n;
786  sum_total_chunks += i * n;
787  }
788  p += sprintf(p, "mean:%lu\n",
789  chunks == 0 ? 0 : sum_total_chunks / chunks);
790  return p - buf;
791 }
792 
793 static int zv_cumul_dist_counts_show(char *buf)
794 {
795  unsigned long i, n, chunks = 0, sum_total_chunks = 0;
796  char *p = buf;
797 
798  for (i = 0; i < NCHUNKS; i++) {
799  n = atomic_read(&zv_cumul_dist_counts[i]);
800  p += sprintf(p, "%lu ", n);
801  chunks += n;
802  sum_total_chunks += i * n;
803  }
804  p += sprintf(p, "mean:%lu\n",
805  chunks == 0 ? 0 : sum_total_chunks / chunks);
806  return p - buf;
807 }
808 
809 /*
810  * setting zv_max_zsize via sysfs causes all persistent (e.g. swap)
811  * pages that don't compress to less than this value (including metadata
812  * overhead) to be rejected. We don't allow the value to get too close
813  * to PAGE_SIZE.
814  */
815 static ssize_t zv_max_zsize_show(struct kobject *kobj,
816  struct kobj_attribute *attr,
817  char *buf)
818 {
819  return sprintf(buf, "%u\n", zv_max_zsize);
820 }
821 
822 static ssize_t zv_max_zsize_store(struct kobject *kobj,
823  struct kobj_attribute *attr,
824  const char *buf, size_t count)
825 {
826  unsigned long val;
827  int err;
828 
829  if (!capable(CAP_SYS_ADMIN))
830  return -EPERM;
831 
832  err = kstrtoul(buf, 10, &val);
833  if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
834  return -EINVAL;
835  zv_max_zsize = val;
836  return count;
837 }
838 
839 /*
840  * setting zv_max_mean_zsize via sysfs causes all persistent (e.g. swap)
841  * pages that don't compress to less than this value (including metadata
842  * overhead) to be rejected UNLESS the mean compression is also smaller
843  * than this value. In other words, we are load-balancing-by-zsize the
844  * accepted pages. Again, we don't allow the value to get too close
845  * to PAGE_SIZE.
846  */
847 static ssize_t zv_max_mean_zsize_show(struct kobject *kobj,
848  struct kobj_attribute *attr,
849  char *buf)
850 {
851  return sprintf(buf, "%u\n", zv_max_mean_zsize);
852 }
853 
854 static ssize_t zv_max_mean_zsize_store(struct kobject *kobj,
855  struct kobj_attribute *attr,
856  const char *buf, size_t count)
857 {
858  unsigned long val;
859  int err;
860 
861  if (!capable(CAP_SYS_ADMIN))
862  return -EPERM;
863 
864  err = kstrtoul(buf, 10, &val);
865  if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
866  return -EINVAL;
867  zv_max_mean_zsize = val;
868  return count;
869 }
870 
871 /*
872  * setting zv_page_count_policy_percent via sysfs sets an upper bound of
873  * persistent (e.g. swap) pages that will be retained according to:
874  * (zv_page_count_policy_percent * totalram_pages) / 100.
875  * When that limit is reached, further puts will be rejected (until
876  * some pages have been flushed). Note that, due to compression,
877  * this number may exceed 100; it defaults to 75 and we set an
878  * arbitrary limit of 150. A poor choice will almost certainly result
879  * in OOMs, so this value should only be changed prudently.
880  */
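/*
 * Editor's note, not part of the original source: as a purely hypothetical
 * example, on a machine with totalram_pages == 1000000 (about 4GB of 4K
 * pages) the default of 75 caps retained persistent pages at
 * (75 * 1000000) / 100 = 750000 compressed pages; zcache_pampd_create()
 * below checks this bound before accepting a frontswap put.
 */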
881 static ssize_t zv_page_count_policy_percent_show(struct kobject *kobj,
882  struct kobj_attribute *attr,
883  char *buf)
884 {
885  return sprintf(buf, "%u\n", zv_page_count_policy_percent);
886 }
887 
888 static ssize_t zv_page_count_policy_percent_store(struct kobject *kobj,
889  struct kobj_attribute *attr,
890  const char *buf, size_t count)
891 {
892  unsigned long val;
893  int err;
894 
895  if (!capable(CAP_SYS_ADMIN))
896  return -EPERM;
897 
898  err = kstrtoul(buf, 10, &val);
899  if (err || (val == 0) || (val > 150))
900  return -EINVAL;
901  zv_page_count_policy_percent = val;
902  return count;
903 }
904 
905 static struct kobj_attribute zcache_zv_max_zsize_attr = {
906  .attr = { .name = "zv_max_zsize", .mode = 0644 },
907  .show = zv_max_zsize_show,
908  .store = zv_max_zsize_store,
909 };
910 
911 static struct kobj_attribute zcache_zv_max_mean_zsize_attr = {
912  .attr = { .name = "zv_max_mean_zsize", .mode = 0644 },
913  .show = zv_max_mean_zsize_show,
914  .store = zv_max_mean_zsize_store,
915 };
916 
917 static struct kobj_attribute zcache_zv_page_count_policy_percent_attr = {
918  .attr = { .name = "zv_page_count_policy_percent",
919  .mode = 0644 },
920  .show = zv_page_count_policy_percent_show,
921  .store = zv_page_count_policy_percent_store,
922 };
923 #endif
924 
925 /*
926  * zcache core code starts here
927  */
928 
929 /* useful stats not collected by cleancache or frontswap */
930 static unsigned long zcache_flush_total;
931 static unsigned long zcache_flush_found;
932 static unsigned long zcache_flobj_total;
933 static unsigned long zcache_flobj_found;
934 static unsigned long zcache_failed_eph_puts;
935 static unsigned long zcache_failed_pers_puts;
936 
937 /*
938  * Tmem operations assume the poolid implies the invoking client.
939  * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
940  * RAMster has each client numbered by cluster node, and a KVM version
941  * of zcache would have one client per guest and each client might
942  * have a poolid==N.
943  */
944 static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
945 {
946  struct tmem_pool *pool = NULL;
947  struct zcache_client *cli = NULL;
948 
949  cli = get_zcache_client(cli_id);
950  if (!cli)
951  goto out;
952 
953  atomic_inc(&cli->refcount);
954  pool = idr_find(&cli->tmem_pools, poolid);
955  if (pool != NULL)
956  atomic_inc(&pool->refcount);
957 out:
958  return pool;
959 }
960 
961 static void zcache_put_pool(struct tmem_pool *pool)
962 {
963  struct zcache_client *cli = NULL;
964 
965  if (pool == NULL)
966  BUG();
967  cli = pool->client;
968  atomic_dec(&pool->refcount);
969  atomic_dec(&cli->refcount);
970 }
971 
972 int zcache_new_client(uint16_t cli_id)
973 {
974  struct zcache_client *cli;
975  int ret = -1;
976 
977  cli = get_zcache_client(cli_id);
978 
979  if (cli == NULL)
980  goto out;
981  if (cli->allocated)
982  goto out;
983  cli->allocated = true;
984 #ifdef CONFIG_FRONTSWAP
985  cli->zspool = zs_create_pool("zcache", ZCACHE_GFP_MASK);
986  if (cli->zspool == NULL)
987  goto out;
988  idr_init(&cli->tmem_pools);
989 #endif
990  ret = 0;
991 out:
992  return ret;
993 }
994 
995 /* counters for debugging */
996 static unsigned long zcache_failed_get_free_pages;
997 static unsigned long zcache_failed_alloc;
998 static unsigned long zcache_put_to_flush;
999 
1000 /*
1001  * for now, used named slabs so can easily track usage; later can
1002  * either just use kmalloc, or perhaps add a slab-like allocator
1003  * to more carefully manage total memory utilization
1004  */
1005 static struct kmem_cache *zcache_objnode_cache;
1006 static struct kmem_cache *zcache_obj_cache;
1007 static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0);
1008 static unsigned long zcache_curr_obj_count_max;
1009 static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0);
1010 static unsigned long zcache_curr_objnode_count_max;
1011 
1012 /*
1013  * to avoid memory allocation recursion (e.g. due to direct reclaim), we
1014  * preload all necessary data structures so the hostops callbacks never
1015  * actually do a malloc
1016  */
1017 struct zcache_preload {
1018  void *page;
1019  struct tmem_obj *obj;
1020  int nr;
1021  struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];
1022 };
1023 static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };
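/*
 * Editor's note, not part of the original source: the intended flow is that
 * zcache_put_page() calls zcache_do_preload() (with IRQs already disabled)
 * to stock this per-cpu stash using ZCACHE_GFP_MASK, and the tmem hostops
 * callbacks further down (zcache_obj_alloc(), zcache_objnode_alloc(),
 * zcache_get_free_page()) then only pop entries from the stash, so tmem
 * itself never calls an allocator and cannot recurse into reclaim.
 */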
1024 
1025 static int zcache_do_preload(struct tmem_pool *pool)
1026 {
1027  struct zcache_preload *kp;
1028  struct tmem_objnode *objnode;
1029  struct tmem_obj *obj;
1030  void *page;
1031  int ret = -ENOMEM;
1032 
1033  if (unlikely(zcache_objnode_cache == NULL))
1034  goto out;
1035  if (unlikely(zcache_obj_cache == NULL))
1036  goto out;
1037 
1038  /* IRQ has already been disabled. */
1039  kp = &__get_cpu_var(zcache_preloads);
1040  while (kp->nr < ARRAY_SIZE(kp->objnodes)) {
1041  objnode = kmem_cache_alloc(zcache_objnode_cache,
1042  ZCACHE_GFP_MASK);
1043  if (unlikely(objnode == NULL)) {
1044  zcache_failed_alloc++;
1045  goto out;
1046  }
1047 
1048  kp->objnodes[kp->nr++] = objnode;
1049  }
1050 
1051  if (!kp->obj) {
1052  obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
1053  if (unlikely(obj == NULL)) {
1054  zcache_failed_alloc++;
1055  goto out;
1056  }
1057  kp->obj = obj;
1058  }
1059 
1060  if (!kp->page) {
1061  page = (void *)__get_free_page(ZCACHE_GFP_MASK);
1062  if (unlikely(page == NULL)) {
1063  zcache_failed_get_free_pages++;
1064  goto out;
1065  }
1066  kp->page = page;
1067  }
1068 
1069  ret = 0;
1070 out:
1071  return ret;
1072 }
1073 
1074 static void *zcache_get_free_page(void)
1075 {
1076  struct zcache_preload *kp;
1077  void *page;
1078 
1079  kp = &__get_cpu_var(zcache_preloads);
1080  page = kp->page;
1081  BUG_ON(page == NULL);
1082  kp->page = NULL;
1083  return page;
1084 }
1085 
1086 static void zcache_free_page(void *p)
1087 {
1088  free_page((unsigned long)p);
1089 }
1090 
1091 /*
1092  * zcache implementation for tmem host ops
1093  */
1094 
1095 static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
1096 {
1097  struct tmem_objnode *objnode = NULL;
1098  unsigned long count;
1099  struct zcache_preload *kp;
1100 
1101  kp = &__get_cpu_var(zcache_preloads);
1102  if (kp->nr <= 0)
1103  goto out;
1104  objnode = kp->objnodes[kp->nr - 1];
1105  BUG_ON(objnode == NULL);
1106  kp->objnodes[kp->nr - 1] = NULL;
1107  kp->nr--;
1108  count = atomic_inc_return(&zcache_curr_objnode_count);
1109  if (count > zcache_curr_objnode_count_max)
1110  zcache_curr_objnode_count_max = count;
1111 out:
1112  return objnode;
1113 }
1114 
1115 static void zcache_objnode_free(struct tmem_objnode *objnode,
1116  struct tmem_pool *pool)
1117 {
1118  atomic_dec(&zcache_curr_objnode_count);
1119  BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0);
1120  kmem_cache_free(zcache_objnode_cache, objnode);
1121 }
1122 
1123 static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
1124 {
1125  struct tmem_obj *obj = NULL;
1126  unsigned long count;
1127  struct zcache_preload *kp;
1128 
1129  kp = &__get_cpu_var(zcache_preloads);
1130  obj = kp->obj;
1131  BUG_ON(obj == NULL);
1132  kp->obj = NULL;
1133  count = atomic_inc_return(&zcache_curr_obj_count);
1134  if (count > zcache_curr_obj_count_max)
1135  zcache_curr_obj_count_max = count;
1136  return obj;
1137 }
1138 
1139 static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
1140 {
1141  atomic_dec(&zcache_curr_obj_count);
1142  BUG_ON(atomic_read(&zcache_curr_obj_count) < 0);
1143  kmem_cache_free(zcache_obj_cache, obj);
1144 }
1145 
1146 static struct tmem_hostops zcache_hostops = {
1147  .obj_alloc = zcache_obj_alloc,
1148  .obj_free = zcache_obj_free,
1149  .objnode_alloc = zcache_objnode_alloc,
1150  .objnode_free = zcache_objnode_free,
1151 };
1152 
1153 /*
1154  * zcache implementations for PAM page descriptor ops
1155  */
1156 
1157 static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0);
1158 static unsigned long zcache_curr_eph_pampd_count_max;
1159 static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0);
1160 static unsigned long zcache_curr_pers_pampd_count_max;
1161 
1162 /* forward reference */
1163 static int zcache_compress(struct page *from, void **out_va, unsigned *out_len);
1164 
1165 static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
1166  struct tmem_pool *pool, struct tmem_oid *oid,
1167  uint32_t index)
1168 {
1169  void *pampd = NULL, *cdata;
1170  unsigned clen;
1171  int ret;
1172  unsigned long count;
1173  struct page *page = (struct page *)(data);
1174  struct zcache_client *cli = pool->client;
1175  uint16_t client_id = get_client_id_from_client(cli);
1176  unsigned long zv_mean_zsize;
1177  unsigned long curr_pers_pampd_count;
1178  u64 total_zsize;
1179 
1180  if (eph) {
1181  ret = zcache_compress(page, &cdata, &clen);
1182  if (ret == 0)
1183  goto out;
1184  if (clen == 0 || clen > zbud_max_buddy_size()) {
1185  zcache_compress_poor++;
1186  goto out;
1187  }
1188  pampd = (void *)zbud_create(client_id, pool->pool_id, oid,
1189  index, page, cdata, clen);
1190  if (pampd != NULL) {
1191  count = atomic_inc_return(&zcache_curr_eph_pampd_count);
1192  if (count > zcache_curr_eph_pampd_count_max)
1193  zcache_curr_eph_pampd_count_max = count;
1194  }
1195  } else {
1196  curr_pers_pampd_count =
1197  atomic_read(&zcache_curr_pers_pampd_count);
1198  if (curr_pers_pampd_count >
1199  (zv_page_count_policy_percent * totalram_pages) / 100)
1200  goto out;
1201  ret = zcache_compress(page, &cdata, &clen);
1202  if (ret == 0)
1203  goto out;
1204  /* reject if compression is too poor */
1205  if (clen > zv_max_zsize) {
1206  zcache_compress_poor++;
1207  goto out;
1208  }
1209  /* reject if mean compression is too poor */
1210  if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
1211  total_zsize = zs_get_total_size_bytes(cli->zspool);
1212  zv_mean_zsize = div_u64(total_zsize,
1213  curr_pers_pampd_count);
1214  if (zv_mean_zsize > zv_max_mean_zsize) {
1215  zcache_mean_compress_poor++;
1216  goto out;
1217  }
1218  }
1219  pampd = (void *)zv_create(cli->zspool, pool->pool_id,
1220  oid, index, cdata, clen);
1221  if (pampd == NULL)
1222  goto out;
1223  count = atomic_inc_return(&zcache_curr_pers_pampd_count);
1224  if (count > zcache_curr_pers_pampd_count_max)
1225  zcache_curr_pers_pampd_count_max = count;
1226  }
1227 out:
1228  return pampd;
1229 }
1230 
1231 /*
1232  * fill the pageframe corresponding to the struct page with the data
1233  * from the passed pampd
1234  */
1235 static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
1236  void *pampd, struct tmem_pool *pool,
1237  struct tmem_oid *oid, uint32_t index)
1238 {
1239  int ret = 0;
1240 
1241  BUG_ON(is_ephemeral(pool));
1242  zv_decompress((struct page *)(data), (unsigned long)pampd);
1243  return ret;
1244 }
1245 
1246 /*
1247  * fill the pageframe corresponding to the struct page with the data
1248  * from the passed pampd
1249  */
1250 static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
1251  void *pampd, struct tmem_pool *pool,
1252  struct tmem_oid *oid, uint32_t index)
1253 {
1254  BUG_ON(!is_ephemeral(pool));
1255  if (zbud_decompress((struct page *)(data), pampd) < 0)
1256  return -EINVAL;
1257  zbud_free_and_delist((struct zbud_hdr *)pampd);
1258  atomic_dec(&zcache_curr_eph_pampd_count);
1259  return 0;
1260 }
1261 
1262 /*
1263  * free the pampd and remove it from any zcache lists
1264  * pampd must no longer be pointed to from any tmem data structures!
1265  */
1266 static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
1267  struct tmem_oid *oid, uint32_t index)
1268 {
1269  struct zcache_client *cli = pool->client;
1270 
1271  if (is_ephemeral(pool)) {
1272  zbud_free_and_delist((struct zbud_hdr *)pampd);
1273  atomic_dec(&zcache_curr_eph_pampd_count);
1274  BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);
1275  } else {
1276  zv_free(cli->zspool, (unsigned long)pampd);
1277  atomic_dec(&zcache_curr_pers_pampd_count);
1278  BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);
1279  }
1280 }
1281 
1282 static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj)
1283 {
1284 }
1285 
1286 static void zcache_pampd_new_obj(struct tmem_obj *obj)
1287 {
1288 }
1289 
1290 static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj)
1291 {
1292  return -1;
1293 }
1294 
1295 static bool zcache_pampd_is_remote(void *pampd)
1296 {
1297  return false;
1298 }
1299 
1300 static struct tmem_pamops zcache_pamops = {
1301  .create = zcache_pampd_create,
1302  .get_data = zcache_pampd_get_data,
1303  .get_data_and_free = zcache_pampd_get_data_and_free,
1304  .free = zcache_pampd_free,
1305  .free_obj = zcache_pampd_free_obj,
1306  .new_obj = zcache_pampd_new_obj,
1307  .replace_in_obj = zcache_pampd_replace_in_obj,
1308  .is_remote = zcache_pampd_is_remote,
1309 };
1310 
1311 /*
1312  * zcache compression/decompression and related per-cpu stuff
1313  */
1314 
1315 static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
1316 #define ZCACHE_DSTMEM_ORDER 1
1317 
1318 static int zcache_compress(struct page *from, void **out_va, unsigned *out_len)
1319 {
1320  int ret = 0;
1321  unsigned char *dmem = __get_cpu_var(zcache_dstmem);
1322  char *from_va;
1323 
1324  BUG_ON(!irqs_disabled());
1325  if (unlikely(dmem == NULL))
1326  goto out; /* no buffer or no compressor so can't compress */
1327  *out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER;
1328  from_va = kmap_atomic(from);
1329  mb();
1330  ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem,
1331  out_len);
1332  BUG_ON(ret);
1333  *out_va = dmem;
1334  kunmap_atomic(from_va);
1335  ret = 1;
1336 out:
1337  return ret;
1338 }
1339 
1340 static int zcache_comp_cpu_up(int cpu)
1341 {
1342  struct crypto_comp *tfm;
1343 
1344  tfm = crypto_alloc_comp(zcache_comp_name, 0, 0);
1345  if (IS_ERR(tfm))
1346  return NOTIFY_BAD;
1347  *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
1348  return NOTIFY_OK;
1349 }
1350 
1351 static void zcache_comp_cpu_down(int cpu)
1352 {
1353  struct crypto_comp *tfm;
1354 
1355  tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
1356  crypto_free_comp(tfm);
1357  *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
1358 }
1359 
1360 static int zcache_cpu_notifier(struct notifier_block *nb,
1361  unsigned long action, void *pcpu)
1362 {
1363  int ret, cpu = (long)pcpu;
1364  struct zcache_preload *kp;
1365 
1366  switch (action) {
1367  case CPU_UP_PREPARE:
1368  ret = zcache_comp_cpu_up(cpu);
1369  if (ret != NOTIFY_OK) {
1370  pr_err("zcache: can't allocate compressor transform\n");
1371  return ret;
1372  }
1373  per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
1374  GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER);
1375  break;
1376  case CPU_DEAD:
1377  case CPU_UP_CANCELED:
1378  zcache_comp_cpu_down(cpu);
1379  free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
1380  ZCACHE_DSTMEM_ORDER);
1381  per_cpu(zcache_dstmem, cpu) = NULL;
1382  kp = &per_cpu(zcache_preloads, cpu);
1383  while (kp->nr) {
1384  kmem_cache_free(zcache_objnode_cache,
1385  kp->objnodes[kp->nr - 1]);
1386  kp->objnodes[kp->nr - 1] = NULL;
1387  kp->nr--;
1388  }
1389  if (kp->obj) {
1390  kmem_cache_free(zcache_obj_cache, kp->obj);
1391  kp->obj = NULL;
1392  }
1393  if (kp->page) {
1394  free_page((unsigned long)kp->page);
1395  kp->page = NULL;
1396  }
1397  break;
1398  default:
1399  break;
1400  }
1401  return NOTIFY_OK;
1402 }
1403 
1404 static struct notifier_block zcache_cpu_notifier_block = {
1405  .notifier_call = zcache_cpu_notifier
1406 };
1407 
1408 #ifdef CONFIG_SYSFS
1409 #define ZCACHE_SYSFS_RO(_name) \
1410  static ssize_t zcache_##_name##_show(struct kobject *kobj, \
1411  struct kobj_attribute *attr, char *buf) \
1412  { \
1413  return sprintf(buf, "%lu\n", zcache_##_name); \
1414  } \
1415  static struct kobj_attribute zcache_##_name##_attr = { \
1416  .attr = { .name = __stringify(_name), .mode = 0444 }, \
1417  .show = zcache_##_name##_show, \
1418  }
1419 
1420 #define ZCACHE_SYSFS_RO_ATOMIC(_name) \
1421  static ssize_t zcache_##_name##_show(struct kobject *kobj, \
1422  struct kobj_attribute *attr, char *buf) \
1423  { \
1424  return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \
1425  } \
1426  static struct kobj_attribute zcache_##_name##_attr = { \
1427  .attr = { .name = __stringify(_name), .mode = 0444 }, \
1428  .show = zcache_##_name##_show, \
1429  }
1430 
1431 #define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \
1432  static ssize_t zcache_##_name##_show(struct kobject *kobj, \
1433  struct kobj_attribute *attr, char *buf) \
1434  { \
1435  return _func(buf); \
1436  } \
1437  static struct kobj_attribute zcache_##_name##_attr = { \
1438  .attr = { .name = __stringify(_name), .mode = 0444 }, \
1439  .show = zcache_##_name##_show, \
1440  }
1441 
1442 ZCACHE_SYSFS_RO(curr_obj_count_max);
1443 ZCACHE_SYSFS_RO(curr_objnode_count_max);
1444 ZCACHE_SYSFS_RO(flush_total);
1445 ZCACHE_SYSFS_RO(flush_found);
1446 ZCACHE_SYSFS_RO(flobj_total);
1447 ZCACHE_SYSFS_RO(flobj_found);
1448 ZCACHE_SYSFS_RO(failed_eph_puts);
1449 ZCACHE_SYSFS_RO(failed_pers_puts);
1450 ZCACHE_SYSFS_RO(zbud_curr_zbytes);
1451 ZCACHE_SYSFS_RO(zbud_cumul_zpages);
1452 ZCACHE_SYSFS_RO(zbud_cumul_zbytes);
1453 ZCACHE_SYSFS_RO(zbud_buddied_count);
1454 ZCACHE_SYSFS_RO(zbpg_unused_list_count);
1455 ZCACHE_SYSFS_RO(evicted_raw_pages);
1456 ZCACHE_SYSFS_RO(evicted_unbuddied_pages);
1457 ZCACHE_SYSFS_RO(evicted_buddied_pages);
1458 ZCACHE_SYSFS_RO(failed_get_free_pages);
1459 ZCACHE_SYSFS_RO(failed_alloc);
1460 ZCACHE_SYSFS_RO(put_to_flush);
1461 ZCACHE_SYSFS_RO(compress_poor);
1462 ZCACHE_SYSFS_RO(mean_compress_poor);
1463 ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);
1464 ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);
1465 ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);
1466 ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count);
1467 ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts,
1468  zbud_show_unbuddied_list_counts);
1469 ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts,
1470  zbud_show_cumul_chunk_counts);
1471 ZCACHE_SYSFS_RO_CUSTOM(zv_curr_dist_counts,
1472  zv_curr_dist_counts_show);
1473 ZCACHE_SYSFS_RO_CUSTOM(zv_cumul_dist_counts,
1474  zv_cumul_dist_counts_show);
1475 
1476 static struct attribute *zcache_attrs[] = {
1477  &zcache_curr_obj_count_attr.attr,
1478  &zcache_curr_obj_count_max_attr.attr,
1479  &zcache_curr_objnode_count_attr.attr,
1480  &zcache_curr_objnode_count_max_attr.attr,
1481  &zcache_flush_total_attr.attr,
1482  &zcache_flobj_total_attr.attr,
1483  &zcache_flush_found_attr.attr,
1484  &zcache_flobj_found_attr.attr,
1485  &zcache_failed_eph_puts_attr.attr,
1486  &zcache_failed_pers_puts_attr.attr,
1487  &zcache_compress_poor_attr.attr,
1488  &zcache_mean_compress_poor_attr.attr,
1489  &zcache_zbud_curr_raw_pages_attr.attr,
1490  &zcache_zbud_curr_zpages_attr.attr,
1491  &zcache_zbud_curr_zbytes_attr.attr,
1492  &zcache_zbud_cumul_zpages_attr.attr,
1493  &zcache_zbud_cumul_zbytes_attr.attr,
1494  &zcache_zbud_buddied_count_attr.attr,
1495  &zcache_zbpg_unused_list_count_attr.attr,
1496  &zcache_evicted_raw_pages_attr.attr,
1497  &zcache_evicted_unbuddied_pages_attr.attr,
1498  &zcache_evicted_buddied_pages_attr.attr,
1499  &zcache_failed_get_free_pages_attr.attr,
1500  &zcache_failed_alloc_attr.attr,
1501  &zcache_put_to_flush_attr.attr,
1502  &zcache_zbud_unbuddied_list_counts_attr.attr,
1503  &zcache_zbud_cumul_chunk_counts_attr.attr,
1504  &zcache_zv_curr_dist_counts_attr.attr,
1505  &zcache_zv_cumul_dist_counts_attr.attr,
1506  &zcache_zv_max_zsize_attr.attr,
1507  &zcache_zv_max_mean_zsize_attr.attr,
1508  &zcache_zv_page_count_policy_percent_attr.attr,
1509  NULL,
1510 };
1511 
1512 static struct attribute_group zcache_attr_group = {
1513  .attrs = zcache_attrs,
1514  .name = "zcache",
1515 };
1516 
1517 #endif /* CONFIG_SYSFS */
1518 /*
1519  * When zcache is disabled ("frozen"), pools can be created and destroyed,
1520  * but all puts (and thus all other operations that require memory allocation)
1521  * must fail. If zcache is unfrozen, accepts puts, and is then frozen again,
1522  * data consistency requires all puts while frozen to be converted into
1523  * flushes.
1524  */
1525 static bool zcache_freeze;
1526 
1527 /*
1528  * zcache shrinker interface (only useful for ephemeral pages, so zbud only)
1529  */
1530 static int shrink_zcache_memory(struct shrinker *shrink,
1531  struct shrink_control *sc)
1532 {
1533  int ret = -1;
1534  int nr = sc->nr_to_scan;
1535  gfp_t gfp_mask = sc->gfp_mask;
1536 
1537  if (nr >= 0) {
1538  if (!(gfp_mask & __GFP_FS))
1539  /* does this case really need to be skipped? */
1540  goto out;
1541  zbud_evict_pages(nr);
1542  }
1543  ret = (int)atomic_read(&zcache_zbud_curr_raw_pages);
1544 out:
1545  return ret;
1546 }
1547 
1548 static struct shrinker zcache_shrinker = {
1549  .shrink = shrink_zcache_memory,
1550  .seeks = DEFAULT_SEEKS,
1551 };
1552 
1553 /*
1554  * zcache shims between cleancache/frontswap ops and tmem
1555  */
1556 
1557 static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
1558  uint32_t index, struct page *page)
1559 {
1560  struct tmem_pool *pool;
1561  int ret = -1;
1562 
1563  BUG_ON(!irqs_disabled());
1564  pool = zcache_get_pool_by_id(cli_id, pool_id);
1565  if (unlikely(pool == NULL))
1566  goto out;
1567  if (!zcache_freeze && zcache_do_preload(pool) == 0) {
1568  /* preload does preempt_disable on success */
1569  ret = tmem_put(pool, oidp, index, (char *)(page),
1570  PAGE_SIZE, 0, is_ephemeral(pool));
1571  if (ret < 0) {
1572  if (is_ephemeral(pool))
1573  zcache_failed_eph_puts++;
1574  else
1575  zcache_failed_pers_puts++;
1576  }
1577  } else {
1578  zcache_put_to_flush++;
1579  if (atomic_read(&pool->obj_count) > 0)
1580  /* the put fails whether the flush succeeds or not */
1581  (void)tmem_flush_page(pool, oidp, index);
1582  }
1583 
1584  zcache_put_pool(pool);
1585 out:
1586  return ret;
1587 }
1588 
1589 static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
1590  uint32_t index, struct page *page)
1591 {
1592  struct tmem_pool *pool;
1593  int ret = -1;
1594  unsigned long flags;
1595  size_t size = PAGE_SIZE;
1596 
1597  local_irq_save(flags);
1598  pool = zcache_get_pool_by_id(cli_id, pool_id);
1599  if (likely(pool != NULL)) {
1600  if (atomic_read(&pool->obj_count) > 0)
1601  ret = tmem_get(pool, oidp, index, (char *)(page),
1602  &size, 0, is_ephemeral(pool));
1603  zcache_put_pool(pool);
1604  }
1605  local_irq_restore(flags);
1606  return ret;
1607 }
1608 
1609 static int zcache_flush_page(int cli_id, int pool_id,
1610  struct tmem_oid *oidp, uint32_t index)
1611 {
1612  struct tmem_pool *pool;
1613  int ret = -1;
1614  unsigned long flags;
1615 
1616  local_irq_save(flags);
1617  zcache_flush_total++;
1618  pool = zcache_get_pool_by_id(cli_id, pool_id);
1619  if (likely(pool != NULL)) {
1620  if (atomic_read(&pool->obj_count) > 0)
1621  ret = tmem_flush_page(pool, oidp, index);
1622  zcache_put_pool(pool);
1623  }
1624  if (ret >= 0)
1625  zcache_flush_found++;
1626  local_irq_restore(flags);
1627  return ret;
1628 }
1629 
1630 static int zcache_flush_object(int cli_id, int pool_id,
1631  struct tmem_oid *oidp)
1632 {
1633  struct tmem_pool *pool;
1634  int ret = -1;
1635  unsigned long flags;
1636 
1637  local_irq_save(flags);
1638  zcache_flobj_total++;
1639  pool = zcache_get_pool_by_id(cli_id, pool_id);
1640  if (likely(pool != NULL)) {
1641  if (atomic_read(&pool->obj_count) > 0)
1642  ret = tmem_flush_object(pool, oidp);
1643  zcache_put_pool(pool);
1644  }
1645  if (ret >= 0)
1646  zcache_flobj_found++;
1647  local_irq_restore(flags);
1648  return ret;
1649 }
1650 
1651 static int zcache_destroy_pool(int cli_id, int pool_id)
1652 {
1653  struct tmem_pool *pool = NULL;
1654  struct zcache_client *cli;
1655  int ret = -1;
1656 
1657  if (pool_id < 0)
1658  goto out;
1659 
1660  cli = get_zcache_client(cli_id);
1661  if (cli == NULL)
1662  goto out;
1663 
1664  atomic_inc(&cli->refcount);
1665  pool = idr_find(&cli->tmem_pools, pool_id);
1666  if (pool == NULL)
1667  goto out;
1668  idr_remove(&cli->tmem_pools, pool_id);
1669  /* wait for pool activity on other cpus to quiesce */
1670  while (atomic_read(&pool->refcount) != 0)
1671  ;
1672  atomic_dec(&cli->refcount);
1673  local_bh_disable();
1674  ret = tmem_destroy_pool(pool);
1675  local_bh_enable();
1676  kfree(pool);
1677  pr_info("zcache: destroyed pool id=%d, cli_id=%d\n",
1678  pool_id, cli_id);
1679 out:
1680  return ret;
1681 }
1682 
1683 static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
1684 {
1685  int poolid = -1;
1686  struct tmem_pool *pool;
1687  struct zcache_client *cli = NULL;
1688  int r;
1689 
1690  cli = get_zcache_client(cli_id);
1691  if (cli == NULL)
1692  goto out;
1693 
1694  atomic_inc(&cli->refcount);
1695  pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC);
1696  if (pool == NULL) {
1697  pr_info("zcache: pool creation failed: out of memory\n");
1698  goto out;
1699  }
1700 
1701  do {
1702  r = idr_pre_get(&cli->tmem_pools, GFP_ATOMIC);
1703  if (r != 1) {
1704  kfree(pool);
1705  pr_info("zcache: pool creation failed: out of memory\n");
1706  goto out;
1707  }
1708  r = idr_get_new(&cli->tmem_pools, pool, &poolid);
1709  } while (r == -EAGAIN);
1710  if (r) {
1711  pr_info("zcache: pool creation failed: error %d\n", r);
1712  kfree(pool);
1713  goto out;
1714  }
1715 
1716  atomic_set(&pool->refcount, 0);
1717  pool->client = cli;
1718  pool->pool_id = poolid;
1719  tmem_new_pool(pool, flags);
1720  pr_info("zcache: created %s tmem pool, id=%d, client=%d\n",
1721  flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
1722  poolid, cli_id);
1723 out:
1724  if (cli != NULL)
1725  atomic_dec(&cli->refcount);
1726  return poolid;
1727 }
1728 
1729 /**********
1730  * Two kernel functionalities currently can be layered on top of tmem.
1731  * These are "cleancache" which is used as a second-chance cache for clean
1732  * page cache pages; and "frontswap" which is used for swap pages
1733  * to avoid writes to disk. A generic "shim" is provided here for each
1734  * to translate in-kernel semantics to zcache semantics.
1735  */
1736 
1737 #ifdef CONFIG_CLEANCACHE
1738 static void zcache_cleancache_put_page(int pool_id,
1739  struct cleancache_filekey key,
1740  pgoff_t index, struct page *page)
1741 {
1742  u32 ind = (u32) index;
1743  struct tmem_oid oid = *(struct tmem_oid *)&key;
1744 
1745  if (likely(ind == index))
1746  (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page);
1747 }
1748 
1749 static int zcache_cleancache_get_page(int pool_id,
1750  struct cleancache_filekey key,
1751  pgoff_t index, struct page *page)
1752 {
1753  u32 ind = (u32) index;
1754  struct tmem_oid oid = *(struct tmem_oid *)&key;
1755  int ret = -1;
1756 
1757  if (likely(ind == index))
1758  ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page);
1759  return ret;
1760 }
1761 
1762 static void zcache_cleancache_flush_page(int pool_id,
1763  struct cleancache_filekey key,
1764  pgoff_t index)
1765 {
1766  u32 ind = (u32) index;
1767  struct tmem_oid oid = *(struct tmem_oid *)&key;
1768 
1769  if (likely(ind == index))
1770  (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
1771 }
1772 
1773 static void zcache_cleancache_flush_inode(int pool_id,
1774  struct cleancache_filekey key)
1775 {
1776  struct tmem_oid oid = *(struct tmem_oid *)&key;
1777 
1778  (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
1779 }
1780 
1781 static void zcache_cleancache_flush_fs(int pool_id)
1782 {
1783  if (pool_id >= 0)
1784  (void)zcache_destroy_pool(LOCAL_CLIENT, pool_id);
1785 }
1786 
1787 static int zcache_cleancache_init_fs(size_t pagesize)
1788 {
1789  BUG_ON(sizeof(struct cleancache_filekey) !=
1790  sizeof(struct tmem_oid));
1791  BUG_ON(pagesize != PAGE_SIZE);
1792  return zcache_new_pool(LOCAL_CLIENT, 0);
1793 }
1794 
1795 static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
1796 {
1797  /* shared pools are unsupported and map to private */
1798  BUG_ON(sizeof(struct cleancache_filekey) !=
1799  sizeof(struct tmem_oid));
1800  BUG_ON(pagesize != PAGE_SIZE);
1801  return zcache_new_pool(LOCAL_CLIENT, 0);
1802 }
1803 
1804 static struct cleancache_ops zcache_cleancache_ops = {
1805  .put_page = zcache_cleancache_put_page,
1806  .get_page = zcache_cleancache_get_page,
1807  .invalidate_page = zcache_cleancache_flush_page,
1808  .invalidate_inode = zcache_cleancache_flush_inode,
1809  .invalidate_fs = zcache_cleancache_flush_fs,
1810  .init_shared_fs = zcache_cleancache_init_shared_fs,
1811  .init_fs = zcache_cleancache_init_fs
1812 };
1813 
1814 struct cleancache_ops zcache_cleancache_register_ops(void)
1815 {
1816  struct cleancache_ops old_ops =
1817  cleancache_register_ops(&zcache_cleancache_ops);
1818 
1819  return old_ops;
1820 }
1821 #endif
1822 
1823 #ifdef CONFIG_FRONTSWAP
1824 /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
1825 static int zcache_frontswap_poolid = -1;
1826 
1827 /*
1828  * Swizzling increases objects per swaptype, increasing tmem concurrency
1829  * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
1830  * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
1831  * frontswap_load(), but has side-effects. Hence using 8.
1832  */
1833 #define SWIZ_BITS 8
1834 #define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
1835 #define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
1836 #define iswiz(_ind) (_ind >> SWIZ_BITS)
1837 
1838 static inline struct tmem_oid oswiz(unsigned type, u32 ind)
1839 {
1840  struct tmem_oid oid = { .oid = { 0 } };
1841  oid.oid[0] = _oswiz(type, ind);
1842  return oid;
1843 }
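/*
 * Editor's illustrative sketch, not part of the original source: with
 * SWIZ_BITS == 8, a swap offset such as 0x12345 is split so that
 * _oswiz(type, 0x12345) folds the low byte (0x45) into oid.oid[0] along
 * with the swap type, while iswiz(0x12345) == 0x123 becomes the tmem page
 * index; this yields 256 distinct oids per swap type and so spreads
 * concurrent frontswap operations across more tmem objects. The
 * hypothetical helper below only restates that split.
 */
static inline void zcache_example_swiz(unsigned type, u32 ind,
				       struct tmem_oid *oidp, u32 *indexp)
{
	*oidp = oswiz(type, ind);	/* low SWIZ_BITS bits + type -> oid */
	*indexp = iswiz(ind);		/* remaining high bits -> page index */
}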
1844 
1845 static int zcache_frontswap_store(unsigned type, pgoff_t offset,
1846  struct page *page)
1847 {
1848  u64 ind64 = (u64)offset;
1849  u32 ind = (u32)offset;
1850  struct tmem_oid oid = oswiz(type, ind);
1851  int ret = -1;
1852  unsigned long flags;
1853 
1854  BUG_ON(!PageLocked(page));
1855  if (likely(ind64 == ind)) {
1856  local_irq_save(flags);
1857  ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1858  &oid, iswiz(ind), page);
1859  local_irq_restore(flags);
1860  }
1861  return ret;
1862 }
1863 
1864 /* returns 0 if the page was successfully gotten from frontswap, -1 if
1865  * it was not present (should never happen!) */
1866 static int zcache_frontswap_load(unsigned type, pgoff_t offset,
1867  struct page *page)
1868 {
1869  u64 ind64 = (u64)offset;
1870  u32 ind = (u32)offset;
1871  struct tmem_oid oid = oswiz(type, ind);
1872  int ret = -1;
1873 
1874  BUG_ON(!PageLocked(page));
1875  if (likely(ind64 == ind))
1876  ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1877  &oid, iswiz(ind), page);
1878  return ret;
1879 }
1880 
1881 /* flush a single page from frontswap */
1882 static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
1883 {
1884  u64 ind64 = (u64)offset;
1885  u32 ind = (u32)offset;
1886  struct tmem_oid oid = oswiz(type, ind);
1887 
1888  if (likely(ind64 == ind))
1889  (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
1890  &oid, iswiz(ind));
1891 }
1892 
1893 /* flush all pages from the passed swaptype */
1894 static void zcache_frontswap_flush_area(unsigned type)
1895 {
1896  struct tmem_oid oid;
1897  int ind;
1898 
1899  for (ind = SWIZ_MASK; ind >= 0; ind--) {
1900  oid = oswiz(type, ind);
1901  (void)zcache_flush_object(LOCAL_CLIENT,
1902  zcache_frontswap_poolid, &oid);
1903  }
1904 }
1905 
1906 static void zcache_frontswap_init(unsigned ignored)
1907 {
1908  /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
1909  if (zcache_frontswap_poolid < 0)
1910  zcache_frontswap_poolid =
1911  zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST);
1912 }
1913 
1914 static struct frontswap_ops zcache_frontswap_ops = {
1915  .store = zcache_frontswap_store,
1916  .load = zcache_frontswap_load,
1917  .invalidate_page = zcache_frontswap_flush_page,
1918  .invalidate_area = zcache_frontswap_flush_area,
1919  .init = zcache_frontswap_init
1920 };
1921 
1922 struct frontswap_ops zcache_frontswap_register_ops(void)
1923 {
1924  struct frontswap_ops old_ops =
1925  frontswap_register_ops(&zcache_frontswap_ops);
1926 
1927  return old_ops;
1928 }
1929 #endif
1930 
1931 /*
1932  * zcache initialization
1933  * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
1934  * NOTHING HAPPENS!
1935  */
1936 
1937 static int zcache_enabled;
1938 
1939 static int __init enable_zcache(char *s)
1940 {
1941  zcache_enabled = 1;
1942  return 1;
1943 }
1944 __setup("zcache", enable_zcache);
1945 
1946 /* allow independent dynamic disabling of cleancache and frontswap */
1947 
1948 static int use_cleancache = 1;
1949 
1950 static int __init no_cleancache(char *s)
1951 {
1952  use_cleancache = 0;
1953  return 1;
1954 }
1955 
1956 __setup("nocleancache", no_cleancache);
1957 
1958 static int use_frontswap = 1;
1959 
1960 static int __init no_frontswap(char *s)
1961 {
1962  use_frontswap = 0;
1963  return 1;
1964 }
1965 
1966 __setup("nofrontswap", no_frontswap);
1967 
1968 static int __init enable_zcache_compressor(char *s)
1969 {
1970  strlcpy(zcache_comp_name, s, sizeof(zcache_comp_name));
1971  zcache_enabled = 1;
1972  return 1;
1973 }
1974 __setup("zcache=", enable_zcache_compressor);
1975 
1976 
1977 static int __init zcache_comp_init(void)
1978 {
1979  int ret = 0;
1980 
1981  /* check crypto algorithm */
1982  if (*zcache_comp_name != '\0') {
1983  ret = crypto_has_comp(zcache_comp_name, 0, 0);
1984  if (!ret)
1985  pr_info("zcache: %s not supported\n",
1986  zcache_comp_name);
1987  }
1988  if (!ret)
1989  strcpy(zcache_comp_name, "lzo");
1990  ret = crypto_has_comp(zcache_comp_name, 0, 0);
1991  if (!ret) {
1992  ret = 1;
1993  goto out;
1994  }
1995  pr_info("zcache: using %s compressor\n", zcache_comp_name);
1996 
1997  /* alloc percpu transforms */
1998  ret = 0;
1999  zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
2000  if (!zcache_comp_pcpu_tfms)
2001  ret = 1;
2002 out:
2003  return ret;
2004 }
2005 
2006 static int __init zcache_init(void)
2007 {
2008  int ret = 0;
2009 
2010 #ifdef CONFIG_SYSFS
2011  ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
2012  if (ret) {
2013  pr_err("zcache: can't create sysfs\n");
2014  goto out;
2015  }
2016 #endif /* CONFIG_SYSFS */
2017 
2018  if (zcache_enabled) {
2019  unsigned int cpu;
2020 
2021  tmem_register_hostops(&zcache_hostops);
2022  tmem_register_pamops(&zcache_pamops);
2023  ret = register_cpu_notifier(&zcache_cpu_notifier_block);
2024  if (ret) {
2025  pr_err("zcache: can't register cpu notifier\n");
2026  goto out;
2027  }
2028  ret = zcache_comp_init();
2029  if (ret) {
2030  pr_err("zcache: compressor initialization failed\n");
2031  goto out;
2032  }
2033  for_each_online_cpu(cpu) {
2034  void *pcpu = (void *)(long)cpu;
2035  zcache_cpu_notifier(&zcache_cpu_notifier_block,
2036  CPU_UP_PREPARE, pcpu);
2037  }
2038  }
2039  zcache_objnode_cache = kmem_cache_create("zcache_objnode",
2040  sizeof(struct tmem_objnode), 0, 0, NULL);
2041  zcache_obj_cache = kmem_cache_create("zcache_obj",
2042  sizeof(struct tmem_obj), 0, 0, NULL);
2043  ret = zcache_new_client(LOCAL_CLIENT);
2044  if (ret) {
2045  pr_err("zcache: can't create client\n");
2046  goto out;
2047  }
2048 
2049 #ifdef CONFIG_CLEANCACHE
2050  if (zcache_enabled && use_cleancache) {
2051  struct cleancache_ops old_ops;
2052 
2053  zbud_init();
2054  register_shrinker(&zcache_shrinker);
2055  old_ops = zcache_cleancache_register_ops();
2056  pr_info("zcache: cleancache enabled using kernel "
2057  "transcendent memory and compression buddies\n");
2058  if (old_ops.init_fs != NULL)
2059  pr_warning("zcache: cleancache_ops overridden\n");
2060  }
2061 #endif
2062 #ifdef CONFIG_FRONTSWAP
2063  if (zcache_enabled && use_frontswap) {
2064  struct frontswap_ops old_ops;
2065 
2066  old_ops = zcache_frontswap_register_ops();
2067  pr_info("zcache: frontswap enabled using kernel "
2068  "transcendent memory and zsmalloc\n");
2069  if (old_ops.init != NULL)
2070  pr_warning("zcache: frontswap_ops overridden\n");
2071  }
2072 #endif
2073 out:
2074  return ret;
2075 }
2076 
2077 module_init(zcache_init)