Linux Kernel 3.7.1
fs/logfs/readwrite.c
/*
 * fs/logfs/readwrite.c
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <[email protected]>
 *
 *
 * Actually contains eight sets of very similar functions:
 * read		read blocks from a file
 * seek_hole	find next hole
 * seek_data	find next data block
 * valid	check whether a block still belongs to a file
 * write	write blocks to a file
 * delete	delete a block (for directories and ifile)
 * rewrite	move existing blocks of a file to a new location (gc helper)
 * truncate	truncate a file
 */
#include "logfs.h"
#include <linux/sched.h>
#include <linux/slab.h>

static u64 adjust_bix(u64 bix, level_t level)
{
	switch (level) {
	case 0:
		return bix;
	case LEVEL(1):
		return max_t(u64, bix, I0_BLOCKS);
	case LEVEL(2):
		return max_t(u64, bix, I1_BLOCKS);
	case LEVEL(3):
		return max_t(u64, bix, I2_BLOCKS);
	case LEVEL(4):
		return max_t(u64, bix, I3_BLOCKS);
	case LEVEL(5):
		return max_t(u64, bix, I4_BLOCKS);
	default:
		WARN_ON(1);
		return bix;
	}
}

static inline u64 maxbix(u8 height)
{
	return 1ULL << (LOGFS_BLOCK_BITS * height);
}

/*
 * Page cache indices are split in two halves: indices below INDIRECT_BIT
 * address data blocks directly by block index (bix), indices with
 * INDIRECT_BIT set address indirect blocks.  For those, the level is
 * stored just below INDIRECT_BIT and the remaining bits hold the block
 * index, shifted right by LOGFS_BLOCK_BITS per level.
 */
#define ARCH_SHIFT	(BITS_PER_LONG - 32)
#define INDIRECT_BIT	(0x80000000UL << ARCH_SHIFT)
#define LEVEL_SHIFT	(28 + ARCH_SHIFT)
static inline pgoff_t first_indirect_block(void)
{
	return INDIRECT_BIT | (1ULL << LEVEL_SHIFT);
}

pgoff_t logfs_pack_index(u64 bix, level_t level)
{
	pgoff_t index;

	BUG_ON(bix >= INDIRECT_BIT);
	if (level == 0)
		return bix;

	index = INDIRECT_BIT;
	index |= (__force long)level << LEVEL_SHIFT;
	index |= bix >> ((__force u8)level * LOGFS_BLOCK_BITS);
	return index;
}

void logfs_unpack_index(pgoff_t index, u64 *bix, level_t *level)
{
	u8 __level;

	if (!(index & INDIRECT_BIT)) {
		*bix = index;
		*level = 0;
		return;
	}

	__level = (index & ~INDIRECT_BIT) >> LEVEL_SHIFT;
	*level = LEVEL(__level);
	*bix = (index << (__level * LOGFS_BLOCK_BITS)) & ~INDIRECT_BIT;
	*bix = adjust_bix(*bix, *level);
	return;
}
#undef ARCH_SHIFT
#undef INDIRECT_BIT
#undef LEVEL_SHIFT
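
/*
 * Worked example of the index packing above (a sketch; assumes 64-bit
 * longs, so INDIRECT_BIT is bit 63 and LEVEL_SHIFT is 60, and the usual
 * LOGFS_BLOCK_BITS of 9):
 *
 *	logfs_pack_index(0x12345, LEVEL(1))
 *		== (1ULL << 63) | (1ULL << 60) | (0x12345 >> 9)
 *		== 0x9000000000000091
 *
 * Unpacking shifts the block index back left, so the low 9 bits are
 * lost: logfs_unpack_index() returns bix == 0x12200, the first index
 * covered by that level-1 indirect block, then clamped via adjust_bix().
 */
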
/*
 * Time is stored as nanoseconds since the epoch.
 */
static struct timespec be64_to_timespec(__be64 betime)
{
	return ns_to_timespec(be64_to_cpu(betime));
}

static __be64 timespec_to_be64(struct timespec tsp)
{
	return cpu_to_be64((u64)tsp.tv_sec * NSEC_PER_SEC + tsp.tv_nsec);
}

static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode *inode)
{
	struct logfs_inode *li = logfs_inode(inode);
	int i;

	inode->i_mode = be16_to_cpu(di->di_mode);
	li->li_height = di->di_height;
	li->li_flags = be32_to_cpu(di->di_flags);
	i_uid_write(inode, be32_to_cpu(di->di_uid));
	i_gid_write(inode, be32_to_cpu(di->di_gid));
	inode->i_size = be64_to_cpu(di->di_size);
	logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes));
	inode->i_atime = be64_to_timespec(di->di_atime);
	inode->i_ctime = be64_to_timespec(di->di_ctime);
	inode->i_mtime = be64_to_timespec(di->di_mtime);
	set_nlink(inode, be32_to_cpu(di->di_refcount));
	inode->i_generation = be32_to_cpu(di->di_generation);

	switch (inode->i_mode & S_IFMT) {
	case S_IFSOCK:	/* fall through */
	case S_IFBLK:	/* fall through */
	case S_IFCHR:	/* fall through */
	case S_IFIFO:
		inode->i_rdev = be64_to_cpu(di->di_data[0]);
		break;
	case S_IFDIR:	/* fall through */
	case S_IFREG:	/* fall through */
	case S_IFLNK:
		for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
			li->li_data[i] = be64_to_cpu(di->di_data[i]);
		break;
	default:
		BUG();
	}
}

static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode *di)
{
	struct logfs_inode *li = logfs_inode(inode);
	int i;

	di->di_mode = cpu_to_be16(inode->i_mode);
	di->di_height = li->li_height;
	di->di_pad = 0;
	di->di_flags = cpu_to_be32(li->li_flags);
	di->di_uid = cpu_to_be32(i_uid_read(inode));
	di->di_gid = cpu_to_be32(i_gid_read(inode));
	di->di_size = cpu_to_be64(i_size_read(inode));
	di->di_used_bytes = cpu_to_be64(li->li_used_bytes);
	di->di_atime = timespec_to_be64(inode->i_atime);
	di->di_ctime = timespec_to_be64(inode->i_ctime);
	di->di_mtime = timespec_to_be64(inode->i_mtime);
	di->di_refcount = cpu_to_be32(inode->i_nlink);
	di->di_generation = cpu_to_be32(inode->i_generation);

	switch (inode->i_mode & S_IFMT) {
	case S_IFSOCK:	/* fall through */
	case S_IFBLK:	/* fall through */
	case S_IFCHR:	/* fall through */
	case S_IFIFO:
		di->di_data[0] = cpu_to_be64(inode->i_rdev);
		break;
	case S_IFDIR:	/* fall through */
	case S_IFREG:	/* fall through */
	case S_IFLNK:
		for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
			di->di_data[i] = cpu_to_be64(li->li_data[i]);
		break;
	default:
		BUG();
	}
}

static void __logfs_set_blocks(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct logfs_inode *li = logfs_inode(inode);

	inode->i_blocks = ULONG_MAX;
	if (li->li_used_bytes >> sb->s_blocksize_bits < ULONG_MAX)
		inode->i_blocks = ALIGN(li->li_used_bytes, 512) >> 9;
}

void logfs_set_blocks(struct inode *inode, u64 bytes)
{
	struct logfs_inode *li = logfs_inode(inode);

	li->li_used_bytes = bytes;
	__logfs_set_blocks(inode);
}

static void prelock_page(struct super_block *sb, struct page *page, int lock)
{
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(!PageLocked(page));
	if (lock) {
		BUG_ON(PagePreLocked(page));
		SetPagePreLocked(page);
	} else {
		/* We are in GC path. */
		if (PagePreLocked(page))
			super->s_lock_count++;
		else
			SetPagePreLocked(page);
	}
}

static void preunlock_page(struct super_block *sb, struct page *page, int lock)
{
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(!PageLocked(page));
	if (lock)
		ClearPagePreLocked(page);
	else {
		/* We are in GC path. */
		BUG_ON(!PagePreLocked(page));
		if (super->s_lock_count)
			super->s_lock_count--;
		else
			ClearPagePreLocked(page);
	}
}

/*
 * Logfs is prone to an AB-BA deadlock where one task tries to acquire
 * s_write_mutex with a locked page and GC tries to get that page while
 * holding s_write_mutex.
 * To solve this issue logfs will ignore the page lock iff the task
 * holding it is waiting for s_write_mutex.  We annotate this fact by
 * setting PG_pre_locked in addition to PG_locked.
 */
void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock)
{
	struct logfs_super *super = logfs_super(sb);

	if (page)
		prelock_page(sb, page, lock);

	if (lock) {
		mutex_lock(&super->s_write_mutex);
		logfs_gc_pass(sb);
		/* FIXME: We also have to check for shadowed space
		 * and mempool fill grade */
	}
}

void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock)
{
	struct logfs_super *super = logfs_super(sb);

	if (page)
		preunlock_page(sb, page, lock);
	/* Order matters - we must clear PG_pre_locked before releasing
	 * s_write_mutex or we could race against another task. */
	if (lock)
		mutex_unlock(&super->s_write_mutex);
}
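
/*
 * Concretely, the deadlock described above: task A locks a page, then
 * blocks on s_write_mutex; the GC thread, already holding s_write_mutex,
 * needs A's page.  With PG_pre_locked set on that page,
 * logfs_lock_write_page() below may proceed despite PG_locked, which
 * breaks the cycle.
 */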

static struct page *logfs_get_read_page(struct inode *inode, u64 bix,
		level_t level)
{
	return find_or_create_page(inode->i_mapping,
			logfs_pack_index(bix, level), GFP_NOFS);
}

static void logfs_put_read_page(struct page *page)
{
	unlock_page(page);
	page_cache_release(page);
}

static void logfs_lock_write_page(struct page *page)
{
	int loop = 0;

	while (unlikely(!trylock_page(page))) {
		if (loop++ > 0x1000) {
			/* Has been observed once so far... */
			printk(KERN_ERR "stack at %p\n", &loop);
			BUG();
		}
		if (PagePreLocked(page)) {
			/* Holder of page lock is waiting for us, it
			 * is safe to use this page. */
			break;
		}
		/* Some other process has this page locked and has
		 * nothing to do with us.  Wait for it to finish.
		 */
		schedule();
	}
	BUG_ON(!PageLocked(page));
}

static struct page *logfs_get_write_page(struct inode *inode, u64 bix,
		level_t level)
{
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index = logfs_pack_index(bix, level);
	struct page *page;
	int err;

repeat:
	page = find_get_page(mapping, index);
	if (!page) {
		page = __page_cache_alloc(GFP_NOFS);
		if (!page)
			return NULL;
		err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS);
		if (unlikely(err)) {
			page_cache_release(page);
			if (err == -EEXIST)
				goto repeat;
			return NULL;
		}
	} else
		logfs_lock_write_page(page);
	BUG_ON(!PageLocked(page));
	return page;
}

static void logfs_unlock_write_page(struct page *page)
{
	if (!PagePreLocked(page))
		unlock_page(page);
}

static void logfs_put_write_page(struct page *page)
{
	logfs_unlock_write_page(page);
	page_cache_release(page);
}

static struct page *logfs_get_page(struct inode *inode, u64 bix, level_t level,
		int rw)
{
	if (rw == READ)
		return logfs_get_read_page(inode, bix, level);
	else
		return logfs_get_write_page(inode, bix, level);
}

static void logfs_put_page(struct page *page, int rw)
{
	if (rw == READ)
		logfs_put_read_page(page);
	else
		logfs_put_write_page(page);
}

static unsigned long __get_bits(u64 val, int skip, int no)
{
	u64 ret = val;

	ret >>= skip * no;
	ret <<= 64 - no;
	ret >>= 64 - no;
	return ret;
}

static unsigned long get_bits(u64 val, level_t skip)
{
	return __get_bits(val, (__force int)skip, LOGFS_BLOCK_BITS);
}
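
/*
 * Example (a sketch, with the usual LOGFS_BLOCK_BITS of 9):
 * get_bits(bix, SUBLEVEL(level)) yields the 9-bit slot a block index
 * occupies within an indirect block at the given level:
 *
 *	get_bits(0x12345, LEVEL(0)) == 0x145	(slot in a level-1 block)
 *	get_bits(0x12345, LEVEL(1)) == 0x091	(slot in a level-2 block)
 */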

static inline void init_shadow_tree(struct super_block *sb,
		struct shadow_tree *tree)
{
	struct logfs_super *super = logfs_super(sb);

	btree_init_mempool64(&tree->new, super->s_btree_pool);
	btree_init_mempool64(&tree->old, super->s_btree_pool);
}

static void indirect_write_block(struct logfs_block *block)
{
	struct page *page;
	struct inode *inode;
	int ret;

	page = block->page;
	inode = page->mapping->host;
	logfs_lock_write_page(page);
	ret = logfs_write_buf(inode, page, 0);
	logfs_unlock_write_page(page);
	/*
	 * This needs some rework.  Unless you want your filesystem to run
	 * completely synchronously (you don't), the filesystem will always
	 * report writes as 'successful' before the actual work has been
	 * done.  The actual work gets done here and this is where any errors
	 * will show up.  And there isn't much we can do about it, really.
	 *
	 * Some attempts to fix the errors (move from bad blocks, retry io,...)
	 * have already been done, so anything left should be either a broken
	 * device or a bug somewhere in logfs itself.  Being relatively new,
	 * the odds currently favor a bug, so for now the line below isn't
	 * entirely tasteless.
	 */
	BUG_ON(ret);
}

static void inode_write_block(struct logfs_block *block)
{
	struct inode *inode;
	int ret;

	inode = block->inode;
	if (inode->i_ino == LOGFS_INO_MASTER)
		logfs_write_anchor(inode->i_sb);
	else {
		ret = __logfs_write_inode(inode, NULL, 0);
		/* see indirect_write_block comment */
		BUG_ON(ret);
	}
}

/*
 * This silences a false, yet annoying gcc warning.  I hate it when my editor
 * jumps into bitops.h each time I recompile this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	return find_next_bit(addr, size, offset);
}

static __be64 inode_val0(struct inode *inode)
{
	struct logfs_inode *li = logfs_inode(inode);
	u64 val;

	/*
	 * Explicit shifting generates good code, but must match the format
	 * of the structure.  Add some paranoia just in case.
	 */
	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_mode) != 0);
	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_height) != 2);
	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_flags) != 4);

	val = (u64)inode->i_mode << 48 |
		(u64)li->li_height << 40 |
		(u64)li->li_flags;
	return cpu_to_be64(val);
}
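
/*
 * For illustration: as a big-endian quantity the packed value mirrors
 * the first 8 bytes of struct logfs_disk_inode on the medium -- bytes
 * 0-1 di_mode, byte 2 di_height, byte 3 padding (zero), bytes 4-7
 * di_flags -- which is exactly what the BUILD_BUG_ONs above pin down.
 */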

static int inode_write_alias(struct super_block *sb,
		struct logfs_block *block, write_alias_t *write_one_alias)
{
	struct inode *inode = block->inode;
	struct logfs_inode *li = logfs_inode(inode);
	unsigned long pos;
	u64 ino, bix;
	__be64 val;
	level_t level;
	int err;

	for (pos = 0; ; pos++) {
		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
		if (pos >= LOGFS_BLOCK_FACTOR)
			return 0;

		switch (pos) {
		case INODE_HEIGHT_OFS:
			val = inode_val0(inode);
			break;
		case INODE_USED_OFS:
			val = cpu_to_be64(li->li_used_bytes);
			break;
		case INODE_SIZE_OFS:
			val = cpu_to_be64(i_size_read(inode));
			break;
		case INODE_POINTER_OFS ... INODE_POINTER_OFS + LOGFS_EMBEDDED_FIELDS - 1:
			val = cpu_to_be64(li->li_data[pos - INODE_POINTER_OFS]);
			break;
		default:
			BUG();
		}

		ino = LOGFS_INO_MASTER;
		bix = inode->i_ino;
		level = LEVEL(0);
		err = write_one_alias(sb, ino, bix, level, pos, val);
		if (err)
			return err;
	}
}

static int indirect_write_alias(struct super_block *sb,
		struct logfs_block *block, write_alias_t *write_one_alias)
{
	unsigned long pos;
	struct page *page = block->page;
	u64 ino, bix;
	__be64 *child, val;
	level_t level;
	int err;

	for (pos = 0; ; pos++) {
		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
		if (pos >= LOGFS_BLOCK_FACTOR)
			return 0;

		ino = page->mapping->host->i_ino;
		logfs_unpack_index(page->index, &bix, &level);
		child = kmap_atomic(page);
		val = child[pos];
		kunmap_atomic(child);
		err = write_one_alias(sb, ino, bix, level, pos, val);
		if (err)
			return err;
	}
}

int logfs_write_obj_aliases_pagecache(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_block *block;
	int err;

	list_for_each_entry(block, &super->s_object_alias, alias_list) {
		err = block->ops->write_alias(sb, block, write_alias_journal);
		if (err)
			return err;
	}
	return 0;
}

void __free_block(struct super_block *sb, struct logfs_block *block)
{
	BUG_ON(!list_empty(&block->item_list));
	list_del(&block->alias_list);
	mempool_free(block, logfs_super(sb)->s_block_pool);
}

static void inode_free_block(struct super_block *sb, struct logfs_block *block)
{
	struct inode *inode = block->inode;

	logfs_inode(inode)->li_block = NULL;
	__free_block(sb, block);
}

static void indirect_free_block(struct super_block *sb,
		struct logfs_block *block)
{
	struct page *page = block->page;

	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		page_cache_release(page);
		set_page_private(page, 0);
	}
	__free_block(sb, block);
}


static struct logfs_block_ops inode_block_ops = {
	.write_block = inode_write_block,
	.free_block = inode_free_block,
	.write_alias = inode_write_alias,
};

static struct logfs_block_ops indirect_block_ops = {
	.write_block = indirect_write_block,
	.free_block = indirect_free_block,
	.write_alias = indirect_write_alias,
};

static struct logfs_block *__alloc_block(struct super_block *sb,
		u64 ino, u64 bix, level_t level)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_block *block;

	block = mempool_alloc(super->s_block_pool, GFP_NOFS);
	memset(block, 0, sizeof(*block));
	INIT_LIST_HEAD(&block->alias_list);
	INIT_LIST_HEAD(&block->item_list);
	block->sb = sb;
	block->ino = ino;
	block->bix = bix;
	block->level = level;
	return block;
}

static void alloc_inode_block(struct inode *inode)
{
	struct logfs_inode *li = logfs_inode(inode);
	struct logfs_block *block;

	if (li->li_block)
		return;

	block = __alloc_block(inode->i_sb, LOGFS_INO_MASTER, inode->i_ino, 0);
	block->inode = inode;
	li->li_block = block;
	block->ops = &inode_block_ops;
}

void initialize_block_counters(struct page *page, struct logfs_block *block,
		__be64 *array, int page_is_empty)
{
	u64 ptr;
	int i, start;

	block->partial = 0;
	block->full = 0;
	start = 0;
	if (page->index < first_indirect_block()) {
		/* Counters are pointless on level 0 */
		return;
	}
	if (page->index == first_indirect_block()) {
		/* Skip unused pointers */
		start = I0_BLOCKS;
		block->full = I0_BLOCKS;
	}
	if (!page_is_empty) {
		for (i = start; i < LOGFS_BLOCK_FACTOR; i++) {
			ptr = be64_to_cpu(array[i]);
			if (ptr)
				block->partial++;
			if (ptr & LOGFS_FULLY_POPULATED)
				block->full++;
		}
	}
}
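
/*
 * In short: "partial" counts the non-NULL pointers in an indirect block,
 * "full" those whose pointer carries LOGFS_FULLY_POPULATED, i.e. whose
 * subtree is completely written.  The first indirect block pre-counts
 * its first I0_BLOCKS slots as full because they shadow the inode's
 * direct pointers and are never used.
 */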

static void alloc_data_block(struct inode *inode, struct page *page)
{
	struct logfs_block *block;
	u64 bix;
	level_t level;

	if (PagePrivate(page))
		return;

	logfs_unpack_index(page->index, &bix, &level);
	block = __alloc_block(inode->i_sb, inode->i_ino, bix, level);
	block->page = page;

	SetPagePrivate(page);
	page_cache_get(page);
	set_page_private(page, (unsigned long) block);

	block->ops = &indirect_block_ops;
}

static void alloc_indirect_block(struct inode *inode, struct page *page,
		int page_is_empty)
{
	struct logfs_block *block;
	__be64 *array;

	if (PagePrivate(page))
		return;

	alloc_data_block(inode, page);

	block = logfs_block(page);
	array = kmap_atomic(page);
	initialize_block_counters(page, block, array, page_is_empty);
	kunmap_atomic(array);
}

static void block_set_pointer(struct page *page, int index, u64 ptr)
{
	struct logfs_block *block = logfs_block(page);
	__be64 *array;
	u64 oldptr;

	BUG_ON(!block);
	array = kmap_atomic(page);
	oldptr = be64_to_cpu(array[index]);
	array[index] = cpu_to_be64(ptr);
	kunmap_atomic(array);
	SetPageUptodate(page);

	block->full += !!(ptr & LOGFS_FULLY_POPULATED)
		- !!(oldptr & LOGFS_FULLY_POPULATED);
	block->partial += !!ptr - !!oldptr;
}

static u64 block_get_pointer(struct page *page, int index)
{
	__be64 *block;
	u64 ptr;

	block = kmap_atomic(page);
	ptr = be64_to_cpu(block[index]);
	kunmap_atomic(block);
	return ptr;
}

static int logfs_read_empty(struct page *page)
{
	zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	return 0;
}

static int logfs_read_direct(struct inode *inode, struct page *page)
{
	struct logfs_inode *li = logfs_inode(inode);
	pgoff_t index = page->index;
	u64 block;

	block = li->li_data[index];
	if (!block)
		return logfs_read_empty(page);

	return logfs_segment_read(inode, page, block, index, 0);
}

static int logfs_read_loop(struct inode *inode, struct page *page,
		int rw_context)
{
	struct logfs_inode *li = logfs_inode(inode);
	u64 bix, bofs = li->li_data[INDIRECT_INDEX];
	level_t level, target_level;
	int ret;
	struct page *ipage;

	logfs_unpack_index(page->index, &bix, &target_level);
	if (!bofs)
		return logfs_read_empty(page);

	if (bix >= maxbix(li->li_height))
		return logfs_read_empty(page);

	for (level = LEVEL(li->li_height);
			(__force u8)level > (__force u8)target_level;
			level = SUBLEVEL(level)) {
		ipage = logfs_get_page(inode, bix, level, rw_context);
		if (!ipage)
			return -ENOMEM;

		ret = logfs_segment_read(inode, ipage, bofs, bix, level);
		if (ret) {
			logfs_put_read_page(ipage);
			return ret;
		}

		bofs = block_get_pointer(ipage, get_bits(bix, SUBLEVEL(level)));
		logfs_put_page(ipage, rw_context);
		if (!bofs)
			return logfs_read_empty(page);
	}

	return logfs_segment_read(inode, page, bofs, bix, 0);
}

static int logfs_read_block(struct inode *inode, struct page *page,
		int rw_context)
{
	pgoff_t index = page->index;

	if (index < I0_BLOCKS)
		return logfs_read_direct(inode, page);
	return logfs_read_loop(inode, page, rw_context);
}

static int logfs_exist_loop(struct inode *inode, u64 bix)
{
	struct logfs_inode *li = logfs_inode(inode);
	u64 bofs = li->li_data[INDIRECT_INDEX];
	level_t level;
	int ret;
	struct page *ipage;

	if (!bofs)
		return 0;
	if (bix >= maxbix(li->li_height))
		return 0;

	for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)) {
		ipage = logfs_get_read_page(inode, bix, level);
		if (!ipage)
			return -ENOMEM;

		ret = logfs_segment_read(inode, ipage, bofs, bix, level);
		if (ret) {
			logfs_put_read_page(ipage);
			return ret;
		}

		bofs = block_get_pointer(ipage, get_bits(bix, SUBLEVEL(level)));
		logfs_put_read_page(ipage);
		if (!bofs)
			return 0;
	}

	return 1;
}

int logfs_exist_block(struct inode *inode, u64 bix)
{
	struct logfs_inode *li = logfs_inode(inode);

	if (bix < I0_BLOCKS)
		return !!li->li_data[bix];
	return logfs_exist_loop(inode, bix);
}

static u64 seek_holedata_direct(struct inode *inode, u64 bix, int data)
{
	struct logfs_inode *li = logfs_inode(inode);

	for (; bix < I0_BLOCKS; bix++)
		if (data ^ (li->li_data[bix] == 0))
			return bix;
	return I0_BLOCKS;
}

static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
{
	struct logfs_inode *li = logfs_inode(inode);
	__be64 *rblock;
	u64 increment, bofs = li->li_data[INDIRECT_INDEX];
	level_t level;
	int ret, slot;
	struct page *page;

	BUG_ON(!bofs);

	for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)) {
		increment = 1 << (LOGFS_BLOCK_BITS * ((__force u8)level - 1));
		page = logfs_get_read_page(inode, bix, level);
		if (!page)
			return bix;

		ret = logfs_segment_read(inode, page, bofs, bix, level);
		if (ret) {
			logfs_put_read_page(page);
			return bix;
		}

		slot = get_bits(bix, SUBLEVEL(level));
		rblock = kmap_atomic(page);
		while (slot < LOGFS_BLOCK_FACTOR) {
			if (data && (rblock[slot] != 0))
				break;
			if (!data && !(be64_to_cpu(rblock[slot]) & LOGFS_FULLY_POPULATED))
				break;
			slot++;
			bix += increment;
			bix &= ~(increment - 1);
		}
		if (slot >= LOGFS_BLOCK_FACTOR) {
			kunmap_atomic(rblock);
			logfs_put_read_page(page);
			return bix;
		}
		bofs = be64_to_cpu(rblock[slot]);
		kunmap_atomic(rblock);
		logfs_put_read_page(page);
		if (!bofs) {
			BUG_ON(data);
			return bix;
		}
	}
	return bix;
}
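
/*
 * Note the stepping in the loop above: at each level, bix advances by
 * the number of data blocks one child pointer covers and is rounded
 * down to that child boundary, so the next descent starts at the first
 * block index of the candidate subtree.
 */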

/*
 * logfs_seek_hole - find next hole starting at a given block index.
 * Returns the next hole.  If the file doesn't contain any further holes,
 * the block address next to eof is returned instead.
 */
u64 logfs_seek_hole(struct inode *inode, u64 bix)
{
	struct logfs_inode *li = logfs_inode(inode);

	if (bix < I0_BLOCKS) {
		bix = seek_holedata_direct(inode, bix, 0);
		if (bix < I0_BLOCKS)
			return bix;
	}

	if (!li->li_data[INDIRECT_INDEX])
		return bix;
	else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED)
		bix = maxbix(li->li_height);
	else if (bix >= maxbix(li->li_height))
		return bix;
	else {
		bix = seek_holedata_loop(inode, bix, 0);
		if (bix < maxbix(li->li_height))
			return bix;
		/* Should not happen anymore.  But if some port writes semi-
		 * corrupt images (as this one used to) we might run into it.
		 */
		WARN_ON_ONCE(bix == maxbix(li->li_height));
	}

	return bix;
}

static u64 __logfs_seek_data(struct inode *inode, u64 bix)
{
	struct logfs_inode *li = logfs_inode(inode);

	if (bix < I0_BLOCKS) {
		bix = seek_holedata_direct(inode, bix, 1);
		if (bix < I0_BLOCKS)
			return bix;
	}

	if (bix < maxbix(li->li_height)) {
		if (!li->li_data[INDIRECT_INDEX])
			bix = maxbix(li->li_height);
		else
			return seek_holedata_loop(inode, bix, 1);
	}

	return bix;
}

/*
 * logfs_seek_data - find next data block after a given block index.
 * Returns the next data block.  If the file doesn't contain any further
 * data blocks, the block address next to eof is returned instead.
 */
u64 logfs_seek_data(struct inode *inode, u64 bix)
{
	struct super_block *sb = inode->i_sb;
	u64 ret, end;

	ret = __logfs_seek_data(inode, bix);
	end = i_size_read(inode) >> sb->s_blocksize_bits;
	if (ret >= end)
		ret = max(bix, end);
	return ret;
}

static int logfs_is_valid_direct(struct logfs_inode *li, u64 bix, u64 ofs)
{
	return pure_ofs(li->li_data[bix]) == ofs;
}

static int __logfs_is_valid_loop(struct inode *inode, u64 bix,
		u64 ofs, u64 bofs)
{
	struct logfs_inode *li = logfs_inode(inode);
	level_t level;
	int ret;
	struct page *page;

	for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)) {
		page = logfs_get_write_page(inode, bix, level);
		BUG_ON(!page);

		ret = logfs_segment_read(inode, page, bofs, bix, level);
		if (ret) {
			logfs_put_write_page(page);
			return 0;
		}

		bofs = block_get_pointer(page, get_bits(bix, SUBLEVEL(level)));
		logfs_put_write_page(page);
		if (!bofs)
			return 0;

		if (pure_ofs(bofs) == ofs)
			return 1;
	}
	return 0;
}

static int logfs_is_valid_loop(struct inode *inode, u64 bix, u64 ofs)
{
	struct logfs_inode *li = logfs_inode(inode);
	u64 bofs = li->li_data[INDIRECT_INDEX];

	if (!bofs)
		return 0;

	if (bix >= maxbix(li->li_height))
		return 0;

	if (pure_ofs(bofs) == ofs)
		return 1;

	return __logfs_is_valid_loop(inode, bix, ofs, bofs);
}

static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
{
	struct logfs_inode *li = logfs_inode(inode);

	if ((inode->i_nlink == 0) && atomic_read(&inode->i_count) == 1)
		return 0;

	if (bix < I0_BLOCKS)
		return logfs_is_valid_direct(li, bix, ofs);
	return logfs_is_valid_loop(inode, bix, ofs);
}

/*
 * logfs_is_valid_block - check whether this block is still valid.
 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
 * become invalid once the journal is written.
 */
int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 bix,
		gc_level_t gc_level)
{
	struct logfs_super *super = logfs_super(sb);
	struct inode *inode;
	int ret, cookie;

	/* Umount closes a segment with free blocks remaining.  Those
	 * blocks are by definition invalid. */
	if (ino == -1)
		return 0;

	LOGFS_BUG_ON((u64)(u_long)ino != ino, sb);

	inode = logfs_safe_iget(sb, ino, &cookie);
	if (IS_ERR(inode))
		goto invalid;

	ret = __logfs_is_valid_block(inode, bix, ofs);
	logfs_safe_iput(inode, cookie);
	if (ret)
		return ret;

invalid:
	/* Block is nominally invalid, but may still sit in the shadow tree,
	 * waiting for a journal commit.
	 */
	if (btree_lookup64(&super->s_shadow_tree.old, ofs))
		return 2;
	return 0;
}

int logfs_readpage_nolock(struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret = -EIO;

	ret = logfs_read_block(inode, page, READ);

	if (ret) {
		ClearPageUptodate(page);
		SetPageError(page);
	} else {
		SetPageUptodate(page);
		ClearPageError(page);
	}
	flush_dcache_page(page);

	return ret;
}

static int logfs_reserve_bytes(struct inode *inode, int bytes)
{
	struct logfs_super *super = logfs_super(inode->i_sb);
	u64 available = super->s_free_bytes + super->s_dirty_free_bytes
			- super->s_dirty_used_bytes - super->s_dirty_pages;

	if (!bytes)
		return 0;

	if (available < bytes)
		return -ENOSPC;

	if (available < bytes + super->s_root_reserve &&
			!capable(CAP_SYS_RESOURCE))
		return -ENOSPC;

	return 0;
}

int get_page_reserve(struct inode *inode, struct page *page)
{
	struct logfs_super *super = logfs_super(inode->i_sb);
	struct logfs_block *block = logfs_block(page);
	int ret;

	if (block && block->reserved_bytes)
		return 0;

	logfs_get_wblocks(inode->i_sb, page, WF_LOCK);
	while ((ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE)) &&
			!list_empty(&super->s_writeback_list)) {
		block = list_entry(super->s_writeback_list.next,
				struct logfs_block, alias_list);
		block->ops->write_block(block);
	}
	if (!ret) {
		alloc_data_block(inode, page);
		block = logfs_block(page);
		block->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE;
		super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE;
		list_move_tail(&block->alias_list, &super->s_writeback_list);
	}
	logfs_put_wblocks(inode->i_sb, page, WF_LOCK);
	return ret;
}

/*
 * We are protected by write lock.  Push victims up to superblock level
 * and release transaction when appropriate.
 */
/* FIXME: This is currently called from the wrong spots. */
static void logfs_handle_transaction(struct inode *inode,
		struct logfs_transaction *ta)
{
	struct logfs_super *super = logfs_super(inode->i_sb);

	if (!ta)
		return;
	logfs_inode(inode)->li_block->ta = NULL;

	if (inode->i_ino != LOGFS_INO_MASTER) {
		BUG(); /* FIXME: Yes, this needs more thought */
		/* just remember the transaction until inode is written */
		//BUG_ON(logfs_inode(inode)->li_transaction);
		//logfs_inode(inode)->li_transaction = ta;
		return;
	}

	switch (ta->state) {
	case CREATE_1: /* fall through */
	case UNLINK_1:
		BUG_ON(super->s_victim_ino);
		super->s_victim_ino = ta->ino;
		break;
	case CREATE_2: /* fall through */
	case UNLINK_2:
		BUG_ON(super->s_victim_ino != ta->ino);
		super->s_victim_ino = 0;
		/* transaction ends here - free it */
		kfree(ta);
		break;
	case CROSS_RENAME_1:
		BUG_ON(super->s_rename_dir);
		BUG_ON(super->s_rename_pos);
		super->s_rename_dir = ta->dir;
		super->s_rename_pos = ta->pos;
		break;
	case CROSS_RENAME_2:
		BUG_ON(super->s_rename_dir != ta->dir);
		BUG_ON(super->s_rename_pos != ta->pos);
		super->s_rename_dir = 0;
		super->s_rename_pos = 0;
		kfree(ta);
		break;
	case TARGET_RENAME_1:
		BUG_ON(super->s_rename_dir);
		BUG_ON(super->s_rename_pos);
		BUG_ON(super->s_victim_ino);
		super->s_rename_dir = ta->dir;
		super->s_rename_pos = ta->pos;
		super->s_victim_ino = ta->ino;
		break;
	case TARGET_RENAME_2:
		BUG_ON(super->s_rename_dir != ta->dir);
		BUG_ON(super->s_rename_pos != ta->pos);
		BUG_ON(super->s_victim_ino != ta->ino);
		super->s_rename_dir = 0;
		super->s_rename_pos = 0;
		break;
	case TARGET_RENAME_3:
		BUG_ON(super->s_rename_dir);
		BUG_ON(super->s_rename_pos);
		BUG_ON(super->s_victim_ino != ta->ino);
		super->s_victim_ino = 0;
		kfree(ta);
		break;
	default:
		BUG();
	}
}

/*
 * Not strictly a reservation, but rather a check that we still have enough
 * space to satisfy the write.
 */
static int logfs_reserve_blocks(struct inode *inode, int blocks)
{
	return logfs_reserve_bytes(inode, blocks * LOGFS_MAX_OBJECTSIZE);
}

struct write_control {
	u64 ofs;
	long flags;
};

static struct logfs_shadow *alloc_shadow(struct inode *inode, u64 bix,
		level_t level, u64 old_ofs)
{
	struct logfs_super *super = logfs_super(inode->i_sb);
	struct logfs_shadow *shadow;

	shadow = mempool_alloc(super->s_shadow_pool, GFP_NOFS);
	memset(shadow, 0, sizeof(*shadow));
	shadow->ino = inode->i_ino;
	shadow->bix = bix;
	shadow->gc_level = expand_level(inode->i_ino, level);
	shadow->old_ofs = old_ofs & ~LOGFS_FULLY_POPULATED;
	return shadow;
}

static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
{
	struct logfs_super *super = logfs_super(inode->i_sb);

	mempool_free(shadow, super->s_shadow_pool);
}

static void mark_segment(struct shadow_tree *tree, u32 segno)
{
	int err;

	if (!btree_lookup32(&tree->segment_map, segno)) {
		err = btree_insert32(&tree->segment_map, segno, (void *)1,
				GFP_NOFS);
		BUG_ON(err);
		tree->no_shadowed_segments++;
	}
}

/*
 * A shadow records an object's move from old_ofs to new_ofs.  Shadows of
 * pre-existing objects are filed in the "old" btree, keyed by old offset;
 * shadows of brand-new objects in the "new" btree, keyed by new offset.
 * Both trees are resolved at journal commit.  The dirty byte counts and
 * the map of shadowed segments are updated along the way.
 */
static void fill_shadow_tree(struct inode *inode, struct page *page,
		struct logfs_shadow *shadow)
{
	struct logfs_super *super = logfs_super(inode->i_sb);
	struct logfs_block *block = logfs_block(page);
	struct shadow_tree *tree = &super->s_shadow_tree;

	if (PagePrivate(page)) {
		if (block->alias_map)
			super->s_no_object_aliases -= bitmap_weight(
					block->alias_map, LOGFS_BLOCK_FACTOR);
		logfs_handle_transaction(inode, block->ta);
		block->ops->free_block(inode->i_sb, block);
	}
	if (shadow) {
		if (shadow->old_ofs)
			btree_insert64(&tree->old, shadow->old_ofs, shadow,
					GFP_NOFS);
		else
			btree_insert64(&tree->new, shadow->new_ofs, shadow,
					GFP_NOFS);

		super->s_dirty_used_bytes += shadow->new_len;
		super->s_dirty_free_bytes += shadow->old_len;
		mark_segment(tree, shadow->old_ofs >> super->s_segshift);
		mark_segment(tree, shadow->new_ofs >> super->s_segshift);
	}
}

static void logfs_set_alias(struct super_block *sb, struct logfs_block *block,
		long child_no)
{
	struct logfs_super *super = logfs_super(sb);

	if (block->inode && block->inode->i_ino == LOGFS_INO_MASTER) {
		/* Aliases in the master inode are pointless. */
		return;
	}

	if (!test_bit(child_no, block->alias_map)) {
		set_bit(child_no, block->alias_map);
		super->s_no_object_aliases++;
	}
	list_move_tail(&block->alias_list, &super->s_object_alias);
}

/*
 * Object aliases can and often do change the size and occupied space of a
 * file.  So not only do we have to change the pointers, we also have to
 * change inode->i_size and li->li_used_bytes.  Which is done by setting
 * another two object aliases for the inode itself.
 */
static void set_iused(struct inode *inode, struct logfs_shadow *shadow)
{
	struct logfs_inode *li = logfs_inode(inode);

	if (shadow->new_len == shadow->old_len)
		return;

	alloc_inode_block(inode);
	li->li_used_bytes += shadow->new_len - shadow->old_len;
	__logfs_set_blocks(inode);
	logfs_set_alias(inode->i_sb, li->li_block, INODE_USED_OFS);
	logfs_set_alias(inode->i_sb, li->li_block, INODE_SIZE_OFS);
}

static int logfs_write_i0(struct inode *inode, struct page *page,
		struct write_control *wc)
{
	struct logfs_shadow *shadow;
	u64 bix;
	level_t level;
	int full, err = 0;

	logfs_unpack_index(page->index, &bix, &level);
	if (wc->ofs == 0)
		if (logfs_reserve_blocks(inode, 1))
			return -ENOSPC;

	shadow = alloc_shadow(inode, bix, level, wc->ofs);
	if (wc->flags & WF_WRITE)
		err = logfs_segment_write(inode, page, shadow);
	if (wc->flags & WF_DELETE)
		logfs_segment_delete(inode, shadow);
	if (err) {
		free_shadow(inode, shadow);
		return err;
	}

	set_iused(inode, shadow);
	full = 1;
	if (level != 0) {
		alloc_indirect_block(inode, page, 0);
		full = logfs_block(page)->full == LOGFS_BLOCK_FACTOR;
	}
	fill_shadow_tree(inode, page, shadow);
	wc->ofs = shadow->new_ofs;
	if (wc->ofs && full)
		wc->ofs |= LOGFS_FULLY_POPULATED;
	return 0;
}

static int logfs_write_direct(struct inode *inode, struct page *page,
		long flags)
{
	struct logfs_inode *li = logfs_inode(inode);
	struct write_control wc = {
		.ofs = li->li_data[page->index],
		.flags = flags,
	};
	int err;

	alloc_inode_block(inode);

	err = logfs_write_i0(inode, page, &wc);
	if (err)
		return err;

	li->li_data[page->index] = wc.ofs;
	logfs_set_alias(inode->i_sb, li->li_block,
			page->index + INODE_POINTER_OFS);
	return 0;
}

static int ptr_change(u64 ofs, struct page *page)
{
	struct logfs_block *block = logfs_block(page);
	int empty0, empty1, full0, full1;

	empty0 = ofs == 0;
	empty1 = block->partial == 0;
	if (empty0 != empty1)
		return 1;

	/* The !! is necessary to shrink result to int */
	full0 = !!(ofs & LOGFS_FULLY_POPULATED);
	full1 = block->full == LOGFS_BLOCK_FACTOR;
	if (full0 != full1)
		return 1;
	return 0;
}

static int __logfs_write_rec(struct inode *inode, struct page *page,
		struct write_control *this_wc,
		pgoff_t bix, level_t target_level, level_t level)
{
	int ret, page_empty = 0;
	int child_no = get_bits(bix, SUBLEVEL(level));
	struct page *ipage;
	struct write_control child_wc = {
		.flags = this_wc->flags,
	};

	ipage = logfs_get_write_page(inode, bix, level);
	if (!ipage)
		return -ENOMEM;

	if (this_wc->ofs) {
		ret = logfs_segment_read(inode, ipage, this_wc->ofs, bix, level);
		if (ret)
			goto out;
	} else if (!PageUptodate(ipage)) {
		page_empty = 1;
		logfs_read_empty(ipage);
	}

	child_wc.ofs = block_get_pointer(ipage, child_no);

	if ((__force u8)level - 1 > (__force u8)target_level)
		ret = __logfs_write_rec(inode, page, &child_wc, bix,
				target_level, SUBLEVEL(level));
	else
		ret = logfs_write_i0(inode, page, &child_wc);

	if (ret)
		goto out;

	alloc_indirect_block(inode, ipage, page_empty);
	block_set_pointer(ipage, child_no, child_wc.ofs);
	/* FIXME: first condition seems superfluous */
	if (child_wc.ofs || logfs_block(ipage)->partial)
		this_wc->flags |= WF_WRITE;
	/* the condition on this_wc->ofs ensures that we won't consume extra
	 * space for indirect blocks in the future, which we cannot reserve */
	if (!this_wc->ofs || ptr_change(this_wc->ofs, ipage))
		ret = logfs_write_i0(inode, ipage, this_wc);
	else
		logfs_set_alias(inode->i_sb, logfs_block(ipage), child_no);
out:
	logfs_put_write_page(ipage);
	return ret;
}
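
/*
 * Sketch of the recursion above, for a level-0 write under a height-2
 * tree: logfs_write_rec() below starts at the root indirect block, reads
 * it, recurses one level down, writes the data block via logfs_write_i0(),
 * then stores the child's new segment offset in the parent and either
 * rewrites the parent too or merely records an alias for the changed
 * pointer, depending on ptr_change().
 */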

static int logfs_write_rec(struct inode *inode, struct page *page,
		pgoff_t bix, level_t target_level, long flags)
{
	struct logfs_inode *li = logfs_inode(inode);
	struct write_control wc = {
		.ofs = li->li_data[INDIRECT_INDEX],
		.flags = flags,
	};
	int ret;

	alloc_inode_block(inode);

	if (li->li_height > (__force u8)target_level)
		ret = __logfs_write_rec(inode, page, &wc, bix, target_level,
				LEVEL(li->li_height));
	else
		ret = logfs_write_i0(inode, page, &wc);
	if (ret)
		return ret;

	if (li->li_data[INDIRECT_INDEX] != wc.ofs) {
		li->li_data[INDIRECT_INDEX] = wc.ofs;
		logfs_set_alias(inode->i_sb, li->li_block,
				INDIRECT_INDEX + INODE_POINTER_OFS);
	}
	return ret;
}

void logfs_add_transaction(struct inode *inode, struct logfs_transaction *ta)
{
	alloc_inode_block(inode);
	logfs_inode(inode)->li_block->ta = ta;
}

void logfs_del_transaction(struct inode *inode, struct logfs_transaction *ta)
{
	struct logfs_block *block = logfs_inode(inode)->li_block;

	if (block && block->ta)
		block->ta = NULL;
}

static int grow_inode(struct inode *inode, u64 bix, level_t level)
{
	struct logfs_inode *li = logfs_inode(inode);
	u8 height = (__force u8)level;
	struct page *page;
	struct write_control wc = {
		.flags = WF_WRITE,
	};
	int err;

	BUG_ON(height > 5 || li->li_height > 5);
	while (height > li->li_height || bix >= maxbix(li->li_height)) {
		page = logfs_get_write_page(inode, I0_BLOCKS + 1,
				LEVEL(li->li_height + 1));
		if (!page)
			return -ENOMEM;
		logfs_read_empty(page);
		alloc_indirect_block(inode, page, 1);
		block_set_pointer(page, 0, li->li_data[INDIRECT_INDEX]);
		err = logfs_write_i0(inode, page, &wc);
		logfs_put_write_page(page);
		if (err)
			return err;
		li->li_data[INDIRECT_INDEX] = wc.ofs;
		wc.ofs = 0;
		li->li_height++;
		logfs_set_alias(inode->i_sb, li->li_block, INODE_HEIGHT_OFS);
	}
	return 0;
}
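
/*
 * Example: growing a height-1 file to hold bix == 600 (a sketch, with
 * 512 pointers per block): maxbix(1) == 512 <= 600, so one pass adds a
 * level-2 root whose slot 0 points at the old level-1 block; li_height
 * becomes 2 and maxbix(2) == 262144 now covers the new index.
 */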

static int __logfs_write_buf(struct inode *inode, struct page *page, long flags)
{
	struct logfs_super *super = logfs_super(inode->i_sb);
	pgoff_t index = page->index;
	u64 bix;
	level_t level;
	int err;

	flags |= WF_WRITE | WF_DELETE;
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	logfs_unpack_index(index, &bix, &level);
	if (logfs_block(page) && logfs_block(page)->reserved_bytes)
		super->s_dirty_pages -= logfs_block(page)->reserved_bytes;

	if (index < I0_BLOCKS)
		return logfs_write_direct(inode, page, flags);

	bix = adjust_bix(bix, level);
	err = grow_inode(inode, bix, level);
	if (err)
		return err;
	return logfs_write_rec(inode, page, bix, level, flags);
}

int logfs_write_buf(struct inode *inode, struct page *page, long flags)
{
	struct super_block *sb = inode->i_sb;
	int ret;

	logfs_get_wblocks(sb, page, flags & WF_LOCK);
	ret = __logfs_write_buf(inode, page, flags);
	logfs_put_wblocks(sb, page, flags & WF_LOCK);
	return ret;
}

static int __logfs_delete(struct inode *inode, struct page *page)
{
	long flags = WF_DELETE;
	int err;

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	if (page->index < I0_BLOCKS)
		return logfs_write_direct(inode, page, flags);
	err = grow_inode(inode, page->index, 0);
	if (err)
		return err;
	return logfs_write_rec(inode, page, page->index, 0, flags);
}

int logfs_delete(struct inode *inode, pgoff_t index,
		struct shadow_tree *shadow_tree)
{
	struct super_block *sb = inode->i_sb;
	struct page *page;
	int ret;

	page = logfs_get_read_page(inode, index, 0);
	if (!page)
		return -ENOMEM;

	logfs_get_wblocks(sb, page, 1);
	ret = __logfs_delete(inode, page);
	logfs_put_wblocks(sb, page, 1);

	logfs_put_read_page(page);

	return ret;
}

int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
		gc_level_t gc_level, long flags)
{
	level_t level = shrink_level(gc_level);
	struct page *page;
	int err;

	page = logfs_get_write_page(inode, bix, level);
	if (!page)
		return -ENOMEM;

	err = logfs_segment_read(inode, page, ofs, bix, level);
	if (!err) {
		if (level != 0)
			alloc_indirect_block(inode, page, 0);
		err = logfs_write_buf(inode, page, flags);
		if (!err && shrink_level(gc_level) == 0) {
			/* Rewrite cannot mark the inode dirty but has to
			 * write it immediately.
			 * Q: Can't we just create an alias for the inode
			 * instead?  And if not, why not?
			 */
			if (inode->i_ino == LOGFS_INO_MASTER)
				logfs_write_anchor(inode->i_sb);
			else {
				err = __logfs_write_inode(inode, page, flags);
			}
		}
	}
	logfs_put_write_page(page);
	return err;
}

static int truncate_data_block(struct inode *inode, struct page *page,
		u64 ofs, struct logfs_shadow *shadow, u64 size)
{
	loff_t pageofs = page->index << inode->i_sb->s_blocksize_bits;
	u64 bix;
	level_t level;
	int err;

	/* Does truncation happen within this page? */
	if (size <= pageofs || size - pageofs >= PAGE_SIZE)
		return 0;

	logfs_unpack_index(page->index, &bix, &level);
	BUG_ON(level != 0);

	err = logfs_segment_read(inode, page, ofs, bix, level);
	if (err)
		return err;

	zero_user_segment(page, size - pageofs, PAGE_CACHE_SIZE);
	return logfs_segment_write(inode, page, shadow);
}

static int logfs_truncate_i0(struct inode *inode, struct page *page,
		struct write_control *wc, u64 size)
{
	struct logfs_shadow *shadow;
	u64 bix;
	level_t level;
	int err = 0;

	logfs_unpack_index(page->index, &bix, &level);
	BUG_ON(level != 0);
	shadow = alloc_shadow(inode, bix, level, wc->ofs);

	err = truncate_data_block(inode, page, wc->ofs, shadow, size);
	if (err) {
		free_shadow(inode, shadow);
		return err;
	}

	logfs_segment_delete(inode, shadow);
	set_iused(inode, shadow);
	fill_shadow_tree(inode, page, shadow);
	wc->ofs = shadow->new_ofs;
	return 0;
}

static int logfs_truncate_direct(struct inode *inode, u64 size)
{
	struct logfs_inode *li = logfs_inode(inode);
	struct write_control wc;
	struct page *page;
	int e;
	int err;

	alloc_inode_block(inode);

	for (e = I0_BLOCKS - 1; e >= 0; e--) {
		if (size > (e + 1) * LOGFS_BLOCKSIZE)
			break;

		wc.ofs = li->li_data[e];
		if (!wc.ofs)
			continue;

		page = logfs_get_write_page(inode, e, 0);
		if (!page)
			return -ENOMEM;
		err = logfs_segment_read(inode, page, wc.ofs, e, 0);
		if (err) {
			logfs_put_write_page(page);
			return err;
		}
		err = logfs_truncate_i0(inode, page, &wc, size);
		logfs_put_write_page(page);
		if (err)
			return err;

		li->li_data[e] = wc.ofs;
	}
	return 0;
}

/* FIXME: these need to become per-sb once we support different blocksizes */
static u64 __logfs_step[] = {
	1,
	I1_BLOCKS,
	I2_BLOCKS,
	I3_BLOCKS,
};

static u64 __logfs_start_index[] = {
	I0_BLOCKS,
	I1_BLOCKS,
	I2_BLOCKS,
	I3_BLOCKS
};

static inline u64 logfs_step(level_t level)
{
	return __logfs_step[(__force u8)level];
}

static inline u64 logfs_factor(u8 level)
{
	return __logfs_step[level] * LOGFS_BLOCKSIZE;
}

static inline u64 logfs_start_index(level_t level)
{
	return __logfs_start_index[(__force u8)level];
}

static void logfs_unpack_raw_index(pgoff_t index, u64 *bix, level_t *level)
{
	logfs_unpack_index(index, bix, level);
	if (*bix <= logfs_start_index(SUBLEVEL(*level)))
		*bix = 0;
}
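
/*
 * logfs_step(level) is the number of data blocks covered by one pointer
 * of an indirect block one level above; logfs_start_index(level) is,
 * roughly, the first block index that needs that depth of indirection.
 * The truncate code below uses both to convert between byte sizes and
 * (bix, level) positions.
 */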

static int __logfs_truncate_rec(struct inode *inode, struct page *ipage,
		struct write_control *this_wc, u64 size)
{
	int truncate_happened = 0;
	int e, err = 0;
	u64 bix, child_bix, next_bix;
	level_t level;
	struct page *page;
	struct write_control child_wc = { /* FIXME: flags */ };

	logfs_unpack_raw_index(ipage->index, &bix, &level);
	err = logfs_segment_read(inode, ipage, this_wc->ofs, bix, level);
	if (err)
		return err;

	for (e = LOGFS_BLOCK_FACTOR - 1; e >= 0; e--) {
		child_bix = bix + e * logfs_step(SUBLEVEL(level));
		next_bix = child_bix + logfs_step(SUBLEVEL(level));
		if (size > next_bix * LOGFS_BLOCKSIZE)
			break;

		child_wc.ofs = pure_ofs(block_get_pointer(ipage, e));
		if (!child_wc.ofs)
			continue;

		page = logfs_get_write_page(inode, child_bix, SUBLEVEL(level));
		if (!page)
			return -ENOMEM;

		if ((__force u8)level > 1)
			err = __logfs_truncate_rec(inode, page, &child_wc, size);
		else
			err = logfs_truncate_i0(inode, page, &child_wc, size);
		logfs_put_write_page(page);
		if (err)
			return err;

		truncate_happened = 1;
		alloc_indirect_block(inode, ipage, 0);
		block_set_pointer(ipage, e, child_wc.ofs);
	}

	if (!truncate_happened) {
		printk(KERN_WARNING "ineffectual truncate (%lx, %lx, %llx)\n",
				inode->i_ino, ipage->index, size);
		return 0;
	}

	this_wc->flags = WF_DELETE;
	if (logfs_block(ipage)->partial)
		this_wc->flags |= WF_WRITE;

	return logfs_write_i0(inode, ipage, this_wc);
}

static int logfs_truncate_rec(struct inode *inode, u64 size)
{
	struct logfs_inode *li = logfs_inode(inode);
	struct write_control wc = {
		.ofs = li->li_data[INDIRECT_INDEX],
	};
	struct page *page;
	int err;

	alloc_inode_block(inode);

	if (!wc.ofs)
		return 0;

	page = logfs_get_write_page(inode, 0, LEVEL(li->li_height));
	if (!page)
		return -ENOMEM;

	err = __logfs_truncate_rec(inode, page, &wc, size);
	logfs_put_write_page(page);
	if (err)
		return err;

	if (li->li_data[INDIRECT_INDEX] != wc.ofs)
		li->li_data[INDIRECT_INDEX] = wc.ofs;
	return 0;
}

static int __logfs_truncate(struct inode *inode, u64 size)
{
	int ret;

	if (size >= logfs_factor(logfs_inode(inode)->li_height))
		return 0;

	ret = logfs_truncate_rec(inode, size);
	if (ret)
		return ret;

	return logfs_truncate_direct(inode, size);
}

/*
 * Truncate, by changing the segment file, can consume a fair amount
 * of resources.  So back off from time to time and do some GC.
 * A step of 8MiB, i.e. 2048 blocks of 4KiB, should be well within
 * safety limits even if every single block resided in a different
 * segment.
 */
#define TRUNCATE_STEP	(8 * 1024 * 1024)
int logfs_truncate(struct inode *inode, u64 target)
{
	struct super_block *sb = inode->i_sb;
	u64 size = i_size_read(inode);
	int err = 0;

	size = ALIGN(size, TRUNCATE_STEP);
	while (size > target) {
		if (size > TRUNCATE_STEP)
			size -= TRUNCATE_STEP;
		else
			size = 0;
		if (size < target)
			size = target;

		logfs_get_wblocks(sb, NULL, 1);
		err = __logfs_truncate(inode, size);
		if (!err)
			err = __logfs_write_inode(inode, NULL, 0);
		logfs_put_wblocks(sb, NULL, 1);
	}

	if (!err)
		err = vmtruncate(inode, target);

	/* I don't trust error recovery yet. */
	WARN_ON(err);
	return err;
}
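
/*
 * Example: truncating a 20MiB file to 0 runs the loop above with
 * intermediate sizes 16MiB, 8MiB and 0 (starting from ALIGN(20MiB, 8MiB)
 * == 24MiB), releasing s_write_mutex between steps so GC can catch up.
 */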

static void move_page_to_inode(struct inode *inode, struct page *page)
{
	struct logfs_inode *li = logfs_inode(inode);
	struct logfs_block *block = logfs_block(page);

	if (!block)
		return;

	log_blockmove("move_page_to_inode(%llx, %llx, %x)\n",
			block->ino, block->bix, block->level);
	BUG_ON(li->li_block);
	block->ops = &inode_block_ops;
	block->inode = inode;
	li->li_block = block;

	block->page = NULL;
	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		page_cache_release(page);
		set_page_private(page, 0);
	}
}

static void move_inode_to_page(struct page *page, struct inode *inode)
{
	struct logfs_inode *li = logfs_inode(inode);
	struct logfs_block *block = li->li_block;

	if (!block)
		return;

	log_blockmove("move_inode_to_page(%llx, %llx, %x)\n",
			block->ino, block->bix, block->level);
	BUG_ON(PagePrivate(page));
	block->ops = &indirect_block_ops;
	block->page = page;

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		page_cache_get(page);
		set_page_private(page, (unsigned long) block);
	}

	block->inode = NULL;
	li->li_block = NULL;
}

int logfs_read_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct logfs_super *super = logfs_super(sb);
	struct inode *master_inode = super->s_master_inode;
	struct page *page;
	struct logfs_disk_inode *di;
	u64 ino = inode->i_ino;

	if (ino << sb->s_blocksize_bits > i_size_read(master_inode))
		return -ENODATA;
	if (!logfs_exist_block(master_inode, ino))
		return -ENODATA;

	page = read_cache_page(master_inode->i_mapping, ino,
			(filler_t *)logfs_readpage, NULL);
	if (IS_ERR(page))
		return PTR_ERR(page);

	di = kmap_atomic(page);
	logfs_disk_to_inode(di, inode);
	kunmap_atomic(di);
	move_page_to_inode(inode, page);
	page_cache_release(page);
	return 0;
}

/* Caller must logfs_put_write_page(page); */
static struct page *inode_to_page(struct inode *inode)
{
	struct inode *master_inode = logfs_super(inode->i_sb)->s_master_inode;
	struct logfs_disk_inode *di;
	struct page *page;

	BUG_ON(inode->i_ino == LOGFS_INO_MASTER);

	page = logfs_get_write_page(master_inode, inode->i_ino, 0);
	if (!page)
		return NULL;

	di = kmap_atomic(page);
	logfs_inode_to_disk(inode, di);
	kunmap_atomic(di);
	move_inode_to_page(page, inode);
	return page;
}

static int do_write_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct inode *master_inode = logfs_super(sb)->s_master_inode;
	loff_t size = (inode->i_ino + 1) << inode->i_sb->s_blocksize_bits;
	struct page *page;
	int err;

	BUG_ON(inode->i_ino == LOGFS_INO_MASTER);
	/* FIXME: lock inode */

	if (i_size_read(master_inode) < size)
		i_size_write(master_inode, size);

	/* TODO: Tell vfs this inode is clean now */

	page = inode_to_page(inode);
	if (!page)
		return -ENOMEM;

	/* FIXME: transaction is part of logfs_block now.  Is that enough? */
	err = logfs_write_buf(master_inode, page, 0);
	if (err)
		move_page_to_inode(inode, page);

	logfs_put_write_page(page);
	return err;
}

static void logfs_mod_segment_entry(struct super_block *sb, u32 segno,
		int write,
		void (*change_se)(struct logfs_segment_entry *, long),
		long arg)
{
	struct logfs_super *super = logfs_super(sb);
	struct inode *inode;
	struct page *page;
	struct logfs_segment_entry *se;
	pgoff_t page_no;
	int child_no;

	page_no = segno >> (sb->s_blocksize_bits - 3);
	child_no = segno & ((sb->s_blocksize >> 3) - 1);

	inode = super->s_segfile_inode;
	page = logfs_get_write_page(inode, page_no, 0);
	BUG_ON(!page); /* FIXME: We need some reserve page for this case */
	if (!PageUptodate(page))
		logfs_read_block(inode, page, WRITE);

	if (write)
		alloc_indirect_block(inode, page, 0);
	se = kmap_atomic(page);
	change_se(se + child_no, arg);
	if (write) {
		logfs_set_alias(sb, logfs_block(page), child_no);
		BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize);
	}
	kunmap_atomic(se);

	logfs_put_write_page(page);
}
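
/*
 * The arithmetic above, assuming 4KiB blocks: each segment entry is
 * 8 bytes, so one block of the segfile holds 512 entries;
 * page_no = segno >> 9 selects the block and child_no = segno & 511
 * the entry within it.
 */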

static void __get_segment_entry(struct logfs_segment_entry *se, long _target)
{
	struct logfs_segment_entry *target = (void *)_target;

	*target = *se;
}

void logfs_get_segment_entry(struct super_block *sb, u32 segno,
		struct logfs_segment_entry *se)
{
	logfs_mod_segment_entry(sb, segno, 0, __get_segment_entry, (long)se);
}

static void __set_segment_used(struct logfs_segment_entry *se, long increment)
{
	u32 valid;

	valid = be32_to_cpu(se->valid);
	valid += increment;
	se->valid = cpu_to_be32(valid);
}

void logfs_set_segment_used(struct super_block *sb, u64 ofs, int increment)
{
	struct logfs_super *super = logfs_super(sb);
	u32 segno = ofs >> super->s_segshift;

	if (!increment)
		return;

	logfs_mod_segment_entry(sb, segno, 1, __set_segment_used, increment);
}

static void __set_segment_erased(struct logfs_segment_entry *se, long ec_level)
{
	se->ec_level = cpu_to_be32(ec_level);
}

void logfs_set_segment_erased(struct super_block *sb, u32 segno, u32 ec,
		gc_level_t gc_level)
{
	u32 ec_level = ec << 4 | (__force u8)gc_level;

	logfs_mod_segment_entry(sb, segno, 1, __set_segment_erased, ec_level);
}

static void __set_segment_reserved(struct logfs_segment_entry *se, long ignore)
{
	se->valid = cpu_to_be32(RESERVED);
}

void logfs_set_segment_reserved(struct super_block *sb, u32 segno)
{
	logfs_mod_segment_entry(sb, segno, 1, __set_segment_reserved, 0);
}

static void __set_segment_unreserved(struct logfs_segment_entry *se,
		long ec_level)
{
	se->valid = 0;
	se->ec_level = cpu_to_be32(ec_level);
}

void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec)
{
	u32 ec_level = ec << 4;

	logfs_mod_segment_entry(sb, segno, 1, __set_segment_unreserved,
			ec_level);
}

int __logfs_write_inode(struct inode *inode, struct page *page, long flags)
{
	struct super_block *sb = inode->i_sb;
	int ret;

	logfs_get_wblocks(sb, page, flags & WF_LOCK);
	ret = do_write_inode(inode);
	logfs_put_wblocks(sb, page, flags & WF_LOCK);
	return ret;
}

static int do_delete_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct inode *master_inode = logfs_super(sb)->s_master_inode;
	struct page *page;
	int ret;

	page = logfs_get_write_page(master_inode, inode->i_ino, 0);
	if (!page)
		return -ENOMEM;

	move_inode_to_page(page, inode);

	logfs_get_wblocks(sb, page, 1);
	ret = __logfs_delete(master_inode, page);
	logfs_put_wblocks(sb, page, 1);

	logfs_put_write_page(page);
	return ret;
}

/*
 * ZOMBIE inodes have already been deleted before and should remain dead,
 * if it weren't for valid checking.  No need to kill them again here.
 */
void logfs_evict_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct logfs_inode *li = logfs_inode(inode);
	struct logfs_block *block = li->li_block;
	struct page *page;

	if (!inode->i_nlink) {
		if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
			li->li_flags |= LOGFS_IF_ZOMBIE;
			if (i_size_read(inode) > 0)
				logfs_truncate(inode, 0);
			do_delete_inode(inode);
		}
	}
	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);

	/* Cheaper version of write_inode.  All changes are concealed in
	 * aliases, which are moved back.  No write to the medium happens.
	 */
	/* Only deleted files may be dirty at this point */
	BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
	if (!block)
		return;
	if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
		block->ops->free_block(inode->i_sb, block);
		return;
	}

	page = inode_to_page(inode);
	BUG_ON(!page); /* FIXME: Use emergency page */
	logfs_put_write_page(page);
}

void btree_write_block(struct logfs_block *block)
{
	struct inode *inode;
	struct page *page;
	int err, cookie;

	inode = logfs_safe_iget(block->sb, block->ino, &cookie);
	page = logfs_get_write_page(inode, block->bix, block->level);

	err = logfs_readpage_nolock(page);
	BUG_ON(err);
	BUG_ON(!PagePrivate(page));
	BUG_ON(logfs_block(page) != block);
	err = __logfs_write_buf(inode, page, 0);
	BUG_ON(err);
	BUG_ON(PagePrivate(page) || page->private);

	logfs_put_write_page(page);
	logfs_safe_iput(inode, cookie);
}

/*
 * logfs_inode_write - write inode or dentry objects.  Copies @count bytes
 * from @buf into the block at index @bix of @inode and writes the block
 * out with @flags, growing i_size as needed.
 */
int logfs_inode_write(struct inode *inode, const void *buf, size_t count,
		loff_t bix, long flags, struct shadow_tree *shadow_tree)
{
	loff_t pos = bix << inode->i_sb->s_blocksize_bits;
	int err;
	struct page *page;
	void *pagebuf;

	BUG_ON(pos & (LOGFS_BLOCKSIZE - 1));
	BUG_ON(count > LOGFS_BLOCKSIZE);
	page = logfs_get_write_page(inode, bix, 0);
	if (!page)
		return -ENOMEM;

	pagebuf = kmap_atomic(page);
	memcpy(pagebuf, buf, count);
	flush_dcache_page(page);
	kunmap_atomic(pagebuf);

	if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE)
		i_size_write(inode, pos + LOGFS_BLOCKSIZE);

	err = logfs_write_buf(inode, page, flags);
	logfs_put_write_page(page);
	return err;
}

int logfs_open_segfile(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct inode *inode;

	inode = logfs_read_meta_inode(sb, LOGFS_INO_SEGFILE);
	if (IS_ERR(inode))
		return PTR_ERR(inode);
	super->s_segfile_inode = inode;
	return 0;
}

int logfs_init_rw(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int min_fill = 3 * super->s_no_blocks;

	INIT_LIST_HEAD(&super->s_object_alias);
	INIT_LIST_HEAD(&super->s_writeback_list);
	mutex_init(&super->s_write_mutex);
	super->s_block_pool = mempool_create_kmalloc_pool(min_fill,
			sizeof(struct logfs_block));
	super->s_shadow_pool = mempool_create_kmalloc_pool(min_fill,
			sizeof(struct logfs_shadow));
	return 0;
}

void logfs_cleanup_rw(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	logfs_mempool_destroy(super->s_block_pool);
	logfs_mempool_destroy(super->s_shadow_pool);
}
2293 }