20 #include <linux/sched.h>
21 #include <linux/slab.h>
/*
 * Number of bits by which an unsigned long exceeds 32: evaluates to 0 when
 * BITS_PER_LONG is 32 and to 32 when BITS_PER_LONG is 64.
 */
59 #define ARCH_SHIFT (BITS_PER_LONG - 32)
/*
 * Bit 31 moved up by ARCH_SHIFT, i.e. bit (BITS_PER_LONG - 1), the topmost
 * bit of an unsigned long.
 * NOTE(review): presumably flags an index as referring to an indirect
 * block - confirm against the users of this macro.
 */
60 #define INDIRECT_BIT (0x80000000UL << ARCH_SHIFT)
/*
 * 28 + ARCH_SHIFT, which equals BITS_PER_LONG - 4, leaving the four bits
 * from bit 28 (32-bit long) or bit 60 (64-bit long) upwards above the shift.
 * NOTE(review): presumably the bit position of the level field inside a
 * packed index - confirm against the pack/unpack helpers.
 */
61 #define LEVEL_SHIFT (28 + ARCH_SHIFT)
62 static inline pgoff_t first_indirect_block(
void)
92 *level =
LEVEL(__level);
94 *bix = adjust_bix(*bix, *level);
150 static void logfs_inode_to_disk(
struct inode *inode,
struct logfs_disk_inode*di)
187 static void __logfs_set_blocks(
struct inode *inode)
202 __logfs_set_blocks(inode);
209 BUG_ON(!PageLocked(page));
222 static void preunlock_page(
struct super_block *sb,
struct page *page,
int lock)
226 BUG_ON(!PageLocked(page));
252 prelock_page(sb, page, lock);
267 preunlock_page(sb, page, lock);
274 static struct page *logfs_get_read_page(
struct inode *inode,
u64 bix,
281 static void logfs_put_read_page(
struct page *page)
287 static void logfs_lock_write_page(
struct page *page)
291 while (
unlikely(!trylock_page(page))) {
292 if (loop++ > 0x1000) {
307 BUG_ON(!PageLocked(page));
310 static struct page *logfs_get_write_page(
struct inode *inode,
u64 bix,
321 page = __page_cache_alloc(
GFP_NOFS);
331 }
else logfs_lock_write_page(page);
332 BUG_ON(!PageLocked(page));
336 static void logfs_unlock_write_page(
struct page *page)
342 static void logfs_put_write_page(
struct page *page)
344 logfs_unlock_write_page(page);
348 static struct page *logfs_get_page(
struct inode *inode,
u64 bix,
level_t level,
352 return logfs_get_read_page(inode, bix, level);
354 return logfs_get_write_page(inode, bix, level);
357 static void logfs_put_page(
struct page *page,
int rw)
360 logfs_put_read_page(page);
362 logfs_put_write_page(page);
365 static unsigned long __get_bits(
u64 val,
int skip,
int no)
380 static inline void init_shadow_tree(
struct super_block *sb,
397 logfs_lock_write_page(page);
399 logfs_unlock_write_page(page);
421 inode = block->
inode;
436 static unsigned long fnb(
const unsigned long *
addr,
442 static __be64 inode_val0(
struct inode *inode)
461 static int inode_write_alias(
struct super_block *sb,
464 struct inode *inode = block->
inode;
472 for (pos = 0; ; pos++) {
479 val = inode_val0(inode);
497 err = write_one_alias(sb, ino, bix, level, pos, val);
503 static int indirect_write_alias(
struct super_block *sb,
507 struct page *page = block->
page;
513 for (pos = 0; ; pos++) {
518 ino = page->
mapping->host->i_ino;
523 err = write_one_alias(sb, ino, bix, level, pos, val);
552 struct inode *inode = block->
inode;
558 static void indirect_free_block(
struct super_block *sb,
561 struct page *page = block->
page;
563 if (PagePrivate(page)) {
564 ClearPagePrivate(page);
566 set_page_private(page, 0);
573 .write_block = inode_write_block,
574 .free_block = inode_free_block,
575 .write_alias = inode_write_alias,
579 .write_block = indirect_write_block,
580 .free_block = indirect_free_block,
581 .write_alias = indirect_write_alias,
591 memset(block, 0,
sizeof(*block));
601 static void alloc_inode_block(
struct inode *inode)
612 block->
ops = &inode_block_ops;
616 __be64 *array,
int page_is_empty)
624 if (page->
index < first_indirect_block()) {
628 if (page->
index == first_indirect_block()) {
633 if (!page_is_empty) {
644 static void alloc_data_block(
struct inode *inode,
struct page *page)
650 if (PagePrivate(page))
657 SetPagePrivate(page);
659 set_page_private(page, (
unsigned long) block);
664 static void alloc_indirect_block(
struct inode *inode,
struct page *page,
670 if (PagePrivate(page))
673 alloc_data_block(inode, page);
681 static void block_set_pointer(
struct page *page,
int index,
u64 ptr)
692 SetPageUptodate(page);
696 block->
partial += !!ptr - !!oldptr;
699 static u64 block_get_pointer(
struct page *page,
int index)
710 static int logfs_read_empty(
struct page *page)
716 static int logfs_read_direct(
struct inode *inode,
struct page *page)
724 return logfs_read_empty(page);
729 static int logfs_read_loop(
struct inode *inode,
struct page *page,
740 return logfs_read_empty(page);
743 return logfs_read_empty(page);
748 ipage = logfs_get_page(inode, bix, level, rw_context);
754 logfs_put_read_page(ipage);
758 bofs = block_get_pointer(ipage, get_bits(bix,
SUBLEVEL(level)));
759 logfs_put_page(ipage, rw_context);
761 return logfs_read_empty(page);
767 static int logfs_read_block(
struct inode *inode,
struct page *page,
773 return logfs_read_direct(inode, page);
774 return logfs_read_loop(inode, page, rw_context);
777 static int logfs_exist_loop(
struct inode *inode,
u64 bix)
791 ipage = logfs_get_read_page(inode, bix, level);
797 logfs_put_read_page(ipage);
801 bofs = block_get_pointer(ipage, get_bits(bix,
SUBLEVEL(level)));
802 logfs_put_read_page(ipage);
816 return logfs_exist_loop(inode, bix);
819 static u64 seek_holedata_direct(
struct inode *inode,
u64 bix,
int data)
824 if (data ^ (li->
li_data[bix] == 0))
829 static u64 seek_holedata_loop(
struct inode *inode,
u64 bix,
int data)
842 page = logfs_get_read_page(inode, bix, level);
848 logfs_put_read_page(page);
852 slot = get_bits(bix,
SUBLEVEL(level));
855 if (data && (rblock[slot] != 0))
861 bix &= ~(increment - 1);
865 logfs_put_read_page(page);
870 logfs_put_read_page(page);
891 if (bix < I0_BLOCKS) {
892 bix = seek_holedata_direct(inode, bix, 0);
904 bix = seek_holedata_loop(inode, bix, 0);
916 static u64 __logfs_seek_data(
struct inode *inode,
u64 bix)
920 if (bix < I0_BLOCKS) {
921 bix = seek_holedata_direct(inode, bix, 1);
930 return seek_holedata_loop(inode, bix, 1);
949 ret = __logfs_seek_data(inode, bix);
961 static int __logfs_is_valid_loop(
struct inode *inode,
u64 bix,
970 page = logfs_get_write_page(inode, bix, level);
975 logfs_put_write_page(page);
979 bofs = block_get_pointer(page, get_bits(bix,
SUBLEVEL(level)));
980 logfs_put_write_page(page);
990 static int logfs_is_valid_loop(
struct inode *inode,
u64 bix,
u64 ofs)
1004 return __logfs_is_valid_loop(inode, bix, ofs, bofs);
1007 static int __logfs_is_valid_block(
struct inode *inode,
u64 bix,
u64 ofs)
1014 if (bix < I0_BLOCKS)
1015 return logfs_is_valid_direct(li, bix, ofs);
1016 return logfs_is_valid_loop(inode, bix, ofs);
1035 struct inode *
inode;
1049 ret = __logfs_is_valid_block(inode, bix, ofs);
1065 struct inode *inode = page->
mapping->host;
1068 ret = logfs_read_block(inode, page,
READ);
1071 ClearPageUptodate(page);
1074 SetPageUptodate(page);
1075 ClearPageError(page);
1082 static int logfs_reserve_bytes(
struct inode *inode,
int bytes)
1091 if (available < bytes)
1115 block->
ops->write_block(block);
1118 alloc_data_block(inode, page);
1133 static void logfs_handle_transaction(
struct inode *inode,
1150 switch (ta->
state) {
1207 static int logfs_reserve_blocks(
struct inode *inode,
int blocks)
1217 static struct logfs_shadow *alloc_shadow(
struct inode *inode,
u64 bix,
1224 memset(shadow, 0,
sizeof(*shadow));
1244 err = btree_insert32(&tree->
segment_map, segno, (
void *)1,
1274 static void fill_shadow_tree(
struct inode *inode,
struct page *page,
1281 if (PagePrivate(page)) {
1285 logfs_handle_transaction(inode, block->
ta);
1286 block->
ops->free_block(inode->
i_sb, block);
1290 btree_insert64(&tree->
old, shadow->
old_ofs, shadow,
1293 btree_insert64(&tree->
new, shadow->
new_ofs, shadow,
1326 static void set_iused(
struct inode *inode,
struct logfs_shadow *shadow)
1333 alloc_inode_block(inode);
1335 __logfs_set_blocks(inode);
1340 static int logfs_write_i0(
struct inode *inode,
struct page *page,
1350 if (logfs_reserve_blocks(inode, 1))
1353 shadow = alloc_shadow(inode, bix, level, wc->
ofs);
1359 free_shadow(inode, shadow);
1363 set_iused(inode, shadow);
1366 alloc_indirect_block(inode, page, 0);
1369 fill_shadow_tree(inode, page, shadow);
1371 if (wc->
ofs && full)
1376 static int logfs_write_direct(
struct inode *inode,
struct page *page,
1386 alloc_inode_block(inode);
1388 err = logfs_write_i0(inode, page, &wc);
1398 static int ptr_change(
u64 ofs,
struct page *page)
1401 int empty0, empty1, full0, full1;
1405 if (empty0 != empty1)
1416 static int __logfs_write_rec(
struct inode *inode,
struct page *page,
1420 int ret, page_empty = 0;
1421 int child_no = get_bits(bix,
SUBLEVEL(level));
1427 ipage = logfs_get_write_page(inode, bix, level);
1435 }
else if (!PageUptodate(ipage)) {
1437 logfs_read_empty(ipage);
1440 child_wc.
ofs = block_get_pointer(ipage, child_no);
1443 ret = __logfs_write_rec(inode, page, &child_wc, bix,
1446 ret = logfs_write_i0(inode, page, &child_wc);
1451 alloc_indirect_block(inode, ipage, page_empty);
1452 block_set_pointer(ipage, child_no, child_wc.
ofs);
1458 if (!this_wc->
ofs || ptr_change(this_wc->
ofs, ipage))
1459 ret = logfs_write_i0(inode, ipage, this_wc);
1463 logfs_put_write_page(ipage);
1467 static int logfs_write_rec(
struct inode *inode,
struct page *page,
1477 alloc_inode_block(inode);
1480 ret = __logfs_write_rec(inode, page, &wc, bix, target_level,
1483 ret = logfs_write_i0(inode, page, &wc);
1497 alloc_inode_block(inode);
1505 if (block && block->
ta)
1509 static int grow_inode(
struct inode *inode,
u64 bix,
level_t level)
1521 page = logfs_get_write_page(inode, I0_BLOCKS + 1,
1525 logfs_read_empty(page);
1526 alloc_indirect_block(inode, page, 1);
1528 err = logfs_write_i0(inode, page, &wc);
1529 logfs_put_write_page(page);
1540 static int __logfs_write_buf(
struct inode *inode,
struct page *page,
long flags)
1555 if (index < I0_BLOCKS)
1556 return logfs_write_direct(inode, page, flags);
1558 bix = adjust_bix(bix, level);
1559 err = grow_inode(inode, bix, level);
1562 return logfs_write_rec(inode, page, bix, level, flags);
1571 ret = __logfs_write_buf(inode, page, flags);
1576 static int __logfs_delete(
struct inode *inode,
struct page *page)
1583 if (page->
index < I0_BLOCKS)
1584 return logfs_write_direct(inode, page, flags);
1585 err = grow_inode(inode, page->
index, 0);
1588 return logfs_write_rec(inode, page, page->
index, 0, flags);
1598 page = logfs_get_read_page(inode, index, 0);
1603 ret = __logfs_delete(inode, page);
1606 logfs_put_read_page(page);
1614 level_t level = shrink_level(gc_level);
1618 page = logfs_get_write_page(inode, bix, level);
1625 alloc_indirect_block(inode, page, 0);
1627 if (!err && shrink_level(gc_level) == 0) {
1640 logfs_put_write_page(page);
1644 static int truncate_data_block(
struct inode *inode,
struct page *page,
1647 loff_t pageofs = page->
index << inode->
i_sb->s_blocksize_bits;
1653 if (size <= pageofs || size - pageofs >=
PAGE_SIZE)
1667 static int logfs_truncate_i0(
struct inode *inode,
struct page *page,
1677 shadow = alloc_shadow(inode, bix, level, wc->
ofs);
1679 err = truncate_data_block(inode, page, wc->
ofs, shadow, size);
1681 free_shadow(inode, shadow);
1686 set_iused(inode, shadow);
1687 fill_shadow_tree(inode, page, shadow);
1692 static int logfs_truncate_direct(
struct inode *inode,
u64 size)
1700 alloc_inode_block(inode);
1702 for (e = I0_BLOCKS - 1; e >= 0; e--) {
1710 page = logfs_get_write_page(inode, e, 0);
1715 logfs_put_write_page(page);
1718 err = logfs_truncate_i0(inode, page, &wc, size);
1719 logfs_put_write_page(page);
1729 static u64 __logfs_step[] = {
1736 static u64 __logfs_start_index[] = {
1748 static inline u64 logfs_factor(u8 level)
1753 static inline u64 logfs_start_index(
level_t level)
1755 return __logfs_start_index[(
__force u8)level];
1761 if (*bix <= logfs_start_index(
SUBLEVEL(*level)))
1765 static int __logfs_truncate_rec(
struct inode *inode,
struct page *ipage,
1768 int truncate_happened = 0;
1770 u64 bix, child_bix, next_bix;
1775 logfs_unpack_raw_index(ipage->
index, &bix, &level);
1781 child_bix = bix + e * logfs_step(
SUBLEVEL(level));
1782 next_bix = child_bix + logfs_step(
SUBLEVEL(level));
1786 child_wc.
ofs =
pure_ofs(block_get_pointer(ipage, e));
1790 page = logfs_get_write_page(inode, child_bix,
SUBLEVEL(level));
1795 err = __logfs_truncate_rec(inode, page, &child_wc, size);
1797 err = logfs_truncate_i0(inode, page, &child_wc, size);
1798 logfs_put_write_page(page);
1802 truncate_happened = 1;
1803 alloc_indirect_block(inode, ipage, 0);
1804 block_set_pointer(ipage, e, child_wc.
ofs);
1807 if (!truncate_happened) {
1808 printk(
"ineffectual truncate (%lx, %lx, %llx)\n", inode->
i_ino, ipage->
index, size);
1816 return logfs_write_i0(inode, ipage, this_wc);
1819 static int logfs_truncate_rec(
struct inode *inode,
u64 size)
1828 alloc_inode_block(inode);
1837 err = __logfs_truncate_rec(inode, page, &wc, size);
1838 logfs_put_write_page(page);
1847 static int __logfs_truncate(
struct inode *inode,
u64 size)
1851 if (size >= logfs_factor(
logfs_inode(inode)->li_height))
1854 ret = logfs_truncate_rec(inode, size);
1858 return logfs_truncate_direct(inode, size);
/*
 * 8 * 1024 * 1024 = 8388608.
 * NOTE(review): presumably the maximum number of bytes removed per pass of
 * the truncate loop - confirm against the code that uses this macro.
 */
1867 #define TRUNCATE_STEP (8 * 1024 * 1024)
1871 u64 size = i_size_read(inode);
1875 while (size > target) {
1884 err = __logfs_truncate(inode, size);
1898 static void move_page_to_inode(
struct inode *inode,
struct page *page)
1909 block->
ops = &inode_block_ops;
1914 if (PagePrivate(page)) {
1915 ClearPagePrivate(page);
1917 set_page_private(page, 0);
1921 static void move_inode_to_page(
struct page *page,
struct inode *inode)
1931 BUG_ON(PagePrivate(page));
1935 if (!PagePrivate(page)) {
1936 SetPagePrivate(page);
1938 set_page_private(page, (
unsigned long) block);
1962 return PTR_ERR(page);
1965 logfs_disk_to_inode(di, inode);
1967 move_page_to_inode(inode, page);
1973 static struct page *inode_to_page(
struct inode *inode)
1981 page = logfs_get_write_page(master_inode, inode->
i_ino, 0);
1986 logfs_inode_to_disk(inode, di);
1988 move_inode_to_page(page, inode);
1992 static int do_write_inode(
struct inode *inode)
1996 loff_t size = (inode->
i_ino + 1) << inode->
i_sb->s_blocksize_bits;
2003 if (i_size_read(master_inode) <
size)
2004 i_size_write(master_inode, size);
2008 page = inode_to_page(inode);
2015 move_page_to_inode(inode, page);
2017 logfs_put_write_page(page);
2021 static void logfs_mod_segment_entry(
struct super_block *sb,
u32 segno,
2027 struct inode *
inode;
2037 page = logfs_get_write_page(inode, page_no, 0);
2039 if (!PageUptodate(page))
2040 logfs_read_block(inode, page,
WRITE);
2043 alloc_indirect_block(inode, page, 0);
2045 change_se(se + child_no,
arg);
2052 logfs_put_write_page(page);
2065 logfs_mod_segment_entry(sb, segno, 0, __get_segment_entry, (
long)se);
2085 logfs_mod_segment_entry(sb, segno, 1, __set_segment_used, increment);
2098 logfs_mod_segment_entry(sb, segno, 1, __set_segment_erased, ec_level);
2108 logfs_mod_segment_entry(sb, segno, 1, __set_segment_reserved, 0);
2120 u32 ec_level = ec << 4;
2122 logfs_mod_segment_entry(sb, segno, 1, __set_segment_unreserved,
2132 ret = do_write_inode(inode);
2137 static int do_delete_inode(
struct inode *inode)
2144 page = logfs_get_write_page(master_inode, inode->
i_ino, 0);
2148 move_inode_to_page(page, inode);
2151 ret = __logfs_delete(master_inode, page);
2154 logfs_put_write_page(page);
2172 if (i_size_read(inode) > 0)
2174 do_delete_inode(inode);
2188 block->
ops->free_block(inode->
i_sb, block);
2192 page = inode_to_page(inode);
2194 logfs_put_write_page(page);
2199 struct inode *
inode;
2204 page = logfs_get_write_page(inode, block->
bix, block->
level);
2208 BUG_ON(!PagePrivate(page));
2210 err = __logfs_write_buf(inode, page, 0);
2214 logfs_put_write_page(page);
2236 loff_t pos = bix << inode->
i_sb->s_blocksize_bits;
2241 BUG_ON(pos & (LOGFS_BLOCKSIZE-1));
2242 BUG_ON(count > LOGFS_BLOCKSIZE);
2243 page = logfs_get_write_page(inode, bix, 0);
2248 memcpy(pagebuf, buf, count);
2252 if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE)
2253 i_size_write(inode, pos + LOGFS_BLOCKSIZE);
2256 logfs_put_write_page(page);
2263 struct inode *
inode;
2267 return PTR_ERR(inode);
2280 super->
s_block_pool = mempool_create_kmalloc_pool(min_fill,
2282 super->
s_shadow_pool = mempool_create_kmalloc_pool(min_fill,