27 #include <linux/types.h>
28 #include <linux/slab.h>
31 #include <linux/time.h>
32 #include <linux/random.h>
58 #define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
60 static int ocfs2_force_read_journal(
struct inode *
inode);
61 static int ocfs2_recover_node(
struct ocfs2_super *osb,
62 int node_num,
int slot_num);
63 static int __ocfs2_recovery_thread(
void *
arg);
64 static int ocfs2_commit_cache(
struct ocfs2_super *osb);
65 static int __ocfs2_wait_on_mount(
struct ocfs2_super *osb,
int quota);
66 static int ocfs2_journal_toggle_dirty(
struct ocfs2_super *osb,
67 int dirty,
int replayed);
68 static int ocfs2_trylock_journal(
struct ocfs2_super *osb,
70 static int ocfs2_recover_orphans(
struct ocfs2_super *osb,
72 static int ocfs2_commit_thread(
void *
arg);
73 static void ocfs2_queue_recovery_completion(
struct ocfs2_journal *journal,
79 static inline int ocfs2_wait_on_mount(
struct ocfs2_super *osb)
81 return __ocfs2_wait_on_mount(osb, 0);
84 static inline int ocfs2_wait_on_quotas(
struct ocfs2_super *osb)
86 return __ocfs2_wait_on_mount(osb, 1);
141 for (i = 0; i < replay_map->
rm_slots; i++) {
162 for (i = 0; i < replay_map->
rm_slots; i++)
164 ocfs2_queue_recovery_completion(osb->
journal, i,
NULL,
197 rm->
rm_entries = (
unsigned int *)((
char *)rm +
207 static int ocfs2_recovery_thread_running(
struct ocfs2_super *osb)
239 static int __ocfs2_recovery_map_test(
struct ocfs2_super *osb,
240 unsigned int node_num)
247 for (i = 0; i < rm->
rm_used; i++) {
256 static int ocfs2_recovery_map_set(
struct ocfs2_super *osb,
257 unsigned int node_num)
262 if (__ocfs2_recovery_map_test(osb, node_num)) {
277 static void ocfs2_recovery_map_clear(
struct ocfs2_super *osb,
278 unsigned int node_num)
285 for (i = 0; i < rm->
rm_used; i++) {
293 (rm->
rm_used - i - 1) *
sizeof(
unsigned int));
300 static int ocfs2_commit_cache(
struct ocfs2_super *osb)
303 unsigned int flushed;
312 trace_ocfs2_commit_cache_begin(flushed);
327 ocfs2_inc_trans_id(journal);
333 trace_ocfs2_commit_cache_end(journal->
j_trans_id, flushed);
343 journal_t *journal = osb->
journal->j_journal;
348 if (ocfs2_is_hard_readonly(osb))
349 return ERR_PTR(-
EROFS);
355 if (journal_current_handle())
358 sb_start_intwrite(osb->
sb);
363 if (IS_ERR(handle)) {
365 sb_end_intwrite(osb->
sb);
369 if (is_journal_aborted(journal)) {
371 handle = ERR_PTR(-
EROFS);
374 if (!ocfs2_mount_local(osb))
389 nested = handle->h_ref > 1;
396 sb_end_intwrite(osb->
sb);
429 old_nblocks = handle->h_buffer_credits;
431 trace_ocfs2_extend_trans(old_nblocks, nblocks);
433 #ifdef CONFIG_OCFS2_DEBUG_FS
444 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
446 old_nblocks + nblocks);
463 static inline struct ocfs2_triggers *to_ocfs2_trigger(
struct jbd2_buffer_trigger_type *triggers)
468 static void ocfs2_frozen_trigger(
struct jbd2_buffer_trigger_type *triggers,
469 struct buffer_head *bh,
487 static void ocfs2_dq_frozen_trigger(
struct jbd2_buffer_trigger_type *triggers,
488 struct buffer_head *bh,
492 ocfs2_block_dqtrailer(size, data);
507 static void ocfs2_db_frozen_trigger(
struct jbd2_buffer_trigger_type *triggers,
508 struct buffer_head *bh,
523 static void ocfs2_abort_trigger(
struct jbd2_buffer_trigger_type *triggers,
524 struct buffer_head *bh)
527 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
528 "bh->b_blocknr = %llu\n",
530 (
unsigned long long)bh->b_blocknr);
535 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
540 .t_frozen = ocfs2_frozen_trigger,
541 .t_abort = ocfs2_abort_trigger,
548 .t_frozen = ocfs2_frozen_trigger,
549 .t_abort = ocfs2_abort_trigger,
556 .t_frozen = ocfs2_frozen_trigger,
557 .t_abort = ocfs2_abort_trigger,
564 .t_frozen = ocfs2_frozen_trigger,
565 .t_abort = ocfs2_abort_trigger,
572 .t_frozen = ocfs2_db_frozen_trigger,
573 .t_abort = ocfs2_abort_trigger,
579 .t_frozen = ocfs2_frozen_trigger,
580 .t_abort = ocfs2_abort_trigger,
587 .t_frozen = ocfs2_dq_frozen_trigger,
588 .t_abort = ocfs2_abort_trigger,
594 .t_frozen = ocfs2_frozen_trigger,
595 .t_abort = ocfs2_abort_trigger,
602 .t_frozen = ocfs2_frozen_trigger,
603 .t_abort = ocfs2_abort_trigger,
608 static int __ocfs2_journal_access(handle_t *
handle,
610 struct buffer_head *bh,
622 trace_ocfs2_journal_access(
624 (
unsigned long long)bh->b_blocknr, type, bh->b_size);
627 if (!buffer_uptodate(bh)) {
628 mlog(
ML_ERROR,
"giving me a buffer that's not uptodate!\n");
630 (
unsigned long long)bh->b_blocknr);
640 ocfs2_set_ci_lock_trans(osb->
journal, ci);
657 if (!status && ocfs2_meta_ecc(osb) && triggers)
662 mlog(
ML_ERROR,
"Error %d getting %d access to buffer!\n",
669 struct buffer_head *bh,
int type)
671 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
675 struct buffer_head *bh,
int type)
677 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
681 struct buffer_head *bh,
int type)
683 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
688 struct buffer_head *bh,
int type)
690 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
694 struct buffer_head *bh,
int type)
696 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
700 struct buffer_head *bh,
int type)
702 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
706 struct buffer_head *bh,
int type)
708 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
712 struct buffer_head *bh,
int type)
714 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
718 struct buffer_head *bh,
int type)
720 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
724 struct buffer_head *bh,
int type)
726 return __ocfs2_journal_access(handle, ci, bh,
NULL, type);
733 trace_ocfs2_journal_dirty((
unsigned long long)bh->b_blocknr);
739 #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
743 journal_t *journal = osb->
journal->j_journal;
750 journal->j_commit_interval = commit_interval;
752 journal->j_flags |= JBD2_BARRIER;
754 journal->j_flags &= ~JBD2_BARRIER;
762 journal_t *j_journal =
NULL;
764 struct buffer_head *bh =
NULL;
770 osb = journal->
j_osb;
789 OCFS2_I(inode)->ip_open_count++;
805 mlog(
ML_ERROR,
"Journal file size (%lld) is too small!\n",
811 trace_ocfs2_journal_init(inode->
i_size,
812 (
unsigned long long)inode->
i_blocks,
813 OCFS2_I(inode)->ip_clusters);
817 if (j_journal ==
NULL) {
823 trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
843 OCFS2_I(inode)->ip_open_count--;
851 static void ocfs2_bump_recovery_generation(
struct ocfs2_dinode *di)
853 le32_add_cpu(&(di->
id1.
journal1.ij_recovery_generation), 1);
861 static int ocfs2_journal_toggle_dirty(
struct ocfs2_super *osb,
862 int dirty,
int replayed)
867 struct buffer_head *bh = journal->
j_bh;
885 ocfs2_bump_recovery_generation(fe);
904 int num_running_trans = 0;
922 trace_ocfs2_journal_shutdown(num_running_trans);
935 trace_ocfs2_journal_shutdown_wait(osb->
commit_task);
942 if (ocfs2_mount_local(osb)) {
955 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
964 OCFS2_I(inode)->ip_open_count--;
969 brelse(journal->
j_bh);
980 static void ocfs2_clear_journal_error(
struct super_block *
sb,
989 "journal %u.\n", olderr, slot);
990 mlog(
ML_ERROR,
"File system on device %s needs checking.\n",
1005 osb = journal->
j_osb;
1015 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
1029 "error=%d", status);
1054 status = ocfs2_journal_toggle_dirty(journal->
j_osb, 0, 0);
1062 static int ocfs2_recovery_completed(
struct ocfs2_super *osb)
1089 static int ocfs2_force_read_journal(
struct inode *
inode)
1093 u64 v_blkno, p_blkno, p_blocks, num_blocks;
1094 #define CONCURRENT_JOURNAL_FILL 32ULL
1097 memset(bhs, 0,
sizeof(
struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
1099 num_blocks = ocfs2_blocks_for_bytes(inode->
i_sb, inode->
i_size);
1101 while (v_blkno < num_blocks) {
1103 &p_blkno, &p_blocks,
NULL);
1109 if (p_blocks > CONCURRENT_JOURNAL_FILL)
1115 p_blkno, p_blocks, bhs);
1121 for(i = 0; i < p_blocks; i++) {
1126 v_blkno += p_blocks;
1164 trace_ocfs2_complete_recovery(
1165 (
unsigned long long)OCFS2_I(journal->
j_inode)->ip_blkno);
1167 spin_lock(&journal->
j_lock);
1169 spin_unlock(&journal->
j_lock);
1174 ocfs2_wait_on_quotas(osb);
1180 trace_ocfs2_complete_recovery_slot(item->
lri_slot,
1203 ret = ocfs2_recover_orphans(osb, item->
lri_slot);
1218 trace_ocfs2_complete_recovery_end(ret);
1224 static void ocfs2_queue_recovery_completion(
struct ocfs2_journal *journal,
1256 spin_lock(&journal->
j_lock);
1259 spin_unlock(&journal->
j_lock);
1268 if (ocfs2_is_hard_readonly(osb))
1273 ocfs2_queue_recovery_completion(journal, osb->
slot_num,
1289 ocfs2_queue_recovery_completion(osb->
journal,
1298 static int __ocfs2_recovery_thread(
void *
arg)
1300 int status, node_num, slot_num;
1303 int *rm_quota =
NULL;
1304 int rm_quota_used = 0,
i;
1307 status = ocfs2_wait_on_mount(osb);
1339 trace_ocfs2_recovery_thread_node(node_num, slot_num);
1340 if (slot_num == -
ENOENT) {
1351 for (i = 0; i < rm_quota_used && rm_quota[
i] != slot_num; i++);
1352 if (i == rm_quota_used)
1353 rm_quota[rm_quota_used++] = slot_num;
1355 status = ocfs2_recover_node(osb, node_num, slot_num);
1358 ocfs2_recovery_map_clear(osb, node_num);
1361 "Error %d recovering node %d on device (%u,%u)!\n",
1370 trace_ocfs2_recovery_thread_end(status);
1374 status = (status == -
EROFS) ? 0 : status;
1381 for (i = 0; i < rm_quota_used; i++) {
1384 status = PTR_ERR(qrec);
1388 ocfs2_queue_recovery_completion(osb->
journal, rm_quota[i],
1399 if (!status && !ocfs2_recovery_completed(osb)) {
1425 trace_ocfs2_recovery_thread(node_num, osb->
node_num,
1428 -1 : ocfs2_recovery_map_set(osb, node_num));
1448 static int ocfs2_read_journal_inode(
struct ocfs2_super *osb,
1450 struct buffer_head **bh,
1451 struct inode **ret_inode)
1454 struct inode *inode =
NULL;
1476 if (status || !ret_inode)
1486 static int ocfs2_replay_journal(
struct ocfs2_super *osb,
1493 struct inode *inode =
NULL;
1495 journal_t *journal =
NULL;
1496 struct buffer_head *bh =
NULL;
1499 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1506 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1519 trace_ocfs2_replay_journal_recovered(slot_num,
1530 trace_ocfs2_replay_journal_lock_err(status);
1540 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1543 trace_ocfs2_replay_journal_skip(node_num);
1553 "device (%u,%u)\n", node_num, slot_num,
MAJOR(osb->
sb->s_dev),
1558 status = ocfs2_force_read_journal(inode);
1565 if (journal ==
NULL) {
1580 ocfs2_clear_journal_error(osb->
sb, journal, slot_num);
1591 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1595 ocfs2_bump_recovery_generation(fe);
1597 ocfs2_get_recovery_generation(fe);
1610 "device (%u,%u)\n", node_num, slot_num,
MAJOR(osb->
sb->s_dev),
1637 static int ocfs2_recover_node(
struct ocfs2_super *osb,
1638 int node_num,
int slot_num)
1644 trace_ocfs2_recover_node(node_num, slot_num, osb->
node_num);
1650 status = ocfs2_replay_journal(osb, node_num, slot_num);
1652 if (status == -
EBUSY) {
1653 trace_ocfs2_recover_node_skip(slot_num, node_num);
1682 ocfs2_queue_recovery_completion(osb->
journal, slot_num, la_copy,
1694 static int ocfs2_trylock_journal(
struct ocfs2_super *osb,
1698 struct inode *inode =
NULL;
1702 if (inode ==
NULL) {
1736 unsigned int node_num;
1739 struct buffer_head *bh =
NULL;
1747 status = ocfs2_read_journal_inode(osb, i, &bh,
NULL);
1753 gen = ocfs2_get_recovery_generation(di);
1760 trace_ocfs2_mark_dead_nodes(i,
1774 if (__ocfs2_recovery_map_test(osb, node_num)) {
1783 status = ocfs2_trylock_journal(osb, i);
1789 }
else if ((status < 0) && (status != -
EAGAIN)) {
1805 static inline unsigned long ocfs2_orphan_scan_timeout(
void)
1896 os_orphan_scan_work.work);
1903 ocfs2_orphan_scan_timeout());
1938 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1943 ocfs2_orphan_scan_timeout());
1952 static int ocfs2_orphan_filldir(
void *
priv,
const char *
name,
int name_len,
1958 if (name_len == 1 && !
strncmp(
".", name, 1))
1960 if (name_len == 2 && !
strncmp(
"..", name, 2))
1969 trace_ocfs2_orphan_filldir((
unsigned long long)OCFS2_I(iter)->ip_blkno);
1972 OCFS2_I(iter)->ip_next_orphan = p->
head;
1978 static int ocfs2_queue_orphans(
struct ocfs2_super *osb,
1980 struct inode **
head)
1983 struct inode *orphan_dir_inode =
NULL;
1993 if (!orphan_dir_inode) {
2007 ocfs2_orphan_filldir);
2019 iput(orphan_dir_inode);
2023 static int ocfs2_orphan_recovery_can_continue(
struct ocfs2_super *osb,
2034 static void ocfs2_mark_recovering_orphan_dir(
struct ocfs2_super *osb,
2047 ocfs2_orphan_recovery_can_continue(osb, slot));
2053 static void ocfs2_clear_recovering_orphan_dir(
struct ocfs2_super *osb,
2077 static int ocfs2_recover_orphans(
struct ocfs2_super *osb,
2081 struct inode *inode =
NULL;
2085 trace_ocfs2_recover_orphans(slot);
2087 ocfs2_mark_recovering_orphan_dir(osb, slot);
2088 ret = ocfs2_queue_orphans(osb, slot, &inode);
2089 ocfs2_clear_recovering_orphan_dir(osb, slot);
2097 oi = OCFS2_I(inode);
2098 trace_ocfs2_recover_orphans_iput(
2123 static int __ocfs2_wait_on_mount(
struct ocfs2_super *osb,
int quota)
2138 mlog(0,
"mount error, exiting!\n");
2145 static int ocfs2_commit_thread(
void *arg)
2162 status = ocfs2_commit_cache(osb);
2168 "commit_thread: %u transactions pending on "
2186 struct buffer_head *di_bh =
NULL;
2188 int journal_dirty = 0;
2190 for(slot = 0; slot < osb->
max_slots; slot++) {
2191 ret = ocfs2_read_journal_inode(osb, slot, &di_bh,
NULL);
2200 ocfs2_get_recovery_generation(di);