14 #include <linux/kernel.h>
15 #include <linux/export.h>
20 #include <linux/slab.h>
32 #include <linux/sysctl.h>
43 #define MAX_PAUSE max(HZ/5, 1)
49 #define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))
54 #define BANDWIDTH_INTERVAL max(HZ/5, 1)
56 #define RATELIMIT_CALC_SHIFT 10
62 static long ratelimit_pages = 32;
141 static void writeout_period(
unsigned long t);
143 static struct timer_list writeout_period_timer =
145 static unsigned long writeout_period_time = 0;
152 #define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
190 static unsigned long highmem_dirtyable_memory(
unsigned long total)
192 #ifdef CONFIG_HIGHMEM
198 &
NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
209 return min(x, total);
221 static unsigned long global_dirtyable_memory(
void)
229 x -= highmem_dirtyable_memory(x);
245 unsigned long background;
251 available_memory = global_dirtyable_memory();
263 if (background >= dirty)
264 background = dirty / 2;
267 background += background / 4;
270 *pbackground = background;
272 trace_global_dirty_state(background, dirty);
282 static unsigned long zone_dirtyable_memory(
struct zone *
zone)
305 static unsigned long zone_dirty_limit(
struct zone *
zone)
307 unsigned long zone_memory = zone_dirtyable_memory(zone);
313 zone_memory / global_dirtyable_memory();
332 unsigned long limit = zone_dirty_limit(zone);
340 void __user *
buffer,
size_t *lenp,
346 if (ret == 0 && write)
352 void __user *
buffer,
size_t *lenp,
358 if (ret == 0 && write)
364 void __user *
buffer,
size_t *lenp,
379 void __user *
buffer,
size_t *lenp,
393 static unsigned long wp_next_time(
unsigned long cur_time)
412 if (!
unlikely(writeout_period_time)) {
419 writeout_period_time = wp_next_time(jiffies);
420 mod_timer(&writeout_period_timer, writeout_period_time);
429 __bdi_writeout_inc(bdi);
438 long *numerator,
long *denominator)
441 numerator, denominator);
448 static void writeout_period(
unsigned long t)
450 int miss_periods = (
jiffies - writeout_period_time) /
454 writeout_period_time = wp_next_time(writeout_period_time +
456 mod_timer(&writeout_period_timer, writeout_period_time);
462 writeout_period_time = 0;
471 static unsigned int bdi_min_ratio;
482 if (bdi_min_ratio + min_ratio < 100) {
483 bdi_min_ratio += min_ratio;
514 static unsigned long dirty_freerun_ceiling(
unsigned long thresh,
515 unsigned long bg_thresh)
517 return (thresh + bg_thresh) / 2;
520 static unsigned long hard_dirty_limit(
unsigned long thresh)
550 long numerator, denominator;
555 bdi_writeout_fraction(bdi, &numerator, &denominator);
557 bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
558 bdi_dirty *= numerator;
559 do_div(bdi_dirty, denominator);
561 bdi_dirty += (dirty * bdi->
min_ratio) / 100;
562 if (bdi_dirty > (dirty * bdi->
max_ratio) / 100)
563 bdi_dirty = dirty * bdi->
max_ratio / 100;
644 unsigned long thresh,
645 unsigned long bg_thresh,
647 unsigned long bdi_thresh,
648 unsigned long bdi_dirty)
651 unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
652 unsigned long limit = hard_dirty_limit(thresh);
653 unsigned long x_intercept;
654 unsigned long setpoint;
655 unsigned long bdi_setpoint;
679 setpoint = (freerun +
limit) / 2;
681 limit - setpoint + 1);
727 bdi_thresh =
max(bdi_thresh, (limit - dirty) / 8);
732 x = div_u64((
u64)bdi_thresh << 16, thresh + 1);
733 bdi_setpoint = setpoint * (
u64)x >> 16;
742 span = (thresh - bdi_thresh + 8 * write_bw) * (
u64)x >> 16;
743 x_intercept = bdi_setpoint + span;
745 if (bdi_dirty < x_intercept - span / 4) {
746 pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
747 x_intercept - bdi_setpoint + 1);
756 x_intercept = bdi_thresh / 2;
757 if (bdi_dirty < x_intercept) {
758 if (bdi_dirty > x_intercept / 8)
759 pos_ratio = div_u64(pos_ratio * x_intercept, bdi_dirty);
768 unsigned long elapsed,
769 unsigned long written)
791 bw >>=
ilog2(period);
796 if (avg > old && old >= (
unsigned long)bw)
797 avg -= (avg - old) >> 3;
799 if (avg < old && old <= (
unsigned long)bw)
800 avg += (old - avg) >> 3;
815 static void update_dirty_limit(
unsigned long thresh,
unsigned long dirty)
822 if (limit < thresh) {
832 thresh =
max(thresh, dirty);
833 if (limit > thresh) {
834 limit -= (limit - thresh) >> 5;
842 static void global_update_bandwidth(
unsigned long thresh,
847 static unsigned long update_time;
855 spin_lock(&dirty_lock);
857 update_dirty_limit(thresh, dirty);
860 spin_unlock(&dirty_lock);
870 unsigned long thresh,
871 unsigned long bg_thresh,
873 unsigned long bdi_thresh,
874 unsigned long bdi_dirty,
875 unsigned long dirtied,
876 unsigned long elapsed)
878 unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
879 unsigned long limit = hard_dirty_limit(thresh);
880 unsigned long setpoint = (freerun +
limit) / 2;
883 unsigned long dirty_rate;
884 unsigned long task_ratelimit;
885 unsigned long balanced_dirty_ratelimit;
886 unsigned long pos_ratio;
896 pos_ratio = bdi_position_ratio(bdi, thresh, bg_thresh, dirty,
897 bdi_thresh, bdi_dirty);
901 task_ratelimit = (
u64)dirty_ratelimit *
902 pos_ratio >> RATELIMIT_CALC_SHIFT;
935 balanced_dirty_ratelimit = div_u64((
u64)task_ratelimit * write_bw,
940 if (
unlikely(balanced_dirty_ratelimit > write_bw))
941 balanced_dirty_ratelimit = write_bw;
978 if (dirty < setpoint) {
980 min(balanced_dirty_ratelimit, task_ratelimit));
981 if (dirty_ratelimit < x)
982 step = x - dirty_ratelimit;
985 max(balanced_dirty_ratelimit, task_ratelimit));
986 if (dirty_ratelimit > x)
987 step = dirty_ratelimit -
x;
995 step >>= dirty_ratelimit / (2 * step + 1);
999 step = (step + 7) / 8;
1001 if (dirty_ratelimit < balanced_dirty_ratelimit)
1002 dirty_ratelimit +=
step;
1004 dirty_ratelimit -=
step;
1009 trace_bdi_dirty_ratelimit(bdi, dirty_rate, task_ratelimit);
1013 unsigned long thresh,
1014 unsigned long bg_thresh,
1015 unsigned long dirty,
1016 unsigned long bdi_thresh,
1017 unsigned long bdi_dirty,
1022 unsigned long dirtied;
1023 unsigned long written;
1042 global_update_bandwidth(thresh, dirty, now);
1043 bdi_update_dirty_ratelimit(bdi, thresh, bg_thresh, dirty,
1044 bdi_thresh, bdi_dirty,
1047 bdi_update_write_bandwidth(bdi, elapsed, written);
1056 unsigned long thresh,
1057 unsigned long bg_thresh,
1058 unsigned long dirty,
1059 unsigned long bdi_thresh,
1060 unsigned long bdi_dirty,
1065 spin_lock(&bdi->
wb.list_lock);
1067 bdi_thresh, bdi_dirty, start_time);
1068 spin_unlock(&bdi->
wb.list_lock);
1079 static unsigned long dirty_poll_interval(
unsigned long dirty,
1080 unsigned long thresh)
1083 return 1
UL << (
ilog2(thresh - dirty) >> 1);
1089 unsigned long bdi_dirty)
1109 unsigned long task_ratelimit,
1110 unsigned long dirty_ratelimit,
1120 t =
max(1,
HZ / 100);
1129 t += (hi - lo) * (10 *
HZ) / 1024;
1149 t =
min(t, 1 + max_pause / 2);
1169 pause =
HZ * pages / (task_ratelimit + 1);
1170 if (pause > max_pause) {
1175 *nr_dirtied_pause =
pages;
1190 unsigned long pages_dirtied)
1192 unsigned long nr_reclaimable;
1193 unsigned long bdi_reclaimable;
1194 unsigned long nr_dirty;
1195 unsigned long bdi_dirty;
1196 unsigned long freerun;
1197 unsigned long background_thresh;
1198 unsigned long dirty_thresh;
1199 unsigned long bdi_thresh;
1205 bool dirty_exceeded =
false;
1206 unsigned long task_ratelimit;
1207 unsigned long dirty_ratelimit;
1208 unsigned long pos_ratio;
1210 unsigned long start_time =
jiffies;
1223 nr_dirty = nr_reclaimable + global_page_state(
NR_WRITEBACK);
1232 freerun = dirty_freerun_ceiling(dirty_thresh,
1234 if (nr_dirty <= freerun) {
1235 current->dirty_paused_when = now;
1238 dirty_poll_interval(nr_dirty, dirty_thresh);
1270 if (bdi_thresh < 2 * bdi_stat_error(bdi)) {
1272 bdi_dirty = bdi_reclaimable +
1276 bdi_dirty = bdi_reclaimable +
1280 dirty_exceeded = (bdi_dirty > bdi_thresh) &&
1281 (nr_dirty > dirty_thresh);
1285 bdi_update_bandwidth(bdi, dirty_thresh, background_thresh,
1286 nr_dirty, bdi_thresh, bdi_dirty,
1290 pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
1291 background_thresh, nr_dirty,
1292 bdi_thresh, bdi_dirty);
1293 task_ratelimit = ((
u64)dirty_ratelimit * pos_ratio) >>
1295 max_pause = bdi_max_pause(bdi, bdi_dirty);
1296 min_pause = bdi_min_pause(bdi, max_pause,
1297 task_ratelimit, dirty_ratelimit,
1300 if (
unlikely(task_ratelimit == 0)) {
1305 period =
HZ * pages_dirtied / task_ratelimit;
1307 if (
current->dirty_paused_when)
1308 pause -= now -
current->dirty_paused_when;
1316 if (pause < min_pause) {
1317 trace_balance_dirty_pages(bdi,
1330 current->dirty_paused_when = now;
1332 }
else if (period) {
1335 }
else if (
current->nr_dirtied_pause <= pages_dirtied)
1336 current->nr_dirtied_pause += pages_dirtied;
1341 now +=
min(pause - max_pause, max_pause);
1346 trace_balance_dirty_pages(bdi,
1363 current->nr_dirtied_pause = nr_dirtied_pause;
1382 if (bdi_dirty <= bdi_stat_error(bdi))
1385 if (fatal_signal_pending(
current))
1406 if (nr_reclaimable > background_thresh)
1416 balance_dirty_pages_ratelimited(mapping);
1453 unsigned long nr_pages_dirtied)
1459 if (!bdi_cap_account_dirty(bdi))
1462 ratelimit =
current->nr_dirtied_pause;
1476 else if (
unlikely(*p >= ratelimit_pages)) {
1486 if (*p > 0 &&
current->nr_dirtied < ratelimit) {
1487 nr_pages_dirtied =
min(*p, ratelimit -
current->nr_dirtied);
1488 *p -= nr_pages_dirtied;
1489 current->nr_dirtied += nr_pages_dirtied;
1494 balance_dirty_pages(mapping,
current->nr_dirtied);
1500 unsigned long background_thresh;
1501 unsigned long dirty_thresh;
1505 dirty_thresh = hard_dirty_limit(dirty_thresh);
1511 dirty_thresh += dirty_thresh / 10;
1539 void laptop_mode_timer_fn(
unsigned long data)
1569 void laptop_sync_completion(
void)
1595 unsigned long background_thresh;
1596 unsigned long dirty_thresh;
1600 if (ratelimit_pages < 16)
1601 ratelimit_pages = 16;
1620 .notifier_call = ratelimit_handler,
1645 register_cpu_notifier(&ratelimit_nb);
1670 #define WRITEBACK_TAG_BATCH 4096
1671 unsigned long tagged;
1721 int range_whole = 0;
1724 pagevec_init(&pvec, 0);
1748 while (!done && (index <= end)) {
1756 for (i = 0; i < nr_pages; i++) {
1766 if (page->
index > end) {
1775 done_index = page->
index;
1793 if (!PageDirty(page)) {
1795 goto continue_unlock;
1798 if (PageWriteback(page)) {
1800 wait_on_page_writeback(page);
1802 goto continue_unlock;
1805 BUG_ON(PageWriteback(page));
1807 goto continue_unlock;
1810 ret = (*writepage)(
page, wbc,
data);
1825 done_index = page->index + 1;
1837 if (--wbc->nr_to_write <= 0 &&
1843 pagevec_release(&pvec);
1846 if (!cycled && !done) {
1872 int ret = mapping->
a_ops->writepage(page, wbc);
1873 mapping_set_error(mapping, ret);
1892 if (!mapping->
a_ops->writepage)
1909 if (mapping->
a_ops->writepages)
1910 ret = mapping->
a_ops->writepages(mapping, wbc);
1934 BUG_ON(!PageLocked(page));
1937 wait_on_page_writeback(page);
1941 ret = mapping->
a_ops->writepage(page, &wbc);
1942 if (ret == 0 && wait) {
1943 wait_on_page_writeback(page);
1944 if (PageError(page))
1960 if (!PageDirty(page))
1961 return !TestSetPageDirty(page);
1971 if (mapping_cap_account_dirty(mapping)) {
2011 if (!TestSetPageDirty(page)) {
2019 mapping2 = page_mapping(page);
2021 BUG_ON(mapping2 != mapping);
2022 WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
2028 if (mapping->
host) {
2048 if (mapping && mapping_cap_account_dirty(mapping)) {
2085 int (*spd)(
struct page *) = mapping->
a_ops->set_page_dirty;
2096 ClearPageReclaim(page);
2101 return (*spd)(
page);
2103 if (!PageDirty(page)) {
2104 if (!TestSetPageDirty(page))
2150 BUG_ON(!PageLocked(page));
2152 if (mapping && mapping_cap_account_dirty(mapping)) {
2190 if (TestClearPageDirty(page)) {
2198 return TestClearPageDirty(page);
2209 unsigned long flags;
2212 ret = TestClearPageWriteback(page);
2217 if (bdi_cap_account_writeback(bdi)) {
2219 __bdi_writeout_inc(bdi);
2222 spin_unlock_irqrestore(&mapping->
tree_lock, flags);
2224 ret = TestClearPageWriteback(page);
2240 unsigned long flags;
2243 ret = TestSetPageWriteback(page);
2248 if (bdi_cap_account_writeback(bdi))
2251 if (!PageDirty(page))
2258 spin_unlock_irqrestore(&mapping->
tree_lock, flags);
2260 ret = TestSetPageWriteback(page);