#include <linux/slab.h>
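/*
 * Writeback congestion turns on once the amount of dirty data reaches
 * congestion_kb, and turns off again once it falls to 3/4 of that (the
 * OFF threshold subtracts a quarter of the ON threshold).  The
 * >> (PAGE_SHIFT-10) converts a kilobyte count into a page count.
 */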
#define CONGESTION_ON_THRESH(congestion_kb) (congestion_kb >> (PAGE_SHIFT-10))
#define CONGESTION_OFF_THRESH(congestion_kb)				\
	(CONGESTION_ON_THRESH(congestion_kb) -				\
	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
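/*
 * From the page_snap_context() helper used throughout this file: a dirty
 * ceph page stashes a reference to its snap context in page->private, so
 * PagePrivate() tells us whether a snap context is attached.
 */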
	if (PagePrivate(page))
		return (void *)page->private;
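/*
 * Mark a page dirty.  The per-snap-context dirty page accounting is
 * adjusted optimistically, on the assumption we will not race with
 * invalidate; the ANON case below is the readjustment when we do.
 */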
static int ceph_set_page_dirty(struct page *page)
	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	if (TestSetPageDirty(page)) {
		dout("%p set_page_dirty %p idx %lu -- already dirty\n",
		     mapping->host, page, page->index);
		return 0;
	}
	inode = mapping->host;
	ci = ceph_inode(inode);
	snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
	dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
	     "snapc %p seq %lld (%d snaps)\n",
	     mapping->host, page, page->index,
	     ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref,
	     ci->i_wrbuffer_ref_head-1, ci->i_wrbuffer_ref_head,
	     snapc, snapc->seq, snapc->num_snaps);
	SetPagePrivate(page);
	dout("ANON set_page_dirty %p (raced truncate?)\n", page);
static void ceph_invalidatepage(struct page *page, unsigned long offset)
	BUG_ON(!PageLocked(page));
	BUG_ON(!PagePrivate(page));
	if (!PageDirty(page))
		pr_err("%p invalidatepage %p page not dirty\n", inode, page);
	ClearPageChecked(page);

	ci = ceph_inode(inode);
		dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
		     inode, page, page->index, offset);
		ceph_put_snap_context(snapc);
		ClearPagePrivate(page);
		dout("%p invalidatepage %p idx %lu partial dirty page\n",
		     inode, page, page->index);
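/* Called by the VM to ask whether a clean page's private data can go. */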
static int ceph_releasepage(struct page *page, gfp_t g)
	dout("%p releasepage %p idx %lu\n", inode, page, page->index);
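/* Read a single page from the OSDs synchronously, leaving it locked. */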
static int readpage_nounlock(struct file *filp, struct page *page)
	struct inode *inode = filp->f_dentry->d_inode;
	struct ceph_osd_client *osdc =
		&ceph_inode_to_client(inode)->client->osdc;
	dout("readpage inode %p file %p page %p index %lu\n",
	     inode, filp, page, page->index);
	SetPageUptodate(page);

	return err < 0 ? err : 0;
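/* The .readpage hook is a thin wrapper: read, then unlock the page. */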
static int ceph_readpage(struct file *filp, struct page *page)
{
	int r = readpage_nounlock(filp, page);
	unlock_page(page);
	return r;
}
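/*
 * Completion callback for an async read(ahead) request: zero any
 * short-read tail and mark the pages uptodate.
 */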
static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
{
	struct inode *inode = req->r_inode;
	replyhead = msg->front.iov_base;
	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
		struct page *page = req->r_pages[i];
			int s = bytes < 0 ? 0 : bytes;
			zero_user_segment(page, s, PAGE_CACHE_SIZE);
		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
		     page->index);
		SetPageUptodate(page);
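/*
 * Start an async read(ahead) for a run of consecutive pages off page_list.
 * Returns the number of pages submitted, or a negative error code.
 */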
static int start_read(struct inode *inode, struct list_head *page_list, int max)
	struct ceph_osd_client *osdc =
		&ceph_inode_to_client(inode)->client->osdc;
	next_index = page->index;
		if (page->index != next_index)
			break;
		if (max && nr_pages == max)
			break;
	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
	     off, len);
	for (i = 0; i < nr_pages; ++i) {
		dout("start_read %p adding %p idx %lu\n", inode, page,
		     page->index);
			dout("start_read %p add_to_page_cache failed %p\n",
			     inode, page);
	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
	ceph_osdc_put_request(req);
	return nr_pages;

out:
	ceph_osdc_put_request(req);
	return ret;
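/*
 * The .readpages hook: hand runs of consecutive pages from page_list to
 * start_read() until the list is drained or an error occurs.
 */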
static int ceph_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *page_list, unsigned nr_pages)
	struct inode *inode = file->f_dentry->d_inode;
	dout("readpages %p file %p nr_pages %d max %d\n", inode, file, nr_pages,
	     max);
	while (!list_empty(page_list)) {
		rc = start_read(inode, page_list, max);
	dout("readpages %p file %p ret %d\n", inode, file, rc);
		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
		     capsnap->context, capsnap->dirty_pages);
			snapc = ceph_get_snap_context(capsnap->context);
				*snap_size = capsnap->size;
		dout(" head snapc %p has %d dirty pages\n",
		     snapc, ci->i_wrbuffer_ref_head);
	dout("writepage %p idx %lu\n", page, page->index);
		dout("writepage %p - no mapping\n", page);
	ci = ceph_inode(inode);
	fsc = ceph_inode_to_client(inode);
	osdc = &fsc->client->osdc;
	snapc = page_snap_context(page);
		dout("writepage %p page %p not dirty?\n", inode, page);
	oldest = get_oldest_context(inode, &snap_size);
	if (snapc->seq > oldest->seq) {
		dout("writepage %p page %p snapc %p not writeable - noop\n",
		     inode, page, snapc);
		ceph_put_snap_context(oldest);

	ceph_put_snap_context(oldest);
	i_size = i_size_read(inode);
	if (i_size < page_off + len)
		len = i_size - page_off;
	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
	     inode, page, page->index, page_off, len, snapc);
	set_page_writeback(page);
	err = ceph_osdc_writepages(osdc, ceph_vino(inode), &ci->i_layout,
				   snapc, page_off, len, ci->i_truncate_seq,
				   ci->i_truncate_size, &inode->i_mtime,
				   &page, 1, 0, 0, true);
		dout("writepage setting page/mapping error %d %p\n", err, page);
		mapping_set_error(&inode->i_data, err);
		dout("writepage cleaned page %p\n", page);
	ClearPagePrivate(page);
	ceph_put_snap_context(snapc);
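/* The .writepage hook wraps writepage_nounlock() and drops the page lock. */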
static int ceph_writepage(struct page *page, struct writeback_control *wbc)
{
	int err;
	struct inode *inode = page->mapping->host;

	err = writepage_nounlock(page, wbc);
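/* Drop a vector of page references in pagevec-sized batches. */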
static void ceph_release_pages(struct page **pages, int num)
	pagevec_init(&pvec, 0);
	for (i = 0; i < num; i++) {
		if (pagevec_add(&pvec, pages[i]) == 0)
			pagevec_release(&pvec);
	}
	pagevec_release(&pvec);
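/*
 * From writepages_finish(): completion callback for a batched OSD write.
 * Clean and unlock every page covered by the request and drop its snap
 * context references.
 */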
static void writepages_finish(struct ceph_osd_request *req,
			      struct ceph_msg *msg)
{
	struct inode *inode = req->r_inode;
	unsigned issued = ceph_caps_issued(ci);

	replyhead = msg->front.iov_base;
	op = (void *)(replyhead + 1);
		mapping_set_error(mapping, rc);

	dout("writepages_finish %p rc %d bytes %llu wrote %d (pages)\n",
	     inode, rc, bytes, wrote);
		ceph_put_snap_context(page_snap_context(page));
		ClearPagePrivate(page);
		dout("unlocking %d %p\n", i, page);
	dout("%p wrote+cleaned %d pages\n", inode, wrote);

		mempool_free(req->r_pages,
			     ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
	ceph_osdc_put_request(req);
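/*
 * The .writepages hook: initiate writeback of all dirty pages on this
 * mapping, batching runs of consecutive dirty pages into one OSD write.
 */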
static int ceph_writepages_start(struct address_space *mapping,
				 struct writeback_control *wbc)
	struct inode *inode = mapping->host;
	pgoff_t max_pages = 0, max_pages_ever = 0;
	dout("writepages_start %p dosync=%d (mode=%s)\n",
	     inode, wbc->sync_mode == WB_SYNC_ALL,
	     (wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
	      (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")));
	fsc = ceph_inode_to_client(inode);
		pr_warning("writepage_start %p on forced umount\n", inode);
	pagevec_init(&pvec, 0);
		dout(" cyclic, start at %lu\n", start);
		dout(" not cyclic, %lu to %lu\n", start, end);
	ceph_put_snap_context(snapc);
	snapc = get_oldest_context(inode, &snap_size);
		dout(" no snap context with dirty data?\n");
	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
	     snapc, snapc->seq, snapc->num_snaps);
	if (last_snapc && snapc != last_snapc) {
		dout(" snapc differs from last pass, restarting at %lu\n",
		     index);
	while (!done && index <= end) {
		int pvec_pages, locked_pages;
			max_pages = max_pages_ever;
		want = min(end - index,
			   min((pgoff_t)PAGEVEC_SIZE,
			       max_pages - (pgoff_t)locked_pages) - 1)
			+ 1;
		dout("pagevec_lookup_tag got %d\n", pvec_pages);
		if (!pvec_pages && !locked_pages)
			break;
		for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
			dout("? %p idx %lu\n", page, page->index);
			if (locked_pages == 0)
				lock_page(page);  /* first page */
			else if (!trylock_page(page))
				break;
				dout("!dirty or !mapping %p\n", page);
				dout("end of range %p\n", page);
			if (next && (page->index != next)) {
				dout("not consecutive %p\n", page);
				dout("waiting on writeback %p\n", page);
				wait_on_page_writeback(page);
			if ((snap_size && page_offset(page) > snap_size) ||
			    (!snap_size &&
			     page_offset(page) > i_size_read(inode))) {
				dout("%p page eof %llu\n", page, snap_size ?
				     snap_size : i_size_read(inode));
			if (PageWriteback(page)) {
				dout("%p under writeback\n", page);
			pgsnapc = page_snap_context(page);
			if (pgsnapc->seq > snapc->seq) {
				dout("page snapc %p %lld > oldest %p %lld\n",
				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
				dout("%p !clear_page_dirty_for_io\n", page);
			if (locked_pages == 0) {
				alloc_page_vec(fsc, req);
			dout("%p will write page %p idx %lu\n",
			     inode, page, page->index);
			set_page_writeback(page);
			next = page->index + 1;
			goto release_pvec_pages;
		BUG_ON(!locked_pages || first < 0);

		if (pvec_pages && i == pvec_pages &&
		    locked_pages < max_pages) {
			dout("reached end pvec, trying for more\n");
			pagevec_reinit(&pvec);
			for (j = i; j < pvec_pages; j++) {
				dout(" pvec leftover page %p\n",
				     pvec.pages[j]);
				pvec.pages[j-i+first] = pvec.pages[j];
			}
		len = min((snap_size ? snap_size : i_size_read(inode)) - offset,
			  (u64)locked_pages << PAGE_CACHE_SHIFT);
		dout("writepages got %d pages at %llu~%llu\n",
		     locked_pages, offset, len);
		reqhead = req->r_request->front.iov_base;
		op = (void *)(reqhead + 1);
		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
		     pvec.nr ? pvec.pages[0] : NULL);
		pagevec_release(&pvec);
		if (locked_pages && !done)
			goto retry;

	if (should_loop && !done) {
		dout("writepages looping back to beginning of file\n");
	ceph_osdc_put_request(req);
	ceph_put_snap_context(snapc);
	dout("writepages done, rc = %d\n", rc);
static int context_is_writeable_or_written(struct inode *inode,
					   struct ceph_snap_context *snapc)
{
	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
	int ret = !oldest || snapc->seq <= oldest->seq;
	ceph_put_snap_context(oldest);
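/*
 * Prepare a locked page for a write of [pos, pos+len): sort out snap
 * context ordering (wait for, or flush, an older dirty page) and read in
 * any part of the page the write will not fully overwrite.
 */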
static int ceph_update_writeable_page(struct file *file,
				      loff_t pos, unsigned len,
				      struct page *page)
	struct inode *inode = file->f_dentry->d_inode;
	int pos_in_page = pos & ~PAGE_CACHE_MASK;
	int end_in_page = pos_in_page + len;
	wait_on_page_writeback(page);

	snapc = page_snap_context(page);
		oldest = get_oldest_context(inode, NULL);

		if (snapc->seq > oldest->seq) {
			ceph_put_snap_context(oldest);
			dout(" page %p snapc %p not current or oldest\n",
			     page, snapc);
			snapc = ceph_get_snap_context(snapc);
			r = wait_event_interruptible(ci->i_cap_wq,
			       context_is_writeable_or_written(inode, snapc));
			ceph_put_snap_context(snapc);

		ceph_put_snap_context(oldest);
		dout(" page %p snapc %p not current, but oldest\n",
		     page, snapc);
		if (clear_page_dirty_for_io(page)) {
			r = writepage_nounlock(page, NULL);
	if (PageUptodate(page)) {
		dout(" page %p already uptodate\n", page);
	if (i_size + len > inode->i_sb->s_maxbytes) {
	if (page_off >= i_size ||
	    (pos_in_page == 0 && (pos+len) >= i_size &&
	     end_in_page - pos_in_page != PAGE_CACHE_SIZE)) {
		dout(" zeroing %p 0 - %d and %d - %d\n",
		     page, pos_in_page, end_in_page, (int)PAGE_CACHE_SIZE);
		zero_user_segments(page,
				   0, pos_in_page,
				   end_in_page, PAGE_CACHE_SIZE);
		return 0;
	}

	r = readpage_nounlock(file, page);
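/* The .write_begin hook: grab and lock the page, then make it writeable. */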
static int ceph_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
	struct inode *inode = file->f_dentry->d_inode;
		dout("write_begin file %p inode %p page %p %d~%d\n", file,
		     inode, page, (int)pos, (int)len);

		r = ceph_update_writeable_page(file, pos, len, page);
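/*
 * The .write_end hook: zero any tail the copy did not cover, update the
 * size if the write extended the file, and mark the page uptodate + dirty.
 */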
static int ceph_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
	struct inode *inode = file->f_dentry->d_inode;
	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
	     inode, page, (int)pos, (int)copied, (int)len);
		zero_user_segment(page, from+copied, len);

	if (pos+copied > inode->i_size)
		check_cap = ceph_inode_set_size(inode, pos+copied);

	if (!PageUptodate(page))
		SetPageUptodate(page);
static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
			      const struct iovec *iov,
			      loff_t pos, unsigned long nr_segs)
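/*
 * In the full source ceph_direct_io() simply fails: O_DIRECT is
 * intercepted at the file level rather than handled through this
 * address_space op.
 */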
const struct address_space_operations ceph_aops = {
	.readpage = ceph_readpage,
	.readpages = ceph_readpages,
	.writepage = ceph_writepage,
	.writepages = ceph_writepages_start,
	.write_begin = ceph_write_begin,
	.write_end = ceph_write_end,
	.set_page_dirty = ceph_set_page_dirty,
	.invalidatepage = ceph_invalidatepage,
	.releasepage = ceph_releasepage,
	.direct_IO = ceph_direct_io,
};
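/*
 * vm_ops->page_mkwrite: called when a read-only mmap'ed page is first
 * written to; reuses ceph_update_writeable_page() for snap ordering.
 */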
static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
	struct page *page = vmf->page;

	size = i_size_read(inode);
		len = size & ~PAGE_CACHE_MASK;
	dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode,
	     off, len, page, page->index);
		ret = VM_FAULT_NOPAGE;
	ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
		ret = VM_FAULT_LOCKED;
		ret = VM_FAULT_SIGBUS;
	dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret);
	if (ret != VM_FAULT_LOCKED)
		unlock_page(page);
static struct vm_operations_struct ceph_vmops = {
	.page_mkwrite = ceph_page_mkwrite,
};
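/*
 * The mmap hook (ceph_mmap() in the full source): wire the vm_ops above
 * into a ceph file mapping.
 */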
int ceph_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct address_space *mapping = file->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;
	file_accessed(file);
	vma->vm_ops = &ceph_vmops;
	return 0;
}