32 #include <linux/nfs_fs.h>
34 #include <linux/module.h>
/*
 * Binds NFSDBG_FACILITY to NFSDBG_PNFS_LD for this file.
 * NOTE(review): presumably this selects the debug class used by the
 * dprintk() calls below - confirm against the debug headers.
 */
42 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
 * Retry limit expressed as 15 * HZ.
 * NOTE(review): presumably 15 seconds in timer ticks; the users of this
 * constant are not visible in this excerpt - confirm.
 */
48 #define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
59 stripe_no = div_u64(offset, stripe_width);
79 return filelayout_get_dense_offset(flseg, offset);
91 dprintk(
"%s Reset task %5u for i/o through MDS "
92 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
94 hdr->
inode->i_sb->s_id,
95 (
long long)NFS_FILEID(hdr->
inode),
97 (
unsigned long long)data->
args.offset);
/*
 * filelayout_reset_read - redirect a read back through the MDS.
 * @data: read descriptor whose args.offset is reported in the debug output
 *
 * The visible excerpt only shows the debug message announcing the reset;
 * it prints the superblock id, the file id and the request offset taken
 * from hdr->inode and data->args. The statements that perform the reset
 * itself are not visible here.
 */
105 static void filelayout_reset_read(
struct nfs_read_data *data)
111 dprintk(
"%s Reset task %5u for i/o through MDS "
112 "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
114 hdr->
inode->i_sb->s_id,
115 (
long long)NFS_FILEID(hdr->
inode),
117 (
unsigned long long)data->
args.offset);
/*
 * filelayout_fenceme - return the layout held on an inode.
 * @inode: inode passed to pnfs_return_layout()
 * @lo: layout header belonging to @inode
 *
 * The visible excerpt shows a call to pnfs_return_layout(inode).
 * NOTE(review): any condition guarding that call (for example a flag on
 * @lo) is not visible here - confirm against the full body.
 */
125 static void filelayout_fenceme(
struct inode *
inode,
struct pnfs_layout_hdr *lo)
130 pnfs_return_layout(inode);
/*
 * filelayout_async_handle_error - classify the status of a finished DS RPC.
 * @task: RPC task whose tk_status is inspected and logged
 * @state: NFSv4 state associated with the request (callers in this file
 *         pass either an open context state or NULL)
 *
 * Remaining parameters are not visible in this excerpt; the body uses
 * clp and lseg, which presumably are among them.
 *
 * Visible behaviour, in source order:
 *  - derives the layout header, inode, MDS server, device id node and the
 *    session slot table from lseg and clp;
 *  - two paths jump to the wait_on_recovery label;
 *  - one path logs a session reset and calls
 *    nfs4_schedule_session_recovery() with task->tk_status;
 *  - one path logs an invalid layout error;
 *  - one path logs a DS connection error and sets NFS_LAYOUT_RETURN in
 *    lo->plh_flags;
 *  - one path logs a retry through the MDS;
 *  - near the end it tests whether NFS4CLNT_MANAGER_RUNNING is clear in
 *    mds_client->cl_state.
 *
 * NOTE(review): the switch labels and return values are not visible;
 * callers in this file compare the result against -NFS4ERR style codes
 * only in lines that are elided here.
 */
133 static int filelayout_async_handle_error(
struct rpc_task *task,
134 struct nfs4_state *
state,
138 struct pnfs_layout_hdr *lo = lseg->
pls_layout;
139 struct inode *inode = lo->plh_inode;
140 struct nfs_server *mds_server = NFS_SERVER(inode);
141 struct nfs4_deviceid_node *
devid = FILELAYOUT_DEVID_NODE(lseg);
143 struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
160 goto wait_on_recovery;
165 goto wait_on_recovery;
174 dprintk(
"%s ERROR %d, Reset session. Exchangeid "
175 "flags 0x%x\n", __func__, task->
tk_status,
176 clp->cl_exchange_flags);
177 nfs4_schedule_session_recovery(clp->cl_session, task->
tk_status);
193 dprintk(
"%s Invalid layout error %d\n", __func__,
213 dprintk(
"%s DS connection error %d\n", __func__,
216 set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
221 dprintk(
"%s Retry through MDS. Error %d\n", __func__,
230 if (
test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
/*
 * filelayout_read_done_cb - completion hook for a read sent to a DS.
 * @task: the finished RPC task
 *
 * Passes the task and the open context state from data->args to
 * filelayout_async_handle_error() and stores the result in err. One of
 * the following paths calls filelayout_reset_read(data).
 * NOTE(review): the dispatch on err and the return values are not
 * visible in this excerpt.
 */
237 static int filelayout_read_done_cb(
struct rpc_task *task,
243 err = filelayout_async_handle_error(task, data->
args.context->state,
248 filelayout_reset_read(data);
268 if (FILELAYOUT_LSEG(hdr->
lseg)->commit_through_mds ||
273 dprintk(
"%s ionde %lu pls_end_pos %lu\n", __func__, hdr->
inode->i_ino,
274 (
unsigned long) NFS_I(hdr->
inode)->layout->plh_lwb);
280 return filelayout_test_devid_invalid(node) ||
287 struct nfs4_deviceid_node *
node = FILELAYOUT_DEVID_NODE(lseg);
/*
 * filelayout_read_prepare - rpc_call_prepare hook for DS reads.
 * @task: RPC task about to be started
 * @data: opaque callback cookie, used in the body as rdata
 *
 * When filelayout_reset_to_mds() reports true for the layout segment of
 * the request header, the task is logged and handed to
 * filelayout_reset_read(). The excerpt also shows a call to
 * nfs41_setup_sequence() on the DS client session with the sequence
 * args and results embedded in rdata.
 * NOTE(review): what happens between and after these two checks is not
 * visible here.
 */
297 static void filelayout_read_prepare(
struct rpc_task *task,
void *data)
301 if (filelayout_reset_to_mds(rdata->
header->lseg)) {
302 dprintk(
"%s task %u reset io to MDS\n", __func__, task->tk_pid);
303 filelayout_reset_read(rdata);
309 if (nfs41_setup_sequence(rdata->
ds_clp->cl_session,
310 &rdata->
args.seq_args, &rdata->
res.seq_res,
/*
 * filelayout_read_call_done - rpc_call_done hook for DS reads.
 * @task: the finished RPC task
 * @data: opaque callback cookie, used in the body as rdata
 *
 * The visible line forwards completion to the rpc_call_done callback
 * of the mds_ops table stored in the request header. Lines elided
 * before it may return early - not visible here.
 */
317 static void filelayout_read_call_done(
struct rpc_task *task,
void *data)
328 rdata->
header->mds_ops->rpc_call_done(task, data);
/*
 * filelayout_read_count_stats - rpc_count_stats hook installed in
 * filelayout_read_call_ops.
 * @task: RPC task being accounted
 * @data: opaque callback cookie
 *
 * Only the signature is visible in this excerpt.
 */
331 static void filelayout_read_count_stats(
struct rpc_task *task,
void *data)
/*
 * filelayout_read_release - rpc_release hook for DS reads.
 * @data: opaque callback cookie, used in the body as rdata
 *
 * Looks up the layout header through the request header and its layout
 * segment, hands the inode and layout to filelayout_fenceme(), and
 * finally chains to the rpc_release callback of the header mds_ops.
 * NOTE(review): a statement between the last two visible lines is
 * elided.
 */
338 static void filelayout_read_release(
void *data)
341 struct pnfs_layout_hdr *lo = rdata->
header->lseg->pls_layout;
343 filelayout_fenceme(lo->plh_inode, lo);
345 rdata->
header->mds_ops->rpc_release(data);
/*
 * filelayout_write_done_cb - completion hook for a write sent to a DS.
 * @task: the finished RPC task
 *
 * Runs filelayout_async_handle_error() with the open context state from
 * data->args and keeps the result in err. The visible paths afterwards
 * call filelayout_reset_write(data) and
 * filelayout_set_layoutcommit(data).
 * NOTE(review): which err value selects which path, and the return
 * values, are not visible in this excerpt.
 */
348 static int filelayout_write_done_cb(
struct rpc_task *task,
354 err = filelayout_async_handle_error(task, data->
args.context->state,
359 filelayout_reset_write(data);
366 filelayout_set_layoutcommit(data);
375 data->
task.tk_status = 0;
377 sizeof(data->
verf.verifier));
378 data->
verf.verifier.data[0]++;
/*
 * filelayout_commit_done_cb - completion hook for a commit sent to a DS.
 * @task: the finished RPC task
 *
 * Calls filelayout_async_handle_error() with a NULL state and the DS
 * client from data. One of the following paths calls
 * prepare_to_resend_writes(data).
 * NOTE(review): the dispatch on err and the return values are elided.
 */
381 static int filelayout_commit_done_cb(
struct rpc_task *task,
386 err = filelayout_async_handle_error(task,
NULL, data->
ds_clp,
391 prepare_to_resend_writes(data);
/*
 * filelayout_write_prepare - rpc_call_prepare hook for DS writes.
 * @task: RPC task about to be started
 * @data: opaque callback cookie, used in the body as wdata
 *
 * When filelayout_reset_to_mds() reports true for the layout segment of
 * the request header, the task is logged and handed to
 * filelayout_reset_write(). The excerpt also shows a call to
 * nfs41_setup_sequence() on the DS client session with the sequence
 * args and results embedded in wdata.
 * NOTE(review): the statements between and after these checks are not
 * visible here.
 */
401 static void filelayout_write_prepare(
struct rpc_task *task,
void *data)
405 if (filelayout_reset_to_mds(wdata->
header->lseg)) {
406 dprintk(
"%s task %u reset io to MDS\n", __func__, task->tk_pid);
407 filelayout_reset_write(wdata);
411 if (nfs41_setup_sequence(wdata->
ds_clp->cl_session,
412 &wdata->
args.seq_args, &wdata->
res.seq_res,
/*
 * filelayout_write_call_done - rpc_call_done hook for DS writes.
 * @task: the finished RPC task
 * @data: opaque callback cookie, used in the body as wdata
 *
 * The visible line forwards completion to the rpc_call_done callback
 * of the mds_ops table stored in the request header. Lines elided
 * before it may return early - not visible here.
 */
419 static void filelayout_write_call_done(
struct rpc_task *task,
void *data)
428 wdata->
header->mds_ops->rpc_call_done(task, data);
/*
 * filelayout_write_count_stats - rpc_count_stats hook installed in
 * filelayout_write_call_ops.
 * @task: RPC task being accounted
 * @data: opaque callback cookie
 *
 * Only the signature is visible in this excerpt.
 */
431 static void filelayout_write_count_stats(
struct rpc_task *task,
void *data)
/*
 * filelayout_write_release - rpc_release hook for DS writes.
 * @data: opaque callback cookie, used in the body as wdata
 *
 * Looks up the layout header through the request header and its layout
 * segment, hands the inode and layout to filelayout_fenceme(), and
 * finally chains to the rpc_release callback of the header mds_ops.
 * NOTE(review): a statement between the last two visible lines is
 * elided.
 */
438 static void filelayout_write_release(
void *data)
441 struct pnfs_layout_hdr *lo = wdata->
header->lseg->pls_layout;
443 filelayout_fenceme(lo->plh_inode, lo);
445 wdata->
header->mds_ops->rpc_release(data);
/*
 * filelayout_commit_prepare - rpc_call_prepare hook for DS commits.
 * @task: RPC task about to be started
 * @data: opaque callback cookie, used in the body as wdata
 *
 * The visible excerpt calls nfs41_setup_sequence() on the DS client
 * session with the sequence args and results embedded in wdata. The
 * remaining arguments and the action taken on its result are elided.
 */
448 static void filelayout_commit_prepare(
struct rpc_task *task,
void *data)
452 if (nfs41_setup_sequence(wdata->
ds_clp->cl_session,
453 &wdata->
args.seq_args, &wdata->
res.seq_res,
/*
 * filelayout_write_commit_done - rpc_call_done hook for DS commits.
 * @task: the finished RPC task
 * @data: opaque callback cookie, used in the body as wdata
 *
 * The visible line forwards completion to wdata->mds_ops->rpc_call_done.
 * Unlike the read and write variants, mds_ops is reached directly from
 * wdata rather than through a header member.
 */
460 static void filelayout_write_commit_done(
struct rpc_task *task,
void *data)
465 wdata->
mds_ops->rpc_call_done(task, data);
/*
 * filelayout_commit_count_stats - rpc_count_stats hook installed in
 * filelayout_commit_call_ops.
 * @task: RPC task being accounted
 * @data: opaque callback cookie
 *
 * Only the signature is visible in this excerpt.
 */
468 static void filelayout_commit_count_stats(
struct rpc_task *task,
void *data)
/*
 * filelayout_commit_release - rpc_release hook installed in
 * filelayout_commit_call_ops; also called directly by
 * filelayout_initiate_commit().
 * @calldata: opaque callback cookie
 *
 * Only the signature is visible in this excerpt.
 */
475 static void filelayout_commit_release(
void *calldata)
/*
 * RPC callback table for reads: wires the prepare, done, count_stats
 * and release hooks to the filelayout_read_* functions above.
 * NOTE(review): the site that passes this table to the RPC layer is
 * not visible in this excerpt.
 */
485 static const struct rpc_call_ops filelayout_read_call_ops = {
486 .rpc_call_prepare = filelayout_read_prepare,
487 .rpc_call_done = filelayout_read_call_done,
488 .rpc_count_stats = filelayout_read_count_stats,
489 .rpc_release = filelayout_read_release,
/*
 * RPC callback table for writes: wires the prepare, done, count_stats
 * and release hooks to the filelayout_write_* functions above. It is
 * passed along with the sync flag at original line 583.
 */
492 static const struct rpc_call_ops filelayout_write_call_ops = {
493 .rpc_call_prepare = filelayout_write_prepare,
494 .rpc_call_done = filelayout_write_call_done,
495 .rpc_count_stats = filelayout_write_count_stats,
496 .rpc_release = filelayout_write_release,
/*
 * RPC callback table for commits. Note the asymmetric naming: the done
 * hook is filelayout_write_commit_done while the other three hooks use
 * the filelayout_commit_ prefix. The table is referenced from
 * filelayout_initiate_commit().
 */
499 static const struct rpc_call_ops filelayout_commit_call_ops = {
500 .rpc_call_prepare = filelayout_commit_prepare,
501 .rpc_call_done = filelayout_write_commit_done,
502 .rpc_count_stats = filelayout_commit_count_stats,
503 .rpc_release = filelayout_commit_release,
512 loff_t offset = data->
args.offset;
517 dprintk(
"--> %s ino %lu pgbase %u req %Zu@%llu\n",
518 __func__, hdr->
inode->i_ino,
519 data->
args.pgbase, (
size_t)data->
args.count, offset);
527 dprintk(
"%s USE DS: %s cl_count %d\n", __func__,
537 data->
args.offset = filelayout_get_dserver_offset(lseg, offset);
554 loff_t offset = data->
args.offset;
565 dprintk(
"%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
566 __func__, hdr->
inode->i_ino, sync, (
size_t) data->
args.count,
579 data->
args.offset = filelayout_get_dserver_offset(lseg, offset);
583 &filelayout_write_call_ops, sync,
/*
 * filelayout_check_layout - sanity-check a decoded file layout.
 * @lo: layout header; its plh_inode is used to reach the NFS server
 *
 * Remaining parameters are elided here; filelayout_alloc_lseg() calls
 * this function with (layoutid, fl, lgr, &id, gfp_flags).
 *
 * Going by the visible debug messages, the rejected conditions are:
 *  - a layout range that does not start at offset 0 (only whole-file
 *    layouts are supported);
 *  - a pattern_offset that is too large;
 *  - an invalid stripe unit;
 *  - a bad first_stripe_index;
 *  - a num_fh that does not fit the packing;
 *  - a stripe unit not aligned with rsize.
 * The final visible line logs the status being returned.
 * NOTE(review): the exact predicates and the status values are not
 * visible in this excerpt.
 */
598 filelayout_check_layout(
struct pnfs_layout_hdr *lo,
604 struct nfs4_deviceid_node *
d;
607 struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
612 if (lgr->
range.offset != 0 ||
614 dprintk(
"%s Only whole file layouts supported. Use MDS i/o\n",
620 dprintk(
"%s pattern_offset %lld too large\n",
626 dprintk(
"%s Invalid stripe unit (%u)\n",
633 NFS_SERVER(lo->plh_inode)->nfs_client,
id);
647 dprintk(
"%s Bad first_stripe_index %u\n",
656 dprintk(
"%s num_fh %u not valid for given packing\n",
662 dprintk(
"%s Stripe unit (%u) not aligned with rsize %u "
669 dprintk(
"--> %s returns %d\n", __func__, status);
680 for (i = 0; i < fl->
num_fh; i++) {
692 filelayout_free_fh_array(fl);
/*
 * filelayout_decode_layout - decode a layout into a file-layout segment.
 * @flo: layout header the segment belongs to
 *
 * Remaining parameters are elided here; filelayout_alloc_lseg() calls
 * this function with (layoutid, fl, lgr, &id, gfp_flags) and treats a
 * non-zero result as failure.
 *
 * Visible steps: logs the start of decoding, copies the device id from
 * the decode cursor p into *id and prints it, logs nfl_util, num_fh,
 * the first stripe index and the pattern offset, then loops over
 * fl->num_fh entries and logs a file handle length.
 * NOTE(review): bounds checks and the error path are not visible in
 * this excerpt.
 */
697 filelayout_decode_layout(
struct pnfs_layout_hdr *flo,
710 dprintk(
"%s: set_layout_map Begin\n", __func__);
725 memcpy(
id, p,
sizeof(*
id));
727 nfs4_print_deviceid(
id);
742 dprintk(
"%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
759 for (i = 0; i < fl->
num_fh; i++) {
779 dprintk(
"DEBUG: %s: fh len %d\n", __func__,
787 filelayout_free_fh_array(fl);
809 _filelayout_free_lseg(fl);
818 struct pnfs_commit_bucket *buckets;
823 if (cinfo->
ds->nbuckets != 0) {
836 buckets = kcalloc(size,
sizeof(
struct pnfs_commit_bucket),
843 spin_lock(cinfo->
lock);
844 if (cinfo->
ds->nbuckets != 0)
847 cinfo->
ds->buckets = buckets;
848 cinfo->
ds->nbuckets =
size;
849 for (i = 0; i <
size; i++) {
850 INIT_LIST_HEAD(&buckets[i].written);
851 INIT_LIST_HEAD(&buckets[i].committing);
854 spin_unlock(cinfo->
lock);
/*
 * filelayout_alloc_lseg - allocate and populate a file-layout segment.
 * @layoutid: layout header the new segment will belong to
 *
 * Allocates a zeroed segment with kzalloc(), decodes the layout into
 * it, then validates it. If decoding returns non-zero or the check
 * returns non-zero, the segment is released with
 * _filelayout_free_lseg().
 * NOTE(review): the allocation-failure check and the return statements
 * are not visible in this excerpt.
 */
860 filelayout_alloc_lseg(
struct pnfs_layout_hdr *layoutid,
869 fl = kzalloc(
sizeof(*fl), gfp_flags);
873 rc = filelayout_decode_layout(layoutid, fl, lgr, &
id, gfp_flags);
874 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &
id, gfp_flags)) {
875 _filelayout_free_lseg(fl);
891 u64 p_stripe, r_stripe;
898 p_stripe = (
u64)req_offset(prev);
899 r_stripe = (
u64)req_offset(req);
900 stripe_unit = FILELAYOUT_LSEG(pgio->
pg_lseg)->stripe_unit;
902 do_div(p_stripe, stripe_unit);
903 do_div(r_stripe, stripe_unit);
905 return (p_stripe == r_stripe);
956 status = filelayout_alloc_commit_info(pgio->
pg_lseg, &cinfo,
GFP_NOFS);
968 .pg_init = filelayout_pg_init_read,
969 .pg_test = filelayout_pg_test,
974 .pg_init = filelayout_pg_init_write,
975 .pg_test = filelayout_pg_test,
/*
 * filelayout_clear_request_commit - drop a request from DS commit
 * accounting.
 * @req: the page request being cleared
 *
 * Under cinfo->lock, decrements cinfo->ds->nwritten. When @req is the
 * only entry on its wb_list, the owning bucket is located, its wlseg
 * is saved in freeme and the bucket pointer is cleared.
 * NOTE(review): the early-exit test before the decrement, the list
 * removal and the release of freeme are not visible in this excerpt.
 */
991 filelayout_clear_request_commit(
struct nfs_page *req,
996 spin_lock(cinfo->
lock);
999 cinfo->
ds->nwritten--;
1000 if (list_is_singular(&req->
wb_list)) {
1001 struct pnfs_commit_bucket *bucket;
1004 struct pnfs_commit_bucket,
1006 freeme = bucket->wlseg;
1007 bucket->wlseg =
NULL;
1011 spin_unlock(cinfo->
lock);
/*
 * filelayout_choose_commit_list - pick the list a request is queued on
 * for commit.
 * @req: the page request being queued
 *
 * One visible path returns the MDS list (&cinfo->mds->list). Otherwise
 * a bucket index is computed with select_bucket_index(fl, j) and the
 * written list of that bucket is selected. Right after an empty-list
 * test the bucket wlseg is set from pnfs_get_lseg(lseg), and
 * cinfo->ds->nwritten is incremented.
 * NOTE(review): the condition selecting the MDS path, the computation
 * of j and the final return are not visible in this excerpt.
 */
1016 filelayout_choose_commit_list(
struct nfs_page *req,
1023 struct pnfs_commit_bucket *buckets;
1026 return &cinfo->
mds->list;
1035 i = select_bucket_index(fl, j);
1036 buckets = cinfo->
ds->buckets;
1037 list = &buckets[
i].written;
1038 if (list_empty(list)) {
1045 buckets[
i].wlseg = pnfs_get_lseg(lseg);
1048 cinfo->
ds->nwritten++;
/*
 * filelayout_mark_request_commit - queue a request for a later commit.
 * @req: the page request being marked
 *
 * Obtains the target list from filelayout_choose_commit_list(). What
 * is done with that list afterwards is not visible in this excerpt.
 */
1053 filelayout_mark_request_commit(
struct nfs_page *req,
1059 list = filelayout_choose_commit_list(req, lseg, cinfo);
1081 else if (flseg->
num_fh == 0)
/*
 * filelayout_initiate_commit - start a commit RPC towards a DS.
 * @data: commit descriptor
 * @how: flags forwarded to the RPC initiation call
 *
 * One visible path calls prepare_to_resend_writes() followed by
 * filelayout_commit_release() on @data. The other visible path logs
 * the inode number, @how and a client reference count, then passes
 * filelayout_commit_call_ops and @how to an initiation call.
 * NOTE(review): the condition that selects the first path, the callee
 * of the last call and the return values are elided.
 */
1088 static int filelayout_initiate_commit(
struct nfs_commit_data *data,
int how)
1098 prepare_to_resend_writes(data);
1099 filelayout_commit_release(data);
1102 dprintk(
"%s ino %lu, how %d cl_count %d\n", __func__,
1111 &filelayout_commit_call_ops, how,
1123 if (!nfs_lock_request(req))
1130 nfs_list_add_request(req, dst);
1132 if ((ret == max) && !cinfo->
dreq)
/*
 * filelayout_scan_ds_commit_list - move requests of one bucket from
 * its written list to its committing list.
 * @bucket: the commit bucket to scan
 *
 * Transfers up to max requests with transfer_commit_list(), then
 * adjusts cinfo->ds->nwritten and cinfo->ds->ncommitting by the number
 * moved. The layout segment reference is handed from wlseg to clseg;
 * wlseg is cleared when the written list became empty, and
 * pnfs_get_lseg() is called on clseg on a path whose guard is elided
 * (presumably the branch where the written list is not empty -
 * confirm).
 * NOTE(review): the test guarding the accounting updates and the
 * return statement are not visible in this excerpt.
 */
1139 filelayout_scan_ds_commit_list(
struct pnfs_commit_bucket *bucket,
1143 struct list_head *src = &bucket->written;
1144 struct list_head *dst = &bucket->committing;
1147 ret = transfer_commit_list(src, dst, cinfo, max);
1149 cinfo->
ds->nwritten -=
ret;
1150 cinfo->
ds->ncommitting +=
ret;
1151 bucket->clseg = bucket->wlseg;
1152 if (list_empty(src))
1153 bucket->wlseg =
NULL;
1155 pnfs_get_lseg(bucket->clseg);
/*
 * filelayout_scan_commit_lists - scan every DS bucket for requests to
 * commit.
 * @cinfo: commit info holding the DS bucket array
 *
 * Iterates over cinfo->ds->buckets while max is non-zero, calling
 * filelayout_scan_ds_commit_list() on each bucket and keeping its
 * result in cnt.
 * NOTE(review): how cnt adjusts max and the returned total is elided.
 */
1163 static int filelayout_scan_commit_lists(
struct nfs_commit_info *cinfo,
1168 for (i = 0; i < cinfo->
ds->nbuckets && max != 0; i++) {
1169 cnt = filelayout_scan_ds_commit_list(&cinfo->
ds->buckets[i],
/*
 * filelayout_recover_commit_reqs - pull all written requests back onto
 * a caller-supplied list.
 * @dst: list receiving the requests
 *
 * For every DS bucket, transfers the written list to @dst with a max
 * of 0. When the transfer reports a non-zero count, the written list
 * is asserted empty with BUG_ON. Finally cinfo->ds->nwritten is reset
 * to 0.
 * NOTE(review): presumably a max of 0 means no limit in
 * transfer_commit_list() - confirm. Locking and handling of the bucket
 * wlseg are not visible in this excerpt.
 */
1178 static void filelayout_recover_commit_reqs(
struct list_head *dst,
1181 struct pnfs_commit_bucket *
b;
1188 for (i = 0, b = cinfo->
ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1189 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1190 BUG_ON(!list_empty(&b->written));
1195 cinfo->
ds->nwritten = 0;
1202 struct pnfs_commit_bucket *bucket;
1205 unsigned int nreq = 0;
1207 fl_cinfo = cinfo->
ds;
1208 bucket = fl_cinfo->buckets;
1209 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
1210 if (list_empty(&bucket->committing))
1216 data->
lseg = bucket->clseg;
1217 bucket->clseg =
NULL;
1218 list_add(&data->
pages, list);
1223 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
1224 if (list_empty(&bucket->committing))
1228 bucket->clseg =
NULL;
/*
 * filelayout_commit_pagelist - issue commits for the MDS and for every
 * DS bucket with pending requests.
 * @inode: inode whose pages are being committed
 * @mds_pages: requests that must be committed through the MDS
 *
 * Visible steps: when @mds_pages is not empty a commit descriptor is
 * added to a local list; alloc_ds_commits() adds the per-DS
 * descriptors and its result is accumulated in nreq; each descriptor
 * is later unlinked with list_del_init() and the DS ones are started
 * with filelayout_initiate_commit(). At the end
 * cinfo->ds->ncommitting is reset to 0.
 * NOTE(review): allocation failure handling, the MDS commit call and
 * the return value are not visible in this excerpt.
 */
1236 filelayout_commit_pagelist(
struct inode *inode,
struct list_head *mds_pages,
1241 unsigned int nreq = 0;
1243 if (!list_empty(mds_pages)) {
1247 list_add(&data->
pages, &list);
1253 nreq += alloc_ds_commits(cinfo, &list);
1263 list_del_init(&data->
pages);
1269 struct pnfs_commit_bucket *buckets;
1271 buckets = cinfo->
ds->buckets;
1273 filelayout_initiate_commit(data, how);
1277 cinfo->
ds->ncommitting = 0;
/*
 * filelayout_free_deveiceid_node - free_deviceid_node hook installed in
 * filelayout_type.
 * @d: device id node to release
 *
 * Only the signature is visible in this excerpt.
 * NOTE(review): the identifier is spelled deveiceid; filelayout_type
 * refers to it by this exact spelling, so any rename has to change
 * both places together.
 */
1282 filelayout_free_deveiceid_node(
struct nfs4_deviceid_node *d)
/*
 * filelayout_alloc_layout_hdr - allocate a zeroed file-layout header.
 * @inode: inode the layout is for (not referenced in the visible lines)
 * @gfp_flags: allocation flags passed to kzalloc()
 *
 * NOTE(review): the NULL check and the returned pointer expression are
 * not visible in this excerpt.
 */
1287 static struct pnfs_layout_hdr *
1288 filelayout_alloc_layout_hdr(
struct inode *inode,
gfp_t gfp_flags)
1292 flo = kzalloc(
sizeof(*flo), gfp_flags);
/*
 * filelayout_free_layout_hdr - release a file-layout header.
 * @lo: generic layout header embedded in the file-layout header
 *
 * Converts @lo back to its container with FILELAYOUT_FROM_HDR() and
 * frees that container with kfree().
 */
1297 filelayout_free_layout_hdr(
struct pnfs_layout_hdr *lo)
1299 kfree(FILELAYOUT_FROM_HDR(lo));
/*
 * filelayout_get_ds_info - return the DS commit info of an inode.
 * @inode: inode whose layout is looked up through NFS_I()
 *
 * Returns the address of the commit_info member of the file-layout
 * header that contains the inode layout.
 * NOTE(review): lines elided between the lookup and the return
 * presumably handle a missing layout - confirm.
 */
1303 filelayout_get_ds_info(
struct inode *inode)
1305 struct pnfs_layout_hdr *
layout = NFS_I(inode)->layout;
1310 return &FILELAYOUT_FROM_HDR(layout)->commit_info;
/*
 * Layout driver descriptor for the NFSv4.1 files layout. It names the
 * driver and binds every driver hook to the filelayout_* functions of
 * this file: header and segment allocation, page I/O op tables, commit
 * bookkeeping, read/write/commit page lists and device id release.
 * NOTE(review): members on elided lines (for example an id or owner
 * field) are not visible in this excerpt; the hook
 * free_deviceid_node points at a function whose name is spelled
 * filelayout_free_deveiceid_node.
 */
1313 static struct pnfs_layoutdriver_type filelayout_type = {
1315 .name =
"LAYOUT_NFSV4_1_FILES",
1317 .alloc_layout_hdr = filelayout_alloc_layout_hdr,
1318 .free_layout_hdr = filelayout_free_layout_hdr,
1319 .alloc_lseg = filelayout_alloc_lseg,
1320 .free_lseg = filelayout_free_lseg,
1321 .pg_read_ops = &filelayout_pg_read_ops,
1322 .pg_write_ops = &filelayout_pg_write_ops,
1323 .get_ds_info = &filelayout_get_ds_info,
1324 .mark_request_commit = filelayout_mark_request_commit,
1325 .clear_request_commit = filelayout_clear_request_commit,
1326 .scan_commit_lists = filelayout_scan_commit_lists,
1327 .recover_commit_reqs = filelayout_recover_commit_reqs,
1328 .commit_pagelist = filelayout_commit_pagelist,
1329 .read_pagelist = filelayout_read_pagelist,
1330 .write_pagelist = filelayout_write_pagelist,
1331 .free_deviceid_node = filelayout_free_deveiceid_node,
/*
 * nfs4filelayout_init - module initialisation entry point (marked
 * __init).
 *
 * Only the signature is visible in this excerpt.
 * NOTE(review): presumably registers filelayout_type with the pNFS
 * core - confirm against the full body.
 */
1334 static int __init nfs4filelayout_init(
void)
/*
 * nfs4filelayout_exit - module teardown entry point (marked __exit).
 *
 * Only the signature is visible in this excerpt.
 * NOTE(review): presumably unregisters filelayout_type - confirm
 * against the full body.
 */
1341 static void __exit nfs4filelayout_exit(
void)