Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
xprt.c
Go to the documentation of this file.
1 /*
2  * linux/net/sunrpc/xprt.c
3  *
4  * This is a generic RPC call interface supporting congestion avoidance,
5  * and asynchronous calls.
6  *
7  * The interface works like this:
8  *
9  * - When a process places a call, it allocates a request slot if
10  * one is available. Otherwise, it sleeps on the backlog queue
11  * (xprt_reserve).
12  * - Next, the caller puts together the RPC message, stuffs it into
13  * the request struct, and calls xprt_transmit().
14  * - xprt_transmit sends the message and installs the caller on the
15  * transport's wait list. At the same time, if a reply is expected,
16  * it installs a timer that is run after the packet's timeout has
17  * expired.
18  * - When a packet arrives, the data_ready handler walks the list of
19  * pending requests for that transport. If a matching XID is found, the
20  * caller is woken up, and the timer removed.
21  * - When no reply arrives within the timeout interval, the timer is
22  * fired by the kernel and runs xprt_timer(). It either adjusts the
23  * timeout values (minor timeout) or wakes up the caller with a status
24  * of -ETIMEDOUT.
25  * - When the caller receives a notification from RPC that a reply arrived,
26  * it should release the RPC slot, and process the reply.
27  * If the call timed out, it may choose to retry the operation by
28  * adjusting the initial timeout value, and simply calling rpc_call
29  * again.
30  *
31  * Support for async RPC is done through a set of RPC-specific scheduling
32  * primitives that `transparently' work for processes as well as async
33  * tasks that rely on callbacks.
34  *
35  * Copyright (C) 1995-1997, Olaf Kirch <[email protected]>
36  *
37  * Transport switch API copyright (C) 2005, Chuck Lever <[email protected]>
38  */
39 
40 #include <linux/module.h>
41 
42 #include <linux/types.h>
43 #include <linux/interrupt.h>
44 #include <linux/workqueue.h>
45 #include <linux/net.h>
46 #include <linux/ktime.h>
47 
48 #include <linux/sunrpc/clnt.h>
49 #include <linux/sunrpc/metrics.h>
50 #include <linux/sunrpc/bc_xprt.h>
51 
52 #include "sunrpc.h"
53 
54 /*
55  * Local variables
56  */
57 
58 #ifdef RPC_DEBUG
59 # define RPCDBG_FACILITY RPCDBG_XPRT
60 #endif
61 
62 /*
63  * Local functions
64  */
65 static void xprt_init(struct rpc_xprt *xprt, struct net *net);
66 static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
67 static void xprt_connect_status(struct rpc_task *task);
68 static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
69 static void xprt_destroy(struct rpc_xprt *xprt);
70 
71 static DEFINE_SPINLOCK(xprt_list_lock);
72 static LIST_HEAD(xprt_list);
73 
74 /*
75  * The transport code maintains an estimate on the maximum number of out-
76  * standing RPC requests, using a smoothed version of the congestion
77  * avoidance implemented in 44BSD. This is basically the Van Jacobson
78  * congestion algorithm: If a retransmit occurs, the congestion window is
79  * halved; otherwise, it is incremented by 1/cwnd when
80  *
81  * - a reply is received and
82  * - a full number of requests are outstanding and
83  * - the congestion window hasn't been updated recently.
84  */
85 #define RPC_CWNDSHIFT (8U)
86 #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
87 #define RPC_INITCWND RPC_CWNDSCALE
88 #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
89 
90 #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
91 
104 int xprt_register_transport(struct xprt_class *transport)
105 {
106  struct xprt_class *t;
107  int result;
108 
109  result = -EEXIST;
110  spin_lock(&xprt_list_lock);
111  list_for_each_entry(t, &xprt_list, list) {
112  /* don't register the same transport class twice */
113  if (t->ident == transport->ident)
114  goto out;
115  }
116 
117  list_add_tail(&transport->list, &xprt_list);
118  printk(KERN_INFO "RPC: Registered %s transport module.\n",
119  transport->name);
120  result = 0;
121 
122 out:
123  spin_unlock(&xprt_list_lock);
124  return result;
125 }
127 
136 int xprt_unregister_transport(struct xprt_class *transport)
137 {
138  struct xprt_class *t;
139  int result;
140 
141  result = 0;
142  spin_lock(&xprt_list_lock);
143  list_for_each_entry(t, &xprt_list, list) {
144  if (t == transport) {
146  "RPC: Unregistered %s transport module.\n",
147  transport->name);
148  list_del_init(&transport->list);
149  goto out;
150  }
151  }
152  result = -ENOENT;
153 
154 out:
155  spin_unlock(&xprt_list_lock);
156  return result;
157 }
159 
169 {
170  struct xprt_class *t;
171  int result;
172 
173  result = 0;
174  spin_lock(&xprt_list_lock);
175  list_for_each_entry(t, &xprt_list, list) {
176  if (strcmp(t->name, transport_name) == 0) {
177  spin_unlock(&xprt_list_lock);
178  goto out;
179  }
180  }
181  spin_unlock(&xprt_list_lock);
182  result = request_module("xprt%s", transport_name);
183 out:
184  return result;
185 }
187 
197 int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
198 {
199  struct rpc_rqst *req = task->tk_rqstp;
200  int priority;
201 
202  if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
203  if (task == xprt->snd_task)
204  return 1;
205  goto out_sleep;
206  }
207  xprt->snd_task = task;
208  if (req != NULL) {
209  req->rq_bytes_sent = 0;
210  req->rq_ntrans++;
211  }
212 
213  return 1;
214 
215 out_sleep:
216  dprintk("RPC: %5u failed to lock transport %p\n",
217  task->tk_pid, xprt);
218  task->tk_timeout = 0;
219  task->tk_status = -EAGAIN;
220  if (req == NULL)
221  priority = RPC_PRIORITY_LOW;
222  else if (!req->rq_ntrans)
223  priority = RPC_PRIORITY_NORMAL;
224  else
225  priority = RPC_PRIORITY_HIGH;
226  rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
227  return 0;
228 }
230 
231 static void xprt_clear_locked(struct rpc_xprt *xprt)
232 {
233  xprt->snd_task = NULL;
234  if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
236  clear_bit(XPRT_LOCKED, &xprt->state);
238  } else
239  queue_work(rpciod_workqueue, &xprt->task_cleanup);
240 }
241 
242 /*
243  * xprt_reserve_xprt_cong - serialize write access to transports
244  * @task: task that is requesting access to the transport
245  *
246  * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
247  * integrated into the decision of whether a request is allowed to be
248  * woken up and given access to the transport.
249  */
250 int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
251 {
252  struct rpc_rqst *req = task->tk_rqstp;
253  int priority;
254 
255  if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
256  if (task == xprt->snd_task)
257  return 1;
258  goto out_sleep;
259  }
260  if (req == NULL) {
261  xprt->snd_task = task;
262  return 1;
263  }
264  if (__xprt_get_cong(xprt, task)) {
265  xprt->snd_task = task;
266  req->rq_bytes_sent = 0;
267  req->rq_ntrans++;
268  return 1;
269  }
270  xprt_clear_locked(xprt);
271 out_sleep:
272  dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
273  task->tk_timeout = 0;
274  task->tk_status = -EAGAIN;
275  if (req == NULL)
276  priority = RPC_PRIORITY_LOW;
277  else if (!req->rq_ntrans)
278  priority = RPC_PRIORITY_NORMAL;
279  else
280  priority = RPC_PRIORITY_HIGH;
281  rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
282  return 0;
283 }
285 
286 static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
287 {
288  int retval;
289 
290  spin_lock_bh(&xprt->transport_lock);
291  retval = xprt->ops->reserve_xprt(xprt, task);
292  spin_unlock_bh(&xprt->transport_lock);
293  return retval;
294 }
295 
296 static bool __xprt_lock_write_func(struct rpc_task *task, void *data)
297 {
298  struct rpc_xprt *xprt = data;
299  struct rpc_rqst *req;
300 
301  req = task->tk_rqstp;
302  xprt->snd_task = task;
303  if (req) {
304  req->rq_bytes_sent = 0;
305  req->rq_ntrans++;
306  }
307  return true;
308 }
309 
310 static void __xprt_lock_write_next(struct rpc_xprt *xprt)
311 {
312  if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
313  return;
314 
315  if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_func, xprt))
316  return;
317  xprt_clear_locked(xprt);
318 }
319 
320 static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
321 {
322  struct rpc_xprt *xprt = data;
323  struct rpc_rqst *req;
324 
325  req = task->tk_rqstp;
326  if (req == NULL) {
327  xprt->snd_task = task;
328  return true;
329  }
330  if (__xprt_get_cong(xprt, task)) {
331  xprt->snd_task = task;
332  req->rq_bytes_sent = 0;
333  req->rq_ntrans++;
334  return true;
335  }
336  return false;
337 }
338 
339 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
340 {
341  if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
342  return;
343  if (RPCXPRT_CONGESTED(xprt))
344  goto out_unlock;
345  if (rpc_wake_up_first(&xprt->sending, __xprt_lock_write_cong_func, xprt))
346  return;
347 out_unlock:
348  xprt_clear_locked(xprt);
349 }
350 
358 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
359 {
360  if (xprt->snd_task == task) {
361  xprt_clear_locked(xprt);
362  __xprt_lock_write_next(xprt);
363  }
364 }
366 
375 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
376 {
377  if (xprt->snd_task == task) {
378  xprt_clear_locked(xprt);
379  __xprt_lock_write_next_cong(xprt);
380  }
381 }
383 
384 static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
385 {
386  spin_lock_bh(&xprt->transport_lock);
387  xprt->ops->release_xprt(xprt, task);
388  spin_unlock_bh(&xprt->transport_lock);
389 }
390 
391 /*
392  * Van Jacobson congestion avoidance. Check if the congestion window
393  * overflowed. Put the task to sleep if this is the case.
394  */
395 static int
396 __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
397 {
398  struct rpc_rqst *req = task->tk_rqstp;
399 
400  if (req->rq_cong)
401  return 1;
402  dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
403  task->tk_pid, xprt->cong, xprt->cwnd);
404  if (RPCXPRT_CONGESTED(xprt))
405  return 0;
406  req->rq_cong = 1;
407  xprt->cong += RPC_CWNDSCALE;
408  return 1;
409 }
410 
411 /*
412  * Adjust the congestion window, and wake up the next task
413  * that has been sleeping due to congestion
414  */
415 static void
416 __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
417 {
418  if (!req->rq_cong)
419  return;
420  req->rq_cong = 0;
421  xprt->cong -= RPC_CWNDSCALE;
422  __xprt_lock_write_next_cong(xprt);
423 }
424 
432 {
433  __xprt_put_cong(task->tk_xprt, task->tk_rqstp);
434 }
436 
444 void xprt_adjust_cwnd(struct rpc_task *task, int result)
445 {
446  struct rpc_rqst *req = task->tk_rqstp;
447  struct rpc_xprt *xprt = task->tk_xprt;
448  unsigned long cwnd = xprt->cwnd;
449 
450  if (result >= 0 && cwnd <= xprt->cong) {
451  /* The (cwnd >> 1) term makes sure
452  * the result gets rounded properly. */
453  cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
454  if (cwnd > RPC_MAXCWND(xprt))
455  cwnd = RPC_MAXCWND(xprt);
456  __xprt_lock_write_next_cong(xprt);
457  } else if (result == -ETIMEDOUT) {
458  cwnd >>= 1;
459  if (cwnd < RPC_CWNDSCALE)
460  cwnd = RPC_CWNDSCALE;
461  }
462  dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
463  xprt->cong, xprt->cwnd, cwnd);
464  xprt->cwnd = cwnd;
465  __xprt_put_cong(xprt, req);
466 }
468 
475 void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status)
476 {
477  if (status < 0)
478  rpc_wake_up_status(&xprt->pending, status);
479  else
480  rpc_wake_up(&xprt->pending);
481 }
483 
490 {
491  struct rpc_rqst *req = task->tk_rqstp;
492  struct rpc_xprt *xprt = req->rq_xprt;
493 
494  task->tk_timeout = req->rq_timeout;
495  rpc_sleep_on(&xprt->pending, task, action);
496 }
498 
505 void xprt_write_space(struct rpc_xprt *xprt)
506 {
507  spin_lock_bh(&xprt->transport_lock);
508  if (xprt->snd_task) {
509  dprintk("RPC: write space: waking waiting task on "
510  "xprt %p\n", xprt);
511  rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task);
512  }
513  spin_unlock_bh(&xprt->transport_lock);
514 }
516 
526 {
527  task->tk_timeout = task->tk_rqstp->rq_timeout;
528 }
530 
538 {
539  int timer = task->tk_msg.rpc_proc->p_timer;
540  struct rpc_clnt *clnt = task->tk_client;
541  struct rpc_rtt *rtt = clnt->cl_rtt;
542  struct rpc_rqst *req = task->tk_rqstp;
543  unsigned long max_timeout = clnt->cl_timeout->to_maxval;
544 
545  task->tk_timeout = rpc_calc_rto(rtt, timer);
546  task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
547  if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
548  task->tk_timeout = max_timeout;
549 }
551 
552 static void xprt_reset_majortimeo(struct rpc_rqst *req)
553 {
554  const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
555 
556  req->rq_majortimeo = req->rq_timeout;
557  if (to->to_exponential)
558  req->rq_majortimeo <<= to->to_retries;
559  else
560  req->rq_majortimeo += to->to_increment * to->to_retries;
561  if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
562  req->rq_majortimeo = to->to_maxval;
563  req->rq_majortimeo += jiffies;
564 }
565 
571 int xprt_adjust_timeout(struct rpc_rqst *req)
572 {
573  struct rpc_xprt *xprt = req->rq_xprt;
574  const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
575  int status = 0;
576 
577  if (time_before(jiffies, req->rq_majortimeo)) {
578  if (to->to_exponential)
579  req->rq_timeout <<= 1;
580  else
581  req->rq_timeout += to->to_increment;
582  if (to->to_maxval && req->rq_timeout >= to->to_maxval)
583  req->rq_timeout = to->to_maxval;
584  req->rq_retries++;
585  } else {
586  req->rq_timeout = to->to_initval;
587  req->rq_retries = 0;
588  xprt_reset_majortimeo(req);
589  /* Reset the RTT counters == "slow start" */
590  spin_lock_bh(&xprt->transport_lock);
591  rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
592  spin_unlock_bh(&xprt->transport_lock);
593  status = -ETIMEDOUT;
594  }
595 
596  if (req->rq_timeout == 0) {
597  printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");
598  req->rq_timeout = 5 * HZ;
599  }
600  return status;
601 }
602 
603 static void xprt_autoclose(struct work_struct *work)
604 {
605  struct rpc_xprt *xprt =
606  container_of(work, struct rpc_xprt, task_cleanup);
607 
608  xprt->ops->close(xprt);
609  clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
610  xprt_release_write(xprt, NULL);
611 }
612 
618 void xprt_disconnect_done(struct rpc_xprt *xprt)
619 {
620  dprintk("RPC: disconnected transport %p\n", xprt);
621  spin_lock_bh(&xprt->transport_lock);
622  xprt_clear_connected(xprt);
624  spin_unlock_bh(&xprt->transport_lock);
625 }
627 
633 void xprt_force_disconnect(struct rpc_xprt *xprt)
634 {
635  /* Don't race with the test_bit() in xprt_clear_locked() */
636  spin_lock_bh(&xprt->transport_lock);
637  set_bit(XPRT_CLOSE_WAIT, &xprt->state);
638  /* Try to schedule an autoclose RPC call */
639  if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
640  queue_work(rpciod_workqueue, &xprt->task_cleanup);
642  spin_unlock_bh(&xprt->transport_lock);
643 }
644 
656 void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
657 {
658  /* Don't race with the test_bit() in xprt_clear_locked() */
659  spin_lock_bh(&xprt->transport_lock);
660  if (cookie != xprt->connect_cookie)
661  goto out;
662  if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt))
663  goto out;
664  set_bit(XPRT_CLOSE_WAIT, &xprt->state);
665  /* Try to schedule an autoclose RPC call */
666  if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
667  queue_work(rpciod_workqueue, &xprt->task_cleanup);
669 out:
670  spin_unlock_bh(&xprt->transport_lock);
671 }
672 
673 static void
674 xprt_init_autodisconnect(unsigned long data)
675 {
676  struct rpc_xprt *xprt = (struct rpc_xprt *)data;
677 
678  spin_lock(&xprt->transport_lock);
679  if (!list_empty(&xprt->recv))
680  goto out_abort;
681  if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
682  goto out_abort;
683  spin_unlock(&xprt->transport_lock);
684  set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
685  queue_work(rpciod_workqueue, &xprt->task_cleanup);
686  return;
687 out_abort:
688  spin_unlock(&xprt->transport_lock);
689 }
690 
696 void xprt_connect(struct rpc_task *task)
697 {
698  struct rpc_xprt *xprt = task->tk_xprt;
699 
700  dprintk("RPC: %5u xprt_connect xprt %p %s connected\n", task->tk_pid,
701  xprt, (xprt_connected(xprt) ? "is" : "is not"));
702 
703  if (!xprt_bound(xprt)) {
704  task->tk_status = -EAGAIN;
705  return;
706  }
707  if (!xprt_lock_write(xprt, task))
708  return;
709 
710  if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state))
711  xprt->ops->close(xprt);
712 
713  if (xprt_connected(xprt))
714  xprt_release_write(xprt, task);
715  else {
716  task->tk_rqstp->rq_bytes_sent = 0;
717  task->tk_timeout = task->tk_rqstp->rq_timeout;
718  rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
719 
720  if (test_bit(XPRT_CLOSING, &xprt->state))
721  return;
722  if (xprt_test_and_set_connecting(xprt))
723  return;
724  xprt->stat.connect_start = jiffies;
725  xprt->ops->connect(task);
726  }
727 }
728 
729 static void xprt_connect_status(struct rpc_task *task)
730 {
731  struct rpc_xprt *xprt = task->tk_xprt;
732 
733  if (task->tk_status == 0) {
734  xprt->stat.connect_count++;
735  xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
736  dprintk("RPC: %5u xprt_connect_status: connection established\n",
737  task->tk_pid);
738  return;
739  }
740 
741  switch (task->tk_status) {
742  case -EAGAIN:
743  dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
744  break;
745  case -ETIMEDOUT:
746  dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
747  "out\n", task->tk_pid);
748  break;
749  default:
750  dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
751  "server %s\n", task->tk_pid, -task->tk_status,
752  xprt->servername);
753  xprt_release_write(xprt, task);
754  task->tk_status = -EIO;
755  }
756 }
757 
764 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
765 {
766  struct rpc_rqst *entry;
767 
768  list_for_each_entry(entry, &xprt->recv, rq_list)
769  if (entry->rq_xid == xid)
770  return entry;
771 
772  dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
773  ntohl(xid));
774  xprt->stat.bad_xids++;
775  return NULL;
776 }
778 
779 static void xprt_update_rtt(struct rpc_task *task)
780 {
781  struct rpc_rqst *req = task->tk_rqstp;
782  struct rpc_rtt *rtt = task->tk_client->cl_rtt;
783  unsigned int timer = task->tk_msg.rpc_proc->p_timer;
784  long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));
785 
786  if (timer) {
787  if (req->rq_ntrans == 1)
788  rpc_update_rtt(rtt, timer, m);
789  rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
790  }
791 }
792 
800 void xprt_complete_rqst(struct rpc_task *task, int copied)
801 {
802  struct rpc_rqst *req = task->tk_rqstp;
803  struct rpc_xprt *xprt = req->rq_xprt;
804 
805  dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
806  task->tk_pid, ntohl(req->rq_xid), copied);
807 
808  xprt->stat.recvs++;
809  req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
810  if (xprt->ops->timer != NULL)
811  xprt_update_rtt(task);
812 
813  list_del_init(&req->rq_list);
814  req->rq_private_buf.len = copied;
815  /* Ensure all writes are done before we update */
816  /* req->rq_reply_bytes_recvd */
817  smp_wmb();
818  req->rq_reply_bytes_recvd = copied;
819  rpc_wake_up_queued_task(&xprt->pending, task);
820 }
822 
823 static void xprt_timer(struct rpc_task *task)
824 {
825  struct rpc_rqst *req = task->tk_rqstp;
826  struct rpc_xprt *xprt = req->rq_xprt;
827 
828  if (task->tk_status != -ETIMEDOUT)
829  return;
830  dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
831 
832  spin_lock_bh(&xprt->transport_lock);
833  if (!req->rq_reply_bytes_recvd) {
834  if (xprt->ops->timer)
835  xprt->ops->timer(task);
836  } else
837  task->tk_status = 0;
838  spin_unlock_bh(&xprt->transport_lock);
839 }
840 
841 static inline int xprt_has_timer(struct rpc_xprt *xprt)
842 {
843  return xprt->idle_timeout != 0;
844 }
845 
852 {
853  struct rpc_rqst *req = task->tk_rqstp;
854  struct rpc_xprt *xprt = req->rq_xprt;
855  int err = 0;
856 
857  dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
858 
859  spin_lock_bh(&xprt->transport_lock);
860  if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) {
861  err = req->rq_reply_bytes_recvd;
862  goto out_unlock;
863  }
864  if (!xprt->ops->reserve_xprt(xprt, task))
865  err = -EAGAIN;
866 out_unlock:
867  spin_unlock_bh(&xprt->transport_lock);
868  return err;
869 }
870 
871 void xprt_end_transmit(struct rpc_task *task)
872 {
873  xprt_release_write(task->tk_rqstp->rq_xprt, task);
874 }
875 
882 void xprt_transmit(struct rpc_task *task)
883 {
884  struct rpc_rqst *req = task->tk_rqstp;
885  struct rpc_xprt *xprt = req->rq_xprt;
886  int status, numreqs;
887 
888  dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
889 
890  if (!req->rq_reply_bytes_recvd) {
891  if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
892  /*
893  * Add to the list only if we're expecting a reply
894  */
895  spin_lock_bh(&xprt->transport_lock);
896  /* Update the softirq receive buffer */
897  memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
898  sizeof(req->rq_private_buf));
899  /* Add request to the receive list */
900  list_add_tail(&req->rq_list, &xprt->recv);
901  spin_unlock_bh(&xprt->transport_lock);
902  xprt_reset_majortimeo(req);
903  /* Turn off autodisconnect */
904  del_singleshot_timer_sync(&xprt->timer);
905  }
906  } else if (!req->rq_bytes_sent)
907  return;
908 
909  req->rq_connect_cookie = xprt->connect_cookie;
910  req->rq_xtime = ktime_get();
911  status = xprt->ops->send_request(task);
912  if (status != 0) {
913  task->tk_status = status;
914  return;
915  }
916 
917  dprintk("RPC: %5u xmit complete\n", task->tk_pid);
918  task->tk_flags |= RPC_TASK_SENT;
919  spin_lock_bh(&xprt->transport_lock);
920 
921  xprt->ops->set_retrans_timeout(task);
922 
923  numreqs = atomic_read(&xprt->num_reqs);
924  if (numreqs > xprt->stat.max_slots)
925  xprt->stat.max_slots = numreqs;
926  xprt->stat.sends++;
927  xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
928  xprt->stat.bklog_u += xprt->backlog.qlen;
929  xprt->stat.sending_u += xprt->sending.qlen;
930  xprt->stat.pending_u += xprt->pending.qlen;
931 
932  /* Don't race with disconnect */
933  if (!xprt_connected(xprt))
934  task->tk_status = -ENOTCONN;
935  else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) {
936  /*
937  * Sleep on the pending queue since
938  * we're expecting a reply.
939  */
940  rpc_sleep_on(&xprt->pending, task, xprt_timer);
941  }
942  spin_unlock_bh(&xprt->transport_lock);
943 }
944 
945 static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
946 {
947  struct rpc_rqst *req = ERR_PTR(-EAGAIN);
948 
949  if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
950  goto out;
951  req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
952  if (req != NULL)
953  goto out;
954  atomic_dec(&xprt->num_reqs);
955  req = ERR_PTR(-ENOMEM);
956 out:
957  return req;
958 }
959 
960 static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
961 {
962  if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
963  kfree(req);
964  return true;
965  }
966  return false;
967 }
968 
969 void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
970 {
971  struct rpc_rqst *req;
972 
973  spin_lock(&xprt->reserve_lock);
974  if (!list_empty(&xprt->free)) {
975  req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
976  list_del(&req->rq_list);
977  goto out_init_req;
978  }
979  req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
980  if (!IS_ERR(req))
981  goto out_init_req;
982  switch (PTR_ERR(req)) {
983  case -ENOMEM:
984  dprintk("RPC: dynamic allocation of request slot "
985  "failed! Retrying\n");
986  task->tk_status = -ENOMEM;
987  break;
988  case -EAGAIN:
989  rpc_sleep_on(&xprt->backlog, task, NULL);
990  dprintk("RPC: waiting for request slot\n");
991  default:
992  task->tk_status = -EAGAIN;
993  }
994  spin_unlock(&xprt->reserve_lock);
995  return;
996 out_init_req:
997  task->tk_status = 0;
998  task->tk_rqstp = req;
999  xprt_request_init(task, xprt);
1000  spin_unlock(&xprt->reserve_lock);
1001 }
1003 
/*
 * xprt_lock_and_alloc_slot - allocate a slot while holding the write lock
 *
 * Note: grabbing the xprt_lock_write() ensures that we throttle
 * new slot allocation if the transport is congested (i.e. when
 * reconnecting a stream transport or when out of socket write
 * buffer space).
 */
void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
{
	if (!xprt_lock_write(xprt, task))
		return;

	xprt_alloc_slot(xprt, task);
	xprt_release_write(xprt, task);
}
1017 
1018 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
1019 {
1020  spin_lock(&xprt->reserve_lock);
1021  if (!xprt_dynamic_free_slot(xprt, req)) {
1022  memset(req, 0, sizeof(*req)); /* mark unused */
1023  list_add(&req->rq_list, &xprt->free);
1024  }
1025  rpc_wake_up_next(&xprt->backlog);
1026  spin_unlock(&xprt->reserve_lock);
1027 }
1028 
1029 static void xprt_free_all_slots(struct rpc_xprt *xprt)
1030 {
1031  struct rpc_rqst *req;
1032  while (!list_empty(&xprt->free)) {
1033  req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list);
1034  list_del(&req->rq_list);
1035  kfree(req);
1036  }
1037 }
1038 
1039 struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
1040  unsigned int num_prealloc,
1041  unsigned int max_alloc)
1042 {
1043  struct rpc_xprt *xprt;
1044  struct rpc_rqst *req;
1045  int i;
1046 
1047  xprt = kzalloc(size, GFP_KERNEL);
1048  if (xprt == NULL)
1049  goto out;
1050 
1051  xprt_init(xprt, net);
1052 
1053  for (i = 0; i < num_prealloc; i++) {
1054  req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
1055  if (!req)
1056  break;
1057  list_add(&req->rq_list, &xprt->free);
1058  }
1059  if (i < num_prealloc)
1060  goto out_free;
1061  if (max_alloc > num_prealloc)
1062  xprt->max_reqs = max_alloc;
1063  else
1064  xprt->max_reqs = num_prealloc;
1065  xprt->min_reqs = num_prealloc;
1066  atomic_set(&xprt->num_reqs, num_prealloc);
1067 
1068  return xprt;
1069 
1070 out_free:
1071  xprt_free(xprt);
1072 out:
1073  return NULL;
1074 }
1076 
1077 void xprt_free(struct rpc_xprt *xprt)
1078 {
1079  put_net(xprt->xprt_net);
1080  xprt_free_all_slots(xprt);
1081  kfree(xprt);
1082 }
1084 
1092 void xprt_reserve(struct rpc_task *task)
1093 {
1094  struct rpc_xprt *xprt = task->tk_xprt;
1095 
1096  task->tk_status = 0;
1097  if (task->tk_rqstp != NULL)
1098  return;
1099 
1100  task->tk_timeout = 0;
1101  task->tk_status = -EAGAIN;
1102  xprt->ops->alloc_slot(xprt, task);
1103 }
1104 
1105 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
1106 {
1107  return (__force __be32)xprt->xid++;
1108 }
1109 
1110 static inline void xprt_init_xid(struct rpc_xprt *xprt)
1111 {
1112  xprt->xid = net_random();
1113 }
1114 
1115 static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
1116 {
1117  struct rpc_rqst *req = task->tk_rqstp;
1118 
1119  INIT_LIST_HEAD(&req->rq_list);
1120  req->rq_timeout = task->tk_client->cl_timeout->to_initval;
1121  req->rq_task = task;
1122  req->rq_xprt = xprt;
1123  req->rq_buffer = NULL;
1124  req->rq_xid = xprt_alloc_xid(xprt);
1125  req->rq_release_snd_buf = NULL;
1126  xprt_reset_majortimeo(req);
1127  dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
1128  req, ntohl(req->rq_xid));
1129 }
1130 
1136 void xprt_release(struct rpc_task *task)
1137 {
1138  struct rpc_xprt *xprt;
1139  struct rpc_rqst *req;
1140 
1141  if (!(req = task->tk_rqstp))
1142  return;
1143 
1144  xprt = req->rq_xprt;
1145  if (task->tk_ops->rpc_count_stats != NULL)
1146  task->tk_ops->rpc_count_stats(task, task->tk_calldata);
1147  else if (task->tk_client)
1148  rpc_count_iostats(task, task->tk_client->cl_metrics);
1149  spin_lock_bh(&xprt->transport_lock);
1150  xprt->ops->release_xprt(xprt, task);
1151  if (xprt->ops->release_request)
1152  xprt->ops->release_request(task);
1153  if (!list_empty(&req->rq_list))
1154  list_del(&req->rq_list);
1155  xprt->last_used = jiffies;
1156  if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
1157  mod_timer(&xprt->timer,
1158  xprt->last_used + xprt->idle_timeout);
1159  spin_unlock_bh(&xprt->transport_lock);
1160  if (req->rq_buffer)
1161  xprt->ops->buf_free(req->rq_buffer);
1162  if (req->rq_cred != NULL)
1163  put_rpccred(req->rq_cred);
1164  task->tk_rqstp = NULL;
1165  if (req->rq_release_snd_buf)
1166  req->rq_release_snd_buf(req);
1167 
1168  dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
1169  if (likely(!bc_prealloc(req)))
1170  xprt_free_slot(xprt, req);
1171  else
1172  xprt_free_bc_request(req);
1173 }
1174 
1175 static void xprt_init(struct rpc_xprt *xprt, struct net *net)
1176 {
1177  atomic_set(&xprt->count, 1);
1178 
1179  spin_lock_init(&xprt->transport_lock);
1180  spin_lock_init(&xprt->reserve_lock);
1181 
1182  INIT_LIST_HEAD(&xprt->free);
1183  INIT_LIST_HEAD(&xprt->recv);
1184 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
1185  spin_lock_init(&xprt->bc_pa_lock);
1186  INIT_LIST_HEAD(&xprt->bc_pa_list);
1187 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
1188 
1189  xprt->last_used = jiffies;
1190  xprt->cwnd = RPC_INITCWND;
1191  xprt->bind_index = 0;
1192 
1193  rpc_init_wait_queue(&xprt->binding, "xprt_binding");
1194  rpc_init_wait_queue(&xprt->pending, "xprt_pending");
1195  rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
1196  rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
1197 
1198  xprt_init_xid(xprt);
1199 
1200  xprt->xprt_net = get_net(net);
1201 }
1202 
1208 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
1209 {
1210  struct rpc_xprt *xprt;
1211  struct xprt_class *t;
1212 
1213  spin_lock(&xprt_list_lock);
1214  list_for_each_entry(t, &xprt_list, list) {
1215  if (t->ident == args->ident) {
1216  spin_unlock(&xprt_list_lock);
1217  goto found;
1218  }
1219  }
1220  spin_unlock(&xprt_list_lock);
1221  printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
1222  return ERR_PTR(-EIO);
1223 
1224 found:
1225  xprt = t->setup(args);
1226  if (IS_ERR(xprt)) {
1227  dprintk("RPC: xprt_create_transport: failed, %ld\n",
1228  -PTR_ERR(xprt));
1229  goto out;
1230  }
1231  INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1232  if (xprt_has_timer(xprt))
1233  setup_timer(&xprt->timer, xprt_init_autodisconnect,
1234  (unsigned long)xprt);
1235  else
1236  init_timer(&xprt->timer);
1237 
1238  if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
1239  xprt_destroy(xprt);
1240  return ERR_PTR(-EINVAL);
1241  }
1242  xprt->servername = kstrdup(args->servername, GFP_KERNEL);
1243  if (xprt->servername == NULL) {
1244  xprt_destroy(xprt);
1245  return ERR_PTR(-ENOMEM);
1246  }
1247 
1248  dprintk("RPC: created transport %p with %u slots\n", xprt,
1249  xprt->max_reqs);
1250 out:
1251  return xprt;
1252 }
1253 
1259 static void xprt_destroy(struct rpc_xprt *xprt)
1260 {
1261  dprintk("RPC: destroying transport %p\n", xprt);
1262  del_timer_sync(&xprt->timer);
1263 
1264  rpc_destroy_wait_queue(&xprt->binding);
1265  rpc_destroy_wait_queue(&xprt->pending);
1266  rpc_destroy_wait_queue(&xprt->sending);
1267  rpc_destroy_wait_queue(&xprt->backlog);
1268  cancel_work_sync(&xprt->task_cleanup);
1269  kfree(xprt->servername);
1270  /*
1271  * Tear down transport state and free the rpc_xprt
1272  */
1273  xprt->ops->destroy(xprt);
1274 }
1275 
1281 void xprt_put(struct rpc_xprt *xprt)
1282 {
1283  if (atomic_dec_and_test(&xprt->count))
1284  xprt_destroy(xprt);
1285 }
1286 
1292 struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
1293 {
1294  if (atomic_inc_not_zero(&xprt->count))
1295  return xprt;
1296  return NULL;
1297 }