blkback.c (Linux Kernel 3.7.1)
/******************************************************************************
 *
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/block/xen-blkfront.c
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include <xen/events.h>
#include <xen/page.h>
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "common.h"

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int xen_blkif_reqs = 64;
module_param_named(reqs, xen_blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");

/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats;
module_param(log_stats, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
        struct xen_blkif        *blkif;
        u64                     id;
        int                     nr_pages;
        atomic_t                pendcnt;
        unsigned short          operation;
        int                     status;
        struct list_head        free_list;
};

#define BLKBACK_INVALID_HANDLE (~0)

struct xen_blkbk {
        struct pending_req      *pending_reqs;
        /* List of all 'pending_req' available */
        struct list_head        pending_free;
        /* And its spinlock. */
        spinlock_t              pending_free_lock;
        wait_queue_head_t       pending_free_wq;
        /* The list of all pages that are available. */
        struct page             **pending_pages;
        /* And the grant handles that are available. */
        grant_handle_t          *pending_grant_handles;
};

static struct xen_blkbk *blkbk;

/*
 * Little helpful macro to figure out the index and virtual address of the
 * pending_pages[..]. For each 'pending_req' we have up to
 * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
 * 10 and would index in the pending_pages[..].
 */
static inline int vaddr_pagenr(struct pending_req *req, int seg)
{
        return (req - blkbk->pending_reqs) *
                BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
static inline unsigned long vaddr(struct pending_req *req, int seg)
{
        unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
        return (unsigned long)pfn_to_kaddr(pfn);
}

#define pending_handle(_req, _seg) \
        (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])


static int do_block_io_op(struct xen_blkif *blkif);
static int dispatch_rw_block_io(struct xen_blkif *blkif,
                                struct blkif_request *req,
                                struct pending_req *pending_req);
static void make_response(struct xen_blkif *blkif, u64 id,
                          unsigned short op, int st);

/*
 * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
 */
static struct pending_req *alloc_req(void)
{
        struct pending_req *req = NULL;
        unsigned long flags;

        spin_lock_irqsave(&blkbk->pending_free_lock, flags);
        if (!list_empty(&blkbk->pending_free)) {
                req = list_entry(blkbk->pending_free.next, struct pending_req,
                                 free_list);
                list_del(&req->free_list);
        }
        spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
        return req;
}

/*
 * Return the 'pending_req' structure back to the freepool. We also
 * wake up the thread if it was waiting for a free page.
 */
static void free_req(struct pending_req *req)
{
        unsigned long flags;
        int was_empty;

        spin_lock_irqsave(&blkbk->pending_free_lock, flags);
        was_empty = list_empty(&blkbk->pending_free);
        list_add(&req->free_list, &blkbk->pending_free);
        spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
        if (was_empty)
                wake_up(&blkbk->pending_free_wq);
}

/*
 * Routines for managing virtual block devices (vbds).
 */
static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
                             int operation)
{
        struct xen_vbd *vbd = &blkif->vbd;
        int rc = -EACCES;

        if ((operation != READ) && vbd->readonly)
                goto out;

        if (likely(req->nr_sects)) {
                blkif_sector_t end = req->sector_number + req->nr_sects;

                if (unlikely(end < req->sector_number))
                        goto out;
                if (unlikely(end > vbd_sz(vbd)))
                        goto out;
        }

        req->dev  = vbd->pdevice;
        req->bdev = vbd->bdev;
        rc = 0;

 out:
        return rc;
}

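/* Tell the frontend about a changed device size by updating 'sectors' in xenstore. */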
static void xen_vbd_resize(struct xen_blkif *blkif)
{
        struct xen_vbd *vbd = &blkif->vbd;
        struct xenbus_transaction xbt;
        int err;
        struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
        unsigned long long new_size = vbd_sz(vbd);

        pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
                blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
        pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
        vbd->size = new_size;
again:
        err = xenbus_transaction_start(&xbt);
        if (err) {
                pr_warn(DRV_PFX "Error starting transaction");
                return;
        }
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            (unsigned long long)vbd_sz(vbd));
        if (err) {
                pr_warn(DRV_PFX "Error writing new size");
                goto abort;
        }
        /*
         * Write the current state; we will use this to synchronize
         * the front-end. If the current state is "connected" the
         * front-end will get the new size information online.
         */
        err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
        if (err) {
                pr_warn(DRV_PFX "Error writing the state");
                goto abort;
        }

        err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
        if (err)
                pr_warn(DRV_PFX "Error ending transaction");
        return;
abort:
        xenbus_transaction_end(xbt, 1);
}

/*
 * Notification from the guest OS.
 */
static void blkif_notify_work(struct xen_blkif *blkif)
{
        blkif->waiting_reqs = 1;
        wake_up(&blkif->wq);
}

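/* Interrupt handler: the frontend kicked the event channel because new requests are on the ring. */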
irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
{
        blkif_notify_work(dev_id);
        return IRQ_HANDLED;
}

/*
 * SCHEDULER FUNCTIONS
 */

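/* Print and reset the per-interface request counters (only used when 'log_stats' is set). */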
static void print_stats(struct xen_blkif *blkif)
{
        pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
                " | ds %4d\n",
                current->comm, blkif->st_oo_req,
                blkif->st_rd_req, blkif->st_wr_req,
                blkif->st_f_req, blkif->st_ds_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
        blkif->st_oo_req = 0;
        blkif->st_ds_req = 0;
}

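/*
 * Main per-device kernel thread: wait until the frontend kicks us and a free
 * 'pending_req' is available, then pull requests off the ring via
 * do_block_io_op().
 */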
int xen_blkif_schedule(void *arg)
{
        struct xen_blkif *blkif = arg;
        struct xen_vbd *vbd = &blkif->vbd;

        xen_blkif_get(blkif);

        while (!kthread_should_stop()) {
                if (try_to_freeze())
                        continue;
                if (unlikely(vbd->size != vbd_sz(vbd)))
                        xen_vbd_resize(blkif);

                wait_event_interruptible(
                        blkif->wq,
                        blkif->waiting_reqs || kthread_should_stop());
                wait_event_interruptible(
                        blkbk->pending_free_wq,
                        !list_empty(&blkbk->pending_free) ||
                        kthread_should_stop());

                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */

                if (do_block_io_op(blkif))
                        blkif->waiting_reqs = 1;

                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
        }

        if (log_stats)
                print_stats(blkif);

        blkif->xenblkd = NULL;
        xen_blkif_put(blkif);

        return 0;
}

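/* One segment of a request: its bus address (with in-page offset) and its length in 512-byte sectors. */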
struct seg_buf {
        unsigned long buf;
        unsigned int nsec;
};
/*
 * Unmap the grant references, and also remove the M2P over-rides
 * used in the 'pending_req'.
 */
static void xen_blkbk_unmap(struct pending_req *req)
{
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int i, invcount = 0;
        grant_handle_t handle;
        int ret;

        for (i = 0; i < req->nr_pages; i++) {
                handle = pending_handle(req, i);
                if (handle == BLKBACK_INVALID_HANDLE)
                        continue;
                gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
                                    GNTMAP_host_map, handle);
                pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
                pages[invcount] = virt_to_page(vaddr(req, i));
                invcount++;
        }

        ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
        BUG_ON(ret);
}

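/*
 * Map the frontend's grant references for this request into our address
 * space. Segments that fail to map keep BLKBACK_INVALID_HANDLE so that
 * xen_blkbk_unmap() can skip them later.
 */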
static int xen_blkbk_map(struct blkif_request *req,
                         struct pending_req *pending_req,
                         struct seg_buf seg[])
{
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        int i;
        int nseg = req->u.rw.nr_segments;
        int ret = 0;

        /*
         * Fill out preq.nr_sects with the proper number of sectors, and set up
         * map[..] with the PFN of the page in our domain together with the
         * corresponding grant reference for each page.
         */
        for (i = 0; i < nseg; i++) {
                uint32_t flags;

                flags = GNTMAP_host_map;
                if (pending_req->operation != BLKIF_OP_READ)
                        flags |= GNTMAP_readonly;
                gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
                                  req->u.rw.seg[i].gref,
                                  pending_req->blkif->domid);
        }

        ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
        BUG_ON(ret);

        /*
         * Now swizzle the MFN in our domain with the MFN from the other domain
         * so that when we access vaddr(pending_req,i) it has the contents of
         * the page from the other domain.
         */
        for (i = 0; i < nseg; i++) {
                if (unlikely(map[i].status != 0)) {
                        pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
                        map[i].handle = BLKBACK_INVALID_HANDLE;
                        ret |= 1;
                }

                pending_handle(pending_req, i) = map[i].handle;

                if (ret)
                        continue;

                seg[i].buf = map[i].dev_bus_addr |
                        (req->u.rw.seg[i].first_sect << 9);
        }
        return ret;
}

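/* Handle a BLKIF_OP_DISCARD request by forwarding it to blkdev_issue_discard(). */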
static int dispatch_discard_io(struct xen_blkif *blkif,
                               struct blkif_request *req)
{
        int err = 0;
        int status = BLKIF_RSP_OKAY;
        struct block_device *bdev = blkif->vbd.bdev;
        unsigned long secure;

        blkif->st_ds_req++;

        xen_blkif_get(blkif);
        secure = (blkif->vbd.discard_secure &&
                  (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
                 BLKDEV_DISCARD_SECURE : 0;

        err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
                                   req->u.discard.nr_sectors,
                                   GFP_KERNEL, secure);

        if (err == -EOPNOTSUPP) {
                pr_debug(DRV_PFX "discard op failed, not supported\n");
                status = BLKIF_RSP_EOPNOTSUPP;
        } else if (err)
                status = BLKIF_RSP_ERROR;

        make_response(blkif, req->u.discard.id, req->operation, status);
        xen_blkif_put(blkif);
        return err;
}

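/*
 * Wait until all I/O in flight on this interface has completed; used to
 * implement the drain semantics of BLKIF_OP_WRITE_BARRIER.
 */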
static void xen_blk_drain_io(struct xen_blkif *blkif)
{
        atomic_set(&blkif->drain, 1);
        do {
                /* The initial value is one, and one refcnt taken at the
                 * start of the xen_blkif_schedule thread. */
                if (atomic_read(&blkif->refcnt) <= 2)
                        break;
                wait_for_completion_interruptible_timeout(
                                &blkif->drain_complete, HZ);

                if (!atomic_read(&blkif->drain))
                        break;
        } while (!kthread_should_stop());
        atomic_set(&blkif->drain, 0);
}

/*
 * Completion callback on the bio's. Called as bh->b_end_io()
 */

static void __end_block_io_op(struct pending_req *pending_req, int error)
{
        /* An error fails the entire request. */
        if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
            (error == -EOPNOTSUPP)) {
                pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
                xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
        } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
                   (error == -EOPNOTSUPP)) {
                pr_debug(DRV_PFX "write barrier op failed, not supported\n");
                xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
        } else if (error) {
                pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
                         " error=%d\n", error);
                pending_req->status = BLKIF_RSP_ERROR;
        }

        /*
         * If all of the bio's have completed it is time to unmap
         * the grant references associated with 'request' and provide
         * the proper response on the ring.
         */
        if (atomic_dec_and_test(&pending_req->pendcnt)) {
                xen_blkbk_unmap(pending_req);
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                xen_blkif_put(pending_req->blkif);
                if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
                        if (atomic_read(&pending_req->blkif->drain))
                                complete(&pending_req->blkif->drain_complete);
                }
                free_req(pending_req);
        }
}

/*
 * bio callback.
 */
static void end_block_io_op(struct bio *bio, int error)
{
        __end_block_io_op(bio->bi_private, error);
        bio_put(bio);
}



/*
 * Copy the 'struct blkif_request' from the ring buffer (which has the
 * sectors we want, how many of them, the grant references, etc) and
 * transmute it to the block API to hand it over to the proper block disk.
 */
static int
__do_block_io_op(struct xen_blkif *blkif)
{
        union blkif_back_rings *blk_rings = &blkif->blk_rings;
        struct blkif_request req;
        struct pending_req *pending_req;
        RING_IDX rc, rp;
        int more_to_do = 0;

        rc = blk_rings->common.req_cons;
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */

        while (rc != rp) {

                if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
                        break;

                if (kthread_should_stop()) {
                        more_to_do = 1;
                        break;
                }

                pending_req = alloc_req();
                if (NULL == pending_req) {
                        blkif->st_oo_req++;
                        more_to_do = 1;
                        break;
                }

                switch (blkif->blk_protocol) {
                case BLKIF_PROTOCOL_NATIVE:
                        memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
                        break;
                case BLKIF_PROTOCOL_X86_32:
                        blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
                        break;
                case BLKIF_PROTOCOL_X86_64:
                        blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
                        break;
                default:
                        BUG();
                }
                blk_rings->common.req_cons = ++rc; /* before make_response() */

                /* Apply all sanity checks to /private copy/ of request. */
                barrier();
                if (unlikely(req.operation == BLKIF_OP_DISCARD)) {
                        free_req(pending_req);
                        if (dispatch_discard_io(blkif, &req))
                                break;
                } else if (dispatch_rw_block_io(blkif, &req, pending_req))
                        break;

                /* Yield point for this unbounded loop. */
                cond_resched();
        }

        return more_to_do;
}

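/*
 * Consume requests until the ring is empty, doing a final check afterwards so
 * that a request queued just after the last pass is not missed.
 */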
static int
do_block_io_op(struct xen_blkif *blkif)
{
        union blkif_back_rings *blk_rings = &blkif->blk_rings;
        int more_to_do;

        do {
                more_to_do = __do_block_io_op(blkif);
                if (more_to_do)
                        break;

                RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
        } while (more_to_do);

        return more_to_do;
}
/*
 * Transmute the 'struct blkif_request' to a proper 'struct bio' and
 * call 'submit_bio' to pass it to the underlying storage.
 */
static int dispatch_rw_block_io(struct xen_blkif *blkif,
                                struct blkif_request *req,
                                struct pending_req *pending_req)
{
        struct phys_req preq;
        struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int nseg;
        struct bio *bio = NULL;
        struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        int i, nbio = 0;
        int operation;
        struct blk_plug plug;
        bool drain = false;

        switch (req->operation) {
        case BLKIF_OP_READ:
                blkif->st_rd_req++;
                operation = READ;
                break;
        case BLKIF_OP_WRITE:
                blkif->st_wr_req++;
                operation = WRITE_ODIRECT;
                break;
        case BLKIF_OP_WRITE_BARRIER:
                drain = true;
                /* fall through: a barrier is handled as a draining flush */
        case BLKIF_OP_FLUSH_DISKCACHE:
                blkif->st_f_req++;
                operation = WRITE_FLUSH;
                break;
        default:
                operation = 0; /* make gcc happy */
                goto fail_response;
                break;
        }

        /* Check that the number of segments is sane. */
        nseg = req->u.rw.nr_segments;

        if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
            unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                         nseg);
                /* Haven't submitted any bio's yet. */
                goto fail_response;
        }

        preq.dev           = req->u.rw.handle;
        preq.sector_number = req->u.rw.sector_number;
        preq.nr_sects      = 0;

        pending_req->blkif     = blkif;
        pending_req->id        = req->u.rw.id;
        pending_req->operation = req->operation;
        pending_req->status    = BLKIF_RSP_OKAY;
        pending_req->nr_pages  = nseg;

        for (i = 0; i < nseg; i++) {
                seg[i].nsec = req->u.rw.seg[i].last_sect -
                        req->u.rw.seg[i].first_sect + 1;
                if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
                    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
                        goto fail_response;
                preq.nr_sects += seg[i].nsec;

        }

        if (xen_vbd_translate(&preq, blkif, operation) != 0) {
                pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
                         operation == READ ? "read" : "write",
                         preq.sector_number,
                         preq.sector_number + preq.nr_sects, preq.dev);
                goto fail_response;
        }

        /*
         * This check _MUST_ be done after xen_vbd_translate as the preq.bdev
         * is set there.
         */
        for (i = 0; i < nseg; i++) {
                if (((int)preq.sector_number|(int)seg[i].nsec) &
                    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
                        pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
                                 blkif->domid);
                        goto fail_response;
                }
        }

        /* Wait on all outstanding I/O's and once that has been completed
         * issue the WRITE_FLUSH.
         */
        if (drain)
                xen_blk_drain_io(pending_req->blkif);

        /*
         * If we have failed at this point, we need to undo the M2P override,
         * set gnttab_set_unmap_op on all of the grant references and perform
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
        if (xen_blkbk_map(req, pending_req, seg))
                goto fail_flush;

        /*
         * This corresponding xen_blkif_put is done in __end_block_io_op, or
         * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
         */
        xen_blkif_get(blkif);

        for (i = 0; i < nseg; i++) {
                while ((bio == NULL) ||
                       (bio_add_page(bio,
                                     blkbk->pending_page(pending_req, i),
                                     seg[i].nsec << 9,
                                     seg[i].buf & ~PAGE_MASK) == 0)) {

                        bio = bio_alloc(GFP_KERNEL, nseg-i);
                        if (unlikely(bio == NULL))
                                goto fail_put_bio;

                        biolist[nbio++] = bio;
                        bio->bi_bdev    = preq.bdev;
                        bio->bi_private = pending_req;
                        bio->bi_end_io  = end_block_io_op;
                        bio->bi_sector  = preq.sector_number;
                }

                preq.sector_number += seg[i].nsec;
        }

        /* This will be hit if the operation was a flush or discard. */
        if (!bio) {
                BUG_ON(operation != WRITE_FLUSH);

                bio = bio_alloc(GFP_KERNEL, 0);
                if (unlikely(bio == NULL))
                        goto fail_put_bio;

                biolist[nbio++] = bio;
                bio->bi_bdev    = preq.bdev;
                bio->bi_private = pending_req;
                bio->bi_end_io  = end_block_io_op;
        }

        /*
         * Set the pending count to the number of bios up front, so that the
         * request is completed only after the last submitted bio finishes.
         */
        atomic_set(&pending_req->pendcnt, nbio);

        /* Get a reference count for the disk queue and start sending I/O */
        blk_start_plug(&plug);

        for (i = 0; i < nbio; i++)
                submit_bio(operation, biolist[i]);

        /* Let the I/Os go.. */
        blk_finish_plug(&plug);

        if (operation == READ)
                blkif->st_rd_sect += preq.nr_sects;
        else if (operation & WRITE)
                blkif->st_wr_sect += preq.nr_sects;

        return 0;

 fail_flush:
        xen_blkbk_unmap(pending_req);
 fail_response:
        /* Haven't submitted any bio's yet. */
        make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
        free_req(pending_req);
        msleep(1); /* back off a bit */
        return -EIO;

 fail_put_bio:
        for (i = 0; i < nbio; i++)
                bio_put(biolist[i]);
        __end_block_io_op(pending_req, -EINVAL);
        msleep(1); /* back off a bit */
        return -EIO;
}


/*
 * Put a response on the ring on how the operation fared.
 */
static void make_response(struct xen_blkif *blkif, u64 id,
                          unsigned short op, int st)
{
        struct blkif_response resp;
        unsigned long flags;
        union blkif_back_rings *blk_rings = &blkif->blk_rings;
        int notify;

        resp.id        = id;
        resp.operation = op;
        resp.status    = st;

        spin_lock_irqsave(&blkif->blk_ring_lock, flags);
        /* Place on the response ring for the relevant domain. */
        switch (blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
                memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
                       &resp, sizeof(resp));
                break;
        case BLKIF_PROTOCOL_X86_32:
                memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
                       &resp, sizeof(resp));
                break;
        case BLKIF_PROTOCOL_X86_64:
                memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
                       &resp, sizeof(resp));
                break;
        default:
                BUG();
        }
        blk_rings->common.rsp_prod_pvt++;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
        spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
        if (notify)
                notify_remote_via_irq(blkif->irq);
}

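/*
 * Module initialisation: allocate the pool of pending requests, their pages
 * and grant handles, then initialise the interface layer and register with
 * xenbus.
 */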
static int __init xen_blkif_init(void)
{
        int i, mmap_pages;
        int rc = 0;

        if (!xen_domain())
                return -ENODEV;

        blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
        if (!blkbk) {
                pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
                return -ENOMEM;
        }

        mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;

        blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
                                        xen_blkif_reqs, GFP_KERNEL);
        blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
                                        mmap_pages, GFP_KERNEL);
        blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
                                        mmap_pages, GFP_KERNEL);

        if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
            !blkbk->pending_pages) {
                rc = -ENOMEM;
                goto out_of_memory;
        }

        for (i = 0; i < mmap_pages; i++) {
                blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
                blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
                if (blkbk->pending_pages[i] == NULL) {
                        rc = -ENOMEM;
                        goto out_of_memory;
                }
        }
        rc = xen_blkif_interface_init();
        if (rc)
                goto failed_init;

        INIT_LIST_HEAD(&blkbk->pending_free);
        spin_lock_init(&blkbk->pending_free_lock);
        init_waitqueue_head(&blkbk->pending_free_wq);

        for (i = 0; i < xen_blkif_reqs; i++)
                list_add_tail(&blkbk->pending_reqs[i].free_list,
                              &blkbk->pending_free);

        rc = xen_blkif_xenbus_init();
        if (rc)
                goto failed_init;

        return 0;

 out_of_memory:
        pr_alert(DRV_PFX "%s: out of memory\n", __func__);
 failed_init:
        kfree(blkbk->pending_reqs);
        kfree(blkbk->pending_grant_handles);
        if (blkbk->pending_pages) {
                for (i = 0; i < mmap_pages; i++) {
                        if (blkbk->pending_pages[i])
                                __free_page(blkbk->pending_pages[i]);
                }
                kfree(blkbk->pending_pages);
        }
        kfree(blkbk);
        blkbk = NULL;
        return rc;
}

module_init(xen_blkif_init);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vbd");