Linux Kernel 3.7.1
virtio_blk.c
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#include <linux/idr.h>

#define PART_BITS 4

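/*
 * When set at module load time (e.g. "modprobe virtio_blk use_bio=1",
 * assuming the usual module name), use_bio selects the bio-based
 * submission path (virtblk_make_request) instead of the default
 * request-queue path (virtblk_request). S_IRUGO makes the parameter
 * read-only once the module is loaded.
 */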
static bool use_bio;
module_param(use_bio, bool, S_IRUGO);

static int major;
static DEFINE_IDA(vd_index_ida);

struct workqueue_struct *virtblk_wq;

struct virtio_blk
{
	struct virtio_device *vdev;
	struct virtqueue *vq;
	wait_queue_head_t queue_wait;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	mempool_t *pool;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* Lock for config space updates */
	struct mutex config_lock;

	/* enable config space updates */
	bool config_enable;

	/* What host tells us, plus 2 for header & trailer. */
	unsigned int sg_elems;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* Scatterlist: can be too big for stack. */
	struct scatterlist sg[/*sg_elems*/];
};

struct virtblk_req
{
	struct request *req;
	struct bio *bio;
	struct virtio_blk_outhdr out_hdr;
	struct virtio_scsi_inhdr in_hdr;
	struct work_struct work;
	struct virtio_blk *vblk;
	int flags;
	u8 status;
	struct scatterlist sg[];
};

enum {
	VBLK_IS_FLUSH	= 1,
	VBLK_REQ_FLUSH	= 2,
	VBLK_REQ_DATA	= 4,
	VBLK_REQ_FUA	= 8,
};

static inline int virtblk_result(struct virtblk_req *vbr)
{
	switch (vbr->status) {
	case VIRTIO_BLK_S_OK:
		return 0;
	case VIRTIO_BLK_S_UNSUPP:
		return -ENOTTY;
	default:
		return -EIO;
	}
}

static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
						    gfp_t gfp_mask)
{
	struct virtblk_req *vbr;

	vbr = mempool_alloc(vblk->pool, gfp_mask);
	if (!vbr)
		return NULL;

	vbr->vblk = vblk;
	if (use_bio)
		sg_init_table(vbr->sg, vblk->sg_elems);

	return vbr;
}

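/*
 * Slow path for a full virtqueue: virtblk_add_req() falls back to this
 * helper, which sleeps on vblk->queue_wait between attempts until
 * virtblk_done() reclaims descriptors and wakes the waiter.
 */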
static void virtblk_add_buf_wait(struct virtio_blk *vblk,
				 struct virtblk_req *vbr,
				 unsigned long out,
				 unsigned long in)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
					  TASK_UNINTERRUPTIBLE);

		spin_lock_irq(vblk->disk->queue->queue_lock);
		if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
				      GFP_ATOMIC) < 0) {
			spin_unlock_irq(vblk->disk->queue->queue_lock);
			io_schedule();
		} else {
			virtqueue_kick(vblk->vq);
			spin_unlock_irq(vblk->disk->queue->queue_lock);
			break;
		}
	}

	finish_wait(&vblk->queue_wait, &wait);
}

static inline void virtblk_add_req(struct virtblk_req *vbr,
				   unsigned int out, unsigned int in)
{
	struct virtio_blk *vblk = vbr->vblk;

	spin_lock_irq(vblk->disk->queue->queue_lock);
	if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
				       GFP_ATOMIC) < 0)) {
		spin_unlock_irq(vblk->disk->queue->queue_lock);
		virtblk_add_buf_wait(vblk, vbr, out, in);
		return;
	}
	virtqueue_kick(vblk->vq);
	spin_unlock_irq(vblk->disk->queue->queue_lock);
}

static int virtblk_bio_send_flush(struct virtblk_req *vbr)
{
	unsigned int out = 0, in = 0;

	vbr->flags |= VBLK_IS_FLUSH;
	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
	vbr->out_hdr.sector = 0;
	vbr->out_hdr.ioprio = 0;
	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
	sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));

	virtblk_add_req(vbr, out, in);

	return 0;
}

static int virtblk_bio_send_data(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	unsigned int num, out = 0, in = 0;
	struct bio *bio = vbr->bio;

	vbr->flags &= ~VBLK_IS_FLUSH;
	vbr->out_hdr.type = 0;
	vbr->out_hdr.sector = bio->bi_sector;
	vbr->out_hdr.ioprio = bio_prio(bio);

	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));

	num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);

	sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
		   sizeof(vbr->status));

	if (num) {
		if (bio->bi_rw & REQ_WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			out += num;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			in += num;
		}
	}

	virtblk_add_req(vbr, out, in);

	return 0;
}

static void virtblk_bio_send_data_work(struct work_struct *work)
{
	struct virtblk_req *vbr;

	vbr = container_of(work, struct virtblk_req, work);

	virtblk_bio_send_data(vbr);
}

static void virtblk_bio_send_flush_work(struct work_struct *work)
{
	struct virtblk_req *vbr;

	vbr = container_of(work, struct virtblk_req, work);

	virtblk_bio_send_flush(vbr);
}

static inline void virtblk_request_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;
	struct request *req = vbr->req;
	int error = virtblk_result(vbr);

	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
		req->resid_len = vbr->in_hdr.residual;
		req->sense_len = vbr->in_hdr.sense_len;
		req->errors = vbr->in_hdr.errors;
	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
		req->errors = (error != 0);
	}

	__blk_end_request_all(req, error);
	mempool_free(vbr, vblk->pool);
}

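/*
 * Completion sequencing for the bio path: a REQ_FLUSH bio is sent as a
 * flush first, and virtblk_bio_flush_done() then queues the data
 * portion (VBLK_REQ_DATA); a REQ_FUA bio is sent data-first, and
 * virtblk_bio_data_done() issues a trailing flush before ending it.
 */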
static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (vbr->flags & VBLK_REQ_DATA) {
		/* Send out the actual write data */
		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
		queue_work(virtblk_wq, &vbr->work);
	} else {
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
	}
}

static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
{
	struct virtio_blk *vblk = vbr->vblk;

	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
		/* Send out a flush before ending the bio */
		vbr->flags &= ~VBLK_REQ_DATA;
		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
		queue_work(virtblk_wq, &vbr->work);
	} else {
		bio_endio(vbr->bio, virtblk_result(vbr));
		mempool_free(vbr, vblk->pool);
	}
}

static inline void virtblk_bio_done(struct virtblk_req *vbr)
{
	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
		virtblk_bio_flush_done(vbr);
	else
		virtblk_bio_data_done(vbr);
}

static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool bio_done = false, req_done = false;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
			if (vbr->bio) {
				virtblk_bio_done(vbr);
				bio_done = true;
			} else {
				virtblk_request_done(vbr);
				req_done = true;
			}
		}
	} while (!virtqueue_enable_cb(vq));
	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);

	if (bio_done)
		wake_up(&vblk->queue_wait);
}

static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
		   struct request *req)
{
	unsigned long num, out = 0, in = 0;
	struct virtblk_req *vbr;

	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
	if (!vbr)
		/* When another request finishes we'll try again. */
		return false;

	vbr->req = req;
	vbr->bio = NULL;
	if (req->cmd_flags & REQ_FLUSH) {
		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
		vbr->out_hdr.sector = 0;
		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else {
		switch (req->cmd_type) {
		case REQ_TYPE_FS:
			vbr->out_hdr.type = 0;
			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_BLOCK_PC:
			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		case REQ_TYPE_SPECIAL:
			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
			vbr->out_hdr.sector = 0;
			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
			break;
		default:
			/* We don't put anything else in the queue. */
			BUG();
		}
	}

	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));

	/*
	 * If this is a packet command we need a couple of additional headers.
	 * Behind the normal outhdr we put a segment with the scsi command
	 * block, and before the normal inhdr we put the sense data and the
	 * inhdr with additional status information.
	 */
	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);

	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);

	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense,
			   SCSI_SENSE_BUFFERSIZE);
		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
			   sizeof(vbr->in_hdr));
	}

	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
		   sizeof(vbr->status));

	if (num) {
		if (rq_data_dir(vbr->req) == WRITE) {
			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
			out += num;
		} else {
			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
			in += num;
		}
	}

	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
			      GFP_ATOMIC) < 0) {
		mempool_free(vbr, vblk->pool);
		return false;
	}

	return true;
}

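/*
 * do_req() lays the buffers out in virtqueue order: the out_hdr is
 * always the first out-element and the status byte is always the last
 * in-element (with SCSI command block, data and sense in between as
 * needed). This header/trailer pair is the "+2" reserved in sg_elems.
 */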
static void virtblk_request(struct request_queue *q)
{
	struct virtio_blk *vblk = q->queuedata;
	struct request *req;
	unsigned int issued = 0;

	while ((req = blk_peek_request(q)) != NULL) {
		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

		/* If this request fails, stop queue and wait for something to
		   finish to restart it. */
		if (!do_req(q, vblk, req)) {
			blk_stop_queue(q);
			break;
		}
		blk_start_request(req);
		issued++;
	}

	if (issued)
		virtqueue_kick(vblk->vq);
}

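/*
 * If do_req() fails (no descriptors or no memory), the queue stays
 * stopped until virtblk_done() restarts it with blk_start_queue(), so
 * submission resumes on completion without any polling.
 */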
static void virtblk_make_request(struct request_queue *q, struct bio *bio)
{
	struct virtio_blk *vblk = q->queuedata;
	struct virtblk_req *vbr;

	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);

	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
	if (!vbr) {
		bio_endio(bio, -ENOMEM);
		return;
	}

	vbr->bio = bio;
	vbr->flags = 0;
	if (bio->bi_rw & REQ_FLUSH)
		vbr->flags |= VBLK_REQ_FLUSH;
	if (bio->bi_rw & REQ_FUA)
		vbr->flags |= VBLK_REQ_FUA;
	if (bio->bi_size)
		vbr->flags |= VBLK_REQ_DATA;

	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
		virtblk_bio_send_flush(vbr);
	else
		virtblk_bio_send_data(vbr);
}

/* return id (s/n) string for *disk to *id_str */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request *req;
	struct bio *bio;
	int err;

	bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
			   GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
	if (IS_ERR(req)) {
		bio_put(bio);
		return PTR_ERR(req);
	}

	req->cmd_type = REQ_TYPE_SPECIAL;
	err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
	blk_put_request(req);

	return err;
}

static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
			 unsigned int cmd, unsigned long data)
{
	struct gendisk *disk = bdev->bd_disk;
	struct virtio_blk *vblk = disk->private_data;

	/*
	 * Only allow the generic SCSI ioctls if the host can support it.
	 */
	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
		return -ENOTTY;

	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
				  (void __user *)data);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	struct virtio_blk_geometry vgeo;
	int err;

	/* see if the host passed in geometry config */
	err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
				offsetof(struct virtio_blk_config, geometry),
				&vgeo);

	if (!err) {
		geo->heads = vgeo.heads;
		geo->sectors = vgeo.sectors;
		geo->cylinders = vgeo.cylinders;
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
	return 0;
}

static const struct block_device_operations virtblk_fops = {
	.ioctl	= virtblk_ioctl,
	.owner	= THIS_MODULE,
	.getgeo	= virtblk_getgeo,
};

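/*
 * With PART_BITS == 4 each disk owns 16 minor numbers: index 0 maps to
 * minor 0, index 1 to minor 16, and so on, leaving the low 4 bits for
 * partitions.
 */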
static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}

static ssize_t virtblk_serial_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);

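/*
 * The attribute above surfaces the device serial in sysfs; assuming a
 * disk named vda, it would be read as /sys/block/vda/serial.
 */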
static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	u64 capacity, size;

	mutex_lock(&vblk->config_lock);
	if (!vblk->config_enable)
		goto done;

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &capacity, sizeof(capacity));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)capacity != capacity) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)capacity);
		capacity = (sector_t)-1;
	}

	size = capacity * queue_logical_block_size(q);
	string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "new size: %llu %d-byte logical blocks (%s/%s)\n",
		   (unsigned long long)capacity,
		   queue_logical_block_size(q),
		   cap_str_10, cap_str_2);

	set_capacity(vblk->disk, capacity);
	revalidate_disk(vblk->disk);
done:
	mutex_unlock(&vblk->config_lock);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
	int err = 0;

	/* We expect one virtqueue, for output. */
	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
	if (IS_ERR(vblk->vq))
		err = PTR_ERR(vblk->vq);

	return err;
}

/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}

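/*
 * The scheme is bijective base-26; for the "vd" prefix used in probe:
 * index 0 -> "vda", 25 -> "vdz", 26 -> "vdaa", 701 -> "vdzz",
 * 702 -> "vdaaa".
 */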
static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				offsetof(struct virtio_blk_config, wce),
				&writeback);
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);

	return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	if (writeback)
		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
	else
		blk_queue_flush(vblk->disk->queue, 0);

	revalidate_disk(vblk->disk);
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	int i;
	u8 writeback;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
		if (sysfs_streq(buf, virtblk_cache_types[i]))
			break;

	if (i < 0)
		return -EINVAL;

	writeback = i;
	vdev->config->set(vdev,
			  offsetof(struct virtio_blk_config, wce),
			  &writeback, sizeof(writeback));

	virtblk_update_cache_mode(vdev);
	return count;
}

static ssize_t
virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

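/*
 * The show/store pair above backs the "cache_type" sysfs attribute
 * defined below; assuming a disk named vda, writing "write back" to
 * /sys/block/vda/cache_type switches the cache mode at runtime.
 */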
static const struct device_attribute dev_attr_cache_type_ro =
	__ATTR(cache_type, S_IRUGO,
	       virtblk_cache_type_show, NULL);
static const struct device_attribute dev_attr_cache_type_rw =
	__ATTR(cache_type, S_IRUGO|S_IWUSR,
	       virtblk_cache_type_show, virtblk_cache_type_store);

static int __devinit virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;
	int pool_size;

	u64 cap;
	u32 v, blk_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;

	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
			     GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* We need to know how many segments before we allocate. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
				offsetof(struct virtio_blk_config, seg_max),
				&sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* We need extra sg elements at head and tail. */
	sg_elems += 2;
	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	init_waitqueue_head(&vblk->queue_wait);
	vblk->vdev = vdev;
	vblk->sg_elems = sg_elems;
	sg_init_table(vblk->sg, vblk->sg_elems);
	mutex_init(&vblk->config_lock);

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
	vblk->config_enable = true;

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	pool_size = sizeof(struct virtblk_req);
	if (use_bio)
		pool_size += sizeof(struct scatterlist) * sg_elems;
	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
	if (!vblk->pool) {
		err = -ENOMEM;
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
		goto out_mempool;
	}

	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
	if (!q) {
		err = -ENOMEM;
		goto out_put_disk;
	}

	if (use_bio)
		blk_queue_make_request(q, virtblk_make_request);
	q->queuedata = vblk;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->disk->driverfs_dev = &vdev->dev;
	vblk->index = index;

	/* configure queue flush support */
	virtblk_update_cache_mode(vdev);

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* Host must always specify the capacity. */
	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
			  &cap, sizeof(cap));

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
			 (unsigned long long)cap);
		cap = (sector_t)-1;
	}
	set_capacity(vblk->disk, cap);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, vblk->sg_elems-2);

	/* No need to bounce any requests */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

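	/*
	 * Each setting below is optional: virtio_config_val() only reads
	 * the config field when the corresponding feature bit was
	 * negotiated and returns an error otherwise, so every branch
	 * keeps a sane fallback.
	 */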
	/* Host can optionally specify maximum segment size and number of
	 * segments. */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
				offsetof(struct virtio_blk_config, size_max),
				&v);
	if (!err)
		blk_queue_max_segment_size(q, v);
	else
		blk_queue_max_segment_size(q, -1U);

	/* Host can optionally specify the block size of the device */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
				offsetof(struct virtio_blk_config, blk_size),
				&blk_size);
	if (!err)
		blk_queue_logical_block_size(q, blk_size);
	else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available */
	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, physical_block_exp),
			&physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, alignment_offset),
			&alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, min_io_size),
			&min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
			offsetof(struct virtio_blk_config, opt_io_size),
			&opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	add_disk(vblk->disk);
	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
	if (err)
		goto out_del_disk;

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_rw);
	else
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_ro);
	if (err)
		goto out_del_disk;
	return 0;

out_del_disk:
	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
out_put_disk:
	put_disk(vblk->disk);
out_mempool:
	mempool_destroy(vblk->pool);
out_free_vq:
	vdev->config->del_vqs(vdev);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_simple_remove(&vd_index_ida, index);
out:
	return err;
}

static void __devexit virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int index = vblk->index;

	/* Prevent config work handler from accessing the device. */
	mutex_lock(&vblk->config_lock);
	vblk->config_enable = false;
	mutex_unlock(&vblk->config_lock);

	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	flush_work(&vblk->config_work);

	put_disk(vblk->disk);
	mempool_destroy(vblk->pool);
	vdev->config->del_vqs(vdev);
	kfree(vblk);
	ida_simple_remove(&vd_index_ida, index);
}

#ifdef CONFIG_PM
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts */
	vdev->config->reset(vdev);

	/* Prevent config work handler from accessing the device. */
	mutex_lock(&vblk->config_lock);
	vblk->config_enable = false;
	mutex_unlock(&vblk->config_lock);

	flush_work(&vblk->config_work);

	spin_lock_irq(vblk->disk->queue->queue_lock);
	blk_stop_queue(vblk->disk->queue);
	spin_unlock_irq(vblk->disk->queue->queue_lock);
	blk_sync_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	vblk->config_enable = true;
	ret = init_vq(vdev->priv);
	if (!ret) {
		spin_lock_irq(vblk->disk->queue->queue_lock);
		blk_start_queue(vblk->disk->queue);
		spin_unlock_irq(vblk->disk->queue->queue_lock);
	}
	return ret;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
};

/*
 * virtio_blk causes spurious section mismatch warning by
 * simultaneously referring to a __devinit and a __devexit function.
 * Use __refdata to avoid this warning.
 */
static struct virtio_driver __refdata virtio_blk = {
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.probe			= virtblk_probe,
	.remove			= __devexit_p(virtblk_remove),
	.config_changed		= virtblk_config_changed,
#ifdef CONFIG_PM
	.freeze			= virtblk_freeze,
	.restore		= virtblk_restore,
#endif
};

static int __init init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit fini(void)
{
	unregister_blkdev(major, "virtblk");
	unregister_virtio_driver(&virtio_blk);
	destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");