Linux Kernel  3.7.1
virtio_pci.c
/*
 * Virtio PCI driver
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 *
 * Authors:
 *  Anthony Liguori  <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>

MODULE_AUTHOR("Anthony Liguori <[email protected]>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");

/* Our device structure */
struct virtio_pci_device
{
	struct virtio_device vdev;
	struct pci_dev *pci_dev;

	/* the IO mapping for the PCI config space */
	void __iomem *ioaddr;

	/* a list of queues so we can dispatch IRQs */
	spinlock_t lock;
	struct list_head virtqueues;

	/* MSI-X support */
	int msix_enabled;
	int intx_enabled;
	struct msix_entry *msix_entries;
	cpumask_var_t *msix_affinity_masks;
	/* Name strings for interrupts. This size should be enough,
	 * and I'm too lazy to allocate each name separately. */
	char (*msix_names)[256];
	/* Number of available vectors */
	unsigned msix_vectors;
	/* Vectors allocated, excluding per-vq vectors if any */
	unsigned msix_used_vectors;

	/* Status saved during hibernate/restore */
	u8 saved_status;

	/* Whether we have vector per vq */
	bool per_vq_vectors;
};

/* Constants for MSI-X */
/* Use first vector for configuration changes, second and the rest for
 * virtqueues. Thus, we need at least 2 vectors for MSI. */
enum {
	VP_MSIX_CONFIG_VECTOR = 0,
	VP_MSIX_VQ_VECTOR = 1,
};

struct virtio_pci_vq_info
{
	/* the actual virtqueue */
	struct virtqueue *vq;

	/* the number of entries in the queue */
	int num;

	/* the virtual address of the ring queue */
	void *queue;

	/* the list node for the virtqueues list */
	struct list_head node;

	/* MSI-X vector (or none) */
	unsigned msix_vector;
};

/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
static struct pci_device_id virtio_pci_id_table[] = {
	{ 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);

/* Convert a generic virtio device to our structure */
static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
{
	return container_of(vdev, struct virtio_pci_device, vdev);
}

/* virtio config->get_features() implementation */
static u32 vp_get_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* When someone needs more than 32 feature bits, we'll need to
	 * steal a bit to indicate that the rest are somewhere else. */
	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}

/* virtio config->finalize_features() implementation */
static void vp_finalize_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* Give virtio_ring a chance to accept features. */
	vring_transport_features(vdev);

	/* We only support 32 feature bits. */
	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
	iowrite32(vdev->features[0], vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
}

/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	void __iomem *ioaddr = vp_dev->ioaddr +
				VIRTIO_PCI_CONFIG(vp_dev) + offset;
	u8 *ptr = buf;
	int i;

	for (i = 0; i < len; i++)
		ptr[i] = ioread8(ioaddr + i);
}

/* the config->set() implementation.  it's symmetric to the config->get()
 * implementation */
static void vp_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	void __iomem *ioaddr = vp_dev->ioaddr +
				VIRTIO_PCI_CONFIG(vp_dev) + offset;
	const u8 *ptr = buf;
	int i;

	for (i = 0; i < len; i++)
		iowrite8(ptr[i], ioaddr + i);
}

/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}

static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	/* We should never be setting status to 0. */
	BUG_ON(status == 0);
	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}

/* wait for pending irq handlers */
static void vp_synchronize_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled)
		synchronize_irq(vp_dev->pci_dev->irq);

	for (i = 0; i < vp_dev->msix_vectors; ++i)
		synchronize_irq(vp_dev->msix_entries[i].vector);
}

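/* config->reset() implementation: writing 0 to the status register resets
 * the device; the status read back flushes the write, and
 * vp_synchronize_vectors() makes sure no handler is still running. */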
static void vp_reset(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	/* 0 status means a reset. */
	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
	/* Flush out the status write, and flush in device writes,
	 * including MSI-X interrupts, if any. */
	ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
	/* Flush pending VQ/configuration callbacks. */
	vp_synchronize_vectors(vdev);
}

/* the notify function used when creating a virt queue */
static void vp_notify(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);

	/* we write the queue's selector into the notification register to
	 * signal the other end */
	iowrite16(virtqueue_get_queue_index(vq),
		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
}

/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_driver *drv;
	drv = container_of(vp_dev->vdev.dev.driver,
			   struct virtio_driver, driver);

	if (drv && drv->config_changed)
		drv->config_changed(&vp_dev->vdev);
	return IRQ_HANDLED;
}

/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_pci_vq_info *info;
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_for_each_entry(info, &vp_dev->virtqueues, node) {
		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	return ret;
}

/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ but before we invoke the callback since
 * the callback may notify the host which results in the host attempting to
 * raise an interrupt that we would then mask once we acknowledged the
 * interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	u8 isr;

	/* reading the ISR has the effect of also clearing it so it's very
	 * important to save off the value. */
	isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);

	/* It's definitely not us if the ISR was not high */
	if (!isr)
		return IRQ_NONE;

	/* Configuration change?  Tell driver if it wants to know. */
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);

	return vp_vring_interrupt(irq, opaque);
}

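/* Release every interrupt resource we own: the shared INTx handler, any
 * registered MSI-X handlers, the per-vector affinity masks, and finally the
 * MSI-X vectors and bookkeeping arrays themselves. */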
static void vp_free_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled) {
		free_irq(vp_dev->pci_dev->irq, vp_dev);
		vp_dev->intx_enabled = 0;
	}

	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
		free_irq(vp_dev->msix_entries[i].vector, vp_dev);

	for (i = 0; i < vp_dev->msix_vectors; i++)
		if (vp_dev->msix_affinity_masks[i])
			free_cpumask_var(vp_dev->msix_affinity_masks[i]);

	if (vp_dev->msix_enabled) {
		/* Disable the vector used for configuration */
		iowrite16(VIRTIO_MSI_NO_VECTOR,
			  vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
		/* Flush the write out to device */
		ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);

		pci_disable_msix(vp_dev->pci_dev);
		vp_dev->msix_enabled = 0;
		vp_dev->msix_vectors = 0;
	}

	vp_dev->msix_used_vectors = 0;
	kfree(vp_dev->msix_names);
	vp_dev->msix_names = NULL;
	kfree(vp_dev->msix_entries);
	vp_dev->msix_entries = NULL;
	kfree(vp_dev->msix_affinity_masks);
	vp_dev->msix_affinity_masks = NULL;
}

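/* Allocate and enable nvectors MSI-X vectors.  The first vector is always
 * registered for configuration-change interrupts; if per_vq_vectors is false,
 * a second, shared vector is registered for all virtqueues. */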
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;

	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
				       GFP_KERNEL);
	if (!vp_dev->msix_entries)
		goto error;
	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
				     GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	vp_dev->msix_affinity_masks
		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
				       GFP_KERNEL))
			goto error;

	for (i = 0; i < nvectors; ++i)
		vp_dev->msix_entries[i].entry = i;

	/* pci_enable_msix returns positive if we can't get this many. */
	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
	if (err > 0)
		err = -ENOSPC;
	if (err)
		goto error;
	vp_dev->msix_vectors = nvectors;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(vp_dev->msix_entries[v].vector,
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;

	iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
	/* Verify we had enough resources to assign the vector */
	v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}

	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(vp_dev->msix_entries[v].vector,
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	vp_free_vectors(vdev);
	return err;
}

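/* Fall back to a single shared legacy (INTx) interrupt that covers both
 * configuration changes and all virtqueues (dispatched by vp_interrupt). */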
static int vp_request_intx(struct virtio_device *vdev)
{
	int err;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
	if (!err)
		vp_dev->intx_enabled = 1;
	return err;
}

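/* Allocate the ring memory for one virtqueue, tell the device where it is,
 * create the vring, and hook the queue up to its MSI-X vector (if any). */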
static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
				  void (*callback)(struct virtqueue *vq),
				  const char *name,
				  u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info;
	struct virtqueue *vq;
	unsigned long flags, size;
	u16 num;
	int err;

	/* Select the queue we're interested in */
	iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);

	/* Check if queue is either not available or already active. */
	num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
	if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
		return ERR_PTR(-ENOENT);

	/* allocate and fill out our structure that represents an active
	 * queue */
	info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	info->num = num;
	info->msix_vector = msix_vec;

	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
	if (info->queue == NULL) {
		err = -ENOMEM;
		goto out_info;
	}

	/* activate the queue */
	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);

	/* create the vring */
	vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
				 true, info->queue, vp_notify, callback, name);
	if (!vq) {
		err = -ENOMEM;
		goto out_activate_queue;
	}

	vq->priv = info;
	info->vq = vq;

	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
		iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
		msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
			err = -EBUSY;
			goto out_assign;
		}
	}

	if (callback) {
		spin_lock_irqsave(&vp_dev->lock, flags);
		list_add(&info->node, &vp_dev->virtqueues);
		spin_unlock_irqrestore(&vp_dev->lock, flags);
	} else {
		INIT_LIST_HEAD(&info->node);
	}

	return vq;

out_assign:
	vring_del_virtqueue(vq);
out_activate_queue:
	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
	free_pages_exact(info->queue, size);
out_info:
	kfree(info);
	return ERR_PTR(err);
}

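/* Tear down a single virtqueue: unlink it, detach its MSI-X vector, delete
 * the vring, deactivate the queue and free the ring memory. */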
static void vp_del_vq(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *info = vq->priv;
	unsigned long flags, size;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_del(&info->node);
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	iowrite16(virtqueue_get_queue_index(vq),
		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);

	if (vp_dev->msix_enabled) {
		iowrite16(VIRTIO_MSI_NO_VECTOR,
			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
		/* Flush the write out to device */
		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
	}

	vring_del_virtqueue(vq);

	/* Select and deactivate the queue */
	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);

	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
	free_pages_exact(info->queue, size);
	kfree(info);
}

/* the config->del_vqs() implementation */
static void vp_del_vqs(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *vq, *n;
	struct virtio_pci_vq_info *info;

	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
		info = vq->priv;
		if (vp_dev->per_vq_vectors &&
			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
				 vq);
		vp_del_vq(vq);
	}
	vp_dev->per_vq_vectors = false;

	vp_free_vectors(vdev);
}

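/* Common worker for vp_find_vqs(): set up the requested interrupt scheme
 * (INTx, shared MSI-X, or one MSI-X vector per virtqueue), then create each
 * named queue with setup_vq() and register its per-vq handler if needed. */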
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char *names[],
			      bool use_msix,
			      bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors;

	if (!use_msix) {
		/* Old style: one normal interrupt for change and all vqs. */
		err = vp_request_intx(vdev);
		if (err)
			goto error_request;
	} else {
		if (per_vq_vectors) {
			/* Best option: one for change interrupt, one per vq. */
			nvectors = 1;
			for (i = 0; i < nvqs; ++i)
				if (callbacks[i])
					++nvectors;
		} else {
			/* Second best: one for change, shared for all vqs. */
			nvectors = 2;
		}

		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
		if (err)
			goto error_request;
	}

	vp_dev->per_vq_vectors = per_vq_vectors;
	allocated_vectors = vp_dev->msix_used_vectors;
	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		} else if (!callbacks[i] || !vp_dev->msix_enabled)
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}

		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;

		/* allocate per-vq irq if available and necessary */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err) {
			vp_del_vq(vqs[i]);
			goto error_find;
		}
	}
	return 0;

error_find:
	vp_del_vqs(vdev);

error_request:
	return err;
}

/* the config->find_vqs() implementation */
static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[],
		       vq_callback_t *callbacks[],
		       const char *names[])
{
	int err;

	/* Try MSI-X with one vector per queue. */
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
	if (!err)
		return 0;
	/* Fallback: MSI-X with one vector for config, one shared for queues. */
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
				 true, false);
	if (!err)
		return 0;
	/* Finally fall back to regular interrupts. */
	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
				  false, false);
}

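/* the config->bus_name() implementation: report the underlying PCI device
 * name */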
static const char *vp_bus_name(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	return pci_name(vp_dev->pci_dev);
}

/* Setup the affinity for a virtqueue:
 * - force the affinity for per vq vector
 * - OR over all affinities for shared MSI
 * - ignore the affinity request if we're using INTX
 */
static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = vq->priv;
	struct cpumask *mask;
	unsigned int irq;

	if (!vq->callback)
		return -EINVAL;

	if (vp_dev->msix_enabled) {
		mask = vp_dev->msix_affinity_masks[info->msix_vector];
		irq = vp_dev->msix_entries[info->msix_vector].vector;
		if (cpu == -1)
			irq_set_affinity_hint(irq, NULL);
		else {
			cpumask_set_cpu(cpu, mask);
			irq_set_affinity_hint(irq, mask);
		}
	}
	return 0;
}

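/* The virtio config operations exported by this transport. */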
static struct virtio_config_ops virtio_pci_config_ops = {
	.get = vp_get,
	.set = vp_set,
	.get_status = vp_get_status,
	.set_status = vp_set_status,
	.reset = vp_reset,
	.find_vqs = vp_find_vqs,
	.del_vqs = vp_del_vqs,
	.get_features = vp_get_features,
	.finalize_features = vp_finalize_features,
	.bus_name = vp_bus_name,
	.set_vq_affinity = vp_set_vq_affinity,
};

static void virtio_pci_release_dev(struct device *_d)
{
	/*
	 * No need for a release method as we allocate/free
	 * all devices together with the pci devices.
	 * Provide an empty one to avoid getting a warning from core.
	 */
}

/* the PCI probing function */
static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
				      const struct pci_device_id *id)
{
	struct virtio_pci_device *vp_dev;
	int err;

	/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
	if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
		return -ENODEV;

	if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
		printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
		       VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
		return -ENODEV;
	}

	/* allocate our structure and fill it out */
	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
	if (vp_dev == NULL)
		return -ENOMEM;

	vp_dev->vdev.dev.parent = &pci_dev->dev;
	vp_dev->vdev.dev.release = virtio_pci_release_dev;
	vp_dev->vdev.config = &virtio_pci_config_ops;
	vp_dev->pci_dev = pci_dev;
	INIT_LIST_HEAD(&vp_dev->virtqueues);
	spin_lock_init(&vp_dev->lock);

	/* Disable MSI/MSIX to bring device to a known good state. */
	pci_msi_off(pci_dev);

	/* enable the device */
	err = pci_enable_device(pci_dev);
	if (err)
		goto out;

	err = pci_request_regions(pci_dev, "virtio-pci");
	if (err)
		goto out_enable_device;

	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
	if (vp_dev->ioaddr == NULL) {
		err = -ENOMEM;
		goto out_req_regions;
	}

	pci_set_drvdata(pci_dev, vp_dev);
	pci_set_master(pci_dev);

	/* we use the subsystem vendor/device id as the virtio vendor/device
	 * id.  this allows us to use the same PCI vendor/device id for all
	 * virtio devices and to identify the particular virtio driver by
	 * the subsystem ids */
	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
	vp_dev->vdev.id.device = pci_dev->subsystem_device;

	/* finally register the virtio device */
	err = register_virtio_device(&vp_dev->vdev);
	if (err)
		goto out_set_drvdata;

	return 0;

out_set_drvdata:
	pci_set_drvdata(pci_dev, NULL);
	pci_iounmap(pci_dev, vp_dev->ioaddr);
out_req_regions:
	pci_release_regions(pci_dev);
out_enable_device:
	pci_disable_device(pci_dev);
out:
	kfree(vp_dev);
	return err;
}

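/* the PCI removal function: unregister the virtio device and undo everything
 * set up by the probe function */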
static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
{
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);

	unregister_virtio_device(&vp_dev->vdev);

	vp_del_vqs(&vp_dev->vdev);
	pci_set_drvdata(pci_dev, NULL);
	pci_iounmap(pci_dev, vp_dev->ioaddr);
	pci_release_regions(pci_dev);
	pci_disable_device(pci_dev);
	kfree(vp_dev);
}

#ifdef CONFIG_PM
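/* Save the device status and let the driver quiesce before the PCI device is
 * disabled for system sleep. */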
static int virtio_pci_freeze(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	struct virtio_driver *drv;
	int ret;

	drv = container_of(vp_dev->vdev.dev.driver,
			   struct virtio_driver, driver);

	ret = 0;
	vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
	if (drv && drv->freeze)
		ret = drv->freeze(&vp_dev->vdev);

	if (!ret)
		pci_disable_device(pci_dev);
	return ret;
}

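/* Re-enable the PCI device on resume, re-negotiate features, let the driver
 * restore its state, and finally write back the saved status byte. */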
static int virtio_pci_restore(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	struct virtio_driver *drv;
	int ret;

	drv = container_of(vp_dev->vdev.dev.driver,
			   struct virtio_driver, driver);

	ret = pci_enable_device(pci_dev);
	if (ret)
		return ret;

	pci_set_master(pci_dev);
	vp_finalize_features(&vp_dev->vdev);

	if (drv && drv->restore)
		ret = drv->restore(&vp_dev->vdev);

	/* Finally, tell the device we're all set */
	if (!ret)
		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);

	return ret;
}

static const struct dev_pm_ops virtio_pci_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
};
#endif

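/* PCI driver glue: match the virtio vendor ID table and wire up the
 * probe/remove and power-management callbacks. */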
static struct pci_driver virtio_pci_driver = {
	.name = "virtio-pci",
	.id_table = virtio_pci_id_table,
	.probe = virtio_pci_probe,
	.remove = __devexit_p(virtio_pci_remove),
#ifdef CONFIG_PM
	.driver.pm = &virtio_pci_pm_ops,
#endif
};

static int __init virtio_pci_init(void)
{
	return pci_register_driver(&virtio_pci_driver);
}

module_init(virtio_pci_init);

static void __exit virtio_pci_exit(void)
{
	pci_unregister_driver(&virtio_pci_driver);
}

module_exit(virtio_pci_exit);