Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
xenbus.c
Go to the documentation of this file.
1 /*
2  * PCI Backend Xenbus Setup - handles setup with frontend and xend
3  *
4  * Author: Ryan Wilson <[email protected]>
5  */
6 #include <linux/module.h>
7 #include <linux/init.h>
8 #include <linux/list.h>
9 #include <linux/vmalloc.h>
10 #include <linux/workqueue.h>
11 #include <xen/xenbus.h>
12 #include <xen/events.h>
13 #include <asm/xen/pci.h>
14 #include "pciback.h"
15 
/*
 * Sentinel compared against pdev->evtchn_irq to tell whether an
 * event-channel IRQ is currently recorded for the device.
 */
16 #define INVALID_EVTCHN_IRQ (-1)
18 
19 static bool __read_mostly passthrough;
20 module_param(passthrough, bool, S_IRUGO);
21 MODULE_PARM_DESC(passthrough,
22  "Option to specify how to export PCI topology to guest:\n"\
23  " 0 - (default) Hide the true PCI topology and makes the frontend\n"\
24  " there is a single PCI bus with only the exported devices on it.\n"\
25  " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
26  " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
27  " 1 - Passthrough provides a real view of the PCI topology to the\n"\
28  " frontend (for example, a device at 06:01.b will still appear at\n"\
29  " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
30  " exposed PCI devices to its driver domains. This may be required\n"\
31  " for drivers which depend on finding their hardward in certain\n"\
32  " bus/slot locations.");
33 
34 static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
35 {
36  struct xen_pcibk_device *pdev;
37 
38  pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
39  if (pdev == NULL)
40  goto out;
41  dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
42 
43  pdev->xdev = xdev;
44  dev_set_drvdata(&xdev->dev, pdev);
45 
46  mutex_init(&pdev->dev_lock);
47 
48  pdev->sh_info = NULL;
50  pdev->be_watching = 0;
51 
53 
54  if (xen_pcibk_init_devices(pdev)) {
55  kfree(pdev);
56  pdev = NULL;
57  }
58 out:
59  return pdev;
60 }
61 
62 static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
63 {
64  mutex_lock(&pdev->dev_lock);
65  /* Ensure the guest can't trigger our handler before removing devices */
66  if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
69  }
70 
71  /* If the driver domain started an op, make sure we complete it
72  * before releasing the shared memory */
73 
74  /* Note, the workqueue does not use spinlocks at all.*/
75  flush_workqueue(xen_pcibk_wq);
76 
77  if (pdev->sh_info != NULL) {
78  xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
79  pdev->sh_info = NULL;
80  }
81  mutex_unlock(&pdev->dev_lock);
82 }
83 
84 static void free_pdev(struct xen_pcibk_device *pdev)
85 {
86  if (pdev->be_watching) {
88  pdev->be_watching = 0;
89  }
90 
91  xen_pcibk_disconnect(pdev);
92 
93  xen_pcibk_release_devices(pdev);
94 
95  dev_set_drvdata(&pdev->xdev->dev, NULL);
96  pdev->xdev = NULL;
97 
98  kfree(pdev);
99 }
100 
101 static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
102  int remote_evtchn)
103 {
104  int err = 0;
105  void *vaddr;
106 
107  dev_dbg(&pdev->xdev->dev,
108  "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
109  gnt_ref, remote_evtchn);
110 
111  err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
112  if (err < 0) {
113  xenbus_dev_fatal(pdev->xdev, err,
114  "Error mapping other domain page in ours.");
115  goto out;
116  }
117 
118  pdev->sh_info = vaddr;
119 
121  pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
122  0, DRV_NAME, pdev);
123  if (err < 0) {
124  xenbus_dev_fatal(pdev->xdev, err,
125  "Error binding event channel to IRQ");
126  goto out;
127  }
128  pdev->evtchn_irq = err;
129  err = 0;
130 
131  dev_dbg(&pdev->xdev->dev, "Attached!\n");
132 out:
133  return err;
134 }
135 
/*
 * xen_pcibk_attach - read the frontend's published configuration and connect.
 *
 * Under pdev->dev_lock: gathers "pci-op-ref", "event-channel" and "magic"
 * from the frontend's xenstore directory, compares the magic string with
 * XEN_PCI_MAGIC, and passes the grant reference and event channel on to
 * xen_pcibk_do_attach().
 *
 * Returns the value left in err. The string read into magic is freed on
 * every path, because all exits go through the out label.
 *
 * NOTE(review): three lines are absent from this copy of the function: the
 * right-hand operands of both state comparisons, and the statement that
 * sets err before "Error switching to connected state!". Restore them from
 * the upstream file before building.
 */
136 static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
137 {
138  int err = 0;
139  int gnt_ref, remote_evtchn;
140  char *magic = NULL;
141 
142 
143  mutex_lock(&pdev->dev_lock);
144  /* Make sure we only do this setup once */
/* NOTE(review): comparison operand missing from this copy. */
145  if (xenbus_read_driver_state(pdev->xdev->nodename) !=
147  goto out;
148 
149  /* Wait for frontend to state that it has published the configuration */
/* NOTE(review): comparison operand missing from this copy. */
150  if (xenbus_read_driver_state(pdev->xdev->otherend) !=
152  goto out;
153 
154  dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
155 
156  err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
157  "pci-op-ref", "%u", &gnt_ref,
158  "event-channel", "%u", &remote_evtchn,
159  "magic", NULL, &magic, NULL);
160  if (err) {
161  /* If configuration didn't get read correctly, wait longer */
162  xenbus_dev_fatal(pdev->xdev, err,
163  "Error reading configuration from frontend");
164  goto out;
165  }
166 
/*
 * NOTE(review): err is still 0 on this path, so a magic mismatch is
 * reported through xenbus_dev_fatal() but the function returns success.
 * magic may also be NULL when it is passed to the "%s" conversion below.
 */
167  if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
168  xenbus_dev_fatal(pdev->xdev, -EFAULT,
169  "version mismatch (%s/%s) with pcifront - "
170  "halting " DRV_NAME,
171  magic, XEN_PCI_MAGIC);
172  goto out;
173  }
174 
175  err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
176  if (err)
177  goto out;
178 
179  dev_dbg(&pdev->xdev->dev, "Connecting...\n");
180 
/* NOTE(review): the statement that assigns err here is missing from this copy. */
182  if (err)
183  xenbus_dev_fatal(pdev->xdev, err,
184  "Error switching to connected state!");
185 
186  dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
187 out:
188  mutex_unlock(&pdev->dev_lock);
189 
190  kfree(magic);
191 
192  return err;
193 }
194 
195 static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
196  unsigned int domain, unsigned int bus,
197  unsigned int devfn, unsigned int devid)
198 {
199  int err;
200  int len;
201  char str[64];
202 
203  len = snprintf(str, sizeof(str), "vdev-%d", devid);
204  if (unlikely(len >= (sizeof(str) - 1))) {
205  err = -ENOMEM;
206  goto out;
207  }
208 
209  /* Note: The PV protocol uses %02x, don't change it */
210  err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
211  "%04x:%02x:%02x.%02x", domain, bus,
212  PCI_SLOT(devfn), PCI_FUNC(devfn));
213 
214 out:
215  return err;
216 }
217 
218 static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
219  int domain, int bus, int slot, int func,
220  int devid)
221 {
222  struct pci_dev *dev;
223  int err = 0;
224 
225  dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
226  domain, bus, slot, func);
227 
228  dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
229  if (!dev) {
230  err = -EINVAL;
231  xenbus_dev_fatal(pdev->xdev, err,
232  "Couldn't locate PCI device "
233  "(%04x:%02x:%02x.%d)! "
234  "perhaps already in-use?",
235  domain, bus, slot, func);
236  goto out;
237  }
238 
239  err = xen_pcibk_add_pci_dev(pdev, dev, devid,
240  xen_pcibk_publish_pci_dev);
241  if (err)
242  goto out;
243 
244  dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
245  if (xen_register_device_domain_owner(dev,
246  pdev->xdev->otherend_id) != 0) {
247  dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
248  xen_find_device_domain_owner(dev));
249  xen_unregister_device_domain_owner(dev);
250  xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
251  }
252 
253  /* TODO: It'd be nice to export a bridge and have all of its children
254  * get exported with it. This may be best done in xend (which will
255  * have to calculate resource usage anyway) but we probably want to
256  * put something in here to ensure that if a bridge gets given to a
257  * driver domain, that all devices under that bridge are not given
258  * to other driver domains (as he who controls the bridge can disable
259  * it and stop the other devices from working).
260  */
261 out:
262  return err;
263 }
264 
265 static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
266  int domain, int bus, int slot, int func)
267 {
268  int err = 0;
269  struct pci_dev *dev;
270 
271  dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
272  domain, bus, slot, func);
273 
274  dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
275  if (!dev) {
276  err = -EINVAL;
277  dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
278  "(%04x:%02x:%02x.%d)! not owned by this domain\n",
279  domain, bus, slot, func);
280  goto out;
281  }
282 
283  dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
284  xen_unregister_device_domain_owner(dev);
285 
286  xen_pcibk_release_pci_dev(pdev, dev);
287 
288 out:
289  return err;
290 }
291 
292 static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
293  unsigned int domain, unsigned int bus)
294 {
295  unsigned int d, b;
296  int i, root_num, len, err;
297  char str[64];
298 
299  dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
300 
301  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
302  "root_num", "%d", &root_num);
303  if (err == 0 || err == -ENOENT)
304  root_num = 0;
305  else if (err < 0)
306  goto out;
307 
308  /* Verify that we haven't already published this pci root */
309  for (i = 0; i < root_num; i++) {
310  len = snprintf(str, sizeof(str), "root-%d", i);
311  if (unlikely(len >= (sizeof(str) - 1))) {
312  err = -ENOMEM;
313  goto out;
314  }
315 
316  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
317  str, "%x:%x", &d, &b);
318  if (err < 0)
319  goto out;
320  if (err != 2) {
321  err = -EINVAL;
322  goto out;
323  }
324 
325  if (d == domain && b == bus) {
326  err = 0;
327  goto out;
328  }
329  }
330 
331  len = snprintf(str, sizeof(str), "root-%d", root_num);
332  if (unlikely(len >= (sizeof(str) - 1))) {
333  err = -ENOMEM;
334  goto out;
335  }
336 
337  dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
338  root_num, domain, bus);
339 
340  err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
341  "%04x:%02x", domain, bus);
342  if (err)
343  goto out;
344 
345  err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
346  "root_num", "%d", (root_num + 1));
347 
348 out:
349  return err;
350 }
351 
/*
 * xen_pcibk_reconfigure - attach or detach devices according to the
 * per-device sub-states stored under the backend node.
 *
 * Under pdev->dev_lock: reads "num_devs", then for each index i reads
 * "state-<i>" (falling back to XenbusStateUnknown when it cannot be read).
 * One case reads "dev-<i>", exports that device, publishes the PCI roots
 * and rewrites "state-<i>". XenbusStateClosing reads "vdev-<i>" and removes
 * that device. Any other sub-state is ignored.
 *
 * Always returns 0; the value left in err is not propagated to the caller.
 *
 * NOTE(review): several lines are absent from this copy of the function:
 * the operand of the first state comparison, the label of the first switch
 * case, the last argument of the xenbus_printf() that rewrites state-<i>,
 * and the statement that sets err before "Error switching to reconfigured
 * state!". Restore them from the upstream file before building.
 */
352 static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
353 {
354  int err = 0;
355  int num_devs;
356  int domain, bus, slot, func;
357  int substate;
358  int i, len;
359  char state_str[64];
360  char dev_str[64];
361 
362 
363  dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
364 
365  mutex_lock(&pdev->dev_lock);
366  /* Make sure we only reconfigure once */
/* NOTE(review): comparison operand missing from this copy. */
367  if (xenbus_read_driver_state(pdev->xdev->nodename) !=
369  goto out;
370 
371  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
372  &num_devs);
/* Exactly one converted field is expected; anything else is an error. */
373  if (err != 1) {
374  if (err >= 0)
375  err = -EINVAL;
376  xenbus_dev_fatal(pdev->xdev, err,
377  "Error reading number of devices");
378  goto out;
379  }
380 
381  for (i = 0; i < num_devs; i++) {
382  len = snprintf(state_str, sizeof(state_str), "state-%d", i);
383  if (unlikely(len >= (sizeof(state_str) - 1))) {
384  err = -ENOMEM;
385  xenbus_dev_fatal(pdev->xdev, err,
386  "String overflow while reading "
387  "configuration");
388  goto out;
389  }
390  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
391  "%d", &substate);
/* An unreadable sub-state is handled by the default case (no action). */
392  if (err != 1)
393  substate = XenbusStateUnknown;
394 
395  switch (substate) {
/* NOTE(review): the case label for the attach branch is missing from this copy. */
397  dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
398 
399  len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
400  if (unlikely(len >= (sizeof(dev_str) - 1))) {
401  err = -ENOMEM;
402  xenbus_dev_fatal(pdev->xdev, err,
403  "String overflow while "
404  "reading configuration");
405  goto out;
406  }
407  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
408  dev_str, "%x:%x:%x.%x",
409  &domain, &bus, &slot, &func);
410  if (err < 0) {
411  xenbus_dev_fatal(pdev->xdev, err,
412  "Error reading device "
413  "configuration");
414  goto out;
415  }
/* All four address fields must have been parsed. */
416  if (err != 4) {
417  err = -EINVAL;
418  xenbus_dev_fatal(pdev->xdev, err,
419  "Error parsing pci device "
420  "configuration");
421  goto out;
422  }
423 
424  err = xen_pcibk_export_device(pdev, domain, bus, slot,
425  func, i);
426  if (err)
427  goto out;
428 
429  /* Publish pci roots. */
430  err = xen_pcibk_publish_pci_roots(pdev,
431  xen_pcibk_publish_pci_root);
432  if (err) {
433  xenbus_dev_fatal(pdev->xdev, err,
434  "Error while publish PCI root"
435  "buses for frontend");
436  goto out;
437  }
438 
/* NOTE(review): the final argument of this call is missing from this copy. */
439  err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
440  state_str, "%d",
442  if (err) {
443  xenbus_dev_fatal(pdev->xdev, err,
444  "Error switching substate of "
445  "dev-%d\n", i);
446  goto out;
447  }
448  break;
449 
450  case XenbusStateClosing:
451  dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
452 
/* The detach branch reads the "vdev-<i>" key, not "dev-<i>". */
453  len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
454  if (unlikely(len >= (sizeof(dev_str) - 1))) {
455  err = -ENOMEM;
456  xenbus_dev_fatal(pdev->xdev, err,
457  "String overflow while "
458  "reading configuration");
459  goto out;
460  }
461  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
462  dev_str, "%x:%x:%x.%x",
463  &domain, &bus, &slot, &func);
464  if (err < 0) {
465  xenbus_dev_fatal(pdev->xdev, err,
466  "Error reading device "
467  "configuration");
468  goto out;
469  }
470  if (err != 4) {
471  err = -EINVAL;
472  xenbus_dev_fatal(pdev->xdev, err,
473  "Error parsing pci device "
474  "configuration");
475  goto out;
476  }
477 
478  err = xen_pcibk_remove_device(pdev, domain, bus, slot,
479  func);
480  if (err)
481  goto out;
482 
483  /* TODO: If at some point we implement support for pci
484  * root hot-remove on pcifront side, we'll need to
485  * remove unnecessary xenstore nodes of pci roots here.
486  */
487 
488  break;
489 
490  default:
491  break;
492  }
493  }
494 
/* NOTE(review): the statement that assigns err here is missing from this copy. */
496  if (err) {
497  xenbus_dev_fatal(pdev->xdev, err,
498  "Error switching to reconfigured state!");
499  goto out;
500  }
501 
502 out:
503  mutex_unlock(&pdev->dev_lock);
/* NOTE(review): err is discarded here; callers always see success. */
504  return 0;
505 }
506 
/*
 * xen_pcibk_frontend_changed - react to a state change of the frontend.
 *
 * Installed as the driver's .otherend_changed callback. Dispatches on
 * @fe_state: the visible branches call xen_pcibk_attach(),
 * xen_pcibk_reconfigure() and xen_pcibk_disconnect(). For
 * XenbusStateClosed the device is kept if xenbus_dev_is_online() reports it
 * online; otherwise control falls through to the XenbusStateUnknown branch,
 * which unregisters the device.
 *
 * NOTE(review): the case labels of the first three branches and at least
 * one statement in each of the third, Closing and Closed branches are
 * absent from this copy. Restore them from the upstream file before
 * building.
 */
507 static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
508  enum xenbus_state fe_state)
509 {
510  struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);
511 
512  dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
513 
514  switch (fe_state) {
/* NOTE(review): case label missing from this copy. */
516  xen_pcibk_attach(pdev);
517  break;
518 
/* NOTE(review): case label missing from this copy. */
520  xen_pcibk_reconfigure(pdev);
521  break;
522 
/* NOTE(review): case label and the statement after the comment are missing from this copy. */
524  /* pcifront switched its state from reconfiguring to connected.
525  * Then switch to connected state.
526  */
528  break;
529 
530  case XenbusStateClosing:
531  xen_pcibk_disconnect(pdev);
/* NOTE(review): a line after the disconnect call is missing from this copy. */
533  break;
534 
535  case XenbusStateClosed:
536  xen_pcibk_disconnect(pdev);
/* NOTE(review): a line after the disconnect call is missing from this copy. */
538  if (xenbus_dev_is_online(xdev))
539  break;
540  /* fall through if not online */
541  case XenbusStateUnknown:
542  dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
543  device_unregister(&xdev->dev);
544  break;
545 
546  default:
547  break;
548  }
549 }
550 
/*
 * xen_pcibk_setup_backend - export the devices listed under the backend
 * node.
 *
 * Under pdev->dev_lock: reads "num_devs", then for each index i parses
 * "dev-<i>" as domain:bus:slot.func, exports that device and writes
 * XenbusStateInitialised to "state-<i>". Afterwards the PCI roots are
 * published. If err is 0 once the lock is released, xen_pcibk_attach() is
 * called in case the frontend is already configured.
 *
 * Returns the value left in err (0 on success, negative errno otherwise).
 *
 * NOTE(review): two lines are absent from this copy of the function: the
 * operand of the initial state comparison, and the statement that sets err
 * before "Error switching to initialised state!". Restore them from the
 * upstream file before building.
 */
551 static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
552 {
553  /* Get configuration from xend (if available now) */
554  int domain, bus, slot, func;
555  int err = 0;
556  int i, num_devs;
557  char dev_str[64];
558  char state_str[64];
559 
560  mutex_lock(&pdev->dev_lock);
561  /* It's possible we could get the call to setup twice, so make sure
562  * we're not already connected.
563  */
/* NOTE(review): comparison operand missing from this copy. */
564  if (xenbus_read_driver_state(pdev->xdev->nodename) !=
566  goto out;
567 
568  dev_dbg(&pdev->xdev->dev, "getting be setup\n");
569 
570  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
571  &num_devs);
/* Exactly one converted field is expected; anything else is an error. */
572  if (err != 1) {
573  if (err >= 0)
574  err = -EINVAL;
575  xenbus_dev_fatal(pdev->xdev, err,
576  "Error reading number of devices");
577  goto out;
578  }
579 
580  for (i = 0; i < num_devs; i++) {
581  int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
582  if (unlikely(l >= (sizeof(dev_str) - 1))) {
583  err = -ENOMEM;
584  xenbus_dev_fatal(pdev->xdev, err,
585  "String overflow while reading "
586  "configuration");
587  goto out;
588  }
589 
590  err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
591  "%x:%x:%x.%x", &domain, &bus, &slot, &func);
592  if (err < 0) {
593  xenbus_dev_fatal(pdev->xdev, err,
594  "Error reading device configuration");
595  goto out;
596  }
/* All four address fields must have been parsed. */
597  if (err != 4) {
598  err = -EINVAL;
599  xenbus_dev_fatal(pdev->xdev, err,
600  "Error parsing pci device "
601  "configuration");
602  goto out;
603  }
604 
605  err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
606  if (err)
607  goto out;
608 
609  /* Switch substate of this device. */
610  l = snprintf(state_str, sizeof(state_str), "state-%d", i);
611  if (unlikely(l >= (sizeof(state_str) - 1))) {
612  err = -ENOMEM;
613  xenbus_dev_fatal(pdev->xdev, err,
614  "String overflow while reading "
615  "configuration");
616  goto out;
617  }
618  err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
619  "%d", XenbusStateInitialised);
620  if (err) {
621  xenbus_dev_fatal(pdev->xdev, err, "Error switching "
622  "substate of dev-%d\n", i);
623  goto out;
624  }
625  }
626 
627  err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
628  if (err) {
629  xenbus_dev_fatal(pdev->xdev, err,
630  "Error while publish PCI root buses "
631  "for frontend");
632  goto out;
633  }
634 
/* NOTE(review): the statement that assigns err here is missing from this copy. */
636  if (err)
637  xenbus_dev_fatal(pdev->xdev, err,
638  "Error switching to initialised state!");
639 
640 out:
641  mutex_unlock(&pdev->dev_lock);
/* xen_pcibk_attach() takes dev_lock itself, so it is called after the unlock. */
642  if (!err)
643  /* see if pcifront is already configured (if not, we'll wait) */
644  xen_pcibk_attach(pdev);
645  return err;
646 }
647 
648 static void xen_pcibk_be_watch(struct xenbus_watch *watch,
649  const char **vec, unsigned int len)
650 {
651  struct xen_pcibk_device *pdev =
652  container_of(watch, struct xen_pcibk_device, be_watch);
653 
654  switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
655  case XenbusStateInitWait:
656  xen_pcibk_setup_backend(pdev);
657  break;
658 
659  default:
660  break;
661  }
662 }
663 
/*
 * xen_pcibk_xenbus_probe - xenbus probe callback for a new backend device.
 *
 * Allocates the per-device state with alloc_pdev(), registers a watch on
 * the backend's own xenstore node (callback xen_pcibk_be_watch) and then
 * invokes that callback once by hand.
 *
 * Returns 0 on success, -ENOMEM if alloc_pdev() fails, or the error left in
 * err by a later step.
 *
 * NOTE(review): the statement that sets err before the first "if (err)"
 * check is absent from this copy; restore it from the upstream file.
 * NOTE(review): the error paths after a successful alloc_pdev() return
 * without calling free_pdev() - confirm whether pdev is released elsewhere.
 */
664 static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
665  const struct xenbus_device_id *id)
666 {
667  int err = 0;
668  struct xen_pcibk_device *pdev = alloc_pdev(dev);
669 
670  if (pdev == NULL) {
671  err = -ENOMEM;
672  xenbus_dev_fatal(dev, err,
673  "Error allocating xen_pcibk_device struct");
674  goto out;
675  }
676 
677  /* wait for xend to configure us */
/* NOTE(review): the statement that assigns err here is missing from this copy. */
679  if (err)
680  goto out;
681 
682  /* watch the backend node for backend configuration information */
683  err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
684  xen_pcibk_be_watch);
685  if (err)
686  goto out;
687 
/* Recorded so that free_pdev() knows a watch was set up. */
688  pdev->be_watching = 1;
689 
690  /* We need to force a call to our callback here in case
691  * xend already configured us!
692  */
693  xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);
694 
695 out:
696  return err;
697 }
698 
699 static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
700 {
701  struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
702 
703  if (pdev != NULL)
704  free_pdev(pdev);
705 
706  return 0;
707 }
708 
/*
 * Xenbus device IDs for this driver: a single "pci" entry followed by an
 * empty entry (presumably the list terminator - confirm against the xenbus
 * core).
 */
709 static const struct xenbus_device_id xen_pcibk_ids[] = {
710  {"pci"},
711  {""},
712 };
713 
/*
 * Xenbus driver definition: wires up the probe, remove and frontend
 * state-change callbacks defined in this file. The registration code refers
 * to the resulting object as xen_pcibk_driver.
 */
714 static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME,
715  .probe = xen_pcibk_xenbus_probe,
716  .remove = xen_pcibk_xenbus_remove,
717  .otherend_changed = xen_pcibk_frontend_changed,
718 );
719 
721 
/*
 * NOTE(review): the line carrying this function's signature is absent from
 * this copy; only the body is shown.
 *
 * Creates the "xen_pciback_workqueue" workqueue, selects the vpci backend
 * (or the passthrough backend when the "passthrough" parameter is set) and
 * registers the xenbus backend driver. Returns -EFAULT if the workqueue
 * cannot be created, otherwise the result of xenbus_register_backend().
 *
 * NOTE(review): if xenbus_register_backend() fails, the workqueue created
 * here is not destroyed in this function - confirm whether the caller
 * cleans it up.
 * NOTE(review): the two adjacent literals in the printk concatenate to
 * "createxen_pciback_workqueue failed" (no separating space).
 */
723 {
724  xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
725  if (!xen_pcibk_wq) {
726  printk(KERN_ERR "%s: create"
727  "xen_pciback_workqueue failed\n", __func__);
728  return -EFAULT;
729  }
/* vpci is the default; the module parameter switches to passthrough. */
730  xen_pcibk_backend = &xen_pcibk_vpci_backend;
731  if (passthrough)
732  xen_pcibk_backend = &xen_pcibk_passthrough_backend;
733  pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
734  return xenbus_register_backend(&xen_pcibk_driver);
735 }
736 
/*
 * NOTE(review): the line carrying this function's signature is absent from
 * this copy; only the body is shown.
 *
 * Destroys the xen_pcibk_wq workqueue and unregisters the xenbus driver.
 *
 * NOTE(review): the workqueue is destroyed before the driver is
 * unregistered. The remove path in this file (xen_pcibk_xenbus_remove ->
 * free_pdev -> xen_pcibk_disconnect) calls flush_workqueue(xen_pcibk_wq),
 * so if unregistering the driver triggers remove for devices that are
 * still bound, that flush would run on a destroyed workqueue. Confirm, and
 * consider swapping the two calls.
 */
738 {
739  destroy_workqueue(xen_pcibk_wq);
740  xenbus_unregister_driver(&xen_pcibk_driver);
741 }