Linux Kernel 3.7.1
xenbus_dev_frontend.c
/*
 * Driver giving user-space access to the kernel's xenbus connection
 * to xenstore.
 *
 * Copyright (c) 2005, Christian Limpach
 * Copyright (c) 2005, Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Changes:
 * 2008-10-07  Alex Zeffertt   Replaced /proc/xen/xenbus with xenfs filesystem
 *                             and /proc/xen compatibility mount point.
 *                             Turned xenfs into a loadable module.
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/uio.h>
#include <linux/notifier.h>
#include <linux/wait.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/miscdevice.h>
#include <linux/module.h>

#include "xenbus_comms.h"

#include <xen/xenbus.h>
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>

MODULE_LICENSE("GPL");

/*
 * An element of a list of outstanding transactions, for which we're
 * still awaiting a reply.
 */
struct xenbus_transaction_holder {
	struct list_head list;
	struct xenbus_transaction handle;
};

/*
 * A buffer of data on the queue.
 */
struct read_buffer {
	struct list_head list;
	unsigned int cons;
	unsigned int len;
	char msg[];
};

struct xenbus_file_priv {
	/*
	 * msgbuffer_mutex is held while partial requests are built up
	 * and complete requests are acted on.  It therefore protects
	 * the "transactions" and "watches" lists, and the partial
	 * request length and buffer.
	 *
	 * reply_mutex protects the reply being built up to return to
	 * usermode.  It nests inside msgbuffer_mutex but may be held
	 * alone during a watch callback.
	 */
	struct mutex msgbuffer_mutex;

	/* In-progress transactions */
	struct list_head transactions;

	/* Active watches. */
	struct list_head watches;

	/* Partial request. */
	unsigned int len;
	union {
		struct xsd_sockmsg msg;
		char buffer[XENSTORE_PAYLOAD_MAX];
	} u;

	/* Response queue. */
	struct mutex reply_mutex;
	struct list_head read_buffers;
	wait_queue_head_t read_waitq;
};

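/*
 * Editorial sketch, not part of the original file: the lock nesting
 * the comment above prescribes.  Request processing takes
 * msgbuffer_mutex first and reply_mutex second; watch callbacks take
 * reply_mutex alone.  These hypothetical helpers exist purely to
 * illustrate that ordering.
 */
static inline void xenbus_file_lock_both(struct xenbus_file_priv *u)
{
	mutex_lock(&u->msgbuffer_mutex);	/* outer: partial request state */
	mutex_lock(&u->reply_mutex);		/* inner: reply queue */
}

static inline void xenbus_file_unlock_both(struct xenbus_file_priv *u)
{
	mutex_unlock(&u->reply_mutex);		/* release inner lock first */
	mutex_unlock(&u->msgbuffer_mutex);
}
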
/* Read out any raw xenbus messages queued up. */
static ssize_t xenbus_file_read(struct file *filp,
				char __user *ubuf,
				size_t len, loff_t *ppos)
{
	struct xenbus_file_priv *u = filp->private_data;
	struct read_buffer *rb;
	unsigned i;
	int ret;

	mutex_lock(&u->reply_mutex);
again:
	while (list_empty(&u->read_buffers)) {
		mutex_unlock(&u->reply_mutex);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		ret = wait_event_interruptible(u->read_waitq,
					       !list_empty(&u->read_buffers));
		if (ret)
			return ret;
		mutex_lock(&u->reply_mutex);
	}

	rb = list_entry(u->read_buffers.next, struct read_buffer, list);
	i = 0;
	while (i < len) {
		unsigned sz = min((unsigned)len - i, rb->len - rb->cons);

		ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);

		i += sz - ret;
		rb->cons += sz - ret;

		if (ret != 0) {
			if (i == 0)
				i = -EFAULT;
			goto out;
		}

		/* Clear out buffer if it has been consumed */
		if (rb->cons == rb->len) {
			list_del(&rb->list);
			kfree(rb);
			if (list_empty(&u->read_buffers))
				break;
			rb = list_entry(u->read_buffers.next,
					struct read_buffer, list);
		}
	}
	if (i == 0)
		goto again;

out:
	mutex_unlock(&u->reply_mutex);
	return i;
}
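
/*
 * Editorial note, not part of the original file: copy_to_user()
 * returns the number of bytes that could NOT be copied, so the
 * "sz - ret" arithmetic above advances the user offset and the buffer
 * cursor by exactly what reached userspace.  A fault with nothing yet
 * copied yields -EFAULT; otherwise the short count is returned,
 * stream style, and the remainder stays queued for the next read().
 */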

/*
 * Add a buffer to the queue.  Caller must hold the appropriate lock
 * if the queue is not local.  (Commonly the caller will build up
 * multiple queued buffers on a temporary local list, and then add it
 * to the appropriate list under lock once all the buffers have been
 * successfully allocated.)  A sketch of this pattern follows
 * queue_cleanup() below.
 */
static int queue_reply(struct list_head *queue, const void *data, size_t len)
{
	struct read_buffer *rb;

	if (len == 0)
		return 0;

	rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
	if (rb == NULL)
		return -ENOMEM;

	rb->cons = 0;
	rb->len = len;

	memcpy(rb->msg, data, len);

	list_add_tail(&rb->list, queue);
	return 0;
}

/*
 * Free all the read_buffers on a list.
 * Caller must have sole reference to list.
 */
static void queue_cleanup(struct list_head *list)
{
	struct read_buffer *rb;

	while (!list_empty(list)) {
		rb = list_entry(list->next, struct read_buffer, list);
		list_del(list->next);
		kfree(rb);
	}
}
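
/*
 * Editorial sketch, not part of the original file: the staging
 * pattern described above queue_reply(), shown end to end.  Replies
 * are built up on a local list so that a mid-sequence allocation
 * failure never leaves a half-queued message visible to readers:
 * either the whole list is spliced onto the shared queue or it is
 * freed wholesale.  queue_reply_one() is a hypothetical name.
 */
static inline int queue_reply_one(struct xenbus_file_priv *u,
				  const void *data, size_t len)
{
	LIST_HEAD(staging_q);	/* temporary local list */
	int rc;

	mutex_lock(&u->reply_mutex);
	rc = queue_reply(&staging_q, data, len);
	if (!rc) {
		/* allocation succeeded: publish and wake readers */
		list_splice_tail(&staging_q, &u->read_buffers);
		wake_up(&u->read_waitq);
	} else
		queue_cleanup(&staging_q);	/* sole reference, safe to free */
	mutex_unlock(&u->reply_mutex);

	return rc;
}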

struct watch_adapter {
	struct list_head list;
	struct xenbus_watch watch;
	struct xenbus_file_priv *dev_data;
	char *token;
};

static void free_watch_adapter(struct watch_adapter *watch)
{
	kfree(watch->watch.node);
	kfree(watch->token);
	kfree(watch);
}

static struct watch_adapter *alloc_watch_adapter(const char *path,
						 const char *token)
{
	struct watch_adapter *watch;

	watch = kzalloc(sizeof(*watch), GFP_KERNEL);
	if (watch == NULL)
		goto out_fail;

	watch->watch.node = kstrdup(path, GFP_KERNEL);
	if (watch->watch.node == NULL)
		goto out_free;

	watch->token = kstrdup(token, GFP_KERNEL);
	if (watch->token == NULL)
		goto out_free;

	return watch;

out_free:
	free_watch_adapter(watch);

out_fail:
	return NULL;
}

static void watch_fired(struct xenbus_watch *watch,
			const char **vec,
			unsigned int len)
{
	struct watch_adapter *adap;
	struct xsd_sockmsg hdr;
	const char *path, *token;
	int path_len, tok_len, body_len, data_len = 0;
	int ret;
	LIST_HEAD(staging_q);

	adap = container_of(watch, struct watch_adapter, watch);

	path = vec[XS_WATCH_PATH];
	token = adap->token;

	path_len = strlen(path) + 1;
	tok_len = strlen(token) + 1;
	if (len > 2)
		data_len = vec[len] - vec[2] + 1;
	body_len = path_len + tok_len + data_len;

	hdr.type = XS_WATCH_EVENT;
	hdr.len = body_len;

	mutex_lock(&adap->dev_data->reply_mutex);

	ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
	if (!ret)
		ret = queue_reply(&staging_q, path, path_len);
	if (!ret)
		ret = queue_reply(&staging_q, token, tok_len);
	if (!ret && len > 2)
		ret = queue_reply(&staging_q, vec[2], data_len);

	if (!ret) {
		/* success: pass reply list onto watcher */
		list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
		wake_up(&adap->dev_data->read_waitq);
	} else
		queue_cleanup(&staging_q);

	mutex_unlock(&adap->dev_data->reply_mutex);
}

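/*
 * Editorial note, not part of the original file: the XS_WATCH_EVENT
 * reply assembled above reaches the reader laid out as
 *
 *	struct xsd_sockmsg hdr;    .type = XS_WATCH_EVENT, .len = body
 *	"<path>"  NUL              the node that fired
 *	"<token>" NUL              the token from the original XS_WATCH
 *	[extra event data, when the callback supplies more than two
 *	 vector entries]
 */
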
static int xenbus_write_transaction(unsigned msg_type,
				    struct xenbus_file_priv *u)
{
	int rc;
	void *reply;
	struct xenbus_transaction_holder *trans = NULL;
	LIST_HEAD(staging_q);

	if (msg_type == XS_TRANSACTION_START) {
		trans = kmalloc(sizeof(*trans), GFP_KERNEL);
		if (!trans) {
			rc = -ENOMEM;
			goto out;
		}
	}

	reply = xenbus_dev_request_and_reply(&u->u.msg);
	if (IS_ERR(reply)) {
		kfree(trans);
		rc = PTR_ERR(reply);
		goto out;
	}

	if (msg_type == XS_TRANSACTION_START) {
		trans->handle.id = simple_strtoul(reply, NULL, 0);

		list_add(&trans->list, &u->transactions);
	} else if (msg_type == XS_TRANSACTION_END) {
		list_for_each_entry(trans, &u->transactions, list)
			if (trans->handle.id == u->u.msg.tx_id)
				break;
		BUG_ON(&trans->list == &u->transactions);
		list_del(&trans->list);

		kfree(trans);
	}

	mutex_lock(&u->reply_mutex);
	rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
	if (!rc)
		rc = queue_reply(&staging_q, reply, u->u.msg.len);
	if (!rc) {
		list_splice_tail(&staging_q, &u->read_buffers);
		wake_up(&u->read_waitq);
	} else {
		queue_cleanup(&staging_q);
	}
	mutex_unlock(&u->reply_mutex);

	kfree(reply);

out:
	return rc;
}
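
/*
 * Editorial note, not part of the original file, summarizing the
 * lifecycle above: XS_TRANSACTION_START allocates a
 * xenbus_transaction_holder and records the id xenstored returns;
 * XS_TRANSACTION_END finds the holder by tx_id and frees it.  Any
 * holder still on u->transactions when the fd is closed is force
 * ended in xenbus_file_release() below.
 */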

static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
{
	struct watch_adapter *watch, *tmp_watch;
	char *path, *token;
	int err, rc;
	LIST_HEAD(staging_q);

	path = u->u.buffer + sizeof(u->u.msg);
	token = memchr(path, 0, u->u.msg.len);
	if (token == NULL) {
		rc = -EILSEQ;
		goto out;
	}
	token++;
	if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) {
		rc = -EILSEQ;
		goto out;
	}

	if (msg_type == XS_WATCH) {
		watch = alloc_watch_adapter(path, token);
		if (watch == NULL) {
			rc = -ENOMEM;
			goto out;
		}

		watch->watch.callback = watch_fired;
		watch->dev_data = u;

		err = register_xenbus_watch(&watch->watch);
		if (err) {
			free_watch_adapter(watch);
			rc = err;
			goto out;
		}
		list_add(&watch->list, &u->watches);
	} else {
		list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
			if (!strcmp(watch->token, token) &&
			    !strcmp(watch->watch.node, path)) {
				unregister_xenbus_watch(&watch->watch);
				list_del(&watch->list);
				free_watch_adapter(watch);
				break;
			}
		}
	}

	/* Success.  Synthesize a reply to say all is OK. */
	{
		struct {
			struct xsd_sockmsg hdr;
			char body[3];
		} __packed reply = {
			{
				.type = msg_type,
				.len = sizeof(reply.body)
			},
			"OK"
		};

		mutex_lock(&u->reply_mutex);
		rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
		wake_up(&u->read_waitq);
		mutex_unlock(&u->reply_mutex);
	}

out:
	return rc;
}
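
/*
 * Editorial note, not part of the original file: the request body
 * parsed above is two NUL-terminated strings packed back to back,
 *
 *	"<path>" NUL "<token>" NUL
 *
 * and anything not in that shape is rejected with -EILSEQ before any
 * watch state changes.
 */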

static ssize_t xenbus_file_write(struct file *filp,
				 const char __user *ubuf,
				 size_t len, loff_t *ppos)
{
	struct xenbus_file_priv *u = filp->private_data;
	uint32_t msg_type;
	int rc = len;
	int ret;
	LIST_HEAD(staging_q);

	/*
	 * We're expecting usermode to be writing properly formed
	 * xenbus messages.  If they write an incomplete message we
	 * buffer it up.  Once it is complete, we act on it.
	 */

	/*
	 * Make sure concurrent writers can't stomp all over each
	 * other's messages and make a mess of our partial message
	 * buffer.  We don't make any attempt to stop multiple
	 * writers from making a mess of each other's incomplete
	 * messages; we're just trying to guarantee our own internal
	 * consistency and make sure that single writes are handled
	 * atomically.
	 */
	mutex_lock(&u->msgbuffer_mutex);

	/* Get this out of the way early to avoid confusion */
	if (len == 0)
		goto out;

	/* Can't write a xenbus message larger than we can buffer */
	if (len > sizeof(u->u.buffer) - u->len) {
		/* On error, dump existing buffer */
		u->len = 0;
		rc = -EINVAL;
		goto out;
	}

	ret = copy_from_user(u->u.buffer + u->len, ubuf, len);

	if (ret != 0) {
		rc = -EFAULT;
		goto out;
	}

	/* Deal with a partial copy. */
	len -= ret;
	rc = len;

	u->len += len;

	/* Return if we haven't got a full message yet */
	if (u->len < sizeof(u->u.msg))
		goto out;	/* not even the header yet */

	/* If we're expecting a message that's larger than we can
	   possibly send, dump what we have and return an error. */
	if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
		rc = -E2BIG;
		u->len = 0;
		goto out;
	}

	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
		goto out;	/* incomplete data portion */

	/*
	 * OK, now we have a complete message.  Do something with it.
	 */
	msg_type = u->u.msg.type;

	switch (msg_type) {
	case XS_WATCH:
	case XS_UNWATCH:
		/* (Un)Ask for some path to be watched for changes */
		ret = xenbus_write_watch(msg_type, u);
		break;

	default:
		/* Send out a transaction */
		ret = xenbus_write_transaction(msg_type, u);
		break;
	}
	if (ret != 0)
		rc = ret;

	/* Buffered message consumed */
	u->len = 0;

out:
	mutex_unlock(&u->msgbuffer_mutex);
	return rc;
}
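
/*
 * Editorial note, not part of the original file: a well-formed
 * request is a struct xsd_sockmsg header followed by exactly msg.len
 * payload bytes.  Short writes accumulate in u->u.buffer until u->len
 * reaches sizeof(msg) + msg.len; only then is the message dispatched
 * and the buffer reset, so userspace may split a request across
 * several write() calls.
 */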

static int xenbus_file_open(struct inode *inode, struct file *filp)
{
	struct xenbus_file_priv *u;

	if (xen_store_evtchn == 0)
		return -ENOENT;

	nonseekable_open(inode, filp);

	u = kzalloc(sizeof(*u), GFP_KERNEL);
	if (u == NULL)
		return -ENOMEM;

	INIT_LIST_HEAD(&u->transactions);
	INIT_LIST_HEAD(&u->watches);
	INIT_LIST_HEAD(&u->read_buffers);
	init_waitqueue_head(&u->read_waitq);

	mutex_init(&u->reply_mutex);
	mutex_init(&u->msgbuffer_mutex);

	filp->private_data = u;

	return 0;
}

static int xenbus_file_release(struct inode *inode, struct file *filp)
{
	struct xenbus_file_priv *u = filp->private_data;
	struct xenbus_transaction_holder *trans, *tmp;
	struct watch_adapter *watch, *tmp_watch;
	struct read_buffer *rb, *tmp_rb;

	/*
	 * No need for locking here because there are no other users,
	 * by definition.
	 */

	list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
		xenbus_transaction_end(trans->handle, 1);
		list_del(&trans->list);
		kfree(trans);
	}

	list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
		unregister_xenbus_watch(&watch->watch);
		list_del(&watch->list);
		free_watch_adapter(watch);
	}

	list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
		list_del(&rb->list);
		kfree(rb);
	}
	kfree(u);

	return 0;
}

static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
{
	struct xenbus_file_priv *u = file->private_data;

	poll_wait(file, &u->read_waitq, wait);
	if (!list_empty(&u->read_buffers))
		return POLLIN | POLLRDNORM;
	return 0;
}

const struct file_operations xen_xenbus_fops = {
	.read = xenbus_file_read,
	.write = xenbus_file_write,
	.open = xenbus_file_open,
	.release = xenbus_file_release,
	.poll = xenbus_file_poll,
	.llseek = no_llseek,
};
EXPORT_SYMBOL_GPL(xen_xenbus_fops);

static struct miscdevice xenbus_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/xenbus",
	.fops = &xen_xenbus_fops,
};
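
/*
 * Editorial sketch, not part of the original file: minimal userspace
 * use of this device, assuming the usual /dev/xen/xenbus node and the
 * XS_WATCH constant and struct xsd_sockmsg layout from Xen's
 * io/xs_wire.h.  Error handling omitted.  Partial-message buffering
 * (see xenbus_file_write above) lets the header and body arrive in
 * separate write() calls.
 *
 *	struct xsd_sockmsg hdr = {
 *		.type   = XS_WATCH,
 *		.req_id = 0,
 *		.tx_id  = 0,
 *		.len    = sizeof("/local/domain") + sizeof("token0"),
 *	};
 *	int fd = open("/dev/xen/xenbus", O_RDWR);
 *
 *	write(fd, &hdr, sizeof(hdr));                 -- header first...
 *	write(fd, "/local/domain\0token0", hdr.len);  -- ...then body
 *	read(fd, buf, sizeof(buf));                   -- synthesized "OK"
 *	                                                 reply, then later
 *	                                                 XS_WATCH_EVENTs
 */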

static int __init xenbus_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&xenbus_dev);
	if (err)
		printk(KERN_ERR "Could not register xenbus frontend device\n");
	return err;
}

static void __exit xenbus_exit(void)
{
	misc_deregister(&xenbus_dev);
}

module_init(xenbus_init);
module_exit(xenbus_exit);