Linux Kernel  3.7.1
dev.c
1 /*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008 Miklos Szeredi <[email protected]>
4 
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 #include <linux/pipe_fs_i.h>
20 #include <linux/swap.h>
21 #include <linux/splice.h>
22 
23 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
24 MODULE_ALIAS("devname:fuse");
25 
26 static struct kmem_cache *fuse_req_cachep;
27 
28 static struct fuse_conn *fuse_get_conn(struct file *file)
29 {
30  /*
31  * Lockless access is OK, because file->private_data is set
32  * once during mount and is valid until the file is released.
33  */
34  return file->private_data;
35 }
36 
37 static void fuse_request_init(struct fuse_req *req)
38 {
39  memset(req, 0, sizeof(*req));
40  INIT_LIST_HEAD(&req->list);
41  INIT_LIST_HEAD(&req->intr_entry);
42  init_waitqueue_head(&req->waitq);
43  atomic_set(&req->count, 1);
44 }
45 
46 struct fuse_req *fuse_request_alloc(void)
47 {
48  struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
49  if (req)
50  fuse_request_init(req);
51  return req;
52 }
54 
55 struct fuse_req *fuse_request_alloc_nofs(void)
56 {
57  struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
58  if (req)
59  fuse_request_init(req);
60  return req;
61 }
62 
63 void fuse_request_free(struct fuse_req *req)
64 {
65  kmem_cache_free(fuse_req_cachep, req);
66 }
67 
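/*
 * Block all signals except SIGKILL, saving the old mask so that
 * restore_sigs() can reinstate it after the wait.
 */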
68 static void block_sigs(sigset_t *oldset)
69 {
70  sigset_t mask;
71 
72  siginitsetinv(&mask, sigmask(SIGKILL));
73  sigprocmask(SIG_BLOCK, &mask, oldset);
74 }
75 
76 static void restore_sigs(sigset_t *oldset)
77 {
78  sigprocmask(SIG_SETMASK, oldset, NULL);
79 }
80 
81 static void __fuse_get_request(struct fuse_req *req)
82 {
83  atomic_inc(&req->count);
84 }
85 
86 /* Must be called with > 1 refcount */
87 static void __fuse_put_request(struct fuse_req *req)
88 {
89  BUG_ON(atomic_read(&req->count) < 2);
90  atomic_dec(&req->count);
91 }
92 
93 static void fuse_req_init_context(struct fuse_req *req)
94 {
95  req->in.h.uid = current_fsuid();
96  req->in.h.gid = current_fsgid();
97  req->in.h.pid = current->pid;
98 }
99 
100 struct fuse_req *fuse_get_req(struct fuse_conn *fc)
101 {
102  struct fuse_req *req;
103  sigset_t oldset;
104  int intr;
105  int err;
106 
107  atomic_inc(&fc->num_waiting);
108  block_sigs(&oldset);
109  intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
110  restore_sigs(&oldset);
111  err = -EINTR;
112  if (intr)
113  goto out;
114 
115  err = -ENOTCONN;
116  if (!fc->connected)
117  goto out;
118 
119  req = fuse_request_alloc();
120  err = -ENOMEM;
121  if (!req)
122  goto out;
123 
124  fuse_req_init_context(req);
125  req->waiting = 1;
126  return req;
127 
128  out:
129  atomic_dec(&fc->num_waiting);
130  return ERR_PTR(err);
131 }
133 
134 /*
135  * Return request in fuse_file->reserved_req. However that may
136  * currently be in use. If that is the case, wait for it to become
137  * available.
138  */
139 static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
140  struct file *file)
141 {
142  struct fuse_req *req = NULL;
143  struct fuse_file *ff = file->private_data;
144 
145  do {
146  wait_event(fc->reserved_req_waitq, ff->reserved_req);
147  spin_lock(&fc->lock);
148  if (ff->reserved_req) {
149  req = ff->reserved_req;
150  ff->reserved_req = NULL;
151  req->stolen_file = get_file(file);
152  }
153  spin_unlock(&fc->lock);
154  } while (!req);
155 
156  return req;
157 }
158 
159 /*
160  * Put stolen request back into fuse_file->reserved_req
161  */
162 static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
163 {
164  struct file *file = req->stolen_file;
165  struct fuse_file *ff = file->private_data;
166 
167  spin_lock(&fc->lock);
168  fuse_request_init(req);
169  BUG_ON(ff->reserved_req);
170  ff->reserved_req = req;
171  wake_up_all(&fc->reserved_req_waitq);
172  spin_unlock(&fc->lock);
173  fput(file);
174 }
175 
176 /*
177  * Gets a request for a file operation, always succeeds
178  *
179  * This is used for sending the FLUSH request, which must get to
180  * userspace, due to POSIX locks which may need to be unlocked.
181  *
182  * If allocation fails due to OOM, use the reserved request in
183  * fuse_file.
184  *
185  * This is very unlikely to deadlock accidentally, since the
186  * filesystem should not have its own file open. If deadlock is
187  * intentional, it can still be broken by "aborting" the filesystem.
188  */
189 struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
190 {
191  struct fuse_req *req;
192 
193  atomic_inc(&fc->num_waiting);
194  wait_event(fc->blocked_waitq, !fc->blocked);
195  req = fuse_request_alloc();
196  if (!req)
197  req = get_reserved_req(fc, file);
198 
199  fuse_req_init_context(req);
200  req->waiting = 1;
201  return req;
202 }
203 
204 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
205 {
206  if (atomic_dec_and_test(&req->count)) {
207  if (req->waiting)
208  atomic_dec(&fc->num_waiting);
209 
210  if (req->stolen_file)
211  put_reserved_req(fc, req);
212  else
213  fuse_request_free(req);
214  }
215 }
217 
218 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
219 {
220  unsigned nbytes = 0;
221  unsigned i;
222 
223  for (i = 0; i < numargs; i++)
224  nbytes += args[i].size;
225 
226  return nbytes;
227 }
228 
229 static u64 fuse_get_unique(struct fuse_conn *fc)
230 {
231  fc->reqctr++;
232  /* zero is special */
233  if (fc->reqctr == 0)
234  fc->reqctr = 1;
235 
236  return fc->reqctr;
237 }
238 
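/*
 * Fill in the total length of the request and put it on the pending
 * list, then wake up any daemon thread waiting to read from the device.
 */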
239 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
240 {
241  req->in.h.len = sizeof(struct fuse_in_header) +
242  len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
243  list_add_tail(&req->list, &fc->pending);
244  req->state = FUSE_REQ_PENDING;
245  if (!req->waiting) {
246  req->waiting = 1;
247  atomic_inc(&fc->num_waiting);
248  }
249  wake_up(&fc->waitq);
250  kill_fasync(&fc->fasync, SIGIO, POLL_IN);
251 }
252 
253 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
254  u64 nodeid, u64 nlookup)
255 {
256  forget->forget_one.nodeid = nodeid;
257  forget->forget_one.nlookup = nlookup;
258 
259  spin_lock(&fc->lock);
260  if (fc->connected) {
261  fc->forget_list_tail->next = forget;
262  fc->forget_list_tail = forget;
263  wake_up(&fc->waitq);
264  kill_fasync(&fc->fasync, SIGIO, POLL_IN);
265  } else {
266  kfree(forget);
267  }
268  spin_unlock(&fc->lock);
269 }
270 
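/*
 * Move queued background requests onto the pending list, keeping at
 * most max_background of them active at once.
 */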
271 static void flush_bg_queue(struct fuse_conn *fc)
272 {
273  while (fc->active_background < fc->max_background &&
274  !list_empty(&fc->bg_queue)) {
275  struct fuse_req *req;
276 
277  req = list_entry(fc->bg_queue.next, struct fuse_req, list);
278  list_del(&req->list);
279  fc->active_background++;
280  req->in.h.unique = fuse_get_unique(fc);
281  queue_request(fc, req);
282  }
283 }
284 
285 /*
286  * This function is called when a request is finished. Either a reply
287  * has arrived or it was aborted (and not yet sent) or some error
288  * occurred during communication with userspace, or the device file
289  * was closed. The requester thread is woken up (if still waiting),
290  * the 'end' callback is called if given, else the reference to the
291  * request is released
292  *
293  * Called with fc->lock, unlocks it
294  */
295 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
296 __releases(fc->lock)
297 {
298  void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
299  req->end = NULL;
300  list_del(&req->list);
301  list_del(&req->intr_entry);
302  req->state = FUSE_REQ_FINISHED;
303  if (req->background) {
304  if (fc->num_background == fc->max_background) {
305  fc->blocked = 0;
306  wake_up_all(&fc->blocked_waitq);
307  }
308  if (fc->num_background == fc->congestion_threshold &&
309  fc->connected && fc->bdi_initialized) {
310  clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
311  clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
312  }
313  fc->num_background--;
314  fc->active_background--;
315  flush_bg_queue(fc);
316  }
317  spin_unlock(&fc->lock);
318  wake_up(&req->waitq);
319  if (end)
320  end(fc, req);
321  fuse_put_request(fc, req);
322 }
323 
324 static void wait_answer_interruptible(struct fuse_conn *fc,
325  struct fuse_req *req)
326 __releases(fc->lock)
327 __acquires(fc->lock)
328 {
329  if (signal_pending(current))
330  return;
331 
332  spin_unlock(&fc->lock);
333  wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
334  spin_lock(&fc->lock);
335 }
336 
337 static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
338 {
339  list_add_tail(&req->intr_entry, &fc->interrupts);
340  wake_up(&fc->waitq);
341  kill_fasync(&fc->fasync, SIGIO, POLL_IN);
342 }
343 
344 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
345 __releases(fc->lock)
346 __acquires(fc->lock)
347 {
348  if (!fc->no_interrupt) {
349  /* Any signal may interrupt this */
350  wait_answer_interruptible(fc, req);
351 
352  if (req->aborted)
353  goto aborted;
354  if (req->state == FUSE_REQ_FINISHED)
355  return;
356 
357  req->interrupted = 1;
358  if (req->state == FUSE_REQ_SENT)
359  queue_interrupt(fc, req);
360  }
361 
362  if (!req->force) {
363  sigset_t oldset;
364 
365  /* Only fatal signals may interrupt this */
366  block_sigs(&oldset);
367  wait_answer_interruptible(fc, req);
368  restore_sigs(&oldset);
369 
370  if (req->aborted)
371  goto aborted;
372  if (req->state == FUSE_REQ_FINISHED)
373  return;
374 
375  /* Request is not yet in userspace, bail out */
376  if (req->state == FUSE_REQ_PENDING) {
377  list_del(&req->list);
378  __fuse_put_request(req);
379  req->out.h.error = -EINTR;
380  return;
381  }
382  }
383 
384  /*
385  * Either request is already in userspace, or it was forced.
386  * Wait it out.
387  */
388  spin_unlock(&fc->lock);
389  wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
390  spin_lock(&fc->lock);
391 
392  if (!req->aborted)
393  return;
394 
395  aborted:
396  BUG_ON(req->state != FUSE_REQ_FINISHED);
397  if (req->locked) {
398  /* This is uninterruptible sleep, because data is
399  being copied to/from the buffers of req. During
400  locked state, there mustn't be any filesystem
401  operation (e.g. page fault), since that could lead
402  to deadlock */
403  spin_unlock(&fc->lock);
404  wait_event(req->waitq, !req->locked);
405  spin_lock(&fc->lock);
406  }
407 }
408 
409 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
410 {
411  req->isreply = 1;
412  spin_lock(&fc->lock);
413  if (!fc->connected)
414  req->out.h.error = -ENOTCONN;
415  else if (fc->conn_error)
416  req->out.h.error = -ECONNREFUSED;
417  else {
418  req->in.h.unique = fuse_get_unique(fc);
419  queue_request(fc, req);
420  /* acquire extra reference, since request is still needed
421  after request_end() */
422  __fuse_get_request(req);
423 
424  request_wait_answer(fc, req);
425  }
426  spin_unlock(&fc->lock);
427 }
429 
430 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
431  struct fuse_req *req)
432 {
433  req->background = 1;
434  fc->num_background++;
435  if (fc->num_background == fc->max_background)
436  fc->blocked = 1;
437  if (fc->num_background == fc->congestion_threshold &&
438  fc->bdi_initialized) {
439  set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
440  set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
441  }
442  list_add_tail(&req->list, &fc->bg_queue);
443  flush_bg_queue(fc);
444 }
445 
446 static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
447 {
448  spin_lock(&fc->lock);
449  if (fc->connected) {
450  fuse_request_send_nowait_locked(fc, req);
451  spin_unlock(&fc->lock);
452  } else {
453  req->out.h.error = -ENOTCONN;
454  request_end(fc, req);
455  }
456 }
457 
458 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
459 {
460  req->isreply = 1;
461  fuse_request_send_nowait(fc, req);
462 }
464 
465 static int fuse_request_send_notify_reply(struct fuse_conn *fc,
466  struct fuse_req *req, u64 unique)
467 {
468  int err = -ENODEV;
469 
470  req->isreply = 0;
471  req->in.h.unique = unique;
472  spin_lock(&fc->lock);
473  if (fc->connected) {
474  queue_request(fc, req);
475  err = 0;
476  }
477  spin_unlock(&fc->lock);
478 
479  return err;
480 }
481 
482 /*
483  * Called under fc->lock
484  *
485  * fc->connected must have been checked previously
486  */
487 void fuse_request_send_background_locked(struct fuse_conn *fc,
488  struct fuse_req *req)
489 {
490  req->isreply = 1;
491  fuse_request_send_nowait_locked(fc, req);
492 }
493 
494 /*
495  * Lock the request. Up to the next unlock_request() there mustn't be
496  * anything that could cause a page-fault. If the request was already
497  * aborted bail out.
498  */
499 static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
500 {
501  int err = 0;
502  if (req) {
503  spin_lock(&fc->lock);
504  if (req->aborted)
505  err = -ENOENT;
506  else
507  req->locked = 1;
508  spin_unlock(&fc->lock);
509  }
510  return err;
511 }
512 
513 /*
514  * Unlock the request. If it was aborted while locked, the
515  * requester thread is currently waiting for it to be unlocked, so
516  * wake it up.
517  */
518 static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
519 {
520  if (req) {
521  spin_lock(&fc->lock);
522  req->locked = 0;
523  if (req->aborted)
524  wake_up(&req->waitq);
525  spin_unlock(&fc->lock);
526  }
527 }
528 
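/*
 * State kept while copying data between a request and a userspace
 * buffer or pipe, one page at a time.
 */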
529 struct fuse_copy_state {
530  struct fuse_conn *fc;
531  int write;
532  struct fuse_req *req;
533  const struct iovec *iov;
534  struct pipe_buffer *pipebufs;
535  struct pipe_buffer *currbuf;
536  struct pipe_inode_info *pipe;
537  unsigned long nr_segs;
538  unsigned long seglen;
539  unsigned long addr;
540  struct page *pg;
541  void *mapaddr;
542  void *buf;
543  unsigned len;
544  unsigned move_pages:1;
545 };
546 
547 static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
548  int write,
549  const struct iovec *iov, unsigned long nr_segs)
550 {
551  memset(cs, 0, sizeof(*cs));
552  cs->fc = fc;
553  cs->write = write;
554  cs->iov = iov;
555  cs->nr_segs = nr_segs;
556 }
557 
558 /* Unmap and put previous page of userspace buffer */
559 static void fuse_copy_finish(struct fuse_copy_state *cs)
560 {
561  if (cs->currbuf) {
562  struct pipe_buffer *buf = cs->currbuf;
563 
564  if (!cs->write) {
565  buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
566  } else {
567  kunmap(buf->page);
568  buf->len = PAGE_SIZE - cs->len;
569  }
570  cs->currbuf = NULL;
571  cs->mapaddr = NULL;
572  } else if (cs->mapaddr) {
573  kunmap(cs->pg);
574  if (cs->write) {
575  flush_dcache_page(cs->pg);
576  set_page_dirty_lock(cs->pg);
577  }
578  put_page(cs->pg);
579  cs->mapaddr = NULL;
580  }
581 }
582 
583 /*
584  * Get another pageful of the userspace buffer, map it into kernel
585  * address space, and lock the request.
586  */
587 static int fuse_copy_fill(struct fuse_copy_state *cs)
588 {
589  unsigned long offset;
590  int err;
591 
592  unlock_request(cs->fc, cs->req);
593  fuse_copy_finish(cs);
594  if (cs->pipebufs) {
595  struct pipe_buffer *buf = cs->pipebufs;
596 
597  if (!cs->write) {
598  err = buf->ops->confirm(cs->pipe, buf);
599  if (err)
600  return err;
601 
602  BUG_ON(!cs->nr_segs);
603  cs->currbuf = buf;
604  cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
605  cs->len = buf->len;
606  cs->buf = cs->mapaddr + buf->offset;
607  cs->pipebufs++;
608  cs->nr_segs--;
609  } else {
610  struct page *page;
611 
612  if (cs->nr_segs == cs->pipe->buffers)
613  return -EIO;
614 
615  page = alloc_page(GFP_HIGHUSER);
616  if (!page)
617  return -ENOMEM;
618 
619  buf->page = page;
620  buf->offset = 0;
621  buf->len = 0;
622 
623  cs->currbuf = buf;
624  cs->mapaddr = kmap(page);
625  cs->buf = cs->mapaddr;
626  cs->len = PAGE_SIZE;
627  cs->pipebufs++;
628  cs->nr_segs++;
629  }
630  } else {
631  if (!cs->seglen) {
632  BUG_ON(!cs->nr_segs);
633  cs->seglen = cs->iov[0].iov_len;
634  cs->addr = (unsigned long) cs->iov[0].iov_base;
635  cs->iov++;
636  cs->nr_segs--;
637  }
638  err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
639  if (err < 0)
640  return err;
641  BUG_ON(err != 1);
642  offset = cs->addr % PAGE_SIZE;
643  cs->mapaddr = kmap(cs->pg);
644  cs->buf = cs->mapaddr + offset;
645  cs->len = min(PAGE_SIZE - offset, cs->seglen);
646  cs->seglen -= cs->len;
647  cs->addr += cs->len;
648  }
649 
650  return lock_request(cs->fc, cs->req);
651 }
652 
653 /* Do as much copy to/from userspace buffer as we can */
654 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
655 {
656  unsigned ncpy = min(*size, cs->len);
657  if (val) {
658  if (cs->write)
659  memcpy(cs->buf, *val, ncpy);
660  else
661  memcpy(*val, cs->buf, ncpy);
662  *val += ncpy;
663  }
664  *size -= ncpy;
665  cs->len -= ncpy;
666  cs->buf += ncpy;
667  return ncpy;
668 }
669 
670 static int fuse_check_page(struct page *page)
671 {
672  if (page_mapcount(page) ||
673  page->mapping != NULL ||
674  page_count(page) != 1 ||
675  (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
676  ~(1 << PG_locked |
677  1 << PG_referenced |
678  1 << PG_uptodate |
679  1 << PG_lru |
680  1 << PG_active |
681  1 << PG_reclaim))) {
682  printk(KERN_WARNING "fuse: trying to steal weird page\n");
683  printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
684  return 1;
685  }
686  return 0;
687 }
688 
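/*
 * Instead of copying a full page, try to steal the page backing the
 * pipe buffer and install it in the page cache in place of *pagep.
 * Returns 0 on success, a negative error, or 1 if the caller should
 * fall back to an ordinary copy.
 */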
689 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
690 {
691  int err;
692  struct page *oldpage = *pagep;
693  struct page *newpage;
694  struct pipe_buffer *buf = cs->pipebufs;
695  struct address_space *mapping;
696  pgoff_t index;
697 
698  unlock_request(cs->fc, cs->req);
699  fuse_copy_finish(cs);
700 
701  err = buf->ops->confirm(cs->pipe, buf);
702  if (err)
703  return err;
704 
705  BUG_ON(!cs->nr_segs);
706  cs->currbuf = buf;
707  cs->len = buf->len;
708  cs->pipebufs++;
709  cs->nr_segs--;
710 
711  if (cs->len != PAGE_SIZE)
712  goto out_fallback;
713 
714  if (buf->ops->steal(cs->pipe, buf) != 0)
715  goto out_fallback;
716 
717  newpage = buf->page;
718 
719  if (WARN_ON(!PageUptodate(newpage)))
720  return -EIO;
721 
722  ClearPageMappedToDisk(newpage);
723 
724  if (fuse_check_page(newpage) != 0)
725  goto out_fallback_unlock;
726 
727  mapping = oldpage->mapping;
728  index = oldpage->index;
729 
730  /*
731  * This is a new and locked page, it shouldn't be mapped or
732  * have any special flags on it
733  */
734  if (WARN_ON(page_mapped(oldpage)))
735  goto out_fallback_unlock;
736  if (WARN_ON(page_has_private(oldpage)))
737  goto out_fallback_unlock;
738  if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
739  goto out_fallback_unlock;
740  if (WARN_ON(PageMlocked(oldpage)))
741  goto out_fallback_unlock;
742 
743  err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
744  if (err) {
745  unlock_page(newpage);
746  return err;
747  }
748 
749  page_cache_get(newpage);
750 
751  if (!(buf->flags & PIPE_BUF_FLAG_LRU))
752  lru_cache_add_file(newpage);
753 
754  err = 0;
755  spin_lock(&cs->fc->lock);
756  if (cs->req->aborted)
757  err = -ENOENT;
758  else
759  *pagep = newpage;
760  spin_unlock(&cs->fc->lock);
761 
762  if (err) {
763  unlock_page(newpage);
764  page_cache_release(newpage);
765  return err;
766  }
767 
768  unlock_page(oldpage);
769  page_cache_release(oldpage);
770  cs->len = 0;
771 
772  return 0;
773 
774 out_fallback_unlock:
775  unlock_page(newpage);
776 out_fallback:
777  cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
778  cs->buf = cs->mapaddr + buf->offset;
779 
780  err = lock_request(cs->fc, cs->req);
781  if (err)
782  return err;
783 
784  return 1;
785 }
786 
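/*
 * Reference a request page into the next pipe buffer, so a splice read
 * can hand it to userspace without copying the data.
 */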
787 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
788  unsigned offset, unsigned count)
789 {
790  struct pipe_buffer *buf;
791 
792  if (cs->nr_segs == cs->pipe->buffers)
793  return -EIO;
794 
795  unlock_request(cs->fc, cs->req);
796  fuse_copy_finish(cs);
797 
798  buf = cs->pipebufs;
799  page_cache_get(page);
800  buf->page = page;
801  buf->offset = offset;
802  buf->len = count;
803 
804  cs->pipebufs++;
805  cs->nr_segs++;
806  cs->len = 0;
807 
808  return 0;
809 }
810 
811 /*
812  * Copy a page in the request to/from the userspace buffer. Must be
813  * done atomically
814  */
815 static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
816  unsigned offset, unsigned count, int zeroing)
817 {
818  int err;
819  struct page *page = *pagep;
820 
821  if (page && zeroing && count < PAGE_SIZE)
822  clear_highpage(page);
823 
824  while (count) {
825  if (cs->write && cs->pipebufs && page) {
826  return fuse_ref_page(cs, page, offset, count);
827  } else if (!cs->len) {
828  if (cs->move_pages && page &&
829  offset == 0 && count == PAGE_SIZE) {
830  err = fuse_try_move_page(cs, pagep);
831  if (err <= 0)
832  return err;
833  } else {
834  err = fuse_copy_fill(cs);
835  if (err)
836  return err;
837  }
838  }
839  if (page) {
840  void *mapaddr = kmap_atomic(page);
841  void *buf = mapaddr + offset;
842  offset += fuse_copy_do(cs, &buf, &count);
843  kunmap_atomic(mapaddr);
844  } else
845  offset += fuse_copy_do(cs, NULL, &count);
846  }
847  if (page && !cs->write)
848  flush_dcache_page(page);
849  return 0;
850 }
851 
852 /* Copy pages in the request to/from userspace buffer */
853 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
854  int zeroing)
855 {
856  unsigned i;
857  struct fuse_req *req = cs->req;
858  unsigned offset = req->page_offset;
859  unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
860 
861  for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
862  int err;
863 
864  err = fuse_copy_page(cs, &req->pages[i], offset, count,
865  zeroing);
866  if (err)
867  return err;
868 
869  nbytes -= count;
870  count = min(nbytes, (unsigned) PAGE_SIZE);
871  offset = 0;
872  }
873  return 0;
874 }
875 
876 /* Copy a single argument in the request to/from userspace buffer */
877 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
878 {
879  while (size) {
880  if (!cs->len) {
881  int err = fuse_copy_fill(cs);
882  if (err)
883  return err;
884  }
885  fuse_copy_do(cs, &val, &size);
886  }
887  return 0;
888 }
889 
890 /* Copy request arguments to/from userspace buffer */
891 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
892  unsigned argpages, struct fuse_arg *args,
893  int zeroing)
894 {
895  int err = 0;
896  unsigned i;
897 
898  for (i = 0; !err && i < numargs; i++) {
899  struct fuse_arg *arg = &args[i];
900  if (i == numargs - 1 && argpages)
901  err = fuse_copy_pages(cs, arg->size, zeroing);
902  else
903  err = fuse_copy_one(cs, arg->value, arg->size);
904  }
905  return err;
906 }
907 
908 static int forget_pending(struct fuse_conn *fc)
909 {
910  return fc->forget_list_head.next != NULL;
911 }
912 
913 static int request_pending(struct fuse_conn *fc)
914 {
915  return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
916  forget_pending(fc);
917 }
918 
919 /* Wait until a request is available on the pending list */
920 static void request_wait(struct fuse_conn *fc)
921 __releases(fc->lock)
922 __acquires(fc->lock)
923 {
924  DECLARE_WAITQUEUE(wait, current);
925 
926  add_wait_queue_exclusive(&fc->waitq, &wait);
927  while (fc->connected && !request_pending(fc)) {
928  set_current_state(TASK_INTERRUPTIBLE);
929  if (signal_pending(current))
930  break;
931 
932  spin_unlock(&fc->lock);
933  schedule();
934  spin_lock(&fc->lock);
935  }
936  set_current_state(TASK_RUNNING);
937  remove_wait_queue(&fc->waitq, &wait);
938 }
939 
940 /*
941  * Transfer an interrupt request to userspace
942  *
943  * Unlike other requests this is assembled on demand, without a need
944  * to allocate a separate fuse_req structure.
945  *
946  * Called with fc->lock held, releases it
947  */
948 static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
949  size_t nbytes, struct fuse_req *req)
950 __releases(fc->lock)
951 {
952  struct fuse_in_header ih;
953  struct fuse_interrupt_in arg;
954  unsigned reqsize = sizeof(ih) + sizeof(arg);
955  int err;
956 
957  list_del_init(&req->intr_entry);
958  req->intr_unique = fuse_get_unique(fc);
959  memset(&ih, 0, sizeof(ih));
960  memset(&arg, 0, sizeof(arg));
961  ih.len = reqsize;
962  ih.opcode = FUSE_INTERRUPT;
963  ih.unique = req->intr_unique;
964  arg.unique = req->in.h.unique;
965 
966  spin_unlock(&fc->lock);
967  if (nbytes < reqsize)
968  return -EINVAL;
969 
970  err = fuse_copy_one(cs, &ih, sizeof(ih));
971  if (!err)
972  err = fuse_copy_one(cs, &arg, sizeof(arg));
973  fuse_copy_finish(cs);
974 
975  return err ? err : reqsize;
976 }
977 
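/*
 * Detach up to 'max' queued forget requests from the connection and
 * return them as a singly linked list; the number actually dequeued is
 * stored in *countp if it is non-NULL.
 */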
978 static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
979  unsigned max,
980  unsigned *countp)
981 {
982  struct fuse_forget_link *head = fc->forget_list_head.next;
983  struct fuse_forget_link **newhead = &head;
984  unsigned count;
985 
986  for (count = 0; *newhead != NULL && count < max; count++)
987  newhead = &(*newhead)->next;
988 
989  fc->forget_list_head.next = *newhead;
990  *newhead = NULL;
991  if (fc->forget_list_head.next == NULL)
992  fc->forget_list_tail = &fc->forget_list_head;
993 
994  if (countp != NULL)
995  *countp = count;
996 
997  return head;
998 }
999 
1000 static int fuse_read_single_forget(struct fuse_conn *fc,
1001  struct fuse_copy_state *cs,
1002  size_t nbytes)
1003 __releases(fc->lock)
1004 {
1005  int err;
1006  struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1007  struct fuse_forget_in arg = {
1008  .nlookup = forget->forget_one.nlookup,
1009  };
1010  struct fuse_in_header ih = {
1011  .opcode = FUSE_FORGET,
1012  .nodeid = forget->forget_one.nodeid,
1013  .unique = fuse_get_unique(fc),
1014  .len = sizeof(ih) + sizeof(arg),
1015  };
1016 
1017  spin_unlock(&fc->lock);
1018  kfree(forget);
1019  if (nbytes < ih.len)
1020  return -EINVAL;
1021 
1022  err = fuse_copy_one(cs, &ih, sizeof(ih));
1023  if (!err)
1024  err = fuse_copy_one(cs, &arg, sizeof(arg));
1025  fuse_copy_finish(cs);
1026 
1027  if (err)
1028  return err;
1029 
1030  return ih.len;
1031 }
1032 
1033 static int fuse_read_batch_forget(struct fuse_conn *fc,
1034  struct fuse_copy_state *cs, size_t nbytes)
1035 __releases(fc->lock)
1036 {
1037  int err;
1038  unsigned max_forgets;
1039  unsigned count;
1040  struct fuse_forget_link *head;
1041  struct fuse_batch_forget_in arg = { .count = 0 };
1042  struct fuse_in_header ih = {
1043  .opcode = FUSE_BATCH_FORGET,
1044  .unique = fuse_get_unique(fc),
1045  .len = sizeof(ih) + sizeof(arg),
1046  };
1047 
1048  if (nbytes < ih.len) {
1049  spin_unlock(&fc->lock);
1050  return -EINVAL;
1051  }
1052 
1053  max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1054  head = dequeue_forget(fc, max_forgets, &count);
1055  spin_unlock(&fc->lock);
1056 
1057  arg.count = count;
1058  ih.len += count * sizeof(struct fuse_forget_one);
1059  err = fuse_copy_one(cs, &ih, sizeof(ih));
1060  if (!err)
1061  err = fuse_copy_one(cs, &arg, sizeof(arg));
1062 
1063  while (head) {
1064  struct fuse_forget_link *forget = head;
1065 
1066  if (!err) {
1067  err = fuse_copy_one(cs, &forget->forget_one,
1068  sizeof(forget->forget_one));
1069  }
1070  head = forget->next;
1071  kfree(forget);
1072  }
1073 
1074  fuse_copy_finish(cs);
1075 
1076  if (err)
1077  return err;
1078 
1079  return ih.len;
1080 }
1081 
1082 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1083  size_t nbytes)
1084 __releases(fc->lock)
1085 {
1086  if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1087  return fuse_read_single_forget(fc, cs, nbytes);
1088  else
1089  return fuse_read_batch_forget(fc, cs, nbytes);
1090 }
1091 
1092 /*
1093  * Read a single request into the userspace filesystem's buffer. This
1094  * function waits until a request is available, then removes it from
1095  * the pending list and copies request data to userspace buffer. If
1096  * no reply is needed (FORGET), or the request has been aborted, or an
1097  * error occurred during copying, the request is finished by calling
1098  * request_end(). Otherwise it is added to the processing list and the
1099  * 'sent' flag is set.
1100  */
1101 static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1102  struct fuse_copy_state *cs, size_t nbytes)
1103 {
1104  int err;
1105  struct fuse_req *req;
1106  struct fuse_in *in;
1107  unsigned reqsize;
1108 
1109  restart:
1110  spin_lock(&fc->lock);
1111  err = -EAGAIN;
1112  if ((file->f_flags & O_NONBLOCK) && fc->connected &&
1113  !request_pending(fc))
1114  goto err_unlock;
1115 
1116  request_wait(fc);
1117  err = -ENODEV;
1118  if (!fc->connected)
1119  goto err_unlock;
1120  err = -ERESTARTSYS;
1121  if (!request_pending(fc))
1122  goto err_unlock;
1123 
1124  if (!list_empty(&fc->interrupts)) {
1125  req = list_entry(fc->interrupts.next, struct fuse_req,
1126  intr_entry);
1127  return fuse_read_interrupt(fc, cs, nbytes, req);
1128  }
1129 
1130  if (forget_pending(fc)) {
1131  if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1132  return fuse_read_forget(fc, cs, nbytes);
1133 
1134  if (fc->forget_batch <= -8)
1135  fc->forget_batch = 16;
1136  }
1137 
1138  req = list_entry(fc->pending.next, struct fuse_req, list);
1139  req->state = FUSE_REQ_READING;
1140  list_move(&req->list, &fc->io);
1141 
1142  in = &req->in;
1143  reqsize = in->h.len;
1144  /* If request is too large, reply with an error and restart the read */
1145  if (nbytes < reqsize) {
1146  req->out.h.error = -EIO;
1147  /* SETXATTR is special, since its data may legitimately be too large */
1148  if (in->h.opcode == FUSE_SETXATTR)
1149  req->out.h.error = -E2BIG;
1150  request_end(fc, req);
1151  goto restart;
1152  }
1153  spin_unlock(&fc->lock);
1154  cs->req = req;
1155  err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1156  if (!err)
1157  err = fuse_copy_args(cs, in->numargs, in->argpages,
1158  (struct fuse_arg *) in->args, 0);
1159  fuse_copy_finish(cs);
1160  spin_lock(&fc->lock);
1161  req->locked = 0;
1162  if (req->aborted) {
1163  request_end(fc, req);
1164  return -ENODEV;
1165  }
1166  if (err) {
1167  req->out.h.error = -EIO;
1168  request_end(fc, req);
1169  return err;
1170  }
1171  if (!req->isreply)
1172  request_end(fc, req);
1173  else {
1174  req->state = FUSE_REQ_SENT;
1175  list_move_tail(&req->list, &fc->processing);
1176  if (req->interrupted)
1177  queue_interrupt(fc, req);
1178  spin_unlock(&fc->lock);
1179  }
1180  return reqsize;
1181 
1182  err_unlock:
1183  spin_unlock(&fc->lock);
1184  return err;
1185 }
1186 
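/*
 * read()/aio_read() entry point for /dev/fuse: copy the next pending
 * request into the daemon's userspace buffer.
 */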
1187 static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1188  unsigned long nr_segs, loff_t pos)
1189 {
1190  struct fuse_copy_state cs;
1191  struct file *file = iocb->ki_filp;
1192  struct fuse_conn *fc = fuse_get_conn(file);
1193  if (!fc)
1194  return -EPERM;
1195 
1196  fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1197 
1198  return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1199 }
1200 
1201 static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
1202  struct pipe_buffer *buf)
1203 {
1204  return 1;
1205 }
1206 
1207 static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
1208  .can_merge = 0,
1209  .map = generic_pipe_buf_map,
1210  .unmap = generic_pipe_buf_unmap,
1211  .confirm = generic_pipe_buf_confirm,
1212  .release = generic_pipe_buf_release,
1213  .steal = fuse_dev_pipe_buf_steal,
1214  .get = generic_pipe_buf_get,
1215 };
1216 
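/*
 * splice() from /dev/fuse: read the next request into freshly allocated
 * pipe buffers, so page data can reach the daemon without copying.
 */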
1217 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1218  struct pipe_inode_info *pipe,
1219  size_t len, unsigned int flags)
1220 {
1221  int ret;
1222  int page_nr = 0;
1223  int do_wakeup = 0;
1224  struct pipe_buffer *bufs;
1225  struct fuse_copy_state cs;
1226  struct fuse_conn *fc = fuse_get_conn(in);
1227  if (!fc)
1228  return -EPERM;
1229 
1230  bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1231  if (!bufs)
1232  return -ENOMEM;
1233 
1234  fuse_copy_init(&cs, fc, 1, NULL, 0);
1235  cs.pipebufs = bufs;
1236  cs.pipe = pipe;
1237  ret = fuse_dev_do_read(fc, in, &cs, len);
1238  if (ret < 0)
1239  goto out;
1240 
1241  ret = 0;
1242  pipe_lock(pipe);
1243 
1244  if (!pipe->readers) {
1245  send_sig(SIGPIPE, current, 0);
1246  if (!ret)
1247  ret = -EPIPE;
1248  goto out_unlock;
1249  }
1250 
1251  if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1252  ret = -EIO;
1253  goto out_unlock;
1254  }
1255 
1256  while (page_nr < cs.nr_segs) {
1257  int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1258  struct pipe_buffer *buf = pipe->bufs + newbuf;
1259 
1260  buf->page = bufs[page_nr].page;
1261  buf->offset = bufs[page_nr].offset;
1262  buf->len = bufs[page_nr].len;
1263  buf->ops = &fuse_dev_pipe_buf_ops;
1264 
1265  pipe->nrbufs++;
1266  page_nr++;
1267  ret += buf->len;
1268 
1269  if (pipe->inode)
1270  do_wakeup = 1;
1271  }
1272 
1273 out_unlock:
1274  pipe_unlock(pipe);
1275 
1276  if (do_wakeup) {
1277  smp_mb();
1278  if (waitqueue_active(&pipe->wait))
1279  wake_up_interruptible(&pipe->wait);
1280  kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1281  }
1282 
1283 out:
1284  for (; page_nr < cs.nr_segs; page_nr++)
1285  page_cache_release(bufs[page_nr].page);
1286 
1287  kfree(bufs);
1288  return ret;
1289 }
1290 
1291 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1292  struct fuse_copy_state *cs)
1293 {
1294  struct fuse_notify_poll_wakeup_out outarg;
1295  int err = -EINVAL;
1296 
1297  if (size != sizeof(outarg))
1298  goto err;
1299 
1300  err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1301  if (err)
1302  goto err;
1303 
1304  fuse_copy_finish(cs);
1305  return fuse_notify_poll_wakeup(fc, &outarg);
1306 
1307 err:
1308  fuse_copy_finish(cs);
1309  return err;
1310 }
1311 
1312 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1313  struct fuse_copy_state *cs)
1314 {
1315  struct fuse_notify_inval_inode_out outarg;
1316  int err = -EINVAL;
1317 
1318  if (size != sizeof(outarg))
1319  goto err;
1320 
1321  err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1322  if (err)
1323  goto err;
1324  fuse_copy_finish(cs);
1325 
1326  down_read(&fc->killsb);
1327  err = -ENOENT;
1328  if (fc->sb) {
1329  err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1330  outarg.off, outarg.len);
1331  }
1332  up_read(&fc->killsb);
1333  return err;
1334 
1335 err:
1336  fuse_copy_finish(cs);
1337  return err;
1338 }
1339 
1340 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1341  struct fuse_copy_state *cs)
1342 {
1343  struct fuse_notify_inval_entry_out outarg;
1344  int err = -ENOMEM;
1345  char *buf;
1346  struct qstr name;
1347 
1348  buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1349  if (!buf)
1350  goto err;
1351 
1352  err = -EINVAL;
1353  if (size < sizeof(outarg))
1354  goto err;
1355 
1356  err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1357  if (err)
1358  goto err;
1359 
1360  err = -ENAMETOOLONG;
1361  if (outarg.namelen > FUSE_NAME_MAX)
1362  goto err;
1363 
1364  err = -EINVAL;
1365  if (size != sizeof(outarg) + outarg.namelen + 1)
1366  goto err;
1367 
1368  name.name = buf;
1369  name.len = outarg.namelen;
1370  err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1371  if (err)
1372  goto err;
1373  fuse_copy_finish(cs);
1374  buf[outarg.namelen] = 0;
1375  name.hash = full_name_hash(name.name, name.len);
1376 
1377  down_read(&fc->killsb);
1378  err = -ENOENT;
1379  if (fc->sb)
1380  err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1381  up_read(&fc->killsb);
1382  kfree(buf);
1383  return err;
1384 
1385 err:
1386  kfree(buf);
1387  fuse_copy_finish(cs);
1388  return err;
1389 }
1390 
1391 static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1392  struct fuse_copy_state *cs)
1393 {
1394  struct fuse_notify_delete_out outarg;
1395  int err = -ENOMEM;
1396  char *buf;
1397  struct qstr name;
1398 
1399  buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1400  if (!buf)
1401  goto err;
1402 
1403  err = -EINVAL;
1404  if (size < sizeof(outarg))
1405  goto err;
1406 
1407  err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1408  if (err)
1409  goto err;
1410 
1411  err = -ENAMETOOLONG;
1412  if (outarg.namelen > FUSE_NAME_MAX)
1413  goto err;
1414 
1415  err = -EINVAL;
1416  if (size != sizeof(outarg) + outarg.namelen + 1)
1417  goto err;
1418 
1419  name.name = buf;
1420  name.len = outarg.namelen;
1421  err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1422  if (err)
1423  goto err;
1424  fuse_copy_finish(cs);
1425  buf[outarg.namelen] = 0;
1426  name.hash = full_name_hash(name.name, name.len);
1427 
1428  down_read(&fc->killsb);
1429  err = -ENOENT;
1430  if (fc->sb)
1431  err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1432  outarg.child, &name);
1433  up_read(&fc->killsb);
1434  kfree(buf);
1435  return err;
1436 
1437 err:
1438  kfree(buf);
1439  fuse_copy_finish(cs);
1440  return err;
1441 }
1442 
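/*
 * Handle FUSE_NOTIFY_STORE: copy data pushed by the daemon into the
 * inode's page cache at the given offset, updating the file size if the
 * data extends past EOF.
 */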
1443 static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1444  struct fuse_copy_state *cs)
1445 {
1446  struct fuse_notify_store_out outarg;
1447  struct inode *inode;
1448  struct address_space *mapping;
1449  u64 nodeid;
1450  int err;
1451  pgoff_t index;
1452  unsigned int offset;
1453  unsigned int num;
1454  loff_t file_size;
1455  loff_t end;
1456 
1457  err = -EINVAL;
1458  if (size < sizeof(outarg))
1459  goto out_finish;
1460 
1461  err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1462  if (err)
1463  goto out_finish;
1464 
1465  err = -EINVAL;
1466  if (size - sizeof(outarg) != outarg.size)
1467  goto out_finish;
1468 
1469  nodeid = outarg.nodeid;
1470 
1471  down_read(&fc->killsb);
1472 
1473  err = -ENOENT;
1474  if (!fc->sb)
1475  goto out_up_killsb;
1476 
1477  inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1478  if (!inode)
1479  goto out_up_killsb;
1480 
1481  mapping = inode->i_mapping;
1482  index = outarg.offset >> PAGE_CACHE_SHIFT;
1483  offset = outarg.offset & ~PAGE_CACHE_MASK;
1484  file_size = i_size_read(inode);
1485  end = outarg.offset + outarg.size;
1486  if (end > file_size) {
1487  file_size = end;
1488  fuse_write_update_size(inode, file_size);
1489  }
1490 
1491  num = outarg.size;
1492  while (num) {
1493  struct page *page;
1494  unsigned int this_num;
1495 
1496  err = -ENOMEM;
1497  page = find_or_create_page(mapping, index,
1498  mapping_gfp_mask(mapping));
1499  if (!page)
1500  goto out_iput;
1501 
1502  this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1503  err = fuse_copy_page(cs, &page, offset, this_num, 0);
1504  if (!err && offset == 0 && (num != 0 || file_size == end))
1505  SetPageUptodate(page);
1506  unlock_page(page);
1507  page_cache_release(page);
1508 
1509  if (err)
1510  goto out_iput;
1511 
1512  num -= this_num;
1513  offset = 0;
1514  index++;
1515  }
1516 
1517  err = 0;
1518 
1519 out_iput:
1520  iput(inode);
1521 out_up_killsb:
1522  up_read(&fc->killsb);
1523 out_finish:
1524  fuse_copy_finish(cs);
1525  return err;
1526 }
1527 
1528 static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1529 {
1530  release_pages(req->pages, req->num_pages, 0);
1531 }
1532 
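/*
 * Collect the requested pages from the inode's page cache and send them
 * back to the daemon in a FUSE_NOTIFY_REPLY request, limited to
 * FUSE_MAX_PAGES_PER_REQ pages.
 */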
1533 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1534  struct fuse_notify_retrieve_out *outarg)
1535 {
1536  int err;
1537  struct address_space *mapping = inode->i_mapping;
1538  struct fuse_req *req;
1539  pgoff_t index;
1540  loff_t file_size;
1541  unsigned int num;
1542  unsigned int offset;
1543  size_t total_len = 0;
1544 
1545  req = fuse_get_req(fc);
1546  if (IS_ERR(req))
1547  return PTR_ERR(req);
1548 
1549  offset = outarg->offset & ~PAGE_CACHE_MASK;
1550 
1551  req->in.h.opcode = FUSE_NOTIFY_REPLY;
1552  req->in.h.nodeid = outarg->nodeid;
1553  req->in.numargs = 2;
1554  req->in.argpages = 1;
1555  req->page_offset = offset;
1556  req->end = fuse_retrieve_end;
1557 
1558  index = outarg->offset >> PAGE_CACHE_SHIFT;
1559  file_size = i_size_read(inode);
1560  num = outarg->size;
1561  if (outarg->offset > file_size)
1562  num = 0;
1563  else if (outarg->offset + num > file_size)
1564  num = file_size - outarg->offset;
1565 
1566  while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
1567  struct page *page;
1568  unsigned int this_num;
1569 
1570  page = find_get_page(mapping, index);
1571  if (!page)
1572  break;
1573 
1574  this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1575  req->pages[req->num_pages] = page;
1576  req->num_pages++;
1577 
1578  offset = 0;
1579  num -= this_num;
1580  total_len += this_num;
1581  index++;
1582  }
1583  req->misc.retrieve_in.offset = outarg->offset;
1584  req->misc.retrieve_in.size = total_len;
1585  req->in.args[0].size = sizeof(req->misc.retrieve_in);
1586  req->in.args[0].value = &req->misc.retrieve_in;
1587  req->in.args[1].size = total_len;
1588 
1589  err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1590  if (err)
1591  fuse_retrieve_end(fc, req);
1592 
1593  return err;
1594 }
1595 
1596 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1597  struct fuse_copy_state *cs)
1598 {
1599  struct fuse_notify_retrieve_out outarg;
1600  struct inode *inode;
1601  int err;
1602 
1603  err = -EINVAL;
1604  if (size != sizeof(outarg))
1605  goto copy_finish;
1606 
1607  err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1608  if (err)
1609  goto copy_finish;
1610 
1611  fuse_copy_finish(cs);
1612 
1613  down_read(&fc->killsb);
1614  err = -ENOENT;
1615  if (fc->sb) {
1616  u64 nodeid = outarg.nodeid;
1617 
1618  inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1619  if (inode) {
1620  err = fuse_retrieve(fc, inode, &outarg);
1621  iput(inode);
1622  }
1623  }
1624  up_read(&fc->killsb);
1625 
1626  return err;
1627 
1628 copy_finish:
1629  fuse_copy_finish(cs);
1630  return err;
1631 }
1632 
1633 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1634  unsigned int size, struct fuse_copy_state *cs)
1635 {
1636  switch (code) {
1637  case FUSE_NOTIFY_POLL:
1638  return fuse_notify_poll(fc, size, cs);
1639 
1640  case FUSE_NOTIFY_INVAL_INODE:
1641  return fuse_notify_inval_inode(fc, size, cs);
1642 
1643  case FUSE_NOTIFY_INVAL_ENTRY:
1644  return fuse_notify_inval_entry(fc, size, cs);
1645 
1646  case FUSE_NOTIFY_STORE:
1647  return fuse_notify_store(fc, size, cs);
1648 
1649  case FUSE_NOTIFY_RETRIEVE:
1650  return fuse_notify_retrieve(fc, size, cs);
1651 
1652  case FUSE_NOTIFY_DELETE:
1653  return fuse_notify_delete(fc, size, cs);
1654 
1655  default:
1656  fuse_copy_finish(cs);
1657  return -EINVAL;
1658  }
1659 }
1660 
1661 /* Look up request on processing list by unique ID */
1662 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1663 {
1664  struct list_head *entry;
1665 
1666  list_for_each(entry, &fc->processing) {
1667  struct fuse_req *req;
1668  req = list_entry(entry, struct fuse_req, list);
1669  if (req->in.h.unique == unique || req->intr_unique == unique)
1670  return req;
1671  }
1672  return NULL;
1673 }
1674 
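/*
 * Copy the reply arguments from the daemon's buffer into the request.
 * The last argument may be shorter than expected only when out->argvar
 * is set.
 */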
1675 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1676  unsigned nbytes)
1677 {
1678  unsigned reqsize = sizeof(struct fuse_out_header);
1679 
1680  if (out->h.error)
1681  return nbytes != reqsize ? -EINVAL : 0;
1682 
1683  reqsize += len_args(out->numargs, out->args);
1684 
1685  if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1686  return -EINVAL;
1687  else if (reqsize > nbytes) {
1688  struct fuse_arg *lastarg = &out->args[out->numargs-1];
1689  unsigned diffsize = reqsize - nbytes;
1690  if (diffsize > lastarg->size)
1691  return -EINVAL;
1692  lastarg->size -= diffsize;
1693  }
1694  return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1695  out->page_zeroing);
1696 }
1697 
1698 /*
1699  * Write a single reply to a request. First the header is copied from
1700  * the write buffer. The matching request is then looked up on the
1701  * processing list by the unique ID found in the header. If found, it
1702  * is removed from the list and the rest of the buffer is copied into
1703  * it. The request is finished by calling request_end().
1704  */
1705 static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1706  struct fuse_copy_state *cs, size_t nbytes)
1707 {
1708  int err;
1709  struct fuse_req *req;
1710  struct fuse_out_header oh;
1711 
1712  if (nbytes < sizeof(struct fuse_out_header))
1713  return -EINVAL;
1714 
1715  err = fuse_copy_one(cs, &oh, sizeof(oh));
1716  if (err)
1717  goto err_finish;
1718 
1719  err = -EINVAL;
1720  if (oh.len != nbytes)
1721  goto err_finish;
1722 
1723  /*
1724  * A zero oh.unique indicates an unsolicited notification message,
1725  * in which case oh.error contains the notification code.
1726  */
1727  if (!oh.unique) {
1728  err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1729  return err ? err : nbytes;
1730  }
1731 
1732  err = -EINVAL;
1733  if (oh.error <= -1000 || oh.error > 0)
1734  goto err_finish;
1735 
1736  spin_lock(&fc->lock);
1737  err = -ENOENT;
1738  if (!fc->connected)
1739  goto err_unlock;
1740 
1741  req = request_find(fc, oh.unique);
1742  if (!req)
1743  goto err_unlock;
1744 
1745  if (req->aborted) {
1746  spin_unlock(&fc->lock);
1747  fuse_copy_finish(cs);
1748  spin_lock(&fc->lock);
1749  request_end(fc, req);
1750  return -ENOENT;
1751  }
1752  /* Is it an interrupt reply? */
1753  if (req->intr_unique == oh.unique) {
1754  err = -EINVAL;
1755  if (nbytes != sizeof(struct fuse_out_header))
1756  goto err_unlock;
1757 
1758  if (oh.error == -ENOSYS)
1759  fc->no_interrupt = 1;
1760  else if (oh.error == -EAGAIN)
1761  queue_interrupt(fc, req);
1762 
1763  spin_unlock(&fc->lock);
1764  fuse_copy_finish(cs);
1765  return nbytes;
1766  }
1767 
1768  req->state = FUSE_REQ_WRITING;
1769  list_move(&req->list, &fc->io);
1770  req->out.h = oh;
1771  req->locked = 1;
1772  cs->req = req;
1773  if (!req->out.page_replace)
1774  cs->move_pages = 0;
1775  spin_unlock(&fc->lock);
1776 
1777  err = copy_out_args(cs, &req->out, nbytes);
1778  fuse_copy_finish(cs);
1779 
1780  spin_lock(&fc->lock);
1781  req->locked = 0;
1782  if (!err) {
1783  if (req->aborted)
1784  err = -ENOENT;
1785  } else if (!req->aborted)
1786  req->out.h.error = -EIO;
1787  request_end(fc, req);
1788 
1789  return err ? err : nbytes;
1790 
1791  err_unlock:
1792  spin_unlock(&fc->lock);
1793  err_finish:
1794  fuse_copy_finish(cs);
1795  return err;
1796 }
1797 
1798 static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1799  unsigned long nr_segs, loff_t pos)
1800 {
1801  struct fuse_copy_state cs;
1802  struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1803  if (!fc)
1804  return -EPERM;
1805 
1806  fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1807 
1808  return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1809 }
1810 
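/*
 * splice() to /dev/fuse: gather the pipe buffers that make up the reply
 * and pass them to fuse_dev_do_write(). With SPLICE_F_MOVE the pages
 * may be moved directly into the page cache instead of being copied.
 */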
1811 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1812  struct file *out, loff_t *ppos,
1813  size_t len, unsigned int flags)
1814 {
1815  unsigned nbuf;
1816  unsigned idx;
1817  struct pipe_buffer *bufs;
1818  struct fuse_copy_state cs;
1819  struct fuse_conn *fc;
1820  size_t rem;
1821  ssize_t ret;
1822 
1823  fc = fuse_get_conn(out);
1824  if (!fc)
1825  return -EPERM;
1826 
1827  bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1828  if (!bufs)
1829  return -ENOMEM;
1830 
1831  pipe_lock(pipe);
1832  nbuf = 0;
1833  rem = 0;
1834  for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1835  rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1836 
1837  ret = -EINVAL;
1838  if (rem < len) {
1839  pipe_unlock(pipe);
1840  goto out;
1841  }
1842 
1843  rem = len;
1844  while (rem) {
1845  struct pipe_buffer *ibuf;
1846  struct pipe_buffer *obuf;
1847 
1848  BUG_ON(nbuf >= pipe->buffers);
1849  BUG_ON(!pipe->nrbufs);
1850  ibuf = &pipe->bufs[pipe->curbuf];
1851  obuf = &bufs[nbuf];
1852 
1853  if (rem >= ibuf->len) {
1854  *obuf = *ibuf;
1855  ibuf->ops = NULL;
1856  pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1857  pipe->nrbufs--;
1858  } else {
1859  ibuf->ops->get(pipe, ibuf);
1860  *obuf = *ibuf;
1861  obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1862  obuf->len = rem;
1863  ibuf->offset += obuf->len;
1864  ibuf->len -= obuf->len;
1865  }
1866  nbuf++;
1867  rem -= obuf->len;
1868  }
1869  pipe_unlock(pipe);
1870 
1871  fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1872  cs.pipebufs = bufs;
1873  cs.pipe = pipe;
1874 
1875  if (flags & SPLICE_F_MOVE)
1876  cs.move_pages = 1;
1877 
1878  ret = fuse_dev_do_write(fc, &cs, len);
1879 
1880  for (idx = 0; idx < nbuf; idx++) {
1881  struct pipe_buffer *buf = &bufs[idx];
1882  buf->ops->release(pipe, buf);
1883  }
1884 out:
1885  kfree(bufs);
1886  return ret;
1887 }
1888 
1889 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1890 {
1891  unsigned mask = POLLOUT | POLLWRNORM;
1892  struct fuse_conn *fc = fuse_get_conn(file);
1893  if (!fc)
1894  return POLLERR;
1895 
1896  poll_wait(file, &fc->waitq, wait);
1897 
1898  spin_lock(&fc->lock);
1899  if (!fc->connected)
1900  mask = POLLERR;
1901  else if (request_pending(fc))
1902  mask |= POLLIN | POLLRDNORM;
1903  spin_unlock(&fc->lock);
1904 
1905  return mask;
1906 }
1907 
1908 /*
1909  * Abort all requests on the given list (pending or processing)
1910  *
1911  * This function releases and reacquires fc->lock
1912  */
1913 static void end_requests(struct fuse_conn *fc, struct list_head *head)
1914 __releases(fc->lock)
1915 __acquires(fc->lock)
1916 {
1917  while (!list_empty(head)) {
1918  struct fuse_req *req;
1919  req = list_entry(head->next, struct fuse_req, list);
1920  req->out.h.error = -ECONNABORTED;
1921  request_end(fc, req);
1922  spin_lock(&fc->lock);
1923  }
1924 }
1925 
1926 /*
1927  * Abort requests under I/O
1928  *
1929  * The requests are set to aborted and finished, and the request
1930  * waiter is woken up. This will make request_wait_answer() wait
1931  * until the request is unlocked and then return.
1932  *
1933  * If the request is asynchronous, then the end function needs to be
1934  * called after waiting for the request to be unlocked (if it was
1935  * locked).
1936  */
1937 static void end_io_requests(struct fuse_conn *fc)
1938 __releases(fc->lock)
1939 __acquires(fc->lock)
1940 {
1941  while (!list_empty(&fc->io)) {
1942  struct fuse_req *req =
1943  list_entry(fc->io.next, struct fuse_req, list);
1944  void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
1945 
1946  req->aborted = 1;
1947  req->out.h.error = -ECONNABORTED;
1948  req->state = FUSE_REQ_FINISHED;
1949  list_del_init(&req->list);
1950  wake_up(&req->waitq);
1951  if (end) {
1952  req->end = NULL;
1953  __fuse_get_request(req);
1954  spin_unlock(&fc->lock);
1955  wait_event(req->waitq, !req->locked);
1956  end(fc, req);
1957  fuse_put_request(fc, req);
1958  spin_lock(&fc->lock);
1959  }
1960  }
1961 }
1962 
1963 static void end_queued_requests(struct fuse_conn *fc)
1964 __releases(fc->lock)
1965 __acquires(fc->lock)
1966 {
1967  fc->max_background = UINT_MAX;
1968  flush_bg_queue(fc);
1969  end_requests(fc, &fc->pending);
1970  end_requests(fc, &fc->processing);
1971  while (forget_pending(fc))
1972  kfree(dequeue_forget(fc, 1, NULL));
1973 }
1974 
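/*
 * Wake up everyone currently polling a file on this connection, so they
 * re-evaluate after the connection is aborted or released.
 */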
1975 static void end_polls(struct fuse_conn *fc)
1976 {
1977  struct rb_node *p;
1978 
1979  p = rb_first(&fc->polled_files);
1980 
1981  while (p) {
1982  struct fuse_file *ff;
1983  ff = rb_entry(p, struct fuse_file, polled_node);
1984  wake_up_interruptible_all(&ff->poll_wait);
1985 
1986  p = rb_next(p);
1987  }
1988 }
1989 
1990 /*
1991  * Abort all requests.
1992  *
1993  * Emergency exit in case of a malicious or accidental deadlock, or
1994  * just a hung filesystem.
1995  *
1996  * The same effect is usually achievable through killing the
1997  * filesystem daemon and all users of the filesystem. The exception
1998  * is the combination of an asynchronous request and the tricky
1999  * deadlock (see Documentation/filesystems/fuse.txt).
2000  *
2001  * During the aborting, progression of requests from the pending and
2002  * processing lists onto the io list, and progression of new requests
2003  * onto the pending list is prevented by fc->connected being false.
2004  *
2005  * Progression of requests under I/O to the processing list is
2006  * prevented by the req->aborted flag being true for these requests.
2007  * For this reason requests on the io list must be aborted first.
2008  */
2009 void fuse_abort_conn(struct fuse_conn *fc)
2010 {
2011  spin_lock(&fc->lock);
2012  if (fc->connected) {
2013  fc->connected = 0;
2014  fc->blocked = 0;
2015  end_io_requests(fc);
2016  end_queued_requests(fc);
2017  end_polls(fc);
2018  wake_up_all(&fc->waitq);
2019  wake_up_all(&fc->blocked_waitq);
2020  kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2021  }
2022  spin_unlock(&fc->lock);
2023 }
2025 
2026 int fuse_dev_release(struct inode *inode, struct file *file)
2027 {
2028  struct fuse_conn *fc = fuse_get_conn(file);
2029  if (fc) {
2030  spin_lock(&fc->lock);
2031  fc->connected = 0;
2032  fc->blocked = 0;
2033  end_queued_requests(fc);
2034  end_polls(fc);
2035  wake_up_all(&fc->blocked_waitq);
2036  spin_unlock(&fc->lock);
2037  fuse_conn_put(fc);
2038  }
2039 
2040  return 0;
2041 }
2043 
2044 static int fuse_dev_fasync(int fd, struct file *file, int on)
2045 {
2046  struct fuse_conn *fc = fuse_get_conn(file);
2047  if (!fc)
2048  return -EPERM;
2049 
2050  /* No locking - fasync_helper does its own locking */
2051  return fasync_helper(fd, file, on, &fc->fasync);
2052 }
2053 
2054 const struct file_operations fuse_dev_operations = {
2055  .owner = THIS_MODULE,
2056  .llseek = no_llseek,
2057  .read = do_sync_read,
2058  .aio_read = fuse_dev_read,
2059  .splice_read = fuse_dev_splice_read,
2060  .write = do_sync_write,
2061  .aio_write = fuse_dev_write,
2062  .splice_write = fuse_dev_splice_write,
2063  .poll = fuse_dev_poll,
2064  .release = fuse_dev_release,
2065  .fasync = fuse_dev_fasync,
2066 };
2067 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2068 
2069 static struct miscdevice fuse_miscdevice = {
2070  .minor = FUSE_MINOR,
2071  .name = "fuse",
2072  .fops = &fuse_dev_operations,
2073 };
2074 
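/*
 * Module init for the FUSE device: create the request slab cache and
 * register the /dev/fuse misc device.
 */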
2075 int __init fuse_dev_init(void)
2076 {
2077  int err = -ENOMEM;
2078  fuse_req_cachep = kmem_cache_create("fuse_request",
2079  sizeof(struct fuse_req),
2080  0, 0, NULL);
2081  if (!fuse_req_cachep)
2082  goto out;
2083 
2084  err = misc_register(&fuse_miscdevice);
2085  if (err)
2086  goto out_cache_clean;
2087 
2088  return 0;
2089 
2090  out_cache_clean:
2091  kmem_cache_destroy(fuse_req_cachep);
2092  out:
2093  return err;
2094 }
2095 
2096 void fuse_dev_cleanup(void)
2097 {
2098  misc_deregister(&fuse_miscdevice);
2099  kmem_cache_destroy(fuse_req_cachep);
2100 }