Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
ioctl.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2007 Oracle. All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/buffer_head.h>
22 #include <linux/file.h>
23 #include <linux/fs.h>
24 #include <linux/fsnotify.h>
25 #include <linux/pagemap.h>
26 #include <linux/highmem.h>
27 #include <linux/time.h>
28 #include <linux/init.h>
29 #include <linux/string.h>
30 #include <linux/backing-dev.h>
31 #include <linux/mount.h>
32 #include <linux/mpage.h>
33 #include <linux/namei.h>
34 #include <linux/swap.h>
35 #include <linux/writeback.h>
36 #include <linux/statfs.h>
37 #include <linux/compat.h>
38 #include <linux/bit_spinlock.h>
39 #include <linux/security.h>
40 #include <linux/xattr.h>
41 #include <linux/vmalloc.h>
42 #include <linux/slab.h>
43 #include <linux/blkdev.h>
44 #include <linux/uuid.h>
45 #include "compat.h"
46 #include "ctree.h"
47 #include "disk-io.h"
48 #include "transaction.h"
49 #include "btrfs_inode.h"
50 #include "ioctl.h"
51 #include "print-tree.h"
52 #include "volumes.h"
53 #include "locking.h"
54 #include "inode-map.h"
55 #include "backref.h"
56 #include "rcu-string.h"
57 #include "send.h"
58 
59 /* Mask out flags that are inappropriate for the given type of inode. */
60 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
61 {
62  if (S_ISDIR(mode))
63  return flags;
64  else if (S_ISREG(mode))
65  return flags & ~FS_DIRSYNC_FL;
66  else
67  return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
68 }
69 
70 /*
71  * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
72  */
73 static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
74 {
75  unsigned int iflags = 0;
76 
77  if (flags & BTRFS_INODE_SYNC)
78  iflags |= FS_SYNC_FL;
79  if (flags & BTRFS_INODE_IMMUTABLE)
80  iflags |= FS_IMMUTABLE_FL;
81  if (flags & BTRFS_INODE_APPEND)
82  iflags |= FS_APPEND_FL;
83  if (flags & BTRFS_INODE_NODUMP)
84  iflags |= FS_NODUMP_FL;
85  if (flags & BTRFS_INODE_NOATIME)
86  iflags |= FS_NOATIME_FL;
87  if (flags & BTRFS_INODE_DIRSYNC)
88  iflags |= FS_DIRSYNC_FL;
89  if (flags & BTRFS_INODE_NODATACOW)
90  iflags |= FS_NOCOW_FL;
91 
92  if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
93  iflags |= FS_COMPR_FL;
94  else if (flags & BTRFS_INODE_NOCOMPRESS)
95  iflags |= FS_NOCOMP_FL;
96 
97  return iflags;
98 }
99 
100 /*
101  * Update inode->i_flags based on the btrfs internal flags.
102  */
104 {
105  struct btrfs_inode *ip = BTRFS_I(inode);
106 
108 
109  if (ip->flags & BTRFS_INODE_SYNC)
110  inode->i_flags |= S_SYNC;
111  if (ip->flags & BTRFS_INODE_IMMUTABLE)
112  inode->i_flags |= S_IMMUTABLE;
113  if (ip->flags & BTRFS_INODE_APPEND)
114  inode->i_flags |= S_APPEND;
115  if (ip->flags & BTRFS_INODE_NOATIME)
116  inode->i_flags |= S_NOATIME;
117  if (ip->flags & BTRFS_INODE_DIRSYNC)
118  inode->i_flags |= S_DIRSYNC;
119 }
120 
121 /*
122  * Inherit flags from the parent inode.
123  *
124  * Currently only the compression flags and the cow flags are inherited.
125  */
126 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
127 {
128  unsigned int flags;
129 
130  if (!dir)
131  return;
132 
133  flags = BTRFS_I(dir)->flags;
134 
135  if (flags & BTRFS_INODE_NOCOMPRESS) {
136  BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
137  BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
138  } else if (flags & BTRFS_INODE_COMPRESS) {
139  BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
140  BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
141  }
142 
143  if (flags & BTRFS_INODE_NODATACOW)
144  BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
145 
146  btrfs_update_iflags(inode);
147 }
148 
149 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
150 {
151  struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
152  unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
153 
154  if (copy_to_user(arg, &flags, sizeof(flags)))
155  return -EFAULT;
156  return 0;
157 }
158 
159 static int check_flags(unsigned int flags)
160 {
161  if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
165  FS_NOCOW_FL))
166  return -EOPNOTSUPP;
167 
168  if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
169  return -EINVAL;
170 
171  return 0;
172 }
173 
174 static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
175 {
176  struct inode *inode = file->f_path.dentry->d_inode;
177  struct btrfs_inode *ip = BTRFS_I(inode);
178  struct btrfs_root *root = ip->root;
179  struct btrfs_trans_handle *trans;
180  unsigned int flags, oldflags;
181  int ret;
182  u64 ip_oldflags;
183  unsigned int i_oldflags;
184  umode_t mode;
185 
186  if (btrfs_root_readonly(root))
187  return -EROFS;
188 
189  if (copy_from_user(&flags, arg, sizeof(flags)))
190  return -EFAULT;
191 
192  ret = check_flags(flags);
193  if (ret)
194  return ret;
195 
196  if (!inode_owner_or_capable(inode))
197  return -EACCES;
198 
199  ret = mnt_want_write_file(file);
200  if (ret)
201  return ret;
202 
203  mutex_lock(&inode->i_mutex);
204 
205  ip_oldflags = ip->flags;
206  i_oldflags = inode->i_flags;
207  mode = inode->i_mode;
208 
209  flags = btrfs_mask_flags(inode->i_mode, flags);
210  oldflags = btrfs_flags_to_ioctl(ip->flags);
211  if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
213  ret = -EPERM;
214  goto out_unlock;
215  }
216  }
217 
218  if (flags & FS_SYNC_FL)
219  ip->flags |= BTRFS_INODE_SYNC;
220  else
221  ip->flags &= ~BTRFS_INODE_SYNC;
222  if (flags & FS_IMMUTABLE_FL)
224  else
225  ip->flags &= ~BTRFS_INODE_IMMUTABLE;
226  if (flags & FS_APPEND_FL)
227  ip->flags |= BTRFS_INODE_APPEND;
228  else
229  ip->flags &= ~BTRFS_INODE_APPEND;
230  if (flags & FS_NODUMP_FL)
231  ip->flags |= BTRFS_INODE_NODUMP;
232  else
233  ip->flags &= ~BTRFS_INODE_NODUMP;
234  if (flags & FS_NOATIME_FL)
235  ip->flags |= BTRFS_INODE_NOATIME;
236  else
237  ip->flags &= ~BTRFS_INODE_NOATIME;
238  if (flags & FS_DIRSYNC_FL)
239  ip->flags |= BTRFS_INODE_DIRSYNC;
240  else
241  ip->flags &= ~BTRFS_INODE_DIRSYNC;
242  if (flags & FS_NOCOW_FL) {
243  if (S_ISREG(mode)) {
244  /*
245  * It's safe to turn csums off here, no extents exist.
246  * Otherwise we want the flag to reflect the real COW
247  * status of the file and will not set it.
248  */
249  if (inode->i_size == 0)
250  ip->flags |= BTRFS_INODE_NODATACOW
252  } else {
254  }
255  } else {
256  /*
257  * Revert back under same assuptions as above
258  */
259  if (S_ISREG(mode)) {
260  if (inode->i_size == 0)
261  ip->flags &= ~(BTRFS_INODE_NODATACOW
263  } else {
264  ip->flags &= ~BTRFS_INODE_NODATACOW;
265  }
266  }
267 
268  /*
269  * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
270  * flag may be changed automatically if compression code won't make
271  * things smaller.
272  */
273  if (flags & FS_NOCOMP_FL) {
274  ip->flags &= ~BTRFS_INODE_COMPRESS;
276  } else if (flags & FS_COMPR_FL) {
278  ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
279  } else {
280  ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
281  }
282 
283  trans = btrfs_start_transaction(root, 1);
284  if (IS_ERR(trans)) {
285  ret = PTR_ERR(trans);
286  goto out_drop;
287  }
288 
289  btrfs_update_iflags(inode);
290  inode_inc_iversion(inode);
291  inode->i_ctime = CURRENT_TIME;
292  ret = btrfs_update_inode(trans, root, inode);
293 
294  btrfs_end_transaction(trans, root);
295  out_drop:
296  if (ret) {
297  ip->flags = ip_oldflags;
298  inode->i_flags = i_oldflags;
299  }
300 
301  out_unlock:
302  mutex_unlock(&inode->i_mutex);
303  mnt_drop_write_file(file);
304  return ret;
305 }
306 
307 static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
308 {
309  struct inode *inode = file->f_path.dentry->d_inode;
310 
311  return put_user(inode->i_generation, arg);
312 }
313 
314 static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
315 {
316  struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb);
317  struct btrfs_device *device;
318  struct request_queue *q;
319  struct fstrim_range range;
321  u64 num_devices = 0;
322  u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
323  int ret;
324 
325  if (!capable(CAP_SYS_ADMIN))
326  return -EPERM;
327 
328  rcu_read_lock();
329  list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
330  dev_list) {
331  if (!device->bdev)
332  continue;
333  q = bdev_get_queue(device->bdev);
334  if (blk_queue_discard(q)) {
335  num_devices++;
336  minlen = min((u64)q->limits.discard_granularity,
337  minlen);
338  }
339  }
340  rcu_read_unlock();
341 
342  if (!num_devices)
343  return -EOPNOTSUPP;
344  if (copy_from_user(&range, arg, sizeof(range)))
345  return -EFAULT;
346  if (range.start > total_bytes ||
347  range.len < fs_info->sb->s_blocksize)
348  return -EINVAL;
349 
350  range.len = min(range.len, total_bytes - range.start);
351  range.minlen = max(range.minlen, minlen);
352  ret = btrfs_trim_fs(fs_info->tree_root, &range);
353  if (ret < 0)
354  return ret;
355 
356  if (copy_to_user(arg, &range, sizeof(range)))
357  return -EFAULT;
358 
359  return 0;
360 }
361 
362 static noinline int create_subvol(struct btrfs_root *root,
363  struct dentry *dentry,
364  char *name, int namelen,
365  u64 *async_transid,
366  struct btrfs_qgroup_inherit **inherit)
367 {
368  struct btrfs_trans_handle *trans;
369  struct btrfs_key key;
370  struct btrfs_root_item root_item;
371  struct btrfs_inode_item *inode_item;
372  struct extent_buffer *leaf;
373  struct btrfs_root *new_root;
374  struct dentry *parent = dentry->d_parent;
375  struct inode *dir;
376  struct timespec cur_time = CURRENT_TIME;
377  int ret;
378  int err;
379  u64 objectid;
380  u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
381  u64 index = 0;
382  uuid_le new_uuid;
383 
384  ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
385  if (ret)
386  return ret;
387 
388  dir = parent->d_inode;
389 
390  /*
391  * 1 - inode item
392  * 2 - refs
393  * 1 - root item
394  * 2 - dir items
395  */
396  trans = btrfs_start_transaction(root, 6);
397  if (IS_ERR(trans))
398  return PTR_ERR(trans);
399 
400  ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
401  inherit ? *inherit : NULL);
402  if (ret)
403  goto fail;
404 
405  leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
406  0, objectid, NULL, 0, 0, 0);
407  if (IS_ERR(leaf)) {
408  ret = PTR_ERR(leaf);
409  goto fail;
410  }
411 
412  memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
413  btrfs_set_header_bytenr(leaf, leaf->start);
414  btrfs_set_header_generation(leaf, trans->transid);
415  btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
416  btrfs_set_header_owner(leaf, objectid);
417 
418  write_extent_buffer(leaf, root->fs_info->fsid,
419  (unsigned long)btrfs_header_fsid(leaf),
421  write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
422  (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
425 
426  memset(&root_item, 0, sizeof(root_item));
427 
428  inode_item = &root_item.inode;
429  inode_item->generation = cpu_to_le64(1);
430  inode_item->size = cpu_to_le64(3);
431  inode_item->nlink = cpu_to_le32(1);
432  inode_item->nbytes = cpu_to_le64(root->leafsize);
433  inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
434 
435  root_item.flags = 0;
436  root_item.byte_limit = 0;
438 
439  btrfs_set_root_bytenr(&root_item, leaf->start);
440  btrfs_set_root_generation(&root_item, trans->transid);
441  btrfs_set_root_level(&root_item, 0);
442  btrfs_set_root_refs(&root_item, 1);
443  btrfs_set_root_used(&root_item, leaf->len);
444  btrfs_set_root_last_snapshot(&root_item, 0);
445 
446  btrfs_set_root_generation_v2(&root_item,
447  btrfs_root_generation(&root_item));
448  uuid_le_gen(&new_uuid);
449  memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
450  root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
451  root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
452  root_item.ctime = root_item.otime;
453  btrfs_set_root_ctransid(&root_item, trans->transid);
454  btrfs_set_root_otransid(&root_item, trans->transid);
455 
456  btrfs_tree_unlock(leaf);
457  free_extent_buffer(leaf);
458  leaf = NULL;
459 
460  btrfs_set_root_dirid(&root_item, new_dirid);
461 
462  key.objectid = objectid;
463  key.offset = 0;
464  btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
465  ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
466  &root_item);
467  if (ret)
468  goto fail;
469 
470  key.offset = (u64)-1;
471  new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
472  if (IS_ERR(new_root)) {
473  btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
474  ret = PTR_ERR(new_root);
475  goto fail;
476  }
477 
478  btrfs_record_root_in_trans(trans, new_root);
479 
480  ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
481  if (ret) {
482  /* We potentially lose an unused inode item here */
483  btrfs_abort_transaction(trans, root, ret);
484  goto fail;
485  }
486 
487  /*
488  * insert the directory item
489  */
490  ret = btrfs_set_inode_index(dir, &index);
491  if (ret) {
492  btrfs_abort_transaction(trans, root, ret);
493  goto fail;
494  }
495 
496  ret = btrfs_insert_dir_item(trans, root,
497  name, namelen, dir, &key,
498  BTRFS_FT_DIR, index);
499  if (ret) {
500  btrfs_abort_transaction(trans, root, ret);
501  goto fail;
502  }
503 
504  btrfs_i_size_write(dir, dir->i_size + namelen * 2);
505  ret = btrfs_update_inode(trans, root, dir);
506  BUG_ON(ret);
507 
508  ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
509  objectid, root->root_key.objectid,
510  btrfs_ino(dir), index, name, namelen);
511 
512  BUG_ON(ret);
513 
514  d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
515 fail:
516  if (async_transid) {
517  *async_transid = trans->transid;
518  err = btrfs_commit_transaction_async(trans, root, 1);
519  } else {
520  err = btrfs_commit_transaction(trans, root);
521  }
522  if (err && !ret)
523  ret = err;
524  return ret;
525 }
526 
527 static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
528  char *name, int namelen, u64 *async_transid,
529  bool readonly, struct btrfs_qgroup_inherit **inherit)
530 {
531  struct inode *inode;
532  struct btrfs_pending_snapshot *pending_snapshot;
533  struct btrfs_trans_handle *trans;
534  int ret;
535 
536  if (!root->ref_cows)
537  return -EINVAL;
538 
539  pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
540  if (!pending_snapshot)
541  return -ENOMEM;
542 
543  btrfs_init_block_rsv(&pending_snapshot->block_rsv,
545  pending_snapshot->dentry = dentry;
546  pending_snapshot->root = root;
547  pending_snapshot->readonly = readonly;
548  if (inherit) {
549  pending_snapshot->inherit = *inherit;
550  *inherit = NULL; /* take responsibility to free it */
551  }
552 
553  trans = btrfs_start_transaction(root->fs_info->extent_root, 6);
554  if (IS_ERR(trans)) {
555  ret = PTR_ERR(trans);
556  goto fail;
557  }
558 
559  ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
560  BUG_ON(ret);
561 
562  spin_lock(&root->fs_info->trans_lock);
563  list_add(&pending_snapshot->list,
564  &trans->transaction->pending_snapshots);
565  spin_unlock(&root->fs_info->trans_lock);
566  if (async_transid) {
567  *async_transid = trans->transid;
568  ret = btrfs_commit_transaction_async(trans,
569  root->fs_info->extent_root, 1);
570  } else {
571  ret = btrfs_commit_transaction(trans,
572  root->fs_info->extent_root);
573  }
574  if (ret)
575  goto fail;
576 
577  ret = pending_snapshot->error;
578  if (ret)
579  goto fail;
580 
581  ret = btrfs_orphan_cleanup(pending_snapshot->snap);
582  if (ret)
583  goto fail;
584 
585  inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
586  if (IS_ERR(inode)) {
587  ret = PTR_ERR(inode);
588  goto fail;
589  }
590  BUG_ON(!inode);
591  d_instantiate(dentry, inode);
592  ret = 0;
593 fail:
594  kfree(pending_snapshot);
595  return ret;
596 }
597 
598 /* copy of check_sticky in fs/namei.c()
599 * It's inline, so penalty for filesystems that don't use sticky bit is
600 * minimal.
601 */
602 static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
603 {
604  kuid_t fsuid = current_fsuid();
605 
606  if (!(dir->i_mode & S_ISVTX))
607  return 0;
608  if (uid_eq(inode->i_uid, fsuid))
609  return 0;
610  if (uid_eq(dir->i_uid, fsuid))
611  return 0;
612  return !capable(CAP_FOWNER);
613 }
614 
615 /* copy of may_delete in fs/namei.c()
616  * Check whether we can remove a link victim from directory dir, check
617  * whether the type of victim is right.
618  * 1. We can't do it if dir is read-only (done in permission())
619  * 2. We should have write and exec permissions on dir
620  * 3. We can't remove anything from append-only dir
621  * 4. We can't do anything with immutable dir (done in permission())
622  * 5. If the sticky bit on dir is set we should either
623  * a. be owner of dir, or
624  * b. be owner of victim, or
625  * c. have CAP_FOWNER capability
626  * 6. If the victim is append-only or immutable we can't do antyhing with
627  * links pointing to it.
628  * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
629  * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
630  * 9. We can't remove a root or mountpoint.
631  * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
632  * nfs_async_unlink().
633  */
634 
635 static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
636 {
637  int error;
638 
639  if (!victim->d_inode)
640  return -ENOENT;
641 
642  BUG_ON(victim->d_parent->d_inode != dir);
643  audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
644 
645  error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
646  if (error)
647  return error;
648  if (IS_APPEND(dir))
649  return -EPERM;
650  if (btrfs_check_sticky(dir, victim->d_inode)||
651  IS_APPEND(victim->d_inode)||
652  IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
653  return -EPERM;
654  if (isdir) {
655  if (!S_ISDIR(victim->d_inode->i_mode))
656  return -ENOTDIR;
657  if (IS_ROOT(victim))
658  return -EBUSY;
659  } else if (S_ISDIR(victim->d_inode->i_mode))
660  return -EISDIR;
661  if (IS_DEADDIR(dir))
662  return -ENOENT;
663  if (victim->d_flags & DCACHE_NFSFS_RENAMED)
664  return -EBUSY;
665  return 0;
666 }
667 
668 /* copy of may_create in fs/namei.c() */
669 static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
670 {
671  if (child->d_inode)
672  return -EEXIST;
673  if (IS_DEADDIR(dir))
674  return -ENOENT;
675  return inode_permission(dir, MAY_WRITE | MAY_EXEC);
676 }
677 
678 /*
679  * Create a new subvolume below @parent. This is largely modeled after
680  * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
681  * inside this filesystem so it's quite a bit simpler.
682  */
683 static noinline int btrfs_mksubvol(struct path *parent,
684  char *name, int namelen,
685  struct btrfs_root *snap_src,
686  u64 *async_transid, bool readonly,
687  struct btrfs_qgroup_inherit **inherit)
688 {
689  struct inode *dir = parent->dentry->d_inode;
690  struct dentry *dentry;
691  int error;
692 
694 
695  dentry = lookup_one_len(name, parent->dentry, namelen);
696  error = PTR_ERR(dentry);
697  if (IS_ERR(dentry))
698  goto out_unlock;
699 
700  error = -EEXIST;
701  if (dentry->d_inode)
702  goto out_dput;
703 
704  error = btrfs_may_create(dir, dentry);
705  if (error)
706  goto out_dput;
707 
708  down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
709 
710  if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
711  goto out_up_read;
712 
713  if (snap_src) {
714  error = create_snapshot(snap_src, dentry, name, namelen,
715  async_transid, readonly, inherit);
716  } else {
717  error = create_subvol(BTRFS_I(dir)->root, dentry,
718  name, namelen, async_transid, inherit);
719  }
720  if (!error)
721  fsnotify_mkdir(dir, dentry);
722 out_up_read:
723  up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
724 out_dput:
725  dput(dentry);
726 out_unlock:
727  mutex_unlock(&dir->i_mutex);
728  return error;
729 }
730 
731 /*
732  * When we're defragging a range, we don't want to kick it off again
733  * if it is really just waiting for delalloc to send it down.
734  * If we find a nice big extent or delalloc range for the bytes in the
735  * file you want to defrag, we return 0 to let you know to skip this
736  * part of the file
737  */
738 static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
739 {
740  struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
741  struct extent_map *em = NULL;
742  struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
743  u64 end;
744 
745  read_lock(&em_tree->lock);
746  em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
747  read_unlock(&em_tree->lock);
748 
749  if (em) {
750  end = extent_map_end(em);
751  free_extent_map(em);
752  if (end - offset > thresh)
753  return 0;
754  }
755  /* if we already have a nice delalloc here, just stop */
756  thresh /= 2;
757  end = count_range_bits(io_tree, &offset, offset + thresh,
758  thresh, EXTENT_DELALLOC, 1);
759  if (end >= thresh)
760  return 0;
761  return 1;
762 }
763 
764 /*
765  * helper function to walk through a file and find extents
766  * newer than a specific transid, and smaller than thresh.
767  *
768  * This is used by the defragging code to find new and small
769  * extents
770  */
771 static int find_new_extents(struct btrfs_root *root,
772  struct inode *inode, u64 newer_than,
773  u64 *off, int thresh)
774 {
775  struct btrfs_path *path;
776  struct btrfs_key min_key;
777  struct btrfs_key max_key;
778  struct extent_buffer *leaf;
780  int type;
781  int ret;
782  u64 ino = btrfs_ino(inode);
783 
784  path = btrfs_alloc_path();
785  if (!path)
786  return -ENOMEM;
787 
788  min_key.objectid = ino;
789  min_key.type = BTRFS_EXTENT_DATA_KEY;
790  min_key.offset = *off;
791 
792  max_key.objectid = ino;
793  max_key.type = (u8)-1;
794  max_key.offset = (u64)-1;
795 
796  path->keep_locks = 1;
797 
798  while(1) {
799  ret = btrfs_search_forward(root, &min_key, &max_key,
800  path, 0, newer_than);
801  if (ret != 0)
802  goto none;
803  if (min_key.objectid != ino)
804  goto none;
805  if (min_key.type != BTRFS_EXTENT_DATA_KEY)
806  goto none;
807 
808  leaf = path->nodes[0];
809  extent = btrfs_item_ptr(leaf, path->slots[0],
810  struct btrfs_file_extent_item);
811 
812  type = btrfs_file_extent_type(leaf, extent);
813  if (type == BTRFS_FILE_EXTENT_REG &&
814  btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
815  check_defrag_in_cache(inode, min_key.offset, thresh)) {
816  *off = min_key.offset;
817  btrfs_free_path(path);
818  return 0;
819  }
820 
821  if (min_key.offset == (u64)-1)
822  goto none;
823 
824  min_key.offset++;
825  btrfs_release_path(path);
826  }
827 none:
828  btrfs_free_path(path);
829  return -ENOENT;
830 }
831 
832 static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
833 {
834  struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
835  struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
836  struct extent_map *em;
838 
839  /*
840  * hopefully we have this extent in the tree already, try without
841  * the full extent lock
842  */
843  read_lock(&em_tree->lock);
844  em = lookup_extent_mapping(em_tree, start, len);
845  read_unlock(&em_tree->lock);
846 
847  if (!em) {
848  /* get the big lock and read metadata off disk */
849  lock_extent(io_tree, start, start + len - 1);
850  em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
851  unlock_extent(io_tree, start, start + len - 1);
852 
853  if (IS_ERR(em))
854  return NULL;
855  }
856 
857  return em;
858 }
859 
860 static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
861 {
862  struct extent_map *next;
863  bool ret = true;
864 
865  /* this is the last extent */
866  if (em->start + em->len >= i_size_read(inode))
867  return false;
868 
869  next = defrag_lookup_extent(inode, em->start + em->len);
870  if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
871  ret = false;
872 
873  free_extent_map(next);
874  return ret;
875 }
876 
877 static int should_defrag_range(struct inode *inode, u64 start, int thresh,
878  u64 *last_len, u64 *skip, u64 *defrag_end,
879  int compress)
880 {
881  struct extent_map *em;
882  int ret = 1;
883  bool next_mergeable = true;
884 
885  /*
886  * make sure that once we start defragging an extent, we keep on
887  * defragging it
888  */
889  if (start < *defrag_end)
890  return 1;
891 
892  *skip = 0;
893 
894  em = defrag_lookup_extent(inode, start);
895  if (!em)
896  return 0;
897 
898  /* this will cover holes, and inline extents */
899  if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
900  ret = 0;
901  goto out;
902  }
903 
904  next_mergeable = defrag_check_next_extent(inode, em);
905 
906  /*
907  * we hit a real extent, if it is big or the next extent is not a
908  * real extent, don't bother defragging it
909  */
910  if (!compress && (*last_len == 0 || *last_len >= thresh) &&
911  (em->len >= thresh || !next_mergeable))
912  ret = 0;
913 out:
914  /*
915  * last_len ends up being a counter of how many bytes we've defragged.
916  * every time we choose not to defrag an extent, we reset *last_len
917  * so that the next tiny extent will force a defrag.
918  *
919  * The end result of this is that tiny extents before a single big
920  * extent will force at least part of that big extent to be defragged.
921  */
922  if (ret) {
923  *defrag_end = extent_map_end(em);
924  } else {
925  *last_len = 0;
926  *skip = extent_map_end(em);
927  *defrag_end = 0;
928  }
929 
930  free_extent_map(em);
931  return ret;
932 }
933 
934 /*
935  * it doesn't do much good to defrag one or two pages
936  * at a time. This pulls in a nice chunk of pages
937  * to COW and defrag.
938  *
939  * It also makes sure the delalloc code has enough
940  * dirty data to avoid making new small extents as part
941  * of the defrag
942  *
943  * It's a good idea to start RA on this range
944  * before calling this.
945  */
946 static int cluster_pages_for_defrag(struct inode *inode,
947  struct page **pages,
948  unsigned long start_index,
949  int num_pages)
950 {
951  unsigned long file_end;
952  u64 isize = i_size_read(inode);
953  u64 page_start;
954  u64 page_end;
955  u64 page_cnt;
956  int ret;
957  int i;
958  int i_done;
959  struct btrfs_ordered_extent *ordered;
960  struct extent_state *cached_state = NULL;
961  struct extent_io_tree *tree;
962  gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
963 
964  file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
965  if (!isize || start_index > file_end)
966  return 0;
967 
968  page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
969 
970  ret = btrfs_delalloc_reserve_space(inode,
971  page_cnt << PAGE_CACHE_SHIFT);
972  if (ret)
973  return ret;
974  i_done = 0;
975  tree = &BTRFS_I(inode)->io_tree;
976 
977  /* step one, lock all the pages */
978  for (i = 0; i < page_cnt; i++) {
979  struct page *page;
980 again:
981  page = find_or_create_page(inode->i_mapping,
982  start_index + i, mask);
983  if (!page)
984  break;
985 
986  page_start = page_offset(page);
987  page_end = page_start + PAGE_CACHE_SIZE - 1;
988  while (1) {
989  lock_extent(tree, page_start, page_end);
990  ordered = btrfs_lookup_ordered_extent(inode,
991  page_start);
992  unlock_extent(tree, page_start, page_end);
993  if (!ordered)
994  break;
995 
996  unlock_page(page);
997  btrfs_start_ordered_extent(inode, ordered, 1);
998  btrfs_put_ordered_extent(ordered);
999  lock_page(page);
1000  /*
1001  * we unlocked the page above, so we need check if
1002  * it was released or not.
1003  */
1004  if (page->mapping != inode->i_mapping) {
1005  unlock_page(page);
1006  page_cache_release(page);
1007  goto again;
1008  }
1009  }
1010 
1011  if (!PageUptodate(page)) {
1012  btrfs_readpage(NULL, page);
1013  lock_page(page);
1014  if (!PageUptodate(page)) {
1015  unlock_page(page);
1016  page_cache_release(page);
1017  ret = -EIO;
1018  break;
1019  }
1020  }
1021 
1022  if (page->mapping != inode->i_mapping) {
1023  unlock_page(page);
1024  page_cache_release(page);
1025  goto again;
1026  }
1027 
1028  pages[i] = page;
1029  i_done++;
1030  }
1031  if (!i_done || ret)
1032  goto out;
1033 
1034  if (!(inode->i_sb->s_flags & MS_ACTIVE))
1035  goto out;
1036 
1037  /*
1038  * so now we have a nice long stream of locked
1039  * and up to date pages, lets wait on them
1040  */
1041  for (i = 0; i < i_done; i++)
1042  wait_on_page_writeback(pages[i]);
1043 
1044  page_start = page_offset(pages[0]);
1045  page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
1046 
1047  lock_extent_bits(&BTRFS_I(inode)->io_tree,
1048  page_start, page_end - 1, 0, &cached_state);
1049  clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
1050  page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
1052  &cached_state, GFP_NOFS);
1053 
1054  if (i_done != page_cnt) {
1055  spin_lock(&BTRFS_I(inode)->lock);
1056  BTRFS_I(inode)->outstanding_extents++;
1057  spin_unlock(&BTRFS_I(inode)->lock);
1059  (page_cnt - i_done) << PAGE_CACHE_SHIFT);
1060  }
1061 
1062 
1063  set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
1064  &cached_state, GFP_NOFS);
1065 
1066  unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1067  page_start, page_end - 1, &cached_state,
1068  GFP_NOFS);
1069 
1070  for (i = 0; i < i_done; i++) {
1071  clear_page_dirty_for_io(pages[i]);
1072  ClearPageChecked(pages[i]);
1073  set_page_extent_mapped(pages[i]);
1074  set_page_dirty(pages[i]);
1075  unlock_page(pages[i]);
1076  page_cache_release(pages[i]);
1077  }
1078  return i_done;
1079 out:
1080  for (i = 0; i < i_done; i++) {
1081  unlock_page(pages[i]);
1082  page_cache_release(pages[i]);
1083  }
1084  btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
1085  return ret;
1086 
1087 }
1088 
/*
 * btrfs_defrag_file - defragment (and optionally force-compress) part of a file.
 *
 * Walks page indexes from range->start up to the last page selected by
 * range->start + range->len (clamped to i_size).  When newer_than is
 * non-zero the starting offsets come from find_new_extents() instead.
 * Pages are handed to cluster_pages_for_defrag() in clusters of at most
 * 256K worth of pages, with inode->i_mutex held around each call.
 *
 * @inode:         inode to work on
 * @file:          open file whose f_ra readahead state is used; when NULL a
 *                 temporary file_ra_state is allocated and freed here
 * @newer_than:    forwarded to find_new_extents(); non-zero selects that mode
 * @max_to_defrag: cap on the number of pages; 0 means last_index + 1
 *
 * Returns the accumulated cluster_pages_for_defrag() results on success,
 * 0 for an empty file, -EINVAL for an unknown compress type, -ENOMEM on
 * allocation failure, or whatever a failing helper returned.
 *
 * NOTE(review): `range` is dereferenced throughout but its parameter
 * declaration is not present in this listing (embedded numbering skips
 * 1090).  The tail of the should_defrag_range() condition (1191) and a
 * statement after "defrag_count += ret" (1228) also appear to be missing.
 * Confirm against the original file.
 */
1089 int btrfs_defrag_file(struct inode *inode, struct file *file,
1091  u64 newer_than, unsigned long max_to_defrag)
1092 {
1093  struct btrfs_root *root = BTRFS_I(inode)->root;
1094  struct file_ra_state *ra = NULL;
1095  unsigned long last_index;
1096  u64 isize = i_size_read(inode);
1097  u64 last_len = 0;
1098  u64 skip = 0;
1099  u64 defrag_end = 0;
1100  u64 newer_off = range->start;
1101  unsigned long i;
1102  unsigned long ra_index = 0;
1103  int ret;
1104  int defrag_count = 0;
1105  int compress_type = BTRFS_COMPRESS_ZLIB;
1106  int extent_thresh = range->extent_thresh;
1107  int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
1108  int cluster = max_cluster;
1109  u64 new_align = ~((u64)128 * 1024 - 1);
1110  struct page **pages = NULL;
1111 
 /* no threshold supplied: fall back to 256K */
1112  if (extent_thresh == 0)
1113  extent_thresh = 256 * 1024;
1114 
 /* zlib is the default; a non-zero range->compress_type overrides it */
1115  if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
1116  if (range->compress_type > BTRFS_COMPRESS_TYPES)
1117  return -EINVAL;
1118  if (range->compress_type)
1119  compress_type = range->compress_type;
1120  }
1121 
1122  if (isize == 0)
1123  return 0;
1124 
1125  /*
1126  * if we were not given a file, allocate a readahead
1127  * context
1128  */
1129  if (!file) {
1130  ra = kzalloc(sizeof(*ra), GFP_NOFS);
1131  if (!ra)
1132  return -ENOMEM;
1133  file_ra_state_init(ra, inode->i_mapping);
1134  } else {
1135  ra = &file->f_ra;
1136  }
1137 
 /* scratch array handed to cluster_pages_for_defrag(), one slot per page */
1138  pages = kmalloc(sizeof(struct page *) * max_cluster,
1139  GFP_NOFS);
1140  if (!pages) {
1141  ret = -ENOMEM;
1142  goto out_ra;
1143  }
1144 
1145  /* find the last page to defrag */
 /* if start + len wrapped (or len is 0) the range runs to end of file */
1146  if (range->start + range->len > range->start) {
1147  last_index = min_t(u64, isize - 1,
1148  range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
1149  } else {
1150  last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1151  }
1152 
1153  if (newer_than) {
1154  ret = find_new_extents(root, inode, newer_than,
1155  &newer_off, 64 * 1024);
1156  if (!ret) {
1157  range->start = newer_off;
1158  /*
1159  * we always align our defrag to help keep
1160  * the extents in the file evenly spaced
1161  */
1162  i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1163  } else
1164  goto out_ra;
1165  } else {
1166  i = range->start >> PAGE_CACHE_SHIFT;
1167  }
1168  if (!max_to_defrag)
1169  max_to_defrag = last_index + 1;
1170 
1171  /*
1172  * make writeback starts from i, so the defrag range can be
1173  * written sequentially.
1174  */
1175  if (i < inode->i_mapping->writeback_index)
1176  inode->i_mapping->writeback_index = i;
1177 
 /* i_size is re-read every iteration, so the loop also stops at the current end of file */
1178  while (i <= last_index && defrag_count < max_to_defrag &&
1179  (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
1180  PAGE_CACHE_SHIFT)) {
1181  /*
1182  * make sure we stop running if someone unmounts
1183  * the FS
1184  */
1185  if (!(inode->i_sb->s_flags & MS_ACTIVE))
1186  break;
1187 
1188  if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1189  extent_thresh, &last_len, &skip,
1190  &defrag_end, range->flags &
1192  unsigned long next;
1193  /*
1194  * the should_defrag function tells us how much to skip
1195  * bump our counter by the suggested amount
1196  */
1197  next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 /* always advance by at least one page */
1198  i = max(i + 1, next);
1199  continue;
1200  }
1201 
 /* without newer_than, size the cluster to end at defrag_end (capped) */
1202  if (!newer_than) {
1203  cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
1204  PAGE_CACHE_SHIFT) - i;
1205  cluster = min(cluster, max_cluster);
1206  } else {
1207  cluster = max_cluster;
1208  }
1209 
1210  if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
1211  BTRFS_I(inode)->force_compress = compress_type;
1212 
 /* issue readahead only when this cluster reaches past what was already requested */
1213  if (i + cluster > ra_index) {
1214  ra_index = max(i, ra_index);
1215  btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
1216  cluster);
1217  ra_index += max_cluster;
1218  }
1219 
1220  mutex_lock(&inode->i_mutex);
1221  ret = cluster_pages_for_defrag(inode, pages, i, cluster);
 /*
  * NOTE(review): this error exit jumps past the block further down that
  * resets force_compress to BTRFS_COMPRESS_NONE, so the value set above
  * stays on the inode.  Confirm whether that is intended.
  */
1222  if (ret < 0) {
1223  mutex_unlock(&inode->i_mutex);
1224  goto out_ra;
1225  }
1226 
1227  defrag_count += ret;
1229  mutex_unlock(&inode->i_mutex);
1230 
1231  if (newer_than) {
1232  if (newer_off == (u64)-1)
1233  break;
1234 
1235  if (ret > 0)
1236  i += ret;
1237 
 /* resume the search past both the previous hit and the pages just handled */
1238  newer_off = max(newer_off + 1,
1239  (u64)i << PAGE_CACHE_SHIFT);
1240 
1241  ret = find_new_extents(root, inode,
1242  newer_than, &newer_off,
1243  64 * 1024);
1244  if (!ret) {
1245  range->start = newer_off;
1246  i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1247  } else {
1248  break;
1249  }
1250  } else {
1251  if (ret > 0) {
1252  i += ret;
1253  last_len += ret << PAGE_CACHE_SHIFT;
1254  } else {
1255  i++;
1256  last_len = 0;
1257  }
1258  }
1259  }
1260 
1261  if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
1262  filemap_flush(inode->i_mapping);
1263 
1264  if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
1265  /* the filemap_flush will queue IO into the worker threads, but
1266  * we have to make sure the IO is actually started and that
1267  * ordered extents get created before we return
1268  */
1269  atomic_inc(&root->fs_info->async_submit_draining);
1270  while (atomic_read(&root->fs_info->nr_async_submits) ||
1271  atomic_read(&root->fs_info->async_delalloc_pages)) {
1272  wait_event(root->fs_info->async_submit_wait,
1273  (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
1274  atomic_read(&root->fs_info->async_delalloc_pages) == 0));
1275  }
1276  atomic_dec(&root->fs_info->async_submit_draining);
1277 
1278  mutex_lock(&inode->i_mutex);
1279  BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
1280  mutex_unlock(&inode->i_mutex);
1281  }
1282 
 /* NOTE(review): tests compress_type without checking the COMPRESS flag; confirm intended */
1283  if (range->compress_type == BTRFS_COMPRESS_LZO) {
1284  btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
1285  }
1286 
1287  ret = defrag_count;
1288 
1289 out_ra:
 /* ra is only owned by this function when no file was passed in */
1290  if (!file)
1291  kfree(ra);
1292  kfree(pages);
1293  return ret;
1294 }
1295 
1296 static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1297  void __user *arg)
1298 {
1299  u64 new_size;
1300  u64 old_size;
1301  u64 devid = 1;
1302  struct btrfs_ioctl_vol_args *vol_args;
1303  struct btrfs_trans_handle *trans;
1304  struct btrfs_device *device = NULL;
1305  char *sizestr;
1306  char *devstr = NULL;
1307  int ret = 0;
1308  int mod = 0;
1309 
1310  if (root->fs_info->sb->s_flags & MS_RDONLY)
1311  return -EROFS;
1312 
1313  if (!capable(CAP_SYS_ADMIN))
1314  return -EPERM;
1315 
1316  mutex_lock(&root->fs_info->volume_mutex);
1317  if (root->fs_info->balance_ctl) {
1318  printk(KERN_INFO "btrfs: balance in progress\n");
1319  ret = -EINVAL;
1320  goto out;
1321  }
1322 
1323  vol_args = memdup_user(arg, sizeof(*vol_args));
1324  if (IS_ERR(vol_args)) {
1325  ret = PTR_ERR(vol_args);
1326  goto out;
1327  }
1328 
1329  vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1330 
1331  sizestr = vol_args->name;
1332  devstr = strchr(sizestr, ':');
1333  if (devstr) {
1334  char *end;
1335  sizestr = devstr + 1;
1336  *devstr = '\0';
1337  devstr = vol_args->name;
1338  devid = simple_strtoull(devstr, &end, 10);
1339  printk(KERN_INFO "btrfs: resizing devid %llu\n",
1340  (unsigned long long)devid);
1341  }
1342  device = btrfs_find_device(root, devid, NULL, NULL);
1343  if (!device) {
1344  printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1345  (unsigned long long)devid);
1346  ret = -EINVAL;
1347  goto out_free;
1348  }
1349  if (device->fs_devices && device->fs_devices->seeding) {
1350  printk(KERN_INFO "btrfs: resizer unable to apply on "
1351  "seeding device %llu\n",
1352  (unsigned long long)devid);
1353  ret = -EINVAL;
1354  goto out_free;
1355  }
1356 
1357  if (!strcmp(sizestr, "max"))
1358  new_size = device->bdev->bd_inode->i_size;
1359  else {
1360  if (sizestr[0] == '-') {
1361  mod = -1;
1362  sizestr++;
1363  } else if (sizestr[0] == '+') {
1364  mod = 1;
1365  sizestr++;
1366  }
1367  new_size = memparse(sizestr, NULL);
1368  if (new_size == 0) {
1369  ret = -EINVAL;
1370  goto out_free;
1371  }
1372  }
1373 
1374  old_size = device->total_bytes;
1375 
1376  if (mod < 0) {
1377  if (new_size > old_size) {
1378  ret = -EINVAL;
1379  goto out_free;
1380  }
1381  new_size = old_size - new_size;
1382  } else if (mod > 0) {
1383  new_size = old_size + new_size;
1384  }
1385 
1386  if (new_size < 256 * 1024 * 1024) {
1387  ret = -EINVAL;
1388  goto out_free;
1389  }
1390  if (new_size > device->bdev->bd_inode->i_size) {
1391  ret = -EFBIG;
1392  goto out_free;
1393  }
1394 
1395  do_div(new_size, root->sectorsize);
1396  new_size *= root->sectorsize;
1397 
1398  printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1399  rcu_str_deref(device->name),
1400  (unsigned long long)new_size);
1401 
1402  if (new_size > old_size) {
1403  trans = btrfs_start_transaction(root, 0);
1404  if (IS_ERR(trans)) {
1405  ret = PTR_ERR(trans);
1406  goto out_free;
1407  }
1408  ret = btrfs_grow_device(trans, device, new_size);
1409  btrfs_commit_transaction(trans, root);
1410  } else if (new_size < old_size) {
1411  ret = btrfs_shrink_device(device, new_size);
1412  }
1413 
1414 out_free:
1415  kfree(vol_args);
1416 out:
1417  mutex_unlock(&root->fs_info->volume_mutex);
1418  return ret;
1419 }
1420 
1421 static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1422  char *name, unsigned long fd, int subvol,
1423  u64 *transid, bool readonly,
1424  struct btrfs_qgroup_inherit **inherit)
1425 {
1426  int namelen;
1427  int ret = 0;
1428 
1429  ret = mnt_want_write_file(file);
1430  if (ret)
1431  goto out;
1432 
1433  namelen = strlen(name);
1434  if (strchr(name, '/')) {
1435  ret = -EINVAL;
1436  goto out_drop_write;
1437  }
1438 
1439  if (name[0] == '.' &&
1440  (namelen == 1 || (name[1] == '.' && namelen == 2))) {
1441  ret = -EEXIST;
1442  goto out_drop_write;
1443  }
1444 
1445  if (subvol) {
1446  ret = btrfs_mksubvol(&file->f_path, name, namelen,
1447  NULL, transid, readonly, inherit);
1448  } else {
1449  struct fd src = fdget(fd);
1450  struct inode *src_inode;
1451  if (!src.file) {
1452  ret = -EINVAL;
1453  goto out_drop_write;
1454  }
1455 
1456  src_inode = src.file->f_path.dentry->d_inode;
1457  if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
1458  printk(KERN_INFO "btrfs: Snapshot src from "
1459  "another FS\n");
1460  ret = -EINVAL;
1461  } else {
1462  ret = btrfs_mksubvol(&file->f_path, name, namelen,
1463  BTRFS_I(src_inode)->root,
1464  transid, readonly, inherit);
1465  }
1466  fdput(src);
1467  }
1468 out_drop_write:
1469  mnt_drop_write_file(file);
1470 out:
1471  return ret;
1472 }
1473 
1474 static noinline int btrfs_ioctl_snap_create(struct file *file,
1475  void __user *arg, int subvol)
1476 {
1477  struct btrfs_ioctl_vol_args *vol_args;
1478  int ret;
1479 
1480  vol_args = memdup_user(arg, sizeof(*vol_args));
1481  if (IS_ERR(vol_args))
1482  return PTR_ERR(vol_args);
1483  vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1484 
1485  ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1486  vol_args->fd, subvol,
1487  NULL, false, NULL);
1488 
1489  kfree(vol_args);
1490  return ret;
1491 }
1492 
1493 static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1494  void __user *arg, int subvol)
1495 {
1496  struct btrfs_ioctl_vol_args_v2 *vol_args;
1497  int ret;
1498  u64 transid = 0;
1499  u64 *ptr = NULL;
1500  bool readonly = false;
1501  struct btrfs_qgroup_inherit *inherit = NULL;
1502 
1503  vol_args = memdup_user(arg, sizeof(*vol_args));
1504  if (IS_ERR(vol_args))
1505  return PTR_ERR(vol_args);
1506  vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
1507 
1508  if (vol_args->flags &
1511  ret = -EOPNOTSUPP;
1512  goto out;
1513  }
1514 
1515  if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
1516  ptr = &transid;
1517  if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
1518  readonly = true;
1519  if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1520  if (vol_args->size > PAGE_CACHE_SIZE) {
1521  ret = -EINVAL;
1522  goto out;
1523  }
1524  inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1525  if (IS_ERR(inherit)) {
1526  ret = PTR_ERR(inherit);
1527  goto out;
1528  }
1529  }
1530 
1531  ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1532  vol_args->fd, subvol, ptr,
1533  readonly, &inherit);
1534 
1535  if (ret == 0 && ptr &&
1536  copy_to_user(arg +
1538  transid), ptr, sizeof(*ptr)))
1539  ret = -EFAULT;
1540 out:
1541  kfree(vol_args);
1542  kfree(inherit);
1543  return ret;
1544 }
1545 
1546 static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1547  void __user *arg)
1548 {
1549  struct inode *inode = fdentry(file)->d_inode;
1550  struct btrfs_root *root = BTRFS_I(inode)->root;
1551  int ret = 0;
1552  u64 flags = 0;
1553 
1554  if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1555  return -EINVAL;
1556 
1557  down_read(&root->fs_info->subvol_sem);
1558  if (btrfs_root_readonly(root))
1559  flags |= BTRFS_SUBVOL_RDONLY;
1560  up_read(&root->fs_info->subvol_sem);
1561 
1562  if (copy_to_user(arg, &flags, sizeof(flags)))
1563  ret = -EFAULT;
1564 
1565  return ret;
1566 }
1567 
1568 static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1569  void __user *arg)
1570 {
1571  struct inode *inode = fdentry(file)->d_inode;
1572  struct btrfs_root *root = BTRFS_I(inode)->root;
1573  struct btrfs_trans_handle *trans;
1574  u64 root_flags;
1575  u64 flags;
1576  int ret = 0;
1577 
1578  ret = mnt_want_write_file(file);
1579  if (ret)
1580  goto out;
1581 
1582  if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1583  ret = -EINVAL;
1584  goto out_drop_write;
1585  }
1586 
1587  if (copy_from_user(&flags, arg, sizeof(flags))) {
1588  ret = -EFAULT;
1589  goto out_drop_write;
1590  }
1591 
1592  if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
1593  ret = -EINVAL;
1594  goto out_drop_write;
1595  }
1596 
1597  if (flags & ~BTRFS_SUBVOL_RDONLY) {
1598  ret = -EOPNOTSUPP;
1599  goto out_drop_write;
1600  }
1601 
1602  if (!inode_owner_or_capable(inode)) {
1603  ret = -EACCES;
1604  goto out_drop_write;
1605  }
1606 
1607  down_write(&root->fs_info->subvol_sem);
1608 
1609  /* nothing to do */
1610  if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
1611  goto out_drop_sem;
1612 
1613  root_flags = btrfs_root_flags(&root->root_item);
1614  if (flags & BTRFS_SUBVOL_RDONLY)
1615  btrfs_set_root_flags(&root->root_item,
1616  root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
1617  else
1618  btrfs_set_root_flags(&root->root_item,
1619  root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1620 
1621  trans = btrfs_start_transaction(root, 1);
1622  if (IS_ERR(trans)) {
1623  ret = PTR_ERR(trans);
1624  goto out_reset;
1625  }
1626 
1627  ret = btrfs_update_root(trans, root->fs_info->tree_root,
1628  &root->root_key, &root->root_item);
1629 
1630  btrfs_commit_transaction(trans, root);
1631 out_reset:
1632  if (ret)
1633  btrfs_set_root_flags(&root->root_item, root_flags);
1634 out_drop_sem:
1635  up_write(&root->fs_info->subvol_sem);
1636 out_drop_write:
1637  mnt_drop_write_file(file);
1638 out:
1639  return ret;
1640 }
1641 
1642 /*
1643  * helper to check if the subvolume references other subvolumes
1644  */
1645 static noinline int may_destroy_subvol(struct btrfs_root *root)
1646 {
1647  struct btrfs_path *path;
1648  struct btrfs_key key;
1649  int ret;
1650 
1651  path = btrfs_alloc_path();
1652  if (!path)
1653  return -ENOMEM;
1654 
1655  key.objectid = root->root_key.objectid;
1656  key.type = BTRFS_ROOT_REF_KEY;
1657  key.offset = (u64)-1;
1658 
1659  ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
1660  &key, path, 0, 0);
1661  if (ret < 0)
1662  goto out;
1663  BUG_ON(ret == 0);
1664 
1665  ret = 0;
1666  if (path->slots[0] > 0) {
1667  path->slots[0]--;
1668  btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1669  if (key.objectid == root->root_key.objectid &&
1670  key.type == BTRFS_ROOT_REF_KEY)
1671  ret = -ENOTEMPTY;
1672  }
1673 out:
1674  btrfs_free_path(path);
1675  return ret;
1676 }
1677 
1678 static noinline int key_in_sk(struct btrfs_key *key,
1679  struct btrfs_ioctl_search_key *sk)
1680 {
1681  struct btrfs_key test;
1682  int ret;
1683 
1684  test.objectid = sk->min_objectid;
1685  test.type = sk->min_type;
1686  test.offset = sk->min_offset;
1687 
1688  ret = btrfs_comp_cpu_keys(key, &test);
1689  if (ret < 0)
1690  return 0;
1691 
1692  test.objectid = sk->max_objectid;
1693  test.type = sk->max_type;
1694  test.offset = sk->max_offset;
1695 
1696  ret = btrfs_comp_cpu_keys(key, &test);
1697  if (ret > 0)
1698  return 0;
1699  return 1;
1700 }
1701 
/*
 * copy_to_sk - copy matching items from one leaf into the search result buffer.
 *
 * Starting at path->slots[0] of the leaf path->nodes[0], every item whose
 * key passes key_in_sk() is appended to @buf at *sk_offset as a
 * btrfs_ioctl_search_header followed by the item data.  Items larger than
 * BTRFS_SEARCH_ARGS_BUFSIZE are reported with len 0 and no data.  A leaf
 * whose generation is above sk->max_transid is skipped entirely.
 *
 * @key is used as scratch while walking and, unless the buffer overflowed,
 * is advanced on exit to the next key after the last one examined, bounded
 * by the sk->max_* values.
 *
 * @sk_offset: in/out, number of bytes of @buf already used
 * @num_found: in/out, number of items copied so far; the walk stops once
 *             it reaches sk->nr_items
 *
 * Returns 1 when the buffer has no room for the next item or when @key
 * cannot be advanced any further, 0 otherwise.
 *
 * NOTE(review): @root is not used in the visible body.  The line that
 * completes the buffer-overflow condition (embedded number 1738) is not
 * present in this listing; confirm against the original file.
 */
1702 static noinline int copy_to_sk(struct btrfs_root *root,
1703  struct btrfs_path *path,
1704  struct btrfs_key *key,
1705  struct btrfs_ioctl_search_key *sk,
1706  char *buf,
1707  unsigned long *sk_offset,
1708  int *num_found)
1709 {
1710  u64 found_transid;
1711  struct extent_buffer *leaf;
1712  struct btrfs_ioctl_search_header sh;
1713  unsigned long item_off;
1714  unsigned long item_len;
1715  int nritems;
1716  int i;
1717  int slot;
1718  int ret = 0;
1719 
1720  leaf = path->nodes[0];
1721  slot = path->slots[0];
1722  nritems = btrfs_header_nritems(leaf);
1723 
 /* leaf is newer than the caller asked for: skip all of its items */
1724  if (btrfs_header_generation(leaf) > sk->max_transid) {
1725  i = nritems;
1726  goto advance_key;
1727  }
1728  found_transid = btrfs_header_generation(leaf);
1729 
1730  for (i = slot; i < nritems; i++) {
1731  item_off = btrfs_item_ptr_offset(leaf, i);
1732  item_len = btrfs_item_size_nr(leaf, i);
1733 
 /* oversized items are reported header-only */
1734  if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
1735  item_len = 0;
1736 
 /* the space check runs before the key filter, so it also applies to items that would be skipped */
1737  if (sizeof(sh) + item_len + *sk_offset >
1739  ret = 1;
1740  goto overflow;
1741  }
1742 
1743  btrfs_item_key_to_cpu(leaf, key, i);
1744  if (!key_in_sk(key, sk))
1745  continue;
1746 
1747  sh.objectid = key->objectid;
1748  sh.offset = key->offset;
1749  sh.type = key->type;
1750  sh.len = item_len;
1751  sh.transid = found_transid;
1752 
1753  /* copy search result header */
1754  memcpy(buf + *sk_offset, &sh, sizeof(sh));
1755  *sk_offset += sizeof(sh);
1756 
1757  if (item_len) {
1758  char *p = buf + *sk_offset;
1759  /* copy the item */
1760  read_extent_buffer(leaf, p,
1761  item_off, item_len);
1762  *sk_offset += item_len;
1763  }
1764  (*num_found)++;
1765 
1766  if (*num_found >= sk->nr_items)
1767  break;
1768  }
1769 advance_key:
 /* bump offset, then type, then objectid; return 1 when all three are at their limit */
1770  ret = 0;
1771  if (key->offset < (u64)-1 && key->offset < sk->max_offset)
1772  key->offset++;
1773  else if (key->type < (u8)-1 && key->type < sk->max_type) {
1774  key->offset = 0;
1775  key->type++;
1776  } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
1777  key->offset = 0;
1778  key->type = 0;
1779  key->objectid++;
1780  } else
1781  ret = 1;
1782 overflow:
1783  return ret;
1784 }
1785 
1786 static noinline int search_ioctl(struct inode *inode,
1787  struct btrfs_ioctl_search_args *args)
1788 {
1789  struct btrfs_root *root;
1790  struct btrfs_key key;
1791  struct btrfs_key max_key;
1792  struct btrfs_path *path;
1793  struct btrfs_ioctl_search_key *sk = &args->key;
1794  struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
1795  int ret;
1796  int num_found = 0;
1797  unsigned long sk_offset = 0;
1798 
1799  path = btrfs_alloc_path();
1800  if (!path)
1801  return -ENOMEM;
1802 
1803  if (sk->tree_id == 0) {
1804  /* search the root of the inode that was passed */
1805  root = BTRFS_I(inode)->root;
1806  } else {
1807  key.objectid = sk->tree_id;
1808  key.type = BTRFS_ROOT_ITEM_KEY;
1809  key.offset = (u64)-1;
1810  root = btrfs_read_fs_root_no_name(info, &key);
1811  if (IS_ERR(root)) {
1812  printk(KERN_ERR "could not find root %llu\n",
1813  sk->tree_id);
1814  btrfs_free_path(path);
1815  return -ENOENT;
1816  }
1817  }
1818 
1819  key.objectid = sk->min_objectid;
1820  key.type = sk->min_type;
1821  key.offset = sk->min_offset;
1822 
1823  max_key.objectid = sk->max_objectid;
1824  max_key.type = sk->max_type;
1825  max_key.offset = sk->max_offset;
1826 
1827  path->keep_locks = 1;
1828 
1829  while(1) {
1830  ret = btrfs_search_forward(root, &key, &max_key, path, 0,
1831  sk->min_transid);
1832  if (ret != 0) {
1833  if (ret > 0)
1834  ret = 0;
1835  goto err;
1836  }
1837  ret = copy_to_sk(root, path, &key, sk, args->buf,
1838  &sk_offset, &num_found);
1839  btrfs_release_path(path);
1840  if (ret || num_found >= sk->nr_items)
1841  break;
1842 
1843  }
1844  ret = 0;
1845 err:
1846  sk->nr_items = num_found;
1847  btrfs_free_path(path);
1848  return ret;
1849 }
1850 
1851 static noinline int btrfs_ioctl_tree_search(struct file *file,
1852  void __user *argp)
1853 {
1854  struct btrfs_ioctl_search_args *args;
1855  struct inode *inode;
1856  int ret;
1857 
1858  if (!capable(CAP_SYS_ADMIN))
1859  return -EPERM;
1860 
1861  args = memdup_user(argp, sizeof(*args));
1862  if (IS_ERR(args))
1863  return PTR_ERR(args);
1864 
1865  inode = fdentry(file)->d_inode;
1866  ret = search_ioctl(inode, args);
1867  if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1868  ret = -EFAULT;
1869  kfree(args);
1870  return ret;
1871 }
1872 
1873 /*
1874  * Search INODE_REFs to identify path name of 'dirid' directory
1875  * in a 'tree_id' tree. and sets path name to 'name'.
1876  */
1877 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1878  u64 tree_id, u64 dirid, char *name)
1879 {
1880  struct btrfs_root *root;
1881  struct btrfs_key key;
1882  char *ptr;
1883  int ret = -1;
1884  int slot;
1885  int len;
1886  int total_len = 0;
1887  struct btrfs_inode_ref *iref;
1888  struct extent_buffer *l;
1889  struct btrfs_path *path;
1890 
1891  if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1892  name[0]='\0';
1893  return 0;
1894  }
1895 
1896  path = btrfs_alloc_path();
1897  if (!path)
1898  return -ENOMEM;
1899 
1900  ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
1901 
1902  key.objectid = tree_id;
1903  key.type = BTRFS_ROOT_ITEM_KEY;
1904  key.offset = (u64)-1;
1905  root = btrfs_read_fs_root_no_name(info, &key);
1906  if (IS_ERR(root)) {
1907  printk(KERN_ERR "could not find root %llu\n", tree_id);
1908  ret = -ENOENT;
1909  goto out;
1910  }
1911 
1912  key.objectid = dirid;
1913  key.type = BTRFS_INODE_REF_KEY;
1914  key.offset = (u64)-1;
1915 
1916  while(1) {
1917  ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1918  if (ret < 0)
1919  goto out;
1920 
1921  l = path->nodes[0];
1922  slot = path->slots[0];
1923  if (ret > 0 && slot > 0)
1924  slot--;
1925  btrfs_item_key_to_cpu(l, &key, slot);
1926 
1927  if (ret > 0 && (key.objectid != dirid ||
1928  key.type != BTRFS_INODE_REF_KEY)) {
1929  ret = -ENOENT;
1930  goto out;
1931  }
1932 
1933  iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1934  len = btrfs_inode_ref_name_len(l, iref);
1935  ptr -= len + 1;
1936  total_len += len + 1;
1937  if (ptr < name)
1938  goto out;
1939 
1940  *(ptr + len) = '/';
1941  read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
1942 
1943  if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1944  break;
1945 
1946  btrfs_release_path(path);
1947  key.objectid = key.offset;
1948  key.offset = (u64)-1;
1949  dirid = key.objectid;
1950  }
1951  if (ptr < name)
1952  goto out;
1953  memmove(name, ptr, total_len);
1954  name[total_len]='\0';
1955  ret = 0;
1956 out:
1957  btrfs_free_path(path);
1958  return ret;
1959 }
1960 
1961 static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1962  void __user *argp)
1963 {
1964  struct btrfs_ioctl_ino_lookup_args *args;
1965  struct inode *inode;
1966  int ret;
1967 
1968  if (!capable(CAP_SYS_ADMIN))
1969  return -EPERM;
1970 
1971  args = memdup_user(argp, sizeof(*args));
1972  if (IS_ERR(args))
1973  return PTR_ERR(args);
1974 
1975  inode = fdentry(file)->d_inode;
1976 
1977  if (args->treeid == 0)
1978  args->treeid = BTRFS_I(inode)->root->root_key.objectid;
1979 
1980  ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
1981  args->treeid, args->objectid,
1982  args->name);
1983 
1984  if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1985  ret = -EFAULT;
1986 
1987  kfree(args);
1988  return ret;
1989 }
1990 
/*
 * btrfs_ioctl_snap_destroy - delete the subvolume or snapshot called
 * vol_args->name inside the directory that @file refers to.
 *
 * The name must be a single component: it may not contain '/' and names
 * for which strncmp(name, "..", strlen(name)) == 0 are rejected.  Callers
 * without CAP_SYS_ADMIN additionally need the USER_SUBVOL_RM_ALLOWED
 * mount option, write+exec permission on the target inode and must pass
 * btrfs_may_delete().  The target has to be a subvolume root (inode number
 * BTRFS_FIRST_FREE_OBJECTID) and may_destroy_subvol() must succeed.
 *
 * On success the subvolume is unlinked from the directory, its root refs
 * are set to 0, an orphan item is inserted for it if none was inserted
 * before, and the dentry is deleted.
 *
 * Returns 0 or a negative errno.
 *
 * NOTE(review): dir->i_mutex is released at out_unlock_dir but no matching
 * lock call is visible; the embedded numbering skips 2022 (and 2136 in the
 * success path), so lines appear to be missing from this listing.
 */
1991 static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1992  void __user *arg)
1993 {
1994  struct dentry *parent = fdentry(file);
1995  struct dentry *dentry;
1996  struct inode *dir = parent->d_inode;
1997  struct inode *inode;
1998  struct btrfs_root *root = BTRFS_I(dir)->root;
1999  struct btrfs_root *dest = NULL;
2000  struct btrfs_ioctl_vol_args *vol_args;
2001  struct btrfs_trans_handle *trans;
2002  int namelen;
2003  int ret;
2004  int err = 0;
2005 
2006  vol_args = memdup_user(arg, sizeof(*vol_args));
2007  if (IS_ERR(vol_args))
2008  return PTR_ERR(vol_args);
2009 
 /* user memory is not trusted to be NUL terminated */
2010  vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2011  namelen = strlen(vol_args->name);
 /* the strncmp() is bounded by namelen, so "", "." and ".." all match */
2012  if (strchr(vol_args->name, '/') ||
2013  strncmp(vol_args->name, "..", namelen) == 0) {
2014  err = -EINVAL;
2015  goto out;
2016  }
2017 
2018  err = mnt_want_write_file(file);
2019  if (err)
2020  goto out;
2021 
2023  dentry = lookup_one_len(vol_args->name, parent, namelen);
2024  if (IS_ERR(dentry)) {
2025  err = PTR_ERR(dentry);
2026  goto out_unlock_dir;
2027  }
2028 
2029  if (!dentry->d_inode) {
2030  err = -ENOENT;
2031  goto out_dput;
2032  }
2033 
2034  inode = dentry->d_inode;
2035  dest = BTRFS_I(inode)->root;
2036  if (!capable(CAP_SYS_ADMIN)){
2037  /*
2038  * Regular user. Only allow this with a special mount
2039  * option, when the user has write+exec access to the
2040  * subvol root, and when rmdir(2) would have been
2041  * allowed.
2042  *
2043  * Note that this is _not_ a check that the subvol is
2044  * empty or doesn't contain data that we wouldn't
2045  * otherwise be able to delete.
2046  *
2047  * Users who want to delete empty subvols should try
2048  * rmdir(2).
2049  */
2050  err = -EPERM;
2051  if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
2052  goto out_dput;
2053 
2054  /*
2055  * Do not allow deletion if the parent dir is the same
2056  * as the dir to be deleted. That means the ioctl
2057  * must be called on the dentry referencing the root
2058  * of the subvol, not a random directory contained
2059  * within it.
2060  */
2061  err = -EINVAL;
2062  if (root == dest)
2063  goto out_dput;
2064 
2065  err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2066  if (err)
2067  goto out_dput;
2068 
2069  /* check if subvolume may be deleted by a non-root user */
2070  err = btrfs_may_delete(dir, dentry, 1);
2071  if (err)
2072  goto out_dput;
2073  }
2074 
 /* only the root directory of a subvolume can be destroyed this way */
2075  if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2076  err = -EINVAL;
2077  goto out_dput;
2078  }
2079 
2080  mutex_lock(&inode->i_mutex);
2081  err = d_invalidate(dentry);
2082  if (err)
2083  goto out_unlock;
2084 
2085  down_write(&root->fs_info->subvol_sem);
2086 
2087  err = may_destroy_subvol(dest);
2088  if (err)
2089  goto out_up_write;
2090 
2091  trans = btrfs_start_transaction(root, 0);
2092  if (IS_ERR(trans)) {
2093  err = PTR_ERR(trans);
2094  goto out_up_write;
2095  }
 /* no space was reserved above (0 items); use the global reserve instead */
2096  trans->block_rsv = &root->fs_info->global_block_rsv;
2097 
2098  ret = btrfs_unlink_subvol(trans, root, dir,
2099  dest->root_key.objectid,
2100  dentry->d_name.name,
2101  dentry->d_name.len);
2102  if (ret) {
2103  err = ret;
2104  btrfs_abort_transaction(trans, root, ret);
2105  goto out_end_trans;
2106  }
2107 
2108  btrfs_record_root_in_trans(trans, dest);
2109 
 /* reset drop progress and drop the last root reference */
2110  memset(&dest->root_item.drop_progress, 0,
2111  sizeof(dest->root_item.drop_progress));
2112  dest->root_item.drop_level = 0;
2113  btrfs_set_root_refs(&dest->root_item, 0);
2114 
 /* xchg() returns the old value, so the orphan item is inserted only once */
2115  if (!xchg(&dest->orphan_item_inserted, 1)) {
2116  ret = btrfs_insert_orphan_item(trans,
2117  root->fs_info->tree_root,
2118  dest->root_key.objectid);
2119  if (ret) {
2120  btrfs_abort_transaction(trans, root, ret);
2121  err = ret;
2122  goto out_end_trans;
2123  }
2124  }
2125 out_end_trans:
2126  ret = btrfs_end_transaction(trans, root);
2127  if (ret && !err)
2128  err = ret;
 /* NOTE(review): S_DEAD is set here even when err is non-zero; confirm intended */
2129  inode->i_flags |= S_DEAD;
2130 out_up_write:
2131  up_write(&root->fs_info->subvol_sem);
2132 out_unlock:
2133  mutex_unlock(&inode->i_mutex);
2134  if (!err) {
2135  shrink_dcache_sb(root->fs_info->sb);
2137  d_delete(dentry);
2138  }
2139 out_dput:
2140  dput(dentry);
2141 out_unlock_dir:
2142  mutex_unlock(&dir->i_mutex);
2143  mnt_drop_write_file(file);
2144 out:
2145  kfree(vol_args);
2146  return err;
2147 }
2148 
/*
 * btrfs_ioctl_defrag - entry point of the defrag ioctls.
 *
 * Directories: requires CAP_SYS_ADMIN; runs btrfs_defrag_root() on the
 * inode's root and, if that succeeds, on the extent root.
 * Regular files: the file must be open for writing.  The range arguments
 * are copied from @argp; when @argp is NULL a zeroed range with
 * len = (u64)-1 is used instead.  A positive return from
 * btrfs_defrag_file() is reported as 0.
 * Any other file type: -EINVAL.
 *
 * Returns 0 or a negative errno (-EROFS for a read-only root, -EPERM,
 * -EINVAL, -ENOMEM, -EFAULT, or an error from the helpers).
 *
 * NOTE(review): `range` is used below but its declaration is not present
 * in this listing (embedded numbering skips 2153), and the first statement
 * of the BTRFS_DEFRAG_RANGE_COMPRESS branch (2195) also appears to be
 * missing.  Confirm against the original file.
 */
2149 static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2150 {
2151  struct inode *inode = fdentry(file)->d_inode;
2152  struct btrfs_root *root = BTRFS_I(inode)->root;
2154  int ret;
2155 
2156  if (btrfs_root_readonly(root))
2157  return -EROFS;
2158 
2159  ret = mnt_want_write_file(file);
2160  if (ret)
2161  return ret;
2162 
2163  switch (inode->i_mode & S_IFMT) {
2164  case S_IFDIR:
2165  if (!capable(CAP_SYS_ADMIN)) {
2166  ret = -EPERM;
2167  goto out;
2168  }
2169  ret = btrfs_defrag_root(root, 0);
2170  if (ret)
2171  goto out;
2172  ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
2173  break;
2174  case S_IFREG:
2175  if (!(file->f_mode & FMODE_WRITE)) {
2176  ret = -EINVAL;
2177  goto out;
2178  }
2179 
2180  range = kzalloc(sizeof(*range), GFP_KERNEL);
2181  if (!range) {
2182  ret = -ENOMEM;
2183  goto out;
2184  }
2185 
2186  if (argp) {
2187  if (copy_from_user(range, argp,
2188  sizeof(*range))) {
2189  ret = -EFAULT;
2190  kfree(range);
2191  goto out;
2192  }
2193  /* compression requires us to start the IO */
2194  if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
2196  range->extent_thresh = (u32)-1;
2197  }
2198  } else {
2199  /* the rest are all set to zero by kzalloc */
2200  range->len = (u64)-1;
2201  }
2202  ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
2203  range, 0, 0);
 /* a positive result is a count, not an error: report plain success */
2204  if (ret > 0)
2205  ret = 0;
2206  kfree(range);
2207  break;
2208  default:
2209  ret = -EINVAL;
2210  }
2211 out:
2212  mnt_drop_write_file(file);
2213  return ret;
2214 }
2215 
2216 static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2217 {
2218  struct btrfs_ioctl_vol_args *vol_args;
2219  int ret;
2220 
2221  if (!capable(CAP_SYS_ADMIN))
2222  return -EPERM;
2223 
2224  mutex_lock(&root->fs_info->volume_mutex);
2225  if (root->fs_info->balance_ctl) {
2226  printk(KERN_INFO "btrfs: balance in progress\n");
2227  ret = -EINVAL;
2228  goto out;
2229  }
2230 
2231  vol_args = memdup_user(arg, sizeof(*vol_args));
2232  if (IS_ERR(vol_args)) {
2233  ret = PTR_ERR(vol_args);
2234  goto out;
2235  }
2236 
2237  vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2238  ret = btrfs_init_new_device(root, vol_args->name);
2239 
2240  kfree(vol_args);
2241 out:
2242  mutex_unlock(&root->fs_info->volume_mutex);
2243  return ret;
2244 }
2245 
2246 static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2247 {
2248  struct btrfs_ioctl_vol_args *vol_args;
2249  int ret;
2250 
2251  if (!capable(CAP_SYS_ADMIN))
2252  return -EPERM;
2253 
2254  if (root->fs_info->sb->s_flags & MS_RDONLY)
2255  return -EROFS;
2256 
2257  mutex_lock(&root->fs_info->volume_mutex);
2258  if (root->fs_info->balance_ctl) {
2259  printk(KERN_INFO "btrfs: balance in progress\n");
2260  ret = -EINVAL;
2261  goto out;
2262  }
2263 
2264  vol_args = memdup_user(arg, sizeof(*vol_args));
2265  if (IS_ERR(vol_args)) {
2266  ret = PTR_ERR(vol_args);
2267  goto out;
2268  }
2269 
2270  vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2271  ret = btrfs_rm_device(root, vol_args->name);
2272 
2273  kfree(vol_args);
2274 out:
2275  mutex_unlock(&root->fs_info->volume_mutex);
2276  return ret;
2277 }
2278 
2279 static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2280 {
2281  struct btrfs_ioctl_fs_info_args *fi_args;
2282  struct btrfs_device *device;
2283  struct btrfs_device *next;
2284  struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2285  int ret = 0;
2286 
2287  if (!capable(CAP_SYS_ADMIN))
2288  return -EPERM;
2289 
2290  fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2291  if (!fi_args)
2292  return -ENOMEM;
2293 
2294  fi_args->num_devices = fs_devices->num_devices;
2295  memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2296 
2297  mutex_lock(&fs_devices->device_list_mutex);
2298  list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2299  if (device->devid > fi_args->max_id)
2300  fi_args->max_id = device->devid;
2301  }
2302  mutex_unlock(&fs_devices->device_list_mutex);
2303 
2304  if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2305  ret = -EFAULT;
2306 
2307  kfree(fi_args);
2308  return ret;
2309 }
2310 
2311 static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2312 {
2313  struct btrfs_ioctl_dev_info_args *di_args;
2314  struct btrfs_device *dev;
2315  struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2316  int ret = 0;
2317  char *s_uuid = NULL;
2318  char empty_uuid[BTRFS_UUID_SIZE] = {0};
2319 
2320  if (!capable(CAP_SYS_ADMIN))
2321  return -EPERM;
2322 
2323  di_args = memdup_user(arg, sizeof(*di_args));
2324  if (IS_ERR(di_args))
2325  return PTR_ERR(di_args);
2326 
2327  if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
2328  s_uuid = di_args->uuid;
2329 
2330  mutex_lock(&fs_devices->device_list_mutex);
2331  dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
2332  mutex_unlock(&fs_devices->device_list_mutex);
2333 
2334  if (!dev) {
2335  ret = -ENODEV;
2336  goto out;
2337  }
2338 
2339  di_args->devid = dev->devid;
2340  di_args->bytes_used = dev->bytes_used;
2341  di_args->total_bytes = dev->total_bytes;
2342  memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2343  if (dev->name) {
2344  struct rcu_string *name;
2345 
2346  rcu_read_lock();
2347  name = rcu_dereference(dev->name);
2348  strncpy(di_args->path, name->str, sizeof(di_args->path));
2349  rcu_read_unlock();
2350  di_args->path[sizeof(di_args->path) - 1] = 0;
2351  } else {
2352  di_args->path[0] = '\0';
2353  }
2354 
2355 out:
2356  if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2357  ret = -EFAULT;
2358 
2359  kfree(di_args);
2360  return ret;
2361 }
2362 
2363 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2364  u64 off, u64 olen, u64 destoff)
2365 {
2366  struct inode *inode = fdentry(file)->d_inode;
2367  struct btrfs_root *root = BTRFS_I(inode)->root;
2368  struct fd src_file;
2369  struct inode *src;
2370  struct btrfs_trans_handle *trans;
2371  struct btrfs_path *path;
2372  struct extent_buffer *leaf;
2373  char *buf;
2374  struct btrfs_key key;
2375  u32 nritems;
2376  int slot;
2377  int ret;
2378  u64 len = olen;
2379  u64 bs = root->fs_info->sb->s_blocksize;
2380 
2381  /*
2382  * TODO:
2383  * - split compressed inline extents. annoying: we need to
2384  * decompress into destination's address_space (the file offset
2385  * may change, so source mapping won't do), then recompress (or
2386  * otherwise reinsert) a subrange.
2387  * - allow ranges within the same file to be cloned (provided
2388  * they don't overlap)?
2389  */
2390 
2391  /* the destination must be opened for writing */
2392  if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
2393  return -EINVAL;
2394 
2395  if (btrfs_root_readonly(root))
2396  return -EROFS;
2397 
2398  ret = mnt_want_write_file(file);
2399  if (ret)
2400  return ret;
2401 
2402  src_file = fdget(srcfd);
2403  if (!src_file.file) {
2404  ret = -EBADF;
2405  goto out_drop_write;
2406  }
2407 
2408  ret = -EXDEV;
2409  if (src_file.file->f_path.mnt != file->f_path.mnt)
2410  goto out_fput;
2411 
2412  src = src_file.file->f_dentry->d_inode;
2413 
2414  ret = -EINVAL;
2415  if (src == inode)
2416  goto out_fput;
2417 
2418  /* the src must be open for reading */
2419  if (!(src_file.file->f_mode & FMODE_READ))
2420  goto out_fput;
2421 
2422  /* don't make the dst file partly checksummed */
2423  if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
2424  (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
2425  goto out_fput;
2426 
2427  ret = -EISDIR;
2428  if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
2429  goto out_fput;
2430 
2431  ret = -EXDEV;
2432  if (src->i_sb != inode->i_sb)
2433  goto out_fput;
2434 
2435  ret = -ENOMEM;
2436  buf = vmalloc(btrfs_level_size(root, 0));
2437  if (!buf)
2438  goto out_fput;
2439 
2440  path = btrfs_alloc_path();
2441  if (!path) {
2442  vfree(buf);
2443  goto out_fput;
2444  }
2445  path->reada = 2;
2446 
2447  if (inode < src) {
2450  } else {
2453  }
2454 
2455  /* determine range to clone */
2456  ret = -EINVAL;
2457  if (off + len > src->i_size || off + len < off)
2458  goto out_unlock;
2459  if (len == 0)
2460  olen = len = src->i_size - off;
2461  /* if we extend to eof, continue to block boundary */
2462  if (off + len == src->i_size)
2463  len = ALIGN(src->i_size, bs) - off;
2464 
2465  /* verify the end result is block aligned */
2466  if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
2467  !IS_ALIGNED(destoff, bs))
2468  goto out_unlock;
2469 
2470  if (destoff > inode->i_size) {
2471  ret = btrfs_cont_expand(inode, inode->i_size, destoff);
2472  if (ret)
2473  goto out_unlock;
2474  }
2475 
2476  /* truncate page cache pages from target inode range */
2477  truncate_inode_pages_range(&inode->i_data, destoff,
2478  PAGE_CACHE_ALIGN(destoff + len) - 1);
2479 
2480  /* do any pending delalloc/csum calc on src, one way or
2481  another, and lock file content */
2482  while (1) {
2483  struct btrfs_ordered_extent *ordered;
2484  lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2485  ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1);
2486  if (!ordered &&
2487  !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1,
2488  EXTENT_DELALLOC, 0, NULL))
2489  break;
2490  unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2491  if (ordered)
2492  btrfs_put_ordered_extent(ordered);
2493  btrfs_wait_ordered_range(src, off, len);
2494  }
2495 
2496  /* clone data */
2497  key.objectid = btrfs_ino(src);
2499  key.offset = 0;
2500 
2501  while (1) {
2502  /*
2503  * note the key will change type as we walk through the
2504  * tree.
2505  */
2506  ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
2507  0, 0);
2508  if (ret < 0)
2509  goto out;
2510 
2511  nritems = btrfs_header_nritems(path->nodes[0]);
2512  if (path->slots[0] >= nritems) {
2513  ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
2514  if (ret < 0)
2515  goto out;
2516  if (ret > 0)
2517  break;
2518  nritems = btrfs_header_nritems(path->nodes[0]);
2519  }
2520  leaf = path->nodes[0];
2521  slot = path->slots[0];
2522 
2523  btrfs_item_key_to_cpu(leaf, &key, slot);
2524  if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
2525  key.objectid != btrfs_ino(src))
2526  break;
2527 
2528  if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
2530  int type;
2531  u32 size;
2532  struct btrfs_key new_key;
2533  u64 disko = 0, diskl = 0;
2534  u64 datao = 0, datal = 0;
2535  u8 comp;
2536  u64 endoff;
2537 
2538  size = btrfs_item_size_nr(leaf, slot);
2539  read_extent_buffer(leaf, buf,
2540  btrfs_item_ptr_offset(leaf, slot),
2541  size);
2542 
2543  extent = btrfs_item_ptr(leaf, slot,
2544  struct btrfs_file_extent_item);
2545  comp = btrfs_file_extent_compression(leaf, extent);
2546  type = btrfs_file_extent_type(leaf, extent);
2547  if (type == BTRFS_FILE_EXTENT_REG ||
2548  type == BTRFS_FILE_EXTENT_PREALLOC) {
2549  disko = btrfs_file_extent_disk_bytenr(leaf,
2550  extent);
2551  diskl = btrfs_file_extent_disk_num_bytes(leaf,
2552  extent);
2553  datao = btrfs_file_extent_offset(leaf, extent);
2554  datal = btrfs_file_extent_num_bytes(leaf,
2555  extent);
2556  } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2557  /* take upper bound, may be compressed */
2558  datal = btrfs_file_extent_ram_bytes(leaf,
2559  extent);
2560  }
2561  btrfs_release_path(path);
2562 
2563  if (key.offset + datal <= off ||
2564  key.offset >= off + len - 1)
2565  goto next;
2566 
2567  memcpy(&new_key, &key, sizeof(new_key));
2568  new_key.objectid = btrfs_ino(inode);
2569  if (off <= key.offset)
2570  new_key.offset = key.offset + destoff - off;
2571  else
2572  new_key.offset = destoff;
2573 
2574  /*
2575  * 1 - adjusting old extent (we may have to split it)
2576  * 1 - add new extent
2577  * 1 - inode update
2578  */
2579  trans = btrfs_start_transaction(root, 3);
2580  if (IS_ERR(trans)) {
2581  ret = PTR_ERR(trans);
2582  goto out;
2583  }
2584 
2585  if (type == BTRFS_FILE_EXTENT_REG ||
2586  type == BTRFS_FILE_EXTENT_PREALLOC) {
2587  /*
2588  * a | --- range to clone ---| b
2589  * | ------------- extent ------------- |
2590  */
2591 
2592  /* substract range b */
2593  if (key.offset + datal > off + len)
2594  datal = off + len - key.offset;
2595 
2596  /* substract range a */
2597  if (off > key.offset) {
2598  datao += off - key.offset;
2599  datal -= off - key.offset;
2600  }
2601 
2602  ret = btrfs_drop_extents(trans, root, inode,
2603  new_key.offset,
2604  new_key.offset + datal,
2605  1);
2606  if (ret) {
2607  btrfs_abort_transaction(trans, root,
2608  ret);
2609  btrfs_end_transaction(trans, root);
2610  goto out;
2611  }
2612 
2613  ret = btrfs_insert_empty_item(trans, root, path,
2614  &new_key, size);
2615  if (ret) {
2616  btrfs_abort_transaction(trans, root,
2617  ret);
2618  btrfs_end_transaction(trans, root);
2619  goto out;
2620  }
2621 
2622  leaf = path->nodes[0];
2623  slot = path->slots[0];
2624  write_extent_buffer(leaf, buf,
2625  btrfs_item_ptr_offset(leaf, slot),
2626  size);
2627 
2628  extent = btrfs_item_ptr(leaf, slot,
2629  struct btrfs_file_extent_item);
2630 
2631  /* disko == 0 means it's a hole */
2632  if (!disko)
2633  datao = 0;
2634 
2635  btrfs_set_file_extent_offset(leaf, extent,
2636  datao);
2637  btrfs_set_file_extent_num_bytes(leaf, extent,
2638  datal);
2639  if (disko) {
2640  inode_add_bytes(inode, datal);
2641  ret = btrfs_inc_extent_ref(trans, root,
2642  disko, diskl, 0,
2643  root->root_key.objectid,
2644  btrfs_ino(inode),
2645  new_key.offset - datao,
2646  0);
2647  if (ret) {
2649  root,
2650  ret);
2651  btrfs_end_transaction(trans,
2652  root);
2653  goto out;
2654 
2655  }
2656  }
2657  } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2658  u64 skip = 0;
2659  u64 trim = 0;
2660  if (off > key.offset) {
2661  skip = off - key.offset;
2662  new_key.offset += skip;
2663  }
2664 
2665  if (key.offset + datal > off + len)
2666  trim = key.offset + datal - (off + len);
2667 
2668  if (comp && (skip || trim)) {
2669  ret = -EINVAL;
2670  btrfs_end_transaction(trans, root);
2671  goto out;
2672  }
2673  size -= skip + trim;
2674  datal -= skip + trim;
2675 
2676  ret = btrfs_drop_extents(trans, root, inode,
2677  new_key.offset,
2678  new_key.offset + datal,
2679  1);
2680  if (ret) {
2681  btrfs_abort_transaction(trans, root,
2682  ret);
2683  btrfs_end_transaction(trans, root);
2684  goto out;
2685  }
2686 
2687  ret = btrfs_insert_empty_item(trans, root, path,
2688  &new_key, size);
2689  if (ret) {
2690  btrfs_abort_transaction(trans, root,
2691  ret);
2692  btrfs_end_transaction(trans, root);
2693  goto out;
2694  }
2695 
2696  if (skip) {
2697  u32 start =
2698  btrfs_file_extent_calc_inline_size(0);
2699  memmove(buf+start, buf+start+skip,
2700  datal);
2701  }
2702 
2703  leaf = path->nodes[0];
2704  slot = path->slots[0];
2705  write_extent_buffer(leaf, buf,
2706  btrfs_item_ptr_offset(leaf, slot),
2707  size);
2708  inode_add_bytes(inode, datal);
2709  }
2710 
2712  btrfs_release_path(path);
2713 
2714  inode_inc_iversion(inode);
2715  inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2716 
2717  /*
2718  * we round up to the block size at eof when
2719  * determining which extents to clone above,
2720  * but shouldn't round up the file size
2721  */
2722  endoff = new_key.offset + datal;
2723  if (endoff > destoff+olen)
2724  endoff = destoff+olen;
2725  if (endoff > inode->i_size)
2726  btrfs_i_size_write(inode, endoff);
2727 
2728  ret = btrfs_update_inode(trans, root, inode);
2729  if (ret) {
2730  btrfs_abort_transaction(trans, root, ret);
2731  btrfs_end_transaction(trans, root);
2732  goto out;
2733  }
2734  ret = btrfs_end_transaction(trans, root);
2735  }
2736 next:
2737  btrfs_release_path(path);
2738  key.offset++;
2739  }
2740  ret = 0;
2741 out:
2742  btrfs_release_path(path);
2743  unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2744 out_unlock:
2745  mutex_unlock(&src->i_mutex);
2746  mutex_unlock(&inode->i_mutex);
2747  vfree(buf);
2748  btrfs_free_path(path);
2749 out_fput:
2750  fdput(src_file);
2751 out_drop_write:
2752  mnt_drop_write_file(file);
2753  return ret;
2754 }
2755 
2756 static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
2757 {
2758  struct btrfs_ioctl_clone_range_args args;
2759 
2760  if (copy_from_user(&args, argp, sizeof(args)))
2761  return -EFAULT;
2762  return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
2763  args.src_length, args.dest_offset);
2764 }
2765 
2766 /*
2767  * there are many ways the trans_start and trans_end ioctls can lead
2768  * to deadlocks. They should only be used by applications that
2769  * basically own the machine, and have a very in depth understanding
2770  * of all the possible deadlocks and enospc problems.
2771  */
2772 static long btrfs_ioctl_trans_start(struct file *file)
2773 {
2774  struct inode *inode = fdentry(file)->d_inode;
2775  struct btrfs_root *root = BTRFS_I(inode)->root;
2776  struct btrfs_trans_handle *trans;
2777  int ret;
2778 
2779  ret = -EPERM;
2780  if (!capable(CAP_SYS_ADMIN))
2781  goto out;
2782 
2783  ret = -EINPROGRESS;
2784  if (file->private_data)
2785  goto out;
2786 
2787  ret = -EROFS;
2788  if (btrfs_root_readonly(root))
2789  goto out;
2790 
2791  ret = mnt_want_write_file(file);
2792  if (ret)
2793  goto out;
2794 
2795  atomic_inc(&root->fs_info->open_ioctl_trans);
2796 
2797  ret = -ENOMEM;
2798  trans = btrfs_start_ioctl_transaction(root);
2799  if (IS_ERR(trans))
2800  goto out_drop;
2801 
2802  file->private_data = trans;
2803  return 0;
2804 
2805 out_drop:
2806  atomic_dec(&root->fs_info->open_ioctl_trans);
2807  mnt_drop_write_file(file);
2808 out:
2809  return ret;
2810 }
2811 
2812 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2813 {
2814  struct inode *inode = fdentry(file)->d_inode;
2815  struct btrfs_root *root = BTRFS_I(inode)->root;
2816  struct btrfs_root *new_root;
2817  struct btrfs_dir_item *di;
2818  struct btrfs_trans_handle *trans;
2819  struct btrfs_path *path;
2820  struct btrfs_key location;
2821  struct btrfs_disk_key disk_key;
2822  u64 objectid = 0;
2823  u64 dir_id;
2824 
2825  if (!capable(CAP_SYS_ADMIN))
2826  return -EPERM;
2827 
2828  if (copy_from_user(&objectid, argp, sizeof(objectid)))
2829  return -EFAULT;
2830 
2831  if (!objectid)
2832  objectid = root->root_key.objectid;
2833 
2834  location.objectid = objectid;
2836  location.offset = (u64)-1;
2837 
2838  new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
2839  if (IS_ERR(new_root))
2840  return PTR_ERR(new_root);
2841 
2842  if (btrfs_root_refs(&new_root->root_item) == 0)
2843  return -ENOENT;
2844 
2845  path = btrfs_alloc_path();
2846  if (!path)
2847  return -ENOMEM;
2848  path->leave_spinning = 1;
2849 
2850  trans = btrfs_start_transaction(root, 1);
2851  if (IS_ERR(trans)) {
2852  btrfs_free_path(path);
2853  return PTR_ERR(trans);
2854  }
2855 
2856  dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
2857  di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
2858  dir_id, "default", 7, 1);
2859  if (IS_ERR_OR_NULL(di)) {
2860  btrfs_free_path(path);
2861  btrfs_end_transaction(trans, root);
2862  printk(KERN_ERR "Umm, you don't have the default dir item, "
2863  "this isn't going to work\n");
2864  return -ENOENT;
2865  }
2866 
2867  btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
2868  btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
2869  btrfs_mark_buffer_dirty(path->nodes[0]);
2870  btrfs_free_path(path);
2871 
2872  btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
2873  btrfs_end_transaction(trans, root);
2874 
2875  return 0;
2876 }
2877 
2878 void btrfs_get_block_group_info(struct list_head *groups_list,
2879  struct btrfs_ioctl_space_info *space)
2880 {
2882 
2883  space->total_bytes = 0;
2884  space->used_bytes = 0;
2885  space->flags = 0;
2886  list_for_each_entry(block_group, groups_list, list) {
2887  space->flags = block_group->flags;
2888  space->total_bytes += block_group->key.offset;
2889  space->used_bytes +=
2890  btrfs_block_group_used(&block_group->item);
2891  }
2892 }
2893 
2894 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2895 {
2896  struct btrfs_ioctl_space_args space_args;
2897  struct btrfs_ioctl_space_info space;
2898  struct btrfs_ioctl_space_info *dest;
2899  struct btrfs_ioctl_space_info *dest_orig;
2900  struct btrfs_ioctl_space_info __user *user_dest;
2901  struct btrfs_space_info *info;
2902  u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2905  BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
2906  int num_types = 4;
2907  int alloc_size;
2908  int ret = 0;
2909  u64 slot_count = 0;
2910  int i, c;
2911 
2912  if (copy_from_user(&space_args,
2913  (struct btrfs_ioctl_space_args __user *)arg,
2914  sizeof(space_args)))
2915  return -EFAULT;
2916 
2917  for (i = 0; i < num_types; i++) {
2918  struct btrfs_space_info *tmp;
2919 
2920  info = NULL;
2921  rcu_read_lock();
2922  list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2923  list) {
2924  if (tmp->flags == types[i]) {
2925  info = tmp;
2926  break;
2927  }
2928  }
2929  rcu_read_unlock();
2930 
2931  if (!info)
2932  continue;
2933 
2934  down_read(&info->groups_sem);
2935  for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2936  if (!list_empty(&info->block_groups[c]))
2937  slot_count++;
2938  }
2939  up_read(&info->groups_sem);
2940  }
2941 
2942  /* space_slots == 0 means they are asking for a count */
2943  if (space_args.space_slots == 0) {
2944  space_args.total_spaces = slot_count;
2945  goto out;
2946  }
2947 
2948  slot_count = min_t(u64, space_args.space_slots, slot_count);
2949 
2950  alloc_size = sizeof(*dest) * slot_count;
2951 
2952  /* we generally have at most 6 or so space infos, one for each raid
2953  * level. So, a whole page should be more than enough for everyone
2954  */
2955  if (alloc_size > PAGE_CACHE_SIZE)
2956  return -ENOMEM;
2957 
2958  space_args.total_spaces = 0;
2959  dest = kmalloc(alloc_size, GFP_NOFS);
2960  if (!dest)
2961  return -ENOMEM;
2962  dest_orig = dest;
2963 
2964  /* now we have a buffer to copy into */
2965  for (i = 0; i < num_types; i++) {
2966  struct btrfs_space_info *tmp;
2967 
2968  if (!slot_count)
2969  break;
2970 
2971  info = NULL;
2972  rcu_read_lock();
2973  list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2974  list) {
2975  if (tmp->flags == types[i]) {
2976  info = tmp;
2977  break;
2978  }
2979  }
2980  rcu_read_unlock();
2981 
2982  if (!info)
2983  continue;
2984  down_read(&info->groups_sem);
2985  for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2986  if (!list_empty(&info->block_groups[c])) {
2988  &info->block_groups[c], &space);
2989  memcpy(dest, &space, sizeof(space));
2990  dest++;
2991  space_args.total_spaces++;
2992  slot_count--;
2993  }
2994  if (!slot_count)
2995  break;
2996  }
2997  up_read(&info->groups_sem);
2998  }
2999 
3000  user_dest = (struct btrfs_ioctl_space_info __user *)
3001  (arg + sizeof(struct btrfs_ioctl_space_args));
3002 
3003  if (copy_to_user(user_dest, dest_orig, alloc_size))
3004  ret = -EFAULT;
3005 
3006  kfree(dest_orig);
3007 out:
3008  if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
3009  ret = -EFAULT;
3010 
3011  return ret;
3012 }
3013 
3014 /*
3015  * there are many ways the trans_start and trans_end ioctls can lead
3016  * to deadlocks. They should only be used by applications that
3017  * basically own the machine, and have a very in depth understanding
3018  * of all the possible deadlocks and enospc problems.
3019  */
3020 long btrfs_ioctl_trans_end(struct file *file)
3021 {
3022  struct inode *inode = fdentry(file)->d_inode;
3023  struct btrfs_root *root = BTRFS_I(inode)->root;
3024  struct btrfs_trans_handle *trans;
3025 
3026  trans = file->private_data;
3027  if (!trans)
3028  return -EINVAL;
3029  file->private_data = NULL;
3030 
3031  btrfs_end_transaction(trans, root);
3032 
3033  atomic_dec(&root->fs_info->open_ioctl_trans);
3034 
3035  mnt_drop_write_file(file);
3036  return 0;
3037 }
3038 
3039 static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
3040 {
3041  struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
3042  struct btrfs_trans_handle *trans;
3043  u64 transid;
3044  int ret;
3045 
3046  trans = btrfs_start_transaction(root, 0);
3047  if (IS_ERR(trans))
3048  return PTR_ERR(trans);
3049  transid = trans->transid;
3050  ret = btrfs_commit_transaction_async(trans, root, 0);
3051  if (ret) {
3052  btrfs_end_transaction(trans, root);
3053  return ret;
3054  }
3055 
3056  if (argp)
3057  if (copy_to_user(argp, &transid, sizeof(transid)))
3058  return -EFAULT;
3059  return 0;
3060 }
3061 
3062 static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
3063 {
3064  struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
3065  u64 transid;
3066 
3067  if (argp) {
3068  if (copy_from_user(&transid, argp, sizeof(transid)))
3069  return -EFAULT;
3070  } else {
3071  transid = 0; /* current trans */
3072  }
3073  return btrfs_wait_for_commit(root, transid);
3074 }
3075 
3076 static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
3077 {
3078  int ret;
3079  struct btrfs_ioctl_scrub_args *sa;
3080 
3081  if (!capable(CAP_SYS_ADMIN))
3082  return -EPERM;
3083 
3084  sa = memdup_user(arg, sizeof(*sa));
3085  if (IS_ERR(sa))
3086  return PTR_ERR(sa);
3087 
3088  ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
3089  &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
3090 
3091  if (copy_to_user(arg, sa, sizeof(*sa)))
3092  ret = -EFAULT;
3093 
3094  kfree(sa);
3095  return ret;
3096 }
3097 
3098 static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
3099 {
3100  if (!capable(CAP_SYS_ADMIN))
3101  return -EPERM;
3102 
3103  return btrfs_scrub_cancel(root);
3104 }
3105 
3106 static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
3107  void __user *arg)
3108 {
3109  struct btrfs_ioctl_scrub_args *sa;
3110  int ret;
3111 
3112  if (!capable(CAP_SYS_ADMIN))
3113  return -EPERM;
3114 
3115  sa = memdup_user(arg, sizeof(*sa));
3116  if (IS_ERR(sa))
3117  return PTR_ERR(sa);
3118 
3119  ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
3120 
3121  if (copy_to_user(arg, sa, sizeof(*sa)))
3122  ret = -EFAULT;
3123 
3124  kfree(sa);
3125  return ret;
3126 }
3127 
3128 static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
3129  void __user *arg)
3130 {
3131  struct btrfs_ioctl_get_dev_stats *sa;
3132  int ret;
3133 
3134  sa = memdup_user(arg, sizeof(*sa));
3135  if (IS_ERR(sa))
3136  return PTR_ERR(sa);
3137 
3138  if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
3139  kfree(sa);
3140  return -EPERM;
3141  }
3142 
3143  ret = btrfs_get_dev_stats(root, sa);
3144 
3145  if (copy_to_user(arg, sa, sizeof(*sa)))
3146  ret = -EFAULT;
3147 
3148  kfree(sa);
3149  return ret;
3150 }
3151 
3152 static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
3153 {
3154  int ret = 0;
3155  int i;
3156  u64 rel_ptr;
3157  int size;
3159  struct inode_fs_paths *ipath = NULL;
3160  struct btrfs_path *path;
3161 
3162  if (!capable(CAP_SYS_ADMIN))
3163  return -EPERM;
3164 
3165  path = btrfs_alloc_path();
3166  if (!path) {
3167  ret = -ENOMEM;
3168  goto out;
3169  }
3170 
3171  ipa = memdup_user(arg, sizeof(*ipa));
3172  if (IS_ERR(ipa)) {
3173  ret = PTR_ERR(ipa);
3174  ipa = NULL;
3175  goto out;
3176  }
3177 
3178  size = min_t(u32, ipa->size, 4096);
3179  ipath = init_ipath(size, root, path);
3180  if (IS_ERR(ipath)) {
3181  ret = PTR_ERR(ipath);
3182  ipath = NULL;
3183  goto out;
3184  }
3185 
3186  ret = paths_from_inode(ipa->inum, ipath);
3187  if (ret < 0)
3188  goto out;
3189 
3190  for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
3191  rel_ptr = ipath->fspath->val[i] -
3192  (u64)(unsigned long)ipath->fspath->val;
3193  ipath->fspath->val[i] = rel_ptr;
3194  }
3195 
3196  ret = copy_to_user((void *)(unsigned long)ipa->fspath,
3197  (void *)(unsigned long)ipath->fspath, size);
3198  if (ret) {
3199  ret = -EFAULT;
3200  goto out;
3201  }
3202 
3203 out:
3204  btrfs_free_path(path);
3205  free_ipath(ipath);
3206  kfree(ipa);
3207 
3208  return ret;
3209 }
3210 
3211 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
3212 {
3213  struct btrfs_data_container *inodes = ctx;
3214  const size_t c = 3 * sizeof(u64);
3215 
3216  if (inodes->bytes_left >= c) {
3217  inodes->bytes_left -= c;
3218  inodes->val[inodes->elem_cnt] = inum;
3219  inodes->val[inodes->elem_cnt + 1] = offset;
3220  inodes->val[inodes->elem_cnt + 2] = root;
3221  inodes->elem_cnt += 3;
3222  } else {
3223  inodes->bytes_missing += c - inodes->bytes_left;
3224  inodes->bytes_left = 0;
3225  inodes->elem_missed += 3;
3226  }
3227 
3228  return 0;
3229 }
3230 
3231 static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
3232  void __user *arg)
3233 {
3234  int ret = 0;
3235  int size;
3236  struct btrfs_ioctl_logical_ino_args *loi;
3237  struct btrfs_data_container *inodes = NULL;
3238  struct btrfs_path *path = NULL;
3239 
3240  if (!capable(CAP_SYS_ADMIN))
3241  return -EPERM;
3242 
3243  loi = memdup_user(arg, sizeof(*loi));
3244  if (IS_ERR(loi)) {
3245  ret = PTR_ERR(loi);
3246  loi = NULL;
3247  goto out;
3248  }
3249 
3250  path = btrfs_alloc_path();
3251  if (!path) {
3252  ret = -ENOMEM;
3253  goto out;
3254  }
3255 
3256  size = min_t(u32, loi->size, 64 * 1024);
3257  inodes = init_data_container(size);
3258  if (IS_ERR(inodes)) {
3259  ret = PTR_ERR(inodes);
3260  inodes = NULL;
3261  goto out;
3262  }
3263 
3264  ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path,
3265  build_ino_list, inodes);
3266  if (ret == -EINVAL)
3267  ret = -ENOENT;
3268  if (ret < 0)
3269  goto out;
3270 
3271  ret = copy_to_user((void *)(unsigned long)loi->inodes,
3272  (void *)(unsigned long)inodes, size);
3273  if (ret)
3274  ret = -EFAULT;
3275 
3276 out:
3277  btrfs_free_path(path);
3278  vfree(inodes);
3279  kfree(loi);
3280 
3281  return ret;
3282 }
3283 
3284 void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3285  struct btrfs_ioctl_balance_args *bargs)
3286 {
3287  struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3288 
3289  bargs->flags = bctl->flags;
3290 
3291  if (atomic_read(&fs_info->balance_running))
3293  if (atomic_read(&fs_info->balance_pause_req))
3295  if (atomic_read(&fs_info->balance_cancel_req))
3297 
3298  memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
3299  memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
3300  memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3301 
3302  if (lock) {
3303  spin_lock(&fs_info->balance_lock);
3304  memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3305  spin_unlock(&fs_info->balance_lock);
3306  } else {
3307  memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3308  }
3309 }
3310 
3311 static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3312 {
3313  struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3314  struct btrfs_fs_info *fs_info = root->fs_info;
3315  struct btrfs_ioctl_balance_args *bargs;
3316  struct btrfs_balance_control *bctl;
3317  int ret;
3318 
3319  if (!capable(CAP_SYS_ADMIN))
3320  return -EPERM;
3321 
3322  ret = mnt_want_write_file(file);
3323  if (ret)
3324  return ret;
3325 
3326  mutex_lock(&fs_info->volume_mutex);
3327  mutex_lock(&fs_info->balance_mutex);
3328 
3329  if (arg) {
3330  bargs = memdup_user(arg, sizeof(*bargs));
3331  if (IS_ERR(bargs)) {
3332  ret = PTR_ERR(bargs);
3333  goto out;
3334  }
3335 
3336  if (bargs->flags & BTRFS_BALANCE_RESUME) {
3337  if (!fs_info->balance_ctl) {
3338  ret = -ENOTCONN;
3339  goto out_bargs;
3340  }
3341 
3342  bctl = fs_info->balance_ctl;
3343  spin_lock(&fs_info->balance_lock);
3344  bctl->flags |= BTRFS_BALANCE_RESUME;
3345  spin_unlock(&fs_info->balance_lock);
3346 
3347  goto do_balance;
3348  }
3349  } else {
3350  bargs = NULL;
3351  }
3352 
3353  if (fs_info->balance_ctl) {
3354  ret = -EINPROGRESS;
3355  goto out_bargs;
3356  }
3357 
3358  bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3359  if (!bctl) {
3360  ret = -ENOMEM;
3361  goto out_bargs;
3362  }
3363 
3364  bctl->fs_info = fs_info;
3365  if (arg) {
3366  memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
3367  memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
3368  memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
3369 
3370  bctl->flags = bargs->flags;
3371  } else {
3372  /* balance everything - no filters */
3373  bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
3374  }
3375 
3376 do_balance:
3377  ret = btrfs_balance(bctl, bargs);
3378  /*
3379  * bctl is freed in __cancel_balance or in free_fs_info if
3380  * restriper was paused all the way until unmount
3381  */
3382  if (arg) {
3383  if (copy_to_user(arg, bargs, sizeof(*bargs)))
3384  ret = -EFAULT;
3385  }
3386 
3387 out_bargs:
3388  kfree(bargs);
3389 out:
3390  mutex_unlock(&fs_info->balance_mutex);
3391  mutex_unlock(&fs_info->volume_mutex);
3392  mnt_drop_write_file(file);
3393  return ret;
3394 }
3395 
3396 static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
3397 {
3398  if (!capable(CAP_SYS_ADMIN))
3399  return -EPERM;
3400 
3401  switch (cmd) {
3403  return btrfs_pause_balance(root->fs_info);
3405  return btrfs_cancel_balance(root->fs_info);
3406  }
3407 
3408  return -EINVAL;
3409 }
3410 
3411 static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
3412  void __user *arg)
3413 {
3414  struct btrfs_fs_info *fs_info = root->fs_info;
3415  struct btrfs_ioctl_balance_args *bargs;
3416  int ret = 0;
3417 
3418  if (!capable(CAP_SYS_ADMIN))
3419  return -EPERM;
3420 
3421  mutex_lock(&fs_info->balance_mutex);
3422  if (!fs_info->balance_ctl) {
3423  ret = -ENOTCONN;
3424  goto out;
3425  }
3426 
3427  bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
3428  if (!bargs) {
3429  ret = -ENOMEM;
3430  goto out;
3431  }
3432 
3433  update_ioctl_balance_args(fs_info, 1, bargs);
3434 
3435  if (copy_to_user(arg, bargs, sizeof(*bargs)))
3436  ret = -EFAULT;
3437 
3438  kfree(bargs);
3439 out:
3440  mutex_unlock(&fs_info->balance_mutex);
3441  return ret;
3442 }
3443 
3444 static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
3445 {
3447  struct btrfs_trans_handle *trans = NULL;
3448  int ret;
3449  int err;
3450 
3451  if (!capable(CAP_SYS_ADMIN))
3452  return -EPERM;
3453 
3454  if (root->fs_info->sb->s_flags & MS_RDONLY)
3455  return -EROFS;
3456 
3457  sa = memdup_user(arg, sizeof(*sa));
3458  if (IS_ERR(sa))
3459  return PTR_ERR(sa);
3460 
3461  if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
3462  trans = btrfs_start_transaction(root, 2);
3463  if (IS_ERR(trans)) {
3464  ret = PTR_ERR(trans);
3465  goto out;
3466  }
3467  }
3468 
3469  switch (sa->cmd) {
3471  ret = btrfs_quota_enable(trans, root->fs_info);
3472  break;
3474  ret = btrfs_quota_disable(trans, root->fs_info);
3475  break;
3477  ret = btrfs_quota_rescan(root->fs_info);
3478  break;
3479  default:
3480  ret = -EINVAL;
3481  break;
3482  }
3483 
3484  if (copy_to_user(arg, sa, sizeof(*sa)))
3485  ret = -EFAULT;
3486 
3487  if (trans) {
3488  err = btrfs_commit_transaction(trans, root);
3489  if (err && !ret)
3490  ret = err;
3491  }
3492 
3493 out:
3494  kfree(sa);
3495  return ret;
3496 }
3497 
3498 static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
3499 {
3501  struct btrfs_trans_handle *trans;
3502  int ret;
3503  int err;
3504 
3505  if (!capable(CAP_SYS_ADMIN))
3506  return -EPERM;
3507 
3508  if (root->fs_info->sb->s_flags & MS_RDONLY)
3509  return -EROFS;
3510 
3511  sa = memdup_user(arg, sizeof(*sa));
3512  if (IS_ERR(sa))
3513  return PTR_ERR(sa);
3514 
3515  trans = btrfs_join_transaction(root);
3516  if (IS_ERR(trans)) {
3517  ret = PTR_ERR(trans);
3518  goto out;
3519  }
3520 
3521  /* FIXME: check if the IDs really exist */
3522  if (sa->assign) {
3523  ret = btrfs_add_qgroup_relation(trans, root->fs_info,
3524  sa->src, sa->dst);
3525  } else {
3526  ret = btrfs_del_qgroup_relation(trans, root->fs_info,
3527  sa->src, sa->dst);
3528  }
3529 
3530  err = btrfs_end_transaction(trans, root);
3531  if (err && !ret)
3532  ret = err;
3533 
3534 out:
3535  kfree(sa);
3536  return ret;
3537 }
3538 
3539 static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
3540 {
3542  struct btrfs_trans_handle *trans;
3543  int ret;
3544  int err;
3545 
3546  if (!capable(CAP_SYS_ADMIN))
3547  return -EPERM;
3548 
3549  if (root->fs_info->sb->s_flags & MS_RDONLY)
3550  return -EROFS;
3551 
3552  sa = memdup_user(arg, sizeof(*sa));
3553  if (IS_ERR(sa))
3554  return PTR_ERR(sa);
3555 
3556  trans = btrfs_join_transaction(root);
3557  if (IS_ERR(trans)) {
3558  ret = PTR_ERR(trans);
3559  goto out;
3560  }
3561 
3562  /* FIXME: check if the IDs really exist */
3563  if (sa->create) {
3564  ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
3565  NULL);
3566  } else {
3567  ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
3568  }
3569 
3570  err = btrfs_end_transaction(trans, root);
3571  if (err && !ret)
3572  ret = err;
3573 
3574 out:
3575  kfree(sa);
3576  return ret;
3577 }
3578 
3579 static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
3580 {
3582  struct btrfs_trans_handle *trans;
3583  int ret;
3584  int err;
3585  u64 qgroupid;
3586 
3587  if (!capable(CAP_SYS_ADMIN))
3588  return -EPERM;
3589 
3590  if (root->fs_info->sb->s_flags & MS_RDONLY)
3591  return -EROFS;
3592 
3593  sa = memdup_user(arg, sizeof(*sa));
3594  if (IS_ERR(sa))
3595  return PTR_ERR(sa);
3596 
3597  trans = btrfs_join_transaction(root);
3598  if (IS_ERR(trans)) {
3599  ret = PTR_ERR(trans);
3600  goto out;
3601  }
3602 
3603  qgroupid = sa->qgroupid;
3604  if (!qgroupid) {
3605  /* take the current subvol as qgroup */
3606  qgroupid = root->root_key.objectid;
3607  }
3608 
3609  /* FIXME: check if the IDs really exist */
3610  ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);
3611 
3612  err = btrfs_end_transaction(trans, root);
3613  if (err && !ret)
3614  ret = err;
3615 
3616 out:
3617  kfree(sa);
3618  return ret;
3619 }
3620 
3621 static long btrfs_ioctl_set_received_subvol(struct file *file,
3622  void __user *arg)
3623 {
3625  struct inode *inode = fdentry(file)->d_inode;
3626  struct btrfs_root *root = BTRFS_I(inode)->root;
3627  struct btrfs_root_item *root_item = &root->root_item;
3628  struct btrfs_trans_handle *trans;
3629  struct timespec ct = CURRENT_TIME;
3630  int ret = 0;
3631 
3632  ret = mnt_want_write_file(file);
3633  if (ret < 0)
3634  return ret;
3635 
3636  down_write(&root->fs_info->subvol_sem);
3637 
3638  if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
3639  ret = -EINVAL;
3640  goto out;
3641  }
3642 
3643  if (btrfs_root_readonly(root)) {
3644  ret = -EROFS;
3645  goto out;
3646  }
3647 
3648  if (!inode_owner_or_capable(inode)) {
3649  ret = -EACCES;
3650  goto out;
3651  }
3652 
3653  sa = memdup_user(arg, sizeof(*sa));
3654  if (IS_ERR(sa)) {
3655  ret = PTR_ERR(sa);
3656  sa = NULL;
3657  goto out;
3658  }
3659 
3660  trans = btrfs_start_transaction(root, 1);
3661  if (IS_ERR(trans)) {
3662  ret = PTR_ERR(trans);
3663  trans = NULL;
3664  goto out;
3665  }
3666 
3667  sa->rtransid = trans->transid;
3668  sa->rtime.sec = ct.tv_sec;
3669  sa->rtime.nsec = ct.tv_nsec;
3670 
3671  memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
3672  btrfs_set_root_stransid(root_item, sa->stransid);
3673  btrfs_set_root_rtransid(root_item, sa->rtransid);
3674  root_item->stime.sec = cpu_to_le64(sa->stime.sec);
3675  root_item->stime.nsec = cpu_to_le32(sa->stime.nsec);
3676  root_item->rtime.sec = cpu_to_le64(sa->rtime.sec);
3677  root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec);
3678 
3679  ret = btrfs_update_root(trans, root->fs_info->tree_root,
3680  &root->root_key, &root->root_item);
3681  if (ret < 0) {
3682  btrfs_end_transaction(trans, root);
3683  trans = NULL;
3684  goto out;
3685  } else {
3686  ret = btrfs_commit_transaction(trans, root);
3687  if (ret < 0)
3688  goto out;
3689  }
3690 
3691  ret = copy_to_user(arg, sa, sizeof(*sa));
3692  if (ret)
3693  ret = -EFAULT;
3694 
3695 out:
3696  kfree(sa);
3697  up_write(&root->fs_info->subvol_sem);
3698  mnt_drop_write_file(file);
3699  return ret;
3700 }
3701 
3702 long btrfs_ioctl(struct file *file, unsigned int
3703  cmd, unsigned long arg)
3704 {
3705  struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3706  void __user *argp = (void __user *)arg;
3707 
3708  switch (cmd) {
3709  case FS_IOC_GETFLAGS:
3710  return btrfs_ioctl_getflags(file, argp);
3711  case FS_IOC_SETFLAGS:
3712  return btrfs_ioctl_setflags(file, argp);
3713  case FS_IOC_GETVERSION:
3714  return btrfs_ioctl_getversion(file, argp);
3715  case FITRIM:
3716  return btrfs_ioctl_fitrim(file, argp);
3717  case BTRFS_IOC_SNAP_CREATE:
3718  return btrfs_ioctl_snap_create(file, argp, 0);
3720  return btrfs_ioctl_snap_create_v2(file, argp, 0);
3722  return btrfs_ioctl_snap_create(file, argp, 1);
3724  return btrfs_ioctl_snap_create_v2(file, argp, 1);
3726  return btrfs_ioctl_snap_destroy(file, argp);
3728  return btrfs_ioctl_subvol_getflags(file, argp);
3730  return btrfs_ioctl_subvol_setflags(file, argp);
3732  return btrfs_ioctl_default_subvol(file, argp);
3733  case BTRFS_IOC_DEFRAG:
3734  return btrfs_ioctl_defrag(file, NULL);
3736  return btrfs_ioctl_defrag(file, argp);
3737  case BTRFS_IOC_RESIZE:
3738  return btrfs_ioctl_resize(root, argp);
3739  case BTRFS_IOC_ADD_DEV:
3740  return btrfs_ioctl_add_dev(root, argp);
3741  case BTRFS_IOC_RM_DEV:
3742  return btrfs_ioctl_rm_dev(root, argp);
3743  case BTRFS_IOC_FS_INFO:
3744  return btrfs_ioctl_fs_info(root, argp);
3745  case BTRFS_IOC_DEV_INFO:
3746  return btrfs_ioctl_dev_info(root, argp);
3747  case BTRFS_IOC_BALANCE:
3748  return btrfs_ioctl_balance(file, NULL);
3749  case BTRFS_IOC_CLONE:
3750  return btrfs_ioctl_clone(file, arg, 0, 0, 0);
3751  case BTRFS_IOC_CLONE_RANGE:
3752  return btrfs_ioctl_clone_range(file, argp);
3753  case BTRFS_IOC_TRANS_START:
3754  return btrfs_ioctl_trans_start(file);
3755  case BTRFS_IOC_TRANS_END:
3756  return btrfs_ioctl_trans_end(file);
3757  case BTRFS_IOC_TREE_SEARCH:
3758  return btrfs_ioctl_tree_search(file, argp);
3759  case BTRFS_IOC_INO_LOOKUP:
3760  return btrfs_ioctl_ino_lookup(file, argp);
3761  case BTRFS_IOC_INO_PATHS:
3762  return btrfs_ioctl_ino_to_path(root, argp);
3763  case BTRFS_IOC_LOGICAL_INO:
3764  return btrfs_ioctl_logical_to_ino(root, argp);
3765  case BTRFS_IOC_SPACE_INFO:
3766  return btrfs_ioctl_space_info(root, argp);
3767  case BTRFS_IOC_SYNC:
3768  btrfs_sync_fs(file->f_dentry->d_sb, 1);
3769  return 0;
3770  case BTRFS_IOC_START_SYNC:
3771  return btrfs_ioctl_start_sync(file, argp);
3772  case BTRFS_IOC_WAIT_SYNC:
3773  return btrfs_ioctl_wait_sync(file, argp);
3774  case BTRFS_IOC_SCRUB:
3775  return btrfs_ioctl_scrub(root, argp);
3777  return btrfs_ioctl_scrub_cancel(root, argp);
3779  return btrfs_ioctl_scrub_progress(root, argp);
3780  case BTRFS_IOC_BALANCE_V2:
3781  return btrfs_ioctl_balance(file, argp);
3782  case BTRFS_IOC_BALANCE_CTL:
3783  return btrfs_ioctl_balance_ctl(root, arg);
3785  return btrfs_ioctl_balance_progress(root, argp);
3787  return btrfs_ioctl_set_received_subvol(file, argp);
3788  case BTRFS_IOC_SEND:
3789  return btrfs_ioctl_send(file, argp);
3791  return btrfs_ioctl_get_dev_stats(root, argp);
3792  case BTRFS_IOC_QUOTA_CTL:
3793  return btrfs_ioctl_quota_ctl(root, argp);
3795  return btrfs_ioctl_qgroup_assign(root, argp);
3797  return btrfs_ioctl_qgroup_create(root, argp);
3799  return btrfs_ioctl_qgroup_limit(root, argp);
3800  }
3801 
3802  return -ENOTTY;
3803 }