Linux Kernel 3.7.1
blk-cgroup.c
1 /*
2  * Common Block IO controller cgroup interface
3  *
4  * Based on ideas and code from CFQ, CFS and BFQ:
5  * Copyright (C) 2003 Jens Axboe <[email protected]>
6  *
7  * Copyright (C) 2008 Fabio Checconi <[email protected]>
8  * Paolo Valente <[email protected]>
9  *
10  * Copyright (C) 2009 Vivek Goyal <[email protected]>
11  * Nauman Rafique <[email protected]>
12  */
13 #include <linux/ioprio.h>
14 #include <linux/kdev_t.h>
15 #include <linux/module.h>
16 #include <linux/err.h>
17 #include <linux/blkdev.h>
18 #include <linux/slab.h>
19 #include <linux/genhd.h>
20 #include <linux/delay.h>
21 #include <linux/atomic.h>
22 #include "blk-cgroup.h"
23 #include "blk.h"
24 
25 #define MAX_KEY_LEN 100
26 
27 static DEFINE_MUTEX(blkcg_pol_mutex);
28 
29 struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
31 
32 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
33 
34 static bool blkcg_policy_enabled(struct request_queue *q,
35  const struct blkcg_policy *pol)
36 {
37  return pol && test_bit(pol->plid, q->blkcg_pols);
38 }
39 
46 static void blkg_free(struct blkcg_gq *blkg)
47 {
48  int i;
49 
50  if (!blkg)
51  return;
52 
53  for (i = 0; i < BLKCG_MAX_POLS; i++) {
54  struct blkcg_policy *pol = blkcg_policy[i];
55  struct blkg_policy_data *pd = blkg->pd[i];
56 
57  if (!pd)
58  continue;
59 
60  if (pol && pol->pd_exit_fn)
61  pol->pd_exit_fn(blkg);
62 
63  kfree(pd);
64  }
65 
66  blk_exit_rl(&blkg->rl);
67  kfree(blkg);
68 }
69 
78 static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
79  gfp_t gfp_mask)
80 {
81  struct blkcg_gq *blkg;
82  int i;
83 
84  /* alloc and init base part */
85  blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
86  if (!blkg)
87  return NULL;
88 
89  blkg->q = q;
90  INIT_LIST_HEAD(&blkg->q_node);
91  blkg->blkcg = blkcg;
92  blkg->refcnt = 1;
93 
94  /* root blkg uses @q->root_rl, init rl only for !root blkgs */
95  if (blkcg != &blkcg_root) {
96  if (blk_init_rl(&blkg->rl, q, gfp_mask))
97  goto err_free;
98  blkg->rl.blkg = blkg;
99  }
100 
101  for (i = 0; i < BLKCG_MAX_POLS; i++) {
102  struct blkcg_policy *pol = blkcg_policy[i];
103  struct blkg_policy_data *pd;
104 
105  if (!blkcg_policy_enabled(q, pol))
106  continue;
107 
108  /* alloc per-policy data and attach it to blkg */
109  pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
110  if (!pd)
111  goto err_free;
112 
113  blkg->pd[i] = pd;
114  pd->blkg = blkg;
115 
116  /* invoke per-policy init */
117  if (blkcg_policy_enabled(blkg->q, pol))
118  pol->pd_init_fn(blkg);
119  }
120 
121  return blkg;
122 
123 err_free:
124  blkg_free(blkg);
125  return NULL;
126 }
127 
128 static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
129  struct request_queue *q)
130 {
131  struct blkcg_gq *blkg;
132 
133  blkg = rcu_dereference(blkcg->blkg_hint);
134  if (blkg && blkg->q == q)
135  return blkg;
136 
137  /*
138  * Hint didn't match. Look up from the radix tree. Note that we
139  * may not be holding queue_lock and thus are not sure whether
140  * @blkg from blkg_tree has already been removed or not, so we
141  * can't update hint to the lookup result. Leave it to the caller.
142  */
143  blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
144  if (blkg && blkg->q == q)
145  return blkg;
146 
147  return NULL;
148 }
149 
159 struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
160 {
161  WARN_ON_ONCE(!rcu_read_lock_held());
162 
163  if (unlikely(blk_queue_bypass(q)))
164  return NULL;
165  return __blkg_lookup(blkcg, q);
166 }
168 
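For illustration, a minimal sketch (not part of this file) of how an I/O-path caller might use blkg_lookup(); it assumes the bio_blkcg() accessor from blk-cgroup.h, and the helper name is hypothetical:

/* Hypothetical example: query whether a blkg already exists for a bio. */
static bool example_bio_has_blkg(struct request_queue *q, struct bio *bio)
{
        struct blkcg *blkcg;
        struct blkcg_gq *blkg;
        bool found;

        rcu_read_lock();
        blkcg = bio_blkcg(bio);         /* blkcg the bio is charged to */
        blkg = blkg_lookup(blkcg, q);   /* NULL if absent or @q is bypassing */
        found = blkg != NULL;
        rcu_read_unlock();

        return found;
}

Anything that must outlive the RCU read section would need to take a reference (blkg_get(), under the queue lock) before dropping RCU.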
169 /*
170  * If @new_blkg is %NULL, this function tries to allocate a new one as
171  * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return.
172  */
173 static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
174  struct request_queue *q,
175  struct blkcg_gq *new_blkg)
176 {
177  struct blkcg_gq *blkg;
178  int ret;
179 
180  WARN_ON_ONCE(!rcu_read_lock_held());
181  lockdep_assert_held(q->queue_lock);
182 
183  /* lookup and update hint on success, see __blkg_lookup() for details */
184  blkg = __blkg_lookup(blkcg, q);
185  if (blkg) {
186  rcu_assign_pointer(blkcg->blkg_hint, blkg);
187  goto out_free;
188  }
189 
190  /* blkg holds a reference to blkcg */
191  if (!css_tryget(&blkcg->css)) {
192  blkg = ERR_PTR(-EINVAL);
193  goto out_free;
194  }
195 
196  /* allocate */
197  if (!new_blkg) {
198  new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
199  if (unlikely(!new_blkg)) {
200  blkg = ERR_PTR(-ENOMEM);
201  goto out_put;
202  }
203  }
204  blkg = new_blkg;
205 
206  /* insert */
207  spin_lock(&blkcg->lock);
208  ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
209  if (likely(!ret)) {
210  hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
211  list_add(&blkg->q_node, &q->blkg_list);
212  }
213  spin_unlock(&blkcg->lock);
214 
215  if (!ret)
216  return blkg;
217 
218  blkg = ERR_PTR(ret);
219 out_put:
220  css_put(&blkcg->css);
221 out_free:
222  blkg_free(new_blkg);
223  return blkg;
224 }
225 
226 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
227  struct request_queue *q)
228 {
229  /*
230  * This could be the first entry point of blkcg implementation and
231  * we shouldn't allow anything to go through for a bypassing queue.
232  */
233  if (unlikely(blk_queue_bypass(q)))
234  return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
235  return __blkg_lookup_create(blkcg, q, NULL);
236 }
238 
239 static void blkg_destroy(struct blkcg_gq *blkg)
240 {
241  struct blkcg *blkcg = blkg->blkcg;
242 
243  lockdep_assert_held(blkg->q->queue_lock);
244  lockdep_assert_held(&blkcg->lock);
245 
246  /* Something is wrong if we are trying to remove the same group twice */
247  WARN_ON_ONCE(list_empty(&blkg->q_node));
248  WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
249 
250  radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
251  list_del_init(&blkg->q_node);
252  hlist_del_init_rcu(&blkg->blkcg_node);
253 
254  /*
255  * Both setting lookup hint to and clearing it from @blkg are done
256  * under queue_lock. If it's not pointing to @blkg now, it never
257  * will. Hint assignment itself can race safely.
258  */
259  if (rcu_dereference_raw(blkcg->blkg_hint) == blkg)
260  rcu_assign_pointer(blkcg->blkg_hint, NULL);
261 
262  /*
263  * Put the reference taken at the time of creation so that when all
264  * queues are gone, group can be destroyed.
265  */
266  blkg_put(blkg);
267 }
268 
275 static void blkg_destroy_all(struct request_queue *q)
276 {
277  struct blkcg_gq *blkg, *n;
278 
279  lockdep_assert_held(q->queue_lock);
280 
281  list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
282  struct blkcg *blkcg = blkg->blkcg;
283 
284  spin_lock(&blkcg->lock);
285  blkg_destroy(blkg);
286  spin_unlock(&blkcg->lock);
287  }
288 
289  /*
290  * root blkg is destroyed. Just clear the pointer since
291  * root_rl does not take reference on root blkg.
292  */
293  q->root_blkg = NULL;
294  q->root_rl.blkg = NULL;
295 }
296 
297 static void blkg_rcu_free(struct rcu_head *rcu_head)
298 {
299  blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
300 }
301 
302 void __blkg_release(struct blkcg_gq *blkg)
303 {
304  /* release the extra blkcg reference this blkg has been holding */
305  css_put(&blkg->blkcg->css);
306 
307  /*
308  * A group is freed in an RCU manner. But having an rcu lock does not
309  * mean that one can access all the fields of blkg and assume these
310  * are valid. For example, don't try to follow throtl_data and
311  * request queue links.
312  *
313  * Having a reference to blkg under an rcu allows access only to
314  * values local to groups like group stats and group rate limits.
315  */
316  call_rcu(&blkg->rcu_head, blkg_rcu_free);
317 }
319 
320 /*
321  * The "next" function used by blk_queue_for_each_rl(). It's a bit tricky
322  * because the root blkg uses @q->root_rl instead of its own rl.
323  */
324 struct request_list *__blk_queue_next_rl(struct request_list *rl,
325  struct request_queue *q)
326 {
327  struct list_head *ent;
328  struct blkcg_gq *blkg;
329 
330  /*
331  * Determine the current blkg list_head. The first entry is
332  * root_rl which is off @q->blkg_list and mapped to the head.
333  */
334  if (rl == &q->root_rl) {
335  ent = &q->blkg_list;
336  /* There are no more block groups, hence no request lists */
337  if (list_empty(ent))
338  return NULL;
339  } else {
340  blkg = container_of(rl, struct blkcg_gq, rl);
341  ent = &blkg->q_node;
342  }
343 
344  /* walk to the next list_head, skip root blkcg */
345  ent = ent->next;
346  if (ent == &q->root_blkg->q_node)
347  ent = ent->next;
348  if (ent == &q->blkg_list)
349  return NULL;
350 
351  blkg = container_of(ent, struct blkcg_gq, q_node);
352  return &blkg->rl;
353 }
354 
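__blk_queue_next_rl() only exists to back the blk_queue_for_each_rl() iterator defined in blkdev.h. As a rough sketch (the counting helper below is hypothetical), a caller visits root_rl first and then each blkg's rl, typically with the queue lock held so the blkg list stays stable:

/* Hypothetical example: count the request_lists hanging off a queue. */
static unsigned int example_count_request_lists(struct request_queue *q)
{
        struct request_list *rl;
        unsigned int nr = 0;

        spin_lock_irq(q->queue_lock);
        blk_queue_for_each_rl(rl, q)    /* &q->root_rl, then per-blkg rls */
                nr++;
        spin_unlock_irq(q->queue_lock);

        return nr;
}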
355 static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
356  u64 val)
357 {
358  struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
359  struct blkcg_gq *blkg;
360  struct hlist_node *n;
361  int i;
362 
363  mutex_lock(&blkcg_pol_mutex);
364  spin_lock_irq(&blkcg->lock);
365 
366  /*
367  * Note that stat reset is racy - it doesn't synchronize against
368  * stat updates. This is a debug feature which shouldn't exist
369  * anyway. If you get hit by a race, retry.
370  */
371  hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
372  for (i = 0; i < BLKCG_MAX_POLS; i++) {
373  struct blkcg_policy *pol = blkcg_policy[i];
374 
375  if (blkcg_policy_enabled(blkg->q, pol) &&
376  pol->pd_reset_stats_fn)
377  pol->pd_reset_stats_fn(blkg);
378  }
379  }
380 
381  spin_unlock_irq(&blkcg->lock);
382  mutex_unlock(&blkcg_pol_mutex);
383  return 0;
384 }
385 
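A policy opts into this reset by supplying pd_reset_stats_fn. A hedged sketch of such a callback, assuming a hypothetical per-blkg private struct and the blkg_stat_reset()/blkg_rwstat_reset() helpers from blk-cgroup.h:

static struct blkcg_policy example_policy;      /* registered elsewhere (hypothetical) */

/* Hypothetical per-blkg data; blkg_policy_data must come first. */
struct example_group {
        struct blkg_policy_data pd;
        struct blkg_stat        time;
        struct blkg_rwstat      serviced;
};

static void example_pd_reset_stats(struct blkcg_gq *blkg)
{
        struct example_group *eg =
                container_of(blkg_to_pd(blkg, &example_policy),
                             struct example_group, pd);

        /* called under blkcg->lock from blkcg_reset_stats() above */
        blkg_stat_reset(&eg->time);
        blkg_rwstat_reset(&eg->serviced);
}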
386 static const char *blkg_dev_name(struct blkcg_gq *blkg)
387 {
388  /* some drivers (floppy) instantiate a queue w/o disk registered */
389  if (blkg->q->backing_dev_info.dev)
390  return dev_name(blkg->q->backing_dev_info.dev);
391  return NULL;
392 }
393 
411 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
412  u64 (*prfill)(struct seq_file *,
413  struct blkg_policy_data *, int),
414  const struct blkcg_policy *pol, int data,
415  bool show_total)
416 {
417  struct blkcg_gq *blkg;
418  struct hlist_node *n;
419  u64 total = 0;
420 
421  spin_lock_irq(&blkcg->lock);
422  hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
423  if (blkcg_policy_enabled(blkg->q, pol))
424  total += prfill(sf, blkg->pd[pol->plid], data);
425  spin_unlock_irq(&blkcg->lock);
426 
427  if (show_total)
428  seq_printf(sf, "Total %llu\n", (unsigned long long)total);
429 }
431 
440 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
441 {
442  const char *dname = blkg_dev_name(pd->blkg);
443 
444  if (!dname)
445  return 0;
446 
447  seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
448  return v;
449 }
451 
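In practice a policy wires blkcg_print_blkgs() and a prfill callback into a cftype read handler; cfq's per-device weight file follows this shape. A minimal sketch with hypothetical names:

static struct blkcg_policy example_policy;      /* registered elsewhere (hypothetical) */

struct example_group {
        struct blkg_policy_data pd;     /* must be first */
        unsigned int            weight;
};

/* prfill callback: print one "dev value" line per blkg */
static u64 example_prfill_weight(struct seq_file *sf,
                                 struct blkg_policy_data *pd, int off)
{
        struct example_group *eg = container_of(pd, struct example_group, pd);

        if (!eg->weight)
                return 0;
        return __blkg_prfill_u64(sf, pd, eg->weight);
}

/* .read_seq_string handler of the policy's cftype */
static int example_print_weight(struct cgroup *cgrp, struct cftype *cft,
                                struct seq_file *sf)
{
        blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), example_prfill_weight,
                          &example_policy, 0, false);
        return 0;
}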
460 u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
461  const struct blkg_rwstat *rwstat)
462 {
463  static const char *rwstr[] = {
464  [BLKG_RWSTAT_READ] = "Read",
465  [BLKG_RWSTAT_WRITE] = "Write",
466  [BLKG_RWSTAT_SYNC] = "Sync",
467  [BLKG_RWSTAT_ASYNC] = "Async",
468  };
469  const char *dname = blkg_dev_name(pd->blkg);
470  u64 v;
471  int i;
472 
473  if (!dname)
474  return 0;
475 
476  for (i = 0; i < BLKG_RWSTAT_NR; i++)
477  seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
478  (unsigned long long)rwstat->cnt[i]);
479 
480  v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
481  seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
482  return v;
483 }
484 
493 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off)
494 {
495  return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off));
496 }
498 
507 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
508  int off)
509 {
510  struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off);
511 
512  return __blkg_prfill_rwstat(sf, pd, &rwstat);
513 }
515 
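Because blkg_prfill_stat() and blkg_prfill_rwstat() read the counter at a byte offset from the policy data, one pair of read handlers can serve many stat files by routing offsetof() through cftype->private. A hedged sketch (hypothetical names; blkg_policy_data must be the first member so the offsets line up):

static struct blkcg_policy example_policy;      /* registered elsewhere (hypothetical) */

struct example_group {
        struct blkg_policy_data pd;             /* must be first */
        struct blkg_stat        time;
        struct blkg_rwstat      serviced;
};

static int example_print_stat(struct cgroup *cgrp, struct cftype *cft,
                              struct seq_file *sf)
{
        blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), blkg_prfill_stat,
                          &example_policy, cft->private, false);
        return 0;
}

static int example_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
                                struct seq_file *sf)
{
        blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), blkg_prfill_rwstat,
                          &example_policy, cft->private, true);
        return 0;
}

static struct cftype example_stat_files[] = {
        {
                .name = "example.time",
                .private = offsetof(struct example_group, time),
                .read_seq_string = example_print_stat,
        },
        {
                .name = "example.io_serviced",
                .private = offsetof(struct example_group, serviced),
                .read_seq_string = example_print_rwstat,
        },
        { }     /* terminate */
};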
528 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
529  const char *input, struct blkg_conf_ctx *ctx)
530  __acquires(rcu) __acquires(disk->queue->queue_lock)
531 {
532  struct gendisk *disk;
533  struct blkcg_gq *blkg;
534  unsigned int major, minor;
535  unsigned long long v;
536  int part, ret;
537 
538  if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
539  return -EINVAL;
540 
541  disk = get_gendisk(MKDEV(major, minor), &part);
542  if (!disk || part)
543  return -EINVAL;
544 
545  rcu_read_lock();
546  spin_lock_irq(disk->queue->queue_lock);
547 
548  if (blkcg_policy_enabled(disk->queue, pol))
549  blkg = blkg_lookup_create(blkcg, disk->queue);
550  else
551  blkg = ERR_PTR(-EINVAL);
552 
553  if (IS_ERR(blkg)) {
554  ret = PTR_ERR(blkg);
555  rcu_read_unlock();
556  spin_unlock_irq(disk->queue->queue_lock);
557  put_disk(disk);
558  /*
559  * If queue was bypassing, we should retry. Do so after a
560  * short msleep(). It isn't strictly necessary but queue
561  * can be bypassing for some time and it's always nice to
562  * avoid busy looping.
563  */
564  if (ret == -EBUSY) {
565  msleep(10);
566  ret = restart_syscall();
567  }
568  return ret;
569  }
570 
571  ctx->disk = disk;
572  ctx->blkg = blkg;
573  ctx->v = v;
574  return 0;
575 }
577 
585 void blkg_conf_finish(struct blkg_conf_ctx *ctx)
586  __releases(ctx->disk->queue->queue_lock) __releases(rcu)
587 {
588  spin_unlock_irq(ctx->disk->queue->queue_lock);
589  rcu_read_unlock();
590  put_disk(ctx->disk);
591 }
593 
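The intended consumer is a cftype .write_string handler: parse "MAJ:MIN VAL" with blkg_conf_prep(), update the per-device policy data while the queue lock and RCU are still held, then release everything with blkg_conf_finish(). A minimal sketch modelled on cfq's weight_device file (hypothetical names):

static struct blkcg_policy example_policy;      /* registered elsewhere (hypothetical) */

struct example_group {
        struct blkg_policy_data pd;
        unsigned int            weight;
};

static int example_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
                                     const char *buf)
{
        struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
        struct blkg_conf_ctx ctx;
        struct blkg_policy_data *pd;
        int ret;

        ret = blkg_conf_prep(blkcg, &example_policy, buf, &ctx);
        if (ret)
                return ret;

        /* queue_lock and RCU are held here; ctx.v holds the parsed value */
        pd = blkg_to_pd(ctx.blkg, &example_policy);
        if (pd) {
                struct example_group *eg =
                        container_of(pd, struct example_group, pd);
                eg->weight = ctx.v;
                ret = 0;
        } else {
                ret = -EINVAL;
        }

        blkg_conf_finish(&ctx);
        return ret;
}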
594 struct cftype blkcg_files[] = {
595  {
596  .name = "reset_stats",
597  .write_u64 = blkcg_reset_stats,
598  },
599  { } /* terminate */
600 };
601 
613 static int blkcg_pre_destroy(struct cgroup *cgroup)
614 {
615  struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
616 
617  spin_lock_irq(&blkcg->lock);
618 
619  while (!hlist_empty(&blkcg->blkg_list)) {
620  struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
621  struct blkcg_gq, blkcg_node);
622  struct request_queue *q = blkg->q;
623 
624  if (spin_trylock(q->queue_lock)) {
625  blkg_destroy(blkg);
626  spin_unlock(q->queue_lock);
627  } else {
628  spin_unlock_irq(&blkcg->lock);
629  cpu_relax();
630  spin_lock_irq(&blkcg->lock);
631  }
632  }
633 
634  spin_unlock_irq(&blkcg->lock);
635  return 0;
636 }
637 
638 static void blkcg_destroy(struct cgroup *cgroup)
639 {
640  struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
641 
642  if (blkcg != &blkcg_root)
643  kfree(blkcg);
644 }
645 
646 static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup)
647 {
648  static atomic64_t id_seq = ATOMIC64_INIT(0);
649  struct blkcg *blkcg;
650  struct cgroup *parent = cgroup->parent;
651 
652  if (!parent) {
653  blkcg = &blkcg_root;
654  goto done;
655  }
656 
657  blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
658  if (!blkcg)
659  return ERR_PTR(-ENOMEM);
660 
661  blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
662  blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
663 done:
664  spin_lock_init(&blkcg->lock);
665  INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
666  INIT_HLIST_HEAD(&blkcg->blkg_list);
667 
668  return &blkcg->css;
669 }
670 
681 int blkcg_init_queue(struct request_queue *q)
682 {
683  might_sleep();
684 
685  return blk_throtl_init(q);
686 }
687 
694 void blkcg_drain_queue(struct request_queue *q)
695 {
696  lockdep_assert_held(q->queue_lock);
697 
698  blk_throtl_drain(q);
699 }
700 
707 void blkcg_exit_queue(struct request_queue *q)
708 {
709  spin_lock_irq(q->queue_lock);
710  blkg_destroy_all(q);
711  spin_unlock_irq(q->queue_lock);
712 
713  blk_throtl_exit(q);
714 }
715 
716 /*
717  * We cannot support shared io contexts, as we have no means to support
718  * two tasks with the same ioc in two different groups without major rework
719  * of the main cic data structures. For now we allow a task to change
720  * its cgroup only if it's the only owner of its ioc.
721  */
722 static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
723 {
724  struct task_struct *task;
725  struct io_context *ioc;
726  int ret = 0;
727 
728  /* task_lock() is needed to avoid races with exit_io_context() */
729  cgroup_taskset_for_each(task, cgrp, tset) {
730  task_lock(task);
731  ioc = task->io_context;
732  if (ioc && atomic_read(&ioc->nr_tasks) > 1)
733  ret = -EINVAL;
734  task_unlock(task);
735  if (ret)
736  break;
737  }
738  return ret;
739 }
740 
741 struct cgroup_subsys blkio_subsys = {
742  .name = "blkio",
743  .create = blkcg_create,
744  .can_attach = blkcg_can_attach,
745  .pre_destroy = blkcg_pre_destroy,
746  .destroy = blkcg_destroy,
747  .subsys_id = blkio_subsys_id,
748  .base_cftypes = blkcg_files,
749  .module = THIS_MODULE,
750 
751  /*
752  * blkio subsystem is utterly broken in terms of hierarchy support.
753  * It treats all cgroups equally regardless of where they're
754  * located in the hierarchy - all cgroups are treated as if they're
755  * right below the root. Fix it and remove the following.
756  */
757  .broken_hierarchy = true,
758 };
759 EXPORT_SYMBOL_GPL(blkio_subsys);
760 
777 int blkcg_activate_policy(struct request_queue *q,
778  const struct blkcg_policy *pol)
779 {
780  LIST_HEAD(pds);
781  struct blkcg_gq *blkg;
782  struct blkg_policy_data *pd, *n;
783  int cnt = 0, ret;
784  bool preloaded;
785 
786  if (blkcg_policy_enabled(q, pol))
787  return 0;
788 
789  /* preallocations for root blkg */
790  blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
791  if (!blkg)
792  return -ENOMEM;
793 
794  preloaded = !radix_tree_preload(GFP_KERNEL);
795 
796  blk_queue_bypass_start(q);
797 
798  /* make sure the root blkg exists and count the existing blkgs */
799  spin_lock_irq(q->queue_lock);
800 
801  rcu_read_lock();
802  blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
803  rcu_read_unlock();
804 
805  if (preloaded)
806  radix_tree_preload_end();
807 
808  if (IS_ERR(blkg)) {
809  ret = PTR_ERR(blkg);
810  goto out_unlock;
811  }
812  q->root_blkg = blkg;
813  q->root_rl.blkg = blkg;
814 
815  list_for_each_entry(blkg, &q->blkg_list, q_node)
816  cnt++;
817 
818  spin_unlock_irq(q->queue_lock);
819 
820  /* allocate policy_data for all existing blkgs */
821  while (cnt--) {
822  pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
823  if (!pd) {
824  ret = -ENOMEM;
825  goto out_free;
826  }
827  list_add_tail(&pd->alloc_node, &pds);
828  }
829 
830  /*
831  * Install the allocated pds. With @q bypassing, no new blkg
832  * should have been created while the queue lock was dropped.
833  */
834  spin_lock_irq(q->queue_lock);
835 
836  list_for_each_entry(blkg, &q->blkg_list, q_node) {
837  if (WARN_ON(list_empty(&pds))) {
838  /* umm... this shouldn't happen, just abort */
839  ret = -ENOMEM;
840  goto out_unlock;
841  }
842  pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
843  list_del_init(&pd->alloc_node);
844 
845  /* grab blkcg lock too while installing @pd on @blkg */
846  spin_lock(&blkg->blkcg->lock);
847 
848  blkg->pd[pol->plid] = pd;
849  pd->blkg = blkg;
850  pol->pd_init_fn(blkg);
851 
852  spin_unlock(&blkg->blkcg->lock);
853  }
854 
855  __set_bit(pol->plid, q->blkcg_pols);
856  ret = 0;
857 out_unlock:
858  spin_unlock_irq(q->queue_lock);
859 out_free:
860  blk_queue_bypass_end(q);
861  list_for_each_entry_safe(pd, n, &pds, alloc_node)
862  kfree(pd);
863  return ret;
864 }
866 
875 void blkcg_deactivate_policy(struct request_queue *q,
876  const struct blkcg_policy *pol)
877 {
878  struct blkcg_gq *blkg;
879 
880  if (!blkcg_policy_enabled(q, pol))
881  return;
882 
883  blk_queue_bypass_start(q);
884  spin_lock_irq(q->queue_lock);
885 
886  __clear_bit(pol->plid, q->blkcg_pols);
887 
888  /* if no policy is left, no need for blkgs - shoot them down */
889  if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
890  blkg_destroy_all(q);
891 
892  list_for_each_entry(blkg, &q->blkg_list, q_node) {
893  /* grab blkcg lock too while removing @pd from @blkg */
894  spin_lock(&blkg->blkcg->lock);
895 
896  if (pol->pd_exit_fn)
897  pol->pd_exit_fn(blkg);
898 
899  kfree(blkg->pd[pol->plid]);
900  blkg->pd[pol->plid] = NULL;
901 
902  spin_unlock(&blkg->blkcg->lock);
903  }
904 
905  spin_unlock_irq(q->queue_lock);
906  blk_queue_bypass_end(q);
907 }
909 
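A policy turns itself on for a given queue from its per-queue setup path; blk-throttle, for example, calls blkcg_activate_policy() from blk_throtl_init() and blkcg_deactivate_policy() from blk_throtl_exit(). A hedged sketch with hypothetical names:

static struct blkcg_policy example_policy;      /* registered at module load (hypothetical) */

static int example_init_queue(struct request_queue *q)
{
        /* allocates pds for all existing blkgs and enables the policy on @q */
        return blkcg_activate_policy(q, &example_policy);
}

static void example_exit_queue(struct request_queue *q)
{
        /* frees the pds; if no policy remains, the blkgs go away too */
        blkcg_deactivate_policy(q, &example_policy);
}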
917 int blkcg_policy_register(struct blkcg_policy *pol)
918 {
919  int i, ret;
920 
921  if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
922  return -EINVAL;
923 
924  mutex_lock(&blkcg_pol_mutex);
925 
926  /* find an empty slot */
927  ret = -ENOSPC;
928  for (i = 0; i < BLKCG_MAX_POLS; i++)
929  if (!blkcg_policy[i])
930  break;
931  if (i >= BLKCG_MAX_POLS)
932  goto out_unlock;
933 
934  /* register and update blkgs */
935  pol->plid = i;
936  blkcg_policy[i] = pol;
937 
938  /* everything is in place, add intf files for the new policy */
939  if (pol->cftypes)
940  WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes));
941  ret = 0;
942 out_unlock:
943  mutex_unlock(&blkcg_pol_mutex);
944  return ret;
945 }
947 
954 void blkcg_policy_unregister(struct blkcg_policy *pol)
955 {
956  mutex_lock(&blkcg_pol_mutex);
957 
958  if (WARN_ON(blkcg_policy[pol->plid] != pol))
959  goto out_unlock;
960 
961  /* kill the intf files first */
962  if (pol->cftypes)
963  cgroup_rm_cftypes(&blkio_subsys, pol->cftypes);
964 
965  /* unregister and update blkgs */
966  blkcg_policy[pol->plid] = NULL;
967 out_unlock:
968  mutex_unlock(&blkcg_pol_mutex);
969 }
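Tying the pieces together, a policy module declares its blkcg_policy, registers it at load time, and unregisters it on unload; registration also adds the policy's cftypes to the blkio hierarchy via cgroup_add_cftypes() as shown above. A hedged sketch with hypothetical names:

static struct blkcg_policy example_policy;

struct example_group {
        struct blkg_policy_data pd;     /* must be first; see pd_size below */
        unsigned int            weight;
};

static void example_pd_init(struct blkcg_gq *blkg)
{
        struct example_group *eg =
                container_of(blkg_to_pd(blkg, &example_policy),
                             struct example_group, pd);

        eg->weight = 100;               /* hypothetical default */
}

static struct cftype example_files[] = {
        /* weight/stat files would go here, see the sketches above */
        { }     /* terminate */
};

static struct blkcg_policy example_policy = {
        .pd_size        = sizeof(struct example_group),
        .cftypes        = example_files,
        .pd_init_fn     = example_pd_init,
};

static int __init example_policy_init(void)
{
        return blkcg_policy_register(&example_policy);
}

static void __exit example_policy_exit(void)
{
        blkcg_policy_unregister(&example_policy);
}

module_init(example_policy_init);
module_exit(example_policy_exit);
MODULE_LICENSE("GPL");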