Linux Kernel 3.7.1
bio-integrity.c
/*
 * bio-integrity.c - bio data integrity extensions
 *
 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
 * Written by: Martin K. Petersen <[email protected]>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING. If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 *
 */

#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/export.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct integrity_slab {
        struct kmem_cache *slab;
        unsigned short nr_vecs;
        char name[8];
};

#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
        IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
};
#undef IS

static struct workqueue_struct *kintegrityd_wq;

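/* Map a vector count to the index of the smallest bip slab that fits it */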
static inline unsigned int vecs_to_idx(unsigned int nr)
{
        switch (nr) {
        case 1:
                return 0;
        case 2 ... 4:
                return 1;
        case 5 ... 16:
                return 2;
        case 17 ... 64:
                return 3;
        case 65 ... 128:
                return 4;
        case 129 ... BIO_MAX_PAGES:
                return 5;
        default:
                BUG();
        }
}

static inline int use_bip_pool(unsigned int idx)
{
        if (idx == BIOVEC_MAX_IDX)
                return 1;

        return 0;
}

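/**
 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
 * @bio:	bio to attach integrity metadata to
 * @gfp_mask:	Memory allocation mask
 * @nr_vecs:	Number of integrity metadata scatter-gather elements
 *
 * Description: This function prepares a bio for attaching integrity
 * metadata.  nr_vecs specifies the maximum number of pages containing
 * integrity metadata that can be attached.
 */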
struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
                                                  gfp_t gfp_mask,
                                                  unsigned int nr_vecs)
{
        struct bio_integrity_payload *bip;
        unsigned int idx = vecs_to_idx(nr_vecs);
        struct bio_set *bs = bio->bi_pool;

        if (!bs)
                bs = fs_bio_set;

        BUG_ON(bio == NULL);
        bip = NULL;

        /* Lower order allocations come straight from slab */
        if (!use_bip_pool(idx))
                bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);

        /* Use mempool if lower order alloc failed or max vecs were requested */
        if (bip == NULL) {
                idx = BIOVEC_MAX_IDX;  /* so we free the payload properly later */
                bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);

                if (unlikely(bip == NULL)) {
                        printk(KERN_ERR "%s: could not alloc bip\n", __func__);
                        return NULL;
                }
        }

        memset(bip, 0, sizeof(*bip));

        bip->bip_slab = idx;
        bip->bip_bio = bio;
        bio->bi_integrity = bip;

        return bip;
}
EXPORT_SYMBOL(bio_integrity_alloc);

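/**
 * bio_integrity_free - Free bio integrity payload
 * @bio:	bio containing bip to be freed
 *
 * Description: Used to free the integrity portion of a bio. Usually
 * called from bio_free().
 */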
void bio_integrity_free(struct bio *bio)
{
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct bio_set *bs = bio->bi_pool;

        if (!bs)
                bs = fs_bio_set;

        BUG_ON(bip == NULL);

        /* A cloned bio doesn't own the integrity metadata */
        if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
            && bip->bip_buf != NULL)
                kfree(bip->bip_buf);

        if (use_bip_pool(bip->bip_slab))
                mempool_free(bip, bs->bio_integrity_pool);
        else
                kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);

        bio->bi_integrity = NULL;
}
EXPORT_SYMBOL(bio_integrity_free);

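/**
 * bio_integrity_add_page - Attach integrity metadata
 * @bio:	bio to update
 * @page:	page containing integrity metadata
 * @len:	number of bytes of integrity metadata in page
 * @offset:	start offset within page
 *
 * Description: Attach a page containing integrity metadata to bio.
 */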
int bio_integrity_add_page(struct bio *bio, struct page *page,
                           unsigned int len, unsigned int offset)
{
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct bio_vec *iv;

        if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
                printk(KERN_ERR "%s: bip_vec full\n", __func__);
                return 0;
        }

        iv = bip_vec_idx(bip, bip->bip_vcnt);
        BUG_ON(iv == NULL);

        iv->bv_page = page;
        iv->bv_len = len;
        iv->bv_offset = offset;
        bip->bip_vcnt++;

        return len;
}
EXPORT_SYMBOL(bio_integrity_add_page);

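/* Does the target device's integrity profile cover this I/O direction? */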
static int bdev_integrity_enabled(struct block_device *bdev, int rw)
{
        struct blk_integrity *bi = bdev_get_integrity(bdev);

        if (bi == NULL)
                return 0;

        if (rw == READ && bi->verify_fn != NULL &&
            (bi->flags & INTEGRITY_FLAG_READ))
                return 1;

        if (rw == WRITE && bi->generate_fn != NULL &&
            (bi->flags & INTEGRITY_FLAG_WRITE))
                return 1;

        return 0;
}

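/**
 * bio_integrity_enabled - Check whether integrity can be passed
 * @bio:	bio to check
 *
 * Description: Determines whether bio_integrity_prep() can be called
 * on this bio or not.  bio data direction and target device must be
 * set prior to calling.  The function honors the write_generate and
 * read_verify flags in sysfs.
 */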
int bio_integrity_enabled(struct bio *bio)
{
        /* Already protected? */
        if (bio_integrity(bio))
                return 0;

        return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
}
EXPORT_SYMBOL(bio_integrity_enabled);

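/**
 * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
 * @bi:		blk_integrity profile for device
 * @sectors:	Number of 512 byte sectors to convert
 *
 * Description: The block layer calculates everything in 512 byte
 * sectors but integrity metadata is done in terms of the hardware
 * sector size of the storage device.  Convert the block layer sectors
 * to physical sectors.
 */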
static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
                                                    unsigned int sectors)
{
        /* At this point there are only 512b or 4096b DIF/EPP devices */
        if (bi->sector_size == 4096)
                return sectors >> 3;

        return sectors;
}

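/**
 * bio_integrity_tag_size - Retrieve integrity tag space
 * @bio:	bio to inspect
 *
 * Description: Returns the maximum number of tag bytes that can be
 * attached to this bio.  Filesystems can use this to determine how
 * much metadata to attach to an I/O.
 */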
unsigned int bio_integrity_tag_size(struct bio *bio)
{
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);

        BUG_ON(bio->bi_size == 0);

        return bi->tag_size * (bio->bi_size / bi->sector_size);
}
EXPORT_SYMBOL(bio_integrity_tag_size);

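/* Copy tag data into (set) or out of (get) the protection buffer's tag area */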
int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
{
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        unsigned int nr_sectors;

        BUG_ON(bip->bip_buf == NULL);

        if (bi->tag_size == 0)
                return -1;

        nr_sectors = bio_integrity_hw_sectors(bi,
                                        DIV_ROUND_UP(len, bi->tag_size));

        if (nr_sectors * bi->tuple_size > bip->bip_size) {
                printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
                       __func__, nr_sectors * bi->tuple_size, bip->bip_size);
                return -1;
        }

        if (set)
                bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
        else
                bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);

        return 0;
}

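/**
 * bio_integrity_set_tag - Attach a tag buffer to a bio
 * @bio:	bio to attach buffer to
 * @tag_buf:	Pointer to a buffer containing tag data
 * @len:	Length of the included buffer
 *
 * Description: Use this function to tag a bio by leveraging the extra
 * space provided by devices formatted with integrity protection.  The
 * size of the buffer must match the size returned by
 * bio_integrity_tag_size().
 */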
int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
{
        BUG_ON(bio_data_dir(bio) != WRITE);

        return bio_integrity_tag(bio, tag_buf, len, 1);
}
EXPORT_SYMBOL(bio_integrity_set_tag);

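/**
 * bio_integrity_get_tag - Retrieve a tag buffer from a bio
 * @bio:	bio to retrieve buffer from
 * @tag_buf:	Pointer to a buffer for the tag data
 * @len:	Length of the target buffer
 *
 * Description: Use this function to retrieve the tag buffer from a
 * completed I/O.  The size of the buffer must match the size returned
 * by bio_integrity_tag_size().
 */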
int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
{
        BUG_ON(bio_data_dir(bio) != READ);

        return bio_integrity_tag(bio, tag_buf, len, 0);
}
EXPORT_SYMBOL(bio_integrity_get_tag);

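/**
 * bio_integrity_generate - Generate integrity metadata for a bio
 * @bio:	bio to generate integrity metadata for
 *
 * Description: Generates integrity metadata for a bio by calling the
 * block device's generation callback function.  The bio must have a
 * bip attached with enough room to accommodate the generated
 * integrity metadata.
 */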
static void bio_integrity_generate(struct bio *bio)
{
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        struct blk_integrity_exchg bix;
        struct bio_vec *bv;
        sector_t sector = bio->bi_sector;
        unsigned int i, sectors, total;
        void *prot_buf = bio->bi_integrity->bip_buf;

        total = 0;
        bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
        bix.sector_size = bi->sector_size;

        bio_for_each_segment(bv, bio, i) {
                void *kaddr = kmap_atomic(bv->bv_page);
                bix.data_buf = kaddr + bv->bv_offset;
                bix.data_size = bv->bv_len;
                bix.prot_buf = prot_buf;
                bix.sector = sector;

                bi->generate_fn(&bix);

                sectors = bv->bv_len / bi->sector_size;
                sector += sectors;
                prot_buf += sectors * bi->tuple_size;
                total += sectors * bi->tuple_size;
                BUG_ON(total > bio->bi_integrity->bip_size);

                kunmap_atomic(kaddr);
        }
}

static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
{
        if (bi)
                return bi->tuple_size;

        return 0;
}

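/**
 * bio_integrity_prep - Prepare bio for integrity I/O
 * @bio:	bio to prepare
 *
 * Description: Allocates a buffer for integrity metadata, maps the
 * pages and attaches them to a bio.  The bio must have data
 * direction, target device and start sector set prior to calling.
 * In addition, the length of the data must be a multiple of the
 * logical block size.
 */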
int bio_integrity_prep(struct bio *bio)
{
        struct bio_integrity_payload *bip;
        struct blk_integrity *bi;
        struct request_queue *q;
        void *buf;
        unsigned long start, end;
        unsigned int len, nr_pages;
        unsigned int bytes, offset, i;
        unsigned int sectors;

        bi = bdev_get_integrity(bio->bi_bdev);
        q = bdev_get_queue(bio->bi_bdev);
        BUG_ON(bi == NULL);
        BUG_ON(bio_integrity(bio));

        sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));

        /* Allocate kernel buffer for protection data */
        len = sectors * blk_integrity_tuple_size(bi);
        buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
        if (unlikely(buf == NULL)) {
                printk(KERN_ERR "could not allocate integrity buffer\n");
                return -ENOMEM;
        }

        end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        start = ((unsigned long) buf) >> PAGE_SHIFT;
        nr_pages = end - start;

        /* Allocate bio integrity payload and integrity vectors */
        bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
        if (unlikely(bip == NULL)) {
                printk(KERN_ERR "could not allocate data integrity bioset\n");
                kfree(buf);
                return -EIO;
        }

        bip->bip_buf = buf;
        bip->bip_size = len;
        bip->bip_sector = bio->bi_sector;

        /* Map it */
        offset = offset_in_page(buf);
        for (i = 0 ; i < nr_pages ; i++) {
                int ret;
                bytes = PAGE_SIZE - offset;

                if (len <= 0)
                        break;

                if (bytes > len)
                        bytes = len;

                ret = bio_integrity_add_page(bio, virt_to_page(buf),
                                             bytes, offset);

                if (ret == 0)
                        return 0;

                if (ret < bytes)
                        break;

                buf += bytes;
                len -= bytes;
                offset = 0;
        }

        /* Install custom I/O completion handler if read verify is enabled */
        if (bio_data_dir(bio) == READ) {
                bip->bip_end_io = bio->bi_end_io;
                bio->bi_end_io = bio_integrity_endio;
        }

        /* Auto-generate integrity metadata if this is a write */
        if (bio_data_dir(bio) == WRITE)
                bio_integrity_generate(bio);

        return 0;
}
EXPORT_SYMBOL(bio_integrity_prep);

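/**
 * bio_integrity_verify - Verify integrity metadata for a bio
 * @bio:	bio to verify
 *
 * Description: This function is called to verify the integrity of a
 * bio.  The data in the bio io_vec is compared to the integrity
 * metadata returned by the HBA.
 */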
static int bio_integrity_verify(struct bio *bio)
{
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        struct blk_integrity_exchg bix;
        struct bio_vec *bv;
        sector_t sector = bio->bi_integrity->bip_sector;
        unsigned int i, sectors, total, ret;
        void *prot_buf = bio->bi_integrity->bip_buf;

        ret = total = 0;
        bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
        bix.sector_size = bi->sector_size;

        bio_for_each_segment(bv, bio, i) {
                void *kaddr = kmap_atomic(bv->bv_page);
                bix.data_buf = kaddr + bv->bv_offset;
                bix.data_size = bv->bv_len;
                bix.prot_buf = prot_buf;
                bix.sector = sector;

                ret = bi->verify_fn(&bix);

                if (ret) {
                        kunmap_atomic(kaddr);
                        return ret;
                }

                sectors = bv->bv_len / bi->sector_size;
                sector += sectors;
                prot_buf += sectors * bi->tuple_size;
                total += sectors * bi->tuple_size;
                BUG_ON(total > bio->bi_integrity->bip_size);

                kunmap_atomic(kaddr);
        }

        return ret;
}

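/**
 * bio_integrity_verify_fn - Integrity I/O completion worker
 * @work:	Work struct stored in bio to be verified
 *
 * Description: This workqueue function is called to complete a READ
 * request.  The function verifies the transferred integrity metadata
 * and then calls the original bio end_io function.
 */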
static void bio_integrity_verify_fn(struct work_struct *work)
{
        struct bio_integrity_payload *bip =
                container_of(work, struct bio_integrity_payload, bip_work);
        struct bio *bio = bip->bip_bio;
        int error;

        error = bio_integrity_verify(bio);

        /* Restore original bio completion handler */
        bio->bi_end_io = bip->bip_end_io;
        bio_endio(bio, error);
}

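/**
 * bio_integrity_endio - Integrity I/O completion function
 * @bio:	Protected bio
 * @error:	I/O error status
 *
 * Description: Completion for integrity I/O
 *
 * Normally I/O completion is done in interrupt context.  However,
 * verifying I/O integrity is a time-consuming task which must be run
 * in process context.  This function postpones completion
 * accordingly.
 */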
void bio_integrity_endio(struct bio *bio, int error)
{
        struct bio_integrity_payload *bip = bio->bi_integrity;

        BUG_ON(bip->bip_bio != bio);

        /* In case of an I/O error there is no point in verifying the
         * integrity metadata.  Restore original bio end_io handler
         * and run it.
         */
        if (error) {
                bio->bi_end_io = bip->bip_end_io;
                bio_endio(bio, error);

                return;
        }

        INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
        queue_work(kintegrityd_wq, &bip->bip_work);
}
EXPORT_SYMBOL(bio_integrity_endio);

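/**
 * bio_integrity_mark_head - Advance bip_vec skip bytes
 * @bip:	Integrity vector to advance
 * @skip:	Number of bytes to advance it
 */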
void bio_integrity_mark_head(struct bio_integrity_payload *bip,
                             unsigned int skip)
{
        struct bio_vec *iv;
        unsigned int i;

        bip_for_each_vec(iv, bip, i) {
                if (skip == 0) {
                        bip->bip_idx = i;
                        return;
                } else if (skip >= iv->bv_len) {
                        skip -= iv->bv_len;
                } else { /* skip < iv->bv_len */
                        iv->bv_offset += skip;
                        iv->bv_len -= skip;
                        bip->bip_idx = i;
                        return;
                }
        }
}

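/**
 * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
 * @bip:	Integrity vector to truncate
 * @len:	New length of integrity vector
 */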
void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
                             unsigned int len)
{
        struct bio_vec *iv;
        unsigned int i;

        bip_for_each_vec(iv, bip, i) {
                if (len == 0) {
                        bip->bip_vcnt = i;
                        return;
                } else if (len >= iv->bv_len) {
                        len -= iv->bv_len;
                } else { /* len < iv->bv_len */
                        iv->bv_len = len;
                        len = 0;
                }
        }
}

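/**
 * bio_integrity_advance - Advance integrity vector
 * @bio:	bio whose integrity vector to update
 * @bytes_done:	number of data bytes that have been completed
 *
 * Description: This function calculates how many integrity bytes the
 * number of completed data bytes correspond to and advances the
 * integrity vector accordingly.
 */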
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        unsigned int nr_sectors;

        BUG_ON(bip == NULL);
        BUG_ON(bi == NULL);

        nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
        bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
}
EXPORT_SYMBOL(bio_integrity_advance);

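/**
 * bio_integrity_trim - Trim integrity vector
 * @bio:	bio whose integrity vector to update
 * @offset:	offset to first data sector
 * @sectors:	number of data sectors
 *
 * Description: Used to trim the integrity vector in a cloned bio.
 * The ivec will be advanced corresponding to 'offset' data sectors
 * and the length will be truncated corresponding to 'sectors'.
 */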
void bio_integrity_trim(struct bio *bio, unsigned int offset,
                        unsigned int sectors)
{
        struct bio_integrity_payload *bip = bio->bi_integrity;
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
        unsigned int nr_sectors;

        BUG_ON(bip == NULL);
        BUG_ON(bi == NULL);
        BUG_ON(!bio_flagged(bio, BIO_CLONED));

        nr_sectors = bio_integrity_hw_sectors(bi, sectors);
        bip->bip_sector += offset;
        bio_integrity_mark_head(bip, offset * bi->tuple_size);
        bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
}
EXPORT_SYMBOL(bio_integrity_trim);

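/**
 * bio_integrity_split - Split integrity metadata
 * @bio:	Protected bio
 * @bp:		Resulting bio_pair
 * @sectors:	Offset
 *
 * Description: Splits an integrity page into a bio_pair.
 */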
void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
{
        struct blk_integrity *bi;
        struct bio_integrity_payload *bip = bio->bi_integrity;
        unsigned int nr_sectors;

        if (bio_integrity(bio) == 0)
                return;

        bi = bdev_get_integrity(bio->bi_bdev);
        BUG_ON(bi == NULL);
        BUG_ON(bip->bip_vcnt != 1);

        nr_sectors = bio_integrity_hw_sectors(bi, sectors);

        bp->bio1.bi_integrity = &bp->bip1;
        bp->bio2.bi_integrity = &bp->bip2;

        bp->iv1 = bip->bip_vec[0];
        bp->iv2 = bip->bip_vec[0];

        bp->bip1.bip_vec[0] = bp->iv1;
        bp->bip2.bip_vec[0] = bp->iv2;

        bp->iv1.bv_len = sectors * bi->tuple_size;
        bp->iv2.bv_offset += sectors * bi->tuple_size;
        bp->iv2.bv_len -= sectors * bi->tuple_size;

        bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
        bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;

        bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
        bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
}
EXPORT_SYMBOL(bio_integrity_split);

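/**
 * bio_integrity_clone - Callback for cloning bios with integrity metadata
 * @bio:	New bio
 * @bio_src:	Original bio
 * @gfp_mask:	Memory allocation mask
 *
 * Description: Called to allocate a bip when cloning a bio
 */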
int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
                        gfp_t gfp_mask)
{
        struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
        struct bio_integrity_payload *bip;

        BUG_ON(bip_src == NULL);

        bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);

        if (bip == NULL)
                return -EIO;

        memcpy(bip->bip_vec, bip_src->bip_vec,
               bip_src->bip_vcnt * sizeof(struct bio_vec));

        bip->bip_sector = bip_src->bip_sector;
        bip->bip_vcnt = bip_src->bip_vcnt;
        bip->bip_idx = bip_src->bip_idx;

        return 0;
}
EXPORT_SYMBOL(bio_integrity_clone);

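/* Set up a bio_set's integrity mempool, backed by the largest bip slab */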
int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
        unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);

        if (bs->bio_integrity_pool)
                return 0;

        bs->bio_integrity_pool =
                mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);

        if (!bs->bio_integrity_pool)
                return -1;

        return 0;
}
EXPORT_SYMBOL(bioset_integrity_create);

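/* Tear down a bio_set's integrity mempool */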
void bioset_integrity_free(struct bio_set *bs)
{
        if (bs->bio_integrity_pool)
                mempool_destroy(bs->bio_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);

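/* Set up the integrity workqueue and the per-size bip slab caches at init */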
void __init bio_integrity_init(void)
{
        unsigned int i;

        /*
         * kintegrityd won't block much but may burn a lot of CPU cycles.
         * Make it highpri CPU intensive wq with max concurrency of 1.
         */
        kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
                                         WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
        if (!kintegrityd_wq)
                panic("Failed to create kintegrityd\n");

        for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
                unsigned int size;

                size = sizeof(struct bio_integrity_payload)
                        + bip_slab[i].nr_vecs * sizeof(struct bio_vec);

                bip_slab[i].slab =
                        kmem_cache_create(bip_slab[i].name, size, 0,
                                          SLAB_HWCACHE_ALIGN, NULL);
        }
}