Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
osdblk.c
Go to the documentation of this file.
1 
2 /*
3  osdblk.c -- Export a single SCSI OSD object as a Linux block device
4 
5 
6  Copyright 2009 Red Hat, Inc.
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; see the file COPYING. If not, write to
19  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21 
22  Instructions for use
23  --------------------
24 
25  1) Map a Linux block device to an existing OSD object.
26 
27  In this example, we will use partition id 1234, object id 5678,
28  OSD device /dev/osd1.
29 
30  $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
31 
32 
33  2) List all active blkdev<->object mappings.
34 
35  In this example, we have performed step #1 twice, creating two blkdevs,
36  mapped to two separate OSD objects.
37 
38  $ cat /sys/class/osdblk/list
39  0 174 1234 5678 /dev/osd1
40  1 179 1994 897123 /dev/osd0
41 
42  The columns, in order, are:
43  - blkdev unique id
44  - blkdev assigned major
45  - OSD object partition id
46  - OSD object id
47  - OSD device
48 
49 
50  3) Remove an active blkdev<->object mapping.
51 
52  In this example, we remove the mapping with blkdev unique id 1.
53 
54  $ echo 1 > /sys/class/osdblk/remove
55 
56 
57  NOTE: The actual creation and deletion of OSD objects is outside the scope
58  of this driver.
59 
60  */
61 
62 #include <linux/kernel.h>
63 #include <linux/device.h>
64 #include <linux/module.h>
65 #include <linux/fs.h>
66 #include <linux/slab.h>
67 #include <scsi/osd_initiator.h>
68 #include <scsi/osd_attributes.h>
69 #include <scsi/osd_sec.h>
70 #include <scsi/scsi_device.h>
71 
/* Driver name; also used as the sysfs class name and the blkdev name stem. */
#define DRV_NAME "osdblk"
/* Prefix for log messages emitted by this driver. */
#define PFX DRV_NAME ": "

/*
 * Debug tracing.  Uncomment the define below to make OSDBLK_DEBUG() print
 * KERN_NOTICE messages tagged with the calling function and line number.
 * When tracing is off the printk() sits in a dead "if (0)" branch, so the
 * arguments are still type-checked against the format but nothing is emitted.
 */
/* #define _OSDBLK_DEBUG */
#ifdef _OSDBLK_DEBUG
#define OSDBLK_DEBUG(fmt, a...) \
	printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
#define OSDBLK_DEBUG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif
83 
84 MODULE_AUTHOR("Jeff Garzik <[email protected]>");
85 MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
86 MODULE_LICENSE("GPL");
87 
88 struct osdblk_device;
89 
/*
 * Driver-wide tunables.
 *
 * NOTE(review): OSDBLK_OP_TIMEOUT is stored unscaled into the OSD request's
 * timeout field by osd_sync_op(); the expected unit is not visible in this
 * file -- confirm whether "4 * 60" is meant as seconds or as jiffies.
 */
enum {
	OSDBLK_MINORS_PER_MAJOR	= 256,		/* max minors per blkdev */
	OSDBLK_MAX_REQ		= 32,		/* max parallel requests */
	OSDBLK_OP_TIMEOUT	= 4 * 60,	/* sync OSD req timeout */
};
95 
/*
 * Per-request bookkeeping.  One slot of the owning device's request table is
 * filled in for each block layer request handed to the OSD, and the slot is
 * passed as the private context of the asynchronous OSD completion callback.
 */
struct osdblk_request {
	struct request		*rq;		/* blk layer request */
	struct bio		*bio;		/* cloned bio */
	struct osdblk_device	*osdev;		/* associated blkdev */
};
101 
103  int id; /* blkdev unique id */
104 
105  int major; /* blkdev assigned major */
106  struct gendisk *disk; /* blkdev's gendisk and rq */
107  struct request_queue *q;
108 
109  struct osd_dev *osd; /* associated OSD */
110 
111  char name[32]; /* blkdev name, e.g. osdblk34 */
112 
113  spinlock_t lock; /* queue lock */
114 
115  struct osd_obj_id obj; /* OSD partition, obj id */
116  uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */
117 
118  struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */
119 
120  struct list_head node;
121 
122  char osd_path[0]; /* OSD device path */
123 };
124 
static struct class *class_osdblk;		/* /sys/class/osdblk */
static DEFINE_MUTEX(ctl_mutex);	/* Serialize open/close/setup/teardown */
static LIST_HEAD(osdblkdev_list);	/* all active mappings; walked and modified under ctl_mutex */

/*
 * Block device operations.  Only the owning module is recorded; no
 * open/release/ioctl hooks are provided by this driver.
 */
static const struct block_device_operations osdblk_bd_ops = {
	.owner		= THIS_MODULE,
};
132 
133 static const struct osd_attr g_attr_logical_length = ATTR_DEF(
135 
136 static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
137  const struct osd_obj_id *obj)
138 {
139  osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
140 }
141 
142 /* copied from exofs; move to libosd? */
143 /*
144  * Perform a synchronous OSD operation. copied from exofs; move to libosd?
145  */
146 static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
147 {
148  int ret;
149 
150  or->timeout = timeout;
151  ret = osd_finalize_request(or, 0, credential, NULL);
152  if (ret)
153  return ret;
154 
155  ret = osd_execute_request(or);
156 
157  /* osd_req_decode_sense(or, ret); */
158  return ret;
159 }
160 
161 /*
162  * Perform an asynchronous OSD operation. copied from exofs; move to libosd?
163  */
164 static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
165  void *caller_context, u8 *cred)
166 {
167  int ret;
168 
169  ret = osd_finalize_request(or, 0, cred, NULL);
170  if (ret)
171  return ret;
172 
173  ret = osd_execute_request_async(or, async_done, caller_context);
174 
175  return ret;
176 }
177 
178 /* copied from exofs; move to libosd? */
179 static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
180 {
181  struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
182  void *iter = NULL;
183  int nelem;
184 
185  do {
186  nelem = 1;
187  osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
188  if ((cur_attr.attr_page == attr->attr_page) &&
189  (cur_attr.attr_id == attr->attr_id)) {
190  attr->len = cur_attr.len;
191  attr->val_ptr = cur_attr.val_ptr;
192  return 0;
193  }
194  } while (iter);
195 
196  return -EIO;
197 }
198 
199 static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
200 {
201  struct osd_request *or;
202  struct osd_attr attr;
203  int ret;
204 
205  /* start request */
206  or = osd_start_request(osdev->osd, GFP_KERNEL);
207  if (!or)
208  return -ENOMEM;
209 
210  /* create a get-attributes(length) request */
211  osd_req_get_attributes(or, &osdev->obj);
212 
213  osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
214 
215  /* execute op synchronously */
216  ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
217  if (ret)
218  goto out;
219 
220  /* extract length from returned attribute info */
221  attr = g_attr_logical_length;
222  ret = extract_attr_from_req(or, &attr);
223  if (ret)
224  goto out;
225 
226  *size_out = get_unaligned_be64(attr.val_ptr);
227 
228 out:
229  osd_end_request(or);
230  return ret;
231 
232 }
233 
234 static void osdblk_osd_complete(struct osd_request *or, void *private)
235 {
236  struct osdblk_request *orq = private;
237  struct osd_sense_info osi;
238  int ret = osd_req_decode_sense(or, &osi);
239 
240  if (ret) {
241  ret = -EIO;
242  OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
243  }
244 
245  /* complete OSD request */
246  osd_end_request(or);
247 
248  /* complete request passed to osdblk by block layer */
249  __blk_end_request_all(orq->rq, ret);
250 }
251 
252 static void bio_chain_put(struct bio *chain)
253 {
254  struct bio *tmp;
255 
256  while (chain) {
257  tmp = chain;
258  chain = chain->bi_next;
259 
260  bio_put(tmp);
261  }
262 }
263 
264 static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
265 {
266  struct bio *tmp, *new_chain = NULL, *tail = NULL;
267 
268  while (old_chain) {
269  tmp = bio_clone_kmalloc(old_chain, gfpmask);
270  if (!tmp)
271  goto err_out;
272 
273  tmp->bi_bdev = NULL;
274  gfpmask &= ~__GFP_WAIT;
275  tmp->bi_next = NULL;
276 
277  if (!new_chain)
278  new_chain = tail = tmp;
279  else {
280  tail->bi_next = tmp;
281  tail = tmp;
282  }
283 
284  old_chain = old_chain->bi_next;
285  }
286 
287  return new_chain;
288 
289 err_out:
290  OSDBLK_DEBUG("bio_chain_clone with err\n");
291  bio_chain_put(new_chain);
292  return NULL;
293 }
294 
295 static void osdblk_rq_fn(struct request_queue *q)
296 {
297  struct osdblk_device *osdev = q->queuedata;
298 
299  while (1) {
300  struct request *rq;
301  struct osdblk_request *orq;
302  struct osd_request *or;
303  struct bio *bio;
304  bool do_write, do_flush;
305 
306  /* peek at request from block layer */
307  rq = blk_fetch_request(q);
308  if (!rq)
309  break;
310 
311  /* filter out block requests we don't understand */
312  if (rq->cmd_type != REQ_TYPE_FS) {
313  blk_end_request_all(rq, 0);
314  continue;
315  }
316 
317  /* deduce our operation (read, write, flush) */
318  /* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
319  * into a clearly defined set of RPC commands:
320  * read, write, flush, scsi command, power mgmt req,
321  * driver-specific, etc.
322  */
323 
324  do_flush = rq->cmd_flags & REQ_FLUSH;
325  do_write = (rq_data_dir(rq) == WRITE);
326 
327  if (!do_flush) { /* osd_flush does not use a bio */
328  /* a bio clone to be passed down to OSD request */
329  bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
330  if (!bio)
331  break;
332  } else
333  bio = NULL;
334 
335  /* alloc internal OSD request, for OSD command execution */
336  or = osd_start_request(osdev->osd, GFP_ATOMIC);
337  if (!or) {
338  bio_chain_put(bio);
339  OSDBLK_DEBUG("osd_start_request with err\n");
340  break;
341  }
342 
343  orq = &osdev->req[rq->tag];
344  orq->rq = rq;
345  orq->bio = bio;
346  orq->osdev = osdev;
347 
348  /* init OSD command: flush, write or read */
349  if (do_flush)
350  osd_req_flush_object(or, &osdev->obj,
351  OSD_CDB_FLUSH_ALL, 0, 0);
352  else if (do_write)
353  osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
354  bio, blk_rq_bytes(rq));
355  else
356  osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
357  bio, blk_rq_bytes(rq));
358 
359  OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
360  do_flush ? "flush" : do_write ?
361  "write" : "read", blk_rq_bytes(rq),
362  blk_rq_pos(rq) * 512ULL);
363 
364  /* begin OSD command execution */
365  if (osd_async_op(or, osdblk_osd_complete, orq,
366  osdev->obj_cred)) {
367  osd_end_request(or);
368  blk_requeue_request(q, rq);
369  bio_chain_put(bio);
370  OSDBLK_DEBUG("osd_execute_request_async with err\n");
371  break;
372  }
373 
374  /* remove the special 'flush' marker, now that the command
375  * is executing
376  */
377  rq->special = NULL;
378  }
379 }
380 
381 static void osdblk_free_disk(struct osdblk_device *osdev)
382 {
383  struct gendisk *disk = osdev->disk;
384 
385  if (!disk)
386  return;
387 
388  if (disk->flags & GENHD_FL_UP)
389  del_gendisk(disk);
390  if (disk->queue)
391  blk_cleanup_queue(disk->queue);
392  put_disk(disk);
393 }
394 
395 static int osdblk_init_disk(struct osdblk_device *osdev)
396 {
397  struct gendisk *disk;
398  struct request_queue *q;
399  int rc;
400  u64 obj_size = 0;
401 
402  /* contact OSD, request size info about the object being mapped */
403  rc = osdblk_get_obj_size(osdev, &obj_size);
404  if (rc)
405  return rc;
406 
407  /* create gendisk info */
409  if (!disk)
410  return -ENOMEM;
411 
412  sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
413  disk->major = osdev->major;
414  disk->first_minor = 0;
415  disk->fops = &osdblk_bd_ops;
416  disk->private_data = osdev;
417 
418  /* init rq */
419  q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
420  if (!q) {
421  put_disk(disk);
422  return -ENOMEM;
423  }
424 
425  /* switch queue to TCQ mode; allocate tag map */
427  if (rc) {
429  put_disk(disk);
430  return rc;
431  }
432 
433  /* Set our limits to the lower device limits, because osdblk cannot
434  * sleep when allocating a lower-request and therefore cannot be
435  * bouncing.
436  */
437  blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
438 
441 
442  disk->queue = q;
443 
444  q->queuedata = osdev;
445 
446  osdev->disk = disk;
447  osdev->q = q;
448 
449  /* finally, announce the disk to the world */
450  set_capacity(disk, obj_size / 512ULL);
451  add_disk(disk);
452 
453  printk(KERN_INFO "%s: Added of size 0x%llx\n",
454  disk->disk_name, (unsigned long long)obj_size);
455 
456  return 0;
457 }
458 
459 /********************************************************************
460  * /sys/class/osdblk/
461  * add map OSD object to blkdev
462  * remove unmap OSD object
463  * list show mappings
464  *******************************************************************/
465 
/*
 * Release callback of the osdblk class: frees the class structure that
 * osdblk_sysfs_init() allocated.
 */
static void class_osdblk_release(struct class *cls)
{
	kfree(cls);
}
470 
471 static ssize_t class_osdblk_list(struct class *c,
472  struct class_attribute *attr,
473  char *data)
474 {
475  int n = 0;
476  struct list_head *tmp;
477 
479 
480  list_for_each(tmp, &osdblkdev_list) {
481  struct osdblk_device *osdev;
482 
483  osdev = list_entry(tmp, struct osdblk_device, node);
484 
485  n += sprintf(data+n, "%d %d %llu %llu %s\n",
486  osdev->id,
487  osdev->major,
488  osdev->obj.partition,
489  osdev->obj.id,
490  osdev->osd_path);
491  }
492 
493  mutex_unlock(&ctl_mutex);
494  return n;
495 }
496 
497 static ssize_t class_osdblk_add(struct class *c,
498  struct class_attribute *attr,
499  const char *buf, size_t count)
500 {
501  struct osdblk_device *osdev;
502  ssize_t rc;
503  int irc, new_id = 0;
504  struct list_head *tmp;
505 
506  if (!try_module_get(THIS_MODULE))
507  return -ENODEV;
508 
509  /* new osdblk_device object */
510  osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
511  if (!osdev) {
512  rc = -ENOMEM;
513  goto err_out_mod;
514  }
515 
516  /* static osdblk_device initialization */
517  spin_lock_init(&osdev->lock);
518  INIT_LIST_HEAD(&osdev->node);
519 
520  /* generate unique id: find highest unique id, add one */
521 
523 
524  list_for_each(tmp, &osdblkdev_list) {
525  struct osdblk_device *osdev;
526 
527  osdev = list_entry(tmp, struct osdblk_device, node);
528  if (osdev->id > new_id)
529  new_id = osdev->id + 1;
530  }
531 
532  osdev->id = new_id;
533 
534  /* add to global list */
535  list_add_tail(&osdev->node, &osdblkdev_list);
536 
537  mutex_unlock(&ctl_mutex);
538 
539  /* parse add command */
540  if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
541  osdev->osd_path) != 3) {
542  rc = -EINVAL;
543  goto err_out_slot;
544  }
545 
546  /* initialize rest of new object */
547  sprintf(osdev->name, DRV_NAME "%d", osdev->id);
548 
549  /* contact requested OSD */
550  osdev->osd = osduld_path_lookup(osdev->osd_path);
551  if (IS_ERR(osdev->osd)) {
552  rc = PTR_ERR(osdev->osd);
553  goto err_out_slot;
554  }
555 
556  /* build OSD credential */
557  osdblk_make_credential(osdev->obj_cred, &osdev->obj);
558 
559  /* register our block device */
560  irc = register_blkdev(0, osdev->name);
561  if (irc < 0) {
562  rc = irc;
563  goto err_out_osd;
564  }
565 
566  osdev->major = irc;
567 
568  /* set up and announce blkdev mapping */
569  rc = osdblk_init_disk(osdev);
570  if (rc)
571  goto err_out_blkdev;
572 
573  return count;
574 
575 err_out_blkdev:
576  unregister_blkdev(osdev->major, osdev->name);
577 err_out_osd:
578  osduld_put_device(osdev->osd);
579 err_out_slot:
581  list_del_init(&osdev->node);
582  mutex_unlock(&ctl_mutex);
583 
584  kfree(osdev);
585 err_out_mod:
586  OSDBLK_DEBUG("Error adding device %s\n", buf);
587  module_put(THIS_MODULE);
588  return rc;
589 }
590 
591 static ssize_t class_osdblk_remove(struct class *c,
592  struct class_attribute *attr,
593  const char *buf,
594  size_t count)
595 {
596  struct osdblk_device *osdev = NULL;
597  int target_id, rc;
598  unsigned long ul;
599  struct list_head *tmp;
600 
601  rc = strict_strtoul(buf, 10, &ul);
602  if (rc)
603  return rc;
604 
605  /* convert to int; abort if we lost anything in the conversion */
606  target_id = (int) ul;
607  if (target_id != ul)
608  return -EINVAL;
609 
610  /* remove object from list immediately */
612 
613  list_for_each(tmp, &osdblkdev_list) {
614  osdev = list_entry(tmp, struct osdblk_device, node);
615  if (osdev->id == target_id) {
616  list_del_init(&osdev->node);
617  break;
618  }
619  osdev = NULL;
620  }
621 
622  mutex_unlock(&ctl_mutex);
623 
624  if (!osdev)
625  return -ENOENT;
626 
627  /* clean up and free blkdev and associated OSD connection */
628  osdblk_free_disk(osdev);
629  unregister_blkdev(osdev->major, osdev->name);
630  osduld_put_device(osdev->osd);
631  kfree(osdev);
632 
633  /* release module ref */
634  module_put(THIS_MODULE);
635 
636  return count;
637 }
638 
639 static struct class_attribute class_osdblk_attrs[] = {
640  __ATTR(add, 0200, NULL, class_osdblk_add),
641  __ATTR(remove, 0200, NULL, class_osdblk_remove),
642  __ATTR(list, 0444, class_osdblk_list, NULL),
644 };
645 
646 static int osdblk_sysfs_init(void)
647 {
648  int ret = 0;
649 
650  /*
651  * create control files in sysfs
652  * /sys/class/osdblk/...
653  */
654  class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
655  if (!class_osdblk)
656  return -ENOMEM;
657 
658  class_osdblk->name = DRV_NAME;
659  class_osdblk->owner = THIS_MODULE;
660  class_osdblk->class_release = class_osdblk_release;
661  class_osdblk->class_attrs = class_osdblk_attrs;
662 
663  ret = class_register(class_osdblk);
664  if (ret) {
665  kfree(class_osdblk);
666  class_osdblk = NULL;
667  printk(PFX "failed to create class osdblk\n");
668  return ret;
669  }
670 
671  return 0;
672 }
673 
674 static void osdblk_sysfs_cleanup(void)
675 {
676  if (class_osdblk)
677  class_destroy(class_osdblk);
678  class_osdblk = NULL;
679 }
680 
681 static int __init osdblk_init(void)
682 {
683  int rc;
684 
685  rc = osdblk_sysfs_init();
686  if (rc)
687  return rc;
688 
689  return 0;
690 }
691 
692 static void __exit osdblk_exit(void)
693 {
694  osdblk_sysfs_cleanup();
695 }
696 
697 module_init(osdblk_init);
698 module_exit(osdblk_exit);
699