36 #include <linux/kernel.h>
37 #include <linux/device.h>
38 #include <linux/module.h>
52 #define SECTOR_SHIFT 9
53 #define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
57 #define U64_MAX ((u64) (~0ULL))
59 #define RBD_DRV_NAME "rbd"
60 #define RBD_DRV_NAME_LONG "rbd (rados block device)"
62 #define RBD_MINORS_PER_MAJOR 256
64 #define RBD_MAX_SNAP_NAME_LEN 32
65 #define RBD_MAX_SNAP_COUNT 510
66 #define RBD_MAX_OPT_LEN 1024
68 #define RBD_SNAP_HEAD_NAME "-"
70 #define RBD_IMAGE_ID_LEN_MAX 64
71 #define RBD_OBJ_PREFIX_LEN_MAX 64
79 #define DEV_NAME_LEN 32
80 #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
82 #define RBD_READ_ONLY_DEFAULT false
217 static int rbd_dev_snaps_update(
struct rbd_device *rbd_dev);
218 static int rbd_dev_snaps_register(
struct rbd_device *rbd_dev);
220 static void rbd_dev_release(
struct device *
dev);
221 static void __rbd_remove_snap_dev(
struct rbd_snap *
snap);
234 static struct bus_type rbd_bus_type = {
236 .bus_attrs = rbd_bus_attrs,
239 static void rbd_root_dev_release(
struct device *
dev)
243 static struct device rbd_root_dev = {
245 .release = rbd_root_dev_release,
249 #define rbd_assert(expr) \
250 if (unlikely(!(expr))) { \
251 printk(KERN_ERR "\nAssertion failure in %s() " \
253 "\trbd_assert(%s);\n\n", \
254 __func__, __LINE__, #expr); \
258 # define rbd_assert(expr) ((void) 0)
266 static void rbd_put_dev(
struct rbd_device *rbd_dev)
271 static int rbd_refresh_header(
struct rbd_device *rbd_dev,
u64 *hver);
280 rbd_get_dev(rbd_dev);
286 static int rbd_release(
struct gendisk *
disk,
fmode_t mode)
288 struct rbd_device *rbd_dev = disk->private_data;
290 rbd_put_dev(rbd_dev);
295 static const struct block_device_operations rbd_bd_ops = {
298 .release = rbd_release,
310 dout(
"rbd_client_create\n");
315 kref_init(&rbdc->
kref);
316 INIT_LIST_HEAD(&rbdc->
node);
329 spin_lock(&rbd_client_list_lock);
331 spin_unlock(&rbd_client_list_lock);
335 dout(
"rbd_client_create created %p\n", rbdc);
361 spin_lock(&rbd_client_list_lock);
364 kref_get(&client_node->
kref);
369 spin_unlock(&rbd_client_list_lock);
371 return found ? client_node :
NULL;
399 static int parse_rbd_opts_token(
char *
c,
void *
private)
412 pr_err(
"bad mount option arg (not int) "
416 dout(
"got int token %d val %d\n", token, intval);
418 dout(
"got string token %d val %s\n", token,
421 dout(
"got Boolean token %d\n", token);
423 dout(
"got token %d\n", token);
444 static int rbd_get_client(
struct rbd_device *rbd_dev,
const char *mon_addr,
445 size_t mon_addr_len,
char *
options)
454 mon_addr + mon_addr_len,
455 parse_rbd_opts_token, rbd_opts);
456 if (IS_ERR(ceph_opts))
457 return PTR_ERR(ceph_opts);
459 rbdc = rbd_client_find(ceph_opts);
464 rbdc = rbd_client_create(ceph_opts);
466 return PTR_ERR(rbdc);
478 static void rbd_client_release(
struct kref *
kref)
482 dout(
"rbd_release_client %p\n", rbdc);
483 spin_lock(&rbd_client_list_lock);
485 spin_unlock(&rbd_client_list_lock);
495 static void rbd_put_client(
struct rbd_device *rbd_dev)
497 kref_put(&rbd_dev->
rbd_client->kref, rbd_client_release);
504 static void rbd_coll_release(
struct kref *
kref)
509 dout(
"rbd_coll_release %p\n", coll);
513 static bool rbd_image_format_valid(
u32 image_format)
515 return image_format == 1 || image_format == 2;
533 if (snap_count > size /
sizeof (
__le64))
540 size -= snap_count *
sizeof (
__le64);
559 memset(header, 0,
sizeof (*header));
591 size = snap_count *
sizeof (*header->
snap_sizes);
613 size += snap_count *
sizeof (header->
snapc->snaps[0]);
622 header->
snapc->snaps[i] =
638 static int snap_by_name(
struct rbd_device *rbd_dev,
const char *snap_name)
656 static int rbd_dev_set_mapping(
struct rbd_device *rbd_dev,
char *snap_name)
665 rbd_dev->
mapping.snap_exists =
false;
669 ret = snap_by_name(rbd_dev, snap_name);
672 rbd_dev->
mapping.snap_exists =
true;
673 rbd_dev->
mapping.read_only =
true;
675 rbd_dev->
mapping.snap_name = snap_name;
688 ceph_put_snap_context(header->
snapc);
701 segment = offset >> rbd_dev->
header.obj_order;
703 rbd_dev->
header.object_prefix, segment);
705 pr_err(
"error formatting segment name for #%llu (%d)\n",
716 u64 segment_size = (
u64) 1 << rbd_dev->
header.obj_order;
718 return offset & (segment_size - 1);
724 u64 segment_size = (
u64) 1 << rbd_dev->
header.obj_order;
726 offset &= segment_size - 1;
729 if (offset + length > segment_size)
730 length = segment_size -
offset;
747 end_seg = (ofs + len - 1) >> header->
obj_order;
749 return end_seg - start_seg + 1;
764 static void bio_chain_put(
struct bio *
chain)
770 chain = chain->bi_next;
778 static void zero_bio_chain(
struct bio *chain,
int start_ofs)
787 bio_for_each_segment(bv, chain, i) {
788 if (pos + bv->bv_len > start_ofs) {
789 int remainder =
max(start_ofs - pos, 0);
790 buf = bvec_kmap_irq(bv, &flags);
791 memset(buf + remainder, 0,
792 bv->bv_len - remainder);
793 bvec_kunmap_irq(buf, &flags);
798 chain = chain->bi_next;
806 static struct bio *bio_chain_clone(
struct bio **old,
struct bio **
next,
807 struct bio_pair **bp,
808 int len,
gfp_t gfpmask)
810 struct bio *old_chain = *old;
811 struct bio *new_chain =
NULL;
820 while (old_chain && (total < len)) {
823 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
828 if (total + old_chain->bi_size > len) {
835 dout(
"bio_chain_clone split! total=%d remaining=%d"
837 total, len - total, old_chain->bi_size);
850 *next = old_chain->bi_next;
860 old_chain = old_chain->bi_next;
862 total += tmp->bi_size;
872 dout(
"bio_chain_clone with err\n");
873 bio_chain_put(new_chain);
885 ops = kzalloc(
sizeof (*ops) * (num_ops + 1),
GFP_NOIO);
905 static void rbd_coll_end_req_index(
struct request *
rq,
913 dout(
"rbd_coll_end_req_index %p index %d ret %d len %llu\n",
914 coll, index, ret, (
unsigned long long) len);
926 spin_lock_irq(q->queue_lock);
931 while (max < coll->total && coll->
status[max].done)
934 for (i = min; i<
max; i++) {
938 kref_put(&coll->
kref, rbd_coll_release);
940 spin_unlock_irq(q->queue_lock);
952 static int rbd_do_request(
struct request *rq,
956 const char *object_name,
u64 ofs,
u64 len,
978 req_data = kzalloc(
sizeof(*req_data),
GFP_NOIO);
981 rbd_coll_end_req_index(rq, coll, coll_index,
987 req_data->
coll = coll;
991 dout(
"rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name,
992 (
unsigned long long) ofs, (
unsigned long long) len);
1005 req_data->
bio = bio;
1007 req_data->
len = len;
1011 reqhead = req->
r_request->front.iov_base;
1018 memset(layout, 0,
sizeof(*layout));
1046 dout(
"reassert_ver=%llu\n",
1047 (
unsigned long long)
1049 ceph_osdc_put_request(req);
1054 bio_chain_put(req_data->
bio);
1055 ceph_osdc_put_request(req);
1057 rbd_coll_end_req(req_data, ret, len);
1075 replyhead = msg->
front.iov_base;
1077 op = (
void *)(replyhead + 1);
1082 dout(
"rbd_req_cb bytes=%llu readop=%d rc=%d\n",
1083 (
unsigned long long) bytes, read_op, (
int) rc);
1085 if (rc == -
ENOENT && read_op) {
1086 zero_bio_chain(req_data->
bio, 0);
1088 }
else if (rc == 0 && read_op && bytes < req_data->len) {
1089 zero_bio_chain(req_data->
bio, bytes);
1090 bytes = req_data->
len;
1093 rbd_coll_end_req(req_data, rc, bytes);
1096 bio_chain_put(req_data->
bio);
1098 ceph_osdc_put_request(req);
1104 ceph_osdc_put_request(req);
1110 static int rbd_req_sync_op(
struct rbd_device *rbd_dev,
1115 const char *object_name,
1116 u64 ofs,
u64 inbound_size,
1127 num_pages = calc_pages_for(ofs, inbound_size);
1130 return PTR_ERR(pages);
1132 ret = rbd_do_request(
NULL, rbd_dev, snapc, snapid,
1133 object_name, ofs, inbound_size,
NULL,
1154 static int rbd_do_op(
struct request *rq,
1158 int opcode,
int flags,
1171 seg_name = rbd_segment_name(rbd_dev, ofs);
1174 seg_len = rbd_segment_length(rbd_dev, ofs, len);
1175 seg_ofs = rbd_segment_offset(rbd_dev, ofs);
1180 ops = rbd_create_rw_ops(1, opcode, payload_len);
1189 ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
1190 seg_name, seg_ofs, seg_len,
1196 rbd_req_cb, 0,
NULL);
1198 rbd_destroy_ops(ops);
1207 static int rbd_req_write(
struct request *rq,
1218 ofs, len, bio, coll, coll_index);
1224 static int rbd_req_read(
struct request *rq,
1232 return rbd_do_op(rq, rbd_dev,
NULL,
1236 ofs, len, bio, coll, coll_index);
1242 static int rbd_req_sync_read(
struct rbd_device *rbd_dev,
1244 const char *object_name,
1256 ret = rbd_req_sync_op(rbd_dev,
NULL,
1259 ops, object_name, ofs, len, buf,
NULL, ver);
1260 rbd_destroy_ops(ops);
1268 static int rbd_req_sync_notify_ack(
struct rbd_device *rbd_dev,
1280 ops[0].
watch.cookie = notify_id;
1281 ops[0].
watch.flag = 0;
1289 rbd_simple_req_cb, 0,
NULL);
1291 rbd_destroy_ops(ops);
1295 static void rbd_watch_cb(
u64 ver,
u64 notify_id,
u8 opcode,
void *
data)
1304 dout(
"rbd_watch_cb %s notify_id=%llu opcode=%u\n",
1305 rbd_dev->
header_name, (
unsigned long long) notify_id,
1306 (
unsigned int) opcode);
1307 rc = rbd_refresh_header(rbd_dev, &hver);
1310 " update snaps: %d\n", rbd_dev->
major, rc);
1312 rbd_req_sync_notify_ack(rbd_dev, hver, notify_id);
1318 static int rbd_req_sync_watch(
struct rbd_device *rbd_dev)
1335 ops[0].
watch.flag = 1;
1337 ret = rbd_req_sync_op(rbd_dev,
NULL,
1348 rbd_destroy_ops(ops);
1355 rbd_destroy_ops(ops);
1362 static int rbd_req_sync_unwatch(
struct rbd_device *rbd_dev)
1371 ops[0].
watch.ver = 0;
1373 ops[0].
watch.flag = 0;
1375 ret = rbd_req_sync_op(rbd_dev,
NULL,
1383 rbd_destroy_ops(ops);
1392 static int rbd_req_sync_exec(
struct rbd_device *rbd_dev,
1393 const char *object_name,
1396 const char *outbound,
1397 size_t outbound_size,
1399 size_t inbound_size,
1404 int class_name_len =
strlen(class_name);
1405 int method_name_len =
strlen(method_name);
1417 payload_size = class_name_len + method_name_len + outbound_size;
1423 ops[0].
cls.class_len = (
__u8) class_name_len;
1425 ops[0].
cls.method_len = (
__u8) method_name_len;
1426 ops[0].
cls.argc = 0;
1427 ops[0].
cls.indata = outbound;
1428 ops[0].
cls.indata_len = outbound_size;
1430 ret = rbd_req_sync_op(rbd_dev,
NULL,
1433 object_name, 0, inbound_size, inbound,
1436 rbd_destroy_ops(ops);
1438 dout(
"cls_exec returned %d\n", ret);
1442 static struct rbd_req_coll *rbd_alloc_coll(
int num_reqs)
1451 coll->
total = num_reqs;
1452 kref_init(&coll->
kref);
1463 struct bio_pair *bp =
NULL;
1467 struct bio *rq_bio, *next_bio =
NULL;
1472 int num_segs, cur_seg = 0;
1476 dout(
"fetched request\n");
1479 if ((rq->cmd_type != REQ_TYPE_FS)) {
1485 do_write = (rq_data_dir(rq) ==
WRITE);
1487 size = blk_rq_bytes(rq);
1490 if (do_write && rbd_dev->
mapping.read_only) {
1495 spin_unlock_irq(q->queue_lock);
1500 !rbd_dev->
mapping.snap_exists) {
1502 dout(
"request for non-existent snapshot");
1503 spin_lock_irq(q->queue_lock);
1508 snapc = ceph_get_snap_context(rbd_dev->
header.snapc);
1512 dout(
"%s 0x%x bytes at 0x%llx\n",
1513 do_write ?
"write" :
"read",
1514 size, (
unsigned long long) blk_rq_pos(rq) *
SECTOR_SIZE);
1516 num_segs = rbd_get_num_segments(&rbd_dev->
header, ofs, size);
1517 if (num_segs <= 0) {
1518 spin_lock_irq(q->queue_lock);
1520 ceph_put_snap_context(snapc);
1523 coll = rbd_alloc_coll(num_segs);
1525 spin_lock_irq(q->queue_lock);
1527 ceph_put_snap_context(snapc);
1533 dout(
"rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
1534 op_size = rbd_segment_length(rbd_dev, ofs, size);
1535 kref_get(&coll->
kref);
1536 bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
1539 rbd_coll_end_req_index(rq, coll, cur_seg,
1547 rbd_req_write(rq, rbd_dev,
1553 rbd_req_read(rq, rbd_dev,
1566 kref_put(&coll->
kref, rbd_coll_release);
1570 spin_lock_irq(q->queue_lock);
1572 ceph_put_snap_context(snapc);
1581 static int rbd_merge_bvec(
struct request_queue *q,
struct bvec_merge_data *bmd,
1582 struct bio_vec *bvec)
1585 unsigned int chunk_sectors;
1587 unsigned int bio_sectors;
1591 sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
1594 max = (chunk_sectors - ((sector & (chunk_sectors - 1))
1598 if (max <= bvec->bv_len && bio_sectors == 0)
1599 return bvec->bv_len;
1603 static void rbd_free_disk(
struct rbd_device *rbd_dev)
1605 struct gendisk *disk = rbd_dev->
disk;
1610 if (disk->flags & GENHD_FL_UP)
1648 size =
sizeof (*ondisk);
1658 (
char *) ondisk, version);
1662 if (
WARN_ON((
size_t) ret < size)) {
1665 " (want %zd got %d)\n",
1669 if (!rbd_dev_ondisk_valid(ondisk)) {
1679 }
while (snap_count != want_count);
1686 return ERR_PTR(ret);
1692 static int rbd_read_header(
struct rbd_device *rbd_dev,
1699 ondisk = rbd_dev_v1_header_read(rbd_dev, &ver);
1701 return PTR_ERR(ondisk);
1702 ret = rbd_header_from_disk(header, ondisk);
1710 static void __rbd_remove_all_snaps(
struct rbd_device *rbd_dev)
1716 __rbd_remove_snap_dev(snap);
1727 ret = rbd_read_header(rbd_dev, &
h);
1737 if (size != (
sector_t) rbd_dev->mapping.size) {
1738 dout(
"setting size to %llu sectors",
1739 (
unsigned long long) size);
1740 rbd_dev->mapping.size = (
u64) size;
1741 set_capacity(rbd_dev->disk, size);
1746 kfree(rbd_dev->header.snap_sizes);
1747 kfree(rbd_dev->header.snap_names);
1749 ceph_put_snap_context(rbd_dev->header.snapc);
1752 *hver =
h.obj_version;
1753 rbd_dev->header.obj_version =
h.obj_version;
1754 rbd_dev->header.image_size =
h.image_size;
1755 rbd_dev->header.snapc =
h.snapc;
1756 rbd_dev->header.snap_names =
h.snap_names;
1757 rbd_dev->header.snap_sizes =
h.snap_sizes;
1762 ret = rbd_dev_snaps_update(rbd_dev);
1764 ret = rbd_dev_snaps_register(rbd_dev);
1771 static int rbd_refresh_header(
struct rbd_device *rbd_dev,
u64 *hver)
1776 ret = __rbd_refresh_header(rbd_dev, hver);
1782 static int rbd_init_disk(
struct rbd_device *rbd_dev)
1784 struct gendisk *disk;
1795 disk->major = rbd_dev->
major;
1796 disk->first_minor = 0;
1797 disk->fops = &rbd_bd_ops;
1798 disk->private_data = rbd_dev;
1809 segment_size = rbd_obj_bytes(&rbd_dev->
header);
1818 q->queuedata = rbd_dev;
1820 rbd_dev->
disk = disk;
1843 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1847 size = get_capacity(rbd_dev->
disk);
1860 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1862 return sprintf(buf,
"0x%016llx\n",
1863 (
unsigned long long) rbd_dev->
mapping.features);
1869 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1877 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1879 return sprintf(buf,
"client%lld\n",
1886 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1894 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1902 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1910 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1923 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1933 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1936 ret = rbd_refresh_header(rbd_dev,
NULL);
1938 return ret < 0 ? ret :
size;
1952 static struct attribute *rbd_attrs[] = {
1953 &dev_attr_size.attr,
1954 &dev_attr_features.attr,
1955 &dev_attr_major.attr,
1956 &dev_attr_client_id.attr,
1957 &dev_attr_pool.attr,
1958 &dev_attr_pool_id.attr,
1959 &dev_attr_name.attr,
1960 &dev_attr_image_id.attr,
1961 &dev_attr_current_snap.attr,
1962 &dev_attr_refresh.attr,
1975 static void rbd_sysfs_dev_release(
struct device *dev)
1981 .groups = rbd_attr_groups,
1982 .release = rbd_sysfs_dev_release,
1996 return sprintf(buf,
"%llu\n", (
unsigned long long)snap->
size);
2005 return sprintf(buf,
"%llu\n", (
unsigned long long)snap->
id);
2014 return sprintf(buf,
"0x%016llx\n",
2015 (
unsigned long long) snap->
features);
2022 static struct attribute *rbd_snap_attrs[] = {
2023 &dev_attr_snap_size.attr,
2024 &dev_attr_snap_id.attr,
2025 &dev_attr_snap_features.attr,
2030 .attrs = rbd_snap_attrs,
2033 static void rbd_snap_dev_release(
struct device *dev)
2041 &rbd_snap_attr_group,
2045 static struct device_type rbd_snap_device_type = {
2046 .groups = rbd_snap_attr_groups,
2047 .release = rbd_snap_dev_release,
2050 static bool rbd_snap_registered(
struct rbd_snap *snap)
2052 bool ret = snap->
dev.type == &rbd_snap_device_type;
2053 bool reg = device_is_registered(&snap->
dev);
2060 static void __rbd_remove_snap_dev(
struct rbd_snap *snap)
2063 if (device_is_registered(&snap->
dev))
2067 static int rbd_register_snap_dev(
struct rbd_snap *snap,
2073 dev->
type = &rbd_snap_device_type;
2075 dev->
release = rbd_snap_dev_release;
2077 dout(
"%s: registering device for snapshot %s\n", __func__, snap->
name);
2085 const char *snap_name,
2086 u64 snap_id,
u64 snap_size,
2102 snap->
size = snap_size;
2111 return ERR_PTR(ret);
2114 static char *rbd_dev_v1_snap_info(
struct rbd_device *rbd_dev,
u32 which,
2115 u64 *snap_size,
u64 *snap_features)
2121 *snap_size = rbd_dev->
header.snap_sizes[which];
2126 snap_name = rbd_dev->
header.snap_names;
2128 snap_name +=
strlen(snap_name) + 1;
2138 static int _rbd_dev_v2_snap_size(
struct rbd_device *rbd_dev,
u64 snap_id,
2148 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->
header_name,
2150 (
char *) &snapid,
sizeof (snapid),
2151 (
char *) &size_buf,
sizeof (size_buf),
2153 dout(
"%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2157 *order = size_buf.order;
2160 dout(
" snap_id 0x%016llx order = %u, snap_size = %llu\n",
2161 (
unsigned long long) snap_id, (
unsigned int) *order,
2162 (
unsigned long long) *snap_size);
2167 static int rbd_dev_v2_image_size(
struct rbd_device *rbd_dev)
2169 return _rbd_dev_v2_snap_size(rbd_dev,
CEPH_NOSNAP,
2170 &rbd_dev->
header.obj_order,
2171 &rbd_dev->
header.image_size);
2174 static int rbd_dev_v2_object_prefix(
struct rbd_device *rbd_dev)
2184 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->
header_name,
2185 "rbd",
"get_object_prefix",
2189 dout(
"%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2194 rbd_dev->
header.object_prefix = ceph_extract_encoded_string(&p,
2198 if (IS_ERR(rbd_dev->
header.object_prefix)) {
2199 ret = PTR_ERR(rbd_dev->
header.object_prefix);
2202 dout(
" object_prefix = %s\n", rbd_dev->
header.object_prefix);
2211 static int _rbd_dev_v2_snap_features(
struct rbd_device *rbd_dev,
u64 snap_id,
2218 } features_buf = { 0 };
2221 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->
header_name,
2222 "rbd",
"get_features",
2223 (
char *) &snapid,
sizeof (snapid),
2224 (
char *) &features_buf,
sizeof (features_buf),
2226 dout(
"%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2229 *snap_features =
le64_to_cpu(features_buf.features);
2231 dout(
" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
2232 (
unsigned long long) snap_id,
2233 (
unsigned long long) *snap_features,
2234 (
unsigned long long)
le64_to_cpu(features_buf.incompat));
2239 static int rbd_dev_v2_features(
struct rbd_device *rbd_dev)
2241 return _rbd_dev_v2_snap_features(rbd_dev,
CEPH_NOSNAP,
2242 &rbd_dev->
header.features);
2245 static int rbd_dev_v2_snap_context(
struct rbd_device *rbd_dev,
u64 *ver)
2269 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->
header_name,
2270 "rbd",
"get_snapcontext",
2274 dout(
"%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2280 end = (
char *) reply_buf + size;
2295 if (!ceph_has_room(&p, end, snap_count *
sizeof (
__le64)))
2299 snap_count *
sizeof (snapc->
snaps[0]);
2310 snapc->
snaps[i] = ceph_decode_64(&p);
2312 rbd_dev->
header.snapc = snapc;
2314 dout(
" snap context seq = %llu, snap_count = %u\n",
2315 (
unsigned long long) seq, (
unsigned int) snap_count);
2323 static char *rbd_dev_v2_snap_name(
struct rbd_device *rbd_dev,
u32 which)
2331 size_t snap_name_len;
2340 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->
header_name,
2341 "rbd",
"get_snapshot_name",
2342 (
char *) &snap_id,
sizeof (snap_id),
2345 dout(
"%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2350 end = (
char *) reply_buf + size;
2352 snap_name = ceph_extract_encoded_string(&p, end, &snap_name_len,
2354 if (IS_ERR(snap_name)) {
2355 ret = PTR_ERR(snap_name);
2358 dout(
" snap_id 0x%016llx snap_name = %s\n",
2359 (
unsigned long long)
le64_to_cpu(snap_id), snap_name);
2367 return ERR_PTR(ret);
2370 static char *rbd_dev_v2_snap_info(
struct rbd_device *rbd_dev,
u32 which,
2371 u64 *snap_size,
u64 *snap_features)
2377 snap_id = rbd_dev->
header.snapc->snaps[which];
2378 ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size);
2380 return ERR_PTR(ret);
2381 ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features);
2383 return ERR_PTR(ret);
2385 return rbd_dev_v2_snap_name(rbd_dev, which);
2388 static char *rbd_dev_snap_info(
struct rbd_device *rbd_dev,
u32 which,
2389 u64 *snap_size,
u64 *snap_features)
2392 return rbd_dev_v1_snap_info(rbd_dev, which,
2393 snap_size, snap_features);
2395 return rbd_dev_v2_snap_info(rbd_dev, which,
2396 snap_size, snap_features);
2412 static int rbd_dev_snaps_update(
struct rbd_device *rbd_dev)
2420 dout(
"%s: snap count is %u\n", __func__, (
unsigned int) snap_count);
2421 while (index < snap_count || links != head) {
2426 u64 snap_features = 0;
2428 snap_id = index < snap_count ? snapc->
snaps[
index]
2434 if (snap_id ==
CEPH_NOSNAP || (snap && snap->
id > snap_id)) {
2439 if (rbd_dev->
mapping.snap_id == snap->
id)
2440 rbd_dev->
mapping.snap_exists =
false;
2441 __rbd_remove_snap_dev(snap);
2442 dout(
"%ssnap id %llu has been removed\n",
2445 (
unsigned long long) snap->
id);
2453 snap_name = rbd_dev_snap_info(rbd_dev, index,
2454 &snap_size, &snap_features);
2455 if (IS_ERR(snap_name))
2456 return PTR_ERR(snap_name);
2458 dout(
"entry %u: snap_id = %llu\n", (
unsigned int) snap_count,
2459 (
unsigned long long) snap_id);
2460 if (!snap || (snap_id !=
CEPH_NOSNAP && snap->
id < snap_id)) {
2465 new_snap = __rbd_add_snap_dev(rbd_dev, snap_name,
2466 snap_id, snap_size, snap_features);
2467 if (IS_ERR(new_snap)) {
2468 int err = PTR_ERR(new_snap);
2470 dout(
" failed to add dev, error %d\n", err);
2477 dout(
" added dev%s\n", snap ?
"" :
" at end\n");
2485 dout(
" already present\n");
2493 links = links->
next;
2500 dout(
"%s: done\n", __func__);
2509 static int rbd_dev_snaps_register(
struct rbd_device *rbd_dev)
2514 dout(
"%s called\n", __func__);
2515 if (
WARN_ON(!device_is_registered(&rbd_dev->
dev)))
2519 if (!rbd_snap_registered(snap)) {
2520 ret = rbd_register_snap_dev(snap, &rbd_dev->
dev);
2525 dout(
"%s: returning %d\n", __func__, ret);
2530 static int rbd_bus_add_dev(
struct rbd_device *rbd_dev)
2537 dev = &rbd_dev->
dev;
2538 dev->
bus = &rbd_bus_type;
2539 dev->
type = &rbd_device_type;
2540 dev->
parent = &rbd_root_dev;
2541 dev->
release = rbd_dev_release;
2550 static void rbd_bus_del_dev(
struct rbd_device *rbd_dev)
2555 static int rbd_init_watch_dev(
struct rbd_device *rbd_dev)
2560 ret = rbd_req_sync_watch(rbd_dev);
2562 rc = rbd_refresh_header(rbd_dev,
NULL);
2566 }
while (ret == -
ERANGE);
2577 static void rbd_dev_id_get(
struct rbd_device *rbd_dev)
2581 spin_lock(&rbd_dev_list_lock);
2583 spin_unlock(&rbd_dev_list_lock);
2584 dout(
"rbd_dev %p given dev id %llu\n", rbd_dev,
2585 (
unsigned long long) rbd_dev->
dev_id);
2592 static void rbd_dev_id_put(
struct rbd_device *rbd_dev)
2595 int rbd_id = rbd_dev->
dev_id;
2600 dout(
"rbd_dev %p released dev id %llu\n", rbd_dev,
2601 (
unsigned long long) rbd_dev->
dev_id);
2602 spin_lock(&rbd_dev_list_lock);
2603 list_del_init(&rbd_dev->
node);
2610 spin_unlock(&rbd_dev_list_lock);
2624 if (rbd_id > max_id)
2627 spin_unlock(&rbd_dev_list_lock);
2636 dout(
" max dev id has been reset\n");
2645 static inline size_t next_token(
const char **buf)
2651 const char *spaces =
" \f\n\r\t\v";
2653 *buf +=
strspn(*buf, spaces);
2672 static inline size_t copy_token(
const char **buf,
2678 len = next_token(buf);
2679 if (len < token_size) {
2680 memcpy(token, *buf, len);
2681 *(token + len) =
'\0';
2704 static inline char *dup_token(
const char **buf,
size_t *lenp)
2709 len = next_token(buf);
2715 *(dup + len) =
'\0';
2734 static char *rbd_add_parse_args(
struct rbd_device *rbd_dev,
2736 const char **mon_addrs,
2737 size_t *mon_addrs_size,
2739 size_t options_size)
2742 char *err_ptr = ERR_PTR(-
EINVAL);
2747 len = next_token(&buf);
2750 *mon_addrs_size = len + 1;
2755 len = copy_token(&buf, options, options_size);
2756 if (!len || len >= options_size)
2759 err_ptr = ERR_PTR(-
ENOMEM);
2769 len = next_token(&buf);
2777 memcpy(snap_name, buf, len);
2778 *(snap_name + len) =
'\0';
2780 dout(
" SNAP_NAME is <%s>, len is %zd\n", snap_name, len);
2808 static int rbd_dev_image_id(
struct rbd_device *rbd_dev)
2825 dout(
"rbd id object name is %s\n", object_name);
2830 response = kzalloc(size,
GFP_NOIO);
2836 ret = rbd_req_sync_exec(rbd_dev, object_name,
2841 dout(
"%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2846 rbd_dev->
image_id = ceph_extract_encoded_string(&p,
2863 static int rbd_dev_v1_probe(
struct rbd_device *rbd_dev)
2887 ret = rbd_read_header(rbd_dev, &rbd_dev->
header);
2892 dout(
"discovered version 1 image, header name is %s\n",
2906 static int rbd_dev_v2_probe(
struct rbd_device *rbd_dev)
2925 ret = rbd_dev_v2_image_size(rbd_dev);
2931 ret = rbd_dev_v2_object_prefix(rbd_dev);
2937 ret = rbd_dev_v2_features(rbd_dev);
2943 rbd_dev->
header.crypt_type = 0;
2944 rbd_dev->
header.comp_type = 0;
2948 ret = rbd_dev_v2_snap_context(rbd_dev, &ver);
2955 dout(
"discovered version 2 image, header name is %s\n",
2973 static int rbd_dev_probe(
struct rbd_device *rbd_dev)
2982 ret = rbd_dev_image_id(rbd_dev);
2984 ret = rbd_dev_v1_probe(rbd_dev);
2986 ret = rbd_dev_v2_probe(rbd_dev);
2988 dout(
"probe failed, returning %d\n", ret);
2999 const char *mon_addrs =
NULL;
3000 size_t mon_addrs_size = 0;
3011 rbd_dev = kzalloc(
sizeof(*rbd_dev),
GFP_KERNEL);
3017 INIT_LIST_HEAD(&rbd_dev->
node);
3018 INIT_LIST_HEAD(&rbd_dev->
snaps);
3022 snap_name = rbd_add_parse_args(rbd_dev, buf,
3023 &mon_addrs, &mon_addrs_size, options, count);
3024 if (IS_ERR(snap_name)) {
3025 rc = PTR_ERR(snap_name);
3029 rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options);
3037 goto err_out_client;
3040 rc = rbd_dev_probe(rbd_dev);
3042 goto err_out_client;
3046 rc = rbd_dev_snaps_update(rbd_dev);
3048 goto err_out_header;
3050 rc = rbd_dev_set_mapping(rbd_dev, snap_name);
3052 goto err_out_header;
3055 rbd_dev_id_get(rbd_dev);
3071 rc = rbd_init_disk(rbd_dev);
3073 goto err_out_blkdev;
3075 rc = rbd_bus_add_dev(rbd_dev);
3085 rc = rbd_dev_snaps_register(rbd_dev);
3090 rc = rbd_init_watch_dev(rbd_dev);
3098 pr_info(
"%s: added with size 0x%llx\n", rbd_dev->
disk->disk_name,
3099 (
unsigned long long) rbd_dev->
mapping.size);
3106 rbd_bus_del_dev(rbd_dev);
3111 rbd_free_disk(rbd_dev);
3115 rbd_dev_id_put(rbd_dev);
3117 rbd_header_free(&rbd_dev->
header);
3120 rbd_put_client(rbd_dev);
3130 dout(
"Error adding device %s\n", buf);
3141 spin_lock(&rbd_dev_list_lock);
3144 if (rbd_dev->
dev_id == dev_id) {
3145 spin_unlock(&rbd_dev_list_lock);
3149 spin_unlock(&rbd_dev_list_lock);
3153 static void rbd_dev_release(
struct device *dev)
3155 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
3164 rbd_req_sync_unwatch(rbd_dev);
3166 rbd_put_client(rbd_dev);
3169 rbd_free_disk(rbd_dev);
3173 rbd_header_free(&rbd_dev->
header);
3181 rbd_dev_id_put(rbd_dev);
3202 target_id = (
int) ul;
3203 if (target_id != ul)
3208 rbd_dev = __rbd_get_dev(target_id);
3214 __rbd_remove_all_snaps(rbd_dev);
3215 rbd_bus_del_dev(rbd_dev);
3227 static int rbd_sysfs_init(
void)
3242 static void rbd_sysfs_cleanup(
void)
3252 rc = rbd_sysfs_init();
3261 rbd_sysfs_cleanup();