18 #include <linux/kernel.h>
20 #include <linux/blktrace_api.h>
24 #include <linux/slab.h>
26 #include <linux/export.h>
27 #include <linux/time.h>
34 #ifdef CONFIG_BLK_DEV_IO_TRACE
/* Tracer option bit: when set (checked against blk_tracer_flags.val
 * elsewhere in this file), the tracer emits the classic blktrace
 * text output format instead of the generic ftrace format. */
42 #define TRACE_BLK_OPT_CLASSIC 0x1
46 {
TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
52 .opts = blk_tracer_opts,
58 static void blk_register_tracepoints(
void);
59 static void blk_unregister_tracepoints(
void);
65 const void *
data,
size_t len)
72 bool blk_tracer = blk_tracer_enabled;
89 t = relay_reserve(bt->rchan,
sizeof(*t) + len);
99 memcpy((
void *) t +
sizeof(*t), data, len);
110 static void trace_note_tsk(
struct blk_trace *bt,
struct task_struct *tsk)
112 tsk->btrace_seq = blktrace_seq;
116 static void trace_note_time(
struct blk_trace *bt)
123 words[0] = now.tv_sec;
124 words[1] = now.tv_nsec;
131 void __trace_note_message(
struct blk_trace *bt,
const char *
fmt, ...)
139 !blk_tracer_enabled))
152 n =
vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
165 if (sector && (sector < bt->start_lba || sector > bt->end_lba))
167 if (bt->pid && pid != bt->pid)
/* Alias so MASK_TC_BIT(rw, RAHEAD) (used in __blk_add_trace below)
 * expands to the existing BLK_TC_AHEAD trace-category bit. */
179 #define BLK_TC_RAHEAD BLK_TC_AHEAD
/* Translate the REQ_<__name> flag bit in 'rw' into the corresponding
 * BLK_TC_<__name> category bit of the trace action word: isolate the
 * request-flag bit and shift it left so it lands at bit position
 * ilog2(BLK_TC_<__name>) + BLK_TC_SHIFT (REQ_<__name> occupies bit
 * __REQ_<__name>, hence the subtraction). Yields 0 when the flag is
 * not set, so results can be OR-ed together. */
182 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
183 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
189 static void __blk_add_trace(
struct blk_trace *bt,
sector_t sector,
int bytes,
190 int rw,
u32 what,
int error,
int pdu_len,
void *pdu_data)
196 unsigned long flags = 0;
200 bool blk_tracer = blk_tracer_enabled;
205 what |= ddir_act[rw &
WRITE];
206 what |= MASK_TC_BIT(rw,
SYNC);
207 what |= MASK_TC_BIT(rw, RAHEAD);
208 what |= MASK_TC_BIT(rw, META);
209 what |= MASK_TC_BIT(rw, DISCARD);
210 what |= MASK_TC_BIT(rw,
FLUSH);
211 what |= MASK_TC_BIT(rw, FUA);
214 if (act_log_check(bt, what, sector, pid))
224 sizeof(*t) + pdu_len,
239 if (
unlikely(tsk->btrace_seq != blktrace_seq))
240 trace_note_tsk(bt, tsk);
242 t = relay_reserve(bt->rchan,
sizeof(*t) + pdu_len);
267 memcpy((
void *) t +
sizeof(*t), pdu_data, pdu_len);
278 static struct dentry *blk_tree_root;
281 static void blk_trace_free(
struct blk_trace *bt)
292 static void blk_trace_cleanup(
struct blk_trace *bt)
296 blk_unregister_tracepoints();
301 struct blk_trace *
bt;
308 blk_trace_cleanup(bt);
314 static ssize_t blk_dropped_read(
struct file *filp,
char __user *buffer,
315 size_t count, loff_t *ppos)
328 .read = blk_dropped_read,
332 static ssize_t blk_msg_write(
struct file *filp,
const char __user *buffer,
333 size_t count, loff_t *ppos)
336 struct blk_trace *
bt;
338 if (count >= BLK_TN_MAX_MSG)
352 __trace_note_message(bt,
"%s", msg);
361 .write = blk_msg_write,
369 static int blk_subbuf_start_callback(
struct rchan_buf *buf,
void *subbuf,
370 void *prev_subbuf,
size_t prev_padding)
372 struct blk_trace *
bt;
377 bt = buf->
chan->private_data;
382 static int blk_remove_buf_file_callback(
struct dentry *
dentry)
389 static struct dentry *blk_create_buf_file_callback(
const char *
filename,
401 .create_buf_file = blk_create_buf_file_callback,
402 .remove_buf_file = blk_remove_buf_file_callback,
405 static void blk_trace_setup_lba(
struct blk_trace *bt,
414 bt->start_lba = part->start_sect;
415 bt->end_lba = part->start_sect + part->nr_sects;
429 struct blk_trace *old_bt, *bt =
NULL;
444 if (buts->
name[i] ==
'/')
463 if (!blk_tree_root) {
465 if (!blk_tree_root) {
484 if (!bt->dropped_file)
492 buts->
buf_nr, &blk_relay_callbacks, bt);
498 bt->act_mask = (
u16) -1;
500 blk_trace_setup_lba(bt, bdev);
512 old_bt =
xchg(&q->blk_trace, bt);
519 blk_register_tracepoints();
550 #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
551 static int compat_blk_trace_setup(
struct request_queue *q,
char *name,
556 struct compat_blk_user_trace_setup cbuts;
564 .buf_size = cbuts.buf_size,
565 .buf_nr = cbuts.buf_nr,
566 .start_lba = cbuts.start_lba,
567 .end_lba = cbuts.end_lba,
588 struct blk_trace *bt = q->blk_trace;
633 q = bdev_get_queue(bdev);
644 #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
645 case BLKTRACESETUP32:
647 ret = compat_blk_trace_setup(q, b, bdev->
bd_dev, bdev, arg);
697 struct blk_trace *bt = q->blk_trace;
702 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
704 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
705 what, rq->errors, rq->cmd_len, rq->cmd);
708 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
709 rq->cmd_flags, what, rq->errors, 0,
NULL);
713 static void blk_add_trace_rq_abort(
void *ignore,
719 static void blk_add_trace_rq_insert(
void *ignore,
725 static void blk_add_trace_rq_issue(
void *ignore,
731 static void blk_add_trace_rq_requeue(
void *ignore,
738 static void blk_add_trace_rq_complete(
void *ignore,
756 static void blk_add_trace_bio(
struct request_queue *q,
struct bio *bio,
759 struct blk_trace *bt = q->blk_trace;
764 if (!error && !bio_flagged(bio, BIO_UPTODATE))
767 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
771 static void blk_add_trace_bio_bounce(
void *ignore,
777 static void blk_add_trace_bio_complete(
void *ignore,
784 static void blk_add_trace_bio_backmerge(
void *ignore,
791 static void blk_add_trace_bio_frontmerge(
void *ignore,
798 static void blk_add_trace_bio_queue(
void *ignore,
804 static void blk_add_trace_getrq(
void *ignore,
806 struct bio *bio,
int rw)
811 struct blk_trace *bt = q->blk_trace;
819 static void blk_add_trace_sleeprq(
void *ignore,
821 struct bio *bio,
int rw)
826 struct blk_trace *bt = q->blk_trace;
834 static void blk_add_trace_plug(
void *ignore,
struct request_queue *q)
836 struct blk_trace *bt = q->blk_trace;
842 static void blk_add_trace_unplug(
void *ignore,
struct request_queue *q,
843 unsigned int depth,
bool explicit)
845 struct blk_trace *bt = q->blk_trace;
856 __blk_add_trace(bt, 0, 0, 0, what, 0,
sizeof(rpdu), &rpdu);
860 static void blk_add_trace_split(
void *ignore,
864 struct blk_trace *bt = q->blk_trace;
869 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
871 sizeof(rpdu), &rpdu);
888 static void blk_add_trace_bio_remap(
void *ignore,
892 struct blk_trace *bt = q->blk_trace;
902 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
920 static void blk_add_trace_rq_remap(
void *ignore,
925 struct blk_trace *bt = q->blk_trace;
935 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
953 void *data,
size_t len)
955 struct blk_trace *bt = q->blk_trace;
960 if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
961 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
964 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
969 static void blk_register_tracepoints(
void)
973 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort,
NULL);
975 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert,
NULL);
977 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue,
NULL);
979 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue,
NULL);
981 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete,
NULL);
983 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce,
NULL);
985 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete,
NULL);
987 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge,
NULL);
989 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge,
NULL);
991 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue,
NULL);
993 ret = register_trace_block_getrq(blk_add_trace_getrq,
NULL);
995 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq,
NULL);
997 ret = register_trace_block_plug(blk_add_trace_plug,
NULL);
999 ret = register_trace_block_unplug(blk_add_trace_unplug,
NULL);
1001 ret = register_trace_block_split(blk_add_trace_split,
NULL);
1003 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap,
NULL);
1005 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap,
NULL);
1009 static void blk_unregister_tracepoints(
void)
1011 unregister_trace_block_rq_remap(blk_add_trace_rq_remap,
NULL);
1012 unregister_trace_block_bio_remap(blk_add_trace_bio_remap,
NULL);
1013 unregister_trace_block_split(blk_add_trace_split,
NULL);
1014 unregister_trace_block_unplug(blk_add_trace_unplug,
NULL);
1015 unregister_trace_block_plug(blk_add_trace_plug,
NULL);
1016 unregister_trace_block_sleeprq(blk_add_trace_sleeprq,
NULL);
1017 unregister_trace_block_getrq(blk_add_trace_getrq,
NULL);
1018 unregister_trace_block_bio_queue(blk_add_trace_bio_queue,
NULL);
1019 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge,
NULL);
1020 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge,
NULL);
1021 unregister_trace_block_bio_complete(blk_add_trace_bio_complete,
NULL);
1022 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce,
NULL);
1023 unregister_trace_block_rq_complete(blk_add_trace_rq_complete,
NULL);
1024 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue,
NULL);
1025 unregister_trace_block_rq_issue(blk_add_trace_rq_issue,
NULL);
1026 unregister_trace_block_rq_insert(blk_add_trace_rq_insert,
NULL);
1027 unregister_trace_block_rq_abort(blk_add_trace_rq_abort,
NULL);
1029 tracepoint_synchronize_unregister();
1036 static void fill_rwbs(
char *rwbs,
const struct blk_io_trace *t)
1076 static inline const void *pdu_start(
const struct trace_entry *ent)
1078 return te_blk_io_trace(ent) + 1;
1083 return te_blk_io_trace(ent)->action;
1088 return te_blk_io_trace(ent)->bytes;
1093 return te_blk_io_trace(ent)->bytes >> 9;
1096 static inline unsigned long long t_sector(
const struct trace_entry *ent)
1098 return te_blk_io_trace(ent)->sector;
1103 return te_blk_io_trace(ent)->error;
1112 static void get_pdu_remap(
const struct trace_entry *ent,
1125 static int blk_log_action_classic(
struct trace_iterator *iter,
const char *act)
1128 unsigned long long ts = iter->
ts;
1130 unsigned secs = (
unsigned long)ts;
1136 "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
1138 secs, nsec_rem, iter->
ent->pid, act, rwbs);
1141 static int blk_log_action(
struct trace_iterator *iter,
const char *act)
1153 const unsigned char *pdu_buf;
1157 pdu_buf = pdu_start(ent);
1158 pdu_len = te_blk_io_trace(ent)->pdu_len;
1164 for (end = pdu_len - 1; end >= 0; end--)
1172 for (i = 0; i <
pdu_len; i++) {
1175 i == 0 ?
"" :
" ", pdu_buf[i]);
1183 if (i == end && end != pdu_len - 1)
1202 ret = blk_log_dump_pdu(s, ent);
1209 t_sector(ent), t_sec(ent), cmd);
1214 static int blk_log_with_error(
struct trace_seq *s,
1220 ret = blk_log_dump_pdu(s, ent);
1228 t_sec(ent), t_error(ent));
1230 t_sector(ent), t_error(ent));
1238 get_pdu_remap(ent, &r);
1240 t_sector(ent), t_sec(ent),
1270 get_pdu_int(ent), cmd);
1288 static void blk_tracer_print_header(
struct seq_file *
m)
1290 if (!(blk_tracer_flags.
val & TRACE_BLK_OPT_CLASSIC))
1292 seq_puts(m,
"# DEV CPU TIMESTAMP PID ACT FLG\n"
1298 blk_tracer_enabled =
true;
1304 blk_tracer_start(tr);
1310 blk_tracer_enabled =
false;
1315 blk_tracer_stop(tr);
1318 static const struct {
1347 blk_log_action_t *log_action;
1349 t = te_blk_io_trace(iter->
ent);
1352 log_action = classic ? &blk_log_action_classic : &blk_log_action;
1355 ret = log_action(iter, long_act ?
"message" :
"m");
1357 ret = blk_log_msg(s, iter->
ent);
1364 ret = log_action(iter, what2act[what].act[long_act]);
1366 ret = what2act[
what].print(s, iter->
ent);
1375 return print_one_line(iter,
false);
1378 static int blk_trace_synthesize_old_trace(
struct trace_iterator *iter)
1395 blk_trace_event_print_binary(
struct trace_iterator *iter,
int flags,
1398 return blk_trace_synthesize_old_trace(iter) ?
1404 if (!(blk_tracer_flags.
val & TRACE_BLK_OPT_CLASSIC))
1407 return print_one_line(iter,
true);
1410 static int blk_tracer_set_flag(
u32 old_flags,
u32 bit,
int set)
1413 if (bit == TRACE_BLK_OPT_CLASSIC) {
1424 .init = blk_tracer_init,
1425 .reset = blk_tracer_reset,
1426 .start = blk_tracer_start,
1427 .stop = blk_tracer_stop,
1428 .print_header = blk_tracer_print_header,
1429 .print_line = blk_tracer_print_line,
1430 .flags = &blk_tracer_flags,
1431 .set_flag = blk_tracer_set_flag,
1435 .
trace = blk_trace_event_print,
1436 .binary = blk_trace_event_print_binary,
1441 .funcs = &trace_blk_event_funcs,
1444 static int __init init_blk_tracer(
void)
1447 pr_warning(
"Warning: could not register block events\n");
1452 pr_warning(
"Warning: could not register the block tracer\n");
1464 struct blk_trace *
bt;
1471 blk_unregister_tracepoints();
1483 struct blk_trace *old_bt, *bt =
NULL;
1490 bt->msg_data =
__alloc_percpu(BLK_TN_MAX_MSG, __alignof__(
char));
1495 bt->act_mask = (
u16)-1;
1497 blk_trace_setup_lba(bt, bdev);
1499 old_bt =
xchg(&q->blk_trace, bt);
1500 if (old_bt !=
NULL) {
1507 blk_register_tracepoints();
1519 static ssize_t sysfs_blk_trace_attr_show(
struct device *dev,
1522 static ssize_t sysfs_blk_trace_attr_store(
struct device *dev,
1524 const char *buf,
size_t count);
/* Declare a sysfs device attribute named <_name>, readable by everyone
 * and writable by the owner (S_IRUGO | S_IWUSR == 0644), with all reads
 * and writes dispatched to the shared sysfs_blk_trace_attr_show /
 * sysfs_blk_trace_attr_store handlers declared above. */
1525 #define BLK_TRACE_DEVICE_ATTR(_name) \
1526 DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
1527 sysfs_blk_trace_attr_show, \
1528 sysfs_blk_trace_attr_store)
/* Per-device sysfs attributes for configuring blktrace from userspace:
 * enable toggles tracing; act_mask, pid, start_lba and end_lba set the
 * filters applied by the shared attr show/store handlers. */
1530 static BLK_TRACE_DEVICE_ATTR(
enable);
1531 static BLK_TRACE_DEVICE_ATTR(act_mask);
1532 static BLK_TRACE_DEVICE_ATTR(pid);
1533 static BLK_TRACE_DEVICE_ATTR(start_lba);
1534 static BLK_TRACE_DEVICE_ATTR(end_lba);
1536 static struct attribute *blk_trace_attrs[] = {
1537 &dev_attr_enable.attr,
1538 &dev_attr_act_mask.attr,
1540 &dev_attr_start_lba.attr,
1541 &dev_attr_end_lba.attr,
1547 .attrs = blk_trace_attrs,
1550 static const struct {
1571 static int blk_trace_str2mask(
const char *
str)
1590 for (i = 0; i <
ARRAY_SIZE(mask_maps); i++) {
1591 if (
strcasecmp(token, mask_maps[i].str) == 0) {
1592 mask |= mask_maps[
i].mask;
1606 static ssize_t blk_trace_mask2str(
char *buf,
int mask)
1611 for (i = 0; i <
ARRAY_SIZE(mask_maps); i++) {
1612 if (mask & mask_maps[i].mask) {
1614 (p == buf) ?
"" :
",", mask_maps[i].str);
1627 return bdev_get_queue(bdev);
1630 static ssize_t sysfs_blk_trace_attr_show(
struct device *dev,
1634 struct hd_struct *p = dev_to_part(dev);
1639 bdev =
bdget(part_devt(p));
1643 q = blk_trace_get_queue(bdev);
1649 if (attr == &dev_attr_enable) {
1650 ret =
sprintf(buf,
"%u\n", !!q->blk_trace);
1651 goto out_unlock_bdev;
1654 if (q->blk_trace ==
NULL)
1655 ret =
sprintf(buf,
"disabled\n");
1656 else if (attr == &dev_attr_act_mask)
1657 ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
1658 else if (attr == &dev_attr_pid)
1659 ret =
sprintf(buf,
"%u\n", q->blk_trace->pid);
1660 else if (attr == &dev_attr_start_lba)
1661 ret =
sprintf(buf,
"%llu\n", q->blk_trace->start_lba);
1662 else if (attr == &dev_attr_end_lba)
1663 ret =
sprintf(buf,
"%llu\n", q->blk_trace->end_lba);
1673 static ssize_t sysfs_blk_trace_attr_store(
struct device *dev,
1675 const char *buf,
size_t count)
1679 struct hd_struct *
p;
1686 if (attr == &dev_attr_act_mask) {
1687 if (
sscanf(buf,
"%llx", &value) != 1) {
1689 ret = blk_trace_str2mask(buf);
1694 }
else if (
sscanf(buf,
"%llu", &value) != 1)
1699 p = dev_to_part(dev);
1700 bdev =
bdget(part_devt(p));
1704 q = blk_trace_get_queue(bdev);
1710 if (attr == &dev_attr_enable) {
1712 ret = blk_trace_setup_queue(q, bdev);
1714 ret = blk_trace_remove_queue(q);
1715 goto out_unlock_bdev;
1719 if (q->blk_trace ==
NULL)
1720 ret = blk_trace_setup_queue(q, bdev);
1723 if (attr == &dev_attr_act_mask)
1724 q->blk_trace->act_mask =
value;
1725 else if (attr == &dev_attr_pid)
1726 q->blk_trace->pid =
value;
1727 else if (attr == &dev_attr_start_lba)
1728 q->blk_trace->start_lba =
value;
1729 else if (attr == &dev_attr_end_lba)
1730 q->blk_trace->end_lba =
value;
1738 return ret ? ret :
count;
1741 int blk_trace_init_sysfs(
struct device *dev)
1753 #ifdef CONFIG_EVENT_TRACING
1755 void blk_dump_cmd(
char *buf,
struct request *rq)
1758 int len = rq->cmd_len;
1759 unsigned char *cmd = rq->cmd;
1761 if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
1766 for (end = len - 1; end >= 0; end--)
1771 for (i = 0; i < len; i++) {
1772 buf +=
sprintf(buf,
"%s%02x", i == 0 ?
"" :
" ", cmd[i]);
1773 if (i == end && end != len - 1) {
1780 void blk_fill_rwbs(
char *rwbs,
u32 rw,
int bytes)