diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst index 9453196ade51b8065563d8e76989f274098f0fe8..6d675788362ef324e50b248b9418085f047a73c3 100644 --- a/Documentation/admin-guide/iostats.rst +++ b/Documentation/admin-guide/iostats.rst @@ -108,6 +108,12 @@ Field 16 -- # of flush requests completed Field 17 -- # of milliseconds spent flushing This is the total number of milliseconds spent by all flush requests. +Field 18 -- # of milliseconds spent reading on device driver's side + +Field 19 -- # of milliseconds spent writing on device driver's side + +Field 20 -- # of milliseconds spent discarding on device driver's side + To avoid introducing performance bottlenecks, no locks are held while modifying these counters. This implies that minor inaccuracies may be introduced when changes collide, so (for instance) adding up all the diff --git a/block/bio.c b/block/bio.c index 641ef0928d73585c965ec75f2bbe8b6519df393c..4d500beef061b5777401d6c7e7f4e90c96ecde0f 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1510,7 +1510,9 @@ EXPORT_SYMBOL_GPL(bio_await); */ int submit_bio_wait(struct bio *bio) { + task_set_wait_res(TASK_WAIT_BIO, bio); bio_await(bio, NULL, NULL); + task_clear_wait_res(); return blk_status_to_errno(bio->bi_status); } EXPORT_SYMBOL(submit_bio_wait); diff --git a/block/blk-core.c b/block/blk-core.c index 474700ffaa1c895cb28f5fb0ec9e6b1f5c601b74..9b96acd218dbbb1a261c16fc70655e5840d60b97 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -401,6 +401,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) return ERR_PTR(-ENOMEM); q->last_merge = NULL; + q->rq_hang_threshold = BLK_REQ_HANG_THRESHOLD; q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL); if (q->id < 0) { diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 047ec887456b6da5388293d3abc3cac89fba47d6..0870e368445b4f3d55a6dfb39708b97836f1d7f9 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -148,12 +148,36 @@ 
static ssize_t queue_state_write(void *data, const char __user *buf, return count; } +static void blk_mq_debugfs_rq_hang_show(struct seq_file *m, struct request *rq); + +static bool blk_mq_check_rq_hang(struct request *rq, void *priv) +{ + struct seq_file *m = priv; + u64 now = ktime_get_ns(); + u64 duration; + + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration >= READ_ONCE(rq->q->rq_hang_threshold)) + blk_mq_debugfs_rq_hang_show(m, rq); + + return true; +} + +static int queue_rq_hang_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + + blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, m); + return 0; +} + static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, { "pm_only", 0600, queue_pm_only_show, NULL }, { "state", 0600, queue_state_show, queue_state_write }, { "zone_wplugs", 0400, queue_zone_wplugs_show, NULL }, + { "rq_hang", 0400, queue_rq_hang_show, NULL }, { }, }; @@ -297,6 +321,52 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) } EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); +static void blk_mq_debugfs_rq_hang_show(struct seq_file *m, struct request *rq) +{ + const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; + const unsigned int op = req_op(rq); + const char *op_str = blk_op_str(op); + struct bio *bio; + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + seq_printf(m, "%px {.op=", rq); + if (strcmp(op_str, "UNKNOWN") == 0) + seq_printf(m, "%u", op); + else + seq_printf(m, "%s", op_str); + seq_puts(m, ", .cmd_flags="); + blk_flags_show(m, rq->cmd_flags & ~REQ_OP_MASK, cmd_flag_name, + ARRAY_SIZE(cmd_flag_name)); + seq_puts(m, ", .rq_flags="); + blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, + ARRAY_SIZE(rqf_name)); + seq_printf(m, ", .state=%s", blk_mq_rq_state_name(blk_mq_rq_state(rq))); + seq_printf(m, ", .tag=%d, .internal_tag=%d", 
rq->tag, + rq->internal_tag); + seq_printf(m, ", .start_time_ns=%llu", rq->start_time_ns); + seq_printf(m, ", .io_start_time_ns=%llu", rq->io_start_time_ns); + seq_printf(m, ", .current_time=%llu", ktime_get_ns()); + + __rq_for_each_bio(bio, rq) { + seq_printf(m, ", .bio = %px", bio); + seq_printf(m, ", .sector = %llu, .len=%u", + bio->bi_iter.bi_sector, bio->bi_iter.bi_size); + seq_puts(m, ", .bio_pages = { "); + bio_for_each_segment_all(bvec, bio, iter_all) { + struct page *page = bvec->bv_page; + + if (!page) + continue; + seq_printf(m, "%px ", page); + } + seq_puts(m, "}"); + } + if (mq_ops->show_rq) + mq_ops->show_rq(m, rq); + seq_puts(m, "}\n"); +} + static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) __acquires(&hctx->lock) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 4c5c16cce4f8f3102f4d86964d4bf9f90df6582e..cbb5ffa031c33c4cd642e1e200d33e9dff1e677b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -112,6 +112,34 @@ void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2]) inflight[WRITE] = mi.inflight[WRITE]; } +struct mq_hang { + struct block_device *part; + unsigned int hang[2]; +}; + +static bool blk_mq_check_hang(struct request *rq, void *priv) +{ + struct mq_hang *mh = priv; + u64 now = ktime_get_ns(), duration; + + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if ((duration >= READ_ONCE(rq->q->rq_hang_threshold)) && + (!bdev_partno(mh->part) || rq->part == mh->part)) + mh->hang[rq_data_dir(rq)]++; + + return true; +} + +void blk_mq_hang_rw(struct request_queue *q, struct block_device *part, + unsigned int hang[2]) +{ + struct mq_hang mh = { .part = part }; + + blk_mq_queue_tag_busy_iter(q, blk_mq_check_hang, &mh); + hang[0] = mh.hang[0]; + hang[1] = mh.hang[1]; +} + #ifdef CONFIG_LOCKDEP static bool blk_freeze_set_owner(struct request_queue *q, struct task_struct *owner) @@ -1084,6 +1112,10 @@ static inline void blk_account_io_done(struct request *req, u64 now) part_stat_add(req->part, 
nsecs[sgrp], now - req->start_time_ns); part_stat_local_dec(req->part, in_flight[op_is_write(req_op(req))]); + if (req->rq_flags & RQF_STATS) { + part_stat_add(req->part, d2c_nsecs[sgrp], + now - req->io_start_time_ns); + } part_stat_unlock(); } } @@ -1371,8 +1403,8 @@ void blk_mq_start_request(struct request *rq) trace_block_rq_issue(rq); - if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) && - !blk_rq_is_passthrough(rq)) { + if ((test_bit(QUEUE_FLAG_STATS, &q->queue_flags) || + READ_ONCE(q->enable_d2c_stats)) && !blk_rq_is_passthrough(rq)) { rq->io_start_time_ns = blk_time_get_ns(); rq->stats_sectors = blk_rq_sectors(rq); rq->rq_flags |= RQF_STATS; diff --git a/block/blk-mq.h b/block/blk-mq.h index 7fc651df78f4f803ad3c75c0b9925b422f0e78df..aa1407e693e00752d6289d84141a5608cb28c7c5 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -265,6 +265,8 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) } void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2]); +void blk_mq_hang_rw(struct request_queue *q, struct block_device *part, + unsigned int hang[2]); static inline void blk_mq_put_dispatch_budget(struct request_queue *q, int budget_token) diff --git a/block/blk-settings.c b/block/blk-settings.c index 78c83817b9d36b6781236c9ce688e3071dcddbe9..81eb9ec38a3ff00938595dd8e898b1ce06d201fd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -27,6 +27,19 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) } EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); +void blk_queue_rq_hang_threshold(struct request_queue *q, + unsigned int hang_threshold) +{ + WRITE_ONCE(q->rq_hang_threshold, hang_threshold); +} +EXPORT_SYMBOL_GPL(blk_queue_rq_hang_threshold); + +void blk_queue_d2c_stats(struct request_queue *q, bool enable) +{ + WRITE_ONCE(q->enable_d2c_stats, enable); +} +EXPORT_SYMBOL_GPL(blk_queue_d2c_stats); + /** * blk_set_stacking_limits - set default limits for stacking devices * @lim: the queue_limits 
structure to reset diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index f22c1f253eb3a77c6329fa8aba89f97abe95e1bd..9bba438a4851e844059d1aedadb131d973c596be 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -553,6 +553,48 @@ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, return count; } +static ssize_t queue_hang_threshold_show(struct gendisk *disk, char *page) +{ + return sysfs_emit(page, "%u\n", READ_ONCE(disk->queue->rq_hang_threshold)); +} + +static ssize_t queue_hang_threshold_store(struct gendisk *disk, const char *page, + size_t count) +{ + unsigned int hang_threshold; + int err; + struct request_queue *q = disk->queue; + + err = kstrtou32(page, 10, &hang_threshold); + if (err || hang_threshold == 0) + return -EINVAL; + + blk_queue_rq_hang_threshold(q, hang_threshold); + + return count; +} + +static ssize_t queue_d2c_stats_show(struct gendisk *disk, char *page) +{ + return sysfs_emit(page, "%u\n", READ_ONCE(disk->queue->enable_d2c_stats)); +} + +static ssize_t queue_d2c_stats_store(struct gendisk *disk, const char *page, + size_t count) +{ + struct request_queue *q = disk->queue; + bool enable; + int err; + + err = kstrtobool(page, &enable); + if (err) + return -EINVAL; + + blk_queue_d2c_stats(q, enable); + + return count; +} + static ssize_t queue_wc_show(struct gendisk *disk, char *page) { if (blk_queue_write_cache(disk->queue)) @@ -663,6 +705,8 @@ QUEUE_LIM_RO_ENTRY(queue_dax, "dax"); QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask"); QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment"); +QUEUE_RW_ENTRY(queue_hang_threshold, "hang_threshold"); +QUEUE_RW_ENTRY(queue_d2c_stats, "d2c_stats"); /* legacy alias for logical_block_size: */ static const struct queue_sysfs_entry queue_hw_sector_size_entry = { @@ -775,6 +819,8 @@ static const struct attribute *const queue_attrs[] = { &queue_virt_boundary_mask_entry.attr, &queue_dma_alignment_entry.attr, 
&queue_ra_entry.attr, + &queue_hang_threshold_entry.attr, + &queue_d2c_stats_entry.attr, /* * Attributes which don't require locking. diff --git a/block/blk.h b/block/blk.h index ec4674cdf2ead4fd259ff5fc42401f591e684ee9..4e3f26205276e64f985549c194357d035490d924 100644 --- a/block/blk.h +++ b/block/blk.h @@ -342,6 +342,8 @@ ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf); +ssize_t part_hang_show(struct device *dev, struct device_attribute *attr, + char *buf); ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, diff --git a/block/genhd.c b/block/genhd.c index 7d6854fd28e95ae9134309679a7c6a937f5b7db8..594cd1e51c50945ccb31c049a73bd360c4002457 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -116,6 +116,7 @@ static void part_stat_read_all(struct block_device *part, for (group = 0; group < NR_STAT_GROUPS; group++) { stat->nsecs[group] += ptr->nsecs[group]; + stat->d2c_nsecs[group] += ptr->d2c_nsecs[group]; stat->sectors[group] += ptr->sectors[group]; stat->ios[group] += ptr->ios[group]; stat->merges[group] += ptr->merges[group]; @@ -1078,7 +1079,8 @@ ssize_t part_stat_show(struct device *dev, "%8lu %8lu %8llu %8u " "%8u %8u %8u " "%8lu %8lu %8llu %8u " - "%8lu %8u" + "%8lu %8u " + "%8u %8u %8u" "\n", stat.ios[STAT_READ], stat.merges[STAT_READ], @@ -1100,7 +1102,10 @@ ssize_t part_stat_show(struct device *dev, (unsigned long long)stat.sectors[STAT_DISCARD], (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC), stat.ios[STAT_FLUSH], - (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC)); + (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC), + (unsigned int)div_u64(stat.d2c_nsecs[STAT_READ], NSEC_PER_MSEC), + (unsigned int)div_u64(stat.d2c_nsecs[STAT_WRITE], NSEC_PER_MSEC), + (unsigned 
int)div_u64(stat.d2c_nsecs[STAT_DISCARD], NSEC_PER_MSEC)); } /* @@ -1120,6 +1125,23 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%8u %8u\n", inflight[READ], inflight[WRITE]); } +ssize_t part_hang_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); + unsigned int hang[2] = {0, 0}; + + /* + * For now, we only support mq device, since don't find a generic method + * to track reqs in single queue device. + */ + if (queue_is_mq(q)) + blk_mq_hang_rw(q, bdev, hang); + + return sysfs_emit(buf, "%8u %8u\n", hang[0], hang[1]); +} + static ssize_t disk_capability_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1170,6 +1192,7 @@ static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL); static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL); static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); +static DEVICE_ATTR(hang, 0444, part_hang_show, NULL); static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL); static DEVICE_ATTR(partscan, 0444, partscan_show, NULL); @@ -1218,6 +1241,7 @@ static struct attribute *disk_attrs[] = { &dev_attr_capability.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, + &dev_attr_hang.attr, &dev_attr_badblocks.attr, &dev_attr_events.attr, &dev_attr_events_async.attr, @@ -1406,6 +1430,12 @@ static int diskstats_show(struct seq_file *seqf, void *v) seq_put_decimal_ull(seqf, " ", stat.ios[STAT_FLUSH]); seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC)); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.d2c_nsecs[STAT_READ], + NSEC_PER_MSEC)); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.d2c_nsecs[STAT_WRITE], + 
NSEC_PER_MSEC)); + seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.d2c_nsecs[STAT_DISCARD], + NSEC_PER_MSEC)); seq_putc(seqf, '\n'); } rcu_read_unlock(); diff --git a/block/partitions/core.c b/block/partitions/core.c index 5d5332ce586b6b292325e7dc02dca7e41f7ef6bc..29d47c7d1dcc67a58c0ac10c134aefcc5f2f44e4 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -213,6 +213,7 @@ static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL); static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL); static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); +static DEVICE_ATTR(hang, 0444, part_hang_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); @@ -227,6 +228,7 @@ static struct attribute *part_attrs[] = { &dev_attr_discard_alignment.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, + &dev_attr_hang.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index f8c238b3aadbb9f11ffd6e15430c522be5d8db36..9a5a9970a8bfc5cfd3899b32dc94117193d07033 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1115,7 +1115,9 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, if (buffer_shadow(bh)) { JBUFFER_TRACE(jh, "on shadow: sleep"); spin_unlock(&jh->b_state_lock); + task_set_wait_res(TASK_WAIT_FOLIO, bh->b_folio); wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE); + task_clear_wait_res(); goto repeat; } diff --git a/fs/proc/base.c b/fs/proc/base.c index d9acfa89c894bd1608580331e1d5b3018c59123b..abf4903dafa9f62dac91c0f8ef049307905d966a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -603,6 +603,16 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, return 0; } +static int proc_wait_res(struct seq_file *m, struct pid_namespace 
*ns, + struct pid *pid, struct task_struct *task) +{ + seq_printf(m, "%d %px %lu %lu\n", READ_ONCE(task->wait_res_type), + READ_ONCE(task->wait_folio), READ_ONCE(task->wait_moment), + jiffies); + + return 0; +} + struct limit_names { const char *name; const char *unit; @@ -3421,6 +3431,7 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif + ONE("wait_res", S_IRUSR, proc_wait_res), }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3755,6 +3766,7 @@ static const struct pid_entry tid_base_stuff[] = { ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif + ONE("wait_res", S_IRUSR, proc_wait_res), }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e896b223d43529bf5b459c9b1e65456f618a73d0..d18a276739bf83d3e2fb2d436c972dfcf3b3df98 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -495,6 +495,12 @@ struct blk_independent_access_ranges { struct blk_independent_access_range ia_range[]; }; +/* + * default request hang threshold, unit is millisecond. If one request does + * not complete in this threshold time, consider this request as hang. + */ +#define BLK_REQ_HANG_THRESHOLD 5000 + struct request_queue { /* * The queue owner gets to use this for whatever they like.
@@ -515,6 +521,7 @@ struct request_queue { unsigned long queue_flags; unsigned int __data_racy rq_timeout; + unsigned int rq_hang_threshold; unsigned int queue_depth; @@ -668,6 +675,8 @@ struct request_queue { */ struct mutex debugfs_mutex; + bool enable_d2c_stats; + CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) CK_KABI_RESERVE(3) @@ -1169,6 +1178,9 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_rq_hang_threshold(struct request_queue *, + unsigned int hang_threshold); +extern void blk_queue_d2c_stats(struct request_queue *, bool enable); struct blk_independent_access_ranges * disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e4939e33b4b514a4f20bdb41315227902e1bc0c8..5f1a1dcbbd2069b26bd0d59ff56f2f5e19855c78 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -416,8 +416,11 @@ map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) static inline void wait_on_buffer(struct buffer_head *bh) { might_sleep(); - if (buffer_locked(bh)) + if (buffer_locked(bh)) { + task_set_wait_res(TASK_WAIT_FOLIO, bh->b_folio); __wait_on_buffer(bh); + task_clear_wait_res(); + } } static inline int trylock_buffer(struct buffer_head *bh) @@ -428,8 +431,11 @@ static inline int trylock_buffer(struct buffer_head *bh) static inline void lock_buffer(struct buffer_head *bh) { might_sleep(); - if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) { + task_set_wait_res(TASK_WAIT_FOLIO, bh->b_folio); __lock_buffer(bh); + task_clear_wait_res(); + } } static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 
729415e91215d60dc8c5d2dbb84ed739eb15a8c1..7440f274041ba5f117317d695a94782ccc7c86f0 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -7,6 +7,7 @@ struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; + u64 d2c_nsecs[NR_STAT_GROUPS]; unsigned long sectors[NR_STAT_GROUPS]; unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 213033e7109af3649ecf049a7407a6927513802c..3614e5f20d91cd1520c121ad70b7d7dfd40e7a7f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1678,6 +1678,13 @@ struct task_struct { struct unwind_task_info unwind_info; #endif + int wait_res_type; + union { + struct folio *wait_folio; + struct bio *wait_bio; + }; + unsigned long wait_moment; + /* CPU-specific state of this task: */ struct thread_struct thread; @@ -1697,6 +1704,39 @@ struct task_struct { CK_KABI_RESERVE(8) } __attribute__ ((aligned (64))); +/* copy from jiffies.h to avoid circular dependency */ +extern unsigned long volatile __cacheline_aligned_in_smp jiffies; + +enum { + TASK_WAIT_FOLIO = 1, + TASK_WAIT_BIO, +}; + +static inline void task_set_wait_res(int type, void *res) +{ + switch (type) { + case TASK_WAIT_FOLIO: + current->wait_folio = (struct folio *)res; + break; + case TASK_WAIT_BIO: + current->wait_bio = (struct bio *)res; + break; + default: + current->wait_folio = NULL; + break; + } + + current->wait_res_type = type; + current->wait_moment = jiffies; +} + +static inline void task_clear_wait_res(void) +{ + current->wait_folio = NULL; + current->wait_res_type = 0; + current->wait_moment = 0; +} + #ifdef CONFIG_SCHED_PROXY_EXEC DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec); static inline bool sched_proxy_exec(void) diff --git a/mm/filemap.c b/mm/filemap.c index 4e636647100c1dddab3b319a1946510089105dbe..4b3f2c183044b02d1af90a897c92ffa25cdba355 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1702,8 +1702,10 @@ EXPORT_SYMBOL(folio_end_writeback); */ void 
__folio_lock(struct folio *folio) { + task_set_wait_res(TASK_WAIT_FOLIO, folio); folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); + task_clear_wait_res(); } EXPORT_SYMBOL(__folio_lock); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 88cd53d4ba0929e3d4e74d7d72d36b3ceca51189..2a6d361a2e96458aa7771b7b1f0cb63f6d5a8626 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -3063,7 +3063,9 @@ void folio_wait_writeback(struct folio *folio) { while (folio_test_writeback(folio)) { trace_folio_wait_writeback(folio, folio_mapping(folio)); + task_set_wait_res(TASK_WAIT_FOLIO, folio); folio_wait_bit(folio, PG_writeback); + task_clear_wait_res(); } } EXPORT_SYMBOL_GPL(folio_wait_writeback);