From dc3670386ee38f68e5ab4e778b137d6fed3d0d24 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Tue, 4 Jun 2024 11:05:22 +0800 Subject: [PATCH] blk-wbt: don't throttle swap writes in direct reclaim Upstream: commit 4e63aeb5d0101ddada36a2f64f048e2f9d2202fc Conflict: minor, and there are more users of several flags so convert them all Backport-reason: avoid hung task when doing SWAP over loop file Currently we avoid throttling swap writes by determining whether the current process is kswapd (aka current_is_kswapd()), but swap writes can come from either kswapd or direct reclaim, so the swap writes from direct reclaim will still be throttled. When a process holds a lock to allocate a free page, and enters direct reclaim because there is no free memory, then it might trigger a hung task, because the wbt throttling delays it and causes other processes to fail to get the lock. Both kswapd and direct reclaim set the REQ_SWAP flag, so use REQ_SWAP instead of current_is_kswapd() to avoid throttling swap writes. Also rename WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to WBT_RWQ_SWAP. 
Signed-off-by: Baokun Li Reviewed-by: Yu Kuai Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240604030522.3686177-1-libaokun@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Kairui Song Signed-off-by: Yongliang Gao Signed-off-by: yilingjin Reviewed-by: caelli --- block/blk-wbt.c | 30 +++++++++++++++--------------- block/blk-wbt.h | 4 ++-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index cf27d04ea042..0dca570a3467 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -130,8 +130,8 @@ static bool wb_recent_wait(struct rq_wb *rwb) static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, enum wbt_flags wb_acct) { - if (wb_acct & WBT_KSWAPD) - return &rwb->rq_wait[WBT_RWQ_KSWAPD]; + if (wb_acct & WBT_SWAP) + return &rwb->rq_wait[WBT_RWQ_SWAP]; else if (wb_acct & WBT_DISCARD) return &rwb->rq_wait[WBT_RWQ_DISCARD]; @@ -479,7 +479,7 @@ static bool close_io(struct rq_wb *rwb) time_before(now, rwb->last_comp + HZ / 10); } -#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO) +#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP) static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) { @@ -497,13 +497,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) /* * At this point we know it's a buffered write. If this is - * kswapd trying to free memory, or REQ_SYNC is set, then + * swap trying to free memory, or REQ_SYNC is set, then * it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing * IO for a bit. 
*/ - if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) + if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb)) limit = rwb->rq_depth.max_depth; else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { /* @@ -609,8 +609,8 @@ static enum wbt_flags bio_to_wbt_class_flags(struct bio *bio) if (bio_op(bio) == REQ_OP_READ) { flags = WBT_READ; } else if (wbt_should_throttle(NULL, bio)) { - if (current_is_kswapd()) - flags |= WBT_KSWAPD; + if (bio->bi_opf & REQ_SWAP) + flags |= WBT_SWAP; if (bio_op(bio) == REQ_OP_DISCARD) flags |= WBT_DISCARD; flags |= WBT_CLASS_TRACKED; @@ -630,8 +630,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio) if (bio_op(bio) == REQ_OP_READ) { flags = WBT_READ; } else if (wbt_should_throttle(rwb, bio)) { - if (current_is_kswapd()) - flags |= WBT_KSWAPD; + if (bio->bi_opf & REQ_SWAP) + flags |= WBT_SWAP; if (bio_op(bio) == REQ_OP_DISCARD) flags |= WBT_DISCARD; flags |= WBT_TRACKED; @@ -710,8 +710,8 @@ static int wbt_flags_to_counter_idx(enum wbt_flags flags) { int i; - if (flags & WBT_KSWAPD) - i = WBT_RWQ_KSWAPD; + if (flags & WBT_SWAP) + i = WBT_RWQ_SWAP; else if (flags & WBT_DISCARD) i = WBT_RWQ_DISCARD; else @@ -747,8 +747,8 @@ static inline void throtl_info_wake_all(struct wbt_throtl_info *ti) static inline struct rq_wait *throtl_info_get_rq_wait(struct wbt_throtl_info *ti, enum wbt_flags wb_acct) { - if (wb_acct & WBT_KSWAPD) - return &ti->rq_wait[WBT_RWQ_KSWAPD]; + if (wb_acct & WBT_SWAP) + return &ti->rq_wait[WBT_RWQ_SWAP]; else if (wb_acct & WBT_DISCARD) return &ti->rq_wait[WBT_RWQ_DISCARD]; @@ -1358,10 +1358,10 @@ static int wbt_debug_show(void *data, struct seq_file *m) seq_printf(m, "track_bg=%llu track_kswp=%llu track_disc=%llu finished_bg=%llu finished_kswp=%llu finished_disc=%llu " "untrack_read=%llu untrack_direct_wr=%llu escape_merg=%llu sync_write=%llu rd_expired=%llu ", atomic64_read(&ti->tracked_cnt[WBT_RWQ_BG]), - atomic64_read(&ti->tracked_cnt[WBT_RWQ_KSWAPD]), + 
atomic64_read(&ti->tracked_cnt[WBT_RWQ_SWAP]), atomic64_read(&ti->tracked_cnt[WBT_RWQ_DISCARD]), atomic64_read(&ti->finished_cnt[WBT_RWQ_BG]), - atomic64_read(&ti->finished_cnt[WBT_RWQ_KSWAPD]), + atomic64_read(&ti->finished_cnt[WBT_RWQ_SWAP]), atomic64_read(&ti->finished_cnt[WBT_RWQ_DISCARD]), atomic64_read(&ti->read_cnt), atomic64_read(&ti->direct_write_cnt), diff --git a/block/blk-wbt.h b/block/blk-wbt.h index 4031eda1ec88..d20feb334f93 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -14,7 +14,7 @@ enum wbt_flags { WBT_TRACKED = 1, /* write, tracked for throttling */ WBT_READ = 2, /* read */ - WBT_KSWAPD = 4, /* write, from kswapd */ + WBT_SWAP = 4, /* write, from swap_writepage() */ WBT_DISCARD = 8, /* discard */ WBT_CLASS_TRACKED = 16, /*bio tracked wbt class*/ @@ -23,7 +23,7 @@ enum wbt_flags { enum { WBT_RWQ_BG = 0, - WBT_RWQ_KSWAPD, + WBT_RWQ_SWAP, WBT_RWQ_DISCARD, WBT_NUM_RWQ, }; -- Gitee