From b45972265f823ed01eae0867a176320071665787 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 3 Jul 2013 15:02:05 -0700 Subject: [PATCH] mm: vmscan: take page buffers dirty and locked state into account Page reclaim keeps track of dirty and under writeback pages and uses it to determine if wait_iff_congested() should stall or if kswapd should begin writing back pages. This fails to account for buffer pages that can be under writeback but not PageWriteback which is the case for filesystems like ext3 ordered mode. Furthermore, PageDirty buffer pages can have all the buffers clean and writepage does no IO so it should not be accounted as congested. This patch adds an address_space operation that filesystems may optionally use to check if a page is really dirty or really under writeback. An implementation is provided for for buffer_heads is added and used for block operations and ext3 in ordered mode. By default the page flags are obeyed. Credit goes to Jan Kara for identifying that the page flags alone are not sufficient for ext3 and sanity checking a number of ideas on how the problem could be addressed. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Valdis Kletnieks Cc: Zlatko Calusic Cc: dormando Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 1 + fs/buffer.c | 34 ++++++++++++++++++++++++++++++++++ fs/ext3/inode.c | 1 + include/linux/buffer_head.h | 3 +++ include/linux/fs.h | 1 + mm/vmscan.c | 10 ++++++++++ 6 files changed, 50 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 431b6a04ebfd..bb43ce081d6e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1562,6 +1562,7 @@ static const struct address_space_operations def_blk_aops = { .writepages = generic_writepages, .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, + .is_dirty_writeback = buffer_check_dirty_writeback, }; const struct file_operations def_blk_fops = { diff --git a/fs/buffer.c b/fs/buffer.c index f93392e2df12..4d7433534f5c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -82,6 +82,40 @@ void unlock_buffer(struct buffer_head *bh) } EXPORT_SYMBOL(unlock_buffer); +/* + * Returns if the page has dirty or writeback buffers. If all the buffers + * are unlocked and clean then the PageDirty information is stale. If + * any of the pages are locked, it is assumed they are locked for IO. + */ +void buffer_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback) +{ + struct buffer_head *head, *bh; + *dirty = false; + *writeback = false; + + BUG_ON(!PageLocked(page)); + + if (!page_has_buffers(page)) + return; + + if (PageWriteback(page)) + *writeback = true; + + head = page_buffers(page); + bh = head; + do { + if (buffer_locked(bh)) + *writeback = true; + + if (buffer_dirty(bh)) + *dirty = true; + + bh = bh->b_this_page; + } while (bh != head); +} +EXPORT_SYMBOL(buffer_check_dirty_writeback); + /* * Block until a buffer comes unlocked. This doesn't stop it * from becoming locked again - you have to lock it yourself diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f67668f724ba..2bd85486b879 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1985,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .is_dirty_writeback = buffer_check_dirty_writeback, .error_remove_page = generic_error_remove_page, }; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f5a3b838ddb0..91fa9a94ae92 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -139,6 +139,9 @@ BUFFER_FNS(Prio, prio) }) #define page_has_buffers(page) PagePrivate(page) +void buffer_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback); + /* * Declarations */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 2b82c8041490..99be011e00de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -380,6 +380,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); /* swapfile support */ diff --git a/mm/vmscan.c b/mm/vmscan.c index bf4778479e3a..c85794399848 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -673,6 +673,8 @@ static enum page_references page_check_references(struct page *page, static void page_check_dirty_writeback(struct page *page, bool *dirty, bool *writeback) { + struct address_space *mapping; + /* * Anonymous pages are not handled by flushers and must be written * from reclaim context. Do not stall reclaim based on them @@ -686,6 +688,14 @@ static void page_check_dirty_writeback(struct page *page, /* By default assume that the page flags are accurate */ *dirty = PageDirty(page); *writeback = PageWriteback(page); + + /* Verify dirty/writeback state if the filesystem supports it */ + if (!page_has_private(page)) + return; + + mapping = page_mapping(page); + if (mapping && mapping->a_ops->is_dirty_writeback) + mapping->a_ops->is_dirty_writeback(page, dirty, writeback); } /* -- 2.39.5