swap: add a simple detector for inappropriate swapin readahead

author Shaohua Li <shli@kernel.org>

Thu, 23 May 2013 00:37:10 +0000 (10:37 +1000)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Mon, 27 May 2013 06:09:00 +0000 (16:09 +1000)
author Shaohua Li <shli@kernel.org>
Thu, 23 May 2013 00:37:10 +0000 (10:37 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Mon, 27 May 2013 06:09:00 +0000 (16:09 +1000)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h

index 6d53675c2b54691225b12f3f23c914aca86c35ac..f1a5b5937be4f2570110967a7303cefaa15b2b90 100644 (file)
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
  TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
  PAGEFLAG(MappedToDisk, mappedtodisk)
  
-/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
+/* PG_readahead is only used for reads; PG_reclaim is only for writes */
  PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
-PAGEFLAG(Readahead, reclaim)           /* Reminder to do async read-ahead */
+PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
  
  #ifdef CONFIG_HIGHMEM
  /*
diff --git a/mm/swap_state.c b/mm/swap_state.c

index b3d40dcf36247975ac05b1f7a7119521e7cb65bb..7a6d3b1a7d67abfbede9054e0ff76502a949a0c7 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
         return ret;
  }
  
+static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
+
  void show_swap_cache_info(void)
  {
         printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
  
         page = find_get_page(swap_address_space(entry), entry.val);
  
-       if (page)
+       if (page) {
                 INC_CACHE_INFO(find_success);
+               if (TestClearPageReadahead(page))
+                       atomic_inc(&swapin_readahead_hits);
+       }
  
         INC_CACHE_INFO(find_total);
         return page;
@@ -373,6 +378,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
         return found_page;
  }
  
+/* Return how many pages to read around swap @offset: 1 means no readahead. */
+static unsigned long swapin_nr_pages(unsigned long offset)
+{
+       static unsigned long prev_offset;
+       unsigned int pages, max_pages, last_ra;
+       static atomic_t last_readahead_pages;
+
+       max_pages = 1 << ACCESS_ONCE(page_cluster);
+       if (max_pages <= 1)
+               return 1;
+
+       /*
+        * Heuristic found to work well on sequential and random loads, on
+        * hard disk and on SSD; the "+ 2" is empirical, nothing deeper.
+        */
+       pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
+       if (pages == 2) {
+               /*
+                * No readahead hits to judge by: to avoid getting stuck here,
+                * check for an adjacent offset instead (swap type ignored).
+                */
+               if (offset != prev_offset + 1 && offset != prev_offset - 1)
+                       pages = 1;
+               prev_offset = offset;
+       } else {
+               /* Round up to a power of two, never below 4 */
+               unsigned int roundup = 4;
+               while (roundup < pages)
+                       roundup <<= 1;
+               pages = roundup;
+       }
+
+       if (pages > max_pages)
+               pages = max_pages;
+
+       /* Don't shrink readahead too fast: at most halve it per call */
+       last_ra = atomic_read(&last_readahead_pages) / 2;
+       if (pages < last_ra)
+               pages = last_ra;
+       atomic_set(&last_readahead_pages, pages);
+
+       return pages;
+}
+
  /**
   * swapin_readahead - swap in pages in hope we need them soon
   * @entry: swap entry of this memory
@@ -396,11 +445,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                         struct vm_area_struct *vma, unsigned long addr)
  {
         struct page *page;
-       unsigned long offset = swp_offset(entry);
+       unsigned long entry_offset = swp_offset(entry);
+       unsigned long offset = entry_offset;
         unsigned long start_offset, end_offset;
-       unsigned long mask = (1UL << page_cluster) - 1;
+       unsigned long mask;
         struct blk_plug plug;
  
+       mask = swapin_nr_pages(offset) - 1;
+       if (!mask)
+               goto skip;
+
         /* Read a page_cluster sized and aligned cluster around offset. */
         start_offset = offset & ~mask;
         end_offset = offset | mask;
@@ -414,10 +468,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                                                 gfp_mask, vma, addr);
                 if (!page)
                         continue;
+               if (offset != entry_offset)
+                       SetPageReadahead(page);
                 page_cache_release(page);
         }
         blk_finish_plug(&plug);
  
         lru_add_drain();        /* Push any new pages onto the LRU now */
+skip:
         return read_swap_cache_async(entry, gfp_mask, vma, addr);
  }
author	Shaohua Li <shli@kernel.org>
	Thu, 23 May 2013 00:37:10 +0000 (10:37 +1000)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Mon, 27 May 2013 06:09:00 +0000 (16:09 +1000)
include/linux/page-flags.h		patch \| blob \| history
mm/swap_state.c		patch \| blob \| history