swap: add a simple detector for inappropriate swapin readahead

author Shaohua Li <shli@kernel.org>

Thu, 25 Oct 2012 01:14:53 +0000 (12:14 +1100)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Thu, 25 Oct 2012 03:14:40 +0000 (14:14 +1100)
author Shaohua Li <shli@kernel.org>
Thu, 25 Oct 2012 01:14:53 +0000 (12:14 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 25 Oct 2012 03:14:40 +0000 (14:14 +1100)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h

index bfe1f4780644434cd8901abae42939d661e70a8e..1bd87d6e38f305f56e2e782980ac8cfb8f31401d 100644 (file)
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -35,6 +35,9 @@ struct anon_vma {
          * anon_vma if they are the last user on release
          */
         atomic_t refcount;
+#ifdef CONFIG_SWAP
+       atomic_t swapra_miss;
+#endif
  
         /*
          * NOTE: the LSB of the rb_root.rb_node is set by
diff --git a/mm/internal.h b/mm/internal.h

index 52d1fa957194f8f8b574a29cef6569b9f8ed0306..f7de7340cd803e6d10c7a2efb58a3db50746b471 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -12,6 +12,7 @@
  #define __MM_INTERNAL_H
  
  #include <linux/mm.h>
+#include <linux/rmap.h>
  
  void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                 unsigned long floor, unsigned long ceiling);
@@ -372,4 +373,54 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
  #define ALLOC_CPUSET           0x40 /* check for correct cpuset */
  #define ALLOC_CMA              0x80 /* allow allocations from CMA areas */
  
+/*
+ * Unnecessary readahead harms performance. 1. For SSD, a big read is more
+ * expensive than a small read, so extra unnecessary reads are pure overhead.
+ * For hard disks, this overhead doesn't exist. 2. Unnecessary readahead
+ * allocates extra memory, which further tightens memory pressure, causing
+ * more swapout/swapin.
+ * This adds a simple swap random access detector. In a swap page fault, if
+ * the page is found in the swap cache, the anon_vma's miss counter is
+ * decreased; otherwise we need to do a sync swapin and the counter is
+ * increased. Swapin skips readahead once the counter exceeds a threshold.
+ */
+#ifdef CONFIG_SWAP
+/* Readahead is skipped while an anon_vma's swapra_miss is above this value. */
+#define SWAPRA_MISS_THRESHOLD  (100)
+/* Ceiling for swapra_miss: swap_cache_miss() stops counting once it is hit. */
+#define SWAPRA_MAX_MISS ((SWAPRA_MISS_THRESHOLD) * 10)
+/*
+ * Record a swap cache hit: decay the anon_vma's miss counter, never letting
+ * it drop below zero. Vmas without an anon_vma are not tracked.
+ */
+static inline void swap_cache_hit(struct vm_area_struct *vma)
+{
+       struct anon_vma *anon;
+
+       if (!vma)
+               return;
+       anon = vma->anon_vma;
+       if (anon)
+               atomic_dec_if_positive(&anon->swapra_miss);
+}
+
+/*
+ * Record a swap cache miss: bump the anon_vma's miss counter, saturating at
+ * SWAPRA_MAX_MISS. Vmas without an anon_vma are not tracked.
+ */
+static inline void swap_cache_miss(struct vm_area_struct *vma)
+{
+       if (!vma || !vma->anon_vma)
+               return;
+       /*
+        * Test and increment in one atomic step. A separate atomic_read()
+        * followed by atomic_inc() lets concurrent faulters all pass the
+        * check and push the counter past the cap.
+        */
+       atomic_add_unless(&vma->anon_vma->swapra_miss, 1, SWAPRA_MAX_MISS);
+}
+
+/*
+ * Tell swapin whether to skip readahead for this vma.
+ *
+ * Returns nonzero once the anon_vma's miss counter is above
+ * SWAPRA_MISS_THRESHOLD; returns 0 otherwise, including when there is no
+ * vma or no anon_vma to consult.
+ */
+static inline int swap_cache_skip_readahead(struct vm_area_struct *vma)
+{
+       struct anon_vma *anon = vma ? vma->anon_vma : NULL;
+
+       if (!anon)
+               return 0;
+       return atomic_read(&anon->swapra_miss) > SWAPRA_MISS_THRESHOLD;
+}
+#else
+/* !CONFIG_SWAP stub: there is no swapra_miss counter to decay. */
+static inline void swap_cache_hit(struct vm_area_struct *vma)
+{
+}
+
+/* !CONFIG_SWAP stub: there is no swapra_miss counter to increment. */
+static inline void swap_cache_miss(struct vm_area_struct *vma)
+{
+}
+
+/* !CONFIG_SWAP stub: always returns 0, i.e. never asks to skip readahead. */
+static inline int swap_cache_skip_readahead(struct vm_area_struct *vma)
+{
+       return 0;
+}
+#endif /* CONFIG_SWAP */
+
  #endif /* __MM_INTERNAL_H */
diff --git a/mm/memory.c b/mm/memory.c

index b197b19d540073017d0cb68614066789ac81b1fe..da0360314e094d1181c60b41462535fbe1002a4c 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3013,7 +3013,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 ret = VM_FAULT_HWPOISON;
                 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
                 goto out_release;
-       }
+       } else if (!(flags & FAULT_FLAG_TRIED))
+               swap_cache_hit(vma);
  
         locked = lock_page_or_retry(page, mm, flags);
  
diff --git a/mm/rmap.c b/mm/rmap.c

index 62a4a8cb8879c20e03ad28fb27c5a43fc0559118..6c686c2bd5d4c42f954ce84bb692b839e086b708 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -367,6 +367,9 @@ static void anon_vma_ctor(void *data)
  
         mutex_init(&anon_vma->mutex);
         atomic_set(&anon_vma->refcount, 0);
+#ifdef CONFIG_SWAP
+       atomic_set(&anon_vma->swapra_miss, 0);
+#endif
         anon_vma->rb_root = RB_ROOT;
  }
  
diff --git a/mm/shmem.c b/mm/shmem.c

index 67afba5117f2ebe80ef54e487d068fdf99e986bb..5a0802796a2c7d08318de67da4a7d776d45cf996 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,6 +922,7 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
         pvma.vm_pgoff = index + info->vfs_inode.i_ino;
         pvma.vm_ops = NULL;
         pvma.vm_policy = spol;
+       pvma.anon_vma = NULL;
         return swapin_readahead(swap, gfp, &pvma, 0);
  }
  
diff --git a/mm/swap_state.c b/mm/swap_state.c

index 0cb36fb1f61cc539baa143319c40da30ada3d04e..d1f6c2df820e995ffb067597e779c9a491d8bfe7 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -20,6 +20,7 @@
  #include <linux/page_cgroup.h>
  
  #include <asm/pgtable.h>
+#include "internal.h"
  
  /*
   * swapper_space is a fiction, retained to simplify the path through
@@ -379,6 +380,10 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
         unsigned long mask = (1UL << page_cluster) - 1;
         struct blk_plug plug;
  
+       swap_cache_miss(vma);
+       if (swap_cache_skip_readahead(vma))
+               goto skip;
+
         /* Read a page_cluster sized and aligned cluster around offset. */
         start_offset = offset & ~mask;
         end_offset = offset | mask;
@@ -397,5 +402,6 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
         blk_finish_plug(&plug);
  
         lru_add_drain();        /* Push any new pages onto the LRU now */
+skip:
         return read_swap_cache_async(entry, gfp_mask, vma, addr);
  }
author	Shaohua Li <shli@kernel.org>
	Thu, 25 Oct 2012 01:14:53 +0000 (12:14 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 25 Oct 2012 03:14:40 +0000 (14:14 +1100)
include/linux/rmap.h		patch \| blob \| history
mm/internal.h		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/rmap.c		patch \| blob \| history
mm/shmem.c		patch \| blob \| history
mm/swap_state.c		patch \| blob \| history