swap: add a simple detector for inappropriate swapin readahead

author Shaohua Li <shli@kernel.org>

Thu, 13 Sep 2012 00:59:00 +0000 (10:59 +1000)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Thu, 13 Sep 2012 07:28:02 +0000 (17:28 +1000)
author Shaohua Li <shli@kernel.org>
Thu, 13 Sep 2012 00:59:00 +0000 (10:59 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 13 Sep 2012 07:28:02 +0000 (17:28 +1000)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h

index b2cce644ffc712faf6630e23c9d5fdbc10c3ae3d..1767045c287294836f274c030cda9dabdbc3f5bb 100644 (file)
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -35,6 +35,9 @@ struct anon_vma {
          * anon_vma if they are the last user on release
          */
         atomic_t refcount;
+#ifdef CONFIG_SWAP
+       atomic_t swapra_miss;
+#endif
  
         /*
          * NOTE: the LSB of the rb_root.rb_node is set by
diff --git a/mm/internal.h b/mm/internal.h

index 8312d4fadf59dd32f21733b66ab14737e64da7b7..d09f59366de09692eb9c0bf73106f123bad5b77c 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -12,6 +12,7 @@
  #define __MM_INTERNAL_H
  
  #include <linux/mm.h>
+#include <linux/rmap.h>
  
  void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                 unsigned long floor, unsigned long ceiling);
@@ -358,4 +359,55 @@ extern unsigned long vm_mmap_pgoff(struct file *, unsigned long,
  extern void set_pageblock_order(void);
  unsigned long reclaim_clean_pages_from_list(struct zone *zone,
                                             struct list_head *page_list);
+
+/*
+ * Unnecessary readahead harms performance. 1. On an SSD, a large read is more
+ * expensive than a small one, so the extra unnecessary reads are pure
+ * overhead. On a hard disk, this overhead doesn't exist. 2. Unnecessary
+ * readahead allocates extra memory, which further increases memory pressure
+ * and so causes more swapout/swapin.
+ * This adds a simple detector for random swap access. On a swap page fault,
+ * a page found in the swap cache decrements the anon_vma's miss counter;
+ * otherwise a synchronous swapin is needed and the counter is incremented.
+ * Swapin does readahead only while the counter does not exceed a threshold.
+ */
+#ifdef CONFIG_SWAP
+#define SWAPRA_MISS_THRESHOLD  (100)
+#define SWAPRA_MAX_MISS ((SWAPRA_MISS_THRESHOLD) * 10)
+static inline void swap_cache_hit(struct vm_area_struct *vma)
+{
+       if (vma && vma->anon_vma)
+               atomic_dec_if_positive(&vma->anon_vma->swapra_miss);
+}
+
+static inline void swap_cache_miss(struct vm_area_struct *vma)
+{
+       if (!vma || !vma->anon_vma)
+               return;
+       if (atomic_read(&vma->anon_vma->swapra_miss) < SWAPRA_MAX_MISS)
+               atomic_inc(&vma->anon_vma->swapra_miss);
+}
+
+static inline int swap_cache_skip_readahead(struct vm_area_struct *vma)
+{
+       if (!vma || !vma->anon_vma)
+               return 0;
+       return atomic_read(&vma->anon_vma->swapra_miss) >
+               SWAPRA_MISS_THRESHOLD;
+}
+#else
+static inline void swap_cache_hit(struct vm_area_struct *vma)
+{
+}
+
+static inline void swap_cache_miss(struct vm_area_struct *vma)
+{
+}
+
+static inline int swap_cache_skip_readahead(struct vm_area_struct *vma)
+{
+       return 0;
+}
+#endif /* CONFIG_SWAP */
+
  #endif /* __MM_INTERNAL_H */
diff --git a/mm/memory.c b/mm/memory.c

index be4a2e6c487af29197cdd6c15fd208243e2b1cad..1a92d87575d74133e0dd62259ff07f8d1033d199 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2967,7 +2967,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 ret = VM_FAULT_HWPOISON;
                 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
                 goto out_release;
-       }
+       } else if (!(flags & FAULT_FLAG_TRIED))
+               swap_cache_hit(vma);
  
         locked = lock_page_or_retry(page, mm, flags);
  
diff --git a/mm/rmap.c b/mm/rmap.c

index 11b25a1394bd38f54696e091a9094ab3e2e00df7..4d9c7acba5d9d2e48fd3d9dc2cbc5a3b5c87ed68 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -366,6 +366,9 @@ static void anon_vma_ctor(void *data)
  
         mutex_init(&anon_vma->mutex);
         atomic_set(&anon_vma->refcount, 0);
+#ifdef CONFIG_SWAP
+       atomic_set(&anon_vma->swapra_miss, 0);
+#endif
         anon_vma->rb_root = RB_ROOT;
  }
  
diff --git a/mm/shmem.c b/mm/shmem.c

index cc12072f8787d5adacc6cd49a78849ca5c2005a6..217aa9cd59df904e5031cef207052e65aec34900 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,6 +922,7 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
         pvma.vm_pgoff = index + info->vfs_inode.i_ino;
         pvma.vm_ops = NULL;
         pvma.vm_policy = spol;
+       pvma.anon_vma = NULL;
         return swapin_readahead(swap, gfp, &pvma, 0);
  }
  
diff --git a/mm/swap_state.c b/mm/swap_state.c

index 0cb36fb1f61cc539baa143319c40da30ada3d04e..d1f6c2df820e995ffb067597e779c9a491d8bfe7 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -20,6 +20,7 @@
  #include <linux/page_cgroup.h>
  
  #include <asm/pgtable.h>
+#include "internal.h"
  
  /*
   * swapper_space is a fiction, retained to simplify the path through
@@ -379,6 +380,10 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
         unsigned long mask = (1UL << page_cluster) - 1;
         struct blk_plug plug;
  
+       swap_cache_miss(vma);
+       if (swap_cache_skip_readahead(vma))
+               goto skip;
+
         /* Read a page_cluster sized and aligned cluster around offset. */
         start_offset = offset & ~mask;
         end_offset = offset | mask;
@@ -397,5 +402,6 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
         blk_finish_plug(&plug);
  
         lru_add_drain();        /* Push any new pages onto the LRU now */
+skip:
         return read_swap_cache_async(entry, gfp_mask, vma, addr);
  }
author	Shaohua Li <shli@kernel.org>
	Thu, 13 Sep 2012 00:59:00 +0000 (10:59 +1000)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 13 Sep 2012 07:28:02 +0000 (17:28 +1000)
include/linux/rmap.h		patch \| blob \| history
mm/internal.h		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/rmap.c		patch \| blob \| history
mm/shmem.c		patch \| blob \| history
mm/swap_state.c		patch \| blob \| history