mm: free swp_entry in madvise_free
author    Minchan Kim <minchan@kernel.org>
          Wed, 21 Oct 2015 22:03:48 +0000 (09:03 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
          Wed, 21 Oct 2015 22:03:48 +0000 (09:03 +1100)
When I test the below piece of code with 12 processes (i.e., 512M * 12 = 6G
consumed) on my machine (3G ram + 12 cpu + 8G swap), madvise_free is
significantly slower (i.e., about 2x) than madvise_dontneed.

loop = 5;
ptr = mmap(512M);
while (loop--) {
        memset(ptr, 1, 512M);
        madvise(ptr, 512M, MADV_FREE or MADV_DONTNEED);
}
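
Fleshed out, that test might look like the sketch below.  The mapping
flags, fill value and error handling are my assumptions, not the exact
harness behind the numbers; MADV_FREE needs Linux 4.5+ uapi headers
(the fallback #define assumes the asm-generic value).  Run several
instances in parallel (e.g. 12) to reproduce the 6G footprint.

#include <string.h>
#include <sys/mman.h>

#ifndef MADV_FREE
#define MADV_FREE 8                     /* assumed asm-generic/mman-common.h value */
#endif

#define SIZE (512UL << 20)              /* 512M per process */

int main(void)
{
        int loop = 5;
        char *ptr = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (ptr == MAP_FAILED)
                return 1;

        while (loop--) {
                memset(ptr, 1, SIZE);           /* dirty every page */
                madvise(ptr, SIZE, MADV_FREE);  /* or MADV_DONTNEED */
        }
        return 0;
}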

The reason is the large number of swapins (a way to sample this is
sketched below):

1) dontneed: 1,612 swapin
2) madvfree: 879,585 swapin
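
These counts presumably come from the global pswpin counter; a minimal
sampler like the one below (my sketch, not part of the commit) can be
run before and after each test, with the difference giving the per-run
swapin count.

#include <stdio.h>
#include <string.h>

/* Print the cumulative swapin count from /proc/vmstat. */
int main(void)
{
        FILE *f = fopen("/proc/vmstat", "r");
        char name[64];
        unsigned long long val;

        if (!f)
                return 1;
        while (fscanf(f, "%63s %llu", name, &val) == 2)
                if (!strcmp(name, "pswpin"))
                        printf("pswpin %llu\n", val);
        fclose(f);
        return 0;
}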

If we find that hinted pages were already swapped out by the time the
syscall is called, it is pointless to keep the swap entries in the ptes.
Instead, free the cold pages' swap slots and clear the ptes, since a
swapin is more expensive than (page allocation + zeroing).

With this patch, swapins dropped from 879,585 to 1,878, so the elapsed
time improved accordingly:

1) dontneed: 6.10user 233.50system 0:50.44elapsed
2) madvfree: 6.03user 401.17system 1:30.67elapsed
3) madvfree + this patch: 6.70user 339.14system 1:04.45elapsed

Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/mm/madvise.c b/mm/madvise.c
index f68b1130f3e32d569ac7338670f0f907ed0b69c1..50099f602589116ab32b913c031bd7059ba76c57 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -270,7 +270,9 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
        spinlock_t *ptl;
        pte_t *pte, ptent;
        struct page *page;
+       swp_entry_t entry;
        unsigned long next;
+       int nr_swap = 0;
 
        next = pmd_addr_end(addr, end);
        if (pmd_trans_huge(*pmd)) {
@@ -289,8 +291,22 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                ptent = *pte;
 
-               if (!pte_present(ptent))
+               if (pte_none(ptent))
                        continue;
+               /*
+                * If the pte has a swp_entry, just clear the page table to
+                * prevent swap-in, which is more expensive than
+                * (page allocation + zeroing).
+                */
+               if (!pte_present(ptent)) {
+                       entry = pte_to_swp_entry(ptent);
+                       if (non_swap_entry(entry))
+                               continue;
+                       nr_swap--;
+                       free_swap_and_cache(entry);
+                       pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+                       continue;
+               }
 
                page = vm_normal_page(vma, addr, ptent);
                if (!page)
@@ -330,6 +346,14 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                set_pte_at(mm, addr, pte, ptent);
                tlb_remove_tlb_entry(tlb, pte, addr);
        }
+
+       if (nr_swap) {
+               if (current->mm == mm)
+                       sync_mm_rss(mm);
+
+               add_mm_counter(mm, MM_SWAPENTS, nr_swap);
+       }
+
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);
 next: