git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - fs/proc/task_mmu.c
Merge branch 'akpm-current/current'
[karo-tx-linux.git] / fs / proc / task_mmu.c
index 9b2f5d62ce63dfe187e98763dadbf1c21fc5c00a..25e5a1e044f201e79691f5010be826664e8f8ce7 100644 (file)
@@ -430,7 +430,6 @@ const struct file_operations proc_tid_maps_operations = {
 
 #ifdef CONFIG_PROC_PAGE_MONITOR
 struct mem_size_stats {
-       struct vm_area_struct *vma;
        unsigned long resident;
        unsigned long shared_clean;
        unsigned long shared_dirty;
@@ -444,15 +443,16 @@ struct mem_size_stats {
        u64 pss;
 };
 
-
-static void smaps_pte_entry(pte_t ptent, unsigned long addr,
-               unsigned long ptent_size, struct mm_walk *walk)
+static int smaps_pte(pte_t *pte, unsigned long addr, unsigned long end,
+                       struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
+       struct vm_area_struct *vma = walk->vma;
        pgoff_t pgoff = linear_page_index(vma, addr);
        struct page *page = NULL;
        int mapcount;
+       pte_t ptent = *pte;
+       unsigned long ptent_size = end - addr;
 
        if (pte_present(ptent)) {
                page = vm_normal_page(vma, addr, ptent);
@@ -469,7 +469,7 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
        }
 
        if (!page)
-               return;
+               return 0;
 
        if (PageAnon(page))
                mss->anonymous += ptent_size;
@@ -495,35 +495,22 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
                        mss->private_clean += ptent_size;
                mss->pss += (ptent_size << PSS_SHIFT);
        }
+       return 0;
 }
 
-static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                          struct mm_walk *walk)
+static int smaps_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
+                       struct mm_walk *walk)
 {
        struct mem_size_stats *mss = walk->private;
-       struct vm_area_struct *vma = mss->vma;
-       pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-               smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
+       if (pmd_trans_huge_lock(pmd, walk->vma, &ptl) == 1) {
+               smaps_pte((pte_t *)pmd, addr, addr + HPAGE_PMD_SIZE, walk);
                spin_unlock(ptl);
                mss->anonymous_thp += HPAGE_PMD_SIZE;
-               return 0;
+               /* don't call smaps_pte() */
+               walk->skip = 1;
        }
-
-       if (pmd_trans_unstable(pmd))
-               return 0;
-       /*
-        * The mmap_sem held all the way back in m_start() is what
-        * keeps khugepaged out of here and from collapsing things
-        * in here.
-        */
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-       for (; addr != end; pte++, addr += PAGE_SIZE)
-               smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
-       pte_unmap_unlock(pte - 1, ptl);
-       cond_resched();
        return 0;
 }
 
@@ -588,16 +575,16 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
        struct vm_area_struct *vma = v;
        struct mem_size_stats mss;
        struct mm_walk smaps_walk = {
-               .pmd_entry = smaps_pte_range,
+               .pmd_entry = smaps_pmd,
+               .pte_entry = smaps_pte,
                .mm = vma->vm_mm,
+               .vma = vma,
                .private = &mss,
        };
 
        memset(&mss, 0, sizeof mss);
-       mss.vma = vma;
        /* mmap_sem is held in m_start */
-       if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-               walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
+       walk_page_vma(vma, &smaps_walk);
 
        show_map_vma(m, vma, is_pid);
 
@@ -718,7 +705,6 @@ enum clear_refs_types {
 };
 
 struct clear_refs_private {
-       struct vm_area_struct *vma;
        enum clear_refs_types type;
 };
 
@@ -743,48 +729,52 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
                ptent = pte_file_clear_soft_dirty(ptent);
        }
 
-       if (vma->vm_flags & VM_SOFTDIRTY)
-               vma->vm_flags &= ~VM_SOFTDIRTY;
-
        set_pte_at(vma->vm_mm, addr, pte, ptent);
 #endif
 }
 
-static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+static int clear_refs_pte(pte_t *pte, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
 {
        struct clear_refs_private *cp = walk->private;
-       struct vm_area_struct *vma = cp->vma;
-       pte_t *pte, ptent;
-       spinlock_t *ptl;
+       struct vm_area_struct *vma = walk->vma;
        struct page *page;
 
-       split_huge_page_pmd(vma, addr, pmd);
-       if (pmd_trans_unstable(pmd))
+       if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+               clear_soft_dirty(vma, addr, pte);
                return 0;
+       }
+       if (!pte_present(*pte))
+               return 0;
+       page = vm_normal_page(vma, addr, *pte);
+       if (!page)
+               return 0;
+       /* Clear accessed and referenced bits. */
+       ptep_test_and_clear_young(vma, addr, pte);
+       ClearPageReferenced(page);
+       return 0;
+}
 
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-       for (; addr != end; pte++, addr += PAGE_SIZE) {
-               ptent = *pte;
-
-               if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
-                       clear_soft_dirty(vma, addr, pte);
-                       continue;
-               }
-
-               if (!pte_present(ptent))
-                       continue;
-
-               page = vm_normal_page(vma, addr, ptent);
-               if (!page)
-                       continue;
+static int clear_refs_test_walk(unsigned long start, unsigned long end,
+                               struct mm_walk *walk)
+{
+       struct clear_refs_private *cp = walk->private;
+       struct vm_area_struct *vma = walk->vma;
 
-               /* Clear accessed and referenced bits. */
-               ptep_test_and_clear_young(vma, addr, pte);
-               ClearPageReferenced(page);
+       /*
+        * Writing 1 to /proc/pid/clear_refs affects all pages.
+        * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
+        * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
+        * Writing 4 to /proc/pid/clear_refs affects all pages.
+        */
+       if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
+               walk->skip = 1;
+       if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
+               walk->skip = 1;
+       if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+               if (vma->vm_flags & VM_SOFTDIRTY)
+                       vma->vm_flags &= ~VM_SOFTDIRTY;
        }
-       pte_unmap_unlock(pte - 1, ptl);
-       cond_resched();
        return 0;
 }
 
@@ -813,8 +803,9 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 
        if (type == CLEAR_REFS_SOFT_DIRTY) {
                soft_dirty_cleared = true;
-               pr_warn_once("The pagemap bits 55-60 has changed their meaning! "
-                               "See the linux/Documentation/vm/pagemap.txt for details.\n");
+               pr_warn_once("The pagemap bits 55-60 has changed their meaning!"
+                            " See the linux/Documentation/vm/pagemap.txt for "
+                            "details.\n");
        }
 
        task = get_proc_task(file_inode(file));
@@ -826,33 +817,16 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                        .type = type,
                };
                struct mm_walk clear_refs_walk = {
-                       .pmd_entry = clear_refs_pte_range,
+                       .pte_entry = clear_refs_pte,
+                       .test_walk = clear_refs_test_walk,
                        .mm = mm,
                        .private = &cp,
                };
                down_read(&mm->mmap_sem);
                if (type == CLEAR_REFS_SOFT_DIRTY)
                        mmu_notifier_invalidate_range_start(mm, 0, -1);
-               for (vma = mm->mmap; vma; vma = vma->vm_next) {
-                       cp.vma = vma;
-                       if (is_vm_hugetlb_page(vma))
-                               continue;
-                       /*
-                        * Writing 1 to /proc/pid/clear_refs affects all pages.
-                        *
-                        * Writing 2 to /proc/pid/clear_refs only affects
-                        * Anonymous pages.
-                        *
-                        * Writing 3 to /proc/pid/clear_refs only affects file
-                        * mapped pages.
-                        */
-                       if (type == CLEAR_REFS_ANON && vma->vm_file)
-                               continue;
-                       if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
-                               continue;
-                       walk_page_range(vma->vm_start, vma->vm_end,
-                                       &clear_refs_walk);
-               }
+               for (vma = mm->mmap; vma; vma = vma->vm_next)
+                       walk_page_vma(vma, &clear_refs_walk);
                if (type == CLEAR_REFS_SOFT_DIRTY)
                        mmu_notifier_invalidate_range_end(mm, 0, -1);
                flush_tlb_mm(mm);
@@ -993,19 +967,33 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap
 }
 #endif
 
-static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+static int pagemap_pte(pte_t *pte, unsigned long addr, unsigned long end,
                             struct mm_walk *walk)
 {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma = walk->vma;
        struct pagemapread *pm = walk->private;
-       spinlock_t *ptl;
-       pte_t *pte;
+       pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+
+       if (vma && vma->vm_start <= addr && end <= vma->vm_end) {
+               pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+               /* unmap before userspace copy */
+               pte_unmap(pte);
+       }
+       return add_to_pagemap(addr, &pme, pm);
+}
+
+static int pagemap_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
+                            struct mm_walk *walk)
+{
        int err = 0;
+       struct vm_area_struct *vma = walk->vma;
+       struct pagemapread *pm = walk->private;
        pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+       spinlock_t *ptl;
 
-       /* find the first VMA at or above 'addr' */
-       vma = find_vma(walk->mm, addr);
-       if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+       if (!vma)
+               return err;
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                int pmd_flags2;
 
                if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1024,41 +1012,9 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                                break;
                }
                spin_unlock(ptl);
-               return err;
+               /* don't call pagemap_pte() */
+               walk->skip = 1;
        }
-
-       if (pmd_trans_unstable(pmd))
-               return 0;
-       for (; addr != end; addr += PAGE_SIZE) {
-               int flags2;
-
-               /* check to see if we've left 'vma' behind
-                * and need a new, higher one */
-               if (vma && (addr >= vma->vm_end)) {
-                       vma = find_vma(walk->mm, addr);
-                       if (vma && (vma->vm_flags & VM_SOFTDIRTY))
-                               flags2 = __PM_SOFT_DIRTY;
-                       else
-                               flags2 = 0;
-                       pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
-               }
-
-               /* check that 'vma' actually covers this address,
-                * and that it isn't a huge page vma */
-               if (vma && (vma->vm_start <= addr) &&
-                   !is_vm_hugetlb_page(vma)) {
-                       pte = pte_offset_map(pmd, addr);
-                       pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-                       /* unmap before userspace copy */
-                       pte_unmap(pte);
-               }
-               err = add_to_pagemap(addr, &pme, pm);
-               if (err)
-                       return err;
-       }
-
-       cond_resched();
-
        return err;
 }
 
@@ -1076,24 +1032,22 @@ static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *
 }
 
 /* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
-                                unsigned long addr, unsigned long end,
-                                struct mm_walk *walk)
+static int pagemap_hugetlb(pte_t *pte, unsigned long addr, unsigned long end,
+                          struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma = walk->vma;
        int err = 0;
        int flags2;
        pagemap_entry_t pme;
+       unsigned long hmask;
 
-       vma = find_vma(walk->mm, addr);
-       WARN_ON_ONCE(!vma);
-
-       if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+       if (vma->vm_flags & VM_SOFTDIRTY)
                flags2 = __PM_SOFT_DIRTY;
        else
                flags2 = 0;
 
+       hmask = huge_page_mask(hstate_vma(vma));
        for (; addr != end; addr += PAGE_SIZE) {
                int offset = (addr & ~hmask) >> PAGE_SHIFT;
                huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
@@ -1101,9 +1055,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
                if (err)
                        return err;
        }
-
-       cond_resched();
-
        return err;
 }
 #endif /* HUGETLB_PAGE */
@@ -1170,10 +1121,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        if (!mm || IS_ERR(mm))
                goto out_free;
 
-       pagemap_walk.pmd_entry = pagemap_pte_range;
+       pagemap_walk.pte_entry = pagemap_pte;
+       pagemap_walk.pmd_entry = pagemap_pmd;
        pagemap_walk.pte_hole = pagemap_pte_hole;
 #ifdef CONFIG_HUGETLB_PAGE
-       pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
+       pagemap_walk.hugetlb_entry = pagemap_hugetlb;
 #endif
        pagemap_walk.mm = mm;
        pagemap_walk.private = &pm;
@@ -1249,7 +1201,6 @@ const struct file_operations proc_pagemap_operations = {
 #ifdef CONFIG_NUMA
 
 struct numa_maps {
-       struct vm_area_struct *vma;
        unsigned long pages;
        unsigned long anon;
        unsigned long active;
@@ -1315,44 +1266,42 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
        return page;
 }
 
-static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
+static int gather_pte_stats(pte_t *pte, unsigned long addr,
                unsigned long end, struct mm_walk *walk)
 {
-       struct numa_maps *md;
-       spinlock_t *ptl;
-       pte_t *orig_pte;
-       pte_t *pte;
+       struct numa_maps *md = walk->private;
 
-       md = walk->private;
+       struct page *page = can_gather_numa_stats(*pte, walk->vma, addr);
+       if (!page)
+               return 0;
+       gather_stats(page, md, pte_dirty(*pte), 1);
+       return 0;
+}
 
-       if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+static int gather_pmd_stats(pmd_t *pmd, unsigned long addr,
+               unsigned long end, struct mm_walk *walk)
+{
+       struct numa_maps *md = walk->private;
+       struct vm_area_struct *vma = walk->vma;
+       spinlock_t *ptl;
+
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                pte_t huge_pte = *(pte_t *)pmd;
                struct page *page;
 
-               page = can_gather_numa_stats(huge_pte, md->vma, addr);
+               page = can_gather_numa_stats(huge_pte, vma, addr);
                if (page)
                        gather_stats(page, md, pte_dirty(huge_pte),
                                     HPAGE_PMD_SIZE/PAGE_SIZE);
                spin_unlock(ptl);
-               return 0;
+               /* don't call gather_pte_stats() */
+               walk->skip = 1;
        }
-
-       if (pmd_trans_unstable(pmd))
-               return 0;
-       orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-       do {
-               struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
-               if (!page)
-                       continue;
-               gather_stats(page, md, pte_dirty(*pte), 1);
-
-       } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap_unlock(orig_pte, ptl);
        return 0;
 }
 #ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
-               unsigned long addr, unsigned long end, struct mm_walk *walk)
+static int gather_hugetlb_stats(pte_t *pte, unsigned long addr,
+                               unsigned long end, struct mm_walk *walk)
 {
        struct numa_maps *md;
        struct page *page;
@@ -1360,6 +1309,9 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
        if (pte_none(*pte))
                return 0;
 
+       if (!pte_present(*pte))
+               return 0;
+
        page = pte_page(*pte);
        if (!page)
                return 0;
@@ -1370,8 +1322,8 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
 }
 
 #else
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
-               unsigned long addr, unsigned long end, struct mm_walk *walk)
+static int gather_hugetlb_stats(pte_t *pte, unsigned long addr,
+                               unsigned long end, struct mm_walk *walk)
 {
        return 0;
 }
@@ -1400,12 +1352,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        /* Ensure we start with an empty set of numa_maps statistics. */
        memset(md, 0, sizeof(*md));
 
-       md->vma = vma;
-
-       walk.hugetlb_entry = gather_hugetbl_stats;
-       walk.pmd_entry = gather_pte_stats;
+       walk.hugetlb_entry = gather_hugetlb_stats;
+       walk.pmd_entry = gather_pmd_stats;
+       walk.pte_entry = gather_pte_stats;
        walk.private = md;
        walk.mm = mm;
+       walk.vma = vma;
 
        pol = get_vma_policy(task, vma, vma->vm_start);
        mpol_to_str(buffer, sizeof(buffer), pol);
@@ -1436,6 +1388,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        if (is_vm_hugetlb_page(vma))
                seq_printf(m, " huge");
 
+       /* mmap_sem is held by m_start */
        walk_page_range(vma->vm_start, vma->vm_end, &walk);
 
        if (!md->pages)