return atomic_read(&mm->mm_users) == 0;
}
+/*
+ * If the mm isn't the one associated with the current
+ * ksm_scan.mm_slot, ksm_exit() will not down_write();up_write(), so
+ * the ksm_test_exit() check run inside a mm->mmap_sem critical
+ * section cannot prevent exit_mmap() from running from under us.
+ * Hence, whenever we work with an "mm" that isn't guaranteed to be
+ * associated with the current ksm_scan.mm_slot, ksm_get_mm() is
+ * needed instead of the ksm_test_exit() run inside the mmap_sem.
+ * Return true if mm_users was incremented, or false if we failed to
+ * take the mm because it was already being freed from under us. If
+ * it returns true, the caller must call mmput() after it finishes
+ * using the mm.
+ */
+static __always_inline bool ksm_get_mm(struct mm_struct *mm)
+{
+ return likely(atomic_inc_not_zero(&mm->mm_users));
+}
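+
+/*
+ * Callers of ksm_get_mm() follow the pattern:
+ *
+ *	if (!ksm_get_mm(mm))
+ *		return ...;
+ *	down_read(&mm->mmap_sem);
+ *	... operate on the mm ...
+ *	up_read(&mm->mmap_sem);
+ *	mmput(mm);
+ */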
+
/*
* We use break_ksm to break COW on a ksm page: it's a stripped down
*
unsigned long addr)
{
struct vm_area_struct *vma;
- if (ksm_test_exit(mm))
- return NULL;
vma = find_vma(mm, addr);
if (!vma || vma->vm_start > addr)
return NULL;
*/
put_anon_vma(rmap_item->anon_vma);
+ /*
+ * The "mm" of the unstable tree rmap_item isn't necessarily
+ * associated with the current ksm_scan.mm_slot; it could be
+ * any random mm. So we need ksm_get_mm() here to prevent
+ * exit_mmap() from running from under us in mmput().
+ */
+ if (!ksm_get_mm(mm))
+ return;
+
down_read(&mm->mmap_sem);
vma = find_mergeable_vma(mm, addr);
if (vma)
break_ksm(vma, addr);
up_read(&mm->mmap_sem);
-}
-
-static struct page *page_trans_compound_anon(struct page *page)
-{
- if (PageTransCompound(page)) {
- struct page *head = compound_head(page);
- /*
- * head may actually be splitted and freed from under
- * us but it's ok here.
- */
- if (PageAnon(head))
- return head;
- }
- return NULL;
+ mmput(mm);
}
static struct page *get_mergeable_page(struct rmap_item *rmap_item)
struct vm_area_struct *vma;
struct page *page;
+ /*
+ * The "mm" of the unstable tree rmap_item isn't necessarily
+ * associated with the current ksm_scan.mm_slot; it could be
+ * any random mm. So we need ksm_get_mm() here to prevent
+ * exit_mmap() from running from under us in mmput().
+ */
+ if (!ksm_get_mm(mm))
+ return NULL;
+
down_read(&mm->mmap_sem);
vma = find_mergeable_vma(mm, addr);
if (!vma)
page = follow_page(vma, addr, FOLL_GET);
if (IS_ERR_OR_NULL(page))
goto out;
- if (PageAnon(page) || page_trans_compound_anon(page)) {
+ if (PageAnon(page)) {
flush_anon_page(vma, page, addr);
flush_dcache_page(page);
} else {
out: page = NULL;
}
up_read(&mm->mmap_sem);
+ mmput(mm);
return page;
}
unlock_page(page);
put_page(page);
- if (stable_node->hlist.first)
+ if (!hlist_empty(&stable_node->hlist))
ksm_pages_sharing--;
else
ksm_pages_shared--;
}
get_page(kpage);
- page_add_anon_rmap(kpage, vma, addr);
+ page_add_anon_rmap(kpage, vma, addr, false);
flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
- page_remove_rmap(page);
+ page_remove_rmap(page, false);
if (!page_mapped(page))
try_to_free_swap(page);
put_page(page);
return err;
}
-static int page_trans_compound_anon_split(struct page *page)
-{
- int ret = 0;
- struct page *transhuge_head = page_trans_compound_anon(page);
- if (transhuge_head) {
- /* Get the reference on the head to split it. */
- if (get_page_unless_zero(transhuge_head)) {
- /*
- * Recheck we got the reference while the head
- * was still anonymous.
- */
- if (PageAnon(transhuge_head))
- ret = split_huge_page(transhuge_head);
- else
- /*
- * Retry later if split_huge_page run
- * from under us.
- */
- ret = 1;
- put_page(transhuge_head);
- } else
- /* Retry later if split_huge_page run from under us. */
- ret = 1;
- }
- return ret;
-}
-
/*
* try_to_merge_one_page - take two pages and merge them into one
* @vma: the vma that holds the pte pointing to page
if (page == kpage) /* ksm page forked */
return 0;
- if (!(vma->vm_flags & VM_MERGEABLE))
- goto out;
- if (PageTransCompound(page) && page_trans_compound_anon_split(page))
- goto out;
- BUG_ON(PageTransCompound(page));
if (!PageAnon(page))
goto out;
*/
if (!trylock_page(page))
goto out;
+
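+ /*
+ * KSM only merges individual small pages, so a transparent
+ * huge page has to be split (now that the page is locked)
+ * before this page can be considered for merging.
+ */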
+ if (PageTransCompound(page)) {
+ err = split_huge_page(page);
+ if (err)
+ goto out_unlock;
+ }
+
/*
* If this anonymous page is mapped only here, its pte may need
* to be write-protected. If it's mapped elsewhere, all of its
*/
set_page_stable_node(page, NULL);
mark_page_accessed(page);
+ /*
+ * A stable page could be shared by several processes. After
+ * CoW or zapping has happened in every process except the
+ * last one, that last process may be the only owner of the
+ * page, yet its page table entry for the page may have no
+ * dirty bit set. In this case MADV_FREE could wrongly
+ * discard the page.
+ * To prevent that, mark the stable page dirty.
+ */
+ if (!PageDirty(page))
+ SetPageDirty(page);
err = 0;
} else if (pages_identical(page, kpage))
err = replace_page(vma, page, kpage, orig_pte);
}
}
+out_unlock:
unlock_page(page);
out:
return err;
struct vm_area_struct *vma;
int err = -EFAULT;
+ /*
+ * The "mm" of the unstable tree rmap_item isn't necessarily
+ * associated with the current ksm_scan.mm_slot; it could be
+ * any random mm. So we need ksm_get_mm() here to prevent
+ * exit_mmap() from running from under us in mmput(). Otherwise
+ * rmap_item->anon_vma could point to an anon_vma that has
+ * already been freed (i.e. get_anon_vma() below would run too
+ * late).
+ */
+ if (!ksm_get_mm(mm))
+ return err;
+
down_read(&mm->mmap_sem);
- if (ksm_test_exit(mm))
- goto out;
- vma = find_vma(mm, rmap_item->address);
- if (!vma || vma->vm_start > rmap_item->address)
+ vma = find_mergeable_vma(mm, rmap_item->address);
+ if (!vma)
goto out;
err = try_to_merge_one_page(vma, page, kpage);
get_anon_vma(vma->anon_vma);
out:
up_read(&mm->mmap_sem);
+ mmput(mm);
return err;
}
stable_node = rb_entry(*new, struct stable_node, node);
tree_page = get_ksm_page(stable_node, false);
if (!tree_page)
- return NULL;
+ /*
+ * If we walked over a stale stable_node,
+ * get_ksm_page() will call rb_erase() and it
+ * may rebalance the tree from under us. So
+ * restart the search from scratch. Returning
+ * NULL would be safe too, but we'd generate
+ * false negative insertions just because some
+ * stable_node was stale which would waste CPU
+ * by doing the preparation work twice at the
+ * next KSM pass.
+ */
+ goto again;
ret = memcmp_pages(page, tree_page);
put_page(tree_page);
unsigned long kpfn;
struct rb_root *root;
struct rb_node **new;
- struct rb_node *parent = NULL;
+ struct rb_node *parent;
struct stable_node *stable_node;
kpfn = page_to_pfn(kpage);
nid = get_kpfn_nid(kpfn);
root = root_stable_tree + nid;
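+/*
+ * On each restart from the "again" label the rb_tree walk below
+ * starts over from the root, so "parent" has to be re-initialized
+ * on every pass rather than at declaration time.
+ */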
+again:
+ parent = NULL;
new = &root->rb_node;
while (*new) {
stable_node = rb_entry(*new, struct stable_node, node);
tree_page = get_ksm_page(stable_node, false);
if (!tree_page)
- return NULL;
+ /*
+ * If we walked over a stale stable_node,
+ * get_ksm_page() will call rb_erase() and it
+ * may rebalance the tree from under us. So
+ * restart the search from scratch. Returning
+ * NULL would be safe too, but we'd generate
+ * false negative insertions just because some
+ * stable_node was stale which would waste CPU
+ * by doing the preparation work twice at the
+ * next KSM pass.
+ */
+ goto again;
ret = memcmp_pages(kpage, tree_page);
put_page(tree_page);
cond_resched();
tree_rmap_item = rb_entry(*new, struct rmap_item, node);
tree_page = get_mergeable_page(tree_rmap_item);
- if (IS_ERR_OR_NULL(tree_page))
+ if (!tree_page)
return NULL;
/*
cond_resched();
continue;
}
- if (PageAnon(*page) ||
- page_trans_compound_anon(*page)) {
+ if (PageAnon(*page)) {
flush_anon_page(vma, *page, ksm_scan.address);
flush_dcache_page(*page);
rmap_item = get_next_rmap_item(slot,
SetPageDirty(new_page);
__SetPageUptodate(new_page);
- __set_page_locked(new_page);
+ __SetPageLocked(new_page);
}
return new_page;
struct anon_vma_chain *vmac;
struct vm_area_struct *vma;
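+ /*
+ * A long-lived KSM page can be mapped by a large number of
+ * anon_vmas and VMAs; reschedule periodically while walking
+ * them so this loop doesn't hog the CPU.
+ */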
+ cond_resched();
anon_vma_lock_read(anon_vma);
anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
0, ULONG_MAX) {
+ cond_resched();
vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
rmap_item->address >= vma->vm_end)