diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ec49d9ef1eefd0155f099d813ed547c16172d69e..b6adedbafaf59cbfa2d16b5c7a6d6c3fd46463e7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1773,23 +1773,32 @@ free:
 }
 
 /*
- * When releasing a hugetlb pool reservation, any surplus pages that were
- * allocated to satisfy the reservation must be explicitly freed if they were
- * never used.
- * Called with hugetlb_lock held.
+ * This routine has two main purposes:
+ * 1) Decrement the reservation count (resv_huge_pages) by the value passed
+ *    in unused_resv_pages.  This corresponds to the prior adjustments made
+ *    to the associated reservation map.
+ * 2) Free any unused surplus pages that may have been allocated to satisfy
+ *    the reservation.  As many as unused_resv_pages may be freed.
+ *
+ * Called with hugetlb_lock held.  However, the lock could be dropped (and
+ * reacquired) during calls to cond_resched_lock.  Whenever dropping the lock,
+ * we must make sure nobody else can claim pages we are in the process of
+ * freeing.  Do this by ensuring resv_huge_pages is always greater than the
+ * number of huge pages we plan to free when dropping the lock.
  */
 static void return_unused_surplus_pages(struct hstate *h,
                                        unsigned long unused_resv_pages)
 {
        unsigned long nr_pages;
 
-       /* Uncommit the reservation */
-       h->resv_huge_pages -= unused_resv_pages;
-
        /* Cannot return gigantic pages currently */
        if (hstate_is_gigantic(h))
-               return;
+               goto out;
 
+       /*
+        * Part (or even all) of the reservation could have been backed
+        * by pre-allocated pages. Only free surplus pages.
+        */
        nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
 
        /*
@@ -1799,12 +1808,22 @@ static void return_unused_surplus_pages(struct hstate *h,
         * when the nodes with surplus pages have no free pages.
         * free_pool_huge_page() will balance the freed pages across the
         * on-line nodes with memory and will handle the hstate accounting.
+        *
+        * Note that we decrement resv_huge_pages as we free the pages.  If
+        * we drop the lock, resv_huge_pages will still be sufficiently large
+        * to cover subsequent pages we may free.
         */
        while (nr_pages--) {
+               h->resv_huge_pages--;
+               unused_resv_pages--;
                if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
-                       break;
+                       goto out;
                cond_resched_lock(&hugetlb_lock);
        }
+
+out:
+       /* Fully uncommit the reservation */
+       h->resv_huge_pages -= unused_resv_pages;
 }
 
 
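The accounting in the two hunks above is easier to see outside diff form. Below is a minimal userspace C sketch of the invariant the new loop maintains: resv_huge_pages is decremented one page at a time, so at every point where hugetlb_lock could be dropped it still covers all pages not yet freed, and whatever remains of the reservation is uncommitted in one step at the end. The names model_hstate and free_one_surplus_page are illustrative stand-ins, not kernel APIs.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct model_hstate {
	unsigned long resv_huge_pages;
	unsigned long surplus_huge_pages;
};

/* Stand-in for free_pool_huge_page(): may fail, like the real thing. */
static bool free_one_surplus_page(struct model_hstate *h)
{
	if (!h->surplus_huge_pages)
		return false;
	h->surplus_huge_pages--;
	return true;
}

static void return_unused_surplus_pages_model(struct model_hstate *h,
					unsigned long unused_resv_pages)
{
	unsigned long nr_pages = unused_resv_pages < h->surplus_huge_pages ?
				 unused_resv_pages : h->surplus_huge_pages;

	while (nr_pages--) {
		/* Decrement before freeing, as the patch does. */
		h->resv_huge_pages--;
		unused_resv_pages--;
		if (!free_one_surplus_page(h))
			break;
		/*
		 * This is where hugetlb_lock could be dropped via
		 * cond_resched_lock(); the invariant from the comment
		 * above must hold here.
		 */
		assert(h->resv_huge_pages >= nr_pages);
	}

	/* Fully uncommit whatever part of the reservation is left. */
	h->resv_huge_pages -= unused_resv_pages;
}

int main(void)
{
	struct model_hstate h = { .resv_huge_pages = 8, .surplus_huge_pages = 3 };

	return_unused_surplus_pages_model(&h, 5);
	printf("resv_huge_pages=%lu surplus_huge_pages=%lu\n",
	       h.resv_huge_pages, h.surplus_huge_pages);
	return 0;
}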
@@ -1826,11 +1845,17 @@ static void return_unused_surplus_pages(struct hstate *h,
  * is not the case is if a reserve map was changed between calls.  It
  * is the responsibility of the caller to notice the difference and
  * take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed.  It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
  */
 enum vma_resv_mode {
        VMA_NEEDS_RESV,
        VMA_COMMIT_RESV,
        VMA_END_RESV,
+       VMA_ADD_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
                                struct vm_area_struct *vma, unsigned long addr,
@@ -1856,6 +1881,14 @@ static long __vma_reservation_common(struct hstate *h,
                region_abort(resv, idx, idx + 1);
                ret = 0;
                break;
+       case VMA_ADD_RESV:
+               if (vma->vm_flags & VM_MAYSHARE)
+                       ret = region_add(resv, idx, idx + 1);
+               else {
+                       region_abort(resv, idx, idx + 1);
+                       ret = region_del(resv, idx, idx + 1);
+               }
+               break;
        default:
                BUG();
        }
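A note on why the new VMA_ADD_RESV case splits on VM_MAYSHARE: an entry in the reserve map has roughly opposite meanings for the two mapping types. For shared mappings an entry means a reservation (or an instantiated page) exists for that index, while for private mappings an entry means the reservation for that index has already been consumed. Restoring a reservation is therefore region_add() in the shared case and region_del() in the private case. The userspace sketch below models the map as a per-index flag array; it is an illustration only and the names are not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

#define MAP_PAGES 16

/* Toy reserve map: one flag per huge-page index within the mapping. */
struct resv_map_model {
	bool entry[MAP_PAGES];
};

/*
 * Stand-in for the VMA_ADD_RESV behaviour: put the map back into the
 * state that says "a reservation is available for idx".
 */
static void restore_map_entry(struct resv_map_model *map, int idx,
			      bool vm_mayshare)
{
	if (vm_mayshare)
		map->entry[idx] = true;   /* like region_add(): entry = reservation exists */
	else
		map->entry[idx] = false;  /* like region_del(): entry = reservation consumed */
}

int main(void)
{
	struct resv_map_model shared = { { false } };
	struct resv_map_model priv = { { false } };

	/* Private mapping: faulting index 3 consumed its reservation. */
	priv.entry[3] = true;

	restore_map_entry(&shared, 3, true);
	restore_map_entry(&priv, 3, false);

	printf("shared entry[3]=%d (reservation present)\n", shared.entry[3]);
	printf("private entry[3]=%d (no longer marked consumed)\n", priv.entry[3]);
	return 0;
}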
@@ -1903,6 +1936,56 @@ static void vma_end_reservation(struct hstate *h,
        (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
+static long vma_add_reservation(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long addr)
+{
+       return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths.  In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed.  If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page.  When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page cannot adjust the reserve map.  Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long address,
+                       struct page *page)
+{
+       if (unlikely(PagePrivate(page))) {
+               long rc = vma_needs_reservation(h, vma, address);
+
+               if (unlikely(rc < 0)) {
+                       /*
+                        * Rare out of memory condition in reserve map
+                        * manipulation.  Clear PagePrivate so that
+                        * global reserve count will not be incremented
+                        * by free_huge_page.  This will make it appear
+                        * as though the reservation for this page was
+                        * consumed.  This may prevent the task from
+                        * faulting in the page at a later time.  This
+                        * is better than inconsistent global huge page
+                        * accounting of reserve counts.
+                        */
+                       ClearPagePrivate(page);
+               } else if (rc) {
+                       rc = vma_add_reservation(h, vma, address);
+                       if (unlikely(rc < 0))
+                               /*
+                                * See above comment about rare out of
+                                * memory condition.
+                                */
+                               ClearPagePrivate(page);
+               } else
+                       vma_end_reservation(h, vma, address);
+       }
+}
+
 struct page *alloc_huge_page(struct vm_area_struct *vma,
                                    unsigned long addr, int avoid_reserve)
 {
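For reference, the decision tree in restore_reserve_on_error() boils down to three outcomes: a negative result from the needs-reservation check drops PagePrivate, a positive result adjusts the reserve map via vma_add_reservation (dropping PagePrivate only if that also fails), and zero simply ends the reservation so PagePrivate stays set for free_huge_page. The compact userspace model below exercises all three; needs_reservation is replaced by a passed-in rc, and add_reservation, end_reservation and struct fake_page are hypothetical stand-ins, not the kernel routines.

#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	bool page_private;	/* models PagePrivate: a reservation was consumed */
};

static long add_reservation(void)  { return 0; }	/* pretend fixing up the map works */
static void end_reservation(void)  { }

/* rc plays the role of vma_needs_reservation()'s return value. */
static void restore_reserve_on_error_model(struct fake_page *page, long rc)
{
	if (!page->page_private)
		return;				/* no reservation consumed: nothing to restore */

	if (rc < 0) {
		/* reserve map manipulation failed: give up, keep global counts consistent */
		page->page_private = false;
	} else if (rc) {
		/* map does not reflect the reservation: adjust it, or give up if that fails too */
		if (add_reservation() < 0)
			page->page_private = false;
	} else {
		/* map entry still in place: just finish the needs/commit cycle */
		end_reservation();
	}
}

int main(void)
{
	long rcs[] = { -1, 1, 0 };

	for (int i = 0; i < 3; i++) {
		struct fake_page page = { .page_private = true };

		restore_reserve_on_error_model(&page, rcs[i]);
		printf("rc=%ld -> PagePrivate %s\n", rcs[i],
		       page.page_private ? "kept" : "cleared");
	}
	return 0;
}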
@@ -3386,15 +3469,17 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  * Keep the pte_same checks anyway to make transition from the mutex easier.
  */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
-                       unsigned long address, pte_t *ptep, pte_t pte,
-                       struct page *pagecache_page, spinlock_t *ptl)
+                      unsigned long address, pte_t *ptep,
+                      struct page *pagecache_page, spinlock_t *ptl)
 {
+       pte_t pte;
        struct hstate *h = hstate_vma(vma);
        struct page *old_page, *new_page;
        int ret = 0, outside_reserve = 0;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
 
+       pte = huge_ptep_get(ptep);
        old_page = pte_page(pte);
 
 retry_avoidcopy:
@@ -3498,6 +3583,7 @@ retry_avoidcopy:
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
+       restore_reserve_on_error(h, vma, address, new_page);
        put_page(new_page);
 out_release_old:
        put_page(old_page);
@@ -3668,7 +3754,7 @@ retry:
        hugetlb_count_add(pages_per_huge_page(h), mm);
        if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
                /* Optimization, do the COW without a second fault */
-               ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
+               ret = hugetlb_cow(mm, vma, address, ptep, page, ptl);
        }
 
        spin_unlock(ptl);
@@ -3680,6 +3766,7 @@ backout:
        spin_unlock(ptl);
 backout_unlocked:
        unlock_page(page);
+       restore_reserve_on_error(h, vma, address, page);
        put_page(page);
        goto out;
 }
@@ -3822,8 +3909,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
        if (flags & FAULT_FLAG_WRITE) {
                if (!huge_pte_write(entry)) {
-                       ret = hugetlb_cow(mm, vma, address, ptep, entry,
-                                       pagecache_page, ptl);
+                       ret = hugetlb_cow(mm, vma, address, ptep,
+                                         pagecache_page, ptl);
                        goto out_put_page;
                }
                entry = huge_pte_mkdirty(entry);