Merge tag 'v2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

[mv-sheeva.git] / include / linux / mm.h
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 721f451c3029bb756324458f93eccf107778d746..f6385fc17ad46a6137bb7fdd387f8f81bb91e492 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
  #include <linux/mm_types.h>
  #include <linux/range.h>
  #include <linux/pfn.h>
+#include <linux/bit_spinlock.h>
  
  struct mempolicy;
  struct anon_vma;
@@ -82,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
  #define VM_GROWSUP     0x00000200
  #else
  #define VM_GROWSUP     0x00000000
+#define VM_NOHUGEPAGE  0x00000200      /* MADV_NOHUGEPAGE marked this vma */
  #endif
  #define VM_PFNMAP      0x00000400      /* Page-ranges managed without "struct page", just pure PFN */
  #define VM_DENYWRITE   0x00000800      /* ETXTBSY on write attempts.. */
@@ -101,7 +103,11 @@ extern unsigned int kobjsize(const void *objp);
  #define VM_NORESERVE   0x00200000      /* should the VM suppress accounting */
  #define VM_HUGETLB     0x00400000      /* Huge TLB Page VM */
  #define VM_NONLINEAR   0x00800000      /* Is non-linear (remap_file_pages) */
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
  #define VM_MAPPED_COPY 0x01000000      /* T if mapped copy of data (nommu mmap) */
+#else
+#define VM_HUGEPAGE    0x01000000      /* MADV_HUGEPAGE marked this vma */
+#endif
  #define VM_INSERTPAGE  0x02000000      /* The vma has had "vm_insert_page()" done on it */
  #define VM_ALWAYSDUMP  0x04000000      /* Always include in core dumps */
  
@@ -242,6 +248,7 @@ struct inode;
   * files which need it (119 of them)
   */
  #include <linux/page-flags.h>
+#include <linux/huge_mm.h>
  
  /*
   * Methods to modify the page usage count.
@@ -305,6 +312,39 @@ static inline int is_vmalloc_or_module_addr(const void *x)
  }
  #endif
  
+static inline void compound_lock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       bit_spin_lock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline void compound_unlock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       bit_spin_unlock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline unsigned long compound_lock_irqsave(struct page *page)
+{
+       unsigned long uninitialized_var(flags);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       local_irq_save(flags);
+       compound_lock(page);
+#endif
+       return flags;
+}
+
+static inline void compound_unlock_irqrestore(struct page *page,
+                                             unsigned long flags)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       compound_unlock(page);
+       local_irq_restore(flags);
+#endif
+}
+
  static inline struct page *compound_head(struct page *page)
  {
         if (unlikely(PageTail(page)))
@@ -319,9 +359,29 @@ static inline int page_count(struct page *page)
  
  static inline void get_page(struct page *page)
  {
-       page = compound_head(page);
-       VM_BUG_ON(atomic_read(&page->_count) == 0);
+       /*
+        * Getting a normal page or the head of a compound page
+        * requires to already have an elevated page->_count. Only if
+        * we're getting a tail page, the elevated page->_count is
+        * required only in the head page, so for tail pages the
+        * bugcheck only verifies that the page->_count isn't
+        * negative.
+        */
+       VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
         atomic_inc(&page->_count);
+       /*
+        * Getting a tail page will elevate both the head and tail
+        * page->_count(s).
+        */
+       if (unlikely(PageTail(page))) {
+               /*
+                * This is safe only because
+                * __split_huge_page_refcount can't run under
+                * get_page().
+                */
+               VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+               atomic_inc(&page->first_page->_count);
+       }
  }
  
  static inline struct page *virt_to_head_page(const void *x)
@@ -339,6 +399,27 @@ static inline void init_page_count(struct page *page)
         atomic_set(&page->_count, 1);
  }
  
+/*
+ * PageBuddy() indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ */
+static inline int PageBuddy(struct page *page)
+{
+       return atomic_read(&page->_mapcount) == -2;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+       VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+       atomic_set(&page->_mapcount, -2);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+       VM_BUG_ON(!PageBuddy(page));
+       atomic_set(&page->_mapcount, -1);
+}
+
  void put_page(struct page *page);
  void put_pages_list(struct list_head *pages);
  
@@ -370,11 +451,40 @@ static inline int compound_order(struct page *page)
         return (unsigned long)page[1].lru.prev;
  }
  
+static inline int compound_trans_order(struct page *page)
+{
+       int order;
+       unsigned long flags;
+
+       if (!PageHead(page))
+               return 0;
+
+       flags = compound_lock_irqsave(page);
+       order = compound_order(page);
+       compound_unlock_irqrestore(page, flags);
+       return order;
+}
+
  static inline void set_compound_order(struct page *page, unsigned long order)
  {
         page[1].lru.prev = (void *)order;
  }
  
+#ifdef CONFIG_MMU
+/*
+ * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
+ * servicing faults for write access.  In the normal case, do always want
+ * pte_mkwrite.  But get_user_pages can cause write faults for mappings
+ * that do not have writing enabled, when used by access_process_vm.
+ */
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+       if (likely(vma->vm_flags & VM_WRITE))
+               pte = pte_mkwrite(pte);
+       return pte;
+}
+#endif
+
  /*
   * Multiple processes may "see" the same page. E.g. for untouched
   * mappings of /dev/null, all processes see the same page full of
@@ -657,7 +767,7 @@ static inline struct address_space *page_mapping(struct page *page)
         VM_BUG_ON(PageSlab(page));
         if (unlikely(PageSwapCache(page)))
                 mapping = &swapper_space;
-       else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
+       else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
                 mapping = NULL;
         return mapping;
  }
@@ -1064,7 +1174,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
  int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
  #endif
  
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+               pmd_t *pmd, unsigned long address);
  int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
  
  /*
@@ -1133,16 +1244,18 @@ static inline void pgtable_page_dtor(struct page *page)
         pte_unmap(pte);                                 \
  } while (0)
  
-#define pte_alloc_map(mm, pmd, address)                        \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
-               NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address)                           \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma,    \
+                                                       pmd, address))? \
+        NULL: pte_offset_map(pmd, address))
  
  #define pte_alloc_map_lock(mm, pmd, address, ptlp)     \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL,   \
+                                                       pmd, address))? \
                 NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
  
  #define pte_alloc_kernel(pmd, address)                 \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
                 NULL: pte_offset_kernel(pmd, address))
  
  extern void free_area_init(unsigned long * zones_size);
@@ -1415,6 +1528,8 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
  #define FOLL_GET       0x04    /* do get_page on page */
  #define FOLL_DUMP      0x08    /* give error on hole if it would be zero */
  #define FOLL_FORCE     0x10    /* get_user_pages read/write w/o permission */
+#define FOLL_MLOCK     0x40    /* mark page as mlocked */
+#define FOLL_SPLIT     0x80    /* don't return transhuge pages, split them */
  
  typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
                         void *data);
@@ -1518,5 +1633,14 @@ static inline int is_hwpoison_address(unsigned long addr)
  
  extern void dump_page(struct page *page);
  
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+extern void clear_huge_page(struct page *page,
+                           unsigned long addr,
+                           unsigned int pages_per_huge_page);
+extern void copy_user_huge_page(struct page *dst, struct page *src,
+                               unsigned long addr, struct vm_area_struct *vma,
+                               unsigned int pages_per_huge_page);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
  #endif /* __KERNEL__ */
  #endif /* _LINUX_MM_H */