Merge remote-tracking branch 'tip/auto-latest'

author Stephen Rothwell <sfr@canb.auug.org.au>

Fri, 19 Oct 2012 00:49:37 +0000 (11:49 +1100)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Fri, 19 Oct 2012 01:16:32 +0000 (12:16 +1100)
author Stephen Rothwell <sfr@canb.auug.org.au>
Fri, 19 Oct 2012 00:49:37 +0000 (11:49 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Fri, 19 Oct 2012 01:16:32 +0000 (12:16 +1100)
diff --cc MAINTAINERS
Simple merge
diff --cc arch/mips/include/asm/pgtable.h

index 252202d24a84f50c467f11dff8937863150aedab,b2202a68cf0f377b7f881cfde45578f3be56c8f3..5f7c615d670d44b39db97ba276116774bd4ca10a
--- 1/arch/mips/include/asm/pgtable.h
--- 2/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@@ -407,157 -394,6 +407,159 @@@ static inline int io_remap_pfn_range(st
                 remap_pfn_range(vma, vaddr, pfn, size, prot)
   #endif
   
+ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ +
+ +extern int has_transparent_hugepage(void);
+ +
+ +static inline int pmd_trans_huge(pmd_t pmd)
+ +{
+ +      return !!(pmd_val(pmd) & _PAGE_HUGE);
+ +}
+ +
+ +static inline pmd_t pmd_mkhuge(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) |= _PAGE_HUGE;
+ +
+ +      return pmd;
+ +}
+ +
+ +static inline int pmd_trans_splitting(pmd_t pmd)
+ +{
+ +      return !!(pmd_val(pmd) & _PAGE_SPLITTING);
+ +}
+ +
+ +static inline pmd_t pmd_mksplitting(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) |= _PAGE_SPLITTING;
+ +
+ +      return pmd;
+ +}
+ +
+ +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ +                     pmd_t *pmdp, pmd_t pmd);
+ +
+ +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+ +/* Extern to avoid header file madness */
+ +extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+ +                                      unsigned long address,
+ +                                      pmd_t *pmdp);
+ +
+ +#define __HAVE_ARCH_PMD_WRITE
+ +static inline int pmd_write(pmd_t pmd)
+ +{
+ +      return !!(pmd_val(pmd) & _PAGE_WRITE);
+ +}
+ +
+ +static inline pmd_t pmd_wrprotect(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) &= ~(_PAGE_WRITE | _PAGE_SILENT_WRITE);
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkwrite(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) |= _PAGE_WRITE;
+ +      if (pmd_val(pmd) & _PAGE_MODIFIED)
+ +              pmd_val(pmd) |= _PAGE_SILENT_WRITE;
+ +
+ +      return pmd;
+ +}
+ +
+ +static inline int pmd_dirty(pmd_t pmd)
+ +{
+ +      return !!(pmd_val(pmd) & _PAGE_MODIFIED);
+ +}
+ +
+ +static inline pmd_t pmd_mkclean(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) &= ~(_PAGE_MODIFIED | _PAGE_SILENT_WRITE);
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkdirty(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) |= _PAGE_MODIFIED;
+ +      if (pmd_val(pmd) & _PAGE_WRITE)
+ +              pmd_val(pmd) |= _PAGE_SILENT_WRITE;
+ +
+ +      return pmd;
+ +}
+ +
+ +static inline int pmd_young(pmd_t pmd)
+ +{
+ +      return !!(pmd_val(pmd) & _PAGE_ACCESSED);
+ +}
+ +
+ +static inline pmd_t pmd_mkold(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) &= ~(_PAGE_ACCESSED|_PAGE_SILENT_READ);
+ +
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkyoung(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) |= _PAGE_ACCESSED;
+ +
+ +      if (cpu_has_rixi) {
+ +              if (!(pmd_val(pmd) & _PAGE_NO_READ))
+ +                      pmd_val(pmd) |= _PAGE_SILENT_READ;
+ +      } else {
+ +              if (pmd_val(pmd) & _PAGE_READ)
+ +                      pmd_val(pmd) |= _PAGE_SILENT_READ;
+ +      }
+ +
+ +      return pmd;
+ +}
+ +
+ +/* Extern to avoid header file madness */
+ +extern pmd_t mk_pmd(struct page *page, pgprot_t prot);
+ +
+ +static inline unsigned long pmd_pfn(pmd_t pmd)
+ +{
+ +      return pmd_val(pmd) >> _PFN_SHIFT;
+ +}
+ +
+ +static inline struct page *pmd_page(pmd_t pmd)
+ +{
+ +      if (pmd_trans_huge(pmd))
+ +              return pfn_to_page(pmd_pfn(pmd));
+ +
+ +      return pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT);
+ +}
+ +
+ +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ +{
+ +      pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+ +      return pmd;
+ +}
+ +
++#define pmd_pgprot(x) __pgprot(pmd_val(x) & ~_PAGE_CHG_MASK)
++
+ +static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY);
+ +
+ +      return pmd;
+ +}
+ +
+ +/*
+ + * The generic version pmdp_get_and_clear uses a version of pmd_clear() with a
+ + * different prototype.
+ + */
+ +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+ +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ +                                     unsigned long address, pmd_t *pmdp)
+ +{
+ +      pmd_t old = *pmdp;
+ +
+ +      pmd_clear(pmdp);
+ +
+ +      return old;
+ +}
+ +
+ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ +
   #include <asm-generic/pgtable.h>
   
   /*
diff --cc arch/s390/include/asm/pgtable.h

index f8887abaca563da7cdfad7b24569d0fbcd76c866,6bd7d74830171096c82ac74f5dc671dbb7098090..28469202eadbfbb33970b889ca4eff4f82b42f30
--- 1/arch/s390/include/asm/pgtable.h
--- 2/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@@ -1216,185 -1159,6 +1216,198 @@@ static inline pmd_t *pmd_offset(pud_t *
   #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
   #define pte_unmap(pte) do { } while (0)
   
+ +static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
+ +{
+ +      unsigned long sto = (unsigned long) pmdp -
+ +                          pmd_index(address) * sizeof(pmd_t);
+ +
+ +      if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+ +              asm volatile(
+ +                      "       .insn   rrf,0xb98e0000,%2,%3,0,0"
+ +                      : "=m" (*pmdp)
+ +                      : "m" (*pmdp), "a" (sto),
+ +                        "a" ((address & HPAGE_MASK))
+ +                      : "cc"
+ +              );
+ +      }
+ +}
+ +
+ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ +#define __HAVE_ARCH_PGTABLE_DEPOSIT
+ +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+ +
+ +#define __HAVE_ARCH_PGTABLE_WITHDRAW
+ +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+ +
+ +static inline int pmd_trans_splitting(pmd_t pmd)
+ +{
+ +      return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+ +}
+ +
+ +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ +                            pmd_t *pmdp, pmd_t entry)
+ +{
+ +      *pmdp = entry;
+ +}
+ +
++static inline pgprot_t pmd_pgprot(pmd_t pmd)
++{
++      pgprot_t prot = PAGE_RW;
++
++      if (pmd_val(pmd) & _SEGMENT_ENTRY_RO) {
++              if (pmd_val(pmd) & _SEGMENT_ENTRY_INV)
++                      prot = PAGE_NONE;
++              else
++                      prot = PAGE_RO;
++      }
++      return prot;
++}
++
+ +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+ +{
+ +      unsigned long pgprot_pmd = 0;
+ +
+ +      if (pgprot_val(pgprot) & _PAGE_INVALID) {
+ +              if (pgprot_val(pgprot) & _PAGE_SWT)
+ +                      pgprot_pmd |= _HPAGE_TYPE_NONE;
+ +              pgprot_pmd |= _SEGMENT_ENTRY_INV;
+ +      }
+ +      if (pgprot_val(pgprot) & _PAGE_RO)
+ +              pgprot_pmd |= _SEGMENT_ENTRY_RO;
+ +      return pgprot_pmd;
+ +}
+ +
+ +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ +{
+ +      pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+ +      pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkhuge(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkwrite(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_wrprotect(pmd_t pmd)
+ +{
+ +      pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkdirty(pmd_t pmd)
+ +{
+ +      /* No dirty bit in the segment table entry. */
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkold(pmd_t pmd)
+ +{
+ +      /* No referenced bit in the segment table entry. */
+ +      return pmd;
+ +}
+ +
+ +static inline pmd_t pmd_mkyoung(pmd_t pmd)
+ +{
+ +      /* No referenced bit in the segment table entry. */
+ +      return pmd;
+ +}
+ +
+ +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+ +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ +                                          unsigned long address, pmd_t *pmdp)
+ +{
+ +      unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
+ +      long tmp, rc;
+ +      int counter;
+ +
+ +      rc = 0;
+ +      if (MACHINE_HAS_RRBM) {
+ +              counter = PTRS_PER_PTE >> 6;
+ +              asm volatile(
+ +                      "0:     .insn   rre,0xb9ae0000,%0,%3\n" /* rrbm */
+ +                      "       ogr     %1,%0\n"
+ +                      "       la      %3,0(%4,%3)\n"
+ +                      "       brct    %2,0b\n"
+ +                      : "=&d" (tmp), "+&d" (rc), "+d" (counter),
+ +                        "+a" (pmd_addr)
+ +                      : "a" (64 * 4096UL) : "cc");
+ +              rc = !!rc;
+ +      } else {
+ +              counter = PTRS_PER_PTE;
+ +              asm volatile(
+ +                      "0:     rrbe    0,%2\n"
+ +                      "       la      %2,0(%3,%2)\n"
+ +                      "       brc     12,1f\n"
+ +                      "       lhi     %0,1\n"
+ +                      "1:     brct    %1,0b\n"
+ +                      : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
+ +                      : "a" (4096UL) : "cc");
+ +      }
+ +      return rc;
+ +}
+ +
+ +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+ +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ +                                     unsigned long address, pmd_t *pmdp)
+ +{
+ +      pmd_t pmd = *pmdp;
+ +
+ +      __pmd_idte(address, pmdp);
+ +      pmd_clear(pmdp);
+ +      return pmd;
+ +}
+ +
+ +#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+ +static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+ +                                   unsigned long address, pmd_t *pmdp)
+ +{
+ +      return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+ +}
+ +
+ +#define __HAVE_ARCH_PMDP_INVALIDATE
+ +static inline void pmdp_invalidate(struct vm_area_struct *vma,
+ +                                 unsigned long address, pmd_t *pmdp)
+ +{
+ +      __pmd_idte(address, pmdp);
+ +}
+ +
+ +static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+ +{
+ +      pmd_t __pmd;
+ +      pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+ +      return __pmd;
+ +}
+ +
+ +#define pfn_pmd(pfn, pgprot)  mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+ +#define mk_pmd(page, pgprot)  pfn_pmd(page_to_pfn(page), (pgprot))
+ +
+ +static inline int pmd_trans_huge(pmd_t pmd)
+ +{
+ +      return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+ +}
+ +
+ +static inline int has_transparent_hugepage(void)
+ +{
+ +      return MACHINE_HAS_HPAGE ? 1 : 0;
+ +}
+ +
+ +static inline unsigned long pmd_pfn(pmd_t pmd)
+ +{
+ +      if (pmd_trans_huge(pmd))
+ +              return pmd_val(pmd) >> HPAGE_SHIFT;
+ +      else
+ +              return pmd_val(pmd) >> PAGE_SHIFT;
+ +}
+ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ +
   /*
    * 31 bit swap entry format:
    * A page-table entry has some bits we have to treat in a special way.
diff --cc arch/x86/Kconfig
Simple merge
diff --cc arch/x86/include/asm/pgtable.h
Simple merge
diff --cc arch/x86/kernel/setup.c
Simple merge
diff --cc include/linux/acpi.h
Simple merge
diff --cc include/linux/huge_mm.h
Simple merge
diff --cc include/linux/init_task.h
Simple merge
diff --cc include/linux/mempolicy.h

index e5ccb9ddd90eeb634665e88afd13d56184fff241,7f303d10f35f06746e9126a33e8e8681065ab036..05c1bc97ecf06db5f4120c6988ee3fcae92459f0
--- 1/include/linux/mempolicy.h
--- 2/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@@ -2,10 -7,72 +2,9 @@@
    * NUMA memory policies for Linux.
    * Copyright 2003,2004 Andi Kleen SuSE Labs
    */
- -
- -/*
- - * Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are
- - * passed by the user to either set_mempolicy() or mbind() in an 'int' actual.
- - * The MPOL_MODE_FLAGS macro determines the legal set of optional mode flags.
- - */
- -
- -/* Policies */
- -enum {
- -      MPOL_DEFAULT,
- -      MPOL_PREFERRED,
- -      MPOL_BIND,
- -      MPOL_INTERLEAVE,
- -      MPOL_LOCAL,
- -      MPOL_NOOP,              /* retain existing policy for range */
- -      MPOL_MAX,       /* always last member of enum */
- -};
- -
- -enum mpol_rebind_step {
- -      MPOL_REBIND_ONCE,       /* do rebind work at once(not by two step) */
- -      MPOL_REBIND_STEP1,      /* first step(set all the newly nodes) */
- -      MPOL_REBIND_STEP2,      /* second step(clean all the disallowed nodes)*/
- -      MPOL_REBIND_NSTEP,
- -};
- -
- -/* Flags for set_mempolicy */
- -#define MPOL_F_STATIC_NODES   (1 << 15)
- -#define MPOL_F_RELATIVE_NODES (1 << 14)
- -
- -/*
- - * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
- - * either set_mempolicy() or mbind().
- - */
- -#define MPOL_MODE_FLAGS       (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
- -
- -/* Flags for get_mempolicy */
- -#define MPOL_F_NODE   (1<<0)  /* return next IL mode instead of node mask */
- -#define MPOL_F_ADDR   (1<<1)  /* look up vma using address */
- -#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
- -
- -/* Flags for mbind */
- -#define MPOL_MF_STRICT        (1<<0)  /* Verify existing pages in the mapping */
- -#define MPOL_MF_MOVE   (1<<1) /* Move pages owned by this process to conform
- -                                 to policy */
- -#define MPOL_MF_MOVE_ALL (1<<2)       /* Move every page to conform to policy */
- -#define MPOL_MF_LAZY   (1<<3) /* Modifies '_MOVE:  lazy migrate on fault */
- -#define MPOL_MF_INTERNAL (1<<4)       /* Internal flags start here */
- -
- -#define MPOL_MF_VALID (MPOL_MF_STRICT   |     \
- -                       MPOL_MF_MOVE     |     \
- -                       MPOL_MF_MOVE_ALL |     \
- -                       MPOL_MF_LAZY)
- -
- -/*
- - * Internal flags that share the struct mempolicy flags word with
- - * "mode flags".  These flags are allocated from bit 0 up, as they
- - * are never OR'ed into the mode in mempolicy API arguments.
- - */
- -#define MPOL_F_SHARED  (1 << 0)       /* identify shared policies */
- -#define MPOL_F_LOCAL   (1 << 1)       /* preferred local allocation */
- -#define MPOL_F_REBINDING (1 << 2)     /* identify policies in rebinding */
- -#define MPOL_F_MOF    (1 << 3) /* this policy wants migrate on fault */
- -#define MPOL_F_HOME   (1 << 4) /* this is the home-node policy */
- -
- -#ifdef __KERNEL__
+ +#ifndef _LINUX_MEMPOLICY_H
+ +#define _LINUX_MEMPOLICY_H 1
   
- 
   #include <linux/mmzone.h>
   #include <linux/slab.h>
   #include <linux/rbtree.h>
@@@ -323,5 -394,13 +327,11 @@@ static inline int mpol_to_str(char *buf
         return 0;
   }
   
+ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+                                unsigned long address, int multi)
+ {
+       return -1; /* no node preference */
+ }
+ 
   #endif /* CONFIG_NUMA */
- -#endif /* __KERNEL__ */
- -
   #endif
diff --cc include/linux/mm.h

index fa068040273893c27d71461e459e2eb268739396,02e8c1f28bf84bab11fdb6b6694efdbfe46d5fd7..423464bb332c5ad3d517f38105205213e8b977a2
--- 1/include/linux/mm.h
--- 2/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -161,8 -157,38 +161,21 @@@ extern pgprot_t protection_map[16]
   #define FAULT_FLAG_ALLOW_RETRY        0x08    /* Retry fault if blocking */
   #define FAULT_FLAG_RETRY_NOWAIT       0x10    /* Don't drop mmap_sem and wait when retrying */
   #define FAULT_FLAG_KILLABLE   0x20    /* The fault task is in SIGKILL killable region */
- -
- -/*
- - * This interface is used by x86 PAT code to identify a pfn mapping that is
- - * linear over entire vma. This is to optimize PAT code that deals with
- - * marking the physical region with a particular prot. This is not for generic
- - * mm use. Note also that this check will not work if the pfn mapping is
- - * linear for a vma starting at physical address 0. In which case PAT code
- - * falls back to slow path of reserving physical range page by page.
- - */
- -static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
- -{
- -      return !!(vma->vm_flags & VM_PFN_AT_MMAP);
- -}
- -
- -static inline int is_pfn_mapping(struct vm_area_struct *vma)
- -{
- -      return !!(vma->vm_flags & VM_PFNMAP);
- -}
+ +#define FAULT_FLAG_TRIED      0x40    /* second try */
   
+ /*
+  * Some architectures (such as x86) may need to preserve certain pgprot
+  * bits, without complicating generic pgprot code.
+  *
+  * Most architectures don't care:
+  */
+ #ifndef pgprot_modify
+ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+ {
+       return newprot;
+ }
+ #endif
+ 
   /*
    * vm_fault is filled by the the pagefault handler and passed to the vma's
    * ->fault function. The vma's ->fault is responsible for returning a bitmask
diff --cc include/linux/mm_types.h
Simple merge
diff --cc include/linux/mmzone.h
Simple merge
diff --cc include/linux/sched.h
Simple merge
diff --cc include/uapi/linux/mempolicy.h

index 23e62e0537e2d1080f0c78ce2ffb946a23264676,0000000000000000000000000000000000000000..0c774c64ceda971388c1b39c34169962936a666c

mode 100644,000000..100644
--- 1/include/uapi/linux/mempolicy.h
--- /dev/null
+++ b/include/uapi/linux/mempolicy.h
@@@ -1,64 -1,0 +1,74 @@@
- #define MPOL_MF_MOVE  (1<<1)  /* Move pages owned by this process to conform to mapping */
- #define MPOL_MF_MOVE_ALL (1<<2)       /* Move every page to conform to mapping */
- #define MPOL_MF_INTERNAL (1<<3)       /* Internal flags start here */
+ +/*
+ + * NUMA memory policies for Linux.
+ + * Copyright 2003,2004 Andi Kleen SuSE Labs
+ + */
+ +#ifndef _UAPI_LINUX_MEMPOLICY_H
+ +#define _UAPI_LINUX_MEMPOLICY_H
+ +
+ +#include <linux/errno.h>
+ +
+ +
+ +/*
+ + * Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are
+ + * passed by the user to either set_mempolicy() or mbind() in an 'int' actual.
+ + * The MPOL_MODE_FLAGS macro determines the legal set of optional mode flags.
+ + */
+ +
+ +/* Policies */
+ +enum {
+ +      MPOL_DEFAULT,
+ +      MPOL_PREFERRED,
+ +      MPOL_BIND,
+ +      MPOL_INTERLEAVE,
++      MPOL_LOCAL,
++      MPOL_NOOP,              /* retain existing policy for range */
+ +      MPOL_MAX,       /* always last member of enum */
+ +};
+ +
+ +enum mpol_rebind_step {
+ +      MPOL_REBIND_ONCE,       /* do rebind work at once(not by two step) */
+ +      MPOL_REBIND_STEP1,      /* first step(set all the newly nodes) */
+ +      MPOL_REBIND_STEP2,      /* second step(clean all the disallowed nodes)*/
+ +      MPOL_REBIND_NSTEP,
+ +};
+ +
+ +/* Flags for set_mempolicy */
+ +#define MPOL_F_STATIC_NODES   (1 << 15)
+ +#define MPOL_F_RELATIVE_NODES (1 << 14)
+ +
+ +/*
+ + * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
+ + * either set_mempolicy() or mbind().
+ + */
+ +#define MPOL_MODE_FLAGS       (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
+ +
+ +/* Flags for get_mempolicy */
+ +#define MPOL_F_NODE   (1<<0)  /* return next IL mode instead of node mask */
+ +#define MPOL_F_ADDR   (1<<1)  /* look up vma using address */
+ +#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
+ +
+ +/* Flags for mbind */
+ +#define MPOL_MF_STRICT        (1<<0)  /* Verify existing pages in the mapping */
- 
++#define MPOL_MF_MOVE   (1<<1) /* Move pages owned by this process to conform
++                                 to policy */
++#define MPOL_MF_MOVE_ALL (1<<2)       /* Move every page to conform to policy */
++#define MPOL_MF_LAZY   (1<<3) /* Modifies '_MOVE:  lazy migrate on fault */
++#define MPOL_MF_INTERNAL (1<<4)       /* Internal flags start here */
++
++#define MPOL_MF_VALID (MPOL_MF_STRICT   |     \
++                       MPOL_MF_MOVE     |     \
++                       MPOL_MF_MOVE_ALL |     \
++                       MPOL_MF_LAZY)
+ +
+ +/*
+ + * Internal flags that share the struct mempolicy flags word with
+ + * "mode flags".  These flags are allocated from bit 0 up, as they
+ + * are never OR'ed into the mode in mempolicy API arguments.
+ + */
+ +#define MPOL_F_SHARED  (1 << 0)       /* identify shared policies */
+ +#define MPOL_F_LOCAL   (1 << 1)       /* preferred local allocation */
+ +#define MPOL_F_REBINDING (1 << 2)     /* identify policies in rebinding */
++#define MPOL_F_MOF    (1 << 3) /* this policy wants migrate on fault */
++#define MPOL_F_HOME   (1 << 4) /* this is the home-node policy */
+ +
+ +#endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --cc init/Kconfig
Simple merge
diff --cc kernel/sched/core.c
Simple merge
diff --cc kernel/sysctl.c
Simple merge
diff --cc lib/Makefile

index 821a16229111eba69f189ae9be683f02eeb70862,0924041b6959188b074b3240b51ce5e31aa2c70c..e91b9dffc853e95009c44e647e0cb9e6a8de374d
--- 1/lib/Makefile
--- 2/lib/Makefile
+++ b/lib/Makefile
@@@ -9,10 -9,10 +9,10 @@@ endi
   
   lib-y := ctype.o string.o vsprintf.o cmdline.o \
          rbtree.o radix-tree.o dump_stack.o timerqueue.o\
- -       idr.o int_sqrt.o extable.o prio_tree.o \
+ +       idr.o int_sqrt.o extable.o \
          sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
          proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
-        is_single_threaded.o plist.o decompress.o
+        is_single_threaded.o plist.o decompress.o earlycpio.o
   
   lib-$(CONFIG_MMU) += ioremap.o
   lib-$(CONFIG_SMP) += cpumask.o
diff --cc mm/huge_memory.c

index 40f17c34b4153fab93b4f1a2685dee0b8cac4da8,5b9ab256910c387f853593eedf6156d2581cf70e..62ce20264d149f6b840ad8c09ff979b5e79836ce
--- 1/mm/huge_memory.c
--- 2/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@@ -17,7 -17,7 +17,8 @@@
   #include <linux/khugepaged.h>
   #include <linux/freezer.h>
   #include <linux/mman.h>
+ +#include <linux/pagemap.h>
+ #include <linux/migrate.h>
   #include <asm/tlb.h>
   #include <asm/pgalloc.h>
   #include "internal.h"
@@@ -1347,59 -1428,55 +1418,54 @@@ static int __split_huge_page_map(struc
         spin_lock(&mm->page_table_lock);
         pmd = page_check_address_pmd(page, mm, address,
                                      PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG);
-       if (pmd) {
-               pgtable = pgtable_trans_huge_withdraw(mm);
-               pmd_populate(mm, &_pmd, pgtable);
- 
-               haddr = address;
-               for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
-                       pte_t *pte, entry;
-                       BUG_ON(PageCompound(page+i));
-                       entry = mk_pte(page + i, vma->vm_page_prot);
-                       entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-                       if (!pmd_write(*pmd))
-                               entry = pte_wrprotect(entry);
-                       else
-                               BUG_ON(page_mapcount(page) != 1);
-                       if (!pmd_young(*pmd))
-                               entry = pte_mkold(entry);
-                       pte = pte_offset_map(&_pmd, haddr);
-                       BUG_ON(!pte_none(*pte));
-                       set_pte_at(mm, haddr, pte, entry);
-                       pte_unmap(pte);
-               }
+       if (!pmd)
+               goto unlock;
   
-               smp_wmb(); /* make pte visible before pmd */
-               /*
-                * Up to this point the pmd is present and huge and
-                * userland has the whole access to the hugepage
-                * during the split (which happens in place). If we
-                * overwrite the pmd with the not-huge version
-                * pointing to the pte here (which of course we could
-                * if all CPUs were bug free), userland could trigger
-                * a small page size TLB miss on the small sized TLB
-                * while the hugepage TLB entry is still established
-                * in the huge TLB. Some CPU doesn't like that. See
-                * http://support.amd.com/us/Processor_TechDocs/41322.pdf,
-                * Erratum 383 on page 93. Intel should be safe but is
-                * also warns that it's only safe if the permission
-                * and cache attributes of the two entries loaded in
-                * the two TLB is identical (which should be the case
-                * here). But it is generally safer to never allow
-                * small and huge TLB entries for the same virtual
-                * address to be loaded simultaneously. So instead of
-                * doing "pmd_populate(); flush_tlb_range();" we first
-                * mark the current pmd notpresent (atomically because
-                * here the pmd_trans_huge and pmd_trans_splitting
-                * must remain set at all times on the pmd until the
-                * split is complete for this pmd), then we flush the
-                * SMP TLB and finally we write the non-huge version
-                * of the pmd entry with pmd_populate.
-                */
-               pmdp_invalidate(vma, address, pmd);
-               pmd_populate(mm, pmd, pgtable);
-               ret = 1;
+       prot = pmd_pgprot(*pmd);
- -      pgtable = get_pmd_huge_pte(mm);
++      pgtable = pgtable_trans_huge_withdraw(mm);
+       pmd_populate(mm, &_pmd, pgtable);
+ 
+       for (i = 0, haddr = address; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+               pte_t *pte, entry;
+ 
+               BUG_ON(PageCompound(page+i));
+               entry = mk_pte(page + i, prot);
+               entry = pte_mkdirty(entry);
+               if (!pmd_young(*pmd))
+                       entry = pte_mkold(entry);
+               pte = pte_offset_map(&_pmd, haddr);
+               BUG_ON(!pte_none(*pte));
+               set_pte_at(mm, haddr, pte, entry);
+               pte_unmap(pte);
         }
- -      set_pmd_at(mm, address, pmd, pmd_mknotpresent(*pmd));
- -      flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ 
+       smp_wmb(); /* make ptes visible before pmd, see __pte_alloc */
+       /*
+        * Up to this point the pmd is present and huge.
+        *
+        * If we overwrite the pmd with the not-huge version, we could trigger
+        * a small page size TLB miss on the small sized TLB while the hugepage
+        * TLB entry is still established in the huge TLB.
+        *
+        * Some CPUs don't like that. See
+        * http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum 383
+        * on page 93.
+        *
+        * Thus it is generally safer to never allow small and huge TLB entries
+        * for overlapping virtual addresses to be loaded. So we first mark the
+        * current pmd not present, then we flush the TLB and finally we write
+        * the non-huge version of the pmd entry with pmd_populate.
+        *
+        * The above needs to be done under the ptl because pmd_trans_huge and
+        * pmd_trans_splitting must remain set on the pmd until the split is
+        * complete. The ptl also protects against concurrent faults due to
+        * making the pmd not-present.
+        */
++      pmdp_invalidate(vma, address, pmd);
+       pmd_populate(mm, pmd, pgtable);
+       ret = 1;
+ 
+ unlock:
         spin_unlock(&mm->page_table_lock);
   
         return ret;
@@@ -2280,23 -2300,30 +2346,21 @@@ static int khugepaged_has_work(void
   static int khugepaged_wait_event(void)
   {
         return !list_empty(&khugepaged_scan.mm_head) ||
- -              !khugepaged_enabled();
+ +              kthread_should_stop();
   }
   
- -static void khugepaged_do_scan(struct page **hpage)
+ +static void khugepaged_do_scan(void)
   {
+ +      struct page *hpage = NULL;
         unsigned int progress = 0, pass_through_head = 0;
-       unsigned int pages = khugepaged_pages_to_scan;
+       unsigned int pages = ACCESS_ONCE(khugepaged_pages_to_scan);
+ +      bool wait = true;
   
-       barrier(); /* write khugepaged_pages_to_scan to local stack */
- 
         while (progress < pages) {
- -              cond_resched();
- -
- -#ifndef CONFIG_NUMA
- -              if (!*hpage) {
- -                      *hpage = alloc_hugepage(khugepaged_defrag());
- -                      if (unlikely(!*hpage)) {
- -                              count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
- -                              break;
- -                      }
- -                      count_vm_event(THP_COLLAPSE_ALLOC);
- -              }
- -#else
- -              if (IS_ERR(*hpage))
+ +              if (!khugepaged_prealloc_page(&hpage, &wait))
                         break;
- -#endif
+ +
+ +              cond_resched();
   
                 if (unlikely(kthread_should_stop() || freezing(current)))
                         break;
diff --cc mm/memory.c
Simple merge
diff --cc mm/mempolicy.c

index d04a8a54c294f852f55e624e3c41d1b03b7a56e9,3360a8dee5c549172ad30ae8319f08f769133aca..8a5b2fc0da5cd4ecff41294969beb0968fcb8404
--- 1/mm/mempolicy.c
--- 2/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@@ -2169,12 -2168,116 +2202,122 @@@ mpol_shared_policy_lookup(struct shared
         return pol;
   }
   
+ +static void sp_free(struct sp_node *n)
+ +{
+ +      mpol_put(n->policy);
+ +      kmem_cache_free(sn_cache, n);
+ +}
+ +
+ /**
+  * mpol_misplaced - check whether current page node is valid in policy
+  *
+  * @page   - page to be checked
+  * @vma    - vm area where page mapped
+  * @addr   - virtual address where page mapped
+  * @multi  - use multi-stage node binding
+  *
+  * Lookup current policy node id for vma,addr and "compare to" page's
+  * node id.
+  *
+  * Returns:
+  *    -1      - not misplaced, page is in the right node
+  *    node    - node id where the page should be
+  *
+  * Policy determination "mimics" alloc_page_vma().
+  * Called from fault path where we know the vma and faulting address.
+  */
+ int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+                  unsigned long addr, int multi)
+ {
+       struct mempolicy *pol;
+       struct zone *zone;
+       int curnid = page_to_nid(page);
+       unsigned long pgoff;
+       int polnid = -1;
+       int ret = -1;
+ 
+       BUG_ON(!vma);
+ 
+       pol = get_vma_policy(current, vma, addr);
+       if (!(pol->flags & MPOL_F_MOF))
+               goto out;
+ 
+       switch (pol->mode) {
+       case MPOL_INTERLEAVE:
+               BUG_ON(addr >= vma->vm_end);
+               BUG_ON(addr < vma->vm_start);
+ 
+               pgoff = vma->vm_pgoff;
+               pgoff += (addr - vma->vm_start) >> PAGE_SHIFT;
+               polnid = offset_il_node(pol, vma, pgoff);
+               break;
+ 
+       case MPOL_PREFERRED:
+               if (pol->flags & MPOL_F_LOCAL)
+                       polnid = numa_node_id();
+               else
+                       polnid = pol->v.preferred_node;
+               break;
+ 
+       case MPOL_BIND:
+               /*
+                * allows binding to multiple nodes.
+                * use current page if in policy nodemask,
+                * else select nearest allowed node, if any.
+                * If no allowed nodes, use current [!misplaced].
+                */
+               if (node_isset(curnid, pol->v.nodes))
+                       goto out;
+               (void)first_zones_zonelist(
+                               node_zonelist(numa_node_id(), GFP_HIGHUSER),
+                               gfp_zone(GFP_HIGHUSER),
+                               &pol->v.nodes, &zone);
+               polnid = zone->node;
+               break;
+ 
+       default:
+               BUG();
+       }
+ 
+       /*
+        * Multi-stage node selection is used in conjunction with a periodic
+        * migration fault to build a temporal task<->page relation. By
+        * using a two-stage filter we remove short/unlikely relations.
+        *
+        * Using P(p) ~ n_p / n_t as per frequentist probability, we can
+        * equate a task's usage of a particular page (n_p) per total usage
+        * of this page (n_t) (in a given time-span) to a probability.
+        *
+        * Our periodic faults will then sample this probability and getting
+        * the same result twice in a row, given these samples are fully
+        * independent, is then given by P(n)^2, provided our sample period
+        * is sufficiently short compared to the usage pattern.
+        *
+        * This quadric squishes small probabilities, making it less likely
+        * we act on an unlikely task<->page relation.
+        *
+        * NOTE: effectively we're using task-home-node<->page-node relations
+        * since those are the only thing we can affect.
+        *
+        * NOTE: we're using task-home-node as opposed to the current node
+        * the task might be running on, since the task-home-node is the
+        * long-term node of this task, further reducing noise. Also see
+        * task_tick_numa().
+        */
+       if (multi && (pol->flags & MPOL_F_HOME)) {
+               int last_nid = page_xchg_last_nid(page, polnid);
+               if (last_nid != polnid)
+                       goto out;
+       }
+ 
+       if (curnid != polnid)
+               ret = polnid;
+ out:
+       mpol_cond_put(pol);
+ 
+       return ret;
+ }
+ 
   static void sp_delete(struct shared_policy *sp, struct sp_node *n)
   {
         pr_debug("deleting %lx-l%lx\n", n->start, n->end);
diff --cc mm/vmstat.c
Simple merge
diff --cc scripts/kconfig/Makefile
Simple merge
diff --cc tools/perf/Makefile
Simple merge
author	Stephen Rothwell <sfr@canb.auug.org.au>
	Fri, 19 Oct 2012 00:49:37 +0000 (11:49 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Fri, 19 Oct 2012 01:16:32 +0000 (12:16 +1100)
		1	2
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/mips/include/asm/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/include/asm/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/setup.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/acpi.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/huge_mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/init_task.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mempolicy.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm_types.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mmzone.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/uapi/linux/mempolicy.h	patch \|	diff1 \|	\|	blob \| history
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sysctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
mm/huge_memory.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/memory.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/mempolicy.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/vmstat.c	patch \|	diff1 \|	diff2 \|	blob \| history
scripts/kconfig/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
tools/perf/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history