#define vxtime_lock() do {} while (0)
#define vxtime_unlock() do {} while (0)
+/* This attribute is used in include/linux/jiffies.h alongside
+ * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp
+ * for frv does not contain another section specification.
+ */
+#define __jiffy_arch_data __attribute__((__section__(".data")))
+
#endif
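
For reference, a rough userspace sketch (not the kernel code) of what this arch override enables: a section attribute and a cacheline-alignment attribute combined on one definition, mirroring the jiffies declaration in include/linux/jiffies.h. The names my_arch_data, my_cacheline_aligned and fake_jiffies are invented.

#include <stdio.h>

/* stand-in for the frv definition of __jiffy_arch_data */
#define my_arch_data		__attribute__((__section__(".data")))
/* stand-in for __cacheline_aligned_in_smp */
#define my_cacheline_aligned	__attribute__((__aligned__(64)))

/* mirrors: unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; */
volatile unsigned long my_cacheline_aligned my_arch_data fake_jiffies = 1;

int main(void)
{
	printf("fake_jiffies at %p, alignment %zu\n",
	       (void *)&fake_jiffies, (size_t)__alignof__(fake_jiffies));
	return 0;
}
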
rc = write_sync_reg(SCR_HOST_TO_READER_START, dev);
if (rc <= 0) {
- DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
+ DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
DEBUGP(2, dev, "<- cm4040_write (failed)\n");
if (rc == -ERESTARTSYS)
return rc;
for (i = 0; i < bytes_to_write; i++) {
rc = wait_for_bulk_out_ready(dev);
if (rc <= 0) {
- DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n",
+ DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n",
rc);
DEBUGP(2, dev, "<- cm4040_write (failed)\n");
if (rc == -ERESTARTSYS)
rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev);
if (rc <= 0) {
- DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
+ DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
DEBUGP(2, dev, "<- cm4040_write (failed)\n");
if (rc == -ERESTARTSYS)
return rc;
goto out;
}
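
The hunks above switch the debug format from the nonstandard 'Z' length modifier to the C99 'z'. A minimal userspace illustration of the resulting specifier (the value is made up):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t rc = 0x1f;	/* made-up return value */

	/* '%.2zx': size_t argument, hexadecimal, at least two digits */
	printf("write_sync_reg c=%.2zx\n", rc);	/* prints "write_sync_reg c=1f" */
	return 0;
}
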
+ /*
+ * It is possible, particularly with mixed reads & writes to private
+ * mappings, that we have raced with a PMD fault that overlaps with
+ * the PTE we need to set up. If so just return and the fault will be
+ * retried.
+ */
+ if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
+ vmf_ret = VM_FAULT_NOPAGE;
+ goto unlock_entry;
+ }
+
/*
* Note that we don't bother to use iomap_apply here: DAX required
* the file system block size to be equal the page size, which means
if (IS_ERR(entry))
goto fallback;
+ /*
+ * It is possible, particularly with mixed reads & writes to private
+ * mappings, that we have raced with a PTE fault that overlaps with
+ * the PMD we need to set up. If so just return and the fault will be
+ * retried.
+ */
+ if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
+ !pmd_devmap(*vmf->pmd)) {
+ result = 0;
+ goto unlock_entry;
+ }
+
/*
* Note that we don't use iomap_apply here. We aren't doing I/O, only
* setting up a mapping, so really we're using iomap_begin() as a way
#define ___GFP_WRITE 0x800000u
#define ___GFP_KSWAPD_RECLAIM 0x1000000u
#ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP 0x4000000u
+#define ___GFP_NOLOCKDEP 0x2000000u
#else
#define ___GFP_NOLOCKDEP 0
#endif
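
A hedged sketch of the arithmetic behind this one-character fix: ___GFP_KSWAPD_RECLAIM occupies bit 24, so the first free bit for ___GFP_NOLOCKDEP is bit 25 (0x2000000u) rather than bit 26. The program below only mirrors the three values quoted above.

#include <stdio.h>

#define ___GFP_WRITE		0x800000u	/* bit 23 */
#define ___GFP_KSWAPD_RECLAIM	0x1000000u	/* bit 24 */
#define ___GFP_NOLOCKDEP	0x2000000u	/* bit 25, the next unused bit */

int main(void)
{
	printf("WRITE          -> bit %d\n", __builtin_ctz(___GFP_WRITE));
	printf("KSWAPD_RECLAIM -> bit %d\n", __builtin_ctz(___GFP_KSWAPD_RECLAIM));
	printf("NOLOCKDEP      -> bit %d\n", __builtin_ctz(___GFP_NOLOCKDEP));
	return 0;
}
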
/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
+#ifndef __jiffy_arch_data
+#define __jiffy_arch_data
+#endif
+
/*
* The 64-bit value is not atomic - you MUST NOT read it
* without sampling the sequence number in jiffies_lock.
* get_jiffies_64() will do this for you as appropriate.
*/
extern u64 __cacheline_aligned_in_smp jiffies_64;
-extern unsigned long volatile __cacheline_aligned_in_smp jiffies;
+extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void);
}
#endif
+extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+ phys_addr_t end_addr);
#else
static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
{
return 0;
}
+static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+ phys_addr_t end_addr)
+{
+ return 0;
+}
+
#endif /* CONFIG_HAVE_MEMBLOCK */
#endif /* __KERNEL__ */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
#define FOLL_COW 0x4000 /* internal GUP flag */
+static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
+{
+ if (vm_fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
+ return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
+ if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
+ return -EFAULT;
+ return 0;
+}
+
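
A hedged, self-contained sketch of how callers use the new helper: the VM_FAULT_* and FOLL_HWPOISON values below are illustrative stand-ins so the example compiles in userspace, and EHWPOISON falls back to the asm-generic value when libc does not define it.

#include <stdio.h>
#include <errno.h>

#ifndef EHWPOISON
#define EHWPOISON 133			/* asm-generic/errno.h value */
#endif

/* illustrative stand-ins for the kernel's fault codes and FOLL flags */
#define VM_FAULT_OOM		0x0001
#define VM_FAULT_SIGBUS		0x0002
#define VM_FAULT_HWPOISON	0x0010
#define VM_FAULT_HWPOISON_LARGE	0x0020
#define VM_FAULT_SIGSEGV	0x0040
#define FOLL_HWPOISON		0x100

static int vm_fault_to_errno(int vm_fault, int foll_flags)
{
	if (vm_fault & VM_FAULT_OOM)
		return -ENOMEM;
	if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
		return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
	if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
		return -EFAULT;
	return 0;
}

int main(void)
{
	/* the three cases the gup/hugetlb callers below care about */
	printf("OOM             -> %d\n", vm_fault_to_errno(VM_FAULT_OOM, 0));
	printf("HWPOISON (FOLL) -> %d\n", vm_fault_to_errno(VM_FAULT_HWPOISON, FOLL_HWPOISON));
	printf("SIGBUS          -> %d\n", vm_fault_to_errno(VM_FAULT_SIGBUS, 0));
	return 0;
}
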
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
* is the first PFN that needs to be initialised.
*/
unsigned long first_deferred_pfn;
+ unsigned long static_init_size;
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
ret = handle_mm_fault(vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
- if (ret & VM_FAULT_OOM)
- return -ENOMEM;
- if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
- return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
- if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
- return -EFAULT;
+ int err = vm_fault_to_errno(ret, *flags);
+
+ if (err)
+ return err;
BUG();
}
ret = handle_mm_fault(vma, address, fault_flags);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
- if (ret & VM_FAULT_OOM)
- return -ENOMEM;
- if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
- return -EHWPOISON;
- if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
- return -EFAULT;
+ int err = vm_fault_to_errno(ret, 0);
+
+ if (err)
+ return err;
BUG();
}
}
ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
if (ret & VM_FAULT_ERROR) {
+ int err = vm_fault_to_errno(ret, flags);
+
+ if (err)
+ return err;
+
remainder = 0;
break;
}
goto out;
if (PageTransCompound(page)) {
- err = split_huge_page(page);
- if (err)
+ if (split_huge_page(page))
goto out_unlock;
}
}
}
+extern unsigned long __init_memblock
+memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
+{
+ struct memblock_region *rgn;
+ unsigned long size = 0;
+ int idx;
+
+ for_each_memblock_type((&memblock.reserved), rgn) {
+ phys_addr_t start, end;
+
+ if (rgn->base + rgn->size < start_addr)
+ continue;
+ if (rgn->base > end_addr)
+ continue;
+
+ start = rgn->base;
+ end = start + rgn->size;
+ size += end - start;
+ }
+
+ return size;
+}
+
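
A hedged userspace model of the walk above, with a toy reserved[] array standing in for memblock.reserved; as in the kernel helper, any region that intersects the window contributes its full size. All numbers are invented.

#include <stdio.h>

struct region {
	unsigned long base;
	unsigned long size;
};

/* toy stand-in for memblock.reserved */
static const struct region reserved[] = {
	{ 0x00100000UL, 0x00200000UL },	/* e.g. kernel image */
	{ 0x10000000UL, 0x08000000UL },	/* e.g. crash kernel */
};

static unsigned long reserved_memory_within(unsigned long start, unsigned long end)
{
	unsigned long size = 0;
	unsigned int i;

	for (i = 0; i < sizeof(reserved) / sizeof(reserved[0]); i++) {
		if (reserved[i].base + reserved[i].size < start)
			continue;
		if (reserved[i].base > end)
			continue;
		size += reserved[i].size;	/* whole region counts once it intersects */
	}
	return size;
}

int main(void)
{
	printf("reserved within first 2GB: %#lx bytes\n",
	       reserved_memory_within(0, 0x80000000UL));
	return 0;
}
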
void __init_memblock __memblock_dump_all(void)
{
pr_info("MEMBLOCK configuration:\n");
if (ret) {
pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
pfn, ret, page->flags, &page->flags);
- /*
- * We know that soft_offline_huge_page() tries to migrate
- * only one hugepage pointed to by hpage, so we need not
- * run through the pagelist here.
- */
- putback_active_hugepage(hpage);
+ if (!list_empty(&pagelist))
+ putback_movable_pages(&pagelist);
if (ret > 0)
ret = -EIO;
} else {
return ret;
}
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+ return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
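
A hedged toy illustration of why the order of the two checks matters (all names and values here are invented): the devmap test is side-effect free, while the stand-in for pmd_trans_unstable() logs when it sees a devmap-style entry, so putting the quiet check first short-circuits before the noisy one runs.

#include <stdio.h>
#include <stdbool.h>

#define TOY_PMD_DEVMAP	0x1		/* invented flag */

static bool toy_pmd_devmap(unsigned long pmd)
{
	return pmd & TOY_PMD_DEVMAP;	/* quiet check */
}

static bool toy_pmd_trans_unstable(unsigned long pmd)
{
	if (pmd & TOY_PMD_DEVMAP) {
		/* stand-in for the pmd_clear_bad() dmesg spam */
		fprintf(stderr, "bad pmd %#lx\n", pmd);
		return true;
	}
	return false;
}

static bool toy_pmd_devmap_trans_unstable(unsigned long pmd)
{
	/* quiet check first: || short-circuits before the noisy one */
	return toy_pmd_devmap(pmd) || toy_pmd_trans_unstable(pmd);
}

int main(void)
{
	printf("%d\n", toy_pmd_devmap_trans_unstable(TOY_PMD_DEVMAP));	/* 1, no "bad pmd" noise */
	return 0;
}
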
static int pte_alloc_one_map(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
map_pte:
/*
* If a huge pmd materialized under us just retry later. Use
- * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
- * didn't become pmd_trans_huge under us and then back to pmd_none, as
- * a result of MADV_DONTNEED running immediately after a huge pmd fault
- * in a different thread of this mm, in turn leading to a misleading
- * pmd_trans_huge() retval. All we have to ensure is that it is a
- * regular pmd that we can walk with pte_offset_map() and we can do that
- * through an atomic read in C, which is what pmd_trans_unstable()
- * provides.
+ * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+ * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+ * under us and then back to pmd_none, as a result of MADV_DONTNEED
+ * running immediately after a huge pmd fault in a different thread of
+ * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+ * All we have to ensure is that it is a regular pmd that we can walk
+ * with pte_offset_map() and we can do that through an atomic read in
+ * C, which is what pmd_trans_unstable() provides.
*/
- if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+ if (pmd_devmap_trans_unstable(vmf->pmd))
return VM_FAULT_NOPAGE;
+ /*
+ * At this point we know that our vmf->pmd points to a page of ptes
+ * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+ * for the duration of the fault. If a racing MADV_DONTNEED runs and
+ * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+ * be valid and we will re-check to make sure the vmf->pte isn't
+ * pte_none() under vmf->ptl protection when we return to
+ * alloc_set_pte().
+ */
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
return 0;
vmf->pte = NULL;
} else {
/* See comment in pte_alloc_one_map() */
- if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+ if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
/*
* A regular pmd is established and it can't morph into a huge
{
int i;
int nr = pagevec_count(pvec);
- int delta_munlocked;
+ int delta_munlocked = -nr;
struct pagevec pvec_putback;
int pgrescued = 0;
continue;
else
__munlock_isolation_failed(page);
+ } else {
+ delta_munlocked++;
}
/*
pagevec_add(&pvec_putback, pvec->pages[i]);
pvec->pages[i] = NULL;
}
- delta_munlocked = -nr + pagevec_count(&pvec_putback);
__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
spin_unlock_irq(zone_lru_lock(zone));
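
A hedged arithmetic sketch of the NR_MLOCK bookkeeping after this change: the delta starts at -nr on the assumption that every page in the pagevec gets munlocked, and is incremented back for each page that turns out not to be mlocked, so the adjustment is settled while still under the zone lock. The counts below are invented.

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	/* invented pagevec: true = page was actually mlocked */
	bool mlocked[] = { true, true, false, true, false, true, true, true };
	int nr = sizeof(mlocked) / sizeof(mlocked[0]);
	int delta_munlocked = -nr;	/* assume all nr pages get munlocked */
	int i;

	for (i = 0; i < nr; i++) {
		if (!mlocked[i])
			delta_munlocked++;	/* not mlocked: give the count back */
	}

	/* 6 of the 8 pages were mlocked, so NR_MLOCK should drop by 6 */
	printf("NR_MLOCK delta = %d\n", delta_munlocked);	/* -6 */
	return 0;
}
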
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static inline void reset_deferred_meminit(pg_data_t *pgdat)
{
+ unsigned long max_initialise;
+ unsigned long reserved_lowmem;
+
+ /*
+ * Initialise at least 2G of a node but also take into account
+ * two large system hashes that can take up 1GB for 0.25TB/node.
+ */
+ max_initialise = max(2UL << (30 - PAGE_SHIFT),
+ (pgdat->node_spanned_pages >> 8));
+
+ /*
+ * Compensate for all the memblock reservations (e.g. crash kernel)
+ * on top of the initial estimation to make sure we will initialize enough
+ * memory to boot.
+ */
+ reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
+ pgdat->node_start_pfn + max_initialise);
+ max_initialise += reserved_lowmem;
+
+ pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
pgdat->first_deferred_pfn = ULONG_MAX;
}
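
A hedged arithmetic sketch of the sizing above for a hypothetical node: 64GB spanned, 4KB pages, and a 512MB memblock reservation (say a crash kernel) inside the initialised window. The numbers are invented, not taken from the patch.

#include <stdio.h>

#define PAGE_SHIFT	12		/* 4KB pages */

int main(void)
{
	unsigned long spanned  = 64UL << (30 - PAGE_SHIFT);	/* 64GB in pages */
	unsigned long reserved = 512UL << (20 - PAGE_SHIFT);	/* 512MB reservation */
	unsigned long max_initialise;

	/* at least 2GB, or spanned/256 if that is larger */
	max_initialise = 2UL << (30 - PAGE_SHIFT);
	if ((spanned >> 8) > max_initialise)
		max_initialise = spanned >> 8;

	/* compensate for the memblock reservation inside that window */
	max_initialise += reserved;

	/* never more than the node actually spans */
	if (max_initialise > spanned)
		max_initialise = spanned;

	printf("static_init_size = %lu pages (%lu MB)\n",
	       max_initialise, max_initialise >> (20 - PAGE_SHIFT));
	return 0;
}
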
unsigned long pfn, unsigned long zone_end,
unsigned long *nr_initialised)
{
- unsigned long max_initialise;
-
/* Always populate low zones for address-constrained allocations */
if (zone_end < pgdat_end_pfn(pgdat))
return true;
- /*
- * Initialise at least 2G of a node but also take into account that
- * two large system hashes that can take up 1GB for 0.25TB/node.
- */
- max_initialise = max(2UL << (30 - PAGE_SHIFT),
- (pgdat->node_spanned_pages >> 8));
-
(*nr_initialised)++;
- if ((*nr_initialised > max_initialise) &&
+ if ((*nr_initialised > pgdat->static_init_size) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn;
return false;
goto got_pg;
/* Avoid allocations with no watermarks from looping endlessly */
- if (test_thread_flag(TIF_MEMDIE))
+ if (test_thread_flag(TIF_MEMDIE) &&
+ (alloc_flags == ALLOC_NO_WATERMARKS ||
+ (gfp_mask & __GFP_NOMEMALLOC)))
goto nopage;
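
A hedged sketch of the new bail-out condition: an OOM-selected (TIF_MEMDIE) task now gives up only if it has already been granted no-watermark access or the caller explicitly forbade using memory reserves; otherwise it keeps retrying. The flag values below are invented stand-ins.

#include <stdio.h>
#include <stdbool.h>

/* invented stand-ins for the allocator flags */
#define ALLOC_WMARK_MIN		0x1
#define ALLOC_NO_WATERMARKS	0x4
#define __GFP_NOMEMALLOC	0x10000u

static bool should_give_up(bool tif_memdie, int alloc_flags, unsigned int gfp_mask)
{
	return tif_memdie &&
	       (alloc_flags == ALLOC_NO_WATERMARKS ||
		(gfp_mask & __GFP_NOMEMALLOC));
}

int main(void)
{
	/* OOM victim that may still use reserves: keep retrying */
	printf("%d\n", should_give_up(true, ALLOC_WMARK_MIN, 0));		/* 0 */
	/* OOM victim that already ran with no watermarks: give up */
	printf("%d\n", should_give_up(true, ALLOC_NO_WATERMARKS, 0));		/* 1 */
	/* OOM victim but the caller forbade reserves: give up */
	printf("%d\n", should_give_up(true, ALLOC_WMARK_MIN, __GFP_NOMEMALLOC)); /* 1 */
	return 0;
}
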
/* Retry as long as the OOM killer is making progress */
/* pg_data_t should be reset to zero when it's allocated */
WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
- reset_deferred_meminit(pgdat);
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
pgdat->per_cpu_nodestats = NULL;
(unsigned long)pgdat->node_mem_map);
#endif
+ reset_deferred_meminit(pgdat);
free_area_init_core(pgdat);
}
char mbuf[64];
char *buf;
struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+ ssize_t len;
if (!attr || !attr->store || !attr->show)
continue;
buf = buffer;
}
- attr->show(root_cache, buf);
- attr->store(s, buf, strlen(buf));
+ len = attr->show(root_cache, buf);
+ if (len > 0)
+ attr->store(s, buf, len);
}
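
A hedged userspace sketch of the pattern applied above: propagate an attribute by feeding store() only the bytes that show() reported, and skip the write entirely when show() produced nothing or failed. The toy_show()/toy_store() pair is invented.

#include <stdio.h>
#include <sys/types.h>

static ssize_t toy_show(char *buf, size_t size)
{
	return snprintf(buf, size, "%d", 42);	/* length actually written */
}

static ssize_t toy_store(const char *buf, size_t len)
{
	printf("storing %.*s (%zu bytes)\n", (int)len, buf, len);
	return (ssize_t)len;
}

int main(void)
{
	char buf[64];
	ssize_t len = toy_show(buf, sizeof(buf));

	if (len > 0)			/* only propagate a value show() produced */
		toy_store(buf, (size_t)len);
	return 0;
}
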
if (buffer)
WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
/*
- * Make sure that larger requests are not too disruptive - no OOM
- * killer and no allocation failure warnings as we have a fallback
+ * We want to attempt a large physically contiguous block first because
+ * it is less likely to fragment multiple larger blocks and therefore
+ * contributes less to long term fragmentation than the vmalloc fallback.
+ * However, make sure that larger requests are not too disruptive - no
+ * OOM killer and no allocation failure warnings as we have a fallback.
*/
if (size > PAGE_SIZE) {
kmalloc_flags |= __GFP_NOWARN;
super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA)
def invoke(self, arg, from_tty):
- log_buf_addr = int(str(gdb.parse_and_eval("log_buf")).split()[0], 16)
- log_first_idx = int(gdb.parse_and_eval("log_first_idx"))
- log_next_idx = int(gdb.parse_and_eval("log_next_idx"))
- log_buf_len = int(gdb.parse_and_eval("log_buf_len"))
+ log_buf_addr = int(str(gdb.parse_and_eval(
+ "'printk.c'::log_buf")).split()[0], 16)
+ log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
+ log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
+ log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
inf = gdb.inferiors()[0]
start = log_buf_addr + log_first_idx
endchoice
config INITRAMFS_COMPRESSION
+ depends on INITRAMFS_SOURCE!=""
string
default "" if INITRAMFS_COMPRESSION_NONE
default ".gz" if INITRAMFS_COMPRESSION_GZIP