]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'akpm' (fixes from Andrew Morton)
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Sep 2013 21:32:32 +0000 (14:32 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Sep 2013 21:32:32 +0000 (14:32 -0700)
Merge misc fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (22 commits)
  pidns: fix free_pid() to handle the first fork failure
  ipc,msg: prevent race with rmid in msgsnd,msgrcv
  ipc/sem.c: update sem_otime for all operations
  mm/hwpoison: fix the lack of one reference count against poisoned page
  mm/hwpoison: fix false report on 2nd attempt at page recovery
  mm/hwpoison: fix test for a transparent huge page
  mm/hwpoison: fix traversal of hugetlbfs pages to avoid printk flood
  block: change config option name for cmdline partition parsing
  mm/mlock.c: prevent walking off the end of a pagetable in no-pmd configuration
  mm: avoid reinserting isolated balloon pages into LRU lists
  arch/parisc/mm/fault.c: fix uninitialized variable usage
  include/asm-generic/vtime.h: avoid zero-length file
  nilfs2: fix issue with race condition of competition between segments for dirty blocks
  Documentation/kernel-parameters.txt: replace kernelcore with Movable
  mm/bounce.c: fix a regression where MS_SNAP_STABLE (stable pages snapshotting) was ignored
  kernel/kmod.c: check for NULL in call_usermodehelper_exec()
  ipc/sem.c: synchronize the proc interface
  ipc/sem.c: optimize sem_lock()
  ipc/sem.c: fix race in sem_lock()
  mm/compaction.c: periodically schedule when freeing pages
  ...

26 files changed:
Documentation/block/00-INDEX
Documentation/block/cmdline-partition.txt
Documentation/kernel-parameters.txt
arch/parisc/mm/fault.c
block/Kconfig
block/Makefile
block/partitions/Kconfig
block/partitions/cmdline.c
fs/binfmt_elf.c
fs/nilfs2/page.c
fs/nilfs2/segment.c
include/asm-generic/vtime.h
include/linux/balloon_compaction.h
ipc/msg.c
ipc/sem.c
kernel/kmod.c
kernel/pid.c
mm/bounce.c
mm/compaction.c
mm/hwpoison-inject.c
mm/madvise.c
mm/memory-failure.c
mm/migrate.c
mm/mlock.c
mm/page_alloc.c
mm/vmscan.c

index d18ecd827c408d0fb42a8d8a7fc669ce88c95fc2..929d9904f74b7eb94bac71e81308b0bf335c3108 100644 (file)
@@ -6,6 +6,8 @@ capability.txt
        - Generic Block Device Capability (/sys/block/<device>/capability)
 cfq-iosched.txt
        - CFQ IO scheduler tunables
+cmdline-partition.txt
+       - how to specify block device partitions on kernel command line
 data-integrity.txt
        - Block data integrity
 deadline-iosched.txt
index 2bbf4cc40c3f7f92a02df9bd42b1bdabc8a93791..525b9f6d7fb49e4c9bbd5f7ea76833dcde640e59 100644 (file)
@@ -1,9 +1,9 @@
-Embedded device command line partition
+Embedded device command line partition parsing
 =====================================================================
 
-Read block device partition table from command line.
-The partition used for fixed block device (eMMC) embedded device.
-It is no MBR, save storage space. Bootloader can be easily accessed
+Support for reading the block device partition table from the command line.
+It is typically used for fixed block (eMMC) embedded devices.
+It has no MBR, so saves storage space. Bootloader can be easily accessed
 by absolute address of data on the block device.
 Users can easily change the partition.
 
index 539a236319907739faded777280b9352a84a284f..fcbb736d55feb439c1152be71355d5ab79f8edd2 100644 (file)
@@ -480,6 +480,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Format: <io>,<irq>,<mode>
                        See header of drivers/net/hamradio/baycom_ser_hdx.c.
 
+       blkdevparts=    Manual partition parsing of block device(s) for
+                       embedded devices based on command line input.
+                       See Documentation/block/cmdline-partition.txt
+
        boot_delay=     Milliseconds to delay each printk during boot.
                        Values larger than 10 seconds (10000) are changed to
                        no delay (0).
@@ -1357,7 +1361,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        pages. In the event, a node is too small to have both
                        kernelcore and Movable pages, kernelcore pages will
                        take priority and other nodes will have a larger number
-                       of kernelcore pages.  The Movable zone is used for the
+                       of Movable pages.  The Movable zone is used for the
                        allocation of pages that may be reclaimed or moved
                        by the page migration subsystem.  This means that
                        HugeTLB pages may not be allocated from this zone.
index d10d27a720c0d1f323c2248f01cc93193e73ca1e..00c0ed333a3d53421a161369cf1fa621c8a02d7f 100644 (file)
@@ -182,6 +182,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 
        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
+
+       acc_type = parisc_acctyp(code, regs->iir);
+
        if (acc_type & VM_WRITE)
                flags |= FAULT_FLAG_WRITE;
 retry:
@@ -196,8 +199,6 @@ retry:
 
 good_area:
 
-       acc_type = parisc_acctyp(code,regs->iir);
-
        if ((vma->vm_flags & acc_type) != acc_type)
                goto bad_area;
 
index 7f38e40fee0819a74ec6b3d10c759c07d3b04bb4..2429515c05c2ed1b6f457a60def817c33c178651 100644 (file)
@@ -99,11 +99,16 @@ config BLK_DEV_THROTTLING
 
        See Documentation/cgroups/blkio-controller.txt for more information.
 
-config CMDLINE_PARSER
+config BLK_CMDLINE_PARSER
        bool "Block device command line partition parser"
        default n
        ---help---
-       Parsing command line, get the partitions information.
+       Enabling this option allows you to specify the partition layout from
+       the kernel boot args.  This is typically of use for embedded devices
+       which don't otherwise have any standardized method for listing the
+       partitions on a block device.
+
+       See Documentation/block/cmdline-partition.txt for more information.
 
 menu "Partition Types"
 
index 4fa4be544ece94c05d0956a0c6f488c1f7a65161..671a83d063a5ba290c93efc08457eba6217ea80d 100644 (file)
@@ -18,4 +18,4 @@ obj-$(CONFIG_IOSCHED_CFQ)     += cfq-iosched.o
 
 obj-$(CONFIG_BLOCK_COMPAT)     += compat_ioctl.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY)        += blk-integrity.o
-obj-$(CONFIG_CMDLINE_PARSER)   += cmdline-parser.o
+obj-$(CONFIG_BLK_CMDLINE_PARSER)       += cmdline-parser.o
index 87a32086535d5228b513af3935d29765b763b2ec..9b29a996c3114a3d09c41abdcaaa0f3f19abe3ff 100644 (file)
@@ -263,7 +263,7 @@ config SYSV68_PARTITION
 
 config CMDLINE_PARTITION
        bool "Command line partition support" if PARTITION_ADVANCED
-       select CMDLINE_PARSER
+       select BLK_CMDLINE_PARSER
        help
-         Say Y here if you would read the partitions table from bootargs.
+         Say Y here if you want to read the partition table from bootargs.
          The format for the command line is just like mtdparts.
index 56cf4ffad51ed903128ac079d9845acafbd639a4..5141b563adf1b9a41c6b6054a8d2dcf5618da4d6 100644 (file)
@@ -2,15 +2,15 @@
  * Copyright (C) 2013 HUAWEI
  * Author: Cai Zhiyong <caizhiyong@huawei.com>
  *
- * Read block device partition table from command line.
- * The partition used for fixed block device (eMMC) embedded device.
- * It is no MBR, save storage space. Bootloader can be easily accessed
+ * Read block device partition table from the command line.
+ * Typically used for fixed block (eMMC) embedded devices.
+ * It has no MBR, so saves storage space. Bootloader can be easily accessed
  * by absolute address of data on the block device.
  * Users can easily change the partition.
  *
  * The format for the command line is just like mtdparts.
  *
- * Verbose config please reference "Documentation/block/cmdline-partition.txt"
+ * For further information, see "Documentation/block/cmdline-partition.txt"
  *
  */
 
index 100edcc5e3122323eb8f4087305e623c666eceb0..4c94a79991bb6d8ae0d2e12ae8c647e2fe22f7fe 100644 (file)
@@ -1413,7 +1413,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
  *   long file_ofs
  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
  */
-static void fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note)
 {
        struct vm_area_struct *vma;
        unsigned count, size, names_ofs, remaining, n;
@@ -1428,11 +1428,11 @@ static void fill_files_note(struct memelfnote *note)
        names_ofs = (2 + 3 * count) * sizeof(data[0]);
  alloc:
        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
-               goto err;
+               return -EINVAL;
        size = round_up(size, PAGE_SIZE);
        data = vmalloc(size);
        if (!data)
-               goto err;
+               return -ENOMEM;
 
        start_end_ofs = data + 2;
        name_base = name_curpos = ((char *)data) + names_ofs;
@@ -1485,7 +1485,7 @@ static void fill_files_note(struct memelfnote *note)
 
        size = name_curpos - (char *)data;
        fill_note(note, "CORE", NT_FILE, size, data);
err: ;
      return 0;
 }
 
 #ifdef CORE_DUMP_USE_REGSET
@@ -1686,8 +1686,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
        fill_auxv_note(&info->auxv, current->mm);
        info->size += notesize(&info->auxv);
 
-       fill_files_note(&info->files);
-       info->size += notesize(&info->files);
+       if (fill_files_note(&info->files) == 0)
+               info->size += notesize(&info->files);
 
        return 1;
 }
@@ -1719,7 +1719,8 @@ static int write_note_info(struct elf_note_info *info,
                        return 0;
                if (first && !writenote(&info->auxv, file, foffset))
                        return 0;
-               if (first && !writenote(&info->files, file, foffset))
+               if (first && info->files.data &&
+                               !writenote(&info->files, file, foffset))
                        return 0;
 
                for (i = 1; i < info->thread_notes; ++i)
@@ -1806,6 +1807,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
 
 struct elf_note_info {
        struct memelfnote *notes;
+       struct memelfnote *notes_files;
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;
@@ -1896,9 +1898,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 
        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
        fill_auxv_note(info->notes + 3, current->mm);
-       fill_files_note(info->notes + 4);
+       info->numnote = 4;
 
-       info->numnote = 5;
+       if (fill_files_note(info->notes + info->numnote) == 0) {
+               info->notes_files = info->notes + info->numnote;
+               info->numnote++;
+       }
 
        /* Try to dump the FPU. */
        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
@@ -1960,8 +1965,9 @@ static void free_note_info(struct elf_note_info *info)
                kfree(list_entry(tmp, struct elf_thread_status, list));
        }
 
-       /* Free data allocated by fill_files_note(): */
-       vfree(info->notes[4].data);
+       /* Free data possibly allocated by fill_files_note(): */
+       if (info->notes_files)
+               vfree(info->notes_files->data);
 
        kfree(info->prstatus);
        kfree(info->psinfo);
@@ -2044,7 +2050,7 @@ static int elf_core_dump(struct coredump_params *cprm)
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff, foffset;
-       struct elf_note_info info;
+       struct elf_note_info info = { };
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
index 0ba679866e504ec126720f3eb7b78e210703f538..da276640f7763d2463c0fa99cb832a86d243f10c 100644 (file)
@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
        clear_buffer_nilfs_volatile(bh);
        clear_buffer_nilfs_checked(bh);
        clear_buffer_nilfs_redirected(bh);
+       clear_buffer_async_write(bh);
        clear_buffer_dirty(bh);
        if (nilfs_page_buffers_clean(page))
                __nilfs_clear_page_dirty(page);
@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
                                        "discard block %llu, size %zu",
                                        (u64)bh->b_blocknr, bh->b_size);
                        }
+                       clear_buffer_async_write(bh);
                        clear_buffer_dirty(bh);
                        clear_buffer_nilfs_volatile(bh);
                        clear_buffer_nilfs_checked(bh);
index bd88a7461063bba02f31c6f873902c9c8e87e943..9f6b486b6c01a0e6711ca5930e78474d0739e35e 100644 (file)
@@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
 
                bh = head = page_buffers(page);
                do {
-                       if (!buffer_dirty(bh))
+                       if (!buffer_dirty(bh) || buffer_async_write(bh))
                                continue;
                        get_bh(bh);
                        list_add_tail(&bh->b_assoc_buffers, listp);
@@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        bh = head = page_buffers(pvec.pages[i]);
                        do {
-                               if (buffer_dirty(bh)) {
+                               if (buffer_dirty(bh) &&
+                                               !buffer_async_write(bh)) {
                                        get_bh(bh);
                                        list_add_tail(&bh->b_assoc_buffers,
                                                      listp);
@@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 
                list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
                                    b_assoc_buffers) {
+                       set_buffer_async_write(bh);
                        if (bh->b_page != bd_page) {
                                if (bd_page) {
                                        lock_page(bd_page);
@@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 
                list_for_each_entry(bh, &segbuf->sb_payload_buffers,
                                    b_assoc_buffers) {
+                       set_buffer_async_write(bh);
                        if (bh == segbuf->sb_super_root) {
                                if (bh->b_page != bd_page) {
                                        lock_page(bd_page);
@@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
        list_for_each_entry(segbuf, logs, sb_list) {
                list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
                                    b_assoc_buffers) {
+                       clear_buffer_async_write(bh);
                        if (bh->b_page != bd_page) {
                                if (bd_page)
                                        end_page_writeback(bd_page);
@@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
 
                list_for_each_entry(bh, &segbuf->sb_payload_buffers,
                                    b_assoc_buffers) {
+                       clear_buffer_async_write(bh);
                        if (bh == segbuf->sb_super_root) {
                                if (bh->b_page != bd_page) {
                                        end_page_writeback(bd_page);
@@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
                                    b_assoc_buffers) {
                        set_buffer_uptodate(bh);
                        clear_buffer_dirty(bh);
+                       clear_buffer_async_write(bh);
                        if (bh->b_page != bd_page) {
                                if (bd_page)
                                        end_page_writeback(bd_page);
@@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
                                    b_assoc_buffers) {
                        set_buffer_uptodate(bh);
                        clear_buffer_dirty(bh);
+                       clear_buffer_async_write(bh);
                        clear_buffer_delay(bh);
                        clear_buffer_nilfs_volatile(bh);
                        clear_buffer_nilfs_redirected(bh);
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b1a49677fe2540e93aae3b40b1c235949ac756cf 100644 (file)
@@ -0,0 +1 @@
+/* no content, but patch(1) dislikes empty files */
index f7f1d7169b11c332612ba09a1663141870e08849..089743ade734fc564e18bfa7be35608da68d6fde 100644 (file)
@@ -158,6 +158,26 @@ static inline bool balloon_page_movable(struct page *page)
        return false;
 }
 
+/*
+ * isolated_balloon_page - identify an isolated balloon page on private
+ *                        compaction/migration page lists.
+ *
+ * After a compaction thread isolates a balloon page for migration, it raises
+ * the page refcount to prevent concurrent compaction threads from re-isolating
+ * the same page. For that reason putback_movable_pages(), or other routines
+ * that need to identify isolated balloon pages on private pagelists, cannot
+ * rely on balloon_page_movable() to accomplish the task.
+ */
+static inline bool isolated_balloon_page(struct page *page)
+{
+       /* Already isolated balloon pages, by default, have a raised refcount */
+       if (page_flags_cleared(page) && !page_mapped(page) &&
+           page_count(page) >= 2)
+               return __is_movable_balloon_page(page);
+
+       return false;
+}
+
 /*
  * balloon_page_insert - insert a page into the balloon's page list and make
  *                      the page->mapping assignment accordingly.
@@ -243,6 +263,11 @@ static inline bool balloon_page_movable(struct page *page)
        return false;
 }
 
+static inline bool isolated_balloon_page(struct page *page)
+{
+       return false;
+}
+
 static inline bool balloon_page_isolate(struct page *page)
 {
        return false;
index 9e4310c546ae93fa85f3932675bcd53ae33ece7a..558aa91186b6ced1a27b1e05b65c5ee129e0a175 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -695,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                if (ipcperms(ns, &msq->q_perm, S_IWUGO))
                        goto out_unlock0;
 
+               /* raced with RMID? */
+               if (msq->q_perm.deleted) {
+                       err = -EIDRM;
+                       goto out_unlock0;
+               }
+
                err = security_msg_queue_msgsnd(msq, msg, msgflg);
                if (err)
                        goto out_unlock0;
@@ -901,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
                        goto out_unlock1;
 
                ipc_lock_object(&msq->q_perm);
+
+               /* raced with RMID? */
+               if (msq->q_perm.deleted) {
+                       msg = ERR_PTR(-EIDRM);
+                       goto out_unlock0;
+               }
+
                msg = find_msg(msq, &msgtyp, mode);
                if (!IS_ERR(msg)) {
                        /*
index 19c8b980d1feff430d82297a53be528cea801c42..8c4f59b0204a2821811a4b28fd48e0ce6163d987 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -252,71 +252,113 @@ static void sem_rcu_free(struct rcu_head *head)
        ipc_rcu_free(head);
 }
 
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+       int i;
+       struct sem *sem;
+
+       if (sma->complex_count)  {
+               /* The thread that increased sma->complex_count waited on
+                * all sem->lock locks. Thus we don't need to wait again.
+                */
+               return;
+       }
+
+       for (i = 0; i < sma->sem_nsems; i++) {
+               sem = sma->sem_base + i;
+               spin_unlock_wait(&sem->lock);
+       }
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                              int nsops)
 {
-       int locknum;
- again:
-       if (nsops == 1 && !sma->complex_count) {
-               struct sem *sem = sma->sem_base + sops->sem_num;
+       struct sem *sem;
 
-               /* Lock just the semaphore we are interested in. */
-               spin_lock(&sem->lock);
+       if (nsops != 1) {
+               /* Complex operation - acquire a full lock */
+               ipc_lock_object(&sma->sem_perm);
 
-               /*
-                * If sma->complex_count was set while we were spinning,
-                * we may need to look at things we did not lock here.
+               /* And wait until all simple ops that are processed
+                * right now have dropped their locks.
                 */
-               if (unlikely(sma->complex_count)) {
-                       spin_unlock(&sem->lock);
-                       goto lock_array;
-               }
+               sem_wait_array(sma);
+               return -1;
+       }
 
+       /*
+        * Only one semaphore affected - try to optimize locking.
+        * The rules are:
+        * - optimized locking is possible if no complex operation
+        *   is either enqueued or processed right now.
+        * - The test for enqueued complex ops is simple:
+        *      sma->complex_count != 0
+        * - Testing for complex ops that are processed right now is
+        *   a bit more difficult. Complex ops acquire the full lock
+        *   and first wait that the running simple ops have completed.
+        *   (see above)
+        *   Thus: If we own a simple lock and the global lock is free
+        *      and complex_count is now 0, then it will stay 0 and
+        *      thus just locking sem->lock is sufficient.
+        */
+       sem = sma->sem_base + sops->sem_num;
+
+       if (sma->complex_count == 0) {
                /*
-                * Another process is holding the global lock on the
-                * sem_array; we cannot enter our critical section,
-                * but have to wait for the global lock to be released.
+                * It appears that no complex operation is around.
+                * Acquire the per-semaphore lock.
                 */
-               if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-                       spin_unlock(&sem->lock);
-                       spin_unlock_wait(&sma->sem_perm.lock);
-                       goto again;
+               spin_lock(&sem->lock);
+
+               /* Then check that the global lock is free */
+               if (!spin_is_locked(&sma->sem_perm.lock)) {
+                       /* spin_is_locked() is not a memory barrier */
+                       smp_mb();
+
+                       /* Now repeat the test of complex_count:
+                        * It can't change anymore until we drop sem->lock.
+                        * Thus: if is now 0, then it will stay 0.
+                        */
+                       if (sma->complex_count == 0) {
+                               /* fast path successful! */
+                               return sops->sem_num;
+                       }
                }
+               spin_unlock(&sem->lock);
+       }
 
-               locknum = sops->sem_num;
+       /* slow path: acquire the full lock */
+       ipc_lock_object(&sma->sem_perm);
+
+       if (sma->complex_count == 0) {
+               /* False alarm:
+                * There is no complex operation, thus we can switch
+                * back to the fast path.
+                */
+               spin_lock(&sem->lock);
+               ipc_unlock_object(&sma->sem_perm);
+               return sops->sem_num;
        } else {
-               int i;
-               /*
-                * Lock the semaphore array, and wait for all of the
-                * individual semaphore locks to go away.  The code
-                * above ensures no new single-lock holders will enter
-                * their critical section while the array lock is held.
+               /* Not a false alarm, thus complete the sequence for a
+                * full lock.
                 */
- lock_array:
-               ipc_lock_object(&sma->sem_perm);
-               for (i = 0; i < sma->sem_nsems; i++) {
-                       struct sem *sem = sma->sem_base + i;
-                       spin_unlock_wait(&sem->lock);
-               }
-               locknum = -1;
+               sem_wait_array(sma);
+               return -1;
        }
-       return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
@@ -875,6 +917,24 @@ again:
        return semop_completed;
 }
 
+/**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+       if (sops == NULL) {
+               sma->sem_base[0].sem_otime = get_seconds();
+       } else {
+               sma->sem_base[sops[0].sem_num].sem_otime =
+                                                       get_seconds();
+       }
+}
+
 /**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
@@ -925,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
                        }
                }
        }
-       if (otime) {
-               if (sops == NULL) {
-                       sma->sem_base[0].sem_otime = get_seconds();
-               } else {
-                       sma->sem_base[sops[0].sem_num].sem_otime =
-                                                               get_seconds();
-               }
-       }
+       if (otime)
+               set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
  *   semncnt        number of tasks waiting on semval being nonzero
  *   semzcnt        number of tasks waiting on semval being zero
@@ -1797,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
        error = perform_atomic_semop(sma, sops, nsops, un,
                                        task_tgid_vnr(current));
-       if (error <= 0) {
-               if (alter && error == 0)
+       if (error == 0) {
+               /* If the operation was successful, then do
+                * the required updates.
+                */
+               if (alter)
                        do_smart_update(sma, sops, nsops, 1, &tasks);
-
-               goto out_unlock_free;
+               else
+                       set_semotime(sma, sops);
        }
+       if (error <= 0)
+               goto out_unlock_free;
 
        /* We need to sleep on this operation, so we put the current
         * task into the pending queue and go to sleep.
@@ -2061,6 +2119,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
        struct sem_array *sma = it;
        time_t sem_otime;
 
+       /*
+        * The proc interface isn't aware of sem_lock(), it calls
+        * ipc_lock_object() directly (in sysvipc_find_ipc).
+        * In order to stay compatible with sem_lock(), we must wait until
+        * all simple semop() calls have left their critical regions.
+        */
+       sem_wait_array(sma);
+
        sem_otime = get_semotime(sma);
 
        return seq_printf(s,
index fb326365b69466158b03b726e53c4fe15448fa89..b086006c59e7c6957a51984a3ec101ea2db8525d 100644 (file)
@@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
        DECLARE_COMPLETION_ONSTACK(done);
        int retval = 0;
 
+       if (!sub_info->path) {
+               call_usermodehelper_freeinfo(sub_info);
+               return -EINVAL;
+       }
        helper_lock();
        if (!khelper_wq || usermodehelper_disabled) {
                retval = -EBUSY;
index ebe5e80b10f8495a3d6459e0203821f987237df0..9b9a26698144e126486e162955e49561514b1ce8 100644 (file)
@@ -273,6 +273,11 @@ void free_pid(struct pid *pid)
                         */
                        wake_up_process(ns->child_reaper);
                        break;
+               case PIDNS_HASH_ADDING:
+                       /* Handle a fork failure of the first process */
+                       WARN_ON(ns->child_reaper);
+                       ns->nr_hashed = 0;
+                       /* fall through */
                case 0:
                        schedule_work(&ns->proc_work);
                        break;
index c9f0a4339a7dafc2ba7295e49ad8fcdda8fa13de..5a7d58fb883bfa1c4917e48d251cd132c8d9baf9 100644 (file)
@@ -204,6 +204,8 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        struct bio_vec *to, *from;
        unsigned i;
 
+       if (force)
+               goto bounce;
        bio_for_each_segment(from, *bio_orig, i)
                if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
                        goto bounce;
index c43789388cd8667536bfd9644a5bbe2cd0d35f3e..b5326b141a251905a96cd5cd995063a29ca20f91 100644 (file)
@@ -677,6 +677,13 @@ static void isolate_freepages(struct zone *zone,
                                        pfn -= pageblock_nr_pages) {
                unsigned long isolated;
 
+               /*
+                * This can iterate a massively long zone without finding any
+                * suitable migration targets, so periodically check if we need
+                * to schedule.
+                */
+               cond_resched();
+
                if (!pfn_valid(pfn))
                        continue;
 
index afc2daa91c609dd8aab70f50fb58fd9100ed8326..4c84678371eb5b5905cc8c4386b512ec57e4f5e3 100644 (file)
@@ -20,8 +20,6 @@ static int hwpoison_inject(void *data, u64 val)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!hwpoison_filter_enable)
-               goto inject;
        if (!pfn_valid(pfn))
                return -ENXIO;
 
@@ -33,6 +31,9 @@ static int hwpoison_inject(void *data, u64 val)
        if (!get_page_unless_zero(hpage))
                return 0;
 
+       if (!hwpoison_filter_enable)
+               goto inject;
+
        if (!PageLRU(p) && !PageHuge(p))
                shake_page(p, 0);
        /*
index 6975bc812542d2c13642363d928005f355199c54..539eeb96b323bf649f83783e0dddcb4f907e1d6e 100644 (file)
@@ -343,10 +343,11 @@ static long madvise_remove(struct vm_area_struct *vma,
  */
 static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
 {
+       struct page *p;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       for (; start < end; start += PAGE_SIZE) {
-               struct page *p;
+       for (; start < end; start += PAGE_SIZE <<
+                               compound_order(compound_head(p))) {
                int ret;
 
                ret = get_user_pages_fast(start, 1, 0, &p);
index 947ed5413279261a830eeaeb42d9392aea0f8fa8..bf3351b5115e54915a3d7eaa718d10a9771b2c5f 100644 (file)
@@ -1114,8 +1114,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
                         * shake_page could have turned it free.
                         */
                        if (is_free_buddy_page(p)) {
-                               action_result(pfn, "free buddy, 2nd try",
-                                               DELAYED);
+                               if (flags & MF_COUNT_INCREASED)
+                                       action_result(pfn, "free buddy", DELAYED);
+                               else
+                                       action_result(pfn, "free buddy, 2nd try", DELAYED);
                                return 0;
                        }
                        action_result(pfn, "non LRU", IGNORED);
@@ -1349,7 +1351,7 @@ int unpoison_memory(unsigned long pfn)
         * worked by memory_failure() and the page lock is not held yet.
         * In such case, we yield to memory_failure() and make unpoison fail.
         */
-       if (PageTransHuge(page)) {
+       if (!PageHuge(page) && PageTransHuge(page)) {
                pr_info("MCE: Memory failure is now running on %#lx\n", pfn);
                        return 0;
        }
index 9c8d5f59d30bb87e63c9990c085eb646e69b4ed7..a26bccd44ccb0a907662c08135399462bd816b9c 100644 (file)
@@ -107,7 +107,7 @@ void putback_movable_pages(struct list_head *l)
                list_del(&page->lru);
                dec_zone_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               if (unlikely(balloon_page_movable(page)))
+               if (unlikely(isolated_balloon_page(page)))
                        balloon_page_putback(page);
                else
                        putback_lru_page(page);
index 67ba6da7d0e311b1b45007bbd0e37bd78db0f3a8..d480cd6fc475854259bdd51021d5125dbdfbe479 100644 (file)
@@ -379,10 +379,14 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
 
        /*
         * Initialize pte walk starting at the already pinned page where we
-        * are sure that there is a pte.
+        * are sure that there is a pte, as it was pinned under the same
+        * mmap_sem write op.
         */
        pte = get_locked_pte(vma->vm_mm, start, &ptl);
-       end = min(end, pmd_addr_end(start, end));
+       /* Make sure we do not cross the page table boundary */
+       end = pgd_addr_end(start, end);
+       end = pud_addr_end(start, end);
+       end = pmd_addr_end(start, end);
 
        /* The page next to the pinned page is the first we will try to get */
        start += PAGE_SIZE;
index 0ee638f76ebe584cd40d7541bac886b96b03162e..dd886fac451ab6ab7d6a3132fd2587c10538f300 100644 (file)
@@ -6366,10 +6366,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
                list_del(&page->lru);
                rmv_page_order(page);
                zone->free_area[order].nr_free--;
-#ifdef CONFIG_HIGHMEM
-               if (PageHighMem(page))
-                       totalhigh_pages -= 1 << order;
-#endif
                for (i = 0; i < (1 << order); i++)
                        SetPageReserved((page+i));
                pfn += (1 << order);
index beb35778c69f147e47c30b44ee151953c68c4969..53f2f82f83ae0d16bf19646cdb5b3bce5fc4e4cf 100644 (file)
@@ -48,6 +48,7 @@
 #include <asm/div64.h>
 
 #include <linux/swapops.h>
+#include <linux/balloon_compaction.h>
 
 #include "internal.h"
 
@@ -1113,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
        LIST_HEAD(clean_pages);
 
        list_for_each_entry_safe(page, next, page_list, lru) {
-               if (page_is_file_cache(page) && !PageDirty(page)) {
+               if (page_is_file_cache(page) && !PageDirty(page) &&
+                   !isolated_balloon_page(page)) {
                        ClearPageActive(page);
                        list_move(&page->lru, &clean_pages);
                }