Merge branch 'master' into next

author James Morris <jmorris@namei.org>
Fri, 14 Nov 2008 00:29:12 +0000 (11:29 +1100)
committer James Morris <jmorris@namei.org>
Fri, 14 Nov 2008 00:29:12 +0000 (11:29 +1100)
diff --combined Documentation/kernel-parameters.txt

index 784443acca9c7307f7e44b174ef713853ac21557,c600c4ffc6573a1b8cbf7ae811c31a047dbb1cfc..b3b82f92f1dd523c6537a6871df74369d6a8a411
--- 1/Documentation/kernel-parameters.txt
--- 2/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@@ -198,59 -198,42 +198,42 @@@ and is between 256 and 4096 characters
                         that require a timer override, but don't have
                         HPET
   
-       acpi.debug_layer=       [HW,ACPI]
+       acpi_backlight= [HW,ACPI]
+                       acpi_backlight=vendor
+                       acpi_backlight=video
+                       If set to vendor, prefer vendor specific driver
+                       (e.g. thinkpad_acpi, sony_acpi, etc.) instead
+                       of the ACPI video.ko driver.
+ 
+       acpi_display_output=    [HW,ACPI]
+                       acpi_display_output=vendor
+                       acpi_display_output=video
+                       See above.
+ 
+       acpi.debug_layer=       [HW,ACPI,ACPI_DEBUG]
+       acpi.debug_level=       [HW,ACPI,ACPI_DEBUG]
                         Format: <int>
-                       Each bit of the <int> indicates an ACPI debug layer,
-                       1: enable, 0: disable. It is useful for boot time
-                       debugging. After system has booted up, it can be set
-                       via /sys/module/acpi/parameters/debug_layer.
-                       CONFIG_ACPI_DEBUG must be enabled for this to produce any output.
-                       Available bits (add the numbers together) to enable debug output
-                       for specific parts of the ACPI subsystem:
-                       0x01 utilities 0x02 hardware 0x04 events 0x08 tables
-                       0x10 namespace 0x20 parser 0x40 dispatcher
-                       0x80 executer 0x100 resources 0x200 acpica debugger
-                       0x400 os services 0x800 acpica disassembler.
-                       The number can be in decimal or prefixed with 0x in hex.
-                       Warning: Many of these options can produce a lot of
-                       output and make your system unusable. Be very careful.
- 
-       acpi.debug_level=       [HW,ACPI]
-                       Format: <int>
-                       Each bit of the <int> indicates an ACPI debug level,
-                       which corresponds to the level in an ACPI_DEBUG_PRINT
-                       statement.  After system has booted up, this mask
-                       can be set via /sys/module/acpi/parameters/debug_level.
- 
-                       CONFIG_ACPI_DEBUG must be enabled for this to produce
-                       any output.  The number can be in decimal or prefixed
-                       with 0x in hex.  Some of these options produce so much
-                       output that the system is unusable.
- 
-                       The following global components are defined by the
-                       ACPI CA:
-                              0x01 error
-                              0x02 warn
-                              0x04 init
-                              0x08 debug object
-                              0x10 info
-                              0x20 init names
-                              0x40 parse
-                              0x80 load
-                             0x100 dispatch
-                             0x200 execute
-                             0x400 names
-                             0x800 operation region
-                            0x1000 bfield
-                            0x2000 tables
-                            0x4000 values
-                            0x8000 objects
-                           0x10000 resources
-                           0x20000 user requests
-                           0x40000 package
-                       The number can be in decimal or prefixed with 0x in hex.
-                       Warning: Many of these options can produce a lot of
-                       output and make your system unusable. Be very careful.
+                       CONFIG_ACPI_DEBUG must be enabled to produce any ACPI
+                       debug output.  Bits in debug_layer correspond to a
+                       _COMPONENT in an ACPI source file, e.g.,
+                           #define _COMPONENT ACPI_PCI_COMPONENT
+                       Bits in debug_level correspond to a level in
+                       ACPI_DEBUG_PRINT statements, e.g.,
+                           ACPI_DEBUG_PRINT((ACPI_DB_INFO, ...
+                       See Documentation/acpi/debug.txt for more information
+                       about debug layers and levels.
+ 
+                       Enable AML "Debug" output, i.e., stores to the Debug
+                       object while interpreting AML:
+                           acpi.debug_layer=0xffffffff acpi.debug_level=0x2
+                       Enable PCI/PCI interrupt routing info messages:
+                           acpi.debug_layer=0x400000 acpi.debug_level=0x4
+                       Enable all messages related to ACPI hardware:
+                           acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
+ 
+                       Some values produce so much output that the system is
+                       unusable.  The "log_buf_len" parameter may be useful
+                       if you need to capture more output.
   
         acpi.power_nocheck=     [HW,ACPI]
                         Format: 1/0 enable/disable the check of power state.
@@@ -995,13 -978,15 +978,15 @@@
                         Format:
                         <cpu number>,...,<cpu number>
                         or
-                       <cpu number>-<cpu number>  (must be a positive range in ascending order)
+                       <cpu number>-<cpu number>
+                       (must be a positive range in ascending order)
                         or a mixture
                         <cpu number>,...,<cpu number>-<cpu number>
+ 
                         This option can be used to specify one or more CPUs
                         to isolate from the general SMP balancing and scheduling
-                       algorithms. The only way to move a process onto or off
-                       an "isolated" CPU is via the CPU affinity syscalls.
+                       algorithms. You can move a process onto or off an
+                       "isolated" CPU via the CPU affinity syscalls or cpuset.
                         <cpu number> begins at 0 and the maximum value is
                         "number of CPUs in system - 1".
   
@@@ -1459,10 -1444,6 +1444,10 @@@
                         instruction doesn't work correctly and not to
                         use it.
   
+ +      no_file_caps    Tells the kernel not to honor file capabilities.  The
+ +                      only way then for a file to be executed with privilege
+ +                      is to be setuid root or executed by root.
+ +
         nohalt          [IA-64] Tells the kernel not to use the power saving
                         function PAL_HALT_LIGHT when idle. This increases
                         power-consumption. On the positive side, it reduces
@@@ -1474,8 -1455,6 +1459,6 @@@
                         Valid arguments: on, off
                         Default: on
   
-       noirqbalance    [X86-32,SMP,KNL] Disable kernel irq balancing
- 
         noirqdebug      [X86-32] Disables the code which attempts to detect and
                         disable unhandled interrupt sources.
   
diff --combined fs/autofs4/dev-ioctl.c

index ec16255d27dd49aaf8980408d10c4b30a8d01ed3,33bf8cbfd05172cc020200d4f4eec08d23eab420..63b7c7afe8df96172b1fe9a11bbf95e1c86c0e36
--- 1/fs/autofs4/dev-ioctl.c
--- 2/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@@ -128,9 -128,10 +128,10 @@@ static inline void free_dev_ioctl(struc
    */
   static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
   {
-       int err = -EINVAL;
+       int err;
   
-       if (check_dev_ioctl_version(cmd, param)) {
+       err = check_dev_ioctl_version(cmd, param);
+       if (err) {
                 AUTOFS_WARN("invalid device control module version "
                      "supplied for cmd(0x%08x)", cmd);
                 goto out;
@@@ -307,8 -308,7 +308,8 @@@ static int autofs_dev_ioctl_open_mountp
                         goto out;
                 }
   
- -              filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
+ +              filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+ +                                 current_cred());
                 if (IS_ERR(filp)) {
                         err = PTR_ERR(filp);
                         goto out;
diff --combined fs/ext4/ialloc.c

index c8ea50ed023a7322876c74a7463906ab12b496f5,2a117e286e5420c6d5567961a14b340162b14a67..08cac9fcace287e9c9fbd29677a4ba8e84009290
--- 1/fs/ext4/ialloc.c
--- 2/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@@ -718,6 -718,8 +718,8 @@@ got
                         gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
                         free = ext4_free_blocks_after_init(sb, group, gdp);
                         gdp->bg_free_blocks_count = cpu_to_le16(free);
+                       gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
+                                                               gdp);
                 }
                 spin_unlock(sb_bgl_lock(sbi, group));
   
@@@ -785,7 -787,7 +787,7 @@@
                 spin_unlock(sb_bgl_lock(sbi, flex_group));
         }
   
- -      inode->i_uid = current->fsuid;
+ +      inode->i_uid = current_fsuid();
         if (test_opt(sb, GRPID))
                 inode->i_gid = dir->i_gid;
         else if (dir->i_mode & S_ISGID) {
@@@ -793,7 -795,7 +795,7 @@@
                 if (S_ISDIR(mode))
                         mode |= S_ISGID;
         } else
- -              inode->i_gid = current->fsgid;
+ +              inode->i_gid = current_fsgid();
         inode->i_mode = mode;
   
         inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
diff --combined fs/fat/file.c

index 81e203288340cc85ab5c3c1c706e5744207f9392,f06a4e525eceff2b758ba0ff62ebd6e6f2fca9bb..0a7f4a9918b346d117a814b1911f11e1b72b22c7
--- 1/fs/fat/file.c
--- 2/fs/fat/file.c
+++ b/fs/fat/file.c
@@@ -10,13 -10,13 +10,13 @@@
   #include <linux/module.h>
   #include <linux/mount.h>
   #include <linux/time.h>
- #include <linux/msdos_fs.h>
   #include <linux/buffer_head.h>
   #include <linux/writeback.h>
   #include <linux/backing-dev.h>
   #include <linux/blkdev.h>
   #include <linux/fsnotify.h>
   #include <linux/security.h>
+ #include "fat.h"
   
   int fat_generic_ioctl(struct inode *inode, struct file *filp,
                       unsigned int cmd, unsigned long arg)
@@@ -29,10 -29,9 +29,9 @@@
         {
                 u32 attr;
   
-               if (inode->i_ino == MSDOS_ROOT_INO)
-                       attr = ATTR_DIR;
-               else
-                       attr = fat_attr(inode);
+               mutex_lock(&inode->i_mutex);
+               attr = fat_make_attrs(inode);
+               mutex_unlock(&inode->i_mutex);
   
                 return put_user(attr, user_attr);
         }
@@@ -62,20 -61,16 +61,16 @@@
                 /* Merge in ATTR_VOLUME and ATTR_DIR */
                 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
                         (is_dir ? ATTR_DIR : 0);
-               oldattr = fat_attr(inode);
+               oldattr = fat_make_attrs(inode);
   
                 /* Equivalent to a chmod() */
                 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
                 ia.ia_ctime = current_fs_time(inode->i_sb);
-               if (is_dir) {
-                       ia.ia_mode = MSDOS_MKMODE(attr,
-                               S_IRWXUGO & ~sbi->options.fs_dmask)
-                               | S_IFDIR;
-               } else {
-                       ia.ia_mode = MSDOS_MKMODE(attr,
-                               (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO))
-                               & ~sbi->options.fs_fmask)
-                               | S_IFREG;
+               if (is_dir)
+                       ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
+               else {
+                       ia.ia_mode = fat_make_mode(sbi, attr,
+                               S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
                 }
   
                 /* The root directory has no attributes */
@@@ -115,7 -110,7 +110,7 @@@
                                 inode->i_flags &= S_IMMUTABLE;
                 }
   
-               MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
+               fat_save_attrs(inode, attr);
                 mark_inode_dirty(inode);
   up:
                 mnt_drop_write(filp->f_path.mnt);
@@@ -274,7 -269,7 +269,7 @@@ static int fat_sanitize_mode(const stru
   
         /*
          * Note, the basic check is already done by a caller of
-        * (attr->ia_mode & ~MSDOS_VALID_MODE)
+        * (attr->ia_mode & ~FAT_VALID_MODE)
          */
   
         if (S_ISREG(inode->i_mode))
@@@ -287,11 -282,18 +282,18 @@@
         /*
          * Of the r and x bits, all (subject to umask) must be present. Of the
          * w bits, either all (subject to umask) or none must be present.
+        *
+        * If fat_mode_can_hold_ro(inode) is false, can't change w bits.
          */
         if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
                 return -EPERM;
-       if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
-               return -EPERM;
+       if (fat_mode_can_hold_ro(inode)) {
+               if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
+                       return -EPERM;
+       } else {
+               if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
+                       return -EPERM;
+       }
   
         *mode_ptr &= S_IFMT | perm;
   
@@@ -302,7 -304,7 +304,7 @@@ static int fat_allow_set_time(struct ms
   {
         mode_t allow_utime = sbi->options.allow_utime;
   
- -      if (current->fsuid != inode->i_uid) {
+ +      if (current_fsuid() != inode->i_uid) {
                 if (in_group_p(inode->i_gid))
                         allow_utime >>= 3;
                 if (allow_utime & MAY_WRITE)
@@@ -314,13 -316,15 +316,15 @@@
   }
   
   #define TIMES_SET_FLAGS       (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+ /* valid file mode bits */
+ #define FAT_VALID_MODE        (S_IFREG | S_IFDIR | S_IRWXUGO)
   
   int fat_setattr(struct dentry *dentry, struct iattr *attr)
   {
         struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
         struct inode *inode = dentry->d_inode;
-       int error = 0;
         unsigned int ia_valid;
+       int error;
   
         /*
          * Expand the file. Since inode_setattr() updates ->i_size
@@@ -356,7 -360,7 +360,7 @@@
             ((attr->ia_valid & ATTR_GID) &&
              (attr->ia_gid != sbi->options.fs_gid)) ||
             ((attr->ia_valid & ATTR_MODE) &&
-            (attr->ia_mode & ~MSDOS_VALID_MODE)))
+            (attr->ia_mode & ~FAT_VALID_MODE)))
                 error = -EPERM;
   
         if (error) {
@@@ -374,7 -378,8 +378,8 @@@
                         attr->ia_valid &= ~ATTR_MODE;
         }
   
-       error = inode_setattr(inode, attr);
+       if (attr->ia_valid)
+               error = inode_setattr(inode, attr);
   out:
         return error;
   }
diff --combined fs/fat/inode.c

index cf621acd9e9a1385af8ac7b22b4dbcc11b2827e6,bdd8fb7be2ca48e5ac4e49b5b2dd147375f6c8b2..d937aaf77374f4b720aa85328da0779daad027ae
--- 1/fs/fat/inode.c
--- 2/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@@ -16,7 -16,6 +16,6 @@@
   #include <linux/slab.h>
   #include <linux/smp_lock.h>
   #include <linux/seq_file.h>
- #include <linux/msdos_fs.h>
   #include <linux/pagemap.h>
   #include <linux/mpage.h>
   #include <linux/buffer_head.h>
@@@ -27,7 -26,9 +26,9 @@@
   #include <linux/uio.h>
   #include <linux/writeback.h>
   #include <linux/log2.h>
+ #include <linux/hash.h>
   #include <asm/unaligned.h>
+ #include "fat.h"
   
   #ifndef CONFIG_FAT_DEFAULT_IOCHARSET
   /* if user don't select VFAT, this is undefined. */
@@@ -63,7 -64,7 +64,7 @@@ static inline int __fat_get_block(struc
         sector_t phys;
         int err, offset;
   
-       err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
+       err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
         if (err)
                 return err;
         if (phys) {
@@@ -93,7 -94,7 +94,7 @@@
         *max_blocks = min(mapped_blocks, *max_blocks);
         MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
   
-       err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
+       err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
         if (err)
                 return err;
   
@@@ -198,7 -199,14 +199,14 @@@ static ssize_t fat_direct_IO(int rw, st
   
   static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
   {
-       return generic_block_bmap(mapping, block, fat_get_block);
+       sector_t blocknr;
+ 
+       /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
+       mutex_lock(&mapping->host->i_mutex);
+       blocknr = generic_block_bmap(mapping, block, fat_get_block);
+       mutex_unlock(&mapping->host->i_mutex);
+ 
+       return blocknr;
   }
   
   static const struct address_space_operations fat_aops = {
@@@ -247,25 -255,21 +255,21 @@@ static void fat_hash_init(struct super_
                 INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
   }
   
- static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos)
+ static inline unsigned long fat_hash(loff_t i_pos)
   {
-       unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb;
-       tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2);
-       return tmp & FAT_HASH_MASK;
+       return hash_32(i_pos, FAT_HASH_BITS);
   }
   
   void fat_attach(struct inode *inode, loff_t i_pos)
   {
-       struct super_block *sb = inode->i_sb;
-       struct msdos_sb_info *sbi = MSDOS_SB(sb);
+       struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+       struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
   
         spin_lock(&sbi->inode_hash_lock);
         MSDOS_I(inode)->i_pos = i_pos;
-       hlist_add_head(&MSDOS_I(inode)->i_fat_hash,
-                       sbi->inode_hashtable + fat_hash(sb, i_pos));
+       hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
         spin_unlock(&sbi->inode_hash_lock);
   }
- 
   EXPORT_SYMBOL_GPL(fat_attach);
   
   void fat_detach(struct inode *inode)
@@@ -276,13 -280,12 +280,12 @@@
         hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
         spin_unlock(&sbi->inode_hash_lock);
   }
- 
   EXPORT_SYMBOL_GPL(fat_detach);
   
   struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
   {
         struct msdos_sb_info *sbi = MSDOS_SB(sb);
-       struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos);
+       struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
         struct hlist_node *_p;
         struct msdos_inode_info *i;
         struct inode *inode = NULL;
@@@ -341,8 -344,7 +344,7 @@@ static int fat_fill_inode(struct inode 
   
         if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
                 inode->i_generation &= ~1;
-               inode->i_mode = MSDOS_MKMODE(de->attr,
-                       S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
+               inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
                 inode->i_op = sbi->dir_ops;
                 inode->i_fop = &fat_dir_operations;
   
@@@ -359,10 -361,9 +361,9 @@@
                 inode->i_nlink = fat_subdirs(inode);
         } else { /* not a directory */
                 inode->i_generation |= 1;
-               inode->i_mode = MSDOS_MKMODE(de->attr,
-                   ((sbi->options.showexec && !is_exec(de->name + 8))
-                       ? S_IRUGO|S_IWUGO : S_IRWXUGO)
-                   & ~sbi->options.fs_fmask) | S_IFREG;
+               inode->i_mode = fat_make_mode(sbi, de->attr,
+                       ((sbi->options.showexec && !is_exec(de->name + 8))
+                        ? S_IRUGO|S_IWUGO : S_IRWXUGO));
                 MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
                 if (sbi->fat_bits == 32)
                         MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
@@@ -378,25 -379,16 +379,16 @@@
                 if (sbi->options.sys_immutable)
                         inode->i_flags |= S_IMMUTABLE;
         }
-       MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED;
+       fat_save_attrs(inode, de->attr);
+ 
         inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
                            & ~((loff_t)sbi->cluster_size - 1)) >> 9;
-       inode->i_mtime.tv_sec =
-               date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
-                             sbi->options.tz_utc);
-       inode->i_mtime.tv_nsec = 0;
+ 
+       fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
         if (sbi->options.isvfat) {
-               int secs = de->ctime_cs / 100;
-               int csecs = de->ctime_cs % 100;
-               inode->i_ctime.tv_sec  =
-                       date_dos2unix(le16_to_cpu(de->ctime),
-                                     le16_to_cpu(de->cdate),
-                                     sbi->options.tz_utc) + secs;
-               inode->i_ctime.tv_nsec = csecs * 10000000;
-               inode->i_atime.tv_sec =
-                       date_dos2unix(0, le16_to_cpu(de->adate),
-                                     sbi->options.tz_utc);
-               inode->i_atime.tv_nsec = 0;
+               fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
+                                 de->cdate, de->ctime_cs);
+               fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
         } else
                 inode->i_ctime = inode->i_atime = inode->i_mtime;
   
@@@ -443,13 -435,8 +435,8 @@@ static void fat_delete_inode(struct ino
   
   static void fat_clear_inode(struct inode *inode)
   {
-       struct super_block *sb = inode->i_sb;
-       struct msdos_sb_info *sbi = MSDOS_SB(sb);
- 
-       spin_lock(&sbi->inode_hash_lock);
         fat_cache_inval_inode(inode);
-       hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
-       spin_unlock(&sbi->inode_hash_lock);
+       fat_detach(inode);
   }
   
   static void fat_write_super(struct super_block *sb)
@@@ -555,6 -542,20 +542,20 @@@ static int fat_statfs(struct dentry *de
         return 0;
   }
   
+ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
+                                   struct inode *inode)
+ {
+       loff_t i_pos;
+ #if BITS_PER_LONG == 32
+       spin_lock(&sbi->inode_hash_lock);
+ #endif
+       i_pos = MSDOS_I(inode)->i_pos;
+ #if BITS_PER_LONG == 32
+       spin_unlock(&sbi->inode_hash_lock);
+ #endif
+       return i_pos;
+ }
+ 
   static int fat_write_inode(struct inode *inode, int wait)
   {
         struct super_block *sb = inode->i_sb;
@@@ -564,9 -565,12 +565,12 @@@
         loff_t i_pos;
         int err;
   
+       if (inode->i_ino == MSDOS_ROOT_INO)
+               return 0;
+ 
   retry:
-       i_pos = MSDOS_I(inode)->i_pos;
-       if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
+       i_pos = fat_i_pos_read(sbi, inode);
+       if (!i_pos)
                 return 0;
   
         bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
@@@ -588,19 -592,17 +592,17 @@@
                 raw_entry->size = 0;
         else
                 raw_entry->size = cpu_to_le32(inode->i_size);
-       raw_entry->attr = fat_attr(inode);
+       raw_entry->attr = fat_make_attrs(inode);
         raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
         raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
-       fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
-                         &raw_entry->date, sbi->options.tz_utc);
+       fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
+                         &raw_entry->date, NULL);
         if (sbi->options.isvfat) {
                 __le16 atime;
-               fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
-                                 &raw_entry->cdate, sbi->options.tz_utc);
-               fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
-                                 &raw_entry->adate, sbi->options.tz_utc);
-               raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
-                       inode->i_ctime.tv_nsec / 10000000;
+               fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
+                                 &raw_entry->cdate, &raw_entry->ctime_cs);
+               fat_time_unix2fat(sbi, &inode->i_atime, &atime,
+                                 &raw_entry->adate, NULL);
         }
         spin_unlock(&sbi->inode_hash_lock);
         mark_buffer_dirty(bh);
@@@ -819,8 -821,10 +821,10 @@@ static int fat_show_options(struct seq_
                         seq_puts(m, ",uni_xlate");
                 if (!opts->numtail)
                         seq_puts(m, ",nonumtail");
+               if (opts->rodir)
+                       seq_puts(m, ",rodir");
         }
-       if (sbi->options.flush)
+       if (opts->flush)
                 seq_puts(m, ",flush");
         if (opts->tz_utc)
                 seq_puts(m, ",tz=UTC");
@@@ -836,7 -840,7 +840,7 @@@ enum 
         Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
         Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
         Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-       Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
+       Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err,
   };
   
   static const match_table_t fat_tokens = {
@@@ -908,6 -912,7 +912,7 @@@ static const match_table_t vfat_tokens 
         {Opt_nonumtail_yes, "nonumtail=yes"},
         {Opt_nonumtail_yes, "nonumtail=true"},
         {Opt_nonumtail_yes, "nonumtail"},
+       {Opt_rodir, "rodir"},
         {Opt_err, NULL}
   };
   
@@@ -921,16 -926,19 +926,19 @@@ static int parse_options(char *options
   
         opts->isvfat = is_vfat;
   
- -      opts->fs_uid = current->uid;
- -      opts->fs_gid = current->gid;
+ +      opts->fs_uid = current_uid();
+ +      opts->fs_gid = current_gid();
         opts->fs_fmask = opts->fs_dmask = current->fs->umask;
         opts->allow_utime = -1;
         opts->codepage = fat_default_codepage;
         opts->iocharset = fat_default_iocharset;
-       if (is_vfat)
+       if (is_vfat) {
                 opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95;
-       else
+               opts->rodir = 0;
+       } else {
                 opts->shortname = 0;
+               opts->rodir = 1;
+       }
         opts->name_check = 'n';
         opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK =  0;
         opts->utf8 = opts->unicode_xlate = 0;
@@@ -1081,6 -1089,9 +1089,9 @@@
                 case Opt_nonumtail_yes:         /* empty or 1 or yes or true */
                         opts->numtail = 0;      /* negated option */
                         break;
+               case Opt_rodir:
+                       opts->rodir = 1;
+                       break;
   
                 /* obsolete mount options */
                 case Opt_obsolate:
@@@ -1126,7 -1137,7 +1137,7 @@@ static int fat_read_root(struct inode *
         inode->i_gid = sbi->options.fs_gid;
         inode->i_version++;
         inode->i_generation = 0;
-       inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
+       inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
         inode->i_op = sbi->dir_ops;
         inode->i_fop = &fat_dir_operations;
         if (sbi->fat_bits == 32) {
@@@ -1143,7 -1154,7 +1154,7 @@@
         MSDOS_I(inode)->i_logstart = 0;
         MSDOS_I(inode)->mmu_private = inode->i_size;
   
-       MSDOS_I(inode)->i_attrs = ATTR_NONE;
+       fat_save_attrs(inode, ATTR_DIR);
         inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
         inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
         inode->i_nlink = fat_subdirs(inode)+2;
diff --combined fs/namespace.c

index d8bc2c4704a50259b524d997a47d30bf8555c2b3,65b3dc844c879c059c004425487672eff21b5682..1c09cab8f7cf7c4faea9b1e0e4aa8fc6382824c4
--- 1/fs/namespace.c
--- 2/fs/namespace.c
+++ b/fs/namespace.c
@@@ -1176,7 -1176,7 +1176,7 @@@ static int mount_is_safe(struct path *p
         if (S_ISLNK(path->dentry->d_inode->i_mode))
                 return -EPERM;
         if (path->dentry->d_inode->i_mode & S_ISVTX) {
- -              if (current->uid != path->dentry->d_inode->i_uid)
+ +              if (current_uid() != path->dentry->d_inode->i_uid)
                         return -EPERM;
         }
         if (inode_permission(path->dentry->d_inode, MAY_WRITE))
@@@ -1815,8 -1815,8 +1815,8 @@@ static void shrink_submounts(struct vfs
                 while (!list_empty(&graveyard)) {
                         m = list_first_entry(&graveyard, struct vfsmount,
                                                 mnt_expire);
-                       touch_mnt_namespace(mnt->mnt_ns);
-                       umount_tree(mnt, 1, umounts);
+                       touch_mnt_namespace(m->mnt_ns);
+                       umount_tree(m, 1, umounts);
                 }
         }
   }
diff --combined fs/nfsd/vfs.c

index b59ec5a6ed246fc994adc27e2749a986c0a96edd,4433c8f001635862419edb603ecf5725462f3222..d1c5f787b365dc727fb65b81ca33e3cfb0ca2555
--- 1/fs/nfsd/vfs.c
--- 2/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@@ -671,7 -671,6 +671,7 @@@ __be3
   nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
                         int access, struct file **filp)
   {
+ +      const struct cred *cred = current_cred();
         struct dentry   *dentry;
         struct inode    *inode;
         int             flags = O_RDONLY|O_LARGEFILE;
@@@ -726,7 -725,7 +726,7 @@@
                 DQUOT_INIT(inode);
         }
         *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
- -                              flags);
+ +                          flags, cred);
         if (IS_ERR(*filp))
                 host_err = PTR_ERR(*filp);
   out_nfserr:
@@@ -1170,7 -1169,7 +1170,7 @@@ nfsd_create_setattr(struct svc_rqst *rq
          * send along the gid on create when it tries to implement
          * setgid directories via NFS:
          */
- -      if (current->fsuid != 0)
+ +      if (current_fsuid() != 0)
                 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
         if (iap->ia_valid)
                 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
@@@ -1876,11 -1875,11 +1876,11 @@@ static int nfsd_buffered_readdir(struc
                 return -ENOMEM;
   
         offset = *offsetp;
-       cdp->err = nfserr_eof; /* will be cleared on successful read */
   
         while (1) {
                 unsigned int reclen;
   
+               cdp->err = nfserr_eof; /* will be cleared on successful read */
                 buf.used = 0;
                 buf.full = 0;
   
@@@ -1913,9 -1912,6 +1913,6 @@@
                         de = (struct buffered_dirent *)((char *)de + reclen);
                 }
                 offset = vfs_llseek(file, 0, SEEK_CUR);
-               cdp->err = nfserr_eof;
-               if (!buf.full)
-                       break;
         }
   
    done:
@@@ -2005,7 -2001,7 +2002,7 @@@ nfsd_permission(struct svc_rqst *rqstp
                 IS_APPEND(inode)?       " append" : "",
                 __mnt_is_readonly(exp->ex_path.mnt)?    " ro" : "");
         dprintk("      owner %d/%d user %d/%d\n",
- -              inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
+ +              inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
   #endif
   
         /* Normally we reject any write/sattr etc access on a read-only file
@@@ -2048,7 -2044,7 +2045,7 @@@
          * with NFSv3.
          */
         if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
- -          inode->i_uid == current->fsuid)
+ +          inode->i_uid == current_fsuid())
                 return 0;
   
         /* This assumes  NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
diff --combined fs/ocfs2/namei.c

index f95f3654ee688b350de24c3feec5933764bb1de7,f4967e634ffd01283e299bb6a7ee0d886048b813..2545e7402efed1453d69347c676beaf2231043e0
--- 1/fs/ocfs2/namei.c
--- 2/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@@ -378,8 -378,8 +378,8 @@@ static int ocfs2_mknod_locked(struct oc
         }
   
         inode = new_inode(dir->i_sb);
-       if (IS_ERR(inode)) {
-               status = PTR_ERR(inode);
+       if (!inode) {
+               status = -ENOMEM;
                 mlog(ML_ERROR, "new_inode failed!\n");
                 goto leave;
         }
@@@ -421,13 -421,13 +421,13 @@@
         fe->i_blkno = cpu_to_le64(fe_blkno);
         fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
         fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
- -      fe->i_uid = cpu_to_le32(current->fsuid);
+ +      fe->i_uid = cpu_to_le32(current_fsuid());
         if (dir->i_mode & S_ISGID) {
                 fe->i_gid = cpu_to_le32(dir->i_gid);
                 if (S_ISDIR(mode))
                         mode |= S_ISGID;
         } else
- -              fe->i_gid = cpu_to_le32(current->fsgid);
+ +              fe->i_gid = cpu_to_le32(current_fsgid());
         fe->i_mode = cpu_to_le16(mode);
         if (S_ISCHR(mode) || S_ISBLK(mode))
                 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
@@@ -491,8 -491,10 +491,10 @@@ leave
                         brelse(*new_fe_bh);
                         *new_fe_bh = NULL;
                 }
-               if (inode)
+               if (inode) {
+                       clear_nlink(inode);
                         iput(inode);
+               }
         }
   
         mlog_exit(status);
diff --combined include/linux/sched.h

index 3443123b07096512f14c283a075043db1bdee54e,644ffbda17cad0ccfc977386ac81be74ef69cc6b..2036e9f260202bbe9129788a7f30a14d424e133c
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -247,6 -247,7 +247,7 @@@ extern void init_idle(struct task_struc
   extern void init_idle_bootup_task(struct task_struct *idle);
   
   extern int runqueue_is_locked(void);
+ extern void task_rq_unlock_wait(struct task_struct *p);
   
   extern cpumask_t nohz_cpu_mask;
   #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
@@@ -571,6 -572,12 +572,6 @@@ struct signal_struct 
          */
         struct rlimit rlim[RLIM_NLIMITS];
   
- -      /* keep the process-shared keyrings here so that they do the right
- -       * thing in threads created with CLONE_THREAD */
- -#ifdef CONFIG_KEYS
- -      struct key *session_keyring;    /* keyring inherited over fork */
- -      struct key *process_keyring;    /* keyring private to this process */
- -#endif
   #ifdef CONFIG_BSD_PROCESS_ACCT
         struct pacct_struct pacct;      /* per-process accounting information */
   #endif
@@@ -654,7 -661,6 +655,7 @@@ extern struct user_struct *find_user(ui
   extern struct user_struct root_user;
   #define INIT_USER (&root_user)
   
+ +
   struct backing_dev_info;
   struct reclaim_state;
   
@@@ -878,7 -884,38 +879,7 @@@ partition_sched_domains(int ndoms_new, 
   #endif        /* !CONFIG_SMP */
   
   struct io_context;                    /* See blkdev.h */
- -#define NGROUPS_SMALL         32
- -#define NGROUPS_PER_BLOCK     ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
- -struct group_info {
- -      int ngroups;
- -      atomic_t usage;
- -      gid_t small_block[NGROUPS_SMALL];
- -      int nblocks;
- -      gid_t *blocks[0];
- -};
   
- -/*
- - * get_group_info() must be called with the owning task locked (via task_lock())
- - * when task != current.  The reason being that the vast majority of callers are
- - * looking at current->group_info, which can not be changed except by the
- - * current task.  Changing current->group_info requires the task lock, too.
- - */
- -#define get_group_info(group_info) do { \
- -      atomic_inc(&(group_info)->usage); \
- -} while (0)
- -
- -#define put_group_info(group_info) do { \
- -      if (atomic_dec_and_test(&(group_info)->usage)) \
- -              groups_free(group_info); \
- -} while (0)
- -
- -extern struct group_info *groups_alloc(int gidsetsize);
- -extern void groups_free(struct group_info *group_info);
- -extern int set_current_groups(struct group_info *group_info);
- -extern int groups_search(struct group_info *group_info, gid_t grp);
- -/* access the groups "array" with this macro */
- -#define GROUP_AT(gi, i) \
- -    ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
   
   #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
   extern void prefetch_stack(struct task_struct *t);
@@@ -1145,12 -1182,17 +1146,12 @@@ struct task_struct 
         struct list_head cpu_timers[3];
   
   /* process credentials */
- -      uid_t uid,euid,suid,fsuid;
- -      gid_t gid,egid,sgid,fsgid;
- -      struct group_info *group_info;
- -      kernel_cap_t   cap_effective, cap_inheritable, cap_permitted, cap_bset;
- -      struct user_struct *user;
- -      unsigned securebits;
- -#ifdef CONFIG_KEYS
- -      unsigned char jit_keyring;      /* default keyring to attach requested keys to */
- -      struct key *request_key_auth;   /* assumed request_key authority */
- -      struct key *thread_keyring;     /* keyring private to this thread */
- -#endif
+ +      const struct cred *real_cred;   /* objective and real subjective task
+ +                                       * credentials (COW) */
+ +      const struct cred *cred;        /* effective (overridable) subjective task
+ +                                       * credentials (COW) */
+ +      struct mutex cred_exec_mutex;   /* execve vs ptrace cred calculation mutex */
+ +
         char comm[TASK_COMM_LEN]; /* executable name excluding path
                                      - access with [gs]et_task_comm (which lock
                                        it with task_lock())
@@@ -1187,6 -1229,9 +1188,6 @@@
         int (*notifier)(void *priv);
         void *notifier_data;
         sigset_t *notifier_mask;
- -#ifdef CONFIG_SECURITY
- -      void *security;
- -#endif
         struct audit_context *audit_context;
   #ifdef CONFIG_AUDITSYSCALL
         uid_t loginuid;
@@@ -1305,6 -1350,8 +1306,8 @@@
          */
         unsigned long timer_slack_ns;
         unsigned long default_timer_slack_ns;
+ 
+       struct list_head        *scm_work_list;
   };
   
   /*
@@@ -1724,6 -1771,7 +1727,6 @@@ static inline struct user_struct *get_u
         return u;
   }
   extern void free_uid(struct user_struct *);
- -extern void switch_uid(struct user_struct *);
   extern void release_uids(struct user_namespace *ns);
   
   #include <asm/current.h>
@@@ -1742,6 -1790,9 +1745,6 @@@ extern void wake_up_new_task(struct tas
   extern void sched_fork(struct task_struct *p, int clone_flags);
   extern void sched_dead(struct task_struct *p);
   
- -extern int in_group_p(gid_t);
- -extern int in_egroup_p(gid_t);
- -
   extern void proc_caches_init(void);
   extern void flush_signals(struct task_struct *);
   extern void ignore_signals(struct task_struct *);
@@@ -1873,8 -1924,6 +1876,8 @@@ static inline unsigned long wait_task_i
   #define for_each_process(p) \
         for (p = &init_task ; (p = next_task(p)) != &init_task ; )
   
+ +extern bool is_single_threaded(struct task_struct *);
+ +
   /*
    * Careful: do_each_thread/while_each_thread is a double loop so
    *          'break' will not work as expected - use goto instead.
diff --combined include/net/scm.h

index f160116db54a94c5cf671fd51c46222287d7c635,33e9986beb86dd95d6f3f962ca8a89281c874a9d..f45bb6eca7d4263e179a203872c56c87f765ebe5
--- 1/include/net/scm.h
--- 2/include/net/scm.h
+++ b/include/net/scm.h
@@@ -14,8 -14,9 +14,9 @@@
   
   struct scm_fp_list
   {
-       int             count;
-       struct file     *fp[SCM_MAX_FD];
+       struct list_head        list;
+       int                     count;
+       struct file             *fp[SCM_MAX_FD];
   };
   
   struct scm_cookie
@@@ -54,8 -55,8 +55,8 @@@ static __inline__ int scm_send(struct s
                                struct scm_cookie *scm)
   {
         struct task_struct *p = current;
- -      scm->creds.uid = p->uid;
- -      scm->creds.gid = p->gid;
+ +      scm->creds.uid = current_uid();
+ +      scm->creds.gid = current_gid();
         scm->creds.pid = task_tgid_vnr(p);
         scm->fp = NULL;
         scm->seq = 0;
diff --combined kernel/cgroup.c

index a512a75a55600896f1dc5782a11c9bdb7f48e948,358e77564e6f8b0b4c3964a36da48cc3051d6e73..8fe8c0cb137bb4e38073df7c1a0c03d72c25afa3
--- 1/kernel/cgroup.c
--- 2/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@@ -571,8 -571,8 +571,8 @@@ static struct inode *cgroup_new_inode(m
   
         if (inode) {
                 inode->i_mode = mode;
- -              inode->i_uid = current->fsuid;
- -              inode->i_gid = current->fsgid;
+ +              inode->i_uid = current_fsuid();
+ +              inode->i_gid = current_fsgid();
                 inode->i_blocks = 0;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
@@@ -1279,7 -1279,6 +1279,7 @@@ int cgroup_attach_task(struct cgroup *c
   static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
   {
         struct task_struct *tsk;
+ +      const struct cred *cred = current_cred(), *tcred;
         int ret;
   
         if (pid) {
@@@ -1289,16 -1288,14 +1289,16 @@@
                         rcu_read_unlock();
                         return -ESRCH;
                 }
- -              get_task_struct(tsk);
- -              rcu_read_unlock();
   
- -              if ((current->euid) && (current->euid != tsk->uid)
- -                  && (current->euid != tsk->suid)) {
- -                      put_task_struct(tsk);
+ +              tcred = __task_cred(tsk);
+ +              if (cred->euid &&
+ +                  cred->euid != tcred->uid &&
+ +                  cred->euid != tcred->suid) {
+ +                      rcu_read_unlock();
                         return -EACCES;
                 }
+ +              get_task_struct(tsk);
+ +              rcu_read_unlock();
         } else {
                 tsk = current;
                 get_task_struct(tsk);
@@@ -2500,7 -2497,6 +2500,6 @@@ static int cgroup_rmdir(struct inode *u
         list_del(&cgrp->sibling);
         spin_lock(&cgrp->dentry->d_lock);
         d = dget(cgrp->dentry);
-       cgrp->dentry = NULL;
         spin_unlock(&d->d_lock);
   
         cgroup_d_remove_dir(d);
diff --combined kernel/exit.c

index c0711da15486d6b7090a54e316fe73cf21bb3666,ae2b92be5faec1efa73beefb63304a22d030fc16..16eda9b39f8d18d8ed043c46eb08433f94e617a8
--- 1/kernel/exit.c
--- 2/kernel/exit.c
+++ b/kernel/exit.c
@@@ -47,14 -47,12 +47,14 @@@
   #include <linux/blkdev.h>
   #include <linux/task_io_accounting_ops.h>
   #include <linux/tracehook.h>
+ +#include <linux/init_task.h>
   #include <trace/sched.h>
   
   #include <asm/uaccess.h>
   #include <asm/unistd.h>
   #include <asm/pgtable.h>
   #include <asm/mmu_context.h>
+ +#include "cred-internals.h"
   
   static void exit_mm(struct task_struct * tsk);
   
@@@ -143,6 -141,11 +143,11 @@@ static void __exit_signal(struct task_s
         if (sig) {
                 flush_sigqueue(&sig->shared_pending);
                 taskstats_tgid_free(sig);
+               /*
+                * Make sure ->signal can't go away under rq->lock,
+                * see account_group_exec_runtime().
+                */
+               task_rq_unlock_wait(tsk);
                 __cleanup_signal(sig);
         }
   }
@@@ -162,10 -165,7 +167,10 @@@ void release_task(struct task_struct * 
         int zap_leader;
   repeat:
         tracehook_prepare_release_task(p);
- -      atomic_dec(&p->user->processes);
+ +      /* don't need to get the RCU readlock here - the process is dead and
+ +       * can't be modifying its own credentials */
+ +      atomic_dec(&__task_cred(p)->user->processes);
+ +
         proc_flush_task(p);
         write_lock_irq(&tasklist_lock);
         tracehook_finish_release_task(p);
@@@ -340,12 -340,12 +345,12 @@@ static void reparent_to_kthreadd(void
         /* cpus_allowed? */
         /* rt_priority? */
         /* signals? */
- -      security_task_reparent_to_init(current);
         memcpy(current->signal->rlim, init_task.signal->rlim,
                sizeof(current->signal->rlim));
- -      atomic_inc(&(INIT_USER->__count));
+ +
+ +      atomic_inc(&init_cred.usage);
+ +      commit_creds(&init_cred);
         write_unlock_irq(&tasklist_lock);
- -      switch_uid(INIT_USER);
   }
   
   void __set_special_pids(struct pid *pid)
@@@ -1087,6 -1087,7 +1092,6 @@@ NORET_TYPE void do_exit(long code
         check_stack_usage();
         exit_thread();
         cgroup_exit(tsk, 1);
- -      exit_keys(tsk);
   
         if (group_dead && tsk->signal->leader)
                 disassociate_ctty(1);
@@@ -1271,12 -1272,12 +1276,12 @@@ static int wait_task_zombie(struct task
         unsigned long state;
         int retval, status, traced;
         pid_t pid = task_pid_vnr(p);
+ +      uid_t uid = __task_cred(p)->uid;
   
         if (!likely(options & WEXITED))
                 return 0;
   
         if (unlikely(options & WNOWAIT)) {
- -              uid_t uid = p->uid;
                 int exit_code = p->exit_code;
                 int why, status;
   
@@@ -1397,7 -1398,7 +1402,7 @@@
         if (!retval && infop)
                 retval = put_user(pid, &infop->si_pid);
         if (!retval && infop)
- -              retval = put_user(p->uid, &infop->si_uid);
+ +              retval = put_user(uid, &infop->si_uid);
         if (!retval)
                 retval = pid;
   
@@@ -1462,8 -1463,7 +1467,8 @@@ static int wait_task_stopped(int ptrace
         if (!unlikely(options & WNOWAIT))
                 p->exit_code = 0;
   
- -      uid = p->uid;
+ +      /* don't need the RCU readlock here as we're holding a spinlock */
+ +      uid = __task_cred(p)->uid;
   unlock_sig:
         spin_unlock_irq(&p->sighand->siglock);
         if (!exit_code)
@@@ -1537,10 -1537,10 +1542,10 @@@ static int wait_task_continued(struct t
         }
         if (!unlikely(options & WNOWAIT))
                 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
+ +      uid = __task_cred(p)->uid;
         spin_unlock_irq(&p->sighand->siglock);
   
         pid = task_pid_vnr(p);
- -      uid = p->uid;
         get_task_struct(p);
         read_unlock(&tasklist_lock);
   
diff --combined kernel/sched.c

index 92992e287b10864dad4ee518134be6a8c13074b2,c94baf2969e7b7e663050f31805986688ce106d0..204d0662b438a5831fa1578bab3ea119050c45bc
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -345,9 -345,7 +345,9 @@@ static inline struct task_group *task_g
         struct task_group *tg;
   
   #ifdef CONFIG_USER_SCHED
- -      tg = p->user->tg;
+ +      rcu_read_lock();
+ +      tg = __task_cred(p)->user->tg;
+ +      rcu_read_unlock();
   #elif defined(CONFIG_CGROUP_SCHED)
         tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                                 struct task_group, css);
@@@ -399,9 -397,9 +399,9 @@@ struct cfs_rq 
          * 'curr' points to currently running entity on this cfs_rq.
          * It is set to NULL otherwise (i.e when none are currently running).
          */
-       struct sched_entity *curr, *next;
+       struct sched_entity *curr, *next, *last;
   
-       unsigned long nr_spread_over;
+       unsigned int nr_spread_over;
   
   #ifdef CONFIG_FAIR_GROUP_SCHED
         struct rq *rq;  /* cpu runqueue to which this cfs_rq is attached */
@@@ -971,6 -969,14 +971,14 @@@ static struct rq *task_rq_lock(struct t
         }
   }
   
+ void task_rq_unlock_wait(struct task_struct *p)
+ {
+       struct rq *rq = task_rq(p);
+ 
+       smp_mb(); /* spin-unlock-wait is not a full memory barrier */
+       spin_unlock_wait(&rq->lock);
+ }
+ 
   static void __task_rq_unlock(struct rq *rq)
         __releases(rq->lock)
   {
@@@ -1450,6 -1456,8 +1458,8 @@@ static unsigned long cpu_avg_load_per_t
   
         if (rq->nr_running)
                 rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+       else
+               rq->avg_load_per_task = 0;
   
         return rq->avg_load_per_task;
   }
@@@ -1807,7 -1815,9 +1817,9 @@@ task_hot(struct task_struct *p, u64 now
         /*
          * Buddy candidates are cache hot:
          */
-       if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
+       if (sched_feat(CACHE_HOT_BUDDY) &&
+                       (&p->se == cfs_rq_of(&p->se)->next ||
+                        &p->se == cfs_rq_of(&p->se)->last))
                 return 1;
   
         if (p->sched_class != &fair_sched_class)
@@@ -5123,22 -5133,6 +5135,22 @@@ __setscheduler(struct rq *rq, struct ta
         set_load_weight(p);
   }
   
+ +/*
+ + * check the target process has a UID that matches the current process's
+ + */
+ +static bool check_same_owner(struct task_struct *p)
+ +{
+ +      const struct cred *cred = current_cred(), *pcred;
+ +      bool match;
+ +
+ +      rcu_read_lock();
+ +      pcred = __task_cred(p);
+ +      match = (cred->euid == pcred->euid ||
+ +               cred->euid == pcred->uid);
+ +      rcu_read_unlock();
+ +      return match;
+ +}
+ +
   static int __sched_setscheduler(struct task_struct *p, int policy,
                                 struct sched_param *param, bool user)
   {
@@@ -5198,7 -5192,8 +5210,7 @@@ recheck
                         return -EPERM;
   
                 /* can't change other user's priorities */
- -              if ((current->euid != p->euid) &&
- -                  (current->euid != p->uid))
+ +              if (!check_same_owner(p))
                         return -EPERM;
         }
   
@@@ -5430,7 -5425,8 +5442,7 @@@ long sched_setaffinity(pid_t pid, cons
         read_unlock(&tasklist_lock);
   
         retval = -EPERM;
- -      if ((current->euid != p->euid) && (current->euid != p->uid) &&
- -                      !capable(CAP_SYS_NICE))
+ +      if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
                 goto out_unlock;
   
         retval = security_task_setscheduler(p, 0, NULL);
@@@ -5874,6 -5870,8 +5886,8 @@@ void __cpuinit init_idle(struct task_st
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
   
+       spin_lock_irqsave(&rq->lock, flags);
+ 
         __sched_fork(idle);
         idle->se.exec_start = sched_clock();
   
@@@ -5881,7 -5879,6 +5895,6 @@@
         idle->cpus_allowed = cpumask_of_cpu(cpu);
         __set_task_cpu(idle, cpu);
   
-       spin_lock_irqsave(&rq->lock, flags);
         rq->curr = rq->idle = idle;
   #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
         idle->oncpu = 1;
@@@ -6891,15 -6888,17 +6904,17 @@@ cpu_attach_domain(struct sched_domain *
         struct sched_domain *tmp;
   
         /* Remove the sched domains which do not contribute to scheduling. */
-       for (tmp = sd; tmp; tmp = tmp->parent) {
+       for (tmp = sd; tmp; ) {
                 struct sched_domain *parent = tmp->parent;
                 if (!parent)
                         break;
+ 
                 if (sd_parent_degenerate(tmp, parent)) {
                         tmp->parent = parent->parent;
                         if (parent->parent)
                                 parent->parent->child = tmp;
-               }
+               } else
+                       tmp = tmp->parent;
         }
   
         if (sd && sd_degenerate(sd)) {
@@@ -7688,6 -7687,7 +7703,7 @@@ static int __build_sched_domains(const 
   error:
         free_sched_groups(cpu_map, tmpmask);
         SCHED_CPUMASK_FREE((void *)allmasks);
+       kfree(rd);
         return -ENOMEM;
   #endif
   }
diff --combined kernel/timer.c

index b54e4646cee74599699f7cc482565024c0333fe2,dbd50fabe4c74ab241523fbf2b427c596e2838c5..566257d1dc10327818f787e5ca04559c05bd2566
--- 1/kernel/timer.c
--- 2/kernel/timer.c
+++ b/kernel/timer.c
@@@ -112,27 -112,8 +112,8 @@@ timer_set_base(struct timer_list *timer
                                       tbase_get_deferrable(timer->base));
   }
   
- /**
-  * __round_jiffies - function to round jiffies to a full second
-  * @j: the time in (absolute) jiffies that should be rounded
-  * @cpu: the processor number on which the timeout will happen
-  *
-  * __round_jiffies() rounds an absolute time in the future (in jiffies)
-  * up or down to (approximately) full seconds. This is useful for timers
-  * for which the exact time they fire does not matter too much, as long as
-  * they fire approximately every X seconds.
-  *
-  * By rounding these timers to whole seconds, all such timers will fire
-  * at the same time, rather than at various times spread out. The goal
-  * of this is to have the CPU wake up less, which saves power.
-  *
-  * The exact rounding is skewed for each processor to avoid all
-  * processors firing at the exact same time, which could lead
-  * to lock contention or spurious cache line bouncing.
-  *
-  * The return value is the rounded version of the @j parameter.
-  */
- unsigned long __round_jiffies(unsigned long j, int cpu)
+ static unsigned long round_jiffies_common(unsigned long j, int cpu,
+               bool force_up)
   {
         int rem;
         unsigned long original = j;
@@@ -154,8 -135,9 +135,9 @@@
          * due to delays of the timer irq, long irq off times etc etc) then
          * we should round down to the whole second, not up. Use 1/4th second
          * as cutoff for this rounding as an extreme upper bound for this.
+        * But never round down if @force_up is set.
          */
-       if (rem < HZ/4) /* round down */
+       if (rem < HZ/4 && !force_up) /* round down */
                 j = j - rem;
         else /* round up */
                 j = j - rem + HZ;
@@@ -167,6 -149,31 +149,31 @@@
                 return original;
         return j;
   }
+ 
+ /**
+  * __round_jiffies - function to round jiffies to a full second
+  * @j: the time in (absolute) jiffies that should be rounded
+  * @cpu: the processor number on which the timeout will happen
+  *
+  * __round_jiffies() rounds an absolute time in the future (in jiffies)
+  * up or down to (approximately) full seconds. This is useful for timers
+  * for which the exact time they fire does not matter too much, as long as
+  * they fire approximately every X seconds.
+  *
+  * By rounding these timers to whole seconds, all such timers will fire
+  * at the same time, rather than at various times spread out. The goal
+  * of this is to have the CPU wake up less, which saves power.
+  *
+  * The exact rounding is skewed for each processor to avoid all
+  * processors firing at the exact same time, which could lead
+  * to lock contention or spurious cache line bouncing.
+  *
+  * The return value is the rounded version of the @j parameter.
+  */
+ unsigned long __round_jiffies(unsigned long j, int cpu)
+ {
+       return round_jiffies_common(j, cpu, false);
+ }
   EXPORT_SYMBOL_GPL(__round_jiffies);
   
   /**
@@@ -191,13 -198,10 +198,10 @@@
    */
   unsigned long __round_jiffies_relative(unsigned long j, int cpu)
   {
-       /*
-        * In theory the following code can skip a jiffy in case jiffies
-        * increments right between the addition and the later subtraction.
-        * However since the entire point of this function is to use approximate
-        * timeouts, it's entirely ok to not handle that.
-        */
-       return  __round_jiffies(j + jiffies, cpu) - jiffies;
+       unsigned long j0 = jiffies;
+ 
+       /* Use j0 because jiffies might change while we run */
+       return round_jiffies_common(j + j0, cpu, false) - j0;
   }
   EXPORT_SYMBOL_GPL(__round_jiffies_relative);
   
@@@ -218,7 -222,7 +222,7 @@@
    */
   unsigned long round_jiffies(unsigned long j)
   {
-       return __round_jiffies(j, raw_smp_processor_id());
+       return round_jiffies_common(j, raw_smp_processor_id(), false);
   }
   EXPORT_SYMBOL_GPL(round_jiffies);
   
@@@ -243,6 -247,71 +247,71 @@@ unsigned long round_jiffies_relative(un
   }
   EXPORT_SYMBOL_GPL(round_jiffies_relative);
   
+ /**
+  * __round_jiffies_up - function to round jiffies up to a full second
+  * @j: the time in (absolute) jiffies that should be rounded
+  * @cpu: the processor number on which the timeout will happen
+  *
+  * This is the same as __round_jiffies() except that it will never
+  * round down.  This is useful for timeouts for which the exact time
+  * of firing does not matter too much, as long as they don't fire too
+  * early.
+  */
+ unsigned long __round_jiffies_up(unsigned long j, int cpu)
+ {
+       return round_jiffies_common(j, cpu, true);
+ }
+ EXPORT_SYMBOL_GPL(__round_jiffies_up);
+ 
+ /**
+  * __round_jiffies_up_relative - function to round jiffies up to a full second
+  * @j: the time in (relative) jiffies that should be rounded
+  * @cpu: the processor number on which the timeout will happen
+  *
+  * This is the same as __round_jiffies_relative() except that it will never
+  * round down.  This is useful for timeouts for which the exact time
+  * of firing does not matter too much, as long as they don't fire too
+  * early.
+  */
+ unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
+ {
+       unsigned long j0 = jiffies;
+ 
+       /* Use j0 because jiffies might change while we run */
+       return round_jiffies_common(j + j0, cpu, true) - j0;
+ }
+ EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
+ 
+ /**
+  * round_jiffies_up - function to round jiffies up to a full second
+  * @j: the time in (absolute) jiffies that should be rounded
+  *
+  * This is the same as round_jiffies() except that it will never
+  * round down.  This is useful for timeouts for which the exact time
+  * of firing does not matter too much, as long as they don't fire too
+  * early.
+  */
+ unsigned long round_jiffies_up(unsigned long j)
+ {
+       return round_jiffies_common(j, raw_smp_processor_id(), true);
+ }
+ EXPORT_SYMBOL_GPL(round_jiffies_up);
+ 
+ /**
+  * round_jiffies_up_relative - function to round jiffies up to a full second
+  * @j: the time in (relative) jiffies that should be rounded
+  *
+  * This is the same as round_jiffies_relative() except that it will never
+  * round down.  This is useful for timeouts for which the exact time
+  * of firing does not matter too much, as long as they don't fire too
+  * early.
+  */
+ unsigned long round_jiffies_up_relative(unsigned long j)
+ {
+       return __round_jiffies_up_relative(j, raw_smp_processor_id());
+ }
+ EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
+ 
   
   static inline void set_running_timer(struct tvec_base *base,
                                         struct timer_list *timer)
@@@ -1123,25 -1192,25 +1192,25 @@@ asmlinkage long sys_getppid(void
   asmlinkage long sys_getuid(void)
   {
         /* Only we change this so SMP safe */
- -      return current->uid;
+ +      return current_uid();
   }
   
   asmlinkage long sys_geteuid(void)
   {
         /* Only we change this so SMP safe */
- -      return current->euid;
+ +      return current_euid();
   }
   
   asmlinkage long sys_getgid(void)
   {
         /* Only we change this so SMP safe */
- -      return current->gid;
+ +      return current_gid();
   }
   
   asmlinkage long sys_getegid(void)
   {
         /* Only we change this so SMP safe */
- -      return  current->egid;
+ +      return  current_egid();
   }
   
   #endif
diff --combined kernel/trace/trace.c

index 5c97c5b4ea8f9eed935a9c52c8952b9a7199124e,697eda36b86a54e902a2289d2adf767ca1278460..ffe7c96fa09be9f11a8b995b7acb0822f6e83cda
--- 1/kernel/trace/trace.c
--- 2/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@@ -246,7 -246,7 +246,7 @@@ __update_max_tr(struct trace_array *tr
   
         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
         data->pid = tsk->pid;
- -      data->uid = tsk->uid;
+ +      data->uid = task_uid(tsk);
         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
         data->policy = tsk->policy;
         data->rt_priority = tsk->rt_priority;
@@@ -1755,7 -1755,7 +1755,7 @@@ static enum print_line_t print_bin_fmt(
                 return TRACE_TYPE_HANDLED;
   
         SEQ_PUT_FIELD_RET(s, entry->pid);
-       SEQ_PUT_FIELD_RET(s, iter->cpu);
+       SEQ_PUT_FIELD_RET(s, entry->cpu);
         SEQ_PUT_FIELD_RET(s, iter->ts);
   
         switch (entry->type) {
@@@ -2676,7 -2676,7 +2676,7 @@@ tracing_entries_write(struct file *filp
   {
         unsigned long val;
         char buf[64];
-       int ret;
+       int ret, cpu;
         struct trace_array *tr = filp->private_data;
   
         if (cnt >= sizeof(buf))
@@@ -2704,6 -2704,14 +2704,14 @@@
                 goto out;
         }
   
+       /* disable all cpu buffers */
+       for_each_tracing_cpu(cpu) {
+               if (global_trace.data[cpu])
+                       atomic_inc(&global_trace.data[cpu]->disabled);
+               if (max_tr.data[cpu])
+                       atomic_inc(&max_tr.data[cpu]->disabled);
+       }
+ 
         if (val != global_trace.entries) {
                 ret = ring_buffer_resize(global_trace.buffer, val);
                 if (ret < 0) {
@@@ -2735,6 -2743,13 +2743,13 @@@
         if (tracing_disabled)
                 cnt = -ENOMEM;
    out:
+       for_each_tracing_cpu(cpu) {
+               if (global_trace.data[cpu])
+                       atomic_dec(&global_trace.data[cpu]->disabled);
+               if (max_tr.data[cpu])
+                       atomic_dec(&max_tr.data[cpu]->disabled);
+       }
+ 
         max_tr.entries = global_trace.entries;
         mutex_unlock(&trace_types_lock);
   
diff --combined kernel/workqueue.c

index f12ab5c4dec4aa464cd11dfcb13c18a840c89f33,d4dc69ddebd7276684e2564737618859e96fbd1c..4952322cba45b05c90999f21b51016612c8144ca
--- 1/kernel/workqueue.c
--- 2/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@@ -84,21 -84,21 +84,21 @@@ static cpumask_t cpu_singlethread_map _
   static cpumask_t cpu_populated_map __read_mostly;
   
   /* If it's single threaded, it isn't in the list of workqueues. */
- -static inline int is_single_threaded(struct workqueue_struct *wq)
+ +static inline int is_wq_single_threaded(struct workqueue_struct *wq)
   {
         return wq->singlethread;
   }
   
   static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq)
   {
- -      return is_single_threaded(wq)
+ +      return is_wq_single_threaded(wq)
                 ? &cpu_singlethread_map : &cpu_populated_map;
   }
   
   static
   struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
   {
- -      if (unlikely(is_single_threaded(wq)))
+ +      if (unlikely(is_wq_single_threaded(wq)))
                 cpu = singlethread_cpu;
         return per_cpu_ptr(wq->cpu_wq, cpu);
   }
@@@ -769,7 -769,7 +769,7 @@@ static int create_workqueue_thread(stru
   {
         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
         struct workqueue_struct *wq = cwq->wq;
- -      const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
+ +      const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
         struct task_struct *p;
   
         p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);
@@@ -970,6 -970,51 +970,51 @@@ undo
         return ret;
   }
   
+ #ifdef CONFIG_SMP
+ struct work_for_cpu {
+       struct work_struct work;
+       long (*fn)(void *);
+       void *arg;
+       long ret;
+ };
+ 
+ static void do_work_for_cpu(struct work_struct *w)
+ {
+       struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
+ 
+       wfc->ret = wfc->fn(wfc->arg);
+ }
+ 
+ /**
+  * work_on_cpu - run a function in user context on a particular cpu
+  * @cpu: the cpu to run on
+  * @fn: the function to run
+  * @arg: the function arg
+  *
+  * This will return -EINVAL if the cpu is not online, or the return value
+  * of @fn otherwise.
+  */
+ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+ {
+       struct work_for_cpu wfc;
+ 
+       INIT_WORK(&wfc.work, do_work_for_cpu);
+       wfc.fn = fn;
+       wfc.arg = arg;
+       get_online_cpus();
+       if (unlikely(!cpu_online(cpu)))
+               wfc.ret = -EINVAL;
+       else {
+               schedule_work_on(cpu, &wfc.work);
+               flush_work(&wfc.work);
+       }
+       put_online_cpus();
+ 
+       return wfc.ret;
+ }
+ EXPORT_SYMBOL_GPL(work_on_cpu);
+ #endif /* CONFIG_SMP */
+ 
   void __init init_workqueues(void)
   {
         cpu_populated_map = cpu_online_map;
diff --combined mm/mempolicy.c

index 7555219c535bfdad20f17f38223cb00271823c73,e9493b1c1117a4090fa8998b9ea2e29c1239ebbc..e412ffa8e52ea945fb2ffa94a8de388e2de0875e
--- 1/mm/mempolicy.c
--- 2/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@@ -489,12 -489,6 +489,6 @@@ check_range(struct mm_struct *mm, unsig
         int err;
         struct vm_area_struct *first, *vma, *prev;
   
-       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
- 
-               err = migrate_prep();
-               if (err)
-                       return ERR_PTR(err);
-       }
   
         first = find_vma(mm, start);
         if (!first)
@@@ -809,9 -803,13 +803,13 @@@ int do_migrate_pages(struct mm_struct *
         const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
   {
         int busy = 0;
-       int err = 0;
+       int err;
         nodemask_t tmp;
   
+       err = migrate_prep();
+       if (err)
+               return err;
+ 
         down_read(&mm->mmap_sem);
   
         err = migrate_vmas(mm, from_nodes, to_nodes, flags);
@@@ -974,6 -972,12 +972,12 @@@ static long do_mbind(unsigned long star
                  start, start + len, mode, mode_flags,
                  nmask ? nodes_addr(*nmask)[0] : -1);
   
+       if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ 
+               err = migrate_prep();
+               if (err)
+                       return err;
+       }
         down_write(&mm->mmap_sem);
         vma = check_range(mm, start, end, nmask,
                           flags | MPOL_MF_INVERT, &pagelist);
@@@ -1110,7 -1114,6 +1114,7 @@@ asmlinkage long sys_migrate_pages(pid_
                 const unsigned long __user *old_nodes,
                 const unsigned long __user *new_nodes)
   {
+ +      const struct cred *cred = current_cred(), *tcred;
         struct mm_struct *mm;
         struct task_struct *task;
         nodemask_t old;
@@@ -1145,16 -1148,12 +1149,16 @@@
          * capabilities, superuser privileges or the same
          * userid as the target process.
          */
- -      if ((current->euid != task->suid) && (current->euid != task->uid) &&
- -          (current->uid != task->suid) && (current->uid != task->uid) &&
+ +      rcu_read_lock();
+ +      tcred = __task_cred(task);
+ +      if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+ +          cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
             !capable(CAP_SYS_NICE)) {
+ +              rcu_read_unlock();
                 err = -EPERM;
                 goto out;
         }
+ +      rcu_read_unlock();
   
         task_nodes = cpuset_mems_allowed(task);
         /* Is the user allowed to access the target nodes? */
diff --combined mm/migrate.c

index 142284229ce20d3f4db8967795e32ab59ce40f35,385db89f0c33e48a421b8adf63e63b60b0e4881e..9dd10da1cc23f34c026ce2822bf5f23416576bc0
--- 1/mm/migrate.c
--- 2/mm/migrate.c
+++ b/mm/migrate.c
@@@ -841,12 -841,12 +841,12 @@@ static int do_move_page_to_node_array(s
         struct page_to_node *pp;
         LIST_HEAD(pagelist);
   
+       migrate_prep();
         down_read(&mm->mmap_sem);
   
         /*
          * Build a list of pages to migrate
          */
-       migrate_prep();
         for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
                 struct vm_area_struct *vma;
                 struct page *page;
@@@ -1045,7 -1045,6 +1045,7 @@@ asmlinkage long sys_move_pages(pid_t pi
                         const int __user *nodes,
                         int __user *status, int flags)
   {
+ +      const struct cred *cred = current_cred(), *tcred;
         struct task_struct *task;
         struct mm_struct *mm;
         int err;
@@@ -1076,16 -1075,12 +1076,16 @@@
          * capabilities, superuser privileges or the same
          * userid as the target process.
          */
- -      if ((current->euid != task->suid) && (current->euid != task->uid) &&
- -          (current->uid != task->suid) && (current->uid != task->uid) &&
+ +      rcu_read_lock();
+ +      tcred = __task_cred(task);
+ +      if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+ +          cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
             !capable(CAP_SYS_NICE)) {
+ +              rcu_read_unlock();
                 err = -EPERM;
                 goto out;
         }
+ +      rcu_read_unlock();
   
         err = security_task_movememory(task);
         if (err)
diff --combined mm/oom_kill.c

index 0e0b282a2073e97f7d575246a20d90326bc4e937,a0a01902f551dd45b19385b64435ec6708db249d..558f9afe6e4e6b0085ddb51633a8b845e2518d18
--- 1/mm/oom_kill.c
--- 2/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@@ -38,7 -38,6 +38,6 @@@ static DEFINE_SPINLOCK(zone_scan_mutex)
    * badness - calculate a numeric value for how bad this task has been
    * @p: task struct of which task we should calculate
    * @uptime: current uptime in seconds
-  * @mem: target memory controller
    *
    * The formula used is relatively simple and documented inline in the
    * function. The main rationale is that we want to select a good task
@@@ -129,8 -128,8 +128,8 @@@ unsigned long badness(struct task_struc
          * Superuser processes are usually more important, so we make it
          * less likely that we kill those.
          */
- -      if (has_capability(p, CAP_SYS_ADMIN) ||
- -          has_capability(p, CAP_SYS_RESOURCE))
+ +      if (has_capability_noaudit(p, CAP_SYS_ADMIN) ||
+ +          has_capability_noaudit(p, CAP_SYS_RESOURCE))
                 points /= 4;
   
         /*
@@@ -139,7 -138,7 +138,7 @@@
          * tend to only have this flag set on applications they think
          * of as important.
          */
- -      if (has_capability(p, CAP_SYS_RAWIO))
+ +      if (has_capability_noaudit(p, CAP_SYS_RAWIO))
                 points /= 4;
   
         /*
@@@ -295,12 -294,14 +294,14 @@@ static void dump_tasks(const struct mem
                         continue;
                 if (mem && !task_in_mem_cgroup(p, mem))
                         continue;
+               if (!thread_group_leader(p))
+                       continue;
   
                 task_lock(p);
                 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
- -                     p->pid, p->uid, p->tgid, p->mm->total_vm,
- -                     get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj,
- -                     p->comm);
+ +                     p->pid, __task_cred(p)->uid, p->tgid,
+ +                     p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p),
+ +                     p->oomkilladj, p->comm);
                 task_unlock(p);
         } while_each_thread(g, p);
   }
diff --combined net/9p/client.c

index c3fb6f8bfa926b623cc93d407b5aaa832b998c4a,4b529454616d0427020e5b8b30620249e400c268..821f1ec0b2c38084444b7da0efb32f993f1e363c
--- 1/net/9p/client.c
--- 2/net/9p/client.c
+++ b/net/9p/client.c
@@@ -189,6 -189,9 +189,9 @@@ static struct p9_req_t *p9_tag_alloc(st
                         printk(KERN_ERR "Couldn't grow tag array\n");
                         kfree(req->tc);
                         kfree(req->rc);
+                       kfree(req->wq);
+                       req->tc = req->rc = NULL;
+                       req->wq = NULL;
                         return ERR_PTR(-ENOMEM);
                 }
                 req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
@@@ -311,12 -314,6 +314,6 @@@ static void p9_free_req(struct p9_clien
         r->status = REQ_STATUS_IDLE;
         if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
                 p9_idpool_put(tag, c->tagpool);
- 
-       /* if this was a flush request we have to free response fcall */
-       if (r->rc->id == P9_RFLUSH) {
-               kfree(r->tc);
-               kfree(r->rc);
-       }
   }
   
   /**
@@@ -611,48 -608,51 +608,51 @@@ reterr
   
   static struct p9_fid *p9_fid_create(struct p9_client *clnt)
   {
-       int err;
+       int ret;
         struct p9_fid *fid;
+       unsigned long flags;
   
         P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt);
         fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
         if (!fid)
                 return ERR_PTR(-ENOMEM);
   
-       fid->fid = p9_idpool_get(clnt->fidpool);
+       ret = p9_idpool_get(clnt->fidpool);
         if (fid->fid < 0) {
-               err = -ENOSPC;
+               ret = -ENOSPC;
                 goto error;
         }
+       fid->fid = ret;
   
         memset(&fid->qid, 0, sizeof(struct p9_qid));
         fid->mode = -1;
         fid->rdir_fpos = 0;
- -      fid->uid = current->fsuid;
+ +      fid->uid = current_fsuid();
         fid->clnt = clnt;
         fid->aux = NULL;
   
-       spin_lock(&clnt->lock);
+       spin_lock_irqsave(&clnt->lock, flags);
         list_add(&fid->flist, &clnt->fidlist);
-       spin_unlock(&clnt->lock);
+       spin_unlock_irqrestore(&clnt->lock, flags);
   
         return fid;
   
   error:
         kfree(fid);
-       return ERR_PTR(err);
+       return ERR_PTR(ret);
   }
   
   static void p9_fid_destroy(struct p9_fid *fid)
   {
         struct p9_client *clnt;
+       unsigned long flags;
   
         P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid);
         clnt = fid->clnt;
         p9_idpool_put(fid->fid, clnt->fidpool);
-       spin_lock(&clnt->lock);
+       spin_lock_irqsave(&clnt->lock, flags);
         list_del(&fid->flist);
-       spin_unlock(&clnt->lock);
+       spin_unlock_irqrestore(&clnt->lock, flags);
         kfree(fid);
   }
   
@@@ -818,7 -818,9 +818,9 @@@ struct p9_fid *p9_client_attach(struct 
         }
   
         P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
-                                       qid.type, qid.path, qid.version);
+                                       qid.type,
+                                       (unsigned long long)qid.path,
+                                       qid.version);
   
         memmove(&fid->qid, &qid, sizeof(struct p9_qid));
   
@@@ -865,7 -867,9 +867,9 @@@ p9_client_auth(struct p9_client *clnt, 
         }
   
         P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
-                                       qid.type, qid.path, qid.version);
+                                       qid.type,
+                                       (unsigned long long)qid.path,
+                                       qid.version);
   
         memmove(&afid->qid, &qid, sizeof(struct p9_qid));
         p9_free_req(clnt, req);
@@@ -930,7 -934,8 +934,8 @@@ struct p9_fid *p9_client_walk(struct p9
   
         for (count = 0; count < nwqids; count++)
                 P9_DPRINTK(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n",
-                       count, wqids[count].type, wqids[count].path,
+                       count, wqids[count].type,
+                       (unsigned long long)wqids[count].path,
                         wqids[count].version);
   
         if (nwname)
@@@ -980,7 -985,9 +985,9 @@@ int p9_client_open(struct p9_fid *fid, 
         }
   
         P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
-                               qid.type, qid.path, qid.version, iounit);
+                               qid.type,
+                               (unsigned long long)qid.path,
+                               qid.version, iounit);
   
         fid->mode = mode;
         fid->iounit = iounit;
@@@ -1023,7 -1030,9 +1030,9 @@@ int p9_client_fcreate(struct p9_fid *fi
         }
   
         P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
-                               qid.type, qid.path, qid.version, iounit);
+                               qid.type,
+                               (unsigned long long)qid.path,
+                               qid.version, iounit);
   
         fid->mode = mode;
         fid->iounit = iounit;
@@@ -1230,9 -1239,9 +1239,9 @@@ struct p9_wstat *p9_client_stat(struct 
                 "<<<    name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
                 "<<<    uid=%d gid=%d n_muid=%d\n",
                 ret->size, ret->type, ret->dev, ret->qid.type,
-               ret->qid.path, ret->qid.version, ret->mode,
-               ret->atime, ret->mtime, ret->length, ret->name,
-               ret->uid, ret->gid, ret->muid, ret->extension,
+               (unsigned long long)ret->qid.path, ret->qid.version, ret->mode,
+               ret->atime, ret->mtime, (unsigned long long)ret->length,
+               ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
                 ret->n_uid, ret->n_gid, ret->n_muid);
   
   free_and_error:
@@@ -1255,9 -1264,9 +1264,9 @@@ int p9_client_wstat(struct p9_fid *fid
                 "     name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
                 "     uid=%d gid=%d n_muid=%d\n",
                 wst->size, wst->type, wst->dev, wst->qid.type,
-               wst->qid.path, wst->qid.version, wst->mode,
-               wst->atime, wst->mtime, wst->length, wst->name,
-               wst->uid, wst->gid, wst->muid, wst->extension,
+               (unsigned long long)wst->qid.path, wst->qid.version, wst->mode,
+               wst->atime, wst->mtime, (unsigned long long)wst->length,
+               wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
                 wst->n_uid, wst->n_gid, wst->n_muid);
         err = 0;
         clnt = fid->clnt;
diff --combined net/core/dev.c

index 262df226b3c9a85852d41af69cc269cb930008fb,9174c77d3112c65237cbbd2e063eb55b03b5c39e..89912ae6de651f5931cd76fe79b278e20addf4ad
--- 1/net/core/dev.c
--- 2/net/core/dev.c
+++ b/net/core/dev.c
@@@ -2218,6 -2218,9 +2218,9 @@@ int netif_receive_skb(struct sk_buff *s
         int ret = NET_RX_DROP;
         __be16 type;
   
+       if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+               return NET_RX_SUCCESS;
+ 
         /* if we've gotten here through NAPI, check netpoll */
         if (netpoll_receive_skb(skb))
                 return NET_RX_DROP;
@@@ -2958,8 -2961,6 +2961,8 @@@ static void dev_change_rx_flags(struct 
   static int __dev_set_promiscuity(struct net_device *dev, int inc)
   {
         unsigned short old_flags = dev->flags;
+ +      uid_t uid;
+ +      gid_t gid;
   
         ASSERT_RTNL();
   
@@@ -2984,17 -2985,15 +2987,17 @@@
                 printk(KERN_INFO "device %s %s promiscuous mode\n",
                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
                                                                "left");
- -              if (audit_enabled)
+ +              if (audit_enabled) {
+ +                      current_uid_gid(&uid, &gid);
                         audit_log(current->audit_context, GFP_ATOMIC,
                                 AUDIT_ANOM_PROMISCUOUS,
                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
                                 dev->name, (dev->flags & IFF_PROMISC),
                                 (old_flags & IFF_PROMISC),
                                 audit_get_loginuid(current),
- -                              current->uid, current->gid,
+ +                              uid, gid,
                                 audit_get_sessionid(current));
+ +              }
   
                 dev_change_rx_flags(dev, IFF_PROMISC);
         }
diff --combined net/core/scm.c

index f73c44b17dda4bfa9cdd4564e5416b632aea0fb2,ab242cc1accaae7673b76784aa5838a76d9b10c4..1e17949c12cace374b31b877bf49ae469c35f91c
--- 1/net/core/scm.c
--- 2/net/core/scm.c
+++ b/net/core/scm.c
@@@ -44,13 -44,11 +44,13 @@@
   
   static __inline__ int scm_check_creds(struct ucred *creds)
   {
+ +      const struct cred *cred = current_cred();
+ +
         if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
- -          ((creds->uid == current->uid || creds->uid == current->euid ||
- -            creds->uid == current->suid) || capable(CAP_SETUID)) &&
- -          ((creds->gid == current->gid || creds->gid == current->egid ||
- -            creds->gid == current->sgid) || capable(CAP_SETGID))) {
+ +          ((creds->uid == cred->uid   || creds->uid == cred->euid ||
+ +            creds->uid == cred->suid) || capable(CAP_SETUID)) &&
+ +          ((creds->gid == cred->gid   || creds->gid == cred->egid ||
+ +            creds->gid == cred->sgid) || capable(CAP_SETGID))) {
                return 0;
         }
         return -EPERM;
@@@ -77,6 -75,7 +77,7 @@@ static int scm_fp_copy(struct cmsghdr *
                 if (!fpl)
                         return -ENOMEM;
                 *fplp = fpl;
+               INIT_LIST_HEAD(&fpl->list);
                 fpl->count = 0;
         }
         fpp = &fpl->fp[fpl->count];
@@@ -108,9 -107,25 +109,25 @@@ void __scm_destroy(struct scm_cookie *s
   
         if (fpl) {
                 scm->fp = NULL;
-               for (i=fpl->count-1; i>=0; i--)
-                       fput(fpl->fp[i]);
-               kfree(fpl);
+               if (current->scm_work_list) {
+                       list_add_tail(&fpl->list, current->scm_work_list);
+               } else {
+                       LIST_HEAD(work_list);
+ 
+                       current->scm_work_list = &work_list;
+ 
+                       list_add(&fpl->list, &work_list);
+                       while (!list_empty(&work_list)) {
+                               fpl = list_first_entry(&work_list, struct scm_fp_list, list);
+ 
+                               list_del(&fpl->list);
+                               for (i=fpl->count-1; i>=0; i--)
+                                       fput(fpl->fp[i]);
+                               kfree(fpl);
+                       }
+ 
+                       current->scm_work_list = NULL;
+               }
         }
   }
   
@@@ -286,6 -301,7 +303,7 @@@ struct scm_fp_list *scm_fp_dup(struct s
   
         new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
         if (new_fpl) {
+               INIT_LIST_HEAD(&new_fpl->list);
                 for (i=fpl->count-1; i>=0; i--)
                         get_file(fpl->fp[i]);
                 memcpy(new_fpl, fpl, sizeof(*fpl));
diff --combined net/unix/af_unix.c

index 338c1aec708944262c793fbdb6898dcc6e5eaa27,eb90f77bb0e294b6358db3361740deaa0bec969e..2775acbca199c9a4482d1ffa8df7c4dc59083038
--- 1/net/unix/af_unix.c
--- 2/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@@ -467,7 -467,8 +467,7 @@@ static int unix_listen(struct socket *s
         sk->sk_state            = TCP_LISTEN;
         /* set credentials so connect can copy them */
         sk->sk_peercred.pid     = task_tgid_vnr(current);
- -      sk->sk_peercred.uid     = current->euid;
- -      sk->sk_peercred.gid     = current->egid;
+ +      current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
         err = 0;
   
   out_unlock:
@@@ -1125,7 -1126,8 +1125,7 @@@ restart
         newsk->sk_state         = TCP_ESTABLISHED;
         newsk->sk_type          = sk->sk_type;
         newsk->sk_peercred.pid  = task_tgid_vnr(current);
- -      newsk->sk_peercred.uid  = current->euid;
- -      newsk->sk_peercred.gid  = current->egid;
+ +      current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
         newu = unix_sk(newsk);
         newsk->sk_sleep         = &newu->peer_wait;
         otheru = unix_sk(other);
@@@ -1185,9 -1187,8 +1185,9 @@@ static int unix_socketpair(struct socke
         unix_peer(ska)=skb;
         unix_peer(skb)=ska;
         ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
- -      ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
- -      ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
+ +      current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
+ +      ska->sk_peercred.uid = skb->sk_peercred.uid;
+ +      ska->sk_peercred.gid = skb->sk_peercred.gid;
   
         if (ska->sk_type != SOCK_DGRAM) {
                 ska->sk_state = TCP_ESTABLISHED;
@@@ -1301,14 -1302,23 +1301,23 @@@ static void unix_destruct_fds(struct sk
         sock_wfree(skb);
   }
   
- static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
   {
         int i;
+ 
+       /*
+        * Need to duplicate file references for the sake of garbage
+        * collection.  Otherwise a socket in the fps might become a
+        * candidate for GC while the skb is not yet queued.
+        */
+       UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+       if (!UNIXCB(skb).fp)
+               return -ENOMEM;
+ 
         for (i=scm->fp->count-1; i>=0; i--)
                 unix_inflight(scm->fp->fp[i]);
-       UNIXCB(skb).fp = scm->fp;
         skb->destructor = unix_destruct_fds;
-       scm->fp = NULL;
+       return 0;
   }
   
   /*
@@@ -1367,8 -1377,11 +1376,11 @@@ static int unix_dgram_sendmsg(struct ki
                 goto out;
   
         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-       if (siocb->scm->fp)
-               unix_attach_fds(siocb->scm, skb);
+       if (siocb->scm->fp) {
+               err = unix_attach_fds(siocb->scm, skb);
+               if (err)
+                       goto out_free;
+       }
         unix_get_secdata(siocb->scm, skb);
   
         skb_reset_transport_header(skb);
@@@ -1537,8 -1550,13 +1549,13 @@@ static int unix_stream_sendmsg(struct k
                 size = min_t(int, size, skb_tailroom(skb));
   
                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-               if (siocb->scm->fp)
-                       unix_attach_fds(siocb->scm, skb);
+               if (siocb->scm->fp) {
+                       err = unix_attach_fds(siocb->scm, skb);
+                       if (err) {
+                               kfree_skb(skb);
+                               goto out_err;
+                       }
+               }
   
                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
                         kfree_skb(skb);
author	James Morris <jmorris@namei.org>
	Fri, 14 Nov 2008 00:29:12 +0000 (11:29 +1100)
committer	James Morris <jmorris@namei.org>
	Fri, 14 Nov 2008 00:29:12 +0000 (11:29 +1100)
		1	2
Documentation/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
fs/autofs4/dev-ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/ialloc.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fat/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fat/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namespace.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfsd/vfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ocfs2/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/net/scm.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/cgroup.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/exit.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/timer.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/workqueue.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/mempolicy.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/migrate.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/oom_kill.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/9p/client.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/scm.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/unix/af_unix.c	patch \|	diff1 \|	diff2 \|	blob \| history