that require a timer override, but don't have
HPET
- acpi.debug_layer= [HW,ACPI]
+ acpi_backlight= [HW,ACPI]
+ acpi_backlight=vendor
+ acpi_backlight=video
+ If set to vendor, prefer vendor specific driver
+ (e.g. thinkpad_acpi, sony_acpi, etc.) instead
+ of the ACPI video.ko driver.
+
+ acpi_display_output= [HW,ACPI]
+ acpi_display_output=vendor
+ acpi_display_output=video
+ See above.
+
+ acpi.debug_layer= [HW,ACPI,ACPI_DEBUG]
+ acpi.debug_level= [HW,ACPI,ACPI_DEBUG]
Format: <int>
- Each bit of the <int> indicates an ACPI debug layer,
- 1: enable, 0: disable. It is useful for boot time
- debugging. After system has booted up, it can be set
- via /sys/module/acpi/parameters/debug_layer.
- CONFIG_ACPI_DEBUG must be enabled for this to produce any output.
- Available bits (add the numbers together) to enable debug output
- for specific parts of the ACPI subsystem:
- 0x01 utilities 0x02 hardware 0x04 events 0x08 tables
- 0x10 namespace 0x20 parser 0x40 dispatcher
- 0x80 executer 0x100 resources 0x200 acpica debugger
- 0x400 os services 0x800 acpica disassembler.
- The number can be in decimal or prefixed with 0x in hex.
- Warning: Many of these options can produce a lot of
- output and make your system unusable. Be very careful.
-
- acpi.debug_level= [HW,ACPI]
- Format: <int>
- Each bit of the <int> indicates an ACPI debug level,
- which corresponds to the level in an ACPI_DEBUG_PRINT
- statement. After system has booted up, this mask
- can be set via /sys/module/acpi/parameters/debug_level.
-
- CONFIG_ACPI_DEBUG must be enabled for this to produce
- any output. The number can be in decimal or prefixed
- with 0x in hex. Some of these options produce so much
- output that the system is unusable.
-
- The following global components are defined by the
- ACPI CA:
- 0x01 error
- 0x02 warn
- 0x04 init
- 0x08 debug object
- 0x10 info
- 0x20 init names
- 0x40 parse
- 0x80 load
- 0x100 dispatch
- 0x200 execute
- 0x400 names
- 0x800 operation region
- 0x1000 bfield
- 0x2000 tables
- 0x4000 values
- 0x8000 objects
- 0x10000 resources
- 0x20000 user requests
- 0x40000 package
- The number can be in decimal or prefixed with 0x in hex.
- Warning: Many of these options can produce a lot of
- output and make your system unusable. Be very careful.
+ CONFIG_ACPI_DEBUG must be enabled to produce any ACPI
+ debug output. Bits in debug_layer correspond to a
+ _COMPONENT in an ACPI source file, e.g.,
+ #define _COMPONENT ACPI_PCI_COMPONENT
+ Bits in debug_level correspond to a level in
+ ACPI_DEBUG_PRINT statements, e.g.,
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, ...
+ See Documentation/acpi/debug.txt for more information
+ about debug layers and levels.
+
+ Enable AML "Debug" output, i.e., stores to the Debug
+ object while interpreting AML:
+ acpi.debug_layer=0xffffffff acpi.debug_level=0x2
+ Enable PCI/PCI interrupt routing info messages:
+ acpi.debug_layer=0x400000 acpi.debug_level=0x4
+ Enable all messages related to ACPI hardware:
+ acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
+
+ Some values produce so much output that the system is
+ unusable. The "log_buf_len" parameter may be useful
+ if you need to capture more output.
acpi.power_nocheck= [HW,ACPI]
Format: 1/0 enable/disable the check of power state.
Format:
<cpu number>,...,<cpu number>
or
- <cpu number>-<cpu number> (must be a positive range in ascending order)
+ <cpu number>-<cpu number>
+ (must be a positive range in ascending order)
or a mixture
<cpu number>,...,<cpu number>-<cpu number>
+
This option can be used to specify one or more CPUs
to isolate from the general SMP balancing and scheduling
- algorithms. The only way to move a process onto or off
- an "isolated" CPU is via the CPU affinity syscalls.
+ algorithms. You can move a process onto or off an
+ "isolated" CPU via the CPU affinity syscalls or cpuset.
<cpu number> begins at 0 and the maximum value is
"number of CPUs in system - 1".
instruction doesn't work correctly and not to
use it.
+ no_file_caps Tells the kernel not to honor file capabilities. The
+ only way then for a file to be executed with privilege
+ is to be setuid root or executed by root.
+
nohalt [IA-64] Tells the kernel not to use the power saving
function PAL_HALT_LIGHT when idle. This increases
power-consumption. On the positive side, it reduces
Valid arguments: on, off
Default: on
- noirqbalance [X86-32,SMP,KNL] Disable kernel irq balancing
-
noirqdebug [X86-32] Disables the code which attempts to detect and
disable unhandled interrupt sources.
*/
static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
{
- int err = -EINVAL;
+ int err;
- if (check_dev_ioctl_version(cmd, param)) {
+ err = check_dev_ioctl_version(cmd, param);
+ if (err) {
AUTOFS_WARN("invalid device control module version "
"supplied for cmd(0x%08x)", cmd);
goto out;
goto out;
}
- filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
+ filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+ current_cred());
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
goto out;
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
free = ext4_free_blocks_after_init(sb, group, gdp);
gdp->bg_free_blocks_count = cpu_to_le16(free);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
+ gdp);
}
spin_unlock(sb_bgl_lock(sbi, group));
spin_unlock(sb_bgl_lock(sbi, flex_group));
}
- inode->i_uid = current->fsuid;
+ inode->i_uid = current_fsuid();
if (test_opt(sb, GRPID))
inode->i_gid = dir->i_gid;
else if (dir->i_mode & S_ISGID) {
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- inode->i_gid = current->fsgid;
+ inode->i_gid = current_fsgid();
inode->i_mode = mode;
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/time.h>
- #include <linux/msdos_fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
+ #include "fat.h"
int fat_generic_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
u32 attr;
- if (inode->i_ino == MSDOS_ROOT_INO)
- attr = ATTR_DIR;
- else
- attr = fat_attr(inode);
+ mutex_lock(&inode->i_mutex);
+ attr = fat_make_attrs(inode);
+ mutex_unlock(&inode->i_mutex);
return put_user(attr, user_attr);
}
/* Merge in ATTR_VOLUME and ATTR_DIR */
attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
(is_dir ? ATTR_DIR : 0);
- oldattr = fat_attr(inode);
+ oldattr = fat_make_attrs(inode);
/* Equivalent to a chmod() */
ia.ia_valid = ATTR_MODE | ATTR_CTIME;
ia.ia_ctime = current_fs_time(inode->i_sb);
- if (is_dir) {
- ia.ia_mode = MSDOS_MKMODE(attr,
- S_IRWXUGO & ~sbi->options.fs_dmask)
- | S_IFDIR;
- } else {
- ia.ia_mode = MSDOS_MKMODE(attr,
- (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO))
- & ~sbi->options.fs_fmask)
- | S_IFREG;
+ if (is_dir)
+ ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
+ else {
+ ia.ia_mode = fat_make_mode(sbi, attr,
+ S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
}
/* The root directory has no attributes */
inode->i_flags &= S_IMMUTABLE;
}
- MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
+ fat_save_attrs(inode, attr);
mark_inode_dirty(inode);
up:
mnt_drop_write(filp->f_path.mnt);
/*
* Note, the basic check is already done by a caller of
- * (attr->ia_mode & ~MSDOS_VALID_MODE)
+ * (attr->ia_mode & ~FAT_VALID_MODE)
*/
if (S_ISREG(inode->i_mode))
/*
* Of the r and x bits, all (subject to umask) must be present. Of the
* w bits, either all (subject to umask) or none must be present.
+ *
+ * If fat_mode_can_hold_ro(inode) is false, can't change w bits.
*/
if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
return -EPERM;
- if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
- return -EPERM;
+ if (fat_mode_can_hold_ro(inode)) {
+ if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
+ return -EPERM;
+ } else {
+ if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
+ return -EPERM;
+ }
*mode_ptr &= S_IFMT | perm;
{
mode_t allow_utime = sbi->options.allow_utime;
- if (current->fsuid != inode->i_uid) {
+ if (current_fsuid() != inode->i_uid) {
if (in_group_p(inode->i_gid))
allow_utime >>= 3;
if (allow_utime & MAY_WRITE)
}
#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
+ /* valid file mode bits */
+ #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
int fat_setattr(struct dentry *dentry, struct iattr *attr)
{
struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
struct inode *inode = dentry->d_inode;
- int error = 0;
unsigned int ia_valid;
+ int error;
/*
* Expand the file. Since inode_setattr() updates ->i_size
((attr->ia_valid & ATTR_GID) &&
(attr->ia_gid != sbi->options.fs_gid)) ||
((attr->ia_valid & ATTR_MODE) &&
- (attr->ia_mode & ~MSDOS_VALID_MODE)))
+ (attr->ia_mode & ~FAT_VALID_MODE)))
error = -EPERM;
if (error) {
attr->ia_valid &= ~ATTR_MODE;
}
- error = inode_setattr(inode, attr);
+ if (attr->ia_valid)
+ error = inode_setattr(inode, attr);
out:
return error;
}
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/seq_file.h>
- #include <linux/msdos_fs.h>
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/buffer_head.h>
#include <linux/uio.h>
#include <linux/writeback.h>
#include <linux/log2.h>
+ #include <linux/hash.h>
#include <asm/unaligned.h>
+ #include "fat.h"
#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
/* if user don't select VFAT, this is undefined. */
sector_t phys;
int err, offset;
- err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
+ err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
return err;
if (phys) {
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
- err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
+ err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
return err;
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
- return generic_block_bmap(mapping, block, fat_get_block);
+ sector_t blocknr;
+
+ /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
+ mutex_lock(&mapping->host->i_mutex);
+ blocknr = generic_block_bmap(mapping, block, fat_get_block);
+ mutex_unlock(&mapping->host->i_mutex);
+
+ return blocknr;
}
static const struct address_space_operations fat_aops = {
INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
}
- static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos)
+ static inline unsigned long fat_hash(loff_t i_pos)
{
- unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb;
- tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2);
- return tmp & FAT_HASH_MASK;
+ return hash_32(i_pos, FAT_HASH_BITS);
}
void fat_attach(struct inode *inode, loff_t i_pos)
{
- struct super_block *sb = inode->i_sb;
- struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+ struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
spin_lock(&sbi->inode_hash_lock);
MSDOS_I(inode)->i_pos = i_pos;
- hlist_add_head(&MSDOS_I(inode)->i_fat_hash,
- sbi->inode_hashtable + fat_hash(sb, i_pos));
+ hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
spin_unlock(&sbi->inode_hash_lock);
}
-
EXPORT_SYMBOL_GPL(fat_attach);
void fat_detach(struct inode *inode)
hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
spin_unlock(&sbi->inode_hash_lock);
}
-
EXPORT_SYMBOL_GPL(fat_detach);
struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos);
+ struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
struct hlist_node *_p;
struct msdos_inode_info *i;
struct inode *inode = NULL;
if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
inode->i_generation &= ~1;
- inode->i_mode = MSDOS_MKMODE(de->attr,
- S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
+ inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
inode->i_nlink = fat_subdirs(inode);
} else { /* not a directory */
inode->i_generation |= 1;
- inode->i_mode = MSDOS_MKMODE(de->attr,
- ((sbi->options.showexec && !is_exec(de->name + 8))
- ? S_IRUGO|S_IWUGO : S_IRWXUGO)
- & ~sbi->options.fs_fmask) | S_IFREG;
+ inode->i_mode = fat_make_mode(sbi, de->attr,
+ ((sbi->options.showexec && !is_exec(de->name + 8))
+ ? S_IRUGO|S_IWUGO : S_IRWXUGO));
MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
if (sbi->fat_bits == 32)
MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
if (sbi->options.sys_immutable)
inode->i_flags |= S_IMMUTABLE;
}
- MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED;
+ fat_save_attrs(inode, de->attr);
+
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
- inode->i_mtime.tv_sec =
- date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
- sbi->options.tz_utc);
- inode->i_mtime.tv_nsec = 0;
+
+ fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
if (sbi->options.isvfat) {
- int secs = de->ctime_cs / 100;
- int csecs = de->ctime_cs % 100;
- inode->i_ctime.tv_sec =
- date_dos2unix(le16_to_cpu(de->ctime),
- le16_to_cpu(de->cdate),
- sbi->options.tz_utc) + secs;
- inode->i_ctime.tv_nsec = csecs * 10000000;
- inode->i_atime.tv_sec =
- date_dos2unix(0, le16_to_cpu(de->adate),
- sbi->options.tz_utc);
- inode->i_atime.tv_nsec = 0;
+ fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
+ de->cdate, de->ctime_cs);
+ fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
static void fat_clear_inode(struct inode *inode)
{
- struct super_block *sb = inode->i_sb;
- struct msdos_sb_info *sbi = MSDOS_SB(sb);
-
- spin_lock(&sbi->inode_hash_lock);
fat_cache_inval_inode(inode);
- hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
- spin_unlock(&sbi->inode_hash_lock);
+ fat_detach(inode);
}
static void fat_write_super(struct super_block *sb)
return 0;
}
+ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
+ struct inode *inode)
+ {
+ loff_t i_pos;
+ #if BITS_PER_LONG == 32
+ spin_lock(&sbi->inode_hash_lock);
+ #endif
+ i_pos = MSDOS_I(inode)->i_pos;
+ #if BITS_PER_LONG == 32
+ spin_unlock(&sbi->inode_hash_lock);
+ #endif
+ return i_pos;
+ }
+
static int fat_write_inode(struct inode *inode, int wait)
{
struct super_block *sb = inode->i_sb;
loff_t i_pos;
int err;
+ if (inode->i_ino == MSDOS_ROOT_INO)
+ return 0;
+
retry:
- i_pos = MSDOS_I(inode)->i_pos;
- if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
+ i_pos = fat_i_pos_read(sbi, inode);
+ if (!i_pos)
return 0;
bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
raw_entry->size = 0;
else
raw_entry->size = cpu_to_le32(inode->i_size);
- raw_entry->attr = fat_attr(inode);
+ raw_entry->attr = fat_make_attrs(inode);
raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
- fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
- &raw_entry->date, sbi->options.tz_utc);
+ fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
+ &raw_entry->date, NULL);
if (sbi->options.isvfat) {
__le16 atime;
- fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
- &raw_entry->cdate, sbi->options.tz_utc);
- fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
- &raw_entry->adate, sbi->options.tz_utc);
- raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
- inode->i_ctime.tv_nsec / 10000000;
+ fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
+ &raw_entry->cdate, &raw_entry->ctime_cs);
+ fat_time_unix2fat(sbi, &inode->i_atime, &atime,
+ &raw_entry->adate, NULL);
}
spin_unlock(&sbi->inode_hash_lock);
mark_buffer_dirty(bh);
seq_puts(m, ",uni_xlate");
if (!opts->numtail)
seq_puts(m, ",nonumtail");
+ if (opts->rodir)
+ seq_puts(m, ",rodir");
}
- if (sbi->options.flush)
+ if (opts->flush)
seq_puts(m, ",flush");
if (opts->tz_utc)
seq_puts(m, ",tz=UTC");
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
+ Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err,
};
static const match_table_t fat_tokens = {
{Opt_nonumtail_yes, "nonumtail=yes"},
{Opt_nonumtail_yes, "nonumtail=true"},
{Opt_nonumtail_yes, "nonumtail"},
+ {Opt_rodir, "rodir"},
{Opt_err, NULL}
};
opts->isvfat = is_vfat;
- opts->fs_uid = current->uid;
- opts->fs_gid = current->gid;
+ opts->fs_uid = current_uid();
+ opts->fs_gid = current_gid();
opts->fs_fmask = opts->fs_dmask = current->fs->umask;
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
opts->iocharset = fat_default_iocharset;
- if (is_vfat)
+ if (is_vfat) {
opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95;
- else
+ opts->rodir = 0;
+ } else {
opts->shortname = 0;
+ opts->rodir = 1;
+ }
opts->name_check = 'n';
opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
opts->utf8 = opts->unicode_xlate = 0;
case Opt_nonumtail_yes: /* empty or 1 or yes or true */
opts->numtail = 0; /* negated option */
break;
+ case Opt_rodir:
+ opts->rodir = 1;
+ break;
/* obsolete mount options */
case Opt_obsolate:
inode->i_gid = sbi->options.fs_gid;
inode->i_version++;
inode->i_generation = 0;
- inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
+ inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
if (sbi->fat_bits == 32) {
MSDOS_I(inode)->i_logstart = 0;
MSDOS_I(inode)->mmu_private = inode->i_size;
- MSDOS_I(inode)->i_attrs = ATTR_NONE;
+ fat_save_attrs(inode, ATTR_DIR);
inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
inode->i_nlink = fat_subdirs(inode)+2;
if (S_ISLNK(path->dentry->d_inode->i_mode))
return -EPERM;
if (path->dentry->d_inode->i_mode & S_ISVTX) {
- if (current->uid != path->dentry->d_inode->i_uid)
+ if (current_uid() != path->dentry->d_inode->i_uid)
return -EPERM;
}
if (inode_permission(path->dentry->d_inode, MAY_WRITE))
while (!list_empty(&graveyard)) {
m = list_first_entry(&graveyard, struct vfsmount,
mnt_expire);
- touch_mnt_namespace(mnt->mnt_ns);
- umount_tree(mnt, 1, umounts);
+ touch_mnt_namespace(m->mnt_ns);
+ umount_tree(m, 1, umounts);
}
}
}
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
int access, struct file **filp)
{
+ const struct cred *cred = current_cred();
struct dentry *dentry;
struct inode *inode;
int flags = O_RDONLY|O_LARGEFILE;
DQUOT_INIT(inode);
}
*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
- flags);
+ flags, cred);
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
out_nfserr:
* send along the gid on create when it tries to implement
* setgid directories via NFS:
*/
- if (current->fsuid != 0)
+ if (current_fsuid() != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
return -ENOMEM;
offset = *offsetp;
- cdp->err = nfserr_eof; /* will be cleared on successful read */
while (1) {
unsigned int reclen;
+ cdp->err = nfserr_eof; /* will be cleared on successful read */
buf.used = 0;
buf.full = 0;
de = (struct buffered_dirent *)((char *)de + reclen);
}
offset = vfs_llseek(file, 0, SEEK_CUR);
- cdp->err = nfserr_eof;
- if (!buf.full)
- break;
}
done:
IS_APPEND(inode)? " append" : "",
__mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
dprintk(" owner %d/%d user %d/%d\n",
- inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
+ inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
#endif
/* Normally we reject any write/sattr etc access on a read-only file
* with NFSv3.
*/
if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
- inode->i_uid == current->fsuid)
+ inode->i_uid == current_fsuid())
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
}
inode = new_inode(dir->i_sb);
- if (IS_ERR(inode)) {
- status = PTR_ERR(inode);
+ if (!inode) {
+ status = -ENOMEM;
mlog(ML_ERROR, "new_inode failed!\n");
goto leave;
}
fe->i_blkno = cpu_to_le64(fe_blkno);
fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
- fe->i_uid = cpu_to_le32(current->fsuid);
+ fe->i_uid = cpu_to_le32(current_fsuid());
if (dir->i_mode & S_ISGID) {
fe->i_gid = cpu_to_le32(dir->i_gid);
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
- fe->i_gid = cpu_to_le32(current->fsgid);
+ fe->i_gid = cpu_to_le32(current_fsgid());
fe->i_mode = cpu_to_le16(mode);
if (S_ISCHR(mode) || S_ISBLK(mode))
fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
brelse(*new_fe_bh);
*new_fe_bh = NULL;
}
- if (inode)
+ if (inode) {
+ clear_nlink(inode);
iput(inode);
+ }
}
mlog_exit(status);
extern void init_idle_bootup_task(struct task_struct *idle);
extern int runqueue_is_locked(void);
+ extern void task_rq_unlock_wait(struct task_struct *p);
extern cpumask_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
*/
struct rlimit rlim[RLIM_NLIMITS];
- /* keep the process-shared keyrings here so that they do the right
- * thing in threads created with CLONE_THREAD */
-#ifdef CONFIG_KEYS
- struct key *session_keyring; /* keyring inherited over fork */
- struct key *process_keyring; /* keyring private to this process */
-#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
struct pacct_struct pacct; /* per-process accounting information */
#endif
extern struct user_struct root_user;
#define INIT_USER (&root_user)
+
struct backing_dev_info;
struct reclaim_state;
#endif /* !CONFIG_SMP */
struct io_context; /* See blkdev.h */
-#define NGROUPS_SMALL 32
-#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
-struct group_info {
- int ngroups;
- atomic_t usage;
- gid_t small_block[NGROUPS_SMALL];
- int nblocks;
- gid_t *blocks[0];
-};
-/*
- * get_group_info() must be called with the owning task locked (via task_lock())
- * when task != current. The reason being that the vast majority of callers are
- * looking at current->group_info, which can not be changed except by the
- * current task. Changing current->group_info requires the task lock, too.
- */
-#define get_group_info(group_info) do { \
- atomic_inc(&(group_info)->usage); \
-} while (0)
-
-#define put_group_info(group_info) do { \
- if (atomic_dec_and_test(&(group_info)->usage)) \
- groups_free(group_info); \
-} while (0)
-
-extern struct group_info *groups_alloc(int gidsetsize);
-extern void groups_free(struct group_info *group_info);
-extern int set_current_groups(struct group_info *group_info);
-extern int groups_search(struct group_info *group_info, gid_t grp);
-/* access the groups "array" with this macro */
-#define GROUP_AT(gi, i) \
- ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
extern void prefetch_stack(struct task_struct *t);
struct list_head cpu_timers[3];
/* process credentials */
- uid_t uid,euid,suid,fsuid;
- gid_t gid,egid,sgid,fsgid;
- struct group_info *group_info;
- kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset;
- struct user_struct *user;
- unsigned securebits;
-#ifdef CONFIG_KEYS
- unsigned char jit_keyring; /* default keyring to attach requested keys to */
- struct key *request_key_auth; /* assumed request_key authority */
- struct key *thread_keyring; /* keyring private to this thread */
-#endif
+ const struct cred *real_cred; /* objective and real subjective task
+ * credentials (COW) */
+ const struct cred *cred; /* effective (overridable) subjective task
+ * credentials (COW) */
+ struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */
+
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
it with task_lock())
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
-#ifdef CONFIG_SECURITY
- void *security;
-#endif
struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
uid_t loginuid;
*/
unsigned long timer_slack_ns;
unsigned long default_timer_slack_ns;
+
+ struct list_head *scm_work_list;
};
/*
return u;
}
extern void free_uid(struct user_struct *);
-extern void switch_uid(struct user_struct *);
extern void release_uids(struct user_namespace *ns);
#include <asm/current.h>
extern void sched_fork(struct task_struct *p, int clone_flags);
extern void sched_dead(struct task_struct *p);
-extern int in_group_p(gid_t);
-extern int in_egroup_p(gid_t);
-
extern void proc_caches_init(void);
extern void flush_signals(struct task_struct *);
extern void ignore_signals(struct task_struct *);
#define for_each_process(p) \
for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+extern bool is_single_threaded(struct task_struct *);
+
/*
* Careful: do_each_thread/while_each_thread is a double loop so
* 'break' will not work as expected - use goto instead.
struct scm_fp_list
{
- int count;
- struct file *fp[SCM_MAX_FD];
+ struct list_head list;
+ int count;
+ struct file *fp[SCM_MAX_FD];
};
struct scm_cookie
struct scm_cookie *scm)
{
struct task_struct *p = current;
- scm->creds.uid = p->uid;
- scm->creds.gid = p->gid;
+ scm->creds.uid = current_uid();
+ scm->creds.gid = current_gid();
scm->creds.pid = task_tgid_vnr(p);
scm->fp = NULL;
scm->seq = 0;
if (inode) {
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
struct task_struct *tsk;
+ const struct cred *cred = current_cred(), *tcred;
int ret;
if (pid) {
rcu_read_unlock();
return -ESRCH;
}
- get_task_struct(tsk);
- rcu_read_unlock();
- if ((current->euid) && (current->euid != tsk->uid)
- && (current->euid != tsk->suid)) {
- put_task_struct(tsk);
+ tcred = __task_cred(tsk);
+ if (cred->euid &&
+ cred->euid != tcred->uid &&
+ cred->euid != tcred->suid) {
+ rcu_read_unlock();
return -EACCES;
}
+ get_task_struct(tsk);
+ rcu_read_unlock();
} else {
tsk = current;
get_task_struct(tsk);
list_del(&cgrp->sibling);
spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
- cgrp->dentry = NULL;
spin_unlock(&d->d_lock);
cgroup_d_remove_dir(d);
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
+#include <linux/init_task.h>
#include <trace/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#include "cred-internals.h"
static void exit_mm(struct task_struct * tsk);
if (sig) {
flush_sigqueue(&sig->shared_pending);
taskstats_tgid_free(sig);
+ /*
+ * Make sure ->signal can't go away under rq->lock,
+ * see account_group_exec_runtime().
+ */
+ task_rq_unlock_wait(tsk);
__cleanup_signal(sig);
}
}
int zap_leader;
repeat:
tracehook_prepare_release_task(p);
- atomic_dec(&p->user->processes);
+ /* don't need to get the RCU readlock here - the process is dead and
+ * can't be modifying its own credentials */
+ atomic_dec(&__task_cred(p)->user->processes);
+
proc_flush_task(p);
write_lock_irq(&tasklist_lock);
tracehook_finish_release_task(p);
/* cpus_allowed? */
/* rt_priority? */
/* signals? */
- security_task_reparent_to_init(current);
memcpy(current->signal->rlim, init_task.signal->rlim,
sizeof(current->signal->rlim));
- atomic_inc(&(INIT_USER->__count));
+
+ atomic_inc(&init_cred.usage);
+ commit_creds(&init_cred);
write_unlock_irq(&tasklist_lock);
- switch_uid(INIT_USER);
}
void __set_special_pids(struct pid *pid)
check_stack_usage();
exit_thread();
cgroup_exit(tsk, 1);
- exit_keys(tsk);
if (group_dead && tsk->signal->leader)
disassociate_ctty(1);
unsigned long state;
int retval, status, traced;
pid_t pid = task_pid_vnr(p);
+ uid_t uid = __task_cred(p)->uid;
if (!likely(options & WEXITED))
return 0;
if (unlikely(options & WNOWAIT)) {
- uid_t uid = p->uid;
int exit_code = p->exit_code;
int why, status;
if (!retval && infop)
retval = put_user(pid, &infop->si_pid);
if (!retval && infop)
- retval = put_user(p->uid, &infop->si_uid);
+ retval = put_user(uid, &infop->si_uid);
if (!retval)
retval = pid;
if (!unlikely(options & WNOWAIT))
p->exit_code = 0;
- uid = p->uid;
+ /* don't need the RCU readlock here as we're holding a spinlock */
+ uid = __task_cred(p)->uid;
unlock_sig:
spin_unlock_irq(&p->sighand->siglock);
if (!exit_code)
}
if (!unlikely(options & WNOWAIT))
p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
+ uid = __task_cred(p)->uid;
spin_unlock_irq(&p->sighand->siglock);
pid = task_pid_vnr(p);
- uid = p->uid;
get_task_struct(p);
read_unlock(&tasklist_lock);
struct task_group *tg;
#ifdef CONFIG_USER_SCHED
- tg = p->user->tg;
+ rcu_read_lock();
+ tg = __task_cred(p)->user->tg;
+ rcu_read_unlock();
#elif defined(CONFIG_CGROUP_SCHED)
tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
struct task_group, css);
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
- struct sched_entity *curr, *next;
+ struct sched_entity *curr, *next, *last;
- unsigned long nr_spread_over;
+ unsigned int nr_spread_over;
#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
}
}
+ void task_rq_unlock_wait(struct task_struct *p)
+ {
+ struct rq *rq = task_rq(p);
+
+ smp_mb(); /* spin-unlock-wait is not a full memory barrier */
+ spin_unlock_wait(&rq->lock);
+ }
+
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
if (rq->nr_running)
rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+ else
+ rq->avg_load_per_task = 0;
return rq->avg_load_per_task;
}
/*
* Buddy candidates are cache hot:
*/
- if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
+ if (sched_feat(CACHE_HOT_BUDDY) &&
+ (&p->se == cfs_rq_of(&p->se)->next ||
+ &p->se == cfs_rq_of(&p->se)->last))
return 1;
if (p->sched_class != &fair_sched_class)
set_load_weight(p);
}
+/*
+ * check the target process has a UID that matches the current process's
+ */
+static bool check_same_owner(struct task_struct *p)
+{
+ const struct cred *cred = current_cred(), *pcred;
+ bool match;
+
+ rcu_read_lock();
+ pcred = __task_cred(p);
+ match = (cred->euid == pcred->euid ||
+ cred->euid == pcred->uid);
+ rcu_read_unlock();
+ return match;
+}
+
static int __sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param, bool user)
{
return -EPERM;
/* can't change other user's priorities */
- if ((current->euid != p->euid) &&
- (current->euid != p->uid))
+ if (!check_same_owner(p))
return -EPERM;
}
read_unlock(&tasklist_lock);
retval = -EPERM;
- if ((current->euid != p->euid) && (current->euid != p->uid) &&
- !capable(CAP_SYS_NICE))
+ if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
goto out_unlock;
retval = security_task_setscheduler(p, 0, NULL);
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
+ spin_lock_irqsave(&rq->lock, flags);
+
__sched_fork(idle);
idle->se.exec_start = sched_clock();
idle->cpus_allowed = cpumask_of_cpu(cpu);
__set_task_cpu(idle, cpu);
- spin_lock_irqsave(&rq->lock, flags);
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
idle->oncpu = 1;
struct sched_domain *tmp;
/* Remove the sched domains which do not contribute to scheduling. */
- for (tmp = sd; tmp; tmp = tmp->parent) {
+ for (tmp = sd; tmp; ) {
struct sched_domain *parent = tmp->parent;
if (!parent)
break;
+
if (sd_parent_degenerate(tmp, parent)) {
tmp->parent = parent->parent;
if (parent->parent)
parent->parent->child = tmp;
- }
+ } else
+ tmp = tmp->parent;
}
if (sd && sd_degenerate(sd)) {
error:
free_sched_groups(cpu_map, tmpmask);
SCHED_CPUMASK_FREE((void *)allmasks);
+ kfree(rd);
return -ENOMEM;
#endif
}
tbase_get_deferrable(timer->base));
}
- /**
- * __round_jiffies - function to round jiffies to a full second
- * @j: the time in (absolute) jiffies that should be rounded
- * @cpu: the processor number on which the timeout will happen
- *
- * __round_jiffies() rounds an absolute time in the future (in jiffies)
- * up or down to (approximately) full seconds. This is useful for timers
- * for which the exact time they fire does not matter too much, as long as
- * they fire approximately every X seconds.
- *
- * By rounding these timers to whole seconds, all such timers will fire
- * at the same time, rather than at various times spread out. The goal
- * of this is to have the CPU wake up less, which saves power.
- *
- * The exact rounding is skewed for each processor to avoid all
- * processors firing at the exact same time, which could lead
- * to lock contention or spurious cache line bouncing.
- *
- * The return value is the rounded version of the @j parameter.
- */
- unsigned long __round_jiffies(unsigned long j, int cpu)
+ static unsigned long round_jiffies_common(unsigned long j, int cpu,
+ bool force_up)
{
int rem;
unsigned long original = j;
* due to delays of the timer irq, long irq off times etc etc) then
* we should round down to the whole second, not up. Use 1/4th second
* as cutoff for this rounding as an extreme upper bound for this.
+ * But never round down if @force_up is set.
*/
- if (rem < HZ/4) /* round down */
+ if (rem < HZ/4 && !force_up) /* round down */
j = j - rem;
else /* round up */
j = j - rem + HZ;
return original;
return j;
}
+
+ /**
+ * __round_jiffies - function to round jiffies to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * __round_jiffies() rounds an absolute time in the future (in jiffies)
+ * up or down to (approximately) full seconds. This is useful for timers
+ * for which the exact time they fire does not matter too much, as long as
+ * they fire approximately every X seconds.
+ *
+ * By rounding these timers to whole seconds, all such timers will fire
+ * at the same time, rather than at various times spread out. The goal
+ * of this is to have the CPU wake up less, which saves power.
+ *
+ * The exact rounding is skewed for each processor to avoid all
+ * processors firing at the exact same time, which could lead
+ * to lock contention or spurious cache line bouncing.
+ *
+ * The return value is the rounded version of the @j parameter.
+ */
+ unsigned long __round_jiffies(unsigned long j, int cpu)
+ {
+ return round_jiffies_common(j, cpu, false);
+ }
EXPORT_SYMBOL_GPL(__round_jiffies);
/**
*/
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
- /*
- * In theory the following code can skip a jiffy in case jiffies
- * increments right between the addition and the later subtraction.
- * However since the entire point of this function is to use approximate
- * timeouts, it's entirely ok to not handle that.
- */
- return __round_jiffies(j + jiffies, cpu) - jiffies;
+ unsigned long j0 = jiffies;
+
+ /* Use j0 because jiffies might change while we run */
+ return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);
*/
unsigned long round_jiffies(unsigned long j)
{
- return __round_jiffies(j, raw_smp_processor_id());
+ return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
+ /**
+ * __round_jiffies_up - function to round jiffies up to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * This is the same as __round_jiffies() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+ unsigned long __round_jiffies_up(unsigned long j, int cpu)
+ {
+ return round_jiffies_common(j, cpu, true);
+ }
+ EXPORT_SYMBOL_GPL(__round_jiffies_up);
+
+ /**
+ * __round_jiffies_up_relative - function to round jiffies up to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * This is the same as __round_jiffies_relative() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+ unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
+ {
+ unsigned long j0 = jiffies;
+
+ /* Use j0 because jiffies might change while we run */
+ return round_jiffies_common(j + j0, cpu, true) - j0;
+ }
+ EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
+
+ /**
+ * round_jiffies_up - function to round jiffies up to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ *
+ * This is the same as round_jiffies() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+ unsigned long round_jiffies_up(unsigned long j)
+ {
+ return round_jiffies_common(j, raw_smp_processor_id(), true);
+ }
+ EXPORT_SYMBOL_GPL(round_jiffies_up);
+
+ /**
+ * round_jiffies_up_relative - function to round jiffies up to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ *
+ * This is the same as round_jiffies_relative() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+ unsigned long round_jiffies_up_relative(unsigned long j)
+ {
+ return __round_jiffies_up_relative(j, raw_smp_processor_id());
+ }
+ EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
+
static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
asmlinkage long sys_getuid(void)
{
/* Only we change this so SMP safe */
- return current->uid;
+ return current_uid();
}
asmlinkage long sys_geteuid(void)
{
/* Only we change this so SMP safe */
- return current->euid;
+ return current_euid();
}
asmlinkage long sys_getgid(void)
{
/* Only we change this so SMP safe */
- return current->gid;
+ return current_gid();
}
asmlinkage long sys_getegid(void)
{
/* Only we change this so SMP safe */
- return current->egid;
+ return current_egid();
}
#endif
memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
data->pid = tsk->pid;
- data->uid = tsk->uid;
+ data->uid = task_uid(tsk);
data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
data->policy = tsk->policy;
data->rt_priority = tsk->rt_priority;
return TRACE_TYPE_HANDLED;
SEQ_PUT_FIELD_RET(s, entry->pid);
- SEQ_PUT_FIELD_RET(s, iter->cpu);
+ SEQ_PUT_FIELD_RET(s, entry->cpu);
SEQ_PUT_FIELD_RET(s, iter->ts);
switch (entry->type) {
{
unsigned long val;
char buf[64];
- int ret;
+ int ret, cpu;
struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
goto out;
}
+ /* disable all cpu buffers */
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_inc(&max_tr.data[cpu]->disabled);
+ }
+
if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {
if (tracing_disabled)
cnt = -ENOMEM;
out:
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_dec(&max_tr.data[cpu]->disabled);
+ }
+
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
static cpumask_t cpu_populated_map __read_mostly;
/* If it's single threaded, it isn't in the list of workqueues. */
-static inline int is_single_threaded(struct workqueue_struct *wq)
+static inline int is_wq_single_threaded(struct workqueue_struct *wq)
{
return wq->singlethread;
}
static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq)
{
- return is_single_threaded(wq)
+ return is_wq_single_threaded(wq)
? &cpu_singlethread_map : &cpu_populated_map;
}
static
struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu)
{
- if (unlikely(is_single_threaded(wq)))
+ if (unlikely(is_wq_single_threaded(wq)))
cpu = singlethread_cpu;
return per_cpu_ptr(wq->cpu_wq, cpu);
}
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
struct workqueue_struct *wq = cwq->wq;
- const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
+ const char *fmt = is_wq_single_threaded(wq) ? "%s" : "%s/%d";
struct task_struct *p;
p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);
return ret;
}
+ #ifdef CONFIG_SMP
+ struct work_for_cpu {
+ struct work_struct work;
+ long (*fn)(void *);
+ void *arg;
+ long ret;
+ };
+
+ static void do_work_for_cpu(struct work_struct *w)
+ {
+ struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
+
+ wfc->ret = wfc->fn(wfc->arg);
+ }
+
+ /**
+ * work_on_cpu - run a function in user context on a particular cpu
+ * @cpu: the cpu to run on
+ * @fn: the function to run
+ * @arg: the function arg
+ *
+ * This will return -EINVAL in the cpu is not online, or the return value
+ * of @fn otherwise.
+ */
+ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+ {
+ struct work_for_cpu wfc;
+
+ INIT_WORK(&wfc.work, do_work_for_cpu);
+ wfc.fn = fn;
+ wfc.arg = arg;
+ get_online_cpus();
+ if (unlikely(!cpu_online(cpu)))
+ wfc.ret = -EINVAL;
+ else {
+ schedule_work_on(cpu, &wfc.work);
+ flush_work(&wfc.work);
+ }
+ put_online_cpus();
+
+ return wfc.ret;
+ }
+ EXPORT_SYMBOL_GPL(work_on_cpu);
+ #endif /* CONFIG_SMP */
+
void __init init_workqueues(void)
{
cpu_populated_map = cpu_online_map;
int err;
struct vm_area_struct *first, *vma, *prev;
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-
- err = migrate_prep();
- if (err)
- return ERR_PTR(err);
- }
first = find_vma(mm, start);
if (!first)
const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
{
int busy = 0;
- int err = 0;
+ int err;
nodemask_t tmp;
+ err = migrate_prep();
+ if (err)
+ return err;
+
down_read(&mm->mmap_sem);
err = migrate_vmas(mm, from_nodes, to_nodes, flags);
start, start + len, mode, mode_flags,
nmask ? nodes_addr(*nmask)[0] : -1);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+
+ err = migrate_prep();
+ if (err)
+ return err;
+ }
down_write(&mm->mmap_sem);
vma = check_range(mm, start, end, nmask,
flags | MPOL_MF_INVERT, &pagelist);
const unsigned long __user *old_nodes,
const unsigned long __user *new_nodes)
{
+ const struct cred *cred = current_cred(), *tcred;
struct mm_struct *mm;
struct task_struct *task;
nodemask_t old;
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- if ((current->euid != task->suid) && (current->euid != task->uid) &&
- (current->uid != task->suid) && (current->uid != task->uid) &&
+ rcu_read_lock();
+ tcred = __task_cred(task);
+ if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+ cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
task_nodes = cpuset_mems_allowed(task);
/* Is the user allowed to access the target nodes? */
struct page_to_node *pp;
LIST_HEAD(pagelist);
+ migrate_prep();
down_read(&mm->mmap_sem);
/*
* Build a list of pages to migrate
*/
- migrate_prep();
for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
struct vm_area_struct *vma;
struct page *page;
const int __user *nodes,
int __user *status, int flags)
{
+ const struct cred *cred = current_cred(), *tcred;
struct task_struct *task;
struct mm_struct *mm;
int err;
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- if ((current->euid != task->suid) && (current->euid != task->uid) &&
- (current->uid != task->suid) && (current->uid != task->uid) &&
+ rcu_read_lock();
+ tcred = __task_cred(task);
+ if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+ cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
err = security_task_movememory(task);
if (err)
* badness - calculate a numeric value for how bad this task has been
* @p: task struct of which task we should calculate
* @uptime: current uptime in seconds
- * @mem: target memory controller
*
* The formula used is relatively simple and documented inline in the
* function. The main rationale is that we want to select a good task
* Superuser processes are usually more important, so we make it
* less likely that we kill those.
*/
- if (has_capability(p, CAP_SYS_ADMIN) ||
- has_capability(p, CAP_SYS_RESOURCE))
+ if (has_capability_noaudit(p, CAP_SYS_ADMIN) ||
+ has_capability_noaudit(p, CAP_SYS_RESOURCE))
points /= 4;
/*
* tend to only have this flag set on applications they think
* of as important.
*/
- if (has_capability(p, CAP_SYS_RAWIO))
+ if (has_capability_noaudit(p, CAP_SYS_RAWIO))
points /= 4;
/*
continue;
if (mem && !task_in_mem_cgroup(p, mem))
continue;
+ if (!thread_group_leader(p))
+ continue;
task_lock(p);
printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
- p->pid, p->uid, p->tgid, p->mm->total_vm,
- get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj,
- p->comm);
+ p->pid, __task_cred(p)->uid, p->tgid,
+ p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p),
+ p->oomkilladj, p->comm);
task_unlock(p);
} while_each_thread(g, p);
}
printk(KERN_ERR "Couldn't grow tag array\n");
kfree(req->tc);
kfree(req->rc);
+ kfree(req->wq);
+ req->tc = req->rc = NULL;
+ req->wq = NULL;
return ERR_PTR(-ENOMEM);
}
req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
r->status = REQ_STATUS_IDLE;
if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
p9_idpool_put(tag, c->tagpool);
-
- /* if this was a flush request we have to free response fcall */
- if (r->rc->id == P9_RFLUSH) {
- kfree(r->tc);
- kfree(r->rc);
- }
}
/**
static struct p9_fid *p9_fid_create(struct p9_client *clnt)
{
- int err;
+ int ret;
struct p9_fid *fid;
+ unsigned long flags;
P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt);
fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
if (!fid)
return ERR_PTR(-ENOMEM);
- fid->fid = p9_idpool_get(clnt->fidpool);
+ ret = p9_idpool_get(clnt->fidpool);
if (fid->fid < 0) {
- err = -ENOSPC;
+ ret = -ENOSPC;
goto error;
}
+ fid->fid = ret;
memset(&fid->qid, 0, sizeof(struct p9_qid));
fid->mode = -1;
fid->rdir_fpos = 0;
- fid->uid = current->fsuid;
+ fid->uid = current_fsuid();
fid->clnt = clnt;
fid->aux = NULL;
- spin_lock(&clnt->lock);
+ spin_lock_irqsave(&clnt->lock, flags);
list_add(&fid->flist, &clnt->fidlist);
- spin_unlock(&clnt->lock);
+ spin_unlock_irqrestore(&clnt->lock, flags);
return fid;
error:
kfree(fid);
- return ERR_PTR(err);
+ return ERR_PTR(ret);
}
static void p9_fid_destroy(struct p9_fid *fid)
{
struct p9_client *clnt;
+ unsigned long flags;
P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid);
clnt = fid->clnt;
p9_idpool_put(fid->fid, clnt->fidpool);
- spin_lock(&clnt->lock);
+ spin_lock_irqsave(&clnt->lock, flags);
list_del(&fid->flist);
- spin_unlock(&clnt->lock);
+ spin_unlock_irqrestore(&clnt->lock, flags);
kfree(fid);
}
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
- qid.type, qid.path, qid.version);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
- qid.type, qid.path, qid.version);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version);
memmove(&afid->qid, &qid, sizeof(struct p9_qid));
p9_free_req(clnt, req);
for (count = 0; count < nwqids; count++)
P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n",
- count, wqids[count].type, wqids[count].path,
+ count, wqids[count].type,
+ (unsigned long long)wqids[count].path,
wqids[count].version);
if (nwname)
}
P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
- qid.type, qid.path, qid.version, iounit);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version, iounit);
fid->mode = mode;
fid->iounit = iounit;
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
- qid.type, qid.path, qid.version, iounit);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version, iounit);
fid->mode = mode;
fid->iounit = iounit;
"<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
"<<< uid=%d gid=%d n_muid=%d\n",
ret->size, ret->type, ret->dev, ret->qid.type,
- ret->qid.path, ret->qid.version, ret->mode,
- ret->atime, ret->mtime, ret->length, ret->name,
- ret->uid, ret->gid, ret->muid, ret->extension,
+ (unsigned long long)ret->qid.path, ret->qid.version, ret->mode,
+ ret->atime, ret->mtime, (unsigned long long)ret->length,
+ ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
ret->n_uid, ret->n_gid, ret->n_muid);
free_and_error:
" name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
" uid=%d gid=%d n_muid=%d\n",
wst->size, wst->type, wst->dev, wst->qid.type,
- wst->qid.path, wst->qid.version, wst->mode,
- wst->atime, wst->mtime, wst->length, wst->name,
- wst->uid, wst->gid, wst->muid, wst->extension,
+ (unsigned long long)wst->qid.path, wst->qid.version, wst->mode,
+ wst->atime, wst->mtime, (unsigned long long)wst->length,
+ wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
wst->n_uid, wst->n_gid, wst->n_muid);
err = 0;
clnt = fid->clnt;
int ret = NET_RX_DROP;
__be16 type;
+ if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+ return NET_RX_SUCCESS;
+
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
return NET_RX_DROP;
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
+ uid_t uid;
+ gid_t gid;
ASSERT_RTNL();
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
"left");
- if (audit_enabled)
+ if (audit_enabled) {
+ current_uid_gid(&uid, &gid);
audit_log(current->audit_context, GFP_ATOMIC,
AUDIT_ANOM_PROMISCUOUS,
"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
dev->name, (dev->flags & IFF_PROMISC),
(old_flags & IFF_PROMISC),
audit_get_loginuid(current),
- current->uid, current->gid,
+ uid, gid,
audit_get_sessionid(current));
+ }
dev_change_rx_flags(dev, IFF_PROMISC);
}
static __inline__ int scm_check_creds(struct ucred *creds)
{
+ const struct cred *cred = current_cred();
+
if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
- ((creds->uid == current->uid || creds->uid == current->euid ||
- creds->uid == current->suid) || capable(CAP_SETUID)) &&
- ((creds->gid == current->gid || creds->gid == current->egid ||
- creds->gid == current->sgid) || capable(CAP_SETGID))) {
+ ((creds->uid == cred->uid || creds->uid == cred->euid ||
+ creds->uid == cred->suid) || capable(CAP_SETUID)) &&
+ ((creds->gid == cred->gid || creds->gid == cred->egid ||
+ creds->gid == cred->sgid) || capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
if (!fpl)
return -ENOMEM;
*fplp = fpl;
+ INIT_LIST_HEAD(&fpl->list);
fpl->count = 0;
}
fpp = &fpl->fp[fpl->count];
if (fpl) {
scm->fp = NULL;
- for (i=fpl->count-1; i>=0; i--)
- fput(fpl->fp[i]);
- kfree(fpl);
+ if (current->scm_work_list) {
+ list_add_tail(&fpl->list, current->scm_work_list);
+ } else {
+ LIST_HEAD(work_list);
+
+ current->scm_work_list = &work_list;
+
+ list_add(&fpl->list, &work_list);
+ while (!list_empty(&work_list)) {
+ fpl = list_first_entry(&work_list, struct scm_fp_list, list);
+
+ list_del(&fpl->list);
+ for (i=fpl->count-1; i>=0; i--)
+ fput(fpl->fp[i]);
+ kfree(fpl);
+ }
+
+ current->scm_work_list = NULL;
+ }
}
}
new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
if (new_fpl) {
+ INIT_LIST_HEAD(&new_fpl->list);
for (i=fpl->count-1; i>=0; i--)
get_file(fpl->fp[i]);
memcpy(new_fpl, fpl, sizeof(*fpl));
sk->sk_state = TCP_LISTEN;
/* set credentials so connect can copy them */
sk->sk_peercred.pid = task_tgid_vnr(current);
- sk->sk_peercred.uid = current->euid;
- sk->sk_peercred.gid = current->egid;
+ current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
err = 0;
out_unlock:
newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type;
newsk->sk_peercred.pid = task_tgid_vnr(current);
- newsk->sk_peercred.uid = current->euid;
- newsk->sk_peercred.gid = current->egid;
+ current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
newu = unix_sk(newsk);
newsk->sk_sleep = &newu->peer_wait;
otheru = unix_sk(other);
unix_peer(ska)=skb;
unix_peer(skb)=ska;
ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
- ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
- ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
+ current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
+ ska->sk_peercred.uid = skb->sk_peercred.uid;
+ ska->sk_peercred.gid = skb->sk_peercred.gid;
if (ska->sk_type != SOCK_DGRAM) {
ska->sk_state = TCP_ESTABLISHED;
sock_wfree(skb);
}
- static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
for (i=scm->fp->count-1; i>=0; i--)
unix_inflight(scm->fp->fp[i]);
- UNIXCB(skb).fp = scm->fp;
skb->destructor = unix_destruct_fds;
- scm->fp = NULL;
+ return 0;
}
/*
goto out;
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err)
+ goto out_free;
+ }
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
size = min_t(int, size, skb_tailroom(skb));
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
+ }
if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
kfree_skb(skb);