( On 32-bit x86, the necessary options added to the compiler
flags may increase the size of the kernel slightly. )
+config STATIC_KEYS_SELFTEST
+ bool "Static key selftest"
+ depends on JUMP_LABEL
+ help
+ Boot time self-test of the branch patching code.
+
config OPTPROBES
def_bool y
depends on KPROBES && HAVE_OPTPROBES
config UPROBES
def_bool n
- select PERCPU_RWSEM
help
Uprobes is the user-space counterpart to kprobes: they
enable instrumentation applications (such as 'perf probe')
mutex_init(&cur_trans->cache_write_mutex);
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
+ INIT_LIST_HEAD(&cur_trans->deleted_bgs);
+ spin_lock_init(&cur_trans->deleted_bgs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);
*/
btrfs_set_skip_qgroup(trans, objectid);
- btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
+ btrfs_reloc_pre_snapshot(pending, &to_reserve);
if (to_reserve > 0) {
pending->error = btrfs_block_rsv_add(root,
* Tell lockdep about it.
*/
if (ac->newtrans->type & __TRANS_FREEZABLE)
- rwsem_acquire_read(
- &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
- 0, 1, _THIS_IP_);
+ __sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);
current->journal_info = ac->newtrans;
* async commit thread will be the one to unlock it.
*/
if (ac->newtrans->type & __TRANS_FREEZABLE)
- rwsem_release(
- &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
- 1, _THIS_IP_);
+ __sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);
schedule_work(&ac->work);
spin_unlock(&root->fs_info->trans_lock);
wait_for_commit(root, prev_trans);
+ ret = prev_trans->aborted;
btrfs_put_transaction(prev_trans);
+ if (ret)
+ goto cleanup_transaction;
} else {
spin_unlock(&root->fs_info->trans_lock);
}
static inline struct inode *wb_inode(struct list_head *head)
{
- return list_entry(head, struct inode, i_wb_list);
+ return list_entry(head, struct inode, i_io_list);
}
/*
}
/**
- * inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list
+ * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
* @inode: inode to be moved
* @wb: target bdi_writeback
* @head: one of @wb->b_{dirty|io|more_io}
*
- * Move @inode->i_wb_list to @list of @wb and set %WB_has_dirty_io.
+ * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
* Returns %true if @inode is the first occupant of the !dirty_time IO
* lists; otherwise, %false.
*/
- static bool inode_wb_list_move_locked(struct inode *inode,
+ static bool inode_io_list_move_locked(struct inode *inode,
struct bdi_writeback *wb,
struct list_head *head)
{
assert_spin_locked(&wb->list_lock);
- list_move(&inode->i_wb_list, head);
+ list_move(&inode->i_io_list, head);
/* dirty_time doesn't count as dirty_io until expiration */
if (head != &wb->b_dirty_time)
}
/**
- * inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list
+ * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
* @inode: inode to be removed
* @wb: bdi_writeback @inode is being removed from
*
* Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
* clear %WB_has_dirty_io if all are empty afterwards.
*/
- static void inode_wb_list_del_locked(struct inode *inode,
+ static void inode_io_list_del_locked(struct inode *inode,
struct bdi_writeback *wb)
{
assert_spin_locked(&wb->list_lock);
- list_del_init(&inode->i_wb_list);
+ list_del_init(&inode->i_io_list);
wb_io_lists_depopulated(wb);
}
/*
* Once I_FREEING is visible under i_lock, the eviction path owns
- * the inode and we shouldn't modify ->i_wb_list.
+ * the inode and we shouldn't modify ->i_io_list.
*/
if (unlikely(inode->i_state & I_FREEING))
goto skip_switch;
* is always correct including from ->b_dirty_time. The transfer
* preserves @inode->dirtied_when ordering.
*/
- if (!list_empty(&inode->i_wb_list)) {
+ if (!list_empty(&inode->i_io_list)) {
struct inode *pos;
- inode_wb_list_del_locked(inode, old_wb);
+ inode_io_list_del_locked(inode, old_wb);
inode->i_wb = new_wb;
- list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list)
+ list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
if (time_after_eq(inode->dirtied_when,
pos->dirtied_when))
break;
- inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev);
+ inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
} else {
inode->i_wb = new_wb;
}
struct wb_iter iter;
might_sleep();
-
- if (!bdi_has_dirty_io(bdi))
- return;
restart:
rcu_read_lock();
bdi_for_each_wb(wb, bdi, &iter, next_blkcg_id) {
- if (!wb_has_dirty_io(wb) ||
- (skip_if_busy && writeback_in_progress(wb)))
+ /* SYNC_ALL writes out I_DIRTY_TIME too */
+ if (!wb_has_dirty_io(wb) &&
+ (base_work->sync_mode == WB_SYNC_NONE ||
+ list_empty(&wb->b_dirty_time)))
+ continue;
+ if (skip_if_busy && writeback_in_progress(wb))
continue;
base_work->nr_pages = wb_split_bdi_pages(wb, nr_pages);
{
might_sleep();
- if (bdi_has_dirty_io(bdi) &&
- (!skip_if_busy || !writeback_in_progress(&bdi->wb))) {
+ if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
base_work->auto_free = 0;
base_work->single_wait = 0;
base_work->single_done = 0;
/*
* Remove the inode from the writeback list it is on.
*/
- void inode_wb_list_del(struct inode *inode)
+ void inode_io_list_del(struct inode *inode)
{
struct bdi_writeback *wb;
wb = inode_to_wb_and_lock_list(inode);
- inode_wb_list_del_locked(inode, wb);
+ inode_io_list_del_locked(inode, wb);
spin_unlock(&wb->list_lock);
}
if (time_before(inode->dirtied_when, tail->dirtied_when))
inode->dirtied_when = jiffies;
}
- inode_wb_list_move_locked(inode, wb, &wb->b_dirty);
+ inode_io_list_move_locked(inode, wb, &wb->b_dirty);
}
/*
*/
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
- inode_wb_list_move_locked(inode, wb, &wb->b_more_io);
+ inode_io_list_move_locked(inode, wb, &wb->b_more_io);
}
static void inode_sync_complete(struct inode *inode)
if (older_than_this &&
inode_dirtied_after(inode, *older_than_this))
break;
- list_move(&inode->i_wb_list, &tmp);
+ list_move(&inode->i_io_list, &tmp);
moved++;
if (flags & EXPIRE_DIRTY_ATIME)
set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
list_for_each_prev_safe(pos, node, &tmp) {
inode = wb_inode(pos);
if (inode->i_sb == sb)
- list_move(&inode->i_wb_list, dispatch_queue);
+ list_move(&inode->i_io_list, dispatch_queue);
}
}
out:
redirty_tail(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
inode->dirtied_when = jiffies;
- inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time);
+ inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
} else {
/* The inode is clean. Remove from writeback lists. */
- inode_wb_list_del_locked(inode, wb);
+ inode_io_list_del_locked(inode, wb);
}
}
* touch it. See comment above for explanation.
*/
if (!(inode->i_state & I_DIRTY_ALL))
- inode_wb_list_del_locked(inode, wb);
+ inode_io_list_del_locked(inode, wb);
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
out:
unsigned long start_time = jiffies;
long write_chunk;
long wrote = 0; /* count both pages and inodes */
+ struct blk_plug plug;
+ blk_start_plug(&plug);
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
break;
}
}
+ blk_finish_plug(&plug);
return wrote;
}
else
dirty_list = &wb->b_dirty_time;
- wakeup_bdi = inode_wb_list_move_locked(inode, wb,
+ wakeup_bdi = inode_io_list_move_locked(inode, wb,
dirty_list);
spin_unlock(&wb->list_lock);
}
EXPORT_SYMBOL(__mark_inode_dirty);
+ /*
+ * The @s_sync_lock is used to serialise concurrent sync operations
+ * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
+ * Concurrent callers will block on the s_sync_lock rather than doing contending
+ * walks. The queueing maintains sync(2) required behaviour as all the IO that
+ * has been issued up to the time this function is enter is guaranteed to be
+ * completed by the time we have gained the lock and waited for all IO that is
+ * in progress regardless of the order callers are granted the lock.
+ */
static void wait_sb_inodes(struct super_block *sb)
{
struct inode *inode, *old_inode = NULL;
*/
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- spin_lock(&inode_sb_list_lock);
+ mutex_lock(&sb->s_sync_lock);
+ spin_lock(&sb->s_inode_list_lock);
/*
* Data integrity sync. Must wait for all pages under writeback,
}
__iget(inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
/*
* We hold a reference to 'inode' so it couldn't have been
* removed from s_inodes list while we dropped the
- * inode_sb_list_lock. We cannot iput the inode now as we can
+ * s_inode_list_lock. We cannot iput the inode now as we can
* be holding the last reference and we cannot iput it under
- * inode_sb_list_lock. So we keep the reference and iput it
+ * s_inode_list_lock. So we keep the reference and iput it
* later.
*/
iput(old_inode);
cond_resched();
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&sb->s_inode_list_lock);
}
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
iput(old_inode);
+ mutex_unlock(&sb->s_sync_lock);
}
static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
};
struct backing_dev_info *bdi = sb->s_bdi;
- /* Nothing to do? */
- if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
+ /*
+ * Can't skip on !bdi_has_dirty() because we should wait for !dirty
+ * inodes under writeback and I_DIRTY_TIME inodes ignored by
+ * bdi_has_dirty() need to be written out too.
+ */
+ if (bdi == &noop_backing_dev_info)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
spin_unlock(&inode->i_lock);
}
-/*
- * Given an inode, destroy all of the marks associated with that inode.
- */
-void fsnotify_clear_marks_by_inode(struct inode *inode)
-{
- struct fsnotify_mark *mark;
- struct hlist_node *n;
- LIST_HEAD(free_list);
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, obj_list) {
- list_add(&mark->free_list, &free_list);
- hlist_del_init_rcu(&mark->obj_list);
- fsnotify_get_mark(mark);
- }
- spin_unlock(&inode->i_lock);
-
- fsnotify_destroy_marks(&free_list);
-}
-
/*
* Given a group clear all of the inode marks associated with that group.
*/
/**
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
- * @list: list of inodes being unmounted (sb->s_inodes)
+ * @sb: superblock being unmounted.
*
* Called during unmount with no locks held, so needs to be safe against
- * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
+ * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
*/
- void fsnotify_unmount_inodes(struct list_head *list)
+ void fsnotify_unmount_inodes(struct super_block *sb)
{
struct inode *inode, *next_i, *need_iput = NULL;
- spin_lock(&inode_sb_list_lock);
- list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) {
struct inode *need_iput_tmp;
/*
spin_unlock(&inode->i_lock);
/* In case the dropping of a reference would nuke next_i. */
- while (&next_i->i_sb_list != list) {
+ while (&next_i->i_sb_list != &sb->s_inodes) {
spin_lock(&next_i->i_lock);
if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
atomic_read(&next_i->i_count)) {
}
/*
- * We can safely drop inode_sb_list_lock here because either
+ * We can safely drop s_inode_list_lock here because either
* we actually hold references on both inode and next_i or
* end of list. Also no new inodes will be added since the
* umount has begun.
*/
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
if (need_iput_tmp)
iput(need_iput_tmp);
iput(inode);
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&sb->s_inode_list_lock);
}
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
}
EXPORT_SYMBOL(dqstats);
static qsize_t inode_get_rsv_space(struct inode *inode);
-static void __dquot_initialize(struct inode *inode, int type);
+static int __dquot_initialize(struct inode *inode, int type);
static inline unsigned int
hashfn(const struct super_block *sb, struct kqid qid)
struct dquot *dqget(struct super_block *sb, struct kqid qid)
{
unsigned int hashent = hashfn(sb, qid);
- struct dquot *dquot = NULL, *empty = NULL;
+ struct dquot *dquot, *empty = NULL;
if (!sb_has_quota_active(sb, qid.type))
- return NULL;
+ return ERR_PTR(-ESRCH);
we_slept:
spin_lock(&dq_list_lock);
spin_lock(&dq_state_lock);
if (!sb_has_quota_active(sb, qid.type)) {
spin_unlock(&dq_state_lock);
spin_unlock(&dq_list_lock);
+ dquot = ERR_PTR(-ESRCH);
goto out;
}
spin_unlock(&dq_state_lock);
* already finished or it will be canceled due to dq_count > 1 test */
wait_on_dquot(dquot);
/* Read the dquot / allocate space in quota file */
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) &&
- sb->dq_op->acquire_dquot(dquot) < 0) {
- dqput(dquot);
- dquot = NULL;
- goto out;
+ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ int err;
+
+ err = sb->dq_op->acquire_dquot(dquot);
+ if (err < 0) {
+ dqput(dquot);
+ dquot = ERR_PTR(err);
+ goto out;
+ }
}
#ifdef CONFIG_QUOTA_DEBUG
BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */
int reserved = 0;
#endif
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
}
__iget(inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (unlikely(inode_get_rsv_space(inode) > 0))
/*
* We hold a reference to 'inode' so it couldn't have been
* removed from s_inodes list while we dropped the
- * inode_sb_list_lock We cannot iput the inode now as we can be
+ * s_inode_list_lock. We cannot iput the inode now as we can be
* holding the last reference and we cannot iput it under
- * inode_sb_list_lock. So we keep the reference and iput it
+ * s_inode_list_lock. So we keep the reference and iput it
* later.
*/
old_inode = inode;
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&sb->s_inode_list_lock);
}
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
iput(old_inode);
#ifdef CONFIG_QUOTA_DEBUG
struct inode *inode;
int reserved = 0;
- spin_lock(&inode_sb_list_lock);
+ spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
/*
* We have to scan also I_NEW inodes because they can already
}
spin_unlock(&dq_data_lock);
}
- spin_unlock(&inode_sb_list_lock);
+ spin_unlock(&sb->s_inode_list_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
printk(KERN_WARNING "VFS (%s): Writes happened after quota"
* It is better to call this function outside of any transaction as it
* might need a lot of space in journal for dquot structure allocation.
*/
-static void __dquot_initialize(struct inode *inode, int type)
+static int __dquot_initialize(struct inode *inode, int type)
{
int cnt, init_needed = 0;
struct dquot **dquots, *got[MAXQUOTAS];
struct super_block *sb = inode->i_sb;
qsize_t rsv;
+ int ret = 0;
if (!dquot_active(inode))
- return;
+ return 0;
dquots = i_dquot(inode);
struct kqid qid;
kprojid_t projid;
int rc;
+ struct dquot *dquot;
got[cnt] = NULL;
if (type != -1 && cnt != type)
qid = make_kqid_projid(projid);
break;
}
- got[cnt] = dqget(sb, qid);
+ dquot = dqget(sb, qid);
+ if (IS_ERR(dquot)) {
+ /* We raced with somebody turning quotas off... */
+ if (PTR_ERR(dquot) != -ESRCH) {
+ ret = PTR_ERR(dquot);
+ goto out_put;
+ }
+ dquot = NULL;
+ }
+ got[cnt] = dquot;
}
/* All required i_dquot has been initialized */
if (!init_needed)
- return;
+ return 0;
spin_lock(&dq_data_lock);
if (IS_NOQUOTA(inode))
- goto out_err;
+ goto out_lock;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
dquot_resv_space(dquots[cnt], rsv);
}
}
-out_err:
+out_lock:
spin_unlock(&dq_data_lock);
+out_put:
/* Drop unused references */
dqput_all(got);
+
+ return ret;
}
-void dquot_initialize(struct inode *inode)
+int dquot_initialize(struct inode *inode)
{
- __dquot_initialize(inode, -1);
+ return __dquot_initialize(inode, -1);
}
EXPORT_SYMBOL(dquot_initialize);
int dquot_transfer(struct inode *inode, struct iattr *iattr)
{
struct dquot *transfer_to[MAXQUOTAS] = {};
+ struct dquot *dquot;
struct super_block *sb = inode->i_sb;
int ret;
if (!dquot_active(inode))
return 0;
- if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid))
- transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(iattr->ia_uid));
- if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))
- transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid));
-
+ if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)){
+ dquot = dqget(sb, make_kqid_uid(iattr->ia_uid));
+ if (IS_ERR(dquot)) {
+ if (PTR_ERR(dquot) != -ESRCH) {
+ ret = PTR_ERR(dquot);
+ goto out_put;
+ }
+ dquot = NULL;
+ }
+ transfer_to[USRQUOTA] = dquot;
+ }
+ if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)){
+ dquot = dqget(sb, make_kqid_gid(iattr->ia_gid));
+ if (IS_ERR(dquot)) {
+ if (PTR_ERR(dquot) != -ESRCH) {
+ ret = PTR_ERR(dquot);
+ goto out_put;
+ }
+ dquot = NULL;
+ }
+ transfer_to[GRPQUOTA] = dquot;
+ }
ret = __dquot_transfer(inode, transfer_to);
+out_put:
dqput_all(transfer_to);
return ret;
}
struct dquot *dquot;
dquot = dqget(sb, qid);
- if (!dquot)
- return -ESRCH;
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
do_get_dqblk(dquot, di);
dqput(dquot);
int rc;
dquot = dqget(sb, qid);
- if (!dquot) {
- rc = -ESRCH;
+ if (IS_ERR(dquot)) {
+ rc = PTR_ERR(dquot);
goto out;
}
rc = do_set_dqblk(dquot, di);
* We may pass freeze protection with a transaction. So tell lockdep
* we released it.
*/
- rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
- 1, _THIS_IP_);
+ __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
/*
* We hand off the transaction to the completion thread now, so
* clear the flag here.
* Similarly for freeze protection.
*/
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
- rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
- 0, 1, _THIS_IP_);
+ __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}
*/
STATIC void
xfs_end_bio(
- struct bio *bio,
- int error)
+ struct bio *bio)
{
xfs_ioend_t *ioend = bio->bi_private;
- ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
+ ioend->io_error = bio->bi_error;
/* Toss bio and pass work off to an xfsdatad thread */
bio->bi_private = NULL;
xfs_alloc_ioend_bio(
struct buffer_head *bh)
{
- int nvecs = bio_get_nr_vecs(bh->b_bdev);
- struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
+ struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
ASSERT(bio->bi_private == NULL);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
-
#include <linux/linkage.h>
#include <linux/wait.h>
#include <linux/kdev_t.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/blk_types.h>
+ #include <linux/workqueue.h>
+ #include <linux/percpu-rwsem.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
unsigned long dirtied_time_when;
struct hlist_node i_hash;
- struct list_head i_wb_list; /* backing dev IO list */
+ struct list_head i_io_list; /* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *i_wb; /* the associated cgroup wb */
struct lock_manager {
struct list_head list;
+ /*
+ * NFSv4 and up also want opens blocked during the grace period;
+ * NLM doesn't care:
+ */
+ bool block_opens;
};
struct net;
void locks_start_grace(struct net *, struct lock_manager *);
void locks_end_grace(struct lock_manager *);
int locks_in_grace(struct net *);
+int opens_in_grace(struct net *);
/* that will die - we need it for nfs_lock_info */
#include <linux/nfs_fs_i.h>
/* sb->s_iflags */
#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
+#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
/* Possible states of 'frozen' field */
enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers {
- /* Counters for counting writers at each level */
- struct percpu_counter counter[SB_FREEZE_LEVELS];
- wait_queue_head_t wait; /* queue for waiting for
- writers / faults to finish */
- int frozen; /* Is sb frozen? */
- wait_queue_head_t wait_unfrozen; /* queue for waiting for
- sb to be thawed */
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map lock_map[SB_FREEZE_LEVELS];
- #endif
+ int frozen; /* Is sb frozen? */
+ wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */
+ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
struct super_block {
#endif
const struct xattr_handler **s_xattr;
- struct list_head s_inodes; /* all inodes */
struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev;
struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
struct rcu_head rcu;
+ struct work_struct destroy_work;
+
+ struct mutex s_sync_lock; /* sync serialisation lock */
/*
* Indicates how deep in a filesystem stack this SB is
*/
int s_stack_depth;
+
+ /* s_inode_list_lock protects s_inodes */
+ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp;
+ struct list_head s_inodes; /* all inodes */
};
extern struct timespec current_fs_time(struct super_block *sb);
void __sb_end_write(struct super_block *sb, int level);
int __sb_start_write(struct super_block *sb, int level, bool wait);
+ #define __sb_writers_acquired(sb, lev) \
+ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+ #define __sb_writers_release(sb, lev) \
+ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+
/**
* sb_end_write - drop write access to a superblock
* @sb: the super we wrote to
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
- int (*mremap)(struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
extern void __remove_inode_hash(struct inode *);
static inline void remove_inode_hash(struct inode *inode)
{
- if (!inode_unhashed(inode))
+ if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
__remove_inode_hash(inode);
}
return !IS_DEADDIR(inode);
}
+extern bool path_noexec(const struct path *path);
+
#endif /* _LINUX_FS_H */
#define FSNOTIFY_EVENT_INODE 2
/*
- * a mark is simply an object attached to an in core inode which allows an
+ * A mark is simply an object attached to an in core inode which allows an
* fsnotify listener to indicate they are either no longer interested in events
* of a type matching mask or only interested in those events.
*
- * these are flushed when an inode is evicted from core and may be flushed
- * when the inode is modified (as seen by fsnotify_access). Some fsnotify users
- * (such as dnotify) will flush these when the open fd is closed and not at
- * inode eviction or modification.
+ * These are flushed when an inode is evicted from core and may be flushed
+ * when the inode is modified (as seen by fsnotify_access). Some fsnotify
+ * users (such as dnotify) will flush these when the open fd is closed and not
+ * at inode eviction or modification.
+ *
+ * Text in brackets is showing the lock(s) protecting modifications of a
+ * particular entry. obj_lock means either inode->i_lock or
+ * mnt->mnt_root->d_lock depending on the mark type.
*/
struct fsnotify_mark {
- __u32 mask; /* mask this mark is for */
- /* we hold ref for each i_list and g_list. also one ref for each 'thing'
+ /* Mask this mark is for [mark->lock, group->mark_mutex] */
+ __u32 mask;
+ /* We hold one for presence in g_list. Also one ref for each 'thing'
* in kernel that found and may be using this mark. */
- atomic_t refcnt; /* active things looking at this mark */
- struct fsnotify_group *group; /* group this mark is for */
- struct list_head g_list; /* list of marks by group->i_fsnotify_marks
- * Also reused for queueing mark into
- * destroy_list when it's waiting for
- * the end of SRCU period before it can
- * be freed */
- spinlock_t lock; /* protect group and inode */
- struct hlist_node obj_list; /* list of marks for inode / vfsmount */
- struct list_head free_list; /* tmp list used when freeing this mark */
- union {
+ atomic_t refcnt;
+ /* Group this mark is for. Set on mark creation, stable until last ref
+ * is dropped */
+ struct fsnotify_group *group;
+ /* List of marks by group->i_fsnotify_marks. Also reused for queueing
+ * mark into destroy_list when it's waiting for the end of SRCU period
+ * before it can be freed. [group->mark_mutex] */
+ struct list_head g_list;
+ /* Protects inode / mnt pointers, flags, masks */
+ spinlock_t lock;
+ /* List of marks for inode / vfsmount [obj_lock] */
+ struct hlist_node obj_list;
+ union { /* Object pointer [mark->lock, group->mark_mutex] */
struct inode *inode; /* inode this mark is associated with */
struct vfsmount *mnt; /* vfsmount this mark is associated with */
};
- __u32 ignored_mask; /* events types to ignore */
+ /* Events types to ignore [mark->lock, group->mark_mutex] */
+ __u32 ignored_mask;
#define FSNOTIFY_MARK_FLAG_INODE 0x01
#define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02
#define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08
#define FSNOTIFY_MARK_FLAG_ALIVE 0x10
- unsigned int flags; /* vfsmount or inode mark? */
+#define FSNOTIFY_MARK_FLAG_ATTACHED 0x20
+ unsigned int flags; /* flags [mark->lock] */
void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
};
/* given a group and a mark, flag mark to be freed when all references are dropped */
extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group);
-extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
- struct fsnotify_group *group);
+/* detach mark from inode / mount list, group list, drop inode reference */
+extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
+/* free mark */
+extern void fsnotify_free_mark(struct fsnotify_mark *mark);
/* run all the marks in a group, and clear all of the vfsmount marks */
extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
/* run all the marks in a group, and clear all of the inode marks */
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark *mark);
extern void fsnotify_put_mark(struct fsnotify_mark *mark);
- extern void fsnotify_unmount_inodes(struct list_head *list);
+ extern void fsnotify_unmount_inodes(struct super_block *sb);
/* put here because inotify does some weird stuff when destroying watches */
extern void fsnotify_init_event(struct fsnotify_event *event,
return 0;
}
- static inline void fsnotify_unmount_inodes(struct list_head *list)
+ static inline void fsnotify_unmount_inodes(struct super_block *sb)
{}
#endif /* CONFIG_FSNOTIFY */
config CONTEXT_TRACKING
bool
-config RCU_USER_QS
- bool
- help
- This option sets hooks on kernel / userspace boundaries and
- puts RCU in extended quiescent state when the CPU runs in
- userspace. It means that when a CPU runs in userspace, it is
- excluded from the global RCU state machine and thus doesn't
- try to keep the timer tick on for RCU.
-
config CONTEXT_TRACKING_FORCE
bool "Force context tracking"
depends on CONTEXT_TRACKING
config RCU_NOCB_CPU
bool "Offload RCU callback processing from boot-selected CPUs"
depends on TREE_RCU || PREEMPT_RCU
+ depends on RCU_EXPERT || NO_HZ_FULL
default n
help
Use this option to reduce OS jitter for aggressive HPC or
config ARCH_SUPPORTS_NUMA_BALANCING
bool
+#
+# For architectures that prefer to flush all TLBs after a number of pages
+# are unmapped instead of sending one IPI per page to flush. The architecture
+# must provide guarantees on what happens if a clean TLB cache entry is
+# written after the unmap. Details are in mm/rmap.c near the check for
+# should_defer_flush. The architecture should also consider if the full flush
+# and the refill costs are offset by the savings of sending fewer IPIs.
+config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ bool
+
#
# For architectures that know their GCC __int128 support is sound
#
menuconfig CGROUPS
bool "Control Group support"
select KERNFS
- select PERCPU_RWSEM
help
This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory
Provides a way to freeze and unfreeze all tasks in a
cgroup.
+config CGROUP_PIDS
+ bool "PIDs cgroup subsystem"
+ help
+ Provides enforcement of process number limits in the scope of a
+ cgroup. Any attempt to fork more processes than is allowed in the
+ cgroup will fail. PIDs are fundamentally a global resource because it
+ is fairly trivial to reach PID exhaustion before you reach even a
+ conservative kmemcg limit. As a result, it is possible to grind a
+ system to halt without being limited by other cgroup policies. The
+ PIDs cgroup subsystem is designed to stop this from happening.
+
+ It should be noted that organisational operations (such as attaching
+ to a cgroup hierarchy will *not* be blocked by the PIDs subsystem),
+ since the PIDs limit only affects a process's ability to fork, not to
+ attach to a cgroup.
+
config CGROUP_DEVICE
bool "Device controller for cgroups"
help
applications use these syscalls, you can disable this option to save
space.
+config USERFAULTFD
+ bool "Enable userfaultfd() system call"
+ select ANON_INODES
+ depends on MMU
+ help
+ Enable the userfaultfd() system call that allows to intercept and
+ handle page faults in userland.
+
config PCI_QUIRKS
default y
bool "Enable PCI quirk workarounds" if EXPERT
- obj-y += mutex.o semaphore.o rwsem.o
+ obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
-obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
- obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
config STMP_DEVICE
bool
-
- config PERCPU_RWSEM
- bool
config ARCH_USE_CMPXCHG_LOCKREF
bool
config LRU_CACHE
tristate
-config AVERAGE
- bool "Averaging functions"
- help
- This option is provided for the case where no in-kernel-tree
- modules require averaging functions, but a module built outside
- the kernel tree does. Such modules that use library averaging
- functions require Y here.
-
- If unsure, say N.
-
config CLZ_TAB
bool
source "lib/fonts/Kconfig"
+config SG_SPLIT
+ def_bool n
+ help
+ Provides a heler to split scatterlists into chunks, each chunk being a
+ scatterlist. This should be selected by a driver or an API which
+ whishes to split a scatterlist amongst multiple DMA channel.
+
#
# sg chaining option
#