exp.2015.10.07a: Reduce OS jitter of RCU-sched expedited grace periods.
fixes.2015.10.06a: Miscellaneous fixes.
cache-to-cache transfer latencies.
rcutree.rcu_fanout_leaf= [KNL]
- Increase the number of CPUs assigned to each
- leaf rcu_node structure. Useful for very large
- systems.
+ Change the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very
+ large systems, which will choose the value 64,
+ and for NUMA systems with large remote-access
+ latencies, which will choose a value aligned
+ with the appropriate hardware boundaries.
rcutree.jiffies_till_sched_qs= [KNL]
Set required age in jiffies for a
static inline void __list_del(struct list_head * prev, struct list_head * next)
{
next->prev = prev;
- prev->next = next;
+ WRITE_ONCE(prev->next, next);
}
/**
{
struct hlist_node *next = n->next;
struct hlist_node **pprev = n->pprev;
- *pprev = next;
+
+ WRITE_ONCE(*pprev, next);
if (next)
next->pprev = pprev;
}
LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
/* pprev may be `first`, so be careful not to lose the lock bit */
- *pprev = (struct hlist_bl_node *)
+ WRITE_ONCE(*pprev,
+ (struct hlist_bl_node *)
((unsigned long)next |
- ((unsigned long)*pprev & LIST_BL_LOCKMASK));
+ ((unsigned long)*pprev & LIST_BL_LOCKMASK)));
if (next)
next->pprev = pprev;
}
{
struct hlist_nulls_node *next = n->next;
struct hlist_nulls_node **pprev = n->pprev;
- *pprev = next;
+
+ WRITE_ONCE(*pprev, next);
if (!is_a_nulls(next))
next->pprev = pprev;
}
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
#define list_entry_rcu(ptr, type, member) \
-({ \
- typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \
- container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \
-})
+ container_of(lockless_dereference(ptr), type, member)
/**
* Where are list_empty_rcu() and list_first_entry_rcu()?
* more than one CPU).
*/
void call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
+ rcu_callback_t func);
#else /* #ifdef CONFIG_PREEMPT_RCU */
* memory ordering guarantees.
*/
void call_rcu_bh(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
+ rcu_callback_t func);
/**
* call_rcu_sched() - Queue an RCU for invocation after sched grace period.
* memory ordering guarantees.
*/
void call_rcu_sched(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu));
+ rcu_callback_t func);
void synchronize_sched(void);
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
*/
-void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head));
+void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
void synchronize_rcu_tasks(void);
void rcu_barrier_tasks(void);
static inline void __rcu_read_lock(void)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_COUNT))
+ preempt_disable();
}
static inline void __rcu_read_unlock(void)
{
- preempt_enable();
+ if (IS_ENABLED(CONFIG_PREEMPT_COUNT))
+ preempt_enable();
}
static inline void synchronize_rcu(void)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. */
-static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void)
-{
-}
-
#ifdef CONFIG_PROVE_RCU
-/**
- * rcu_lockdep_assert - emit lockdep splat if specified condition not met
- * @c: condition to check
- * @s: informative message
- */
-#define rcu_lockdep_assert(c, s) \
- do { \
- static bool __section(.data.unlikely) __warned; \
- deprecate_rcu_lockdep_assert(); \
- if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
- __warned = true; \
- lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
- } \
- } while (0)
-
/**
* RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met
* @c: condition to check
#else /* #ifdef CONFIG_PROVE_RCU */
-#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert()
#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
#define rcu_sleep_check() do { } while (0)
*/
#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
+/**
+ * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism
+ * @p: The pointer to hand off
+ *
+ * This is simply an identity function, but it documents where a pointer
+ * is handed off from RCU to some other synchronization mechanism, for
+ * example, reference counting or locking. In C11, it would map to
+ * kill_dependency(). It could be used as follows:
+ *
+ * rcu_read_lock();
+ * p = rcu_dereference(gp);
+ * long_lived = is_long_lived(p);
+ * if (long_lived) {
+ * if (!atomic_inc_not_zero(p->refcnt))
+ * long_lived = false;
+ * else
+ * p = rcu_pointer_handoff(p);
+ * }
+ * rcu_read_unlock();
+ */
+#define rcu_pointer_handoff(p) (p)
+
/**
* rcu_read_lock() - mark the beginning of an RCU read-side critical section
*
#define __kfree_rcu(head, offset) \
do { \
BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
- kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
+ kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
} while (0)
/**
}
static inline void kfree_call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu))
+ rcu_callback_t func)
{
call_rcu(head, func);
}
static inline void rcu_all_qs(void)
{
+ barrier(); /* Avoid RCU read-side critical sections leaking across. */
}
#endif /* __LINUX_RCUTINY_H */
void synchronize_sched_expedited(void);
void synchronize_rcu_expedited(void);
-void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
/**
* synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
*/
static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
{
- int retval = __srcu_read_lock(sp);
+ int retval;
+ preempt_disable();
+ retval = __srcu_read_lock(sp);
+ preempt_enable();
rcu_lock_acquire(&(sp)->dep_map);
return retval;
}
*/
flush_ptrace_hw_breakpoint(tsk);
+ TASKS_RCU(preempt_disable());
TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
+ TASKS_RCU(preempt_enable());
exit_notify(tsk, group_dead);
proc_exit_connector(tsk);
#ifdef CONFIG_NUMA
void (*exp_sync)(void);
unsigned long (*get_state)(void);
void (*cond_sync)(unsigned long oldstate);
- void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+ call_rcu_func_t call;
void (*cb_barrier)(void);
void (*fqs)(void);
void (*stats)(void);
}
static void
-call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+call_rcu_busted(struct rcu_head *head, rcu_callback_t func)
{
/* This is a deliberate bug for testing purposes only! */
func(head);
}
static void srcu_torture_call(struct rcu_head *head,
- void (*func)(struct rcu_head *head))
+ rcu_callback_t func)
{
call_srcu(srcu_ctlp, head, func);
}
int idx;
idx = READ_ONCE(sp->completed) & 0x1;
- preempt_disable();
__this_cpu_inc(sp->per_cpu_ref->c[idx]);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
__this_cpu_inc(sp->per_cpu_ref->seq[idx]);
- preempt_enable();
return idx;
}
EXPORT_SYMBOL_GPL(__srcu_read_lock);
* srcu_struct structure.
*/
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
- void (*func)(struct rcu_head *head))
+ rcu_callback_t func)
{
unsigned long flags;
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static void rcu_process_callbacks(struct softirq_action *unused);
static void __call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu),
+ rcu_callback_t func,
struct rcu_ctrlblk *rcp);
#include "tiny_plugin.h"
* Helper function for call_rcu() and call_rcu_bh().
*/
static void __call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu),
+ rcu_callback_t func,
struct rcu_ctrlblk *rcp)
{
unsigned long flags;
* period. But since we have but one CPU, that would be after any
* quiescent state.
*/
-void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_sched_ctrlblk);
}
* Post an RCU bottom-half callback to be invoked after any subsequent
* quiescent state.
*/
-void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_bh_ctrlblk);
}
.level = { &sname##_state.node[0] }, \
.rda = &sname##_data, \
.call = cr, \
- .fqs_state = RCU_GP_IDLE, \
+ .gp_state = RCU_GP_IDLE, \
.gpnum = 0UL - 300UL, \
.completed = 0UL - 300UL, \
.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
*/
void rcu_note_context_switch(void)
{
+ barrier(); /* Avoid RCU read-side critical sections leaking down. */
trace_rcu_utilization(TPS("Start context switch"));
rcu_sched_qs();
rcu_preempt_note_context_switch();
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
rcu_momentary_dyntick_idle();
trace_rcu_utilization(TPS("End context switch"));
+ barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
* RCU flavors in desperate need of a quiescent state, which will normally
* be none of them). Either way, do a lightweight quiescent state for
* all RCU flavors.
+ *
+ * The barrier() calls are redundant in the common case when this is
+ * called externally, but just in case this is called from within this
+ * file.
+ *
*/
void rcu_all_qs(void)
{
+ barrier(); /* Avoid RCU read-side critical sections leaking down. */
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
rcu_momentary_dyntick_idle();
this_cpu_inc(rcu_qs_ctr);
+ barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
EXPORT_SYMBOL_GPL(rcu_all_qs);
/*
* Do one round of quiescent-state forcing.
*/
-static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
+static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
{
- int fqs_state = fqs_state_in;
bool isidle = false;
unsigned long maxj;
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
rsp->n_force_qs++;
- if (fqs_state == RCU_SAVE_DYNTICK) {
+ if (first_time) {
/* Collect dyntick-idle snapshots. */
if (is_sysidle_rcu_state(rsp)) {
isidle = true;
force_qs_rnp(rsp, dyntick_save_progress_counter,
&isidle, &maxj);
rcu_sysidle_report_gp(rsp, isidle, maxj);
- fqs_state = RCU_FORCE_QS;
} else {
/* Handle dyntick-idle and offline CPUs. */
isidle = true;
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
raw_spin_unlock_irq(&rnp->lock);
}
- return fqs_state;
}
/*
/* Declare grace period done. */
WRITE_ONCE(rsp->completed, rsp->gpnum);
trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
- rsp->fqs_state = RCU_GP_IDLE;
+ rsp->gp_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
/* Advance CBs to reduce false positives below. */
needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
*/
static int __noreturn rcu_gp_kthread(void *arg)
{
- int fqs_state;
+ bool first_gp_fqs;
int gf;
unsigned long j;
int ret;
}
/* Handle quiescent-state forcing. */
- fqs_state = RCU_SAVE_DYNTICK;
+ first_gp_fqs = true;
j = jiffies_till_first_fqs;
if (j > HZ) {
j = HZ;
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqsstart"));
- fqs_state = rcu_gp_fqs(rsp, fqs_state);
+ rcu_gp_fqs(rsp, first_gp_fqs);
+ first_gp_fqs = false;
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqsend"));
* is expected to specify a CPU.
*/
static void
-__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
+__call_rcu(struct rcu_head *head, rcu_callback_t func,
struct rcu_state *rsp, int cpu, bool lazy)
{
unsigned long flags;
/*
* Queue an RCU-sched callback for invocation after a grace period.
*/
-void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_sched_state, -1, 0);
}
/*
* Queue an RCU callback for invocation after a quicker grace period.
*/
-void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_bh_state, -1, 0);
}
* function may only be called from __kfree_rcu().
*/
void kfree_call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu))
+ rcu_callback_t func)
{
__call_rcu(head, func, rcu_state_p, -1, 1);
}
rcu_fanout_leaf, nr_cpu_ids);
/*
- * The boot-time rcu_fanout_leaf parameter is only permitted
- * to increase the leaf-level fanout, not decrease it. Of course,
- * the leaf-level fanout cannot exceed the number of bits in
- * the rcu_node masks. Complain and fall back to the compile-
- * time values if these limits are exceeded.
+ * The boot-time rcu_fanout_leaf parameter must be at least two
+ * and cannot exceed the number of bits in the rcu_node masks.
+ * Complain and fall back to the compile-time values if this
+ * limit is exceeded.
*/
- if (rcu_fanout_leaf < RCU_FANOUT_LEAF ||
+ if (rcu_fanout_leaf < 2 ||
rcu_fanout_leaf > sizeof(unsigned long) * 8) {
rcu_fanout_leaf = RCU_FANOUT_LEAF;
WARN_ON(1);
/*
* The tree must be able to accommodate the configured number of CPUs.
- * If this limit is exceeded than we have a serious problem elsewhere.
+ * If this limit is exceeded, fall back to the compile-time values.
*/
- if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1])
- panic("rcu_init_geometry: rcu_capacity[] is too small");
+ if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) {
+ rcu_fanout_leaf = RCU_FANOUT_LEAF;
+ WARN_ON(1);
+ return;
+ }
/* Calculate the number of levels in the tree. */
for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) {
struct rcu_state *rsp;
};
-/* Values for fqs_state field in struct rcu_state. */
-#define RCU_GP_IDLE 0 /* No grace period in progress. */
-#define RCU_GP_INIT 1 /* Grace period being initialized. */
-#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
-#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
-#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
-
/* Values for nocb_defer_wakeup field in struct rcu_data. */
#define RCU_NOGP_WAKE_NOT 0
#define RCU_NOGP_WAKE 1
/* shut bogus gcc warning) */
u8 flavor_mask; /* bit in flavor mask. */
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
- void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
- void (*func)(struct rcu_head *head));
+ call_rcu_func_t call; /* call_rcu() flavor. */
int ncpus; /* # CPUs seen so far. */
/* The following fields are guarded by the root rcu_node's lock. */
- u8 fqs_state ____cacheline_internodealigned_in_smp;
- /* Force QS state. */
- u8 boost; /* Subject to priority boost. */
+ u8 boost ____cacheline_internodealigned_in_smp;
+ /* Subject to priority boost. */
unsigned long gpnum; /* Current gp number. */
unsigned long completed; /* # of last completed gp. */
struct task_struct *gp_kthread; /* Task for grace periods. */
#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
-/* Values for rcu_state structure's gp_flags field. */
-#define RCU_GP_WAIT_INIT 0 /* Initial state. */
+/* Values for rcu_state structure's gp_state field. */
+#define RCU_GP_IDLE 0 /* Initial state and no GP in progress. */
#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */
#define RCU_GP_DONE_GPS 2 /* Wait done for grace-period start. */
#define RCU_GP_WAIT_FQS 3 /* Wait for force-quiescent-state time. */
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
static void rcu_preempt_check_callbacks(void);
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
static void __init __rcu_init_preempt(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
/*
* Queue a preemptible-RCU callback for invocation after a grace period.
*/
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, rcu_state_p, -1, 0);
}
ticks_value = rsp->gpnum - rdp->gpnum;
}
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
- pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
- cpu, ticks_value, ticks_title,
+ pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
+ cpu,
+ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
+ "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
+ ticks_value, ticks_title,
atomic_read(&rdtp->dynticks) & 0xfff,
rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
gpnum = rsp->gpnum;
seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
ulong2long(rsp->completed), ulong2long(gpnum),
- rsp->fqs_state,
+ rsp->gp_state,
(long)(rsp->jiffies_force_qs - jiffies),
(int)(jiffies & 0xffff));
seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
* Post an RCU-tasks callback. First call must be from process context
* after the scheduler if fully operational.
*/
-void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
+void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
{
unsigned long flags;
bool needwake;
{
return _sched_setscheduler(p, policy, param, false);
}
+EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck);
static int
do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)