Merge branch 'tracing/ftrace' into auto-ftrace-next

[mv-sheeva.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 2c65bf29d133f3b734965fb809a105fdb0358a12..42899dce837d6f27a5f90db3d2037c2ff0a05489 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,6 +70,7 @@
  #include <linux/bootmem.h>
  #include <linux/debugfs.h>
  #include <linux/ctype.h>
+#include <linux/ftrace.h>
  
  #include <asm/tlb.h>
  #include <asm/irq_regs.h>
@@ -312,12 +313,15 @@ static DEFINE_SPINLOCK(task_group_lock);
  #endif
  
  /*
- * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems.
+ * A weight of 0 or 1 can cause arithmetic problems.
+ * The weight of a cfs_rq is the sum of the weights of the entities
+ * queued on it, so the weight of an entity must not be too large,
+ * and neither must the shares value of a task group.
   * (The default weight is 1024 - so there's no practical
   *  limitation from this.)
   */
  #define MIN_SHARES     2
-#define MAX_SHARES     (ULONG_MAX - 1)
+#define MAX_SHARES     (1UL << 18)
  
  static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  #endif
@@ -604,6 +608,24 @@ static inline void update_rq_clock(struct rq *rq)
  # define const_debug static const
  #endif
  
+/**
+ * runqueue_is_locked
+ *
+ * Returns true if the current cpu's runqueue lock is held.
+ * This lets printk, which may be called with the runqueue lock held,
+ * find out whether or not it is OK to wake up klogd.
+ */
+int runqueue_is_locked(void)
+{
+       int cpu = get_cpu();
+       struct rq *rq = cpu_rq(cpu);
+       int ret;
+
+       ret = spin_is_locked(&rq->lock);
+       put_cpu();
+       return ret;
+}
+
  /*
   * Debugging: various feature bits
   */
@@ -828,7 +850,7 @@ static unsigned long long __cpu_clock(int cpu)
   * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
   * clock constructed from sched_clock():
   */
-unsigned long long cpu_clock(int cpu)
+unsigned long long notrace cpu_clock(int cpu)
  {
         unsigned long long prev_cpu_time, time, delta_time;
         unsigned long flags;
@@ -1124,6 +1146,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
         return HRTIMER_NORESTART;
  }
  
+#ifdef CONFIG_SMP
  static void hotplug_hrtick_disable(int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
@@ -1179,6 +1202,7 @@ static void init_hrtick(void)
  {
         hotcpu_notifier(hotplug_hrtick, 0);
  }
+#endif /* CONFIG_SMP */
  
  static void init_rq_hrtick(struct rq *rq)
  {
@@ -1337,8 +1361,13 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
  {
         u64 tmp;
  
-       if (!lw->inv_weight)
-               lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1);
+       if (!lw->inv_weight) {
+               if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
+                       lw->inv_weight = 1;
+               else
+                       lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
+                               / (lw->weight+1);
+       }
  
         tmp = (u64)delta_exec * weight;
         /*
@@ -2139,6 +2168,9 @@ out_activate:
         success = 1;
  
  out_running:
+       trace_mark(kernel_sched_wakeup,
+               "pid %d state %ld ## rq %p task %p rq->curr %p",
+               p->pid, p->state, rq, p, rq->curr);
         check_preempt_curr(rq, p);
  
         p->state = TASK_RUNNING;
@@ -2269,6 +2301,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                 p->sched_class->task_new(rq, p);
                 inc_nr_running(p, rq);
         }
+       trace_mark(kernel_sched_wakeup_new,
+               "pid %d state %ld ## rq %p task %p rq->curr %p",
+               p->pid, p->state, rq, p, rq->curr);
         check_preempt_curr(rq, p);
  #ifdef CONFIG_SMP
         if (p->sched_class->task_wake_up)
@@ -2441,6 +2476,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
         struct mm_struct *mm, *oldmm;
  
         prepare_task_switch(rq, prev, next);
+       trace_mark(kernel_sched_schedule,
+               "prev_pid %d next_pid %d prev_state %ld "
+               "## rq %p prev %p next %p",
+               prev->pid, next->pid, prev->state,
+               rq, prev, next);
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@ -4011,26 +4051,44 @@ void scheduler_tick(void)
  #endif
  }
  
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+                               defined(CONFIG_PREEMPT_TRACER))
+
+static inline unsigned long get_parent_ip(unsigned long addr)
+{
+       if (in_lock_functions(addr)) {
+               addr = CALLER_ADDR2;
+               if (in_lock_functions(addr))
+                       addr = CALLER_ADDR3;
+       }
+       return addr;
+}
  
  void __kprobes add_preempt_count(int val)
  {
+#ifdef CONFIG_DEBUG_PREEMPT
         /*
          * Underflow?
          */
         if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
                 return;
+#endif
         preempt_count() += val;
+#ifdef CONFIG_DEBUG_PREEMPT
         /*
          * Spinlock count overflowing soon?
          */
         DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
                                 PREEMPT_MASK - 10);
+#endif
+       if (preempt_count() == val)
+               trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
  }
  EXPORT_SYMBOL(add_preempt_count);
  
  void __kprobes sub_preempt_count(int val)
  {
+#ifdef CONFIG_DEBUG_PREEMPT
         /*
          * Underflow?
          */
@@ -4042,7 +4100,10 @@ void __kprobes sub_preempt_count(int val)
         if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
                         !(preempt_count() & PREEMPT_MASK)))
                 return;
+#endif
  
+       if (preempt_count() == val)
+               trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
         preempt_count() -= val;
  }
  EXPORT_SYMBOL(sub_preempt_count);
@@ -4388,22 +4449,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
                              signal_pending(current)) ||
                             (state == TASK_KILLABLE &&
                              fatal_signal_pending(current))) {
-                               __remove_wait_queue(&x->wait, &wait);
-                               return -ERESTARTSYS;
+                               timeout = -ERESTARTSYS;
+                               break;
                         }
                         __set_current_state(state);
                         spin_unlock_irq(&x->wait.lock);
                         timeout = schedule_timeout(timeout);
                         spin_lock_irq(&x->wait.lock);
-                       if (!timeout) {
-                               __remove_wait_queue(&x->wait, &wait);
-                               return timeout;
-                       }
-               } while (!x->done);
+               } while (!x->done && timeout);
                 __remove_wait_queue(&x->wait, &wait);
+               if (!x->done)
+                       return timeout;
         }
         x->done--;
-       return timeout;
+       return timeout ?: 1;
  }
  
  static long __sched
@@ -5376,7 +5435,7 @@ out_unlock:
         return retval;
  }
  
-static const char stat_nam[] = "RSDTtZX";
+static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
  
  void sched_show_task(struct task_struct *p)
  {
@@ -5879,6 +5938,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
                 next = pick_next_task(rq, rq->curr);
                 if (!next)
                         break;
+               next->sched_class->put_prev_task(rq, next);
                 migrate_dead(dead_cpu, next);
  
         }
@@ -6869,7 +6929,12 @@ static int default_relax_domain_level = -1;
  
  static int __init setup_relax_domain_level(char *str)
  {
-       default_relax_domain_level = simple_strtoul(str, NULL, 0);
+       unsigned long val;
+
+       val = simple_strtoul(str, NULL, 0);
+       if (val < SD_LV_MAX)
+               default_relax_domain_level = val;
+
         return 1;
  }
  __setup("relax_domain_level=", setup_relax_domain_level);
@@ -7227,6 +7292,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
  {
  }
  
+/*
+ * Free current domain masks.
+ * Called after all cpus are attached to NULL domain.
+ */
+static void free_sched_domains(void)
+{
+       ndoms_cur = 0;
+       if (doms_cur != &fallback_doms)
+               kfree(doms_cur);
+       doms_cur = &fallback_doms;
+}
+
  /*
   * Set up scheduler domains and groups. Callers must hold the hotplug lock.
   * For now this just excludes isolated cpus, but could be used to
@@ -7374,6 +7451,7 @@ int arch_reinit_sched_domains(void)
         get_online_cpus();
         mutex_lock(&sched_domains_mutex);
         detach_destroy_domains(&cpu_online_map);
+       free_sched_domains();
         err = arch_init_sched_domains(&cpu_online_map);
         mutex_unlock(&sched_domains_mutex);
         put_online_cpus();
@@ -7459,6 +7537,7 @@ static int update_sched_domains(struct notifier_block *nfb,
         case CPU_DOWN_PREPARE:
         case CPU_DOWN_PREPARE_FROZEN:
                 detach_destroy_domains(&cpu_online_map);
+               free_sched_domains();
                 return NOTIFY_OK;
  
         case CPU_UP_CANCELED:
@@ -7477,8 +7556,16 @@ static int update_sched_domains(struct notifier_block *nfb,
                 return NOTIFY_DONE;
         }
  
+#ifndef CONFIG_CPUSETS
+       /*
+        * Create default domain partitioning if cpusets are disabled.
+        * Otherwise we let cpusets rebuild the domains based on the
+        * current setup.
+        */
+
         /* The hotplug lock is already held by cpu_up/cpu_down */
         arch_init_sched_domains(&cpu_online_map);
+#endif
  
         return NOTIFY_OK;
  }
@@ -7618,7 +7705,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
         else
                 rt_se->rt_rq = parent->my_q;
  
-       rt_se->rt_rq = &rq->rt;
         rt_se->my_q = rt_rq;
         rt_se->parent = parent;
         INIT_LIST_HEAD(&rt_se->run_list);
@@ -8340,7 +8426,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
  #ifdef CONFIG_CGROUP_SCHED
  static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
  {
-       struct task_group *tgi, *parent = tg->parent;
+       struct task_group *tgi, *parent = tg ? tg->parent : NULL;
         unsigned long total = 0;
  
         if (!parent) {
@@ -8467,6 +8553,9 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
         rt_period = (u64)rt_period_us * NSEC_PER_USEC;
         rt_runtime = tg->rt_bandwidth.rt_runtime;
  
+       if (rt_period == 0)
+               return -EINVAL;
+
         return tg_set_bandwidth(tg, rt_period, rt_runtime);
  }