/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licensed under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
/*
 * Represents all CPUs present in the system.
 * In systems capable of hotplug, this map can grow dynamically as new
 * CPUs are detected by a platform-specific method, such as ACPI.
 */
cpumask_t cpu_present_map __read_mostly;
EXPORT_SYMBOL(cpu_present_map);
#ifndef CONFIG_SMP

/*
 * Represents all CPUs that are currently online.
 */
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_online_map);

cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_possible_map);

#else /* CONFIG_SMP */
/* Serializes the updates to cpu_online_map, cpu_present_map */
static DEFINE_MUTEX(cpu_add_remove_lock);

static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);

/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock.
 */
static int cpu_hotplug_disabled;
static struct {
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount, */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	int refcount;
} cpu_hotplug;

void __init cpu_hotplug_init(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_init(&cpu_hotplug.lock);
	cpu_hotplug.refcount = 0;
}

cpumask_t cpu_active_map;
#ifdef CONFIG_HOTPLUG_CPU
void get_online_cpus(void)
{
	might_sleep();
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);
void put_online_cpus(void)
{
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(put_online_cpus);
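/*
 * A minimal reader-side sketch (not from this file; do_something() is a
 * hypothetical per-cpu action): any code that must not race with a CPU
 * going away brackets its work with the pair above.
 *
 *	int cpu;
 *
 *	get_online_cpus();
 *	for_each_online_cpu(cpu)
 *		do_something(cpu);
 *	put_online_cpus();
 */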
#endif /* CONFIG_HOTPLUG_CPU */
/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_map, cpu_present_map.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}
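/*
 * Sketch of the expected calling pattern (mirrored by cpu_up() and
 * cpu_down() below); update_maps() stands in for any code that rewrites
 * the cpu maps:
 *
 *	cpu_maps_update_begin();
 *	update_maps();
 *	cpu_maps_update_done();
 */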
/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock.
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically there is a possibility of a livelock:
 * - Refcount goes to zero; the last reader wakes up the sleeping
 *   writer.
 * - The last reader unlocks cpu_hotplug.lock.
 * - A new reader arrives at this moment and bumps up the refcount.
 * - The writer acquires cpu_hotplug.lock, finds the refcount
 *   non-zero and goes to sleep again.
 *
 * However, this is very difficult to trigger in practice, since
 * get_online_cpus() is not an API that is called all that often.
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}
static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
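/*
 * Writer-side pattern, as used by _cpu_up()/_cpu_down() below: the
 * begin/done pair nests inside the map-update lock, so at most one
 * writer can be waiting on the readers at a time:
 *
 *	cpu_maps_update_begin();
 *	cpu_hotplug_begin();
 *	... take the CPU up or down ...
 *	cpu_hotplug_done();
 *	cpu_maps_update_done();
 */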
/* Need to know about CPUs going up/down? */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
	int ret;

	cpu_maps_update_begin();
	ret = raw_notifier_chain_register(&cpu_chain, nb);
	cpu_maps_update_done();
	return ret;
}
#ifdef CONFIG_HOTPLUG_CPU

EXPORT_SYMBOL(register_cpu_notifier);

void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
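/*
 * A minimal notifier sketch (hypothetical names; not part of this file).
 * CPU_UP_PREPARE is the point to allocate per-cpu state (returning
 * NOTIFY_BAD there aborts the bring-up) and CPU_DEAD the point to free
 * it; masking out CPU_TASKS_FROZEN treats the suspend-time variants the
 * same as the runtime ones:
 *
 *	static int __cpuinit foo_cpu_callback(struct notifier_block *nb,
 *					      unsigned long action, void *hcpu)
 *	{
 *		unsigned int cpu = (unsigned long)hcpu;
 *
 *		switch (action & ~CPU_TASKS_FROZEN) {
 *		case CPU_UP_PREPARE:
 *			if (foo_alloc_for(cpu))
 *				return NOTIFY_BAD;
 *			break;
 *		case CPU_DEAD:
 *			foo_free_for(cpu);
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block foo_cpu_notifier __cpuinitdata = {
 *		.notifier_call = foo_cpu_callback,
 *	};
 *
 * registered once during boot with register_cpu_notifier(&foo_cpu_notifier).
 */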
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				p->comm, task_pid_nr(p), cpu,
				p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}
struct take_cpu_down_param {
	unsigned long mod;
	void *hcpu;
};

/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *param = _param;
	int err;

	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/* Force the idle task to run as soon as we yield: it should
	   immediately notice the cpu is offline and die quickly. */
	sched_idle_next();

	return 0;
}
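/*
 * Note: take_cpu_down() runs on the dying CPU itself, in the
 * stop_machine context set up by __stop_machine_run() in _cpu_down()
 * below, while every other CPU spins with interrupts disabled.
 */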
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
	int err, nr_calls = 0;
	struct task_struct *p;
	cpumask_t old_allowed, tmp;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_online(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
					hcpu, -1, &nr_calls);
	if (err == NOTIFY_BAD) {
		nr_calls--;
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, nr_calls, NULL);
		printk("%s: attempt to take down CPU %u failed\n",
				__func__, cpu);
		err = -EINVAL;
		goto out_release;
	}
	/* Ensure that we are not runnable on the dying cpu */
	old_allowed = current->cpus_allowed;
	cpus_setall(tmp);
	cpu_clear(cpu, tmp);
	set_cpus_allowed_ptr(current, &tmp);

	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);

	if (IS_ERR(p) || cpu_online(cpu)) {
		/* CPU didn't die: tell everyone.  Can't complain. */
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();

		if (IS_ERR(p)) {
			err = PTR_ERR(p);
			goto out_allowed;
		}
		goto out_thread;
	}
	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
		BUG();

	check_for_tasks(cpu);

out_thread:
	err = kthread_stop(p);
out_allowed:
	set_cpus_allowed_ptr(current, &old_allowed);
out_release:
	cpu_hotplug_done();
	if (!err) {
		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();
	}
	return err;
}
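/*
 * Teardown sequence, as implemented above: CPU_DOWN_PREPARE ->
 * take_cpu_down() on the dying CPU (CPU_DYING, __cpu_disable()) ->
 * __cpu_die() -> CPU_DEAD -> CPU_POST_DEAD, with CPU_DOWN_FAILED sent
 * to the chain instead if the prepare phase or stop_machine fails.
 */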
int __ref cpu_down(unsigned int cpu)
{
	int err = 0;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	cpu_clear(cpu, cpu_active_map);

	/*
	 * Make sure all cpus did the reschedule and are not using a
	 * stale version of cpu_active_map. This is not strictly
	 * necessary, because the stop_machine() we run down the line
	 * already provides the required synchronization. But that is
	 * really a side effect, and we do not want to depend on the
	 * innards of stop_machine here.
	 */
	synchronize_sched();

	err = _cpu_down(cpu, 0);

	if (cpu_online(cpu))
		cpu_set(cpu, cpu_active_map);

out:
	cpu_maps_update_done();
	return err;
}
EXPORT_SYMBOL(cpu_down);
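/*
 * Sketch of how this is normally reached (via the sysfs "online"
 * attribute in drivers/base/cpu.c, assuming that file's layout):
 *
 *	# userspace offlines CPU 3, then brings it back
 *	echo 0 > /sys/devices/system/cpu/cpu3/online
 *	echo 1 > /sys/devices/system/cpu/cpu3/online
 *
 * which ends up calling cpu_down(3) and then cpu_up(3) below, with the
 * refcount machinery above keeping get_online_cpus() readers safe.
 */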
#endif /* CONFIG_HOTPLUG_CPU */
/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
	int ret, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;

	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
					-1, &nr_calls);
	if (ret == NOTIFY_BAD) {
		nr_calls--;
		printk("%s: attempt to bring up CPU %u failed\n",
				__func__, cpu);
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
	BUG_ON(!cpu_online(cpu));

	/* Now call notifier in preparation. */
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);

out_notify:
	if (ret != 0)
		__raw_notifier_call_chain(&cpu_chain,
				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
	cpu_hotplug_done();

	return ret;
}
int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;

	if (!cpu_isset(cpu, cpu_possible_map)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

	if (cpu_online(cpu))
		cpu_set(cpu, cpu_active_map);

out:
	cpu_maps_update_done();
	return err;
}
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_t frozen_cpus;
int disable_nonboot_cpus(void)
{
	int cpu, first_cpu, error = 0;

	cpu_maps_update_begin();
	first_cpu = first_cpu(cpu_online_map);
	/* We take down all of the non-boot CPUs in one shot to avoid races
	 * with userspace trying to use CPU hotplug at the same time.
	 */
	cpus_clear(frozen_cpus);
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		error = _cpu_down(cpu, 1);
		if (!error) {
			cpu_set(cpu, frozen_cpus);
			printk("CPU%d is down\n", cpu);
		} else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
	}
	cpu_maps_update_done();
	return error;
}
void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpus_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");
	for_each_cpu_mask_nr(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}
	cpus_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
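/*
 * These two are paired by the suspend/hibernate core (kernel/power/,
 * assuming that code's structure): disable_nonboot_cpus() just before
 * the machine enters the sleep state or the image is created, and
 * enable_nonboot_cpus() on the way back up, roughly:
 *
 *	error = disable_nonboot_cpus();
 *	if (!error)
 *		... suspend with only the boot CPU online ...
 *	enable_nonboot_cpus();
 */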
#endif /* CONFIG_PM_SLEEP_SMP */

#endif /* CONFIG_SMP */
/* 64 bits of zeros, for initializers. */
#if BITS_PER_LONG == 32
#define Z64 0, 0
#else
#define Z64 0
#endif
/* Initializer macros. */
#define CMI0(n) { .bits = { 1UL << (n) } }
#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } }

#define CMI8(n, ...)						\
	CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__),		\
	CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__),	\
	CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__),	\
	CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)
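/*
 * Expansion sketch: CMI0(1) yields { .bits = { 0x2 } }, a cpumask_t with
 * only bit 1 set in its first word.  On a 32-bit kernel, CMI(33, 0)
 * yields { .bits = { 0, 1UL << 1 } }: one leading zero word from the
 * varargs, then bit 33's position within its own word.  CMI8 emits
 * eight consecutive such entries sharing the same leading zero words.
 */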
#if BITS_PER_LONG == 32
#define CMI64(n, ...)						\
	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),	\
	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),	\
	CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__), \
	CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
#else
#define CMI64(n, ...)						\
	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),	\
	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),	\
	CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__),	\
	CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
#endif
#define CMI256(n, ...)						\
	CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__), \
	CMI64((n)+128, Z64, Z64, __VA_ARGS__),			\
	CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__)
#define Z256 Z64, Z64, Z64, Z64

#define CMI1024(n, ...)					\
	CMI256((n), __VA_ARGS__),			\
	CMI256((n)+256, Z256, __VA_ARGS__),		\
	CMI256((n)+512, Z256, Z256, __VA_ARGS__),	\
	CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
#define Z1024 Z256, Z256, Z256, Z256
/* We want this statically initialized, just to be safe.  We try not
 * to waste too much space, either. */
static const cpumask_t cpumask_map[]
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
__initdata
#endif
= {
	CMI0(0), CMI0(1), CMI0(2), CMI0(3),
#if NR_CPUS > 4
	CMI0(4), CMI0(5), CMI0(6), CMI0(7),
#endif
#if NR_CPUS > 8
	CMI0(8), CMI0(9), CMI0(10), CMI0(11),
	CMI0(12), CMI0(13), CMI0(14), CMI0(15),
#endif
#if NR_CPUS > 16
	CMI0(16), CMI0(17), CMI0(18), CMI0(19),
	CMI0(20), CMI0(21), CMI0(22), CMI0(23),
	CMI0(24), CMI0(25), CMI0(26), CMI0(27),
	CMI0(28), CMI0(29), CMI0(30), CMI0(31),
#endif
#if NR_CPUS > 32
#if BITS_PER_LONG == 32
	CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
	CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
	CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
	CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
	CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
	CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
	CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
	CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
#else
	CMI0(32), CMI0(33), CMI0(34), CMI0(35),
	CMI0(36), CMI0(37), CMI0(38), CMI0(39),
	CMI0(40), CMI0(41), CMI0(42), CMI0(43),
	CMI0(44), CMI0(45), CMI0(46), CMI0(47),
	CMI0(48), CMI0(49), CMI0(50), CMI0(51),
	CMI0(52), CMI0(53), CMI0(54), CMI0(55),
	CMI0(56), CMI0(57), CMI0(58), CMI0(59),
	CMI0(60), CMI0(61), CMI0(62), CMI0(63),
#endif /* BITS_PER_LONG == 32 */
#endif /* NR_CPUS > 32 */
#if NR_CPUS > 64
	CMI64(64, Z64),
#endif
#if NR_CPUS > 128
	CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
#endif
#if NR_CPUS > 256
	CMI256(256, Z256),
#endif
#if NR_CPUS > 512
	CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
#endif
#if NR_CPUS > 1024
	CMI1024(1024, Z1024),
#endif
#if NR_CPUS > 2048
	CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
#endif
#if NR_CPUS > 4096
#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
#endif
};
const cpumask_t *cpumask_of_cpu_map = cpumask_map;

EXPORT_SYMBOL_GPL(cpumask_of_cpu_map);
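/*
 * A usage sketch: consumers normally reach this table through the
 * cpumask_of_cpu(cpu) macro (in linux/cpumask.h in this era, assuming
 * it indexes cpumask_of_cpu_map[cpu]), obtaining a constant mask with
 * exactly one bit set:
 *
 *	set_cpus_allowed_ptr(task, &cpumask_of_cpu(cpu));
 */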