u32 msr_pkg_therm_low;
u32 msr_pkg_therm_high;
struct thermal_zone_device *tzone;
+ struct cpumask cpumask;
};
static struct thermal_zone_params pkg_temp_tz_params = {
/* List maintaining number of package instances */
static LIST_HEAD(phy_dev_list);
-static DEFINE_MUTEX(phy_dev_list_mutex);
+/* Serializes interrupt notification, work and hotplug */
+static DEFINE_SPINLOCK(pkg_temp_lock);
+/* Protects zone operation in the work function against hotplug removal */
+static DEFINE_MUTEX(thermal_zone_mutex);
/* Interrupt to work function schedule queue */
static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work);
/* To track if the work is already scheduled on a package */
static u8 *pkg_work_scheduled;
-/* Spin lock to prevent races with pkg_work_scheduled */
-static spinlock_t pkg_work_lock;
static u16 max_phy_id;
/* Debug counters to show using debugfs */
return -ENOENT;
}
+/*
+ * Protection:
+ *
+ * - cpu hotplug: Read serialized by cpu hotplug lock
+ * Write must hold pkg_temp_lock
+ *
+ * - Other callsites: Must hold pkg_temp_lock
+ */
static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
{
u16 phys_proc_id = topology_physical_package_id(cpu);
struct pkg_device *pkgdev;
- mutex_lock(&phy_dev_list_mutex);
-
- list_for_each_entry(pkgdev, &phy_dev_list, list)
- if (pkgdev->phys_proc_id == phys_proc_id) {
- mutex_unlock(&phy_dev_list_mutex);
+ list_for_each_entry(pkgdev, &phy_dev_list, list) {
+ if (pkgdev->phys_proc_id == phys_proc_id)
return pkgdev;
- }
-
- mutex_unlock(&phy_dev_list_mutex);
-
+ }
return NULL;
}
static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
- int cpu = smp_processor_id();
- struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
- int phy_id = topology_physical_package_id(cpu);
- bool notify = false;
- unsigned long flags;
+ struct thermal_zone_device *tzone = NULL;
+ int phy_id, cpu = smp_processor_id();
+ struct pkg_device *pkgdev;
u64 msr_val, wr_val;
- if (!pkgdev)
- return;
-
- spin_lock_irqsave(&pkg_work_lock, flags);
+ mutex_lock(&thermal_zone_mutex);
+ spin_lock_irq(&pkg_temp_lock);
++pkg_work_cnt;
- if (unlikely(phy_id > max_phy_id)) {
- spin_unlock_irqrestore(&pkg_work_lock, flags);
+
+ pkgdev = pkg_temp_thermal_get_dev(cpu);
+ if (!pkgdev) {
+ spin_unlock_irq(&pkg_temp_lock);
+ mutex_unlock(&thermal_zone_mutex);
return;
}
+
pkg_work_scheduled[phy_id] = 0;
- spin_unlock_irqrestore(&pkg_work_lock, flags);
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
if (wr_val != msr_val) {
wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
- notify = true;
+ tzone = pkgdev->tzone;
}
enable_pkg_thres_interrupt();
+ spin_unlock_irq(&pkg_temp_lock);
- if (notify) {
- pr_debug("thermal_zone_device_update\n");
- thermal_zone_device_update(pkgdev->tzone,
- THERMAL_EVENT_UNSPECIFIED);
- }
+ /*
+ * If tzone is not NULL, then thermal_zone_mutex will prevent the
+ * concurrent removal in the cpu offline callback.
+ */
+ if (tzone)
+ thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
+
+ mutex_unlock(&thermal_zone_mutex);
}
static int pkg_thermal_notify(u64 msr_val)
{
int cpu = smp_processor_id();
int phy_id = topology_physical_package_id(cpu);
+ struct pkg_device *pkgdev;
unsigned long flags;
- /*
- * When a package is in interrupted state, all CPU's in that package
- * are in the same interrupt state. So scheduling on any one CPU in
- * the package is enough and simply return for others.
- */
- spin_lock_irqsave(&pkg_work_lock, flags);
+ spin_lock_irqsave(&pkg_temp_lock, flags);
++pkg_interrupt_cnt;
- if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) ||
- pkg_work_scheduled[phy_id]) {
- disable_pkg_thres_interrupt();
- spin_unlock_irqrestore(&pkg_work_lock, flags);
- return -EINVAL;
- }
- pkg_work_scheduled[phy_id] = 1;
- spin_unlock_irqrestore(&pkg_work_lock, flags);
disable_pkg_thres_interrupt();
- schedule_delayed_work_on(cpu,
+
+ /* Work is per package, so scheduling it once is enough. */
+ pkgdev = pkg_temp_thermal_get_dev(cpu);
+ if (pkgdev && pkg_work_scheduled && !pkg_work_scheduled[phy_id]) {
+ pkg_work_scheduled[phy_id] = 1;
+ schedule_delayed_work_on(cpu,
&per_cpu(pkg_temp_thermal_threshold_work, cpu),
msecs_to_jiffies(notify_delay_ms));
+ }
+
+ spin_unlock_irqrestore(&pkg_temp_lock, flags);
return 0;
}
err = get_tj_max(cpu, &tj_max);
if (err)
- goto err_ret;
-
- mutex_lock(&phy_dev_list_mutex);
+ return err;
pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL);
- if (!pkgdev) {
- err = -ENOMEM;
- goto err_ret_unlock;
- }
+ if (!pkgdev)
+ return -ENOMEM;
- spin_lock_irqsave(&pkg_work_lock, flags);
+ spin_lock_irqsave(&pkg_temp_lock, flags);
if (topology_physical_package_id(cpu) > max_phy_id)
max_phy_id = topology_physical_package_id(cpu);
temp = krealloc(pkg_work_scheduled,
(max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
if (!temp) {
- spin_unlock_irqrestore(&pkg_work_lock, flags);
- err = -ENOMEM;
- goto err_ret_free;
+ spin_unlock_irqrestore(&pkg_temp_lock, flags);
+ kfree(pkgdev);
+ return -ENOMEM;
}
pkg_work_scheduled = temp;
pkg_work_scheduled[topology_physical_package_id(cpu)] = 0;
- spin_unlock_irqrestore(&pkg_work_lock, flags);
+ spin_unlock_irqrestore(&pkg_temp_lock, flags);
pkgdev->phys_proc_id = topology_physical_package_id(cpu);
pkgdev->cpu = cpu;
pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
if (IS_ERR(pkgdev->tzone)) {
err = PTR_ERR(pkgdev->tzone);
- goto err_ret_free;
+ kfree(pkgdev);
+ return err;
}
/* Store MSR value for package thermal interrupt, to restore at exit */
rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
&pkgdev->msr_pkg_therm_low,
&pkgdev->msr_pkg_therm_high);
+ cpumask_set_cpu(cpu, &pkgdev->cpumask);
+ spin_lock_irq(&pkg_temp_lock);
list_add_tail(&pkgdev->list, &phy_dev_list);
- pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n",
- pkgdev->phys_proc_id, cpu);
-
- mutex_unlock(&phy_dev_list_mutex);
-
+ spin_unlock_irq(&pkg_temp_lock);
return 0;
-
-err_ret_free:
- kfree(pkgdev);
-err_ret_unlock:
- mutex_unlock(&phy_dev_list_mutex);
-
-err_ret:
- return err;
}
-static int pkg_temp_thermal_device_remove(unsigned int cpu)
+static void put_core_offline(unsigned int cpu)
{
struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
+ bool lastcpu;
int target;
if (!pkgdev)
- return -ENODEV;
+ return;
+
+ target = cpumask_any_but(&pkgdev->cpumask, cpu);
+ cpumask_clear_cpu(cpu, &pkgdev->cpumask);
+ lastcpu = target >= nr_cpu_ids;
+ /*
+ * Remove the sysfs files, if this is the last cpu in the package
+ * before doing further cleanups.
+ */
+ if (lastcpu) {
+ struct thermal_zone_device *tzone = pkgdev->tzone;
- mutex_lock(&phy_dev_list_mutex);
+ /*
+ * We must protect against a work function calling
+ * thermal_zone_update, after/while unregister. We null out
+ * the pointer under the zone mutex, so the worker function
+ * won't try to call.
+ */
+ mutex_lock(&thermal_zone_mutex);
+ pkgdev->tzone = NULL;
+ mutex_unlock(&thermal_zone_mutex);
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
- /* This might be the last cpu in this package */
- if (target >= nr_cpu_ids) {
- thermal_zone_device_unregister(pkgdev->tzone);
+ thermal_zone_device_unregister(tzone);
+ }
+
+ /*
+ * If this is the last CPU in the package, restore the interrupt
+ * MSR and remove the package reference from the array.
+ */
+ if (lastcpu) {
+ /* Protect against work and interrupts */
+ spin_lock_irq(&pkg_temp_lock);
+ list_del(&pkgdev->list);
/*
- * Restore original MSR value for package thermal
- * interrupt.
+ * After this point nothing touches the MSR anymore. We
+ * must drop the lock to make the cross cpu call. This goes
+ * away once we move that code to the hotplug state machine.
*/
+ spin_unlock_irq(&pkg_temp_lock);
wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
pkgdev->msr_pkg_therm_low,
pkgdev->msr_pkg_therm_high);
- list_del(&pkgdev->list);
kfree(pkgdev);
- } else if (pkgdev->cpu == cpu) {
- pkgdev->cpu = target;
}
- mutex_unlock(&phy_dev_list_mutex);
-
- return 0;
+ /*
+ * Note, this is broken when work was really scheduled on the
+ * outgoing cpu because this will leave the work_scheduled flag set
+ * and the thermal interrupts disabled. Will be fixed in the next
+ * step as there is no way to fix it in a sane way with the per cpu
+ * work nonsense.
+ */
+ cancel_delayed_work_sync(&per_cpu(pkg_temp_thermal_threshold_work, cpu));
}
static int get_core_online(unsigned int cpu)
pkg_temp_thermal_threshold_work_fn);
/* If the package exists, nothing to do */
- if (pkgdev)
+ if (pkgdev) {
+ cpumask_set_cpu(cpu, &pkgdev->cpumask);
return 0;
+ }
return pkg_temp_thermal_device_add(cpu);
}
-static void put_core_offline(unsigned int cpu)
-{
- if (!pkg_temp_thermal_device_remove(cpu))
- cancel_delayed_work_sync(
- &per_cpu(pkg_temp_thermal_threshold_work, cpu));
-
- pr_debug("put_core_offline: cpu %d\n", cpu);
-}
-
static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
if (!x86_match_cpu(pkg_temp_thermal_ids))
return -ENODEV;
- spin_lock_init(&pkg_work_lock);
-
cpu_notifier_register_begin();
for_each_online_cpu(i)
if (get_core_online(i))
static void __exit pkg_temp_thermal_exit(void)
{
- struct pkg_device *pkgdev, *n;
int i;
platform_thermal_package_notify = NULL;
cpu_notifier_register_begin();
__unregister_hotcpu_notifier(&pkg_temp_thermal_notifier);
- mutex_lock(&phy_dev_list_mutex);
- list_for_each_entry_safe(pkgdev, n, &phy_dev_list, list) {
- /* Retore old MSR value for package thermal interrupt */
- wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
- pkgdev->msr_pkg_therm_low,
- pkgdev->msr_pkg_therm_high);
- thermal_zone_device_unregister(pkgdev->tzone);
- list_del(&pkgdev->list);
- kfree(pkgdev);
- }
- mutex_unlock(&phy_dev_list_mutex);
for_each_online_cpu(i)
- cancel_delayed_work_sync(
- &per_cpu(pkg_temp_thermal_threshold_work, i));
+ put_core_offline(i);
cpu_notifier_register_done();
kfree(pkg_work_scheduled);