4 * A watchdog timer based upon the IPMI interface.
6 * Author: MontaVista Software, Inc.
7 * Corey Minyard <minyard@mvista.com>
10 * Copyright 2002 MontaVista Software Inc.
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 2 of the License, or (at your
15 * option) any later version.
18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
24 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
26 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
27 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * You should have received a copy of the GNU General Public License along
30 * with this program; if not, write to the Free Software Foundation, Inc.,
31 * 675 Mass Ave, Cambridge, MA 02139, USA.
34 #include <linux/module.h>
35 #include <linux/moduleparam.h>
36 #include <linux/ipmi.h>
37 #include <linux/ipmi_smi.h>
38 #include <linux/watchdog.h>
39 #include <linux/miscdevice.h>
40 #include <linux/init.h>
41 #include <linux/completion.h>
42 #include <linux/kdebug.h>
43 #include <linux/rwsem.h>
44 #include <linux/errno.h>
45 #include <asm/uaccess.h>
46 #include <linux/notifier.h>
47 #include <linux/nmi.h>
48 #include <linux/reboot.h>
49 #include <linux/wait.h>
50 #include <linux/poll.h>
51 #include <linux/string.h>
52 #include <linux/ctype.h>
53 #include <linux/delay.h>
54 #include <asm/atomic.h>
57 /* This is ugly, but I've determined that x86 is the only architecture
58 that can reasonably support the IPMI NMI watchdog timeout at this
59 time. If another architecture adds this capability somehow, it
60 will have to be a somewhat different mechanism and I have no idea
61 how it will work. So in the unlikely event that another
62 architecture supports this, we can figure out a good generic
63 mechanism for it at that time. */
64 #define HAVE_DIE_NMI_POST
67 #define PFX "IPMI Watchdog: "
70 * The IPMI command/response information for the watchdog timer.
73 /* values for byte 1 of the set command, byte 2 of the get response. */
74 #define WDOG_DONT_LOG (1 << 7)
75 #define WDOG_DONT_STOP_ON_SET (1 << 6)
76 #define WDOG_SET_TIMER_USE(byte, use) \
77 byte = ((byte) & 0xf8) | ((use) & 0x7)
78 #define WDOG_GET_TIMER_USE(byte) ((byte) & 0x7)
79 #define WDOG_TIMER_USE_BIOS_FRB2 1
80 #define WDOG_TIMER_USE_BIOS_POST 2
81 #define WDOG_TIMER_USE_OS_LOAD 3
82 #define WDOG_TIMER_USE_SMS_OS 4
83 #define WDOG_TIMER_USE_OEM 5
85 /* values for byte 2 of the set command, byte 3 of the get response. */
86 #define WDOG_SET_PRETIMEOUT_ACT(byte, use) \
87 byte = ((byte) & 0x8f) | (((use) & 0x7) << 4)
88 #define WDOG_GET_PRETIMEOUT_ACT(byte) (((byte) >> 4) & 0x7)
89 #define WDOG_PRETIMEOUT_NONE 0
90 #define WDOG_PRETIMEOUT_SMI 1
91 #define WDOG_PRETIMEOUT_NMI 2
92 #define WDOG_PRETIMEOUT_MSG_INT 3
94 /* Operations that can be performed on a pretimeout. */
95 #define WDOG_PREOP_NONE 0
96 #define WDOG_PREOP_PANIC 1
97 #define WDOG_PREOP_GIVE_DATA 2 /* Cause data to be available to
98 read. Doesn't work in NMI mode. */
101 /* Actions to perform on a full timeout. */
102 #define WDOG_SET_TIMEOUT_ACT(byte, use) \
103 byte = ((byte) & 0xf8) | ((use) & 0x7)
104 #define WDOG_GET_TIMEOUT_ACT(byte) ((byte) & 0x7)
105 #define WDOG_TIMEOUT_NONE 0
106 #define WDOG_TIMEOUT_RESET 1
107 #define WDOG_TIMEOUT_POWER_DOWN 2
108 #define WDOG_TIMEOUT_POWER_CYCLE 3
110 /* Byte 3 of the set command, byte 4 of the get response is the
111 pre-timeout in seconds. */
113 /* Bits for setting byte 4 of the set command, byte 5 of the get response. */
114 #define WDOG_EXPIRE_CLEAR_BIOS_FRB2 (1 << 1)
115 #define WDOG_EXPIRE_CLEAR_BIOS_POST (1 << 2)
116 #define WDOG_EXPIRE_CLEAR_OS_LOAD (1 << 3)
117 #define WDOG_EXPIRE_CLEAR_SMS_OS (1 << 4)
118 #define WDOG_EXPIRE_CLEAR_OEM (1 << 5)
120 /* Setting/getting the watchdog timer value. This is for bytes 5 and
121 6 (the timeout time) of the set command, and bytes 6 and 7 (the
122 timeout time) and 8 and 9 (the current countdown value) of the
123 response. The timeout value is given in seconds (in the command it
124 is 100ms intervals). */
125 #define WDOG_SET_TIMEOUT(byte1, byte2, val) \
126 (byte1) = (((val) * 10) & 0xff), (byte2) = (((val) * 10) >> 8)
127 #define WDOG_GET_TIMEOUT(byte1, byte2) \
128 (((byte1) | ((byte2) << 8)) / 10)
130 #define IPMI_WDOG_RESET_TIMER 0x22
131 #define IPMI_WDOG_SET_TIMER 0x24
132 #define IPMI_WDOG_GET_TIMER 0x25
134 /* These are here until the real ones get into the watchdog.h interface. */
135 #ifndef WDIOC_GETTIMEOUT
136 #define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
138 #ifndef WDIOC_SET_PRETIMEOUT
139 #define WDIOC_SET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 21, int)
141 #ifndef WDIOC_GET_PRETIMEOUT
142 #define WDIOC_GET_PRETIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 22, int)
145 static int nowayout = WATCHDOG_NOWAYOUT;
147 static ipmi_user_t watchdog_user;
148 static int watchdog_ifnum;
150 /* Default the timeout to 10 seconds. */
151 static int timeout = 10;
153 /* The pre-timeout is disabled by default. */
154 static int pretimeout;
156 /* Default action is to reset the board on a timeout. */
157 static unsigned char action_val = WDOG_TIMEOUT_RESET;
159 static char action[16] = "reset";
161 static unsigned char preaction_val = WDOG_PRETIMEOUT_NONE;
163 static char preaction[16] = "pre_none";
165 static unsigned char preop_val = WDOG_PREOP_NONE;
167 static char preop[16] = "preop_none";
168 static DEFINE_SPINLOCK(ipmi_read_lock);
169 static char data_to_read;
170 static DECLARE_WAIT_QUEUE_HEAD(read_q);
171 static struct fasync_struct *fasync_q;
172 static char pretimeout_since_last_heartbeat;
173 static char expect_close;
175 static int ifnum_to_use = -1;
177 static DECLARE_RWSEM(register_sem);
179 /* Parameters to ipmi_set_timeout */
180 #define IPMI_SET_TIMEOUT_NO_HB 0
181 #define IPMI_SET_TIMEOUT_HB_IF_NECESSARY 1
182 #define IPMI_SET_TIMEOUT_FORCE_HB 2
184 static int ipmi_set_timeout(int do_heartbeat);
185 static void ipmi_register_watchdog(int ipmi_intf);
186 static void ipmi_unregister_watchdog(int ipmi_intf);
188 /* If true, the driver will start running as soon as it is configured. */
190 static int start_now;
192 static int set_param_int(const char *val, struct kernel_param *kp)
200 l = simple_strtoul(val, &endp, 0);
204 down_read(&register_sem);
205 *((int *)kp->arg) = l;
207 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
208 up_read(&register_sem);
213 static int get_param_int(char *buffer, struct kernel_param *kp)
215 return sprintf(buffer, "%i", *((int *)kp->arg));
218 typedef int (*action_fn)(const char *intval, char *outval);
220 static int action_op(const char *inval, char *outval);
221 static int preaction_op(const char *inval, char *outval);
222 static int preop_op(const char *inval, char *outval);
223 static void check_parms(void);
225 static int set_param_str(const char *val, struct kernel_param *kp)
227 action_fn fn = (action_fn) kp->arg;
232 strncpy(valcp, val, 16);
237 down_read(&register_sem);
244 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
247 up_read(&register_sem);
251 static int get_param_str(char *buffer, struct kernel_param *kp)
253 action_fn fn = (action_fn) kp->arg;
256 rv = fn(NULL, buffer);
259 return strlen(buffer);
263 static int set_param_wdog_ifnum(const char *val, struct kernel_param *kp)
265 int rv = param_set_int(val, kp);
268 if ((ifnum_to_use < 0) || (ifnum_to_use == watchdog_ifnum))
271 ipmi_unregister_watchdog(watchdog_ifnum);
272 ipmi_register_watchdog(ifnum_to_use);
276 module_param_call(ifnum_to_use, set_param_wdog_ifnum, get_param_int,
277 &ifnum_to_use, 0644);
278 MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog "
279 "timer. Setting to -1 defaults to the first registered "
282 module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644);
283 MODULE_PARM_DESC(timeout, "Timeout value in seconds.");
285 module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644);
286 MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
288 module_param_call(action, set_param_str, get_param_str, action_op, 0644);
289 MODULE_PARM_DESC(action, "Timeout action. One of: "
290 "reset, none, power_cycle, power_off.");
292 module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644);
293 MODULE_PARM_DESC(preaction, "Pretimeout action. One of: "
294 "pre_none, pre_smi, pre_nmi, pre_int.");
296 module_param_call(preop, set_param_str, get_param_str, preop_op, 0644);
297 MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: "
298 "preop_none, preop_panic, preop_give_data.");
300 module_param(start_now, int, 0444); /* mode 0444: read-only */
301 MODULE_PARM_DESC(start_now, "Set to 1 to start the watchdog as " /* keep the trailing space: adjacent literals are concatenated */
302 "soon as the driver is loaded.");
304 module_param(nowayout, int, 0644);
305 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
306 "(default=CONFIG_WATCHDOG_NOWAYOUT)");
308 /* Default state of the timer. */
309 static unsigned char ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
311 /* If shutting down via IPMI, we ignore the heartbeat. */
312 static int ipmi_ignore_heartbeat;
314 /* Is someone using the watchdog? Only one user is allowed. */
315 static unsigned long ipmi_wdog_open;
317 /* If set to 1, the heartbeat command will set the state to the
318 configured action and start the timer. The timer doesn't normally
319 run when the driver is first opened until the first heartbeat is
320 sent; this variable is used to accomplish this. */
321 static int ipmi_start_timer_on_heartbeat;
323 /* IPMI version of the BMC. */
324 static unsigned char ipmi_version_major;
325 static unsigned char ipmi_version_minor;
327 /* If a pretimeout occurs, this is used to allow only one panic to happen. */
328 static atomic_t preop_panic_excl = ATOMIC_INIT(-1);
330 #ifdef HAVE_DIE_NMI_POST
331 static int testing_nmi;
332 static int nmi_handler_registered;
335 static int ipmi_heartbeat(void);
336 static void panic_halt_ipmi_heartbeat(void);
339 /* We use a mutex to make sure that only one thing can send a set
340 timeout at one time, because we only have one copy of the data.
341 The mutex is claimed when the set_timeout is sent and freed
342 when both messages are free. */
343 static atomic_t set_timeout_tofree = ATOMIC_INIT(0);
344 static DEFINE_MUTEX(set_timeout_lock);
345 static DECLARE_COMPLETION(set_timeout_wait);
346 static void set_timeout_free_smi(struct ipmi_smi_msg *msg)
348 if (atomic_dec_and_test(&set_timeout_tofree))
349 complete(&set_timeout_wait);
351 static void set_timeout_free_recv(struct ipmi_recv_msg *msg)
353 if (atomic_dec_and_test(&set_timeout_tofree))
354 complete(&set_timeout_wait);
356 static struct ipmi_smi_msg set_timeout_smi_msg =
358 .done = set_timeout_free_smi
360 static struct ipmi_recv_msg set_timeout_recv_msg =
362 .done = set_timeout_free_recv
365 static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg,
366 struct ipmi_recv_msg *recv_msg,
367 int *send_heartbeat_now)
369 struct kernel_ipmi_msg msg;
370 unsigned char data[6];
372 struct ipmi_system_interface_addr addr;
376 /* These can be cleared as we are setting the timeout. */
377 ipmi_start_timer_on_heartbeat = 0;
378 pretimeout_since_last_heartbeat = 0;
381 WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
383 if ((ipmi_version_major > 1)
384 || ((ipmi_version_major == 1) && (ipmi_version_minor >= 5)))
386 /* This is an IPMI 1.5-only feature. */
387 data[0] |= WDOG_DONT_STOP_ON_SET;
388 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
389 /* In ipmi 1.0, setting the timer stops the watchdog, we
390 need to start it back up again. */
395 WDOG_SET_TIMEOUT_ACT(data[1], ipmi_watchdog_state);
396 if ((pretimeout > 0) && (ipmi_watchdog_state != WDOG_TIMEOUT_NONE)) {
397 WDOG_SET_PRETIMEOUT_ACT(data[1], preaction_val);
398 data[2] = pretimeout;
400 WDOG_SET_PRETIMEOUT_ACT(data[1], WDOG_PRETIMEOUT_NONE);
401 data[2] = 0; /* No pretimeout. */
404 WDOG_SET_TIMEOUT(data[4], data[5], timeout);
406 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
407 addr.channel = IPMI_BMC_CHANNEL;
411 msg.cmd = IPMI_WDOG_SET_TIMER;
413 msg.data_len = sizeof(data);
414 rv = ipmi_request_supply_msgs(watchdog_user,
415 (struct ipmi_addr *) &addr,
423 printk(KERN_WARNING PFX "set timeout error: %d\n",
427 if (send_heartbeat_now)
428 *send_heartbeat_now = hbnow;
433 static int ipmi_set_timeout(int do_heartbeat)
435 int send_heartbeat_now;
439 /* We can only send one of these at a time. */
440 mutex_lock(&set_timeout_lock);
442 atomic_set(&set_timeout_tofree, 2);
444 rv = i_ipmi_set_timeout(&set_timeout_smi_msg,
445 &set_timeout_recv_msg,
446 &send_heartbeat_now);
448 mutex_unlock(&set_timeout_lock);
452 wait_for_completion(&set_timeout_wait);
454 mutex_unlock(&set_timeout_lock);
456 if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
457 || ((send_heartbeat_now)
458 && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
459 rv = ipmi_heartbeat();
465 static void dummy_smi_free(struct ipmi_smi_msg *msg)
468 static void dummy_recv_free(struct ipmi_recv_msg *msg)
471 static struct ipmi_smi_msg panic_halt_smi_msg =
473 .done = dummy_smi_free
475 static struct ipmi_recv_msg panic_halt_recv_msg =
477 .done = dummy_recv_free
480 /* Special call, doesn't claim any locks. This is only to be called
481 at panic or halt time, in run-to-completion mode, when the caller
482 is the only CPU and the only thing that will be going is these IPMI calls. */
484 static void panic_halt_ipmi_set_timeout(void)
486 int send_heartbeat_now;
489 rv = i_ipmi_set_timeout(&panic_halt_smi_msg,
490 &panic_halt_recv_msg,
491 &send_heartbeat_now);
493 if (send_heartbeat_now)
494 panic_halt_ipmi_heartbeat();
498 /* We use a mutex to make sure that only one thing can send a
499 heartbeat at one time, because we only have one copy of the data.
500 The mutex is claimed when the heartbeat is sent and freed
501 when both messages are free. */
502 static atomic_t heartbeat_tofree = ATOMIC_INIT(0);
503 static DEFINE_MUTEX(heartbeat_lock);
504 static DECLARE_COMPLETION(heartbeat_wait);
505 static void heartbeat_free_smi(struct ipmi_smi_msg *msg)
507 if (atomic_dec_and_test(&heartbeat_tofree))
508 complete(&heartbeat_wait);
510 static void heartbeat_free_recv(struct ipmi_recv_msg *msg)
512 if (atomic_dec_and_test(&heartbeat_tofree))
513 complete(&heartbeat_wait);
515 static struct ipmi_smi_msg heartbeat_smi_msg =
517 .done = heartbeat_free_smi
519 static struct ipmi_recv_msg heartbeat_recv_msg =
521 .done = heartbeat_free_recv
524 static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg =
526 .done = dummy_smi_free
528 static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg =
530 .done = dummy_recv_free
533 static int ipmi_heartbeat(void)
535 struct kernel_ipmi_msg msg;
537 struct ipmi_system_interface_addr addr;
539 if (ipmi_ignore_heartbeat)
542 if (ipmi_start_timer_on_heartbeat) {
543 ipmi_watchdog_state = action_val;
544 return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
545 } else if (pretimeout_since_last_heartbeat) {
546 /* A pretimeout occurred, make sure we set the timeout.
547 We don't want to set the action, though, we want to
548 leave that alone (thus it can't be combined with the above operation). */
550 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
553 mutex_lock(&heartbeat_lock);
555 atomic_set(&heartbeat_tofree, 2);
557 /* Don't reset the timer if we have the timer turned off, that
558 re-enables the watchdog. */
559 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) {
560 mutex_unlock(&heartbeat_lock);
564 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
565 addr.channel = IPMI_BMC_CHANNEL;
569 msg.cmd = IPMI_WDOG_RESET_TIMER;
572 rv = ipmi_request_supply_msgs(watchdog_user,
573 (struct ipmi_addr *) &addr,
581 mutex_unlock(&heartbeat_lock);
582 printk(KERN_WARNING PFX "heartbeat failure: %d\n",
587 /* Wait for the heartbeat to be sent. */
588 wait_for_completion(&heartbeat_wait);
590 if (heartbeat_recv_msg.msg.data[0] != 0) {
591 /* Got an error in the heartbeat response. It was already
592 reported in ipmi_wdog_msg_handler, but we should return an error here. */
597 mutex_unlock(&heartbeat_lock);
602 static void panic_halt_ipmi_heartbeat(void)
604 struct kernel_ipmi_msg msg;
605 struct ipmi_system_interface_addr addr;
608 /* Don't reset the timer if we have the timer turned off, that
609 re-enables the watchdog. */
610 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
613 addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
614 addr.channel = IPMI_BMC_CHANNEL;
618 msg.cmd = IPMI_WDOG_RESET_TIMER;
621 ipmi_request_supply_msgs(watchdog_user,
622 (struct ipmi_addr *) &addr,
626 &panic_halt_heartbeat_smi_msg,
627 &panic_halt_heartbeat_recv_msg,
631 static struct watchdog_info ident =
633 .options = 0, /* WDIOF_SETTIMEOUT, */
634 .firmware_version = 1,
638 static int ipmi_ioctl(struct inode *inode, struct file *file,
639 unsigned int cmd, unsigned long arg)
641 void __user *argp = (void __user *)arg;
646 case WDIOC_GETSUPPORT:
647 i = copy_to_user(argp, &ident, sizeof(ident));
648 return i ? -EFAULT : 0;
650 case WDIOC_SETTIMEOUT:
651 i = copy_from_user(&val, argp, sizeof(int));
655 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
657 case WDIOC_GETTIMEOUT:
658 i = copy_to_user(argp, &timeout, sizeof(timeout));
663 case WDIOC_SET_PRETIMEOUT:
664 i = copy_from_user(&val, argp, sizeof(int));
668 return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
670 case WDIOC_GET_PRETIMEOUT:
671 i = copy_to_user(argp, &pretimeout, sizeof(pretimeout));
676 case WDIOC_KEEPALIVE:
677 return ipmi_heartbeat();
679 case WDIOC_SETOPTIONS:
680 i = copy_from_user(&val, argp, sizeof(int));
683 if (val & WDIOS_DISABLECARD)
685 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
686 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
687 ipmi_start_timer_on_heartbeat = 0;
690 if (val & WDIOS_ENABLECARD)
692 ipmi_watchdog_state = action_val;
693 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
697 case WDIOC_GETSTATUS:
699 i = copy_to_user(argp, &val, sizeof(val));
709 static ssize_t ipmi_write(struct file *file,
710 const char __user *buf,
720 /* In case it was set long ago */
723 for (i = 0; i != len; i++) {
726 if (get_user(c, buf + i))
732 rv = ipmi_heartbeat();
740 static ssize_t ipmi_read(struct file *file,
751 /* Reading returns if the pretimeout has gone off, and it only does
752 it once per pretimeout. */
753 spin_lock(&ipmi_read_lock);
755 if (file->f_flags & O_NONBLOCK) {
760 init_waitqueue_entry(&wait, current);
761 add_wait_queue(&read_q, &wait);
762 while (!data_to_read) {
763 set_current_state(TASK_INTERRUPTIBLE);
764 spin_unlock(&ipmi_read_lock);
766 spin_lock(&ipmi_read_lock);
768 remove_wait_queue(&read_q, &wait);
770 if (signal_pending(current)) {
778 spin_unlock(&ipmi_read_lock);
781 if (copy_to_user(buf, &data_to_read, 1))
790 static int ipmi_open(struct inode *ino, struct file *filep)
792 switch (iminor(ino)) {
794 if (test_and_set_bit(0, &ipmi_wdog_open))
797 /* Don't start the timer now, let it start on the first heartbeat. */
799 ipmi_start_timer_on_heartbeat = 1;
800 return nonseekable_open(ino, filep);
807 static unsigned int ipmi_poll(struct file *file, poll_table *wait)
809 unsigned int mask = 0;
811 poll_wait(file, &read_q, wait);
813 spin_lock(&ipmi_read_lock);
815 mask |= (POLLIN | POLLRDNORM);
816 spin_unlock(&ipmi_read_lock);
821 static int ipmi_fasync(int fd, struct file *file, int on)
825 result = fasync_helper(fd, file, on, &fasync_q);
830 static int ipmi_close(struct inode *ino, struct file *filep)
832 if (iminor(ino) == WATCHDOG_MINOR) {
833 if (expect_close == 42) {
834 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
835 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
838 "Unexpected close, not stopping watchdog!\n");
841 clear_bit(0, &ipmi_wdog_open);
844 ipmi_fasync (-1, filep, 0);
850 static const struct file_operations ipmi_wdog_fops = {
851 .owner = THIS_MODULE,
857 .release = ipmi_close,
858 .fasync = ipmi_fasync,
861 static struct miscdevice ipmi_wdog_miscdev = {
862 .minor = WATCHDOG_MINOR,
864 .fops = &ipmi_wdog_fops
867 static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
870 if (msg->msg.data[0] != 0) {
871 printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
876 ipmi_free_recv_msg(msg);
879 static void ipmi_wdog_pretimeout_handler(void *handler_data)
881 if (preaction_val != WDOG_PRETIMEOUT_NONE) {
882 if (preop_val == WDOG_PREOP_PANIC) {
883 if (atomic_inc_and_test(&preop_panic_excl))
884 panic("Watchdog pre-timeout");
885 } else if (preop_val == WDOG_PREOP_GIVE_DATA) {
886 spin_lock(&ipmi_read_lock);
888 wake_up_interruptible(&read_q);
889 kill_fasync(&fasync_q, SIGIO, POLL_IN);
891 spin_unlock(&ipmi_read_lock);
895 /* On some machines, the heartbeat will give
896 an error and not work unless we re-enable
897 the timer. So do so. */
898 pretimeout_since_last_heartbeat = 1;
901 static struct ipmi_user_hndl ipmi_hndlrs =
903 .ipmi_recv_hndl = ipmi_wdog_msg_handler,
904 .ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler
907 static void ipmi_register_watchdog(int ipmi_intf)
911 down_write(&register_sem);
915 if ((ifnum_to_use >= 0) && (ifnum_to_use != ipmi_intf))
918 watchdog_ifnum = ipmi_intf;
920 rv = ipmi_create_user(ipmi_intf, &ipmi_hndlrs, NULL, &watchdog_user);
922 printk(KERN_CRIT PFX "Unable to register with ipmi\n");
926 ipmi_get_version(watchdog_user,
928 &ipmi_version_minor);
930 rv = misc_register(&ipmi_wdog_miscdev);
932 ipmi_destroy_user(watchdog_user);
933 watchdog_user = NULL;
934 printk(KERN_CRIT PFX "Unable to register misc device\n");
937 #ifdef HAVE_DIE_NMI_POST
938 if (nmi_handler_registered) {
939 int old_pretimeout = pretimeout;
940 int old_timeout = timeout;
941 int old_preop_val = preop_val;
943 /* Set the pretimeout to go off in a second and give
944 ourselves plenty of time to stop the timer. */
945 ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
946 preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */
952 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
954 printk(KERN_WARNING PFX "Error starting timer to"
955 " test NMI: 0x%x. The NMI pretimeout will"
956 " likely not work\n", rv);
963 if (testing_nmi != 2) {
964 printk(KERN_WARNING PFX "IPMI NMI didn't seem to"
965 " occur. The NMI pretimeout will"
966 " likely not work\n");
970 preop_val = old_preop_val;
971 pretimeout = old_pretimeout;
972 timeout = old_timeout;
977 up_write(&register_sem);
979 if ((start_now) && (rv == 0)) {
980 /* Run from startup, so start the timer now. */
981 start_now = 0; /* Disable this function after first startup. */
982 ipmi_watchdog_state = action_val;
983 ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
984 printk(KERN_INFO PFX "Starting now!\n");
986 /* Stop the timer now. */
987 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
988 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
992 static void ipmi_unregister_watchdog(int ipmi_intf)
996 down_write(&register_sem);
1001 if (watchdog_ifnum != ipmi_intf)
1004 /* Make sure no one can call us any more. */
1005 misc_deregister(&ipmi_wdog_miscdev);
1007 /* Wait to make sure the message makes it out. The lower layer has
1008 pointers to our buffers, we want to make sure they are done before
1009 we release our memory. */
1010 while (atomic_read(&set_timeout_tofree))
1011 schedule_timeout_uninterruptible(1);
1013 /* Disconnect from IPMI. */
1014 rv = ipmi_destroy_user(watchdog_user);
1016 printk(KERN_WARNING PFX "error unlinking from IPMI: %d\n",
1019 watchdog_user = NULL;
1022 up_write(&register_sem);
1025 #ifdef HAVE_DIE_NMI_POST
1027 ipmi_nmi(struct notifier_block *self, unsigned long val, void *data)
1029 if (val != DIE_NMI_POST)
1037 /* If we are not expecting a timeout, ignore it. */
1038 if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
1041 if (preaction_val != WDOG_PRETIMEOUT_NMI)
1044 /* If no one else handled the NMI, we assume it was the IPMI watchdog. */
1046 if (preop_val == WDOG_PREOP_PANIC) {
1047 /* On some machines, the heartbeat will give
1048 an error and not work unless we re-enable
1049 the timer. So do so. */
1050 pretimeout_since_last_heartbeat = 1;
1051 if (atomic_inc_and_test(&preop_panic_excl))
1052 panic(PFX "pre-timeout");
1058 static struct notifier_block ipmi_nmi_handler = {
1059 .notifier_call = ipmi_nmi
1063 static int wdog_reboot_handler(struct notifier_block *this,
1067 static int reboot_event_handled = 0;
1069 if ((watchdog_user) && (!reboot_event_handled)) {
1070 /* Make sure we only do this once. */
1071 reboot_event_handled = 1;
1073 if (code == SYS_DOWN || code == SYS_HALT) {
1074 /* Disable the WDT if we are shutting down. */
1075 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
1076 panic_halt_ipmi_set_timeout();
1077 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
1078 /* Set a long timer to let the reboot happen, but
1079 reboot if it hangs; do this only if the watchdog
1080 timer was already running. */
1083 ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
1084 panic_halt_ipmi_set_timeout();
1090 static struct notifier_block wdog_reboot_notifier = {
1091 .notifier_call = wdog_reboot_handler,
1096 static int wdog_panic_handler(struct notifier_block *this,
1097 unsigned long event,
1100 static int panic_event_handled = 0;
1102 /* On a panic, if we have a panic timeout, make sure to extend
1103 the watchdog timer to a reasonable value to complete the
1104 panic, if the watchdog timer is running. Plus the
1105 pretimeout is meaningless at panic time. */
1106 if (watchdog_user && !panic_event_handled &&
1107 ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
1108 /* Make sure we do this only once. */
1109 panic_event_handled = 1;
1113 panic_halt_ipmi_set_timeout();
1119 static struct notifier_block wdog_panic_notifier = {
1120 .notifier_call = wdog_panic_handler,
1122 .priority = 150 /* priority: INT_MAX >= x >= 0 */
1126 static void ipmi_new_smi(int if_num, struct device *device)
1128 ipmi_register_watchdog(if_num);
1131 static void ipmi_smi_gone(int if_num)
1133 ipmi_unregister_watchdog(if_num);
1136 static struct ipmi_smi_watcher smi_watcher =
1138 .owner = THIS_MODULE,
1139 .new_smi = ipmi_new_smi,
1140 .smi_gone = ipmi_smi_gone
1143 static int action_op(const char *inval, char *outval)
1146 strcpy(outval, action);
1151 if (strcmp(inval, "reset") == 0)
1152 action_val = WDOG_TIMEOUT_RESET;
1153 else if (strcmp(inval, "none") == 0)
1154 action_val = WDOG_TIMEOUT_NONE;
1155 else if (strcmp(inval, "power_cycle") == 0)
1156 action_val = WDOG_TIMEOUT_POWER_CYCLE;
1157 else if (strcmp(inval, "power_off") == 0)
1158 action_val = WDOG_TIMEOUT_POWER_DOWN;
1161 strcpy(action, inval);
1165 static int preaction_op(const char *inval, char *outval)
1168 strcpy(outval, preaction);
1173 if (strcmp(inval, "pre_none") == 0)
1174 preaction_val = WDOG_PRETIMEOUT_NONE;
1175 else if (strcmp(inval, "pre_smi") == 0)
1176 preaction_val = WDOG_PRETIMEOUT_SMI;
1177 #ifdef HAVE_DIE_NMI_POST
1178 else if (strcmp(inval, "pre_nmi") == 0)
1179 preaction_val = WDOG_PRETIMEOUT_NMI;
1181 else if (strcmp(inval, "pre_int") == 0)
1182 preaction_val = WDOG_PRETIMEOUT_MSG_INT;
1185 strcpy(preaction, inval);
1189 static int preop_op(const char *inval, char *outval)
1192 strcpy(outval, preop);
1197 if (strcmp(inval, "preop_none") == 0)
1198 preop_val = WDOG_PREOP_NONE;
1199 else if (strcmp(inval, "preop_panic") == 0)
1200 preop_val = WDOG_PREOP_PANIC;
1201 else if (strcmp(inval, "preop_give_data") == 0)
1202 preop_val = WDOG_PREOP_GIVE_DATA;
1205 strcpy(preop, inval);
1209 static void check_parms(void)
1211 #ifdef HAVE_DIE_NMI_POST
1215 if (preaction_val == WDOG_PRETIMEOUT_NMI) {
1217 if (preop_val == WDOG_PREOP_GIVE_DATA) {
1218 printk(KERN_WARNING PFX "Pretimeout op is to give data"
1219 " but NMI pretimeout is enabled, setting"
1220 " pretimeout op to none\n");
1221 preop_op("preop_none", NULL);
1225 if (do_nmi && !nmi_handler_registered) {
1226 rv = register_die_notifier(&ipmi_nmi_handler);
1228 printk(KERN_WARNING PFX
1229 "Can't register nmi handler\n");
1232 nmi_handler_registered = 1;
1233 } else if (!do_nmi && nmi_handler_registered) {
1234 unregister_die_notifier(&ipmi_nmi_handler);
1235 nmi_handler_registered = 0;
1240 static int __init ipmi_wdog_init(void)
1244 if (action_op(action, NULL)) {
1245 action_op("reset", NULL);
1246 printk(KERN_INFO PFX "Unknown action '%s', defaulting to"
1247 " reset\n", action);
1250 if (preaction_op(preaction, NULL)) {
1251 preaction_op("pre_none", NULL);
1252 printk(KERN_INFO PFX "Unknown preaction '%s', defaulting to"
1253 " none\n", preaction);
1256 if (preop_op(preop, NULL)) {
1257 preop_op("preop_none", NULL);
1258 printk(KERN_INFO PFX "Unknown preop '%s', defaulting to"
1264 register_reboot_notifier(&wdog_reboot_notifier);
1265 atomic_notifier_chain_register(&panic_notifier_list,
1266 &wdog_panic_notifier);
1268 rv = ipmi_smi_watcher_register(&smi_watcher);
1270 #ifdef HAVE_DIE_NMI_POST
1271 if (nmi_handler_registered)
1272 unregister_die_notifier(&ipmi_nmi_handler);
1274 atomic_notifier_chain_unregister(&panic_notifier_list,
1275 &wdog_panic_notifier);
1276 unregister_reboot_notifier(&wdog_reboot_notifier);
1277 printk(KERN_WARNING PFX "can't register smi watcher\n");
1281 printk(KERN_INFO PFX "driver initialized\n");
1286 static void __exit ipmi_wdog_exit(void)
1288 ipmi_smi_watcher_unregister(&smi_watcher);
1289 ipmi_unregister_watchdog(watchdog_ifnum);
1291 #ifdef HAVE_DIE_NMI_POST
1292 if (nmi_handler_registered)
1293 unregister_die_notifier(&ipmi_nmi_handler);
1296 atomic_notifier_chain_unregister(&panic_notifier_list,
1297 &wdog_panic_notifier);
1298 unregister_reboot_notifier(&wdog_reboot_notifier);
1300 module_exit(ipmi_wdog_exit);
1301 module_init(ipmi_wdog_init);
1302 MODULE_LICENSE("GPL");
1303 MODULE_AUTHOR("Corey Minyard <minyard@mvista.com>");
1304 MODULE_DESCRIPTION("watchdog timer based upon the IPMI interface.");