/*
 * UEFI Common Platform Error Record (CPER) support
 *
 * Copyright (C) 2010, Intel Corp.
 *	Author: Huang Ying <ying.huang@intel.com>
 *
 * CPER is the format used to describe platform hardware errors by
 * various tables, such as ERST, BERT and HEST.
 *
 * For more information about CPER, please refer to Appendix N of UEFI
 * Specification version 2.4.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
38 * CPER record ID need to be unique even after reboot, because record
39 * ID is used as index for ERST storage, while CPER records from
40 * multiple boot may co-exist in ERST.
42 u64 cper_next_record_id(void)
44 static atomic64_t seq;
46 if (!atomic64_read(&seq))
47 atomic64_set(&seq, ((u64)get_seconds()) << 32);
49 return atomic64_inc_return(&seq);
51 EXPORT_SYMBOL_GPL(cper_next_record_id);
/* Severity names, indexed by the CPER severity value (CPER_SEV_*). */
static const char * const cper_severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};

/*
 * cper_severity_str - map a CPER severity value to a printable name
 * @severity: severity value taken from an error record
 *
 * Returns the matching entry of cper_severity_strs, or "unknown" when
 * @severity is outside the table.
 */
static const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(cper_severity_strs))
		return "unknown";

	return cper_severity_strs[severity];
}

/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * Entries of @strs that are NULL are skipped.  If the output would be
 * longer than 80 characters, multiple lines are printed, with @pfx
 * printed at the beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * @bits cannot name more positions than it has bits, and shifting
	 * 1U by the type width or more is undefined, so clamp the walk.
	 */
	if (strs_size > 8 * sizeof(bits))
		strs_size = 8 * sizeof(bits);

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line before ", <str>" would pass 80. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}

/* Processor type names, indexed by the proc_type field value. */
static const char * const cper_proc_type_strs[] = {
	"IA32/X64",
	"IA64",
};

/* Processor ISA names, indexed by the proc_isa field value. */
static const char * const cper_proc_isa_strs[] = {
	"IA32",
	"IA64",
	"X64",
};

/* Processor error type names, indexed by bit position in proc_error_type. */
static const char * const cper_proc_error_type_strs[] = {
	"cache error",
	"TLB error",
	"bus error",
	"micro-architectural error",
};

/* Processor operation names, indexed by the operation field value. */
static const char * const cper_proc_op_strs[] = {
	"unknown or generic",
	"data read",
	"data write",
	"instruction execution",
};

/* Processor flag names, indexed by bit position in the flags field. */
static const char * const cper_proc_flag_strs[] = {
	"restartable",
	"precise IP",
	"overflow",
	"corrected",
};

135 static void cper_print_proc_generic(const char *pfx,
136 const struct cper_sec_proc_generic *proc)
138 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
139 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
140 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
141 cper_proc_type_strs[proc->proc_type] : "unknown");
142 if (proc->validation_bits & CPER_PROC_VALID_ISA)
143 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
144 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
145 cper_proc_isa_strs[proc->proc_isa] : "unknown");
146 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
147 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
148 cper_print_bits(pfx, proc->proc_error_type,
149 cper_proc_error_type_strs,
150 ARRAY_SIZE(cper_proc_error_type_strs));
152 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
153 printk("%s""operation: %d, %s\n", pfx, proc->operation,
154 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
155 cper_proc_op_strs[proc->operation] : "unknown");
156 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
157 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
158 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
159 ARRAY_SIZE(cper_proc_flag_strs));
161 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
162 printk("%s""level: %d\n", pfx, proc->level);
163 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
164 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
165 if (proc->validation_bits & CPER_PROC_VALID_ID)
166 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
167 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
168 printk("%s""target_address: 0x%016llx\n",
169 pfx, proc->target_addr);
170 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
171 printk("%s""requestor_id: 0x%016llx\n",
172 pfx, proc->requestor_id);
173 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
174 printk("%s""responder_id: 0x%016llx\n",
175 pfx, proc->responder_id);
176 if (proc->validation_bits & CPER_PROC_VALID_IP)
177 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
/* Memory error type names, indexed by the error_type field value. */
static const char * const cper_mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};

199 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
201 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
202 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
203 if (mem->validation_bits & CPER_MEM_VALID_PA)
204 printk("%s""physical_address: 0x%016llx\n",
205 pfx, mem->physical_addr);
206 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
207 printk("%s""physical_address_mask: 0x%016llx\n",
208 pfx, mem->physical_addr_mask);
209 if (mem->validation_bits & CPER_MEM_VALID_NODE)
210 pr_debug("node: %d\n", mem->node);
211 if (mem->validation_bits & CPER_MEM_VALID_CARD)
212 pr_debug("card: %d\n", mem->card);
213 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
214 pr_debug("module: %d\n", mem->module);
215 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
216 pr_debug("rank: %d\n", mem->rank);
217 if (mem->validation_bits & CPER_MEM_VALID_BANK)
218 pr_debug("bank: %d\n", mem->bank);
219 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
220 pr_debug("device: %d\n", mem->device);
221 if (mem->validation_bits & CPER_MEM_VALID_ROW)
222 pr_debug("row: %d\n", mem->row);
223 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
224 pr_debug("column: %d\n", mem->column);
225 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
226 pr_debug("bit_position: %d\n", mem->bit_pos);
227 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
228 pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
229 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
230 pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
231 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
232 pr_debug("target_id: 0x%016llx\n", mem->target_id);
233 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
234 u8 etype = mem->error_type;
235 printk("%s""error_type: %d, %s\n", pfx, etype,
236 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
237 cper_mem_err_type_strs[etype] : "unknown");
239 if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
240 const char *bank = NULL, *device = NULL;
241 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
242 if (bank != NULL && device != NULL)
243 printk("%s""DIMM location: %s %s", pfx, bank, device);
245 printk("%s""DIMM DMI handle: 0x%.4x",
246 pfx, mem->mem_dev_handle);
/*
 * PCIe port type names, indexed by the port_type field value.  Values
 * 2 and 3 have no assigned meaning and map to "unknown".
 */
static const char * const cper_pcie_port_type_strs[] = {
	"PCIe end point",
	"legacy PCI end point",
	"unknown",
	"unknown",
	"root port",
	"upstream switch port",
	"downstream switch port",
	"PCIe to PCI/PCI-X bridge",
	"PCI/PCI-X to PCIe bridge",
	"root complex integrated endpoint device",
	"root complex event collector",
};

264 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
265 const struct acpi_generic_data *gdata)
267 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
268 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
269 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
270 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
271 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
272 printk("%s""version: %d.%d\n", pfx,
273 pcie->version.major, pcie->version.minor);
274 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
275 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
276 pcie->command, pcie->status);
277 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
279 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
280 pcie->device_id.segment, pcie->device_id.bus,
281 pcie->device_id.device, pcie->device_id.function);
282 printk("%s""slot: %d\n", pfx,
283 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
284 printk("%s""secondary_bus: 0x%02x\n", pfx,
285 pcie->device_id.secondary_bus);
286 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
287 pcie->device_id.vendor_id, pcie->device_id.device_id);
288 p = pcie->device_id.class_code;
289 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
291 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
292 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
293 pcie->serial_number.lower, pcie->serial_number.upper);
294 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
296 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
297 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
300 static void cper_estatus_print_section(
301 const char *pfx, const struct acpi_generic_data *gdata, int sec_no)
303 uuid_le *sec_type = (uuid_le *)gdata->section_type;
307 severity = gdata->error_severity;
308 printk("%s""Error %d, type: %s\n", pfx, sec_no,
309 cper_severity_str(severity));
310 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
311 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
312 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
313 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
315 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
316 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
317 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
318 printk("%s""section_type: general processor error\n", newpfx);
319 if (gdata->error_data_length >= sizeof(*proc_err))
320 cper_print_proc_generic(newpfx, proc_err);
322 goto err_section_too_small;
323 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
324 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
325 printk("%s""section_type: memory error\n", newpfx);
326 if (gdata->error_data_length >= sizeof(*mem_err))
327 cper_print_mem(newpfx, mem_err);
329 goto err_section_too_small;
330 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
331 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
332 printk("%s""section_type: PCIe error\n", newpfx);
333 if (gdata->error_data_length >= sizeof(*pcie))
334 cper_print_pcie(newpfx, pcie, gdata);
336 goto err_section_too_small;
338 printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
342 err_section_too_small:
343 pr_err(FW_WARN "error section length is too small\n");
346 void cper_estatus_print(const char *pfx,
347 const struct acpi_generic_status *estatus)
349 struct acpi_generic_data *gdata;
350 unsigned int data_len, gedata_len;
355 severity = estatus->error_severity;
356 if (severity == CPER_SEV_CORRECTED)
357 printk("%s%s\n", pfx,
358 "It has been corrected by h/w "
359 "and requires no further action");
360 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
361 data_len = estatus->data_length;
362 gdata = (struct acpi_generic_data *)(estatus + 1);
363 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
364 while (data_len >= sizeof(*gdata)) {
365 gedata_len = gdata->error_data_length;
366 cper_estatus_print_section(newpfx, gdata, sec_no);
367 data_len -= gedata_len + sizeof(*gdata);
368 gdata = (void *)(gdata + 1) + gedata_len;
372 EXPORT_SYMBOL_GPL(cper_estatus_print);
374 int cper_estatus_check_header(const struct acpi_generic_status *estatus)
376 if (estatus->data_length &&
377 estatus->data_length < sizeof(struct acpi_generic_data))
379 if (estatus->raw_data_length &&
380 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
385 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
387 int cper_estatus_check(const struct acpi_generic_status *estatus)
389 struct acpi_generic_data *gdata;
390 unsigned int data_len, gedata_len;
393 rc = cper_estatus_check_header(estatus);
396 data_len = estatus->data_length;
397 gdata = (struct acpi_generic_data *)(estatus + 1);
398 while (data_len >= sizeof(*gdata)) {
399 gedata_len = gdata->error_data_length;
400 if (gedata_len > data_len - sizeof(*gdata))
402 data_len -= gedata_len + sizeof(*gdata);
403 gdata = (void *)(gdata + 1) + gedata_len;
410 EXPORT_SYMBOL_GPL(cper_estatus_check);