Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | /* * NFIT - Machine Check Handler * * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. */ #include <linux/notifier.h> #include <linux/acpi.h> #include <linux/nd.h> #include <asm/mce.h> #include "nfit.h" static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; struct acpi_nfit_desc *acpi_desc; struct nfit_spa *nfit_spa; /* We only care about memory errors */ if (!mce_is_memory_error(mce)) return NOTIFY_DONE; /* * mce->addr contains the physical addr accessed that caused the * machine check. We need to walk through the list of NFITs, and see * if any of them matches that address, and only then start a scrub. */ mutex_lock(&acpi_desc_lock); list_for_each_entry(acpi_desc, &acpi_descs, list) { struct device *dev = acpi_desc->dev; int found_match = 0; mutex_lock(&acpi_desc->init_mutex); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { struct acpi_nfit_system_address *spa = nfit_spa->spa; if (nfit_spa_type(spa) != NFIT_SPA_PM) continue; /* find the spa that covers the mce addr */ if (spa->address > mce->addr) continue; if ((spa->address + spa->length - 1) < mce->addr) continue; found_match = 1; dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n", __func__, spa->range_index, spa->address, spa->length); /* * We can break at the first match because we're going * to rescan all the SPA ranges. There shouldn't be any * aliasing anyway. */ break; } mutex_unlock(&acpi_desc->init_mutex); if (!found_match) continue; /* If this fails due to an -ENOMEM, there is little we can do */ nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus, ALIGN(mce->addr, L1_CACHE_BYTES), L1_CACHE_BYTES); nvdimm_region_notify(nfit_spa->nd_region, NVDIMM_REVALIDATE_POISON); if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { /* * We can ignore an -EBUSY here because if an ARS is * already in progress, just let that be the last * authoritative one */ acpi_nfit_ars_rescan(acpi_desc, 0); } break; } mutex_unlock(&acpi_desc_lock); return NOTIFY_DONE; } static struct notifier_block nfit_mce_dec = { .notifier_call = nfit_handle_mce, .priority = MCE_PRIO_NFIT, }; void nfit_mce_register(void) { mce_register_decode_chain(&nfit_mce_dec); } void nfit_mce_unregister(void) { mce_unregister_decode_chain(&nfit_mce_dec); } |