Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MCE_H #define _ASM_X86_MCE_H #include <uapi/asm/mce.h> /* * Machine Check support for x86 */ /* MCG_CAP register defines */ #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ #define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ #define MCG_EXT_P BIT_ULL(9) /* Extended registers available */ #define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ #define MCG_EXT_CNT_SHIFT 16 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) #define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */ #define MCG_ELOG_P BIT_ULL(26) /* Extended error log supported */ #define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */ /* MCG_STATUS register defines */ #define MCG_STATUS_RIPV BIT_ULL(0) /* restart ip valid */ #define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */ #define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */ /* MCG_EXT_CTL register defines */ #define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */ /* MCi_STATUS register defines */ #define MCI_STATUS_VAL BIT_ULL(63) /* valid error */ #define MCI_STATUS_OVER BIT_ULL(62) /* previous errors lost */ #define MCI_STATUS_UC BIT_ULL(61) /* uncorrected error */ #define MCI_STATUS_EN BIT_ULL(60) /* error enabled */ #define MCI_STATUS_MISCV BIT_ULL(59) /* misc error reg. valid */ #define MCI_STATUS_ADDRV BIT_ULL(58) /* addr reg. valid */ #define MCI_STATUS_PCC BIT_ULL(57) /* processor context corrupt */ #define MCI_STATUS_S BIT_ULL(56) /* Signaled machine check */ #define MCI_STATUS_AR BIT_ULL(55) /* Action required */ #define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ #define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) #define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) #define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff) /* AMD-specific bits */ #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ #define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */ /* * McaX field if set indicates a given bank supports MCA extensions: * - Deferred error interrupt type is specifiable by bank. * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers, * But should not be used to determine MSR numbers. * - TCC bit is present in MCx_STATUS. */ #define MCI_CONFIG_MCAX 0x1 #define MCI_IPID_MCATYPE 0xFFFF0000 #define MCI_IPID_HWID 0xFFF /* * Note that the full MCACOD field of IA32_MCi_STATUS MSR is * bits 15:0. But bit 12 is the 'F' bit, defined for corrected * errors to indicate that errors are being filtered by hardware. * We should mask out bit 12 when looking for specific signatures * of uncorrected errors - so the F bit is deliberately skipped * in this #define. */ #define MCACOD 0xefff /* MCA Error Code */ /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ #define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */ #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ #define MCACOD_DATA 0x0134 /* Data Load */ #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ /* MCi_MISC register defines */ #define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) #define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) #define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ #define MCI_MISC_ADDR_LINEAR 1 /* linear address */ #define MCI_MISC_ADDR_PHYS 2 /* physical address */ #define MCI_MISC_ADDR_MEM 3 /* memory address */ #define MCI_MISC_ADDR_GENERIC 7 /* generic */ /* CTL2 register defines */ #define MCI_CTL2_CMCI_EN BIT_ULL(30) #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL #define MCJ_CTX_MASK 3 #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) #define MCJ_CTX_RANDOM 0 /* inject context: random */ #define MCJ_CTX_PROCESS 0x1 /* inject context: process */ #define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ #define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ #define MCJ_EXCEPTION 0x8 /* raise as exception */ #define MCJ_IRQ_BROADCAST 0x10 /* do IRQ broadcasting */ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ #define MCE_LOG_MIN_LEN 32U #define MCE_LOG_SIGNATURE "MACHINECHECK" /* AMD Scalable MCA */ #define MSR_AMD64_SMCA_MC0_CTL 0xc0002000 #define MSR_AMD64_SMCA_MC0_STATUS 0xc0002001 #define MSR_AMD64_SMCA_MC0_ADDR 0xc0002002 #define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 #define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 #define MSR_AMD64_SMCA_MC0_SYND 0xc0002006 #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a #define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) #define XEC(x, mask) (((x) >> 16) & mask) /* mce.kflags flag bits for logging etc. */ #define MCE_HANDLED_CEC BIT_ULL(0) #define MCE_HANDLED_UC BIT_ULL(1) #define MCE_HANDLED_EXTLOG BIT_ULL(2) #define MCE_HANDLED_NFIT BIT_ULL(3) #define MCE_HANDLED_EDAC BIT_ULL(4) #define MCE_HANDLED_MCELOG BIT_ULL(5) /* * Indicates an MCE which has happened in kernel space but from * which the kernel can recover simply by executing fixup_exception() * so that an error is returned to the caller of the function that * hit the machine check. */ #define MCE_IN_KERNEL_RECOV BIT_ULL(6) /* * Indicates an MCE that happened in kernel space while copying data * from user. In this case fixup_exception() gets the kernel to the * error exit for the copy function. Machine check handler can then * treat it like a fault taken in user mode. */ #define MCE_IN_KERNEL_COPYIN BIT_ULL(7) /* * This structure contains all data related to the MCE log. Also * carries a signature to make it easier to find from external * debugging tools. Each entry is only valid when its finished flag * is set. */ struct mce_log_buffer { char signature[12]; /* "MACHINECHECK" */ unsigned len; /* = elements in .mce_entry[] */ unsigned next; unsigned flags; unsigned recordlen; /* length of struct mce */ struct mce entry[]; }; /* Highest last */ enum mce_notifier_prios { MCE_PRIO_LOWEST, MCE_PRIO_MCELOG, MCE_PRIO_EDAC, MCE_PRIO_NFIT, MCE_PRIO_EXTLOG, MCE_PRIO_UC, MCE_PRIO_EARLY, MCE_PRIO_CEC, MCE_PRIO_HIGHEST = MCE_PRIO_CEC }; struct notifier_block; extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); #include <linux/percpu.h> #include <linux/atomic.h> extern int mce_p5_enabled; #ifdef CONFIG_ARCH_HAS_COPY_MC extern void enable_copy_mc_fragile(void); unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); #else static inline void enable_copy_mc_fragile(void) { } #endif struct cper_ia_proc_ctx; #ifdef CONFIG_X86_MCE int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); void mcheck_cpu_clear(struct cpuinfo_x86 *c); int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id); #else static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) { return -EINVAL; } #endif void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct device *, mce_device); /* Maximum number of MCA banks per CPU. */ #define MAX_NR_BANKS 64 #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); void mce_intel_feature_clear(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); void cmci_rediscover(void); void cmci_recheck(void); #else static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { } static inline void cmci_clear(void) {} static inline void cmci_reenable(void) {} static inline void cmci_rediscover(void) {} static inline void cmci_recheck(void) {} #endif int mce_available(struct cpuinfo_x86 *c); bool mce_is_memory_error(struct mce *m); bool mce_is_correctable(struct mce *m); int mce_usable_address(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); enum mcp_flags { MCP_TIMESTAMP = BIT(0), /* log time stamp */ MCP_UC = BIT(1), /* log uncorrected errors */ MCP_DONTLOG = BIT(2), /* only clear, don't log */ MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ }; bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); DECLARE_PER_CPU(struct mce, injectm); /* Disable CMCI/polling for MCA bank claimed by firmware */ extern void mce_disable_bank(int bank); /* * Exception handler */ void do_machine_check(struct pt_regs *pt_regs); /* * Threshold handler */ extern void (*mce_threshold_vector)(void); /* Deferred error interrupt handler */ extern void (*deferred_error_int_vector)(void); /* * Used by APEI to report memory error via /dev/mcelog */ struct cper_sec_mem_err; extern void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err); /* * Enumerate new IP types and HWID values in AMD processors which support * Scalable MCA. */ #ifdef CONFIG_X86_MCE_AMD /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ SMCA_LS_V2, SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ SMCA_RESERVED, /* Reserved */ SMCA_EX, /* Execution Unit */ SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ SMCA_CS, /* Coherent Slave */ SMCA_CS_V2, SMCA_PIE, /* Power, Interrupts, etc. */ SMCA_UMC, /* Unified Memory Controller */ SMCA_UMC_V2, SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ SMCA_PSP_V2, SMCA_SMU, /* System Management Unit */ SMCA_SMU_V2, SMCA_MP5, /* Microprocessor 5 Unit */ SMCA_MPDMA, /* MPDMA Unit */ SMCA_NBIO, /* Northbridge IO Unit */ SMCA_PCIE, /* PCI Express Unit */ SMCA_PCIE_V2, SMCA_XGMI_PCS, /* xGMI PCS Unit */ SMCA_NBIF, /* NBIF Unit */ SMCA_SHUB, /* System HUB Unit */ SMCA_SATA, /* SATA Unit */ SMCA_USB, /* USB Unit */ SMCA_GMI_PCS, /* GMI PCS Unit */ SMCA_XGMI_PHY, /* xGMI PHY Unit */ SMCA_WAFL_PHY, /* WAFL PHY Unit */ SMCA_GMI_PHY, /* GMI PHY Unit */ N_SMCA_BANK_TYPES }; extern const char *smca_get_long_name(enum smca_bank_types t); extern bool amd_mce_is_memory_error(struct mce *m); extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); void mce_amd_feature_init(struct cpuinfo_x86 *c); enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); #else static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } #endif /* _ASM_X86_MCE_H */ |