Diffstat (limited to 'roms/skiboot/core/hmi.c')
-rw-r--r--  roms/skiboot/core/hmi.c  1558
1 file changed, 1558 insertions(+), 0 deletions(-)
diff --git a/roms/skiboot/core/hmi.c b/roms/skiboot/core/hmi.c
new file mode 100644
index 000000000..9363cc5fb
--- /dev/null
+++ b/roms/skiboot/core/hmi.c
@@ -0,0 +1,1558 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Deal with Hypervisor Maintenance Interrupts
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "HMI: " fmt
+
+#include <skiboot.h>
+#include <opal.h>
+#include <opal-msg.h>
+#include <processor.h>
+#include <chiptod.h>
+#include <xscom.h>
+#include <xscom-p8-regs.h>
+#include <xscom-p9-regs.h>
+#include <xscom-p10-regs.h>
+#include <pci.h>
+#include <cpu.h>
+#include <chip.h>
+#include <npu-regs.h>
+#include <npu2-regs.h>
+#include <npu2.h>
+#include <npu.h>
+#include <capp.h>
+#include <nvram.h>
+
+/*
+ * P9 HMER register layout:
+ * +===+==========+============================+========+===================+
+ * |Bit|Name      |Description                 |PowerKVM|Action             |
+ * |   |          |                            |HMI     |                   |
+ * |   |          |                            |enabled |                   |
+ * |   |          |                            |for this|                   |
+ * |   |          |                            |bit ?   |                   |
+ * +===+==========+============================+========+===================+
+ * |0  |malfunctio|A processor core in the     |Yes     |Raise attn from    |
+ * |   |n_alert   |system has checkstopped     |        |sapphire resulting |
+ * |   |          |(failed recovery) and has   |        |in xstop           |
+ * |   |          |requested a CP Sparing      |        |                   |
+ * |   |          |to occur. This is           |        |                   |
+ * |   |          |broadcast to every          |        |                   |
+ * |   |          |processor in the system     |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |1  |reserved  |reserved                    |n/a     |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |2  |proc_recv_|Processor recovery occurred |Yes     |Log message and    |
+ * |   |done      |error-bit in fir not masked |        |continue working.  |
+ * |   |          |(see bit 11)                |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |3  |proc_recv_|Processor went through      |Yes     |Log message and    |
+ * |   |error_mask|recovery for an error which |        |continue working.  |
+ * |   |ed        |is actually masked for      |        |                   |
+ * |   |          |reporting                   |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |4  |tfac_error|Timer facility experienced  |Yes     |Raise attn from    |
+ * |   |          |an error.                   |        |sapphire resulting |
+ * |   |          |TB, DEC, HDEC, PURR or SPURR|        |in xstop           |
+ * |   |          |may be corrupted (details in|        |                   |
+ * |   |          |TFMR)                       |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |5  |tfmr_parit|TFMR SPR itself is          |Yes     |Raise attn from    |
+ * |   |y_error   |corrupted.                  |        |sapphire resulting |
+ * |   |          |Entire timing facility may  |        |in xstop           |
+ * |   |          |be compromised.             |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |6  |ha_overflo|UPS (Uninterrupted Power    |No      |N/A                |
+ * |   |w_warning |System) overflow indication,|        |                   |
+ * |   |          |indicating that the UPS     |        |                   |
+ * |   |          |DirtyAddrTable has          |        |                   |
+ * |   |          |reached a limit where it    |        |                   |
+ * |   |          |requires PHYP unload support|        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |7  |reserved  |reserved                    |n/a     |n/a                |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |8  |xscom_fail|An XSCOM operation caused by|No      |We handle it by    |
+ * |   |          |a cache inhibited load/store|        |manually reading   |
+ * |   |          |from this thread failed. A  |        |HMER register.     |
+ * |   |          |trap register is            |        |                   |
+ * |   |          |available.                  |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |9  |xscom_done|An XSCOM operation caused by|No      |We handle it by    |
+ * |   |          |a cache inhibited load/store|        |manually reading   |
+ * |   |          |from this thread completed. |        |HMER register.     |
+ * |   |          |If hypervisor intends to use|        |                   |
+ * |   |          |this bit, it is responsible |        |                   |
+ * |   |          |for clearing it before      |        |                   |
+ * |   |          |performing the xscom        |        |                   |
+ * |   |          |operation.                  |        |                   |
+ * |   |          |NOTE: this bit should always|        |                   |
+ * |   |          |be masked in HMEER          |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |10 |reserved  |reserved                    |n/a     |n/a                |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |11 |proc_recv_|Processor recovery occurred |Yes     |Log message and    |
+ * |   |again     |again before bit2 or bit3   |        |continue working.  |
+ * |   |          |was cleared                 |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |12-|reserved  |was: temperature sensor     |n/a     |n/a                |
+ * |15 |          |passed the critical point on|        |                   |
+ * |   |          |the way up                  |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |16 |scom_fir_h|SCOM has set a reserved FIR |No      |n/a                |
+ * |   |mi        |bit to cause recovery       |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |17 |trig_fir_h|Debug trigger has set a     |No      |n/a                |
+ * |   |mi        |reserved FIR bit to cause   |        |                   |
+ * |   |          |recovery                    |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |18 |reserved  |reserved                    |n/a     |n/a                |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |19 |reserved  |reserved                    |n/a     |n/a                |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |20 |hyp_resour|A hypervisor resource error |Yes     |Raise attn from    |
+ * |   |ce_err    |occurred: data parity error |        |sapphire resulting |
+ * |   |          |on SPRC0:3, SPR_Modereg or  |        |in xstop.          |
+ * |   |          |HMEER.                      |        |                   |
+ * |   |          |Note: this bit will cause a |        |                   |
+ * |   |          |check_stop when (HV=1, PR=0 |        |                   |
+ * |   |          |and EE=0)                   |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |21-|xscom_stat|If bit 8 is active, the     |No      |We handle it by    |
+ * |23 |us        |reason will be detailed in  |        |manually reading   |
+ * |   |          |these bits. See chapter 11.1|        |HMER register.     |
+ * |   |          |These bits are information  |        |                   |
+ * |   |          |only and always masked      |        |                   |
+ * |   |          |(mask = '0').               |        |                   |
+ * |   |          |If hypervisor intends to use|        |                   |
+ * |   |          |this field, it is           |        |                   |
+ * |   |          |responsible for clearing it |        |                   |
+ * |   |          |before performing the xscom |        |                   |
+ * |   |          |operation.                  |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |24-|Not       |Not implemented             |n/a     |n/a                |
+ * |63 |implemente|                            |        |                   |
+ * |   |d         |                            |        |                   |
+ * +===+==========+============================+========+===================+
+ *
+ * The HMER bits above can be enabled/disabled by modifying the
+ * SPR_HMEER_HMI_ENABLE_MASK #define in include/processor.h.
+ * If you modify support for any of the bits listed above, please make sure
+ * you update the table above to reflect that.
+ *
+ * NOTE: Per Dave Larson, never enable 8,9,21-23
+ */
+
+/*
+ * P10 HMER register layout:
+ * Bit    Name               Description
+ *
+ * 0      malfunction_alert  A processor core in the system has
+ *                           checkstopped (failed recovery). This is
+ *                           broadcast to every processor in the system
+ *
+ * 1      reserved           reserved
+ *
+ * 2      proc_rcvy_done     Processor recovery occurred error-bit in fir
+ *                           not masked (see bit 11)
+ *
+ * 3      reserved           reserved
+ *
+ * 4      tfac_error         Timer facility experienced an error. TB, DEC,
+ *                           HDEC, PURR or SPURR may be corrupted (details
+ *                           in TFMR)
+ *
+ * 5      tfx_error          Error occurred on transfer from tfac shadow
+ *                           to core
+ *
+ * 6      spurr_scale_limit  Nominal frequency exceeded 399 percent
+ *
+ * 7      reserved           reserved
+ *
+ * 8      xscom_fail         An XSCOM operation caused by a cache inhibited
+ *                           load/store from this thread failed. A trap
+ *                           register is available.
+ *
+ * 9      xscom_done         An XSCOM operation caused by a cache inhibited
+ *                           load/store from this thread completed. If
+ *                           hypervisor intends to use this bit, it is
+ *                           responsible for clearing it before performing
+ *                           the xscom operation. NOTE: this bit should
+ *                           always be masked in HMEER
+ *
+ * 10     reserved           reserved
+ *
+ * 11     proc_rcvy_again    Processor recovery occurred again before bit 2
+ *                           was cleared
+ *
+ * 12-15  reserved           reserved
+ *
+ * 16     scom_fir_hmi       An error inject to PC FIR has occurred to set
+ *                           HMI. This error inject can also set FIR(61) to
+ *                           cause recovery.
+ *
+ * 17     reserved           reserved
+ *
+ * 18     trig_fir_hmi       Debug trigger has occurred to set HMI. This
+ *                           trigger can also set FIR(60) to cause recovery
+ *
+ * 19-20  reserved           reserved
+ *
+ * 21-23  xscom_status       If bit 8 is active, the reason will be detailed
+ *                           in these bits. These bits are information only
+ *                           and always masked (mask = '0'). If hypervisor
+ *                           intends to use this field, it is responsible
+ *                           for clearing it before performing the xscom
+ *                           operation.
+ *
+ * 24-63  Not implemented    Not implemented.
+ *
+ * P10 HMEER enabled bits:
+ * Name               Action
+ * malfunction_alert  Decode and log FIR bits.
+ * proc_rcvy_done     Log and continue.
+ * tfac_error         Log and attempt to recover time facilities.
+ * tfx_error          Log and attempt to recover time facilities.
+ * spurr_scale_limit  Log and continue. XXX?
+ * proc_rcvy_again    Log and continue.
+ */
+
+/* Used for tracking cpu threads inside hmi handling. */
+#define HMI_STATE_CLEANUP_DONE 0x100
+#define CORE_THREAD_MASK 0x0ff
+#define SUBCORE_THREAD_MASK(s_id, t_count) \
+ ((((1UL) << (t_count)) - 1) << ((s_id) * (t_count)))
+#define SINGLE_THREAD_MASK(t_id) ((1UL) << (t_id))
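+/*
+ * For example, with 2 threads per subcore, SUBCORE_THREAD_MASK(1, 2)
+ * expands to ((1UL << 2) - 1) << 2 = 0x0c, and SINGLE_THREAD_MASK(5)
+ * to 0x20.
+ */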
+
+/*
+ * Number of iterations for the various timeouts. We can't use the timebase
+ * as it might be broken. We measured experimentally that 40 million loops
+ * of cpu_relax() give us more than 1s. The margin is comfortable enough.
+ */
+#define TIMEOUT_LOOPS 40000000
+
+/* TFMR other errors (other than bits 26 and 45) */
+#define SPR_TFMR_OTHER_ERRORS \
+ (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \
+ SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \
+ SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \
+ SPR_TFMR_DEC_PARITY_ERR | SPR_TFMR_TFMR_CORRUPT | \
+ SPR_TFMR_CHIP_TOD_INTERRUPT)
+
+/* TFMR "all core" errors (sent to all threads) */
+#define SPR_TFMR_CORE_ERRORS \
+ (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \
+ SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \
+ SPR_TFMR_TFMR_CORRUPT | SPR_TFMR_TB_RESIDUE_ERR | \
+ SPR_TFMR_HDEC_PARITY_ERROR | SPR_TFMR_TFAC_XFER_ERROR)
+
+/* TFMR "thread" errors */
+#define SPR_TFMR_THREAD_ERRORS \
+ (SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \
+ SPR_TFMR_DEC_PARITY_ERR)
+
+/*
+ * Starting from P9, core inits are set up to escalate all core-local
+ * checkstops to system checkstops. Review this list when that changes.
+ */
+static const struct core_xstop_bit_info {
+ uint8_t bit; /* CORE FIR bit number */
+ enum OpalHMI_CoreXstopReason reason;
+} xstop_bits[] = {
+ { 3, CORE_CHECKSTOP_IFU_REGFILE },
+ { 5, CORE_CHECKSTOP_IFU_LOGIC },
+ { 8, CORE_CHECKSTOP_PC_DURING_RECOV },
+ { 10, CORE_CHECKSTOP_ISU_REGFILE },
+ { 12, CORE_CHECKSTOP_ISU_LOGIC },
+ { 21, CORE_CHECKSTOP_FXU_LOGIC },
+ { 25, CORE_CHECKSTOP_VSU_LOGIC },
+ { 26, CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE },
+ { 32, CORE_CHECKSTOP_LSU_REGFILE },
+ { 36, CORE_CHECKSTOP_PC_FWD_PROGRESS },
+ { 38, CORE_CHECKSTOP_LSU_LOGIC },
+ { 45, CORE_CHECKSTOP_PC_LOGIC },
+ { 48, CORE_CHECKSTOP_PC_HYP_RESOURCE },
+ { 52, CORE_CHECKSTOP_PC_HANG_RECOV_FAILED },
+ { 54, CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED },
+ { 63, CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ },
+};
+
+struct core_fir_bit_info {
+ uint8_t bit; /* CORE FIR bit number */
+ const char *reason;
+};
+
+static const struct core_fir_bit_info p9_recoverable_bits[] = {
+ { 0, "IFU - SRAM (ICACHE parity, etc)" },
+ { 2, "IFU - RegFile" },
+ { 4, "IFU - Logic" },
+ { 9, "ISU - RegFile" },
+ { 11, "ISU - Logic" },
+ { 13, "ISU - Recoverable due to not in MT window" },
+ { 24, "VSU - Logic" },
+ { 27, "VSU - DFU logic" },
+ { 29, "LSU - SRAM (DCACHE parity, etc)" },
+ { 31, "LSU - RegFile" },
+ /* The following 3 bits may be set by SRAM errors. */
+ { 33, "LSU - TLB multi hit" },
+ { 34, "LSU - SLB multi hit" },
+ { 35, "LSU - ERAT multi hit" },
+ { 37, "LSU - Logic" },
+ { 39, "LSU - Recoverable due to not in MT window" },
+ { 43, "PC - Thread hang recovery" },
+};
+
+static const struct core_fir_bit_info p10_core_fir_bits[] = {
+ { 0, "IFU - SRAM recoverable error (ICACHE parity error, etc.)" },
+ { 1, "PC - TC checkstop" },
+ { 2, "IFU - RegFile recoverable error" },
+ { 3, "IFU - RegFile core checkstop" },
+ { 4, "IFU - Logic recoverable error" },
+ { 5, "IFU - Logic core checkstop" },
+ { 7, "VSU - Inference accumulator recoverable error" },
+ { 8, "PC - Recovery core checkstop" },
+ { 9, "VSU - Slice Target File (STF) recoverable error" },
+ { 11, "ISU - Logic recoverable error" },
+ { 12, "ISU - Logic core checkstop" },
+ { 14, "ISU - Machine check received while ME=0 checkstop" },
+ { 15, "ISU - UE from L2" },
+ { 16, "ISU - Number of UEs from L2 above threshold" },
+ { 17, "ISU - UE on CI load" },
+ { 18, "MMU - TLB recoverable error" },
+ { 19, "MMU - SLB error" },
+ { 21, "MMU - CXT recoverable error" },
+ { 22, "MMU - Logic core checkstop" },
+ { 23, "MMU - MMU system checkstop" },
+ { 24, "VSU - Logic recoverable error" },
+ { 25, "VSU - Logic core checkstop" },
+ { 26, "PC - In maint mode and recovery in progress" },
+ { 28, "PC - PC system checkstop" },
+ { 29, "LSU - SRAM recoverable error (DCACHE parity error, etc.)" },
+ { 30, "LSU - Set deleted" },
+ { 31, "LSU - RegFile recoverable error" },
+ { 32, "LSU - RegFile core checkstop" },
+ { 33, "MMU - TLB multi hit error occurred" },
+ { 34, "MMU - SLB multi hit error occurred" },
+ { 35, "LSU - ERAT multi hit error occurred" },
+ { 36, "PC - Forward progress error" },
+ { 37, "LSU - Logic recoverable error" },
+ { 38, "LSU - Logic core checkstop" },
+ { 41, "LSU - System checkstop" },
+ { 43, "PC - Thread hang recoverable error" },
+ { 45, "PC - Logic core checkstop" },
+ { 47, "PC - TimeBase facility checkstop" },
+ { 52, "PC - Hang recovery failed core checkstop" },
+ { 53, "PC - Core internal hang detected" },
+ { 55, "PC - Nest hang detected" },
+ { 56, "PC - Other core chiplet recoverable error" },
+ { 57, "PC - Other core chiplet core checkstop" },
+ { 58, "PC - Other core chiplet system checkstop" },
+ { 59, "PC - SCOM satellite error detected" },
+ { 60, "PC - Debug trigger error inject" },
+ { 61, "PC - SCOM or firmware recoverable error inject" },
+ { 62, "PC - Firmware checkstop error inject" },
+ { 63, "PC - Firmware SPRC / SPRD checkstop" },
+};
+
+static const struct nx_xstop_bit_info {
+ uint8_t bit; /* NX FIR bit number */
+ enum OpalHMI_NestAccelXstopReason reason;
+} nx_dma_xstop_bits[] = {
+ { 1, NX_CHECKSTOP_SHM_INVAL_STATE_ERR },
+ { 15, NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1 },
+ { 16, NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2 },
+ { 20, NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR },
+ { 21, NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR },
+ { 22, NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR },
+ { 23, NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR },
+ { 24, NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR },
+ { 25, NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR },
+ { 26, NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR },
+ { 27, NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR },
+ { 31, NX_CHECKSTOP_DMA_CRB_UE },
+ { 32, NX_CHECKSTOP_DMA_CRB_SUE },
+};
+
+static const struct nx_xstop_bit_info nx_pbi_xstop_bits[] = {
+ { 12, NX_CHECKSTOP_PBI_ISN_UE },
+};
+
+static struct lock hmi_lock = LOCK_UNLOCKED;
+static uint32_t malf_alert_scom;
+static uint32_t nx_status_reg;
+static uint32_t nx_dma_engine_fir;
+static uint32_t nx_pbi_fir;
+
+static int setup_scom_addresses(void)
+{
+ switch (proc_gen) {
+ case proc_gen_p8:
+ malf_alert_scom = P8_MALFUNC_ALERT;
+ nx_status_reg = P8_NX_STATUS_REG;
+ nx_dma_engine_fir = P8_NX_DMA_ENGINE_FIR;
+ nx_pbi_fir = P8_NX_PBI_FIR;
+ return 1;
+ case proc_gen_p9:
+ malf_alert_scom = P9_MALFUNC_ALERT;
+ nx_status_reg = P9_NX_STATUS_REG;
+ nx_dma_engine_fir = P9_NX_DMA_ENGINE_FIR;
+ nx_pbi_fir = P9_NX_PBI_FIR;
+ return 1;
+ case proc_gen_p10:
+ malf_alert_scom = P10_MALFUNC_ALERT;
+ nx_status_reg = P10_NX_STATUS_REG;
+ nx_dma_engine_fir = P10_NX_DMA_ENGINE_FIR;
+ nx_pbi_fir = P10_NX_PBI_FIR;
+ return 1;
+ default:
+ prerror("%s: Unknown CPU type\n", __func__);
+ break;
+ }
+ return 0;
+}
+
+static int queue_hmi_event(struct OpalHMIEvent *hmi_evt, int recover, uint64_t *out_flags)
+{
+ size_t size;
+
+ /* Don't queue up event if recover == -1 */
+ if (recover == -1)
+ return 0;
+
+ /* set disposition */
+ if (recover == 1)
+ hmi_evt->disposition = OpalHMI_DISPOSITION_RECOVERED;
+ else if (recover == 0)
+ hmi_evt->disposition = OpalHMI_DISPOSITION_NOT_RECOVERED;
+
+ /*
+ * V2 of struct OpalHMIEvent is (5 * 64 bits) in size and well
+ * packed. Hence use a uint64_t pointer to pass the entire structure
+ * using 5 params in the generic message format. Instead of hard
+ * coding num_params, divide the struct size by 8 bytes to get the
+ * exact num_params value.
+ */
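+ /* Per the above, for V2 this works out to 5 * 8 = 40 bytes, i.e. 5 params. */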
+ size = ALIGN_UP(sizeof(*hmi_evt), sizeof(u64));
+
+ *out_flags |= OPAL_HMI_FLAGS_NEW_EVENT;
+
+ /* queue up for delivery to host. */
+ return _opal_queue_msg(OPAL_MSG_HMI_EVT, NULL, NULL,
+ size, hmi_evt);
+}
+
+static int read_core_fir(uint32_t chip_id, uint32_t core_id, uint64_t *core_fir)
+{
+ int rc;
+
+ switch (proc_gen) {
+ case proc_gen_p8:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P8_EX(core_id, P8_CORE_FIR), core_fir);
+ break;
+ case proc_gen_p9:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P9_EC(core_id, P9_CORE_FIR), core_fir);
+ break;
+ case proc_gen_p10:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIR), core_fir);
+ break;
+ default:
+ rc = OPAL_HARDWARE;
+ }
+ return rc;
+}
+
+static int read_core_wof(uint32_t chip_id, uint32_t core_id, uint64_t *core_wof)
+{
+ int rc;
+
+ switch (proc_gen) {
+ case proc_gen_p9:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P9_EC(core_id, P9_CORE_WOF), core_wof);
+ break;
+ case proc_gen_p10:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P10_EC(core_id, P10_CORE_WOF), core_wof);
+ break;
+ default:
+ rc = OPAL_HARDWARE;
+ }
+ return rc;
+}
+
+static bool decode_core_fir(struct cpu_thread *cpu,
+ struct OpalHMIEvent *hmi_evt)
+{
+ uint64_t core_fir;
+ uint32_t core_id;
+ int i, swkup_rc;
+ bool found = false;
+ int64_t ret;
+ const char *loc;
+
+ /* Sanity check */
+ if (!cpu || !hmi_evt)
+ return false;
+
+ core_id = pir_to_core_id(cpu->pir);
+
+ /* Force the core to wake up, otherwise reading core_fir is unreliable
+ * if stop-state 5 is enabled.
+ */
+ swkup_rc = dctl_set_special_wakeup(cpu);
+
+ /* Get CORE FIR register value. */
+ ret = read_core_fir(cpu->chip_id, core_id, &core_fir);
+
+ if (!swkup_rc)
+ dctl_clear_special_wakeup(cpu);
+
+ if (ret == OPAL_WRONG_STATE) {
+ /*
+ * CPU is asleep, so it probably didn't cause the checkstop.
+ * If no other HMI cause is found a "catchall" checkstop
+ * will be raised, so if this CPU should've been awake the
+ * error will be handled appropriately.
+ */
+ prlog(PR_DEBUG,
+ "FIR read failed, chip %d core %d asleep\n",
+ cpu->chip_id, core_id);
+ return false;
+ } else if (ret != OPAL_SUCCESS) {
+ prerror("XSCOM error reading CORE FIR\n");
+ /* If the FIR can't be read, we should checkstop. */
+ return true;
+ }
+
+ if (!core_fir)
+ return false;
+
+ loc = chip_loc_code(cpu->chip_id);
+ prlog(PR_INFO, "[Loc: %s]: CHIP ID: %x, CORE ID: %x, FIR: %016llx\n",
+ loc ? loc : "Not Available",
+ cpu->chip_id, core_id, core_fir);
+
+ if (proc_gen == proc_gen_p10) {
+ for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) {
+ if (core_fir & PPC_BIT(p10_core_fir_bits[i].bit))
+ prlog(PR_INFO, " %s\n", p10_core_fir_bits[i].reason);
+ }
+ }
+
+ /* Check CORE FIR bits and populate HMI event with error info. */
+ for (i = 0; i < ARRAY_SIZE(xstop_bits); i++) {
+ if (core_fir & PPC_BIT(xstop_bits[i].bit)) {
+ found = true;
+ hmi_evt->u.xstop_error.xstop_reason
+ |= cpu_to_be32(xstop_bits[i].reason);
+ }
+ }
+ return found;
+}
+
+static void find_core_checkstop_reason(struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct cpu_thread *cpu;
+
+ /* Initialize HMI event */
+ hmi_evt->severity = OpalHMI_SEV_FATAL;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_CORE;
+
+ /*
+ * Check CORE FIRs and find the reason for core checkstop.
+ * Send a separate HMI event for each core that has checkstopped.
+ */
+ for_each_cpu(cpu) {
+ /* GARDed CPUs are marked unavailable. Skip them. */
+ if (cpu->state == cpu_state_unavailable)
+ continue;
+
+ /* Only check on primaries (i.e. cores), not threads */
+ if (cpu->is_secondary)
+ continue;
+
+ /* Initialize xstop_error fields. */
+ hmi_evt->u.xstop_error.xstop_reason = 0;
+ hmi_evt->u.xstop_error.u.pir = cpu_to_be32(cpu->pir);
+
+ if (decode_core_fir(cpu, hmi_evt))
+ queue_hmi_event(hmi_evt, 0, out_flags);
+ }
+}
+
+static void find_capp_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct capp_info info;
+ struct phb *phb;
+ uint64_t capp_fir;
+ uint64_t capp_fir_mask;
+ uint64_t capp_fir_action0;
+ uint64_t capp_fir_action1;
+ uint64_t reg;
+ int64_t rc;
+
+ /* CAPP exists on P8 and P9 only */
+ if (proc_gen != proc_gen_p8 && proc_gen != proc_gen_p9)
+ return;
+
+ /* Find the CAPP on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ /* get the CAPP info */
+ rc = capp_get_info(flat_chip_id, phb, &info);
+ if (rc == OPAL_PARAMETER)
+ continue;
+
+ if (xscom_read(flat_chip_id, info.capp_fir_reg, &capp_fir) ||
+ xscom_read(flat_chip_id, info.capp_fir_mask_reg,
+ &capp_fir_mask) ||
+ xscom_read(flat_chip_id, info.capp_fir_action0_reg,
+ &capp_fir_action0) ||
+ xscom_read(flat_chip_id, info.capp_fir_action1_reg,
+ &capp_fir_action1)) {
+ prerror("CAPP: Couldn't read CAPP#%d (PHB:#%x) FIR registers by XSCOM!\n",
+ info.capp_index, info.phb_index);
+ continue;
+ }
+
+ if (!(capp_fir & ~capp_fir_mask))
+ continue;
+
+ prlog(PR_DEBUG, "CAPP#%d (PHB:#%x): FIR 0x%016llx mask 0x%016llx\n",
+ info.capp_index, info.phb_index, capp_fir,
+ capp_fir_mask);
+ prlog(PR_DEBUG, "CAPP#%d (PHB:#%x): ACTION0 0x%016llx, ACTION1 0x%016llx\n",
+ info.capp_index, info.phb_index, capp_fir_action0,
+ capp_fir_action1);
+
+ /*
+ * If this bit is set (=1) a Recoverable Error has been
+ * detected
+ */
+ xscom_read(flat_chip_id, info.capp_err_status_ctrl_reg, &reg);
+ if ((reg & PPC_BIT(0)) != 0) {
+ phb_lock(phb);
+ phb->ops->set_capp_recovery(phb);
+ phb_unlock(phb);
+
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_CAPP_RECOVERY;
+ queue_hmi_event(hmi_evt, 1, out_flags);
+
+ return;
+ }
+ }
+}
+
+static void find_nx_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ uint64_t nx_status;
+ uint64_t nx_dma_fir;
+ uint64_t nx_pbi_fir_val;
+ int i;
+
+ /* Get NX status register value. */
+ if (xscom_read(flat_chip_id, nx_status_reg, &nx_status) != 0) {
+ prerror("XSCOM error reading NX_STATUS_REG\n");
+ return;
+ }
+
+ /* Check if NX has driven an HMI interrupt. */
+ if (!(nx_status & NX_HMI_ACTIVE))
+ return;
+
+ /* Initialize HMI event */
+ hmi_evt->severity = OpalHMI_SEV_FATAL;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NX;
+ hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
+
+ /* Get DMA & Engine FIR data register value. */
+ if (xscom_read(flat_chip_id, nx_dma_engine_fir, &nx_dma_fir) != 0) {
+ prerror("XSCOM error reading NX_DMA_ENGINE_FIR\n");
+ return;
+ }
+
+ /* Get PowerBus Interface FIR data register value. */
+ if (xscom_read(flat_chip_id, nx_pbi_fir, &nx_pbi_fir_val) != 0) {
+ prerror("XSCOM error reading NX_PBI_FIR\n");
+ return;
+ }
+
+ /* Find NX checkstop reason and populate HMI event with error info. */
+ for (i = 0; i < ARRAY_SIZE(nx_dma_xstop_bits); i++)
+ if (nx_dma_fir & PPC_BIT(nx_dma_xstop_bits[i].bit))
+ hmi_evt->u.xstop_error.xstop_reason
+ |= cpu_to_be32(nx_dma_xstop_bits[i].reason);
+
+ for (i = 0; i < ARRAY_SIZE(nx_pbi_xstop_bits); i++)
+ if (nx_pbi_fir_val & PPC_BIT(nx_pbi_xstop_bits[i].bit))
+ hmi_evt->u.xstop_error.xstop_reason
+ |= cpu_to_be32(nx_pbi_xstop_bits[i].reason);
+
+ /*
+ * Set NXDMAENGFIR[38] to signal PRD that service action is required.
+ * Without this inject, PRD will not be able to do NX unit checkstop
+ * error analysis. NXDMAENGFIR[38] is a spare bit and used to report
+ * a software initiated attention.
+ *
+ * The behavior of this bit and all FIR bits are documented in
+ * RAS spreadsheet.
+ */
+ xscom_write(flat_chip_id, nx_dma_engine_fir, PPC_BIT(38));
+
+ /* Send an HMI event. */
+ queue_hmi_event(hmi_evt, 0, out_flags);
+}
+
+static bool phb_is_npu2(struct dt_node *dn)
+{
+ return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
+ dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex"));
+}
+
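+/*
+ * Pack "reason" into the first free (zero) byte of the 32-bit
+ * xstop_reason word, which can therefore hold up to 4 one-byte
+ * reasons.
+ */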
+static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
+{
+ int i, reason_count;
+ uint8_t *ptr;
+
+ reason_count = sizeof(*xstop_reason) / sizeof(reason);
+ ptr = (uint8_t *) xstop_reason;
+ for (i = 0; i < reason_count; i++) {
+ if (*ptr == 0) {
+ *ptr = reason;
+ break;
+ }
+ ptr++;
+ }
+}
+
+static void encode_npu2_xstop_reason(uint32_t *xstop_reason,
+ uint64_t fir, int fir_number)
+{
+ int bit;
+ uint8_t reason;
+
+ /*
+ * There are three 64-bit FIRs but the xstop reason field of
+ * the hmi event is only 32-bit. Encode which FIR bit is set as:
+ * - 2 bits for the FIR number
+ * - 6 bits for the bit number (0 -> 63)
+ *
+ * So we could even encode up to 4 reasons for the HMI, if
+ * that can ever happen
+ */
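+ /*
+ * For example, FIR #1 with IBM bit 2 set (PPC_BIT(2) == 1ULL << 61)
+ * gives bit = 61 and reason = (1 << 6) | (63 - 61) = 0x42.
+ */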
+ while (fir) {
+ bit = ilog2(fir);
+ reason = fir_number << 6;
+ reason |= (63 - bit); // IBM numbering
+ add_npu2_xstop_reason(xstop_reason, reason);
+ fir ^= 1ULL << bit;
+ }
+}
+
+static void find_npu2_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct phb *phb;
+ int i;
+ bool npu2_hmi_verbose = false, found = false;
+ uint64_t npu2_fir;
+ uint64_t npu2_fir_mask;
+ uint64_t npu2_fir_action0;
+ uint64_t npu2_fir_action1;
+ uint64_t npu2_fir_addr;
+ uint64_t npu2_fir_mask_addr;
+ uint64_t npu2_fir_action0_addr;
+ uint64_t npu2_fir_action1_addr;
+ uint64_t fatal_errors;
+ uint32_t xstop_reason = 0;
+ int total_errors = 0;
+ const char *loc;
+
+ /* NPU2 only */
+ if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9)
+ return;
+
+ /* Find the NPU on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ /* NOTE: if a chip ever has >1 NPU this will need adjusting */
+ if (phb_is_npu2(phb->dt_node) &&
+ (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* If we didn't find a NPU on the chip, it's not our checkstop. */
+ if (!found)
+ return;
+
+ npu2_fir_addr = NPU2_FIR_REGISTER_0;
+ npu2_fir_mask_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_MASK_OFFSET;
+ npu2_fir_action0_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION0_OFFSET;
+ npu2_fir_action1_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION1_OFFSET;
+
+ for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) {
+ /* Read all the registers necessary to find a checkstop condition. */
+ if (xscom_read(flat_chip_id, npu2_fir_addr, &npu2_fir) ||
+ xscom_read(flat_chip_id, npu2_fir_mask_addr, &npu2_fir_mask) ||
+ xscom_read(flat_chip_id, npu2_fir_action0_addr, &npu2_fir_action0) ||
+ xscom_read(flat_chip_id, npu2_fir_action1_addr, &npu2_fir_action1)) {
+ prerror("HMI: Couldn't read NPU FIR register%d with XSCOM\n", i);
+ continue;
+ }
+
+ fatal_errors = npu2_fir & ~npu2_fir_mask & npu2_fir_action0 & npu2_fir_action1;
+
+ if (fatal_errors) {
+ loc = chip_loc_code(flat_chip_id);
+ if (!loc)
+ loc = "Not Available";
+ prlog(PR_ERR, "NPU: [Loc: %s] P:%d FIR#%d FIR 0x%016llx mask 0x%016llx\n",
+ loc, flat_chip_id, i, npu2_fir, npu2_fir_mask);
+ prlog(PR_ERR, "NPU: [Loc: %s] P:%d ACTION0 0x%016llx, ACTION1 0x%016llx\n",
+ loc, flat_chip_id, npu2_fir_action0, npu2_fir_action1);
+ total_errors++;
+
+ encode_npu2_xstop_reason(&xstop_reason, fatal_errors, i);
+ }
+
+ /* Can't do a fence yet, we are just logging fir information for now */
+ npu2_fir_addr += NPU2_FIR_OFFSET;
+ npu2_fir_mask_addr += NPU2_FIR_OFFSET;
+ npu2_fir_action0_addr += NPU2_FIR_OFFSET;
+ npu2_fir_action1_addr += NPU2_FIR_OFFSET;
+
+ }
+
+ if (!total_errors)
+ return;
+
+ npu2_hmi_verbose = nvram_query_eq_safe("npu2-hmi-verbose", "true");
+ /* Force this for now until we sort out something better */
+ npu2_hmi_verbose = true;
+
+ if (npu2_hmi_verbose) {
+ npu2_dump_scoms(flat_chip_id);
+ prlog(PR_ERR, " _________________________ \n");
+ prlog(PR_ERR, "< It's Debug time! >\n");
+ prlog(PR_ERR, " ------------------------- \n");
+ prlog(PR_ERR, " \\ ,__, \n");
+ prlog(PR_ERR, " \\ (oo)____ \n");
+ prlog(PR_ERR, " (__) )\\ \n");
+ prlog(PR_ERR, " ||--|| * \n");
+ }
+
+ /* Set up the HMI event */
+ hmi_evt->severity = OpalHMI_SEV_WARNING;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
+ hmi_evt->u.xstop_error.xstop_reason = cpu_to_be32(xstop_reason);
+ hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
+
+ /* Marking the event as recoverable so that we don't crash */
+ queue_hmi_event(hmi_evt, 1, out_flags);
+}
+
+static void find_npu_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct phb *phb;
+ struct npu *p = NULL;
+
+ uint64_t npu_fir;
+ uint64_t npu_fir_mask;
+ uint64_t npu_fir_action0;
+ uint64_t npu_fir_action1;
+ uint64_t fatal_errors;
+
+ /* Only check for (P8 NVLink) NPU errors if the chip has one; otherwise check for NPU2 */
+ if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL)
+ return find_npu2_checkstop_reason(flat_chip_id, hmi_evt, out_flags);
+
+ /* Find the NPU on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ /* NOTE: if a chip ever has >1 NPU this will need adjusting */
+ if (dt_node_is_compatible(phb->dt_node, "ibm,power8-npu-pciex") &&
+ (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
+ p = phb_to_npu(phb);
+ break;
+ }
+ }
+
+ /* If we didn't find a NPU on the chip, it's not our checkstop. */
+ if (p == NULL)
+ return;
+
+ /* Read all the registers necessary to find a checkstop condition. */
+ if (xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR, &npu_fir) ||
+ xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR_MASK, &npu_fir_mask) ||
+ xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR_ACTION0, &npu_fir_action0) ||
+ xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR_ACTION1, &npu_fir_action1)) {
+ prerror("Couldn't read NPU registers with XSCOM\n");
+ return;
+ }
+
+ fatal_errors = npu_fir & ~npu_fir_mask & npu_fir_action0 & npu_fir_action1;
+
+ /* If there's no errors, we don't need to do anything. */
+ if (!fatal_errors)
+ return;
+
+ prlog(PR_DEBUG, "NPU: FIR 0x%016llx mask 0x%016llx\n",
+ npu_fir, npu_fir_mask);
+ prlog(PR_DEBUG, "NPU: ACTION0 0x%016llx, ACTION1 0x%016llx\n",
+ npu_fir_action0, npu_fir_action1);
+
+ /* Set the NPU to fenced since it can't recover. */
+ npu_set_fence_state(p, true);
+
+ /* Set up the HMI event */
+ hmi_evt->severity = OpalHMI_SEV_WARNING;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
+ hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
+
+ /* The HMI is "recoverable" because it shouldn't crash the system */
+ queue_hmi_event(hmi_evt, 1, out_flags);
+}
+
+static void decode_malfunction(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags)
+{
+ int i;
+ uint64_t malf_alert, flags;
+
+ flags = 0;
+
+ if (!setup_scom_addresses()) {
+ prerror("Failed to setup scom addresses\n");
+ /* Send an unknown HMI event. */
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_UNKNOWN;
+ hmi_evt->u.xstop_error.xstop_reason = 0;
+ queue_hmi_event(hmi_evt, false, out_flags);
+ return;
+ }
+
+ xscom_read(this_cpu()->chip_id, malf_alert_scom, &malf_alert);
+
+ if (!malf_alert)
+ return;
+
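+ /*
+ * Each set bit i in the malfunction alert register identifies the
+ * flat chip id that raised the checkstop: acknowledge the bit, then
+ * probe that chip's CAPP, NX and NPU for the reason.
+ */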
+ for (i = 0; i < 64; i++) {
+ if (malf_alert & PPC_BIT(i)) {
+ xscom_write(this_cpu()->chip_id, malf_alert_scom,
+ ~PPC_BIT(i));
+ find_capp_checkstop_reason(i, hmi_evt, &flags);
+ find_nx_checkstop_reason(i, hmi_evt, &flags);
+ find_npu_checkstop_reason(i, hmi_evt, &flags);
+ }
+ }
+
+ find_core_checkstop_reason(hmi_evt, &flags);
+
+ /*
+ * If we fail to find a checkstop reason, send an unknown HMI event.
+ */
+ if (!(flags & OPAL_HMI_FLAGS_NEW_EVENT)) {
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_UNKNOWN;
+ hmi_evt->u.xstop_error.xstop_reason = 0;
+ queue_hmi_event(hmi_evt, false, &flags);
+ }
+ *out_flags |= flags;
+}
+
+/*
+ * This will "rendez-vous" all threads on the core to the rendez-vous
+ * id "sig". You need to make sure that "sig" is different from the
+ * previous rendez vous. The sig value must be between 0 and 7 with
+ * boot time being set to 0.
+ *
+ * Note: in theory, we could just use a flip flop "sig" in the thread
+ * structure (binary rendez-vous with no argument). This is a bit more
+ * debuggable and better at handling timeouts (arguably).
+ *
+ * This should be called with no lock held
+ */
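+/*
+ * Each thread owns a 4-bit nibble in *core_hmi_state_ptr: the low 3
+ * bits hold the thread's current rendez-vous sig and the high bit is
+ * its exit flag, hence the "my_id << 2" shifts below.
+ */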
+static void hmi_rendez_vous(uint32_t sig)
+{
+ struct cpu_thread *t = this_cpu();
+ uint32_t my_id = cpu_get_thread_index(t);
+ uint32_t my_shift = my_id << 2;
+ uint32_t *sptr = t->core_hmi_state_ptr;
+ uint32_t val, prev, shift, i;
+ uint64_t timeout;
+
+ assert(sig <= 0x7);
+
+ /*
+ * Mark ourselves as having reached the rendez vous point with
+ * the exit bit cleared
+ */
+ do {
+ val = prev = *sptr;
+ val &= ~(0xfu << my_shift);
+ val |= sig << my_shift;
+ } while (cmpxchg32(sptr, prev, val) != prev);
+
+ /*
+ * Wait for everybody else to reach that point, ignore the
+ * exit bit as another thread could have already set it.
+ */
+ for (i = 0; i < cpu_thread_count; i++) {
+ shift = i << 2;
+
+ timeout = TIMEOUT_LOOPS;
+ while (((*sptr >> shift) & 0x7) != sig && --timeout)
+ cpu_relax();
+ if (!timeout)
+ prlog(PR_ERR, "Rendez-vous stage 1 timeout, CPU 0x%x"
+ " waiting for thread %d (sptr=%08x)\n",
+ t->pir, i, *sptr);
+ }
+
+ /* Set the exit bit */
+ do {
+ val = prev = *sptr;
+ val &= ~(0xfu << my_shift);
+ val |= (sig | 8) << my_shift;
+ } while (cmpxchg32(sptr, prev, val) != prev);
+
+ /* At this point, we need to wait for everybody else to have a value
+ * that is *not* sig. I.e. they either have set the exit bit *or* they
+ * have changed the rendez-vous (meaning they have moved on to another
+ * rendez vous point).
+ */
+ for (i = 0; i < cpu_thread_count; i++) {
+ shift = i << 2;
+
+ timeout = TIMEOUT_LOOPS;
+ while (((*sptr >> shift) & 0xf) == sig && --timeout)
+ cpu_relax();
+ if (!timeout)
+ prlog(PR_ERR, "Rendez-vous stage 2 timeout, CPU 0x%x"
+ " waiting for thread %d (sptr=%08x)\n",
+ t->pir, i, *sptr);
+ }
+}
+
+static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
+{
+ const char *loc;
+ uint32_t core_id, thread_index;
+
+ core_id = pir_to_core_id(this_cpu()->pir);
+ thread_index = cpu_get_thread_index(this_cpu());
+
+ loc = chip_loc_code(this_cpu()->chip_id);
+ if (!loc)
+ loc = "Not Available";
+
+ /* Also covers P10 SPR_HMER_TFAC_SHADOW_XFER_ERROR */
+ if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) {
+ prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: TFMR(%016lx) %s\n",
+ loc, this_cpu()->chip_id, core_id, thread_index,
+ mfspr(SPR_TFMR), msg);
+ } else {
+ prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: %s\n",
+ loc, this_cpu()->chip_id, core_id, thread_index,
+ msg);
+ }
+}
+
+static int handle_thread_tfac_error(uint64_t tfmr, uint64_t *out_flags)
+{
+ int recover = 1;
+
+ if (tfmr & SPR_TFMR_DEC_PARITY_ERR)
+ *out_flags |= OPAL_HMI_FLAGS_DEC_LOST;
+ if (!tfmr_recover_local_errors(tfmr))
+ recover = 0;
+ tfmr &= ~(SPR_TFMR_PURR_PARITY_ERR |
+ SPR_TFMR_SPURR_PARITY_ERR |
+ SPR_TFMR_DEC_PARITY_ERR);
+ return recover;
+}
+
+static int64_t opal_handle_hmi(void);
+
+static void opal_handle_hmi_job(void *data __unused)
+{
+ opal_handle_hmi();
+}
+
+/*
+ * Queue HMI handling jobs if secondaries are still in OPAL.
+ * This function is called by thread 0.
+ */
+static struct cpu_job **hmi_kick_secondaries(void)
+{
+ struct cpu_thread *ts = this_cpu();
+ struct cpu_job **hmi_jobs = NULL;
+ int job_sz = sizeof(struct cpu_job *) * cpu_thread_count;
+ int i;
+
+ for (i = 1; i < cpu_thread_count; i++) {
+ ts = next_cpu(ts);
+
+ /* Is this thread still in OPAL ? */
+ if (ts->state == cpu_state_active) {
+ if (!hmi_jobs) {
+ hmi_jobs = zalloc(job_sz);
+ assert(hmi_jobs);
+ }
+
+ prlog(PR_DEBUG, "Sending hmi job to thread %d\n", i);
+ hmi_jobs[i] = cpu_queue_job(ts, "handle_hmi_job",
+ opal_handle_hmi_job, NULL);
+ }
+ }
+ return hmi_jobs;
+}
+
+static int handle_all_core_tfac_error(uint64_t tfmr, uint64_t *out_flags)
+{
+ struct cpu_thread *t, *t0;
+ int recover = -1;
+ struct cpu_job **hmi_jobs = NULL;
+
+ t = this_cpu();
+ t0 = find_cpu_by_pir(cpu_get_thread0(t));
+
+ if (t == t0 && t0->state == cpu_state_os)
+ hmi_jobs = hmi_kick_secondaries();
+
+ /* Rendez vous all threads */
+ hmi_rendez_vous(1);
+
+ /* We use a lock here as some of the TFMR bits are shared and I
+ * prefer to avoid doing the cleanup simultaneously.
+ */
+ lock(&hmi_lock);
+
+ /* First handle a corrupt TFMR, otherwise we can't trust anything.
+ * We'll use a lock here so that the threads don't try to do it at
+ * the same time
+ */
+ if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
+ /* Check if it's still in error state */
+ if (mfspr(SPR_TFMR) & SPR_TFMR_TFMR_CORRUPT)
+ if (!recover_corrupt_tfmr()) {
+ unlock(&hmi_lock);
+ recover = 0;
+ goto error_out;
+ }
+
+ tfmr = mfspr(SPR_TFMR);
+
+ /* We could have got new thread errors in the meantime */
+ if (tfmr & SPR_TFMR_THREAD_ERRORS) {
+ recover = handle_thread_tfac_error(tfmr, out_flags);
+ tfmr &= ~SPR_TFMR_THREAD_ERRORS;
+ }
+ if (!recover) {
+ unlock(&hmi_lock);
+ goto error_out;
+ }
+ }
+
+ /* Tell the OS ... */
+ if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR)
+ *out_flags |= OPAL_HMI_FLAGS_HDEC_LOST;
+
+ /* Clean up bad HDEC or TB on all threads or subcores before we clear
+ * the error conditions
+ */
+ tfmr_cleanup_core_errors(tfmr);
+
+ /* Unlock before next rendez-vous */
+ unlock(&hmi_lock);
+
+ /* Second rendez vous, ensure the above cleanups are all done before
+ * we proceed further
+ */
+ hmi_rendez_vous(2);
+
+ /* We can now clear the error conditions in the core. */
+ recover = tfmr_clear_core_errors(tfmr);
+ if (recover == 0)
+ goto error_out;
+
+ /* Third rendez-vous. We could in theory do the timebase resync as
+ * part of the previous one, but I prefer having all the error
+ * conditions cleared before we start trying.
+ */
+ hmi_rendez_vous(3);
+
+ /* Now perform the actual TB recovery on thread 0 */
+ if (t == t0)
+ recover = chiptod_recover_tb_errors(&this_cpu()->tb_resynced);
+
+error_out:
+ /* Last rendez-vous */
+ hmi_rendez_vous(4);
+
+ /* Now all threads have gone past rendez-vous 3 and not yet past another
+ * rendez-vous 1, so the value of tb_resynced of thread 0 of the core
+ * contains an accurate indication as to whether the timebase was lost.
+ */
+ if (t0->tb_resynced)
+ *out_flags |= OPAL_HMI_FLAGS_TB_RESYNC;
+
+ if (t == t0 && hmi_jobs) {
+ int i;
+ for (i = 1; i < cpu_thread_count; i++)
+ if (hmi_jobs[i])
+ cpu_wait_job(hmi_jobs[i], true);
+ free(hmi_jobs);
+ }
+
+ return recover;
+}
+
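+/*
+ * Read thread 0's TFMR indirectly through the core's SPRC/SPRD SCOM
+ * pair, so that a secondary thread can inspect it.
+ */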
+static uint64_t read_tfmr_t0(void)
+{
+ uint64_t tfmr_t0;
+ uint32_t chip_id = this_cpu()->chip_id;
+ uint32_t core_id = pir_to_core_id(this_cpu()->pir);
+
+ lock(&hmi_lock);
+
+ xscom_write(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_SCOM_SPRC),
+ SETFIELD(P9_SCOMC_SPR_SELECT, 0, P9_SCOMC_TFMR_T0));
+ xscom_read(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_SCOM_SPRD),
+ &tfmr_t0);
+ unlock(&hmi_lock);
+ return tfmr_t0;
+}
+
+/* P9 erratum: In theory, an HDEC error is sent to all threads. However,
+ * due to an erratum on P9 whereby TFMR bit 26 (HDEC parity) cannot be
+ * cleared on threads 1..3, I am not confident we can do a rendez-vous
+ * in all cases.
+ *
+ * Our current approach is to ignore that error unless it is present
+ * on thread 0's TFMR. Also, ignore TB residue errors due to a similar
+ * erratum.
+ */
+static void validate_latched_errors(uint64_t *tfmr)
+{
+ if ((*tfmr & (SPR_TFMR_HDEC_PARITY_ERROR | SPR_TFMR_TB_RESIDUE_ERR))
+ && this_cpu()->is_secondary) {
+ uint64_t tfmr_t0 = read_tfmr_t0();
+
+ if (!(tfmr_t0 & SPR_TFMR_HDEC_PARITY_ERROR))
+ *tfmr &= ~SPR_TFMR_HDEC_PARITY_ERROR;
+
+ if (!(tfmr_t0 & SPR_TFMR_TB_RESIDUE_ERR))
+ *tfmr &= ~SPR_TFMR_TB_RESIDUE_ERR;
+ }
+}
+
+static int handle_tfac_errors(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags)
+{
+ int recover = -1;
+ uint64_t tfmr = mfspr(SPR_TFMR);
+
+ /* Initialize the hmi event with old value of TFMR */
+ hmi_evt->tfmr = cpu_to_be64(tfmr);
+
+ /* A TFMR parity/corrupt error makes us ignore all the local stuff. */
+ if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
+ /* Mark TB as invalid for now as we don't trust TFMR, we'll fix
+ * it up later
+ */
+ this_cpu()->tb_invalid = true;
+ goto bad_tfmr;
+ }
+
+ this_cpu()->tb_invalid = !(tfmr & SPR_TFMR_TB_VALID);
+
+ if (proc_gen == proc_gen_p9)
+ validate_latched_errors(&tfmr);
+
+ /* First, handle thread local errors */
+ if (tfmr & SPR_TFMR_THREAD_ERRORS) {
+ recover = handle_thread_tfac_error(tfmr, out_flags);
+ tfmr &= ~SPR_TFMR_THREAD_ERRORS;
+ }
+
+ bad_tfmr:
+
+ /* Let's see if we still have an all-core error to deal with, if
+ * not, we just bail out
+ */
+ if (tfmr & SPR_TFMR_CORE_ERRORS) {
+ int recover2;
+
+ /* Only update "recover" if it's not already 0 (non-recovered)
+ */
+ recover2 = handle_all_core_tfac_error(tfmr, out_flags);
+ if (recover != 0)
+ recover = recover2;
+ } else if (tfmr & SPR_TFMR_CHIP_TOD_INTERRUPT) {
+ int recover2;
+
+ /*
+ * There are some TOD errors which do not affect the working of
+ * TOD and TB. They stay in a valid state, hence we don't need a
+ * rendez-vous.
+ *
+ * TOD errors that affect TOD/TB will report a global error on
+ * TFMR along with bit 51, and they will go through rendez-vous.
+ */
+ recover2 = chiptod_recover_tod_errors();
+ if (recover != 0)
+ recover = recover2;
+ } else if (this_cpu()->tb_invalid) {
+ /* This shouldn't happen: TB is invalid and no global error
+ * was reported. We just return for now, assuming one will be.
+ * We can't do a rendez-vous without a core-global HMI.
+ */
+ prlog(PR_ERR, "HMI: TB invalid without core error reported ! "
+ "CPU=%x, TFMR=0x%016lx\n", this_cpu()->pir,
+ mfspr(SPR_TFMR));
+ }
+
+ if (recover != -1 && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_ERROR_SYNC;
+ hmi_evt->type = OpalHMI_ERROR_TFAC;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+
+ /* Set the TB state looking at TFMR register before we head out. */
+ this_cpu()->tb_invalid = !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID);
+
+ if (this_cpu()->tb_invalid) {
+ *out_flags |= OPAL_HMI_FLAGS_TOD_TB_FAIL;
+ prlog(PR_WARNING, "Failed to get TB in running state! "
+ "CPU=%x, TFMR=%016lx\n", this_cpu()->pir,
+ mfspr(SPR_TFMR));
+ }
+
+ return recover;
+}
+
+static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct cpu_thread *cpu = this_cpu();
+ int recover = 1;
+ uint64_t handled = 0;
+
+ prlog(PR_DEBUG, "Received HMI interrupt: HMER = 0x%016llx\n", hmer);
+ /* Initialize the hmi event with old value of HMER */
+ if (hmi_evt)
+ hmi_evt->hmer = cpu_to_be64(hmer);
+
+ /* Handle Timer/TOD errors separately */
+ if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) {
+ hmi_print_debug("Timer Facility Error", hmer);
+ handled = hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR);
+ mtspr(SPR_HMER, ~handled);
+ recover = handle_tfac_errors(hmi_evt, out_flags);
+ handled = 0;
+ }
+
+ lock(&hmi_lock);
+ /*
+ * Not all HMIs move TB into an invalid state. Set the TB state by
+ * looking at the TFMR register, which tells us the correct state of
+ * the TB register.
+ */
+ if (hmer & SPR_HMER_PROC_RECV_DONE) {
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint64_t core_wof;
+
+ hmi_print_debug("Processor recovery occurred.", hmer);
+ if (!read_core_wof(chip_id, core_id, &core_wof)) {
+ int i;
+
+ prlog(PR_DEBUG, "Core WOF = 0x%016llx recovered error:\n", core_wof);
+ if (proc_gen <= proc_gen_p9) {
+ for (i = 0; i < ARRAY_SIZE(p9_recoverable_bits); i++) {
+ if (core_wof & PPC_BIT(p9_recoverable_bits[i].bit))
+ prlog(PR_DEBUG, " %s\n", p9_recoverable_bits[i].reason);
+ }
+ } else if (proc_gen == proc_gen_p10) {
+ for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) {
+ if (core_wof & PPC_BIT(p10_core_fir_bits[i].bit))
+ prlog(PR_DEBUG, " %s\n", p10_core_fir_bits[i].reason);
+ }
+ }
+ }
+
+ handled |= SPR_HMER_PROC_RECV_DONE;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED)) {
+ handled |= SPR_HMER_PROC_RECV_ERROR_MASKED;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ hmi_print_debug("Processor recovery Done (masked).", hmer);
+ }
+
+ if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
+ handled |= SPR_HMER_PROC_RECV_AGAIN;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ hmi_print_debug("Processor recovery occurred again before"
+ "bit2 was cleared\n", hmer);
+ }
+
+ /* XXX: what to do with this? */
+ if (hmer & SPR_HMER_SPURR_SCALE_LIMIT) {
+ handled |= SPR_HMER_SPURR_SCALE_LIMIT;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ hmi_print_debug("Turbo versus nominal frequency exceeded limit.", hmer);
+ }
+
+ /* If we see a malfunction alert we cannot continue. */
+ if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
+ handled |= SPR_HMER_MALFUNCTION_ALERT;
+
+ hmi_print_debug("Malfunction Alert", hmer);
+ recover = 0;
+ if (hmi_evt)
+ decode_malfunction(hmi_evt, out_flags);
+ }
+
+ /* If we see a hypervisor resource error we cannot continue. */
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_HYP_RESOURCE_ERR)) {
+ handled |= SPR_HMER_HYP_RESOURCE_ERR;
+
+ hmi_print_debug("Hypervisor resource error", hmer);
+ recover = 0;
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_FATAL;
+ hmi_evt->type = OpalHMI_ERROR_HYP_RESOURCE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ /* XXX: what to do with this? */
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND)) {
+ handled |= SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND;
+ hmer &= ~SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND;
+
+ hmi_print_debug("Attempted to wake thread when threads in TM suspend mode.", hmer);
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_TRIG_FIR_HMI)) {
+ handled |= SPR_HMER_TRIG_FIR_HMI;
+ hmer &= ~SPR_HMER_TRIG_FIR_HMI;
+
+ hmi_print_debug("Clearing unknown debug trigger", hmer);
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_DEBUG_TRIG_FIR;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+ if ((proc_gen == proc_gen_p10) && (hmer & SPR_HMER_P10_TRIG_FIR_HMI)) {
+ handled |= SPR_HMER_P10_TRIG_FIR_HMI;
+ hmer &= ~SPR_HMER_P10_TRIG_FIR_HMI;
+
+ hmi_print_debug("Clearing unknown debug trigger", hmer);
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_DEBUG_TRIG_FIR;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ if (recover == 0)
+ disable_fast_reboot("Unrecoverable HMI");
+ /*
+ * HMER bits are sticky: once set to 1 they remain set until software
+ * writes a 0 to them. Reset the error source bits to 0, otherwise we
+ * keep getting the HMI interrupt again and again. Writing to HMER
+ * acts as an AND, so we write a mask of all 1's except for the bits
+ * we want to clear.
+ */
+ mtspr(SPR_HMER, ~handled);
+ unlock(&hmi_lock);
+ return recover;
+}
+
+static int64_t opal_handle_hmi(void)
+{
+ uint64_t hmer, dummy_flags = 0;
+ struct OpalHMIEvent hmi_evt;
+
+ /*
+ * Compile time check to ensure the size of OpalHMIEvent does not
+ * exceed that of struct opal_msg.
+ */
+ BUILD_ASSERT(sizeof(struct opal_msg) >= sizeof(struct OpalHMIEvent));
+
+ memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent));
+ hmi_evt.version = OpalHMIEvt_V2;
+
+ hmer = mfspr(SPR_HMER); /* Get HMER register value */
+ handle_hmi_exception(hmer, &hmi_evt, &dummy_flags);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_HANDLE_HMI, opal_handle_hmi, 0);
+
+static int64_t opal_handle_hmi2(__be64 *out_flags)
+{
+ uint64_t hmer, flags = 0;
+ struct OpalHMIEvent hmi_evt;
+
+ /*
+ * Compile time check to ensure the size of OpalHMIEvent does not
+ * exceed that of struct opal_msg.
+ */
+ BUILD_ASSERT(sizeof(struct opal_msg) >= sizeof(struct OpalHMIEvent));
+
+ memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent));
+ hmi_evt.version = OpalHMIEvt_V2;
+
+ hmer = mfspr(SPR_HMER); /* Get HMER register value */
+ handle_hmi_exception(hmer, &hmi_evt, &flags);
+ *out_flags = cpu_to_be64(flags);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_HANDLE_HMI2, opal_handle_hmi2, 1);