Diffstat (limited to 'roms/skiboot/hw/phb4.c')
-rw-r--r--  roms/skiboot/hw/phb4.c  6400
1 file changed, 6400 insertions, 0 deletions
diff --git a/roms/skiboot/hw/phb4.c b/roms/skiboot/hw/phb4.c
new file mode 100644
index 000000000..79083d4a1
--- /dev/null
+++ b/roms/skiboot/hw/phb4.c
@@ -0,0 +1,6400 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * PHB4: PCI Host Bridge 4, in POWER9
+ *
+ * Copyright 2013-2019 IBM Corp.
+ * Copyright 2018 Raptor Engineering, LLC
+ */
+
+/*
+ *
+ * FIXME:
+ * More stuff for EEH support:
+ * - PBCQ error reporting interrupt
+ * - I2C-based power management (replacing SHPC)
+ * - Directly detect fenced PHB through one dedicated HW reg
+ */
+
+/*
+ * This is a simplified view of the PHB4 reset and link training steps
+ *
+ * Step 1:
+ * - Check for hotplug status:
+ * o PHB_PCIE_HOTPLUG_STATUS bit PHB_PCIE_HPSTAT_PRESENCE
+ * o If not set -> Bail out (Slot is empty)
+ *
+ * Step 2:
+ * - Do complete PHB reset:
+ * o PHB/ETU reset procedure
+ *
+ * Step 3:
+ *  - Drive PERST active (skip if already asserted, i.e. after cold reboot)
+ * - Wait 250ms (for cards to reset)
+ *    o PowerVM has used 250ms for a long time without any problems
+ *
+ * Step 4:
+ * - Drive PERST inactive
+ *
+ * Step 5:
+ * - Look for inband presence:
+ * o From PERST we have two stages to get inband presence detected
+ * 1) Devices must enter Detect state within 20 ms of the end of
+ * Fundamental Reset
+ *       2) Receiver detect pulses occur every 12ms
+ * - Hence minimum wait time 20 + 12 = 32ms
+ *     o Unfortunately, we've seen cards take 440ms
+ * o Hence we are conservative and poll here for 1000ms (> 440ms)
+ *  - If no inband presence after 1000ms -> Bail out (Slot is broken)
+ * o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_INBAND_PRESENCE
+ *
+ * Step 6:
+ * - Look for link training done:
+ * o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_TL_LINKACT
+ * - If not set after 2000ms, Retry (3 times) -> Goto Step 2
+ *     o phy lockup could cause link training failure, hence going back
+ *       to a complete PHB reset on retry
+ *     o not expected to happen very often
+ *
+ * Step 7:
+ * - Wait for 1 sec (before touching device config space):
+ * - From PCIe spec:
+ * Root Complex and/or system software must allow at least 1.0 s after
+ * a Conventional Reset of a device, before it may determine that a
+ * device which fails to return a Successful Completion status for a
+ * valid Configuration Request is a broken device.
+ *
+ * Step 8:
+ * - Sanity check for fence and link still up:
+ * o If fenced or link down, Retry (3 times) -> Goto Step 2
+ *     o This is not necessary but takes no time and can be useful
+ * o Once we leave here, much harder to recover from errors
+ *
+ * Step 9:
+ * - Check for optimised link for directly attached devices:
+ * o Wait for CRS (so we can read device config space)
+ *     o Check chip and device are in the allowlist. If not, Goto Step 10
+ * o If trained link speed is degraded, retry -> Goto Step 2
+ * o If trained link width is degraded, retry -> Goto Step 2
+ *     o If still degraded after 3 retries, give up and Goto Step 10
+ *
+ * Step 10:
+ * - PHB good, start probing config space.
+ * o core/pci.c: pci_reset_phb() -> pci_scan_phb()
+ */
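+
+/*
+ * An illustrative sketch of the retry flow described in steps 2..6 above.
+ * The helper names here are placeholders for readability only, not the
+ * actual implementation:
+ *
+ *	for (retry = 0; retry < 3; retry++) {
+ *		complete_phb_reset();			// Step 2
+ *		assert_perst(); wait_ms(250);		// Step 3
+ *		deassert_perst();			// Step 4
+ *		if (!poll_inband_presence(1000))	// Step 5
+ *			return;	// slot is broken
+ *		if (poll_link_trained(2000))		// Step 6
+ *			break;	// proceed to Step 7
+ *	}
+ */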
+
+
+#undef NO_ASB
+#undef LOG_CFG
+
+#include <skiboot.h>
+#include <io.h>
+#include <timebase.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-slot.h>
+#include <vpd.h>
+#include <interrupts.h>
+#include <opal.h>
+#include <cpu.h>
+#include <device.h>
+#include <ccan/str/str.h>
+#include <ccan/array_size/array_size.h>
+#include <xscom.h>
+#include <affinity.h>
+#include <phb4.h>
+#include <phb4-regs.h>
+#include <phb4-capp.h>
+#include <capp.h>
+#include <fsp.h>
+#include <chip.h>
+#include <chiptod.h>
+#include <xive.h>
+#include <xscom-p9-regs.h>
+#include <phys-map.h>
+#include <nvram.h>
+
+/* Enable this to disable error interrupts for debug purposes */
+#undef DISABLE_ERR_INTS
+
+static void phb4_init_hw(struct phb4 *p);
+
+#define PHBDBG(p, fmt, a...) prlog(PR_DEBUG, "PHB#%04x[%d:%d]: " fmt, \
+ (p)->phb.opal_id, (p)->chip_id, \
+ (p)->index, ## a)
+#define PHBINF(p, fmt, a...) prlog(PR_INFO, "PHB#%04x[%d:%d]: " fmt, \
+ (p)->phb.opal_id, (p)->chip_id, \
+ (p)->index, ## a)
+#define PHBNOTICE(p, fmt, a...) prlog(PR_NOTICE, "PHB#%04x[%d:%d]: " fmt, \
+ (p)->phb.opal_id, (p)->chip_id, \
+ (p)->index, ## a)
+#define PHBERR(p, fmt, a...) prlog(PR_ERR, "PHB#%04x[%d:%d]: " fmt, \
+ (p)->phb.opal_id, (p)->chip_id, \
+ (p)->index, ## a)
+#ifdef LOG_CFG
+#define PHBLOGCFG(p, fmt, a...) PHBDBG(p, fmt, ## a)
+#else
+#define PHBLOGCFG(p, fmt, a...) do {} while (0)
+#endif
+
+static bool pci_eeh_mmio;
+static bool pci_retry_all;
+static int rx_err_max = PHB4_RX_ERR_MAX;
+
+static inline bool is_phb4(void)
+{
+ return (proc_gen == proc_gen_p9);
+}
+
+static inline bool is_phb5(void)
+{
+ return (proc_gen == proc_gen_p10);
+}
+
+/* PQ offloading on the XIVE IC. */
+static inline bool phb_pq_disable(struct phb4 *p __unused)
+{
+ if (is_phb5())
+ return xive2_cap_phb_pq_disable();
+
+ return false;
+}
+
+/*
+ * Use the ESB page of the XIVE IC for event notification. Latency
+ * improvement.
+ */
+static inline bool phb_abt_mode(struct phb4 *p __unused)
+{
+ if (is_phb5())
+ return xive2_cap_phb_abt();
+
+ return false;
+}
+
+static inline bool phb_can_store_eoi(struct phb4 *p)
+{
+ if (is_phb5())
+ /* PQ offloading is required for StoreEOI */
+ return XIVE2_STORE_EOI_ENABLED && phb_pq_disable(p);
+
+ return XIVE_STORE_EOI_ENABLED;
+}
+
+/* Note: The "ASB" name is historical, practically this means access via
+ * the XSCOM backdoor
+ */
+static inline uint64_t phb4_read_reg_asb(struct phb4 *p, uint32_t offset)
+{
+#ifdef NO_ASB
+ return in_be64(p->regs + offset);
+#else
+ int64_t rc;
+ uint64_t addr, val;
+
+ /* Address register: must use 4 bytes for built-in config space.
+ *
+ * This path isn't usable for outbound configuration space
+ */
+ if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
+ PHBERR(p, "XSCOM unaligned access to CONFIG_DATA unsupported\n");
+ return -1ull;
+ }
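+	/*
+	 * The built-in (RC) config space window at 0x1000..0x17ff and
+	 * CONFIG_DATA are accessed as 4-byte quantities through this
+	 * indirect path, hence the 4B flag on the indirect address below.
+	 */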
+ addr = XETU_HV_IND_ADDR_VALID | offset;
+ if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
+ addr |= XETU_HV_IND_ADDR_4B;
+ rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
+ if (rc != 0) {
+ PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
+ return -1ull;
+ }
+ rc = xscom_read(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, &val);
+ if (rc != 0) {
+ PHBERR(p, "XSCOM error reading register 0x%x\n", offset);
+ return -1ull;
+ }
+ return val;
+#endif
+}
+
+static inline void phb4_write_reg_asb(struct phb4 *p,
+ uint32_t offset, uint64_t val)
+{
+#ifdef NO_ASB
+ out_be64(p->regs + offset, val);
+#else
+ int64_t rc;
+ uint64_t addr;
+
+ /* Address register: must use 4 bytes for built-in config space.
+ *
+ * This path isn't usable for outbound configuration space
+ */
+ if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
+ PHBERR(p, "XSCOM access to CONFIG_DATA unsupported\n");
+ return;
+ }
+ addr = XETU_HV_IND_ADDR_VALID | offset;
+ if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
+ addr |= XETU_HV_IND_ADDR_4B;
+ rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
+ if (rc != 0) {
+ PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
+ return;
+ }
+ rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, val);
+ if (rc != 0) {
+ PHBERR(p, "XSCOM error writing register 0x%x\n", offset);
+ return;
+ }
+#endif
+}
+
+static uint64_t phb4_read_reg(struct phb4 *p, uint32_t offset)
+{
+ /* No register accesses are permitted while in reset */
+ if (p->flags & PHB4_ETU_IN_RESET)
+ return -1ull;
+
+ if (p->flags & PHB4_CFG_USE_ASB)
+ return phb4_read_reg_asb(p, offset);
+ else
+ return in_be64(p->regs + offset);
+}
+
+static void phb4_write_reg(struct phb4 *p, uint32_t offset, uint64_t val)
+{
+ /* No register accesses are permitted while in reset */
+ if (p->flags & PHB4_ETU_IN_RESET)
+ return;
+
+ if (p->flags & PHB4_CFG_USE_ASB)
+ phb4_write_reg_asb(p, offset, val);
+ else
+		out_be64(p->regs + offset, val);
+}
+
+/* Helper to select an IODA table entry */
+static inline void phb4_ioda_sel(struct phb4 *p, uint32_t table,
+ uint32_t addr, bool autoinc)
+{
+ phb4_write_reg(p, PHB_IODA_ADDR,
+ (autoinc ? PHB_IODA_AD_AUTOINC : 0) |
+ SETFIELD(PHB_IODA_AD_TSEL, 0ul, table) |
+ SETFIELD(PHB_IODA_AD_TADR, 0ul, addr));
+}
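+
+/*
+ * Typical usage (taken from the IODA init/reset code below): select a
+ * table with auto-increment, then stream entries through the data
+ * register, e.g.
+ *
+ *	phb4_ioda_sel(p, IODA3_TBL_MRT, 0, true);
+ *	for (i = 0; i < p->mrt_size; i++)
+ *		out_be64(p->regs + PHB_IODA_DATA0, 0);
+ */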
+
+/*
+ * Configuration space access
+ *
+ * The PHB lock is assumed to be already held
+ */
+static int64_t phb4_pcicfg_check(struct phb4 *p, uint32_t bdfn,
+ uint32_t offset, uint32_t size,
+ uint16_t *pe)
+{
+ uint32_t sm = size - 1;
+
+ if (offset > 0xfff || bdfn > 0xffff)
+ return OPAL_PARAMETER;
+ if (offset & sm)
+ return OPAL_PARAMETER;
+
+ /* The root bus only has a device at 0 and we get into an
+ * error state if we try to probe beyond that, so let's
+ * avoid that and just return an error to Linux
+ */
+ if (PCI_BUS_NUM(bdfn) == 0 && (bdfn & 0xff))
+ return OPAL_HARDWARE;
+
+ /* Check PHB state */
+ if (p->broken)
+ return OPAL_HARDWARE;
+
+ /* Fetch the PE# from cache */
+ *pe = be16_to_cpu(p->tbl_rtt[bdfn]);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_rc_read(struct phb4 *p, uint32_t offset, uint8_t sz,
+ void *data, bool use_asb)
+{
+ uint32_t reg = offset & ~3;
+ uint32_t oval;
+
+ /* Some registers are handled locally */
+ switch (reg) {
+ /* Bridge base/limit registers are cached here as HW
+ * doesn't implement them (it hard codes values that
+ * will confuse a proper PCI implementation).
+ */
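+	/* rc_cache[] is indexed in 4-byte slots starting at config offset 0x20 (PCI_CFG_MEM_BASE) */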
+ case PCI_CFG_MEM_BASE: /* Includes PCI_CFG_MEM_LIMIT */
+ oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
+ break;
+ case PCI_CFG_PREF_MEM_BASE: /* Includes PCI_CFG_PREF_MEM_LIMIT */
+ oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
+ oval |= 0x00010001;
+ break;
+ case PCI_CFG_IO_BASE_U16: /* Includes PCI_CFG_IO_LIMIT_U16 */
+ oval = 0;
+ break;
+ case PCI_CFG_PREF_MEM_BASE_U32:
+ case PCI_CFG_PREF_MEM_LIMIT_U32:
+ oval = p->rc_cache[(reg - 0x20) >> 2];
+ break;
+ default:
+ oval = 0xffffffff; /* default if offset too big */
+ if (reg < PHB_RC_CONFIG_SIZE) {
+ if (use_asb)
+ oval = bswap_32(phb4_read_reg_asb(p, PHB_RC_CONFIG_BASE
+ + reg));
+ else
+ oval = in_le32(p->regs + PHB_RC_CONFIG_BASE + reg);
+ }
+ }
+
+ /* Apply any post-read fixups */
+ switch (reg) {
+ case PCI_CFG_IO_BASE:
+ oval |= 0x01f1; /* Set IO base < limit to disable the window */
+ break;
+ }
+
+ switch (sz) {
+ case 1:
+ offset &= 3;
+ *((uint8_t *)data) = (oval >> (offset << 3)) & 0xff;
+ PHBLOGCFG(p, "000 CFG08 Rd %02x=%02x\n",
+ offset, *((uint8_t *)data));
+ break;
+ case 2:
+ offset &= 2;
+ *((uint16_t *)data) = (oval >> (offset << 3)) & 0xffff;
+ PHBLOGCFG(p, "000 CFG16 Rd %02x=%04x\n",
+ offset, *((uint16_t *)data));
+ break;
+ case 4:
+ *((uint32_t *)data) = oval;
+ PHBLOGCFG(p, "000 CFG32 Rd %02x=%08x\n",
+ offset, *((uint32_t *)data));
+ break;
+ default:
+ assert(false);
+ }
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_rc_write(struct phb4 *p, uint32_t offset, uint8_t sz,
+ uint32_t val, bool use_asb)
+{
+ uint32_t reg = offset & ~3;
+ uint32_t old, mask, shift, oldold;
+ int64_t rc;
+
+ if (reg > PHB_RC_CONFIG_SIZE)
+ return OPAL_SUCCESS;
+
+ /* If size isn't 4-bytes, do a RMW cycle */
+ if (sz < 4) {
+ rc = phb4_rc_read(p, reg, 4, &old, use_asb);
+ if (rc != OPAL_SUCCESS)
+ return rc;
+
+ /*
+ * Since we have to Read-Modify-Write here, we need to filter
+ * out registers that have write-1-to-clear bits to prevent
+ * clearing stuff we shouldn't be. So for any register this
+ * applies to, mask out those bits.
+ */
+ oldold = old;
+ switch(reg) {
+ case 0x1C: /* Secondary status */
+ old &= 0x00ffffff; /* mask out 24-31 */
+ break;
+ case 0x50: /* EC - Device status */
+ old &= 0xfff0ffff; /* mask out 16-19 */
+ break;
+ case 0x58: /* EC - Link status */
+ old &= 0x3fffffff; /* mask out 30-31 */
+ break;
+ case 0x78: /* EC - Link status 2 */
+ old &= 0xf000ffff; /* mask out 16-27 */
+ break;
+ /* These registers *only* have write-1-to-clear bits */
+ case 0x104: /* AER - Uncorr. error status */
+ case 0x110: /* AER - Corr. error status */
+ case 0x130: /* AER - Root error status */
+ case 0x180: /* P16 - status */
+ case 0x184: /* P16 - LDPM status */
+ case 0x188: /* P16 - FRDPM status */
+ case 0x18C: /* P16 - SRDPM status */
+ old &= 0x00000000;
+ break;
+ }
+
+ if (old != oldold) {
+ PHBLOGCFG(p, "Rewrote %x to %x for reg %x for W1C\n",
+ oldold, old, reg);
+ }
+
+ if (sz == 1) {
+ shift = (offset & 3) << 3;
+ mask = 0xff << shift;
+ val = (old & ~mask) | ((val & 0xff) << shift);
+ } else {
+ shift = (offset & 2) << 3;
+ mask = 0xffff << shift;
+ val = (old & ~mask) | ((val & 0xffff) << shift);
+ }
+ }
+
+ /* Some registers are handled locally */
+ switch (reg) {
+ /* See comment in phb4_rc_read() */
+ case PCI_CFG_MEM_BASE: /* Includes PCI_CFG_MEM_LIMIT */
+ case PCI_CFG_PREF_MEM_BASE: /* Includes PCI_CFG_PREF_MEM_LIMIT */
+ case PCI_CFG_PREF_MEM_BASE_U32:
+ case PCI_CFG_PREF_MEM_LIMIT_U32:
+ p->rc_cache[(reg - 0x20) >> 2] = val;
+ break;
+ case PCI_CFG_IO_BASE_U16: /* Includes PCI_CFG_IO_LIMIT_U16 */
+ break;
+ default:
+ /* Workaround PHB config space enable */
+ PHBLOGCFG(p, "000 CFG%02d Wr %02x=%08x\n", 8 * sz, reg, val);
+ if (use_asb)
+ phb4_write_reg_asb(p, PHB_RC_CONFIG_BASE + reg, val);
+ else
+ out_le32(p->regs + PHB_RC_CONFIG_BASE + reg, val);
+ }
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_pcicfg_read(struct phb4 *p, uint32_t bdfn,
+ uint32_t offset, uint32_t size,
+ void *data)
+{
+ uint64_t addr, val64;
+ int64_t rc;
+ uint16_t pe;
+ bool use_asb = false;
+
+ rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
+ if (rc)
+ return rc;
+
+ if (p->flags & PHB4_AIB_FENCED) {
+ if (!(p->flags & PHB4_CFG_USE_ASB))
+ return OPAL_HARDWARE;
+ if (bdfn != 0)
+ return OPAL_HARDWARE;
+ use_asb = true;
+ } else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
+ return OPAL_HARDWARE;
+ }
+
+ /* Handle per-device filters */
+ rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
+ (uint32_t *)data, false);
+ if (rc != OPAL_PARTIAL)
+ return rc;
+
+ /* Handle root complex MMIO based config space */
+ if (bdfn == 0)
+ return phb4_rc_read(p, offset, size, data, use_asb);
+
+ addr = PHB_CA_ENABLE;
+ addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
+ addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
+ addr = SETFIELD(PHB_CA_PE, addr, pe);
+ if (use_asb) {
+ phb4_write_reg_asb(p, PHB_CONFIG_ADDRESS, addr);
+ sync();
+ val64 = bswap_64(phb4_read_reg_asb(p, PHB_CONFIG_DATA));
+ switch(size) {
+ case 1:
+ *((uint8_t *)data) = val64 >> (8 * (offset & 3));
+ break;
+ case 2:
+ *((uint16_t *)data) = val64 >> (8 * (offset & 2));
+ break;
+ case 4:
+ *((uint32_t *)data) = val64;
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+ } else {
+ out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
+ switch(size) {
+ case 1:
+ *((uint8_t *)data) =
+ in_8(p->regs + PHB_CONFIG_DATA + (offset & 3));
+ PHBLOGCFG(p, "%03x CFG08 Rd %02x=%02x\n",
+ bdfn, offset, *((uint8_t *)data));
+ break;
+ case 2:
+ *((uint16_t *)data) =
+ in_le16(p->regs + PHB_CONFIG_DATA + (offset & 2));
+ PHBLOGCFG(p, "%03x CFG16 Rd %02x=%04x\n",
+ bdfn, offset, *((uint16_t *)data));
+ break;
+ case 4:
+ *((uint32_t *)data) = in_le32(p->regs + PHB_CONFIG_DATA);
+ PHBLOGCFG(p, "%03x CFG32 Rd %02x=%08x\n",
+ bdfn, offset, *((uint32_t *)data));
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+ }
+ return OPAL_SUCCESS;
+}
+
+
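+/*
+ * Generate the fixed-size config space read accessors
+ * (phb4_pcicfg_read8/16/32). The output buffer is pre-set to all ones so
+ * callers see 0xff..ff if the read fails; the matching write accessors
+ * are generated further down.
+ */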
+#define PHB4_PCI_CFG_READ(size, type) \
+static int64_t phb4_pcicfg_read##size(struct phb *phb, uint32_t bdfn, \
+ uint32_t offset, type *data) \
+{ \
+ struct phb4 *p = phb_to_phb4(phb); \
+ \
+ /* Initialize data in case of error */ \
+ *data = (type)0xffffffff; \
+ return phb4_pcicfg_read(p, bdfn, offset, sizeof(type), data); \
+}
+
+static int64_t phb4_pcicfg_write(struct phb4 *p, uint32_t bdfn,
+ uint32_t offset, uint32_t size,
+ uint32_t data)
+{
+ uint64_t addr;
+ int64_t rc;
+ uint16_t pe;
+ bool use_asb = false;
+
+ rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
+ if (rc)
+ return rc;
+
+ if (p->flags & PHB4_AIB_FENCED) {
+ if (!(p->flags & PHB4_CFG_USE_ASB))
+ return OPAL_HARDWARE;
+ if (bdfn != 0)
+ return OPAL_HARDWARE;
+ use_asb = true;
+ } else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
+ return OPAL_HARDWARE;
+ }
+
+ /* Handle per-device filters */
+ rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
+ (uint32_t *)&data, true);
+ if (rc != OPAL_PARTIAL)
+ return rc;
+
+ /* Handle root complex MMIO based config space */
+ if (bdfn == 0)
+ return phb4_rc_write(p, offset, size, data, use_asb);
+
+ addr = PHB_CA_ENABLE;
+ addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
+ addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
+ addr = SETFIELD(PHB_CA_PE, addr, pe);
+ if (use_asb) {
+ /* We don't support ASB config space writes */
+ return OPAL_UNSUPPORTED;
+ } else {
+ out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
+ switch(size) {
+ case 1:
+ out_8(p->regs + PHB_CONFIG_DATA + (offset & 3), data);
+ break;
+ case 2:
+ out_le16(p->regs + PHB_CONFIG_DATA + (offset & 2), data);
+ break;
+ case 4:
+ out_le32(p->regs + PHB_CONFIG_DATA, data);
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+ }
+ PHBLOGCFG(p, "%03x CFG%d Wr %02x=%08x\n", bdfn, 8 * size, offset, data);
+ return OPAL_SUCCESS;
+}
+
+#define PHB4_PCI_CFG_WRITE(size, type) \
+static int64_t phb4_pcicfg_write##size(struct phb *phb, uint32_t bdfn, \
+ uint32_t offset, type data) \
+{ \
+ struct phb4 *p = phb_to_phb4(phb); \
+ \
+ return phb4_pcicfg_write(p, bdfn, offset, sizeof(type), data); \
+}
+
+PHB4_PCI_CFG_READ(8, u8)
+PHB4_PCI_CFG_READ(16, u16)
+PHB4_PCI_CFG_READ(32, u32)
+PHB4_PCI_CFG_WRITE(8, u8)
+PHB4_PCI_CFG_WRITE(16, u16)
+PHB4_PCI_CFG_WRITE(32, u32)
+
+static int64_t phb4_get_reserved_pe_number(struct phb *phb)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+
+ return PHB4_RESERVED_PE_NUM(p);
+}
+
+
+static void phb4_root_port_init(struct phb *phb, struct pci_device *dev,
+ int ecap, int aercap)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ struct pci_slot *slot = dev->slot;
+ uint16_t bdfn = dev->bdfn;
+ uint16_t val16;
+ uint32_t val32;
+
+ /*
+ * Use the PHB's callback so that UTL events will be masked or
+ * unmasked when the link is down or up.
+ */
+ if (dev->slot && dev->slot->ops.prepare_link_change &&
+ phb->slot && phb->slot->ops.prepare_link_change)
+ dev->slot->ops.prepare_link_change =
+ phb->slot->ops.prepare_link_change;
+
+ // FIXME: check recommended init values for phb4
+
+ /*
+ * Enable the bridge slot capability in the root port's config
+ * space. This should probably be done *before* we start
+ * scanning config space, but we need a pci_device struct to
+ * exist before we do a slot lookup so *faaaaaaaaaaaaaart*
+ */
+ if (slot && slot->pluggable && slot->power_limit) {
+ uint64_t val;
+
+ val = in_be64(p->regs + PHB_PCIE_SCR);
+ val |= PHB_PCIE_SCR_SLOT_CAP;
+ out_be64(p->regs + PHB_PCIE_SCR, val);
+
+ /* update the cached slotcap */
+ pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_SLOTCAP,
+ &slot->slot_cap);
+ }
+
+ /* Enable SERR and parity checking */
+ pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
+ val16 |= (PCI_CFG_CMD_SERR_EN | PCI_CFG_CMD_PERR_RESP |
+ PCI_CFG_CMD_MEM_EN);
+ pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
+
+ /* Enable reporting various errors */
+ if (!ecap) return;
+ pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
+ val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
+ PCICAP_EXP_DEVCTL_NFE_REPORT |
+ PCICAP_EXP_DEVCTL_FE_REPORT |
+ PCICAP_EXP_DEVCTL_UR_REPORT);
+ pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
+
+ if (!aercap) return;
+
+ /* Mask various unrecoverable errors */
+ pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, &val32);
+ val32 |= (PCIECAP_AER_UE_MASK_POISON_TLP |
+ PCIECAP_AER_UE_MASK_COMPL_TIMEOUT |
+ PCIECAP_AER_UE_MASK_COMPL_ABORT |
+ PCIECAP_AER_UE_MASK_ECRC);
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, val32);
+
+ /* Report various unrecoverable errors as fatal errors */
+ pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, &val32);
+ val32 |= (PCIECAP_AER_UE_SEVERITY_DLLP |
+ PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
+ PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
+ PCIECAP_AER_UE_SEVERITY_UNEXP_COMPL |
+ PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
+ PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP);
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);
+
+ /* Mask various recoverable errors */
+ pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, &val32);
+ val32 |= PCIECAP_AER_CE_MASK_ADV_NONFATAL;
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);
+
+ /* Enable ECRC check */
+ pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
+ val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
+ PCIECAP_AER_CAPCTL_ECRCC_EN);
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
+
+ /* Enable all error reporting */
+ pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, &val32);
+ val32 |= (PCIECAP_AER_RERR_CMD_FE |
+ PCIECAP_AER_RERR_CMD_NFE |
+ PCIECAP_AER_RERR_CMD_CE);
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, val32);
+}
+
+static void phb4_switch_port_init(struct phb *phb,
+ struct pci_device *dev,
+ int ecap, int aercap)
+{
+ uint16_t bdfn = dev->bdfn;
+ uint16_t val16;
+ uint32_t val32;
+
+ // FIXME: update AER settings for phb4
+
+ /* Enable SERR and parity checking and disable INTx */
+ pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
+ val16 |= (PCI_CFG_CMD_PERR_RESP |
+ PCI_CFG_CMD_SERR_EN |
+ PCI_CFG_CMD_INTx_DIS);
+ pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
+
+	/* Disable parity error and enable system error */
+ pci_cfg_read16(phb, bdfn, PCI_CFG_BRCTL, &val16);
+ val16 &= ~PCI_CFG_BRCTL_PERR_RESP_EN;
+ val16 |= PCI_CFG_BRCTL_SERR_EN;
+ pci_cfg_write16(phb, bdfn, PCI_CFG_BRCTL, val16);
+
+ /* Enable reporting various errors */
+ if (!ecap) return;
+ pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
+ val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
+ PCICAP_EXP_DEVCTL_NFE_REPORT |
+ PCICAP_EXP_DEVCTL_FE_REPORT);
+ /* HW279570 - Disable reporting of correctable errors */
+ val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
+ pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
+
+ /* Unmask all unrecoverable errors */
+ if (!aercap) return;
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, 0x0);
+
+ /* Severity of unrecoverable errors */
+ if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT)
+ val32 = (PCIECAP_AER_UE_SEVERITY_DLLP |
+ PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
+ PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
+ PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
+ PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP |
+ PCIECAP_AER_UE_SEVERITY_INTERNAL);
+ else
+ val32 = (PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
+ PCIECAP_AER_UE_SEVERITY_INTERNAL);
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);
+
+ /*
+ * Mask various correctable errors
+ */
+ val32 = PCIECAP_AER_CE_MASK_ADV_NONFATAL;
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);
+
+ /* Enable ECRC generation and disable ECRC check */
+ pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
+ val32 |= PCIECAP_AER_CAPCTL_ECRCG_EN;
+ val32 &= ~PCIECAP_AER_CAPCTL_ECRCC_EN;
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
+}
+
+static void phb4_endpoint_init(struct phb *phb,
+ struct pci_device *dev,
+ int ecap, int aercap)
+{
+ uint16_t bdfn = dev->bdfn;
+ uint16_t val16;
+ uint32_t val32;
+
+ /* Enable SERR and parity checking */
+ pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
+ val16 |= (PCI_CFG_CMD_PERR_RESP |
+ PCI_CFG_CMD_SERR_EN);
+ pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
+
+ /* Enable reporting various errors */
+ if (!ecap) return;
+ pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
+ val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
+ val16 |= (PCICAP_EXP_DEVCTL_NFE_REPORT |
+ PCICAP_EXP_DEVCTL_FE_REPORT |
+ PCICAP_EXP_DEVCTL_UR_REPORT);
+ pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
+
+ /* Enable ECRC generation and check */
+ if (!aercap)
+ return;
+
+ pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
+ val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
+ PCIECAP_AER_CAPCTL_ECRCC_EN);
+ pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
+}
+
+static int64_t phb4_pcicfg_no_dstate(void *dev __unused,
+ struct pci_cfg_reg_filter *pcrf,
+ uint32_t offset, uint32_t len __unused,
+ uint32_t *data __unused, bool write)
+{
+ uint32_t loff = offset - pcrf->start;
+
+ /* Disable D-state change on children of the PHB. For now we
+ * simply block all writes to the PM control/status
+ */
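+	/* loff 4..5 covers the 16-bit PMCSR register of the PM capability */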
+ if (write && loff >= 4 && loff < 6)
+ return OPAL_SUCCESS;
+
+ return OPAL_PARTIAL;
+}
+
+void phb4_pec2_dma_engine_realloc(struct phb4 *p)
+{
+ uint64_t reg;
+
+ /*
+ * Allocate 16 extra dma read engines to stack 0, to boost dma
+	 * performance for devices on stack 0 of PEC2, i.e. PHB3.
+ * It comes at a price of reduced read engine allocation for
+ * devices on stack 1 and 2. The engine allocation becomes
+ * 48/8/8 instead of the default 32/16/16.
+ *
+ * The reallocation magic value should be 0xffff0000ff008000,
+ * but per the PCI designers, dma engine 32 (bit 0) has a
+ * quirk, and 0x7fff80007F008000 has the same effect (engine
+ * 32 goes to PHB4).
+ */
+ if (p->index != 3) /* shared slot on PEC2 */
+ return;
+
+ PHBINF(p, "Allocating an extra 16 dma read engines on PEC2 stack0\n");
+ reg = 0x7fff80007F008000ULL;
+ xscom_write(p->chip_id,
+ p->pci_xscom + XPEC_PCI_PRDSTKOVR, reg);
+ xscom_write(p->chip_id,
+ p->pe_xscom + XPEC_NEST_READ_STACK_OVERRIDE, reg);
+}
+
+static void phb4_check_device_quirks(struct pci_device *dev)
+{
+ /* Some special adapter tweaks for devices directly under the PHB */
+ if (dev->primary_bus != 1)
+ return;
+
+ /* PM quirk */
+ if (!pci_has_cap(dev, PCI_CFG_CAP_ID_PM, false))
+ return;
+
+ pci_add_cfg_reg_filter(dev,
+ pci_cap(dev, PCI_CFG_CAP_ID_PM, false), 8,
+ PCI_REG_FLAG_WRITE,
+ phb4_pcicfg_no_dstate);
+}
+
+static int phb4_device_init(struct phb *phb, struct pci_device *dev,
+ void *data __unused)
+{
+ int ecap, aercap;
+
+ /* Setup special device quirks */
+ phb4_check_device_quirks(dev);
+
+ /* Common initialization for the device */
+ pci_device_init(phb, dev);
+
+ ecap = pci_cap(dev, PCI_CFG_CAP_ID_EXP, false);
+ aercap = pci_cap(dev, PCIECAP_ID_AER, true);
+ if (dev->dev_type == PCIE_TYPE_ROOT_PORT)
+ phb4_root_port_init(phb, dev, ecap, aercap);
+ else if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT ||
+ dev->dev_type == PCIE_TYPE_SWITCH_DNPORT)
+ phb4_switch_port_init(phb, dev, ecap, aercap);
+ else
+ phb4_endpoint_init(phb, dev, ecap, aercap);
+
+ return 0;
+}
+
+static int64_t phb4_pci_reinit(struct phb *phb, uint64_t scope, uint64_t data)
+{
+ struct pci_device *pd;
+ uint16_t bdfn = data;
+ int ret;
+
+ if (scope != OPAL_REINIT_PCI_DEV)
+ return OPAL_PARAMETER;
+
+ pd = pci_find_dev(phb, bdfn);
+ if (!pd)
+ return OPAL_PARAMETER;
+
+ ret = phb4_device_init(phb, pd, NULL);
+ if (ret)
+ return OPAL_HARDWARE;
+
+ return OPAL_SUCCESS;
+}
+
+/* Default value for MBT0, see comments in init_ioda_cache() */
+static uint64_t phb4_default_mbt0(struct phb4 *p, unsigned int bar_idx)
+{
+ uint64_t mbt0;
+
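+	/*
+	 * The very last MBT entry maps to MDT column 3 and the two before
+	 * it to columns 2 and 1; all earlier entries default to
+	 * PE-segmented mode.
+	 */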
+ switch (p->mbt_size - bar_idx - 1) {
+ case 0:
+ mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
+ mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 3);
+ break;
+ case 1:
+ mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
+ mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 2);
+ break;
+ case 2:
+ mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
+ mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 1);
+ break;
+ default:
+ mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_PE_SEG);
+ }
+ return mbt0;
+}
+
+/*
+ * Clear the saved (cached) IODA state.
+ *
+ * The caches here are used to save the configuration of the IODA tables
+ * done by the OS. When the PHB is reset it loses all of its internal state
+ * so we need to keep a copy to restore from. This function re-initialises
+ * the saved state to sane defaults.
+ */
+static void phb4_init_ioda_cache(struct phb4 *p)
+{
+ uint32_t i;
+
+ /*
+ * The RTT entries (RTE) are supposed to be initialised to
+ * 0xFF which indicates an invalid PE# for that RTT index
+	 * (the bdfn). However, we set them to the reserved PE# since Linux
+ * needs to find the devices first by scanning config space
+ * and this occurs before PEs have been assigned.
+ */
+ for (i = 0; i < RTT_TABLE_ENTRIES; i++)
+ p->tbl_rtt[i] = cpu_to_be16(PHB4_RESERVED_PE_NUM(p));
+ memset(p->tbl_peltv, 0x0, p->tbl_peltv_size);
+ memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
+
+ /* XXX Should we mask them ? */
+ memset(p->mist_cache, 0x0, sizeof(p->mist_cache));
+
+ /* Configure MBT entries 1...N */
+
+	/* Column 0 is left 0 and will be used for M32 and configured
+ * by the OS. We use MDT column 1..3 for the last 3 BARs, thus
+ * allowing Linux to remap those, and setup all the other ones
+ * for now in mode 00 (segment# == PE#). By default those
+ * columns are set to map the same way.
+ */
+ for (i = 0; i < p->max_num_pes; i++) {
+ p->mdt_cache[i] = SETFIELD(IODA3_MDT_PE_B, 0ull, i);
+ p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_C, 0ull, i);
+ p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_D, 0ull, i);
+ }
+
+ /* Initialize MBT entries for BARs 1...N */
+ for (i = 1; i < p->mbt_size; i++) {
+ p->mbt_cache[i][0] = phb4_default_mbt0(p, i);
+ p->mbt_cache[i][1] = 0;
+ }
+
+	/* Initialize the M32 BAR using MBT entry 0, MDT column A */
+ p->mbt_cache[0][0] = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
+ p->mbt_cache[0][0] |= SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0);
+ p->mbt_cache[0][0] |= IODA3_MBT0_TYPE_M32 | (p->mm1_base & IODA3_MBT0_BASE_ADDR);
+ p->mbt_cache[0][1] = IODA3_MBT1_ENABLE | ((~(M32_PCI_SIZE - 1)) & IODA3_MBT1_MASK);
+}
+
+static int64_t phb4_wait_bit(struct phb4 *p, uint32_t reg,
+ uint64_t mask, uint64_t want_val)
+{
+ uint64_t val;
+
+ /* Wait for all pending TCE kills to complete
+ *
+ * XXX Add timeout...
+ */
+ /* XXX SIMICS is nasty... */
+ if ((reg == PHB_TCE_KILL || reg == PHB_DMA_READ_WRITE_SYNC) &&
+ chip_quirk(QUIRK_SIMICS))
+ return OPAL_SUCCESS;
+
+ for (;;) {
+ val = in_be64(p->regs + reg);
+ if (val == 0xffffffffffffffffull) {
+ /* XXX Fenced ? */
+ return OPAL_HARDWARE;
+ }
+ if ((val & mask) == want_val)
+ break;
+
+ }
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_tce_kill(struct phb *phb, uint32_t kill_type,
+ uint64_t pe_number, uint32_t tce_size,
+ uint64_t dma_addr, uint32_t npages)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t val;
+ int64_t rc;
+
+ sync();
+ switch(kill_type) {
+ case OPAL_PCI_TCE_KILL_PAGES:
+ while (npages--) {
+ /* Wait for a slot in the HW kill queue */
+ rc = phb4_wait_bit(p, PHB_TCE_KILL,
+ PHB_TCE_KILL_ALL |
+ PHB_TCE_KILL_PE |
+ PHB_TCE_KILL_ONE, 0);
+ if (rc)
+ return rc;
+ val = SETFIELD(PHB_TCE_KILL_PENUM, dma_addr, pe_number);
+
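+			/*
+			 * The top nibble of the 64-bit DMA address and the
+			 * in-page offset bits for the given TCE page size
+			 * must both be zero.
+			 */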
+ /* Set appropriate page size */
+ switch(tce_size) {
+ case 0x1000:
+ if (dma_addr & 0xf000000000000fffull)
+ return OPAL_PARAMETER;
+ break;
+ case 0x10000:
+ if (dma_addr & 0xf00000000000ffffull)
+ return OPAL_PARAMETER;
+ val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_64K;
+ break;
+ case 0x200000:
+ if (dma_addr & 0xf0000000001fffffull)
+ return OPAL_PARAMETER;
+ val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_2M;
+ break;
+ case 0x40000000:
+ if (dma_addr & 0xf00000003fffffffull)
+ return OPAL_PARAMETER;
+ val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_1G;
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+ /* Perform kill */
+ out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ONE | val);
+ /* Next page */
+ dma_addr += tce_size;
+ }
+ break;
+ case OPAL_PCI_TCE_KILL_PE:
+ /* Wait for a slot in the HW kill queue */
+ rc = phb4_wait_bit(p, PHB_TCE_KILL,
+ PHB_TCE_KILL_ALL |
+ PHB_TCE_KILL_PE |
+ PHB_TCE_KILL_ONE, 0);
+ if (rc)
+ return rc;
+ /* Perform kill */
+ out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_PE |
+ SETFIELD(PHB_TCE_KILL_PENUM, 0ull, pe_number));
+ break;
+ case OPAL_PCI_TCE_KILL_ALL:
+ /* Wait for a slot in the HW kill queue */
+ rc = phb4_wait_bit(p, PHB_TCE_KILL,
+ PHB_TCE_KILL_ALL |
+ PHB_TCE_KILL_PE |
+ PHB_TCE_KILL_ONE, 0);
+ if (rc)
+ return rc;
+ /* Perform kill */
+ out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ALL);
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ /* Start DMA sync process */
+	if (is_phb5()) {
+ val = in_be64(p->regs + PHB_DMA_READ_WRITE_SYNC) &
+ (PHB_DMA_READ_SYNC_COMPLETE |
+ PHB_DMA_WRITE_SYNC_COMPLETE);
+ out_be64(p->regs + PHB_DMA_READ_WRITE_SYNC,
+ val | PHB_DMA_READ_SYNC_START);
+
+ } else {
+ out_be64(p->regs + PHB_DMA_READ_WRITE_SYNC,
+ PHB_DMA_READ_SYNC_START);
+ }
+
+ /* Wait for kill to complete */
+ rc = phb4_wait_bit(p, PHB_Q_DMA_R, PHB_Q_DMA_R_TCE_KILL_STATUS, 0);
+ if (rc)
+ return rc;
+
+ /* Wait for DMA sync to complete */
+ return phb4_wait_bit(p, PHB_DMA_READ_WRITE_SYNC,
+ PHB_DMA_READ_SYNC_COMPLETE,
+ PHB_DMA_READ_SYNC_COMPLETE);
+}
+
+/* phb4_ioda_reset - Reset the IODA tables
+ *
+ * @purge: If true, the cache is cleared and the cleared values
+ * are applied to HW. If false, the cached values are
+ * applied to HW
+ *
+ * This resets the IODA tables in the PHB. It is called at
+ * initialization time, on PHB reset, and can be called
+ * explicitly from OPAL
+ */
+static int64_t phb4_ioda_reset(struct phb *phb, bool purge)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint32_t i;
+ uint64_t val;
+
+ if (purge) {
+ PHBDBG(p, "Purging all IODA tables...\n");
+ if (phb->slot)
+ phb->slot->link_retries = PHB4_LINK_LINK_RETRIES;
+ phb4_init_ioda_cache(p);
+ }
+
+ /* Init_30..31 - Errata workaround, clear PESTA entry 0 */
+ phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, false);
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+
+ /* Init_32..33 - MIST */
+ phb4_ioda_sel(p, IODA3_TBL_MIST, 0, true);
+ val = in_be64(p->regs + PHB_IODA_ADDR);
+ val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 0xf);
+ out_be64(p->regs + PHB_IODA_ADDR, val);
+ for (i = 0; i < (p->num_irqs/4); i++)
+ out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[i]);
+
+ /* Init_34..35 - MRT */
+ phb4_ioda_sel(p, IODA3_TBL_MRT, 0, true);
+ for (i = 0; i < p->mrt_size; i++)
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+
+ /* Init_36..37 - TVT */
+ phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
+ for (i = 0; i < p->tvt_size; i++)
+ out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
+
+ /* Init_38..39 - MBT */
+ phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
+ for (i = 0; i < p->mbt_size; i++) {
+ out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
+ out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
+ }
+
+ /* Init_40..41 - MDT */
+ phb4_ioda_sel(p, IODA3_TBL_MDT, 0, true);
+ for (i = 0; i < p->max_num_pes; i++)
+ out_be64(p->regs + PHB_IODA_DATA0, p->mdt_cache[i]);
+
+ /* Additional OPAL specific inits */
+
+ /* Clear PEST & PEEV */
+ for (i = 0; i < p->max_num_pes; i++) {
+ phb4_ioda_sel(p, IODA3_TBL_PESTA, i, false);
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+ phb4_ioda_sel(p, IODA3_TBL_PESTB, i, false);
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+ }
+
+ phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
+ for (i = 0; i < p->max_num_pes/64; i++)
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+
+ /* Invalidate RTE, TCE cache */
+ out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);
+
+ return phb4_tce_kill(&p->phb, OPAL_PCI_TCE_KILL_ALL, 0, 0, 0, 0);
+}
+
+/*
+ * Clear anything we have in the PAPR Error Injection registers. The
+ * spec says PAPR error injection should be one-shot, without the
+ * "sticky" bit, but that's false according to our experiments. So we
+ * have to clear them at the appropriate point to avoid an endlessly
+ * frozen PE.
+ */
+static int64_t phb4_papr_errinjct_reset(struct phb *phb)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+
+ out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
+ out_be64(p->regs + PHB_PAPR_ERR_INJ_ADDR, 0x0ul);
+ out_be64(p->regs + PHB_PAPR_ERR_INJ_MASK, 0x0ul);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_set_phb_mem_window(struct phb *phb,
+ uint16_t window_type,
+ uint16_t window_num,
+ uint64_t addr,
+ uint64_t pci_addr __unused,
+ uint64_t size)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t mbt0, mbt1;
+
+ /*
+ * We have a unified MBT for all BARs on PHB4.
+ *
+ * So we use it as follow:
+ *
+ * - M32 is hard wired to be MBT[0] and uses MDT column 0
+ * for remapping.
+ *
+ * - MBT[1..n] are available to the OS, currently only as
+ * fully segmented or single PE (we don't yet expose the
+ * new segmentation modes).
+ *
+	 * - We configure the 3 last BARs to column 1..3 initially
+ * set to segment# == PE#. We will need to provide some
+ * extensions to the existing APIs to enable remapping of
+ * segments on those BARs (and only those) as the current
+ * API forces single segment mode.
+ */
+ switch (window_type) {
+ case OPAL_IO_WINDOW_TYPE:
+ case OPAL_M32_WINDOW_TYPE:
+ return OPAL_UNSUPPORTED;
+ case OPAL_M64_WINDOW_TYPE:
+ if (window_num == 0 || window_num >= p->mbt_size) {
+ PHBERR(p, "%s: Invalid window %d\n",
+ __func__, window_num);
+ return OPAL_PARAMETER;
+ }
+
+ mbt0 = p->mbt_cache[window_num][0];
+ mbt1 = p->mbt_cache[window_num][1];
+
+ /* XXX For now we assume the 4K minimum alignment,
+ * todo: check with the HW folks what the exact limits
+ * are based on the segmentation model.
+ */
+ if ((addr & 0xFFFul) || (size & 0xFFFul)) {
+ PHBERR(p, "%s: Bad addr/size alignment %llx/%llx\n",
+ __func__, addr, size);
+ return OPAL_PARAMETER;
+ }
+
+ /* size should be 2^N */
+ if (!size || size & (size-1)) {
+ PHBERR(p, "%s: size not a power of 2: %llx\n",
+ __func__, size);
+ return OPAL_PARAMETER;
+ }
+
+ /* address should be size aligned */
+ if (addr & (size - 1)) {
+ PHBERR(p, "%s: addr not size aligned %llx/%llx\n",
+ __func__, addr, size);
+ return OPAL_PARAMETER;
+ }
+
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ /* The BAR shouldn't be enabled yet */
+ if (mbt0 & IODA3_MBT0_ENABLE)
+ return OPAL_PARTIAL;
+
+ /* Apply the settings */
+ mbt0 = SETFIELD(IODA3_MBT0_BASE_ADDR, mbt0, addr >> 12);
+ mbt1 = SETFIELD(IODA3_MBT1_MASK, mbt1, ~((size >> 12) -1));
+ p->mbt_cache[window_num][0] = mbt0;
+ p->mbt_cache[window_num][1] = mbt1;
+
+ return OPAL_SUCCESS;
+}
+
+/*
+ * A specific M64 BAR can either be shared by all PEs or owned
+ * exclusively by a single PE.
+ */
+static int64_t phb4_phb_mmio_enable(struct phb __unused *phb,
+ uint16_t window_type,
+ uint16_t window_num,
+ uint16_t enable)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t mbt0, mbt1, base, mask;
+
+ /*
+ * By design, PHB4 doesn't support IODT any more.
+	 * Besides, the M32 BAR can't be enabled here either. So
+	 * this function is only used for M64 mapping and each
+	 * BAR is supposed to be shared by all PEs.
+ *
+ * TODO: Add support for some of the new PHB4 split modes
+ */
+ switch (window_type) {
+ case OPAL_IO_WINDOW_TYPE:
+ case OPAL_M32_WINDOW_TYPE:
+ return OPAL_UNSUPPORTED;
+ case OPAL_M64_WINDOW_TYPE:
+ /* Window 0 is reserved for M32 */
+ if (window_num == 0 || window_num >= p->mbt_size ||
+ enable > OPAL_ENABLE_M64_NON_SPLIT) {
+ PHBDBG(p,
+ "phb4_phb_mmio_enable wrong args (window %d enable %d)\n",
+ window_num, enable);
+ return OPAL_PARAMETER;
+ }
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ /*
+	 * We need to check the base/mask while enabling
+	 * the M64 BAR. Otherwise, an invalid base/mask
+	 * might unintentionally cause a fenced AIB.
+ */
+ mbt0 = p->mbt_cache[window_num][0];
+ mbt1 = p->mbt_cache[window_num][1];
+
+ if (enable == OPAL_DISABLE_M64) {
+ /* Reset the window to disabled & default mode */
+ mbt0 = phb4_default_mbt0(p, window_num);
+ mbt1 = 0;
+ } else {
+ /* Verify that the mode is valid and consistent */
+ if (enable == OPAL_ENABLE_M64_SPLIT) {
+ uint64_t mode = GETFIELD(IODA3_MBT0_MODE, mbt0);
+ if (mode != IODA3_MBT0_MODE_PE_SEG &&
+ mode != IODA3_MBT0_MODE_MDT)
+ return OPAL_PARAMETER;
+ } else if (enable == OPAL_ENABLE_M64_NON_SPLIT) {
+ if (GETFIELD(IODA3_MBT0_MODE, mbt0) !=
+ IODA3_MBT0_MODE_SINGLE_PE)
+ return OPAL_PARAMETER;
+ } else
+ return OPAL_PARAMETER;
+
+ base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbt0);
+ base = (base << 12);
+ mask = GETFIELD(IODA3_MBT1_MASK, mbt1);
+ if (base < p->mm0_base || !mask)
+ return OPAL_PARTIAL;
+
+ mbt0 |= IODA3_MBT0_ENABLE;
+ mbt1 |= IODA3_MBT1_ENABLE;
+ }
+
+ /* Update HW and cache */
+ p->mbt_cache[window_num][0] = mbt0;
+ p->mbt_cache[window_num][1] = mbt1;
+ phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
+ out_be64(p->regs + PHB_IODA_DATA0, mbt0);
+ out_be64(p->regs + PHB_IODA_DATA0, mbt1);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_map_pe_mmio_window(struct phb *phb,
+ uint64_t pe_number,
+ uint16_t window_type,
+ uint16_t window_num,
+ uint16_t segment_num)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t mbt0, mbt1, mdt0;
+
+ if (pe_number >= p->num_pes)
+ return OPAL_PARAMETER;
+
+ /*
+ * We support a combined MDT that has 4 columns. We let the OS
+	 * use column 0 for M32.
+ *
+ * We configure the 3 last BARs to map column 3..1 which by default
+ * are set to map segment# == pe#, but can be remapped here if we
+ * extend this function.
+ *
+ * The problem is that the current API was "hijacked" so that an
+ * attempt at remapping any segment of an M64 has the effect of
+ * turning it into a single-PE mode BAR. So if we want to support
+ * remapping we'll have to play around this for example by creating
+ * a new API or a new window type...
+ */
+ switch(window_type) {
+ case OPAL_IO_WINDOW_TYPE:
+ return OPAL_UNSUPPORTED;
+ case OPAL_M32_WINDOW_TYPE:
+ if (window_num != 0 || segment_num >= p->num_pes)
+ return OPAL_PARAMETER;
+
+ mdt0 = p->mdt_cache[segment_num];
+ mdt0 = SETFIELD(IODA3_MDT_PE_A, mdt0, pe_number);
+ phb4_ioda_sel(p, IODA3_TBL_MDT, segment_num, false);
+ out_be64(p->regs + PHB_IODA_DATA0, mdt0);
+ break;
+ case OPAL_M64_WINDOW_TYPE:
+ if (window_num == 0 || window_num >= p->mbt_size)
+ return OPAL_PARAMETER;
+
+ mbt0 = p->mbt_cache[window_num][0];
+ mbt1 = p->mbt_cache[window_num][1];
+
+ /* The BAR shouldn't be enabled yet */
+ if (mbt0 & IODA3_MBT0_ENABLE)
+ return OPAL_PARTIAL;
+
+ /* Set to single PE mode and configure the PE */
+ mbt0 = SETFIELD(IODA3_MBT0_MODE, mbt0,
+ IODA3_MBT0_MODE_SINGLE_PE);
+ mbt1 = SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, mbt1, pe_number);
+ p->mbt_cache[window_num][0] = mbt0;
+ p->mbt_cache[window_num][1] = mbt1;
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_map_pe_dma_window(struct phb *phb,
+ uint64_t pe_number,
+ uint16_t window_id,
+ uint16_t tce_levels,
+ uint64_t tce_table_addr,
+ uint64_t tce_table_size,
+ uint64_t tce_page_size)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t tts_encoded;
+ uint64_t data64 = 0;
+
+ /*
+ * We configure the PHB in 2 TVE per PE mode to match phb3.
+ * Current Linux implementation *requires* the two windows per
+ * PE.
+ *
+ * Note: On DD2.0 this is the normal mode of operation.
+ */
+
+ /*
+	 * Sanity check. We currently only support "2 windows per PE" mode,
+	 * i.e. only bit 59 of the PCI address is used to select the window.
+ */
+ if (pe_number >= p->num_pes || (window_id >> 1) != pe_number)
+ return OPAL_PARAMETER;
+
+ /*
+ * tce_table_size == 0 is used to disable an entry, in this case
+ * we ignore other arguments
+ */
+ if (tce_table_size == 0) {
+ phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+ p->tve_cache[window_id] = 0;
+ return OPAL_SUCCESS;
+ }
+
+ /* Additional arguments validation */
+ if (tce_levels < 1 || tce_levels > 5 ||
+ !is_pow2(tce_table_size) ||
+ tce_table_size < 0x1000)
+ return OPAL_PARAMETER;
+
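+	/*
+	 * The TVE encodes the table size as log2(size in bytes) - 11;
+	 * e.g. a 4K table encodes as 1, a 64K table as 5.
+	 */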
+ /* Encode TCE table size */
+ data64 = SETFIELD(IODA3_TVT_TABLE_ADDR, 0ul, tce_table_addr >> 12);
+ tts_encoded = ilog2(tce_table_size) - 11;
+ if (tts_encoded > 31)
+ return OPAL_PARAMETER;
+ data64 = SETFIELD(IODA3_TVT_TCE_TABLE_SIZE, data64, tts_encoded);
+
+ /* Encode TCE page size */
+ switch (tce_page_size) {
+ case 0x1000: /* 4K */
+ data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 1);
+ break;
+ case 0x10000: /* 64K */
+ data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 5);
+ break;
+ case 0x200000: /* 2M */
+ data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 10);
+ break;
+ case 0x40000000: /* 1G */
+ data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 19);
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ /* Encode number of levels */
+ data64 = SETFIELD(IODA3_TVT_NUM_LEVELS, data64, tce_levels - 1);
+
+ phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
+ out_be64(p->regs + PHB_IODA_DATA0, data64);
+ p->tve_cache[window_id] = data64;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_map_pe_dma_window_real(struct phb *phb,
+ uint64_t pe_number,
+ uint16_t window_id,
+ uint64_t pci_start_addr,
+ uint64_t pci_mem_size)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t end = pci_start_addr + pci_mem_size;
+ uint64_t tve;
+
+ if (pe_number >= p->num_pes ||
+ (window_id >> 1) != pe_number)
+ return OPAL_PARAMETER;
+
+ if (pci_mem_size) {
+ /* Enable */
+
+ /*
+		 * Check that the start address has the right TVE index;
+		 * we only support the 1-bit mode where each PE has 2
+		 * TVEs.
+ */
+ if ((pci_start_addr >> 59) != (window_id & 1))
+ return OPAL_PARAMETER;
+ pci_start_addr &= ((1ull << 59) - 1);
+ end = pci_start_addr + pci_mem_size;
+
+ /* We have to be 16M aligned */
+ if ((pci_start_addr & 0x00ffffff) ||
+ (pci_mem_size & 0x00ffffff))
+ return OPAL_PARAMETER;
+
+ /*
+		 * It *looks* like this is the max we can support (we need
+		 * to verify this). Also we are not checking for rollover,
+		 * but then we aren't trying too hard to protect ourselves
+		 * against a completely broken OS.
+ */
+ if (end > 0x0003ffffffffffffull)
+ return OPAL_PARAMETER;
+
+ /*
+ * Put start address bits 49:24 into TVE[52:53]||[0:23]
+ * and end address bits 49:24 into TVE[54:55]||[24:47]
+ * and set TVE[51]
+ */
+ tve = (pci_start_addr << 16) & (0xffffffull << 40);
+ tve |= (pci_start_addr >> 38) & (3ull << 10);
+ tve |= (end >> 8) & (0xfffffful << 16);
+ tve |= (end >> 40) & (3ull << 8);
+ tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
+ } else {
+ /* Disable */
+ tve = 0;
+ }
+
+ phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
+ out_be64(p->regs + PHB_IODA_DATA0, tve);
+ p->tve_cache[window_id] = tve;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_set_option(struct phb *phb, enum OpalPhbOption opt,
+ uint64_t setting)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t data64;
+
+ data64 = phb4_read_reg(p, PHB_CTRLR);
+ switch (opt) {
+ case OPAL_PHB_OPTION_TVE1_4GB:
+ if (setting > 1)
+ return OPAL_PARAMETER;
+
+ PHBDBG(p, "4GB bypass mode = %lld\n", setting);
+ if (setting)
+ data64 |= PPC_BIT(24);
+ else
+ data64 &= ~PPC_BIT(24);
+ break;
+ case OPAL_PHB_OPTION_MMIO_EEH_DISABLE:
+ if (setting > 1)
+ return OPAL_PARAMETER;
+
+ PHBDBG(p, "MMIO EEH Disable = %lld\n", setting);
+ if (setting)
+ data64 |= PPC_BIT(14);
+ else
+ data64 &= ~PPC_BIT(14);
+ break;
+ default:
+ return OPAL_UNSUPPORTED;
+ }
+ phb4_write_reg(p, PHB_CTRLR, data64);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_get_option(struct phb *phb, enum OpalPhbOption opt,
+ __be64 *setting)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t data64;
+
+ data64 = phb4_read_reg(p, PHB_CTRLR);
+ switch (opt) {
+ case OPAL_PHB_OPTION_TVE1_4GB:
+ *setting = cpu_to_be64((data64 & PPC_BIT(24)) ? 1 : 0);
+ break;
+ case OPAL_PHB_OPTION_MMIO_EEH_DISABLE:
+ *setting = cpu_to_be64((data64 & PPC_BIT(14)) ? 1 : 0);
+ break;
+ default:
+ return OPAL_UNSUPPORTED;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_set_ive_pe(struct phb *phb,
+ uint64_t pe_number,
+ uint32_t ive_num)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint32_t mist_idx;
+ uint32_t mist_quad;
+ uint32_t mist_shift;
+ uint64_t val;
+
+ if (pe_number >= p->num_pes || ive_num >= (p->num_irqs - 8))
+ return OPAL_PARAMETER;
+
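+	/*
+	 * Each 64-bit MIST entry packs four 16-bit fields, one per IVE;
+	 * quad 0 occupies the most significant 16 bits.
+	 */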
+ mist_idx = ive_num >> 2;
+ mist_quad = ive_num & 3;
+ mist_shift = (3 - mist_quad) << 4;
+ p->mist_cache[mist_idx] &= ~(0x0fffull << mist_shift);
+ p->mist_cache[mist_idx] |= ((uint64_t)pe_number) << mist_shift;
+
+ /* Note: This has the side effect of clearing P/Q, so this
+ * shouldn't be called while the interrupt is "hot"
+ */
+
+ phb4_ioda_sel(p, IODA3_TBL_MIST, mist_idx, false);
+
+ /* We need to inject the appropriate MIST write enable bit
+ * in the IODA table address register
+ */
+ val = in_be64(p->regs + PHB_IODA_ADDR);
+ val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 8 >> mist_quad);
+ out_be64(p->regs + PHB_IODA_ADDR, val);
+
+ /* Write entry */
+ out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[mist_idx]);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_get_msi_32(struct phb *phb,
+ uint64_t pe_number,
+ uint32_t ive_num,
+ uint8_t msi_range,
+ uint32_t *msi_address,
+ uint32_t *message_data)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+
+ /*
+	 * Sanity check. We needn't check mve_number (PE#)
+	 * here since the interrupt source is purely determined
+ * by its DMA address and data, but the check isn't
+ * harmful.
+ */
+ if (pe_number >= p->num_pes ||
+ ive_num >= (p->num_irqs - 8) ||
+	    msi_range != 1 || !msi_address || !message_data)
+ return OPAL_PARAMETER;
+
+ /*
+ * DMA address and data will form the IVE index.
+ * For more details, please refer to IODA2 spec.
+ */
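+	/*
+	 * Worked example (illustrative): ive_num 0x123 has its low 5 bits
+	 * (0x3) returned as the message data, while the remaining bits end
+	 * up at bit 9 and above of the address, giving 0xFFFF1200.
+	 */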
+ *msi_address = 0xFFFF0000 | ((ive_num << 4) & 0xFFFFFE0F);
+ *message_data = ive_num & 0x1F;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_get_msi_64(struct phb *phb,
+ uint64_t pe_number,
+ uint32_t ive_num,
+ uint8_t msi_range,
+ uint64_t *msi_address,
+ uint32_t *message_data)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+
+ /* Sanity check */
+ if (pe_number >= p->num_pes ||
+ ive_num >= (p->num_irqs - 8) ||
+ msi_range != 1 || !msi_address || !message_data)
+ return OPAL_PARAMETER;
+
+ /*
+ * DMA address and data will form the IVE index.
+ * For more details, please refer to IODA2 spec.
+ */
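+	/*
+	 * Same encoding as the 32-bit variant, with bit 60 of the address
+	 * set; e.g. (illustrative) ive_num 0x123 yields an address of
+	 * 0x1000000000001200 and message data 0x3.
+	 */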
+ *msi_address = (0x1ul << 60) | ((ive_num << 4) & 0xFFFFFFFFFFFFFE0Ful);
+ *message_data = ive_num & 0x1F;
+
+ return OPAL_SUCCESS;
+}
+
+static void phb4_rc_err_clear(struct phb4 *p)
+{
+ /* Init_47 - Clear errors */
+ phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, 0xffff);
+
+ if (p->ecap <= 0)
+ return;
+
+ phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
+ PCICAP_EXP_DEVSTAT_CE |
+ PCICAP_EXP_DEVSTAT_NFE |
+ PCICAP_EXP_DEVSTAT_FE |
+ PCICAP_EXP_DEVSTAT_UE);
+
+ if (p->aercap <= 0)
+ return;
+
+ /* Clear all UE status */
+ phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS,
+ 0xffffffff);
+ /* Clear all CE status */
+ phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS,
+ 0xffffffff);
+ /* Clear root error status */
+ phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA,
+ 0xffffffff);
+}
+
+static void phb4_err_clear_regb(struct phb4 *p)
+{
+ uint64_t val64;
+
+ val64 = phb4_read_reg(p, PHB_REGB_ERR_STATUS);
+ phb4_write_reg(p, PHB_REGB_ERR_STATUS, val64);
+ phb4_write_reg(p, PHB_REGB_ERR1_STATUS, 0x0ul);
+ phb4_write_reg(p, PHB_REGB_ERR_LOG_0, 0x0ul);
+ phb4_write_reg(p, PHB_REGB_ERR_LOG_1, 0x0ul);
+}
+
+/*
+ * The function can be called during error recovery for all classes of
+ * errors. This is new to PHB4; previous revisions had separate
+ * sequences for INF/ER/Fatal errors.
+ *
+ * "Rec #" in this function refer to "Recov_#" steps in the
+ * PHB4 INF recovery sequence.
+ */
+static void phb4_err_clear(struct phb4 *p)
+{
+ uint64_t val64;
+ uint64_t fir = phb4_read_reg(p, PHB_LEM_FIR_ACCUM);
+
+ /* Rec 1: Acquire the PCI config lock (we don't need to do this) */
+
+ /* Rec 2...15: Clear error status in RC config space */
+ phb4_rc_err_clear(p);
+
+ /* Rec 16...23: Clear PBL errors */
+ val64 = phb4_read_reg(p, PHB_PBL_ERR_STATUS);
+ phb4_write_reg(p, PHB_PBL_ERR_STATUS, val64);
+ phb4_write_reg(p, PHB_PBL_ERR1_STATUS, 0x0ul);
+ phb4_write_reg(p, PHB_PBL_ERR_LOG_0, 0x0ul);
+ phb4_write_reg(p, PHB_PBL_ERR_LOG_1, 0x0ul);
+
+ /* Rec 24...31: Clear REGB errors */
+ phb4_err_clear_regb(p);
+
+ /* Rec 32...59: Clear PHB error trap */
+ val64 = phb4_read_reg(p, PHB_TXE_ERR_STATUS);
+ phb4_write_reg(p, PHB_TXE_ERR_STATUS, val64);
+ phb4_write_reg(p, PHB_TXE_ERR1_STATUS, 0x0ul);
+ phb4_write_reg(p, PHB_TXE_ERR_LOG_0, 0x0ul);
+ phb4_write_reg(p, PHB_TXE_ERR_LOG_1, 0x0ul);
+
+ val64 = phb4_read_reg(p, PHB_RXE_ARB_ERR_STATUS);
+ phb4_write_reg(p, PHB_RXE_ARB_ERR_STATUS, val64);
+ phb4_write_reg(p, PHB_RXE_ARB_ERR1_STATUS, 0x0ul);
+ phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_0, 0x0ul);
+ phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_1, 0x0ul);
+
+ val64 = phb4_read_reg(p, PHB_RXE_MRG_ERR_STATUS);
+ phb4_write_reg(p, PHB_RXE_MRG_ERR_STATUS, val64);
+ phb4_write_reg(p, PHB_RXE_MRG_ERR1_STATUS, 0x0ul);
+ phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_0, 0x0ul);
+ phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_1, 0x0ul);
+
+ val64 = phb4_read_reg(p, PHB_RXE_TCE_ERR_STATUS);
+ phb4_write_reg(p, PHB_RXE_TCE_ERR_STATUS, val64);
+ phb4_write_reg(p, PHB_RXE_TCE_ERR1_STATUS, 0x0ul);
+ phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_0, 0x0ul);
+ phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_1, 0x0ul);
+
+ val64 = phb4_read_reg(p, PHB_ERR_STATUS);
+ phb4_write_reg(p, PHB_ERR_STATUS, val64);
+ phb4_write_reg(p, PHB_ERR1_STATUS, 0x0ul);
+ phb4_write_reg(p, PHB_ERR_LOG_0, 0x0ul);
+ phb4_write_reg(p, PHB_ERR_LOG_1, 0x0ul);
+
+ /* Rec 61/62: Clear FIR/WOF */
+ phb4_write_reg(p, PHB_LEM_FIR_AND_MASK, ~fir);
+ phb4_write_reg(p, PHB_LEM_WOF, 0x0ul);
+
+ /* Rec 63: Update LEM mask to its initial value */
+ phb4_write_reg(p, PHB_LEM_ERROR_MASK, 0x0ul);
+
+ /* Rec 64: Clear the PCI config lock (we don't need to do this) */
+}
+
+static void phb4_read_phb_status(struct phb4 *p,
+ struct OpalIoPhb4ErrorData *stat)
+{
+ uint32_t i;
+ __be64 *pPEST;
+ uint16_t __16;
+ uint32_t __32;
+ uint64_t __64;
+
+ memset(stat, 0, sizeof(struct OpalIoPhb4ErrorData));
+
+ /* Error data common part */
+ stat->common.version = cpu_to_be32(OPAL_PHB_ERROR_DATA_VERSION_1);
+ stat->common.ioType = cpu_to_be32(OPAL_PHB_ERROR_DATA_TYPE_PHB4);
+ stat->common.len = cpu_to_be32(sizeof(struct OpalIoPhb4ErrorData));
+
+ /* Use ASB for config space if the PHB is fenced */
+ if (p->flags & PHB4_AIB_FENCED)
+ p->flags |= PHB4_CFG_USE_ASB;
+
+ /* Grab RC bridge control, make it 32-bit */
+ phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &__16);
+ stat->brdgCtl = cpu_to_be32(__16);
+
+ /*
+ * Grab various RC PCIe capability registers. All device, slot
+ * and link status are 16-bit, so we grab the pair control+status
+ * for each of them
+ */
+ phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL, &__32);
+ stat->deviceStatus = cpu_to_be32(__32);
+ phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_SLOTCTL, &__32);
+ stat->slotStatus = cpu_to_be32(__32);
+ phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, &__32);
+ stat->linkStatus = cpu_to_be32(__32);
+
+ /*
+ * I assume those are the standard config space header, cmd & status
+	 * I assume those are the standard config space header: cmd & status
+	 * together make a 32-bit value. Secondary status is 16-bit so I'll
+	 * clear the top half of that one.
+ phb4_pcicfg_read32(&p->phb, 0, PCI_CFG_CMD, &__32);
+ stat->devCmdStatus = cpu_to_be32(__32);
+ phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, &__16);
+ stat->devSecStatus = cpu_to_be32(__16);
+
+ /* Grab a bunch of AER regs */
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA, &__32);
+ stat->rootErrorStatus = cpu_to_be32(__32);
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS, &__32);
+ stat->uncorrErrorStatus = cpu_to_be32(__32);
+
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS, &__32);
+ stat->corrErrorStatus = cpu_to_be32(__32);
+
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG0, &__32);
+ stat->tlpHdr1 = cpu_to_be32(__32);
+
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG1, &__32);
+ stat->tlpHdr2 = cpu_to_be32(__32);
+
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG2, &__32);
+ stat->tlpHdr3 = cpu_to_be32(__32);
+
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG3, &__32);
+ stat->tlpHdr4 = cpu_to_be32(__32);
+
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_SRCID, &__32);
+ stat->sourceId = cpu_to_be32(__32);
+
+
+ /* PEC NFIR, same as P8/PHB3 */
+ xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &__64);
+ stat->nFir = cpu_to_be64(__64);
+ xscom_read(p->chip_id, p->pe_stk_xscom + 0x3, &__64);
+ stat->nFirMask = cpu_to_be64(__64);
+ xscom_read(p->chip_id, p->pe_stk_xscom + 0x8, &__64);
+ stat->nFirWOF = cpu_to_be64(__64);
+
+ /* PHB4 inbound and outbound error Regs */
+ stat->phbPlssr = cpu_to_be64(phb4_read_reg_asb(p, PHB_CPU_LOADSTORE_STATUS));
+ stat->phbCsr = cpu_to_be64(phb4_read_reg_asb(p, PHB_DMA_CHAN_STATUS));
+ stat->lemFir = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_FIR_ACCUM));
+ stat->lemErrorMask = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_ERROR_MASK));
+ stat->lemWOF = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_WOF));
+ stat->phbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_STATUS));
+ stat->phbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR1_STATUS));
+ stat->phbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_LOG_0));
+ stat->phbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_LOG_1));
+ stat->phbTxeErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_STATUS));
+ stat->phbTxeFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR1_STATUS));
+ stat->phbTxeErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_0));
+ stat->phbTxeErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_1));
+ stat->phbRxeArbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_STATUS));
+ stat->phbRxeArbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR1_STATUS));
+ stat->phbRxeArbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_0));
+ stat->phbRxeArbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_1));
+ stat->phbRxeMrgErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_STATUS));
+ stat->phbRxeMrgFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR1_STATUS));
+ stat->phbRxeMrgErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_0));
+ stat->phbRxeMrgErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_1));
+ stat->phbRxeTceErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_STATUS));
+ stat->phbRxeTceFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR1_STATUS));
+ stat->phbRxeTceErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_0));
+ stat->phbRxeTceErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_1));
+
+ /* PHB4 REGB error registers */
+ stat->phbPblErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_STATUS));
+ stat->phbPblFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR1_STATUS));
+ stat->phbPblErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_0));
+ stat->phbPblErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_1));
+
+ stat->phbPcieDlpErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERR_STATUS));
+ stat->phbPcieDlpErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG1));
+ stat->phbPcieDlpErrorLog2 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG2));
+
+ stat->phbRegbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_STATUS));
+ stat->phbRegbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR1_STATUS));
+ stat->phbRegbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_0));
+ stat->phbRegbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_1));
+
+ /*
+ * Grab PESTA & B content. The error bit (bit#0) should be
+ * fetched from IODA and the remaining content from the
+ * memory-resident tables.
+ */
+ pPEST = (__be64 *)p->tbl_pest;
+ phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, true);
+ for (i = 0; i < p->max_num_pes; i++) {
+ stat->pestA[i] = cpu_to_be64(phb4_read_reg_asb(p, PHB_IODA_DATA0));
+ stat->pestA[i] |= pPEST[2 * i];
+ }
+
+ phb4_ioda_sel(p, IODA3_TBL_PESTB, 0, true);
+ for (i = 0; i < p->max_num_pes; i++) {
+ stat->pestB[i] = cpu_to_be64(phb4_read_reg_asb(p, PHB_IODA_DATA0));
+ stat->pestB[i] |= pPEST[2 * i + 1];
+ }
+}
+
+static void __unused phb4_dump_peltv(struct phb4 *p)
+{
+ int stride = p->max_num_pes / 64;
+ uint64_t *tbl = (void *) p->tbl_peltv;
+ unsigned int pe;
+
+ PHBERR(p, "PELT-V: base addr: %p size: %llx (%d PEs, stride = %d)\n",
+ tbl, p->tbl_peltv_size, p->max_num_pes, stride);
+
+ for (pe = 0; pe < p->max_num_pes; pe++) {
+ unsigned int i, j;
+ uint64_t sum = 0;
+
+ i = pe * stride;
+
+ /*
+ * Only print an entry if there are bits set in the PE's
+ * PELT-V entry. There are a few hundred possible PEs and
+ * generally only a handful will be in use.
+ */
+
+ for (j = 0; j < stride; j++)
+ sum |= tbl[i + j];
+ if (!sum)
+ continue; /* unused PE, skip it */
+
+ if (p->max_num_pes == 512) {
+ PHBERR(p, "PELT-V[%03x] = "
+ "%016llx %016llx %016llx %016llx"
+ "%016llx %016llx %016llx %016llx\n", pe,
+ tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3],
+ tbl[i + 4], tbl[i + 5], tbl[i + 6], tbl[i + 7]);
+ } else if (p->max_num_pes == 256) {
+ PHBERR(p, "PELT-V[%03x] = "
+ "%016llx %016llx %016llx %016llx\n", pe,
+ tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3]);
+ }
+ }
+}
+
+static void __unused phb4_dump_ioda_table(struct phb4 *p, int table)
+{
+ const char *name;
+ int entries, i;
+
+ switch (table) {
+ case IODA3_TBL_LIST:
+ name = "LIST";
+ entries = 8;
+ break;
+ case IODA3_TBL_MIST:
+ name = "MIST";
+ entries = 1024;
+ break;
+ case IODA3_TBL_RCAM:
+ name = "RCAM";
+ entries = 128;
+ break;
+ case IODA3_TBL_MRT:
+ name = "MRT";
+ entries = 16;
+ break;
+ case IODA3_TBL_PESTA:
+ name = "PESTA";
+ entries = 512;
+ break;
+ case IODA3_TBL_PESTB:
+ name = "PESTB";
+ entries = 512;
+ break;
+ case IODA3_TBL_TVT:
+ name = "TVT";
+ entries = 512;
+ break;
+ case IODA3_TBL_TCAM:
+ name = "TCAM";
+ entries = 1024;
+ break;
+ case IODA3_TBL_TDR:
+ name = "TDR";
+ entries = 1024;
+ break;
+ case IODA3_TBL_MBT: /* special case, see below */
+ name = "MBT";
+ entries = 64;
+ break;
+ case IODA3_TBL_MDT:
+ name = "MDT";
+ entries = 512;
+ break;
+ case IODA3_TBL_PEEV:
+ name = "PEEV";
+ entries = 8;
+ break;
+ default:
+ PHBERR(p, "Invalid IODA table %d!\n", table);
+ return;
+ }
+
+ PHBERR(p, "Start %s dump (only non-zero entries are printed):\n", name);
+
+ phb4_ioda_sel(p, table, 0, true);
+
+ /*
+ * Each entry in the MBT is 16 bytes. Every other table has 8 byte
+ * entries, so we special case the MBT to keep the output readable.
+ */
+ if (table == IODA3_TBL_MBT) {
+ for (i = 0; i < 32; i++) {
+ uint64_t v1 = phb4_read_reg_asb(p, PHB_IODA_DATA0);
+ uint64_t v2 = phb4_read_reg_asb(p, PHB_IODA_DATA0);
+
+ if (!v1 && !v2)
+ continue;
+ PHBERR(p, "MBT[%03x] = %016llx %016llx\n", i, v1, v2);
+ }
+ } else {
+ for (i = 0; i < entries; i++) {
+ uint64_t v = phb4_read_reg_asb(p, PHB_IODA_DATA0);
+
+ if (!v)
+ continue;
+ PHBERR(p, "%s[%03x] = %016llx\n", name, i, v);
+ }
+ }
+
+ PHBERR(p, "End %s dump\n", name);
+}
+
+static void phb4_eeh_dump_regs(struct phb4 *p)
+{
+ struct OpalIoPhb4ErrorData *s;
+ uint16_t reg;
+ unsigned int i;
+
+ if (!verbose_eeh)
+ return;
+
+ s = zalloc(sizeof(struct OpalIoPhb4ErrorData));
+ if (!s) {
+ PHBERR(p, "Failed to allocate error info !\n");
+ return;
+ }
+ phb4_read_phb_status(p, s);
+
+ PHBERR(p, " brdgCtl = %08x\n", be32_to_cpu(s->brdgCtl));
+
+ /* PHB4 cfg regs */
+ PHBERR(p, " deviceStatus = %08x\n", be32_to_cpu(s->deviceStatus));
+ PHBERR(p, " slotStatus = %08x\n", be32_to_cpu(s->slotStatus));
+ PHBERR(p, " linkStatus = %08x\n", be32_to_cpu(s->linkStatus));
+ PHBERR(p, " devCmdStatus = %08x\n", be32_to_cpu(s->devCmdStatus));
+ PHBERR(p, " devSecStatus = %08x\n", be32_to_cpu(s->devSecStatus));
+ PHBERR(p, " rootErrorStatus = %08x\n", be32_to_cpu(s->rootErrorStatus));
+ PHBERR(p, " corrErrorStatus = %08x\n", be32_to_cpu(s->corrErrorStatus));
+ PHBERR(p, " uncorrErrorStatus = %08x\n", be32_to_cpu(s->uncorrErrorStatus));
+
+ /* Two registers that are not part of the OPAL API but are useful */
+ phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL, &reg);
+ PHBERR(p, " devctl = %08x\n", reg);
+ phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
+ &reg);
+ PHBERR(p, " devStat = %08x\n", reg);
+
+ /* Byte swap TLP headers so they are the same as the PCIe spec */
+ PHBERR(p, " tlpHdr1 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr1)));
+ PHBERR(p, " tlpHdr2 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr2)));
+ PHBERR(p, " tlpHdr3 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr3)));
+ PHBERR(p, " tlpHdr4 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr4)));
+ PHBERR(p, " sourceId = %08x\n", be32_to_cpu(s->sourceId));
+ PHBERR(p, " nFir = %016llx\n", be64_to_cpu(s->nFir));
+ PHBERR(p, " nFirMask = %016llx\n", be64_to_cpu(s->nFirMask));
+ PHBERR(p, " nFirWOF = %016llx\n", be64_to_cpu(s->nFirWOF));
+ PHBERR(p, " phbPlssr = %016llx\n", be64_to_cpu(s->phbPlssr));
+ PHBERR(p, " phbCsr = %016llx\n", be64_to_cpu(s->phbCsr));
+ PHBERR(p, " lemFir = %016llx\n", be64_to_cpu(s->lemFir));
+ PHBERR(p, " lemErrorMask = %016llx\n", be64_to_cpu(s->lemErrorMask));
+ PHBERR(p, " lemWOF = %016llx\n", be64_to_cpu(s->lemWOF));
+ PHBERR(p, " phbErrorStatus = %016llx\n", be64_to_cpu(s->phbErrorStatus));
+ PHBERR(p, " phbFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbFirstErrorStatus));
+ PHBERR(p, " phbErrorLog0 = %016llx\n", be64_to_cpu(s->phbErrorLog0));
+ PHBERR(p, " phbErrorLog1 = %016llx\n", be64_to_cpu(s->phbErrorLog1));
+ PHBERR(p, " phbTxeErrorStatus = %016llx\n", be64_to_cpu(s->phbTxeErrorStatus));
+ PHBERR(p, " phbTxeFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbTxeFirstErrorStatus));
+ PHBERR(p, " phbTxeErrorLog0 = %016llx\n", be64_to_cpu(s->phbTxeErrorLog0));
+ PHBERR(p, " phbTxeErrorLog1 = %016llx\n", be64_to_cpu(s->phbTxeErrorLog1));
+ PHBERR(p, " phbRxeArbErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeArbErrorStatus));
+ PHBERR(p, "phbRxeArbFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeArbFirstErrorStatus));
+ PHBERR(p, " phbRxeArbErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeArbErrorLog0));
+ PHBERR(p, " phbRxeArbErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeArbErrorLog1));
+ PHBERR(p, " phbRxeMrgErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorStatus));
+ PHBERR(p, "phbRxeMrgFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeMrgFirstErrorStatus));
+ PHBERR(p, " phbRxeMrgErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorLog0));
+ PHBERR(p, " phbRxeMrgErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorLog1));
+ PHBERR(p, " phbRxeTceErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeTceErrorStatus));
+ PHBERR(p, "phbRxeTceFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeTceFirstErrorStatus));
+ PHBERR(p, " phbRxeTceErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeTceErrorLog0));
+ PHBERR(p, " phbRxeTceErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeTceErrorLog1));
+ PHBERR(p, " phbPblErrorStatus = %016llx\n", be64_to_cpu(s->phbPblErrorStatus));
+ PHBERR(p, " phbPblFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbPblFirstErrorStatus));
+ PHBERR(p, " phbPblErrorLog0 = %016llx\n", be64_to_cpu(s->phbPblErrorLog0));
+ PHBERR(p, " phbPblErrorLog1 = %016llx\n", be64_to_cpu(s->phbPblErrorLog1));
+ PHBERR(p, " phbPcieDlpErrorLog1 = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorLog1));
+ PHBERR(p, " phbPcieDlpErrorLog2 = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorLog2));
+ PHBERR(p, " phbPcieDlpErrorStatus = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorStatus));
+
+ PHBERR(p, " phbRegbErrorStatus = %016llx\n", be64_to_cpu(s->phbRegbErrorStatus));
+ PHBERR(p, " phbRegbFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbRegbFirstErrorStatus));
+ PHBERR(p, " phbRegbErrorLog0 = %016llx\n", be64_to_cpu(s->phbRegbErrorLog0));
+ PHBERR(p, " phbRegbErrorLog1 = %016llx\n", be64_to_cpu(s->phbRegbErrorLog1));
+
+ for (i = 0; i < p->max_num_pes; i++) {
+ if (!s->pestA[i] && !s->pestB[i])
+ continue;
+ PHBERR(p, " PEST[%03x] = %016llx %016llx\n",
+ i, be64_to_cpu(s->pestA[i]), be64_to_cpu(s->pestB[i]));
+ }
+ free(s);
+}
+
+static int64_t phb4_set_pe(struct phb *phb,
+ uint64_t pe_number,
+ uint64_t bdfn,
+ uint8_t bcompare,
+ uint8_t dcompare,
+ uint8_t fcompare,
+ uint8_t action)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t mask, idx;
+
+ /* Sanity check */
+ if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
+ return OPAL_PARAMETER;
+ if (pe_number >= p->num_pes || bdfn > 0xffff ||
+ bcompare > OpalPciBusAll ||
+ dcompare > OPAL_COMPARE_RID_DEVICE_NUMBER ||
+ fcompare > OPAL_COMPARE_RID_FUNCTION_NUMBER)
+ return OPAL_PARAMETER;
+
+ /* match everything by default */
+ mask = 0;
+
+ /* Figure out the RID range */
+ if (bcompare != OpalPciBusAny)
+ mask = ((0x1 << (bcompare + 1)) - 1) << (15 - bcompare);
+
+ if (dcompare == OPAL_COMPARE_RID_DEVICE_NUMBER)
+ mask |= 0xf8;
+
+ if (fcompare == OPAL_COMPARE_RID_FUNCTION_NUMBER)
+ mask |= 0x7;
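+ /*
+ * Worked example (illustration only): bcompare == 7 (compare all
+ * bus bits) yields mask 0xff00; adding device and function compare
+ * extends it to 0xffff, i.e. an exact BDFN match. With bus compare
+ * alone, every RID on the matching bus maps to the PE in the loop
+ * below.
+ */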
+
+ if (action == OPAL_UNMAP_PE)
+ pe_number = PHB4_RESERVED_PE_NUM(p);
+
+ /* Map or unmap the RTT range */
+ for (idx = 0; idx < RTT_TABLE_ENTRIES; idx++)
+ if ((idx & mask) == (bdfn & mask))
+ p->tbl_rtt[idx] = cpu_to_be16(pe_number);
+
+ /* Invalidate the RID Translation Cache (RTC) inside the PHB */
+ out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_set_peltv(struct phb *phb,
+ uint32_t parent_pe,
+ uint32_t child_pe,
+ uint8_t state)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint32_t idx, mask;
+
+ /* Sanity check */
+ if (parent_pe >= p->num_pes || child_pe >= p->num_pes)
+ return OPAL_PARAMETER;
+
+ /* Find index for parent PE */
+ idx = parent_pe * (p->max_num_pes / 8);
+ idx += (child_pe / 8);
+ mask = 0x1 << (7 - (child_pe % 8));
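+ /*
+ * Worked example (illustration only): with max_num_pes = 512 each
+ * parent PE owns a 64-byte row, so parent_pe = 4, child_pe = 10
+ * gives idx = 4 * 64 + 1 = 257 and mask = 0x20 (child bit counted
+ * from the MSB of its byte).
+ */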
+
+ if (state)
+ p->tbl_peltv[idx] |= mask;
+ else
+ p->tbl_peltv[idx] &= ~mask;
+
+ return OPAL_SUCCESS;
+}
+
+static void phb4_prepare_link_change(struct pci_slot *slot, bool is_up)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ uint32_t reg32;
+
+ p->has_link = is_up;
+
+ if (is_up) {
+ /* Clear AER receiver error status */
+ phb4_pcicfg_write32(&p->phb, 0, p->aercap +
+ PCIECAP_AER_CE_STATUS,
+ PCIECAP_AER_CE_RECVR_ERR);
+ /* Unmask receiver error status in AER */
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap +
+ PCIECAP_AER_CE_MASK, &reg32);
+ reg32 &= ~PCIECAP_AER_CE_RECVR_ERR;
+ phb4_pcicfg_write32(&p->phb, 0, p->aercap +
+ PCIECAP_AER_CE_MASK, reg32);
+
+ /* Don't block PCI-CFG */
+ p->flags &= ~PHB4_CFG_BLOCKED;
+
+ /* Re-enable link down errors */
+ out_be64(p->regs + PHB_PCIE_MISC_STRAP,
+ 0x0000060000000000ull);
+
+ /* Re-enable error status indicators that trigger irqs */
+ out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE,
+ 0x2130006efca8bc00ull);
+ out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE,
+ 0x0080000000000000ull);
+ out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE,
+ 0xde0fff91035743ffull);
+
+ } else {
+ /* Mask AER receiver error */
+ phb4_pcicfg_read32(&p->phb, 0, p->aercap +
+ PCIECAP_AER_CE_MASK, &reg32);
+ reg32 |= PCIECAP_AER_CE_RECVR_ERR;
+ phb4_pcicfg_write32(&p->phb, 0, p->aercap +
+ PCIECAP_AER_CE_MASK, reg32);
+
+ /* Clear error link enable & error link down kill enable */
+ out_be64(p->regs + PHB_PCIE_MISC_STRAP, 0);
+
+ /* Disable all error status indicators that trigger irqs */
+ out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE, 0);
+ out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE, 0);
+ out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE, 0);
+
+ /* Block PCI-CFG access */
+ p->flags |= PHB4_CFG_BLOCKED;
+ }
+}
+
+static int64_t phb4_get_presence_state(struct pci_slot *slot, uint8_t *val)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ uint64_t hps, dtctl;
+
+ /* Test for PHB in error state ? */
+ if (p->broken)
+ return OPAL_HARDWARE;
+
+ /* Check hotplug status */
+ hps = in_be64(p->regs + PHB_PCIE_HOTPLUG_STATUS);
+ if (!(hps & PHB_PCIE_HPSTAT_PRESENCE)) {
+ *val = OPAL_PCI_SLOT_PRESENT;
+ } else {
+ /*
+ * If it says not present but link is up, then we assume
+ * we are on a broken simulation environment and still
+ * return a valid presence. Otherwise, not present.
+ */
+ dtctl = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (dtctl & PHB_PCIE_DLP_TL_LINKACT) {
+ PHBERR(p, "Presence detect 0 but link set !\n");
+ *val = OPAL_PCI_SLOT_PRESENT;
+ } else {
+ *val = OPAL_PCI_SLOT_EMPTY;
+ }
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_get_link_info(struct pci_slot *slot, uint8_t *speed,
+ uint8_t *width)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ uint64_t reg;
+ uint16_t state;
+ int64_t rc;
+ uint8_t s;
+
+ /* Check whether the link is up and, if so, find the actual speed */
+ reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (!(reg & PHB_PCIE_DLP_TL_LINKACT)) {
+ *width = 0;
+ if (speed)
+ *speed = 0;
+ return OPAL_SUCCESS;
+ }
+
+ rc = phb4_pcicfg_read16(&p->phb, 0,
+ p->ecap + PCICAP_EXP_LSTAT, &state);
+ if (rc != OPAL_SUCCESS) {
+ PHBERR(p, "%s: Error %lld getting link state\n", __func__, rc);
+ return OPAL_HARDWARE;
+ }
+
+ if (state & PCICAP_EXP_LSTAT_DLLL_ACT) {
+ *width = ((state & PCICAP_EXP_LSTAT_WIDTH) >> 4);
+ s = state & PCICAP_EXP_LSTAT_SPEED;
+ } else {
+ *width = 0;
+ s = 0;
+ }
+
+ if (speed)
+ *speed = s;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_get_link_state(struct pci_slot *slot, uint8_t *val)
+{
+ return phb4_get_link_info(slot, NULL, val);
+}
+
+static int64_t phb4_retry_state(struct pci_slot *slot)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+
+ /* Mark link as down */
+ phb4_prepare_link_change(slot, false);
+
+ /* Last attempt to activate link */
+ if (slot->link_retries == 1) {
+ if (slot->state == PHB4_SLOT_LINK_WAIT) {
+ PHBERR(p, "Falling back to GEN1 training\n");
+ p->max_link_speed = 1;
+ }
+ }
+
+ if (!slot->link_retries--) {
+ switch (slot->state) {
+ case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
+ PHBERR(p, "Presence detected but no electrical link\n");
+ break;
+ case PHB4_SLOT_LINK_WAIT:
+ PHBERR(p, "Electrical link detected but won't train\n");
+ break;
+ case PHB4_SLOT_LINK_STABLE:
+ PHBERR(p, "Linked trained but was degraded or unstable\n");
+ break;
+ default:
+ PHBERR(p, "Unknown link issue\n");
+ }
+ return OPAL_HARDWARE;
+ }
+
+ pci_slot_set_state(slot, PHB4_SLOT_CRESET_START);
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
+}
+
+static uint64_t phb4_train_info(struct phb4 *p, uint64_t reg, unsigned long dt)
+{
+ uint64_t ltssm_state = GETFIELD(PHB_PCIE_DLP_LTSSM_TRC, reg);
+ char s[80];
+
+ snprintf(s, sizeof(s), "TRACE:0x%016llx % 2lims",
+ reg, tb_to_msecs(dt));
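+ /*
+ * The resulting line has the form
+ *   TRACE:0x<reg> <ms>ms <trained|training|presence> GEN<speed>:x<width>:<ltssm state>
+ * i.e. raw register value, time since start, link status, current
+ * speed/width and the decoded LTSSM state appended below.
+ */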
+
+ if (reg & PHB_PCIE_DLP_TL_LINKACT)
+ snprintf(s, sizeof(s), "%s trained ", s);
+ else if (reg & PHB_PCIE_DLP_TRAINING)
+ snprintf(s, sizeof(s), "%s training", s);
+ else if (reg & PHB_PCIE_DLP_INBAND_PRESENCE)
+ snprintf(s, sizeof(s), "%s presence", s);
+ else
+ snprintf(s, sizeof(s), "%s ", s);
+
+ snprintf(s, sizeof(s), "%s GEN%lli:x%02lli:", s,
+ GETFIELD(PHB_PCIE_DLP_LINK_SPEED, reg),
+ GETFIELD(PHB_PCIE_DLP_LINK_WIDTH, reg));
+
+ switch (ltssm_state) {
+ case PHB_PCIE_DLP_LTSSM_RESET:
+ snprintf(s, sizeof(s), "%sreset", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_DETECT:
+ snprintf(s, sizeof(s), "%sdetect", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_POLLING:
+ snprintf(s, sizeof(s), "%spolling", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_CONFIG:
+ snprintf(s, sizeof(s), "%sconfig", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_L0:
+ snprintf(s, sizeof(s), "%sL0", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_REC:
+ snprintf(s, sizeof(s), "%srecovery", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_L1:
+ snprintf(s, sizeof(s), "%sL1", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_L2:
+ snprintf(s, sizeof(s), "%sL2", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_HOTRESET:
+ snprintf(s, sizeof(s), "%shotreset", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_DISABLED:
+ snprintf(s, sizeof(s), "%sdisabled", s);
+ break;
+ case PHB_PCIE_DLP_LTSSM_LOOPBACK:
+ snprintf(s, sizeof(s), "%sloopback", s);
+ break;
+ default:
+ snprintf(s, sizeof(s), "%sunvalid", s);
+ }
+ PHBNOTICE(p, "%s\n", s);
+
+ return ltssm_state;
+}
+
+static void phb4_dump_pec_err_regs(struct phb4 *p)
+{
+ uint64_t nfir_p_wof, nfir_n_wof, err_aib;
+ uint64_t err_rpt0, err_rpt1;
+
+ /* Read the PCI and NEST FIRs and dump them. Also cache PCI/NEST FIRs */
+ xscom_read(p->chip_id,
+ p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR, &p->pfir_cache);
+ xscom_read(p->chip_id,
+ p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR_WOF, &nfir_p_wof);
+ xscom_read(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
+ xscom_read(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR_WOF, &nfir_n_wof);
+ xscom_read(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT0, &err_rpt0);
+ xscom_read(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT1, &err_rpt1);
+ xscom_read(p->chip_id,
+ p->pci_stk_xscom + XPEC_PCI_STK_PBAIB_ERR_REPORT, &err_aib);
+
+ PHBERR(p, " PCI FIR=%016llx\n", p->pfir_cache);
+ PHBERR(p, " PCI FIR WOF=%016llx\n", nfir_p_wof);
+ PHBERR(p, " NEST FIR=%016llx\n", p->nfir_cache);
+ PHBERR(p, " NEST FIR WOF=%016llx\n", nfir_n_wof);
+ PHBERR(p, " ERR RPT0=%016llx\n", err_rpt0);
+ PHBERR(p, " ERR RPT1=%016llx\n", err_rpt1);
+ PHBERR(p, " AIB ERR=%016llx\n", err_aib);
+}
+
+static void phb4_dump_capp_err_regs(struct phb4 *p)
+{
+ uint64_t fir, apc_master_err, snoop_err, transport_err;
+ uint64_t tlbi_err, capp_err_status;
+ uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
+
+ xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
+ xscom_read(p->chip_id, CAPP_APC_MASTER_ERR_RPT + offset,
+ &apc_master_err);
+ xscom_read(p->chip_id, CAPP_SNOOP_ERR_RTP + offset, &snoop_err);
+ xscom_read(p->chip_id, CAPP_TRANSPORT_ERR_RPT + offset, &transport_err);
+ xscom_read(p->chip_id, CAPP_TLBI_ERR_RPT + offset, &tlbi_err);
+ xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &capp_err_status);
+
+ PHBERR(p, " CAPP FIR=%016llx\n", fir);
+ PHBERR(p, "CAPP APC MASTER ERR=%016llx\n", apc_master_err);
+ PHBERR(p, " CAPP SNOOP ERR=%016llx\n", snoop_err);
+ PHBERR(p, " CAPP TRANSPORT ERR=%016llx\n", transport_err);
+ PHBERR(p, " CAPP TLBI ERR=%016llx\n", tlbi_err);
+ PHBERR(p, " CAPP ERR STATUS=%016llx\n", capp_err_status);
+}
+
+/* Check if AIB is fenced via PBCQ NFIR */
+static bool phb4_fenced(struct phb4 *p)
+{
+
+ /* Already fenced ? */
+ if (p->flags & PHB4_AIB_FENCED)
+ return true;
+
+ /*
+ * An all-1s value from the PHB indicates a PHB freeze/fence. We
+ * don't really differentiate between the two at this point.
+ */
+ if (in_be64(p->regs + PHB_CPU_LOADSTORE_STATUS)!= 0xfffffffffffffffful)
+ return false;
+
+ /* Mark ourselves fenced */
+ p->flags |= PHB4_AIB_FENCED;
+
+ PHBERR(p, "PHB Freeze/Fence detected !\n");
+ phb4_dump_pec_err_regs(p);
+
+ /*
+ * dump capp error registers in case phb was fenced due to capp.
+ * Expect p->nfir_cache already updated in phb4_dump_pec_err_regs()
+ */
+ if (p->nfir_cache & XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP)
+ phb4_dump_capp_err_regs(p);
+
+ phb4_eeh_dump_regs(p);
+
+ return true;
+}
+
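+/*
+ * Sanity check a register read. An all-1s value may mean the PHB is
+ * fenced: if phb4_fenced() confirms it, the read is reported as invalid,
+ * otherwise an all-1s value from a healthy PHB is still accepted.
+ */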
+static bool phb4_check_reg(struct phb4 *p, uint64_t reg)
+{
+ if (reg == 0xffffffffffffffffUL)
+ return !phb4_fenced(p);
+ return true;
+}
+
+static void phb4_get_info(struct phb *phb, uint16_t bdfn, uint8_t *speed,
+ uint8_t *width)
+{
+ int32_t ecap;
+ uint32_t cap;
+
+ ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP);
+ pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_LCAP, &cap);
+ *width = (cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4;
+ *speed = cap & PCICAP_EXP_LCAP_MAXSPD;
+}
+
+#define PVR_POWER9_CUMULUS 0x00002000
+
+static bool phb4_chip_retry_workaround(void)
+{
+ unsigned int pvr;
+
+ if (pci_retry_all)
+ return true;
+
+ /* Chips that need this retry are:
+ * - CUMULUS DD1.0
+ * - NIMBUS DD2.0 (and DD1.0, but it is unsupported so no check).
+ */
+ pvr = mfspr(SPR_PVR);
+ if (pvr & PVR_POWER9_CUMULUS) {
+ if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0))
+ return true;
+ } else { /* NIMBUS */
+ if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
+ return true;
+ }
+ return false;
+}
+
+struct pci_card_id {
+ uint16_t vendor;
+ uint16_t device;
+};
+
+static struct pci_card_id retry_allowlist[] = {
+ { 0x1000, 0x005d }, /* LSI Logic MegaRAID SAS-3 3108 */
+ { 0x1000, 0x00c9 }, /* LSI MPT SAS-3 */
+ { 0x104c, 0x8241 }, /* TI xHCI USB */
+ { 0x1077, 0x2261 }, /* QLogic ISP2722-based 16/32Gb FC */
+ { 0x10b5, 0x8725 }, /* PLX Switch: p9dsu, witherspoon */
+ { 0x10b5, 0x8748 }, /* PLX Switch: ZZ */
+ { 0x11f8, 0xf117 }, /* PMC-Sierra/MicroSemi NV1604 */
+ { 0x15b3, 0x1013 }, /* Mellanox ConnectX-4 */
+ { 0x15b3, 0x1017 }, /* Mellanox ConnectX-5 */
+ { 0x15b3, 0x1019 }, /* Mellanox ConnectX-5 Ex */
+ { 0x1a03, 0x1150 }, /* ASPEED AST2500 Switch */
+ { 0x8086, 0x10fb }, /* Intel x520 10G Eth */
+ { 0x9005, 0x028d }, /* MicroSemi PM8069 */
+};
+
+#define VENDOR(vdid) ((vdid) & 0xffff)
+#define DEVICE(vdid) (((vdid) >> 16) & 0xffff)
+
+static bool phb4_adapter_in_allowlist(uint32_t vdid)
+{
+ int i;
+
+ if (pci_retry_all)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(retry_allowlist); i++)
+ if ((retry_allowlist[i].vendor == VENDOR(vdid)) &&
+ (retry_allowlist[i].device == DEVICE(vdid)))
+ return true;
+
+ return false;
+}
+
+static struct pci_card_id lane_eq_disable[] = {
+ { 0x10de, 0x17fd }, /* Nvidia GM200GL [Tesla M40] */
+ { 0x10de, 0x1db4 }, /* Nvidia GV100 */
+};
+
+static bool phb4_lane_eq_retry_allowlist(uint32_t vdid)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lane_eq_disable); i++)
+ if ((lane_eq_disable[i].vendor == VENDOR(vdid)) &&
+ (lane_eq_disable[i].device == DEVICE(vdid)))
+ return true;
+ return false;
+}
+
+static void phb4_lane_eq_change(struct phb4 *p, uint32_t vdid)
+{
+ p->lane_eq_en = !phb4_lane_eq_retry_allowlist(vdid);
+}
+
+static bool phb4_link_optimal(struct pci_slot *slot, uint32_t *vdid)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ uint64_t reg;
+ uint32_t id;
+ uint16_t bdfn, lane_errs;
+ uint8_t trained_speed, dev_speed, target_speed, rx_errs;
+ uint8_t trained_width, dev_width, target_width;
+ bool optimal_speed, optimal_width, optimal, retry_enabled, rx_err_ok;
+
+
+ /* Current trained state */
+ phb4_get_link_info(slot, &trained_speed, &trained_width);
+
+ /* Get device capability */
+ bdfn = 0x0100; /* bus=1 dev=0 fn=0 */
+ /* Since this is the first access, we need to wait for CRS */
+ if (!pci_wait_crs(slot->phb, bdfn, &id))
+ return true;
+ phb4_get_info(slot->phb, bdfn, &dev_speed, &dev_width);
+
+ /* Work out if we are optimally trained */
+ target_speed = MIN(p->max_link_speed, dev_speed);
+ optimal_speed = (trained_speed >= target_speed);
+ target_width = MIN(p->max_link_width, dev_width);
+ optimal_width = (trained_width >= target_width);
+ optimal = optimal_width && optimal_speed;
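+ /*
+ * Example (illustration only): a GEN4-capable x8 PHB with a GEN3
+ * x16 device gives target_speed = GEN3 and target_width = x8, so
+ * training at GEN3 x8 or better counts as optimal.
+ */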
+ retry_enabled = (phb4_chip_retry_workaround() &&
+ phb4_adapter_in_allowlist(id)) ||
+ phb4_lane_eq_retry_allowlist(id);
+ reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_COUNTERS);
+ rx_errs = GETFIELD(PHB_PCIE_DLP_RX_ERR_CNT, reg);
+ rx_err_ok = (rx_errs < rx_err_max);
+ reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_STATUS);
+ lane_errs = GETFIELD(PHB_PCIE_DLP_LANE_ERR, reg);
+
+ PHBDBG(p, "LINK: Card [%04x:%04x] %s Retry:%s\n", VENDOR(id),
+ DEVICE(id), optimal ? "Optimal" : "Degraded",
+ retry_enabled ? "enabled" : "disabled");
+ PHBDBG(p, "LINK: Speed Train:GEN%i PHB:GEN%i DEV:GEN%i%s\n",
+ trained_speed, p->max_link_speed, dev_speed,
+ optimal_speed ? "" : " *");
+ PHBDBG(p, "LINK: Width Train:x%02i PHB:x%02i DEV:x%02i%s\n",
+ trained_width, p->max_link_width, dev_width,
+ optimal_width ? "" : " *");
+ PHBDBG(p, "LINK: RX Errors Now:%i Max:%i Lane:0x%04x%s\n",
+ rx_errs, rx_err_max, lane_errs, rx_err_ok ? "" : " *");
+
+ if (vdid)
+ *vdid = id;
+
+ /* Always do RX error retry irrespective of chip and card */
+ if (!rx_err_ok)
+ return false;
+
+ if (!retry_enabled)
+ return true;
+
+ return optimal;
+}
+
+/*
+ * This is a trace function to watch what's happening during PCIe link
+ * training. If any errors are detected it simply returns so the
+ * normal code can deal with them.
+ */
+static void phb4_link_trace(struct phb4 *p, uint64_t target_state, int max_ms)
+{
+ unsigned long now, end, start = mftb(), state = 0;
+ uint64_t trwctl, reg, reglast = -1;
+ bool enabled;
+
+ /*
+ * Enable the DLP trace outputs. If we don't the LTSSM state in
+ * PHB_PCIE_DLP_TRAIN_CTL won't be updated and always reads zero.
+ */
+ trwctl = phb4_read_reg(p, PHB_PCIE_DLP_TRWCTL);
+ enabled = !!(trwctl & PHB_PCIE_DLP_TRWCTL_EN);
+ if (!enabled) {
+ phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL,
+ trwctl | PHB_PCIE_DLP_TRWCTL_EN);
+ }
+
+ end = start + msecs_to_tb(max_ms);
+ now = start;
+
+ do {
+ reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (reg != reglast)
+ state = phb4_train_info(p, reg, now - start);
+ reglast = reg;
+
+ if (!phb4_check_reg(p, reg)) {
+ PHBNOTICE(p, "TRACE: PHB fenced.\n");
+ goto out;
+ }
+
+ if (tb_compare(now, end) == TB_AAFTERB) {
+ PHBNOTICE(p, "TRACE: Timed out after %dms\n", max_ms);
+ goto out;
+ }
+
+ now = mftb();
+ } while (state != target_state);
+
+ PHBNOTICE(p, "TRACE: Reached target state\n");
+
+out:
+ /*
+ * The trace enable bit is a clock gate for the tracing logic. Turn
+ * it off to save power if we're not using it otherwise.
+ */
+ if (!enabled)
+ phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL, trwctl);
+}
+
+/*
+ * This helper is called repeatedly by the host sync notifier mechanism, which
+ * relies on the kernel to regularly poll the OPAL_SYNC_HOST_REBOOT call as it
+ * shuts down.
+ */
+static bool phb4_host_sync_reset(void *data)
+{
+ struct phb4 *p = (struct phb4 *)data;
+ struct phb *phb = &p->phb;
+ int64_t rc = 0;
+
+ /* Make sure no-one modifies the phb flags while we are active */
+ phb_lock(phb);
+
+ /* Make sure CAPP is attached to the PHB */
+ if (p->capp)
+ /* Call phb ops to disable capi */
+ rc = phb->ops->set_capi_mode(phb, OPAL_PHB_CAPI_MODE_PCIE,
+ p->capp->attached_pe);
+ else
+ rc = OPAL_SUCCESS;
+
+ /* Continue kicking state-machine if in middle of a mode transition */
+ if (rc == OPAL_BUSY)
+ rc = phb->slot->ops.run_sm(phb->slot);
+
+ phb_unlock(phb);
+
+ return rc <= OPAL_SUCCESS;
+}
+
+/*
+ * Notification from the pci-core that a pci slot state machine completed.
+ * We use this callback to mark the CAPP disabled if we were waiting for it.
+ */
+static int64_t phb4_slot_sm_run_completed(struct pci_slot *slot, uint64_t err)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+
+ /* Check if we are disabling the capp */
+ if (p->flags & PHB4_CAPP_DISABLE) {
+
+ /* Unset struct capp so that we don't fall into a creset loop */
+ p->flags &= ~(PHB4_CAPP_DISABLE);
+ p->capp->phb = NULL;
+ p->capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
+
+ /* Remove the host sync notifier if we are done. */
+ opal_del_host_sync_notifier(phb4_host_sync_reset, p);
+ if (err) {
+ /* Force a CEC ipl reboot */
+ disable_fast_reboot("CAPP: reset failed");
+ PHBERR(p, "CAPP: Unable to reset. Error=%lld\n", err);
+ } else {
+ PHBINF(p, "CAPP: reset complete\n");
+ }
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_poll_link(struct pci_slot *slot)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ uint64_t reg;
+ uint32_t vdid;
+
+ switch (slot->state) {
+ case PHB4_SLOT_NORMAL:
+ case PHB4_SLOT_LINK_START:
+ PHBDBG(p, "LINK: Start polling\n");
+ slot->retries = PHB4_LINK_ELECTRICAL_RETRIES;
+ pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT_ELECTRICAL);
+ /* Polling early here has no chance of a false positive */
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
+ case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
+ /*
+ * Wait for the link electrical connection to be
+ * established (shorter timeout). This allows us to
+ * work around spurious presence detect on some machines
+ * without waiting 10s each time
+ *
+ * Note: We *also* check for the full link up bit here
+ * because simics doesn't seem to implement the electrical
+ * link bit at all
+ */
+ reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (!phb4_check_reg(p, reg)) {
+ PHBERR(p, "PHB fence waiting for electrical link\n");
+ return phb4_retry_state(slot);
+ }
+
+ if (reg & (PHB_PCIE_DLP_INBAND_PRESENCE |
+ PHB_PCIE_DLP_TL_LINKACT)) {
+ PHBDBG(p, "LINK: Electrical link detected\n");
+ pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT);
+ slot->retries = PHB4_LINK_WAIT_RETRIES;
+ /* No wait here since already have an elec link */
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
+ }
+
+ if (slot->retries-- == 0) {
+ PHBDBG(p, "LINK: No in-band presence\n");
+ return OPAL_SUCCESS;
+ }
+ /* Retry */
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
+ case PHB4_SLOT_LINK_WAIT:
+ reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (!phb4_check_reg(p, reg)) {
+ PHBERR(p, "LINK: PHB fence waiting for link training\n");
+ return phb4_retry_state(slot);
+ }
+ if (reg & PHB_PCIE_DLP_TL_LINKACT) {
+ PHBDBG(p, "LINK: Link is up\n");
+ phb4_prepare_link_change(slot, true);
+ pci_slot_set_state(slot, PHB4_SLOT_LINK_STABLE);
+ return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
+ }
+
+ if (slot->retries-- == 0) {
+ PHBERR(p, "LINK: Timeout waiting for link up\n");
+ PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
+ return phb4_retry_state(slot);
+ }
+ /* Retry */
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
+ case PHB4_SLOT_LINK_STABLE:
+ /* Sanity check link */
+ if (phb4_fenced(p)) {
+ PHBERR(p, "LINK: PHB fenced waiting for stabilty\n");
+ return phb4_retry_state(slot);
+ }
+ reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (!phb4_check_reg(p, reg)) {
+ PHBERR(p, "LINK: PHB fence reading training control\n");
+ return phb4_retry_state(slot);
+ }
+ if (reg & PHB_PCIE_DLP_TL_LINKACT) {
+ PHBDBG(p, "LINK: Link is stable\n");
+ if (!phb4_link_optimal(slot, &vdid)) {
+ PHBDBG(p, "LINK: Link degraded\n");
+ if (slot->link_retries) {
+ phb4_lane_eq_change(p, vdid);
+ return phb4_retry_state(slot);
+ }
+ /*
+ * Link is degraded but no more retries, so
+ * settle for what we have :-(
+ */
+ PHBERR(p, "LINK: Degraded but no more retries\n");
+ }
+ pci_restore_slot_bus_configs(slot);
+ pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
+ return OPAL_SUCCESS;
+ }
+ PHBERR(p, "LINK: Went down waiting for stabilty\n");
+ PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
+ return phb4_retry_state(slot);
+ default:
+ PHBERR(p, "LINK: Unexpected slot state %08x\n",
+ slot->state);
+ }
+
+ pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
+ return OPAL_HARDWARE;
+}
+
+static unsigned int phb4_get_max_link_speed(struct phb4 *p, struct dt_node *np)
+{
+ unsigned int max_link_speed, hw_max_link_speed;
+ struct proc_chip *chip;
+ chip = get_chip(p->chip_id);
+
+ hw_max_link_speed = 4;
+ if (is_phb5() && (p->index == 0 || p->index == 3))
+ hw_max_link_speed = 5;
+
+ /* Priority order: NVRAM -> dt -> GEN3 dd2.00 -> hw default */
+ max_link_speed = hw_max_link_speed;
+ if (p->rev == PHB4_REV_NIMBUS_DD20 &&
+ ((0xf & chip->ec_level) == 0) && chip->ec_rev == 0)
+ max_link_speed = 3;
+ if (np) {
+ if (dt_has_node_property(np, "ibm,max-link-speed", NULL)) {
+ max_link_speed = dt_prop_get_u32(np, "ibm,max-link-speed");
+ p->dt_max_link_speed = max_link_speed;
+ } else {
+ p->dt_max_link_speed = 0;
+ }
+ } else {
+ if (p->dt_max_link_speed > 0)
+ max_link_speed = p->dt_max_link_speed;
+ }
+ if (pcie_max_link_speed)
+ max_link_speed = pcie_max_link_speed;
+ if (max_link_speed > hw_max_link_speed)
+ max_link_speed = hw_max_link_speed;
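+ /*
+ * Example (illustration only): an "ibm,max-link-speed" of 3 in the
+ * device tree caps a GEN4-capable PHB at GEN3; the NVRAM override
+ * (pcie_max_link_speed) takes precedence over the device tree but
+ * is still clamped to the hardware maximum.
+ */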
+
+ return max_link_speed;
+}
+
+static unsigned int __phb4_get_max_link_width(struct phb4 *p)
+{
+ uint64_t addr, reg;
+ unsigned int lane_config, width = 16;
+
+ /*
+ * On P9, only PEC2 is configurable (no-/bi-/tri-furcation)
+ */
+ switch (p->pec) {
+ case 0:
+ width = 16;
+ break;
+ case 1:
+ width = 8;
+ break;
+ case 2:
+ addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
+ xscom_read(p->chip_id, addr, &reg);
+ lane_config = GETFIELD(XPEC_P9_PCI_LANE_CFG, reg);
+
+ if (lane_config == 0b10 && p->index >= 4)
+ width = 4;
+ else
+ width = 8;
+ }
+ return width;
+}
+
+static unsigned int __phb5_get_max_link_width(struct phb4 *p)
+{
+ uint64_t addr, reg;
+ unsigned int lane_config, width = 16;
+
+ /*
+ * On P10, the 2 PECs are identical and each can have a
+ * different furcation, so we always need to check the PEC
+ * config
+ */
+ addr = XPEC_P10_PCI_CPLT_CONF1 + p->pec * XPEC_PCI_CPLT_OFFSET;
+ xscom_read(p->chip_id, addr, &reg);
+ lane_config = GETFIELD(XPEC_P10_PCI_LANE_CFG, reg);
+
+ switch (lane_config) {
+ case 0b00:
+ width = 16;
+ break;
+ case 0b01:
+ width = 8;
+ break;
+ case 0b10:
+ if (p->index == 0 || p->index == 3)
+ width = 8;
+ else
+ width = 4;
+ break;
+ default:
+ PHBERR(p, "Unexpected PEC lane config value %#x\n",
+ lane_config);
+ }
+ return width;
+}
+
+static unsigned int phb4_get_max_link_width(struct phb4 *p)
+{
+ if (is_phb5())
+ return __phb5_get_max_link_width(p);
+ else
+ return __phb4_get_max_link_width(p);
+}
+
+static void phb4_assert_perst(struct pci_slot *slot, bool assert)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ uint16_t linkctl;
+ uint64_t reg;
+
+ /*
+ * Disable the link before asserting PERST. The Cursed RAID card
+ * in ozrom1 (9005:028c) has problems coming back if PERST is asserted
+ * while link is active. To work around the problem we assert the link
+ * disable bit before asserting PERST. Asserting the secondary reset
+ * bit in the btctl register also works.
+ */
+ phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, &linkctl);
+ reg = phb4_read_reg(p, PHB_PCIE_CRESET);
+
+ if (assert) {
+ linkctl |= PCICAP_EXP_LCTL_LINK_DIS;
+ reg &= ~PHB_PCIE_CRESET_PERST_N;
+ } else {
+ linkctl &= ~PCICAP_EXP_LCTL_LINK_DIS;
+ reg |= PHB_PCIE_CRESET_PERST_N;
+ }
+
+ phb4_write_reg(p, PHB_PCIE_CRESET, reg);
+ phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, linkctl);
+}
+
+static void set_sys_disable_detect(struct phb4 *p, bool set)
+{
+ uint64_t val;
+
+ val = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (set)
+ val |= PHB_PCIE_DLP_SYS_DISABLEDETECT;
+ else
+ val &= ~PHB_PCIE_DLP_SYS_DISABLEDETECT;
+ out_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL, val);
+}
+
+static int64_t phb4_hreset(struct pci_slot *slot)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ uint16_t brctl;
+ uint8_t presence = 1;
+
+ switch (slot->state) {
+ case PHB4_SLOT_NORMAL:
+ PHBDBG(p, "HRESET: Starts\n");
+ if (slot->ops.get_presence_state)
+ slot->ops.get_presence_state(slot, &presence);
+ if (!presence) {
+ PHBDBG(p, "HRESET: No device\n");
+ return OPAL_SUCCESS;
+ }
+
+ /* circumvention for HW551382 */
+ if (is_phb5()) {
+ PHBINF(p, "HRESET: Workaround for HW551382\n");
+ set_sys_disable_detect(p, true);
+ }
+
+ PHBDBG(p, "HRESET: Prepare for link down\n");
+ phb4_prepare_link_change(slot, false);
+ /* fall through */
+ case PHB4_SLOT_HRESET_START:
+ PHBDBG(p, "HRESET: Assert\n");
+
+ phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
+ brctl |= PCI_CFG_BRCTL_SECONDARY_RESET;
+ phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);
+ pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY);
+
+ return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
+ case PHB4_SLOT_HRESET_DELAY:
+ PHBDBG(p, "HRESET: Deassert\n");
+
+ /* Clear link errors before we deassert reset */
+ phb4_err_clear_regb(p);
+
+ phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
+ brctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
+ phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);
+
+ /*
+ * Due to some oddball adapters bouncing the link
+ * training a couple of times, we wait for a full second
+ * before we start checking the link status, otherwise
+ * we can get a spurious link down interrupt which
+ * causes us to EEH immediately.
+ */
+ pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY2);
+ return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
+ case PHB4_SLOT_HRESET_DELAY2:
+ if (is_phb5())
+ set_sys_disable_detect(p, false);
+ pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
+ return slot->ops.poll_link(slot);
+ default:
+ PHBERR(p, "Unexpected slot state %08x\n", slot->state);
+ }
+
+ pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
+ return OPAL_HARDWARE;
+}
+
+static int64_t phb4_freset(struct pci_slot *slot)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+
+ switch(slot->state) {
+ case PHB4_SLOT_NORMAL:
+ case PHB4_SLOT_FRESET_START:
+ PHBDBG(p, "FRESET: Starts\n");
+
+ /* Reset max link speed for training */
+ p->max_link_speed = phb4_get_max_link_speed(p, NULL);
+
+ PHBDBG(p, "FRESET: Prepare for link down\n");
+ phb4_prepare_link_change(slot, false);
+
+ if (!p->skip_perst) {
+ /* circumvention for HW551382 */
+ if (is_phb5()) {
+ PHBINF(p, "FRESET: Workaround for HW551382\n");
+ set_sys_disable_detect(p, true);
+ }
+
+ PHBDBG(p, "FRESET: Assert\n");
+ phb4_assert_perst(slot, true);
+ pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);
+
+ /* 250ms assert time aligns with powernv */
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
+ }
+
+ /* To skip the assert during boot time */
+ PHBDBG(p, "FRESET: Assert skipped\n");
+ pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);
+ p->skip_perst = false;
+ /* fall through */
+ case PHB4_SLOT_FRESET_ASSERT_DELAY:
+ /* Clear link errors before we deassert PERST */
+ phb4_err_clear_regb(p);
+
+ PHBDBG(p, "FRESET: Deassert\n");
+ phb4_assert_perst(slot, false);
+
+ if (pci_tracing)
+ phb4_link_trace(p, PHB_PCIE_DLP_LTSSM_L0, 3000);
+
+ if (is_phb5())
+ set_sys_disable_detect(p, false);
+
+ pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
+ return slot->ops.poll_link(slot);
+ default:
+ PHBERR(p, "Unexpected slot state %08x\n", slot->state);
+ }
+
+ pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
+ return OPAL_HARDWARE;
+}
+
+static int64_t load_capp_ucode(struct phb4 *p)
+{
+ int64_t rc;
+
+ if (p->index != CAPP0_PHB_INDEX && p->index != CAPP1_PHB_INDEX)
+ return OPAL_HARDWARE;
+
+ /* 0x434150504c494448 = 'CAPPLIDH' in ASCII */
+ rc = capp_load_ucode(p->chip_id, p->phb.opal_id, p->index,
+ 0x434150504c494448UL, PHB4_CAPP_REG_OFFSET(p),
+ CAPP_APC_MASTER_ARRAY_ADDR_REG,
+ CAPP_APC_MASTER_ARRAY_WRITE_REG,
+ CAPP_SNP_ARRAY_ADDR_REG,
+ CAPP_SNP_ARRAY_WRITE_REG);
+ return rc;
+}
+
+static int do_capp_recovery_scoms(struct phb4 *p)
+{
+ uint64_t rc, reg, end;
+ uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
+
+
+ /* Get the status of CAPP recovery */
+ xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
+
+ /* No recovery in progress, nothing to do */
+ if ((reg & PPC_BIT(0)) == 0) {
+ PHBDBG(p, "CAPP: No recovery in progress\n");
+ return OPAL_SUCCESS;
+ }
+
+ PHBDBG(p, "CAPP: Waiting for recovery to complete\n");
+ /* recovery timer failure period 168ms */
+ end = mftb() + msecs_to_tb(168);
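+ /*
+ * Poll until the CAPP reports recovery done (bit 1), failed (bit 5)
+ * or hung (bit 9), or until the 168ms window expires.
+ */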
+ while ((reg & (PPC_BIT(1) | PPC_BIT(5) | PPC_BIT(9))) == 0) {
+
+ time_wait_ms(5);
+ xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
+
+ if (tb_compare(mftb(), end) != TB_ABEFOREB) {
+ PHBERR(p, "CAPP: Capp recovery Timed-out.\n");
+ end = 0;
+ break;
+ }
+ }
+
+ /* Check if the recovery failed or passed */
+ if (reg & PPC_BIT(1)) {
+ uint64_t act0, act1, mask, fir;
+
+ /* Use the Action0/1 and mask to only clear the bits
+ * that cause a local checkstop. Other bits need the
+ * attention of the PRD daemon.
+ */
+ xscom_read(p->chip_id, CAPP_FIR_ACTION0 + offset, &act0);
+ xscom_read(p->chip_id, CAPP_FIR_ACTION1 + offset, &act1);
+ xscom_read(p->chip_id, CAPP_FIR_MASK + offset, &mask);
+ xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
+
+ fir = ~(fir & ~mask & act0 & act1);
+ PHBDBG(p, "Doing CAPP recovery scoms\n");
+
+ /* update capp fir clearing bits causing local checkstop */
+ PHBDBG(p, "Resetting CAPP Fir with mask 0x%016llX\n", fir);
+ xscom_write(p->chip_id, CAPP_FIR_CLEAR + offset, fir);
+
+ /* disable snoops */
+ xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0);
+ load_capp_ucode(p);
+
+ /* clear err rpt reg*/
+ xscom_write(p->chip_id, CAPP_ERR_RPT_CLR + offset, 0);
+
+ /* clear capp fir */
+ xscom_write(p->chip_id, CAPP_FIR + offset, 0);
+
+ /* Just reset bits 0 and 1 and don't touch any other bit */
+ xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
+ reg &= ~(PPC_BIT(0) | PPC_BIT(1));
+ xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, reg);
+
+ PHBDBG(p, "CAPP recovery complete\n");
+ rc = OPAL_SUCCESS;
+
+ } else {
+ /* We will most likely checkstop here due to the FIR ACTION for
+ * a failed recovery, so this message may never be logged.
+ * But if we do get here, return an error to force a fence
+ * of the PHB.
+ */
+ if (reg & PPC_BIT(5))
+ PHBERR(p, "CAPP: Capp recovery Failed\n");
+ else if (reg & PPC_BIT(9))
+ PHBERR(p, "CAPP: Capp recovery hang detected\n");
+ else if (end != 0)
+ PHBERR(p, "CAPP: Unknown recovery failure\n");
+
+ PHBDBG(p, "CAPP: Err/Status-reg=0x%016llx\n", reg);
+ rc = OPAL_HARDWARE;
+ }
+
+ return rc;
+}
+
+/*
+ * Disable CAPI mode on a PHB. Must be done while PHB is fenced and
+ * not in recovery.
+ */
+static void disable_capi_mode(struct phb4 *p)
+{
+ uint64_t reg;
+ struct capp *capp = p->capp;
+
+ PHBINF(p, "CAPP: Deactivating\n");
+
+ /* Check if CAPP is attached to this PHB and active */
+ if (!capp || capp->phb != &p->phb) {
+ PHBDBG(p, "CAPP: Not attached to this PHB!\n");
+ return;
+ }
+
+ xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
+ if (!(reg & PPC_BIT(0))) {
+ /* Not in CAPI mode, no action required */
+ PHBERR(p, "CAPP: Not enabled!\n");
+ return;
+ }
+
+ /* CAPP should already be out of recovery in this function */
+ capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
+ if (reg & PPC_BIT(0)) {
+ PHBERR(p, "CAPP: Can't disable while still in recovery!\n");
+ return;
+ }
+
+ PHBINF(p, "CAPP: Disabling CAPI mode\n");
+
+ /* First Phase Reset CAPP Registers */
+ /* CAPP is about to be disabled: mark TLBI_FENCED and tlbi_psl_is_dead */
+ capp_xscom_write(capp, CAPP_ERR_STATUS_CTRL, PPC_BIT(3) | PPC_BIT(4));
+
+ /* Flush SUE uOP1 Register */
+ if (p->rev != PHB4_REV_NIMBUS_DD10)
+ capp_xscom_write(capp, FLUSH_SUE_UOP1, 0);
+
+ /* Release DMA/STQ engines */
+ capp_xscom_write(capp, APC_FSM_READ_MASK, 0ull);
+ capp_xscom_write(capp, XPT_FSM_RMM, 0ull);
+
+ /* Disable snoop */
+ capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
+
+ /* Clear flush SUE state map register */
+ capp_xscom_write(capp, FLUSH_SUE_STATE_MAP, 0);
+
+ /* Disable epoch timer */
+ capp_xscom_write(capp, EPOCH_RECOVERY_TIMERS_CTRL, 0);
+
+ /* CAPP Transport Control Register */
+ capp_xscom_write(capp, TRANSPORT_CONTROL, PPC_BIT(15));
+
+ /* Disable snooping */
+ capp_xscom_write(capp, SNOOP_CONTROL, 0);
+ capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
+
+ /* APC Master PB Control Register - disable examining cResps */
+ capp_xscom_write(capp, APC_MASTER_PB_CTRL, 0);
+
+ /* APC Master Config Register - de-select PHBs */
+ xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
+ APC_MASTER_CAPI_CTRL, 0, PPC_BITMASK(2, 3));
+
+ /* Clear all error registers */
+ capp_xscom_write(capp, CAPP_ERR_RPT_CLR, 0);
+ capp_xscom_write(capp, CAPP_FIR, 0);
+ capp_xscom_write(capp, CAPP_FIR_ACTION0, 0);
+ capp_xscom_write(capp, CAPP_FIR_ACTION1, 0);
+ capp_xscom_write(capp, CAPP_FIR_MASK, 0);
+
+ /* Second Phase Reset PEC/PHB Registers */
+
+ /* Reset the stack overrides if any */
+ xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PRDSTKOVR, 0);
+ xscom_write(p->chip_id, p->pe_xscom +
+ XPEC_NEST_READ_STACK_OVERRIDE, 0);
+
+ /* PE Bus AIB Mode Bits. Disable Tracing. Leave HOL Blocking as it is */
+ if (!(p->rev == PHB4_REV_NIMBUS_DD10) && p->index == CAPP1_PHB_INDEX)
+ xscom_write_mask(p->chip_id,
+ p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, 0,
+ PPC_BIT(30));
+
+ /* Reset for PCI to PB data movement */
+ xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
+ 0, XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
+
+ /* Disable CAPP mode in PEC CAPP Control Register */
+ xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, 0ull);
+}
+
+static int64_t phb4_creset(struct pci_slot *slot)
+{
+ struct phb4 *p = phb_to_phb4(slot->phb);
+ struct capp *capp = p->capp;
+ uint64_t pbcq_status;
+ uint64_t creset_time, wait_time;
+
+ /* Don't even try fixing a broken PHB */
+ if (p->broken)
+ return OPAL_HARDWARE;
+
+ switch (slot->state) {
+ case PHB4_SLOT_NORMAL:
+ case PHB4_SLOT_CRESET_START:
+ PHBDBG(p, "CRESET: Starts\n");
+
+ p->creset_start_time = mftb();
+
+ /* circumvention for HW551382 */
+ if (is_phb5()) {
+ PHBINF(p, "CRESET: Workaround for HW551382\n");
+ set_sys_disable_detect(p, true);
+ }
+
+ phb4_prepare_link_change(slot, false);
+ /* Clear error inject register, preventing recursive errors */
+ xscom_write(p->chip_id, p->pe_xscom + 0x2, 0x0);
+
+ /* Prevent HMI when PHB gets fenced as we are disabling CAPP */
+ if (p->flags & PHB4_CAPP_DISABLE &&
+ capp && capp->phb == slot->phb) {
+ /* Since there will be no HMI, set the recovery flag manually. */
+ p->flags |= PHB4_CAPP_RECOVERY;
+ xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
+ CAPP_FIR_MASK,
+ PPC_BIT(31), PPC_BIT(31));
+ }
+
+ /* Force fence on the PHB to work around a non-existent PE */
+ if (!phb4_fenced(p))
+ xscom_write(p->chip_id, p->pe_stk_xscom + 0x2,
+ 0x0000002000000000UL);
+
+ /*
+ * Force use of ASB for register access until the PHB has
+ * been fully reset.
+ */
+ p->flags |= PHB4_CFG_USE_ASB | PHB4_AIB_FENCED;
+
+ /* Assert PERST before clearing errors */
+ phb4_assert_perst(slot, true);
+
+ /* Clear errors, following the proper sequence */
+ phb4_err_clear(p);
+
+ /* Actual reset */
+ p->flags |= PHB4_ETU_IN_RESET;
+ xscom_write(p->chip_id, p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET,
+ 0x8000000000000000UL);
+
+ /* Read errors in PFIR and NFIR */
+ xscom_read(p->chip_id, p->pci_stk_xscom + 0x0, &p->pfir_cache);
+ xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &p->nfir_cache);
+
+ pci_slot_set_state(slot, PHB4_SLOT_CRESET_WAIT_CQ);
+ slot->retries = 500;
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
+ case PHB4_SLOT_CRESET_WAIT_CQ:
+
+ // Wait until operations are complete
+ xscom_read(p->chip_id, p->pe_stk_xscom + 0xc, &pbcq_status);
+ if (!(pbcq_status & 0xC000000000000000UL)) {
+ PHBDBG(p, "CRESET: No pending transactions\n");
+
+ /* capp recovery */
+ if ((p->flags & PHB4_CAPP_RECOVERY) &&
+ (do_capp_recovery_scoms(p) != OPAL_SUCCESS))
+ goto error;
+
+ if (p->flags & PHB4_CAPP_DISABLE)
+ disable_capi_mode(p);
+
+ /* Clear errors in PFIR and NFIR */
+ xscom_write(p->chip_id, p->pci_stk_xscom + 0x1,
+ ~p->pfir_cache);
+ xscom_write(p->chip_id, p->pe_stk_xscom + 0x1,
+ ~p->nfir_cache);
+
+ /* Re-read errors in PFIR and NFIR and reset any new
+ * error reported.
+ */
+ xscom_read(p->chip_id, p->pci_stk_xscom +
+ XPEC_PCI_STK_PCI_FIR, &p->pfir_cache);
+ xscom_read(p->chip_id, p->pe_stk_xscom +
+ XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
+
+ if (p->pfir_cache || p->nfir_cache) {
+ PHBERR(p, "CRESET: PHB still fenced !!\n");
+ phb4_dump_pec_err_regs(p);
+
+ /* Reset the PHB errors */
+ xscom_write(p->chip_id, p->pci_stk_xscom +
+ XPEC_PCI_STK_PCI_FIR, 0);
+ xscom_write(p->chip_id, p->pe_stk_xscom +
+ XPEC_NEST_STK_PCI_NFIR, 0);
+ }
+
+ /* Clear PHB from reset */
+ xscom_write(p->chip_id,
+ p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET, 0x0);
+ p->flags &= ~PHB4_ETU_IN_RESET;
+
+ pci_slot_set_state(slot, PHB4_SLOT_CRESET_REINIT);
+ /* After lifting PHB reset, wait while logic settles */
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
+ }
+
+ if (slot->retries-- == 0) {
+ PHBERR(p, "Timeout waiting for pending transaction\n");
+ goto error;
+ }
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(100));
+ case PHB4_SLOT_CRESET_REINIT:
+ PHBDBG(p, "CRESET: Reinitialization\n");
+ p->flags &= ~PHB4_AIB_FENCED;
+ p->flags &= ~PHB4_CAPP_RECOVERY;
+ p->flags &= ~PHB4_CFG_USE_ASB;
+ phb4_init_hw(p);
+ pci_slot_set_state(slot, PHB4_SLOT_CRESET_FRESET);
+
+ /*
+ * The PERST is sticky across resets, but LINK_DIS isn't.
+ * Re-assert it here now that we've reset the PHB.
+ */
+ phb4_assert_perst(slot, true);
+
+ /*
+ * wait either 100ms (for the ETU logic) or until we've had
+ * PERST asserted for 250ms.
+ */
+ creset_time = tb_to_msecs(mftb() - p->creset_start_time);
+ if (creset_time < 250)
+ wait_time = MAX(100, 250 - creset_time);
+ else
+ wait_time = 100;
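+ /*
+ * Example (illustration only): if the reset work so far took 60ms
+ * we wait another 190ms to complete the 250ms PERST hold; once it
+ * has taken 150ms or more, the 100ms ETU settling time dominates.
+ */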
+ PHBDBG(p, "CRESET: wait_time = %lld\n", wait_time);
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(wait_time));
+
+ case PHB4_SLOT_CRESET_FRESET:
+ /*
+ * We asserted PERST at the beginning of the CRESET and we
+ * have waited long enough, so we can skip it in the freset
+ * procedure.
+ */
+ p->skip_perst = true;
+ pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
+ return slot->ops.freset(slot);
+ default:
+ PHBERR(p, "CRESET: Unexpected slot state %08x, resetting...\n",
+ slot->state);
+ pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
+ return slot->ops.creset(slot);
+
+ }
+
+error:
+ /* Mark the PHB as dead and expect it to be removed */
+ p->broken = true;
+ return OPAL_HARDWARE;
+}
+
+/*
+ * Initialize root complex slot, which is mainly used to
+ * do fundamental reset before PCI enumeration in PCI core.
+ * When probing root complex and building its real slot,
+ * the operations will be copied over.
+ */
+static struct pci_slot *phb4_slot_create(struct phb *phb)
+{
+ struct pci_slot *slot;
+
+ slot = pci_slot_alloc(phb, NULL);
+ if (!slot)
+ return slot;
+
+ /* Elementary functions */
+ slot->ops.get_presence_state = phb4_get_presence_state;
+ slot->ops.get_link_state = phb4_get_link_state;
+ slot->ops.get_power_state = NULL;
+ slot->ops.get_attention_state = NULL;
+ slot->ops.get_latch_state = NULL;
+ slot->ops.set_power_state = NULL;
+ slot->ops.set_attention_state = NULL;
+
+ /*
+ * For PHB slots, we have to split the fundamental reset
+ * into 2 steps. We might not have the first step which
+ * is to power off/on the slot, or it's controlled by
+ * individual platforms.
+ */
+ slot->ops.prepare_link_change = phb4_prepare_link_change;
+ slot->ops.poll_link = phb4_poll_link;
+ slot->ops.hreset = phb4_hreset;
+ slot->ops.freset = phb4_freset;
+ slot->ops.creset = phb4_creset;
+ slot->ops.completed_sm_run = phb4_slot_sm_run_completed;
+ slot->link_retries = PHB4_LINK_LINK_RETRIES;
+
+ return slot;
+}
+
+static void phb4_int_unmask_all(struct phb4 *p)
+{
+ /* Init_126..130 - Re-enable error interrupts */
+ out_be64(p->regs + PHB_ERR_IRQ_ENABLE, 0xca8880cc00000000ull);
+
+ if (is_phb5())
+ out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE, 0x200850be08200020ull);
+ else
+ out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE, 0x2008400e08200000ull);
+ out_be64(p->regs + PHB_RXE_ARB_ERR_IRQ_ENABLE, 0xc40038fc01804070ull);
+ out_be64(p->regs + PHB_RXE_MRG_ERR_IRQ_ENABLE, 0x00006100008000a8ull);
+ out_be64(p->regs + PHB_RXE_TCE_ERR_IRQ_ENABLE, 0x60510050c0000000ull);
+}
+
+/*
+ * Mask the IRQ for any currently set error bits. This prevents the PHB's ERR
+ * and INF interrupts from being re-fired before the kernel can handle the
+ * underlying condition.
+ */
+static void phb4_int_mask_active(struct phb4 *p)
+{
+ const uint64_t error_regs[] = {
+ PHB_ERR_STATUS,
+ PHB_TXE_ERR_STATUS,
+ PHB_RXE_ARB_ERR_STATUS,
+ PHB_RXE_MRG_ERR_STATUS,
+ PHB_RXE_TCE_ERR_STATUS
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(error_regs); i++) {
+ uint64_t stat, mask;
+
+ /* The IRQ mask reg is always offset 0x20 from the status reg */
+ stat = phb4_read_reg(p, error_regs[i]);
+ mask = phb4_read_reg(p, error_regs[i] + 0x20);
+
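+ /*
+ * Clear the enable bit for every error currently asserted in the
+ * status register; all other sources stay enabled.
+ */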
+ phb4_write_reg(p, error_regs[i] + 0x20, mask & ~stat);
+ }
+}
+
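+/*
+ * Read the PESTA entry for a PE, merging the IODA-side copy with the
+ * memory-resident PEST entry when the PE is MMIO-frozen.
+ */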
+static uint64_t phb4_get_pesta(struct phb4 *p, uint64_t pe_number)
+{
+ uint64_t pesta;
+ __be64 *pPEST;
+
+ pPEST = (__be64 *)p->tbl_pest;
+
+ phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
+ pesta = phb4_read_reg(p, PHB_IODA_DATA0);
+ if (pesta & IODA3_PESTA_MMIO_FROZEN)
+ pesta |= be64_to_cpu(pPEST[2*pe_number]);
+
+ return pesta;
+}
+
+/* Check if the chip requires escalating a freeze to fence on MMIO loads */
+static bool phb4_escalation_required(void)
+{
+ uint64_t pvr = mfspr(SPR_PVR);
+
+ /* Only on Power9 */
+ if (proc_gen != proc_gen_p9)
+ return false;
+
+ /*
+ * Escalation is required on the following chip versions:
+ * - Cumulus DD1.0
+ * - Nimbus DD2.0, DD2.1 (and DD1.0, but it is unsupported so no check).
+ */
+ if (pvr & PVR_POWER9_CUMULUS) {
+ if (PVR_VERS_MAJ(pvr) == 1 && PVR_VERS_MIN(pvr) == 0)
+ return true;
+ } else { /* Nimbus */
+ if (PVR_VERS_MAJ(pvr) == 2 && PVR_VERS_MIN(pvr) < 2)
+ return true;
+ }
+
+ return false;
+}
+
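+/*
+ * Escalate when the freeze was caused by an MMIO load that completed
+ * with CA/completion-timeout (IODA3_PESTA_CA_CMPLT_TMT) or UR status.
+ */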
+static bool phb4_freeze_escalate(uint64_t pesta)
+{
+ if ((GETFIELD(IODA3_PESTA_TRANS_TYPE, pesta) ==
+ IODA3_PESTA_TRANS_TYPE_MMIOLOAD) &&
+ (pesta & (IODA3_PESTA_CA_CMPLT_TMT | IODA3_PESTA_UR)))
+ return true;
+ return false;
+}
+
+static int64_t phb4_eeh_freeze_status(struct phb *phb, uint64_t pe_number,
+ uint8_t *freeze_state,
+ uint16_t *pci_error_type,
+ uint16_t *severity)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t peev_bit = PPC_BIT(pe_number & 0x3f);
+ uint64_t peev, pesta, pestb;
+
+ /* Defaults: not frozen */
+ *freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
+ *pci_error_type = OPAL_EEH_NO_ERROR;
+
+ /* Check dead */
+ if (p->broken) {
+ *freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ if (severity)
+ *severity = OPAL_EEH_SEV_PHB_DEAD;
+ return OPAL_HARDWARE;
+ }
+
+ /* Check fence and CAPP recovery */
+ if (phb4_fenced(p) || (p->flags & PHB4_CAPP_RECOVERY)) {
+ *freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ if (severity)
+ *severity = OPAL_EEH_SEV_PHB_FENCED;
+ return OPAL_SUCCESS;
+ }
+
+ /* Check the PEEV */
+ phb4_ioda_sel(p, IODA3_TBL_PEEV, pe_number / 64, false);
+ peev = in_be64(p->regs + PHB_IODA_DATA0);
+ if (!(peev & peev_bit))
+ return OPAL_SUCCESS;
+
+ /* Indicate that we have an ER pending */
+ phb4_set_err_pending(p, true);
+ if (severity)
+ *severity = OPAL_EEH_SEV_PE_ER;
+
+ /* Read the full PESTA */
+ pesta = phb4_get_pesta(p, pe_number);
+ /* Check if we need to escalate to fence */
+ if (phb4_escalation_required() && phb4_freeze_escalate(pesta)) {
+ PHBERR(p, "Escalating freeze to fence PESTA[%lli]=%016llx\n",
+ pe_number, pesta);
+ if (severity)
+ *severity = OPAL_EEH_SEV_PHB_FENCED;
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ }
+
+ /* Read the PESTB in the PHB */
+ phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
+ pestb = phb4_read_reg(p, PHB_IODA_DATA0);
+
+ /* Convert PESTA/B to freeze_state */
+ if (pesta & IODA3_PESTA_MMIO_FROZEN)
+ *freeze_state |= OPAL_EEH_STOPPED_MMIO_FREEZE;
+ if (pestb & IODA3_PESTB_DMA_STOPPED)
+ *freeze_state |= OPAL_EEH_STOPPED_DMA_FREEZE;
+
+ return OPAL_SUCCESS;
+}
+
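+/*
+ * EEH freeze clear callback: clear any latched PHB errors, reset the
+ * requested PESTA/PESTB entries and recompute the "error pending"
+ * state from the PEEV.
+ */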
+static int64_t phb4_eeh_freeze_clear(struct phb *phb, uint64_t pe_number,
+ uint64_t eeh_action_token)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t err, peev;
+ int32_t i;
+ bool frozen_pe = false;
+
+ if (p->broken)
+ return OPAL_HARDWARE;
+
+	/* Check the error summary first. If nothing is set, move on to
+	 * clearing the PESTs, which can contain a freeze state from a
+	 * previous error or one set explicitly by the user.
+	 */
+ err = in_be64(p->regs + PHB_ETU_ERR_SUMMARY);
+ if (err == 0xffffffffffffffffUL) {
+ if (phb4_fenced(p)) {
+ PHBERR(p, "eeh_freeze_clear on fenced PHB\n");
+ return OPAL_HARDWARE;
+ }
+ }
+ if (err != 0)
+ phb4_err_clear(p);
+
+	/*
+	 * A copy of the PEEV also lives in system memory; accessing it
+	 * directly would be faster than going through IODA.
+	 */
+ if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO) {
+ phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+ }
+ if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_DMA) {
+ phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
+ out_be64(p->regs + PHB_IODA_DATA0, 0);
+ }
+
+
+ /* Update ER pending indication */
+ phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
+ for (i = 0; i < p->num_pes/64; i++) {
+ peev = in_be64(p->regs + PHB_IODA_DATA0);
+ if (peev) {
+ frozen_pe = true;
+ break;
+ }
+ }
+ if (frozen_pe) {
+ p->err.err_src = PHB4_ERR_SRC_PHB;
+ p->err.err_class = PHB4_ERR_CLASS_ER;
+ p->err.err_bit = -1;
+ phb4_set_err_pending(p, true);
+ } else
+ phb4_set_err_pending(p, false);
+
+ return OPAL_SUCCESS;
+}
+
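+/* EEH freeze set callback: artificially freeze a PE for MMIO, DMA or both */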
+static int64_t phb4_eeh_freeze_set(struct phb *phb, uint64_t pe_number,
+ uint64_t eeh_action_token)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t data;
+
+ if (p->broken)
+ return OPAL_HARDWARE;
+
+ if (pe_number >= p->num_pes)
+ return OPAL_PARAMETER;
+
+ if (eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_MMIO &&
+ eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_DMA &&
+ eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_ALL)
+ return OPAL_PARAMETER;
+
+ if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_MMIO) {
+ phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
+ data = in_be64(p->regs + PHB_IODA_DATA0);
+ data |= IODA3_PESTA_MMIO_FROZEN;
+ out_be64(p->regs + PHB_IODA_DATA0, data);
+ }
+
+ if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_DMA) {
+ phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
+ data = in_be64(p->regs + PHB_IODA_DATA0);
+ data |= IODA3_PESTB_DMA_STOPPED;
+ out_be64(p->regs + PHB_IODA_DATA0, data);
+ }
+
+ return OPAL_SUCCESS;
+}
+
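+/*
+ * EEH next-error callback: report the most severe outstanding error
+ * (dead PHB, fence, frozen PE or informational) along with the first
+ * frozen PE, if any. Error interrupts are unmasked again once nothing
+ * is left pending.
+ */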
+static int64_t phb4_eeh_next_error(struct phb *phb,
+ uint64_t *first_frozen_pe,
+ uint16_t *pci_error_type,
+ uint16_t *severity)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t peev, pesta;
+ uint32_t peev_size = p->num_pes/64;
+ int32_t i, j;
+
+ /* If the PHB is broken, we needn't go forward */
+ if (p->broken) {
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ *severity = OPAL_EEH_SEV_PHB_DEAD;
+ return OPAL_SUCCESS;
+ }
+
+ if ((p->flags & PHB4_CAPP_RECOVERY)) {
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ *severity = OPAL_EEH_SEV_PHB_FENCED;
+ return OPAL_SUCCESS;
+ }
+
+	/*
+	 * Check if we already have pending errors. If so, gather more
+	 * information about them, checking the PBCQ before the PHB.
+	 */
+ if (phb4_err_pending(p) /*&&
+ !phb4_err_check_pbcq(p) &&
+ !phb4_err_check_lem(p) */)
+ phb4_set_err_pending(p, false);
+
+ /* Clear result */
+ *pci_error_type = OPAL_EEH_NO_ERROR;
+ *severity = OPAL_EEH_SEV_NO_ERROR;
+ *first_frozen_pe = (uint64_t)-1;
+
+ /* Check frozen PEs */
+ if (!phb4_err_pending(p)) {
+ phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
+ for (i = 0; i < peev_size; i++) {
+ peev = in_be64(p->regs + PHB_IODA_DATA0);
+ if (peev) {
+ p->err.err_src = PHB4_ERR_SRC_PHB;
+ p->err.err_class = PHB4_ERR_CLASS_ER;
+ p->err.err_bit = -1;
+ phb4_set_err_pending(p, true);
+ break;
+ }
+ }
+ }
+
+ if (!phb4_err_pending(p))
+ return OPAL_SUCCESS;
+	/*
+	 * If the frozen PE is caused by a malfunctioning TLP, we
+	 * need to reset the PHB, so convert the ER to a PHB-fatal
+	 * error in that case.
+	 */
+ if (p->err.err_class == PHB4_ERR_CLASS_ER) {
+ for (i = peev_size - 1; i >= 0; i--) {
+ phb4_ioda_sel(p, IODA3_TBL_PEEV, i, false);
+ peev = in_be64(p->regs + PHB_IODA_DATA0);
+ for (j = 0; j < 64; j++) {
+ if (peev & PPC_BIT(j)) {
+ *first_frozen_pe = i * 64 + j;
+ break;
+ }
+ }
+ if (*first_frozen_pe != (uint64_t)(-1))
+ break;
+ }
+ }
+
+ if (*first_frozen_pe != (uint64_t)(-1)) {
+ pesta = phb4_get_pesta(p, *first_frozen_pe);
+ if (phb4_escalation_required() && phb4_freeze_escalate(pesta)) {
+ PHBINF(p, "Escalating freeze to fence. PESTA[%lli]=%016llx\n",
+ *first_frozen_pe, pesta);
+ p->err.err_class = PHB4_ERR_CLASS_FENCED;
+ }
+ }
+
+ switch (p->err.err_class) {
+ case PHB4_ERR_CLASS_DEAD:
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ *severity = OPAL_EEH_SEV_PHB_DEAD;
+ break;
+ case PHB4_ERR_CLASS_FENCED:
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ *severity = OPAL_EEH_SEV_PHB_FENCED;
+ break;
+ case PHB4_ERR_CLASS_ER:
+ *pci_error_type = OPAL_EEH_PE_ERROR;
+ *severity = OPAL_EEH_SEV_PE_ER;
+
+ /* No frozen PE ? */
+ if (*first_frozen_pe == (uint64_t)-1) {
+ *pci_error_type = OPAL_EEH_NO_ERROR;
+ *severity = OPAL_EEH_SEV_NO_ERROR;
+ phb4_set_err_pending(p, false);
+ }
+
+ break;
+ case PHB4_ERR_CLASS_INF:
+ *pci_error_type = OPAL_EEH_PHB_ERROR;
+ *severity = OPAL_EEH_SEV_INF;
+ break;
+ default:
+ *pci_error_type = OPAL_EEH_NO_ERROR;
+ *severity = OPAL_EEH_SEV_NO_ERROR;
+ phb4_set_err_pending(p, false);
+ }
+
+ /*
+ * Unmask all our error interrupts once all pending errors
+ * have been handled.
+ */
+ if (!phb4_err_pending(p))
+ phb4_int_unmask_all(p);
+
+ return OPAL_SUCCESS;
+}
+
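+/* Program the PAPR error injection address/mask/control registers */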
+static int64_t phb4_err_inject_finalize(struct phb4 *phb, uint64_t addr,
+ uint64_t mask, uint64_t ctrl,
+ bool is_write)
+{
+ if (is_write)
+ ctrl |= PHB_PAPR_ERR_INJ_CTL_WR;
+ else
+ ctrl |= PHB_PAPR_ERR_INJ_CTL_RD;
+
+ out_be64(phb->regs + PHB_PAPR_ERR_INJ_ADDR, addr);
+ out_be64(phb->regs + PHB_PAPR_ERR_INJ_MASK, mask);
+ out_be64(phb->regs + PHB_PAPR_ERR_INJ_CTL, ctrl);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t phb4_err_inject_mem32(struct phb4 *phb __unused,
+ uint64_t pe_number __unused,
+ uint64_t addr __unused,
+ uint64_t mask __unused,
+ bool is_write __unused)
+{
+ return OPAL_UNSUPPORTED;
+}
+
+static int64_t phb4_err_inject_mem64(struct phb4 *phb __unused,
+ uint64_t pe_number __unused,
+ uint64_t addr __unused,
+ uint64_t mask __unused,
+ bool is_write __unused)
+{
+ return OPAL_UNSUPPORTED;
+}
+
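+/*
+ * Error injection for config space cycles: scan the RTT for a BDFN
+ * owned by the target PE (or for a whole bus when the PE covers a
+ * bus) and use it to build the injection address and mask.
+ */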
+static int64_t phb4_err_inject_cfg(struct phb4 *phb, uint64_t pe_number,
+ uint64_t addr, uint64_t mask,
+ bool is_write)
+{
+ uint64_t a, m, prefer, ctrl;
+ int bdfn;
+ bool is_bus_pe = false;
+
+ a = 0xffffull;
+ prefer = 0xffffull;
+ m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
+ ctrl = PHB_PAPR_ERR_INJ_CTL_CFG;
+
+ for (bdfn = 0; bdfn < RTT_TABLE_ENTRIES; bdfn++) {
+ if (be16_to_cpu(phb->tbl_rtt[bdfn]) != pe_number)
+ continue;
+
+		/* The PE can be associated with a PCI bus or a device */
+ is_bus_pe = false;
+ if ((bdfn + 8) < RTT_TABLE_ENTRIES &&
+ be16_to_cpu(phb->tbl_rtt[bdfn + 8]) == pe_number)
+ is_bus_pe = true;
+
+ /* Figure out the PCI config address */
+ if (prefer == 0xffffull) {
+ if (is_bus_pe) {
+ m = PHB_PAPR_ERR_INJ_MASK_CFG;
+ prefer = SETFIELD(m, 0x0ull, PCI_BUS_NUM(bdfn));
+ } else {
+ m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
+ prefer = SETFIELD(m, 0x0ull, bdfn);
+ }
+ }
+
+		/* Check whether the input address is valid */
+ if (!is_bus_pe &&
+ GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG_ALL, addr) == bdfn) {
+ a = addr;
+ break;
+ }
+
+ if (is_bus_pe &&
+ GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG, addr) == PCI_BUS_NUM(bdfn)) {
+ a = addr;
+ break;
+ }
+ }
+
+ /* Invalid PE number */
+ if (prefer == 0xffffull)
+ return OPAL_PARAMETER;
+
+ /* Specified address is out of range */
+ if (a == 0xffffull)
+ a = prefer;
+ else
+ m = mask;
+
+ return phb4_err_inject_finalize(phb, a, m, ctrl, is_write);
+}
+
+static int64_t phb4_err_inject_dma(struct phb4 *phb __unused,
+ uint64_t pe_number __unused,
+ uint64_t addr __unused,
+ uint64_t mask __unused,
+ bool is_write __unused,
+ bool is_64bits __unused)
+{
+ return OPAL_UNSUPPORTED;
+}
+
+static int64_t phb4_err_inject_dma32(struct phb4 *phb, uint64_t pe_number,
+ uint64_t addr, uint64_t mask,
+ bool is_write)
+{
+ return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, false);
+}
+
+static int64_t phb4_err_inject_dma64(struct phb4 *phb, uint64_t pe_number,
+ uint64_t addr, uint64_t mask,
+ bool is_write)
+{
+ return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, true);
+}
+
+
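+/*
+ * OPAL error injection entry point: pick the handler (MMIO, config or
+ * DMA, 32 or 64-bit) matching the requested function and type, then
+ * let it program the injection registers. For instance, an
+ * OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR request is dispatched to
+ * phb4_err_inject_cfg() as a read.
+ */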
+static int64_t phb4_err_inject(struct phb *phb, uint64_t pe_number,
+ uint32_t type, uint32_t func,
+ uint64_t addr, uint64_t mask)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ int64_t (*handler)(struct phb4 *p, uint64_t pe_number,
+ uint64_t addr, uint64_t mask, bool is_write);
+ bool is_write;
+
+	/* We can't inject errors into the reserved PE */
+ if (pe_number == PHB4_RESERVED_PE_NUM(p) || pe_number >= p->num_pes)
+ return OPAL_PARAMETER;
+
+ /* Clear leftover from last time */
+ out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
+
+ switch (func) {
+ case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR:
+ case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA:
+ is_write = false;
+ if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
+ handler = phb4_err_inject_mem64;
+ else
+ handler = phb4_err_inject_mem32;
+ break;
+ case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR:
+ case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA:
+ is_write = true;
+ if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
+ handler = phb4_err_inject_mem64;
+ else
+ handler = phb4_err_inject_mem32;
+ break;
+ case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR:
+ case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA:
+ is_write = false;
+ handler = phb4_err_inject_cfg;
+ break;
+ case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR:
+ case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA:
+ is_write = true;
+ handler = phb4_err_inject_cfg;
+ break;
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR:
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA:
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER:
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET:
+ is_write = false;
+ if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
+ handler = phb4_err_inject_dma64;
+ else
+ handler = phb4_err_inject_dma32;
+ break;
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR:
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA:
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER:
+ case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET:
+ is_write = true;
+ if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
+ handler = phb4_err_inject_dma64;
+ else
+ handler = phb4_err_inject_dma32;
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ return handler(p, pe_number, addr, mask, is_write);
+}
+
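+/* Fill the OPAL diagnostic buffer with a snapshot of the PHB status */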
+static int64_t phb4_get_diag_data(struct phb *phb,
+ void *diag_buffer,
+ uint64_t diag_buffer_len)
+{
+ bool fenced;
+ struct phb4 *p = phb_to_phb4(phb);
+ struct OpalIoPhb4ErrorData *data = diag_buffer;
+
+ if (diag_buffer_len < sizeof(struct OpalIoPhb4ErrorData))
+ return OPAL_PARAMETER;
+ if (p->broken)
+ return OPAL_HARDWARE;
+
+ /*
+ * Dummy check for fence so that phb4_read_phb_status knows
+ * whether to use ASB or AIB
+ */
+ fenced = phb4_fenced(p);
+ phb4_read_phb_status(p, data);
+
+ if (!fenced)
+ phb4_eeh_dump_regs(p);
+
+	/*
+	 * We most likely got here because of an INF-class error.
+	 * In that case, we need to clear the error explicitly.
+	 */
+ if (phb4_err_pending(p) &&
+ p->err.err_class == PHB4_ERR_CLASS_INF &&
+ p->err.err_src == PHB4_ERR_SRC_PHB) {
+ phb4_err_clear(p);
+ phb4_set_err_pending(p, false);
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static uint64_t tve_encode_50b_noxlate(uint64_t start_addr, uint64_t end_addr)
+{
+ uint64_t tve;
+
+ /*
+ * Put start address bits 49:24 into TVE[52:53]||[0:23]
+ * and end address bits 49:24 into TVE[54:55]||[24:47]
+ * and set TVE[51]
+ */
+ tve = (start_addr << 16) & (0xffffffull << 40);
+ tve |= (start_addr >> 38) & (3ull << 10);
+ tve |= (end_addr >> 8) & (0xfffffful << 16);
+ tve |= (end_addr >> 40) & (3ull << 8);
+ tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
+ return tve;
+}
+
+static bool phb4_is_dd20(struct phb4 *p)
+{
+ struct proc_chip *chip = get_chip(p->chip_id);
+
+ if (p->rev == PHB4_REV_NIMBUS_DD20 && ((0xf & chip->ec_level) == 0))
+ return true;
+ return false;
+}
+
+static int64_t phb4_get_capp_info(int chip_id, struct phb *phb,
+ struct capp_info *info)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint32_t offset;
+
+ /* Not even supposed to be here on P10, but doesn't hurt */
+ if (is_phb5())
+ return OPAL_UNSUPPORTED;
+
+ if (chip_id != p->chip_id)
+ return OPAL_PARAMETER;
+
+	/* Check if CAPP is attached to the PHB */
+ if (p->capp == NULL || p->capp->phb != phb)
+ return OPAL_PARAMETER;
+
+ offset = PHB4_CAPP_REG_OFFSET(p);
+
+ if (p->index == CAPP0_PHB_INDEX)
+ info->capp_index = 0;
+ if (p->index == CAPP1_PHB_INDEX)
+ info->capp_index = 1;
+ info->phb_index = p->index;
+ info->capp_fir_reg = CAPP_FIR + offset;
+ info->capp_fir_mask_reg = CAPP_FIR_MASK + offset;
+ info->capp_fir_action0_reg = CAPP_FIR_ACTION0 + offset;
+ info->capp_fir_action1_reg = CAPP_FIR_ACTION1 + offset;
+ info->capp_err_status_ctrl_reg = CAPP_ERR_STATUS_CTRL + offset;
+
+ return OPAL_SUCCESS;
+}
+
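+/*
+ * One-time CAPP register setup: APC master and snoop controls,
+ * transport control (STQ engine count, CI store buffer tuning),
+ * read machine ownership and the CAPP FIR action/mask values,
+ * all tuned for the engine mix requested by the caller.
+ */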
+static void phb4_init_capp_regs(struct phb4 *p, uint32_t capp_eng)
+{
+ uint64_t addr, reg;
+ uint32_t offset;
+ uint8_t link_width_x16 = 1;
+
+ offset = PHB4_CAPP_REG_OFFSET(p);
+
+	/* Calculate the PHB link width if the card is attached to PEC2 */
+ if (p->index == CAPP1_PHB_INDEX) {
+ /* Check if PEC2 is in x8 or x16 mode.
+ * PEC0 is always in x16
+ */
+ addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
+ xscom_read(p->chip_id, addr, &reg);
+ link_width_x16 = ((reg & XPEC_P9_PCI_IOVALID_MASK) ==
+ XPEC_P9_PCI_IOVALID_X16);
+ }
+
+ /* APC Master PowerBus Control Register */
+ xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
+ reg |= PPC_BIT(0); /* enable cResp exam */
+ reg |= PPC_BIT(3); /* disable vg not sys */
+ reg |= PPC_BIT(12);/* HW417025: disable capp virtual machines */
+ reg |= PPC_BIT(2); /* disable nn rn */
+ reg |= PPC_BIT(4); /* disable g */
+ reg |= PPC_BIT(5); /* disable ln */
+ xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
+
+ /* Set PHB mode, HPC Dir State and P9 mode */
+ xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset,
+ 0x1772000000000000UL);
+ PHBINF(p, "CAPP: port attached\n");
+
+	/* Set snoop ttype decoding, dir size to 512K */
+ xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0x9000000000000000UL);
+
+ /* Use Read Epsilon Tier2 for all scopes.
+ * Set Tier2 Read Epsilon.
+ */
+ xscom_read(p->chip_id, SNOOP_CONTROL + offset, &reg);
+ reg |= PPC_BIT(0);
+ reg |= PPC_BIT(35);
+ reg |= PPC_BIT(45);
+ reg |= PPC_BIT(46);
+ reg |= PPC_BIT(47);
+ reg |= PPC_BIT(50);
+ xscom_write(p->chip_id, SNOOP_CONTROL + offset, reg);
+
+ /* Transport Control Register */
+ xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
+ if (p->index == CAPP0_PHB_INDEX) {
+ reg |= PPC_BIT(1); /* Send Packet Timer Value */
+ reg |= PPC_BITMASK(10, 13); /* Send Packet Timer Value */
+ reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
+ reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
+ if (capp_eng & CAPP_MIN_STQ_ENGINES) {
+ /* 2 CAPP msg engines */
+ reg |= PPC_BIT(58);
+ reg |= PPC_BIT(59);
+ reg |= PPC_BIT(60);
+ }
+ if (capp_eng & CAPP_MAX_STQ_ENGINES) {
+ /* 14 CAPP msg engines */
+ reg |= PPC_BIT(60);
+ }
+ reg |= PPC_BIT(62);
+ }
+ if (p->index == CAPP1_PHB_INDEX) {
+ reg |= PPC_BIT(4); /* Send Packet Timer Value */
+ reg &= ~PPC_BIT(10); /* Set CI Store Buffer Threshold=5 */
+ reg |= PPC_BIT(11); /* Set CI Store Buffer Threshold=5 */
+ reg &= ~PPC_BIT(12); /* Set CI Store Buffer Threshold=5 */
+ reg |= PPC_BIT(13); /* Set CI Store Buffer Threshold=5 */
+ reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
+ reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
+ if (capp_eng & CAPP_MIN_STQ_ENGINES) {
+ /* 2 CAPP msg engines */
+ reg |= PPC_BIT(59);
+ reg |= PPC_BIT(60);
+
+ } else if (capp_eng & CAPP_MAX_STQ_ENGINES) {
+
+ if (link_width_x16)
+ /* 14 CAPP msg engines */
+ reg |= PPC_BIT(60) | PPC_BIT(62);
+ else
+ /* 6 CAPP msg engines */
+ reg |= PPC_BIT(60);
+ }
+ }
+ xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
+
+ /* The transport control register needs to be loaded in two
+ * steps. Once the register values have been set, we have to
+ * write bit 63 to a '1', which loads the register values into
+ * the ci store buffer logic.
+ */
+ xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
+ reg |= PPC_BIT(63);
+ xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
+
+ /* Enable epoch timer */
+ xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset,
+ 0xC0000000FFF8FFE0UL);
+
+ /* Flush SUE State Map Register */
+ xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset,
+ 0x08020A0000000000UL);
+
+ /* Flush SUE uOP1 Register */
+ xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset,
+ 0xDCE0280428000000);
+
+ /* capp owns PHB read buffers */
+ if (p->index == CAPP0_PHB_INDEX) {
+ /* max PHB read buffers 0-47 */
+ reg = 0xFFFFFFFFFFFF0000UL;
+ if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
+ reg = 0xF000000000000000UL;
+ xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
+ xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
+ }
+ if (p->index == CAPP1_PHB_INDEX) {
+
+ if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
+ reg = 0xF000000000000000ULL;
+ } else if (link_width_x16) {
+ /* 0-47 (Read machines) are available for
+ * capp use
+ */
+ reg = 0x0000FFFFFFFFFFFFULL;
+ } else {
+			/* Set 30 Read machines for CAPP, minus
+			 * 20-27 which are kept for DMA
+			 */
+ reg = 0xFFFFF00E00000000ULL;
+ }
+ xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
+ xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
+ }
+
+ /* CAPP FIR Action 0 */
+ xscom_write(p->chip_id, CAPP_FIR_ACTION0 + offset, 0x0b1c000104060000UL);
+
+ /* CAPP FIR Action 1 */
+ xscom_write(p->chip_id, CAPP_FIR_ACTION1 + offset, 0x2b9c0001240E0000UL);
+
+ /* CAPP FIR MASK */
+ xscom_write(p->chip_id, CAPP_FIR_MASK + offset, 0x80031f98d8717000UL);
+
+ /* Mask the CAPP PSL Credit Timeout Register error */
+ xscom_write_mask(p->chip_id, CAPP_FIR_MASK + offset,
+ PPC_BIT(46), PPC_BIT(46));
+
+ /* Deassert TLBI_FENCED and tlbi_psl_is_dead */
+ xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
+}
+
+/* override some inits with CAPI defaults */
+static void phb4_init_capp_errors(struct phb4 *p)
+{
+ /* Init_77: TXE Error AIB Fence Enable Register */
+ if (phb4_is_dd20(p))
+ out_be64(p->regs + 0x0d30, 0xdfffbf0ff7ddfff0ull);
+ else
+ out_be64(p->regs + 0x0d30, 0xdff7bf0ff7ddfff0ull);
+ /* Init_86: RXE_ARB Error AIB Fence Enable Register */
+ out_be64(p->regs + 0x0db0, 0xfbffd7bbfb7fbfefull);
+
+ /* Init_95: RXE_MRG Error AIB Fence Enable Register */
+ out_be64(p->regs + 0x0e30, 0xfffffeffff7fff57ull);
+
+ /* Init_104: RXE_TCE Error AIB Fence Enable Register */
+ out_be64(p->regs + 0x0eb0, 0xffaeffafffffffffull);
+
+ /* Init_113: PHB Error AIB Fence Enable Register */
+ out_be64(p->regs + 0x0cb0, 0x35777073ff000000ull);
+}
+
+/*
+ * The capi, NBW and ASN indicators are used only on P9 to flag some
+ * types of incoming traffic for the PHB and have been removed on P10.
+ *
+ * The capi indicator is over the 8 most significant bits (and
+ * not 16). We stay away from bits 59 (TVE select), 60 and 61 (MSI)
+ *
+ * For the mask, we keep bit 59 in, as capi messages must hit TVE#0.
+ * Bit 56 is not part of the mask, so that a NBW message (see below)
+ * is also considered a capi message.
+ */
+#define CAPIIND 0x0200
+#define CAPIMASK 0xFE00
+
+/*
+ * Non-Blocking Write messages are a subset of capi messages, so the
+ * indicator is the same as capi + an extra bit (56) to differentiate.
+ * Mask is the same as capi + the extra bit
+ */
+#define NBWIND 0x0300
+#define NBWMASK 0xFF00
+
+/*
+ * The ASN indicator is used for tunneled operations (as_notify and
+ * atomics). Tunneled operation messages can be sent in PCI mode as
+ * well as CAPI mode.
+ *
+ * The format of those messages is specific and, for as_notify
+ * messages, the address field is hijacked to encode the LPID/PID/TID
+ * of the target thread, so those messages should not go through
+ * translation. They must hit TVE#1. Therefore bit 59 is part of the
+ * indicator.
+ */
+#define ASNIND 0x0C00
+#define ASNMASK 0xFF00
+
+/* Power Bus Common Queue Registers
+ * All PBCQ and PBAIB registers are accessed via SCOM
+ * NestBase = 4010C00 for PEC0
+ * 4011000 for PEC1
+ * 4011400 for PEC2
+ * PCIBase = D010800 for PE0
+ * E010800 for PE1
+ * F010800 for PE2
+ *
+ * Some registers are shared amongst all of the stacks and will only
+ * have 1 copy. Other registers are implemented one per stack.
+ * Registers that are duplicated will have an additional offset
+ * of “StackBase” so that they have a unique address.
+ * Stackoffset = 00000040 for Stack0
+ * = 00000080 for Stack1
+ * = 000000C0 for Stack2
+ */
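+/*
+ * Switch the PEC/PHB into CAPP mode: wait for PBCQ traffic to drain,
+ * hand STQ and DMA read engines to CAPP, set up the CAPI compare/mask
+ * and CAPI MMIO window, rewrite the TVEs for 50-bit non-translate
+ * mode and sync the CAPP timebase.
+ */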
+static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
+ uint32_t capp_eng)
+{
+ uint64_t addr, reg, start_addr, end_addr, stq_eng, dma_eng;
+ uint64_t mbt0, mbt1;
+ int i, window_num = -1;
+
+ /* CAPP Control Register */
+ xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
+ if (reg & PPC_BIT(0)) {
+ PHBDBG(p, "Already in CAPP mode\n");
+ }
+
+ for (i = 0; i < 500000; i++) {
+ /* PBCQ General Status Register */
+ xscom_read(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_STAT,
+ &reg);
+ if (!(reg & 0xC000000000000000UL))
+ break;
+ time_wait_us(10);
+ }
+ if (reg & 0xC000000000000000UL) {
+ PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
+ return OPAL_HARDWARE;
+ }
+
+ stq_eng = 0x0000000000000000ULL;
+ dma_eng = 0x0000000000000000ULL;
+ if (p->index == CAPP0_PHB_INDEX) {
+ /* PBCQ is operating as a x16 stack
+		 * - The maximum number of engines given to CAPP will be
+ * 14 and will be assigned in the order of STQ 15 to 2.
+ * - 0-47 (Read machines) are available for capp use.
+ */
+ stq_eng = 0x000E000000000000ULL; /* 14 CAPP msg engines */
+ dma_eng = 0x0000FFFFFFFFFFFFULL; /* 48 CAPP Read machines */
+ }
+
+ if (p->index == CAPP1_PHB_INDEX) {
+ /* Check if PEC is in x8 or x16 mode */
+ addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
+ xscom_read(p->chip_id, addr, &reg);
+ if ((reg & XPEC_P9_PCI_IOVALID_MASK) == XPEC_P9_PCI_IOVALID_X16) {
+ /* PBCQ is operating as a x16 stack
+			 * - The maximum number of engines given to CAPP will be
+ * 14 and will be assigned in the order of STQ 15 to 2.
+ * - 0-47 (Read machines) are available for capp use.
+ */
+ stq_eng = 0x000E000000000000ULL;
+ dma_eng = 0x0000FFFFFFFFFFFFULL;
+ } else {
+
+ /* PBCQ is operating as a x8 stack
+ * - The maximum number of engines given to CAPP should
+ * be 6 and will be assigned in the order of 7 to 2.
+ * - 0-30 (Read machines) are available for capp use.
+ */
+ stq_eng = 0x0006000000000000ULL;
+ /* 30 Read machines for CAPP Minus 20-27 for DMA */
+ dma_eng = 0x0000FFFFF00E0000ULL;
+ }
+ }
+
+ if (capp_eng & CAPP_MIN_STQ_ENGINES)
+ stq_eng = 0x0002000000000000ULL; /* 2 capp msg engines */
+
+ /* CAPP Control Register. Enable CAPP Mode */
+ reg = 0x8000000000000000ULL; /* PEC works in CAPP Mode */
+ reg |= stq_eng;
+ if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
+ dma_eng = 0x0000F00000000000ULL; /* 4 CAPP Read machines */
+ reg |= dma_eng;
+ xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, reg);
+
+	/* PEC2 has 3 ETUs + 16 PCI lanes that can operate in x16,
+	 * x8+x8 (bifurcated) or x8+x4+x4 (trifurcated) mode. When a
+	 * Mellanox CX5 card is attached to stack0 of this PEC, indicated
+	 * by a request to allocate CAPP_MAX_DMA_READ_ENGINES, we tweak
+	 * the default DMA read engine allocation to maximize DMA read
+	 * performance.
+	 */
+ if ((p->index == CAPP1_PHB_INDEX) &&
+ (capp_eng & CAPP_MAX_DMA_READ_ENGINES))
+ phb4_pec2_dma_engine_realloc(p);
+
+ /* PCI to PB data movement ignores the PB init signal. */
+ xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
+ XPEC_NEST_PBCQ_HW_CONFIG_PBINIT,
+ XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
+
+	/* If pump mode is enabled, don't do nodal broadcasts. */
+ xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &reg);
+ if (reg & PB_CFG_PUMP_MODE) {
+ reg = XPEC_NEST_PBCQ_HW_CONFIG_DIS_NODAL;
+ reg |= XPEC_NEST_PBCQ_HW_CONFIG_DIS_RNNN;
+ xscom_write_mask(p->chip_id,
+ p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
+ reg, reg);
+ }
+
+	/* PEC Phase 4 (PHB) register adjustments
+	 * Inbound CAPP traffic: The CAPI can send both CAPP packets and
+	 * I/O packets. A PCIe packet is identified as a CAPP packet in
+	 * the PHB if the PCIe address matches either the CAPI
+	 * Compare/Mask register or its NBW Compare/Mask register.
+	 */
+
+ /*
+ * Bit [0:7] XSL_DSNCTL[capiind]
+ * Init_26 - CAPI Compare/Mask
+ */
+ out_be64(p->regs + PHB_CAPI_CMPM,
+ ((u64)CAPIIND << 48) |
+ ((u64)CAPIMASK << 32) | PHB_CAPI_CMPM_ENABLE);
+
+ /* PB AIB Hardware Control Register
+ * Wait 32 PCI clocks for a credit to become available
+ * before rejecting.
+ */
+ xscom_read(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, &reg);
+ reg |= PPC_BITMASK(40, 42);
+ if (p->index == CAPP1_PHB_INDEX)
+ reg |= PPC_BIT(30);
+ xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, reg);
+
+ /* non-translate/50-bit mode */
+ out_be64(p->regs + PHB_NXLATE_PREFIX, 0x0000000000000000Ull);
+
+	/* Set the TVEs to no-translate mode and allow the MMIO window */
+ memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
+
+ /*
+ * In 50-bit non-translate mode, the fields of the TVE are
+ * used to perform an address range check. In this mode TCE
+ * Table Size(0) must be a '1' (TVE[51] = 1)
+ * PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
+ * PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
+ *
+ * TVE[51] = 1
+ * TVE[56] = 1: 50-bit Non-Translate Mode Enable
+ * TVE[0:23] = 0x000000
+ * TVE[24:47] = 0xFFFFFF
+ *
+ * capi dma mode: CAPP DMA mode needs access to all of memory
+ * capi mode: Allow address range (bit 14 = 1)
+ * 0x0002000000000000: 0x0002FFFFFFFFFFFF
+ * TVE[52:53] = '10' and TVE[54:55] = '10'
+ */
+
+ /* TVT#0: CAPI window + DMA, all memory */
+ start_addr = 0ull;
+ end_addr = 0x0003ffffffffffffull;
+ p->tve_cache[pe_number * 2] =
+ tve_encode_50b_noxlate(start_addr, end_addr);
+
+ /* TVT#1: CAPI window + DMA, all memory, in bypass mode */
+ start_addr = (1ull << 59);
+ end_addr = start_addr + 0x0003ffffffffffffull;
+ p->tve_cache[pe_number * 2 + 1] =
+ tve_encode_50b_noxlate(start_addr, end_addr);
+
+ phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
+ for (i = 0; i < p->tvt_size; i++)
+ out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
+
+ /*
+ * Since TVT#0 is in by-pass mode, disable 32-bit MSI, as a
+ * DMA write targeting 0x00000000FFFFxxxx would be interpreted
+ * as a 32-bit MSI
+ */
+ reg = in_be64(p->regs + PHB_PHB4_CONFIG);
+ reg &= ~PHB_PHB4C_32BIT_MSI_EN;
+ out_be64(p->regs + PHB_PHB4_CONFIG, reg);
+
+	/* Set an MBT BAR to pass the CAPI MMIO window while keeping
+	 * the other MMIO windows intact
+	 */
+ mbt0 = IODA3_MBT0_ENABLE | IODA3_MBT0_TYPE_M64 |
+ SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
+ SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
+ (0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
+
+ mbt1 = IODA3_MBT1_ENABLE |
+ (0x00ff000000000000ULL & IODA3_MBT1_MASK) |
+ SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
+
+ for (i = 0; i < p->mbt_size; i++) {
+		/* check whether the capi mmio window is already present */
+ if ((p->mbt_cache[i][0] == mbt0) &&
+ (p->mbt_cache[i][1] == mbt1))
+ break;
+
+		/* remember the first free entry */
+ if ((window_num == -1) &&
+ ((!(p->mbt_cache[i][0] & IODA3_MBT0_ENABLE)) &&
+ (!(p->mbt_cache[i][1] & IODA3_MBT1_ENABLE))))
+ window_num = i;
+ }
+
+ if (window_num >= 0 && i == p->mbt_size) {
+ /* no capi mmio window found, so add it */
+ p->mbt_cache[window_num][0] = mbt0;
+ p->mbt_cache[window_num][1] = mbt1;
+
+ phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
+ out_be64(p->regs + PHB_IODA_DATA0, mbt0);
+ out_be64(p->regs + PHB_IODA_DATA0, mbt1);
+ } else if (i == p->mbt_size) {
+ /* mbt cache full, this case should never happen */
+ PHBERR(p, "CAPP: Failed to add CAPI mmio window\n");
+ } else {
+ /* duplicate entry. Nothing to do */
+ }
+
+ phb4_init_capp_errors(p);
+
+ phb4_init_capp_regs(p, capp_eng);
+
+ if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
+ CAPP_TB,
+ PHB4_CAPP_REG_OFFSET(p)))
+ PHBERR(p, "CAPP: Failed to sync timebase\n");
+
+ /* set callbacks to handle HMI events */
+ capi_ops.get_capp_info = &phb4_get_capp_info;
+
+ return OPAL_SUCCESS;
+}
+
+
+static int64_t phb4_init_capp(struct phb4 *p)
+{
+ struct capp *capp;
+ int rc;
+
+ if (p->index != CAPP0_PHB_INDEX &&
+ p->index != CAPP1_PHB_INDEX)
+ return OPAL_UNSUPPORTED;
+
+ capp = zalloc(sizeof(struct capp));
+ if (capp == NULL)
+ return OPAL_NO_MEM;
+
+ if (p->index == CAPP0_PHB_INDEX) {
+ capp->capp_index = 0;
+ capp->capp_xscom_offset = 0;
+
+ } else if (p->index == CAPP1_PHB_INDEX) {
+ capp->capp_index = 1;
+ capp->capp_xscom_offset = CAPP1_REG_OFFSET;
+ }
+
+ capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
+ capp->chip_id = p->chip_id;
+
+ /* Load capp microcode into the capp unit */
+ rc = load_capp_ucode(p);
+
+ if (rc == OPAL_SUCCESS)
+ p->capp = capp;
+ else
+ free(capp);
+
+ return rc;
+}
+
+static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
+ uint64_t pe_number)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ struct proc_chip *chip = get_chip(p->chip_id);
+ struct capp *capp = p->capp;
+ uint64_t reg, ret;
+
+ /* No CAPI on P10. OpenCAPI only */
+ if (is_phb5())
+ return OPAL_UNSUPPORTED;
+
+	/* Can't do a mode switch while CAPP is in recovery mode */
+ ret = capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
+ if (ret != OPAL_SUCCESS)
+ return ret;
+
+ if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
+ PHBDBG(p, "CAPP: recovery in progress\n");
+ return OPAL_BUSY;
+ }
+
+
+ switch (mode) {
+
+ case OPAL_PHB_CAPI_MODE_DMA: /* Enabled by default on p9 */
+ case OPAL_PHB_CAPI_MODE_SNOOP_ON:
+ /* nothing to do on P9 if CAPP is already enabled */
+ ret = p->capp->phb ? OPAL_SUCCESS : OPAL_UNSUPPORTED;
+ break;
+
+ case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
+ ret = p->capp->phb ? OPAL_UNSUPPORTED : OPAL_SUCCESS;
+ break;
+
+ case OPAL_PHB_CAPI_MODE_PCIE:
+ if (p->flags & PHB4_CAPP_DISABLE) {
+ /* We are in middle of a CAPP disable */
+ ret = OPAL_BUSY;
+
+ } else if (capp->phb) {
+ /* Kick start a creset */
+ p->flags |= PHB4_CAPP_DISABLE;
+ PHBINF(p, "CAPP: PCIE mode needs a cold-reset\n");
+ /* Kick off the pci state machine */
+ ret = phb4_creset(phb->slot);
+ ret = ret > 0 ? OPAL_BUSY : ret;
+
+ } else {
+ /* PHB already in PCI mode */
+ ret = OPAL_SUCCESS;
+ }
+ break;
+
+ case OPAL_PHB_CAPI_MODE_CAPI: /* Fall Through */
+ case OPAL_PHB_CAPI_MODE_DMA_TVT1:
+ /* Make sure that PHB is not disabling CAPP */
+ if (p->flags & PHB4_CAPP_DISABLE) {
+ PHBERR(p, "CAPP: Disable in progress\n");
+ ret = OPAL_BUSY;
+ break;
+ }
+
+ /* Check if ucode is available */
+ if (!capp_ucode_loaded(chip, p->index)) {
+ PHBERR(p, "CAPP: ucode not loaded\n");
+ ret = OPAL_RESOURCE;
+ break;
+ }
+
+ /*
+ * Mark the CAPP attached to the PHB right away so that
+		 * if an MCE happens during CAPP init we can handle it.
+ * In case of an error in CAPP init we remove the PHB
+ * from the attached_mask later.
+ */
+ capp->phb = phb;
+ capp->attached_pe = pe_number;
+
+ if (mode == OPAL_PHB_CAPI_MODE_DMA_TVT1)
+ ret = enable_capi_mode(p, pe_number,
+ CAPP_MIN_STQ_ENGINES |
+ CAPP_MAX_DMA_READ_ENGINES);
+
+ else
+ ret = enable_capi_mode(p, pe_number,
+ CAPP_MAX_STQ_ENGINES |
+ CAPP_MIN_DMA_READ_ENGINES);
+ if (ret == OPAL_SUCCESS) {
+ /* register notification on system shutdown */
+ opal_add_host_sync_notifier(&phb4_host_sync_reset, p);
+
+ } else {
+ /* In case of an error mark the PHB detached */
+ capp->phb = NULL;
+ capp->attached_pe = phb4_get_reserved_pe_number(phb);
+ }
+ break;
+
+ default:
+ ret = OPAL_UNSUPPORTED;
+ break;
+ };
+
+ return ret;
+}
+
+static void phb4_p2p_set_initiator(struct phb4 *p, uint16_t pe_number)
+{
+ uint64_t tve;
+ uint16_t window_id = (pe_number << 1) + 1;
+
+ /*
+ * Initiator needs access to the MMIO space of the target,
+ * which is well beyond the 'normal' memory area. Set its TVE
+ * with no range checking.
+ */
+ PHBDBG(p, "Setting TVE#1 for peer-to-peer for pe %d\n", pe_number);
+ tve = PPC_BIT(51);
+ phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
+ out_be64(p->regs + PHB_IODA_DATA0, tve);
+ p->tve_cache[window_id] = tve;
+}
+
+static void phb4_p2p_set_target(struct phb4 *p, bool enable)
+{
+ uint64_t val;
+
+ /*
+ * Enabling p2p on a target PHB reserves an outbound (as seen
+ * from the CPU) store queue for p2p
+ */
+ PHBDBG(p, "%s peer-to-peer\n", (enable ? "Enabling" : "Disabling"));
+ xscom_read(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, &val);
+ if (enable)
+ val |= XPEC_NEST_STK_PBCQ_MODE_P2P;
+ else
+ val &= ~XPEC_NEST_STK_PBCQ_MODE_P2P;
+ xscom_write(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, val);
+}
+
+static void phb4_set_p2p(struct phb *phb, uint64_t mode, uint64_t flags,
+ uint16_t pe_number)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+
+ switch (mode) {
+ case OPAL_PCI_P2P_INITIATOR:
+ if (flags & OPAL_PCI_P2P_ENABLE)
+ phb4_p2p_set_initiator(p, pe_number);
+		/*
+		 * When disabling p2p on the initiator, we should
+		 * reset the TVE to its default bypass setting, but it
+		 * is more easily done from the OS, as it knows the
+		 * start and end address and there's already an
+		 * OPAL call for it, so let Linux handle it.
+		 */
+ break;
+ case OPAL_PCI_P2P_TARGET:
+ phb4_p2p_set_target(p, !!(flags & OPAL_PCI_P2P_ENABLE));
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static int64_t phb4_set_capp_recovery(struct phb *phb)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+
+ if (p->flags & PHB4_CAPP_RECOVERY)
+ return 0;
+
+ /* set opal event flag to indicate eeh condition */
+ opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
+ OPAL_EVENT_PCI_ERROR);
+
+ p->flags |= PHB4_CAPP_RECOVERY;
+
+ return 0;
+}
+
+/*
+ * Return the address out of a PBCQ Tunnel Bar register.
+ */
+static void phb4_get_tunnel_bar(struct phb *phb, uint64_t *addr)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t val;
+
+ xscom_read(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
+ &val);
+ *addr = val >> 8;
+}
+
+/*
+ * Set PBCQ Tunnel Bar register.
+ * Store addr bits [8:50] in PBCQ Tunnel Bar register bits [0:42].
+ * Note that addr bits [8:50] must also match PSL_TNR_ADDR[8:50].
+ * Reset register if val == 0.
+ *
+ * This interface is required to let device drivers set the Tunnel Bar
+ * value of their choice.
+ *
+ * Compatibility with older versions of Linux, which do not set the
+ * Tunnel Bar with phb4_set_tunnel_bar(), is ensured by enable_capi_mode(),
+ * which sets the default value that used to be assumed.
+ */
+static int64_t phb4_set_tunnel_bar(struct phb *phb, uint64_t addr)
+{
+ struct phb4 *p = phb_to_phb4(phb);
+ uint64_t mask = 0x00FFFFFFFFFFE000ULL;
+
+ if (!addr) {
+ /* Reset register */
+ xscom_write(p->chip_id,
+ p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR, addr);
+ return OPAL_SUCCESS;
+ }
+ if ((addr & ~mask))
+ return OPAL_PARAMETER;
+ if (!(addr & mask))
+ return OPAL_PARAMETER;
+
+ xscom_write(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
+ (addr & mask) << 8);
+ return OPAL_SUCCESS;
+}
+
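+/* OPAL PHB callbacks for PHB4 */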
+static const struct phb_ops phb4_ops = {
+ .cfg_read8 = phb4_pcicfg_read8,
+ .cfg_read16 = phb4_pcicfg_read16,
+ .cfg_read32 = phb4_pcicfg_read32,
+ .cfg_write8 = phb4_pcicfg_write8,
+ .cfg_write16 = phb4_pcicfg_write16,
+ .cfg_write32 = phb4_pcicfg_write32,
+ .get_reserved_pe_number = phb4_get_reserved_pe_number,
+ .device_init = phb4_device_init,
+ .device_remove = NULL,
+ .ioda_reset = phb4_ioda_reset,
+ .papr_errinjct_reset = phb4_papr_errinjct_reset,
+ .pci_reinit = phb4_pci_reinit,
+ .set_phb_mem_window = phb4_set_phb_mem_window,
+ .phb_mmio_enable = phb4_phb_mmio_enable,
+ .map_pe_mmio_window = phb4_map_pe_mmio_window,
+ .map_pe_dma_window = phb4_map_pe_dma_window,
+ .map_pe_dma_window_real = phb4_map_pe_dma_window_real,
+ .set_option = phb4_set_option,
+ .get_option = phb4_get_option,
+ .set_xive_pe = phb4_set_ive_pe,
+ .get_msi_32 = phb4_get_msi_32,
+ .get_msi_64 = phb4_get_msi_64,
+ .set_pe = phb4_set_pe,
+ .set_peltv = phb4_set_peltv,
+ .eeh_freeze_status = phb4_eeh_freeze_status,
+ .eeh_freeze_clear = phb4_eeh_freeze_clear,
+ .eeh_freeze_set = phb4_eeh_freeze_set,
+ .next_error = phb4_eeh_next_error,
+ .err_inject = phb4_err_inject,
+ .get_diag_data2 = phb4_get_diag_data,
+ .tce_kill = phb4_tce_kill,
+ .set_capi_mode = phb4_set_capi_mode,
+ .set_p2p = phb4_set_p2p,
+ .set_capp_recovery = phb4_set_capp_recovery,
+ .get_tunnel_bar = phb4_get_tunnel_bar,
+ .set_tunnel_bar = phb4_set_tunnel_bar,
+};
+
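+/*
+ * Init_18..41: interrupt notification setup (XIVE on P9, XIVE2 on P10)
+ * and the architected IODA3 table bases (RTT, PELT-V, PEST, M32, ...).
+ */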
+static void phb4_init_ioda3(struct phb4 *p)
+{
+ if (is_phb5()) {
+ /*
+ * When ABT is on, the MSIs on the PHB use the PQ state bits
+ * of the IC and MSI triggers from the PHB are forwarded
+ * directly to the IC ESB page. However, the LSIs are still
+ * controlled locally on the PHB and LSI triggers use a
+ * special offset for trigger injection.
+ */
+ if (phb_abt_mode(p)) {
+ uint64_t mmio_base = xive2_get_esb_base(p->base_msi);
+
+ PHBDBG(p, "Using ABT mode. ESB: 0x%016llx\n", mmio_base);
+
+ /* Init_18 - Interrupt Notify Base Address */
+ out_be64(p->regs + PHB_INT_NOTIFY_ADDR,
+ PHB_INT_NOTIFY_ADDR_64K | mmio_base);
+
+ /* Interrupt Notify Base Index is unused */
+ } else {
+ p->irq_port = xive2_get_notify_port(p->chip_id,
+ XIVE_HW_SRC_PHBn(p->index));
+
+ PHBDBG(p, "Using IC notif page at 0x%016llx\n",
+ p->irq_port);
+
+ /* Init_18 - Interrupt Notify Base Address */
+ out_be64(p->regs + PHB_INT_NOTIFY_ADDR, p->irq_port);
+
+ /* Init_19 - Interrupt Notify Base Index */
+ out_be64(p->regs + PHB_INT_NOTIFY_INDEX,
+ xive2_get_notify_base(p->base_msi));
+ }
+
+ } else { /* p9 */
+ p->irq_port = xive_get_notify_port(p->chip_id,
+ XIVE_HW_SRC_PHBn(p->index));
+ /* Init_18 - Interrupt Notify Base Address */
+ out_be64(p->regs + PHB_INT_NOTIFY_ADDR, p->irq_port);
+
+ /* Init_19 - Interrupt Notify Base Index */
+ out_be64(p->regs + PHB_INT_NOTIFY_INDEX,
+ xive_get_notify_base(p->base_msi));
+ }
+
+ /* Init_19x - Not in spec: Initialize source ID */
+ PHBDBG(p, "Reset state SRC_ID: %016llx\n",
+ in_be64(p->regs + PHB_LSI_SOURCE_ID));
+ out_be64(p->regs + PHB_LSI_SOURCE_ID,
+ SETFIELD(PHB_LSI_SRC_ID, 0ull, (p->num_irqs - 1) >> 3));
+
+ /* Init_20 - RTT BAR */
+ out_be64(p->regs + PHB_RTT_BAR, (u64) p->tbl_rtt | PHB_RTT_BAR_ENABLE);
+
+ /* Init_21 - PELT-V BAR */
+ out_be64(p->regs + PHB_PELTV_BAR,
+ (u64) p->tbl_peltv | PHB_PELTV_BAR_ENABLE);
+
+ /* Init_22 - Setup M32 starting address */
+ out_be64(p->regs + PHB_M32_START_ADDR, M32_PCI_START);
+
+ /* Init_23 - Setup PEST BAR */
+ out_be64(p->regs + PHB_PEST_BAR,
+ p->tbl_pest | PHB_PEST_BAR_ENABLE);
+
+ /* Init_24 - CRW Base Address Reg */
+ /* See enable_capi_mode() */
+
+ if (is_phb4()) {
+ /* Init_25 - ASN Compare/Mask - P9 only */
+ out_be64(p->regs + PHB_ASN_CMPM, ((u64)ASNIND << 48) |
+ ((u64)ASNMASK << 32) | PHB_ASN_CMPM_ENABLE);
+ }
+
+ /* Init_26 - CAPI Compare/Mask */
+ /* See enable_capi_mode() */
+ /* if CAPP being disabled then reset CAPI Compare/Mask Register */
+	/* If CAPP is being disabled, reset the CAPI Compare/Mask Register */
+ out_be64(p->regs + PHB_CAPI_CMPM, 0);
+
+ /* Init_27 - PCIE Outbound upper address */
+ out_be64(p->regs + PHB_M64_UPPER_BITS, 0);
+
+ /* Init_28 - PHB4 Configuration */
+ out_be64(p->regs + PHB_PHB4_CONFIG,
+ PHB_PHB4C_32BIT_MSI_EN |
+ PHB_PHB4C_64BIT_MSI_EN);
+
+ /* Init_29 - At least 256ns delay according to spec. Do a dummy
+ * read first to flush posted writes
+ */
+ in_be64(p->regs + PHB_PHB4_CONFIG);
+ time_wait_us(2);
+
+ /* Init_30..41 - On-chip IODA tables init */
+ phb4_ioda_reset(&p->phb, false);
+}
+
+/* phb4_init_rc - Initialize the Root Complex config space
+ */
+static bool phb4_init_rc_cfg(struct phb4 *p)
+{
+ int64_t ecap, aercap;
+
+ /* XXX Handle errors ? */
+
+ /* Init_46:
+ *
+ * Set primary bus to 0, secondary to 1 and subordinate to 0xff
+ */
+ phb4_pcicfg_write32(&p->phb, 0, PCI_CFG_PRIMARY_BUS, 0x00ff0100);
+
+ /* Init_47 - Clear errors */
+ /* see phb4_rc_err_clear() called below */
+
+ /* Init_48
+ *
+ * PCIE Device control/status, enable error reporting, disable relaxed
+ * ordering, set MPS to 128 (see note), clear errors.
+ *
+ * Note: The doc recommends to set MPS to 512. This has proved to have
+ * some issues as it requires specific clamping of MRSS on devices and
+ * we've found devices in the field that misbehave when doing that.
+ *
+ * We currently leave it all to 128 bytes (minimum setting) at init
+ * time. The generic PCIe probing later on might apply a different
+ * value, or the kernel will, but we play it safe at early init
+ */
+ if (p->ecap <= 0) {
+ ecap = pci_find_cap(&p->phb, 0, PCI_CFG_CAP_ID_EXP);
+ if (ecap < 0) {
+ PHBERR(p, "Can't locate PCI-E capability\n");
+ return false;
+ }
+ p->ecap = ecap;
+ } else {
+ ecap = p->ecap;
+ }
+
+ phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DEVCTL,
+ PCICAP_EXP_DEVCTL_CE_REPORT |
+ PCICAP_EXP_DEVCTL_NFE_REPORT |
+ PCICAP_EXP_DEVCTL_FE_REPORT |
+ PCICAP_EXP_DEVCTL_UR_REPORT |
+ SETFIELD(PCICAP_EXP_DEVCTL_MPS, 0, PCIE_MPS_128B));
+
+ /* Init_49 - Device Control/Status 2 */
+ phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DCTL2,
+ SETFIELD(PCICAP_EXP_DCTL2_CMPTOUT, 0, 0x5) |
+ PCICAP_EXP_DCTL2_ARI_FWD);
+
+ /* Init_50..54
+ *
+ * AER inits
+ */
+ if (p->aercap <= 0) {
+ aercap = pci_find_ecap(&p->phb, 0, PCIECAP_ID_AER, NULL);
+ if (aercap < 0) {
+ PHBERR(p, "Can't locate AER capability\n");
+ return false;
+ }
+ p->aercap = aercap;
+ } else {
+ aercap = p->aercap;
+ }
+
+ /* Disable some error reporting as per the PHB4 spec */
+ phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_UE_MASK,
+ PCIECAP_AER_UE_POISON_TLP |
+ PCIECAP_AER_UE_COMPL_TIMEOUT |
+ PCIECAP_AER_UE_COMPL_ABORT);
+
+ /* Enable ECRC generation & checking */
+ phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_CAPCTL,
+ PCIECAP_AER_CAPCTL_ECRCG_EN |
+ PCIECAP_AER_CAPCTL_ECRCC_EN);
+
+ phb4_rc_err_clear(p);
+
+ return true;
+}
+
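+/*
+ * Init_55..121: program the error enable/mask registers for each PHB
+ * error macro (PBL, REGB, TXE, RXE_ARB, RXE_MRG, RXE_TCE, RXPHB and
+ * LEM).
+ */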
+static void phb4_init_errors(struct phb4 *p)
+{
+ /* Init_55..63 - PBL errors */
+ out_be64(p->regs + 0x1900, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x1908, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1920, 0x000000004d1780f8ull);
+ out_be64(p->regs + 0x1928, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1930, 0xffffffffb2f87f07ull);
+ out_be64(p->regs + 0x1940, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1948, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1950, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1958, 0x0000000000000000ull);
+
+ /* Init_64..72 - REGB errors */
+ out_be64(p->regs + 0x1c00, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x1c08, 0x0000000000000000ull);
+ /* Enable/disable error status indicators that trigger irqs */
+ if (p->has_link) {
+ out_be64(p->regs + 0x1c20, 0x2130006efca8bc00ull);
+ out_be64(p->regs + 0x1c30, 0xde1fff91035743ffull);
+ } else {
+ out_be64(p->regs + 0x1c20, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1c30, 0x0000000000000000ull);
+ }
+ out_be64(p->regs + 0x1c28, 0x0080000000000000ull);
+ out_be64(p->regs + 0x1c40, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1c48, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1c50, 0x0000000000000000ull);
+ out_be64(p->regs + 0x1c58, 0x0040000000000000ull);
+
+ /* Init_73..81 - TXE errors */
+ out_be64(p->regs + 0x0d08, 0x0000000000000000ull);
+
+ /* Errata: Clear bit 17, otherwise a CFG write UR/CA will incorrectly
+	 * freeze a "random" PE (whichever PE last did an MMIO)
+ */
+ if (is_phb5()) {
+ out_be64(p->regs + 0x0d28, 0x0000500a00000000ull);
+ out_be64(p->regs + 0x0d00, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0d18, 0xffffff0fffffffffull);
+ out_be64(p->regs + 0x0d30, 0xdff7af41f7ddffdfull);
+ } else {
+ out_be64(p->regs + 0x0d28, 0x0000000a00000000ull);
+ if (phb4_is_dd20(p)) {
+ out_be64(p->regs + 0x0d00, 0xf3acff0ff7ddfff0ull);
+ out_be64(p->regs + 0x0d18, 0xf3acff0ff7ddfff0ull);
+ out_be64(p->regs + 0x0d30, 0xdfffbd05f7ddfff0ull); /* XXX CAPI has diff. value */
+ } else {
+ out_be64(p->regs + 0x0d00, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0d18, 0xffffff0fffffffffull);
+ out_be64(p->regs + 0x0d30, 0xdff7bd05f7ddfff0ull);
+ }
+ }
+
+ out_be64(p->regs + 0x0d40, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0d48, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0d50, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0d58, 0x0000000000000000ull);
+
+ /* Init_82..90 - RXE_ARB errors */
+ out_be64(p->regs + 0x0d80, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0d88, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0d98, 0xfffffffffbffffffull);
+ out_be64(p->regs + 0x0da8, 0xc00018b801000060ull);
+ /*
+ * Errata ER20161123 says we should set the top two bits in
+ * 0x0db0 but this causes config space accesses which don't
+ * get a response to fence the PHB. This breaks probing,
+ * hence we don't set them here.
+ */
+ out_be64(p->regs + 0x0db0, 0x3bffd703fa7fbf8full); /* XXX CAPI has diff. value */
+ out_be64(p->regs + 0x0dc0, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0dc8, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0dd0, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0dd8, 0x0000000004000000ull);
+
+ /* Init_91..99 - RXE_MRG errors */
+ out_be64(p->regs + 0x0e00, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0e08, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0e18, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0e28, 0x0000600000000000ull);
+ out_be64(p->regs + 0x0e30, 0xfffffeffff7fff57ull);
+ out_be64(p->regs + 0x0e40, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0e48, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0e50, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0e58, 0x0000000000000000ull);
+
+ /* Init_100..108 - RXE_TCE errors */
+ out_be64(p->regs + 0x0e80, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0e88, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0e98, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0ea8, 0x60000000c0000000ull);
+ out_be64(p->regs + 0x0eb0, 0x9faeffaf3fffffffull); /* XXX CAPI has diff. value */
+ out_be64(p->regs + 0x0ec0, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0ec8, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0ed0, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0ed8, 0x0000000000000000ull);
+
+ /* Init_109..117 - RXPHB errors */
+ out_be64(p->regs + 0x0c80, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0c88, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0c98, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0ca8, 0x0000004000000000ull);
+ out_be64(p->regs + 0x0cb0, 0x35777033ff000000ull); /* XXX CAPI has diff. value */
+ out_be64(p->regs + 0x0cc0, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0cc8, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0cd0, 0x0000000000000000ull);
+ out_be64(p->regs + 0x0cd8, 0x0000000000000000ull);
+
+ /* Init_118..121 - LEM */
+ out_be64(p->regs + 0x0c00, 0x0000000000000000ull);
+ if (phb4_is_dd20(p)) {
+ out_be64(p->regs + 0x0c30, 0xf3ffffffffffffffull);
+ out_be64(p->regs + 0x0c38, 0xf3ffffffffffffffull);
+ } else {
+ out_be64(p->regs + 0x0c30, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x0c38, 0xffffffffffffffffull);
+ }
+ out_be64(p->regs + 0x0c40, 0x0000000000000000ull);
+}
+
+
+static bool phb4_wait_dlp_reset(struct phb4 *p)
+{
+ unsigned int i;
+ uint64_t val;
+
+ /*
+ * Firmware cannot access the UTL core regs or PCI config space
+ * until the cores are out of DL_PGRESET.
+ * DL_PGRESET should be polled until it is inactive with a value
+ * of '0'. The recommended polling frequency is once every 1ms.
+ * Firmware should poll at least 200 attempts before giving up.
+ * MMIO Stores to the link are silently dropped by the UTL core if
+ * the link is down.
+ * MMIO Loads to the link will be dropped by the UTL core and will
+ * eventually time-out and will return an all ones response if the
+ * link is down.
+ */
+#define DLP_RESET_ATTEMPTS 200
+
+ PHBDBG(p, "Waiting for DLP PG reset to complete...\n");
+ for (i = 0; i < DLP_RESET_ATTEMPTS; i++) {
+ val = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
+ if (!(val & PHB_PCIE_DLP_DL_PGRESET))
+ break;
+ time_wait_ms(1);
+ }
+ if (val & PHB_PCIE_DLP_DL_PGRESET) {
+ PHBERR(p, "Timeout waiting for DLP PG reset !\n");
+ return false;
+ }
+ return true;
+}
+
+static void phb4_init_hw(struct phb4 *p)
+{
+ uint64_t val, creset;
+
+ PHBDBG(p, "Initializing PHB...\n");
+
+ /* Init_1 - Sync reset
+ *
+ * At this point we assume the PHB has already been reset.
+ */
+
+ /* Init_2 - Mask FIRs */
+ out_be64(p->regs + PHB_LEM_ERROR_MASK, 0xffffffffffffffffull);
+
+ /* Init_3 - TCE tag enable */
+ out_be64(p->regs + PHB_TCE_TAG_ENABLE, 0xffffffffffffffffull);
+
+ /* Init_4 - PCIE System Configuration Register
+ *
+ * Adjust max speed based on system config
+ */
+ val = in_be64(p->regs + PHB_PCIE_SCR);
+ PHBDBG(p, "Default system config: 0x%016llx\n", val);
+ val = SETFIELD(PHB_PCIE_SCR_MAXLINKSPEED, val, p->max_link_speed);
+ out_be64(p->regs + PHB_PCIE_SCR, val);
+ PHBDBG(p, "New system config : 0x%016llx\n",
+ in_be64(p->regs + PHB_PCIE_SCR));
+
+ /* Init_5 - deassert CFG reset */
+ creset = in_be64(p->regs + PHB_PCIE_CRESET);
+ PHBDBG(p, "Initial PHB CRESET is 0x%016llx\n", creset);
+ creset &= ~PHB_PCIE_CRESET_CFG_CORE;
+ out_be64(p->regs + PHB_PCIE_CRESET, creset);
+
+ /* Init_6..13 - PCIE DLP Lane EQ control */
+ if (p->lane_eq) {
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL0, be64_to_cpu(p->lane_eq[0]));
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL1, be64_to_cpu(p->lane_eq[1]));
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL2, be64_to_cpu(p->lane_eq[2]));
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL3, be64_to_cpu(p->lane_eq[3]));
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL40, be64_to_cpu(p->lane_eq[4]));
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL41, be64_to_cpu(p->lane_eq[5]));
+ if (is_phb5()) {
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL50, be64_to_cpu(p->lane_eq[6]));
+ out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL51, be64_to_cpu(p->lane_eq[7]));
+ }
+ }
+ if (!p->lane_eq_en) {
+		/* Read-modify-write: set the two EQ phase bypass bits */
+ PHBDBG(p, "LINK: Disabling Lane EQ\n");
+ val = in_be64(p->regs + PHB_PCIE_DLP_CTL);
+ val |= PHB_PCIE_DLP_CTL_BYPASS_PH2 | PHB_PCIE_DLP_CTL_BYPASS_PH3;
+ out_be64(p->regs + PHB_PCIE_DLP_CTL, val);
+ }
+
+ if (is_phb5()) {
+ /* disable scaled flow control for now. SW527785 */
+ PHBDBG(p, "LINK: Disabling scaled flow control\n");
+ val = in_be64(p->regs + PHB_PCIE_DLP_CTL);
+ val |= PHB_PCIE_DLP_CTL_SFC_DISABLE;
+ out_be64(p->regs + PHB_PCIE_DLP_CTL, val);
+
+ /* lane equalization settings need to be tuned on P10 */
+ out_be64(p->regs + PHB_PCIE_PDL_PHY_EQ_CNTL,
+ 0x80F4FFFFFF0F9C00);
+ }
+
+ /* Init_14 - Clear link training */
+ phb4_pcicfg_write32(&p->phb, 0, 0x78,
+ 0x07FE0000 | p->max_link_speed);
+
+ /* Init_15 - deassert cores reset */
+ /*
+	 * Lift the PHB resets but not PERST; PERST will be lifted
+	 * later by the initial PERST state machine
+ */
+ creset &= ~(PHB_PCIE_CRESET_TLDLP | PHB_PCIE_CRESET_PBL);
+ creset |= PHB_PCIE_CRESET_PIPE_N;
+ out_be64(p->regs + PHB_PCIE_CRESET, creset);
+
+ /* Init_16 - Wait for DLP PGRESET to clear */
+ if (!phb4_wait_dlp_reset(p))
+ goto failed;
+
+ /* Init_17 - PHB Control */
+ val = PHB_CTRLR_IRQ_PGSZ_64K;
+ val |= PHB_CTRLR_TCE_CLB_DISABLE; // HW557787 circumvention
+ val |= SETFIELD(PHB_CTRLR_TVT_ADDR_SEL, 0ull, TVT_2_PER_PE);
+ if (phb_pq_disable(p))
+ val |= PHB_CTRLR_IRQ_PQ_DISABLE;
+ if (phb_abt_mode(p))
+ val |= PHB_CTRLR_IRQ_ABT_MODE;
+ if (phb_can_store_eoi(p)) {
+ val |= PHB_CTRLR_IRQ_STORE_EOI;
+ PHBDBG(p, "store EOI is enabled\n");
+ }
+
+ if (!pci_eeh_mmio)
+ val |= PHB_CTRLR_MMIO_EEH_DISABLE;
+
+ out_be64(p->regs + PHB_CTRLR, val);
+
+ /* Init_18..41 - Architected IODA3 inits */
+ phb4_init_ioda3(p);
+
+ /* Init_42..45 - Clear DLP error logs */
+ out_be64(p->regs + 0x1aa0, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x1aa8, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x1ab0, 0xffffffffffffffffull);
+ out_be64(p->regs + 0x1ab8, 0x0);
+
+
+ /* Init_46..54 : Init root complex config space */
+ if (!phb4_init_rc_cfg(p))
+ goto failed;
+
+ /* Init_55..121 : Setup error registers */
+ phb4_init_errors(p);
+
+ /* Init_122..123 : Wait for link
+ * NOTE: At this point the spec waits for the link to come up. We
+ * don't bother as we are doing a PERST soon.
+ */
+
+ /* Init_124 : NBW. XXX TODO */
+ /* See enable_capi_mode() */
+
+ /* Init_125 : Setup PCI command/status on root complex
+ * I don't know why the spec does this now and not earlier, so
+ * to be sure to get it right we might want to move it to the freset
+ * state machine, though the generic PCI layer will probably do
+ * this anyway (ie, enable MEM, etc... in the RC)
+	 */
+ phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_CMD,
+ PCI_CFG_CMD_MEM_EN |
+ PCI_CFG_CMD_BUS_MASTER_EN);
+
+ /* Clear errors */
+ phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_STAT,
+ PCI_CFG_STAT_SENT_TABORT |
+ PCI_CFG_STAT_RECV_TABORT |
+ PCI_CFG_STAT_RECV_MABORT |
+ PCI_CFG_STAT_SENT_SERR |
+ PCI_CFG_STAT_RECV_PERR);
+
+ /* Init_126..130 - Re-enable error interrupts */
+ phb4_int_unmask_all(p);
+
+ /* Init_131 - Re-enable LEM error mask */
+ out_be64(p->regs + PHB_LEM_ERROR_MASK, 0x0000000000000000ull);
+
+
+ /* Init_132 - Enable DMA address speculation */
+ out_be64(p->regs + PHB_TCE_SPEC_CTL, 0x0000000000000000ull);
+
+ /* Init_133 - Timeout Control Register 1 */
+ out_be64(p->regs + PHB_TIMEOUT_CTRL1, 0x0015150000150000ull);
+
+ /* Init_134 - Timeout Control Register 2 */
+ out_be64(p->regs + PHB_TIMEOUT_CTRL2, 0x0000151500000000ull);
+
+ /* Init_135 - PBL Timeout Control Register */
+ out_be64(p->regs + PHB_PBL_TIMEOUT_CTRL, 0x2013000000000000ull);
+
+ /* Mark the PHB as functional which enables all the various sequences */
+ p->broken = false;
+
+ PHBDBG(p, "Initialization complete\n");
+
+ return;
+
+ failed:
+ PHBERR(p, "Initialization failed\n");
+ p->broken = true;
+}
+
+/* FIXME: Use SCOMs rather than MMIO in case we are fenced */
+static bool phb4_read_capabilities(struct phb4 *p)
+{
+ uint64_t val;
+
+ /* XXX Should make sure ETU is out of reset ! */
+
+ /* Grab version and fit it in an int */
+ val = phb4_read_reg_asb(p, PHB_VERSION);
+ if (val == 0 || val == 0xffffffffffffffffUL) {
+ PHBERR(p, "Failed to read version, PHB appears broken\n");
+ return false;
+ }
+
+ p->rev = ((val >> 16) & 0x00ff0000) | (val & 0xffff);
+ PHBDBG(p, "Core revision 0x%x\n", p->rev);
+
+ /* Read EEH capabilities */
+ val = in_be64(p->regs + PHB_PHB4_EEH_CAP);
+ if (val == 0xffffffffffffffffUL) {
+ PHBERR(p, "Failed to read EEH cap, PHB appears broken\n");
+ return false;
+ }
+ p->max_num_pes = val >> 52;
+ if (p->max_num_pes >= 512) {
+ p->mrt_size = 16;
+ p->mbt_size = 32;
+ p->tvt_size = 1024;
+ } else {
+ p->mrt_size = 8;
+ p->mbt_size = 16;
+ p->tvt_size = 512;
+ }
+
+ val = in_be64(p->regs + PHB_PHB4_IRQ_CAP);
+ if (val == 0xffffffffffffffffUL) {
+ PHBERR(p, "Failed to read IRQ cap, PHB appears broken\n");
+ return false;
+ }
+ p->num_irqs = val & 0xffff;
+
+ /* This works for 512 PEs. FIXME calculate for any hardware
+ * size returned above
+ */
+ p->tbl_peltv_size = PELTV_TABLE_SIZE_MAX;
+
+ p->tbl_pest_size = p->max_num_pes*16;
+
+	PHBDBG(p, "Found %d max PEs and %d IRQs\n",
+ p->max_num_pes, p->num_irqs);
+
+ return true;
+}
+
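+/*
+ * Allocate the in-memory RTT, PELT-V and PEST tables and point every
+ * RTT entry at the reserved PE until devices are assigned.
+ */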
+static void phb4_allocate_tables(struct phb4 *p)
+{
+ uint32_t i;
+
+ /* XXX Our current memalign implementation sucks,
+ *
+ * It will do the job, however it doesn't support freeing
+ * the memory and wastes space by always allocating twice
+ * as much as requested (size + alignment)
+ */
+ p->tbl_rtt = local_alloc(p->chip_id, RTT_TABLE_SIZE, RTT_TABLE_SIZE);
+ assert(p->tbl_rtt);
+ for (i = 0; i < RTT_TABLE_ENTRIES; i++)
+ p->tbl_rtt[i] = cpu_to_be16(PHB4_RESERVED_PE_NUM(p));
+
+ p->tbl_peltv = local_alloc(p->chip_id, p->tbl_peltv_size, p->tbl_peltv_size);
+ assert(p->tbl_peltv);
+ memset(p->tbl_peltv, 0, p->tbl_peltv_size);
+
+ p->tbl_pest = (uint64_t)local_alloc(p->chip_id, p->tbl_pest_size, p->tbl_pest_size);
+ assert(p->tbl_pest);
+ memset((void *)p->tbl_pest, 0, p->tbl_pest_size);
+}
+
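+/* Populate the PHB device-tree node with the properties the OS expects */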
+static void phb4_add_properties(struct phb4 *p)
+{
+ struct dt_node *np = p->phb.dt_node;
+ uint32_t lsibase, icsp = get_ics_phandle();
+ uint64_t m32b, m64b, m64s;
+
+ /* Add various properties that HB doesn't have to
+ * add, some of them simply because they result from
+ * policy decisions made in skiboot rather than in HB
+ * such as the MMIO windows going to PCI, interrupts,
+ * etc...
+ */
+ dt_add_property_cells(np, "#address-cells", 3);
+ dt_add_property_cells(np, "#size-cells", 2);
+ dt_add_property_cells(np, "#interrupt-cells", 1);
+ dt_add_property_cells(np, "bus-range", 0, 0xff);
+ dt_add_property_cells(np, "clock-frequency", 0x200, 0); /* ??? */
+
+ dt_add_property_cells(np, "interrupt-parent", icsp);
+
+ /* XXX FIXME: add slot-name */
+ //dt_property_cell("bus-width", 8); /* Figure it out from VPD ? */
+
+ /* "ranges", we only expose M32 (PHB4 doesn't do IO)
+ *
+ * Note: The kernel expects us to have chopped of 64k from the
+ * M32 size (for the 32-bit MSIs). If we don't do that, it will
+ * get confused (OPAL does it)
+ */
+ m32b = cleanup_addr(p->mm1_base);
+ m64b = cleanup_addr(p->mm0_base);
+ m64s = p->mm0_size;
+ dt_add_property_cells(np, "ranges",
+ /* M32 space */
+ 0x02000000, 0x00000000, M32_PCI_START,
+ hi32(m32b), lo32(m32b), 0, M32_PCI_SIZE - 0x10000);
+
+ /* XXX FIXME: add opal-memwin32, dmawins, etc... */
+ dt_add_property_u64s(np, "ibm,opal-m64-window", m64b, m64b, m64s);
+ dt_add_property(np, "ibm,opal-single-pe", NULL, 0);
+ dt_add_property_cells(np, "ibm,opal-num-pes", p->num_pes);
+ dt_add_property_cells(np, "ibm,opal-reserved-pe",
+ PHB4_RESERVED_PE_NUM(p));
+ dt_add_property_cells(np, "ibm,opal-msi-ranges",
+ p->base_msi, p->num_irqs - 8);
+ /* M64 ranges start at 1 as MBT0 is used for M32 */
+ dt_add_property_cells(np, "ibm,opal-available-m64-ranges",
+ 1, p->mbt_size - 1);
+ dt_add_property_cells(np, "ibm,supported-tce-sizes",
+ 12, // 4K
+ 16, // 64K
+ 21, // 2M
+ 30); // 1G
+
+ /* Tell Linux about alignment limits for segment splits.
+ *
+ * XXX We currently only expose splits of 1 and "num PEs".
+ */
+ dt_add_property_cells(np, "ibm,opal-m64-segment-splits",
+ /* Full split, number of segments: */
+ p->num_pes,
+ /* Encoding passed to the enable call */
+ OPAL_ENABLE_M64_SPLIT,
+ /* Alignment/size restriction in #bits */
+ /* XXX VERIFY VALUE */
+ 12,
+ /* Unused */
+ 0,
+ /* single PE, number of segments: */
+ 1,
+ /* Encoding passed to the enable call */
+ OPAL_ENABLE_M64_NON_SPLIT,
+ /* Alignment/size restriction in #bits */
+ /* XXX VERIFY VALUE */
+ 12,
+ /* Unused */
+ 0);
+
+ /* The interrupt maps will be generated in the RC node by the
+ * PCI code based on the content of this structure:
+ */
+ lsibase = p->base_lsi;
+ p->phb.lstate.int_size = 2;
+ p->phb.lstate.int_val[0][0] = lsibase + PHB4_LSI_PCIE_INTA;
+ p->phb.lstate.int_val[0][1] = 1;
+ p->phb.lstate.int_val[1][0] = lsibase + PHB4_LSI_PCIE_INTB;
+ p->phb.lstate.int_val[1][1] = 1;
+ p->phb.lstate.int_val[2][0] = lsibase + PHB4_LSI_PCIE_INTC;
+ p->phb.lstate.int_val[2][1] = 1;
+ p->phb.lstate.int_val[3][0] = lsibase + PHB4_LSI_PCIE_INTD;
+ p->phb.lstate.int_val[3][1] = 1;
+ p->phb.lstate.int_parent[0] = icsp;
+ p->phb.lstate.int_parent[1] = icsp;
+ p->phb.lstate.int_parent[2] = icsp;
+ p->phb.lstate.int_parent[3] = icsp;
+
+ /* Indicators for variable tables */
+ dt_add_property_cells(np, "ibm,opal-rtt-table",
+ hi32((u64) p->tbl_rtt), lo32((u64) p->tbl_rtt), RTT_TABLE_SIZE);
+
+ dt_add_property_cells(np, "ibm,opal-peltv-table",
+ hi32((u64) p->tbl_peltv), lo32((u64) p->tbl_peltv),
+ p->tbl_peltv_size);
+
+ dt_add_property_cells(np, "ibm,opal-pest-table",
+ hi32(p->tbl_pest), lo32(p->tbl_pest), p->tbl_pest_size);
+
+ dt_add_property_cells(np, "ibm,phb-diag-data-size",
+ sizeof(struct OpalIoPhb4ErrorData));
+
+ /* Indicate to Linux that CAPP timebase sync is supported */
+ dt_add_property_string(np, "ibm,capp-timebase-sync", NULL);
+
+ /* Tell Linux Compare/Mask indication values */
+ dt_add_property_cells(np, "ibm,phb-indications", CAPIIND, ASNIND,
+ NBWIND);
+}
+
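+/*
+ * Derive the two PBCQ MMIO windows from the device-tree: mm0 ends up as
+ * the large (M64) window and mm1 as the small (M32) window.
+ */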
+static bool phb4_calculate_windows(struct phb4 *p)
+{
+ const struct dt_property *prop;
+
+ /* Get PBCQ MMIO windows from device-tree */
+ prop = dt_require_property(p->phb.dt_node,
+ "ibm,mmio-windows", -1);
+ assert(prop->len >= (2 * sizeof(uint64_t)));
+
+ p->mm0_base = dt_property_get_u64(prop, 0);
+ p->mm0_size = dt_property_get_u64(prop, 1);
+ if (prop->len > 16) {
+ p->mm1_base = dt_property_get_u64(prop, 2);
+ p->mm1_size = dt_property_get_u64(prop, 3);
+ }
+
+ /* Sort them so that 0 is big and 1 is small */
+ if (p->mm1_size && p->mm1_size > p->mm0_size) {
+ uint64_t b = p->mm0_base;
+ uint64_t s = p->mm0_size;
+ p->mm0_base = p->mm1_base;
+ p->mm0_size = p->mm1_size;
+ p->mm1_base = b;
+ p->mm1_size = s;
+ }
+
+ /* If 1 is too small, ditch it */
+ if (p->mm1_size < M32_PCI_SIZE)
+ p->mm1_size = 0;
+
+ /* If 1 doesn't exist, carve it out of 0 */
+ if (p->mm1_size == 0) {
+ p->mm0_size /= 2;
+ p->mm1_base = p->mm0_base + p->mm0_size;
+ p->mm1_size = p->mm0_size;
+ }
+
+ /* Crop mm1 to our desired size */
+ if (p->mm1_size > M32_PCI_SIZE)
+ p->mm1_size = M32_PCI_SIZE;
+
+ return true;
+}
+
+static void phb4_err_interrupt(struct irq_source *is, uint32_t isn)
+{
+ struct phb4 *p = is->data;
+
+ PHBDBG(p, "Got interrupt 0x%08x\n", isn);
+
+ /* Mask the interrupt conditions to prevent them from re-firing */
+ phb4_int_mask_active(p);
+
+ /* Update pending event */
+ opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
+ OPAL_EVENT_PCI_ERROR);
+
+ /* If the PHB is broken, go away */
+ if (p->broken)
+ return;
+
+ /*
+ * Mark the PHB as having a pending error so that the OS
+ * can handle it at a later point.
+ */
+ phb4_set_err_pending(p, true);
+}
+
+static uint64_t phb4_lsi_attributes(struct irq_source *is __unused,
+ uint32_t isn __unused)
+{
+#ifndef DISABLE_ERR_INTS
+ struct phb4 *p = is->data;
+ uint32_t idx = isn - p->base_lsi;
+
+ if (idx == PHB4_LSI_PCIE_INF || idx == PHB4_LSI_PCIE_ER)
+ return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_LSI;
+#endif
+ return IRQ_ATTR_TARGET_LINUX;
+}
+
+static char *phb4_lsi_name(struct irq_source *is, uint32_t isn)
+{
+ struct phb4 *p = is->data;
+ uint32_t idx = isn - p->base_lsi;
+ char buf[32];
+
+ if (idx == PHB4_LSI_PCIE_INF)
+ snprintf(buf, 32, "phb#%04x-inf", p->phb.opal_id);
+ else if (idx == PHB4_LSI_PCIE_ER)
+ snprintf(buf, 32, "phb#%04x-err", p->phb.opal_id);
+ else
+ assert(0); /* PCIe LSIs should never be directed to OPAL */
+
+ return strdup(buf);
+}
+
+static const struct irq_source_ops phb4_lsi_ops = {
+ .interrupt = phb4_err_interrupt,
+ .attributes = phb4_lsi_attributes,
+ .name = phb4_lsi_name,
+};
+
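+/*
+ * Built-in lane equalization values, used when the device-tree does not
+ * provide an "ibm,lane-eq" property.
+ */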
+static __be64 lane_eq_default[8] = {
+ CPU_TO_BE64(0x5454545454545454UL), CPU_TO_BE64(0x5454545454545454UL),
+ CPU_TO_BE64(0x5454545454545454UL), CPU_TO_BE64(0x5454545454545454UL),
+ CPU_TO_BE64(0x7777777777777777UL), CPU_TO_BE64(0x7777777777777777UL),
+ CPU_TO_BE64(0x7777777777777777UL), CPU_TO_BE64(0x7777777777777777UL),
+};
+
+static __be64 lane_eq_phb5_default[8] = {
+ CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
+ CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
+ CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
+ CPU_TO_BE64(0x9999999999999999UL), CPU_TO_BE64(0x9999999999999999UL),
+};
+
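+/*
+ * Instantiate one PHB from its "pciex" node: read capabilities, allocate
+ * tables and interrupts, bring up the hardware and register the interrupt
+ * sources with XIVE.
+ */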
+static void phb4_create(struct dt_node *np)
+{
+ const struct dt_property *prop;
+ struct phb4 *p;
+ struct pci_slot *slot;
+ size_t lane_eq_len, lane_eq_len_req;
+ struct dt_node *iplp;
+ char *path;
+ uint32_t irq_base, irq_flags;
+ int i, eq_reg_count;
+ int chip_id;
+
+ chip_id = dt_prop_get_u32(np, "ibm,chip-id");
+ p = local_alloc(chip_id, sizeof(struct phb4), 8);
+ assert(p);
+ memset(p, 0x0, sizeof(struct phb4));
+
+ /* Populate base stuff */
+ p->index = dt_prop_get_u32(np, "ibm,phb-index");
+ p->chip_id = chip_id;
+ p->pec = dt_prop_get_u32(np, "ibm,phb-pec-index");
+ p->regs = (void *)dt_get_address(np, 0, NULL);
+ p->int_mmio = (void *)dt_get_address(np, 1, NULL);
+ p->phb.dt_node = np;
+ p->phb.ops = &phb4_ops;
+ p->phb.phb_type = phb_type_pcie_v4;
+ p->phb.scan_map = 0x1; /* Only device 0 to scan */
+
+ if (!phb4_calculate_windows(p))
+ return;
+
+ /* Get the various XSCOM register bases from the device-tree */
+ prop = dt_require_property(np, "ibm,xscom-bases", 5 * sizeof(uint32_t));
+ p->pe_xscom = dt_property_get_cell(prop, 0);
+ p->pe_stk_xscom = dt_property_get_cell(prop, 1);
+ p->pci_xscom = dt_property_get_cell(prop, 2);
+ p->pci_stk_xscom = dt_property_get_cell(prop, 3);
+ p->etu_xscom = dt_property_get_cell(prop, 4);
+
+ /*
+ * We skip the initial PERST assertion requested by the generic code
+ * when doing a cold boot, since we are coming out of a cold boot
+ * already; that saves boot time. The PERST state machine will still
+ * handle waiting for the link to come up, it will just avoid actually
+ * asserting & deasserting the PERST output.
+ *
+ * For a hot IPL, we still do a PERST.
+ *
+ * Note: In the absence of the property (ie. FSP-less), we stick to
+ * the old behaviour and set skip_perst to true.
+ */
+ p->skip_perst = true; /* Default */
+
+ iplp = dt_find_by_path(dt_root, "ipl-params/ipl-params");
+ if (iplp) {
+ const char *ipl_type = dt_prop_get_def(iplp, "cec-major-type", NULL);
+ if (ipl_type && (!strcmp(ipl_type, "hot")))
+ p->skip_perst = false;
+ }
+
+ /* By default link is assumed down */
+ p->has_link = false;
+
+ /* We register the PHB before we initialize it so we
+ * get a useful OPAL ID for it
+ */
+ pci_register_phb(&p->phb, phb4_get_opal_id(p->chip_id, p->index));
+
+ /* Create slot structure */
+ slot = phb4_slot_create(&p->phb);
+ if (!slot)
+ PHBERR(p, "Cannot create PHB slot\n");
+
+ /* Hello ! */
+ path = dt_get_path(np);
+ PHBINF(p, "Found %s @%p\n", path, p->regs);
+ PHBINF(p, " M32 [0x%016llx..0x%016llx]\n",
+ p->mm1_base, p->mm1_base + p->mm1_size - 1);
+ PHBINF(p, " M64 [0x%016llx..0x%016llx]\n",
+ p->mm0_base, p->mm0_base + p->mm0_size - 1);
+ free(path);
+
+ /* Find base location code from root node */
+ p->phb.base_loc_code = dt_prop_get_def(dt_root,
+ "ibm,io-base-loc-code", NULL);
+ if (!p->phb.base_loc_code)
+ PHBDBG(p, "Base location code not found !\n");
+
+ /*
+ * Grab CEC IO VPD load info from the root of the device-tree;
+ * on P8 there's a single such VPD for the whole machine
+ */
+ prop = dt_find_property(dt_root, "ibm,io-vpd");
+ if (!prop) {
+ /* LX VPD Lid not already loaded */
+ if (platform.vpd_iohub_load)
+ platform.vpd_iohub_load(dt_root);
+ }
+
+ /* Obtain information about the PHB from the hardware directly */
+ if (!phb4_read_capabilities(p))
+ goto failed;
+
+ p->max_link_speed = phb4_get_max_link_speed(p, np);
+ p->max_link_width = phb4_get_max_link_width(p);
+ PHBINF(p, "Max link speed: GEN%i, max link width %i\n",
+ p->max_link_speed, p->max_link_width);
+
+ /* Check for lane equalization values from HB or HDAT */
+ p->lane_eq_en = true;
+ p->lane_eq = dt_prop_get_def_size(np, "ibm,lane-eq", NULL, &lane_eq_len);
+ if (is_phb5())
+ eq_reg_count = 8;
+ else
+ eq_reg_count = 6;
+ lane_eq_len_req = eq_reg_count * 8;
+ if (p->lane_eq) {
+ if (lane_eq_len < lane_eq_len_req) {
+ PHBERR(p, "Device-tree has ibm,lane-eq too short: %ld"
+ " (want %ld)\n", lane_eq_len, lane_eq_len_req);
+ p->lane_eq = NULL;
+ }
+ } else {
+ PHBDBG(p, "Using default lane equalization settings\n");
+ if (is_phb5())
+ p->lane_eq = lane_eq_phb5_default;
+ else
+ p->lane_eq = lane_eq_default;
+ }
+ if (p->lane_eq) {
+ PHBDBG(p, "Override lane equalization settings:\n");
+ for (i = 0 ; i < lane_eq_len_req/(8 * 2) ; i++)
+ PHBDBG(p, " 0x%016llx 0x%016llx\n",
+ be64_to_cpu(p->lane_eq[2 * i]),
+ be64_to_cpu(p->lane_eq[2 * i + 1]));
+ }
+
+ /* Allocate a block of interrupts. We need to know if it needs
+ * 2K or 4K interrupts ... for now we just use 4K but that
+ * needs to be fixed
+ */
+ if (is_phb5())
+ irq_base = xive2_alloc_hw_irqs(p->chip_id, p->num_irqs, p->num_irqs);
+ else
+ irq_base = xive_alloc_hw_irqs(p->chip_id, p->num_irqs, p->num_irqs);
+ if (irq_base == XIVE_IRQ_ERROR) {
+ PHBERR(p, "Failed to allocate %d interrupt sources\n",
+ p->num_irqs);
+ goto failed;
+ }
+ p->base_msi = irq_base;
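+ /* The last 8 sources of the block are reserved for the LSIs */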
+ p->base_lsi = irq_base + p->num_irqs - 8;
+ p->num_pes = p->max_num_pes;
+
+ /* Allocate the SkiBoot internal in-memory tables for the PHB */
+ phb4_allocate_tables(p);
+
+ phb4_add_properties(p);
+
+ /* Clear IODA3 cache */
+ phb4_init_ioda_cache(p);
+
+ /* Get the HW up and running */
+ phb4_init_hw(p);
+
+ /* Init the CAPP that might get attached to the PHB */
+ if (is_phb4())
+ phb4_init_capp(p);
+
+ /* Compute XIVE source flags depending on PHB revision */
+ irq_flags = 0;
+ if (phb_can_store_eoi(p))
+ irq_flags |= XIVE_SRC_STORE_EOI;
+ else
+ irq_flags |= XIVE_SRC_TRIGGER_PAGE;
+
+ if (is_phb5()) {
+ /*
+ * Register sources with XIVE. If offloading is on, use the
+ * ESB pages of the XIVE IC for the MSI sources instead of the
+ * ESB pages of the PHB.
+ */
+ if (phb_pq_disable(p) || phb_abt_mode(p)) {
+ xive2_register_esb_source(p->base_msi, p->num_irqs - 8);
+ } else {
+ xive2_register_hw_source(p->base_msi,
+ p->num_irqs - 8, 16,
+ p->int_mmio, irq_flags,
+ NULL, NULL);
+ }
+
+ /*
+ * LSI sources always use the ESB pages of the PHB.
+ */
+ xive2_register_hw_source(p->base_lsi, 8, 16,
+ p->int_mmio + ((p->num_irqs - 8) << 16),
+ XIVE_SRC_LSI | irq_flags, p, &phb4_lsi_ops);
+ } else {
+ /* Register all interrupt sources with XIVE */
+ xive_register_hw_source(p->base_msi, p->num_irqs - 8, 16,
+ p->int_mmio, irq_flags, NULL, NULL);
+
+ xive_register_hw_source(p->base_lsi, 8, 16,
+ p->int_mmio + ((p->num_irqs - 8) << 16),
+ XIVE_SRC_LSI, p, &phb4_lsi_ops);
+ }
+
+ /* Platform additional setup */
+ if (platform.pci_setup_phb)
+ platform.pci_setup_phb(&p->phb, p->index);
+
+ dt_add_property_string(np, "status", "okay");
+
+ return;
+
+ failed:
+ p->broken = true;
+
+ /* Tell Linux it's broken */
+ dt_add_property_string(np, "status", "error");
+}
+
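+/*
+ * Set up a single PHB (one PEC stack): program its BARs through XSCOM and
+ * create the "pciex" device-tree node that phb4_create() will consume.
+ */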
+static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,
+ uint32_t nest_base, uint32_t pci_base)
+{
+ enum phys_map_type phys_mmio64, phys_mmio32, phys_xive_esb, phys_reg_spc;
+ uint32_t pci_stack, nest_stack, etu_base, gcid, phb_num, stk_index;
+ uint64_t val, phb_bar = 0, irq_bar = 0, bar_en;
+ uint64_t mmio0_bar = 0, mmio0_bmask, mmio0_sz;
+ uint64_t mmio1_bar = 0, mmio1_bmask, mmio1_sz;
+ void *foo;
+ __be64 mmio_win[4];
+ unsigned int mmio_win_sz;
+ struct dt_node *np;
+ char *path;
+ uint64_t capp_ucode_base;
+ unsigned int max_link_speed;
+ int rc;
+
+ assert(is_phb5() || is_phb4()); /* Sanity check */
+
+ gcid = dt_get_chip_id(stk_node);
+ stk_index = dt_prop_get_u32(stk_node, "reg");
+ phb_num = dt_prop_get_u32(stk_node, "ibm,phb-index");
+ path = dt_get_path(stk_node);
+ if (is_phb5()) {
+ phys_mmio64 = PHB5_64BIT_MMIO;
+ phys_mmio32 = PHB5_32BIT_MMIO;
+ phys_xive_esb = PHB5_XIVE_ESB;
+ phys_reg_spc = PHB5_REG_SPC;
+ prlog(PR_INFO, "PHB: Chip %d Found PHB5 PBCQ%d Stack %d at %s\n",
+ gcid, pec_index, stk_index, path);
+ } else {
+ phys_mmio64 = PHB4_64BIT_MMIO;
+ phys_mmio32 = PHB4_32BIT_MMIO;
+ phys_xive_esb = PHB4_XIVE_ESB;
+ phys_reg_spc = PHB4_REG_SPC;
+ prlog(PR_INFO, "PHB: Chip %d Found PHB4 PBCQ%d Stack %d at %s\n",
+ gcid, pec_index, stk_index, path);
+ }
+ free(path);
+
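+ /* Stack register blocks are 0x40 XSCOM addresses apart, starting at
+ * +0x40 off the PEC base; the ETU blocks start at +0x100.
+ */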
+ pci_stack = pci_base + 0x40 * (stk_index + 1);
+ nest_stack = nest_base + 0x40 * (stk_index + 1);
+ etu_base = pci_base + 0x100 + 0x40 * stk_index;
+
+ prlog(PR_DEBUG, "PHB[%d:%d] X[PE]=0x%08x/0x%08x X[PCI]=0x%08x/0x%08x X[ETU]=0x%08x\n",
+ gcid, phb_num, nest_base, nest_stack, pci_base, pci_stack, etu_base);
+
+ /* Default BAR enables */
+ bar_en = 0;
+
+ /* Initialize PHB register BAR */
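+ /* All the XPEC BAR (and mask) values below are written shifted left by 8 */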
+ phys_map_get(gcid, phys_reg_spc, phb_num, &phb_bar, NULL);
+ rc = xscom_write(gcid, nest_stack + XPEC_NEST_STK_PHB_REG_BAR,
+ phb_bar << 8);
+
+ /* A scom error here probably indicates a defective/GARDed PHB */
+ if (rc != OPAL_SUCCESS) {
+ prerror("PHB[%d:%d] Unable to set PHB BAR. Error=%d\n",
+ gcid, phb_num, rc);
+ return;
+ }
+
+ bar_en |= XPEC_NEST_STK_BAR_EN_PHB;
+
+ /* Same with INT BAR (ESB) */
+ phys_map_get(gcid, phys_xive_esb, phb_num, &irq_bar, NULL);
+ xscom_write(gcid, nest_stack + XPEC_NEST_STK_IRQ_BAR, irq_bar << 8);
+ bar_en |= XPEC_NEST_STK_BAR_EN_INT;
+
+
+ /* Same with MMIO windows */
+ phys_map_get(gcid, phys_mmio64, phb_num, &mmio0_bar, &mmio0_sz);
+ mmio0_bmask = (~(mmio0_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
+ xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0, mmio0_bar << 8);
+ xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0_MASK, mmio0_bmask << 8);
+
+ phys_map_get(gcid, phys_mmio32, phb_num, &mmio1_bar, &mmio1_sz);
+ mmio1_bmask = (~(mmio1_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
+ xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1, mmio1_bar << 8);
+ xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1_MASK, mmio1_bmask << 8);
+
+ /* Build MMIO windows list */
+ mmio_win_sz = 0;
+ if (mmio0_bar) {
+ mmio_win[mmio_win_sz++] = cpu_to_be64(mmio0_bar);
+ mmio_win[mmio_win_sz++] = cpu_to_be64(mmio0_sz);
+ bar_en |= XPEC_NEST_STK_BAR_EN_MMIO0;
+ }
+ if (mmio1_bar) {
+ mmio_win[mmio_win_sz++] = cpu_to_be64(mmio1_bar);
+ mmio_win[mmio_win_sz++] = cpu_to_be64(mmio1_sz);
+ bar_en |= XPEC_NEST_STK_BAR_EN_MMIO1;
+ }
+
+ /* Set the appropriate enables */
+ xscom_read(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, &val);
+ val |= bar_en;
+ xscom_write(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, val);
+
+ /* No MMIO windows ? Barf ! */
+ if (mmio_win_sz == 0) {
+ prerror("PHB[%d:%d] No MMIO windows enabled !\n", gcid, phb_num);
+ return;
+ }
+
+ /* Clear errors in PFIR and NFIR */
+ xscom_write(gcid, pci_stack + XPEC_PCI_STK_PCI_FIR, 0);
+ xscom_write(gcid, nest_stack + XPEC_NEST_STK_PCI_NFIR, 0);
+
+ /* Check ETU reset */
+ xscom_read(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, &val);
+ prlog_once(PR_DEBUG, "ETU reset: %llx\n", val);
+ xscom_write(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, 0);
+ time_wait_ms(1);
+
+ // Show we can read PHB MMIO space
+ foo = (void *)(phb_bar + 0x800); // PHB version register
+ prlog_once(PR_DEBUG, "Version reg: 0x%016llx\n", in_be64(foo));
+
+ /* Create PHB node */
+ np = dt_new_addr(dt_root, "pciex", phb_bar);
+ if (!np)
+ return;
+
+ if (is_phb5())
+ dt_add_property_strings(np, "compatible", "ibm,power10-pciex", "ibm,ioda3-phb");
+ else
+ dt_add_property_strings(np, "compatible", "ibm,power9-pciex", "ibm,ioda3-phb");
+ dt_add_property_strings(np, "device_type", "pciex");
+ dt_add_property_u64s(np, "reg",
+ phb_bar, 0x1000,
+ irq_bar, 0x10000000);
+
+ /* Everything else is handled later by skiboot; we just
+ * stick a few hints here
+ */
+ dt_add_property_cells(np, "ibm,xscom-bases",
+ nest_base, nest_stack, pci_base, pci_stack, etu_base);
+ dt_add_property(np, "ibm,mmio-windows", mmio_win, 8 * mmio_win_sz);
+ dt_add_property_cells(np, "ibm,phb-index", phb_num);
+ dt_add_property_cells(np, "ibm,phb-pec-index", pec_index);
+ dt_add_property_cells(np, "ibm,phb-stack", stk_node->phandle);
+ dt_add_property_cells(np, "ibm,phb-stack-index", stk_index);
+ dt_add_property_cells(np, "ibm,chip-id", gcid);
+
+ /* read the hub-id out of the pbcq node */
+ if (dt_has_node_property(stk_node->parent, "ibm,hub-id", NULL)) {
+ uint32_t hub_id;
+
+ hub_id = dt_prop_get_u32(stk_node->parent, "ibm,hub-id");
+ dt_add_property_cells(np, "ibm,hub-id", hub_id);
+ }
+
+ if (dt_has_node_property(stk_node->parent, "ibm,loc-code", NULL)) {
+ const char *lc = dt_prop_get(stk_node->parent, "ibm,loc-code");
+ dt_add_property_string(np, "ibm,loc-code", lc);
+ }
+ if (dt_has_node_property(stk_node, "ibm,lane-eq", NULL)) {
+ size_t leq_size;
+ const void *leq = dt_prop_get_def_size(stk_node, "ibm,lane-eq",
+ NULL, &leq_size);
+ if (leq != NULL && leq_size >= 6 * 8)
+ dt_add_property(np, "ibm,lane-eq", leq, leq_size);
+ }
+ if (dt_has_node_property(stk_node, "ibm,capp-ucode", NULL)) {
+ capp_ucode_base = dt_prop_get_u32(stk_node, "ibm,capp-ucode");
+ dt_add_property_cells(np, "ibm,capp-ucode", capp_ucode_base);
+ }
+ if (dt_has_node_property(stk_node, "ibm,max-link-speed", NULL)) {
+ max_link_speed = dt_prop_get_u32(stk_node, "ibm,max-link-speed");
+ dt_add_property_cells(np, "ibm,max-link-speed", max_link_speed);
+ }
+ dt_add_property_cells(np, "ibm,capi-flags",
+ OPAL_PHB_CAPI_FLAG_SNOOP_CONTROL);
+
+ add_chip_dev_associativity(np);
+}
+
+static void phb4_probe_pbcq(struct dt_node *pbcq)
+{
+ uint32_t nest_base, pci_base, pec_index;
+ struct dt_node *stk;
+
+ /* REMOVEME: force this for now until we stabilise PCIe */
+ verbose_eeh = 1;
+
+ nest_base = dt_get_address(pbcq, 0, NULL);
+ pci_base = dt_get_address(pbcq, 1, NULL);
+ pec_index = dt_prop_get_u32(pbcq, "ibm,pec-index");
+
+ dt_for_each_child(pbcq, stk) {
+ if (dt_node_is_enabled(stk))
+ phb4_probe_stack(stk, pec_index, nest_base, pci_base);
+ }
+}
+
+void probe_phb4(void)
+{
+ struct dt_node *np;
+ const char *s;
+
+ pci_eeh_mmio = !nvram_query_eq_dangerous("pci-eeh-mmio", "disabled");
+ pci_retry_all = nvram_query_eq_dangerous("pci-retry-all", "true");
+ s = nvram_query_dangerous("phb-rx-err-max");
+ if (s) {
+ rx_err_max = atoi(s);
+
+ /* Clip to uint8_t used by hardware */
+ rx_err_max = MAX(rx_err_max, 0);
+ rx_err_max = MIN(rx_err_max, 255);
+ }
+
+ if (is_phb5()) {
+ prlog(PR_DEBUG, "PHB5: Maximum RX errors during training: %d\n", rx_err_max);
+ /* Look for PBCQ XSCOM nodes */
+ dt_for_each_compatible(dt_root, np, "ibm,power10-pbcq")
+ phb4_probe_pbcq(np);
+
+ /* Look for newly created PHB nodes */
+ dt_for_each_compatible(dt_root, np, "ibm,power10-pciex")
+ phb4_create(np);
+ } else {
+ prlog(PR_DEBUG, "PHB4: Maximum RX errors during training: %d\n", rx_err_max);
+ /* Look for PBCQ XSCOM nodes */
+ dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq")
+ phb4_probe_pbcq(np);
+
+ /* Look for newly created PHB nodes */
+ dt_for_each_compatible(dt_root, np, "ibm,power9-pciex")
+ phb4_create(np);
+ }
+}