aboutsummaryrefslogtreecommitdiffstats
path: root/roms/skiboot/hw/npu3.c
diff options
context:
space:
mode:
Diffstat (limited to 'roms/skiboot/hw/npu3.c')
-rw-r--r--roms/skiboot/hw/npu3.c549
1 files changed, 549 insertions, 0 deletions
diff --git a/roms/skiboot/hw/npu3.c b/roms/skiboot/hw/npu3.c
new file mode 100644
index 000000000..03461373e
--- /dev/null
+++ b/roms/skiboot/hw/npu3.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2019 IBM Corp.
+ */
+
+#include <io.h>
+#include <xscom.h>
+#include <npu3.h>
+#include <npu3-regs.h>
+#include <nvram.h>
+#include <interrupts.h>
+#include <xive.h>
+
+#define NPU3LOG(l, npu, fmt, a...) \
+ prlog(l, "NPU[%d:%d]: " fmt, (npu)->chip_id, (npu)->index, ##a)
+#define NPU3DBG(npu, fmt, a...) NPU3LOG(PR_DEBUG, npu, fmt, ##a)
+#define NPU3INF(npu, fmt, a...) NPU3LOG(PR_INFO, npu, fmt, ##a)
+#define NPU3ERR(npu, fmt, a...) NPU3LOG(PR_ERR, npu, fmt, ##a)
+
+#define NPU3DEVLOG(l, dev, fmt, a...) \
+ prlog(l, "NPU[%d:%d:%d]: " fmt, \
+ (dev)->npu->chip_id, \
+ (dev)->npu->index, \
+ (dev)->index, ##a)
+#define NPU3DEVDBG(dev, fmt, a...) NPU3DEVLOG(PR_DEBUG, dev, fmt, ##a)
+#define NPU3DEVINF(dev, fmt, a...) NPU3DEVLOG(PR_INFO, dev, fmt, ##a)
+#define NPU3DEVERR(dev, fmt, a...) NPU3DEVLOG(PR_ERR, dev, fmt, ##a)
+
+static void npu3_dt_create_link(struct dt_node *npu, uint32_t npu_index,
+ uint32_t dev_index)
+{
+ struct dt_node *link;
+ uint32_t phy_lane_mask, ob_chiplet;
+
+ link = dt_new_addr(npu, "link", dev_index);
+
+ dt_add_property_string(link, "compatible", "ibm,npu-link");
+ dt_add_property_cells(link, "reg", dev_index);
+ dt_add_property_cells(link, "ibm,npu-link-index", dev_index);
+
+ switch (npu_index) {
+ case 0:
+ /* fall through */
+ case 2:
+ ob_chiplet = npu_index ? 3 : 0;
+
+ switch (dev_index) {
+ case 0:
+ phy_lane_mask = PPC_BITMASK32(0, 3);
+ break;
+ case 1:
+ phy_lane_mask = PPC_BITMASK32(13, 16);
+ break;
+ case 2:
+ phy_lane_mask = PPC_BITMASK32(7, 10);
+ break;
+ case 3:
+ phy_lane_mask = PPC_BITMASK32(20, 23);
+ break;
+ }
+
+ break;
+ case 1:
+ switch (dev_index) {
+ case 0:
+ ob_chiplet = 1;
+ phy_lane_mask = PPC_BITMASK32(0, 3);
+ break;
+ case 1:
+ ob_chiplet = 2;
+ phy_lane_mask = PPC_BITMASK32(0, 3);
+ break;
+ case 2:
+ ob_chiplet = 1;
+ phy_lane_mask = PPC_BITMASK32(7, 10);
+ break;
+ case 3:
+ ob_chiplet = 2;
+ phy_lane_mask = PPC_BITMASK32(7, 10);
+ break;
+ }
+
+ break;
+ default:
+ return;
+ }
+
+ dt_add_property_cells(link, "ibm,npu-phy", ob_chiplet);
+ dt_add_property_cells(link, "ibm,npu-lane-mask", phy_lane_mask);
+}
+
+static void npu3_dt_create_npu(struct dt_node *xscom, uint32_t npu_index)
+{
+ const uint32_t npu_base[] = { 0x5011000, 0x5011400, 0x3011c00 };
+ struct dt_node *npu;
+
+ npu = dt_new_addr(xscom, "npu", npu_base[npu_index]);
+
+ dt_add_property_cells(npu, "#size-cells", 0);
+ dt_add_property_cells(npu, "#address-cells", 1);
+ dt_add_property_cells(npu, "reg", npu_base[npu_index], 0x2c);
+ dt_add_property_string(npu, "compatible", "ibm,power9-npu3");
+ dt_add_property_cells(npu, "ibm,npu-index", npu_index);
+
+ for (uint32_t i = 0; i < NPU3_LINKS_PER_NPU; i++)
+ npu3_dt_create_link(npu, npu_index, i);
+}
+
+/* This can be removed when/if we decide to use HDAT instead */
+static bool npu3_dt_create(void)
+{
+ struct proc_chip *chip = next_chip(NULL);
+ struct dt_node *xscom;
+
+ /* npu3 chips only */
+ if (proc_gen < proc_gen_p9 ||
+ chip->type == PROC_CHIP_P9_NIMBUS ||
+ chip->type == PROC_CHIP_P9_CUMULUS)
+ return false;
+
+ dt_for_each_compatible(dt_root, xscom, "ibm,xscom")
+ for (uint32_t i = 0; i < 3; i++)
+ npu3_dt_create_npu(xscom, i);
+
+ return true;
+}
+
+static struct npu3 *npu3_create(struct dt_node *dn)
+{
+ struct npu3 *npu;
+ struct dt_node *link;
+ struct npu3_dev *dev;
+ char *path;
+ uint32_t i;
+
+ npu = zalloc(sizeof(*npu));
+ assert(npu);
+
+ init_lock(&npu->lock);
+
+ npu->dt_node = dn;
+ npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
+ npu->xscom_base = dt_get_address(dn, 0, NULL);
+
+ npu->chip_id = dt_get_chip_id(dn);
+ assert(get_chip(npu->chip_id));
+
+ dt_for_each_compatible(dn, link, "ibm,npu-link") {
+ i = dt_prop_get_u32(link, "ibm,npu-link-index");
+ assert(i < NPU3_LINKS_PER_NPU);
+
+ dev = &npu->devices[i];
+ dev->index = i;
+ dev->npu = npu;
+ dev->dn = link;
+ dev->ob_chiplet = dt_prop_get_u32(link, "ibm,npu-phy");
+ dev->phy_lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
+ dev->proc.status = NPU3_PROC_COMPLETE;
+ };
+
+ path = dt_get_path(dn);
+ NPU3INF(npu, "Found %s\n", path);
+ NPU3INF(npu, "SCOM base: 0x%llx\n", npu->xscom_base);
+ free(path);
+
+ return npu;
+}
+
+struct npu3_dev *npu3_next_dev(struct npu3 *npu, struct npu3_dev *dev,
+ enum npu3_dev_type type)
+{
+ uint32_t i = 0;
+
+ if (dev)
+ i = dev->index + 1;
+
+ for (; i < NPU3_LINKS_PER_NPU; i++) {
+ dev = &npu->devices[i];
+
+ if (dev->type == type || type == NPU3_DEV_TYPE_ANY)
+ return dev;
+ }
+
+ return NULL;
+}
+
+static void npu3_device_detect_fixup(struct npu3_dev *dev)
+{
+ struct dt_node *dn = dev->dn;
+
+ if (dev->type == NPU3_DEV_TYPE_NVLINK) {
+ dt_add_property_strings(dn, "ibm,npu-link-type", "nvlink");
+ dev->link_speed = dt_prop_get_u32_def(
+ dn, "nvidia,link-speed", 0xff);
+ return;
+ }
+
+ NPU3DEVDBG(dev, "Link type unknown\n");
+ dt_add_property_strings(dn, "ibm,npu-link-type", "unknown");
+}
+
+/*
+ * We use the indirect method because it uses the same addresses as
+ * the MMIO offsets (NPU RING)
+ */
+static void npu3_scom_sel(struct npu3 *npu, uint64_t reg, uint64_t size)
+{
+ uint64_t val;
+
+ val = SETFIELD(NPU3_MISC_DA_ADDR, 0ull, reg);
+ val = SETFIELD(NPU3_MISC_DA_LEN, val, size);
+ xscom_write(npu->chip_id,
+ npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_ADDR,
+ val);
+}
+
+static void npu3_scom_write(struct npu3 *npu, uint64_t reg, uint64_t size,
+ uint64_t val)
+{
+ npu3_scom_sel(npu, reg, size);
+ xscom_write(npu->chip_id,
+ npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA,
+ val);
+}
+
+static uint64_t npu3_scom_read(struct npu3 *npu, uint64_t reg, uint64_t size)
+{
+ uint64_t val;
+
+ npu3_scom_sel(npu, reg, size);
+ xscom_read(npu->chip_id,
+ npu->xscom_base + NPU3_MISC_SCOM_IND_SCOM_DATA,
+ &val);
+
+ return val;
+}
+
+void npu3_write(struct npu3 *npu, uint64_t reg, uint64_t val)
+{
+ void *mmio = (void *)npu->regs[0];
+
+ if (mmio)
+ out_be64(mmio + reg, val);
+ else
+ npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_8B, val);
+
+ /* CQ_SM writes should be mirrored in all four blocks */
+ if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0))
+ return;
+
+ for (uint32_t i = 1; i < 4; i++)
+ npu3_write(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg),
+ val);
+}
+
+uint64_t npu3_read(struct npu3 *npu, uint64_t reg)
+{
+ void *mmio = (void *)npu->regs[0];
+
+ if (mmio)
+ return in_be64(mmio + reg);
+
+ return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_8B);
+}
+
+void npu3_write_4b(struct npu3 *npu, uint64_t reg, uint32_t val)
+{
+ void *mmio = (void *)npu->regs[0];
+
+ if (mmio)
+ out_be32(mmio + reg, val);
+ else
+ npu3_scom_write(npu, reg, NPU3_MISC_DA_LEN_4B,
+ (uint64_t)val << 32);
+
+ if (NPU3_REG_BLOCK(reg) != NPU3_BLOCK_CQ_SM(0))
+ return;
+
+ for (uint32_t i = 1; i < 4; i++)
+ npu3_write_4b(npu, NPU3_BLOCK_CQ_SM(i) + NPU3_REG_OFFSET(reg),
+ val);
+}
+
+uint32_t npu3_read_4b(struct npu3 *npu, uint64_t reg)
+{
+ void *mmio = (void *)npu->regs[0];
+
+ if (mmio)
+ return in_be32(mmio + reg);
+
+ return npu3_scom_read(npu, reg, NPU3_MISC_DA_LEN_4B) >> 32;
+}
+
+static void npu3_misc_config(struct npu3 *npu)
+{
+ struct npu3_dev *dev;
+ uint32_t typemap = 0;
+ uint64_t reg, val;
+
+ npu3_for_each_nvlink_dev(dev, npu)
+ typemap |= 0x10 >> dev->index;
+
+ reg = NPU3_MCP_MISC_CFG0;
+ val = npu3_read(npu, reg);
+ val |= NPU3_MCP_MISC_CFG0_ENABLE_PBUS;
+ val &= ~NPU3_MCP_MISC_CFG0_ENABLE_SNARF_CPM;
+ val = SETFIELD(NPU3_MCP_MISC_CFG0_NVLINK_MODE, val, typemap);
+ val = SETFIELD(NPU3_MCP_MISC_CFG0_OCAPI_MODE, val, ~typemap);
+ npu3_write(npu, reg, val);
+
+ reg = NPU3_SNP_MISC_CFG0;
+ val = npu3_read(npu, reg);
+ val |= NPU3_SNP_MISC_CFG0_ENABLE_PBUS;
+ val = SETFIELD(NPU3_SNP_MISC_CFG0_NVLINK_MODE, val, typemap);
+ val = SETFIELD(NPU3_SNP_MISC_CFG0_OCAPI_MODE, val, ~typemap);
+ npu3_write(npu, reg, val);
+
+ reg = NPU3_CTL_MISC_CFG2;
+ val = npu3_read(npu, reg);
+ val = SETFIELD(NPU3_CTL_MISC_CFG2_NVLINK_MODE, val, typemap);
+ val = SETFIELD(NPU3_CTL_MISC_CFG2_OCAPI_MODE, val, ~typemap);
+ npu3_write(npu, reg, val);
+
+ reg = NPU3_DAT_MISC_CFG1;
+ val = npu3_read(npu, reg);
+ val = SETFIELD(NPU3_DAT_MISC_CFG1_NVLINK_MODE, val, typemap);
+ val = SETFIELD(NPU3_DAT_MISC_CFG1_OCAPI_MODE, val, ~typemap);
+ npu3_write(npu, reg, val);
+}
+
+static void npu3_assign_bars(struct npu3 *npu)
+{
+ struct npu3_dev *dev;
+ uint64_t addr, size, val;
+
+ /* Global MMIO bar (per npu) */
+ phys_map_get(npu->chip_id, NPU_REGS, npu->index, &addr, &size);
+ val = SETFIELD(NPU3_MMIO_BAR_ADDR, 0ull, addr >> 24);
+ val |= NPU3_MMIO_BAR_ENABLE;
+ npu3_write(npu, NPU3_MMIO_BAR, val);
+
+ NPU3INF(npu, "MMIO base: 0x%016llx (%lldMB)\n", addr, size >> 20);
+ npu->regs[0] = addr;
+ npu->regs[1] = size;
+
+ /* NTL bar (per device) */
+ npu3_for_each_dev(dev, npu) {
+ phys_map_get(npu->chip_id, NPU_NTL, npu3_chip_dev_index(dev),
+ &addr, &size);
+ val = SETFIELD(NPU3_NTL_BAR_ADDR, 0ull, addr >> 16);
+ val = SETFIELD(NPU3_NTL_BAR_SIZE, val, ilog2(size >> 16));
+ npu3_write(npu, NPU3_NTL_BAR(dev->index), val);
+
+ dev->ntl_bar.addr = addr;
+ dev->ntl_bar.size = size;
+ }
+
+ /* GENID bar (logically divided per device) */
+ phys_map_get(npu->chip_id, NPU_GENID, npu->index, &addr, NULL);
+ val = SETFIELD(NPU3_GENID_BAR_ADDR, 0ull, addr >> 19);
+ npu3_write(npu, NPU3_GENID_BAR, val);
+
+ npu3_for_each_dev(dev, npu) {
+ dev->genid_bar.addr = addr + (dev->index << 16);
+ dev->genid_bar.size = 64 << 10;
+ }
+}
+
+void npu3_dev_enable_bars(struct npu3_dev *dev, bool enable)
+{
+ struct npu3 *npu = dev->npu;
+ uint64_t reg, val;
+
+ if (dev->ntl_bar.enable == enable) /* No state change */
+ return;
+
+ dev->ntl_bar.enable = enable;
+ dev->genid_bar.enable = enable;
+
+ reg = NPU3_NTL_BAR(dev->index);
+ val = npu3_read(npu, reg);
+ val = SETFIELD(NPU3_NTL_BAR_ENABLE, val, enable);
+ npu3_write(npu, reg, val);
+
+ /*
+ * Generation IDs are a single space in the hardware but we split them
+ * per device. Only disable in hardware if every device has disabled.
+ */
+ if (!enable)
+ npu3_for_each_dev(dev, npu)
+ if (dev->genid_bar.enable)
+ return;
+
+ reg = NPU3_GENID_BAR;
+ val = npu3_read(npu, reg);
+ val = SETFIELD(NPU3_GENID_BAR_ENABLE, val, enable);
+ npu3_write(npu, reg, val);
+}
+
+static uint64_t npu3_ipi_attributes(struct irq_source *is, uint32_t isn)
+{
+ struct npu3 *npu = is->data;
+ uint32_t level = isn - npu->irq_base;
+
+ /* TCE interrupt is used to detect a frozen PE */
+ if (level == 18)
+ return IRQ_ATTR_TARGET_OPAL |
+ IRQ_ATTR_TARGET_RARE |
+ IRQ_ATTR_TYPE_MSI;
+
+ return IRQ_ATTR_TARGET_LINUX;
+}
+
+static void npu3_ipi_interrupt(struct irq_source *is, uint32_t isn)
+{
+ struct npu3 *npu = is->data;
+ uint32_t level = isn - npu->irq_base;
+
+ if (level != 18) {
+ NPU3ERR(npu, "Received unknown interrupt %d\n", level);
+ return;
+ }
+
+ opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
+}
+
+#define NPU3_IRQ_LEVELS 60
+
+static char *npu3_ipi_name(struct irq_source *is, uint32_t isn)
+{
+ struct npu3 *npu = is->data;
+ uint32_t level = isn - npu->irq_base;
+ static const char *names[NPU3_IRQ_LEVELS] = {
+ [0] = "NDL 0 Stall Event (brick 0)",
+ [1] = "NDL 0 No-Stall Event (brick 0)",
+ [2] = "NDL 1 Stall Event (brick 1)",
+ [3] = "NDL 1 No-Stall Event (brick 1)",
+ [4] = "NDL 2 Stall Event (brick 2)",
+ [5] = "NDL 2 No-Stall Event (brick 2)",
+ [6] = "NDL 3 Stall Event (brick 3)",
+ [7] = "NDL 3 No-Stall Event (brick 3)",
+ [8] = "NDL 4 Stall Event (brick 4)",
+ [9] = "NDL 4 No-Stall Event (brick 4)",
+ [10] = "NDL 5 Stall Event (brick 5)",
+ [11] = "NDL 5 No-Stall Event (brick 5)",
+ [12] = "NTL 0 Event",
+ [13] = "NTL 1 Event",
+ [14] = "NTL 2 Event",
+ [15] = "NTL 3 Event",
+ [16] = "NTL 4 Event",
+ [17] = "NTL 5 Event",
+ [18] = "TCE Event",
+ [19] = "ATS Event",
+ [20] = "CQ Event",
+ [21] = "MISC Event",
+ [41] = "Memory Controller Event",
+ [42] = "NDL 6 Stall Event (brick 6)",
+ [43] = "NDL 6 No-Stall Event (brick 6)",
+ [44] = "NDL 7 Stall Event (brick 7)",
+ [45] = "NDL 7 No-Stall Event (brick 7)",
+ [46] = "NDL 8 Stall Event (brick 8)",
+ [47] = "NDL 8 No-Stall Event (brick 8)",
+ [48] = "NDL 9 Stall Event (brick 9)",
+ [49] = "NDL 9 No-Stall Event (brick 9)",
+ [50] = "NDL 10 Stall Event (brick 10)",
+ [51] = "NDL 10 No-Stall Event (brick 10)",
+ [52] = "NDL 11 Stall Event (brick 11)",
+ [53] = "NDL 11 No-Stall Event (brick 11)",
+ [54] = "NTL 6 Event",
+ [55] = "NTL 7 Event",
+ [56] = "NTL 8 Event",
+ [57] = "NTL 9 Event",
+ [58] = "NTL 10 Event",
+ [59] = "NTL 11 Event",
+ };
+
+ if (level >= NPU3_IRQ_LEVELS || !names[level])
+ return strdup("Unknown");
+
+ return strdup(names[level]);
+}
+
+static const struct irq_source_ops npu3_ipi_ops = {
+ .attributes = npu3_ipi_attributes,
+ .interrupt = npu3_ipi_interrupt,
+ .name = npu3_ipi_name,
+};
+
+static void npu3_setup_irqs(struct npu3 *npu)
+{
+ uint64_t reg, val;
+ uint32_t base;
+
+ base = xive_alloc_ipi_irqs(npu->chip_id, NPU3_IRQ_LEVELS, 64);
+ if (base == XIVE_IRQ_ERROR) {
+ NPU3ERR(npu, "Failed to allocate interrupt sources\n");
+ return;
+ }
+
+ xive_register_ipi_source(base, NPU3_IRQ_LEVELS, npu, &npu3_ipi_ops);
+
+ /* Set IPI configuration */
+ reg = NPU3_MISC_CFG;
+ val = npu3_read(npu, reg);
+ val = SETFIELD(NPU3_MISC_CFG_IPI_PS, val, NPU3_MISC_CFG_IPI_PS_64K);
+ val = SETFIELD(NPU3_MISC_CFG_IPI_OS, val, NPU3_MISC_CFG_IPI_OS_AIX);
+ npu3_write(npu, reg, val);
+
+ /* Set IRQ base */
+ reg = NPU3_MISC_INT_BAR;
+ val = SETFIELD(NPU3_MISC_INT_BAR_ADDR, 0ull,
+ (uint64_t)xive_get_trigger_port(base) >> 12);
+ npu3_write(npu, reg, val);
+
+ npu->irq_base = base;
+}
+
+static void npu3_init(struct npu3 *npu)
+{
+ struct npu3_dev *dev;
+
+ platform.npu3_device_detect(npu);
+ npu3_for_each_dev(dev, npu)
+ npu3_device_detect_fixup(dev);
+
+ npu3_misc_config(npu);
+ npu3_assign_bars(npu);
+ npu3_setup_irqs(npu);
+ npu3_init_nvlink(npu);
+}
+
+void probe_npu3(void)
+{
+ struct dt_node *dn;
+ struct npu3 *npu;
+
+ if (!npu3_dt_create())
+ return;
+
+ if (!platform.npu3_device_detect) {
+ prlog(PR_INFO, "NPU: Platform does not support NPU\n");
+ return;
+ }
+
+ dt_for_each_compatible(dt_root, dn, "ibm,power9-npu3") {
+ npu = npu3_create(dn);
+ npu3_init(npu);
+ }
+}