aboutsummaryrefslogtreecommitdiffstats
path: root/roms/skiboot/platforms/astbmc/witherspoon.c
diff options
context:
space:
mode:
Diffstat (limited to 'roms/skiboot/platforms/astbmc/witherspoon.c')
-rw-r--r--roms/skiboot/platforms/astbmc/witherspoon.c604
1 files changed, 604 insertions, 0 deletions
diff --git a/roms/skiboot/platforms/astbmc/witherspoon.c b/roms/skiboot/platforms/astbmc/witherspoon.c
new file mode 100644
index 000000000..67c24b532
--- /dev/null
+++ b/roms/skiboot/platforms/astbmc/witherspoon.c
@@ -0,0 +1,604 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/* Copyright 2017-2019 IBM Corp. */
+
+#include <skiboot.h>
+#include <device.h>
+#include <console.h>
+#include <chip.h>
+#include <ipmi.h>
+#include <psi.h>
+#include <npu-regs.h>
+#include <xscom.h>
+#include <xscom-p9-regs.h>
+#include <timebase.h>
+#include <pci.h>
+#include <pci-slot.h>
+#include <phb4.h>
+#include <npu2.h>
+#include <occ.h>
+#include <i2c.h>
+#include <secvar.h>
+
+#include "astbmc.h"
+#include "ast.h"
+
+/*
+ * Board variant, selected by witherspoon_probe() from the highest NPU
+ * group id found in the device tree (1 => Redbud, 2 => Sequoia).
+ */
+static enum {
+	WITHERSPOON_TYPE_UNKNOWN = 0,	/* also the zero-initialised default */
+	WITHERSPOON_TYPE_SEQUOIA,
+	WITHERSPOON_TYPE_REDBUD,
+} witherspoon_type;
+
+/*
+ * HACK: Hostboot doesn't export the correct data for the system VPD
+ * EEPROM on this system, so override the node's "compatible" and
+ * "label" properties here. Does nothing when the node is not present.
+ */
+static void vpd_dt_fixup(void)
+{
+	static const char eeprom_path[] =
+		"/xscom@603fc00000000/i2cm@a2000/i2c-bus@0/eeprom@50";
+	struct dt_node *eeprom = dt_find_by_path(dt_root, eeprom_path);
+
+	if (!eeprom)
+		return;
+
+	/* Drop whatever was exported before adding the corrected value */
+	dt_check_del_prop(eeprom, "compatible");
+	dt_add_property_string(eeprom, "compatible", "atmel,24c512");
+
+	dt_check_del_prop(eeprom, "label");
+	dt_add_property_string(eeprom, "label", "system-vpd");
+}
+
+/*
+ * For each "ibm,xscom" node, make sure the "i2cm@a1000" I2C master has an
+ * "i2c-bus@4" child (the bus used for OCAPI reset), creating the node when
+ * the device tree does not already provide it. An xscom node without that
+ * I2C master is reported and skipped.
+ */
+static void witherspoon_create_ocapi_i2c_bus(void)
+{
+	struct dt_node *xscom;
+
+	prlog(PR_DEBUG, "OCAPI: Adding I2C bus device node for OCAPI reset\n");
+
+	dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
+		struct dt_node *master, *port;
+
+		master = dt_find_by_name(xscom, "i2cm@a1000");
+		if (!master) {
+			prlog(PR_ERR, "OCAPI: Failed to add I2C bus device node\n");
+			continue;
+		}
+
+		/* Only create the port when it is not already described */
+		if (!dt_find_by_name(master, "i2c-bus@4")) {
+			port = dt_new_addr(master, "i2c-bus", 4);
+			dt_add_property_cells(port, "reg", 4);
+			/* 0x61a80 == 400000 */
+			dt_add_property_cells(port, "bus-frequency", 0x61a80);
+			dt_add_property_strings(port, "compatible",
+						"ibm,opal-i2c",
+						"ibm,power8-i2c-port",
+						"ibm,power9-i2c-port");
+		}
+	}
+}
+
+/*
+ * Platform probe hook.
+ *
+ * Returns false unless the root node is compatible with "ibm,witherspoon".
+ * On a match it runs the common astbmc early init, selects the OPAL UART
+ * console policy, applies the device-tree fixups and finally derives the
+ * board variant from the highest "ibm,npu-group-id" found on the
+ * "ibm,npu-link" nodes (1 => Redbud, 2 => Sequoia, anything else is
+ * reported as unknown), then returns true.
+ */
+static bool witherspoon_probe(void)
+{
+	struct dt_node *link;
+	int max_group = 0;
+
+	if (!dt_node_is_compatible(dt_root, "ibm,witherspoon"))
+		return false;
+
+	/* Lot of common early inits here */
+	astbmc_early_init();
+
+	/* Setup UART for use by OPAL (Linux hvc) */
+	uart_set_console_policy(UART_CONSOLE_OPAL);
+
+	vpd_dt_fixup();
+	witherspoon_create_ocapi_i2c_bus();
+
+	dt_for_each_compatible(dt_root, link, "ibm,npu-link") {
+		int group = dt_prop_get_u32(link, "ibm,npu-group-id");
+
+		if (group > max_group)
+			max_group = group;
+	}
+
+	if (max_group == 1) {
+		witherspoon_type = WITHERSPOON_TYPE_REDBUD;
+	} else if (max_group == 2) {
+		witherspoon_type = WITHERSPOON_TYPE_SEQUOIA;
+	} else {
+		witherspoon_type = WITHERSPOON_TYPE_UNKNOWN;
+		prlog(PR_NOTICE, "PLAT: Unknown Witherspoon variant detected\n");
+	}
+
+	return true;
+}
+
+/*
+ * Activate the PCIe shared slot from @chip, the chip with the secondary
+ * slot.
+ *
+ * Sets PPC_BIT(2) in the P9 GPIO output-enable register and then in the
+ * GPIO output-data register, and waits 100us for the line to settle.
+ * Each register is read-modify-written; if any XSCOM access fails the
+ * sequence is abandoned and an error is logged, so that a value that was
+ * never read is not written back to the GPIO registers.
+ */
+static void phb4_activate_shared_slot_witherspoon(struct proc_chip *chip)
+{
+	uint64_t val = 0;
+	int rc;
+
+	/*
+	 * Shared slot activation is done by raising a GPIO line on the
+	 * chip with the secondary slot. It will somehow activate the
+	 * sideband signals between the slots.
+	 * Need to wait 100us for stability.
+	 */
+	rc = xscom_read(chip->id, P9_GPIO_DATA_OUT_ENABLE, &val);
+	if (rc)
+		goto xscom_failed;
+	val |= PPC_BIT(2);
+	rc = xscom_write(chip->id, P9_GPIO_DATA_OUT_ENABLE, val);
+	if (rc)
+		goto xscom_failed;
+
+	rc = xscom_read(chip->id, P9_GPIO_DATA_OUT, &val);
+	if (rc)
+		goto xscom_failed;
+	val |= PPC_BIT(2);
+	rc = xscom_write(chip->id, P9_GPIO_DATA_OUT, val);
+	if (rc)
+		goto xscom_failed;
+
+	time_wait_us(100);
+	prlog(PR_INFO, "Shared PCI slot activated\n");
+	return;
+
+xscom_failed:
+	prlog(PR_ERR, "PLAT: Shared PCI slot activation failed, rc=%d\n", rc);
+}
+
+/*
+ * Detect if a x16 card is present on the shared slot and do some extra
+ * configuration if it is.
+ *
+ * The shared slot, a.k.a "Slot 2" in the documentation, is connected to
+ * PEC2 phb index 3 on both chips. From skiboot, it looks like two x8
+ * slots, each with its own presence bit:
+ *
+ *   slot0 presence    slot1 presence
+ *         0                 0         => no card
+ *         1                 0         => x8 or less card detected
+ *         1                 1         => x16 card detected
+ *         0                 1         => invalid combination
+ *
+ * Only the '1 1' combination triggers any action: the shared slot is
+ * activated from the second chip and the two slots are recorded as each
+ * other's peer.
+ *
+ * We can't tell a shared-slot-compatible card (such as Mellanox CX5) from
+ * a 'normal' x16 PCI card, so the slot is activated in both cases, as it
+ * doesn't seem to hurt. With a normal x16 card the link won't train on the
+ * second slot (nothing to do with the activation) and the procedure times
+ * out, adding some delay to the boot time; the recommendation is therefore
+ * not to use a normal x16 card on the shared slot of a witherspoon. A x8
+ * or less adapter on the shared slot should work like on any other
+ * physical slot.
+ *
+ * The detection is skipped, with a warning, unless exactly two chips are
+ * found.
+ */
+static void witherspoon_shared_slot_fixup(void)
+{
+	struct proc_chip *first, *second;
+	struct pci_slot *slot_a, *slot_b;
+	uint8_t present_a = 0, present_b = 0;
+
+	first = next_chip(NULL);
+	second = next_chip(first);
+	if (!second || next_chip(second)) {
+		prlog(PR_WARNING,
+		      "PLAT: Can't find second chip, "
+		      "skipping PCIe shared slot detection\n");
+		return;
+	}
+
+	/* the shared slot is connected to PHB3 on both chips */
+	slot_a = pci_slot_find(phb4_get_opal_id(first->id, 3));
+	slot_b = pci_slot_find(phb4_get_opal_id(second->id, 3));
+	if (!slot_a || !slot_b)
+		return;
+
+	/* A slot without a presence callback counts as empty */
+	if (slot_a->ops.get_presence_state)
+		slot_a->ops.get_presence_state(slot_a, &present_a);
+	if (slot_b->ops.get_presence_state)
+		slot_b->ops.get_presence_state(slot_b, &present_b);
+
+	if (present_a != 1 || present_b != 1)
+		return;
+
+	phb4_activate_shared_slot_witherspoon(second);
+	slot_a->peer_slot = slot_b;
+	slot_b->peer_slot = slot_a;
+}
+
+/*
+ * Device-walk callback (passed to pci_walk_dev()) that recognises the
+ * Mellanox ConnectX adapters listed below.
+ *
+ * @phb:      unused
+ * @dev:      device whose vendor/device id (dev->vdid) is examined
+ * @userdata: unused
+ *
+ * Returns 1 when @dev has the Mellanox vendor id (0x15b3) and one of the
+ * listed device ids, 0 otherwise.
+ */
+static int check_mlx_cards(struct phb *phb __unused, struct pci_device *dev,
+			   void *userdata __unused)
+{
+	/* Read-only lookup table: no need to rebuild it on every call */
+	static const uint16_t mlx_cards[] = {
+		0x1017, /* ConnectX-5 */
+		0x1019, /* ConnectX-5 Ex */
+		0x101b, /* ConnectX-6 */
+		0x101d, /* ConnectX-6 Dx */
+		0x101f, /* ConnectX-6 Lx */
+		0x1021, /* ConnectX-7 */
+	};
+	size_t i;
+
+	if (PCI_VENDOR_ID(dev->vdid) != 0x15b3) /* Mellanox */
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(mlx_cards); i++) {
+		if (mlx_cards[i] == PCI_DEVICE_ID(dev->vdid))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Post-PCI-probe hook.
+ *
+ * Reallocate dma engines between stacks in PEC2 if a Mellanox card is
+ * found on the shared slot, as it is required to get good GPU direct
+ * performance. Only PHB4s with index 3 (the first PHB on PEC2) are
+ * inspected.
+ */
+static void witherspoon_pci_probe_complete(void)
+{
+	struct phb *phb;
+
+	for_each_phb(phb) {
+		struct phb4 *p;
+
+		/* skip the virtual PHBs */
+		if (phb->phb_type != phb_type_pcie_v4)
+			continue;
+
+		/* Keep only the first PHB on PEC2 */
+		p = phb_to_phb4(phb);
+		if (p->index != 3)
+			continue;
+
+		if (pci_walk_dev(phb, NULL, check_mlx_cards, NULL))
+			phb4_pec2_dma_engine_realloc(p);
+	}
+}
+
+/*
+ * Set the brick index and device type of the NPU device whose link index
+ * is @link_index.
+ *
+ * @npu:         NPU whose device array is searched
+ * @link_index:  link index identifying the device
+ * @brick_index: brick index to store in the device
+ * @type:        device type to store in the device
+ *
+ * Only the first matching device is updated. An error is logged, and
+ * nothing is changed, when no device has that link index.
+ */
+static void set_link_details(struct npu2 *npu, uint32_t link_index,
+			     uint32_t brick_index, enum npu2_dev_type type)
+{
+	for (int i = 0; i < npu->total_devices; i++) {
+		struct npu2_dev *dev = &npu->devices[i];
+
+		if (dev->link_index != link_index)
+			continue;
+
+		dev->brick_index = brick_index;
+		dev->type = type;
+		return;
+	}
+
+	prlog(PR_ERR, "PLAT: Could not find NPU link index %d\n",
+	      link_index);
+}
+
+/*
+ * Platform hook deciding, per NPU link, whether it is NVLink or OpenCAPI.
+ *
+ * On anything but Redbud every link is marked NVLink. On Redbud (which
+ * must expose exactly 6 links) the presence of the two GPU slots is
+ * obtained from the OCC, and, if at least one slot is populated, an I2C
+ * device at platform.ocapi->i2c_presence_addr is read: bit 0 set means
+ * slot 0 holds an OpenCAPI device, bit 1 set means the same for slot 1.
+ * The links belonging to a populated slot are then configured through
+ * set_link_details(); links of an empty slot are left untouched.
+ *
+ * Nothing is configured when the I2C port cannot be found, when no slot
+ * is populated, or when an I2C transfer fails (the latter is logged with
+ * its return code).
+ */
+static void witherspoon_npu2_device_detect(struct npu2 *npu)
+{
+	struct proc_chip *chip;
+	struct dt_node *port;
+	char wanted[17];
+	uint64_t port_id = 0;
+	uint8_t pins;
+	bool slot0, slot1;
+	int rc;
+
+	if (witherspoon_type != WITHERSPOON_TYPE_REDBUD) {
+		prlog(PR_DEBUG, "PLAT: Setting all NPU links to NVLink, OpenCAPI only supported on Redbud\n");
+		for (int i = 0; i < npu->total_devices; i++)
+			npu->devices[i].type = NPU2_DEV_TYPE_NVLINK;
+		return;
+	}
+	assert(npu->total_devices == 6);
+
+	chip = get_chip(npu->chip_id);
+
+	/* Find I2C port */
+	snprintf(wanted, sizeof(wanted), "p8_%08x_e%dp%d",
+		 chip->id, platform.ocapi->i2c_engine,
+		 platform.ocapi->i2c_port);
+	dt_for_each_compatible(dt_root, port, "ibm,power9-i2c-port") {
+		if (!streq(wanted, dt_prop_get(port, "ibm,port-name")))
+			continue;
+		port_id = dt_prop_get_u32(port, "ibm,opal-id");
+		break;
+	}
+
+	if (!port_id) {
+		prlog(PR_ERR, "PLAT: Could not find NPU presence I2C port\n");
+		return;
+	}
+
+	slot0 = occ_get_gpu_presence(chip, 0);
+	if (slot0)
+		prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
+
+	slot1 = occ_get_gpu_presence(chip, 1);
+	if (slot1)
+		prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
+
+	/*
+	 * The following I2C ops generate errors if no device is
+	 * present on any SXM2 slot. Since it's useless, let's skip it
+	 */
+	if (!(slot0 || slot1))
+		return;
+
+	/* Set pins to input */
+	pins = 0xff;
+	rc = i2c_request_send(port_id,
+			      platform.ocapi->i2c_presence_addr, SMBUS_WRITE,
+			      3, 1, &pins, 1, 120);
+	if (!rc) {
+		/* Read the presence value */
+		pins = 0x00;
+		rc = i2c_request_send(port_id,
+				      platform.ocapi->i2c_presence_addr,
+				      SMBUS_READ, 0, 1, &pins, 1, 120);
+	}
+	if (rc) {
+		prlog(PR_ERR, "PLAT: NPU device type detection failed, rc=%d\n", rc);
+		return;
+	}
+
+	if (slot0) {
+		if (!(pins & (1 << 0))) {
+			prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n",
+			      chip->id);
+			set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
+			set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
+			set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
+		} else {
+			prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n",
+			      chip->id);
+			/*
+			 * On witherspoon, bricks 2 and 3 are connected to
+			 * the lanes matching links 0 and 1 in OpenCAPI mode.
+			 */
+			set_link_details(npu, 1, 3, NPU2_DEV_TYPE_OPENCAPI);
+			/* We currently don't support using the second link */
+			set_link_details(npu, 0, 2, NPU2_DEV_TYPE_UNKNOWN);
+		}
+	}
+
+	if (slot1) {
+		if (!(pins & (1 << 1))) {
+			prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n",
+			      chip->id);
+			set_link_details(npu, 3, 3, NPU2_DEV_TYPE_NVLINK);
+			set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
+			set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
+		} else {
+			prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n",
+			      chip->id);
+			set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
+			/* We currently don't support using the second link */
+			set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
+		}
+	}
+}
+
+/*
+ * Return the slot label for an OpenCAPI brick.
+ *
+ * @chip_id:     chip the brick belongs to; 0 selects the first pair of
+ *               labels, any other value the second pair
+ * @brick_index: brick number; only bricks 3 and 4 have a label
+ *
+ * Returns a string literal, or NULL for any other brick.
+ */
+static const char *witherspoon_ocapi_slot_label(uint32_t chip_id,
+						uint32_t brick_index)
+{
+	switch (brick_index) {
+	case 3:
+		return chip_id == 0 ? "OPENCAPI-GPU0" : "OPENCAPI-GPU3";
+	case 4:
+		return chip_id == 0 ? "OPENCAPI-GPU1" : "OPENCAPI-GPU4";
+	default:
+		return NULL;
+	}
+}
+
+/* OpenCAPI description of the platform: I2C wiring and slot labelling. */
+static const struct platform_ocapi witherspoon_ocapi = {
+	.i2c_engine		= 1,
+	.i2c_port		= 4,
+	.odl_phy_swap		= false,
+	.ocapi_slot_label	= witherspoon_ocapi_slot_label,
+
+	/*
+	 * Witherspoon uses SXM2 connectors, carrying 2 OCAPI links
+	 * over a single connector - hence each pair of bricks shares
+	 * the same pin for resets. We currently only support using
+	 * bricks 3 and 4, among other reasons because we can't handle
+	 * a reset on one link causing the other link to reset as
+	 * well.
+	 */
+	.i2c_reset_addr		= 0x20,
+	.i2c_reset_brick2	= 0x01,
+	.i2c_reset_brick3	= 0x01,
+	.i2c_reset_brick4	= 0x02,
+	.i2c_reset_brick5	= 0x02,
+
+	/* Per-brick masks are unused, we do this in custom presence detect */
+	.i2c_presence_addr	= 0x20,
+	.i2c_presence_brick2	= 0,
+	.i2c_presence_brick3	= 0,
+	.i2c_presence_brick4	= 0,
+	.i2c_presence_brick5	= 0,
+};
+
+/*
+ * Parse a slot label of the form "GPU<n>" and return <n>.
+ *
+ * @slot: NUL-terminated slot label, may be NULL
+ *
+ * Returns the decimal number that follows the "GPU" prefix (as converted
+ * by strtol()), or -1 when @slot is NULL, does not start with "GPU", has
+ * nothing convertible after the prefix, or has trailing characters after
+ * the number.
+ */
+static int gpu_slot_to_num(const char *slot)
+{
+	static const char prefix[] = "GPU";
+	const size_t prefix_len = sizeof(prefix) - 1;
+	char *end = NULL;
+	int num;
+
+	if (!slot)
+		return -1;
+
+	/*
+	 * strncmp() stops at the terminator, so labels shorter than the
+	 * prefix are rejected without reading past the end of the string
+	 * (which a fixed-length memcmp() would do).
+	 */
+	if (strncmp(slot, prefix, prefix_len))
+		return -1;
+
+	num = strtol(slot + prefix_len, &end, 10);
+	if (end == slot + prefix_len || *end)
+		return -1;
+
+	return num;
+}
+
+/*
+ * Add the "ibm,nvlink-peers" property to the GPU device nodes behind the
+ * NVLink NPU PHB @npuphb.
+ *
+ * The GPUs attached to the NPU are identified through their "GPU<n>" slot
+ * labels and stored in g[], indexed relative to the lowest GPU number
+ * found; the NVLink bridge nodes are stored in n[], indexed by brick.
+ * The property is then built from the fixed Redbud/Sequoia wiring tables
+ * below, using phandle 0 for any node that was not found.
+ *
+ * Nothing is done for an unknown board variant, or when the GPU slot
+ * labels fail the sanity check that follows the first scan.
+ */
+static void npu2_phb_nvlink_dt(struct phb *npuphb)
+{
+	struct dt_node *g[3] = { NULL }; /* Current maximum 3 GPUs per 1 NPU */
+	struct dt_node *n[6] = { NULL };
+	int max_gpus, i, gpuid, first, last;
+	struct npu2 *npu2_phb = phb_to_npu2_nvlink(npuphb);
+	struct pci_device *npd;
+
+	switch (witherspoon_type) {
+	case WITHERSPOON_TYPE_REDBUD:
+		max_gpus = 4;
+		break;
+	case WITHERSPOON_TYPE_SEQUOIA:
+		max_gpus = 6;
+		break;
+	default:
+		/* witherspoon_probe() already reported missing support */
+		return;
+	}
+
+	/* Find the indexes of GPUs connected to this NPU */
+	for (i = 0, first = max_gpus, last = 0; i < npu2_phb->total_devices;
+	     ++i) {
+		gpuid = gpu_slot_to_num(npu2_phb->devices[i].nvlink.slot_label);
+		if (gpuid < 0)
+			continue;
+		if (gpuid > last)
+			last = gpuid;
+		if (gpuid < first)
+			first = gpuid;
+	}
+
+	/* Either no "GPUx" slots found or they are not consecutive, abort */
+	if (!last || last + 1 - first > max_gpus)
+		return;
+
+	/* Collect GPU device nodes, sorted by an index from "GPUn" */
+	for (i = 0; i < npu2_phb->total_devices; ++i) {
+		struct npu2_dev *dev = &npu2_phb->devices[i];
+
+		/*
+		 * Links without a "GPUn" label must not be used as an
+		 * index into g[], and neither must a label that falls
+		 * outside of the array: max_gpus is larger than the
+		 * number of GPUs a single NPU can have.
+		 */
+		gpuid = gpu_slot_to_num(dev->nvlink.slot_label);
+		if (gpuid >= 0) {
+			if (gpuid - first >= (int)ARRAY_SIZE(g))
+				prlog(PR_WARNING,
+				      "PLAT: Ignoring out of range NVLink slot GPU%d\n",
+				      gpuid);
+			else if (dev->nvlink.pd)
+				g[gpuid - first] = dev->nvlink.pd->dn;
+		}
+
+		/* Collect NVLink bridge nodes too, for their phandles */
+		list_for_each(&npuphb->devices, npd, link) {
+			if (npd->bdfn == dev->bdfn) {
+				assert(dev->brick_index < ARRAY_SIZE(n));
+				n[dev->brick_index] = npd->dn;
+			}
+		}
+	}
+
+	/*
+	 * Store interconnect phandles in the device tree.
+	 * The mapping is from Witherspoon_Design_Workbook_v1.7_19June2018.pdf,
+	 * pages 39 (Sequoia), 40 (Redbud):
+	 *   Figure 16: NVLink wiring diagram for planar with 6 GPUs
+	 *   Figure 17: NVLink wiring diagram for planar with 4 GPUs
+	 */
+#define PEERPH(g) ((g)?(g)->phandle:0)
+	switch (witherspoon_type) {
+	case WITHERSPOON_TYPE_REDBUD:
+		if (g[0])
+			dt_add_property_cells(g[0], "ibm,nvlink-peers",
+					      PEERPH(g[1]), PEERPH(n[0]),
+					      PEERPH(g[1]), PEERPH(n[1]),
+					      PEERPH(g[1]), PEERPH(n[2]));
+		if (g[1])
+			dt_add_property_cells(g[1], "ibm,nvlink-peers",
+					      PEERPH(g[0]), PEERPH(n[3]),
+					      PEERPH(g[0]), PEERPH(n[4]),
+					      PEERPH(g[0]), PEERPH(n[5]));
+		break;
+	case WITHERSPOON_TYPE_SEQUOIA:
+		if (g[0])
+			dt_add_property_cells(g[0], "ibm,nvlink-peers",
+					      PEERPH(g[1]), PEERPH(n[0]),
+					      PEERPH(g[2]), PEERPH(g[2]),
+					      PEERPH(g[1]), PEERPH(n[1]));
+		if (g[1])
+			dt_add_property_cells(g[1], "ibm,nvlink-peers",
+					      PEERPH(g[0]), PEERPH(n[2]),
+					      PEERPH(g[2]), PEERPH(g[2]),
+					      PEERPH(g[0]), PEERPH(n[3]));
+		if (g[2])
+			dt_add_property_cells(g[2], "ibm,nvlink-peers",
+					      PEERPH(g[1]), PEERPH(g[0]),
+					      PEERPH(g[1]), PEERPH(n[4]),
+					      PEERPH(g[0]), PEERPH(n[5]));
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Final device-tree fixups; skipped entirely when @is_reboot is true.
+ *
+ * Adds the NVLink peer information for every NPU2 PHB found through the
+ * "ibm,power9-npu-pciex" nodes, then disables the GPU I2C port of each
+ * chip on which the OCC reports no GPU at all.
+ */
+static void witherspoon_finalise_dt(bool is_reboot)
+{
+	struct dt_node *node;
+	struct proc_chip *chip;
+
+	if (is_reboot)
+		return;
+
+	dt_for_each_compatible(dt_root, node, "ibm,power9-npu-pciex") {
+		u32 phb_id = dt_prop_get_cell(node, "ibm,opal-phbid", 1);
+		struct phb *npu_phb = pci_get_phb(phb_id);
+
+		if (npu_phb && npu_phb->phb_type == phb_type_npu_v2)
+			npu2_phb_nvlink_dt(npu_phb);
+	}
+
+	/*
+	 * The I2C bus used to talk to the GPUs has a 750K pullup
+	 * which is way too big. If there's no GPUs connected to the
+	 * chip all I2C transactions fail with an Arb loss error since
+	 * SCL/SDA don't return to the idle state fast enough. Disable
+	 * the port to squash the errors.
+	 */
+	for (chip = next_chip(NULL); chip; chip = next_chip(chip)) {
+		bool gpu_seen = false;
+		int slot;
+
+		node = dt_find_by_path(chip->devnode, "i2cm@a1000/i2c-bus@4");
+		if (!node)
+			continue;
+
+		/* Query all three GPU slots of the chip */
+		for (slot = 0; slot < 3; slot++)
+			gpu_seen |= occ_get_gpu_presence(chip, slot);
+
+		if (gpu_seen)
+			continue;
+
+		dt_check_del_prop(node, "status");
+		dt_add_property_string(node, "status", "disabled");
+	}
+}
+
+/*
+ * Platform secvar hook: start secvar with the secboot TPM storage driver
+ * and the edk2-compatible backend, and hand back secvar_main()'s result.
+ */
+static int witherspoon_secvar_init(void)
+{
+	int rc = secvar_main(secboot_tpm_driver, edk2_compatible_v1);
+
+	return rc;
+}
+
+/* The only difference between these is the PCI slot handling */
+
+DECLARE_PLATFORM(witherspoon) = {
+	.name			= "Witherspoon",
+	.bmc			= &bmc_plat_ast2500_openbmc,
+	.ocapi			= &witherspoon_ocapi,
+
+	/* Boot flow */
+	.probe			= witherspoon_probe,
+	.init			= astbmc_init,
+	.finalise_dt		= witherspoon_finalise_dt,
+	.secvar_init		= witherspoon_secvar_init,
+	.exit			= astbmc_exit,
+
+	/* PCI and NPU */
+	.pre_pci_fixup		= witherspoon_shared_slot_fixup,
+	.pci_probe_complete	= witherspoon_pci_probe_complete,
+	.pci_get_slot_info	= dt_slot_get_slot_info,
+	.npu2_device_detect	= witherspoon_npu2_device_detect,
+
+	/* Flash resources */
+	.start_preload_resource	= flash_start_preload_resource,
+	.resource_loaded	= flash_resource_loaded,
+
+	/* Power control, logging and display */
+	.cec_power_down		= astbmc_ipmi_power_down,
+	.cec_reboot		= astbmc_ipmi_reboot,
+	.elog_commit		= ipmi_elog_commit,
+	.terminate		= ipmi_terminate,
+	.op_display		= op_display_lpc,
+};