aboutsummaryrefslogtreecommitdiffstats
path: root/roms/skiboot/hw/centaur.c
diff options
context:
space:
mode:
authorAngelos Mouzakitis <a.mouzakitis@virtualopensystems.com>2023-10-10 14:33:42 +0000
committerAngelos Mouzakitis <a.mouzakitis@virtualopensystems.com>2023-10-10 14:33:42 +0000
commitaf1a266670d040d2f4083ff309d732d648afba2a (patch)
tree2fc46203448ddcc6f81546d379abfaeb323575e9 /roms/skiboot/hw/centaur.c
parente02cda008591317b1625707ff8e115a4841aa889 (diff)
Add submodule dependency filesHEADmaster
Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec
Diffstat (limited to 'roms/skiboot/hw/centaur.c')
-rw-r--r--roms/skiboot/hw/centaur.c555
1 files changed, 555 insertions, 0 deletions
diff --git a/roms/skiboot/hw/centaur.c b/roms/skiboot/hw/centaur.c
new file mode 100644
index 000000000..e9ff4197f
--- /dev/null
+++ b/roms/skiboot/hw/centaur.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Centaur memory buffer chip
+ *
+ * Copyright 2013-2017 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <xscom.h>
+#include <processor.h>
+#include <device.h>
+#include <chip.h>
+#include <centaur.h>
+#include <lock.h>
+#include <fsi-master.h>
+#include <timebase.h>
+
+/*
+ * Centaur chip IDs are using the XSCOM "partID" encoding
+ * described in xscom.h. recap:
+ *
+ * 0b1000.0000.0000.0000.0000.00NN.NCCC.MMMM
+ * N=Node, C=Chip, M=Memory Channel
+ *
+ * We currently use FSI exclusively for centaur access. We can
+ * start using MMIO on Centaur DD2.x when we have a way to handle
+ * machine checks happening inside Sapphire which we don't at the
+ * moment.
+ */
+
+/*
+ * Number of centaur slots allocated per host chip. The low 4 bits of a
+ * part ID (the memory channel) index that array and are bounds-checked
+ * against this value.
+ *
+ * Is that correct ?
+ */
+#define MAX_CENTAURS_PER_CHIP 8
+
+/*
+ * Mark the centaur offline after this many consecutive errors.
+ *
+ * NOTE: the XSCOM accessors compare the error count with '>', so the
+ * chip is offlined once the count of consecutive errors exceeds this
+ * value.
+ */
+#define CENTAUR_ERR_OFFLINE_THRESHOLD 10
+
+/*
+ * FSI2PIB register definitions (this could be moved out if we were to
+ * support FSI master to other chips).
+ *
+ * As used in this file: a SCOM is issued by writing the PCB address,
+ * OR'ed with FSI_CMD_RD or FSI_CMD_WR, to FSI_CMD_REG. The 64-bit
+ * payload is split over two 32-bit registers, FSI_DATA0_REG holding
+ * the upper word and FSI_DATA1_REG the lower word. FSI_STATUS_REG is
+ * read after every command and checked for the FSI_STATUS_ABORT and
+ * FSI_STATUS_ERRORS bits; on failure the engine is reset by writing 0
+ * to FSI_ENG_RESET_REG.
+ */
+#define FSI_DATA0_REG 0x1000
+#define FSI_DATA1_REG 0x1004
+#define FSI_CMD_REG 0x1008
+#define FSI_CMD_WR 0x80000000
+#define FSI_CMD_RD 0x00000000
+#define FSI_ENG_RESET_REG 0x1018
+#define FSI_STATUS_REG 0x101c
+#define FSI_STATUS_ABORT 0x00100000
+#define FSI_STATUS_ERRORS 0x00007000
+/*
+ * Some Centaur XSCOMs we care about.
+ *
+ * SCAC_CONFIG_REG is read to sample the sensor cache enable bit
+ * (SCAC_ENABLE_MSK). Writing that mask to SCAC_CONFIG_CLR is how this
+ * file disables the sensor cache, and writing it to SCAC_CONFIG_SET is
+ * how it re-enables it.
+ */
+#define SCAC_CONFIG_REG 0x020115ce
+#define SCAC_CONFIG_SET 0x020115cf
+#define SCAC_CONFIG_CLR 0x020115d0
+#define SCAC_ENABLE_MSK PPC_BIT(0)
+
+/*
+ * Log helper: emits a message at priority __lev, prefixed with the
+ * part ID of the centaur pointed to by __c. The centaur argument is
+ * parenthesized so that any pointer expression can be passed, not just
+ * a plain identifier.
+ */
+#define cent_log(__lev, __c, __fmt, ...) \
+	prlog(__lev, "CENTAUR %x: " __fmt, (__c)->part_id, ##__VA_ARGS__)
+
+/*
+ * Check the outcome of the FSI SCOM command that was just issued.
+ *
+ * Reads the FSI2PIB status register and returns:
+ *  - the MFSI error code if the status register cannot be read,
+ *  - OPAL_SUCCESS if neither the abort bit nor any error bit is set,
+ *  - OPAL_HARDWARE otherwise.
+ *
+ * A status of all 1's also clears centaur->valid; any other failing
+ * status triggers a reset of the SCOM engine.
+ */
+static int64_t centaur_fsiscom_complete(struct centaur_chip *centaur)
+{
+	const uint32_t failure_bits = FSI_STATUS_ABORT | FSI_STATUS_ERRORS;
+	uint32_t status;
+	int64_t err;
+
+	err = mfsi_read(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
+			centaur->fsi_master_port, FSI_STATUS_REG, &status);
+	if (err) {
+		cent_log(PR_ERR, centaur, "MFSI read error %lld reading STAT\n", err);
+		return err;
+	}
+
+	/* Nothing flagged: the command completed fine */
+	if (!(status & failure_bits))
+		return OPAL_SUCCESS;
+
+	cent_log(PR_ERR, centaur, "Remote FSI SCOM error, status=0x%08x\n", status);
+
+	if (status != 0xffffffffu) {
+		/* At this point HB prints the GPx registers, which are
+		 * believed to live only in the host (FSI master). That is
+		 * skipped for now as there is no good API to reach them.
+		 */
+
+		/* Recovery sequence from HostBoot fsiscom.C
+		 * if SCOM fails and FSI Master displays "MasterTimeOut"
+		 * then 7,6 <covered by FSI driver>
+		 * else if SCOM fails and FSI2PIB Status shows PIB abort
+		 * then just perform unit reset (6) and wait 1 ms
+		 * else (PIB_abort='0' but PIB error is unequal 0)
+		 * then just perform unit reset (6) (wait not needed).
+		 *
+		 * Note: Waiting 1ms inside OPAL is a BIG NO NO !!! There is
+		 * no choice but doing it at the moment, but that will have
+		 * to be fixed one way or another, possibly by returning
+		 * some kind of busy status until the delay is expired.
+		 */
+		err = mfsi_write(centaur->fsi_master_chip_id,
+				 centaur->fsi_master_engine,
+				 centaur->fsi_master_port, FSI_ENG_RESET_REG, 0);
+		if (err) {
+			cent_log(PR_ERR, centaur,
+				 "MFSI write error %lld resetting SCOM engine\n",
+				 err);
+		}
+	} else {
+		/* All 1's ? Assume the chip is gone. Hostboot grabs a pile
+		 * of FFDC from the FSI layer here; we could do that too ...
+		 */
+		cent_log(PR_ERR, centaur, "Chip appears to be dead !\n");
+		centaur->valid = false;
+	}
+
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Read a 64-bit SCOM register of a centaur through the FSI2PIB engine.
+ *
+ * @centaur:  centaur to access
+ * @pcb_addr: PCB address of the register
+ * @val:      receives the register content on success; it is left
+ *            untouched when an error is returned
+ *
+ * Issues a read command, checks its completion status, then fetches
+ * the two 32-bit data words (DATA0 is the upper half, DATA1 the lower
+ * half). Returns OPAL_SUCCESS, or the error reported by the failing
+ * step.
+ */
+static int64_t centaur_fsiscom_read(struct centaur_chip *centaur, uint32_t pcb_addr,
+				    uint64_t *val)
+{
+	int64_t rc;
+	uint32_t data0, data1;
+
+	rc = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
+			centaur->fsi_master_port, FSI_CMD_REG, pcb_addr | FSI_CMD_RD);
+	if (rc) {
+		cent_log(PR_ERR, centaur, "MFSI write error %lld writing CMD\n", rc);
+		return rc;
+	}
+
+	rc = centaur_fsiscom_complete(centaur);
+	if (rc)
+		return rc;
+
+	rc = mfsi_read(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
+		       centaur->fsi_master_port, FSI_DATA0_REG, &data0);
+	if (rc) {
+		cent_log(PR_ERR, centaur, "MFSI read error %lld reading DATA0\n", rc);
+		return rc;
+	}
+	rc = mfsi_read(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
+		       centaur->fsi_master_port, FSI_DATA1_REG, &data1);
+	if (rc) {
+		cent_log(PR_ERR, centaur, "MFSI read error %lld reading DATA1\n", rc);
+		return rc;
+	}
+
+	/* Widen before shifting so the upper word is not lost */
+	*val = (((uint64_t)data0) << 32) | data1;
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * Write a 64-bit SCOM register of a centaur through the FSI2PIB engine.
+ *
+ * The upper and lower halves of @val are loaded into the two data
+ * registers, then the write command for @pcb_addr is issued. The
+ * result is the completion status of that command, or the MFSI error
+ * of the first register write that failed.
+ */
+static int64_t centaur_fsiscom_write(struct centaur_chip *centaur, uint32_t pcb_addr,
+				     uint64_t val)
+{
+	const uint32_t upper = hi32(val);
+	const uint32_t lower = lo32(val);
+	const uint32_t command = pcb_addr | FSI_CMD_WR;
+	int64_t err;
+
+	/* Stage the payload ... */
+	err = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
+			 centaur->fsi_master_port, FSI_DATA0_REG, upper);
+	if (err) {
+		cent_log(PR_ERR, centaur, "MFSI write error %lld writing DATA0\n", err);
+		return err;
+	}
+
+	err = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
+			 centaur->fsi_master_port, FSI_DATA1_REG, lower);
+	if (err) {
+		cent_log(PR_ERR, centaur, "MFSI write error %lld writing DATA1\n", err);
+		return err;
+	}
+
+	/* ... then kick off the write */
+	err = mfsi_write(centaur->fsi_master_chip_id, centaur->fsi_master_engine,
+			 centaur->fsi_master_port, FSI_CMD_REG, command);
+	if (err) {
+		cent_log(PR_ERR, centaur, "MFSI write error %lld writing CMD\n", err);
+		return err;
+	}
+
+	return centaur_fsiscom_complete(centaur);
+}
+
+/*
+ * Look up the centaur descriptor matching an XSCOM part ID.
+ *
+ * @part_id: part ID whose top nibble must be 8; the low 4 bits select
+ *           the memory channel and the bits in between the host chip.
+ *
+ * Returns the descriptor, or NULL (after logging the reason) when the
+ * part ID is malformed, the host chip does not exist, the channel is
+ * out of range, the host chip has no centaur array, or the centaur is
+ * not marked valid.
+ */
+struct centaur_chip *get_centaur(uint32_t part_id)
+{
+	uint32_t hchip_id, mchan;
+	struct proc_chip *hchip;
+	struct centaur_chip *centaur;
+
+	if ((part_id >> 28) != 8) {
+		prerror("CENTAUR: Invalid part ID 0x%x\n", part_id);
+		return NULL;
+	}
+	hchip_id = (part_id & 0x0fffffff) >> 4;
+	mchan = part_id & 0xf;
+
+	hchip = get_chip(hchip_id);
+	if (!hchip) {
+		prerror("CENTAUR: Centaur 0x%x not found on non-existing chip 0x%x\n",
+			part_id, hchip_id);
+		return NULL;
+	}
+	if (mchan >= MAX_CENTAURS_PER_CHIP) {
+		prerror("CENTAUR: Centaur 0x%x channel out of bounds !\n", part_id);
+		return NULL;
+	}
+	if (!hchip->centaurs) {
+		prerror("CENTAUR: Centaur 0x%x not found on chip 0x%x (no centaurs)\n",
+			part_id, hchip_id);
+		return NULL;
+	}
+	centaur = &hchip->centaurs[mchan];
+	if (!centaur->valid) {
+		prerror("CENTAUR: Centaur 0x%x not valid on chip 0x%x\n",
+			part_id, hchip_id);
+		return NULL;
+	}
+	return centaur;
+}
+
+/*
+ * Indirect XSCOM access functions. Copied from xscom.c; at a
+ * later date, we should merge these properly.
+ */
+/*
+ * Log a failed indirect XSCOM access.
+ *
+ * @centaur:  centaur the access was aimed at
+ * @data:     last value read back from the indirect register
+ * @pcb_addr: full indirect address of the access
+ * @is_write: true for a write, false for a read
+ *
+ * The access is reported as a timeout when the completion bit is not
+ * set in @data, and as an error otherwise.
+ */
+static void centaur_xscom_handle_ind_error(struct centaur_chip *centaur,
+					   uint64_t data, uint64_t pcb_addr,
+					   bool is_write)
+{
+	unsigned int stat = GETFIELD(XSCOM_DATA_IND_ERR, data);
+	bool timeout = !(data & XSCOM_DATA_IND_COMPLETE);
+
+	/* XXX: Create error log entry ? */
+	if (timeout)
+		cent_log(PR_ERR, centaur,
+			 "indirect %s timeout, pcb_addr=0x%llx stat=0x%x\n",
+			 is_write ? "write" : "read", pcb_addr, stat);
+	else
+		cent_log(PR_ERR, centaur,
+			 "indirect %s error, pcb_addr=0x%llx stat=0x%x\n",
+			 is_write ? "write" : "read", pcb_addr, stat);
+}
+
+/*
+ * Perform an indirect XSCOM read on a centaur.
+ *
+ * @centaur:  centaur to access
+ * @pcb_addr: indirect address (direct address in the low 31 bits plus
+ *            the indirect address field)
+ * @val:      receives the data field on success, all 1's on any failure
+ *
+ * Writes the read request, then polls the register up to
+ * XSCOM_IND_MAX_RETRIES times until the completion bit shows up.
+ * Returns OPAL_SUCCESS, the error of a failing FSI SCOM access, or
+ * OPAL_HARDWARE when the operation completes with an error status or
+ * never completes within the retry budget.
+ */
+static int centaur_xscom_ind_read(struct centaur_chip *centaur,
+				  uint64_t pcb_addr, uint64_t *val)
+{
+	uint32_t addr;
+	uint64_t data;
+	int rc, retries;
+
+	/* Write indirect address */
+	addr = pcb_addr & 0x7fffffff;
+	data = XSCOM_DATA_IND_READ |
+		(pcb_addr & XSCOM_ADDR_IND_ADDR);
+	rc = centaur_fsiscom_write(centaur, addr, data);
+	if (rc)
+		goto bail;
+
+	/* Wait for completion */
+	for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
+		rc = centaur_fsiscom_read(centaur, addr, &data);
+		if (rc)
+			goto bail;
+
+		/* Not done yet, poll again */
+		if (!(data & XSCOM_DATA_IND_COMPLETE))
+			continue;
+
+		/* Completed, but the operation itself failed */
+		if (data & XSCOM_DATA_IND_ERR) {
+			centaur_xscom_handle_ind_error(centaur, data, pcb_addr,
+						       false);
+			rc = OPAL_HARDWARE;
+			goto bail;
+		}
+
+		*val = data & XSCOM_DATA_IND_DATA;
+		return OPAL_SUCCESS;
+	}
+
+	/*
+	 * Retry budget exhausted without ever seeing the completion bit.
+	 * Report it as a timeout instead of falling through with rc == 0
+	 * and *val never written.
+	 */
+	centaur_xscom_handle_ind_error(centaur, data, pcb_addr, false);
+	rc = OPAL_HARDWARE;
+ bail:
+	/* Every path reaching this label is a failure */
+	*val = (uint64_t)-1;
+	return rc;
+}
+
+/*
+ * Perform an indirect XSCOM write on a centaur.
+ *
+ * @centaur:  centaur to access
+ * @pcb_addr: indirect address (direct address in the low 31 bits plus
+ *            the indirect address field)
+ * @val:      value to write; only the bits covered by
+ *            XSCOM_ADDR_IND_DATA are sent
+ *
+ * Writes address and data in one go, then polls the register up to
+ * XSCOM_IND_MAX_RETRIES times until the completion bit shows up.
+ * Returns OPAL_SUCCESS, the error of a failing FSI SCOM access, or
+ * OPAL_HARDWARE when the operation completes with an error status or
+ * never completes within the retry budget.
+ */
+static int centaur_xscom_ind_write(struct centaur_chip *centaur,
+				   uint64_t pcb_addr, uint64_t val)
+{
+	uint32_t addr;
+	uint64_t data;
+	int rc, retries;
+
+	/* Write indirect address & data */
+	addr = pcb_addr & 0x7fffffff;
+	data = pcb_addr & XSCOM_ADDR_IND_ADDR;
+	data |= val & XSCOM_ADDR_IND_DATA;
+
+	rc = centaur_fsiscom_write(centaur, addr, data);
+	if (rc)
+		goto bail;
+
+	/* Wait for completion */
+	for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
+		rc = centaur_fsiscom_read(centaur, addr, &data);
+		if (rc)
+			goto bail;
+
+		/* Not done yet, poll again */
+		if (!(data & XSCOM_DATA_IND_COMPLETE))
+			continue;
+
+		/* Completed, but the operation itself failed */
+		if (data & XSCOM_DATA_IND_ERR) {
+			centaur_xscom_handle_ind_error(centaur, data, pcb_addr,
+						       true);
+			rc = OPAL_HARDWARE;
+			goto bail;
+		}
+
+		return OPAL_SUCCESS;
+	}
+
+	/*
+	 * Retry budget exhausted without ever seeing the completion bit.
+	 * Report it as a timeout instead of pretending the write landed.
+	 */
+	centaur_xscom_handle_ind_error(centaur, data, pcb_addr, true);
+	rc = OPAL_HARDWARE;
+ bail:
+	return rc;
+}
+
+/*
+ * SCOM controller "read" callback for a centaur.
+ *
+ * Fails with OPAL_PARAMETER when the controller carries no centaur and
+ * with OPAL_XSCOM_CTR_OFFLINED when the centaur has been taken
+ * offline. Otherwise the access is done under the centaur lock, via
+ * the indirect path when the address has the indirect flag set and
+ * via the plain FSI SCOM path when it does not. Consecutive failures
+ * are counted; a success resets the counter, and exceeding
+ * CENTAUR_ERR_OFFLINE_THRESHOLD takes the centaur offline.
+ */
+static int64_t centaur_xscom_read(struct scom_controller *scom,
+				  uint32_t id __unused, uint64_t pcb_addr,
+				  uint64_t *val)
+{
+	struct centaur_chip *centaur = scom->private;
+	int64_t status;
+
+	if (!centaur)
+		return OPAL_PARAMETER;
+	if (!centaur->online)
+		return OPAL_XSCOM_CTR_OFFLINED;
+
+	lock(&centaur->lock);
+
+	status = (pcb_addr & XSCOM_ADDR_IND_FLAG) ?
+		centaur_xscom_ind_read(centaur, pcb_addr, val) :
+		centaur_fsiscom_read(centaur, pcb_addr, val);
+
+	if (!status) {
+		/* A good access breaks the run of consecutive errors */
+		centaur->error_count = 0;
+	} else {
+		centaur->error_count++;
+
+		/* Too many failures in a row: stop talking to this chip */
+		if (centaur->error_count > CENTAUR_ERR_OFFLINE_THRESHOLD) {
+			centaur->online = false;
+			/**
+			 * @fwts-label CentaurOfflinedTooManyErrors
+			 * @fwts-advice OPAL marked a Centaur (memory buffer)
+			 * as offline due to CENTAUR_ERR_OFFLINE_THRESHOLD (10)
+			 * consecutive errors on XSCOMs to this centaur.
+			 * OPAL will now return OPAL_XSCOM_CTR_OFFLINED and not
+			 * try any further XSCOMs. This is likely caused by
+			 * some hardware issue or PRD recovery issue.
+			 */
+			prlog(PR_ERR, "CENTAUR: Offlined %x due to > %d consecutive XSCOM errors. No more XSCOMs to this centaur.\n",
+			      id, CENTAUR_ERR_OFFLINE_THRESHOLD);
+		}
+	}
+
+	unlock(&centaur->lock);
+
+	return status;
+}
+
+/*
+ * SCOM controller "write" callback for a centaur.
+ *
+ * @scom:     controller whose private pointer is the centaur
+ * @id:       part ID of the access, used for logging only
+ * @pcb_addr: register address; the indirect flag selects the indirect
+ *            access path
+ * @val:      value to write
+ *
+ * Returns OPAL_PARAMETER when the controller carries no centaur,
+ * OPAL_XSCOM_CTR_OFFLINED when the centaur has been taken offline, and
+ * otherwise the result of the access, which is done under the centaur
+ * lock. Consecutive failures are counted; a success resets the
+ * counter, and exceeding CENTAUR_ERR_OFFLINE_THRESHOLD takes the
+ * centaur offline and logs it, just like the read path does.
+ */
+static int64_t centaur_xscom_write(struct scom_controller *scom,
+				   uint32_t id, uint64_t pcb_addr,
+				   uint64_t val)
+{
+	struct centaur_chip *centaur = scom->private;
+	int64_t rc;
+
+	if (!centaur)
+		return OPAL_PARAMETER;
+	if (!centaur->online)
+		return OPAL_XSCOM_CTR_OFFLINED;
+
+	lock(&centaur->lock);
+	if (pcb_addr & XSCOM_ADDR_IND_FLAG)
+		rc = centaur_xscom_ind_write(centaur, pcb_addr, val);
+	else
+		rc = centaur_fsiscom_write(centaur, pcb_addr, val);
+
+	/* We mark the centaur offline if we get too many errors on
+	 * consecutive accesses
+	 */
+	if (rc) {
+		centaur->error_count++;
+		if (centaur->error_count > CENTAUR_ERR_OFFLINE_THRESHOLD) {
+			centaur->online = false;
+			/* Do not offline silently: the read path reports
+			 * this event, so report it here as well.
+			 */
+			prlog(PR_ERR, "CENTAUR: Offlined %x due to > %d consecutive XSCOM errors. No more XSCOMs to this centaur.\n",
+			      id, CENTAUR_ERR_OFFLINE_THRESHOLD);
+		}
+	} else
+		centaur->error_count = 0;
+	unlock(&centaur->lock);
+
+	return rc;
+}
+
+/*
+ * Probe the chip behind @centaur and verify that it is a Centaur.
+ *
+ * Reads the ID register at 0xf000f, takes the CFAM ID from bit 44
+ * upwards and requires its low byte to be 0xe9. On a match the EC
+ * level is stored in centaur->ec_level as two nibbles and true is
+ * returned; a failed read or a mismatching ID is logged and yields
+ * false.
+ */
+static bool centaur_check_id(struct centaur_chip *centaur)
+{
+	uint64_t id_reg;
+	uint64_t cfam_id;
+	unsigned int chip_type;
+	int64_t err;
+
+	err = centaur_fsiscom_read(centaur, 0xf000f, &id_reg);
+	if (err) {
+		cent_log(PR_ERR, centaur,
+			 " FSISCOM error %lld reading ID register\n",
+			 err);
+		return false;
+	}
+
+	/* Extract CFAM id */
+	cfam_id = id_reg >> 44;
+
+	/* Identify chip */
+	chip_type = (unsigned int)(cfam_id & 0xff);
+	if (chip_type != 0xe9) {
+		cent_log(PR_ERR, centaur,
+			 " CFAM ID 0x%02x is not a Centaur !\n",
+			 chip_type);
+		return false;
+	}
+
+	/* Get EC level from CFAM ID: one nibble from bits 16-19 goes to
+	 * the high nibble, one from bits 8-11 to the low nibble
+	 */
+	centaur->ec_level = (((cfam_id >> 16) & 0xf) << 4) |
+			    ((cfam_id >> 8) & 0xf);
+
+	return true;
+}
+
+/*
+ * Register one centaur.
+ *
+ * @part_id: XSCOM part ID of the centaur (top nibble must be 8)
+ * @mchip:   chip ID of the FSI master the centaur hangs off
+ * @meng:    FSI master engine selector: 0 picks cMFSI0, anything else
+ *           cMFSI1
+ * @mport:   FSI master port
+ *
+ * Allocates the per-chip centaur array on first use, fills in the slot
+ * for this memory channel, checks the chip ID over FSI and, if that
+ * passes, registers the centaur as a SCOM controller and marks it
+ * valid. Returns true on success; false when the part ID is malformed,
+ * the host chip is unknown, the channel is out of range, the slot is
+ * already in use or the ID check fails.
+ */
+static bool centaur_add(uint32_t part_id, uint32_t mchip, uint32_t meng,
+			uint32_t mport)
+{
+	uint32_t hchip_id, mchan;
+	struct proc_chip *hchip;
+	struct centaur_chip *centaur;
+
+	if ((part_id >> 28) != 8) {
+		prerror("CENTAUR: Invalid part ID 0x%x\n", part_id);
+		return false;
+	}
+	hchip_id = (part_id & 0x0fffffff) >> 4;
+	mchan = part_id & 0xf;
+
+	/* All of these are unsigned, so print them with %u */
+	printf("CENTAUR: Found centaur for chip 0x%x channel %u\n",
+	       hchip_id, mchan);
+	printf("CENTAUR: FSI host: 0x%x cMFSI%u port %u\n",
+	       mchip, meng, mport);
+
+	hchip = get_chip(hchip_id);
+	if (!hchip) {
+		prerror("CENTAUR: No such chip !!!\n");
+		return false;
+	}
+
+	if (mchan >= MAX_CENTAURS_PER_CHIP) {
+		prerror("CENTAUR: Channel out of bounds !\n");
+		return false;
+	}
+
+	/* The per-chip array is created lazily by the first centaur */
+	if (!hchip->centaurs) {
+		hchip->centaurs =
+			zalloc(sizeof(struct centaur_chip) *
+			       MAX_CENTAURS_PER_CHIP);
+		assert(hchip->centaurs);
+	}
+
+	centaur = &hchip->centaurs[mchan];
+	if (centaur->valid) {
+		prerror("CENTAUR: Duplicate centaur !\n");
+		return false;
+	}
+	centaur->part_id = part_id;
+	centaur->fsi_master_chip_id = mchip;
+	centaur->fsi_master_port = mport;
+	centaur->fsi_master_engine = meng ? MFSI_cMFSI1 : MFSI_cMFSI0;
+	centaur->online = true;
+	init_lock(&centaur->lock);
+	list_head_init(&centaur->i2cms);
+
+	/* The FSI routing must be filled in before the chip can be probed */
+	if (!centaur_check_id(centaur))
+		return false;
+
+	centaur->scom.part_id = part_id;
+	centaur->scom.private = centaur;
+	centaur->scom.read = centaur_xscom_read;
+	centaur->scom.write = centaur_xscom_write;
+	scom_register(&centaur->scom);
+
+	cent_log(PR_INFO, centaur, "Found DD%x.%x chip\n",
+		 centaur->ec_level >> 4,
+		 centaur->ec_level & 0xf);
+
+	centaur->valid = true;
+	return true;
+}
+
+/*
+ * Disable the sensor cache of a centaur. Calls nest: only the first
+ * disable touches the hardware, later ones just bump a counter.
+ *
+ * On that first disable the current enable state is recorded (so that
+ * the matching enable can restore it) and the enable bit is cleared.
+ *
+ * Returns how long to wait for logic to stop in TB ticks (0 when the
+ * cache was already disabled by an earlier call) or a negative value
+ * on error: OPAL_PARAMETER when @part_id does not name a usable
+ * centaur, otherwise the error of the failing SCOM access.
+ */
+int64_t centaur_disable_sensor_cache(uint32_t part_id)
+{
+	struct centaur_chip *centaur = get_centaur(part_id);
+	int64_t rc = 0;
+	uint64_t ctrl;
+
+	/* Must be an error code: 0 would read as "done, no wait needed" */
+	if (!centaur)
+		return OPAL_PARAMETER;
+
+	lock(&centaur->lock);
+	centaur->scache_disable_count++;
+	if (centaur->scache_disable_count == 1) {
+		/* Assume "was off" until the register has been read */
+		centaur->scache_was_enabled = false;
+		rc = centaur_fsiscom_read(centaur, SCAC_CONFIG_REG, &ctrl);
+		if (rc)
+			goto bail;
+		centaur->scache_was_enabled = !!(ctrl & SCAC_ENABLE_MSK);
+		rc = centaur_fsiscom_write(centaur, SCAC_CONFIG_CLR, SCAC_ENABLE_MSK);
+		if (rc)
+			goto bail;
+		rc = msecs_to_tb(30);
+	}
+ bail:
+	unlock(&centaur->lock);
+	return rc;
+}
+
+/*
+ * Undo one centaur_disable_sensor_cache() call.
+ *
+ * The sensor cache is switched back on only when the last outstanding
+ * disable is released and the cache was recorded as enabled when it
+ * was first disabled. An unbalanced call (count already 0) is logged
+ * with a backtrace and otherwise ignored.
+ *
+ * Returns 0 on success or when nothing had to be done, OPAL_PARAMETER
+ * when @part_id does not name a usable centaur, or the error of the
+ * failing SCOM write.
+ */
+int64_t centaur_enable_sensor_cache(uint32_t part_id)
+{
+	struct centaur_chip *centaur = get_centaur(part_id);
+	int64_t rc = 0;
+
+	/* Report the bad part ID instead of returning 0 (success) */
+	if (!centaur)
+		return OPAL_PARAMETER;
+
+	lock(&centaur->lock);
+	if (centaur->scache_disable_count == 0) {
+		cent_log(PR_ERR, centaur, "Cache count going negative !\n");
+		backtrace();
+		goto bail;
+	}
+	centaur->scache_disable_count--;
+	if (centaur->scache_disable_count == 0 && centaur->scache_was_enabled)
+		rc = centaur_fsiscom_write(centaur, SCAC_CONFIG_SET, SCAC_ENABLE_MSK);
+ bail:
+	unlock(&centaur->lock);
+	return rc;
+}
+
+/*
+ * Discover the centaurs described in the device tree.
+ *
+ * Every node compatible with "ibm,centaur" supplies the part ID, the
+ * FSI master chip ID and the FSI master engine/port pair, which are
+ * handed to centaur_add().
+ */
+void centaur_init(void)
+{
+	struct dt_node *node;
+
+	dt_for_each_compatible(dt_root, node, "ibm,centaur") {
+		uint32_t part_id = dt_prop_get_u32(node, "ibm,chip-id");
+		uint32_t fsi_chip = dt_prop_get_u32(node, "ibm,fsi-master-chip-id");
+		/* "ibm,fsi-master-port" holds two cells: engine, then port */
+		uint32_t fsi_engine = dt_prop_get_cell(node, "ibm,fsi-master-port", 0);
+		uint32_t fsi_port = dt_prop_get_cell(node, "ibm,fsi-master-port", 1);
+
+		/*
+		 * Only a centaur that was added successfully is exposed
+		 * to Linux as a scom-controller
+		 */
+		if (centaur_add(part_id, fsi_chip, fsi_engine, fsi_port)) {
+			dt_add_property(node, "scom-controller", NULL, 0);
+		}
+	}
+}