Diffstat (limited to 'roms/skiboot/core')
-rw-r--r--  roms/skiboot/core/Makefile.inc | 28
-rw-r--r--  roms/skiboot/core/affinity.c | 125
-rw-r--r--  roms/skiboot/core/bitmap.c | 44
-rw-r--r--  roms/skiboot/core/buddy.c | 292
-rw-r--r--  roms/skiboot/core/chip.c | 190
-rw-r--r--  roms/skiboot/core/console-log.c | 71
-rw-r--r--  roms/skiboot/core/console.c | 451
-rw-r--r--  roms/skiboot/core/cpu.c | 1785
-rw-r--r--  roms/skiboot/core/cpufeatures.c | 1043
-rw-r--r--  roms/skiboot/core/device.c | 1128
-rw-r--r--  roms/skiboot/core/direct-controls.c | 1161
-rw-r--r--  roms/skiboot/core/errorlog.c | 223
-rw-r--r--  roms/skiboot/core/exceptions.c | 233
-rw-r--r--  roms/skiboot/core/fast-reboot.c | 467
-rw-r--r--  roms/skiboot/core/fdt.c | 258
-rw-r--r--  roms/skiboot/core/flash-firmware-versions.c | 164
-rw-r--r--  roms/skiboot/core/flash-subpartition.c | 110
-rw-r--r--  roms/skiboot/core/flash.c | 1186
-rw-r--r--  roms/skiboot/core/gcov-profiling.c | 127
-rw-r--r--  roms/skiboot/core/hmi.c | 1558
-rw-r--r--  roms/skiboot/core/i2c.c | 288
-rw-r--r--  roms/skiboot/core/init.c | 1469
-rw-r--r--  roms/skiboot/core/interrupts.c | 513
-rw-r--r--  roms/skiboot/core/ipmi-opal.c | 138
-rw-r--r--  roms/skiboot/core/ipmi.c | 263
-rw-r--r--  roms/skiboot/core/lock.c | 336
-rw-r--r--  roms/skiboot/core/malloc.c | 76
-rw-r--r--  roms/skiboot/core/mce.c | 309
-rw-r--r--  roms/skiboot/core/mem_region.c | 1555
-rw-r--r--  roms/skiboot/core/nvram-format.c | 331
-rw-r--r--  roms/skiboot/core/nvram.c | 203
-rw-r--r--  roms/skiboot/core/opal-dump.c | 582
-rw-r--r--  roms/skiboot/core/opal-msg.c | 193
-rw-r--r--  roms/skiboot/core/opal.c | 700
-rw-r--r--  roms/skiboot/core/pci-dt-slot.c | 212
-rw-r--r--  roms/skiboot/core/pci-opal.c | 1135
-rw-r--r--  roms/skiboot/core/pci-quirk.c | 135
-rw-r--r--  roms/skiboot/core/pci-slot.c | 241
-rw-r--r--  roms/skiboot/core/pci-virt.c | 256
-rw-r--r--  roms/skiboot/core/pci.c | 1962
-rw-r--r--  roms/skiboot/core/pcie-slot.c | 566
-rw-r--r--  roms/skiboot/core/pel.c | 279
-rw-r--r--  roms/skiboot/core/platform.c | 319
-rw-r--r--  roms/skiboot/core/pool.c | 68
-rw-r--r--  roms/skiboot/core/powercap.c | 37
-rw-r--r--  roms/skiboot/core/psr.c | 41
-rw-r--r--  roms/skiboot/core/relocate.c | 55
-rw-r--r--  roms/skiboot/core/rtc.c | 62
-rw-r--r--  roms/skiboot/core/sensor.c | 152
-rw-r--r--  roms/skiboot/core/stack.c | 266
-rw-r--r--  roms/skiboot/core/test/Makefile.check | 101
-rw-r--r--  roms/skiboot/core/test/dummy-cpu.h | 35
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-0 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-1 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-10 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-11 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-16 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-2 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-26 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-27 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-29 | Bin 0 -> 4096 bytes
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-long | 2
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-nodash | 2
-rw-r--r--  roms/skiboot/core/test/firmware-versions-input/version-trunc | 2
-rw-r--r--  roms/skiboot/core/test/run-api-test.c | 40
-rw-r--r--  roms/skiboot/core/test/run-bitmap.c | 80
-rw-r--r--  roms/skiboot/core/test/run-buddy.c | 73
-rw-r--r--  roms/skiboot/core/test/run-console-log-buf-overrun.c | 105
-rw-r--r--  roms/skiboot/core/test/run-console-log-pr_fmt.c | 63
-rw-r--r--  roms/skiboot/core/test/run-console-log.c | 63
-rw-r--r--  roms/skiboot/core/test/run-cpufeatures.c | 144
-rw-r--r--  roms/skiboot/core/test/run-device.c | 471
-rw-r--r--  roms/skiboot/core/test/run-flash-firmware-versions.c | 154
-rw-r--r--  roms/skiboot/core/test/run-flash-subpartition.c | 48
-rw-r--r--  roms/skiboot/core/test/run-malloc-speed.c | 88
-rw-r--r--  roms/skiboot/core/test/run-malloc.c | 174
-rw-r--r--  roms/skiboot/core/test/run-mem_range_is_reserved.c | 207
-rw-r--r--  roms/skiboot/core/test/run-mem_region.c | 252
-rw-r--r--  roms/skiboot/core/test/run-mem_region_init.c | 175
-rw-r--r--  roms/skiboot/core/test/run-mem_region_next.c | 105
-rw-r--r--  roms/skiboot/core/test/run-mem_region_release_unused.c | 177
-rw-r--r--  roms/skiboot/core/test/run-mem_region_release_unused_noalloc.c | 156
-rw-r--r--  roms/skiboot/core/test/run-mem_region_reservations.c | 228
-rw-r--r--  roms/skiboot/core/test/run-msg.c | 281
-rw-r--r--  roms/skiboot/core/test/run-nvram-format.c | 167
-rw-r--r--  roms/skiboot/core/test/run-pci-quirk.c | 98
-rw-r--r--  roms/skiboot/core/test/run-pel.c | 120
-rw-r--r--  roms/skiboot/core/test/run-pool.c | 59
-rw-r--r--  roms/skiboot/core/test/run-time-utils.c | 52
-rw-r--r--  roms/skiboot/core/test/run-timebase.c | 47
-rw-r--r--  roms/skiboot/core/test/run-timer.c | 84
-rw-r--r--  roms/skiboot/core/test/run-trace.c | 397
-rw-r--r--  roms/skiboot/core/test/stubs.c | 101
-rw-r--r--  roms/skiboot/core/time-utils.c | 64
-rw-r--r--  roms/skiboot/core/timebase.c | 141
-rw-r--r--  roms/skiboot/core/timer.c | 298
-rw-r--r--  roms/skiboot/core/trace.c | 265
-rw-r--r--  roms/skiboot/core/utils.c | 101
-rw-r--r--  roms/skiboot/core/vpd.c | 139
99 files changed, 28745 insertions, 0 deletions
diff --git a/roms/skiboot/core/Makefile.inc b/roms/skiboot/core/Makefile.inc
new file mode 100644
index 000000000..829800e5b
--- /dev/null
+++ b/roms/skiboot/core/Makefile.inc
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Copyright 2012-2019 IBM Corp
+# -*-Makefile-*-
+
+SUBDIRS += core
+CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
+CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
+CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
+CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
+CORE_OBJS += vpd.o platform.o nvram.o nvram-format.o hmi.o mce.o
+CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
+CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
+CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
+CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o
+CORE_OBJS += flash-firmware-versions.o opal-dump.o
+
+ifeq ($(SKIBOOT_GCOV),1)
+CORE_OBJS += gcov-profiling.o
+CFLAGS_SKIP_core/gcov-profiling.o = -Wsuggest-attribute=const
+endif
+
+CORE=core/built-in.a
+
+CFLAGS_SKIP_core/relocate.o = -pg -fstack-protector-all
+CFLAGS_SKIP_core/relocate.o += -fstack-protector -fstack-protector-strong
+CFLAGS_SKIP_core/relocate.o += -fprofile-arcs -ftest-coverage
+
+$(CORE): $(CORE_OBJS:%=core/%)
diff --git a/roms/skiboot/core/affinity.c b/roms/skiboot/core/affinity.c
new file mode 100644
index 000000000..0209d3cd9
--- /dev/null
+++ b/roms/skiboot/core/affinity.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/* Copyright 2013-2019 IBM Corp. */
+
+/*
+ *
+ * We currently construct our associativity properties as such:
+ *
+ * - For "chip" devices (bridges, memory, ...), 4 entries:
+ *
+ * - CCM node ID
+ * - HW card ID
+ * - HW module ID
+ * - Chip ID
+ *
+ * The information is constructed based on the chip ID which (unlike
+ * pHyp) is our HW chip ID (aka "XSCOM" chip ID). We use it to retrieve
+ * the other properties from the corresponding chip/xscom node in the
+ * device-tree. If those properties are absent, 0 is used.
+ *
+ * - For "core" devices, we add a 5th entry:
+ *
+ * - Core ID
+ *
+ * Here too, we do not use the "cooked" HW processor ID from HDAT but
+ * instead use the real HW core ID which is basically the interrupt
+ * server number of thread 0 on that core.
+ *
+ *
+ * The ibm,associativity-reference-points property is currently set to
+ * 4,4 indicating that the chip ID is our only reference point. This
+ * should be extended to encompass the node IDs eventually.
+ */
+#include <skiboot.h>
+#include <opal.h>
+#include <device.h>
+#include <console.h>
+#include <trace.h>
+#include <chip.h>
+#include <cpu.h>
+#include <affinity.h>
+
+static uint32_t get_chip_node_id(struct proc_chip *chip)
+{
+ /* If the xscom node has an ibm,ccm-node-id property, use it */
+ if (dt_has_node_property(chip->devnode, "ibm,ccm-node-id", NULL))
+ return dt_prop_get_u32(chip->devnode, "ibm,ccm-node-id");
+
+ /*
+ * Else use the 3 top bits of the chip ID which should be
+ * the node on P8
+ */
+ return chip->id >> 3;
+}
+
+void add_associativity_ref_point(void)
+{
+ int ref2 = 0x4;
+
+ /*
+ * Note about our use of reference points:
+ *
+ * Linux currently supports up to three levels of NUMA. We use the
+ * first reference point for the node ID and the second reference
+ * point for a second level of affinity. We always use the chip ID
+ * (4) for the first reference point.
+ *
+ * Choosing the second level of affinity is model specific
+ * unfortunately. Current POWER8E models should use the DCM
+ * as a second level of NUMA.
+ *
+ * If there is a way to obtain this information from the FSP
+ * that would be ideal, but for now hardwire our POWER8E setting.
+ *
+ * For GPU nodes we add a third level of NUMA, such that the
+ * distance of the GPU node from all other nodes is uniformly
+ * the highest.
+ */
+ if (PVR_TYPE(mfspr(SPR_PVR)) == PVR_TYPE_P8E)
+ ref2 = 0x3;
+
+ dt_add_property_cells(opal_node, "ibm,associativity-reference-points",
+ 0x4, ref2, 0x2);
+}
+
+void add_chip_dev_associativity(struct dt_node *dev)
+{
+ uint32_t chip_id = dt_get_chip_id(dev);
+ struct proc_chip *chip = get_chip(chip_id);
+ uint32_t hw_cid, hw_mid;
+
+ if (!chip)
+ return;
+
+ hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
+ hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
+
+ dt_add_property_cells(dev, "ibm,associativity", 4,
+ get_chip_node_id(chip),
+ hw_cid, hw_mid, chip_id);
+}
+
+void add_core_associativity(struct cpu_thread *cpu)
+{
+ struct proc_chip *chip = get_chip(cpu->chip_id);
+ uint32_t hw_cid, hw_mid, core_id;
+
+ if (!chip)
+ return;
+
+ if (proc_gen == proc_gen_p8)
+ core_id = (cpu->pir >> 3) & 0xf;
+ else if (proc_gen == proc_gen_p9)
+ core_id = (cpu->pir >> 2) & 0x1f;
+ else if (proc_gen == proc_gen_p10)
+ core_id = (cpu->pir >> 2) & 0x1f;
+ else
+ return;
+
+ hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
+ hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
+
+ dt_add_property_cells(cpu->node, "ibm,associativity", 5,
+ get_chip_node_id(chip),
+ hw_cid, hw_mid, chip->id, core_id);
+}
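Note: the properties built above are flat cell lists: cell 0 holds the entry count (4 for chip devices, 5 for cores), followed by the CCM node, HW card, HW module, chip and, for cores, core IDs. A minimal consumer-side sketch of decoding one of these properties, using only device-tree helpers already used elsewhere in this series (dump_associativity is an illustrative name, not part of the patch):

    /* Sketch: decode { count, ccm-node, hw-card, hw-module, chip[, core] } */
    static void dump_associativity(struct dt_node *n)
    {
        const struct dt_property *p = dt_find_property(n, "ibm,associativity");
        uint32_t count, i;

        if (!p)
            return;
        count = dt_property_get_cell(p, 0);  /* 4 for chip devices, 5 for cores */
        for (i = 1; i <= count; i++)
            prlog(PR_DEBUG, "  level %u = 0x%x\n", i, dt_property_get_cell(p, i));
    }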
diff --git a/roms/skiboot/core/bitmap.c b/roms/skiboot/core/bitmap.c
new file mode 100644
index 000000000..8de1356c3
--- /dev/null
+++ b/roms/skiboot/core/bitmap.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/* Copyright 2016 IBM Corp. */
+
+#include "bitmap.h"
+
+static int __bitmap_find_bit(bitmap_t map, unsigned int start, unsigned int count,
+ bool value)
+{
+ unsigned int el, first_bit;
+ unsigned int end = start + count;
+ bitmap_elem_t e, ev;
+ int b;
+
+ ev = value ? -1ul : 0;
+ el = BITMAP_ELEM(start);
+ first_bit = BITMAP_BIT(start);
+
+ while (start < end) {
+ e = map[el] ^ ev;
+ e |= ((1ul << first_bit) - 1);
+ if (~e)
+ break;
+ start = (start + BITMAP_ELSZ) & ~(BITMAP_ELSZ - 1);
+ first_bit = 0;
+ el++;
+ }
+ for (b = first_bit; b < BITMAP_ELSZ && start < end; b++,start++) {
+ if ((e & (1ull << b)) == 0)
+ return start;
+ }
+
+ return -1;
+}
+
+int bitmap_find_zero_bit(bitmap_t map, unsigned int start, unsigned int count)
+{
+ return __bitmap_find_bit(map, start, count, false);
+}
+
+int bitmap_find_one_bit(bitmap_t map, unsigned int start, unsigned int count)
+{
+ return __bitmap_find_bit(map, start, count, true);
+}
+
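Note: __bitmap_find_bit() scans whole bitmap words first (XORing with all-ones when looking for a set bit, so the wanted bit always appears as a zero), then finishes the last word bit by bit. A self-contained sketch of the same search over a plain uint64_t array, independent of bitmap.h (all names below are illustrative, not skiboot API):

    #include <stdint.h>
    #include <stdio.h>

    /* Return the first clear bit in [start, start + count) of a packed
     * uint64_t bitmap (bit i lives in word i / 64), or -1 if none is clear. */
    static int find_zero_bit(const uint64_t *map, unsigned int start,
                             unsigned int count)
    {
        unsigned int end = start + count, bit;

        for (bit = start; bit < end; bit++)
            if (!(map[bit / 64] & (1ull << (bit % 64))))
                return bit;
        return -1;
    }

    int main(void)
    {
        uint64_t map[2] = { ~0ull, 0xfull };        /* bits 0..67 set */

        printf("%d\n", find_zero_bit(map, 0, 128)); /* prints 68 */
        return 0;
    }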
diff --git a/roms/skiboot/core/buddy.c b/roms/skiboot/core/buddy.c
new file mode 100644
index 000000000..b36e407d1
--- /dev/null
+++ b/roms/skiboot/core/buddy.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/* Copyright 2016-2017 IBM Corp. */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "buddy.h"
+
+#define BUDDY_DEBUG
+#undef BUDDY_VERBOSE
+
+#ifdef BUDDY_VERBOSE
+#define BUDDY_NOISE(fmt...) printf(fmt)
+#else
+#define BUDDY_NOISE(fmt...) do { } while(0)
+#endif
+
+static inline unsigned int buddy_map_size(struct buddy *b)
+{
+ return 1u << (b->max_order + 1);
+}
+
+static inline unsigned int buddy_order_start(struct buddy *b,
+ unsigned int order)
+{
+ unsigned int level = b->max_order - order;
+
+ /* Starting bit of index for order */
+ return 1u << level;
+}
+
+static inline unsigned int buddy_index_to_node(struct buddy *b,
+ unsigned int index,
+ unsigned int order)
+{
+ /* Ensure the index is a multiple of the order */
+ assert((index & ((1u << order) - 1)) == 0);
+
+ return buddy_order_start(b, order) + (index >> order);
+}
+
+static inline unsigned int buddy_node_to_index(struct buddy *b,
+ unsigned int node,
+ unsigned int order)
+{
+ unsigned int start = buddy_order_start(b, order);
+
+ return (node - start) << order;
+}
+
+#ifdef BUDDY_DEBUG
+static void buddy_check_alloc(struct buddy *b, unsigned int node)
+{
+ assert(bitmap_tst_bit(b->map, node));
+}
+
+static void buddy_check_alloc_down(struct buddy *b, unsigned int node)
+{
+ unsigned int i, count = 1;
+
+ while (node < buddy_map_size(b)) {
+ for (i = 0; i < count; i++)
+ buddy_check_alloc(b, node + i);
+
+ /* Down one level */
+ node <<= 1;
+ count <<= 1;
+ }
+}
+#else
+static inline void buddy_check_alloc(struct buddy *b __unused, unsigned int node __unused) {}
+static inline void buddy_check_alloc_down(struct buddy *b __unused, unsigned int node __unused) {}
+#endif
+
+int buddy_alloc(struct buddy *b, unsigned int order)
+{
+ unsigned int o;
+ int node, index;
+
+ BUDDY_NOISE("buddy_alloc(%d)\n", order);
+ /*
+ * Find the first order up the tree from our requested order that
+ * has at least one free node.
+ */
+ for (o = order; o <= b->max_order; o++) {
+ if (b->freecounts[o] > 0)
+ break;
+ }
+
+ /* Nothing found ? fail */
+ if (o > b->max_order) {
+ BUDDY_NOISE(" no free nodes !\n");
+ return -1;
+ }
+
+ BUDDY_NOISE(" %d free node(s) at order %d, bits %d(%d)\n",
+ b->freecounts[o], o,
+ buddy_order_start(b, o),
+ 1u << (b->max_order - o));
+
+ /* Now find a free node */
+ node = bitmap_find_zero_bit(b->map, buddy_order_start(b, o),
+ 1u << (b->max_order - o));
+
+ /* There should always be one */
+ assert(node >= 0);
+
+ /* Mark it allocated and decrease free count */
+ bitmap_set_bit(b->map, node);
+ b->freecounts[o]--;
+
+ /* We know that node was free which means all its children must have
+ * been marked "allocated". Double check.
+ */
+ buddy_check_alloc_down(b, node);
+
+ /* We have a node, we've marked it allocated, now we need to go down
+ * the tree until we reach "order" which is the order we need. For
+ * each level along the way, we mark the buddy free and leave the
+ * first child allocated.
+ */
+ while (o > order) {
+ /* Next level down */
+ o--;
+ node <<= 1;
+
+ BUDDY_NOISE(" order %d, using %d marking %d free\n",
+ o, node, node ^ 1);
+ bitmap_clr_bit(b->map, node ^ 1);
+ b->freecounts[o]++;
+ assert(bitmap_tst_bit(b->map, node));
+ }
+
+ index = buddy_node_to_index(b, node, order);
+
+ BUDDY_NOISE(" result is index %d (node %d)\n", index, node);
+
+ /* We have a node, convert it to an element number */
+ return index;
+}
+
+bool buddy_reserve(struct buddy *b, unsigned int index, unsigned int order)
+{
+ unsigned int node, freenode, o;
+
+ assert(index < (1u << b->max_order));
+
+ BUDDY_NOISE("buddy_reserve(%d,%d)\n", index, order);
+
+ /* Get bit number for node */
+ node = buddy_index_to_node(b, index, order);
+
+ BUDDY_NOISE(" node=%d\n", node);
+
+ /* Find something free */
+ for (freenode = node, o = order; freenode > 0; freenode >>= 1, o++)
+ if (!bitmap_tst_bit(b->map, freenode))
+ break;
+
+ BUDDY_NOISE(" freenode=%d order %d\n", freenode, o);
+
+ /* Nothing free, error out */
+ if (!freenode)
+ return false;
+
+ /* We sit on a free node, mark it busy */
+ bitmap_set_bit(b->map, freenode);
+ assert(b->freecounts[o]);
+ b->freecounts[o]--;
+
+ /* We know that node was free which means all its children must have
+ * been marked "allocated". Double check.
+ */
+ buddy_check_alloc_down(b, freenode);
+
+ /* Reverse-walk the path and break down nodes */
+ while (o > order) {
+ /* Next level down */
+ o--;
+ freenode <<= 1;
+
+ /* Find the right one on the path to node */
+ if (node & (1u << (o - order)))
+ freenode++;
+
+ BUDDY_NOISE(" order %d, using %d marking %d free\n",
+ o, freenode, freenode ^ 1);
+ bitmap_clr_bit(b->map, freenode ^ 1);
+ b->freecounts[o]++;
+ assert(bitmap_tst_bit(b->map, node));
+ }
+ assert(node == freenode);
+
+ return true;
+}
+
+void buddy_free(struct buddy *b, unsigned int index, unsigned int order)
+{
+ unsigned int node;
+
+ assert(index < (1u << b->max_order));
+
+ BUDDY_NOISE("buddy_free(%d,%d)\n", index, order);
+
+ /* Get bit number for node */
+ node = buddy_index_to_node(b, index, order);
+
+ BUDDY_NOISE(" node=%d\n", node);
+
+ /* We assume that anything freed was fully allocated, i.e.,
+ * there is no child node of that allocation index/order
+ * that is already free.
+ *
+ * BUDDY_DEBUG will verify it at the cost of performance
+ */
+ buddy_check_alloc_down(b, node);
+
+ /* Propagate if buddy is free */
+ while (order < b->max_order && !bitmap_tst_bit(b->map, node ^ 1)) {
+ BUDDY_NOISE(" order %d node %d buddy %d free, propagating\n",
+ order, node, node ^ 1);
+
+ /* Mark buddy busy (we are already marked busy) */
+ bitmap_set_bit(b->map, node ^ 1);
+
+ /* Reduce free count */
+ assert(b->freecounts[order] > 0);
+ b->freecounts[order]--;
+
+ /* Get parent */
+ node >>= 1;
+ order++;
+
+ /* It must be busy already ! */
+ buddy_check_alloc(b, node);
+
+ BUDDY_NOISE(" testing order %d node %d\n", order, node ^ 1);
+ }
+
+ /* No more coalescing, mark it free */
+ bitmap_clr_bit(b->map, node);
+
+ /* Increase the freelist count for that level */
+ b->freecounts[order]++;
+
+ BUDDY_NOISE(" free count at order %d is %d\n",
+ order, b->freecounts[order]);
+}
+
+void buddy_reset(struct buddy *b)
+{
+ unsigned int bsize = BITMAP_BYTES(1u << (b->max_order + 1));
+
+ BUDDY_NOISE("buddy_reset()\n");
+ /* We fill the bitmap with 1's to make it completely "busy" */
+ memset(b->map, 0xff, bsize);
+ memset(b->freecounts, 0, sizeof(b->freecounts));
+
+ /* We mark the root of the tree free; this is entry 1, as entry 0
+ * is unused.
+ */
+ buddy_free(b, 0, b->max_order);
+}
+
+struct buddy *buddy_create(unsigned int max_order)
+{
+ struct buddy *b;
+ unsigned int bsize;
+
+ assert(max_order <= BUDDY_MAX_ORDER);
+
+ bsize = BITMAP_BYTES(1u << (max_order + 1));
+
+ b = zalloc(sizeof(struct buddy) + bsize);
+ if (!b)
+ return NULL;
+ b->max_order = max_order;
+
+ BUDDY_NOISE("Map @%p, size: %d bytes\n", b->map, bsize);
+
+ buddy_reset(b);
+
+ return b;
+}
+
+void buddy_destroy(struct buddy *b)
+{
+ free(b);
+}
+
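Note: a hedged usage sketch of this allocator, in the spirit of the host-side core/test/run-buddy.c unit test listed in the diffstat above; it calls only functions defined in this file and assumes a userspace harness that provides zalloc()/free():

    #include <assert.h>
    #include "buddy.h"

    static void exercise_buddy(void)
    {
        struct buddy *b = buddy_create(4);  /* manages 2^4 = 16 units */
        int a, c;

        assert(b);
        a = buddy_alloc(b, 2);              /* a 4-unit block, index is 4-aligned */
        assert(a >= 0 && (a & 3) == 0);
        assert(buddy_reserve(b, 8, 3));     /* pin the 8-unit block at index 8 */
        c = buddy_alloc(b, 0);              /* a single unit */
        assert(c >= 0);

        buddy_free(b, a, 2);                /* order must match the allocation */
        buddy_free(b, 8, 3);
        buddy_free(b, c, 0);
        buddy_destroy(b);
    }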
diff --git a/roms/skiboot/core/chip.c b/roms/skiboot/core/chip.c
new file mode 100644
index 000000000..2d95b2e05
--- /dev/null
+++ b/roms/skiboot/core/chip.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/* Copyright 2013-2019 IBM Corp. */
+
+#include <skiboot.h>
+#include <chip.h>
+#include <console.h>
+#include <device.h>
+#include <timebase.h>
+#include <cpu.h>
+
+static struct proc_chip *chips[MAX_CHIPS];
+enum proc_chip_quirks proc_chip_quirks;
+
+uint32_t pir_to_chip_id(uint32_t pir)
+{
+ if (proc_gen == proc_gen_p10)
+ return P10_PIR2GCID(pir);
+ else if (proc_gen == proc_gen_p9)
+ return P9_PIR2GCID(pir);
+ else if (proc_gen == proc_gen_p8)
+ return P8_PIR2GCID(pir);
+ else
+ assert(false);
+}
+
+uint32_t pir_to_core_id(uint32_t pir)
+{
+ if (proc_gen == proc_gen_p10) {
+ if (this_cpu()->is_fused_core)
+ return P10_PIRFUSED2NORMALCOREID(pir);
+ else
+ return P10_PIR2COREID(pir);
+ } else if (proc_gen == proc_gen_p9) {
+ if (this_cpu()->is_fused_core)
+ return P9_PIRFUSED2NORMALCOREID(pir);
+ else
+ return P9_PIR2COREID(pir);
+ } else if (proc_gen == proc_gen_p8) {
+ return P8_PIR2COREID(pir);
+ } else {
+ assert(false);
+ }
+}
+
+uint32_t pir_to_fused_core_id(uint32_t pir)
+{
+ if (proc_gen == proc_gen_p10) {
+ if (this_cpu()->is_fused_core)
+ return P10_PIR2FUSEDCOREID(pir);
+ else
+ return P10_PIR2COREID(pir);
+ } else if (proc_gen == proc_gen_p9) {
+ if (this_cpu()->is_fused_core)
+ return P9_PIR2FUSEDCOREID(pir);
+ else
+ return P9_PIR2COREID(pir);
+ } else if (proc_gen == proc_gen_p8) {
+ return P8_PIR2COREID(pir);
+ } else {
+ assert(false);
+ }
+}
+
+uint32_t pir_to_thread_id(uint32_t pir)
+{
+ if (proc_gen == proc_gen_p10) {
+ if (this_cpu()->is_fused_core)
+ return P10_PIRFUSED2NORMALTHREADID(pir);
+ else
+ return P10_PIR2THREADID(pir);
+ } else if (proc_gen == proc_gen_p9) {
+ if (this_cpu()->is_fused_core)
+ return P9_PIRFUSED2NORMALTHREADID(pir);
+ else
+ return P9_PIR2THREADID(pir);
+ } else if (proc_gen == proc_gen_p8) {
+ return P8_PIR2THREADID(pir);
+ } else {
+ assert(false);
+ }
+}
+
+struct proc_chip *next_chip(struct proc_chip *chip)
+{
+ unsigned int i;
+
+ for (i = chip ? (chip->id + 1) : 0; i < MAX_CHIPS; i++)
+ if (chips[i])
+ return chips[i];
+ return NULL;
+}
+
+
+struct proc_chip *get_chip(uint32_t chip_id)
+{
+ if (chip_id >= MAX_CHIPS)
+ return NULL;
+ return chips[chip_id];
+}
+
+static void init_chip(struct dt_node *dn)
+{
+ struct proc_chip *chip;
+ uint32_t id;
+ const char *lc = NULL;
+
+ id = dt_get_chip_id(dn);
+ assert(id < MAX_CHIPS);
+ assert(chips[id] == NULL);
+
+ chip = zalloc(sizeof(struct proc_chip));
+ assert(chip);
+
+ chip->id = id;
+ chip->devnode = dn;
+
+ chip->dbob_id = dt_prop_get_u32_def(dn, "ibm,dbob-id", 0xffffffff);
+ chip->pcid = dt_prop_get_u32_def(dn, "ibm,proc-chip-id", 0xffffffff);
+
+ if (dt_prop_get_u32_def(dn, "ibm,occ-functional-state", 0))
+ chip->occ_functional = true;
+ else
+ chip->occ_functional = false;
+
+ list_head_init(&chip->i2cms);
+
+ /* Update the location code for this chip. */
+ if (dt_has_node_property(dn, "ibm,loc-code", NULL))
+ lc = dt_prop_get(dn, "ibm,loc-code");
+ else if (dt_has_node_property(dn, "ibm,slot-location-code", NULL))
+ lc = dt_prop_get(dn, "ibm,slot-location-code");
+
+ if (lc)
+ chip->loc_code = strdup(lc);
+
+ chip->primary_topology = dt_prop_get_u32_def(dn,
+ "ibm,primary-topology-index", 0xffffffff);
+
+ prlog(PR_INFO, "CHIP: Initialised chip %d from %s\n", id, dn->name);
+ chips[id] = chip;
+}
+
+void init_chips(void)
+{
+ struct dt_node *xn;
+
+ /* Detect mambo chip */
+ if (dt_find_by_path(dt_root, "/mambo")) {
+ proc_chip_quirks |= QUIRK_NO_CHIPTOD | QUIRK_MAMBO_CALLOUTS
+ | QUIRK_NO_F000F | QUIRK_NO_PBA | QUIRK_NO_OCC_IRQ
+ | QUIRK_NO_RNG;
+
+ enable_mambo_console();
+
+ prlog(PR_NOTICE, "CHIP: Detected Mambo simulator\n");
+
+ dt_for_each_compatible(dt_root, xn, "ibm,mambo-chip")
+ init_chip(xn);
+ }
+
+ /* Detect simics */
+ if (dt_find_by_path(dt_root, "/simics")) {
+ proc_chip_quirks |= QUIRK_SIMICS
+ | QUIRK_NO_PBA | QUIRK_NO_OCC_IRQ | QUIRK_SLOW_SIM;
+ tb_hz = 512000;
+ prlog(PR_NOTICE, "CHIP: Detected Simics simulator\n");
+ }
+ /* Detect Awan emulator */
+ if (dt_find_by_path(dt_root, "/awan")) {
+ proc_chip_quirks |= QUIRK_NO_CHIPTOD | QUIRK_NO_F000F
+ | QUIRK_NO_PBA | QUIRK_NO_OCC_IRQ | QUIRK_SLOW_SIM;
+ tb_hz = 512000;
+ prlog(PR_NOTICE, "CHIP: Detected Awan emulator\n");
+ }
+ /* Detect Qemu */
+ if (dt_node_is_compatible(dt_root, "qemu,powernv") ||
+ dt_node_is_compatible(dt_root, "qemu,powernv8") ||
+ dt_node_is_compatible(dt_root, "qemu,powernv9") ||
+ dt_node_is_compatible(dt_root, "qemu,powernv10") ||
+ dt_find_by_path(dt_root, "/qemu")) {
+ proc_chip_quirks |= QUIRK_QEMU | QUIRK_NO_CHIPTOD
+ | QUIRK_NO_DIRECT_CTL | QUIRK_NO_RNG;
+ prlog(PR_NOTICE, "CHIP: Detected QEMU simulator\n");
+ }
+
+ /* We walk the chips based on xscom nodes in the tree */
+ dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
+ init_chip(xn);
+ }
+}
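Note: since next_chip(NULL) returns the first populated slot, walking every chip discovered by init_chips() is a short loop. A sketch, assuming it runs after init_chips() (skiboot's chip.h also wraps this pattern in an iteration macro, not shown in this diff):

    struct proc_chip *chip;

    for (chip = next_chip(NULL); chip; chip = next_chip(chip))
        prlog(PR_INFO, "CHIP: id %d pcid 0x%x occ %s loc %s\n",
              chip->id, chip->pcid,
              chip->occ_functional ? "functional" : "not functional",
              chip->loc_code ? chip->loc_code : "unknown");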
diff --git a/roms/skiboot/core/console-log.c b/roms/skiboot/core/console-log.c
new file mode 100644
index 000000000..21a1442bd
--- /dev/null
+++ b/roms/skiboot/core/console-log.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Console Log routines
+ * Wraps libc and lower-level console functions,
+ * does fancy-schmancy things like timestamps and priorities.
+ * Doesn't make waffles.
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include "skiboot.h"
+#include "unistd.h"
+#include "stdio.h"
+#include "console.h"
+#include "timebase.h"
+#include <debug_descriptor.h>
+
+static int vprlog(int log_level, const char *fmt, va_list ap)
+{
+ int count;
+ char buffer[320];
+ bool flush_to_drivers = true;
+ unsigned long tb = mftb();
+
+ /* It's safe to return 0 when we "did" something here
+ * as only printf cares about how much we wrote, and
+ * if you change log_level to below PR_PRINTF then you
+ * get everything you deserve.
+ * By default, only PR_DEBUG and higher are stored in memory.
+ * PR_TRACE and PR_INSANE are for those having a bad day.
+ */
+ if (log_level > (debug_descriptor.console_log_levels >> 4))
+ return 0;
+
+ count = snprintf(buffer, sizeof(buffer), "[%5lu.%09lu,%d] ",
+ tb_to_secs(tb), tb_remaining_nsecs(tb), log_level);
+ count+= vsnprintf(buffer+count, sizeof(buffer)-count, fmt, ap);
+
+ if (log_level > (debug_descriptor.console_log_levels & 0x0f))
+ flush_to_drivers = false;
+
+ console_write(flush_to_drivers, buffer, count);
+
+ return count;
+}
+
+/* we don't return anything as what on earth are we going to do
+ * if we actually fail to print a log message? Print a log message about it?
+ * Callers shouldn't care, prlog and friends should do something generically
+ * sane in such crazy situations.
+ */
+void _prlog(int log_level, const char* fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vprlog(log_level, fmt, ap);
+ va_end(ap);
+}
+
+int _printf(const char* fmt, ...)
+{
+ int count;
+ va_list ap;
+
+ va_start(ap, fmt);
+ count = vprlog(PR_PRINTF, fmt, ap);
+ va_end(ap);
+
+ return count;
+}
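Note: both thresholds in vprlog() come from one byte of debug_descriptor.console_log_levels: the high nibble caps what is kept in the in-memory console and the low nibble caps what is flushed to the console driver. A tiny sketch of that split (the packed value 0x75 is only an example):

    /* With levels == 0x75: keep messages with level <= 7 in memory,
     * push messages with level <= 5 out to the console driver. */
    static inline int memory_log_level(uint8_t levels) { return levels >> 4; }
    static inline int driver_log_level(uint8_t levels) { return levels & 0x0f; }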
diff --git a/roms/skiboot/core/console.c b/roms/skiboot/core/console.c
new file mode 100644
index 000000000..2a1509025
--- /dev/null
+++ b/roms/skiboot/core/console.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Console IO routine for use by libc
+ *
+ * fd is the classic posix 0,1,2 (stdin, stdout, stderr)
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <unistd.h>
+#include <console.h>
+#include <opal.h>
+#include <device.h>
+#include <processor.h>
+#include <cpu.h>
+
+static char *con_buf = (char *)INMEM_CON_START;
+static size_t con_in;
+static size_t con_out;
+static bool con_wrapped;
+
+/* Internal console driver ops */
+static struct con_ops *con_driver;
+
+/* External (OPAL) console driver ops */
+static struct opal_con_ops *opal_con_driver = &dummy_opal_con;
+
+static struct lock con_lock = LOCK_UNLOCKED;
+
+/* This is mapped via TCEs so we keep it alone in a page */
+struct memcons memcons __section(".data.memcons") = {
+ .magic = CPU_TO_BE64(MEMCONS_MAGIC),
+ .obuf_phys = CPU_TO_BE64(INMEM_CON_START),
+ .ibuf_phys = CPU_TO_BE64(INMEM_CON_START + INMEM_CON_OUT_LEN),
+ .obuf_size = CPU_TO_BE32(INMEM_CON_OUT_LEN),
+ .ibuf_size = CPU_TO_BE32(INMEM_CON_IN_LEN),
+};
+
+static bool dummy_console_enabled(void)
+{
+#ifdef FORCE_DUMMY_CONSOLE
+ return true;
+#else
+ return dt_has_node_property(dt_chosen,
+ "sapphire,enable-dummy-console", NULL);
+#endif
+}
+
+/*
+ * Helper function for adding /ibm,opal/consoles/serial@<xyz> nodes
+ */
+struct dt_node *add_opal_console_node(int index, const char *type,
+ uint32_t write_buffer_size)
+{
+ struct dt_node *con, *consoles;
+ char buffer[32];
+
+ consoles = dt_find_by_name(opal_node, "consoles");
+ if (!consoles) {
+ consoles = dt_new(opal_node, "consoles");
+ assert(consoles);
+ dt_add_property_cells(consoles, "#address-cells", 1);
+ dt_add_property_cells(consoles, "#size-cells", 0);
+ }
+
+ con = dt_new_addr(consoles, "serial", index);
+ assert(con);
+
+ snprintf(buffer, sizeof(buffer), "ibm,opal-console-%s", type);
+ dt_add_property_string(con, "compatible", buffer);
+
+ dt_add_property_cells(con, "#write-buffer-size", write_buffer_size);
+ dt_add_property_cells(con, "reg", index);
+ dt_add_property_string(con, "device_type", "serial");
+
+ return con;
+}
+
+void clear_console(void)
+{
+ memset(con_buf, 0, INMEM_CON_LEN);
+}
+
+/*
+ * Flush the console buffer into the driver, returns true
+ * if there is more to go.
+ * Optionally can skip flushing to drivers, leaving messages
+ * just in memory console.
+ */
+static bool __flush_console(bool flush_to_drivers, bool need_unlock)
+{
+ struct cpu_thread *cpu = this_cpu();
+ size_t req, len = 0;
+ static bool in_flush, more_flush;
+
+ /* Is there anything to flush ? Bail out early if not */
+ if (con_in == con_out || !con_driver)
+ return false;
+
+ /*
+ * Console flushing is suspended on this CPU, typically because
+ * some critical locks are held that would potentially cause a
+ * flush to deadlock.
+ *
+ * The same applies if we recursed on con_lock (need_unlock is false). This
+ * can happen due to debug code firing (e.g., list or stack
+ * debugging).
+ */
+ if (cpu->con_suspend || !need_unlock) {
+ cpu->con_need_flush = true;
+ return false;
+ }
+ cpu->con_need_flush = false;
+
+ /*
+ * We must call the underlying driver with the console lock
+ * dropped otherwise we get some deadlocks if anything down
+ * that path tries to printf() something.
+ *
+ * So instead what we do is we keep a static in_flush flag
+ * set/released with the lock held, which is used to prevent
+ * concurrent attempts at flushing the same chunk of buffer
+ * by other processors.
+ */
+ if (in_flush) {
+ more_flush = true;
+ return false;
+ }
+ in_flush = true;
+
+ /*
+ * NB: this must appear after the in_flush check since it modifies
+ * con_out.
+ */
+ if (!flush_to_drivers) {
+ con_out = con_in;
+ in_flush = false;
+ return false;
+ }
+
+ do {
+ more_flush = false;
+
+ if (con_out > con_in) {
+ req = INMEM_CON_OUT_LEN - con_out;
+ more_flush = true;
+ } else
+ req = con_in - con_out;
+
+ unlock(&con_lock);
+ len = con_driver->write(con_buf + con_out, req);
+ lock(&con_lock);
+
+ con_out = (con_out + len) % INMEM_CON_OUT_LEN;
+
+ /* write error? */
+ if (len < req)
+ break;
+ } while(more_flush);
+
+ in_flush = false;
+ return con_out != con_in;
+}
+
+bool flush_console(void)
+{
+ bool ret;
+
+ lock(&con_lock);
+ ret = __flush_console(true, true);
+ unlock(&con_lock);
+
+ return ret;
+}
+
+static void inmem_write(char c)
+{
+ uint32_t opos;
+
+ if (!c)
+ return;
+ con_buf[con_in++] = c;
+ if (con_in >= INMEM_CON_OUT_LEN) {
+ con_in = 0;
+ con_wrapped = true;
+ }
+
+ /*
+ * We must always re-generate memcons.out_pos because
+ * under some circumstances, the console script will
+ * use a broken putmemproc that does RMW on the full
+ * 8 bytes containing out_pos and in_prod, thus corrupting
+ * out_pos
+ */
+ opos = con_in;
+ if (con_wrapped)
+ opos |= MEMCONS_OUT_POS_WRAP;
+ lwsync();
+ memcons.out_pos = cpu_to_be32(opos);
+
+ /* If head reaches tail, push tail around & drop chars */
+ if (con_in == con_out)
+ con_out = (con_in + 1) % INMEM_CON_OUT_LEN;
+}
+
+static size_t inmem_read(char *buf, size_t req)
+{
+ size_t read = 0;
+ char *ibuf = (char *)be64_to_cpu(memcons.ibuf_phys);
+
+ while (req && be32_to_cpu(memcons.in_prod) != be32_to_cpu(memcons.in_cons)) {
+ *(buf++) = ibuf[be32_to_cpu(memcons.in_cons)];
+ lwsync();
+ memcons.in_cons = cpu_to_be32((be32_to_cpu(memcons.in_cons) + 1) % INMEM_CON_IN_LEN);
+ req--;
+ read++;
+ }
+ return read;
+}
+
+static void write_char(char c)
+{
+#ifdef MAMBO_DEBUG_CONSOLE
+ mambo_console_write(&c, 1);
+#endif
+ inmem_write(c);
+}
+
+ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count)
+{
+ /* We use recursive locking here as we can get called
+ * from a fairly deep debug path
+ */
+ bool need_unlock = lock_recursive(&con_lock);
+ const char *cbuf = buf;
+
+ while(count--) {
+ char c = *(cbuf++);
+ if (c == '\n')
+ write_char('\r');
+ write_char(c);
+ }
+
+ __flush_console(flush_to_drivers, need_unlock);
+
+ if (need_unlock)
+ unlock(&con_lock);
+
+ return count;
+}
+
+ssize_t write(int fd __unused, const void *buf, size_t count)
+{
+ return console_write(true, buf, count);
+}
+
+ssize_t read(int fd __unused, void *buf, size_t req_count)
+{
+ bool need_unlock = lock_recursive(&con_lock);
+ size_t count = 0;
+
+ if (con_driver && con_driver->read)
+ count = con_driver->read(buf, req_count);
+ if (!count)
+ count = inmem_read(buf, req_count);
+ if (need_unlock)
+ unlock(&con_lock);
+ return count;
+}
+
+/* Helper function to perform a full synchronous flush */
+void console_complete_flush(void)
+{
+ /*
+ * Using term 0 here is a dumb hack that works because the UART
+ * only has term 0 and the FSP doesn't have an explicit flush method.
+ */
+ int64_t ret = opal_con_driver->flush(0);
+
+ if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER)
+ return;
+
+ while (ret != OPAL_SUCCESS) {
+ ret = opal_con_driver->flush(0);
+ }
+}
+
+/*
+ * set_console()
+ *
+ * This sets the driver used internally by Skiboot. This is different to the
+ * OPAL console driver.
+ */
+void set_console(struct con_ops *driver)
+{
+ con_driver = driver;
+ if (driver)
+ flush_console();
+}
+
+/*
+ * set_opal_console()
+ *
+ * Configure the console driver to handle the console provided by the OPAL API.
+ * These differ from the above in that they are typically buffered and used
+ * by the host OS rather than by skiboot.
+ */
+static bool opal_cons_init = false;
+
+void set_opal_console(struct opal_con_ops *driver)
+{
+ assert(!opal_cons_init);
+ opal_con_driver = driver;
+}
+
+void init_opal_console(void)
+{
+ assert(!opal_cons_init);
+ opal_cons_init = true;
+
+ if (dummy_console_enabled() && opal_con_driver != &dummy_opal_con) {
+ prlog(PR_WARNING, "OPAL: Dummy console forced, %s ignored\n",
+ opal_con_driver->name);
+
+ opal_con_driver = &dummy_opal_con;
+ }
+
+ prlog(PR_INFO, "OPAL: Using %s\n", opal_con_driver->name);
+
+ if (opal_con_driver->init)
+ opal_con_driver->init();
+
+ opal_register(OPAL_CONSOLE_READ, opal_con_driver->read, 3);
+ opal_register(OPAL_CONSOLE_WRITE, opal_con_driver->write, 3);
+ opal_register(OPAL_CONSOLE_FLUSH, opal_con_driver->flush, 1);
+ opal_register(OPAL_CONSOLE_WRITE_BUFFER_SPACE,
+ opal_con_driver->space, 2);
+}
+
+void memcons_add_properties(void)
+{
+ dt_add_property_u64(opal_node, "ibm,opal-memcons", (u64) &memcons);
+}
+
+/*
+ * The default OPAL console.
+ *
+ * In the absence of a "real" OPAL console driver we handle the OPAL_CONSOLE_*
+ * calls by writing into the skiboot log buffer. Reads are a little more
+ * complicated since they can come from the in-memory console (BML) or from the
+ * internal skiboot console driver.
+ */
+static int64_t dummy_console_write(int64_t term_number, __be64 *length,
+ const uint8_t *buffer)
+{
+ uint64_t l;
+
+ if (term_number != 0)
+ return OPAL_PARAMETER;
+
+ if (!opal_addr_valid(length) || !opal_addr_valid(buffer))
+ return OPAL_PARAMETER;
+
+ l = be64_to_cpu(*length);
+ write(0, buffer, l);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t dummy_console_write_buffer_space(int64_t term_number,
+ __be64 *length)
+{
+ if (term_number != 0)
+ return OPAL_PARAMETER;
+
+ if (!opal_addr_valid(length))
+ return OPAL_PARAMETER;
+
+ if (length)
+ *length = cpu_to_be64(INMEM_CON_OUT_LEN);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t dummy_console_read(int64_t term_number, __be64 *length,
+ uint8_t *buffer)
+{
+ uint64_t l;
+
+ if (term_number != 0)
+ return OPAL_PARAMETER;
+
+ if (!opal_addr_valid(length) || !opal_addr_valid(buffer))
+ return OPAL_PARAMETER;
+
+ l = be64_to_cpu(*length);
+ l = read(0, buffer, l);
+ *length = cpu_to_be64(l);
+ opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, 0);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t dummy_console_flush(int64_t term_number __unused)
+{
+ return OPAL_UNSUPPORTED;
+}
+
+static void dummy_console_poll(void *data __unused)
+{
+ bool has_data = false;
+
+ lock(&con_lock);
+ if (con_driver && con_driver->poll_read)
+ has_data = con_driver->poll_read();
+ if (memcons.in_prod != memcons.in_cons)
+ has_data = true;
+ if (has_data)
+ opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT,
+ OPAL_EVENT_CONSOLE_INPUT);
+ else
+ opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, 0);
+ unlock(&con_lock);
+}
+
+void dummy_console_add_nodes(void)
+{
+ struct dt_property *p;
+
+ add_opal_console_node(0, "raw", be32_to_cpu(memcons.obuf_size));
+
+ /* Mambo might have left a crap one, clear it */
+ p = __dt_find_property(dt_chosen, "linux,stdout-path");
+ if (p)
+ dt_del_property(dt_chosen, p);
+
+ dt_add_property_string(dt_chosen, "linux,stdout-path",
+ "/ibm,opal/consoles/serial@0");
+
+ opal_add_poller(dummy_console_poll, NULL);
+}
+
+struct opal_con_ops dummy_opal_con = {
+ .name = "Dummy Console",
+ .init = dummy_console_add_nodes,
+ .read = dummy_console_read,
+ .write = dummy_console_write,
+ .space = dummy_console_write_buffer_space,
+ .flush = dummy_console_flush,
+};
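Note: an internal backend plugs in through set_console(). Only the hooks this file actually invokes are sketched below; the exact struct con_ops field signatures live in console.h, and my_uart_putc() is a purely hypothetical hardware accessor:

    /* Hedged sketch of an internal console backend. */
    static size_t my_con_write(const char *buf, size_t len)
    {
        size_t i;

        for (i = 0; i < len; i++)
            my_uart_putc(buf[i]);       /* hypothetical hardware accessor */
        return i;                       /* bytes actually accepted */
    }

    static struct con_ops my_con_ops = {
        .write = my_con_write,
    };

    /* During platform probe: */
    set_console(&my_con_ops);           /* also flushes the memory console */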
diff --git a/roms/skiboot/core/cpu.c b/roms/skiboot/core/cpu.c
new file mode 100644
index 000000000..f58aeb27a
--- /dev/null
+++ b/roms/skiboot/core/cpu.c
@@ -0,0 +1,1785 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Code to manage and manipulate CPUs
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <device.h>
+#include <mem_region.h>
+#include <opal.h>
+#include <stack.h>
+#include <trace.h>
+#include <affinity.h>
+#include <chip.h>
+#include <timebase.h>
+#include <interrupts.h>
+#include <ccan/str/str.h>
+#include <ccan/container_of/container_of.h>
+#include <xscom.h>
+
+/* The cpu_threads array is static and indexed by PIR in
+ * order to speed up lookup from asm entry points
+ */
+struct cpu_stack {
+ union {
+ uint8_t stack[STACK_SIZE];
+ struct cpu_thread cpu;
+ };
+} __align(STACK_SIZE);
+
+static struct cpu_stack * const cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
+unsigned int cpu_thread_count;
+unsigned int cpu_max_pir;
+struct cpu_thread *boot_cpu;
+static struct lock reinit_lock = LOCK_UNLOCKED;
+static bool hile_supported;
+static bool radix_supported;
+static unsigned long hid0_hile;
+static unsigned long hid0_attn;
+static bool sreset_enabled;
+static bool ipi_enabled;
+static bool pm_enabled;
+static bool current_hile_mode = HAVE_LITTLE_ENDIAN;
+static bool current_radix_mode = true;
+static bool tm_suspend_enabled;
+
+unsigned long cpu_secondary_start __force_data = 0;
+
+struct cpu_job {
+ struct list_node link;
+ void (*func)(void *data);
+ void *data;
+ const char *name;
+ bool complete;
+ bool no_return;
+};
+
+/* attribute const as cpu_stacks is constant. */
+unsigned long __attrconst cpu_stack_bottom(unsigned int pir)
+{
+ return ((unsigned long)&cpu_stacks[pir]) +
+ sizeof(struct cpu_thread) + STACK_SAFETY_GAP;
+}
+
+unsigned long __attrconst cpu_stack_top(unsigned int pir)
+{
+ /* This is the top of the normal stack. */
+ return ((unsigned long)&cpu_stacks[pir]) +
+ NORMAL_STACK_SIZE - STACK_TOP_GAP;
+}
+
+unsigned long __attrconst cpu_emergency_stack_top(unsigned int pir)
+{
+ /* This is the top of the emergency stack, above the normal stack. */
+ return ((unsigned long)&cpu_stacks[pir]) +
+ NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE - STACK_TOP_GAP;
+}
+
+void __nomcount cpu_relax(void)
+{
+ /* Relax a bit to give sibling threads some breathing space */
+ smt_lowest();
+ asm volatile("nop; nop; nop; nop;\n"
+ "nop; nop; nop; nop;\n"
+ "nop; nop; nop; nop;\n"
+ "nop; nop; nop; nop;\n");
+ smt_medium();
+ barrier();
+}
+
+static void cpu_wake(struct cpu_thread *cpu)
+{
+ /* Is it idle ? If not, no need to wake */
+ sync();
+ if (!cpu->in_idle)
+ return;
+
+ if (proc_gen == proc_gen_p8) {
+ /* Poke IPI */
+ icp_kick_cpu(cpu);
+ } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
+ p9_dbell_send(cpu->pir);
+ }
+}
+
+/*
+ * If chip_id is >= 0, schedule the job on that node.
+ * Otherwise schedule the job anywhere.
+ */
+static struct cpu_thread *cpu_find_job_target(int32_t chip_id)
+{
+ struct cpu_thread *cpu, *best, *me = this_cpu();
+ uint32_t best_count;
+
+ /* We try to find a target to run a job. We need to avoid
+ * a CPU that has a "no return" job on its queue as it might
+ * never be able to process anything.
+ *
+ * Additionally we don't check the list but the job count
+ * on the target CPUs, since that is decremented *after*
+ * a job has been completed.
+ */
+
+
+ /* First we scan all available primary threads
+ */
+ for_each_available_cpu(cpu) {
+ if (chip_id >= 0 && cpu->chip_id != chip_id)
+ continue;
+ if (cpu == me || !cpu_is_thread0(cpu) || cpu->job_has_no_return)
+ continue;
+ if (cpu->job_count)
+ continue;
+ lock(&cpu->job_lock);
+ if (!cpu->job_count)
+ return cpu;
+ unlock(&cpu->job_lock);
+ }
+
+ /* Now try again with secondary threads included and keep
+ * track of the one with the fewest jobs queued up. This is
+ * done in a racy way, but it's just an optimization in case
+ * we are overcommitted on jobs. We could also just pick
+ * a random one...
+ */
+ best = NULL;
+ best_count = -1u;
+ for_each_available_cpu(cpu) {
+ if (chip_id >= 0 && cpu->chip_id != chip_id)
+ continue;
+ if (cpu == me || cpu->job_has_no_return)
+ continue;
+ if (!best || cpu->job_count < best_count) {
+ best = cpu;
+ best_count = cpu->job_count;
+ }
+ if (cpu->job_count)
+ continue;
+ lock(&cpu->job_lock);
+ if (!cpu->job_count)
+ return cpu;
+ unlock(&cpu->job_lock);
+ }
+
+ /* We haven't found anybody, do we have a bestie ? */
+ if (best) {
+ lock(&best->job_lock);
+ return best;
+ }
+
+ /* Go away */
+ return NULL;
+}
+
+/* job_lock is held, returns with it released */
+static void queue_job_on_cpu(struct cpu_thread *cpu, struct cpu_job *job)
+{
+ /* That's bad, the job will never run */
+ if (cpu->job_has_no_return) {
+ prlog(PR_WARNING, "WARNING ! Job %s scheduled on CPU 0x%x"
+ " which has a no-return job on its queue !\n",
+ job->name, cpu->pir);
+ backtrace();
+ }
+ list_add_tail(&cpu->job_queue, &job->link);
+ if (job->no_return)
+ cpu->job_has_no_return = true;
+ else
+ cpu->job_count++;
+ if (pm_enabled)
+ cpu_wake(cpu);
+ unlock(&cpu->job_lock);
+}
+
+struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
+ const char *name,
+ void (*func)(void *data), void *data,
+ bool no_return)
+{
+ struct cpu_job *job;
+
+#ifdef DEBUG_SERIALIZE_CPU_JOBS
+ if (cpu == NULL)
+ cpu = this_cpu();
+#endif
+
+ if (cpu && !cpu_is_available(cpu)) {
+ prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n",
+ cpu->pir);
+ return NULL;
+ }
+
+ job = zalloc(sizeof(struct cpu_job));
+ if (!job)
+ return NULL;
+ job->func = func;
+ job->data = data;
+ job->name = name;
+ job->complete = false;
+ job->no_return = no_return;
+
+ /* Pick a candidate. Returns with target queue locked */
+ if (cpu == NULL)
+ cpu = cpu_find_job_target(-1);
+ else if (cpu != this_cpu())
+ lock(&cpu->job_lock);
+ else
+ cpu = NULL;
+
+ /* Can't be scheduled, run it now */
+ if (cpu == NULL) {
+ if (!this_cpu()->job_has_no_return)
+ this_cpu()->job_has_no_return = no_return;
+ func(data);
+ job->complete = true;
+ return job;
+ }
+
+ queue_job_on_cpu(cpu, job);
+
+ return job;
+}
+
+struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id,
+ const char *name,
+ void (*func)(void *data), void *data)
+{
+ struct cpu_thread *cpu;
+ struct cpu_job *job;
+
+ job = zalloc(sizeof(struct cpu_job));
+ if (!job)
+ return NULL;
+ job->func = func;
+ job->data = data;
+ job->name = name;
+ job->complete = false;
+ job->no_return = false;
+
+ /* Pick a candidate. Returns with target queue locked */
+ cpu = cpu_find_job_target(chip_id);
+
+ /* Can't be scheduled... */
+ if (cpu == NULL) {
+ cpu = this_cpu();
+ if (cpu->chip_id == chip_id) {
+ /* Run it now if we're the right node. */
+ func(data);
+ job->complete = true;
+ return job;
+ }
+ /* Otherwise fail. */
+ free(job);
+ return NULL;
+ }
+
+ queue_job_on_cpu(cpu, job);
+
+ return job;
+}
+
+bool cpu_poll_job(struct cpu_job *job)
+{
+ lwsync();
+ return job->complete;
+}
+
+void cpu_wait_job(struct cpu_job *job, bool free_it)
+{
+ unsigned long time_waited = 0;
+
+ if (!job)
+ return;
+
+ while (!job->complete) {
+ /* This will call OPAL pollers for us */
+ time_wait_ms(10);
+ time_waited += 10;
+ lwsync();
+ if ((time_waited % 30000) == 0) {
+ prlog(PR_INFO, "cpu_wait_job(%s) for %lums\n",
+ job->name, time_waited);
+ backtrace();
+ }
+ }
+ lwsync();
+
+ if (time_waited > 1000)
+ prlog(PR_DEBUG, "cpu_wait_job(%s) for %lums\n",
+ job->name, time_waited);
+
+ if (free_it)
+ free(job);
+}
+
+bool cpu_check_jobs(struct cpu_thread *cpu)
+{
+ return !list_empty_nocheck(&cpu->job_queue);
+}
+
+void cpu_process_jobs(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+ struct cpu_job *job = NULL;
+ void (*func)(void *);
+ void *data;
+
+ sync();
+ if (!cpu_check_jobs(cpu))
+ return;
+
+ lock(&cpu->job_lock);
+ while (true) {
+ bool no_return;
+
+ job = list_pop(&cpu->job_queue, struct cpu_job, link);
+ if (!job)
+ break;
+
+ func = job->func;
+ data = job->data;
+ no_return = job->no_return;
+ unlock(&cpu->job_lock);
+ prlog(PR_TRACE, "running job %s on %x\n", job->name, cpu->pir);
+ if (no_return)
+ free(job);
+ func(data);
+ if (!list_empty(&cpu->locks_held)) {
+ if (no_return)
+ prlog(PR_ERR, "OPAL no-return job returned with"
+ "locks held!\n");
+ else
+ prlog(PR_ERR, "OPAL job %s returning with locks held\n",
+ job->name);
+ drop_my_locks(true);
+ }
+ lock(&cpu->job_lock);
+ if (!no_return) {
+ cpu->job_count--;
+ lwsync();
+ job->complete = true;
+ }
+ }
+ unlock(&cpu->job_lock);
+}
+
+enum cpu_wake_cause {
+ cpu_wake_on_job,
+ cpu_wake_on_dec,
+};
+
+static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
+{
+ uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE;
+ struct cpu_thread *cpu = this_cpu();
+ unsigned int vec = 0;
+
+ if (!pm_enabled) {
+ prlog_once(PR_DEBUG, "cpu_idle_p8 called pm disabled\n");
+ return vec;
+ }
+
+ /* Clean up ICP, be ready for IPIs */
+ icp_prep_for_pm();
+
+ /* Synchronize with wakers */
+ if (wake_on == cpu_wake_on_job) {
+ /* Mark ourselves in idle so other CPUs know to send an IPI */
+ cpu->in_idle = true;
+ sync();
+
+ /* Check for jobs again */
+ if (cpu_check_jobs(cpu) || !pm_enabled)
+ goto skip_sleep;
+
+ /* Set up wakeup cause in LPCR: EE (for IPI) */
+ lpcr |= SPR_LPCR_P8_PECE2;
+ mtspr(SPR_LPCR, lpcr);
+
+ } else {
+ /* Mark ourselves sleeping so cpu_set_pm_enable knows to
+ * send an IPI
+ */
+ cpu->in_sleep = true;
+ sync();
+
+ /* Check if PM got disabled */
+ if (!pm_enabled)
+ goto skip_sleep;
+
+ /* EE and DEC */
+ lpcr |= SPR_LPCR_P8_PECE2 | SPR_LPCR_P8_PECE3;
+ mtspr(SPR_LPCR, lpcr);
+ }
+ isync();
+
+ /* Enter nap */
+ vec = enter_p8_pm_state(false);
+
+skip_sleep:
+ /* Restore */
+ sync();
+ cpu->in_idle = false;
+ cpu->in_sleep = false;
+ reset_cpu_icp();
+
+ return vec;
+}
+
+static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
+{
+ uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P9_PECE;
+ uint64_t psscr;
+ struct cpu_thread *cpu = this_cpu();
+ unsigned int vec = 0;
+
+ if (!pm_enabled) {
+ prlog(PR_DEBUG, "cpu_idle_p9 called on cpu 0x%04x with pm disabled\n", cpu->pir);
+ return vec;
+ }
+
+ /* Synchronize with wakers */
+ if (wake_on == cpu_wake_on_job) {
+ /* Mark ourselves in idle so other CPUs know to send an IPI */
+ cpu->in_idle = true;
+ sync();
+
+ /* Check for jobs again */
+ if (cpu_check_jobs(cpu) || !pm_enabled)
+ goto skip_sleep;
+
+ /* HV DBELL for IPI */
+ lpcr |= SPR_LPCR_P9_PECEL1;
+ } else {
+ /* Mark ourselves sleeping so cpu_set_pm_enable knows to
+ * send an IPI
+ */
+ cpu->in_sleep = true;
+ sync();
+
+ /* Check if PM got disabled */
+ if (!pm_enabled)
+ goto skip_sleep;
+
+ /* HV DBELL and DEC */
+ lpcr |= SPR_LPCR_P9_PECEL1 | SPR_LPCR_P9_PECEL3;
+ }
+
+ mtspr(SPR_LPCR, lpcr);
+ isync();
+
+ if (sreset_enabled) {
+ /* stop with EC=1 (sreset) and ESL=1 (enable thread switch). */
+ /* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=1 */
+ psscr = PPC_BIT(42) | PPC_BIT(43) |
+ PPC_BITMASK(54, 55) | PPC_BIT(63);
+ vec = enter_p9_pm_state(psscr);
+ } else {
+ /* stop with EC=0 (resumes) which does not require sreset. */
+ /* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=1 */
+ psscr = PPC_BITMASK(54, 55) | PPC_BIT(63);
+ enter_p9_pm_lite_state(psscr);
+ }
+
+ /* Clear doorbell */
+ p9_dbell_receive();
+
+ skip_sleep:
+ /* Restore */
+ sync();
+ cpu->in_idle = false;
+ cpu->in_sleep = false;
+
+ return vec;
+}
+
+static void cpu_idle_pm(enum cpu_wake_cause wake_on)
+{
+ unsigned int vec;
+
+ switch(proc_gen) {
+ case proc_gen_p8:
+ vec = cpu_idle_p8(wake_on);
+ break;
+ case proc_gen_p9:
+ vec = cpu_idle_p9(wake_on);
+ break;
+ case proc_gen_p10:
+ vec = cpu_idle_p9(wake_on);
+ break;
+ default:
+ vec = 0;
+ prlog_once(PR_DEBUG, "cpu_idle_pm called with bad processor type\n");
+ break;
+ }
+
+ if (vec == 0x100) {
+ unsigned long srr1 = mfspr(SPR_SRR1);
+
+ switch (srr1 & SPR_SRR1_PM_WAKE_MASK) {
+ case SPR_SRR1_PM_WAKE_SRESET:
+ exception_entry_pm_sreset();
+ break;
+ default:
+ break;
+ }
+ mtmsrd(MSR_RI, 1);
+
+ } else if (vec == 0x200) {
+ exception_entry_pm_mce();
+ enable_machine_check();
+ mtmsrd(MSR_RI, 1);
+ }
+}
+
+void cpu_idle_job(void)
+{
+ if (pm_enabled) {
+ cpu_idle_pm(cpu_wake_on_job);
+ } else {
+ struct cpu_thread *cpu = this_cpu();
+
+ smt_lowest();
+ /* Check for jobs again */
+ while (!cpu_check_jobs(cpu)) {
+ if (pm_enabled)
+ break;
+ cpu_relax();
+ barrier();
+ }
+ smt_medium();
+ }
+}
+
+void cpu_idle_delay(unsigned long delay)
+{
+ unsigned long now = mftb();
+ unsigned long end = now + delay;
+ unsigned long min_pm = usecs_to_tb(10);
+
+ if (pm_enabled && delay > min_pm) {
+pm:
+ for (;;) {
+ if (delay >= 0x7fffffff)
+ delay = 0x7fffffff;
+ mtspr(SPR_DEC, delay);
+
+ cpu_idle_pm(cpu_wake_on_dec);
+
+ now = mftb();
+ if (tb_compare(now, end) == TB_AAFTERB)
+ break;
+ delay = end - now;
+ if (!(pm_enabled && delay > min_pm))
+ goto no_pm;
+ }
+ } else {
+no_pm:
+ smt_lowest();
+ for (;;) {
+ now = mftb();
+ if (tb_compare(now, end) == TB_AAFTERB)
+ break;
+ delay = end - now;
+ if (pm_enabled && delay > min_pm) {
+ smt_medium();
+ goto pm;
+ }
+ }
+ smt_medium();
+ }
+}
+
+static void cpu_pm_disable(void)
+{
+ struct cpu_thread *cpu;
+ unsigned int timeout;
+
+ pm_enabled = false;
+ sync();
+
+ if (proc_gen == proc_gen_p8) {
+ for_each_available_cpu(cpu) {
+ while (cpu->in_sleep || cpu->in_idle) {
+ icp_kick_cpu(cpu);
+ cpu_relax();
+ }
+ }
+ } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
+ for_each_available_cpu(cpu) {
+ if (cpu->in_sleep || cpu->in_idle)
+ p9_dbell_send(cpu->pir);
+ }
+
+ /* This code is racy with cpus entering idle, late ones miss the dbell */
+
+ smt_lowest();
+ for_each_available_cpu(cpu) {
+ timeout = 0x08000000;
+ while ((cpu->in_sleep || cpu->in_idle) && --timeout)
+ barrier();
+ if (!timeout) {
+ prlog(PR_DEBUG, "cpu_pm_disable TIMEOUT on cpu 0x%04x to exit idle\n",
+ cpu->pir);
+ p9_dbell_send(cpu->pir);
+ }
+ }
+ smt_medium();
+ }
+}
+
+void cpu_set_sreset_enable(bool enabled)
+{
+ if (sreset_enabled == enabled)
+ return;
+
+ if (proc_gen == proc_gen_p8) {
+ /* Public P8 Mambo has broken NAP */
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+ return;
+
+ sreset_enabled = enabled;
+ sync();
+
+ if (!enabled) {
+ cpu_pm_disable();
+ } else {
+ if (ipi_enabled)
+ pm_enabled = true;
+ }
+
+ } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
+ sreset_enabled = enabled;
+ sync();
+ /*
+ * Kick everybody out of PM so they can adjust the PM
+ * mode they are using (EC=0/1).
+ */
+ cpu_pm_disable();
+ if (ipi_enabled)
+ pm_enabled = true;
+ }
+}
+
+void cpu_set_ipi_enable(bool enabled)
+{
+ if (ipi_enabled == enabled)
+ return;
+
+ if (proc_gen == proc_gen_p8) {
+ ipi_enabled = enabled;
+ sync();
+ if (!enabled) {
+ cpu_pm_disable();
+ } else {
+ if (sreset_enabled)
+ pm_enabled = true;
+ }
+
+ } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
+ ipi_enabled = enabled;
+ sync();
+ if (!enabled)
+ cpu_pm_disable();
+ else
+ pm_enabled = true;
+ }
+}
+
+void cpu_process_local_jobs(void)
+{
+ struct cpu_thread *cpu = first_available_cpu();
+
+ while (cpu) {
+ if (cpu != this_cpu())
+ return;
+
+ cpu = next_available_cpu(cpu);
+ }
+
+ if (!cpu)
+ cpu = first_available_cpu();
+
+ /* No CPU to run on, just run synchro */
+ if (cpu == this_cpu()) {
+ prlog_once(PR_DEBUG, "Processing jobs synchronously\n");
+ cpu_process_jobs();
+ opal_run_pollers();
+ }
+}
+
+
+struct dt_node *get_cpu_node(u32 pir)
+{
+ struct cpu_thread *t = find_cpu_by_pir(pir);
+
+ return t ? t->node : NULL;
+}
+
+/* This only covers primary, active cpus */
+struct cpu_thread *find_cpu_by_chip_id(u32 chip_id)
+{
+ struct cpu_thread *t;
+
+ for_each_available_cpu(t) {
+ if (t->is_secondary)
+ continue;
+ if (t->chip_id == chip_id)
+ return t;
+ }
+ return NULL;
+}
+
+struct cpu_thread *find_cpu_by_node(struct dt_node *cpu)
+{
+ struct cpu_thread *t;
+
+ for_each_available_cpu(t) {
+ if (t->node == cpu)
+ return t;
+ }
+ return NULL;
+}
+
+struct cpu_thread *find_cpu_by_pir(u32 pir)
+{
+ if (pir > cpu_max_pir)
+ return NULL;
+ return &cpu_stacks[pir].cpu;
+}
+
+struct cpu_thread __nomcount *find_cpu_by_pir_nomcount(u32 pir)
+{
+ if (pir > cpu_max_pir)
+ return NULL;
+ return &cpu_stacks[pir].cpu;
+}
+
+struct cpu_thread *find_cpu_by_server(u32 server_no)
+{
+ struct cpu_thread *t;
+
+ for_each_cpu(t) {
+ if (t->server_no == server_no)
+ return t;
+ }
+ return NULL;
+}
+
+struct cpu_thread *next_cpu(struct cpu_thread *cpu)
+{
+ struct cpu_stack *s;
+ unsigned int index = 0;
+
+ if (cpu != NULL) {
+ s = container_of(cpu, struct cpu_stack, cpu);
+ index = s - cpu_stacks + 1;
+ }
+ for (; index <= cpu_max_pir; index++) {
+ cpu = &cpu_stacks[index].cpu;
+ if (cpu->state != cpu_state_no_cpu)
+ return cpu;
+ }
+ return NULL;
+}
+
+struct cpu_thread *first_cpu(void)
+{
+ return next_cpu(NULL);
+}
+
+struct cpu_thread *next_available_cpu(struct cpu_thread *cpu)
+{
+ do {
+ cpu = next_cpu(cpu);
+ } while(cpu && !cpu_is_available(cpu));
+
+ return cpu;
+}
+
+struct cpu_thread *first_available_cpu(void)
+{
+ return next_available_cpu(NULL);
+}
+
+struct cpu_thread *next_present_cpu(struct cpu_thread *cpu)
+{
+ do {
+ cpu = next_cpu(cpu);
+ } while(cpu && !cpu_is_present(cpu));
+
+ return cpu;
+}
+
+struct cpu_thread *first_present_cpu(void)
+{
+ return next_present_cpu(NULL);
+}
+
+struct cpu_thread *next_ungarded_cpu(struct cpu_thread *cpu)
+{
+ do {
+ cpu = next_cpu(cpu);
+ } while(cpu && cpu->state == cpu_state_unavailable);
+
+ return cpu;
+}
+
+struct cpu_thread *first_ungarded_cpu(void)
+{
+ return next_ungarded_cpu(NULL);
+}
+
+struct cpu_thread *next_ungarded_primary(struct cpu_thread *cpu)
+{
+ do {
+ cpu = next_ungarded_cpu(cpu);
+ } while (cpu && !(cpu == cpu->primary || cpu == cpu->ec_primary));
+
+ return cpu;
+}
+
+struct cpu_thread *first_ungarded_primary(void)
+{
+ return next_ungarded_primary(NULL);
+}
+
+u8 get_available_nr_cores_in_chip(u32 chip_id)
+{
+ struct cpu_thread *core;
+ u8 nr_cores = 0;
+
+ for_each_available_core_in_chip(core, chip_id)
+ nr_cores++;
+
+ return nr_cores;
+}
+
+struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core,
+ u32 chip_id)
+{
+ do {
+ core = next_cpu(core);
+ } while(core && (!cpu_is_available(core) ||
+ core->chip_id != chip_id ||
+ core->is_secondary));
+ return core;
+}
+
+struct cpu_thread *first_available_core_in_chip(u32 chip_id)
+{
+ return next_available_core_in_chip(NULL, chip_id);
+}
+
+uint32_t cpu_get_core_index(struct cpu_thread *cpu)
+{
+ return pir_to_fused_core_id(cpu->pir);
+}
+
+void cpu_remove_node(const struct cpu_thread *t)
+{
+ struct dt_node *i;
+
+ /* Find this cpu node */
+ dt_for_each_node(dt_root, i) {
+ const struct dt_property *p;
+
+ if (!dt_has_node_property(i, "device_type", "cpu"))
+ continue;
+ p = dt_find_property(i, "ibm,pir");
+ if (!p)
+ continue;
+ if (dt_property_get_cell(p, 0) == t->pir) {
+ dt_free(i);
+ return;
+ }
+ }
+ prerror("CPU: Could not find cpu node %i to remove!\n", t->pir);
+ abort();
+}
+
+void cpu_disable_all_threads(struct cpu_thread *cpu)
+{
+ unsigned int i;
+ struct dt_property *p;
+
+ for (i = 0; i <= cpu_max_pir; i++) {
+ struct cpu_thread *t = &cpu_stacks[i].cpu;
+
+ if (t->primary == cpu->primary)
+ t->state = cpu_state_disabled;
+
+ }
+
+ /* Mark this core as bad so that the Linux kernel doesn't use this CPU. */
+ prlog(PR_DEBUG, "CPU: Mark CPU bad (PIR 0x%04x)...\n", cpu->pir);
+ p = __dt_find_property(cpu->node, "status");
+ if (p)
+ dt_del_property(cpu->node, p);
+
+ dt_add_property_string(cpu->node, "status", "bad");
+
+ /* XXX Do something to actually stop the core */
+}
+
+static void init_cpu_thread(struct cpu_thread *t,
+ enum cpu_thread_state state,
+ unsigned int pir)
+{
+ /* offset within cpu_thread to prevent stack_guard clobber */
+ const size_t guard_skip = container_off_var(t, stack_guard) +
+ sizeof(t->stack_guard);
+
+ memset(((void *)t) + guard_skip, 0, sizeof(struct cpu_thread) - guard_skip);
+ init_lock(&t->dctl_lock);
+ init_lock(&t->job_lock);
+ list_head_init(&t->job_queue);
+ list_head_init(&t->locks_held);
+ t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir;
+ t->state = state;
+ t->pir = pir;
+#ifdef STACK_CHECK_ENABLED
+ t->stack_bot_mark = LONG_MAX;
+#endif
+ t->is_fused_core = is_fused_core(mfspr(SPR_PVR));
+ assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
+}
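+
+/*
+ * The memset above intentionally starts just past stack_guard: every field
+ * after the guard word is zeroed while the guard itself is only rewritten
+ * explicitly (STACK_CHECK_GUARD_BASE ^ pir). pre_init_boot_cpu() below
+ * relies on the same layout when it skips the first 8 bytes of the struct.
+ */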
+
+static void enable_attn(void)
+{
+ unsigned long hid0;
+
+ hid0 = mfspr(SPR_HID0);
+ hid0 |= hid0_attn;
+ set_hid0(hid0);
+}
+
+static void disable_attn(void)
+{
+ unsigned long hid0;
+
+ hid0 = mfspr(SPR_HID0);
+ hid0 &= ~hid0_attn;
+ set_hid0(hid0);
+}
+
+extern void __trigger_attn(void);
+void trigger_attn(void)
+{
+ enable_attn();
+ __trigger_attn();
+}
+
+static void init_hid(void)
+{
+ /* attn is enabled even when HV=0, so make sure it's off */
+ disable_attn();
+}
+
+void __nomcount pre_init_boot_cpu(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ /* We skip the stack guard ! */
+ memset(((void *)cpu) + 8, 0, sizeof(struct cpu_thread) - 8);
+}
+
+void init_boot_cpu(void)
+{
+ unsigned int pir, pvr;
+
+ pir = mfspr(SPR_PIR);
+ pvr = mfspr(SPR_PVR);
+
+ /* Get CPU family and other flags based on PVR */
+ switch(PVR_TYPE(pvr)) {
+ case PVR_TYPE_P8E:
+ case PVR_TYPE_P8:
+ proc_gen = proc_gen_p8;
+ hile_supported = PVR_VERS_MAJ(mfspr(SPR_PVR)) >= 2;
+ hid0_hile = SPR_HID0_POWER8_HILE;
+ hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
+ break;
+ case PVR_TYPE_P8NVL:
+ proc_gen = proc_gen_p8;
+ hile_supported = true;
+ hid0_hile = SPR_HID0_POWER8_HILE;
+ hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
+ break;
+ case PVR_TYPE_P9:
+ case PVR_TYPE_P9P:
+ proc_gen = proc_gen_p9;
+ hile_supported = true;
+ radix_supported = true;
+ hid0_hile = SPR_HID0_POWER9_HILE;
+ hid0_attn = SPR_HID0_POWER9_ENABLE_ATTN;
+ break;
+ case PVR_TYPE_P10:
+ proc_gen = proc_gen_p10;
+ hile_supported = true;
+ radix_supported = true;
+ hid0_hile = SPR_HID0_POWER10_HILE;
+ hid0_attn = SPR_HID0_POWER10_ENABLE_ATTN;
+ break;
+ default:
+ proc_gen = proc_gen_unknown;
+ }
+
+ /* Get a CPU thread count based on family */
+ switch(proc_gen) {
+ case proc_gen_p8:
+ cpu_thread_count = 8;
+ prlog(PR_INFO, "CPU: P8 generation processor"
+ " (max %d threads/core)\n", cpu_thread_count);
+ break;
+ case proc_gen_p9:
+ if (is_fused_core(pvr))
+ cpu_thread_count = 8;
+ else
+ cpu_thread_count = 4;
+ prlog(PR_INFO, "CPU: P9 generation processor"
+ " (max %d threads/core)\n", cpu_thread_count);
+ break;
+ case proc_gen_p10:
+ if (is_fused_core(pvr))
+ cpu_thread_count = 8;
+ else
+ cpu_thread_count = 4;
+ prlog(PR_INFO, "CPU: P10 generation processor"
+ " (max %d threads/core)\n", cpu_thread_count);
+ break;
+ default:
+ prerror("CPU: Unknown PVR, assuming 1 thread\n");
+ cpu_thread_count = 1;
+ }
+
+ if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
+ prerror("CPU: POWER9N DD1 is not supported\n");
+ abort();
+ }
+
+ prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
+ pir, pvr);
+
+ /*
+ * Adjust top of RAM to include the boot CPU stack. If we have less
+ * RAM than this, it's not possible to boot.
+ */
+ cpu_max_pir = pir;
+ top_of_ram += (cpu_max_pir + 1) * STACK_SIZE;
+
+ /* Setup boot CPU state */
+ boot_cpu = &cpu_stacks[pir].cpu;
+ init_cpu_thread(boot_cpu, cpu_state_active, pir);
+ init_boot_tracebuf(boot_cpu);
+ assert(this_cpu() == boot_cpu);
+ init_hid();
+}
+
+static void enable_large_dec(bool on)
+{
+ u64 lpcr = mfspr(SPR_LPCR);
+
+ if (on)
+ lpcr |= SPR_LPCR_P9_LD;
+ else
+ lpcr &= ~SPR_LPCR_P9_LD;
+
+ mtspr(SPR_LPCR, lpcr);
+ isync();
+}
+
+#define HIGH_BIT (1ull << 63)
+
+static int find_dec_bits(void)
+{
+ int bits = 65; /* we always decrement once */
+ u64 mask = ~0ull;
+
+ if (proc_gen < proc_gen_p9)
+ return 32;
+
+ /* The ISA doesn't specify the width of the decrementer register so we
+ * need to discover it. When in large mode (LPCR.LD = 1) reads from the
+ * DEC SPR are sign extended to 64 bits and writes are truncated to the
+ * physical register width. We can use this behaviour to detect the
+ * width by starting from an all 1s value and left shifting until we
+ * read a value from the DEC with its high bit cleared.
+ */
+
+ enable_large_dec(true);
+
+ do {
+ bits--;
+ mask = mask >> 1;
+ mtspr(SPR_DEC, mask);
+ } while (mfspr(SPR_DEC) & HIGH_BIT);
+
+ enable_large_dec(false);
+
+ prlog(PR_DEBUG, "CPU: decrementer bits %d\n", bits);
+ return bits;
+}
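+
+/*
+ * Worked example of the loop above: with a 56-bit decrementer the write of
+ * "mask" is truncated to 56 bits, and the sign-extended read only loses the
+ * high bit once mask has been shifted right 64 - 56 + 1 = 9 times, leaving
+ * bits == 65 - 9 == 56.
+ */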
+
+static void init_tm_suspend_mode_property(void)
+{
+ struct dt_node *node;
+
+ /* If we don't find anything, assume TM suspend is enabled */
+ tm_suspend_enabled = true;
+
+ node = dt_find_by_path(dt_root, "/ibm,opal/fw-features/tm-suspend-mode");
+ if (!node)
+ return;
+
+ if (dt_find_property(node, "disabled"))
+ tm_suspend_enabled = false;
+}
+
+void init_cpu_max_pir(void)
+{
+ struct dt_node *cpus, *cpu;
+
+ cpus = dt_find_by_path(dt_root, "/cpus");
+ assert(cpus);
+
+ /* Iterate all CPUs in the device-tree */
+ dt_for_each_child(cpus, cpu) {
+ unsigned int pir, server_no;
+
+ /* Skip cache nodes */
+ if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
+ continue;
+
+ server_no = dt_prop_get_u32(cpu, "reg");
+
+ /* If PIR property is absent, assume it's the same as the
+ * server number
+ */
+ pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);
+
+ if (cpu_max_pir < pir + cpu_thread_count - 1)
+ cpu_max_pir = pir + cpu_thread_count - 1;
+ }
+
+ prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", cpu_max_pir);
+}
+
+/*
+ * Set cpu->state to cpu_state_no_cpu for all secondaries before the dt is
+ * parsed; they will be flipped to present as populated CPUs are found.
+ *
+ * Some configurations (e.g., with memory encryption) will not zero system
+ * memory at boot, so we can't rely on cpu->state being zero (== cpu_state_no_cpu).
+ */
+static void mark_all_secondary_cpus_absent(void)
+{
+ unsigned int pir;
+ struct cpu_thread *cpu;
+
+ for (pir = 0; pir <= cpu_max_pir; pir++) {
+ cpu = &cpu_stacks[pir].cpu;
+ if (cpu == boot_cpu)
+ continue;
+ cpu->state = cpu_state_no_cpu;
+ }
+}
+
+void init_all_cpus(void)
+{
+ struct dt_node *cpus, *cpu;
+ unsigned int pir, thread;
+ int dec_bits = find_dec_bits();
+
+ cpus = dt_find_by_path(dt_root, "/cpus");
+ assert(cpus);
+
+ init_tm_suspend_mode_property();
+
+ mark_all_secondary_cpus_absent();
+
+ /* Iterate all CPUs in the device-tree */
+ dt_for_each_child(cpus, cpu) {
+ unsigned int server_no, chip_id, threads;
+ enum cpu_thread_state state;
+ const struct dt_property *p;
+ struct cpu_thread *t, *pt0, *pt1;
+
+ /* Skip cache nodes */
+ if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
+ continue;
+
+ server_no = dt_prop_get_u32(cpu, "reg");
+
+ /* If PIR property is absent, assume it's the same as the
+ * server number
+ */
+ pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);
+
+ /* We should always have an ibm,chip-id property */
+ chip_id = dt_get_chip_id(cpu);
+
+ /* Only use operational CPUs */
+ if (!strcmp(dt_prop_get(cpu, "status"), "okay")) {
+ state = cpu_state_present;
+ get_chip(chip_id)->ex_present = true;
+ } else {
+ state = cpu_state_unavailable;
+ }
+
+ prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
+ " State=%d\n", pir, server_no, state);
+
+ /* Check max PIR */
+ if (cpu_max_pir < (pir + cpu_thread_count - 1)) {
+ prlog(PR_WARNING, "CPU: CPU potentially out of range"
+ "PIR=0x%04x MAX=0x%04x !\n",
+ pir, cpu_max_pir);
+ continue;
+ }
+
+ /* Setup thread 0 */
+ assert(pir <= cpu_max_pir);
+ t = pt0 = &cpu_stacks[pir].cpu;
+ if (t != boot_cpu) {
+ init_cpu_thread(t, state, pir);
+ /* Each cpu gets its own later in init_trace_buffers */
+ t->trace = boot_cpu->trace;
+ }
+ if (t->is_fused_core)
+ pt1 = &cpu_stacks[pir + 1].cpu;
+ else
+ pt1 = pt0;
+ t->server_no = server_no;
+ t->primary = t->ec_primary = t;
+ t->node = cpu;
+ t->chip_id = chip_id;
+ t->icp_regs = NULL; /* Will be set later */
+#ifdef DEBUG_LOCKS
+ t->requested_lock = NULL;
+#endif
+ t->core_hmi_state = 0;
+ t->core_hmi_state_ptr = &t->core_hmi_state;
+
+ /* Add associativity properties */
+ add_core_associativity(t);
+
+ /* Add the decrementer width property */
+ dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
+
+ if (t->is_fused_core)
+ dt_add_property(t->node, "ibm,fused-core", NULL, 0);
+
+ /* Iterate threads */
+ p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
+ if (!p)
+ continue;
+ threads = p->len / 4;
+ if (threads > cpu_thread_count) {
+ prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x"
+ " threads=%d max=%d\n",
+ pir, threads, cpu_thread_count);
+ threads = cpu_thread_count;
+ }
+ for (thread = 1; thread < threads; thread++) {
+ prlog(PR_TRACE, "CPU: secondary thread %d found\n",
+ thread);
+ t = &cpu_stacks[pir + thread].cpu;
+ init_cpu_thread(t, state, pir + thread);
+ t->trace = boot_cpu->trace;
+ t->server_no = dt_property_get_cell(p, thread);
+ t->is_secondary = true;
+ t->is_fused_core = pt0->is_fused_core;
+ t->primary = pt0;
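+ /*
+ * On a fused core the two EC halves interleave by thread
+ * parity: even threads get pt0 and odd threads pt1 as their
+ * ec_primary (pt1 == pt0 on a normal core).
+ */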
+ t->ec_primary = (thread & 1) ? pt1 : pt0;
+ t->node = cpu;
+ t->chip_id = chip_id;
+ t->core_hmi_state_ptr = &pt0->core_hmi_state;
+ }
+ prlog(PR_INFO, "CPU: %d secondary threads\n", thread);
+ }
+}
+
+void cpu_bringup(void)
+{
+ struct cpu_thread *t;
+ uint32_t count = 0;
+
+ prlog(PR_INFO, "CPU: Setting up secondary CPU state\n");
+
+ op_display(OP_LOG, OP_MOD_CPU, 0x0000);
+
+ /* Tell everybody to chime in ! */
+ prlog(PR_INFO, "CPU: Calling in all processors...\n");
+ cpu_secondary_start = 1;
+ sync();
+
+ op_display(OP_LOG, OP_MOD_CPU, 0x0002);
+
+ for_each_cpu(t) {
+ if (t->state != cpu_state_present &&
+ t->state != cpu_state_active)
+ continue;
+
+ /* Add a callin timeout ? If so, call cpu_remove_node(t). */
+ while (t->state != cpu_state_active) {
+ smt_lowest();
+ sync();
+ }
+ smt_medium();
+ count++;
+ }
+
+ prlog(PR_NOTICE, "CPU: All %d processors called in...\n", count);
+
+ op_display(OP_LOG, OP_MOD_CPU, 0x0003);
+}
+
+void cpu_callin(struct cpu_thread *cpu)
+{
+ sync();
+ cpu->state = cpu_state_active;
+ sync();
+
+ cpu->job_has_no_return = false;
+ if (cpu_is_thread0(cpu))
+ init_hid();
+}
+
+static void opal_start_thread_job(void *data)
+{
+ cpu_give_self_os();
+
+ /* We do not return, so let's mark the job as
+ * complete
+ */
+ start_kernel_secondary((uint64_t)data);
+}
+
+static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address)
+{
+ struct cpu_thread *cpu;
+ struct cpu_job *job;
+
+ if (!opal_addr_valid((void *)start_address))
+ return OPAL_PARAMETER;
+
+ cpu = find_cpu_by_server(server_no);
+ if (!cpu) {
+ prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no);
+ return OPAL_PARAMETER;
+ }
+ prlog(PR_DEBUG, "OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n",
+ server_no, cpu->pir, start_address);
+
+ lock(&reinit_lock);
+ if (!cpu_is_available(cpu)) {
+ unlock(&reinit_lock);
+ prerror("OPAL: CPU not active in OPAL !\n");
+ return OPAL_WRONG_STATE;
+ }
+ if (cpu->in_reinit) {
+ unlock(&reinit_lock);
+ prerror("OPAL: CPU being reinitialized !\n");
+ return OPAL_WRONG_STATE;
+ }
+ job = __cpu_queue_job(cpu, "start_thread",
+ opal_start_thread_job, (void *)start_address,
+ true);
+ unlock(&reinit_lock);
+ if (!job) {
+ prerror("OPAL: Failed to create CPU start job !\n");
+ return OPAL_INTERNAL_ERROR;
+ }
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2);
+
+static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status)
+{
+ struct cpu_thread *cpu;
+
+ if (!opal_addr_valid(thread_status))
+ return OPAL_PARAMETER;
+
+ cpu = find_cpu_by_server(server_no);
+ if (!cpu) {
+ prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no);
+ return OPAL_PARAMETER;
+ }
+ if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) {
+ prerror("OPAL: CPU not active in OPAL nor OS !\n");
+ return OPAL_PARAMETER;
+ }
+ switch(cpu->state) {
+ case cpu_state_os:
+ *thread_status = OPAL_THREAD_STARTED;
+ break;
+ case cpu_state_active:
+ /* Active in skiboot -> inactive in OS */
+ *thread_status = OPAL_THREAD_INACTIVE;
+ break;
+ default:
+ *thread_status = OPAL_THREAD_UNAVAILABLE;
+ }
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2);
+
+static int64_t opal_return_cpu(void)
+{
+ prlog(PR_DEBUG, "OPAL: Returning CPU 0x%04x\n", this_cpu()->pir);
+
+ this_cpu()->in_opal_call--;
+ if (this_cpu()->in_opal_call != 0) {
+ printf("OPAL in_opal_call=%u\n", this_cpu()->in_opal_call);
+ }
+
+ __secondary_cpu_entry();
+
+ return OPAL_HARDWARE; /* Should not happen */
+}
+opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0);
+
+struct hid0_change_req {
+ uint64_t clr_bits;
+ uint64_t set_bits;
+};
+
+static void cpu_change_hid0(void *__req)
+{
+ struct hid0_change_req *req = __req;
+ unsigned long hid0, new_hid0;
+
+ hid0 = new_hid0 = mfspr(SPR_HID0);
+ new_hid0 &= ~req->clr_bits;
+ new_hid0 |= req->set_bits;
+ prlog(PR_DEBUG, "CPU: [%08x] HID0 change 0x%016lx -> 0x%016lx\n",
+ this_cpu()->pir, hid0, new_hid0);
+ set_hid0(new_hid0);
+}
+
+static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
+{
+ struct cpu_thread *cpu;
+ struct cpu_job **jobs;
+
+ jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
+ assert(jobs);
+
+ for_each_available_cpu(cpu) {
+ if (!cpu_is_thread0(cpu) && !cpu_is_core_chiplet_primary(cpu))
+ continue;
+ if (cpu == this_cpu())
+ continue;
+ jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_change_hid0",
+ cpu_change_hid0, req);
+ }
+
+ /* this cpu */
+ cpu_change_hid0(req);
+
+ for_each_available_cpu(cpu) {
+ if (jobs[cpu->pir])
+ cpu_wait_job(jobs[cpu->pir], true);
+ }
+
+ free(jobs);
+
+ return OPAL_SUCCESS;
+}
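+
+/*
+ * HID0 is shared by all threads of a core, so cpu_change_all_hid0() only
+ * queues the update on threads for which cpu_is_thread0() or
+ * cpu_is_core_chiplet_primary() is true, applies it locally on the calling
+ * CPU, then waits for every queued job before returning.
+ */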
+
+void cpu_set_hile_mode(bool hile)
+{
+ struct hid0_change_req req;
+
+ if (hile == current_hile_mode)
+ return;
+
+ if (hile) {
+ req.clr_bits = 0;
+ req.set_bits = hid0_hile;
+ } else {
+ req.clr_bits = hid0_hile;
+ req.set_bits = 0;
+ }
+ cpu_change_all_hid0(&req);
+ current_hile_mode = hile;
+}
+
+static void cpu_cleanup_one(void *param __unused)
+{
+ mtspr(SPR_AMR, 0);
+ mtspr(SPR_IAMR, 0);
+ mtspr(SPR_PCR, 0);
+}
+
+static int64_t cpu_cleanup_all(void)
+{
+ struct cpu_thread *cpu;
+ struct cpu_job **jobs;
+
+ jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
+ assert(jobs);
+
+ for_each_available_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+ jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_cleanup",
+ cpu_cleanup_one, NULL);
+ }
+
+ /* this cpu */
+ cpu_cleanup_one(NULL);
+
+ for_each_available_cpu(cpu) {
+ if (jobs[cpu->pir])
+ cpu_wait_job(jobs[cpu->pir], true);
+ }
+
+ free(jobs);
+
+ return OPAL_SUCCESS;
+}
+
+void cpu_fast_reboot_complete(void)
+{
+ /* Fast reboot will have set HID0:HILE to skiboot endian */
+ current_hile_mode = HAVE_LITTLE_ENDIAN;
+
+ /* and set HID0:RADIX */
+ if (proc_gen == proc_gen_p9)
+ current_radix_mode = true;
+}
+
+static int64_t opal_reinit_cpus(uint64_t flags)
+{
+ struct hid0_change_req req = { 0, 0 };
+ struct cpu_thread *cpu;
+ int64_t rc = OPAL_SUCCESS;
+ int i;
+
+ prlog(PR_DEBUG, "OPAL: CPU re-init with flags: 0x%llx\n", flags);
+
+ if (flags & OPAL_REINIT_CPUS_HILE_LE)
+ prlog(PR_INFO, "OPAL: Switch to little-endian OS\n");
+ else if (flags & OPAL_REINIT_CPUS_HILE_BE)
+ prlog(PR_INFO, "OPAL: Switch to big-endian OS\n");
+
+ again:
+ lock(&reinit_lock);
+
+ for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
+ if (cpu == this_cpu() || cpu->in_reinit)
+ continue;
+ if (cpu->state == cpu_state_os) {
+ unlock(&reinit_lock);
+ /*
+ * This might be a race with OPAL_RETURN_CPU during kexec while
+ * the CPU is still transitioning; wait a bit and try again.
+ */
+ for (i = 0; (i < 1000) &&
+ (cpu->state == cpu_state_os); i++) {
+ time_wait_ms(1);
+ }
+ if (cpu->state == cpu_state_os) {
+ prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir);
+ return OPAL_WRONG_STATE;
+ }
+ goto again;
+ }
+ cpu->in_reinit = true;
+ }
+ /*
+ * Now we need to mark ourselves "active" or we'll be skipped
+ * by the various "for_each_active_..." calls done by slw_reinit()
+ */
+ this_cpu()->state = cpu_state_active;
+ this_cpu()->in_reinit = true;
+ unlock(&reinit_lock);
+
+ /*
+ * This cleans up a few things left over by Linux
+ * that can cause problems in cases such as radix->hash
+ * transitions. Ideally Linux should do it but doing it
+ * here works around existing broken kernels.
+ */
+ cpu_cleanup_all();
+
+ /* If HILE change via HID0 is supported ... */
+ if (hile_supported &&
+ (flags & (OPAL_REINIT_CPUS_HILE_BE |
+ OPAL_REINIT_CPUS_HILE_LE))) {
+ bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE);
+
+ flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE);
+ if (hile != current_hile_mode) {
+ if (hile)
+ req.set_bits |= hid0_hile;
+ else
+ req.clr_bits |= hid0_hile;
+ current_hile_mode = hile;
+ }
+ }
+
+ /* If MMU mode change is supported */
+ if (radix_supported &&
+ (flags & (OPAL_REINIT_CPUS_MMU_HASH |
+ OPAL_REINIT_CPUS_MMU_RADIX))) {
+ bool radix = !!(flags & OPAL_REINIT_CPUS_MMU_RADIX);
+
+ flags &= ~(OPAL_REINIT_CPUS_MMU_HASH |
+ OPAL_REINIT_CPUS_MMU_RADIX);
+
+ if (proc_gen == proc_gen_p9 && radix != current_radix_mode) {
+ if (radix)
+ req.set_bits |= SPR_HID0_POWER9_RADIX;
+ else
+ req.clr_bits |= SPR_HID0_POWER9_RADIX;
+
+ current_radix_mode = radix;
+ }
+ }
+
+ /* Clean up the TLB. We do that unconditionally; this works
+ * around issues where OSes fail to invalidate the PWC in Radix
+ * mode, for example. This only works on P9 and later, but Linux
+ * is known to clean up properly on P8, so that isn't a concern
+ * there. If we wanted to clean up the TLB on P8 as well, we'd
+ * have to use jobs to do it locally on each CPU.
+ */
+ cleanup_global_tlb();
+
+ /* Apply HID bits changes if any */
+ if (req.set_bits || req.clr_bits)
+ cpu_change_all_hid0(&req);
+
+ if (flags & OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) {
+ flags &= ~OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED;
+
+ if (tm_suspend_enabled)
+ rc = OPAL_UNSUPPORTED;
+ else
+ rc = OPAL_SUCCESS;
+ }
+
+ /* Handle P8 DD1 SLW reinit */
+ if (flags != 0 && proc_gen == proc_gen_p8 && !hile_supported)
+ rc = slw_reinit(flags);
+ else if (flags != 0)
+ rc = OPAL_UNSUPPORTED;
+
+ /* And undo the above */
+ lock(&reinit_lock);
+ this_cpu()->state = cpu_state_os;
+ for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
+ cpu->in_reinit = false;
+ unlock(&reinit_lock);
+
+ return rc;
+}
+opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);
+
+#define NMMU_XLAT_CTL_PTCR 0xb
+static int64_t nmmu_set_ptcr(uint64_t chip_id, struct dt_node *node, uint64_t ptcr)
+{
+ uint32_t nmmu_base_addr;
+
+ nmmu_base_addr = dt_get_address(node, 0, NULL);
+ return xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_PTCR, ptcr);
+}
+
+/*
+ * Set up the Nest MMU PTCR register for all chips in the system or
+ * the specified chip id.
+ *
+ * The PTCR value may be overwritten so long as all users have been
+ * quiesced. If it is set to an invalid memory address the system will
+ * checkstop if anything attempts to use it.
+ *
+ * Returns OPAL_UNSUPPORTED if no nest mmu was found.
+ */
+static int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr)
+{
+ struct dt_node *node;
+ int64_t rc = OPAL_UNSUPPORTED;
+
+ if (chip_id == -1ULL)
+ dt_for_each_compatible(dt_root, node, "ibm,power9-nest-mmu") {
+ chip_id = dt_get_chip_id(node);
+ if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
+ return rc;
+ }
+ else
+ dt_for_each_compatible_on_chip(dt_root, node, "ibm,power9-nest-mmu", chip_id)
+ if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
+ return rc;
+
+ return rc;
+}
+opal_call(OPAL_NMMU_SET_PTCR, opal_nmmu_set_ptcr, 2);
+
+static void _exit_uv_mode(void *data __unused)
+{
+ prlog(PR_DEBUG, "Exit uv mode on cpu pir 0x%04x\n", this_cpu()->pir);
+ /* HW has smfctrl shared between threads but on Mambo it is per-thread */
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+ exit_uv_mode(1);
+ else
+ exit_uv_mode(cpu_is_thread0(this_cpu()));
+}
+
+void cpu_disable_pef(void)
+{
+ struct cpu_thread *cpu;
+ struct cpu_job **jobs;
+
+ if (!(mfmsr() & MSR_S)) {
+ prlog(PR_DEBUG, "UV mode off on cpu pir 0x%04x\n", this_cpu()->pir);
+ return;
+ }
+
+ jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
+ assert(jobs);
+
+ /* Exit uv mode on all secondary threads before touching
+ * smfctrl on thread 0 */
+ for_each_available_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+
+ if (!cpu_is_thread0(cpu))
+ jobs[cpu->pir] = cpu_queue_job(cpu, "exit_uv_mode",
+ _exit_uv_mode, NULL);
+ }
+
+ for_each_available_cpu(cpu)
+ if (jobs[cpu->pir]) {
+ cpu_wait_job(jobs[cpu->pir], true);
+ jobs[cpu->pir] = NULL;
+ }
+
+ /* Exit uv mode and disable smfctrl on primary threads */
+ for_each_available_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+
+ if (cpu_is_thread0(cpu))
+ jobs[cpu->pir] = cpu_queue_job(cpu, "exit_uv_mode",
+ _exit_uv_mode, NULL);
+ }
+
+ for_each_available_cpu(cpu)
+ if (jobs[cpu->pir])
+ cpu_wait_job(jobs[cpu->pir], true);
+
+ free(jobs);
+
+ _exit_uv_mode(NULL);
+}
diff --git a/roms/skiboot/core/cpufeatures.c b/roms/skiboot/core/cpufeatures.c
new file mode 100644
index 000000000..5620b741d
--- /dev/null
+++ b/roms/skiboot/core/cpufeatures.c
@@ -0,0 +1,1043 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * This file deals with setup of /cpus/ibm,powerpc-cpu-features dt
+ *
+ * Copyright 2017-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <processor.h>
+#include <ccan/str/str.h>
+#include <device.h>
+
+#ifdef DEBUG
+#define DBG(fmt, a...) prlog(PR_DEBUG, "CPUFT: " fmt, ##a)
+#else
+#define DBG(fmt, a...)
+#endif
+
+/* Device-tree visible constants follow */
+#define ISA_V2_07B 2070
+#define ISA_V3_0B 3000
+#define ISA_V3_1 3100
+
+#define USABLE_PR (1U << 0)
+#define USABLE_OS (1U << 1)
+#define USABLE_HV (1U << 2)
+
+#define HV_SUPPORT_HFSCR (1U << 0)
+#define OS_SUPPORT_FSCR (1U << 0)
+
+/* Following are definitions for the match tables, not the DT binding itself */
+#define ISA_BASE 0
+
+#define HV_NONE 0
+#define HV_CUSTOM 1
+#define HV_HFSCR 2
+
+#define OS_NONE 0
+#define OS_CUSTOM 1
+#define OS_FSCR 2
+
+/* CPU bitmasks for match table */
+#define CPU_P8_DD1 (1U << 0)
+#define CPU_P8_DD2 (1U << 1)
+#define CPU_P9_DD1 (1U << 2)
+#define CPU_P9_DD2_0_1 (1U << 3) // 2.01 or 2.1
+#define CPU_P9P (1U << 4)
+#define CPU_P9_DD2_2 (1U << 5)
+#define CPU_P9_DD2_3 (1U << 6)
+#define CPU_P10 (1U << 7)
+
+#define CPU_P9_DD2 (CPU_P9_DD2_0_1|CPU_P9_DD2_2|CPU_P9_DD2_3|CPU_P9P)
+
+#define CPU_P8 (CPU_P8_DD1|CPU_P8_DD2)
+#define CPU_P9 (CPU_P9_DD1|CPU_P9_DD2|CPU_P9P)
+#define CPU_ALL (CPU_P8|CPU_P9|CPU_P10)
+
+struct cpu_feature {
+ const char *name;
+ uint32_t cpus_supported;
+ uint32_t isa;
+ uint32_t usable_privilege;
+ uint32_t hv_support;
+ uint32_t os_support;
+ uint32_t hfscr_bit_nr;
+ uint32_t fscr_bit_nr;
+ uint32_t hwcap_bit_nr;
+ const char *dependencies_names; /* space-delimited names */
+};
+
+/*
+ * The base (or NULL) cpu feature set is the CPU features available
+ * when no child nodes of the /cpus/ibm,powerpc-cpu-features node exist. The
+ * base feature set is POWER8 (ISAv2.07B), minus the features that are
+ * listed explicitly.
+ *
+ * XXX: currently, the feature dependencies are not necessarily captured
+ * exactly or completely. This is somewhat acceptable because all
+ * implementations must be aware of all these features.
+ */
+static const struct cpu_feature cpu_features_table[] = {
+ /*
+ * Big endian as in ISAv2.07B, MSR_LE=0
+ */
+ { "big-endian",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * Little endian as in ISAv2.07B, MSR_LE=1.
+ *
+ * When both big and little endian are defined, there is an LPCR ILE
+ * bit and implementation specific way to switch HILE mode, MSR_SLE,
+ * etc.
+ */
+ { "little-endian",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * MSR_HV=1 mode as in ISAv2.07B (i.e., hypervisor privileged
+ * instructions and registers).
+ */
+ { "hypervisor",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV,
+ HV_CUSTOM, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B interrupt vectors, registers, and control registers
+ * (e.g., AIL, ILE, HV, etc LPCR bits).
+ *
+ * This does not necessarily specify all possible interrupt types.
+ * floating-point, for example, requires some way to handle floating
+ * point exceptions, but the low level details of the interrupt handler
+ * are not a dependency there. There will always be *some* interrupt
+ * handler (and some way to provide memory management, etc.).
+ */
+ { "interrupt-facilities",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ { "smt",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, 14,
+ NULL, },
+
+ /*
+ * ISAv2.07B Program Priority Registers (PPR)
+ * PPR and associated control registers (e.g. RPR, PSPB),
+ * priority "or" instructions, etc.
+ */
+ { "program-priority-register",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B Book3S Chapter 5.7.9.1. Virtual Page Class Key Protection
+ * AMR, IAMR, AMOR, UAMOR, etc registers and MMU key bits.
+ */
+ { "virtual-page-class-key-protection",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B SAO storage control attribute
+ */
+ { "strong-access-ordering",
+ CPU_ALL & ~CPU_P9_DD1,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B no-execute storage control attribute
+ */
+ { "no-execute",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * Cache inhibited attribute supported on large pages.
+ */
+ { "cache-inhibited-large-page",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B Book3S Chapter 8. Debug Facilities
+ * CIEA, CIABR, DEAW, MEte, trace interrupt, etc.
+ * Except CFAR, branch tracing.
+ */
+ { "debug-facilities",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * DAWR1, DAWRX1 etc.
+ */
+ { "debug-facilities-v31",
+ CPU_P10,
+ ISA_V3_1, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B CFAR
+ */
+ { "come-from-address-register",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ "debug-facilities", },
+
+ /*
+ * ISAv2.07B Branch tracing (optional in ISA)
+ */
+ { "branch-tracing",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ "debug-facilities", },
+
+ /*
+ * ISAv2.07B Floating-point Facility
+ */
+ { "floating-point",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ PPC_BITLSHIFT(63), -1, 27,
+ NULL, },
+
+ /*
+ * ISAv2.07B Vector Facility (VMX)
+ */
+ { "vector",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ PPC_BITLSHIFT(62), -1, 28,
+ "floating-point", },
+
+ /*
+ * ISAv2.07B Vector-scalar Facility (VSX)
+ */
+ { "vector-scalar",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, 7,
+ "vector", },
+
+ { "vector-crypto",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, 57,
+ "vector", },
+
+ /*
+ * ISAv2.07B Quadword Load and Store instructions
+ * including lqarx/stqcx. instructions.
+ */
+ { "quadword-load-store",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B Binary Coded Decimal (BCD)
+ * BCD fixed point instructions
+ */
+ { "decimal-integer",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B Decimal floating-point Facility (DFP)
+ */
+ { "decimal-floating-point",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, 10,
+ "floating-point", },
+
+ /*
+ * ISAv2.07B
+ * DSCR, default data prefetch LPCR, etc
+ */
+ { "data-stream-control-register",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ PPC_BITLSHIFT(61), PPC_BITLSHIFT(61), 61,
+ NULL, },
+
+ /*
+ * ISAv2.07B Branch History Rolling Buffer (BHRB)
+ */
+ { "branch-history-rolling-buffer",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ PPC_BITLSHIFT(59), -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B Transactional Memory Facility (TM or HTM)
+ */
+ { "transactional-memory",
+ CPU_P8, /* P9 support is not enabled yet */
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ PPC_BITLSHIFT(58), -1, 62,
+ NULL, },
+
+ /*
+ * ISAv3.0B TM additions
+ * TEXASR bit 17, self-induced vs external footprint overflow
+ */
+ { "transactional-memory-v3",
+ 0,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "transactional-memory", },
+
+ /*
+ * ISAv2.07B Event-Based Branch Facility (EBB)
+ */
+ { "event-based-branch",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ PPC_BITLSHIFT(56), PPC_BITLSHIFT(56), 60,
+ NULL, },
+
+ /*
+ * ISAv2.07B Target Address Register (TAR)
+ */
+ { "target-address-register",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ PPC_BITLSHIFT(55), PPC_BITLSHIFT(55), 58,
+ NULL, },
+
+ /*
+ * ISAv2.07B Control Register (CTRL)
+ */
+ { "control-register",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B Book3S Chapter 11. Processor Control.
+ * msgsnd, msgsndp, doorbell, etc.
+ *
+ * ISAv3.0B is not compatible (different addressing, HFSCR required
+ * for msgsndp).
+ */
+ { "processor-control-facility",
+ CPU_P8_DD2, /* P8 DD1 has no dbell */
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B PURR, SPURR registers
+ */
+ { "processor-utilization-of-resources-register",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * POWER8 initiate coprocessor store word indexed (icswx) instruction
+ */
+ { "coprocessor-icswx",
+ CPU_P8,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B hash based MMU and all instructions, registers,
+ * data structures, exceptions, etc.
+ */
+ { "mmu-hash",
+ CPU_P8,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * POWER8 MCE / machine check exception.
+ */
+ { "machine-check-power8",
+ CPU_P8,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * POWER8 PMU / performance monitor unit.
+ */
+ { "performance-monitor-power8",
+ CPU_P8,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B alignment interrupts set DSISR register
+ *
+ * POWER CPUs do not use this, and it was removed in ISAv3.0B.
+ */
+ { "alignment-interrupt-dsisr",
+ 0,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B / POWER8 doze, nap, sleep, winkle instructions
+ * XXX: is Linux using some BookIV specific implementation details
+ * in nap handling? We have no POWER8 specific key here.
+ */
+ { "idle-nap",
+ CPU_P8,
+ ISA_BASE, USABLE_HV,
+ HV_CUSTOM, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B wait instruction
+ */
+ { "wait",
+ CPU_P8,
+ ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ { "subcore",
+ CPU_P8,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ "smt", },
+
+ /*
+ * ISAv3.0B radix based MMU
+ */
+ { "mmu-radix",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B hash based MMU, new hash pte format, PCTR, etc
+ */
+ { "mmu-hash-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B wait instruction
+ */
+ { "wait-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B stop idle instructions and registers
+ * XXX: Same question as for idle-nap
+ */
+ { "idle-stop",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B Hypervisor Virtualization Interrupt
+ * Also associated system registers, LPCR EE, HEIC, HVICE,
+ * system reset SRR1 reason, etc.
+ */
+ { "hypervisor-virtualization-interrupt",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV,
+ HV_CUSTOM, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * POWER9 MCE / machine check exception.
+ */
+ { "machine-check-power9",
+ CPU_P9,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * POWER10 MCE / machine check exception.
+ */
+ { "machine-check-power10",
+ CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * POWER9 PMU / performance monitor unit.
+ */
+ { "performance-monitor-power9",
+ CPU_P9,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * POWER10 PMU / performance monitor unit.
+ */
+ { "performance-monitor-power10",
+ CPU_P10,
+ ISA_V3_1, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B scv/rfscv system call instructions and exceptions, fscr bit
+ * etc.
+ */
+ { "system-call-vectored",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_OS|USABLE_PR,
+ HV_NONE, OS_CUSTOM,
+ -1, PPC_BITLSHIFT(51), 52,
+ NULL, },
+
+ /*
+ * ISAv3.0B Book3S Chapter 10. Processor Control.
+ * global msgsnd, msgsndp, msgsync, doorbell, etc.
+ */
+ { "processor-control-facility-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_CUSTOM, OS_NONE,
+ PPC_BITLSHIFT(53), -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B addpcis instruction
+ */
+ { "pc-relative-addressing",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv2.07B Book3S Chapter 7. Timer Facilities
+ * TB, VTB, DEC, HDEC, IC, etc registers and exceptions.
+ * Not including PURR or SPURR registers.
+ */
+ { "timer-facilities",
+ CPU_ALL,
+ ISA_BASE, USABLE_HV|USABLE_OS,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B Book3S Chapter 7. Timer Facilities
+ * Large decrementer and hypervisor decrementer
+ */
+ { "timer-facilities-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "timer-facilities", },
+
+ /*
+ * ISAv3.0B deliver a random number instruction (darn)
+ */
+ { "random-number-generator",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, 53,
+ NULL, },
+
+ /*
+ * ISAv3.0B fixed point instructions and registers
+ * multiply-add, modulo, count trailing zeroes, cmprb, cmpeqb,
+ * extswsli, mfvsrld, mtvsrdd, mtvsrws, addex, CA32, OV32,
+ * mcrxrx, setb
+ */
+ { "fixed-point-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ { "decimal-integer-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "fixed-point-v3 decimal-integer", },
+
+ /*
+ * ISAv3.0B lightweight mffs
+ */
+ { "floating-point-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "floating-point", },
+
+ { "decimal-floating-point-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "floating-point-v3 decimal-floating-point", },
+
+ { "vector-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "vector", },
+
+ { "vector-scalar-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "vector-v3 vector-scalar" },
+
+ { "vector-binary128",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, 54,
+ "vector-scalar-v3", },
+
+ { "vector-binary16",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "vector-v3", },
+
+ /*
+ * ISAv3.0B external exception for EBB
+ */
+ { "event-based-branch-v3",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ "event-based-branch", },
+
+ /*
+ * ISAv3.0B Atomic Memory Operations (AMO)
+ */
+ { "atomic-memory-operations",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B Copy-Paste Facility
+ */
+ { "copy-paste",
+ CPU_P9|CPU_P10,
+ ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * ISAv3.0B GSR SPR register
+ * POWER9 does not implement it
+ */
+ { "group-start-register",
+ 0,
+ ISA_V3_0B, USABLE_HV|USABLE_OS,
+ HV_NONE, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * Enable matrix multiply accumulate.
+ */
+ { "matrix-multiply-accumulate",
+ CPU_P10,
+ ISA_V3_1, USABLE_PR,
+ HV_CUSTOM, OS_CUSTOM,
+ -1, -1, 49,
+ NULL, },
+
+ /*
+ * Enable prefix instructions. Toolchains assume this is
+ * enabled when compiling for ISA 3.1.
+ */
+ { "prefix-instructions",
+ CPU_P10,
+ ISA_V3_1, USABLE_HV|USABLE_OS|USABLE_PR,
+ HV_HFSCR, OS_FSCR,
+ 13, 13, -1,
+ NULL, },
+
+ /*
+ * Due to hardware bugs in POWER9, the hypervisor needs to assist
+ * guests.
+ *
+ * Presence of this feature indicates presence of the bug.
+ *
+ * See linux kernel commit 4bb3c7a0208f
+ * and linux Documentation/powerpc/transactional_memory.txt
+ */
+ { "tm-suspend-hypervisor-assist",
+ CPU_P9_DD2_2|CPU_P9_DD2_3|CPU_P9P,
+ ISA_V3_0B, USABLE_HV,
+ HV_CUSTOM, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+
+ /*
+ * Due to hardware bugs in POWER9, the hypervisor can hit
+ * CPU bugs in the operations it needs to do for
+ * tm-suspend-hypervisor-assist.
+ *
+ * Presence of this "feature" means processor is affected by the bug.
+ *
+ * See linux kernel commit 4bb3c7a0208f
+ * and linux Documentation/powerpc/transactional_memory.txt
+ */
+ { "tm-suspend-xer-so-bug",
+ CPU_P9_DD2_2,
+ ISA_V3_0B, USABLE_HV,
+ HV_CUSTOM, OS_NONE,
+ -1, -1, -1,
+ NULL, },
+};
+
+static void add_cpu_feature_nodeps(struct dt_node *features,
+ const struct cpu_feature *f)
+{
+ struct dt_node *feature;
+
+ feature = dt_new(features, f->name);
+ assert(feature);
+
+ dt_add_property_cells(feature, "isa", f->isa);
+ dt_add_property_cells(feature, "usable-privilege", f->usable_privilege);
+
+ if (f->usable_privilege & USABLE_HV) {
+ if (f->hv_support != HV_NONE) {
+ uint32_t s = 0;
+ if (f->hv_support == HV_HFSCR)
+ s |= HV_SUPPORT_HFSCR;
+
+ dt_add_property_cells(feature, "hv-support", s);
+ if (f->hfscr_bit_nr != -1)
+ dt_add_property_cells(feature, "hfscr-bit-nr", f->hfscr_bit_nr);
+ } else {
+ assert(f->hfscr_bit_nr == -1);
+ }
+ }
+
+ if (f->usable_privilege & USABLE_OS) {
+ if (f->os_support != OS_NONE) {
+ uint32_t s = 0;
+ if (f->os_support == OS_FSCR)
+ s |= OS_SUPPORT_FSCR;
+ dt_add_property_cells(feature, "os-support", s);
+ if (f->fscr_bit_nr != -1)
+ dt_add_property_cells(feature, "fscr-bit-nr", f->fscr_bit_nr);
+ } else {
+ assert(f->fscr_bit_nr == -1);
+ }
+ }
+
+ if (f->usable_privilege & USABLE_PR) {
+ if (f->hwcap_bit_nr != -1)
+ dt_add_property_cells(feature, "hwcap-bit-nr", f->hwcap_bit_nr);
+ }
+
+ if (f->dependencies_names)
+ dt_add_property(feature, "dependencies", NULL, 0);
+}
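+
+/*
+ * Illustrative sketch (not generated output): with the table above, the
+ * "mmu-radix" entry ends up as roughly
+ *
+ * mmu-radix {
+ * isa = <3000>;
+ * usable-privilege = <0x6>; // HV | OS
+ * hv-support = <0x0>; // HV_CUSTOM, no HFSCR bit
+ * os-support = <0x0>; // OS_CUSTOM, no FSCR bit
+ * };
+ *
+ * under /cpus/ibm,powerpc-cpu-features, with a "dependencies" phandle list
+ * added by add_cpufeatures_dependencies() only for entries that name
+ * dependencies.
+ */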
+
+static void add_cpufeatures_dependencies(struct dt_node *features)
+{
+ struct dt_node *feature;
+
+ dt_for_each_node(features, feature) {
+ const struct cpu_feature *f = NULL;
+ const char *deps_names;
+ struct dt_property *deps;
+ int nr_deps;
+ int i;
+
+ /* Find features with dependencies */
+
+ deps = __dt_find_property(feature, "dependencies");
+ if (!deps)
+ continue;
+
+ /* Find the matching cpu table */
+ for (i = 0; i < ARRAY_SIZE(cpu_features_table); i++) {
+ f = &cpu_features_table[i];
+ if (!strcmp(f->name, feature->name))
+ break;
+ }
+ assert(f);
+ assert(f->dependencies_names);
+
+ /*
+ * Count the number of dependencies and allocate space
+ * for their phandles in the property.
+ */
+ deps_names = f->dependencies_names;
+ nr_deps = strcount(deps_names, " ") + 1;
+ dt_resize_property(&deps, nr_deps * sizeof(u32));
+
+ DBG("feature %s has %d dependencies (%s)\n", f->name, nr_deps, deps_names);
+ /*
+ * For each one, find the feature it depends on, then advance
+ * to the next name.
+ */
+ for (i = 0; i < nr_deps; i++) {
+ struct dt_node *dep;
+ int len;
+
+ if (nr_deps - i == 1)
+ len = strlen(deps_names);
+ else
+ len = strchr(deps_names, ' ') - deps_names;
+
+ dt_for_each_node(features, dep) {
+ if (!strncmp(deps_names, dep->name, len))
+ goto found_dep;
+ }
+
+ prlog(PR_ERR, "CPUFT: feature %s dependencies not found\n", f->name);
+ break;
+found_dep:
+ DBG(" %s found dep (%s)\n", f->name, dep->name);
+ dt_property_set_cell(deps, i, dep->phandle);
+
+ /* Advance over the name + delimiter */
+ deps_names += len + 1;
+ }
+ }
+}
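+
+/*
+ * Example of the pass above: "decimal-integer-v3" names
+ * "fixed-point-v3 decimal-integer" as its dependencies, so its
+ * "dependencies" property is resized to two cells and filled with the
+ * phandles of those two feature nodes.
+ */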
+
+static void add_cpufeatures(struct dt_node *cpus,
+ uint32_t cpu_feature_isa, uint32_t cpu_feature_cpu,
+ const char *cpu_name)
+{
+ struct dt_node *features;
+ int i;
+
+ DBG("creating cpufeatures for cpu:%d isa:%d\n", cpu_feature_cpu, cpu_feature_isa);
+
+ features = dt_new(cpus, "ibm,powerpc-cpu-features");
+ assert(features);
+
+ dt_add_property_cells(features, "isa", cpu_feature_isa);
+
+ dt_add_property_string(features, "device_type", "cpu-features");
+ dt_add_property_string(features, "compatible", "ibm,powerpc-cpu-features");
+ dt_add_property_string(features, "display-name", cpu_name);
+
+ /* add without dependencies */
+ for (i = 0; i < ARRAY_SIZE(cpu_features_table); i++) {
+ const struct cpu_feature *f = &cpu_features_table[i];
+
+ if (f->cpus_supported & cpu_feature_cpu) {
+ DBG(" '%s'\n", f->name);
+ add_cpu_feature_nodeps(features, f);
+ }
+ }
+
+ /* dependency construction pass */
+ add_cpufeatures_dependencies(features);
+}
+
+void dt_add_cpufeatures(struct dt_node *root)
+{
+ int version;
+ uint32_t cpu_feature_isa = 0;
+ uint32_t cpu_feature_cpu = 0;
+ struct dt_node *cpus;
+ const char *cpu_name = NULL;
+
+ version = mfspr(SPR_PVR);
+ switch(PVR_TYPE(version)) {
+ case PVR_TYPE_P8:
+ if (!cpu_name)
+ cpu_name = "POWER8";
+ /* fallthrough */
+ case PVR_TYPE_P8E:
+ if (!cpu_name)
+ cpu_name = "POWER8E";
+ cpu_feature_isa = ISA_V2_07B;
+ if (PVR_VERS_MAJ(version) == 1)
+ cpu_feature_cpu = CPU_P8_DD1;
+ else
+ cpu_feature_cpu = CPU_P8_DD2;
+ break;
+ case PVR_TYPE_P8NVL:
+ cpu_name = "POWER8NVL";
+ cpu_feature_isa = ISA_V2_07B;
+ cpu_feature_cpu = CPU_P8_DD2;
+ break;
+ case PVR_TYPE_P9:
+ if (!cpu_name)
+ cpu_name = "POWER9";
+
+ cpu_feature_isa = ISA_V3_0B;
+ if (is_power9n(version) &&
+ (PVR_VERS_MAJ(version) == 2)) {
+ /* P9N DD2.x */
+ switch (PVR_VERS_MIN(version)) {
+ case 0:
+ case 1:
+ cpu_feature_cpu = CPU_P9_DD2_0_1;
+ break;
+ case 2:
+ cpu_feature_cpu = CPU_P9_DD2_2;
+ break;
+ case 3:
+ cpu_feature_cpu = CPU_P9_DD2_3;
+ break;
+ default:
+ assert(0);
+ }
+ } else if (is_power9c(version) &&
+ (PVR_VERS_MAJ(version) == 1)) {
+ /* P9C DD1.x */
+ switch (PVR_VERS_MIN(version)) {
+ case 1:
+ /* Cumulus DD1.1 => Nimbus DD2.1 */
+ cpu_feature_cpu = CPU_P9_DD2_0_1;
+ break;
+ case 2:
+ /* Cumulus DD1.2 */
+ cpu_feature_cpu = CPU_P9_DD2_2;
+ break;
+ case 3:
+ /* Cumulus DD1.3 */
+ cpu_feature_cpu = CPU_P9_DD2_3;
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ assert(0);
+ }
+
+ break;
+ case PVR_TYPE_P9P:
+ if (!cpu_name)
+ cpu_name = "POWER9P";
+
+ cpu_feature_isa = ISA_V3_0B;
+ cpu_feature_cpu = CPU_P9P;
+ break;
+ case PVR_TYPE_P10:
+ if (!cpu_name)
+ cpu_name = "POWER10";
+
+ cpu_feature_isa = ISA_V3_1;
+ cpu_feature_cpu = CPU_P10;
+ break;
+ default:
+ return;
+ }
+
+ cpus = dt_new_check(root, "cpus");
+
+ add_cpufeatures(cpus, cpu_feature_isa, cpu_feature_cpu, cpu_name);
+}
diff --git a/roms/skiboot/core/device.c b/roms/skiboot/core/device.c
new file mode 100644
index 000000000..b102dd973
--- /dev/null
+++ b/roms/skiboot/core/device.c
@@ -0,0 +1,1128 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Manipulate the device tree
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <stdarg.h>
+#include <device.h>
+#include <stdlib.h>
+#include <skiboot.h>
+#include <libfdt/libfdt.h>
+#include <libfdt/libfdt_internal.h>
+#include <ccan/str/str.h>
+#include <ccan/endian/endian.h>
+#include <inttypes.h>
+
+/* Used to give unique handles. */
+u32 last_phandle = 0;
+
+struct dt_node *dt_root;
+struct dt_node *dt_chosen;
+
+static const char *take_name(const char *name)
+{
+ if (!is_rodata(name) && !(name = strdup(name))) {
+ prerror("Failed to allocate copy of name");
+ abort();
+ }
+ return name;
+}
+
+static void free_name(const char *name)
+{
+ if (!is_rodata(name))
+ free((char *)name);
+}
+
+static struct dt_node *new_node(const char *name)
+{
+ struct dt_node *node = malloc(sizeof *node);
+ if (!node) {
+ prerror("Failed to allocate node\n");
+ abort();
+ }
+
+ node->name = take_name(name);
+ node->parent = NULL;
+ list_head_init(&node->properties);
+ list_head_init(&node->children);
+ /* FIXME: locking? */
+ node->phandle = new_phandle();
+ return node;
+}
+
+struct dt_node *dt_new_root(const char *name)
+{
+ return new_node(name);
+}
+
+static const char *get_unitname(const struct dt_node *node)
+{
+ const char *c = strchr(node->name, '@');
+
+ if (!c)
+ return NULL;
+
+ return c + 1;
+}
+
+int dt_cmp_subnodes(const struct dt_node *a, const struct dt_node *b)
+{
+ const char *a_unit = get_unitname(a);
+ const char *b_unit = get_unitname(b);
+
+ ptrdiff_t basenamelen = a_unit - a->name;
+
+ /* sort hex unit addresses by number */
+ if (a_unit && b_unit && !strncmp(a->name, b->name, basenamelen)) {
+ unsigned long long a_num, b_num;
+ char *a_end, *b_end;
+
+ a_num = strtoul(a_unit, &a_end, 16);
+ b_num = strtoul(b_unit, &b_end, 16);
+
+ /* only compare if the unit addr parsed correctly */
+ if (*a_end == 0 && *b_end == 0)
+ return (a_num > b_num) - (a_num < b_num);
+ }
+
+ return strcmp(a->name, b->name);
+}
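+
+/*
+ * For example, "core@9" sorts before "core@10" here (0x9 < 0x10), whereas a
+ * plain strcmp() of the full names would put "core@10" first.
+ */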
+
+bool dt_attach_root(struct dt_node *parent, struct dt_node *root)
+{
+ struct dt_node *node;
+
+ assert(!root->parent);
+
+ if (list_empty(&parent->children)) {
+ list_add(&parent->children, &root->list);
+ root->parent = parent;
+
+ return true;
+ }
+
+ dt_for_each_child(parent, node) {
+ int cmp = dt_cmp_subnodes(node, root);
+
+ /* Look for duplicates */
+ if (cmp == 0) {
+ prerror("DT: %s failed, duplicate %s\n",
+ __func__, root->name);
+ return false;
+ }
+
+ /* insert before the first node that's larger than
+ * the node we're inserting */
+ if (cmp > 0)
+ break;
+ }
+
+ list_add_before(&parent->children, &root->list, &node->list);
+ root->parent = parent;
+
+ return true;
+}
+
+static inline void dt_destroy(struct dt_node *dn)
+{
+ if (!dn)
+ return;
+
+ free_name(dn->name);
+ free(dn);
+}
+
+struct dt_node *dt_new(struct dt_node *parent, const char *name)
+{
+ struct dt_node *new;
+ assert(parent);
+
+ new = new_node(name);
+ if (!dt_attach_root(parent, new)) {
+ dt_destroy(new);
+ return NULL;
+ }
+ return new;
+}
+
+/*
+ * Low-level variant. We export this because there are "weird" address
+ * formats, such as LPC/ISA bus addresses, which have a letter to identify
+ * which bus space the address is in.
+ */
+struct dt_node *__dt_find_by_name_addr(struct dt_node *parent, const char *name,
+ const char *addr)
+{
+ struct dt_node *node;
+
+ if (list_empty(&parent->children))
+ return NULL;
+
+ dt_for_each_child(parent, node) {
+ const char *unit = get_unitname(node);
+ int len;
+
+ if (!unit)
+ continue;
+
+ /* match the name */
+ len = (int) (unit - node->name) - 1;
+ if (strncmp(node->name, name, len))
+ continue;
+
+ /* match the unit */
+ if (strcmp(unit, addr) == 0)
+ return node;
+ }
+
+ dt_for_each_child(parent, node) {
+ struct dt_node *ret = __dt_find_by_name_addr(node, name, addr);
+
+ if (ret)
+ return ret;
+ }
+
+ return NULL;
+}
+
+struct dt_node *dt_find_by_name_addr(struct dt_node *parent, const char *name,
+ uint64_t addr)
+{
+ char addr_str[16 + 1]; /* 16 hex digits of a 64-bit value + NUL */
+ snprintf(addr_str, sizeof(addr_str), "%" PRIx64, addr);
+
+ return __dt_find_by_name_addr(parent, name, addr_str);
+}
+
+struct dt_node *dt_new_addr(struct dt_node *parent, const char *name,
+ uint64_t addr)
+{
+ char *lname;
+ struct dt_node *new;
+ size_t len;
+
+ assert(parent);
+ len = strlen(name) + STR_MAX_CHARS(addr) + 2;
+ lname = malloc(len);
+ if (!lname)
+ return NULL;
+ snprintf(lname, len, "%s@%llx", name, (long long)addr);
+ new = new_node(lname);
+ free(lname);
+ if (!dt_attach_root(parent, new)) {
+ dt_destroy(new);
+ return NULL;
+ }
+ return new;
+}
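+
+/*
+ * e.g. dt_new_addr(parent, "memory", 0x100000000ull) creates a child node
+ * named "memory@100000000" (the unit address is printed as lower-case hex
+ * with no leading zeroes).
+ */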
+
+struct dt_node *dt_new_2addr(struct dt_node *parent, const char *name,
+ uint64_t addr0, uint64_t addr1)
+{
+ char *lname;
+ struct dt_node *new;
+ size_t len;
+ assert(parent);
+
+ len = strlen(name) + 2*STR_MAX_CHARS(addr0) + 3;
+ lname = malloc(len);
+ if (!lname)
+ return NULL;
+ snprintf(lname, len, "%s@%llx,%llx",
+ name, (long long)addr0, (long long)addr1);
+ new = new_node(lname);
+ free(lname);
+ if (!dt_attach_root(parent, new)) {
+ dt_destroy(new);
+ return NULL;
+ }
+ return new;
+}
+
+static struct dt_node *__dt_copy(struct dt_node *node, struct dt_node *parent,
+ bool root)
+{
+ struct dt_property *prop, *new_prop;
+ struct dt_node *new_node, *child;
+
+ new_node = dt_new(parent, node->name);
+ if (!new_node)
+ return NULL;
+
+ list_for_each(&node->properties, prop, list) {
+ new_prop = dt_add_property(new_node, prop->name, prop->prop,
+ prop->len);
+ if (!new_prop)
+ goto fail;
+ }
+
+ list_for_each(&node->children, child, list) {
+ child = __dt_copy(child, new_node, false);
+ if (!child)
+ goto fail;
+ }
+
+ return new_node;
+
+fail:
+ /* dt_free will recurse for us, so only free when we unwind to the
+ * top-level failure */
+ if (root)
+ dt_free(new_node);
+ return NULL;
+}
+
+struct dt_node *dt_copy(struct dt_node *node, struct dt_node *parent)
+{
+ return __dt_copy(node, parent, true);
+}
+
+char *dt_get_path(const struct dt_node *node)
+{
+ unsigned int len = 0;
+ const struct dt_node *n;
+ char *path, *p;
+
+ /* Dealing with NULL is for test/debug purposes */
+ if (!node)
+ return strdup("<NULL>");
+
+ for (n = node; n; n = n->parent) {
+ len += strlen(n->name);
+ if (n->parent || n == node)
+ len++;
+ }
+ path = zalloc(len + 1);
+ assert(path);
+ p = path + len;
+ for (n = node; n; n = n->parent) {
+ len = strlen(n->name);
+ p -= len;
+ memcpy(p, n->name, len);
+ if (n->parent || n == node)
+ *(--p) = '/';
+ }
+ assert(p == path);
+
+ return p;
+}
+
+static const char *__dt_path_split(const char *p,
+ const char **namep, unsigned int *namel,
+ const char **addrp, unsigned int *addrl)
+{
+ const char *at, *sl;
+
+ *namel = *addrl = 0;
+
+ /* Skip initial '/' */
+ while (*p == '/')
+ p++;
+
+ /* Check empty path */
+ if (*p == 0)
+ return p;
+
+ at = strchr(p, '@');
+ sl = strchr(p, '/');
+ if (sl == NULL)
+ sl = p + strlen(p);
+ if (sl < at)
+ at = NULL;
+ if (at) {
+ *addrp = at + 1;
+ *addrl = sl - at - 1;
+ }
+ *namep = p;
+ *namel = at ? (at - p) : (sl - p);
+
+ return sl;
+}
+
+struct dt_node *dt_find_by_path(struct dt_node *root, const char *path)
+{
+ struct dt_node *n;
+ const char *pn, *pa, *p = path, *nn, *na;
+ unsigned int pnl, pal, nnl, nal;
+ bool match;
+
+ /* Walk path components */
+ while (*p) {
+ /* Extract next path component */
+ p = __dt_path_split(p, &pn, &pnl, &pa, &pal);
+ if (pnl == 0 && pal == 0)
+ break;
+
+ /* Compare with each child node */
+ match = false;
+ list_for_each(&root->children, n, list) {
+ match = true;
+ __dt_path_split(n->name, &nn, &nnl, &na, &nal);
+ if (pnl && (pnl != nnl || strncmp(pn, nn, pnl)))
+ match = false;
+ if (pal && (pal != nal || strncmp(pa, na, pal)))
+ match = false;
+ if (match) {
+ root = n;
+ break;
+ }
+ }
+
+ /* No child match */
+ if (!match)
+ return NULL;
+ }
+ return root;
+}
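+
+/*
+ * Note: a path component without a unit address matches on the name part
+ * only, so e.g. "/cpus/cpu" resolves to the first "cpu@N" child found,
+ * while "/cpus/cpu@10" must match both name and unit address.
+ */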
+
+struct dt_node *dt_find_by_name(struct dt_node *root, const char *name)
+{
+ struct dt_node *child, *match;
+
+ list_for_each(&root->children, child, list) {
+ if (!strcmp(child->name, name))
+ return child;
+
+ match = dt_find_by_name(child, name);
+ if (match)
+ return match;
+ }
+
+ return NULL;
+}
+
+
+struct dt_node *dt_new_check(struct dt_node *parent, const char *name)
+{
+ struct dt_node *node = dt_find_by_name(parent, name);
+
+ if (!node) {
+ node = dt_new(parent, name);
+ assert(node);
+ }
+
+ return node;
+}
+
+
+struct dt_node *dt_find_by_phandle(struct dt_node *root, u32 phandle)
+{
+ struct dt_node *node;
+
+ dt_for_each_node(root, node)
+ if (node->phandle == phandle)
+ return node;
+ return NULL;
+}
+
+static struct dt_property *new_property(struct dt_node *node,
+ const char *name, size_t size)
+{
+ struct dt_property *p = malloc(sizeof(*p) + size);
+ char *path;
+
+ if (!p) {
+ path = dt_get_path(node);
+ prerror("Failed to allocate property \"%s\" for %s of %zu bytes\n",
+ name, path, size);
+ free(path);
+ abort();
+ }
+ if (dt_find_property(node, name)) {
+ path = dt_get_path(node);
+ prerror("Duplicate property \"%s\" in node %s\n",
+ name, path);
+ free(path);
+ abort();
+
+ }
+
+ p->name = take_name(name);
+ p->len = size;
+ list_add_tail(&node->properties, &p->list);
+ return p;
+}
+
+struct dt_property *dt_add_property(struct dt_node *node,
+ const char *name,
+ const void *val, size_t size)
+{
+ struct dt_property *p;
+
+ /*
+ * Filter out phandle properties, we re-generate them
+ * when flattening
+ */
+ if (strcmp(name, "linux,phandle") == 0 ||
+ strcmp(name, "phandle") == 0) {
+ assert(size == 4);
+ node->phandle = *(const u32 *)val;
+ if (node->phandle >= last_phandle)
+ set_last_phandle(node->phandle);
+ return NULL;
+ }
+
+ p = new_property(node, name, size);
+ if (size)
+ memcpy(p->prop, val, size);
+ return p;
+}
+
+void dt_resize_property(struct dt_property **prop, size_t len)
+{
+ size_t new_len = sizeof(**prop) + len;
+
+ *prop = realloc(*prop, new_len);
+ (*prop)->len = len;
+
+ /* Fix up linked lists in case we moved. (note: not an empty list). */
+ (*prop)->list.next->prev = &(*prop)->list;
+ (*prop)->list.prev->next = &(*prop)->list;
+}
+
+struct dt_property *dt_add_property_string(struct dt_node *node,
+ const char *name,
+ const char *value)
+{
+ size_t len = 0;
+ if (value)
+ len = strlen(value) + 1;
+ return dt_add_property(node, name, value, len);
+}
+
+struct dt_property *dt_add_property_nstr(struct dt_node *node,
+ const char *name,
+ const char *value, unsigned int vlen)
+{
+ struct dt_property *p;
+ char *tmp = zalloc(vlen + 1);
+
+ if (!tmp)
+ return NULL;
+
+ strncpy(tmp, value, vlen);
+ p = dt_add_property(node, name, tmp, strlen(tmp)+1);
+ free(tmp);
+
+ return p;
+}
+
+struct dt_property *__dt_add_property_cells(struct dt_node *node,
+ const char *name,
+ int count, ...)
+{
+ struct dt_property *p;
+ fdt32_t *val;
+ unsigned int i;
+ va_list args;
+
+ p = new_property(node, name, count * sizeof(u32));
+ val = (fdt32_t *)p->prop;
+ va_start(args, count);
+ for (i = 0; i < count; i++)
+ val[i] = cpu_to_fdt32(va_arg(args, u32));
+ va_end(args);
+ return p;
+}
+
+struct dt_property *__dt_add_property_u64s(struct dt_node *node,
+ const char *name,
+ int count, ...)
+{
+ struct dt_property *p;
+ fdt64_t *val;
+ unsigned int i;
+ va_list args;
+
+ p = new_property(node, name, count * sizeof(u64));
+ val = (fdt64_t *)p->prop;
+ va_start(args, count);
+ for (i = 0; i < count; i++)
+ val[i] = cpu_to_fdt64(va_arg(args, u64));
+ va_end(args);
+ return p;
+}
+
+struct dt_property *__dt_add_property_strings(struct dt_node *node,
+ const char *name,
+ int count, ...)
+{
+ struct dt_property *p;
+ unsigned int i, size;
+ va_list args;
+ const char *sstr;
+ char *s;
+
+ va_start(args, count);
+ for (i = size = 0; i < count; i++) {
+ sstr = va_arg(args, const char *);
+ if (sstr)
+ size += strlen(sstr) + 1;
+ }
+ va_end(args);
+ if (!size)
+ size = 1;
+ p = new_property(node, name, size);
+ s = (char *)p->prop;
+ *s = 0;
+ va_start(args, count);
+ for (i = 0; i < count; i++) {
+ sstr = va_arg(args, const char *);
+ if (sstr) {
+ strcpy(s, sstr);
+ s = s + strlen(sstr) + 1;
+ }
+ }
+ va_end(args);
+ return p;
+}
+
+void dt_del_property(struct dt_node *node, struct dt_property *prop)
+{
+ list_del_from(&node->properties, &prop->list);
+ free_name(prop->name);
+ free(prop);
+}
+
+u32 dt_property_get_cell(const struct dt_property *prop, u32 index)
+{
+ assert(prop->len >= (index+1)*sizeof(u32));
+ /* Always aligned, so this works. */
+ return fdt32_to_cpu(((const fdt32_t *)prop->prop)[index]);
+}
+
+u64 dt_property_get_u64(const struct dt_property *prop, u32 index)
+{
+ assert(prop->len >= (index+1)*sizeof(u64));
+ /* Always aligned, so this works. */
+ return fdt64_to_cpu(((const fdt64_t *)prop->prop)[index]);
+}
+
+void dt_property_set_cell(struct dt_property *prop, u32 index, u32 val)
+{
+ assert(prop->len >= (index+1)*sizeof(u32));
+ /* Always aligned, so this works. */
+ ((fdt32_t *)prop->prop)[index] = cpu_to_fdt32(val);
+}
+
+/* First child of this node. */
+struct dt_node *dt_first(const struct dt_node *root)
+{
+ return list_top(&root->children, struct dt_node, list);
+}
+
+/* Return next node, or NULL. */
+struct dt_node *dt_next(const struct dt_node *root,
+ const struct dt_node *prev)
+{
+ if (!prev) {
+ struct dt_node *first = dt_first(root);
+
+ if (!first)
+ return NULL;
+ else
+ return first;
+ }
+
+ /* Children? */
+ if (!list_empty(&prev->children))
+ return dt_first(prev);
+
+ do {
+ /* More siblings? */
+ if (prev->list.next != &prev->parent->children.n)
+			return list_entry(prev->list.next, struct dt_node, list);
+
+ /* No more siblings, move up to parent. */
+ prev = prev->parent;
+ } while (prev != root);
+
+ return NULL;
+}
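+
+/*
+ * A minimal traversal sketch: starting from NULL, dt_next() visits every
+ * node below root in depth-first order, e.g.
+ *
+ *	struct dt_node *n = NULL;
+ *
+ *	while ((n = dt_next(root, n)))
+ *		do_something(n);
+ *
+ * (do_something() is a placeholder for whatever per-node work is needed.)
+ */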
+
+struct dt_property *__dt_find_property(struct dt_node *node, const char *name)
+{
+ struct dt_property *i;
+
+ list_for_each(&node->properties, i, list)
+ if (strcmp(i->name, name) == 0)
+ return i;
+ return NULL;
+}
+
+const struct dt_property *dt_find_property(const struct dt_node *node,
+ const char *name)
+{
+ const struct dt_property *i;
+
+ list_for_each(&node->properties, i, list)
+ if (strcmp(i->name, name) == 0)
+ return i;
+ return NULL;
+}
+
+void dt_check_del_prop(struct dt_node *node, const char *name)
+{
+ struct dt_property *p;
+
+ p = __dt_find_property(node, name);
+ if (p)
+ dt_del_property(node, p);
+}
+
+const struct dt_property *dt_require_property(const struct dt_node *node,
+ const char *name, int wanted_len)
+{
+ const struct dt_property *p = dt_find_property(node, name);
+
+ if (!p) {
+ const char *path = dt_get_path(node);
+
+ prerror("DT: Missing required property %s/%s\n",
+ path, name);
+ assert(false);
+ }
+ if (wanted_len >= 0 && p->len != wanted_len) {
+ const char *path = dt_get_path(node);
+
+ prerror("DT: Unexpected property length %s/%s\n",
+ path, name);
+ prerror("DT: Expected len: %d got len: %zu\n",
+ wanted_len, p->len);
+ assert(false);
+ }
+
+ return p;
+}
+
+bool dt_has_node_property(const struct dt_node *node,
+ const char *name, const char *val)
+{
+ const struct dt_property *p = dt_find_property(node, name);
+
+ if (!p)
+ return false;
+ if (!val)
+ return true;
+
+ return p->len == strlen(val) + 1 && memcmp(p->prop, val, p->len) == 0;
+}
+
+bool dt_prop_find_string(const struct dt_property *p, const char *s)
+{
+ const char *c, *end;
+
+ if (!p)
+ return false;
+ c = p->prop;
+ end = c + p->len;
+
+ while(c < end) {
+ if (!strcasecmp(s, c))
+ return true;
+ c += strlen(c) + 1;
+ }
+ return false;
+}
+
+bool dt_node_is_compatible(const struct dt_node *node, const char *compat)
+{
+ const struct dt_property *p = dt_find_property(node, "compatible");
+
+ return dt_prop_find_string(p, compat);
+}
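+
+/*
+ * "compatible" is a list of NUL-terminated strings packed back to back.
+ * For example, a property whose bytes are "ibm,power9-xscom\0ibm,xscom\0"
+ * matches dt_node_is_compatible(node, "ibm,power9-xscom") as well as
+ * dt_node_is_compatible(node, "ibm,xscom"); the match is case-insensitive.
+ */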
+
+struct dt_node *dt_find_compatible_node(struct dt_node *root,
+ struct dt_node *prev,
+ const char *compat)
+{
+ struct dt_node *node = prev;
+
+ while ((node = dt_next(root, node)))
+ if (dt_node_is_compatible(node, compat))
+ return node;
+ return NULL;
+}
+
+u64 dt_prop_get_u64(const struct dt_node *node, const char *prop)
+{
+ const struct dt_property *p = dt_require_property(node, prop, 8);
+
+ return ((u64)dt_property_get_cell(p, 0) << 32)
+ | dt_property_get_cell(p, 1);
+}
+
+u64 dt_prop_get_u64_def(const struct dt_node *node, const char *prop, u64 def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ if (!p)
+ return def;
+
+ return ((u64)dt_property_get_cell(p, 0) << 32)
+ | dt_property_get_cell(p, 1);
+}
+
+u32 dt_prop_get_u32(const struct dt_node *node, const char *prop)
+{
+ const struct dt_property *p = dt_require_property(node, prop, 4);
+
+ return dt_property_get_cell(p, 0);
+}
+
+u32 dt_prop_get_u32_def(const struct dt_node *node, const char *prop, u32 def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ if (!p)
+ return def;
+
+ return dt_property_get_cell(p, 0);
+}
+
+const void *dt_prop_get(const struct dt_node *node, const char *prop)
+{
+ const struct dt_property *p = dt_require_property(node, prop, -1);
+
+ return p->prop;
+}
+
+const void *dt_prop_get_def(const struct dt_node *node, const char *prop,
+ void *def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ return p ? p->prop : def;
+}
+
+const void *dt_prop_get_def_size(const struct dt_node *node, const char *prop,
+ void *def, size_t *len)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+ *len = 0;
+ if (p)
+ *len = p->len;
+
+ return p ? p->prop : def;
+}
+
+u32 dt_prop_get_cell(const struct dt_node *node, const char *prop, u32 cell)
+{
+ const struct dt_property *p = dt_require_property(node, prop, -1);
+
+ return dt_property_get_cell(p, cell);
+}
+
+u32 dt_prop_get_cell_def(const struct dt_node *node, const char *prop,
+ u32 cell, u32 def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ if (!p)
+ return def;
+
+ return dt_property_get_cell(p, cell);
+}
+
+void dt_free(struct dt_node *node)
+{
+ struct dt_node *child;
+ struct dt_property *p;
+
+ while ((child = list_top(&node->children, struct dt_node, list)))
+ dt_free(child);
+
+ while ((p = list_pop(&node->properties, struct dt_property, list))) {
+ free_name(p->name);
+ free(p);
+ }
+
+ if (node->parent)
+ list_del_from(&node->parent->children, &node->list);
+ dt_destroy(node);
+}
+
+int dt_expand_node(struct dt_node *node, const void *fdt, int fdt_node)
+{
+ const struct fdt_property *prop;
+ int offset, nextoffset, err;
+ struct dt_node *child;
+ const char *name;
+ uint32_t tag;
+
+ if (((err = fdt_check_header(fdt)) != 0)
+ || ((err = fdt_check_node_offset_(fdt, fdt_node)) < 0)) {
+ prerror("FDT: Error %d parsing node 0x%x\n", err, fdt_node);
+ return -1;
+ }
+
+ nextoffset = err;
+ do {
+ offset = nextoffset;
+
+ tag = fdt_next_tag(fdt, offset, &nextoffset);
+ switch (tag) {
+ case FDT_PROP:
+ prop = fdt_offset_ptr_(fdt, offset);
+ name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
+ dt_add_property(node, name, prop->data,
+ fdt32_to_cpu(prop->len));
+ break;
+ case FDT_BEGIN_NODE:
+ name = fdt_get_name(fdt, offset, NULL);
+ child = dt_new_root(name);
+ assert(child);
+ nextoffset = dt_expand_node(child, fdt, offset);
+
+ /*
+			 * This may fail in the case of a duplicate; keep
+			 * going for now, though we may ultimately want to
+			 * assert here.
+ */
+ if (!dt_attach_root(node, child))
+ /**
+ * @fwts-label DTHasDuplicateNodeID
+ * @fwts-advice OPAL will parse the Flattened
+				 * Device Tree (FDT), which can be generated
+ * from different firmware sources. During
+ * expansion of FDT, OPAL observed a node
+ * assigned multiple times (a duplicate). This
+ * indicates either a Hostboot bug *OR*, more
+ * likely, a bug in the platform XML. Check
+ * the platform XML for duplicate IDs for
+ * this type of device. Because of this
+ * duplicate node, OPAL won't add the hardware
+ * device found with a duplicate node ID into
+ * DT, rendering the corresponding device not
+ * functional.
+ */
+ prlog(PR_ERR, "DT: Found duplicate node: %s\n",
+ child->name);
+ break;
+ case FDT_END:
+ return -1;
+ }
+ } while (tag != FDT_END_NODE);
+
+ return nextoffset;
+}
+
+void dt_expand(const void *fdt)
+{
+ prlog(PR_DEBUG, "FDT: Parsing fdt @%p\n", fdt);
+
+ if (dt_expand_node(dt_root, fdt, 0) < 0)
+ abort();
+}
+
+u64 dt_get_number(const void *pdata, unsigned int cells)
+{
+ const __be32 *p = pdata;
+ u64 ret = 0;
+
+ while(cells--)
+ ret = (ret << 32) | be32_to_cpu(*(p++));
+ return ret;
+}
+
+u32 dt_n_address_cells(const struct dt_node *node)
+{
+ if (!node->parent)
+ return 0;
+ return dt_prop_get_u32_def(node->parent, "#address-cells", 2);
+}
+
+u32 dt_n_size_cells(const struct dt_node *node)
+{
+ if (!node->parent)
+ return 0;
+ return dt_prop_get_u32_def(node->parent, "#size-cells", 1);
+}
+
+u64 dt_get_address(const struct dt_node *node, unsigned int index,
+ u64 *out_size)
+{
+ const struct dt_property *p;
+ u32 na = dt_n_address_cells(node);
+ u32 ns = dt_n_size_cells(node);
+ u32 pos, n;
+
+ p = dt_require_property(node, "reg", -1);
+ n = (na + ns) * sizeof(u32);
+ pos = n * index;
+ assert((pos + n) <= p->len);
+ if (out_size)
+ *out_size = dt_get_number(p->prop + pos + na * sizeof(u32), ns);
+ return dt_get_number(p->prop + pos, na);
+}
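+
+/*
+ * Worked example: for a node whose parent has #address-cells = 2 and
+ * #size-cells = 2, a property
+ *
+ *	reg = <0x6 0x00100000 0x0 0x2000>;
+ *
+ * makes dt_get_address(node, 0, &size) return 0x600100000 with
+ * size set to 0x2000.
+ */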
+
+u32 __dt_get_chip_id(const struct dt_node *node)
+{
+ const struct dt_property *prop;
+
+ for (; node; node = node->parent) {
+ prop = dt_find_property(node, "ibm,chip-id");
+ if (prop)
+ return dt_property_get_cell(prop, 0);
+ }
+ return 0xffffffff;
+}
+
+u32 dt_get_chip_id(const struct dt_node *node)
+{
+ u32 id = __dt_get_chip_id(node);
+ assert(id != 0xffffffff);
+ return id;
+}
+
+struct dt_node *dt_find_compatible_node_on_chip(struct dt_node *root,
+ struct dt_node *prev,
+ const char *compat,
+ uint32_t chip_id)
+{
+ struct dt_node *node = prev;
+
+ while ((node = dt_next(root, node))) {
+ u32 cid = __dt_get_chip_id(node);
+ if (cid == chip_id &&
+ dt_node_is_compatible(node, compat))
+ return node;
+ }
+ return NULL;
+}
+
+unsigned int dt_count_addresses(const struct dt_node *node)
+{
+ const struct dt_property *p;
+ u32 na = dt_n_address_cells(node);
+ u32 ns = dt_n_size_cells(node);
+ u32 n;
+
+ p = dt_require_property(node, "reg", -1);
+ n = (na + ns) * sizeof(u32);
+
+ if (n == 0)
+ return 0;
+
+ return p->len / n;
+}
+
+/* Translates an address from the given bus into its parent's address space */
+static u64 dt_translate_one(const struct dt_node *bus, u64 addr)
+{
+ u32 ranges_count, na, ns, parent_na;
+ const struct dt_property *p;
+ const u32 *ranges;
+ int i, stride;
+
+ assert(bus->parent);
+
+ na = dt_prop_get_u32_def(bus, "#address-cells", 2);
+ ns = dt_prop_get_u32_def(bus, "#size-cells", 2);
+ parent_na = dt_n_address_cells(bus);
+
+ stride = na + ns + parent_na;
+
+ /*
+ * FIXME: We should handle arbitrary length addresses, rather than
+ * limiting it to 64bit. If someone wants/needs that they
+ * can implement the bignum math for it :)
+ */
+ assert(na <= 2);
+ assert(parent_na <= 2);
+
+	/* We should never translate an address without a "ranges" property */
+ p = dt_require_property(bus, "ranges", -1);
+
+ ranges = (u32 *) &p->prop;
+ ranges_count = (p->len / 4) / (na + parent_na + ns);
+
+ /* An empty ranges property implies 1-1 translation */
+ if (ranges_count == 0)
+ return addr;
+
+ for (i = 0; i < ranges_count; i++, ranges += stride) {
+ /* ranges format: <child base> <parent base> <size> */
+ u64 child_base = dt_get_number(ranges, na);
+ u64 parent_base = dt_get_number(ranges + na, parent_na);
+ u64 size = dt_get_number(ranges + na + parent_na, ns);
+
+ if (addr >= child_base && addr < child_base + size)
+ return (addr - child_base) + parent_base;
+ }
+
+	/* Input address was outside any of our mapped ranges */
+ return 0;
+}
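+
+/*
+ * Worked example: for a bus with #address-cells = 1 and #size-cells = 1
+ * under a parent with #address-cells = 1, a property
+ *
+ *	ranges = <0x0 0x80000000 0x10000>;
+ *
+ * translates child address 0x100 to parent address 0x80000100, while a
+ * child address at or above 0x10000 falls outside the range and yields 0.
+ */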
+
+u64 dt_translate_address(const struct dt_node *node, unsigned int index,
+ u64 *out_size)
+{
+ u64 addr = dt_get_address(node, index, NULL);
+ struct dt_node *bus = node->parent;
+
+ /* FIXME: One day we will probably want to use this, but for now just
+	 * force it to be zero since we only support returning a u64 or u32
+ */
+ assert(!out_size);
+
+ /* apply each translation until we hit the root bus */
+ while (bus->parent) {
+ addr = dt_translate_one(bus, addr);
+ bus = bus->parent;
+ }
+
+ return addr;
+}
+
+bool dt_node_is_enabled(struct dt_node *node)
+{
+ const struct dt_property *p = dt_find_property(node, "status");
+
+ if (!p)
+ return true;
+
+ return p->len > 1 && p->prop[0] == 'o' && p->prop[1] == 'k';
+}
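+
+/*
+ * Example: a node with status = "okay" (or "ok") is enabled, a node with
+ * status = "disabled" is not, and a node with no status property at all
+ * is treated as enabled.
+ */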
+
+/*
+ * Function to fix up the phandles in the subtree.
+ */
+void dt_adjust_subtree_phandle(struct dt_node *dev,
+ const char** (get_properties_to_fix)(struct dt_node *n))
+{
+ struct dt_node *node;
+ struct dt_property *prop;
+ u32 phandle, max_phandle = 0, import_phandle = new_phandle();
+ __be32 p;
+ const char **name;
+
+ dt_for_each_node(dev, node) {
+ const char **props_to_update;
+ node->phandle += import_phandle;
+
+ /*
+ * calculate max_phandle(new_tree), needed to update
+ * last_phandle.
+ */
+ if (node->phandle >= max_phandle)
+ max_phandle = node->phandle;
+
+ props_to_update = get_properties_to_fix(node);
+ if (!props_to_update)
+ continue;
+ for (name = props_to_update; *name != NULL; name++) {
+ prop = __dt_find_property(node, *name);
+ if (!prop)
+ continue;
+ phandle = dt_prop_get_u32(node, *name);
+ phandle += import_phandle;
+ p = cpu_to_be32(phandle);
+ memcpy((char *)&prop->prop, &p, prop->len);
+ }
+ }
+
+ set_last_phandle(max_phandle);
+}
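+
+/*
+ * A minimal callback sketch (the property name below is purely
+ * hypothetical): the callback returns a NULL-terminated list of property
+ * names in the given node whose values are phandles to be rewritten, or
+ * NULL if the node has nothing to fix up.
+ *
+ *	static const char *fixup_props[] = { "ibm,some-phandle-ref", NULL };
+ *
+ *	static const char **props_to_fix(struct dt_node *n)
+ *	{
+ *		return fixup_props;
+ *	}
+ */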
diff --git a/roms/skiboot/core/direct-controls.c b/roms/skiboot/core/direct-controls.c
new file mode 100644
index 000000000..37bcf9826
--- /dev/null
+++ b/roms/skiboot/core/direct-controls.c
@@ -0,0 +1,1161 @@
+// SPDX-License-Identifier: Apache-2.0
+/*
+ * Directly control CPU cores/threads. SRESET, special wakeup, etc
+ *
+ * Copyright 2017-2019 IBM Corp.
+ */
+
+#include <direct-controls.h>
+#include <skiboot.h>
+#include <opal.h>
+#include <cpu.h>
+#include <xscom.h>
+#include <xscom-p8-regs.h>
+#include <xscom-p9-regs.h>
+#include <xscom-p10-regs.h>
+#include <timebase.h>
+#include <chip.h>
+
+
+/**************** mambo direct controls ****************/
+
+extern unsigned long callthru_tcl(const char *str, int len);
+
+static void mambo_sreset_cpu(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ char tcl_cmd[50];
+
+ snprintf(tcl_cmd, sizeof(tcl_cmd),
+ "mysim cpu %i:%i:%i start_thread 0x100",
+ chip_id, core_id, thread_id);
+ callthru_tcl(tcl_cmd, strlen(tcl_cmd));
+}
+
+static void mambo_stop_cpu(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ char tcl_cmd[50];
+
+ snprintf(tcl_cmd, sizeof(tcl_cmd),
+ "mysim cpu %i:%i:%i stop_thread",
+ chip_id, core_id, thread_id);
+ callthru_tcl(tcl_cmd, strlen(tcl_cmd));
+}
+
+/**************** POWER8 direct controls ****************/
+
+static int p8_core_set_special_wakeup(struct cpu_thread *cpu)
+{
+ uint64_t val, poll_target, stamp;
+ uint32_t core_id;
+ int rc;
+
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
+
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
+
+ prlog(PR_DEBUG, "RESET Waking up core 0x%x\n", core_id);
+
+ /*
+	 * The original HWP reads the XSCOM first but ignores the result
+	 * and error; let's do the same until I know for sure that it is
+	 * not necessary.
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP),
+ PPC_BIT(0));
+ if (rc) {
+ prerror("RESET: XSCOM error %d asserting special"
+ " wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+
+ /*
+ * HWP uses the history for Perf register here, dunno why it uses
+ * that one instead of the pHyp one, maybe to avoid clobbering it...
+ *
+	 * In any case, it does that to check for run/nap vs. sleep/winkle/other
+ * to decide whether to poll on checkstop or not. Since we don't deal
+ * with checkstop conditions here, we ignore that part.
+ */
+
+ /*
+	 * Now poll for completion of special wakeup. The HWP is nasty here:
+	 * it will poll at 5ms intervals for up to 200ms. This is not quite
+	 * acceptable for us at runtime, at least not until we have the
+	 * ability to "context switch" HBRT. In practice, because we don't
+	 * winkle, it will never take that long, so we shorten the polling
+	 * interval to 1us. However, we do have to keep the same timeout.
+ *
+ * We don't use time_wait_ms() either for now as we don't want to
+ * poll the FSP here.
+ */
+ stamp = mftb();
+ poll_target = stamp + msecs_to_tb(200);
+ val = 0;
+ while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) {
+ /* Wait 1 us */
+ time_wait_us(1);
+
+ /* Read PM state */
+ rc = xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0),
+ &val);
+ if (rc) {
+ prerror("RESET: XSCOM error %d reading PM state on"
+ " 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+ /* Check timeout */
+ if (mftb() > poll_target)
+ break;
+ }
+
+ /* Success ? */
+ if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) {
+ uint64_t now = mftb();
+ prlog(PR_TRACE, "RESET: Special wakeup complete after %ld us\n",
+ tb_to_usecs(now - stamp));
+ return 0;
+ }
+
+ /*
+ * We timed out ...
+ *
+ * HWP has a complex workaround for HW255321 which affects
+ * Murano DD1 and Venice DD1. Ignore that for now
+ *
+ * Instead we just dump some XSCOMs for error logging
+ */
+ prerror("RESET: Timeout on special wakeup of 0x%0x\n", cpu->pir);
+ prerror("RESET: PM0 = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+ prerror("RESET: SPC_WKUP = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_IDLE_STATE_HISTORY_PHYP),
+ &val);
+ prerror("RESET: HISTORY = 0x%016llx\n", val);
+
+ return OPAL_HARDWARE;
+}
+
+static int p8_core_clear_special_wakeup(struct cpu_thread *cpu)
+{
+ uint64_t val;
+ uint32_t core_id;
+ int rc;
+
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
+
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
+
+ prlog(PR_DEBUG, "RESET: Releasing core 0x%x wakeup\n", core_id);
+
+ /*
+	 * The original HWP reads the XSCOM first but ignores the result
+	 * and error; let's do the same until I know for sure that it is
+	 * not necessary.
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP), 0);
+ if (rc) {
+ prerror("RESET: XSCOM error %d deasserting"
+ " special wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+
+ /*
+	 * The original HWP reads the XSCOM again with the comment
+ * "This puts an inherent delay in the propagation of the reset
+ * transition"
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ return 0;
+}
+
+static int p8_stop_thread(struct cpu_thread *cpu)
+{
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t xscom_addr;
+
+ xscom_addr = XSCOM_ADDR_P8_EX(core_id,
+ P8_EX_TCTL_DIRECT_CONTROLS(thread_id));
+
+ if (xscom_write(chip_id, xscom_addr, P8_DIRECT_CTL_STOP)) {
+ prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+ " Unable to write EX_TCTL_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static int p8_sreset_thread(struct cpu_thread *cpu)
+{
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t xscom_addr;
+
+ xscom_addr = XSCOM_ADDR_P8_EX(core_id,
+ P8_EX_TCTL_DIRECT_CONTROLS(thread_id));
+
+ if (xscom_write(chip_id, xscom_addr, P8_DIRECT_CTL_PRENAP)) {
+ prlog(PR_ERR, "Could not prenap thread %u:%u:%u:"
+ " Unable to write EX_TCTL_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+ if (xscom_write(chip_id, xscom_addr, P8_DIRECT_CTL_SRESET)) {
+ prlog(PR_ERR, "Could not sreset thread %u:%u:%u:"
+ " Unable to write EX_TCTL_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+
+/**************** POWER9 direct controls ****************/
+
+/* Long running instructions may take time to complete. Timeout 100ms */
+#define P9_QUIESCE_POLL_INTERVAL 100
+#define P9_QUIESCE_TIMEOUT 100000
+
+/* Waking may take up to 5ms for deepest sleep states. Set timeout to 100ms */
+#define P9_SPWKUP_POLL_INTERVAL 100
+#define P9_SPWKUP_TIMEOUT 100000
+
+/*
+ * This implements direct control facilities of processor cores and threads
+ * using scom registers.
+ */
+
+static int p9_core_is_gated(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t sshhyp_addr;
+ uint64_t val;
+
+ sshhyp_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, P9_EC_PPM_SSHHYP);
+
+ if (xscom_read(chip_id, sshhyp_addr, &val)) {
+ prlog(PR_ERR, "Could not query core gated on %u:%u:"
+ " Unable to read PPM_SSHHYP.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+
+ return !!(val & P9_CORE_GATED);
+}
+
+static int p9_core_set_special_wakeup(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t swake_addr;
+ uint32_t sshhyp_addr;
+ uint64_t val;
+ int i;
+
+ swake_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, EC_PPM_SPECIAL_WKUP_HYP);
+ sshhyp_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, P9_EC_PPM_SSHHYP);
+
+ if (xscom_write(chip_id, swake_addr, P9_SPWKUP_SET)) {
+ prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+ " Unable to write PPM_SPECIAL_WKUP_HYP.\n",
+ chip_id, core_id);
+ goto out_fail;
+ }
+
+ for (i = 0; i < P9_SPWKUP_TIMEOUT / P9_SPWKUP_POLL_INTERVAL; i++) {
+ if (xscom_read(chip_id, sshhyp_addr, &val)) {
+ prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+ " Unable to read PPM_SSHHYP.\n",
+ chip_id, core_id);
+ goto out_fail;
+ }
+ if (val & P9_SPECIAL_WKUP_DONE) {
+ /*
+ * CORE_GATED will be unset on a successful special
+ * wakeup of the core which indicates that the core is
+ * out of stop state. If CORE_GATED is still set then
+ * raise error.
+ */
+ if (p9_core_is_gated(cpu)) {
+ /* Deassert spwu for this strange error */
+ xscom_write(chip_id, swake_addr, 0);
+ prlog(PR_ERR, "Failed special wakeup on %u:%u"
+ " as CORE_GATED is set\n",
+ chip_id, core_id);
+ goto out_fail;
+ } else {
+ return 0;
+ }
+ }
+ time_wait_us(P9_SPWKUP_POLL_INTERVAL);
+ }
+
+ prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+ " timeout waiting for SPECIAL_WKUP_DONE.\n",
+ chip_id, core_id);
+
+out_fail:
+ /*
+ * As per the special wakeup protocol we should not de-assert
+ * the special wakeup on the core until WAKEUP_DONE is set.
+ * So even on error do not de-assert.
+ */
+ return OPAL_HARDWARE;
+}
+
+static int p9_core_clear_special_wakeup(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t swake_addr;
+
+ swake_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, EC_PPM_SPECIAL_WKUP_HYP);
+
+ /*
+ * De-assert special wakeup after a small delay.
+ * The delay may help avoid problems setting and clearing special
+ * wakeup back-to-back. This should be confirmed.
+ */
+ time_wait_us(1);
+ if (xscom_write(chip_id, swake_addr, 0)) {
+ prlog(PR_ERR, "Could not clear special wakeup on %u:%u:"
+ " Unable to write PPM_SPECIAL_WKUP_HYP.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+
+ /*
+	 * Don't wait for the de-assert to complete, as other components
+	 * could have requested special wakeup. Wait for 10ms to
+	 * avoid back-to-back asserts.
+ */
+ time_wait_us(10000);
+ return 0;
+}
+
+static int p9_thread_quiesced(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t ras_addr;
+ uint64_t ras_status;
+
+ ras_addr = XSCOM_ADDR_P9_EC(core_id, P9_RAS_STATUS);
+ if (xscom_read(chip_id, ras_addr, &ras_status)) {
+ prlog(PR_ERR, "Could not check thread state on %u:%u:"
+ " Unable to read RAS_STATUS.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+
+ /*
+ * This returns true when the thread is quiesced and all
+ * instructions completed. For sreset this may not be necessary,
+ * but we may want to use instruction ramming or stepping
+ * direct controls where it is important.
+ */
+ if ((ras_status & P9_THREAD_QUIESCED(thread_id))
+ == P9_THREAD_QUIESCED(thread_id))
+ return 1;
+
+ return 0;
+}
+
+static int p9_cont_thread(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t cts_addr;
+ uint32_t ti_addr;
+ uint32_t dctl_addr;
+ uint64_t core_thread_state;
+ uint64_t thread_info;
+ bool active, stop;
+ int rc;
+
+ rc = p9_thread_quiesced(cpu);
+ if (rc < 0)
+ return rc;
+ if (!rc) {
+ prlog(PR_ERR, "Could not cont thread %u:%u:%u:"
+ " Thread is not quiesced.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_BUSY;
+ }
+
+ cts_addr = XSCOM_ADDR_P9_EC(core_id, P9_CORE_THREAD_STATE);
+ ti_addr = XSCOM_ADDR_P9_EC(core_id, P9_THREAD_INFO);
+ dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS);
+
+ if (xscom_read(chip_id, cts_addr, &core_thread_state)) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to read CORE_THREAD_STATE.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+ if (core_thread_state & PPC_BIT(56 + thread_id))
+ stop = true;
+ else
+ stop = false;
+
+ if (xscom_read(chip_id, ti_addr, &thread_info)) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to read THREAD_INFO.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+ if (thread_info & PPC_BIT(thread_id))
+ active = true;
+ else
+ active = false;
+
+ if (!active || stop) {
+ if (xscom_write(chip_id, dctl_addr, P9_THREAD_CLEAR_MAINT(thread_id))) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ }
+ } else {
+ if (xscom_write(chip_id, dctl_addr, P9_THREAD_CONT(thread_id))) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ }
+ }
+
+ return 0;
+}
+
+static int p9_stop_thread(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t dctl_addr;
+ int rc;
+ int i;
+
+ dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS);
+
+ rc = p9_thread_quiesced(cpu);
+ if (rc < 0)
+ return rc;
+ if (rc) {
+ prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+ " Thread is quiesced already.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_BUSY;
+ }
+
+ if (xscom_write(chip_id, dctl_addr, P9_THREAD_STOP(thread_id))) {
+ prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+
+ for (i = 0; i < P9_QUIESCE_TIMEOUT / P9_QUIESCE_POLL_INTERVAL; i++) {
+ int rc = p9_thread_quiesced(cpu);
+ if (rc < 0)
+ break;
+ if (rc)
+ return 0;
+
+ time_wait_us(P9_QUIESCE_POLL_INTERVAL);
+ }
+
+ prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+ " Unable to quiesce thread.\n",
+ chip_id, core_id, thread_id);
+
+ return OPAL_HARDWARE;
+}
+
+static int p9_sreset_thread(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t dctl_addr;
+
+ dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS);
+
+ if (xscom_write(chip_id, dctl_addr, P9_THREAD_SRESET(thread_id))) {
+ prlog(PR_ERR, "Could not sreset thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+
+ return 0;
+}
+
+/**************** POWER10 direct controls ****************/
+
+/* Long running instructions may take time to complete. Timeout 100ms */
+#define P10_QUIESCE_POLL_INTERVAL 100
+#define P10_QUIESCE_TIMEOUT 100000
+
+/* Waking may take up to 5ms for deepest sleep states. Set timeout to 100ms */
+#define P10_SPWU_POLL_INTERVAL 100
+#define P10_SPWU_TIMEOUT 100000
+
+/*
+ * This implements direct control facilities of processor cores and threads
+ * using scom registers.
+ */
+static int p10_core_is_gated(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t ssh_addr;
+ uint64_t val;
+
+ ssh_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SSH_HYP);
+
+ if (xscom_read(chip_id, ssh_addr, &val)) {
+ prlog(PR_ERR, "Could not query core gated on %u:%u:"
+ " Unable to read QME_SSH_HYP.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+
+ return !!(val & P10_SSH_CORE_GATED);
+}
+
+
+static int p10_core_set_special_wakeup(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t spwu_addr, ssh_addr;
+ uint64_t val;
+ int i;
+
+ /* P10 could use SPWU_HYP done bit instead of SSH? */
+ spwu_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SPWU_HYP);
+ ssh_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SSH_HYP);
+
+ if (xscom_write(chip_id, spwu_addr, P10_SPWU_REQ)) {
+ prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+ " Unable to write QME_SPWU_HYP.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+
+ for (i = 0; i < P10_SPWU_TIMEOUT / P10_SPWU_POLL_INTERVAL; i++) {
+ if (xscom_read(chip_id, ssh_addr, &val)) {
+ prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+ " Unable to read QME_SSH_HYP.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+ if (val & P10_SSH_SPWU_DONE) {
+ /*
+ * CORE_GATED will be unset on a successful special
+ * wakeup of the core which indicates that the core is
+ * out of stop state. If CORE_GATED is still set then
+ * check SPWU register and raise error only if SPWU_DONE
+ * is not set, else print a warning and consider SPWU
+ * operation as successful.
+			 * This is in conjunction with a microcode bug, which
+			 * calls out the fact that SPWU can succeed in the case
+			 * where the core is gated but the SPWU_HYP bit is set.
+ */
+ if (p10_core_is_gated(cpu)) {
+				if (xscom_read(chip_id, spwu_addr, &val)) {
+ prlog(PR_ERR, "Core %u:%u:"
+ " unable to read QME_SPWU_HYP\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+ if (val & P10_SPWU_DONE) {
+ /*
+ * If SPWU DONE bit is set then
+ * SPWU operation is complete
+ */
+ prlog(PR_DEBUG, "Special wakeup on "
+ "%u:%u: core remains gated while"
+ " SPWU_HYP DONE set\n",
+ chip_id, core_id);
+ return 0;
+ }
+ /* Deassert spwu for this strange error */
+ xscom_write(chip_id, spwu_addr, 0);
+ prlog(PR_ERR,
+ "Failed special wakeup on %u:%u"
+ " core remains gated.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ } else {
+ return 0;
+ }
+ }
+ time_wait_us(P10_SPWU_POLL_INTERVAL);
+ }
+
+ prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+ " operation timeout.\n",
+ chip_id, core_id);
+ /*
+ * As per the special wakeup protocol we should not de-assert
+ * the special wakeup on the core until WAKEUP_DONE is set.
+ * So even on error do not de-assert.
+ */
+
+ return OPAL_HARDWARE;
+}
+
+static int p10_core_clear_special_wakeup(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t spwu_addr;
+
+ spwu_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SPWU_HYP);
+
+	/* If SPWU problems show up, add a small delay here: time_wait_us(1); */
+ if (xscom_write(chip_id, spwu_addr, 0)) {
+ prlog(PR_ERR, "Could not clear special wakeup on %u:%u:"
+ " Unable to write QME_SPWU_HYP.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+
+ return 0;
+}
+
+static int p10_thread_quiesced(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t ras_addr;
+ uint64_t ras_status;
+
+ ras_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_RAS_STATUS);
+ if (xscom_read(chip_id, ras_addr, &ras_status)) {
+ prlog(PR_ERR, "Could not check thread state on %u:%u:"
+ " Unable to read EC_RAS_STATUS.\n",
+ chip_id, core_id);
+ return OPAL_HARDWARE;
+ }
+
+ /*
+ * p10_thread_stop for the purpose of sreset wants QUIESCED
+ * and MAINT bits set. Step, RAM, etc. need more, but we don't
+ * use those in skiboot.
+ *
+	 * P10 could try waiting for more here in case of errors.
+ */
+ if (!(ras_status & P10_THREAD_QUIESCED(thread_id)))
+ return 0;
+
+ if (!(ras_status & P10_THREAD_MAINT(thread_id)))
+ return 0;
+
+ return 1;
+}
+
+static int p10_cont_thread(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t cts_addr;
+ uint32_t ti_addr;
+ uint32_t dctl_addr;
+ uint64_t core_thread_state;
+ uint64_t thread_info;
+ bool active, stop;
+ int rc;
+ int i;
+
+ rc = p10_thread_quiesced(cpu);
+ if (rc < 0)
+ return rc;
+ if (!rc) {
+ prlog(PR_ERR, "Could not cont thread %u:%u:%u:"
+ " Thread is not quiesced.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_BUSY;
+ }
+
+ cts_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_CORE_THREAD_STATE);
+ ti_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_THREAD_INFO);
+ dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS);
+
+ if (xscom_read(chip_id, cts_addr, &core_thread_state)) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to read EC_CORE_THREAD_STATE.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+ if (core_thread_state & P10_THREAD_STOPPED(thread_id))
+ stop = true;
+ else
+ stop = false;
+
+ if (xscom_read(chip_id, ti_addr, &thread_info)) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to read EC_THREAD_INFO.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+ if (thread_info & P10_THREAD_ACTIVE(thread_id))
+ active = true;
+ else
+ active = false;
+
+ if (!active || stop) {
+ if (xscom_write(chip_id, dctl_addr, P10_THREAD_CLEAR_MAINT(thread_id))) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ }
+ } else {
+ if (xscom_write(chip_id, dctl_addr, P10_THREAD_START(thread_id))) {
+ prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ }
+ }
+
+ for (i = 0; i < P10_QUIESCE_TIMEOUT / P10_QUIESCE_POLL_INTERVAL; i++) {
+ int rc = p10_thread_quiesced(cpu);
+ if (rc < 0)
+ break;
+ if (!rc)
+ return 0;
+
+ time_wait_us(P10_QUIESCE_POLL_INTERVAL);
+ }
+
+ prlog(PR_ERR, "Could not start thread %u:%u:%u:"
+ " Unable to start thread.\n",
+ chip_id, core_id, thread_id);
+
+ return OPAL_HARDWARE;
+}
+
+static int p10_stop_thread(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t dctl_addr;
+ int rc;
+ int i;
+
+ dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS);
+
+ rc = p10_thread_quiesced(cpu);
+ if (rc < 0)
+ return rc;
+ if (rc) {
+ prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+ " Thread is quiesced already.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_BUSY;
+ }
+
+ if (xscom_write(chip_id, dctl_addr, P10_THREAD_STOP(thread_id))) {
+ prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+
+ for (i = 0; i < P10_QUIESCE_TIMEOUT / P10_QUIESCE_POLL_INTERVAL; i++) {
+ int rc = p10_thread_quiesced(cpu);
+ if (rc < 0)
+ break;
+ if (rc)
+ return 0;
+
+ time_wait_us(P10_QUIESCE_POLL_INTERVAL);
+ }
+
+ prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+ " Unable to quiesce thread.\n",
+ chip_id, core_id, thread_id);
+
+ return OPAL_HARDWARE;
+}
+
+static int p10_sreset_thread(struct cpu_thread *cpu)
+{
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t dctl_addr;
+
+ dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS);
+
+ if (xscom_write(chip_id, dctl_addr, P10_THREAD_SRESET(thread_id))) {
+ prlog(PR_ERR, "Could not sreset thread %u:%u:%u:"
+ " Unable to write EC_DIRECT_CONTROLS.\n",
+ chip_id, core_id, thread_id);
+ return OPAL_HARDWARE;
+ }
+
+ return 0;
+}
+
+/**************** generic direct controls ****************/
+
+int dctl_set_special_wakeup(struct cpu_thread *t)
+{
+ struct cpu_thread *c = t->ec_primary;
+ int rc = OPAL_SUCCESS;
+
+ if (proc_gen == proc_gen_unknown)
+ return OPAL_UNSUPPORTED;
+
+ lock(&c->dctl_lock);
+ if (c->special_wakeup_count == 0) {
+ if (proc_gen == proc_gen_p10)
+ rc = p10_core_set_special_wakeup(c);
+ else if (proc_gen == proc_gen_p9)
+ rc = p9_core_set_special_wakeup(c);
+ else /* (proc_gen == proc_gen_p8) */
+ rc = p8_core_set_special_wakeup(c);
+ }
+ if (!rc)
+ c->special_wakeup_count++;
+ unlock(&c->dctl_lock);
+
+ return rc;
+}
+
+int dctl_clear_special_wakeup(struct cpu_thread *t)
+{
+ struct cpu_thread *c = t->ec_primary;
+ int rc = OPAL_SUCCESS;
+
+ if (proc_gen == proc_gen_unknown)
+ return OPAL_UNSUPPORTED;
+
+ lock(&c->dctl_lock);
+ if (!c->special_wakeup_count)
+ goto out;
+ if (c->special_wakeup_count == 1) {
+ if (proc_gen == proc_gen_p10)
+ rc = p10_core_clear_special_wakeup(c);
+ else if (proc_gen == proc_gen_p9)
+ rc = p9_core_clear_special_wakeup(c);
+ else /* (proc_gen == proc_gen_p8) */
+ rc = p8_core_clear_special_wakeup(c);
+ }
+ if (!rc)
+ c->special_wakeup_count--;
+out:
+ unlock(&c->dctl_lock);
+
+ return rc;
+}
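+
+/*
+ * A minimal usage sketch: set/clear calls nest per core, so only the
+ * first dctl_set_special_wakeup() touches the hardware and only the
+ * matching last dctl_clear_special_wakeup() releases it. Callers must
+ * keep the two balanced, e.g.
+ *
+ *	if (dctl_set_special_wakeup(t) == OPAL_SUCCESS) {
+ *		(do the XSCOM accesses that need the core awake)
+ *		dctl_clear_special_wakeup(t);
+ *	}
+ */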
+
+int dctl_core_is_gated(struct cpu_thread *t)
+{
+ struct cpu_thread *c = t->primary;
+
+ if (proc_gen == proc_gen_p10)
+ return p10_core_is_gated(c);
+ else if (proc_gen == proc_gen_p9)
+ return p9_core_is_gated(c);
+ else
+ return OPAL_UNSUPPORTED;
+}
+
+static int dctl_stop(struct cpu_thread *t)
+{
+ struct cpu_thread *c = t->ec_primary;
+ int rc;
+
+ lock(&c->dctl_lock);
+ if (t->dctl_stopped) {
+ unlock(&c->dctl_lock);
+ return OPAL_BUSY;
+ }
+ if (proc_gen == proc_gen_p10)
+ rc = p10_stop_thread(t);
+ else if (proc_gen == proc_gen_p9)
+ rc = p9_stop_thread(t);
+ else /* (proc_gen == proc_gen_p8) */
+ rc = p8_stop_thread(t);
+ if (!rc)
+ t->dctl_stopped = true;
+ unlock(&c->dctl_lock);
+
+ return rc;
+}
+
+static int dctl_cont(struct cpu_thread *t)
+{
+ struct cpu_thread *c = t->primary;
+ int rc;
+
+ if (proc_gen != proc_gen_p10 && proc_gen != proc_gen_p9)
+ return OPAL_UNSUPPORTED;
+
+ lock(&c->dctl_lock);
+ if (!t->dctl_stopped) {
+ unlock(&c->dctl_lock);
+ return OPAL_BUSY;
+ }
+ if (proc_gen == proc_gen_p10)
+ rc = p10_cont_thread(t);
+ else /* (proc_gen == proc_gen_p9) */
+ rc = p9_cont_thread(t);
+ if (!rc)
+ t->dctl_stopped = false;
+ unlock(&c->dctl_lock);
+
+ return rc;
+}
+
+/*
+ * NOTE:
+ * The POWER8 sreset does not provide SRR registers, so it can be used
+ * for fast reboot, but not OPAL_SIGNAL_SYSTEM_RESET or anywhere that is
+ * expected to return. For now, callers beware.
+ */
+static int dctl_sreset(struct cpu_thread *t)
+{
+ struct cpu_thread *c = t->ec_primary;
+ int rc;
+
+ lock(&c->dctl_lock);
+ if (!t->dctl_stopped) {
+ unlock(&c->dctl_lock);
+ return OPAL_BUSY;
+ }
+ if (proc_gen == proc_gen_p10)
+ rc = p10_sreset_thread(t);
+ else if (proc_gen == proc_gen_p9)
+ rc = p9_sreset_thread(t);
+ else /* (proc_gen == proc_gen_p8) */
+ rc = p8_sreset_thread(t);
+ if (!rc)
+ t->dctl_stopped = false;
+ unlock(&c->dctl_lock);
+
+ return rc;
+}
+
+
+/**************** fast reboot API ****************/
+
+int sreset_all_prepare(void)
+{
+ struct cpu_thread *cpu;
+
+ if (proc_gen == proc_gen_unknown)
+ return OPAL_UNSUPPORTED;
+
+ prlog(PR_DEBUG, "RESET: Resetting from cpu: 0x%x (core 0x%x)\n",
+ this_cpu()->pir, pir_to_core_id(this_cpu()->pir));
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ for_each_ungarded_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+ mambo_stop_cpu(cpu);
+ }
+ return OPAL_SUCCESS;
+ }
+
+	/* Assert special wakeup on all cores. Only on operational cores. */
+ for_each_ungarded_primary(cpu) {
+ if (dctl_set_special_wakeup(cpu) != OPAL_SUCCESS)
+ return OPAL_HARDWARE;
+ }
+
+ prlog(PR_DEBUG, "RESET: Stopping the world...\n");
+
+ /* Put everybody in stop except myself */
+ for_each_ungarded_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+ if (dctl_stop(cpu) != OPAL_SUCCESS)
+ return OPAL_HARDWARE;
+
+ }
+
+ return OPAL_SUCCESS;
+}
+
+void sreset_all_finish(void)
+{
+ struct cpu_thread *cpu;
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+ return;
+
+ for_each_ungarded_primary(cpu)
+ dctl_clear_special_wakeup(cpu);
+}
+
+int sreset_all_others(void)
+{
+ struct cpu_thread *cpu;
+
+ prlog(PR_DEBUG, "RESET: Resetting all threads but self...\n");
+
+ /*
+ * mambo should actually implement stop as well, and implement
+ * the dctl_ helpers properly. Currently it's racy just sresetting.
+ */
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ for_each_ungarded_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+ mambo_sreset_cpu(cpu);
+ }
+ return OPAL_SUCCESS;
+ }
+
+ for_each_ungarded_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+ if (dctl_sreset(cpu) != OPAL_SUCCESS)
+ return OPAL_HARDWARE;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+
+/**************** OPAL_SIGNAL_SYSTEM_RESET API ****************/
+
+/*
+ * This provides a way for the host to raise system reset exceptions
+ * on other threads using direct control scoms on POWER9 and POWER10.
+ *
+ * We assert special wakeup on the core first.
+ * Then stop target thread and wait for it to quiesce.
+ * Then sreset the target thread, which resumes execution on that thread.
+ * Then de-assert special wakeup on the core.
+ */
+static int64_t do_sreset_cpu(struct cpu_thread *cpu)
+{
+ int rc;
+
+ if (this_cpu() == cpu) {
+ prlog(PR_ERR, "SRESET: Unable to reset self\n");
+ return OPAL_PARAMETER;
+ }
+
+ rc = dctl_set_special_wakeup(cpu);
+ if (rc)
+ return rc;
+
+ rc = dctl_stop(cpu);
+ if (rc)
+ goto out_spwk;
+
+ rc = dctl_sreset(cpu);
+ if (rc)
+ goto out_cont;
+
+ dctl_clear_special_wakeup(cpu);
+
+ return 0;
+
+out_cont:
+ dctl_cont(cpu);
+out_spwk:
+ dctl_clear_special_wakeup(cpu);
+
+ return rc;
+}
+
+static struct lock sreset_lock = LOCK_UNLOCKED;
+
+int64_t opal_signal_system_reset(int cpu_nr)
+{
+ struct cpu_thread *cpu;
+ int64_t ret;
+
+ if (proc_gen != proc_gen_p9 && proc_gen != proc_gen_p10)
+ return OPAL_UNSUPPORTED;
+
+ /*
+ * Broadcasts unsupported. Not clear what threads should be
+ * signaled, so it's better for the OS to perform one-at-a-time
+ * for now.
+ */
+ if (cpu_nr < 0)
+ return OPAL_CONSTRAINED;
+
+ /* Reset a single CPU */
+ cpu = find_cpu_by_server(cpu_nr);
+ if (!cpu) {
+ prlog(PR_ERR, "SRESET: could not find cpu by server %d\n", cpu_nr);
+ return OPAL_PARAMETER;
+ }
+
+ lock(&sreset_lock);
+ ret = do_sreset_cpu(cpu);
+ unlock(&sreset_lock);
+
+ return ret;
+}
+
+void direct_controls_init(void)
+{
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+ return;
+
+ if (proc_gen != proc_gen_p9 && proc_gen != proc_gen_p10)
+ return;
+
+ opal_register(OPAL_SIGNAL_SYSTEM_RESET, opal_signal_system_reset, 1);
+}
diff --git a/roms/skiboot/core/errorlog.c b/roms/skiboot/core/errorlog.c
new file mode 100644
index 000000000..f64ac3f23
--- /dev/null
+++ b/roms/skiboot/core/errorlog.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/* This file contains the front end for OPAL error logging. It is used
+ * to construct a struct errorlog representing the event/error to be
+ * logged, which is then passed to the platform-specific backend to log
+ * the actual errors.
+ *
+ * Copyright 2013-2017 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <lock.h>
+#include <errorlog.h>
+#include <pool.h>
+
+/*
+ * Maximum number of buffers that are pre-allocated
+ * to hold elogs that are reported on Sapphire and
+ * PowerNV.
+ */
+#define ELOG_WRITE_MAX_RECORD 64
+/* Platform log id as per the spec */
+static uint32_t sapphire_elog_id = 0xB0000000;
+
+/* Reserved for future use */
+/* static uint32_t powernv_elog_id = 0xB1000000; */
+
+/* Pool to allocate elog messages from */
+static struct pool elog_pool;
+static struct lock elog_lock = LOCK_UNLOCKED;
+
+static bool elog_available = false;
+
+static struct errorlog *get_write_buffer(int opal_event_severity)
+{
+ struct errorlog *buf;
+
+ if (!elog_available)
+ return NULL;
+
+ lock(&elog_lock);
+ if (opal_event_severity == OPAL_ERROR_PANIC)
+ buf = pool_get(&elog_pool, POOL_HIGH);
+ else
+ buf = pool_get(&elog_pool, POOL_NORMAL);
+
+ unlock(&elog_lock);
+ return buf;
+}
+
+/* Reporting of error via struct errorlog */
+struct errorlog *opal_elog_create(struct opal_err_info *e_info, uint32_t tag)
+{
+ struct errorlog *buf;
+
+ buf = get_write_buffer(e_info->sev);
+ if (buf) {
+ buf->error_event_type = e_info->err_type;
+ buf->component_id = e_info->cmp_id;
+ buf->subsystem_id = e_info->subsystem;
+ buf->event_severity = e_info->sev;
+ buf->event_subtype = e_info->event_subtype;
+ buf->reason_code = e_info->reason_code;
+ buf->elog_origin = ORG_SAPPHIRE;
+
+ lock(&elog_lock);
+ buf->plid = ++sapphire_elog_id;
+ unlock(&elog_lock);
+
+ /* Initialise the first user dump section */
+ log_add_section(buf, tag);
+ }
+
+ return buf;
+}
+
+/* Add a new user data section to an existing error log */
+void log_add_section(struct errorlog *buf, uint32_t tag)
+{
+ size_t size = sizeof(struct elog_user_data_section) - 1;
+ struct elog_user_data_section *tmp;
+
+ if (!buf) {
+ prerror("ELOG: Cannot add user data section. "
+ "Buffer is invalid\n");
+ return;
+ }
+
+ if ((buf->user_section_size + size) > OPAL_LOG_MAX_DUMP) {
+ prerror("ELOG: Size of dump data overruns buffer\n");
+ return;
+ }
+
+ tmp = (struct elog_user_data_section *)(buf->user_data_dump +
+ buf->user_section_size);
+ /* Use DESC if no other tag provided */
+ tmp->tag = tag ? cpu_to_be32(tag) : cpu_to_be32(OPAL_ELOG_SEC_DESC);
+ tmp->size = cpu_to_be16(size);
+
+ buf->user_section_size += size;
+ buf->user_section_count++;
+}
+
+void opal_elog_complete(struct errorlog *buf, bool success)
+{
+ if (!success)
+ printf("Unable to log error\n");
+
+ lock(&elog_lock);
+ pool_free_object(&elog_pool, buf);
+ unlock(&elog_lock);
+}
+
+void log_commit(struct errorlog *elog)
+{
+ int rc;
+
+ if (!elog)
+ return;
+
+ if (platform.elog_commit) {
+ rc = platform.elog_commit(elog);
+ if (rc)
+ prerror("ELOG: Platform commit error %d\n", rc);
+
+ return;
+ }
+
+ opal_elog_complete(elog, false);
+}
+
+void log_append_data(struct errorlog *buf, unsigned char *data, uint16_t size)
+{
+ struct elog_user_data_section *section;
+ uint8_t n_sections;
+ char *buffer;
+ uint16_t ssize;
+
+ if (!buf) {
+ prerror("ELOG: Cannot update user data. Buffer is invalid\n");
+ return;
+ }
+
+ if ((buf->user_section_size + size) > OPAL_LOG_MAX_DUMP) {
+ prerror("ELOG: Size of dump data overruns buffer\n");
+ return;
+ }
+
+ /* Step through user sections to find latest dump section */
+ buffer = buf->user_data_dump;
+ n_sections = buf->user_section_count;
+ if (!n_sections) {
+ prerror("ELOG: User section invalid\n");
+ return;
+ }
+
+ while (--n_sections) {
+ section = (struct elog_user_data_section *)buffer;
+ buffer += be16_to_cpu(section->size);
+ }
+
+ section = (struct elog_user_data_section *)buffer;
+ ssize = be16_to_cpu(section->size);
+ buffer += ssize;
+ memcpy(buffer, data, size);
+ section->size = cpu_to_be16(ssize + size);
+ buf->user_section_size += size;
+}
+
+void log_append_msg(struct errorlog *buf, const char *fmt, ...)
+{
+ char err_msg[250];
+ va_list list;
+
+ if (!buf) {
+ prerror("Tried to append log to NULL buffer\n");
+ return;
+ }
+
+ va_start(list, fmt);
+ vsnprintf(err_msg, sizeof(err_msg), fmt, list);
+ va_end(list);
+
+ /* Log the error on to Sapphire console */
+ prerror("%s", err_msg);
+
+ log_append_data(buf, err_msg, strlen(err_msg));
+}
+
+uint32_t log_simple_error(struct opal_err_info *e_info, const char *fmt, ...)
+{
+ struct errorlog *buf;
+ va_list list;
+ char err_msg[250];
+
+ va_start(list, fmt);
+ vsnprintf(err_msg, sizeof(err_msg), fmt, list);
+ va_end(list);
+
+ /* Log the error on to Sapphire console */
+ prerror("%s", err_msg);
+
+ buf = opal_elog_create(e_info, 0);
+ if (buf == NULL) {
+ prerror("ELOG: Error getting buffer to log error\n");
+ return -1;
+ }
+
+ log_append_data(buf, err_msg, strlen(err_msg));
+ log_commit(buf);
+
+ return buf->plid;
+}
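+
+/*
+ * A minimal usage sketch (the reason code and the e_info() helper below
+ * are assumptions about the errorlog header, not definitions made here):
+ *
+ *	log_simple_error(&e_info(OPAL_RC_SOMETHING), "XYZ: it broke\n");
+ *
+ * or, for richer data:
+ *
+ *	buf = opal_elog_create(&e_info(OPAL_RC_SOMETHING), 0);
+ *	log_append_data(buf, dump, dump_len);
+ *	log_commit(buf);
+ */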
+
+int elog_init(void)
+{
+ /* Pre-allocate memory for records */
+ if (pool_init(&elog_pool, sizeof(struct errorlog),
+ ELOG_WRITE_MAX_RECORD, 1))
+ return OPAL_RESOURCE;
+
+ elog_available = true;
+ return 0;
+}
diff --git a/roms/skiboot/core/exceptions.c b/roms/skiboot/core/exceptions.c
new file mode 100644
index 000000000..389548d16
--- /dev/null
+++ b/roms/skiboot/core/exceptions.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Deal with exceptions when in OPAL.
+ *
+ * Copyright 2013-2014 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <stack.h>
+#include <opal.h>
+#include <processor.h>
+#include <cpu.h>
+#include <ras.h>
+
+#define REG "%016llx"
+#define REG32 "%08x"
+#define REGS_PER_LINE 4
+
+static void dump_regs(struct stack_frame *stack)
+{
+ unsigned int i;
+
+ prerror("CFAR : "REG" MSR : "REG"\n", stack->cfar, stack->msr);
+ prerror("SRR0 : "REG" SRR1 : "REG"\n", stack->srr0, stack->srr1);
+ prerror("HSRR0: "REG" HSRR1: "REG"\n", stack->hsrr0, stack->hsrr1);
+ prerror("DSISR: "REG32" DAR : "REG"\n", stack->dsisr, stack->dar);
+ prerror("LR : "REG" CTR : "REG"\n", stack->lr, stack->ctr);
+ prerror("CR : "REG32" XER : "REG32"\n", stack->cr, stack->xer);
+ for (i = 0; i < 16; i++)
+ prerror("GPR%02d: "REG" GPR%02d: "REG"\n",
+ i, stack->gpr[i], i + 16, stack->gpr[i + 16]);
+}
+
+#define EXCEPTION_MAX_STR 320
+
+static void handle_mce(struct stack_frame *stack, uint64_t nip, uint64_t msr, bool *fatal)
+{
+ uint64_t mce_flags, mce_addr;
+ const char *mce_err;
+ const char *mce_fix = NULL;
+ char buf[EXCEPTION_MAX_STR];
+ size_t l;
+
+ decode_mce(stack->srr0, stack->srr1, stack->dsisr, stack->dar,
+ &mce_flags, &mce_err, &mce_addr);
+
+ /* Try to recover. */
+ if (mce_flags & MCE_ERAT_ERROR) {
+ /* Real-mode still uses ERAT, flush transient bitflips */
+ flush_erat();
+ mce_fix = "ERAT flush";
+
+ } else {
+ *fatal = true;
+ }
+
+ prerror("***********************************************\n");
+ l = 0;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "%s MCE at "REG" ", *fatal ? "Fatal" : "Non-fatal", nip);
+ l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip);
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l, " MSR "REG, msr);
+ prerror("%s\n", buf);
+
+ l = 0;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Cause: %s", mce_err);
+ prerror("%s\n", buf);
+ if (mce_flags & MCE_INVOLVED_EA) {
+ l = 0;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Effective address: 0x%016llx", mce_addr);
+ prerror("%s\n", buf);
+ }
+
+ if (!*fatal) {
+ l = 0;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Attempting recovery: %s", mce_fix);
+ prerror("%s\n", buf);
+ }
+}
+
+void exception_entry(struct stack_frame *stack)
+{
+ bool fatal = false;
+ bool hv;
+ uint64_t nip;
+ uint64_t msr;
+ char buf[EXCEPTION_MAX_STR];
+ size_t l;
+
+ switch (stack->type) {
+ case 0x500:
+ case 0x980:
+ case 0xe00:
+ case 0xe20:
+ case 0xe40:
+ case 0xe60:
+ case 0xe80:
+ case 0xea0:
+ case 0xf80:
+ hv = true;
+ break;
+ default:
+ hv = false;
+ break;
+ }
+
+ if (hv) {
+ nip = stack->hsrr0;
+ msr = stack->hsrr1;
+ } else {
+ nip = stack->srr0;
+ msr = stack->srr1;
+ }
+ stack->msr = msr;
+ stack->pc = nip;
+
+ if (!(msr & MSR_RI))
+ fatal = true;
+
+ l = 0;
+ switch (stack->type) {
+ case 0x100:
+ prerror("***********************************************\n");
+ if (fatal) {
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal System Reset at "REG" ", nip);
+ } else {
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "System Reset at "REG" ", nip);
+ }
+ break;
+
+ case 0x200:
+ handle_mce(stack, nip, msr, &fatal);
+ goto no_symbol;
+
+ case 0x700: {
+ struct trap_table_entry *tte;
+
+ fatal = true;
+ prerror("***********************************************\n");
+ for (tte = __trap_table_start; tte < __trap_table_end; tte++) {
+ if (tte->address == nip) {
+ prerror("< %s >\n", tte->message);
+ prerror(" .\n");
+ prerror(" .\n");
+ prerror(" .\n");
+ prerror(" OO__)\n");
+ prerror(" <\"__/\n");
+ prerror(" ^ ^\n");
+ break;
+ }
+ }
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal TRAP at "REG" ", nip);
+ l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip);
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l, " MSR "REG, msr);
+ prerror("%s\n", buf);
+ dump_regs(stack);
+ backtrace_r1((uint64_t)stack);
+ if (platform.terminate)
+ platform.terminate(buf);
+ for (;;) ;
+ break; }
+
+ default:
+ fatal = true;
+ prerror("***********************************************\n");
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal Exception 0x%llx at "REG" ", stack->type, nip);
+ break;
+ }
+ l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip);
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l, " MSR "REG, msr);
+ prerror("%s\n", buf);
+no_symbol:
+ dump_regs(stack);
+ backtrace_r1((uint64_t)stack);
+ if (fatal) {
+ if (platform.terminate)
+ platform.terminate(buf);
+ for (;;) ;
+ }
+
+ if (hv) {
+ /* Set up for SRR return */
+ stack->srr0 = nip;
+ stack->srr1 = msr;
+ }
+}
+
+void exception_entry_pm_sreset(void)
+{
+ char buf[EXCEPTION_MAX_STR];
+ size_t l;
+
+ prerror("***********************************************\n");
+ l = 0;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "System Reset in sleep");
+ prerror("%s\n", buf);
+ backtrace();
+}
+
+void __noreturn exception_entry_pm_mce(void)
+{
+ char buf[EXCEPTION_MAX_STR];
+ size_t l;
+
+ prerror("***********************************************\n");
+ l = 0;
+ l += snprintf(buf + l, EXCEPTION_MAX_STR - l,
+ "Fatal MCE in sleep");
+ prerror("%s\n", buf);
+ prerror("SRR0 : "REG" SRR1 : "REG"\n",
+ (uint64_t)mfspr(SPR_SRR0), (uint64_t)mfspr(SPR_SRR1));
+ prerror("DSISR: "REG32" DAR : "REG"\n",
+ (uint32_t)mfspr(SPR_DSISR), (uint64_t)mfspr(SPR_DAR));
+ abort();
+}
+
+static int64_t opal_register_exc_handler(uint64_t opal_exception __unused,
+ uint64_t handler_address __unused,
+ uint64_t glue_cache_line __unused)
+{
+ /* This interface is deprecated */
+ return OPAL_UNSUPPORTED;
+}
+opal_call(OPAL_REGISTER_OPAL_EXCEPTION_HANDLER, opal_register_exc_handler, 3);
+
diff --git a/roms/skiboot/core/fast-reboot.c b/roms/skiboot/core/fast-reboot.c
new file mode 100644
index 000000000..9f92525a9
--- /dev/null
+++ b/roms/skiboot/core/fast-reboot.c
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Full IPL is slow, let's cheat!
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <console.h>
+#include <fsp.h>
+#include <psi.h>
+#include <opal.h>
+#include <mem_region.h>
+#include <xscom.h>
+#include <interrupts.h>
+#include <cec.h>
+#include <timebase.h>
+#include <pci.h>
+#include <xive.h>
+#include <chip.h>
+#include <chiptod.h>
+#include <ipmi.h>
+#include <direct-controls.h>
+#include <nvram.h>
+
+/* Flag tested by the OPAL entry code */
+static volatile bool fast_boot_release;
+static volatile bool spr_set_release;
+static volatile bool nmi_mce_release;
+
+static void wait_on(volatile bool *cond)
+{
+ sync();
+ if (!*cond) {
+ smt_lowest();
+ while (!*cond)
+ barrier();
+ smt_medium();
+ }
+ sync();
+}
+
+static bool cpu_state_wait_all_others(enum cpu_thread_state state,
+ unsigned long timeout_tb)
+{
+ struct cpu_thread *cpu;
+ unsigned long end = mftb() + timeout_tb;
+
+ sync();
+ for_each_ungarded_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+
+ if (cpu->state != state) {
+ smt_lowest();
+ while (cpu->state != state) {
+ barrier();
+
+ if (timeout_tb && (tb_compare(mftb(), end) == TB_AAFTERB)) {
+ smt_medium();
+ return false;
+ }
+ }
+ smt_medium();
+ }
+ }
+ sync();
+
+ return true;
+}
+
+static const char *fast_reboot_disabled = NULL;
+
+void disable_fast_reboot(const char *reason)
+{
+ if (fast_reboot_disabled)
+ return;
+
+ prlog(PR_NOTICE, "RESET: Fast reboot disabled: %s\n", reason);
+ fast_reboot_disabled = reason;
+}
+
+void add_fast_reboot_dt_entries(void)
+{
+ dt_check_del_prop(opal_node, "fast-reboot");
+
+ if (fast_reboot_disabled) {
+ dt_add_property_string(opal_node, "fast-reboot", fast_reboot_disabled);
+ } else {
+ dt_add_property_string(opal_node, "fast-reboot", "okay");
+ }
+}
+
+/*
+ * This is called by the reboot CPU after all other CPUs have been
+ * quiesced and stopped, to perform various sanity checks on firmware
+ * data (and potentially hardware), to determine whether the fast
+ * reboot should go ahead.
+ */
+static bool fast_reboot_sanity_check(void)
+{
+ if (!mem_check_all()) {
+ disable_fast_reboot("Inconsistent firmware data");
+ return false;
+ }
+
+ if (!verify_romem()) {
+ disable_fast_reboot("Inconsistent firmware romem checksum");
+ return false;
+ }
+
+ return true;
+}
+
+void fast_reboot(void)
+{
+ static int fast_reboot_count = 0;
+
+ if (chip_quirk(QUIRK_NO_DIRECT_CTL)) {
+ prlog(PR_DEBUG,
+ "RESET: Fast reboot disabled by quirk\n");
+ return;
+ }
+
+ /*
+ * Ensure all other CPUs have left OPAL calls.
+ */
+ if (!opal_quiesce(QUIESCE_HOLD, -1)) {
+ disable_fast_reboot("OPAL quiesce timeout");
+ return;
+ }
+
+ if (fast_reboot_disabled &&
+ nvram_query_eq_dangerous("force-fast-reset", "1")) {
+ /* Do fast reboot even if it's been disabled */
+ prlog(PR_NOTICE, "RESET: Ignoring fast reboot disabled: %s\n",
+ fast_reboot_disabled);
+ } else if (fast_reboot_disabled) {
+ prlog(PR_NOTICE, "RESET: Fast reboot disabled: %s\n",
+ fast_reboot_disabled);
+ opal_quiesce(QUIESCE_RESUME, -1);
+ return;
+ }
+
+ prlog(PR_NOTICE, "RESET: Initiating fast reboot %d...\n", ++fast_reboot_count);
+ fast_boot_release = false;
+ spr_set_release = false;
+ nmi_mce_release = false;
+ sync();
+
+ /* Put everybody in stop except myself */
+ if (sreset_all_prepare()) {
+ prlog(PR_NOTICE, "RESET: Fast reboot failed to prepare "
+ "secondaries for system reset\n");
+ opal_quiesce(QUIESCE_RESUME, -1);
+ return;
+ }
+
+ if (!fast_reboot_sanity_check()) {
+ opal_quiesce(QUIESCE_RESUME, -1);
+ return;
+ }
+
+ cpu_set_sreset_enable(false);
+ cpu_set_ipi_enable(false);
+
+ /*
+ * The fast reboot sreset vector has FIXUP_ENDIAN, so secondaries can
+ * cope with a wrong HILE setting.
+ */
+ copy_sreset_vector_fast_reboot();
+
+ /*
+ * There is no point clearing special wakeup or un-quiesce due to
+ * failure after this point, because we will be going to full IPL.
+ * Less cleanup work means less opportunity to fail.
+ */
+
+ /* Send everyone else to 0x100 */
+ if (sreset_all_others() != OPAL_SUCCESS) {
+ prlog(PR_NOTICE, "RESET: Fast reboot failed to system reset "
+ "secondaries\n");
+ return;
+ }
+
+ /* Ensure all the sresets get through */
+ if (!cpu_state_wait_all_others(cpu_state_fast_reboot_entry, msecs_to_tb(1000))) {
+ prlog(PR_NOTICE, "RESET: Fast reboot timed out waiting for "
+ "secondaries to call in\n");
+ return;
+ }
+
+ prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n");
+ sreset_all_finish();
+
+ /* This resets our quiesce state ready to enter the new kernel. */
+ opal_quiesce(QUIESCE_RESUME_FAST_REBOOT, -1);
+
+ console_complete_flush();
+
+ mtmsrd(0, 1); /* Clear MSR[RI] for 0x100 reset */
+ asm volatile("ba 0x100\n\t" : : : "memory");
+ for (;;)
+ ;
+}
+
+void __noreturn enter_nap(void);
+
+static void check_split_core(void)
+{
+ struct cpu_thread *cpu;
+ u64 mask, hid0;
+
+ hid0 = mfspr(SPR_HID0);
+ mask = SPR_HID0_POWER8_4LPARMODE | SPR_HID0_POWER8_2LPARMODE;
+
+ if ((hid0 & mask) == 0)
+ return;
+
+ prlog(PR_INFO, "RESET: CPU 0x%04x is split !\n", this_cpu()->pir);
+
+ /* If it's a secondary thread, just send it to nap */
+ if (this_cpu()->pir & 7) {
+ /* Prepare to be woken up */
+ icp_prep_for_pm();
+ /* Setup LPCR to wakeup on external interrupts only */
+ mtspr(SPR_LPCR, ((mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE) |
+ SPR_LPCR_P8_PECE2));
+ isync();
+ /* Go to nap (doesn't return) */
+ enter_nap();
+ }
+
+ prlog(PR_INFO, "RESET: Primary, unsplitting... \n");
+
+ /* Trigger unsplit operation and update SLW image */
+ hid0 &= ~SPR_HID0_POWER8_DYNLPARDIS;
+ set_hid0(hid0);
+ opal_slw_set_reg(this_cpu()->pir, SPR_HID0, hid0);
+
+ /* Wait for unsplit */
+ while (mfspr(SPR_HID0) & mask)
+ cpu_relax();
+
+ /* Now the guys are sleeping, wake'em up. They will come back
+ * via reset and continue the fast reboot process normally.
+ * No need to wait.
+ */
+ prlog(PR_INFO, "RESET: Waking unsplit secondaries... \n");
+
+ for_each_cpu(cpu) {
+ if (!cpu_is_sibling(cpu, this_cpu()) || (cpu == this_cpu()))
+ continue;
+ icp_kick_cpu(cpu);
+ }
+}
+
+static void cleanup_cpu_state(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ if (proc_gen == proc_gen_p9)
+ xive_cpu_reset();
+ else if (proc_gen == proc_gen_p10)
+ xive2_cpu_reset();
+
+ /* Per core cleanup */
+ if (cpu_is_thread0(cpu) || cpu_is_core_chiplet_primary(cpu)) {
+ /* Shared SPRs whacked back to normal */
+
+ /* XXX Update the SLW copies ! Also dbl check HIDs etc... */
+ init_shared_sprs();
+
+ if (proc_gen == proc_gen_p8) {
+ /* If somebody was in fast_sleep, we may have a
+ * workaround to undo
+ */
+ if (cpu->in_fast_sleep) {
+ prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep"
+ " undoing workarounds...\n", cpu->pir);
+ fast_sleep_exit();
+ }
+
+ /* The TLB surely contains garbage.
+ * P9 clears TLBs in cpu_fast_reboot_complete
+ */
+ cleanup_local_tlb();
+ }
+
+ /* And we might have lost TB sync */
+ chiptod_wakeup_resync();
+ }
+
+ /* Per-thread additional cleanup */
+ init_replicated_sprs();
+
+ // XXX Cleanup SLW, check HIDs ...
+}
+
+/* Entry from asm after a fast reset */
+void __noreturn fast_reboot_entry(void);
+
+void __noreturn fast_reboot_entry(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ if (proc_gen == proc_gen_p8) {
+ /* We reset our ICP first ! Otherwise we might get stray
+ * interrupts when unsplitting
+ */
+ reset_cpu_icp();
+
+ /* If we are split, we need to unsplit. Since that can send us
+ * to NAP, which will come back via reset, we do it now
+ */
+ check_split_core();
+ }
+
+ /* Until SPRs (notably HID[HILE]) are set and new exception vectors
+ * installed, nobody should take machine checks. Try to do minimal
+ * work between these points.
+ */
+ disable_machine_check();
+ mtmsrd(0, 1); /* Clear RI */
+
+ sync();
+ cpu->state = cpu_state_fast_reboot_entry;
+ sync();
+ if (cpu == boot_cpu) {
+ cpu_state_wait_all_others(cpu_state_fast_reboot_entry, 0);
+ spr_set_release = true;
+ } else {
+ wait_on(&spr_set_release);
+ }
+
+
+ /* Reset SPRs */
+ if (cpu_is_thread0(cpu))
+ init_shared_sprs();
+ init_replicated_sprs();
+
+ if (cpu == boot_cpu) {
+ /* Restore skiboot vectors */
+ copy_exception_vectors();
+ copy_sreset_vector();
+ patch_traps(true);
+ }
+
+ /* Must wait for the others too, because shared SPRs like HID0 are only
+ * set by thread0, so machine checks can't be enabled until those have
+ * been set.
+ */
+ sync();
+ cpu->state = cpu_state_present;
+ sync();
+ if (cpu == boot_cpu) {
+ cpu_state_wait_all_others(cpu_state_present, 0);
+ nmi_mce_release = true;
+ } else {
+ wait_on(&nmi_mce_release);
+ }
+
+ /* At this point skiboot exception vectors are in place and all
+ * cores/threads have SPRs set for running skiboot.
+ */
+ enable_machine_check();
+ mtmsrd(MSR_RI, 1);
+
+ cleanup_cpu_state();
+
+ prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", cpu->pir);
+
+ /* The original boot CPU (not the fast reboot initiator) takes
+ * command. Secondaries wait for the signal then go to their secondary
+ * entry point.
+ */
+ if (cpu != boot_cpu) {
+ wait_on(&fast_boot_release);
+
+ __secondary_cpu_entry();
+ }
+
+ if (proc_gen == proc_gen_p9)
+ xive_reset();
+ else if (proc_gen == proc_gen_p10)
+ xive2_reset();
+
+ /* Let the CPU layer do some last minute global cleanups */
+ cpu_fast_reboot_complete();
+
+ /* We can now do NAP mode */
+ cpu_set_sreset_enable(true);
+ cpu_set_ipi_enable(true);
+
+ prlog(PR_INFO, "RESET: Releasing secondaries...\n");
+
+ /* Release everybody */
+ sync();
+ fast_boot_release = true;
+ sync();
+ cpu->state = cpu_state_active;
+ sync();
+
+ /* Wait for them to respond */
+ cpu_state_wait_all_others(cpu_state_active, 0);
+
+ sync();
+
+ prlog(PR_INFO, "RESET: All done, cleaning up...\n");
+
+ /* Clear release flag for next time */
+ fast_boot_release = false;
+
+ if (!chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ /*
+ * mem_region_clear_unused avoids these preload regions
+ * so it can run alongside image preloading. Clear these
+ * regions now to catch anything not overwritten by
+ * preload.
+ *
+ * Mambo may have embedded payload here, so don't clear
+ * it at all.
+ */
+ memset(KERNEL_LOAD_BASE, 0, KERNEL_LOAD_SIZE);
+ memset(INITRAMFS_LOAD_BASE, 0, INITRAMFS_LOAD_SIZE);
+ }
+
+ /* Start preloading kernel and ramdisk */
+ start_preload_kernel();
+
+ /* Start clearing memory */
+ start_mem_region_clear_unused();
+
+ if (platform.fast_reboot_init)
+ platform.fast_reboot_init();
+
+ if (proc_gen == proc_gen_p8) {
+ /* XXX */
+ /* Reset/EOI the PSI interrupt */
+ psi_irq_reset();
+ }
+
+ /* update pci nvram settings */
+ pci_nvram_init();
+
+ /* Remove all PCI devices */
+ if (pci_reset()) {
+ prlog(PR_NOTICE, "RESET: Fast reboot failed to reset PCI\n");
+
+ /*
+ * Can't return to caller here because we're past no-return.
+ * Attempt an IPL here which is what the caller would do.
+ */
+ if (platform.cec_reboot)
+ platform.cec_reboot();
+ for (;;)
+ ;
+ }
+
+ ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
+
+ wait_mem_region_clear_unused();
+
+ /* Load and boot payload */
+ load_and_boot_kernel(true);
+}
diff --git a/roms/skiboot/core/fdt.c b/roms/skiboot/core/fdt.c
new file mode 100644
index 000000000..463dc6912
--- /dev/null
+++ b/roms/skiboot/core/fdt.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Produce and consume flattened device trees
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <stdarg.h>
+#include <libfdt.h>
+#include <device.h>
+#include <chip.h>
+#include <cpu.h>
+#include <opal.h>
+#include <interrupts.h>
+#include <fsp.h>
+#include <cec.h>
+#include <vpd.h>
+#include <ccan/str/str.h>
+
+static int fdt_error;
+
+#undef DEBUG_FDT
+#ifdef DEBUG_FDT
+#define FDT_DBG(fmt, a...) prlog(PR_DEBUG, "FDT: " fmt, ##a)
+#else
+#define FDT_DBG(fmt, a...)
+#endif
+
+static void __save_err(int err, const char *str)
+{
+ FDT_DBG("rc: %d from \"%s\"\n", err, str);
+ if (err && !fdt_error) {
+ prerror("FDT: Error %d from \"%s\"\n", err, str);
+ fdt_error = err;
+ }
+}
+
+#define save_err(...) __save_err(__VA_ARGS__, #__VA_ARGS__)
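+
+/*
+ * For illustration: save_err() stringifies its argument list, so a call
+ * such as
+ *     save_err(fdt_begin_node(fdt, dn->name));
+ * expands to
+ *     __save_err(fdt_begin_node(fdt, dn->name),
+ *                "fdt_begin_node(fdt, dn->name)");
+ * which lets __save_err() quote the exact libfdt call that failed.
+ */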
+
+static void dt_property_cell(void *fdt, const char *name, u32 cell)
+{
+ save_err(fdt_property_cell(fdt, name, cell));
+}
+
+static void dt_begin_node(void *fdt, const struct dt_node *dn)
+{
+ save_err(fdt_begin_node(fdt, dn->name));
+
+ dt_property_cell(fdt, "phandle", dn->phandle);
+}
+
+static void dt_property(void *fdt, const struct dt_property *p)
+{
+ save_err(fdt_property(fdt, p->name, p->prop, p->len));
+}
+
+static void dt_end_node(void *fdt)
+{
+ save_err(fdt_end_node(fdt));
+}
+
+#ifdef DEBUG_FDT
+static void dump_fdt(void *fdt)
+{
+ int i, off, depth, err;
+
+ prlog(PR_INFO, "Device tree %u@%p\n", fdt_totalsize(fdt), fdt);
+ err = fdt_check_header(fdt);
+ if (err) {
+ prerror("fdt_check_header: %s\n", fdt_strerror(err));
+ return;
+ }
+ prlog(PR_INFO, "fdt_check_header passed\n");
+
+ prlog(PR_INFO, "fdt_num_mem_rsv = %u\n", fdt_num_mem_rsv(fdt));
+ for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+ u64 addr, size;
+
+ err = fdt_get_mem_rsv(fdt, i, &addr, &size);
+ if (err) {
+ prlog(PR_INFO, " ERR %s\n", fdt_strerror(err));
+ return;
+ }
+ prlog(PR_INFO, " mem_rsv[%i] = %lu@%#lx\n",
+ i, (long)addr, (long)size);
+ }
+
+ for (off = fdt_next_node(fdt, 0, &depth);
+ off > 0;
+ off = fdt_next_node(fdt, off, &depth)) {
+ int len;
+ const char *name;
+
+ name = fdt_get_name(fdt, off, &len);
+ if (!name) {
+ prerror("fdt: offset %i no name!\n", off);
+ return;
+ }
+ prlog(PR_INFO, "name: %s [%u]\n", name, off);
+ }
+}
+#endif
+
+static void flatten_dt_properties(void *fdt, const struct dt_node *dn)
+{
+ const struct dt_property *p;
+
+ list_for_each(&dn->properties, p, list) {
+ if (strstarts(p->name, DT_PRIVATE))
+ continue;
+
+ FDT_DBG(" prop: %s size: %ld\n", p->name, p->len);
+ dt_property(fdt, p);
+ }
+}
+
+static void flatten_dt_node(void *fdt, const struct dt_node *root,
+ bool exclusive)
+{
+ const struct dt_node *i;
+
+ if (!exclusive) {
+ FDT_DBG("node: %s\n", root->name);
+ dt_begin_node(fdt, root);
+ flatten_dt_properties(fdt, root);
+ }
+
+ list_for_each(&root->children, i, list)
+ flatten_dt_node(fdt, i, false);
+
+ if (!exclusive)
+ dt_end_node(fdt);
+}
+
+static void create_dtb_reservemap(void *fdt, const struct dt_node *root)
+{
+ uint64_t base, size;
+ const __be64 *ranges;
+ const struct dt_property *prop;
+ int i;
+
+ /* Duplicate the reserved-ranges property into the fdt reservemap */
+ prop = dt_find_property(root, "reserved-ranges");
+ if (prop) {
+ ranges = (const void *)prop->prop;
+
+ for (i = 0; i < prop->len / (sizeof(uint64_t) * 2); i++) {
+ base = be64_to_cpu(*(ranges++));
+ size = be64_to_cpu(*(ranges++));
+ save_err(fdt_add_reservemap_entry(fdt, base, size));
+ }
+ }
+
+ save_err(fdt_finish_reservemap(fdt));
+}
+
+static int __create_dtb(void *fdt, size_t len,
+ const struct dt_node *root,
+ bool exclusive)
+{
+ if (chip_quirk(QUIRK_SLOW_SIM))
+ save_err(fdt_create_with_flags(fdt, len, FDT_CREATE_FLAG_NO_NAME_DEDUP));
+ else
+ save_err(fdt_create_with_flags(fdt, len, 0));
+ if (fdt_error)
+ goto err;
+
+ if (root == dt_root && !exclusive)
+ create_dtb_reservemap(fdt, root);
+ else
+ save_err(fdt_finish_reservemap(fdt));
+
+ flatten_dt_node(fdt, root, exclusive);
+
+ save_err(fdt_finish(fdt));
+ if (fdt_error) {
+err:
+ prerror("dtb: error %s\n", fdt_strerror(fdt_error));
+ return fdt_error;
+ }
+
+#ifdef DEBUG_FDT
+ dump_fdt(fdt);
+#endif
+ return 0;
+}
+
+void *create_dtb(const struct dt_node *root, bool exclusive)
+{
+ void *fdt = NULL;
+ size_t len = DEVICE_TREE_MAX_SIZE;
+ uint32_t old_last_phandle = get_last_phandle();
+ int ret;
+
+ do {
+ set_last_phandle(old_last_phandle);
+ fdt_error = 0;
+ fdt = malloc(len);
+ if (!fdt) {
+ prerror("dtb: could not malloc %lu\n", (long)len);
+ return NULL;
+ }
+
+ ret = __create_dtb(fdt, len, root, exclusive);
+ if (ret) {
+ free(fdt);
+ fdt = NULL;
+ }
+
+ len *= 2;
+ } while (ret == -FDT_ERR_NOSPACE);
+
+ return fdt;
+}
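+
+/*
+ * A minimal usage sketch (illustrative only): the buffer returned by
+ * create_dtb() is malloc()ed, grown by retrying with a doubled length on
+ * FDT_ERR_NOSPACE, and owned by the caller:
+ *
+ *     void *fdt = create_dtb(dt_root, false);
+ *     if (fdt) {
+ *             // hand fdt_totalsize(fdt) bytes to the consumer ...
+ *             free(fdt);
+ *     }
+ */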
+
+static int64_t opal_get_device_tree(uint32_t phandle,
+ uint64_t buf, uint64_t len)
+{
+ struct dt_node *root;
+ void *fdt = (void *)buf;
+ uint32_t old_last_phandle;
+ int64_t totalsize;
+ int ret;
+
+ if (!opal_addr_valid(fdt))
+ return OPAL_PARAMETER;
+
+ root = dt_find_by_phandle(dt_root, phandle);
+ if (!root)
+ return OPAL_PARAMETER;
+
+ if (!fdt) {
+ fdt = create_dtb(root, true);
+ if (!fdt)
+ return OPAL_INTERNAL_ERROR;
+ totalsize = fdt_totalsize(fdt);
+ free(fdt);
+ return totalsize;
+ }
+
+ if (!len)
+ return OPAL_PARAMETER;
+
+ fdt_error = 0;
+ old_last_phandle = get_last_phandle();
+ ret = __create_dtb(fdt, len, root, true);
+ if (ret) {
+ set_last_phandle(old_last_phandle);
+ if (ret == -FDT_ERR_NOSPACE)
+ return OPAL_NO_MEM;
+
+ return OPAL_EMPTY;
+ }
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_GET_DEVICE_TREE, opal_get_device_tree, 3);
diff --git a/roms/skiboot/core/flash-firmware-versions.c b/roms/skiboot/core/flash-firmware-versions.c
new file mode 100644
index 000000000..975ac6aff
--- /dev/null
+++ b/roms/skiboot/core/flash-firmware-versions.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Parse VERSION partition, add to device tree
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <device.h>
+#include <opal.h>
+#include <libstb/secureboot.h>
+#include <libstb/trustedboot.h>
+
+/* ibm,firmware-versions support */
+static char *version_buf;
+static size_t version_buf_size = 0x2000;
+
+static void __flash_dt_add_fw_version(struct dt_node *fw_version, char* data)
+{
+ static bool first = true;
+ char *prop;
+ int version_len, i;
+ int len = strlen(data);
+ const char *skiboot_version;
+ const char * version_str[] = {"open-power", "buildroot", "skiboot",
+ "hostboot-binaries", "hostboot", "linux",
+ "petitboot", "occ", "capp-ucode", "sbe",
+ "machine-xml", "hcode"};
+
+ if (first) {
+ first = false;
+
+ /* Increment past "key-" */
+ if (memcmp(data, "open-power", strlen("open-power")) == 0)
+ prop = data + strlen("open-power");
+ else
+ prop = strchr(data, '-');
+ if (!prop) {
+ prlog(PR_DEBUG,
+ "FLASH: Invalid fw version format (%s)\n", data);
+ return;
+ }
+ prop++;
+
+ dt_add_property_string(fw_version, "version", prop);
+ return;
+ }
+
+ /*
+ * PNOR version strings are not easily consumable. Split them into
+ * property, value.
+ *
+ * Example input from PNOR :
+ * "open-power-firestone-v1.8"
+ * "linux-4.4.6-openpower1-8420e0f"
+ *
+ * Desired output in device tree:
+ * open-power = "firestone-v1.8";
+ * linux = "4.4.6-openpower1-8420e0f";
+ */
+ for(i = 0; i < ARRAY_SIZE(version_str); i++)
+ {
+ version_len = strlen(version_str[i]);
+ if (len < version_len)
+ continue;
+
+ if (memcmp(data, version_str[i], version_len) != 0)
+ continue;
+
+ /* Found a match, add property */
+ if (dt_find_property(fw_version, version_str[i]))
+ continue;
+
+ /* Increment past "key-" */
+ prop = data + version_len + 1;
+ dt_add_property_string(fw_version, version_str[i], prop);
+
+ /* Sanity check against what Skiboot thinks its version is. */
+ if (strncmp(version_str[i], "skiboot",
+ strlen("skiboot")) == 0) {
+ /*
+ * If Skiboot was built with Buildroot its version may
+ * include a 'skiboot-' prefix; ignore it.
+ */
+ if (strncmp(version, "skiboot-",
+ strlen("skiboot-")) == 0)
+ skiboot_version = version + strlen("skiboot-");
+ else
+ skiboot_version = version;
+ if (strncmp(prop, skiboot_version,
+ strlen(skiboot_version)) != 0)
+ prlog(PR_WARNING, "WARNING! Skiboot version does not match VERSION partition!\n");
+ }
+ }
+}
+
+void flash_dt_add_fw_version(void)
+{
+ uint8_t version_data[80];
+ int rc;
+ int numbytes = 0, i = 0;
+ struct dt_node *fw_version;
+
+ if (version_buf == NULL)
+ return;
+
+ rc = wait_for_resource_loaded(RESOURCE_ID_VERSION, RESOURCE_SUBID_NONE);
+ if (rc != OPAL_SUCCESS) {
+ prlog(PR_WARNING, "FLASH: Failed to load VERSION data\n");
+ free(version_buf);
+ return;
+ }
+
+ fw_version = dt_new(dt_root, "ibm,firmware-versions");
+ assert(fw_version);
+
+ if (stb_is_container(version_buf, version_buf_size))
+ numbytes += SECURE_BOOT_HEADERS_SIZE;
+ for ( ; (numbytes < version_buf_size) && version_buf[numbytes]; numbytes++) {
+ if (version_buf[numbytes] == '\n') {
+ version_data[i] = '\0';
+ __flash_dt_add_fw_version(fw_version, version_data);
+ memset(version_data, 0, sizeof(version_data));
+ i = 0;
+ continue;
+ } else if (version_buf[numbytes] == '\t') {
+ continue; /* skip tabs */
+ }
+
+ version_data[i++] = version_buf[numbytes];
+ if (i == sizeof(version_data)) {
+ prlog(PR_WARNING, "VERSION item >%lu chars, skipping\n",
+ sizeof(version_data));
+ break;
+ }
+ }
+
+ free(version_buf);
+}
+
+void flash_fw_version_preload(void)
+{
+ int rc;
+
+ if (proc_gen < proc_gen_p9)
+ return;
+
+ prlog(PR_INFO, "FLASH: Loading VERSION section\n");
+
+ version_buf = malloc(version_buf_size);
+ if (!version_buf) {
+ prlog(PR_WARNING, "FLASH: Failed to allocate memory\n");
+ return;
+ }
+
+ rc = start_preload_resource(RESOURCE_ID_VERSION, RESOURCE_SUBID_NONE,
+ version_buf, &version_buf_size);
+ if (rc != OPAL_SUCCESS) {
+ prlog(PR_WARNING,
+ "FLASH: Failed to start loading VERSION data\n");
+ free(version_buf);
+ version_buf = NULL;
+ }
+}
diff --git a/roms/skiboot/core/flash-subpartition.c b/roms/skiboot/core/flash-subpartition.c
new file mode 100644
index 000000000..6e0fec6c3
--- /dev/null
+++ b/roms/skiboot/core/flash-subpartition.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Parse flash sub-partitions
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <opal-api.h>
+
+struct flash_hostboot_toc {
+ be32 ec;
+ be32 offset; /* From start of header. 4K aligned */
+ be32 size;
+};
+#define FLASH_HOSTBOOT_TOC_MAX_ENTRIES ((FLASH_SUBPART_HEADER_SIZE - 8) \
+ / sizeof(struct flash_hostboot_toc))
+
+struct flash_hostboot_header {
+ char eyecatcher[4];
+ be32 version;
+ struct flash_hostboot_toc toc[FLASH_HOSTBOOT_TOC_MAX_ENTRIES];
+};
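+
+/*
+ * Layout summary (derived from the parsing code below): the header starts
+ * with a 4-byte eyecatcher and a be32 version (only version 1 is
+ * accepted), followed by TOC entries of (ec, offset, size) terminated by
+ * an all-zero entry. Offsets are relative to the start of the header.
+ */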
+
+int flash_subpart_info(void *part_header, uint32_t header_len,
+ uint32_t part_size, uint32_t *part_actualp,
+ uint32_t subid, uint32_t *offset, uint32_t *size)
+{
+ struct flash_hostboot_header *header;
+ char eyecatcher[5];
+ uint32_t i, ec, o, s;
+ uint32_t part_actual;
+ bool subpart_found;
+
+ if (!part_header || ( !offset && !size && !part_actualp)) {
+ prlog(PR_ERR, "FLASH: invalid parameters: ph %p of %p sz %p "
+ "tsz %p\n", part_header, offset, size, part_actualp);
+ return OPAL_PARAMETER;
+ }
+
+ if (header_len < FLASH_SUBPART_HEADER_SIZE) {
+ prlog(PR_ERR, "FLASH: subpartition header too small 0x%x\n",
+ header_len);
+ return OPAL_PARAMETER;
+ }
+
+ header = (struct flash_hostboot_header*) part_header;
+
+ /* Sanity check the TOC version */
+ i = be32_to_cpu(header->version);
+ if (i != 1) {
+ prerror("FLASH: flash subpartition TOC version unknown %i\n", i);
+ return OPAL_RESOURCE;
+ }
+
+ /* NULL terminate eyecatcher */
+ strncpy(eyecatcher, header->eyecatcher, 4);
+ eyecatcher[4] = '\0';
+ prlog(PR_DEBUG, "FLASH: flash subpartition eyecatcher %s\n",
+ eyecatcher);
+
+ subpart_found = false;
+ part_actual = 0;
+ for (i = 0; i < FLASH_HOSTBOOT_TOC_MAX_ENTRIES; i++) {
+
+ ec = be32_to_cpu(header->toc[i].ec);
+ o = be32_to_cpu(header->toc[i].offset);
+ s = be32_to_cpu(header->toc[i].size);
+
+ /* Check for null terminating entry */
+ if (!ec && !o && !s)
+ break;
+
+ /* Sanity check the offset and size. */
+ if (o + s > part_size) {
+ prerror("FLASH: flash subpartition too big: %i\n", i);
+ return OPAL_RESOURCE;
+ }
+ if (!s) {
+ prerror("FLASH: flash subpartition zero size: %i\n", i);
+ return OPAL_RESOURCE;
+ }
+ if (o < FLASH_SUBPART_HEADER_SIZE) {
+ prerror("FLASH: flash subpartition offset too small: "
+ "%i\n", i);
+ return OPAL_RESOURCE;
+ }
+ /*
+ * Subpartition contents differ, but multiple TOC entries
+ * may point to the same subpartition.
+ */
+ if (ALIGN_UP(o + s, FLASH_SUBPART_HEADER_SIZE) > part_actual)
+ part_actual = ALIGN_UP(o + s, FLASH_SUBPART_HEADER_SIZE);
+
+ if (ec == subid) {
+ if (offset)
+ *offset += o;
+ if (size)
+ *size = s;
+ subpart_found = true;
+ }
+ }
+ if (!subpart_found && (offset || size)) {
+ prerror("FLASH: flash subpartition not found.\n");
+ return OPAL_RESOURCE;
+ }
+ if (part_actualp)
+ *part_actualp = part_actual;
+ return OPAL_SUCCESS;
+}
diff --git a/roms/skiboot/core/flash.c b/roms/skiboot/core/flash.c
new file mode 100644
index 000000000..8c1e788c4
--- /dev/null
+++ b/roms/skiboot/core/flash.c
@@ -0,0 +1,1186 @@
+// SPDX-License-Identifier: Apache-2.0
+/*
+ * Init, manage, read, write, and load resources from flash
+ *
+ * Copyright 2013-2019 IBM Corp.
+ * Copyright 2018-2019 Raptor Engineering, LLC
+ */
+
+#define pr_fmt(fmt) "FLASH: " fmt
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <lock.h>
+#include <opal.h>
+#include <opal-msg.h>
+#include <platform.h>
+#include <device.h>
+#include <libflash/libflash.h>
+#include <libflash/libffs.h>
+#include <libflash/ipmi-hiomap.h>
+#include <libflash/blocklevel.h>
+#include <libflash/ecc.h>
+#include <libstb/secureboot.h>
+#include <libstb/trustedboot.h>
+#include <libxz/xz.h>
+#include <elf.h>
+#include <timebase.h>
+
+struct flash {
+ struct list_node list;
+ bool busy;
+ bool no_erase;
+ struct blocklevel_device *bl;
+ uint64_t size;
+ uint32_t block_size;
+ int id;
+};
+
+static struct {
+ enum resource_id id;
+ uint32_t subid;
+ char name[PART_NAME_MAX+1];
+} part_name_map[] = {
+ { RESOURCE_ID_KERNEL, RESOURCE_SUBID_NONE, "BOOTKERNEL" },
+ { RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE, "ROOTFS" },
+ { RESOURCE_ID_CAPP, RESOURCE_SUBID_SUPPORTED, "CAPP" },
+ { RESOURCE_ID_IMA_CATALOG, RESOURCE_SUBID_SUPPORTED, "IMA_CATALOG" },
+ { RESOURCE_ID_VERSION, RESOURCE_SUBID_NONE, "VERSION" },
+ { RESOURCE_ID_KERNEL_FW, RESOURCE_SUBID_NONE, "BOOTKERNFW" },
+};
+
+static LIST_HEAD(flashes);
+static struct flash *system_flash;
+
+/* Using a single lock as we only have one flash at present. */
+static struct lock flash_lock;
+
+/* nvram-on-flash support */
+static struct flash *nvram_flash;
+static u32 nvram_offset, nvram_size;
+
+/* secboot-on-flash support */
+static struct flash *secboot_flash;
+static u32 secboot_offset, secboot_size;
+
+bool flash_reserve(void)
+{
+ bool rc = false;
+
+ if (!try_lock(&flash_lock))
+ return false;
+
+ if (!system_flash->busy) {
+ system_flash->busy = true;
+ rc = true;
+ }
+ unlock(&flash_lock);
+
+ return rc;
+}
+
+void flash_release(void)
+{
+ lock(&flash_lock);
+ system_flash->busy = false;
+ unlock(&flash_lock);
+}
+
+bool flash_unregister(void)
+{
+ struct blocklevel_device *bl = system_flash->bl;
+
+ if (bl->exit)
+ return bl->exit(bl);
+
+ prlog(PR_NOTICE, "Unregister flash device is not supported\n");
+ return true;
+}
+
+int flash_secboot_info(uint32_t *total_size)
+{
+ int rc;
+
+ lock(&flash_lock);
+ if (!secboot_flash) {
+ rc = OPAL_HARDWARE;
+ } else if (secboot_flash->busy) {
+ rc = OPAL_BUSY;
+ } else {
+ *total_size = secboot_size;
+ rc = OPAL_SUCCESS;
+ }
+ unlock(&flash_lock);
+
+ return rc;
+}
+
+int flash_secboot_read(void *dst, uint32_t src, uint32_t len)
+{
+ int rc;
+
+ if (!try_lock(&flash_lock))
+ return OPAL_BUSY;
+
+ if (!secboot_flash) {
+ rc = OPAL_HARDWARE;
+ goto out;
+ }
+
+ if (secboot_flash->busy) {
+ rc = OPAL_BUSY;
+ goto out;
+ }
+
+ if ((src + len) > secboot_size) {
+ prerror("FLASH_SECBOOT: read out of bound (0x%x,0x%x)\n",
+ src, len);
+ rc = OPAL_PARAMETER;
+ goto out;
+ }
+
+ secboot_flash->busy = true;
+ unlock(&flash_lock);
+
+ rc = blocklevel_read(secboot_flash->bl, secboot_offset + src, dst, len);
+
+ lock(&flash_lock);
+ secboot_flash->busy = false;
+out:
+ unlock(&flash_lock);
+ return rc;
+}
+
+int flash_secboot_write(uint32_t dst, void *src, uint32_t len)
+{
+ int rc;
+
+ if (!try_lock(&flash_lock))
+ return OPAL_BUSY;
+
+ if (secboot_flash->busy) {
+ rc = OPAL_BUSY;
+ goto out;
+ }
+
+ if ((dst + len) > secboot_size) {
+ prerror("FLASH_SECBOOT: write out of bound (0x%x,0x%x)\n",
+ dst, len);
+ rc = OPAL_PARAMETER;
+ goto out;
+ }
+
+ secboot_flash->busy = true;
+ unlock(&flash_lock);
+
+ rc = blocklevel_write(secboot_flash->bl, secboot_offset + dst, src, len);
+
+ lock(&flash_lock);
+ secboot_flash->busy = false;
+out:
+ unlock(&flash_lock);
+ return rc;
+}
+
+static int flash_nvram_info(uint32_t *total_size)
+{
+ int rc;
+
+ lock(&flash_lock);
+ if (!nvram_flash) {
+ rc = OPAL_HARDWARE;
+ } else if (nvram_flash->busy) {
+ rc = OPAL_BUSY;
+ } else {
+ *total_size = nvram_size;
+ rc = OPAL_SUCCESS;
+ }
+ unlock(&flash_lock);
+
+ return rc;
+}
+
+static int flash_nvram_start_read(void *dst, uint32_t src, uint32_t len)
+{
+ int rc;
+
+ if (!try_lock(&flash_lock))
+ return OPAL_BUSY;
+
+ if (!nvram_flash) {
+ rc = OPAL_HARDWARE;
+ goto out;
+ }
+
+ if (nvram_flash->busy) {
+ rc = OPAL_BUSY;
+ goto out;
+ }
+
+ if ((src + len) > nvram_size) {
+ prerror("NVRAM: read out of bound (0x%x,0x%x)\n",
+ src, len);
+ rc = OPAL_PARAMETER;
+ goto out;
+ }
+
+ nvram_flash->busy = true;
+ unlock(&flash_lock);
+
+ rc = blocklevel_read(nvram_flash->bl, nvram_offset + src, dst, len);
+
+ lock(&flash_lock);
+ nvram_flash->busy = false;
+out:
+ unlock(&flash_lock);
+ if (!rc)
+ nvram_read_complete(true);
+ return rc;
+}
+
+static int flash_nvram_write(uint32_t dst, void *src, uint32_t len)
+{
+ int rc;
+
+ if (!try_lock(&flash_lock))
+ return OPAL_BUSY;
+
+ if (nvram_flash->busy) {
+ rc = OPAL_BUSY;
+ goto out;
+ }
+
+ /* TODO: When we have async jobs for PRD, turn this into one */
+
+ if ((dst + len) > nvram_size) {
+ prerror("NVRAM: write out of bound (0x%x,0x%x)\n",
+ dst, len);
+ rc = OPAL_PARAMETER;
+ goto out;
+ }
+
+ nvram_flash->busy = true;
+ unlock(&flash_lock);
+
+ rc = blocklevel_write(nvram_flash->bl, nvram_offset + dst, src, len);
+
+ lock(&flash_lock);
+ nvram_flash->busy = false;
+out:
+ unlock(&flash_lock);
+ return rc;
+}
+
+
+static int flash_secboot_probe(struct flash *flash, struct ffs_handle *ffs)
+{
+ uint32_t start, size, part;
+ bool ecc;
+ int rc;
+
+ prlog(PR_DEBUG, "FLASH: probing for SECBOOT\n");
+
+ rc = ffs_lookup_part(ffs, "SECBOOT", &part);
+ if (rc) {
+ prlog(PR_WARNING, "FLASH: no SECBOOT partition found\n");
+ return OPAL_HARDWARE;
+ }
+
+ rc = ffs_part_info(ffs, part, NULL,
+ &start, &size, NULL, &ecc);
+ if (rc) {
+ /**
+ * @fwts-label SECBOOTNoPartition
+ * @fwts-advice OPAL could not find an SECBOOT partition
+ * on the system flash. Check that the system flash
+ * has a valid partition table, and that the firmware
+ * build process has added a SECBOOT partition.
+ */
+ prlog(PR_ERR, "FLASH: Can't parse ffs info for SECBOOT\n");
+ return OPAL_HARDWARE;
+ }
+
+ secboot_flash = flash;
+ secboot_offset = start;
+ secboot_size = ecc ? ecc_buffer_size_minus_ecc(size) : size;
+
+ return 0;
+}
+
+static int flash_nvram_probe(struct flash *flash, struct ffs_handle *ffs)
+{
+ uint32_t start, size, part;
+ bool ecc;
+ int rc;
+
+ prlog(PR_INFO, "probing for NVRAM\n");
+
+ rc = ffs_lookup_part(ffs, "NVRAM", &part);
+ if (rc) {
+ prlog(PR_WARNING, "no NVRAM partition found\n");
+ return OPAL_HARDWARE;
+ }
+
+ rc = ffs_part_info(ffs, part, NULL,
+ &start, &size, NULL, &ecc);
+ if (rc) {
+ /**
+ * @fwts-label NVRAMNoPartition
+ * @fwts-advice OPAL could not find an NVRAM partition
+ * on the system flash. Check that the system flash
+ * has a valid partition table, and that the firmware
+ * build process has added a NVRAM partition.
+ */
+ prlog(PR_ERR, "Can't parse ffs info for NVRAM\n");
+ return OPAL_HARDWARE;
+ }
+
+ nvram_flash = flash;
+ nvram_offset = start;
+ nvram_size = ecc ? ecc_buffer_size_minus_ecc(size) : size;
+
+ platform.nvram_info = flash_nvram_info;
+ platform.nvram_start_read = flash_nvram_start_read;
+ platform.nvram_write = flash_nvram_write;
+
+ return 0;
+}
+
+/* core flash support */
+
+static struct dt_node *flash_add_dt_node(struct flash *flash, int id)
+{
+ int i;
+ int rc;
+ const char *name;
+ bool ecc;
+ struct ffs_handle *ffs;
+ int ffs_part_num, ffs_part_start, ffs_part_size;
+ struct dt_node *flash_node;
+ struct dt_node *partition_container_node;
+ struct dt_node *partition_node;
+
+ flash_node = dt_new_addr(opal_node, "flash", id);
+ dt_add_property_strings(flash_node, "compatible", "ibm,opal-flash");
+ dt_add_property_cells(flash_node, "ibm,opal-id", id);
+ dt_add_property_u64(flash_node, "reg", flash->size);
+ dt_add_property_cells(flash_node, "ibm,flash-block-size",
+ flash->block_size);
+ if (flash->no_erase)
+ dt_add_property(flash_node, "no-erase", NULL, 0);
+
+ /* we fix to 32-bits */
+ dt_add_property_cells(flash_node, "#address-cells", 1);
+ dt_add_property_cells(flash_node, "#size-cells", 1);
+
+ /* Add partition container node */
+ partition_container_node = dt_new(flash_node, "partitions");
+ dt_add_property_strings(partition_container_node, "compatible", "fixed-partitions");
+
+ /* we fix to 32-bits */
+ dt_add_property_cells(partition_container_node, "#address-cells", 1);
+ dt_add_property_cells(partition_container_node, "#size-cells", 1);
+
+ /* Add partitions */
+ for (i = 0, name = NULL; i < ARRAY_SIZE(part_name_map); i++) {
+ name = part_name_map[i].name;
+
+ rc = ffs_init(0, flash->size, flash->bl, &ffs, 1);
+ if (rc) {
+ prerror("Can't open ffs handle\n");
+ continue;
+ }
+
+ rc = ffs_lookup_part(ffs, name, &ffs_part_num);
+ if (rc) {
+ /* This is not an error per se, some partitions
+ * are purposefully absent, don't spam the logs
+ */
+ prlog(PR_DEBUG, "No %s partition\n", name);
+ continue;
+ }
+ rc = ffs_part_info(ffs, ffs_part_num, NULL,
+ &ffs_part_start, NULL, &ffs_part_size, &ecc);
+ if (rc) {
+ prerror("Failed to get %s partition info\n", name);
+ continue;
+ }
+
+ partition_node = dt_new_addr(partition_container_node, "partition", ffs_part_start);
+ dt_add_property_strings(partition_node, "label", name);
+ dt_add_property_cells(partition_node, "reg", ffs_part_start, ffs_part_size);
+ if (part_name_map[i].id != RESOURCE_ID_KERNEL_FW) {
+ /* Mark all partitions other than the full PNOR and the boot kernel
+ * firmware as read only. These two partitions are the only partitions
+ * that are properly erase block aligned at this time.
+ */
+ dt_add_property(partition_node, "read-only", NULL, 0);
+ }
+ }
+
+ partition_node = dt_new_addr(partition_container_node, "partition", 0);
+ dt_add_property_strings(partition_node, "label", "PNOR");
+ dt_add_property_cells(partition_node, "reg", 0, flash->size);
+
+ return flash_node;
+}
+
+static void setup_system_flash(struct flash *flash, struct dt_node *node,
+ const char *name, struct ffs_handle *ffs)
+{
+ char *path;
+
+ if (!ffs)
+ return;
+
+ if (system_flash) {
+ /**
+ * @fwts-label SystemFlashMultiple
+ * @fwts-advice OPAL Found multiple system flash.
+ * Since we've already found a system flash we are
+ * going to use that one but this ordering is not
+ * guaranteed so may change in future.
+ */
+ prlog(PR_WARNING, "Attempted to register multiple system "
+ "flash: %s\n", name);
+ return;
+ }
+
+ prlog(PR_NOTICE, "Found system flash: %s id:%i\n",
+ name, flash->id);
+
+ system_flash = flash;
+ path = dt_get_path(node);
+ dt_add_property_string(dt_chosen, "ibm,system-flash", path);
+ free(path);
+
+ prlog(PR_INFO, "registered system flash device %s\n", name);
+
+ flash_nvram_probe(flash, ffs);
+ flash_secboot_probe(flash, ffs);
+}
+
+static int num_flashes(void)
+{
+ struct flash *flash;
+ int i = 0;
+
+ list_for_each(&flashes, flash, list)
+ i++;
+
+ return i;
+}
+
+int flash_register(struct blocklevel_device *bl)
+{
+ uint64_t size;
+ uint32_t block_size;
+ struct ffs_handle *ffs;
+ struct dt_node *node;
+ struct flash *flash;
+ const char *name;
+ int rc;
+
+ rc = blocklevel_get_info(bl, &name, &size, &block_size);
+ if (rc)
+ return rc;
+
+ if (!name)
+ name = "(unnamed)";
+
+ prlog(PR_INFO, "registering flash device %s "
+ "(size 0x%llx, blocksize 0x%x)\n",
+ name, size, block_size);
+
+ flash = malloc(sizeof(struct flash));
+ if (!flash) {
+ prlog(PR_ERR, "Error allocating flash structure\n");
+ return OPAL_RESOURCE;
+ }
+
+ flash->busy = false;
+ flash->bl = bl;
+ flash->no_erase = !(bl->flags & WRITE_NEED_ERASE);
+ flash->size = size;
+ flash->block_size = block_size;
+ flash->id = num_flashes();
+
+ rc = ffs_init(0, flash->size, bl, &ffs, 1);
+ if (rc) {
+ /**
+ * @fwts-label NoFFS
+ * @fwts-advice System flash isn't formatted as expected.
+ * This could mean several OPAL utilities do not function
+ * as expected. e.g. gard, pflash.
+ */
+ prlog(PR_WARNING, "No ffs info; "
+ "using raw device only\n");
+ ffs = NULL;
+ }
+
+ node = flash_add_dt_node(flash, flash->id);
+
+ setup_system_flash(flash, node, name, ffs);
+
+ if (ffs)
+ ffs_close(ffs);
+
+ lock(&flash_lock);
+ list_add(&flashes, &flash->list);
+ unlock(&flash_lock);
+
+ return OPAL_SUCCESS;
+}
+
+enum flash_op {
+ FLASH_OP_READ,
+ FLASH_OP_WRITE,
+ FLASH_OP_ERASE,
+};
+
+static int64_t opal_flash_op(enum flash_op op, uint64_t id, uint64_t offset,
+ uint64_t buf, uint64_t size, uint64_t token)
+{
+ struct flash *flash = NULL;
+ int rc;
+
+ if (!try_lock(&flash_lock))
+ return OPAL_BUSY;
+
+ list_for_each(&flashes, flash, list)
+ if (flash->id == id)
+ break;
+
+ if (flash->id != id) {
+ /* Couldn't find the flash */
+ rc = OPAL_PARAMETER;
+ goto err;
+ }
+
+ if (flash->busy) {
+ rc = OPAL_BUSY;
+ goto err;
+ }
+
+ if (size >= flash->size || offset >= flash->size
+ || offset + size > flash->size) {
+ rc = OPAL_PARAMETER;
+ goto err;
+ }
+
+ /*
+ * These ops intentionally have no smarts (ecc correction or erase
+ * before write) to them.
+ * Skiboot is simply exposing the PNOR flash to the host.
+ * The host is expected to understand that this is a raw flash
+ * device and treat it as such.
+ */
+ switch (op) {
+ case FLASH_OP_READ:
+ rc = blocklevel_raw_read(flash->bl, offset, (void *)buf, size);
+ break;
+ case FLASH_OP_WRITE:
+ rc = blocklevel_raw_write(flash->bl, offset, (void *)buf, size);
+ break;
+ case FLASH_OP_ERASE:
+ rc = blocklevel_erase(flash->bl, offset, size);
+ break;
+ default:
+ assert(0);
+ }
+
+ if (rc) {
+ rc = OPAL_HARDWARE;
+ goto err;
+ }
+
+ unlock(&flash_lock);
+
+ opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+ cpu_to_be64(token),
+ cpu_to_be64(rc));
+
+ return OPAL_ASYNC_COMPLETION;
+
+err:
+ unlock(&flash_lock);
+ return rc;
+}
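+
+/*
+ * Completion semantics of opal_flash_op() (a summary of the code above):
+ * the blocklevel_* operation runs synchronously, but the result is still
+ * reported to the host via an OPAL_MSG_ASYNC_COMP message carrying the
+ * caller's token, and the call itself returns OPAL_ASYNC_COMPLETION.
+ */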
+
+static int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf,
+ uint64_t size, uint64_t token)
+{
+ if (!opal_addr_valid((void *)buf))
+ return OPAL_PARAMETER;
+
+ return opal_flash_op(FLASH_OP_READ, id, offset, buf, size, token);
+}
+
+static int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf,
+ uint64_t size, uint64_t token)
+{
+ if (!opal_addr_valid((void *)buf))
+ return OPAL_PARAMETER;
+
+ return opal_flash_op(FLASH_OP_WRITE, id, offset, buf, size, token);
+}
+
+static int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
+ uint64_t token)
+{
+ return opal_flash_op(FLASH_OP_ERASE, id, offset, 0L, size, token);
+}
+
+opal_call(OPAL_FLASH_READ, opal_flash_read, 5);
+opal_call(OPAL_FLASH_WRITE, opal_flash_write, 5);
+opal_call(OPAL_FLASH_ERASE, opal_flash_erase, 4);
+
+/* flash resource API */
+const char *flash_map_resource_name(enum resource_id id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(part_name_map); i++) {
+ if (part_name_map[i].id == id)
+ return part_name_map[i].name;
+ }
+ return NULL;
+}
+
+static size_t sizeof_elf_from_hdr(void *buf)
+{
+ struct elf_hdr *elf = (struct elf_hdr *)buf;
+ size_t sz = 0;
+
+ BUILD_ASSERT(SECURE_BOOT_HEADERS_SIZE > sizeof(struct elf_hdr));
+ BUILD_ASSERT(SECURE_BOOT_HEADERS_SIZE > sizeof(struct elf64be_hdr));
+ BUILD_ASSERT(SECURE_BOOT_HEADERS_SIZE > sizeof(struct elf32be_hdr));
+
+ if (elf->ei_ident == ELF_IDENT) {
+ if (elf->ei_class == ELF_CLASS_64) {
+ if (elf->ei_data == ELF_DATA_LSB) {
+ struct elf64le_hdr *kh = (struct elf64le_hdr *)buf;
+ sz = le64_to_cpu(kh->e_shoff) +
+ ((uint32_t)le16_to_cpu(kh->e_shentsize) *
+ (uint32_t)le16_to_cpu(kh->e_shnum));
+ } else {
+ struct elf64be_hdr *kh = (struct elf64be_hdr *)buf;
+ sz = be64_to_cpu(kh->e_shoff) +
+ ((uint32_t)be16_to_cpu(kh->e_shentsize) *
+ (uint32_t)be16_to_cpu(kh->e_shnum));
+ }
+ } else if (elf->ei_class == ELF_CLASS_32) {
+ if (elf->ei_data == ELF_DATA_LSB) {
+ struct elf32le_hdr *kh = (struct elf32le_hdr *)buf;
+ sz = le32_to_cpu(kh->e_shoff) +
+ (le16_to_cpu(kh->e_shentsize) *
+ le16_to_cpu(kh->e_shnum));
+ } else {
+ struct elf32be_hdr *kh = (struct elf32be_hdr *)buf;
+ sz = be32_to_cpu(kh->e_shoff) +
+ (be16_to_cpu(kh->e_shentsize) *
+ be16_to_cpu(kh->e_shnum));
+ }
+ }
+ }
+
+ return sz;
+}
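+
+/*
+ * In short: the image size is taken to be e_shoff + e_shentsize * e_shnum,
+ * i.e. the ELF is assumed to end right after its section header table.
+ * This bounds the BOOTKERNEL/ROOTFS read below when there is no STB
+ * container to provide a payload size.
+ */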
+
+/*
+ * load a resource from FLASH
+ * buf and len shouldn't account for ECC even if partition is ECCed.
+ *
+ * The API here is a bit strange.
+ * If resource has a STB container, buf will contain it
+ * If loading a subpartition with an STB container, buf will *NOT* contain it
+ * For trusted boot, the whole partition containing the subpart is measured.
+ *
+ * Additionally, the logic to work out how much to read from flash is insane.
+ */
+static int flash_load_resource(enum resource_id id, uint32_t subid,
+ void *buf, size_t *len)
+{
+ int i;
+ int rc = OPAL_RESOURCE;
+ struct ffs_handle *ffs;
+ struct flash *flash;
+ const char *name;
+ bool status = false;
+ bool ecc;
+ bool part_signed = false;
+ void *bufp = buf;
+ size_t bufsz = *len;
+ int ffs_part_num, ffs_part_start, ffs_part_size;
+ int content_size = 0;
+ int offset = 0;
+
+ lock(&flash_lock);
+
+ if (!system_flash) {
+ /**
+ * @fwts-label SystemFlashNotFound
+ * @fwts-advice No system flash was found. Check for missing
+ * calls flash_register(...).
+ */
+ prlog(PR_WARNING, "Can't load resource id:%i. "
+ "No system flash found\n", id);
+ goto out_unlock;
+ }
+
+ flash = system_flash;
+
+ if (flash->busy)
+ goto out_unlock;
+
+ for (i = 0, name = NULL; i < ARRAY_SIZE(part_name_map); i++) {
+ if (part_name_map[i].id == id) {
+ name = part_name_map[i].name;
+ break;
+ }
+ }
+ if (!name) {
+ prerror("Couldn't find partition for id %d\n", id);
+ goto out_unlock;
+ }
+ /*
+ * If partition doesn't have a subindex but the caller specifies one,
+ * we fail. eg. kernel partition doesn't have a subindex
+ */
+ if ((part_name_map[i].subid == RESOURCE_SUBID_NONE) &&
+ (subid != RESOURCE_SUBID_NONE)) {
+ prerror("PLAT: Partition %s doesn't have subindex\n", name);
+ goto out_unlock;
+ }
+
+ rc = ffs_init(0, flash->size, flash->bl, &ffs, 1);
+ if (rc) {
+ prerror("Can't open ffs handle: %d\n", rc);
+ goto out_unlock;
+ }
+
+ rc = ffs_lookup_part(ffs, name, &ffs_part_num);
+ if (rc) {
+ /* This is not an error per se, some partitions
+ * are purposefully absent, don't spam the logs
+ */
+ prlog(PR_DEBUG, "No %s partition\n", name);
+ goto out_free_ffs;
+ }
+ rc = ffs_part_info(ffs, ffs_part_num, NULL,
+ &ffs_part_start, NULL, &ffs_part_size, &ecc);
+ if (rc) {
+ prerror("Failed to get %s partition info\n", name);
+ goto out_free_ffs;
+ }
+ prlog(PR_DEBUG,"%s partition %s ECC\n",
+ name, ecc ? "has" : "doesn't have");
+
+ /*
+ * FIXME: Make the fact we don't support partitions smaller than 4K
+ * more explicit.
+ */
+ if (ffs_part_size < SECURE_BOOT_HEADERS_SIZE) {
+ prerror("secboot headers bigger than "
+ "partition size 0x%x\n", ffs_part_size);
+ goto out_free_ffs;
+ }
+
+ rc = blocklevel_read(flash->bl, ffs_part_start, bufp,
+ SECURE_BOOT_HEADERS_SIZE);
+ if (rc) {
+ prerror("failed to read the first 0x%x from "
+ "%s partition, rc %d\n", SECURE_BOOT_HEADERS_SIZE,
+ name, rc);
+ goto out_free_ffs;
+ }
+
+ part_signed = stb_is_container(bufp, SECURE_BOOT_HEADERS_SIZE);
+
+ prlog(PR_DEBUG, "%s partition %s signed\n", name,
+ part_signed ? "is" : "isn't");
+
+ /*
+ * part_start/size are raw pointers into the partition.
+ * ie. they will account for ECC if included.
+ */
+
+ if (part_signed) {
+ bufp += SECURE_BOOT_HEADERS_SIZE;
+ bufsz -= SECURE_BOOT_HEADERS_SIZE;
+ content_size = stb_sw_payload_size(buf, SECURE_BOOT_HEADERS_SIZE);
+ *len = content_size + SECURE_BOOT_HEADERS_SIZE;
+
+ if (content_size > bufsz) {
+ prerror("content size > buffer size\n");
+ rc = OPAL_PARAMETER;
+ goto out_free_ffs;
+ }
+
+ if (*len > ffs_part_size) {
+ prerror("FLASH: Cannot load %s. Content is larger than the partition\n",
+ name);
+ rc = OPAL_PARAMETER;
+ goto out_free_ffs;
+ }
+
+ ffs_part_start += SECURE_BOOT_HEADERS_SIZE;
+
+ rc = blocklevel_read(flash->bl, ffs_part_start, bufp,
+ content_size);
+ if (rc) {
+ prerror("failed to read content size %d"
+ " %s partition, rc %d\n",
+ content_size, name, rc);
+ goto out_free_ffs;
+ }
+
+ if (subid == RESOURCE_SUBID_NONE)
+ goto done_reading;
+
+ rc = flash_subpart_info(bufp, content_size, ffs_part_size,
+ NULL, subid, &offset, &content_size);
+ if (rc) {
+ prerror("Failed to parse subpart info for %s\n",
+ name);
+ goto out_free_ffs;
+ }
+ bufp += offset;
+ goto done_reading;
+ } else /* !part_signed */ {
+ /*
+ * Back to the old way of doing things, no STB header.
+ */
+ if (subid == RESOURCE_SUBID_NONE) {
+ if (id == RESOURCE_ID_KERNEL ||
+ id == RESOURCE_ID_INITRAMFS) {
+ /*
+ * Because actualSize is a lie, we compute the
+ * size of the BOOTKERNEL based on what the ELF
+ * headers say. Otherwise we end up reading more
+ * than we should
+ */
+ content_size = sizeof_elf_from_hdr(buf);
+ if (!content_size) {
+ prerror("Invalid ELF header part"
+ " %s\n", name);
+ rc = OPAL_RESOURCE;
+ goto out_free_ffs;
+ }
+ } else {
+ content_size = ffs_part_size;
+ }
+ if (content_size > bufsz) {
+ prerror("%s content size %d > "
+ " buffer size %lu\n", name,
+ content_size, bufsz);
+ rc = OPAL_PARAMETER;
+ goto out_free_ffs;
+ }
+ prlog(PR_DEBUG, "computed %s size %u\n",
+ name, content_size);
+ rc = blocklevel_read(flash->bl, ffs_part_start,
+ buf, content_size);
+ if (rc) {
+ prerror("failed to read content size %d"
+ " %s partition, rc %d\n",
+ content_size, name, rc);
+ goto out_free_ffs;
+ }
+ *len = content_size;
+ goto done_reading;
+ }
+ BUILD_ASSERT(FLASH_SUBPART_HEADER_SIZE <= SECURE_BOOT_HEADERS_SIZE);
+ rc = flash_subpart_info(bufp, SECURE_BOOT_HEADERS_SIZE,
+ ffs_part_size, &ffs_part_size, subid,
+ &offset, &content_size);
+ if (rc) {
+ prerror("FAILED reading subpart info. rc=%d\n",
+ rc);
+ goto out_free_ffs;
+ }
+
+ *len = ffs_part_size;
+ prlog(PR_DEBUG, "Computed %s partition size: %u "
+ "(subpart %u size %u offset %u)\n", name, ffs_part_size,
+ subid, content_size, offset);
+ /*
+ * For a sub partition, we read the whole (computed)
+ * partition, and then measure that.
+ * Afterwards, we memmove() things back into place for
+ * the caller.
+ */
+ rc = blocklevel_read(flash->bl, ffs_part_start,
+ buf, ffs_part_size);
+
+ bufp += offset;
+ }
+
+done_reading:
+ /*
+ * Verify and measure the retrieved PNOR partition as part of the
+ * secure boot and trusted boot requirements
+ */
+ secureboot_verify(id, buf, *len);
+ trustedboot_measure(id, buf, *len);
+
+ /* Find subpartition */
+ if (subid != RESOURCE_SUBID_NONE) {
+ memmove(buf, bufp, content_size);
+ *len = content_size;
+ }
+
+ status = true;
+
+out_free_ffs:
+ ffs_close(ffs);
+out_unlock:
+ unlock(&flash_lock);
+ return status ? OPAL_SUCCESS : rc;
+}
+
+
+struct flash_load_resource_item {
+ enum resource_id id;
+ uint32_t subid;
+ int result;
+ void *buf;
+ size_t *len;
+ struct list_node link;
+};
+
+static LIST_HEAD(flash_load_resource_queue);
+static LIST_HEAD(flash_loaded_resources);
+static struct lock flash_load_resource_lock = LOCK_UNLOCKED;
+static struct cpu_job *flash_load_job = NULL;
+
+int flash_resource_loaded(enum resource_id id, uint32_t subid)
+{
+ struct flash_load_resource_item *resource = NULL;
+ struct flash_load_resource_item *r;
+ int rc = OPAL_BUSY;
+
+ lock(&flash_load_resource_lock);
+ list_for_each(&flash_loaded_resources, r, link) {
+ if (r->id == id && r->subid == subid) {
+ resource = r;
+ break;
+ }
+ }
+
+ if (resource) {
+ rc = resource->result;
+ list_del(&resource->link);
+ free(resource);
+ }
+
+ if (list_empty(&flash_load_resource_queue) && flash_load_job) {
+ cpu_wait_job(flash_load_job, true);
+ flash_load_job = NULL;
+ }
+
+ unlock(&flash_load_resource_lock);
+
+ return rc;
+}
+
+/*
+ * Retry for 10 minutes in 5 second intervals: allow 5 minutes for a BMC reboot
+ * (need the BMC if we're using HIOMAP flash access), then 2x for some margin.
+ */
+#define FLASH_LOAD_WAIT_MS 5000
+#define FLASH_LOAD_RETRIES (2 * 5 * (60 / (FLASH_LOAD_WAIT_MS / 1000)))
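+
+/*
+ * Worked out: FLASH_LOAD_RETRIES = 2 * 5 * (60 / 5) = 120 attempts, and
+ * 120 attempts * 5 seconds per wait = 600 seconds, i.e. the 10 minutes
+ * described above.
+ */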
+
+static void flash_load_resources(void *data __unused)
+{
+ struct flash_load_resource_item *r;
+ int retries = FLASH_LOAD_RETRIES;
+ int result = OPAL_RESOURCE;
+
+ lock(&flash_load_resource_lock);
+ do {
+ if (list_empty(&flash_load_resource_queue)) {
+ break;
+ }
+ r = list_top(&flash_load_resource_queue,
+ struct flash_load_resource_item, link);
+ if (r->result != OPAL_EMPTY)
+ prerror("flash_load_resources() list_top unexpected "
+ " result %d\n", r->result);
+ r->result = OPAL_BUSY;
+ unlock(&flash_load_resource_lock);
+
+ while (retries) {
+ result = flash_load_resource(r->id, r->subid, r->buf,
+ r->len);
+ if (result == OPAL_SUCCESS) {
+ retries = FLASH_LOAD_RETRIES;
+ break;
+ }
+
+ if (result != FLASH_ERR_AGAIN &&
+ result != FLASH_ERR_DEVICE_GONE)
+ break;
+
+ time_wait_ms(FLASH_LOAD_WAIT_MS);
+
+ retries--;
+
+ prlog(PR_WARNING,
+ "Retrying load of %d:%d, %d attempts remain\n",
+ r->id, r->subid, retries);
+ }
+
+ lock(&flash_load_resource_lock);
+ r = list_pop(&flash_load_resource_queue,
+ struct flash_load_resource_item, link);
+ /* Will reuse the result from when we hit retries == 0 */
+ r->result = result;
+ list_add_tail(&flash_loaded_resources, &r->link);
+ } while(true);
+ unlock(&flash_load_resource_lock);
+}
+
+static void start_flash_load_resource_job(void)
+{
+ if (flash_load_job)
+ cpu_wait_job(flash_load_job, true);
+
+ flash_load_job = cpu_queue_job(NULL, "flash_load_resources",
+ flash_load_resources, NULL);
+
+ cpu_process_local_jobs();
+}
+
+int flash_start_preload_resource(enum resource_id id, uint32_t subid,
+ void *buf, size_t *len)
+{
+ struct flash_load_resource_item *r;
+ bool start_thread = false;
+
+ r = malloc(sizeof(struct flash_load_resource_item));
+
+ assert(r != NULL);
+ r->id = id;
+ r->subid = subid;
+ r->buf = buf;
+ r->len = len;
+ r->result = OPAL_EMPTY;
+
+ prlog(PR_DEBUG, "Queueing preload of %x/%x\n",
+ r->id, r->subid);
+
+ lock(&flash_load_resource_lock);
+ if (list_empty(&flash_load_resource_queue)) {
+ start_thread = true;
+ }
+ list_add_tail(&flash_load_resource_queue, &r->link);
+ unlock(&flash_load_resource_lock);
+
+ if (start_thread)
+ start_flash_load_resource_job();
+
+ return OPAL_SUCCESS;
+}
+
+/*
+ * The `libxz` decompression routines are blocking; the wrappers below, built
+ * around the `libxz` functions, provide support for asynchronous
+ * decompression. There are two routines: one starts the decompression, and
+ * the other waits for it to complete.
+ *
+ * The decompressed image will be present in the `dst` parameter of
+ * `xz_decompress` structure.
+ *
+ * When the decompression is successful, the xz_decompress->status will be
+ * `OPAL_SUCCESS` else OPAL_PARAMETER, see definition of xz_decompress structure
+ * for details.
+ */
+static void xz_decompress(void *data)
+{
+ struct xz_decompress *xz = (struct xz_decompress *)data;
+ struct xz_dec *s;
+ struct xz_buf b;
+
+ /* Initialize the xz library first */
+ xz_crc32_init();
+ s = xz_dec_init(XZ_SINGLE, 0);
+ if (s == NULL) {
+ prerror("initialization error for xz\n");
+ xz->status = OPAL_NO_MEM;
+ return;
+ }
+
+ xz->xz_error = XZ_DATA_ERROR;
+ xz->status = OPAL_PARTIAL;
+
+ b.in = xz->src;
+ b.in_pos = 0;
+ b.in_size = xz->src_size;
+ b.out = xz->dst;
+ b.out_pos = 0;
+ b.out_size = xz->dst_size;
+
+ /* Start decompressing */
+ xz->xz_error = xz_dec_run(s, &b);
+ if (xz->xz_error != XZ_STREAM_END) {
+ prerror("failed to decompress subpartition\n");
+ xz->status = OPAL_PARAMETER;
+ } else
+ xz->status = OPAL_SUCCESS;
+
+ xz_dec_end(s);
+}
+
+/*
+ * xz_start_decompress: start the decompression job and return.
+ *
+ * struct xz_decompress *xz, should be populated by the caller with
+ * - the starting address of the compressed binary
+ * - the address where the decompressed image should be placed
+ * - the sizes of the source and the destination
+ *
+ * xz->src: Source address (The compressed binary)
+ * xz->src_size: Source size
+ * xz->dst: Destination address (The memory area where the `src` will be
+ * decompressed)
+ * xz->dst_size: Destination size
+ *
+ * The `status` value will be OPAL_PARTIAL till the job completes (successfully
+ * or not)
+ */
+void xz_start_decompress(struct xz_decompress *xz)
+{
+ struct cpu_job *job;
+
+ if (!xz)
+ return;
+
+ if (!xz->dst || !xz->dst_size || !xz->src || !xz->src_size) {
+ xz->status = OPAL_PARAMETER;
+ return;
+ }
+
+ job = cpu_queue_job(NULL, "xz_decompress", xz_decompress,
+ (void *) xz);
+ if (!job) {
+ xz->status = OPAL_NO_MEM;
+ return;
+ }
+
+ xz->job = job;
+}
+
+/*
+ * This function waits for the decompression job to complete. The `status`
+ * member of `xz_decompress` will hold the result code.
+ *
+ * status == OPAL_SUCCESS on success, else the corresponding error code.
+ */
+void wait_xz_decompress(struct xz_decompress *xz)
+{
+ if (!xz)
+ return;
+
+ cpu_wait_job(xz->job, true);
+}
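+
+/*
+ * A minimal usage sketch (illustrative; the buffer names are hypothetical):
+ *
+ *     struct xz_decompress xz = {
+ *             .src = comp_buf, .src_size = comp_len,
+ *             .dst = image_buf, .dst_size = image_len,
+ *     };
+ *     xz_start_decompress(&xz);
+ *     // ... overlap other work here ...
+ *     wait_xz_decompress(&xz);
+ *     if (xz.status != OPAL_SUCCESS)
+ *             prerror("decompression failed\n");
+ */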
diff --git a/roms/skiboot/core/gcov-profiling.c b/roms/skiboot/core/gcov-profiling.c
new file mode 100644
index 000000000..fdad51ed9
--- /dev/null
+++ b/roms/skiboot/core/gcov-profiling.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * libgcov skeleton reimplementation to build skiboot with gcov support
+ *
+ * Copyright 2015-2018 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <compiler.h>
+#include <stdio.h>
+
+typedef long gcov_type;
+
+/*
+ * This is a GCC-internal data structure. See GCC libgcc/libgcov.h for
+ * details.
+ *
+ * If gcc changes this, we have to change it.
+ */
+
+typedef unsigned int gcov_unsigned_int;
+
+#if __GNUC__ == 4 && __GNUC_MINOR__ >= 9
+#define GCOV_COUNTERS 9
+#else
+#define GCOV_COUNTERS 8
+#endif
+
+struct gcov_info
+{
+ gcov_unsigned_int version;
+ struct gcov_info *next;
+ gcov_unsigned_int stamp;
+ const char *filename;
+ void (*merge[GCOV_COUNTERS])(gcov_type *, unsigned int);
+ unsigned int n_functions;
+ struct gcov_fn_info **functions;
+};
+
+/* We have a list of all gcov info set up at startup */
+struct gcov_info *gcov_info_list;
+
+void __gcov_init(struct gcov_info* f);
+void skiboot_gcov_done(void);
+void __gcov_flush(void);
+void __gcov_merge_add(gcov_type *counters, unsigned int n_counters);
+void __gcov_merge_single(gcov_type *counters, unsigned int n_counters);
+void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters);
+void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters);
+void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters);
+void __gcov_exit(void);
+
+void __gcov_init(struct gcov_info* f)
+{
+ static gcov_unsigned_int version = 0;
+
+ if (version == 0) {
+ printf("GCOV version: %u\n", f->version);
+ version = f->version;
+ }
+
+ if (gcov_info_list)
+ f->next = gcov_info_list;
+
+ gcov_info_list = f;
+ return;
+}
+
+void skiboot_gcov_done(void)
+{
+ struct gcov_info *i = gcov_info_list;
+
+ if (i->filename)
+ printf("GCOV: gcov_info_list looks sane (first file: %s)\n",
+ i->filename);
+ else
+ prlog(PR_WARNING, "GCOV: gcov_info_list doesn't look sane. "
+ "i->filename == NULL.\n");
+
+ printf("GCOV: gcov_info_list at 0x%p\n", gcov_info_list);
+}
+
+void __gcov_merge_add(gcov_type *counters, unsigned int n_counters)
+{
+ (void)counters;
+ (void)n_counters;
+
+ return;
+}
+
+void __gcov_flush(void)
+{
+ return;
+}
+
+void __gcov_merge_single(gcov_type *counters, unsigned int n_counters)
+{
+ (void)counters;
+ (void)n_counters;
+
+ return;
+}
+
+void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters)
+{
+ (void)counters;
+ (void)n_counters;
+
+ return;
+}
+
+void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters)
+{
+ (void)counters;
+ (void)n_counters;
+ return;
+}
+
+void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters)
+{
+ (void)counters;
+ (void)n_counters;
+}
+
+void __gcov_exit(void)
+{
+}
diff --git a/roms/skiboot/core/hmi.c b/roms/skiboot/core/hmi.c
new file mode 100644
index 000000000..9363cc5fb
--- /dev/null
+++ b/roms/skiboot/core/hmi.c
@@ -0,0 +1,1558 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Deal with Hypervisor Maintenance Interrupts
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "HMI: " fmt
+
+#include <skiboot.h>
+#include <opal.h>
+#include <opal-msg.h>
+#include <processor.h>
+#include <chiptod.h>
+#include <xscom.h>
+#include <xscom-p8-regs.h>
+#include <xscom-p9-regs.h>
+#include <xscom-p10-regs.h>
+#include <pci.h>
+#include <cpu.h>
+#include <chip.h>
+#include <npu-regs.h>
+#include <npu2-regs.h>
+#include <npu2.h>
+#include <npu.h>
+#include <capp.h>
+#include <nvram.h>
+#include <cpu.h>
+
+/*
+ * P9 HMER register layout:
+ * +===+==========+============================+========+===================+
+ * |Bit|Name |Description |PowerKVM|Action |
+ * | | | |HMI | |
+ * | | | |enabled | |
+ * | | | |for this| |
+ * | | | |bit ? | |
+ * +===+==========+============================+========+===================+
+ * |0 |malfunctio|A processor core in the |Yes |Raise attn from |
+ * | |n_alert |system has checkstopped | |sapphire resulting |
+ * | | |(failed recovery) and has | |xstop |
+ * | | |requested a CP Sparing | | |
+ * | | |to occur. This is | | |
+ * | | |broadcasted to every | | |
+ * | | |processor in the system | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |1 |Reserved |reserved |n/a | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |2 |proc_recv_|Processor recovery occurred |Yes |Log message and |
+ * | |done |error-bit in fir not masked | |continue working. |
+ * | | |(see bit 11) | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |3 |proc_recv_|Processor went through |Yes |Log message and |
+ * | |error_mask|recovery for an error which | |continue working. |
+ * | |ed |is actually masked for | | |
+ * | | |reporting | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |4 | |Timer facility experienced |Yes |Raise attn from |
+ * | |tfac_error|an error. | |sapphire resulting |
+ * | | |TB, DEC, HDEC, PURR or SPURR| |xstop |
+ * | | |may be corrupted (details in| | |
+ * | | |TFMR) | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |5 | |TFMR SPR itself is |Yes |Raise attn from |
+ * | |tfmr_parit|corrupted. | |sapphire resulting |
+ * | |y_error |Entire timing facility may | |xstop |
+ * | | |be compromised. | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |6 |ha_overflo| UPS (Uninterrupted Power |No |N/A |
+ * | |w_warning |System) Overflow indication | | |
+ * | | |indicating that the UPS | | |
+ * | | |DirtyAddrTable has | | |
+ * | | |reached a limit where it | | |
+ * | | |requires PHYP unload support| | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |7 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |8 |xscom_fail|An XSCOM operation caused by|No |We handle it by |
+ * | | |a cache inhibited load/store| |manually reading |
+ * | | |from this thread failed. A | |HMER register. |
+ * | | |trap register is | | |
+ * | | |available. | | |
+ * | | | | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |9 |xscom_done|An XSCOM operation caused by|No |We handle it by |
+ * | | |a cache inhibited load/store| |manually reading |
+ * | | |from this thread completed. | |HMER register. |
+ * | | |If hypervisor | | |
+ * | | |intends to use this bit, it | | |
+ * | | |is responsible for clearing | | |
+ * | | |it before performing the | | |
+ * | | |xscom operation. | | |
+ * | | |NOTE: this bit should always| | |
+ * | | |be masked in HMEER | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |10 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |11 |proc_recv_|Processor recovery occurred |y |Log message and |
+ * | |again |again before bit2 or bit3 | |continue working. |
+ * | | |was cleared | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |12-|reserved |was temperature sensor |n/a |n/a |
+ * |15 | |passed the critical point on| | |
+ * | | |the way up | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |16 | |SCOM has set a reserved FIR |No |n/a |
+ * | |scom_fir_h|bit to cause recovery | | |
+ * | |m | | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |17 |trig_fir_h|Debug trigger has set a |No |n/a |
+ * | |mi |reserved FIR bit to cause | | |
+ * | | |recovery | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |18 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |19 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |20 |hyp_resour|A hypervisor resource error |y |Raise attn from |
+ * | |ce_err |occurred: data parity error | |sapphire resulting |
+ * |   |          |on SPRC0:3, SPR_Modereg or  |        |xstop.             |
+ * | | |HMEER. | | |
+ * |   |          |Note: this bit will cause a |        |                   |
+ * | | |check_stop when (HV=1, PR=0 | | |
+ * | | |and EE=0) | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |21-| |if bit 8 is active, the |No |We handle it by |
 * |23 |xscom_stat|reason will be detailed in  |        |manually reading   |
 * |   |us        |these bits. See chapter 11.1|        |HMER register.     |
 * |   |          |These bits are information  |        |                   |
+ * | | |only and always masked | | |
+ * | | |(mask = '0') | | |
+ * | | |If hypervisor intends to use| | |
+ * | | |this bit, it is responsible | | |
+ * | | |for clearing it before | | |
+ * | | |performing the xscom | | |
+ * | | |operation. | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |24-|Not |Not implemented |n/a |n/a |
+ * |63 |implemente| | | |
+ * | |d | | | |
+ * +-- +----------+----------------------------+--------+-------------------+
+ *
+ * The above HMER bits can be enabled/disabled by modifying the
+ * SPR_HMEER_HMI_ENABLE_MASK #define in include/processor.h.
+ * If you modify support for any of the bits listed above, please make sure
+ * you change the above table to reflect that.
+ *
+ * NOTE: Per Dave Larson, never enable 8,9,21-23
+ */
+
+/*
+ * P10 HMER register layout:
+ * Bit Name Description
+ * 0 malfunction_alert A processor core in the system has checkstopped
+ * (failed recovery). This is broadcasted to every
+ * processor in the system
+ *
+ * 1 reserved reserved
+ *
+ * 2 proc_rcvy_done Processor recovery occurred error-bit in fir not
+ * masked (see bit 11)
+ *
+ * 3 reserved reserved
+ *
+ * 4 tfac_error Timer facility experienced an error. TB, DEC,
+ * HDEC, PURR or SPURR may be corrupted (details in
+ * TFMR)
+ *
+ * 5 tfx_error Error occurred on transfer from tfac shadow to
+ * core
+ *
+ * 6 spurr_scale_limit Nominal frequency exceeded 399 percent
+ *
+ * 7 reserved reserved
+ *
+ * 8 xscom_fail An XSCOM operation caused by a cache inhibited
+ * load/store from this thread failed. A trap
+ * register is available.
+ *
+ * 9 xscom_done An XSCOM operation caused by a cache inhibited
+ * load/store from this thread completed. If
+ * hypervisor intends to use this bit, it is
+ * responsible for clearing it before performing the
+ * xscom operation. NOTE: this bit should always be
+ * masked in HMEER
+ *
+ * 10 reserved reserved
+ *
+ * 11 proc_rcvy_again Processor recovery occurred again before bit 2
+ * was cleared
+ *
+ * 12-15 reserved reserved
+ *
+ * 16 scom_fir_hmi An error inject to PC FIR has occurred to set HMI.
+ * This error inject can also set FIR(61) to cause
+ * recovery.
+ *
+ * 17 reserved reserved
+ *
+ * 18 trig_fir_hmi Debug trigger has occurred to set HMI. This
+ * trigger can also set FIR(60) to cause recovery
+ *
+ * 19-20 reserved reserved
+ *
+ * 21-23 xscom_status If bit 8 is active, the reason will be detailed in
+ * these bits. These bits are information only and
+ * always masked (mask = ‘0’) If hypervisor intends
+ * to use this field, it is responsible for clearing
+ * it before performing the xscom operation.
+ *
+ * 24:63 Not implemented Not implemented.
+ *
+ * P10 HMEER enabled bits:
+ * Name Action
+ * malfunction_alert Decode and log FIR bits.
+ * proc_rcvy_done Log and continue.
+ * tfac_error Log and attempt to recover time facilities.
+ * tfx_error Log and attempt to recover time facilities.
+ * spurr_scale_limit Log and continue. XXX?
+ * proc_rcvy_again Log and continue.
+ */
+
+/* Used for tracking cpu threads inside hmi handling. */
+#define HMI_STATE_CLEANUP_DONE 0x100
+#define CORE_THREAD_MASK 0x0ff
+#define SUBCORE_THREAD_MASK(s_id, t_count) \
+ ((((1UL) << (t_count)) - 1) << ((s_id) * (t_count)))
+#define SINGLE_THREAD_MASK(t_id) ((1UL) << (t_id))
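+
+/*
+ * Illustrative examples of the masks above: SUBCORE_THREAD_MASK(1, 4)
+ * evaluates to ((1 << 4) - 1) << (1 * 4) = 0x0f0, i.e. the four threads of
+ * subcore 1, while SINGLE_THREAD_MASK(5) is 1 << 5 = 0x020.
+ */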
+
+/*
+ * Number of iterations for the various timeouts. We can't use the timebase
+ * as it might be broken. We measured experimentally that 40 million loops
+ * of cpu_relax() give us more than 1s. The margin is comfortable enough.
+ */
+#define TIMEOUT_LOOPS 40000000
+
+/* TFMR other errors. (other than bit 26 and 45) */
+#define SPR_TFMR_OTHER_ERRORS \
+ (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \
+ SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \
+ SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \
+ SPR_TFMR_DEC_PARITY_ERR | SPR_TFMR_TFMR_CORRUPT | \
+ SPR_TFMR_CHIP_TOD_INTERRUPT)
+
+/* TFMR "all core" errors (sent to all threads) */
+#define SPR_TFMR_CORE_ERRORS \
+ (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \
+ SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \
+ SPR_TFMR_TFMR_CORRUPT | SPR_TFMR_TB_RESIDUE_ERR | \
+ SPR_TFMR_HDEC_PARITY_ERROR | SPR_TFMR_TFAC_XFER_ERROR)
+
+/* TFMR "thread" errors */
+#define SPR_TFMR_THREAD_ERRORS \
+ (SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \
+ SPR_TFMR_DEC_PARITY_ERR)
+
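+/*
+ * The "thread" error group is handled locally by the reporting thread
+ * (see handle_thread_tfac_error() below), while the "core" group requires
+ * all threads of the core to rendez-vous before the shared timebase state
+ * is cleaned up (see handle_all_core_tfac_error() below).
+ */
+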
+/*
+ * Starting from P9, core inits are set up to escalate all core-local
+ * checkstops to system checkstops. Review this list when that changes.
+ */
+static const struct core_xstop_bit_info {
+ uint8_t bit; /* CORE FIR bit number */
+ enum OpalHMI_CoreXstopReason reason;
+} xstop_bits[] = {
+ { 3, CORE_CHECKSTOP_IFU_REGFILE },
+ { 5, CORE_CHECKSTOP_IFU_LOGIC },
+ { 8, CORE_CHECKSTOP_PC_DURING_RECOV },
+ { 10, CORE_CHECKSTOP_ISU_REGFILE },
+ { 12, CORE_CHECKSTOP_ISU_LOGIC },
+ { 21, CORE_CHECKSTOP_FXU_LOGIC },
+ { 25, CORE_CHECKSTOP_VSU_LOGIC },
+ { 26, CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE },
+ { 32, CORE_CHECKSTOP_LSU_REGFILE },
+ { 36, CORE_CHECKSTOP_PC_FWD_PROGRESS },
+ { 38, CORE_CHECKSTOP_LSU_LOGIC },
+ { 45, CORE_CHECKSTOP_PC_LOGIC },
+ { 48, CORE_CHECKSTOP_PC_HYP_RESOURCE },
+ { 52, CORE_CHECKSTOP_PC_HANG_RECOV_FAILED },
+ { 54, CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED },
+ { 63, CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ },
+};
+
+struct core_fir_bit_info {
+ uint8_t bit; /* CORE FIR bit number */
+ const char *reason;
+};
+
+static const struct core_fir_bit_info p9_recoverable_bits[] = {
+ { 0, "IFU - SRAM (ICACHE parity, etc)" },
+ { 2, "IFU - RegFile" },
+ { 4, "IFU - Logic" },
+ { 9, "ISU - RegFile" },
+ { 11, "ISU - Logic" },
+ { 13, "ISU - Recoverable due to not in MT window" },
+ { 24, "VSU - Logic" },
+ { 27, "VSU - DFU logic" },
+ { 29, "LSU - SRAM (DCACHE parity, etc)" },
+ { 31, "LSU - RegFile" },
+ /* The following 3 bits may be set by SRAM errors. */
+ { 33, "LSU - TLB multi hit" },
+ { 34, "LSU - SLB multi hit" },
+ { 35, "LSU - ERAT multi hit" },
+ { 37, "LSU - Logic" },
+ { 39, "LSU - Recoverable due to not in MT window" },
+ { 43, "PC - Thread hang recovery" },
+};
+
+static const struct core_fir_bit_info p10_core_fir_bits[] = {
+ { 0, "IFU - SRAM recoverable error (ICACHE parity error, etc.)" },
+ { 1, "PC - TC checkstop" },
+ { 2, "IFU - RegFile recoverable error" },
+ { 3, "IFU - RegFile core checkstop" },
+ { 4, "IFU - Logic recoverable error" },
+ { 5, "IFU - Logic core checkstop" },
+ { 7, "VSU - Inference accumulator recoverable error" },
+ { 8, "PC - Recovery core checkstop" },
+ { 9, "VSU - Slice Target File (STF) recoverable error" },
+ { 11, "ISU - Logic recoverable error" },
+ { 12, "ISU - Logic core checkstop" },
+ { 14, "ISU - Machine check received while ME=0 checkstop" },
+ { 15, "ISU - UE from L2" },
+ { 16, "ISU - Number of UEs from L2 above threshold" },
+ { 17, "ISU - UE on CI load" },
+ { 18, "MMU - TLB recoverable error" },
+ { 19, "MMU - SLB error" },
+ { 21, "MMU - CXT recoverable error" },
+ { 22, "MMU - Logic core checkstop" },
+ { 23, "MMU - MMU system checkstop" },
+ { 24, "VSU - Logic recoverable error" },
+ { 25, "VSU - Logic core checkstop" },
+ { 26, "PC - In maint mode and recovery in progress" },
+ { 28, "PC - PC system checkstop" },
+ { 29, "LSU - SRAM recoverable error (DCACHE parity error, etc.)" },
+ { 30, "LSU - Set deleted" },
+ { 31, "LSU - RegFile recoverable error" },
+ { 32, "LSU - RegFile core checkstop" },
+ { 33, "MMU - TLB multi hit error occurred" },
+ { 34, "MMU - SLB multi hit error occurred" },
+ { 35, "LSU - ERAT multi hit error occurred" },
+ { 36, "PC - Forward progress error" },
+ { 37, "LSU - Logic recoverable error" },
+ { 38, "LSU - Logic core checkstop" },
+ { 41, "LSU - System checkstop" },
+ { 43, "PC - Thread hang recoverable error" },
+ { 45, "PC - Logic core checkstop" },
+ { 47, "PC - TimeBase facility checkstop" },
+ { 52, "PC - Hang recovery failed core checkstop" },
+ { 53, "PC - Core internal hang detected" },
+ { 55, "PC - Nest hang detected" },
+ { 56, "PC - Other core chiplet recoverable error" },
+ { 57, "PC - Other core chiplet core checkstop" },
+ { 58, "PC - Other core chiplet system checkstop" },
+ { 59, "PC - SCOM satellite error detected" },
+ { 60, "PC - Debug trigger error inject" },
+ { 61, "PC - SCOM or firmware recoverable error inject" },
+ { 62, "PC - Firmware checkstop error inject" },
+ { 63, "PC - Firmware SPRC / SPRD checkstop" },
+};
+
+static const struct nx_xstop_bit_info {
+ uint8_t bit; /* NX FIR bit number */
+ enum OpalHMI_NestAccelXstopReason reason;
+} nx_dma_xstop_bits[] = {
+ { 1, NX_CHECKSTOP_SHM_INVAL_STATE_ERR },
+ { 15, NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1 },
+ { 16, NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2 },
+ { 20, NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR },
+ { 21, NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR },
+ { 22, NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR },
+ { 23, NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR },
+ { 24, NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR },
+ { 25, NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR },
+ { 26, NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR },
+ { 27, NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR },
+ { 31, NX_CHECKSTOP_DMA_CRB_UE },
+ { 32, NX_CHECKSTOP_DMA_CRB_SUE },
+};
+
+static const struct nx_xstop_bit_info nx_pbi_xstop_bits[] = {
+ { 12, NX_CHECKSTOP_PBI_ISN_UE },
+};
+
+static struct lock hmi_lock = LOCK_UNLOCKED;
+static uint32_t malf_alert_scom;
+static uint32_t nx_status_reg;
+static uint32_t nx_dma_engine_fir;
+static uint32_t nx_pbi_fir;
+
+static int setup_scom_addresses(void)
+{
+ switch (proc_gen) {
+ case proc_gen_p8:
+ malf_alert_scom = P8_MALFUNC_ALERT;
+ nx_status_reg = P8_NX_STATUS_REG;
+ nx_dma_engine_fir = P8_NX_DMA_ENGINE_FIR;
+ nx_pbi_fir = P8_NX_PBI_FIR;
+ return 1;
+ case proc_gen_p9:
+ malf_alert_scom = P9_MALFUNC_ALERT;
+ nx_status_reg = P9_NX_STATUS_REG;
+ nx_dma_engine_fir = P9_NX_DMA_ENGINE_FIR;
+ nx_pbi_fir = P9_NX_PBI_FIR;
+ return 1;
+ case proc_gen_p10:
+ malf_alert_scom = P10_MALFUNC_ALERT;
+ nx_status_reg = P10_NX_STATUS_REG;
+ nx_dma_engine_fir = P10_NX_DMA_ENGINE_FIR;
+ nx_pbi_fir = P10_NX_PBI_FIR;
+ return 1;
+ default:
+ prerror("%s: Unknown CPU type\n", __func__);
+ break;
+ }
+ return 0;
+}
+
+static int queue_hmi_event(struct OpalHMIEvent *hmi_evt, int recover, uint64_t *out_flags)
+{
+ size_t size;
+
+ /* Don't queue up event if recover == -1 */
+ if (recover == -1)
+ return 0;
+
+ /* set disposition */
+ if (recover == 1)
+ hmi_evt->disposition = OpalHMI_DISPOSITION_RECOVERED;
+ else if (recover == 0)
+ hmi_evt->disposition = OpalHMI_DISPOSITION_NOT_RECOVERED;
+
+ /*
+	 * V2 of struct OpalHMIEvent is (5 * 64 bits) in size and well packed.
+	 * Hence we use a uint64_t pointer to pass the entire structure as
+	 * 5 params in the generic message format. Instead of hard coding
+	 * num_params, divide the struct size by 8 bytes to get the exact
+	 * num_params value.
+ */
+ size = ALIGN_UP(sizeof(*hmi_evt), sizeof(u64));
+
+ *out_flags |= OPAL_HMI_FLAGS_NEW_EVENT;
+
+ /* queue up for delivery to host. */
+ return _opal_queue_msg(OPAL_MSG_HMI_EVT, NULL, NULL,
+ size, hmi_evt);
+}
+
+static int read_core_fir(uint32_t chip_id, uint32_t core_id, uint64_t *core_fir)
+{
+ int rc;
+
+ switch (proc_gen) {
+ case proc_gen_p8:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P8_EX(core_id, P8_CORE_FIR), core_fir);
+ break;
+ case proc_gen_p9:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P9_EC(core_id, P9_CORE_FIR), core_fir);
+ break;
+ case proc_gen_p10:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIR), core_fir);
+ break;
+ default:
+ rc = OPAL_HARDWARE;
+ }
+ return rc;
+}
+
+static int read_core_wof(uint32_t chip_id, uint32_t core_id, uint64_t *core_wof)
+{
+ int rc;
+
+ switch (proc_gen) {
+ case proc_gen_p9:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P9_EC(core_id, P9_CORE_WOF), core_wof);
+ break;
+ case proc_gen_p10:
+ rc = xscom_read(chip_id,
+ XSCOM_ADDR_P10_EC(core_id, P10_CORE_WOF), core_wof);
+ break;
+ default:
+ rc = OPAL_HARDWARE;
+ }
+ return rc;
+}
+
+static bool decode_core_fir(struct cpu_thread *cpu,
+ struct OpalHMIEvent *hmi_evt)
+{
+ uint64_t core_fir;
+ uint32_t core_id;
+ int i, swkup_rc;
+ bool found = false;
+ int64_t ret;
+ const char *loc;
+
+ /* Sanity check */
+ if (!cpu || !hmi_evt)
+ return false;
+
+ core_id = pir_to_core_id(cpu->pir);
+
+	/* Force the core to wake up, otherwise reading core_fir is unreliable
+ * if stop-state 5 is enabled.
+ */
+ swkup_rc = dctl_set_special_wakeup(cpu);
+
+ /* Get CORE FIR register value. */
+ ret = read_core_fir(cpu->chip_id, core_id, &core_fir);
+
+ if (!swkup_rc)
+ dctl_clear_special_wakeup(cpu);
+
+
+ if (ret == OPAL_WRONG_STATE) {
+ /*
+ * CPU is asleep, so it probably didn't cause the checkstop.
+ * If no other HMI cause is found a "catchall" checkstop
+ * will be raised, so if this CPU should've been awake the
+ * error will be handled appropriately.
+ */
+ prlog(PR_DEBUG,
+ "FIR read failed, chip %d core %d asleep\n",
+ cpu->chip_id, core_id);
+ return false;
+ } else if (ret != OPAL_SUCCESS) {
+ prerror("XSCOM error reading CORE FIR\n");
+ /* If the FIR can't be read, we should checkstop. */
+ return true;
+ }
+
+ if (!core_fir)
+ return false;
+
+ loc = chip_loc_code(cpu->chip_id);
+ prlog(PR_INFO, "[Loc: %s]: CHIP ID: %x, CORE ID: %x, FIR: %016llx\n",
+ loc ? loc : "Not Available",
+ cpu->chip_id, core_id, core_fir);
+
+ if (proc_gen == proc_gen_p10) {
+ for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) {
+ if (core_fir & PPC_BIT(p10_core_fir_bits[i].bit))
+ prlog(PR_INFO, " %s\n", p10_core_fir_bits[i].reason);
+ }
+ }
+
+ /* Check CORE FIR bits and populate HMI event with error info. */
+ for (i = 0; i < ARRAY_SIZE(xstop_bits); i++) {
+ if (core_fir & PPC_BIT(xstop_bits[i].bit)) {
+ found = true;
+ hmi_evt->u.xstop_error.xstop_reason
+ |= cpu_to_be32(xstop_bits[i].reason);
+ }
+ }
+ return found;
+}
+
+static void find_core_checkstop_reason(struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct cpu_thread *cpu;
+
+ /* Initialize HMI event */
+ hmi_evt->severity = OpalHMI_SEV_FATAL;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_CORE;
+
+ /*
+ * Check CORE FIRs and find the reason for core checkstop.
+ * Send a separate HMI event for each core that has checkstopped.
+ */
+ for_each_cpu(cpu) {
+ /* GARDed CPUs are marked unavailable. Skip them. */
+ if (cpu->state == cpu_state_unavailable)
+ continue;
+
+ /* Only check on primaries (ie. core), not threads */
+ if (cpu->is_secondary)
+ continue;
+
+ /* Initialize xstop_error fields. */
+ hmi_evt->u.xstop_error.xstop_reason = 0;
+ hmi_evt->u.xstop_error.u.pir = cpu_to_be32(cpu->pir);
+
+ if (decode_core_fir(cpu, hmi_evt))
+ queue_hmi_event(hmi_evt, 0, out_flags);
+ }
+}
+
+static void find_capp_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct capp_info info;
+ struct phb *phb;
+ uint64_t capp_fir;
+ uint64_t capp_fir_mask;
+ uint64_t capp_fir_action0;
+ uint64_t capp_fir_action1;
+ uint64_t reg;
+ int64_t rc;
+
+ /* CAPP exists on P8 and P9 only */
+ if (proc_gen != proc_gen_p8 && proc_gen != proc_gen_p9)
+ return;
+
+ /* Find the CAPP on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ /* get the CAPP info */
+ rc = capp_get_info(flat_chip_id, phb, &info);
+ if (rc == OPAL_PARAMETER)
+ continue;
+
+ if (xscom_read(flat_chip_id, info.capp_fir_reg, &capp_fir) ||
+ xscom_read(flat_chip_id, info.capp_fir_mask_reg,
+ &capp_fir_mask) ||
+ xscom_read(flat_chip_id, info.capp_fir_action0_reg,
+ &capp_fir_action0) ||
+ xscom_read(flat_chip_id, info.capp_fir_action1_reg,
+ &capp_fir_action1)) {
+ prerror("CAPP: Couldn't read CAPP#%d (PHB:#%x) FIR registers by XSCOM!\n",
+ info.capp_index, info.phb_index);
+ continue;
+ }
+
+ if (!(capp_fir & ~capp_fir_mask))
+ continue;
+
+ prlog(PR_DEBUG, "CAPP#%d (PHB:#%x): FIR 0x%016llx mask 0x%016llx\n",
+ info.capp_index, info.phb_index, capp_fir,
+ capp_fir_mask);
+ prlog(PR_DEBUG, "CAPP#%d (PHB:#%x): ACTION0 0x%016llx, ACTION1 0x%016llx\n",
+ info.capp_index, info.phb_index, capp_fir_action0,
+ capp_fir_action1);
+
+ /*
+ * If this bit is set (=1) a Recoverable Error has been
+ * detected
+ */
+ xscom_read(flat_chip_id, info.capp_err_status_ctrl_reg, &reg);
+ if ((reg & PPC_BIT(0)) != 0) {
+ phb_lock(phb);
+ phb->ops->set_capp_recovery(phb);
+ phb_unlock(phb);
+
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_CAPP_RECOVERY;
+ queue_hmi_event(hmi_evt, 1, out_flags);
+
+ return;
+ }
+ }
+}
+
+static void find_nx_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ uint64_t nx_status;
+ uint64_t nx_dma_fir;
+ uint64_t nx_pbi_fir_val;
+ int i;
+
+ /* Get NX status register value. */
+ if (xscom_read(flat_chip_id, nx_status_reg, &nx_status) != 0) {
+ prerror("XSCOM error reading NX_STATUS_REG\n");
+ return;
+ }
+
+ /* Check if NX has driven an HMI interrupt. */
+ if (!(nx_status & NX_HMI_ACTIVE))
+ return;
+
+ /* Initialize HMI event */
+ hmi_evt->severity = OpalHMI_SEV_FATAL;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NX;
+ hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
+
+ /* Get DMA & Engine FIR data register value. */
+ if (xscom_read(flat_chip_id, nx_dma_engine_fir, &nx_dma_fir) != 0) {
+ prerror("XSCOM error reading NX_DMA_ENGINE_FIR\n");
+ return;
+ }
+
+ /* Get PowerBus Interface FIR data register value. */
+ if (xscom_read(flat_chip_id, nx_pbi_fir, &nx_pbi_fir_val) != 0) {
+ prerror("XSCOM error reading NX_PBI_FIR\n");
+ return;
+ }
+
+ /* Find NX checkstop reason and populate HMI event with error info. */
+ for (i = 0; i < ARRAY_SIZE(nx_dma_xstop_bits); i++)
+ if (nx_dma_fir & PPC_BIT(nx_dma_xstop_bits[i].bit))
+ hmi_evt->u.xstop_error.xstop_reason
+ |= cpu_to_be32(nx_dma_xstop_bits[i].reason);
+
+ for (i = 0; i < ARRAY_SIZE(nx_pbi_xstop_bits); i++)
+ if (nx_pbi_fir_val & PPC_BIT(nx_pbi_xstop_bits[i].bit))
+ hmi_evt->u.xstop_error.xstop_reason
+ |= cpu_to_be32(nx_pbi_xstop_bits[i].reason);
+
+ /*
+ * Set NXDMAENGFIR[38] to signal PRD that service action is required.
+ * Without this inject, PRD will not be able to do NX unit checkstop
+ * error analysis. NXDMAENGFIR[38] is a spare bit and used to report
+ * a software initiated attention.
+ *
+	 * The behavior of this bit and all other FIR bits is documented in
+	 * the RAS spreadsheet.
+ */
+ xscom_write(flat_chip_id, nx_dma_engine_fir, PPC_BIT(38));
+
+ /* Send an HMI event. */
+ queue_hmi_event(hmi_evt, 0, out_flags);
+}
+
+static bool phb_is_npu2(struct dt_node *dn)
+{
+ return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
+ dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex"));
+}
+
+static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
+{
+ int i, reason_count;
+ uint8_t *ptr;
+
+ reason_count = sizeof(*xstop_reason) / sizeof(reason);
+ ptr = (uint8_t *) xstop_reason;
+ for (i = 0; i < reason_count; i++) {
+ if (*ptr == 0) {
+ *ptr = reason;
+ break;
+ }
+ ptr++;
+ }
+}
+
+static void encode_npu2_xstop_reason(uint32_t *xstop_reason,
+ uint64_t fir, int fir_number)
+{
+ int bit;
+ uint8_t reason;
+
+ /*
+ * There are three 64-bit FIRs but the xstop reason field of
+ * the hmi event is only 32-bit. Encode which FIR bit is set as:
+ * - 2 bits for the FIR number
+ * - 6 bits for the bit number (0 -> 63)
+ *
+ * So we could even encode up to 4 reasons for the HMI, if
+ * that can ever happen
+ */
+ while (fir) {
+ bit = ilog2(fir);
+ reason = fir_number << 6;
+ reason |= (63 - bit); // IBM numbering
+ add_npu2_xstop_reason(xstop_reason, reason);
+ fir ^= 1ULL << bit;
+ }
+}
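+
+/*
+ * Worked example (illustrative): if FIR#1 has IBM bit 3 set, ilog2() of
+ * PPC_BIT(3) returns 60 and the encoded byte is (1 << 6) | (63 - 60) = 0x43,
+ * i.e. "FIR 1, bit 3". add_npu2_xstop_reason() packs up to four such bytes
+ * into the 32-bit xstop_reason.
+ */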
+
+static void find_npu2_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct phb *phb;
+ int i;
+ bool npu2_hmi_verbose = false, found = false;
+ uint64_t npu2_fir;
+ uint64_t npu2_fir_mask;
+ uint64_t npu2_fir_action0;
+ uint64_t npu2_fir_action1;
+ uint64_t npu2_fir_addr;
+ uint64_t npu2_fir_mask_addr;
+ uint64_t npu2_fir_action0_addr;
+ uint64_t npu2_fir_action1_addr;
+ uint64_t fatal_errors;
+ uint32_t xstop_reason = 0;
+ int total_errors = 0;
+ const char *loc;
+
+ /* NPU2 only */
+ if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9)
+ return;
+
+ /* Find the NPU on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ /* NOTE: if a chip ever has >1 NPU this will need adjusting */
+ if (phb_is_npu2(phb->dt_node) &&
+ (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* If we didn't find a NPU on the chip, it's not our checkstop. */
+ if (!found)
+ return;
+
+ npu2_fir_addr = NPU2_FIR_REGISTER_0;
+ npu2_fir_mask_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_MASK_OFFSET;
+ npu2_fir_action0_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION0_OFFSET;
+ npu2_fir_action1_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION1_OFFSET;
+
+ for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) {
+ /* Read all the registers necessary to find a checkstop condition. */
+ if (xscom_read(flat_chip_id, npu2_fir_addr, &npu2_fir) ||
+ xscom_read(flat_chip_id, npu2_fir_mask_addr, &npu2_fir_mask) ||
+ xscom_read(flat_chip_id, npu2_fir_action0_addr, &npu2_fir_action0) ||
+ xscom_read(flat_chip_id, npu2_fir_action1_addr, &npu2_fir_action1)) {
+ prerror("HMI: Couldn't read NPU FIR register%d with XSCOM\n", i);
+ continue;
+ }
+
+ fatal_errors = npu2_fir & ~npu2_fir_mask & npu2_fir_action0 & npu2_fir_action1;
+
+ if (fatal_errors) {
+ loc = chip_loc_code(flat_chip_id);
+ if (!loc)
+ loc = "Not Available";
+ prlog(PR_ERR, "NPU: [Loc: %s] P:%d FIR#%d FIR 0x%016llx mask 0x%016llx\n",
+ loc, flat_chip_id, i, npu2_fir, npu2_fir_mask);
+ prlog(PR_ERR, "NPU: [Loc: %s] P:%d ACTION0 0x%016llx, ACTION1 0x%016llx\n",
+ loc, flat_chip_id, npu2_fir_action0, npu2_fir_action1);
+ total_errors++;
+
+ encode_npu2_xstop_reason(&xstop_reason, fatal_errors, i);
+ }
+
+ /* Can't do a fence yet, we are just logging fir information for now */
+ npu2_fir_addr += NPU2_FIR_OFFSET;
+ npu2_fir_mask_addr += NPU2_FIR_OFFSET;
+ npu2_fir_action0_addr += NPU2_FIR_OFFSET;
+ npu2_fir_action1_addr += NPU2_FIR_OFFSET;
+
+ }
+
+ if (!total_errors)
+ return;
+
+ npu2_hmi_verbose = nvram_query_eq_safe("npu2-hmi-verbose", "true");
+ /* Force this for now until we sort out something better */
+ npu2_hmi_verbose = true;
+
+ if (npu2_hmi_verbose) {
+ npu2_dump_scoms(flat_chip_id);
+ prlog(PR_ERR, " _________________________ \n");
+ prlog(PR_ERR, "< It's Debug time! >\n");
+ prlog(PR_ERR, " ------------------------- \n");
+ prlog(PR_ERR, " \\ ,__, \n");
+ prlog(PR_ERR, " \\ (oo)____ \n");
+ prlog(PR_ERR, " (__) )\\ \n");
+ prlog(PR_ERR, " ||--|| * \n");
+ }
+
+ /* Set up the HMI event */
+ hmi_evt->severity = OpalHMI_SEV_WARNING;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
+ hmi_evt->u.xstop_error.xstop_reason = cpu_to_be32(xstop_reason);
+ hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
+
+ /* Marking the event as recoverable so that we don't crash */
+ queue_hmi_event(hmi_evt, 1, out_flags);
+}
+
+static void find_npu_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct phb *phb;
+ struct npu *p = NULL;
+
+ uint64_t npu_fir;
+ uint64_t npu_fir_mask;
+ uint64_t npu_fir_action0;
+ uint64_t npu_fir_action1;
+ uint64_t fatal_errors;
+
+ /* Only check for NPU errors if the chip has a NPU */
+ if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL)
+ return find_npu2_checkstop_reason(flat_chip_id, hmi_evt, out_flags);
+
+ /* Find the NPU on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ /* NOTE: if a chip ever has >1 NPU this will need adjusting */
+ if (dt_node_is_compatible(phb->dt_node, "ibm,power8-npu-pciex") &&
+ (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
+ p = phb_to_npu(phb);
+ break;
+ }
+ }
+
+ /* If we didn't find a NPU on the chip, it's not our checkstop. */
+ if (p == NULL)
+ return;
+
+ /* Read all the registers necessary to find a checkstop condition. */
+ if (xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR, &npu_fir) ||
+ xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR_MASK, &npu_fir_mask) ||
+ xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR_ACTION0, &npu_fir_action0) ||
+ xscom_read(flat_chip_id,
+ p->at_xscom + NX_FIR_ACTION1, &npu_fir_action1)) {
+ prerror("Couldn't read NPU registers with XSCOM\n");
+ return;
+ }
+
+ fatal_errors = npu_fir & ~npu_fir_mask & npu_fir_action0 & npu_fir_action1;
+
+ /* If there's no errors, we don't need to do anything. */
+ if (!fatal_errors)
+ return;
+
+ prlog(PR_DEBUG, "NPU: FIR 0x%016llx mask 0x%016llx\n",
+ npu_fir, npu_fir_mask);
+ prlog(PR_DEBUG, "NPU: ACTION0 0x%016llx, ACTION1 0x%016llx\n",
+ npu_fir_action0, npu_fir_action1);
+
+ /* Set the NPU to fenced since it can't recover. */
+ npu_set_fence_state(p, true);
+
+ /* Set up the HMI event */
+ hmi_evt->severity = OpalHMI_SEV_WARNING;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
+ hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
+
+ /* The HMI is "recoverable" because it shouldn't crash the system */
+ queue_hmi_event(hmi_evt, 1, out_flags);
+}
+
+static void decode_malfunction(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags)
+{
+ int i;
+ uint64_t malf_alert, flags;
+
+ flags = 0;
+
+ if (!setup_scom_addresses()) {
+ prerror("Failed to setup scom addresses\n");
+ /* Send an unknown HMI event. */
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_UNKNOWN;
+ hmi_evt->u.xstop_error.xstop_reason = 0;
+ queue_hmi_event(hmi_evt, false, out_flags);
+ return;
+ }
+
+ xscom_read(this_cpu()->chip_id, malf_alert_scom, &malf_alert);
+
+ if (!malf_alert)
+ return;
+
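+	/*
+	 * Each set bit in the malfunction alert SCOM identifies the flat
+	 * chip id that raised the alert: clear it, then scan that chip's
+	 * CAPP, NX and NPU FIRs for the checkstop reason.
+	 */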
+ for (i = 0; i < 64; i++) {
+ if (malf_alert & PPC_BIT(i)) {
+ xscom_write(this_cpu()->chip_id, malf_alert_scom,
+ ~PPC_BIT(i));
+ find_capp_checkstop_reason(i, hmi_evt, &flags);
+ find_nx_checkstop_reason(i, hmi_evt, &flags);
+ find_npu_checkstop_reason(i, hmi_evt, &flags);
+ }
+ }
+
+ find_core_checkstop_reason(hmi_evt, &flags);
+
+ /*
+ * If we fail to find checkstop reason, send an unknown HMI event.
+ */
+ if (!(flags & OPAL_HMI_FLAGS_NEW_EVENT)) {
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_UNKNOWN;
+ hmi_evt->u.xstop_error.xstop_reason = 0;
+ queue_hmi_event(hmi_evt, false, &flags);
+ }
+ *out_flags |= flags;
+}
+
+/*
+ * This will "rendez-vous" all threads on the core to the rendez-vous
+ * id "sig". You need to make sure that "sig" is different from the
+ * previous rendez vous. The sig value must be between 0 and 7 with
+ * boot time being set to 0.
+ *
+ * Note: in theory, we could just use a flip flop "sig" in the thread
+ * structure (binary rendez-vous with no argument). This is a bit more
+ * debuggable and better at handling timeouts (arguably).
+ *
+ * This should be called with no lock held.
+ */
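+
+/*
+ * Per-thread state layout in *core_hmi_state_ptr (as used below): each
+ * thread owns a 4-bit nibble at (thread_index * 4); the low 3 bits hold
+ * the current rendez-vous signature and bit 3 is the "exit" flag set in
+ * the second stage.
+ */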
+static void hmi_rendez_vous(uint32_t sig)
+{
+ struct cpu_thread *t = this_cpu();
+ uint32_t my_id = cpu_get_thread_index(t);
+ uint32_t my_shift = my_id << 2;
+ uint32_t *sptr = t->core_hmi_state_ptr;
+ uint32_t val, prev, shift, i;
+ uint64_t timeout;
+
+ assert(sig <= 0x7);
+
+ /*
+ * Mark ourselves as having reached the rendez vous point with
+ * the exit bit cleared
+ */
+ do {
+ val = prev = *sptr;
+ val &= ~(0xfu << my_shift);
+ val |= sig << my_shift;
+ } while (cmpxchg32(sptr, prev, val) != prev);
+
+ /*
+ * Wait for everybody else to reach that point, ignore the
+ * exit bit as another thread could have already set it.
+ */
+ for (i = 0; i < cpu_thread_count; i++) {
+ shift = i << 2;
+
+ timeout = TIMEOUT_LOOPS;
+ while (((*sptr >> shift) & 0x7) != sig && --timeout)
+ cpu_relax();
+ if (!timeout)
+ prlog(PR_ERR, "Rendez-vous stage 1 timeout, CPU 0x%x"
+ " waiting for thread %d (sptr=%08x)\n",
+ t->pir, i, *sptr);
+ }
+
+ /* Set the exit bit */
+ do {
+ val = prev = *sptr;
+ val &= ~(0xfu << my_shift);
+ val |= (sig | 8) << my_shift;
+ } while (cmpxchg32(sptr, prev, val) != prev);
+
+ /* At this point, we need to wait for everybody else to have a value
+ * that is *not* sig. IE. they either have set the exit bit *or* they
+ * have changed the rendez-vous (meaning they have moved on to another
+ * rendez vous point).
+ */
+ for (i = 0; i < cpu_thread_count; i++) {
+ shift = i << 2;
+
+ timeout = TIMEOUT_LOOPS;
+ while (((*sptr >> shift) & 0xf) == sig && --timeout)
+ cpu_relax();
+ if (!timeout)
+ prlog(PR_ERR, "Rendez-vous stage 2 timeout, CPU 0x%x"
+ " waiting for thread %d (sptr=%08x)\n",
+ t->pir, i, *sptr);
+ }
+}
+
+static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
+{
+ const char *loc;
+ uint32_t core_id, thread_index;
+
+ core_id = pir_to_core_id(this_cpu()->pir);
+ thread_index = cpu_get_thread_index(this_cpu());
+
+ loc = chip_loc_code(this_cpu()->chip_id);
+ if (!loc)
+ loc = "Not Available";
+
+ /* Also covers P10 SPR_HMER_TFAC_SHADOW_XFER_ERROR */
+ if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) {
+ prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: TFMR(%016lx) %s\n",
+ loc, this_cpu()->chip_id, core_id, thread_index,
+ mfspr(SPR_TFMR), msg);
+ } else {
+ prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: %s\n",
+ loc, this_cpu()->chip_id, core_id, thread_index,
+ msg);
+ }
+}
+
+static int handle_thread_tfac_error(uint64_t tfmr, uint64_t *out_flags)
+{
+ int recover = 1;
+
+ if (tfmr & SPR_TFMR_DEC_PARITY_ERR)
+ *out_flags |= OPAL_HMI_FLAGS_DEC_LOST;
+ if (!tfmr_recover_local_errors(tfmr))
+ recover = 0;
+ tfmr &= ~(SPR_TFMR_PURR_PARITY_ERR |
+ SPR_TFMR_SPURR_PARITY_ERR |
+ SPR_TFMR_DEC_PARITY_ERR);
+ return recover;
+}
+
+static int64_t opal_handle_hmi(void);
+
+static void opal_handle_hmi_job(void *data __unused)
+{
+ opal_handle_hmi();
+}
+
+/*
+ * Queue HMI handling jobs if secondaries are still in OPAL.
+ * This function is called by thread 0.
+ */
+static struct cpu_job **hmi_kick_secondaries(void)
+{
+ struct cpu_thread *ts = this_cpu();
+ struct cpu_job **hmi_jobs = NULL;
+ int job_sz = sizeof(struct cpu_job *) * cpu_thread_count;
+ int i;
+
+ for (i = 1; i < cpu_thread_count; i++) {
+ ts = next_cpu(ts);
+
+ /* Is this thread still in OPAL ? */
+ if (ts->state == cpu_state_active) {
+ if (!hmi_jobs) {
+ hmi_jobs = zalloc(job_sz);
+ assert(hmi_jobs);
+ }
+
+ prlog(PR_DEBUG, "Sending hmi job to thread %d\n", i);
+ hmi_jobs[i] = cpu_queue_job(ts, "handle_hmi_job",
+ opal_handle_hmi_job, NULL);
+ }
+ }
+ return hmi_jobs;
+}
+
+static int handle_all_core_tfac_error(uint64_t tfmr, uint64_t *out_flags)
+{
+ struct cpu_thread *t, *t0;
+ int recover = -1;
+ struct cpu_job **hmi_jobs = NULL;
+
+ t = this_cpu();
+ t0 = find_cpu_by_pir(cpu_get_thread0(t));
+
+ if (t == t0 && t0->state == cpu_state_os)
+ hmi_jobs = hmi_kick_secondaries();
+
+ /* Rendez vous all threads */
+ hmi_rendez_vous(1);
+
+ /* We use a lock here as some of the TFMR bits are shared and I
+	 * prefer to avoid doing the cleanup simultaneously.
+ */
+ lock(&hmi_lock);
+
+ /* First handle corrupt TFMR otherwise we can't trust anything.
+ * We'll use a lock here so that the threads don't try to do it at
+ * the same time
+ */
+ if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
+ /* Check if it's still in error state */
+ if (mfspr(SPR_TFMR) & SPR_TFMR_TFMR_CORRUPT)
+ if (!recover_corrupt_tfmr()) {
+ unlock(&hmi_lock);
+ recover = 0;
+ goto error_out;
+ }
+
+ tfmr = mfspr(SPR_TFMR);
+
+ /* We could have got new thread errors in the meantime */
+ if (tfmr & SPR_TFMR_THREAD_ERRORS) {
+ recover = handle_thread_tfac_error(tfmr, out_flags);
+ tfmr &= ~SPR_TFMR_THREAD_ERRORS;
+ }
+ if (!recover) {
+ unlock(&hmi_lock);
+ goto error_out;
+ }
+ }
+
+ /* Tell the OS ... */
+ if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR)
+ *out_flags |= OPAL_HMI_FLAGS_HDEC_LOST;
+
+	/* Clean up bad HDEC or TB on all threads or subcores before we clear
+ * the error conditions
+ */
+ tfmr_cleanup_core_errors(tfmr);
+
+ /* Unlock before next rendez-vous */
+ unlock(&hmi_lock);
+
+ /* Second rendez vous, ensure the above cleanups are all done before
+ * we proceed further
+ */
+ hmi_rendez_vous(2);
+
+ /* We can now clear the error conditions in the core. */
+ recover = tfmr_clear_core_errors(tfmr);
+ if (recover == 0)
+ goto error_out;
+
+ /* Third rendez-vous. We could in theory do the timebase resync as
+ * part of the previous one, but I prefer having all the error
+ * conditions cleared before we start trying.
+ */
+ hmi_rendez_vous(3);
+
+ /* Now perform the actual TB recovery on thread 0 */
+ if (t == t0)
+ recover = chiptod_recover_tb_errors(&this_cpu()->tb_resynced);
+
+error_out:
+ /* Last rendez-vous */
+ hmi_rendez_vous(4);
+
+ /* Now all threads have gone past rendez-vous 3 and not yet past another
+ * rendez-vous 1, so the value of tb_resynced of thread 0 of the core
+ * contains an accurate indication as to whether the timebase was lost.
+ */
+ if (t0->tb_resynced)
+ *out_flags |= OPAL_HMI_FLAGS_TB_RESYNC;
+
+ if (t == t0 && hmi_jobs) {
+ int i;
+ for (i = 1; i < cpu_thread_count; i++)
+ if (hmi_jobs[i])
+ cpu_wait_job(hmi_jobs[i], true);
+ free(hmi_jobs);
+ }
+
+ return recover;
+}
+
+static uint64_t read_tfmr_t0(void)
+{
+ uint64_t tfmr_t0;
+ uint32_t chip_id = this_cpu()->chip_id;
+ uint32_t core_id = pir_to_core_id(this_cpu()->pir);
+
+ lock(&hmi_lock);
+
+ xscom_write(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_SCOM_SPRC),
+ SETFIELD(P9_SCOMC_SPR_SELECT, 0, P9_SCOMC_TFMR_T0));
+ xscom_read(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_SCOM_SPRD),
+ &tfmr_t0);
+ unlock(&hmi_lock);
+ return tfmr_t0;
+}
+
+/* P9 errata: In theory, an HDEC error is sent to all threads. However,
+ * due to an errata on P9 where TFMR bit 26 (HDEC parity) cannot be
+ * cleared on thread 1..3, I am not confident we can do a rendez-vous
+ * in all cases.
+ *
+ * Our current approach is to ignore that error unless it is present
+ * on thread 0 TFMR. Also, ignore TB residue error due to a similar
+ * errata as above.
+ */
+static void validate_latched_errors(uint64_t *tfmr)
+{
+ if ((*tfmr & (SPR_TFMR_HDEC_PARITY_ERROR | SPR_TFMR_TB_RESIDUE_ERR))
+ && this_cpu()->is_secondary) {
+ uint64_t tfmr_t0 = read_tfmr_t0();
+
+ if (!(tfmr_t0 & SPR_TFMR_HDEC_PARITY_ERROR))
+ *tfmr &= ~SPR_TFMR_HDEC_PARITY_ERROR;
+
+ if (!(tfmr_t0 & SPR_TFMR_TB_RESIDUE_ERR))
+ *tfmr &= ~SPR_TFMR_TB_RESIDUE_ERR;
+ }
+}
+
+static int handle_tfac_errors(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags)
+{
+ int recover = -1;
+ uint64_t tfmr = mfspr(SPR_TFMR);
+
+ /* Initialize the hmi event with old value of TFMR */
+ hmi_evt->tfmr = cpu_to_be64(tfmr);
+
+ /* A TFMR parity/corrupt error makes us ignore all the local stuff.*/
+ if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
+ /* Mark TB as invalid for now as we don't trust TFMR, we'll fix
+ * it up later
+ */
+ this_cpu()->tb_invalid = true;
+ goto bad_tfmr;
+ }
+
+ this_cpu()->tb_invalid = !(tfmr & SPR_TFMR_TB_VALID);
+
+ if (proc_gen == proc_gen_p9)
+ validate_latched_errors(&tfmr);
+
+ /* First, handle thread local errors */
+ if (tfmr & SPR_TFMR_THREAD_ERRORS) {
+ recover = handle_thread_tfac_error(tfmr, out_flags);
+ tfmr &= ~SPR_TFMR_THREAD_ERRORS;
+ }
+
+ bad_tfmr:
+
+	/* Let's see if we still have an all-core error to deal with, if
+ * not, we just bail out
+ */
+ if (tfmr & SPR_TFMR_CORE_ERRORS) {
+ int recover2;
+
+ /* Only update "recover" if it's not already 0 (non-recovered)
+ */
+ recover2 = handle_all_core_tfac_error(tfmr, out_flags);
+ if (recover != 0)
+ recover = recover2;
+ } else if (tfmr & SPR_TFMR_CHIP_TOD_INTERRUPT) {
+ int recover2;
+
+ /*
+		 * There are some TOD errors which do not affect the working of
+		 * TOD and TB. They stay in a valid state, hence we don't need
+		 * a rendez-vous.
+		 *
+		 * TOD errors that affect TOD/TB will report a global error
+		 * in TFMR along with bit 51, and they will go through rendez-vous.
+ */
+ recover2 = chiptod_recover_tod_errors();
+ if (recover != 0)
+ recover = recover2;
+ } else if (this_cpu()->tb_invalid) {
+ /* This shouldn't happen, TB is invalid and no global error
+ * was reported. We just return for now assuming one will
+ * be. We can't do a rendez vous without a core-global HMI.
+ */
+ prlog(PR_ERR, "HMI: TB invalid without core error reported ! "
+ "CPU=%x, TFMR=0x%016lx\n", this_cpu()->pir,
+ mfspr(SPR_TFMR));
+ }
+
+ if (recover != -1 && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_ERROR_SYNC;
+ hmi_evt->type = OpalHMI_ERROR_TFAC;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+
+ /* Set the TB state looking at TFMR register before we head out. */
+ this_cpu()->tb_invalid = !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID);
+
+ if (this_cpu()->tb_invalid) {
+ *out_flags |= OPAL_HMI_FLAGS_TOD_TB_FAIL;
+ prlog(PR_WARNING, "Failed to get TB in running state! "
+ "CPU=%x, TFMR=%016lx\n", this_cpu()->pir,
+ mfspr(SPR_TFMR));
+ }
+
+ return recover;
+}
+
+static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt,
+ uint64_t *out_flags)
+{
+ struct cpu_thread *cpu = this_cpu();
+ int recover = 1;
+ uint64_t handled = 0;
+
+ prlog(PR_DEBUG, "Received HMI interrupt: HMER = 0x%016llx\n", hmer);
+ /* Initialize the hmi event with old value of HMER */
+ if (hmi_evt)
+ hmi_evt->hmer = cpu_to_be64(hmer);
+
+ /* Handle Timer/TOD errors separately */
+ if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) {
+ hmi_print_debug("Timer Facility Error", hmer);
+ handled = hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR);
+ mtspr(SPR_HMER, ~handled);
+ recover = handle_tfac_errors(hmi_evt, out_flags);
+ handled = 0;
+ }
+
+ lock(&hmi_lock);
+ /*
+ * Not all HMIs would move TB into invalid state. Set the TB state
+ * looking at TFMR register. TFMR will tell us correct state of
+ * TB register.
+ */
+ if (hmer & SPR_HMER_PROC_RECV_DONE) {
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint64_t core_wof;
+
+ hmi_print_debug("Processor recovery occurred.", hmer);
+ if (!read_core_wof(chip_id, core_id, &core_wof)) {
+ int i;
+
+ prlog(PR_DEBUG, "Core WOF = 0x%016llx recovered error:\n", core_wof);
+ if (proc_gen <= proc_gen_p9) {
+ for (i = 0; i < ARRAY_SIZE(p9_recoverable_bits); i++) {
+ if (core_wof & PPC_BIT(p9_recoverable_bits[i].bit))
+ prlog(PR_DEBUG, " %s\n", p9_recoverable_bits[i].reason);
+ }
+ } else if (proc_gen == proc_gen_p10) {
+ for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) {
+ if (core_wof & PPC_BIT(p10_core_fir_bits[i].bit))
+ prlog(PR_DEBUG, " %s\n", p10_core_fir_bits[i].reason);
+ }
+ }
+ }
+
+ handled |= SPR_HMER_PROC_RECV_DONE;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED)) {
+ handled |= SPR_HMER_PROC_RECV_ERROR_MASKED;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ hmi_print_debug("Processor recovery Done (masked).", hmer);
+ }
+
+ if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
+ handled |= SPR_HMER_PROC_RECV_AGAIN;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ hmi_print_debug("Processor recovery occurred again before"
+ "bit2 was cleared\n", hmer);
+ }
+
+ /* XXX: what to do with this? */
+ if (hmer & SPR_HMER_SPURR_SCALE_LIMIT) {
+ handled |= SPR_HMER_SPURR_SCALE_LIMIT;
+ if (cpu_is_thread0(cpu) && hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+ hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ hmi_print_debug("Turbo versus nominal frequency exceeded limit.", hmer);
+ }
+
+ /* Assert if we see malfunction alert, we can not continue. */
+ if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
+ handled |= SPR_HMER_MALFUNCTION_ALERT;
+
+ hmi_print_debug("Malfunction Alert", hmer);
+ recover = 0;
+ if (hmi_evt)
+ decode_malfunction(hmi_evt, out_flags);
+ }
+
+ /* Assert if we see Hypervisor resource error, we can not continue. */
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_HYP_RESOURCE_ERR)) {
+ handled |= SPR_HMER_HYP_RESOURCE_ERR;
+
+ hmi_print_debug("Hypervisor resource error", hmer);
+ recover = 0;
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_FATAL;
+ hmi_evt->type = OpalHMI_ERROR_HYP_RESOURCE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ /* XXX: what to do with this? */
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND)) {
+ handled |= SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND;
+ hmer &= ~SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND;
+
+ hmi_print_debug("Attempted to wake thread when threads in TM suspend mode.", hmer);
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_TRIG_FIR_HMI)) {
+ handled |= SPR_HMER_TRIG_FIR_HMI;
+ hmer &= ~SPR_HMER_TRIG_FIR_HMI;
+
+ hmi_print_debug("Clearing unknown debug trigger", hmer);
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+			hmi_evt->type = OpalHMI_ERROR_DEBUG_TRIG_FIR;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+ if ((proc_gen == proc_gen_p10) && (hmer & SPR_HMER_P10_TRIG_FIR_HMI)) {
+ handled |= SPR_HMER_P10_TRIG_FIR_HMI;
+ hmer &= ~SPR_HMER_P10_TRIG_FIR_HMI;
+
+ hmi_print_debug("Clearing unknown debug trigger", hmer);
+ if (hmi_evt) {
+ hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+			hmi_evt->type = OpalHMI_ERROR_DEBUG_TRIG_FIR;
+ queue_hmi_event(hmi_evt, recover, out_flags);
+ }
+ }
+
+ if (recover == 0)
+ disable_fast_reboot("Unrecoverable HMI");
+ /*
+ * HMER bits are sticky, once set to 1 they remain set to 1 until
+ * they are set to 0. Reset the error source bit to 0, otherwise
+ * we keep getting HMI interrupt again and again. Writing to HMER
+ * acts as an AND, so we write mask of all 1's except for the bits
+ * we want to clear.
+ */
+ mtspr(SPR_HMER, ~handled);
+ unlock(&hmi_lock);
+ return recover;
+}
+
+static int64_t opal_handle_hmi(void)
+{
+ uint64_t hmer, dummy_flags;
+ struct OpalHMIEvent hmi_evt;
+
+ /*
+	 * Compile-time check to ensure the size of struct OpalHMIEvent does
+	 * not exceed that of struct opal_msg.
+ */
+ BUILD_ASSERT(sizeof(struct opal_msg) >= sizeof(struct OpalHMIEvent));
+
+ memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent));
+ hmi_evt.version = OpalHMIEvt_V2;
+
+ hmer = mfspr(SPR_HMER); /* Get HMER register value */
+ handle_hmi_exception(hmer, &hmi_evt, &dummy_flags);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_HANDLE_HMI, opal_handle_hmi, 0);
+
+static int64_t opal_handle_hmi2(__be64 *out_flags)
+{
+ uint64_t hmer, flags = 0;
+ struct OpalHMIEvent hmi_evt;
+
+ /*
+	 * Compile-time check to ensure the size of struct OpalHMIEvent does
+	 * not exceed that of struct opal_msg.
+ */
+ BUILD_ASSERT(sizeof(struct opal_msg) >= sizeof(struct OpalHMIEvent));
+
+ memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent));
+ hmi_evt.version = OpalHMIEvt_V2;
+
+ hmer = mfspr(SPR_HMER); /* Get HMER register value */
+ handle_hmi_exception(hmer, &hmi_evt, &flags);
+ *out_flags = cpu_to_be64(flags);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_HANDLE_HMI2, opal_handle_hmi2, 1);
diff --git a/roms/skiboot/core/i2c.c b/roms/skiboot/core/i2c.c
new file mode 100644
index 000000000..b4313d430
--- /dev/null
+++ b/roms/skiboot/core/i2c.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * I2C
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <i2c.h>
+#include <opal.h>
+#include <device.h>
+#include <opal-msg.h>
+#include <timebase.h>
+#include <processor.h>
+#include <timer.h>
+#include <trace.h>
+
+static LIST_HEAD(i2c_bus_list);
+
+/* Used to assign OPAL IDs */
+static uint32_t i2c_next_bus;
+
+void i2c_add_bus(struct i2c_bus *bus)
+{
+ bus->opal_id = ++i2c_next_bus;
+ dt_add_property_cells(bus->dt_node, "ibm,opal-id", bus->opal_id);
+
+ list_add_tail(&i2c_bus_list, &bus->link);
+}
+
+struct i2c_bus *i2c_find_bus_by_id(uint32_t opal_id)
+{
+ struct i2c_bus *bus;
+
+ list_for_each(&i2c_bus_list, bus, link) {
+ if (bus->opal_id == opal_id)
+ return bus;
+ }
+ return NULL;
+}
+
+static inline void i2c_trace_req(struct i2c_request *req, int rc)
+{
+ struct trace_i2c t;
+
+ memset(&t, 0, sizeof(t));
+
+ t.bus = req->bus->opal_id;
+ t.type = req->op | (req->offset_bytes << 4);
+ t.i2c_addr = req->dev_addr;
+ t.smbus_reg = req->offset & 0xffff; // FIXME: log whole offset
+ t.size = req->rw_len;
+ t.rc = rc;
+
+ /* FIXME: trace should not be a union... */
+ trace_add((void *)&t, TRACE_I2C, sizeof(t));
+}
+
+int64_t i2c_queue_req(struct i2c_request *req)
+{
+ int64_t ret = req->bus->queue_req(req);
+
+ i2c_trace_req(req, OPAL_ASYNC_COMPLETION);
+
+ if (!ret)
+ req->req_state = i2c_req_queued;
+ return ret;
+}
+
+static void opal_i2c_request_complete(int rc, struct i2c_request *req)
+{
+ uint64_t token = (uint64_t)(unsigned long)req->user_data;
+
+ opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+ cpu_to_be64(token),
+ cpu_to_be64(rc));
+ i2c_trace_req(req, rc);
+
+ free(req);
+}
+
+static int opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+ struct opal_i2c_request *oreq)
+{
+ struct i2c_bus *bus = NULL;
+ struct i2c_request *req;
+ int rc;
+
+ if (!opal_addr_valid(oreq))
+ return OPAL_PARAMETER;
+
+ if (oreq->flags & OPAL_I2C_ADDR_10)
+ return OPAL_UNSUPPORTED;
+
+ bus = i2c_find_bus_by_id(bus_id);
+ if (!bus) {
+ /**
+ * @fwts-label I2CInvalidBusID
+ * @fwts-advice opal_i2c_request was passed an invalid bus
+ * ID. This has likely come from the OS rather than OPAL
+ * and thus could indicate an OS bug rather than an OPAL
+ * bug.
+ */
+ prlog(PR_ERR, "I2C: Invalid 'bus_id' passed to the OPAL\n");
+ return OPAL_PARAMETER;
+ }
+
+ req = zalloc(sizeof(*req));
+ if (!req) {
+ /**
+ * @fwts-label I2CFailedAllocation
+ * @fwts-advice OPAL failed to allocate memory for an
+ * i2c_request. This points to an OPAL bug as OPAL ran
+ * out of memory and this should never happen.
+ */
+ prlog(PR_ERR, "I2C: Failed to allocate 'i2c_request'\n");
+ return OPAL_NO_MEM;
+ }
+
+ switch(oreq->type) {
+ case OPAL_I2C_RAW_READ:
+ req->op = I2C_READ;
+ break;
+ case OPAL_I2C_RAW_WRITE:
+ req->op = I2C_WRITE;
+ break;
+ case OPAL_I2C_SM_READ:
+ req->op = SMBUS_READ;
+ req->offset = be32_to_cpu(oreq->subaddr);
+ req->offset_bytes = oreq->subaddr_sz;
+ break;
+ case OPAL_I2C_SM_WRITE:
+ req->op = SMBUS_WRITE;
+ req->offset = be32_to_cpu(oreq->subaddr);
+ req->offset_bytes = oreq->subaddr_sz;
+ break;
+ default:
+ free(req);
+ return OPAL_PARAMETER;
+ }
+ req->dev_addr = be16_to_cpu(oreq->addr);
+ req->rw_len = be32_to_cpu(oreq->size);
+ req->rw_buf = (void *)be64_to_cpu(oreq->buffer_ra);
+ req->completion = opal_i2c_request_complete;
+ req->user_data = (void *)(unsigned long)async_token;
+ req->bus = bus;
+
+ if (i2c_check_quirk(req, &rc)) {
+ free(req);
+ return rc;
+ }
+
+ /* Finally, queue the OPAL i2c request and return */
+ rc = i2c_queue_req(req);
+ if (rc) {
+ free(req);
+ return rc;
+ }
+
+ return OPAL_ASYNC_COMPLETION;
+}
+opal_call(OPAL_I2C_REQUEST, opal_i2c_request, 3);
+
+#define MAX_NACK_RETRIES 2
+#define REQ_COMPLETE_POLLING 5 /* Check if req is complete
+ in 5ms interval */
+int64_t i2c_request_sync(struct i2c_request *req)
+{
+ uint64_t timer_period = msecs_to_tb(5), timer_count;
+ uint64_t time_to_wait = 0;
+ int64_t rc, waited, retries;
+ size_t i, count;
+ char buf[17]; /* 8 bytes in hex + NUL */
+
+ for (retries = 0; retries <= MAX_NACK_RETRIES; retries++) {
+ waited = 0;
+ timer_count = 0;
+
+ i2c_queue_req(req);
+
+ do {
+ time_to_wait = i2c_run_req(req);
+ if (!time_to_wait)
+ time_to_wait = REQ_COMPLETE_POLLING;
+ time_wait(time_to_wait);
+ waited += time_to_wait;
+ timer_count += time_to_wait;
+ if (timer_count > timer_period) {
+ /*
+ * The above request may be relying on
+			 * timers to complete, yet they may
+			 * not be called, especially during
+ * opal init. We could be looping here
+ * forever. So explicitly check the
+ * timers once in a while
+ */
+ check_timers(false);
+ timer_count = 0;
+ }
+ } while (req->req_state != i2c_req_done);
+
+ lwsync();
+ rc = req->result;
+
+ /* retry on NACK, otherwise exit */
+ if (rc != OPAL_I2C_NACK_RCVD)
+ break;
+ req->req_state = i2c_req_new;
+ }
+
+ i2c_trace_req(req, rc);
+ count = 0;
+ for (i = 0; i < req->rw_len && count < sizeof(buf); i++) {
+ count += snprintf(buf+count, sizeof(buf)-count, "%02x",
+ *(unsigned char *)(req->rw_buf+i));
+ }
+
+ prlog(PR_DEBUG, "I2C: %s req op=%x offset=%x buf=%s buflen=%d "
+ "delay=%lu/%lld rc=%lld\n",
+ (rc) ? "!!!!" : "----", req->op, req->offset,
+ buf, req->rw_len, tb_to_msecs(waited), req->timeout, rc);
+
+ return rc;
+}
+
+/**
+ * i2c_request_send - send request to i2c bus synchronously
+ * @bus_id: i2c bus id
+ * @dev_addr: address of the device
+ * @read_write: SMBUS_READ or SMBUS_WRITE
+ * @offset: any of the I2C interface offset defined
+ * @offset_bytes: offset size in bytes
+ * @buf: data to be read or written
+ * @buflen: buf length
+ * @timeout: request timeout in milliseconds
+ *
+ * Send an I2C request to a device synchronously
+ *
+ * Returns: Zero on success otherwise a negative error code
+ */
+int64_t i2c_request_send(int bus_id, int dev_addr, int read_write,
+ uint32_t offset, uint32_t offset_bytes, void* buf,
+ size_t buflen, int timeout)
+{
+ struct i2c_request *req;
+ struct i2c_bus *bus;
+ int64_t rc;
+
+ bus = i2c_find_bus_by_id(bus_id);
+ if (!bus) {
+ /**
+ * @fwts-label I2CInvalidBusID
+ * @fwts-advice i2c_request_send was passed an invalid bus
+ * ID. This indicates a bug.
+ */
+ prlog(PR_ERR, "I2C: Invalid bus_id=%x\n", bus_id);
+ return OPAL_PARAMETER;
+ }
+
+ req = zalloc(sizeof(*req));
+ if (!req) {
+ /**
+ * @fwts-label I2CAllocationFailed
+ * @fwts-advice OPAL failed to allocate memory for an
+		 * i2c_request. This points to an OPAL bug as OPAL ran out of
+ * memory and this should never happen.
+ */
+ prlog(PR_ERR, "I2C: allocating i2c_request failed\n");
+ return OPAL_INTERNAL_ERROR;
+ }
+
+ req->bus = bus;
+ req->dev_addr = dev_addr;
+ req->op = read_write;
+ req->offset = offset;
+ req->offset_bytes = offset_bytes;
+ req->rw_buf = (void*) buf;
+ req->rw_len = buflen;
+ req->timeout = timeout;
+
+ rc = i2c_request_sync(req);
+
+ free(req);
+ if (rc)
+ return OPAL_HARDWARE;
+
+ return OPAL_SUCCESS;
+}
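+
+/*
+ * Usage sketch (hypothetical bus and device IDs): read one byte from
+ * register 0x02 of a device at address 0x50 on OPAL bus 3, with a 100ms
+ * timeout:
+ *
+ *	uint8_t val;
+ *	int64_t rc = i2c_request_send(3, 0x50, SMBUS_READ, 0x02, 1,
+ *				      &val, 1, 100);
+ */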
diff --git a/roms/skiboot/core/init.c b/roms/skiboot/core/init.c
new file mode 100644
index 000000000..a8bac28a8
--- /dev/null
+++ b/roms/skiboot/core/init.c
@@ -0,0 +1,1469 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * skiboot C entry point
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <psi.h>
+#include <chiptod.h>
+#include <nx.h>
+#include <cpu.h>
+#include <processor.h>
+#include <xscom.h>
+#include <opal.h>
+#include <opal-msg.h>
+#include <elf.h>
+#include <elf-abi.h>
+#include <io.h>
+#include <cec.h>
+#include <device.h>
+#include <pci.h>
+#include <lpc.h>
+#include <i2c.h>
+#include <chip.h>
+#include <interrupts.h>
+#include <mem_region.h>
+#include <trace.h>
+#include <console.h>
+#include <fsi-master.h>
+#include <centaur.h>
+#include <ocmb.h>
+#include <libfdt/libfdt.h>
+#include <timer.h>
+#include <ipmi.h>
+#include <sensor.h>
+#include <xive.h>
+#include <nvram.h>
+#include <vas.h>
+#include <libstb/secureboot.h>
+#include <libstb/trustedboot.h>
+#include <phys-map.h>
+#include <imc.h>
+#include <dts.h>
+#include <dio-p9.h>
+#include <sbe-p9.h>
+#include <debug_descriptor.h>
+#include <occ.h>
+#include <opal-dump.h>
+#include <xscom-p10-regs.h>
+
+enum proc_gen proc_gen;
+unsigned int pcie_max_link_speed;
+bool pci_tracing;
+bool verbose_eeh;
+extern const char version[];
+
+static uint64_t kernel_entry;
+static size_t kernel_size;
+static bool kernel_32bit;
+
+/* We backup the previous vectors here before copying our own */
+static uint8_t old_vectors[EXCEPTION_VECTORS_END];
+
+#ifdef DEBUG
+#define DEBUG_STR "-debug"
+#else
+#define DEBUG_STR ""
+#endif
+
+#ifdef SKIBOOT_GCOV
+void skiboot_gcov_done(void);
+#endif
+
+struct debug_descriptor debug_descriptor = {
+ .eye_catcher = "OPALdbug",
+ .version = CPU_TO_BE32(DEBUG_DESC_VERSION),
+ .state_flags = 0,
+ .memcons_phys = 0, /* cpu_to_be64(&memcons) can't init constant */
+ .trace_mask = 0, /* All traces disabled by default */
+ /* console log level:
+ * high 4 bits in memory, low 4 bits driver (e.g. uart). */
+#ifdef DEBUG
+ .console_log_levels = (PR_TRACE << 4) | PR_DEBUG,
+#else
+ .console_log_levels = (PR_DEBUG << 4) | PR_NOTICE,
+#endif
+};
+
+static void checksum_romem(void);
+
+static bool try_load_elf64_le(struct elf_hdr *header)
+{
+ struct elf64le_hdr *kh = (struct elf64le_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf64le_phdr *ph;
+ unsigned int i;
+
+ printf("INIT: 64-bit LE kernel discovered\n");
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF
+	 * but it will not work for arbitrary ELF binaries.
+ */
+ ph = (struct elf64le_phdr *)(load_base + le64_to_cpu(kh->e_phoff));
+ for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
+ if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+ continue;
+ if (le64_to_cpu(ph->p_vaddr) > le64_to_cpu(kh->e_entry) ||
+ (le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_memsz)) <
+ le64_to_cpu(kh->e_entry))
+ continue;
+
+ /* Get our entry */
+ kernel_entry = le64_to_cpu(kh->e_entry) -
+ le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_offset);
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+ kernel_entry += load_base;
+ kernel_32bit = false;
+
+ kernel_size = le64_to_cpu(kh->e_shoff) +
+ ((uint32_t)le16_to_cpu(kh->e_shentsize) *
+ (uint32_t)le16_to_cpu(kh->e_shnum));
+
+ prlog(PR_DEBUG, "INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
+ kernel_entry, kernel_size);
+
+ return true;
+}
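+
+/*
+ * Note: the kernel_entry computation above translates e_entry from a
+ * virtual address to an offset within the image (e_entry - p_vaddr +
+ * p_offset) and then rebases it onto load_base, since the kernel is
+ * executed in place rather than copied to its link address.
+ */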
+
+static bool try_load_elf64(struct elf_hdr *header)
+{
+ struct elf64be_hdr *kh = (struct elf64be_hdr *)header;
+ struct elf64le_hdr *khle = (struct elf64le_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf64be_phdr *ph;
+ struct elf64be_shdr *sh;
+ unsigned int i;
+
+ /* Check it's a ppc64 LE ELF */
+ if (khle->ei_ident == ELF_IDENT &&
+ khle->ei_data == ELF_DATA_LSB &&
+ le16_to_cpu(khle->e_machine) == ELF_MACH_PPC64) {
+ return try_load_elf64_le(header);
+ }
+
+ /* Check it's a ppc64 ELF */
+ if (kh->ei_ident != ELF_IDENT ||
+ kh->ei_data != ELF_DATA_MSB ||
+ be16_to_cpu(kh->e_machine) != ELF_MACH_PPC64) {
+		prerror("INIT: Kernel doesn't look like a ppc64 ELF\n");
+ return false;
+ }
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF,
+	 * but it will not work for arbitrary ELF binaries.
+ */
+ ph = (struct elf64be_phdr *)(load_base + be64_to_cpu(kh->e_phoff));
+ for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) {
+ if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+ continue;
+ if (be64_to_cpu(ph->p_vaddr) > be64_to_cpu(kh->e_entry) ||
+ (be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_memsz)) <
+ be64_to_cpu(kh->e_entry))
+ continue;
+
+ /* Get our entry */
+ kernel_entry = be64_to_cpu(kh->e_entry) -
+ be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset);
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+
+ /* For the normal big-endian ELF ABI, the kernel entry points
+ * to a function descriptor in the data section. Linux instead
+ * has it point directly to code. Test whether it is pointing
+ * into an executable section or not to figure this out. Default
+ * to assuming it obeys the ABI.
+ */
+ sh = (struct elf64be_shdr *)(load_base + be64_to_cpu(kh->e_shoff));
+ for (i = 0; i < be16_to_cpu(kh->e_shnum); i++, sh++) {
+ if (be64_to_cpu(sh->sh_addr) <= be64_to_cpu(kh->e_entry) &&
+ (be64_to_cpu(sh->sh_addr) + be64_to_cpu(sh->sh_size)) >
+ be64_to_cpu(kh->e_entry))
+ break;
+ }
+
+ if (i == be16_to_cpu(kh->e_shnum) ||
+ !(be64_to_cpu(sh->sh_flags) & ELF_SFLAGS_X)) {
+ kernel_entry = *(uint64_t *)(kernel_entry + load_base);
+ kernel_entry = kernel_entry -
+ be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset);
+ }
+
+ kernel_entry += load_base;
+ kernel_32bit = false;
+
+ kernel_size = be64_to_cpu(kh->e_shoff) +
+ ((uint32_t)be16_to_cpu(kh->e_shentsize) *
+ (uint32_t)be16_to_cpu(kh->e_shnum));
+
+ printf("INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
+ kernel_entry, kernel_size);
+
+ return true;
+}
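+
+/*
+ * Background note (informational, not part of the original file): under the
+ * big-endian ELFv1 ABI a function symbol such as e_entry refers to a function
+ * descriptor -- a small data-section record (typically in .opd) whose first
+ * doubleword is the real code address and whose second is the TOC pointer --
+ * which is why try_load_elf64() dereferences the entry value when it does not
+ * fall inside an executable section.
+ */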
+
+static bool try_load_elf32_le(struct elf_hdr *header)
+{
+ struct elf32le_hdr *kh = (struct elf32le_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf32le_phdr *ph;
+ unsigned int i;
+
+ printf("INIT: 32-bit LE kernel discovered\n");
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF,
+	 * but it will not work for arbitrary ELF binaries.
+ */
+ ph = (struct elf32le_phdr *)(load_base + le32_to_cpu(kh->e_phoff));
+ for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
+ if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+ continue;
+ if (le32_to_cpu(ph->p_vaddr) > le32_to_cpu(kh->e_entry) ||
+ (le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_memsz)) <
+ le32_to_cpu(kh->e_entry))
+ continue;
+
+ /* Get our entry */
+ kernel_entry = le32_to_cpu(kh->e_entry) -
+ le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_offset);
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+
+ kernel_entry += load_base;
+ kernel_32bit = true;
+
+ printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
+
+ return true;
+}
+
+static bool try_load_elf32(struct elf_hdr *header)
+{
+ struct elf32be_hdr *kh = (struct elf32be_hdr *)header;
+ struct elf32le_hdr *khle = (struct elf32le_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf32be_phdr *ph;
+ unsigned int i;
+
+ /* Check it's a ppc32 LE ELF */
+ if (khle->ei_ident == ELF_IDENT &&
+ khle->ei_data == ELF_DATA_LSB &&
+ le16_to_cpu(khle->e_machine) == ELF_MACH_PPC32) {
+ return try_load_elf32_le(header);
+ }
+
+ /* Check it's a ppc32 ELF */
+ if (kh->ei_ident != ELF_IDENT ||
+ kh->ei_data != ELF_DATA_MSB ||
+ be16_to_cpu(kh->e_machine) != ELF_MACH_PPC32) {
+		prerror("INIT: Kernel doesn't look like a ppc32 ELF\n");
+ return false;
+ }
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF,
+	 * but it will not work for arbitrary ELF binaries.
+ */
+ ph = (struct elf32be_phdr *)(load_base + be32_to_cpu(kh->e_phoff));
+ for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) {
+ if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+ continue;
+ if (be32_to_cpu(ph->p_vaddr) > be32_to_cpu(kh->e_entry) ||
+ (be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_memsz)) <
+ be32_to_cpu(kh->e_entry))
+ continue;
+
+ /* Get our entry */
+ kernel_entry = be32_to_cpu(kh->e_entry) -
+ be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_offset);
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+
+ kernel_entry += load_base;
+ kernel_32bit = true;
+
+ printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
+
+ return true;
+}
+
+extern char __builtin_kernel_start[];
+extern char __builtin_kernel_end[];
+extern uint64_t boot_offset;
+
+static size_t initramfs_size;
+
+bool start_preload_kernel(void)
+{
+ int loaded;
+
+ /* Try to load an external kernel payload through the platform hooks */
+ kernel_size = KERNEL_LOAD_SIZE;
+ loaded = start_preload_resource(RESOURCE_ID_KERNEL,
+ RESOURCE_SUBID_NONE,
+ KERNEL_LOAD_BASE,
+ &kernel_size);
+ if (loaded != OPAL_SUCCESS) {
+ printf("INIT: platform start load kernel failed\n");
+ kernel_size = 0;
+ return false;
+ }
+
+ initramfs_size = INITRAMFS_LOAD_SIZE;
+ loaded = start_preload_resource(RESOURCE_ID_INITRAMFS,
+ RESOURCE_SUBID_NONE,
+ INITRAMFS_LOAD_BASE, &initramfs_size);
+ if (loaded != OPAL_SUCCESS) {
+ printf("INIT: platform start load initramfs failed\n");
+ initramfs_size = 0;
+ return false;
+ }
+
+ return true;
+}
+
+static bool load_kernel(void)
+{
+ void *stb_container = NULL;
+ struct elf_hdr *kh;
+ int loaded;
+
+ prlog(PR_NOTICE, "INIT: Waiting for kernel...\n");
+
+ loaded = wait_for_resource_loaded(RESOURCE_ID_KERNEL,
+ RESOURCE_SUBID_NONE);
+
+ if (loaded != OPAL_SUCCESS) {
+ printf("INIT: platform wait for kernel load failed\n");
+ kernel_size = 0;
+ }
+
+ /* Try embedded kernel payload */
+ if (!kernel_size) {
+ kernel_size = __builtin_kernel_end - __builtin_kernel_start;
+ if (kernel_size) {
+ /* Move the built-in kernel up */
+ uint64_t builtin_base =
+ ((uint64_t)__builtin_kernel_start) -
+ SKIBOOT_BASE + boot_offset;
+ printf("Using built-in kernel\n");
+ memmove(KERNEL_LOAD_BASE, (void*)builtin_base,
+ kernel_size);
+ }
+ }
+
+ if (dt_has_node_property(dt_chosen, "kernel-base-address", NULL)) {
+ kernel_entry = dt_prop_get_u64(dt_chosen,
+ "kernel-base-address");
+ prlog(PR_DEBUG, "INIT: Kernel image at 0x%llx\n", kernel_entry);
+ kh = (struct elf_hdr *)kernel_entry;
+ /*
+ * If the kernel is at 0, restore it as it was overwritten
+ * by our vectors.
+ */
+ if (kernel_entry < EXCEPTION_VECTORS_END) {
+ cpu_set_sreset_enable(false);
+ memcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END);
+ sync_icache();
+ } else {
+ /* Hack for STB in Mambo, assume at least 4kb in mem */
+ if (!kernel_size)
+ kernel_size = SECURE_BOOT_HEADERS_SIZE;
+ if (stb_is_container((void*)kernel_entry, kernel_size)) {
+ stb_container = (void*)kernel_entry;
+ kh = (struct elf_hdr *) (kernel_entry + SECURE_BOOT_HEADERS_SIZE);
+ } else
+ kh = (struct elf_hdr *) (kernel_entry);
+ }
+ } else {
+ if (!kernel_size) {
+ printf("INIT: Assuming kernel at %p\n",
+ KERNEL_LOAD_BASE);
+ /* Hack for STB in Mambo, assume at least 4kb in mem */
+ kernel_size = SECURE_BOOT_HEADERS_SIZE;
+ kernel_entry = (uint64_t)KERNEL_LOAD_BASE;
+ }
+ if (stb_is_container(KERNEL_LOAD_BASE, kernel_size)) {
+ stb_container = KERNEL_LOAD_BASE;
+ kh = (struct elf_hdr *) (KERNEL_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE);
+ } else
+ kh = (struct elf_hdr *) (KERNEL_LOAD_BASE);
+
+ }
+
+ prlog(PR_DEBUG,
+ "INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
+ kernel_size);
+
+ if (kh->ei_ident != ELF_IDENT) {
+ prerror("INIT: ELF header not found. Assuming raw binary.\n");
+ return true;
+ }
+
+ if (kh->ei_class == ELF_CLASS_64) {
+ if (!try_load_elf64(kh))
+ return false;
+ } else if (kh->ei_class == ELF_CLASS_32) {
+ if (!try_load_elf32(kh))
+ return false;
+ } else {
+		prerror("INIT: Neither ELF32 nor ELF64 ?\n");
+ return false;
+ }
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ secureboot_verify(RESOURCE_ID_KERNEL,
+ stb_container,
+ SECURE_BOOT_HEADERS_SIZE + kernel_size);
+ trustedboot_measure(RESOURCE_ID_KERNEL,
+ stb_container,
+ SECURE_BOOT_HEADERS_SIZE + kernel_size);
+ }
+
+ return true;
+}
+
+static void load_initramfs(void)
+{
+ uint64_t *initramfs_start;
+ void *stb_container = NULL;
+ int loaded;
+
+ loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,
+ RESOURCE_SUBID_NONE);
+
+ if (loaded != OPAL_SUCCESS || !initramfs_size)
+ return;
+
+ if (stb_is_container(INITRAMFS_LOAD_BASE, initramfs_size)) {
+ stb_container = INITRAMFS_LOAD_BASE;
+ initramfs_start = INITRAMFS_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE;
+ } else {
+ initramfs_start = INITRAMFS_LOAD_BASE;
+ }
+
+ dt_check_del_prop(dt_chosen, "linux,initrd-start");
+ dt_check_del_prop(dt_chosen, "linux,initrd-end");
+
+ printf("INIT: Initramfs loaded, size: %zu bytes\n", initramfs_size);
+
+ dt_add_property_u64(dt_chosen, "linux,initrd-start",
+ (uint64_t)initramfs_start);
+ dt_add_property_u64(dt_chosen, "linux,initrd-end",
+ (uint64_t)initramfs_start + initramfs_size);
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ secureboot_verify(RESOURCE_ID_INITRAMFS,
+ stb_container,
+ SECURE_BOOT_HEADERS_SIZE + initramfs_size);
+ trustedboot_measure(RESOURCE_ID_INITRAMFS,
+ stb_container,
+ SECURE_BOOT_HEADERS_SIZE + initramfs_size);
+ }
+}
+
+static void cpu_disable_ME_RI_one(void *param __unused)
+{
+ disable_machine_check();
+ mtmsrd(0, 1);
+}
+
+static int64_t cpu_disable_ME_RI_all(void)
+{
+ struct cpu_thread *cpu;
+ struct cpu_job **jobs;
+
+ jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
+ assert(jobs);
+
+ for_each_available_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+ jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI",
+ cpu_disable_ME_RI_one, NULL);
+ }
+
+ /* this cpu */
+ cpu_disable_ME_RI_one(NULL);
+
+ for_each_available_cpu(cpu) {
+ if (jobs[cpu->pir])
+ cpu_wait_job(jobs[cpu->pir], true);
+ }
+
+ free(jobs);
+
+ return OPAL_SUCCESS;
+}
+
+static void *fdt;
+
+void __noreturn load_and_boot_kernel(bool is_reboot)
+{
+ const struct dt_property *memprop;
+ const char *cmdline, *stdoutp;
+ uint64_t mem_top;
+
+ memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
+ if (memprop)
+ mem_top = (u64)dt_property_get_cell(memprop, 0) << 32
+ | dt_property_get_cell(memprop, 1);
+ else /* XXX HB hack, might want to calc it */
+ mem_top = 0x40000000;
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x000A);
+
+ /* Load kernel LID */
+ if (!load_kernel()) {
+ op_display(OP_FATAL, OP_MOD_INIT, 1);
+ abort();
+ }
+
+ load_initramfs();
+
+ trustedboot_exit_boot_services();
+
+ ipmi_set_fw_progress_sensor(IPMI_FW_OS_BOOT);
+
+
+ if (!is_reboot) {
+ /* We wait for the nvram read to complete here so we can
+ * grab stuff from there such as the kernel arguments
+ */
+ nvram_wait_for_load();
+
+ if (!occ_sensors_init())
+ dts_sensor_create_nodes(sensor_node);
+
+ } else {
+ /* fdt will be rebuilt */
+ free(fdt);
+ fdt = NULL;
+
+ nvram_reinit();
+ occ_pstates_init();
+ }
+
+ /* Use nvram bootargs over device tree */
+ cmdline = nvram_query_safe("bootargs");
+ if (cmdline) {
+ dt_check_del_prop(dt_chosen, "bootargs");
+ dt_add_property_string(dt_chosen, "bootargs", cmdline);
+ prlog(PR_DEBUG, "INIT: Command line from NVRAM: %s\n",
+ cmdline);
+ }
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x000B);
+
+ add_fast_reboot_dt_entries();
+
+ if (platform.finalise_dt)
+ platform.finalise_dt(is_reboot);
+
+ /* Create the device tree blob to boot OS. */
+ fdt = create_dtb(dt_root, false);
+ if (!fdt) {
+ op_display(OP_FATAL, OP_MOD_INIT, 2);
+ abort();
+ }
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x000C);
+
+ mem_dump_free();
+
+ /* Dump the selected console */
+ stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL);
+ prlog(PR_DEBUG, "INIT: stdout-path: %s\n", stdoutp ? stdoutp : "");
+
+ fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+
+ /* Check there is something there before we branch to it */
+ if (*(uint32_t *)kernel_entry == 0) {
+ prlog(PR_EMERG, "FATAL: Kernel is zeros, can't execute!\n");
+ assert(0);
+ }
+
+ if (platform.exit)
+ platform.exit();
+
+ /* Take processors out of nap */
+ cpu_set_sreset_enable(false);
+ cpu_set_ipi_enable(false);
+
+ printf("INIT: Starting kernel at 0x%llx, fdt at %p %u bytes\n",
+ kernel_entry, fdt, fdt_totalsize(fdt));
+
+ /* Disable machine checks on all */
+ cpu_disable_ME_RI_all();
+
+ patch_traps(false);
+ cpu_set_hile_mode(false); /* Clear HILE on all CPUs */
+
+ /* init MPIPL */
+ if (!is_reboot)
+ opal_mpipl_init();
+
+ checksum_romem();
+
+ debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;
+
+ cpu_give_self_os();
+
+ if (kernel_32bit)
+ start_kernel32(kernel_entry, fdt, mem_top);
+ start_kernel(kernel_entry, fdt, mem_top);
+}
+
+static void storage_keys_fixup(void)
+{
+ struct dt_node *cpus, *n;
+
+ cpus = dt_find_by_path(dt_root, "/cpus");
+ assert(cpus);
+
+ if (proc_gen == proc_gen_unknown)
+ return;
+
+ dt_for_each_child(cpus, n) {
+ /* There may be cache nodes in /cpus. */
+ if (!dt_has_node_property(n, "device_type", "cpu") ||
+ dt_has_node_property(n, "ibm,processor-storage-keys", NULL))
+ continue;
+
+ /*
+ * skiboot supports p8 & p9, both of which support the IAMR, and
+ * both of which support 32 keys. So advertise 32 keys for data
+ * accesses and 32 for instruction accesses.
+ */
+ dt_add_property_cells(n, "ibm,processor-storage-keys", 32, 32);
+ }
+}
+
+static void dt_fixups(void)
+{
+ struct dt_node *n;
+ struct dt_node *primary_lpc = NULL;
+
+ /* lpc node missing #address/size cells. Also pick one as
+ * primary for now (TBD: How to convey that from HB)
+ */
+ dt_for_each_compatible(dt_root, n, "ibm,power8-lpc") {
+ if (!primary_lpc || dt_has_node_property(n, "primary", NULL))
+ primary_lpc = n;
+ if (dt_has_node_property(n, "#address-cells", NULL))
+ break;
+ dt_add_property_cells(n, "#address-cells", 2);
+ dt_add_property_cells(n, "#size-cells", 1);
+ dt_add_property_strings(n, "status", "ok");
+ }
+
+ /* Missing "primary" property in LPC bus */
+ if (primary_lpc && !dt_has_node_property(primary_lpc, "primary", NULL))
+ dt_add_property(primary_lpc, "primary", NULL, 0);
+
+ /* Missing "scom-controller" */
+ dt_for_each_compatible(dt_root, n, "ibm,xscom") {
+ if (!dt_has_node_property(n, "scom-controller", NULL))
+ dt_add_property(n, "scom-controller", NULL, 0);
+ }
+
+ storage_keys_fixup();
+}
+
+static void add_arch_vector(void)
+{
+ /**
+ * vec5 = a PVR-list : Number-of-option-vectors :
+ * option-vectors[Number-of-option-vectors + 1]
+ */
+ uint8_t vec5[] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00};
+
+ if (dt_has_node_property(dt_chosen, "ibm,architecture-vec-5", NULL))
+ return;
+
+ dt_add_property(dt_chosen, "ibm,architecture-vec-5",
+ vec5, sizeof(vec5));
+}
+
+static void dt_init_misc(void)
+{
+ /* Check if there's a /chosen node, if not, add one */
+ dt_chosen = dt_find_by_path(dt_root, "/chosen");
+ if (!dt_chosen)
+ dt_chosen = dt_new(dt_root, "chosen");
+ assert(dt_chosen);
+
+ /* Add IBM architecture vectors if needed */
+ add_arch_vector();
+
+	/* Add the "OPAL virtual ICS" node */
+ add_ics_node();
+
+ /* Additional fixups. TODO: Move into platform */
+ dt_fixups();
+}
+
+static u8 console_get_level(const char *s)
+{
+ if (strcmp(s, "emerg") == 0)
+ return PR_EMERG;
+ if (strcmp(s, "alert") == 0)
+ return PR_ALERT;
+ if (strcmp(s, "crit") == 0)
+ return PR_CRIT;
+ if (strcmp(s, "err") == 0)
+ return PR_ERR;
+ if (strcmp(s, "warning") == 0)
+ return PR_WARNING;
+ if (strcmp(s, "notice") == 0)
+ return PR_NOTICE;
+ if (strcmp(s, "printf") == 0)
+ return PR_PRINTF;
+ if (strcmp(s, "info") == 0)
+ return PR_INFO;
+ if (strcmp(s, "debug") == 0)
+ return PR_DEBUG;
+ if (strcmp(s, "trace") == 0)
+ return PR_TRACE;
+ if (strcmp(s, "insane") == 0)
+ return PR_INSANE;
+ /* Assume it's a number instead */
+ return atoi(s);
+}
+
+static void console_log_level(void)
+{
+ const char *s;
+ u8 level;
+
+ /* console log level:
+ * high 4 bits in memory, low 4 bits driver (e.g. uart). */
+ s = nvram_query_safe("log-level-driver");
+ if (s) {
+ level = console_get_level(s);
+ debug_descriptor.console_log_levels =
+ (debug_descriptor.console_log_levels & 0xf0 ) |
+ (level & 0x0f);
+ prlog(PR_NOTICE, "console: Setting driver log level to %i\n",
+ level & 0x0f);
+ }
+ s = nvram_query_safe("log-level-memory");
+ if (s) {
+ level = console_get_level(s);
+ debug_descriptor.console_log_levels =
+ (debug_descriptor.console_log_levels & 0x0f ) |
+ ((level & 0x0f) << 4);
+ prlog(PR_NOTICE, "console: Setting memory log level to %i\n",
+ level & 0x0f);
+ }
+}
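+
+/*
+ * Usage sketch (illustrative, values are only an example): the two keys read
+ * above live in the "ibm,skiboot" NVRAM partition, so from a booted host
+ * something like
+ *
+ *	nvram -p ibm,skiboot --update-config log-level-driver=7
+ *	nvram -p ibm,skiboot --update-config log-level-memory=insane
+ *
+ * would raise the driver (UART) level to PR_DEBUG and the in-memory level to
+ * PR_INSANE on the next boot, using either the numeric values or the symbolic
+ * names accepted by console_get_level().
+ */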
+
+typedef void (*ctorcall_t)(void);
+
+static void __nomcount do_ctors(void)
+{
+ extern ctorcall_t __ctors_start[], __ctors_end[];
+ ctorcall_t *call;
+
+ for (call = __ctors_start; call < __ctors_end; call++)
+ (*call)();
+}
+
+#ifdef ELF_ABI_v2
+static void setup_branch_null_catcher(void)
+{
+ asm volatile( \
+ ".section .rodata" "\n\t" \
+ "3: .string \"branch to NULL\"" "\n\t" \
+ ".previous" "\n\t" \
+ ".section .trap_table,\"aw\"" "\n\t" \
+ ".llong 0" "\n\t" \
+ ".llong 3b" "\n\t" \
+ ".previous" "\n\t" \
+ );
+}
+#else
+static void branch_null(void)
+{
+ assert(0);
+}
+
+static void setup_branch_null_catcher(void)
+{
+ void (*bn)(void) = branch_null;
+
+ /*
+ * FIXME: This copies the function descriptor (16 bytes) for
+ * ABI v1 (ie. big endian). This will be broken if we ever
+ * move to ABI v2 (ie little endian)
+ */
+ memcpy_null((void *)0, bn, 16);
+}
+#endif
+
+void copy_sreset_vector(void)
+{
+ uint32_t *src, *dst;
+
+ /* Copy the reset code over the entry point. */
+ src = &reset_patch_start;
+ dst = (uint32_t *)0x100;
+ while(src < &reset_patch_end)
+ *(dst++) = *(src++);
+ sync_icache();
+}
+
+void copy_sreset_vector_fast_reboot(void)
+{
+ uint32_t *src, *dst;
+
+ /* Copy the reset code over the entry point. */
+ src = &reset_fast_reboot_patch_start;
+ dst = (uint32_t *)0x100;
+ while(src < &reset_fast_reboot_patch_end)
+ *(dst++) = *(src++);
+ sync_icache();
+}
+
+void copy_exception_vectors(void)
+{
+	/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoiding the area below
+	 * 0x100 as it holds the boot flag used by CPUs still potentially entering
+ * skiboot.
+ */
+ memcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100),
+ EXCEPTION_VECTORS_END - 0x100);
+ sync_icache();
+}
+
+/*
+ * When skiboot owns the exception vectors, patch in 'trap' for assert fails.
+ * Otherwise use assert_fail()
+ */
+void patch_traps(bool enable)
+{
+ struct trap_table_entry *tte;
+
+ for (tte = __trap_table_start; tte < __trap_table_end; tte++) {
+ uint32_t *insn;
+
+ insn = (uint32_t *)tte->address;
+ if (enable) {
+ *insn = PPC_INST_TRAP;
+ } else {
+ *insn = PPC_INST_NOP;
+ }
+ }
+
+ sync_icache();
+}
+
+static void per_thread_sanity_checks(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ /**
+ * @fwts-label NonZeroHRMOR
+ * @fwts-advice The contents of the hypervisor real mode offset register
+	 * (HRMOR) are bitwise ORed with the address of any hypervisor real mode
+	 * (i.e. skiboot) memory access. Skiboot does not support operating
+	 * with a non-zero HRMOR and setting it will break some things (e.g.
+ * XSCOMs) in hard-to-debug ways.
+ */
+ assert(mfspr(SPR_HRMOR) == 0);
+
+ /**
+ * @fwts-label UnknownSecondary
+	 * @fwts-advice The boot CPU attempted to call in a secondary thread
+ * without initialising the corresponding cpu_thread structure. This may
+ * happen if the HDAT or devicetree reports too few threads or cores for
+ * this processor.
+ */
+ assert(cpu->state != cpu_state_no_cpu);
+}
+
+void pci_nvram_init(void)
+{
+ const char *nvram_speed;
+
+ verbose_eeh = nvram_query_eq_safe("pci-eeh-verbose", "true");
+ if (verbose_eeh)
+ prlog(PR_INFO, "PHB: Verbose EEH enabled\n");
+
+ pcie_max_link_speed = 0;
+
+ nvram_speed = nvram_query_dangerous("pcie-max-link-speed");
+ if (nvram_speed) {
+ pcie_max_link_speed = atoi(nvram_speed);
+ prlog(PR_NOTICE, "PHB: NVRAM set max link speed to GEN%i\n",
+ pcie_max_link_speed);
+ }
+
+ pci_tracing = nvram_query_eq_safe("pci-tracing", "true");
+}
+
+static uint32_t mem_csum(void *_p, void *_e)
+{
+ size_t len = _e - _p;
+ uint32_t *p = _p;
+ uint32_t v1 = 0, v2 = 0;
+ uint32_t csum;
+ unsigned int i;
+
+ for (i = 0; i < len; i += 4) {
+ uint32_t v = *p++;
+ v1 += v;
+ v2 += v1;
+ }
+
+ csum = v1 ^ v2;
+
+ return csum;
+}
+
+static uint32_t romem_csum;
+
+static void checksum_romem(void)
+{
+ uint32_t csum;
+
+ romem_csum = 0;
+ if (chip_quirk(QUIRK_SLOW_SIM))
+ return;
+
+ csum = mem_csum(_start, _head_end);
+ romem_csum ^= csum;
+
+ csum = mem_csum(_stext, _romem_end);
+ romem_csum ^= csum;
+
+ csum = mem_csum(__builtin_kernel_start, __builtin_kernel_end);
+ romem_csum ^= csum;
+}
+
+bool verify_romem(void)
+{
+ uint32_t old = romem_csum;
+ checksum_romem();
+ if (old != romem_csum) {
+ romem_csum = old;
+ prlog(PR_NOTICE, "OPAL checksums did not match\n");
+ return false;
+ }
+ return true;
+}
+
+static void mask_pc_system_xstop(void)
+{
+ struct cpu_thread *cpu;
+ uint32_t chip_id, core_id;
+ int rc;
+
+ if (proc_gen != proc_gen_p10)
+ return;
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+ return;
+
+ /*
+	 * On P10, mask the PC system checkstop (bit 28). This is needed
+ * for HW570622. We keep processor recovery disabled via
+ * HID[5] and mask the checkstop that it can cause. CME does
+ * the recovery handling for us.
+ */
+ for_each_cpu(cpu) {
+ chip_id = cpu->chip_id;
+ core_id = pir_to_core_id(cpu->pir);
+
+ rc = xscom_write(chip_id,
+ XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIRMASK_OR),
+ PPC_BIT(28));
+ if (rc)
+ prerror("Error setting FIR MASK rc:%d on PIR:%x\n",
+ rc, cpu->pir);
+ }
+}
+
+
+/* Called from head.S, thus no prototype. */
+void __noreturn __nomcount main_cpu_entry(const void *fdt);
+
+void __noreturn __nomcount main_cpu_entry(const void *fdt)
+{
+ /*
+	 * WARNING: At this point, the timebases have
+ * *not* been synchronized yet. Do not use any timebase
+ * related functions for timeouts etc... unless you can cope
+ * with the speed being some random core clock divider and
+ * the value jumping backward when the synchronization actually
+ * happens (in chiptod_init() below).
+ *
+ * Also the current cpu_thread() struct is not initialized
+	 * either, so we need to clear it out first thing (without
+	 * putting any other useful info in there just yet), otherwise
+	 * printf and locks are going to play funny games with "con_suspend".
+ */
+ pre_init_boot_cpu();
+
+ /*
+ * Point to our mem console
+ */
+ debug_descriptor.memcons_phys = cpu_to_be64((uint64_t)&memcons);
+
+ /*
+ * Before first printk, ensure console buffer is clear or
+ * reading tools might think it has wrapped
+ */
+ clear_console();
+
+	/* Back up the previous vectors as this area could contain a kernel
+ * image.
+ */
+ memcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END);
+
+ /*
+ * Some boot firmwares enter OPAL with MSR[ME]=1, as they presumably
+ * handle machine checks until we take over. As we overwrite the
+ * previous exception vectors with our own handlers, disable MSR[ME].
+ * This could be done atomically by patching in a branch then patching
+ * it out last, but that's a lot of effort.
+ */
+ disable_machine_check();
+
+ /* Copy all vectors down to 0 */
+ copy_exception_vectors();
+
+ /* Enable trap based asserts */
+ patch_traps(true);
+
+ /*
+ * Enable MSR[ME] bit so we can take MCEs. We don't currently
+ * recover, but we print some useful information.
+ */
+ enable_machine_check();
+ mtmsrd(MSR_RI, 1);
+
+ /* Setup a NULL catcher to catch accidental NULL ptr calls */
+ setup_branch_null_catcher();
+
+ /* Call library constructors */
+ do_ctors();
+
+ prlog(PR_NOTICE, "OPAL %s%s starting...\n", version, DEBUG_STR);
+
+ prlog(PR_DEBUG, "initial console log level: memory %d, driver %d\n",
+ (debug_descriptor.console_log_levels >> 4),
+ (debug_descriptor.console_log_levels & 0x0f));
+ prlog(PR_TRACE, "OPAL is Powered By Linked-List Technology.\n");
+
+#ifdef SKIBOOT_GCOV
+ skiboot_gcov_done();
+#endif
+
+ /* Initialize boot cpu's cpu_thread struct */
+ init_boot_cpu();
+
+ /* Now locks can be used */
+ init_locks();
+
+ /* Create the OPAL call table early on, entries can be overridden
+ * later on (FSP console code for example)
+ */
+ opal_table_init();
+
+ /* Init the physical map table so we can start mapping things */
+ phys_map_init(mfspr(SPR_PVR));
+
+ /*
+ * If we are coming in with a flat device-tree, we expand it
+ * now. Else look for HDAT and create a device-tree from them
+ *
+ * Hack alert: When entering via the OPAL entry point, fdt
+ * is set to -1, we record that and pass it to parse_hdat
+ */
+
+ dt_root = dt_new_root("");
+
+ if (fdt == (void *)-1ul) {
+ if (parse_hdat(true) < 0)
+ abort();
+ } else if (fdt == NULL) {
+ if (parse_hdat(false) < 0)
+ abort();
+ } else {
+ dt_expand(fdt);
+ }
+ dt_add_cpufeatures(dt_root);
+
+ /* Now that we have a full devicetree, verify that we aren't on fire. */
+ per_thread_sanity_checks();
+
+ /*
+ * From there, we follow a fairly strict initialization order.
+ *
+ * First we need to build up our chip data structures and initialize
+	 * XSCOM which will be needed for a number of subsequent things.
+ *
+ * We want XSCOM available as early as the platform probe in case the
+ * probe requires some HW accesses.
+ *
+ * We also initialize the FSI master at that point in case we need
+ * to access chips via that path early on.
+ */
+ init_chips();
+
+ xscom_init();
+ mfsi_init();
+
+ /*
+ * Direct controls facilities provides some controls over CPUs
+ * using scoms.
+ */
+ direct_controls_init();
+
+ /*
+ * Put various bits & pieces in device-tree that might not
+ * already be there such as the /chosen node if not there yet,
+ * the ICS node, etc... This can potentially use XSCOM
+ */
+ dt_init_misc();
+
+ /*
+ * Initialize LPC (P8 and beyond) so we can get to UART, BMC and
+	 * other system controllers. This is done before probe_platform
+ * so that the platform probing code can access an external
+ * BMC if needed.
+ */
+ lpc_init();
+
+ /*
+ * This should be done before mem_region_init, so the stack
+ * region length can be set according to the maximum PIR.
+ */
+ init_cpu_max_pir();
+
+ /*
+ * Now, we init our memory map from the device-tree, and immediately
+ * reserve areas which we know might contain data coming from
+ * HostBoot. We need to do these things before we start doing
+ * allocations outside of our heap, such as chip local allocs,
+ * otherwise we might clobber those data.
+ */
+ mem_region_init();
+
+ /*
+ * Reserve memory required to capture OPAL dump. This should be done
+ * immediately after mem_region_init to avoid any clash with local
+ * memory allocation.
+ */
+ opal_mpipl_reserve_mem();
+
+ /* Reserve HOMER and OCC area */
+ homer_init();
+
+ /* Initialize the rest of the cpu thread structs */
+ init_all_cpus();
+ if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10)
+ cpu_set_ipi_enable(true);
+
+ /* Once all CPU are up apply this workaround */
+ mask_pc_system_xstop();
+
+ /* Add the /opal node to the device-tree */
+ add_opal_node();
+
+ /*
+ * We probe the platform now. This means the platform probe gets
+ * the opportunity to reserve additional areas of memory if needed.
+ *
+ * Note: Timebases still not synchronized.
+ */
+ probe_platform();
+
+	/* Allocate our split trace buffers now. Depends on add_opal_node() */
+ init_trace_buffers();
+
+ /* On P8, get the ICPs and make sure they are in a sane state */
+ init_interrupts();
+ if (proc_gen == proc_gen_p8)
+ cpu_set_ipi_enable(true);
+
+ /* On P9 and P10, initialize XIVE */
+ if (proc_gen == proc_gen_p9)
+ init_xive();
+ else if (proc_gen == proc_gen_p10)
+ xive2_init();
+
+ /* Grab centaurs from device-tree if present (only on FSP-less) */
+ centaur_init();
+
+ /* initialize ocmb scom-controller */
+ ocmb_init();
+
+ /* Initialize PSI (depends on probe_platform being called) */
+ psi_init();
+
+ /* Initialize/enable LPC interrupts. This must be done after the
+ * PSI interface has been initialized since it serves as an interrupt
+ * source for LPC interrupts.
+ */
+ lpc_init_interrupts();
+
+ /* Call in secondary CPUs */
+ cpu_bringup();
+
+ /* We can now overwrite the 0x100 vector as we are no longer being
+ * entered there.
+ */
+ copy_sreset_vector();
+
+ /* We can now do NAP mode */
+ cpu_set_sreset_enable(true);
+
+ /*
+ * Synchronize time bases. Prior to chiptod_init() the timebase
+ * is free-running at a frequency based on the core clock rather
+ * than being synchronised to the ChipTOD network. This means
+ * that the timestamps in early boot might be a little off compared
+ * to wall clock time.
+ */
+ chiptod_init();
+
+ /* Initialize P9 DIO */
+ p9_dio_init();
+
+ /*
+ * SBE uses TB value for scheduling timer. Hence init after
+ * chiptod init
+ */
+ p9_sbe_init();
+
+ /* Initialize i2c */
+ p8_i2c_init();
+
+ /* Register routine to dispatch and read sensors */
+ sensor_init();
+
+ /*
+ * Initialize the opal messaging before platform.init as we are
+	 * Initialize the OPAL messaging before platform.init, as we may get
+	 * a request to queue an OCC load OPAL message when host services
+	 * receives a load OCC request from the FSP.
+ opal_init_msg();
+
+ /*
+ * We have initialized the basic HW, we can now call into the
+ * platform to perform subsequent inits, such as establishing
+ * communication with the FSP or starting IPMI.
+ */
+ if (platform.init)
+ platform.init();
+
+ /* Read in NVRAM and set it up */
+ nvram_init();
+
+ /* Set the console level */
+ console_log_level();
+
+ /* Secure/Trusted Boot init. We look for /ibm,secureboot in DT */
+ secureboot_init();
+ trustedboot_init();
+
+ /* Secure variables init, handled by platform */
+ if (platform.secvar_init && is_fw_secureboot())
+ platform.secvar_init();
+
+ /*
+ * BMC platforms load version information from flash after
+ * secure/trustedboot init.
+ */
+ if (platform.bmc)
+ flash_fw_version_preload();
+
+ /* preload the IMC catalog dtb */
+ imc_catalog_preload();
+
+ /* Install the OPAL Console handlers */
+ init_opal_console();
+
+ /*
+ * Some platforms set a flag to wait for SBE validation to be
+ * performed by the BMC. If this occurs it leaves the SBE in a
+ * bad state and the system will reboot at this point.
+ */
+ if (platform.seeprom_update)
+ platform.seeprom_update();
+
+ /* Init SLW related stuff, including fastsleep */
+ slw_init();
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x0002);
+
+ /*
+ * On some POWER9 BMC systems, we need to initialise the OCC
+ * before the NPU to facilitate NVLink/OpenCAPI presence
+ * detection, so we set it up as early as possible. On FSP
+ * systems, Hostboot starts booting the OCC later, so we delay
+ * OCC initialisation as late as possible to give it the
+ * maximum time to boot up.
+ */
+ if (platform.bmc)
+ occ_pstates_init();
+
+ pci_nvram_init();
+
+ preload_capp_ucode();
+ start_preload_kernel();
+
+ /* Catalog decompression routine */
+ imc_decompress_catalog();
+
+ /* Virtual Accelerator Switchboard */
+ vas_init();
+
+ /* NX init */
+ nx_init();
+
+ /* Probe PHB3 on P8 */
+ probe_phb3();
+
+ /* Probe PHB4 on P9 and PHB5 on P10 */
+ probe_phb4();
+
+ /* Probe NPUs */
+ probe_npu();
+ probe_npu2();
+ probe_npu3();
+
+ /* Initialize PCI */
+ pci_init_slots();
+
+ /* Add OPAL timer related properties */
+ late_init_timers();
+
+ /* Setup ibm,firmware-versions if able */
+ if (platform.bmc) {
+ flash_dt_add_fw_version();
+ ipmi_dt_add_bmc_info();
+ }
+
+ ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
+
+ /*
+ * These last few things must be done as late as possible
+ * because they rely on various other things having been setup,
+ * for example, add_opal_interrupts() will add all the interrupt
+ * sources that are going to the firmware. We can't add a new one
+ * after that call. Similarly, the mem_region calls will construct
+ * the reserve maps in the DT so we shouldn't affect the memory
+ * regions after that
+ */
+
+ /* Create the LPC bus interrupt-map on P9 */
+ lpc_finalize_interrupts();
+
+ /* Add the list of interrupts going to OPAL */
+ add_opal_interrupts();
+
+ /* Init In-Memory Collection related stuff (load the IMC dtb into memory) */
+ imc_init();
+
+ /* Disable protected execution facility in BML */
+ cpu_disable_pef();
+
+ /* export the trace buffers */
+ trace_add_dt_props();
+
+ /* Now release parts of memory nodes we haven't used ourselves... */
+ mem_region_release_unused();
+
+ /* ... and add remaining reservations to the DT */
+ mem_region_add_dt_reserved();
+
+ /*
+ * Update /ibm,secureboot/ibm,cvc/memory-region to point to
+ * /reserved-memory/secure-crypt-algo-code instead of
+ * /ibm,hostboot/reserved-memory/secure-crypt-algo-code.
+ */
+ cvc_update_reserved_memory_phandle();
+
+ prd_register_reserved_memory();
+
+ load_and_boot_kernel(false);
+}
+
+void __noreturn __secondary_cpu_entry(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ /* Secondary CPU called in */
+ cpu_callin(cpu);
+
+ enable_machine_check();
+ mtmsrd(MSR_RI, 1);
+
+ /* Some XIVE setup */
+ if (proc_gen == proc_gen_p9)
+ xive_cpu_callin(cpu);
+ else if (proc_gen == proc_gen_p10)
+ xive2_cpu_callin(cpu);
+
+ /* Wait for work to do */
+ while(true) {
+ if (cpu_check_jobs(cpu))
+ cpu_process_jobs();
+ else
+ cpu_idle_job();
+ }
+}
+
+/* Called from head.S, thus no prototype. */
+void __noreturn __nomcount secondary_cpu_entry(void);
+
+void __noreturn __nomcount secondary_cpu_entry(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ per_thread_sanity_checks();
+
+ prlog(PR_DEBUG, "INIT: CPU PIR 0x%04x called in\n", cpu->pir);
+
+ __secondary_cpu_entry();
+}
diff --git a/roms/skiboot/core/interrupts.c b/roms/skiboot/core/interrupts.c
new file mode 100644
index 000000000..0a617d385
--- /dev/null
+++ b/roms/skiboot/core/interrupts.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Excuse me, you do work for me now?
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <chip.h>
+#include <cpu.h>
+#include <fsp.h>
+#include <interrupts.h>
+#include <opal.h>
+#include <io.h>
+#include <cec.h>
+#include <device.h>
+#include <ccan/str/str.h>
+#include <timer.h>
+#include <sbe-p8.h>
+#include <sbe-p9.h>
+
+/* ICP registers */
+#define ICP_XIRR 0x4 /* 32-bit access */
+#define ICP_CPPR 0x4 /* 8-bit access */
+#define ICP_MFRR 0xc /* 8-bit access */
+
+static LIST_HEAD(irq_sources);
+static LIST_HEAD(irq_sources2);
+static struct lock irq_lock = LOCK_UNLOCKED;
+
+void __register_irq_source(struct irq_source *is, bool secondary)
+{
+ struct irq_source *is1;
+ struct list_head *list = secondary ? &irq_sources2 : &irq_sources;
+
+ prlog(PR_DEBUG, "IRQ: Registering %04x..%04x ops @%p (data %p)%s\n",
+ is->start, is->end - 1, is->ops, is->data,
+ secondary ? " [secondary]" : "");
+
+ lock(&irq_lock);
+ list_for_each(list, is1, link) {
+ if (is->end > is1->start && is->start < is1->end) {
+ prerror("register IRQ source overlap !\n");
+ prerror(" new: %x..%x old: %x..%x\n",
+ is->start, is->end - 1,
+ is1->start, is1->end - 1);
+ assert(0);
+ }
+ }
+ list_add_tail(list, &is->link);
+ unlock(&irq_lock);
+}
+
+void register_irq_source(const struct irq_source_ops *ops, void *data,
+ uint32_t start, uint32_t count)
+{
+ struct irq_source *is;
+
+ is = zalloc(sizeof(struct irq_source));
+ assert(is);
+ is->start = start;
+ is->end = start + count;
+ is->ops = ops;
+ is->data = data;
+
+ __register_irq_source(is, false);
+}
+
+void unregister_irq_source(uint32_t start, uint32_t count)
+{
+ struct irq_source *is;
+
+ /* Note: We currently only unregister from the primary sources */
+ lock(&irq_lock);
+ list_for_each(&irq_sources, is, link) {
+ if (start >= is->start && start < is->end) {
+ if (start != is->start ||
+ count != (is->end - is->start)) {
+ prerror("unregister IRQ source mismatch !\n");
+ prerror("start:%x, count: %x match: %x..%x\n",
+ start, count, is->start, is->end);
+ assert(0);
+ }
+ list_del(&is->link);
+ unlock(&irq_lock);
+ /* XXX Add synchronize / RCU */
+ free(is);
+ return;
+ }
+ }
+ unlock(&irq_lock);
+ prerror("unregister IRQ source not found !\n");
+ prerror("start:%x, count: %x\n", start, count);
+ assert(0);
+}
+
+struct irq_source *irq_find_source(uint32_t isn)
+{
+ struct irq_source *is;
+
+ lock(&irq_lock);
+ /*
+ * XXX This really needs some kind of caching !
+ */
+ list_for_each(&irq_sources, is, link) {
+ if (isn >= is->start && isn < is->end) {
+ unlock(&irq_lock);
+ return is;
+ }
+ }
+ list_for_each(&irq_sources2, is, link) {
+ if (isn >= is->start && isn < is->end) {
+ unlock(&irq_lock);
+ return is;
+ }
+ }
+ unlock(&irq_lock);
+
+ return NULL;
+}
+
+void irq_for_each_source(void (*cb)(struct irq_source *, void *), void *data)
+{
+ struct irq_source *is;
+
+ lock(&irq_lock);
+ list_for_each(&irq_sources, is, link)
+ cb(is, data);
+ list_for_each(&irq_sources2, is, link)
+ cb(is, data);
+ unlock(&irq_lock);
+}
+
+/*
+ * This takes a 6-bit chip id and returns a 20 bit value representing
+ * the PSI interrupt. This includes all the fields above, ie, is a
+ * global interrupt number.
+ *
+ * For P8, this returns the base of the 8-interrupts block for PSI
+ */
+uint32_t get_psi_interrupt(uint32_t chip_id)
+{
+ uint32_t irq;
+
+ switch(proc_gen) {
+ case proc_gen_p8:
+ irq = p8_chip_irq_block_base(chip_id, P8_IRQ_BLOCK_MISC);
+ irq += P8_IRQ_MISC_PSI_BASE;
+ break;
+ default:
+ assert(false);
+ };
+
+ return irq;
+}
+
+
+struct dt_node *add_ics_node(void)
+{
+ struct dt_node *ics = dt_new_addr(dt_root, "interrupt-controller", 0);
+ bool has_xive;
+
+ if (!ics)
+ return NULL;
+
+ has_xive = proc_gen >= proc_gen_p9;
+
+ dt_add_property_cells(ics, "reg", 0, 0, 0, 0);
+ dt_add_property_strings(ics, "compatible",
+ has_xive ? "ibm,opal-xive-vc" : "IBM,ppc-xics",
+ "IBM,opal-xics");
+ dt_add_property_cells(ics, "#address-cells", 0);
+ dt_add_property_cells(ics, "#interrupt-cells", 2);
+ dt_add_property_string(ics, "device_type",
+ "PowerPC-Interrupt-Source-Controller");
+ dt_add_property(ics, "interrupt-controller", NULL, 0);
+
+ return ics;
+}
+
+uint32_t get_ics_phandle(void)
+{
+ struct dt_node *i;
+
+ for (i = dt_first(dt_root); i; i = dt_next(dt_root, i)) {
+ if (streq(i->name, "interrupt-controller@0")) {
+ return i->phandle;
+ }
+ }
+ abort();
+}
+
+void add_opal_interrupts(void)
+{
+ struct irq_source *is;
+ unsigned int i, ns, tns = 0, count = 0;
+ uint32_t isn;
+ __be32 *irqs = NULL;
+ char *names = NULL;
+
+ lock(&irq_lock);
+ list_for_each(&irq_sources, is, link) {
+ /*
+ * Don't even consider sources that don't have an interrupts
+ * callback or don't have an attributes one.
+ */
+ if (!is->ops->interrupt || !is->ops->attributes)
+ continue;
+ for (isn = is->start; isn < is->end; isn++) {
+ uint64_t attr = is->ops->attributes(is, isn);
+ uint32_t iflags;
+ char *name;
+
+ if (attr & IRQ_ATTR_TARGET_LINUX)
+ continue;
+ if (attr & IRQ_ATTR_TYPE_MSI)
+ iflags = 0;
+ else
+ iflags = 1;
+ name = is->ops->name ? is->ops->name(is, isn) : NULL;
+ ns = name ? strlen(name) : 0;
+ prlog(PR_DEBUG, "irq %x name: %s %s\n",
+ isn,
+ name ? name : "<null>",
+ iflags ? "[level]" : "[edge]");
+ names = realloc(names, tns + ns + 1);
+ if (name) {
+ strcpy(names + tns, name);
+ tns += (ns + 1);
+ free(name);
+ } else
+ names[tns++] = 0;
+ i = count++;
+ irqs = realloc(irqs, 8 * count);
+ irqs[i*2] = cpu_to_be32(isn);
+ irqs[i*2+1] = cpu_to_be32(iflags);
+ }
+ }
+ unlock(&irq_lock);
+
+ /* First create the standard "interrupts" property and the
+ * corresponding names property
+ */
+ dt_add_property_cells(opal_node, "interrupt-parent", get_ics_phandle());
+ dt_add_property(opal_node, "interrupts", irqs, count * 8);
+ dt_add_property(opal_node, "opal-interrupts-names", names, tns);
+ dt_add_property(opal_node, "interrupt-names", names, tns);
+
+ /* Now "reduce" it to the old style "opal-interrupts" property
+ * format by stripping out the flags. The "opal-interrupts"
+ * property has one cell per interrupt, it is not a standard
+ * "interrupt" property.
+ *
+ * Note: Even if empty, create it, otherwise some bogus error
+ * handling in Linux can cause problems.
+ */
+ for (i = 1; i < count; i++)
+ irqs[i] = irqs[i * 2];
+ dt_add_property(opal_node, "opal-interrupts", irqs, count * 4);
+
+ free(irqs);
+ free(names);
+}
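+
+/*
+ * For illustration only (the interrupt numbers and names below are
+ * hypothetical), the properties built above end up in the device tree
+ * roughly as:
+ *
+ *	ibm,opal {
+ *		interrupt-parent = <&ics_phandle>;
+ *		interrupts = <0x10 1  0x11 0>;        // <isn flags> pairs
+ *		opal-interrupts = <0x10 0x11>;        // legacy form, flags stripped
+ *		opal-interrupts-names = "psi", ...;   // NUL-separated names
+ *		interrupt-names = "psi", ...;
+ *	};
+ */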
+
+/*
+ * This is called at init time (and one fast reboot) to sanitize the
+ * ICP. We set our priority to 0 to mask all interrupts and make sure
+ * no IPI is on the way. This is also called on wakeup from nap
+ */
+void reset_cpu_icp(void)
+{
+ void *icp = this_cpu()->icp_regs;
+
+ if (!icp)
+ return;
+
+ /* Dummy fetch */
+ in_be32(icp + ICP_XIRR);
+
+ /* Clear pending IPIs */
+ out_8(icp + ICP_MFRR, 0xff);
+
+ /* Set priority to max, ignore all incoming interrupts, EOI IPIs */
+ out_be32(icp + ICP_XIRR, 2);
+}
+
+/* Used by the PSI code to send an EOI during reset. This will also
+ * set the CPPR to 0 which should already be the case anyway
+ */
+void icp_send_eoi(uint32_t interrupt)
+{
+ void *icp = this_cpu()->icp_regs;
+
+ if (!icp)
+ return;
+
+ /* Set priority to max, ignore all incoming interrupts */
+ out_be32(icp + ICP_XIRR, interrupt & 0xffffff);
+}
+
+/* This is called before winkle or nap, we clear pending IPIs and
+ * set our priority to 1 to mask all but the IPI.
+ */
+void icp_prep_for_pm(void)
+{
+ void *icp = this_cpu()->icp_regs;
+
+ if (!icp)
+ return;
+
+ /* Clear pending IPIs */
+ out_8(icp + ICP_MFRR, 0xff);
+
+ /* Set priority to 1, ignore all incoming interrupts, EOI IPIs */
+ out_be32(icp + ICP_XIRR, 0x01000002);
+}
+
+/* This is called to wakeup somebody from winkle */
+void icp_kick_cpu(struct cpu_thread *cpu)
+{
+ void *icp = cpu->icp_regs;
+
+ if (!icp)
+ return;
+
+ /* Send high priority IPI */
+ out_8(icp + ICP_MFRR, 0);
+}
+
+/* Returns the number of chip ID bits used for interrupt numbers */
+static uint32_t p8_chip_id_bits(uint32_t chip)
+{
+ struct proc_chip *proc_chip = get_chip(chip);
+
+ assert(proc_chip);
+ switch (proc_chip->type) {
+ case PROC_CHIP_P8_MURANO:
+ case PROC_CHIP_P8_VENICE:
+ return 6;
+ break;
+
+ case PROC_CHIP_P8_NAPLES:
+ return 5;
+ break;
+
+ default:
+ /* This shouldn't be called on non-P8 based systems */
+ assert(0);
+ return 0;
+ break;
+ }
+}
+
+/* The chip id mask is the upper p8_chip_id_bits of the irq number */
+static uint32_t chip_id_mask(uint32_t chip)
+{
+ uint32_t chip_id_bits = p8_chip_id_bits(chip);
+ uint32_t chip_id_mask;
+
+ chip_id_mask = ((1 << chip_id_bits) - 1);
+ chip_id_mask <<= P8_IRQ_BITS - chip_id_bits;
+ return chip_id_mask;
+}
+
+/* The block mask is what remains of the 19 bit irq number after
+ * removing the upper 5 or 6 bits for the chip# and the lower 11 bits
+ * for the number of bits per block. */
+static uint32_t block_mask(uint32_t chip)
+{
+ uint32_t chip_id_bits = p8_chip_id_bits(chip);
+ uint32_t irq_block_mask;
+
+ irq_block_mask = P8_IRQ_BITS - chip_id_bits - P8_IVE_BITS;
+ irq_block_mask = ((1 << irq_block_mask) - 1) << P8_IVE_BITS;
+ return irq_block_mask;
+}
+
+uint32_t p8_chip_irq_block_base(uint32_t chip, uint32_t block)
+{
+ uint32_t irq;
+
+ assert(chip < (1 << p8_chip_id_bits(chip)));
+ irq = SETFIELD(chip_id_mask(chip), 0, chip);
+ irq = SETFIELD(block_mask(chip), irq, block);
+
+ return irq;
+}
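+
+/*
+ * Worked example (illustrative, derived from the masks above): on a chip
+ * with 6 chip-id bits, the 19-bit interrupt number splits into
+ * | chip (6 bits) | block (2 bits) | ive (11 bits) |, so chip_id_mask()
+ * is 0x7e000, block_mask() is 0x01800, and p8_chip_irq_block_base(1, 2)
+ * yields (1 << 13) | (2 << 11) == 0x3000.
+ */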
+
+uint32_t p8_chip_irq_phb_base(uint32_t chip, uint32_t phb)
+{
+ assert(chip < (1 << p8_chip_id_bits(chip)));
+
+ return p8_chip_irq_block_base(chip, phb + P8_IRQ_BLOCK_PHB_BASE);
+}
+
+uint32_t p8_irq_to_chip(uint32_t irq)
+{
+ /* This assumes we only have one type of cpu in a system,
+ * which should be ok. */
+ return GETFIELD(chip_id_mask(this_cpu()->chip_id), irq);
+}
+
+uint32_t p8_irq_to_block(uint32_t irq)
+{
+ return GETFIELD(block_mask(this_cpu()->chip_id), irq);
+}
+
+uint32_t p8_irq_to_phb(uint32_t irq)
+{
+ return p8_irq_to_block(irq) - P8_IRQ_BLOCK_PHB_BASE;
+}
+
+bool __irq_source_eoi(struct irq_source *is, uint32_t isn)
+{
+ if (!is->ops->eoi)
+ return false;
+
+ is->ops->eoi(is, isn);
+ return true;
+}
+
+bool irq_source_eoi(uint32_t isn)
+{
+ struct irq_source *is = irq_find_source(isn);
+
+ if (!is)
+ return false;
+
+ return __irq_source_eoi(is, isn);
+}
+
+static int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority)
+{
+ struct irq_source *is = irq_find_source(isn);
+
+ if (!is || !is->ops->set_xive)
+ return OPAL_PARAMETER;
+
+ return is->ops->set_xive(is, isn, server, priority);
+}
+opal_call(OPAL_SET_XIVE, opal_set_xive, 3);
+
+static int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority)
+{
+ struct irq_source *is = irq_find_source(isn);
+ uint16_t s;
+ int64_t ret;
+
+ if (!opal_addr_valid(server))
+ return OPAL_PARAMETER;
+
+ if (!is || !is->ops->get_xive)
+ return OPAL_PARAMETER;
+
+ ret = is->ops->get_xive(is, isn, &s, priority);
+ *server = cpu_to_be16(s);
+ return ret;
+}
+opal_call(OPAL_GET_XIVE, opal_get_xive, 3);
+
+static int64_t opal_handle_interrupt(uint32_t isn, __be64 *outstanding_event_mask)
+{
+ struct irq_source *is = irq_find_source(isn);
+ int64_t rc = OPAL_SUCCESS;
+
+ if (!opal_addr_valid(outstanding_event_mask))
+ return OPAL_PARAMETER;
+
+ /* No source ? return */
+ if (!is || !is->ops->interrupt) {
+ rc = OPAL_PARAMETER;
+ goto bail;
+ }
+
+ /* Run it */
+ is->ops->interrupt(is, isn);
+
+ /* Check timers if SBE timer isn't working */
+ if (!p8_sbe_timer_ok() && !p9_sbe_timer_ok())
+ check_timers(true);
+
+ /* Update output events */
+ bail:
+ if (outstanding_event_mask)
+ *outstanding_event_mask = cpu_to_be64(opal_pending_events);
+
+ return rc;
+}
+opal_call(OPAL_HANDLE_INTERRUPT, opal_handle_interrupt, 2);
+
+void init_interrupts(void)
+{
+ struct dt_node *icp;
+ const struct dt_property *sranges;
+ struct cpu_thread *cpu;
+ u32 base, count, i;
+ u64 addr, size;
+
+ dt_for_each_compatible(dt_root, icp, "ibm,ppc-xicp") {
+ sranges = dt_require_property(icp,
+ "ibm,interrupt-server-ranges",
+ -1);
+ base = dt_get_number(sranges->prop, 1);
+ count = dt_get_number(sranges->prop + 4, 1);
+ for (i = 0; i < count; i++) {
+ addr = dt_get_address(icp, i, &size);
+ cpu = find_cpu_by_server(base + i);
+ if (cpu)
+ cpu->icp_regs = (void *)addr;
+ }
+ }
+}
+
diff --git a/roms/skiboot/core/ipmi-opal.c b/roms/skiboot/core/ipmi-opal.c
new file mode 100644
index 000000000..cc45b409b
--- /dev/null
+++ b/roms/skiboot/core/ipmi-opal.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * IPMI OPAL calls
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ipmi.h>
+#include <lock.h>
+#include <opal.h>
+#include <device.h>
+#include <ccan/list/list.h>
+
+static struct lock msgq_lock = LOCK_UNLOCKED;
+static struct list_head msgq = LIST_HEAD_INIT(msgq);
+
+static void opal_send_complete(struct ipmi_msg *msg)
+{
+ lock(&msgq_lock);
+ list_add_tail(&msgq, &msg->link);
+ opal_update_pending_evt(ipmi_backend->opal_event_ipmi_recv,
+ ipmi_backend->opal_event_ipmi_recv);
+ unlock(&msgq_lock);
+}
+
+static int64_t opal_ipmi_send(uint64_t interface,
+ struct opal_ipmi_msg *opal_ipmi_msg, uint64_t msg_len)
+{
+ struct ipmi_msg *msg;
+
+ if (opal_ipmi_msg->version != OPAL_IPMI_MSG_FORMAT_VERSION_1) {
+ prerror("OPAL IPMI: Incorrect version\n");
+ return OPAL_UNSUPPORTED;
+ }
+
+ msg_len -= sizeof(struct opal_ipmi_msg);
+ if (msg_len > IPMI_MAX_REQ_SIZE) {
+ prerror("OPAL IPMI: Invalid request length\n");
+ return OPAL_PARAMETER;
+ }
+
+ prlog(PR_TRACE, "opal_ipmi_send(cmd: 0x%02x netfn: 0x%02x len: 0x%02llx)\n",
+ opal_ipmi_msg->cmd, opal_ipmi_msg->netfn >> 2, msg_len);
+
+ msg = ipmi_mkmsg(interface,
+ IPMI_CODE(opal_ipmi_msg->netfn >> 2, opal_ipmi_msg->cmd),
+ opal_send_complete, NULL, opal_ipmi_msg->data,
+ msg_len, IPMI_MAX_RESP_SIZE);
+ if (!msg)
+ return OPAL_RESOURCE;
+
+ msg->complete = opal_send_complete;
+ msg->error = opal_send_complete;
+ return ipmi_queue_msg(msg);
+}
+
+static int64_t opal_ipmi_recv(uint64_t interface,
+ struct opal_ipmi_msg *opal_ipmi_msg, __be64 *msg_len)
+{
+ struct ipmi_msg *msg;
+ int64_t rc;
+
+ lock(&msgq_lock);
+ msg = list_top(&msgq, struct ipmi_msg, link);
+
+ if (!msg) {
+ rc = OPAL_EMPTY;
+ goto out_unlock;
+ }
+
+ if (opal_ipmi_msg->version != OPAL_IPMI_MSG_FORMAT_VERSION_1) {
+ prerror("OPAL IPMI: Incorrect version\n");
+ rc = OPAL_UNSUPPORTED;
+ goto out_del_msg;
+ }
+
+ if (interface != IPMI_DEFAULT_INTERFACE) {
+ prerror("IPMI: Invalid interface 0x%llx in opal_ipmi_recv\n", interface);
+ rc = OPAL_PARAMETER;
+ goto out_del_msg;
+ }
+
+ if (be64_to_cpu(*msg_len) - sizeof(struct opal_ipmi_msg) < msg->resp_size + 1) {
+ rc = OPAL_RESOURCE;
+ goto out_del_msg;
+ }
+
+ list_del(&msg->link);
+ if (list_empty(&msgq))
+ opal_update_pending_evt(ipmi_backend->opal_event_ipmi_recv, 0);
+ unlock(&msgq_lock);
+
+ opal_ipmi_msg->cmd = msg->cmd;
+ opal_ipmi_msg->netfn = msg->netfn;
+ opal_ipmi_msg->data[0] = msg->cc;
+ memcpy(&opal_ipmi_msg->data[1], msg->data, msg->resp_size);
+
+ prlog(PR_TRACE, "opal_ipmi_recv(cmd: 0x%02x netfn: 0x%02x resp_size: 0x%02x)\n",
+ msg->cmd, msg->netfn >> 2, msg->resp_size);
+
+ /* Add one as the completion code is returned in the message data */
+ *msg_len = cpu_to_be64(msg->resp_size + sizeof(struct opal_ipmi_msg) + 1);
+ ipmi_free_msg(msg);
+
+ return OPAL_SUCCESS;
+
+out_del_msg:
+ list_del(&msg->link);
+ if (list_empty(&msgq))
+ opal_update_pending_evt(ipmi_backend->opal_event_ipmi_recv, 0);
+ ipmi_free_msg(msg);
+out_unlock:
+ unlock(&msgq_lock);
+ return rc;
+}
+
+void ipmi_opal_init(void)
+{
+ struct dt_node *opal_ipmi, *opal_event = NULL;
+
+ opal_ipmi = dt_new(opal_node, "ipmi");
+ dt_add_property_strings(opal_ipmi, "compatible", "ibm,opal-ipmi");
+ dt_add_property_cells(opal_ipmi, "ibm,ipmi-interface-id",
+ IPMI_DEFAULT_INTERFACE);
+ dt_add_property_cells(opal_ipmi, "interrupts",
+ ilog2(ipmi_backend->opal_event_ipmi_recv));
+
+ if (proc_gen >= proc_gen_p9)
+ opal_event = dt_find_by_name(opal_node, "event");
+ if (opal_event)
+ dt_add_property_cells(opal_ipmi, "interrupt-parent",
+ opal_event->phandle);
+
+ opal_register(OPAL_IPMI_SEND, opal_ipmi_send, 3);
+ opal_register(OPAL_IPMI_RECV, opal_ipmi_recv, 3);
+}
diff --git a/roms/skiboot/core/ipmi.c b/roms/skiboot/core/ipmi.c
new file mode 100644
index 000000000..bbc1a7b69
--- /dev/null
+++ b/roms/skiboot/core/ipmi.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * in-band IPMI, probably over bt (or via FSP mbox on FSP)
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <bt.h>
+#include <ipmi.h>
+#include <opal.h>
+#include <device.h>
+#include <skiboot.h>
+#include <lock.h>
+#include <cpu.h>
+#include <timebase.h>
+#include <debug_descriptor.h>
+
+struct ipmi_backend *ipmi_backend = NULL;
+static struct lock sync_lock = LOCK_UNLOCKED;
+static struct ipmi_msg *sync_msg = NULL;
+
+void ipmi_free_msg(struct ipmi_msg *msg)
+{
+ /* ipmi_free_msg frees messages allocated by the
+ * backend. Without a backend we couldn't have allocated
+ * messages to free (we don't support removing backends
+ * yet). */
+ if (!ipmi_present()) {
+ prerror("IPMI: Trying to free message without backend\n");
+ return;
+ }
+
+ msg->backend->free_msg(msg);
+}
+
+void ipmi_init_msg(struct ipmi_msg *msg, int interface,
+ uint32_t code, void (*complete)(struct ipmi_msg *),
+ void *user_data, size_t req_size, size_t resp_size)
+{
+ /* We don't actually support multiple interfaces at the moment. */
+ assert(interface == IPMI_DEFAULT_INTERFACE);
+
+ msg->backend = ipmi_backend;
+ msg->cmd = IPMI_CMD(code);
+ msg->netfn = IPMI_NETFN(code) << 2;
+ msg->req_size = req_size;
+ msg->resp_size = resp_size;
+ msg->complete = complete;
+ msg->user_data = user_data;
+}
+
+struct ipmi_msg *ipmi_mkmsg_simple(uint32_t code, void *req_data, size_t req_size)
+{
+ return ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, code, ipmi_free_msg, NULL,
+ req_data, req_size, 0);
+}
+
+struct ipmi_msg *ipmi_mkmsg(int interface, uint32_t code,
+ void (*complete)(struct ipmi_msg *),
+ void *user_data, void *req_data, size_t req_size,
+ size_t resp_size)
+{
+ struct ipmi_msg *msg;
+
+ if (!ipmi_present())
+ return NULL;
+
+ msg = ipmi_backend->alloc_msg(req_size, resp_size);
+ if (!msg)
+ return NULL;
+
+ ipmi_init_msg(msg, interface, code, complete, user_data, req_size,
+ resp_size);
+
+	/* Commands are free to override this if they want to handle errors */
+ msg->error = ipmi_free_msg;
+
+ if (req_data)
+ memcpy(msg->data, req_data, req_size);
+
+ return msg;
+}
+
+int ipmi_queue_msg_head(struct ipmi_msg *msg)
+{
+ if (!ipmi_present())
+ return OPAL_HARDWARE;
+
+ if (!msg) {
+ prerror("%s: Attempting to queue NULL message\n", __func__);
+ return OPAL_PARAMETER;
+ }
+
+ return msg->backend->queue_msg_head(msg);
+}
+
+int ipmi_queue_msg(struct ipmi_msg *msg)
+{
+ /* Here we could choose which interface to use if we want to support
+ multiple interfaces. */
+ if (!ipmi_present())
+ return OPAL_HARDWARE;
+
+ if (!msg) {
+ prerror("%s: Attempting to queue NULL message\n", __func__);
+ return OPAL_PARAMETER;
+ }
+
+ return msg->backend->queue_msg(msg);
+}
+
+int ipmi_dequeue_msg(struct ipmi_msg *msg)
+{
+ if (!ipmi_present())
+ return OPAL_HARDWARE;
+
+ if (!msg) {
+ prerror("%s: Attempting to dequeue NULL message\n", __func__);
+ return OPAL_PARAMETER;
+ }
+
+ return msg->backend->dequeue_msg(msg);
+}
+
+void ipmi_cmd_done(uint8_t cmd, uint8_t netfn, uint8_t cc, struct ipmi_msg *msg)
+{
+ msg->cc = cc;
+ if (msg->cmd != cmd) {
+ prerror("IPMI: Incorrect cmd 0x%02x in response\n", cmd);
+ cc = IPMI_ERR_UNSPECIFIED;
+ }
+
+ if ((msg->netfn >> 2) + 1 != (netfn >> 2)) {
+ prerror("IPMI: Incorrect netfn 0x%02x in response\n", netfn >> 2);
+ cc = IPMI_ERR_UNSPECIFIED;
+ }
+ msg->netfn = netfn;
+
+ if (cc != IPMI_CC_NO_ERROR) {
+ prlog(PR_DEBUG, "IPMI: Got error response. cmd=0x%x, netfn=0x%x,"
+ " rc=0x%02x\n", msg->cmd, msg->netfn >> 2, msg->cc);
+
+ assert(msg->error);
+ msg->error(msg);
+ } else if (msg->complete)
+ msg->complete(msg);
+
+	/* At this point the message should have been freed by the
+	 * completion functions. */
+
+ /* If this is a synchronous message flag that we are done */
+ if (msg == sync_msg) {
+ sync_msg = NULL;
+ barrier();
+ }
+}
+
+void ipmi_queue_msg_sync(struct ipmi_msg *msg)
+{
+	void (*poll)(void);
+
+	if (!ipmi_present())
+		return;
+
+	if (!msg) {
+		prerror("%s: Attempting to queue NULL message\n", __func__);
+		return;
+	}
+
+	/* Only dereference the message once we know it is non-NULL */
+	poll = msg->backend->poll;
+
+ lock(&sync_lock);
+ while (sync_msg);
+ sync_msg = msg;
+ if (msg->backend->disable_retry && !opal_booting())
+ msg->backend->disable_retry(msg);
+ ipmi_queue_msg_head(msg);
+ unlock(&sync_lock);
+
+ /*
+ * BT response handling relies on a timer. We can't just run all
+ * timers because we may have been called with a lock that a timer
+ * wants, and they're generally not written to cope with that.
+ * So, just run whatever the IPMI backend needs to make forward
+ * progress.
+ */
+ while (sync_msg == msg) {
+ if (poll)
+ poll();
+ time_wait_ms(10);
+ }
+}
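+
+/*
+ * Usage sketch (illustrative only; my_done_cb, resp_len and the command code
+ * below are made-up placeholders): a caller needing a blocking round trip
+ * would do something like
+ *
+ *	msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_CODE(0x06, 0x01),
+ *			 my_done_cb, NULL, NULL, 0, resp_len);
+ *	ipmi_queue_msg_sync(msg);
+ *
+ * which only returns once ipmi_cmd_done() has run the completion (or error)
+ * handler and cleared sync_msg.
+ */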
+
+static void ipmi_read_event_complete(struct ipmi_msg *msg)
+{
+ prlog(PR_DEBUG, "IPMI read event %02x complete: %d bytes. cc: %02x\n",
+ msg->cmd, msg->resp_size, msg->cc);
+
+ /* Handle power control & PNOR handshake events */
+ ipmi_parse_sel(msg);
+
+ ipmi_free_msg(msg);
+}
+
+static void ipmi_get_message_flags_complete(struct ipmi_msg *msg)
+{
+ uint8_t flags = msg->data[0];
+
+ ipmi_free_msg(msg);
+
+ prlog(PR_DEBUG, "IPMI Get Message Flags: %02x\n", flags);
+
+ /* Once we see an interrupt we assume the payload has
+	 * booted. We disable the wdt and let the OS set up its own
+ * wdt.
+ *
+ * This is also where we consider the OS to be booted, so we set
+ * the boot count sensor */
+ if (flags & IPMI_MESSAGE_FLAGS_WATCHDOG_PRE_TIMEOUT) {
+ ipmi_wdt_stop();
+ ipmi_set_boot_count();
+ }
+
+ /* Message available in the event buffer? Queue a Read Event command
+ * to retrieve it. The flag is cleared by performing a read */
+ if (flags & IPMI_MESSAGE_FLAGS_EVENT_BUFFER) {
+ msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_READ_EVENT,
+ ipmi_read_event_complete, NULL, NULL, 0, 16);
+ ipmi_queue_msg(msg);
+ }
+}
+
+void ipmi_sms_attention(void)
+{
+ struct ipmi_msg *msg;
+
+ if (!ipmi_present())
+ return;
+
+ /* todo: when we handle multiple IPMI interfaces, we'll need to
+ * ensure that this message is associated with the appropriate
+ * backend. */
+ msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_GET_MESSAGE_FLAGS,
+ ipmi_get_message_flags_complete, NULL, NULL, 0, 1);
+
+ ipmi_queue_msg(msg);
+}
+
+void ipmi_register_backend(struct ipmi_backend *backend)
+{
+ /* We only support one backend at the moment */
+ assert(backend->alloc_msg);
+ assert(backend->free_msg);
+ assert(backend->queue_msg);
+ assert(backend->dequeue_msg);
+ ipmi_backend = backend;
+ ipmi_backend->opal_event_ipmi_recv = opal_dynamic_event_alloc();
+}
+
+bool ipmi_present(void)
+{
+ return ipmi_backend != NULL;
+}
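Backend registration sketch: ipmi_register_backend() asserts that the four message callbacks are populated, and poll (optional) is what lets ipmi_queue_msg_sync() make forward progress. The my_* helpers below are invented; their exact prototypes are whatever struct ipmi_backend in ipmi.h declares:

        static struct ipmi_backend my_ipmi_backend = {
                .alloc_msg   = my_alloc_msg,    /* invented helpers */
                .free_msg    = my_free_msg,
                .queue_msg   = my_queue_msg,
                .dequeue_msg = my_dequeue_msg,
                .poll        = my_poll,         /* drives ipmi_queue_msg_sync() */
        };

        void my_ipmi_init(void)
        {
                ipmi_register_backend(&my_ipmi_backend);
        }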
diff --git a/roms/skiboot/core/lock.c b/roms/skiboot/core/lock.c
new file mode 100644
index 000000000..f0ab595b1
--- /dev/null
+++ b/roms/skiboot/core/lock.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Simple spinlock
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <lock.h>
+#include <assert.h>
+#include <processor.h>
+#include <cpu.h>
+#include <console.h>
+#include <timebase.h>
+
+/* Set to bust locks. Note that this is initialized to true because our
+ * lock debugging code will not work until the per-CPU data has been
+ * initialized.
+ */
+bool bust_locks = true;
+
+#define LOCK_TIMEOUT_MS 5000
+
+#ifdef DEBUG_LOCKS
+
+static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err)
+{
+ fprintf(stderr, "LOCK ERROR: %s @%p (state: 0x%016llx)\n",
+ reason, l, l->lock_val);
+ op_display(OP_FATAL, OP_MOD_LOCK, err);
+
+ abort();
+}
+
+static inline void __nomcount lock_check(struct lock *l)
+{
+ if ((l->lock_val & 1) && (l->lock_val >> 32) == this_cpu()->pir)
+ lock_error(l, "Invalid recursive lock", 0);
+}
+
+static inline void __nomcount unlock_check(struct lock *l)
+{
+ if (!(l->lock_val & 1))
+ lock_error(l, "Unlocking unlocked lock", 1);
+
+ if ((l->lock_val >> 32) != this_cpu()->pir)
+ lock_error(l, "Unlocked non-owned lock", 2);
+
+ if (l->in_con_path && this_cpu()->con_suspend == 0)
+ lock_error(l, "Unlock con lock with console not suspended", 3);
+
+ if (list_empty(&this_cpu()->locks_held))
+ lock_error(l, "Releasing lock we don't hold depth", 4);
+}
+
+static inline bool __nomcount __try_lock(struct cpu_thread *cpu, struct lock *l)
+{
+ uint64_t val;
+
+ val = cpu->pir;
+ val <<= 32;
+ val |= 1;
+
+ barrier();
+ if (__cmpxchg64(&l->lock_val, 0, val) == 0) {
+ sync();
+ return true;
+ }
+ return false;
+}
+
+static inline bool lock_timeout(unsigned long start)
+{
+ /* Return true if the lock has been spinning for more than LOCK_TIMEOUT_MS */
+ unsigned long wait = tb_to_msecs(mftb());
+
+ if (wait - start > LOCK_TIMEOUT_MS) {
+ /*
+ * If the timebase is invalid, we shouldn't
+ * throw an error. This is possible with pending HMIs
+ * that need to recover TB.
+ */
+ if (!(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID))
+ return false;
+ return true;
+ }
+
+ return false;
+}
+#else
+static inline void lock_check(struct lock *l) { };
+static inline void unlock_check(struct lock *l) { };
+static inline bool lock_timeout(unsigned long s) { return false; }
+#endif /* DEBUG_LOCKS */
+
+#if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS)
+
+static struct lock dl_lock = {
+ .lock_val = 0,
+ .in_con_path = true,
+ .owner = LOCK_CALLER
+};
+
+/* Find circular dependencies in the lock requests. */
+static __nomcount inline bool check_deadlock(void)
+{
+ uint32_t lock_owner, start, i;
+ struct cpu_thread *next_cpu;
+ struct lock *next;
+
+ next = this_cpu()->requested_lock;
+ start = this_cpu()->pir;
+ i = 0;
+
+ while (i < cpu_max_pir) {
+
+ if (!next)
+ return false;
+
+ if (!(next->lock_val & 1) || next->in_con_path)
+ return false;
+
+ lock_owner = next->lock_val >> 32;
+
+ if (lock_owner == start)
+ return true;
+
+ next_cpu = find_cpu_by_pir_nomcount(lock_owner);
+
+ if (!next_cpu)
+ return false;
+
+ next = next_cpu->requested_lock;
+ i++;
+ }
+
+ return false;
+}
+
+static void add_lock_request(struct lock *l)
+{
+ struct cpu_thread *curr = this_cpu();
+ bool dead;
+
+ if (curr->state != cpu_state_active &&
+ curr->state != cpu_state_os)
+ return;
+
+ /*
+ * For deadlock detection we must keep the lock states constant
+ * while doing the deadlock check. However we need to avoid
+ * clashing with the stack checker, so no mcount and use an
+ * inline implementation of the lock for the dl_lock
+ */
+ for (;;) {
+ if (__try_lock(curr, &dl_lock))
+ break;
+ smt_lowest();
+ while (dl_lock.lock_val)
+ barrier();
+ smt_medium();
+ }
+
+ curr->requested_lock = l;
+
+ dead = check_deadlock();
+
+ lwsync();
+ dl_lock.lock_val = 0;
+
+ if (dead)
+ lock_error(l, "Deadlock detected", 0);
+}
+
+static void remove_lock_request(void)
+{
+ this_cpu()->requested_lock = NULL;
+}
+#else
+static inline void add_lock_request(struct lock *l) { };
+static inline void remove_lock_request(void) { };
+#endif /* #if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS) */
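For reference, the pattern check_deadlock() is designed to catch is the classic two-CPU lock inversion; lock_a/lock_b below are invented names and the sequence is only an illustration:

        /*
         *   CPU0: lock(&lock_a); lock(&lock_b);  <- spins, requested_lock = &lock_b
         *   CPU1: lock(&lock_b); lock(&lock_a);  <- spins, requested_lock = &lock_a
         *
         * Following lock owner -> requested_lock from either CPU leads back to the
         * starting PIR, so add_lock_request() reports "Deadlock detected".
         */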
+
+bool lock_held_by_me(struct lock *l)
+{
+ uint64_t pir64 = this_cpu()->pir;
+
+ return l->lock_val == ((pir64 << 32) | 1);
+}
+
+bool try_lock_caller(struct lock *l, const char *owner)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ if (bust_locks)
+ return true;
+
+ if (l->in_con_path)
+ cpu->con_suspend++;
+ if (__try_lock(cpu, l)) {
+ l->owner = owner;
+
+#ifdef DEBUG_LOCKS_BACKTRACE
+ backtrace_create(l->bt_buf, LOCKS_BACKTRACE_MAX_ENTS,
+ &l->bt_metadata);
+#endif
+
+ list_add(&cpu->locks_held, &l->list);
+ return true;
+ }
+ if (l->in_con_path)
+ cpu->con_suspend--;
+ return false;
+}
+
+void lock_caller(struct lock *l, const char *owner)
+{
+ bool timeout_warn = false;
+ unsigned long start = 0;
+
+ if (bust_locks)
+ return;
+
+ lock_check(l);
+
+ if (try_lock_caller(l, owner))
+ return;
+ add_lock_request(l);
+
+#ifdef DEBUG_LOCKS
+ /*
+ * Ensure that we get a valid start value
+ * as we may be handling TFMR errors and taking
+ * a lock to do so, so timebase could be garbage
+ */
+ if (mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID)
+ start = tb_to_msecs(mftb());
+#endif
+
+ for (;;) {
+ if (try_lock_caller(l, owner))
+ break;
+ smt_lowest();
+ while (l->lock_val)
+ barrier();
+ smt_medium();
+
+ if (start && !timeout_warn && lock_timeout(start)) {
+ /*
+ * Holding the lock request while printing a
+ * timeout warning and taking console locks can
+ * result in a deadlock false positive if the lock
+ * owner tries to take the console lock, so drop it.
+ */
+ remove_lock_request();
+ prlog(PR_WARNING, "WARNING: Lock has been spinning for over %dms\n", LOCK_TIMEOUT_MS);
+ backtrace();
+ add_lock_request(l);
+ timeout_warn = true;
+ }
+ }
+
+ remove_lock_request();
+}
+
+void unlock(struct lock *l)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ if (bust_locks)
+ return;
+
+ unlock_check(l);
+
+ l->owner = NULL;
+ list_del(&l->list);
+ lwsync();
+ l->lock_val = 0;
+
+ /* WARNING: On fast reboot, we can be reset right at that
+ * point, so the reset_lock in there cannot be in the con path
+ */
+ if (l->in_con_path) {
+ cpu->con_suspend--;
+ if (cpu->con_suspend == 0 && cpu->con_need_flush)
+ flush_console();
+ }
+}
+
+bool lock_recursive_caller(struct lock *l, const char *caller)
+{
+ if (bust_locks)
+ return false;
+
+ if (lock_held_by_me(l))
+ return false;
+
+ lock_caller(l, caller);
+ return true;
+}
+
+void init_locks(void)
+{
+ bust_locks = false;
+}
+
+void dump_locks_list(void)
+{
+ struct lock *l;
+
+ prlog(PR_ERR, "Locks held:\n");
+ list_for_each(&this_cpu()->locks_held, l, list) {
+ prlog(PR_ERR, " %s\n", l->owner);
+#ifdef DEBUG_LOCKS_BACKTRACE
+ backtrace_print(l->bt_buf, &l->bt_metadata, NULL, NULL, true);
+#endif
+ }
+}
+
+void drop_my_locks(bool warn)
+{
+ struct lock *l;
+
+ disable_fast_reboot("Lock corruption");
+ while((l = list_top(&this_cpu()->locks_held, struct lock, list)) != NULL) {
+ if (warn) {
+ prlog(PR_ERR, " %s\n", l->owner);
+#ifdef DEBUG_LOCKS_BACKTRACE
+ backtrace_print(l->bt_buf, &l->bt_metadata, NULL, NULL,
+ true);
+#endif
+ }
+ unlock(l);
+ }
+}
+
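A minimal usage sketch of this API, assuming the usual lock()/unlock() wrappers that pass LOCK_CALLER through to lock_caller(); my_lock and the counter are invented:

        static struct lock my_lock = LOCK_UNLOCKED;
        static uint64_t counter;

        static void bump(void)
        {
                lock(&my_lock);         /* spins; warns after LOCK_TIMEOUT_MS */
                counter++;
                unlock(&my_lock);
        }

        static void bump_maybe_locked(void)
        {
                /* Safe whether or not the caller already holds my_lock */
                bool took = lock_recursive_caller(&my_lock, LOCK_CALLER);

                counter++;
                if (took)
                        unlock(&my_lock);
        }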
diff --git a/roms/skiboot/core/malloc.c b/roms/skiboot/core/malloc.c
new file mode 100644
index 000000000..76996fff4
--- /dev/null
+++ b/roms/skiboot/core/malloc.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Implement malloc()/free() etc on top of our memory region allocator,
+ * which provides mem_alloc()/mem_free().
+ *
+ * Copyright 2013-2015 IBM Corp.
+ */
+
+#include <mem_region.h>
+#include <lock.h>
+#include <string.h>
+#include <mem_region-malloc.h>
+
+#define DEFAULT_ALIGN __alignof__(long)
+
+void *__memalign(size_t blocksize, size_t bytes, const char *location)
+{
+ void *p;
+
+ lock(&skiboot_heap.free_list_lock);
+ p = mem_alloc(&skiboot_heap, bytes, blocksize, location);
+ unlock(&skiboot_heap.free_list_lock);
+
+ return p;
+}
+
+void *__malloc(size_t bytes, const char *location)
+{
+ return __memalign(DEFAULT_ALIGN, bytes, location);
+}
+
+void __free(void *p, const char *location)
+{
+ lock(&skiboot_heap.free_list_lock);
+ mem_free(&skiboot_heap, p, location);
+ unlock(&skiboot_heap.free_list_lock);
+}
+
+void *__realloc(void *ptr, size_t size, const char *location)
+{
+ void *newptr;
+
+ /* Two classic malloc corner cases. */
+ if (!size) {
+ __free(ptr, location);
+ return NULL;
+ }
+ if (!ptr)
+ return __malloc(size, location);
+
+ lock(&skiboot_heap.free_list_lock);
+ if (mem_resize(&skiboot_heap, ptr, size, location)) {
+ newptr = ptr;
+ } else {
+ newptr = mem_alloc(&skiboot_heap, size, DEFAULT_ALIGN,
+ location);
+ if (newptr) {
+ size_t copy = mem_allocated_size(ptr);
+ if (copy > size)
+ copy = size;
+ memcpy(newptr, ptr, copy);
+ mem_free(&skiboot_heap, ptr, location);
+ }
+ }
+ unlock(&skiboot_heap.free_list_lock);
+ return newptr;
+}
+
+void *__zalloc(size_t bytes, const char *location)
+{
+ void *p = __malloc(bytes, location);
+
+ if (p)
+ memset(p, 0, bytes);
+ return p;
+}
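The __-prefixed entry points carry a location string for the allocation tracking in mem_region.c; callers normally reach them through the malloc()/free()/zalloc() wrappers in mem_region-malloc.h (a realloc() wrapper is assumed to exist alongside them). A sketch of the corner cases handled above:

        char *p = zalloc(64);           /* zeroed allocation */
        p = realloc(p, 128);            /* grows in place if the next block is free */
        p = realloc(p, 0);              /* size 0 frees p and returns NULL */
        void *q = realloc(NULL, 32);    /* NULL pointer behaves like malloc(32) */
        free(q);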
diff --git a/roms/skiboot/core/mce.c b/roms/skiboot/core/mce.c
new file mode 100644
index 000000000..47674abcb
--- /dev/null
+++ b/roms/skiboot/core/mce.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: Apache-2.0
+/*
+ * Machine Check Exceptions
+ *
+ * Copyright 2020 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "MCE: " fmt
+
+#include <ras.h>
+#include <opal.h>
+#include <cpu.h>
+
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
+struct mce_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ uint64_t type;
+ const char *error_str;
+};
+
+static const struct mce_ierror_table mce_p9_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000,
+ MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_INVOLVED_EA,
+ "instruction fetch memory uncorrectable error", },
+{ 0x00000000081c0000, 0x0000000000080000,
+ MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
+ "instruction fetch SLB parity error", },
+{ 0x00000000081c0000, 0x00000000000c0000,
+ MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
+ "instruction fetch SLB multi-hit error", },
+{ 0x00000000081c0000, 0x0000000000100000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_ERAT_ERROR,
+ "instruction fetch ERAT multi-hit error", },
+{ 0x00000000081c0000, 0x0000000000140000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_TLB_ERROR,
+ "instruction fetch TLB multi-hit error", },
+{ 0x00000000081c0000, 0x0000000000180000,
+ MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "instruction fetch page table access memory uncorrectable error", },
+{ 0x00000000081c0000, 0x00000000001c0000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA,
+ "instruction fetch to foreign address", },
+{ 0x00000000081c0000, 0x0000000008000000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA,
+ "instruction fetch foreign link time-out", },
+{ 0x00000000081c0000, 0x0000000008040000,
+ MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "instruction fetch page table access foreign link time-out", },
+{ 0x00000000081c0000, 0x00000000080c0000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA,
+ "instruction fetch real address error", },
+{ 0x00000000081c0000, 0x0000000008100000,
+ MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "instruction fetch page table access real address error", },
+{ 0x00000000081c0000, 0x0000000008140000,
+ MCE_LOADSTORE | MCE_IMPRECISE,
+ "store real address asynchronous error", },
+{ 0x00000000081c0000, 0x0000000008180000,
+ MCE_LOADSTORE | MCE_IMPRECISE,
+ "store foreign link time-out asynchronous error", },
+{ 0x00000000081c0000, 0x00000000081c0000,
+ MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "instruction fetch page table access to foreign address", },
+{ 0 } };
+
+static const struct mce_ierror_table mce_p10_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000,
+ MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_INVOLVED_EA,
+ "instruction fetch memory uncorrectable error", },
+{ 0x00000000081c0000, 0x0000000000080000,
+ MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
+ "instruction fetch SLB parity error", },
+{ 0x00000000081c0000, 0x00000000000c0000,
+ MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
+ "instruction fetch SLB multi-hit error", },
+{ 0x00000000081c0000, 0x0000000000100000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_ERAT_ERROR,
+ "instruction fetch ERAT multi-hit error", },
+{ 0x00000000081c0000, 0x0000000000140000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_TLB_ERROR,
+ "instruction fetch TLB multi-hit error", },
+{ 0x00000000081c0000, 0x0000000000180000,
+ MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "instruction fetch page table access memory uncorrectable error", },
+{ 0x00000000081c0000, 0x00000000001c0000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA,
+ "instruction fetch to control real address", },
+{ 0x00000000081c0000, 0x00000000080c0000,
+ MCE_INSNFETCH | MCE_INVOLVED_EA,
+ "instruction fetch real address error", },
+{ 0x00000000081c0000, 0x0000000008100000,
+ MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "instruction fetch page table access real address error", },
+{ 0x00000000081c0000, 0x0000000008140000,
+ MCE_LOADSTORE | MCE_IMPRECISE,
+ "store real address asynchronous error", },
+{ 0x00000000081c0000, 0x00000000081c0000,
+ MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "instruction fetch page table access to control real address", },
+{ 0 } };
+
+struct mce_derror_table {
+ unsigned long dsisr_value;
+ uint64_t type;
+ const char *error_str;
+};
+
+static const struct mce_derror_table mce_p9_derror_table[] = {
+{ 0x00008000,
+ MCE_LOADSTORE | MCE_MEMORY_ERROR,
+ "load/store memory uncorrectable error", },
+{ 0x00004000,
+ MCE_LOADSTORE | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "load/store page table access memory uncorrectable error", },
+{ 0x00002000,
+ MCE_LOADSTORE | MCE_INVOLVED_EA,
+ "load/store foreign link time-out", },
+{ 0x00001000,
+ MCE_LOADSTORE | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "load/store page table access foreign link time-out", },
+{ 0x00000800,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_ERAT_ERROR,
+ "load/store ERAT multi-hit error", },
+{ 0x00000400,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_TLB_ERROR,
+ "load/store TLB multi-hit error", },
+{ 0x00000200,
+ MCE_LOADSTORE | MCE_TLBIE_ERROR,
+ "TLBIE or TLBIEL instruction programming error", },
+{ 0x00000100,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
+ "load/store SLB parity error", },
+{ 0x00000080,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
+ "load/store SLB multi-hit error", },
+{ 0x00000040,
+ MCE_LOADSTORE | MCE_INVOLVED_EA,
+ "load real address error", },
+{ 0x00000020,
+ MCE_LOADSTORE | MCE_TABLE_WALK,
+ "load/store page table access real address error", },
+{ 0x00000010,
+ MCE_LOADSTORE | MCE_TABLE_WALK,
+ "load/store page table access to foreign address", },
+{ 0x00000008,
+ MCE_LOADSTORE,
+ "load/store to foreign address", },
+{ 0 } };
+
+static const struct mce_derror_table mce_p10_derror_table[] = {
+{ 0x00008000,
+ MCE_LOADSTORE | MCE_MEMORY_ERROR,
+ "load/store memory uncorrectable error", },
+{ 0x00004000,
+ MCE_LOADSTORE | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
+ "load/store page table access memory uncorrectable error", },
+{ 0x00000800,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_ERAT_ERROR,
+ "load/store ERAT multi-hit error", },
+{ 0x00000400,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_TLB_ERROR,
+ "load/store TLB multi-hit error", },
+{ 0x00000200,
+ MCE_TLBIE_ERROR,
+ "TLBIE or TLBIEL instruction programming error", },
+{ 0x00000100,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
+ "load/store SLB parity error", },
+{ 0x00000080,
+ MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
+ "load/store SLB multi-hit error", },
+{ 0x00000040,
+ MCE_LOADSTORE | MCE_INVOLVED_EA,
+ "load real address error", },
+{ 0x00000020,
+ MCE_LOADSTORE | MCE_TABLE_WALK,
+ "load/store page table access real address error", },
+{ 0x00000010,
+ MCE_LOADSTORE | MCE_TABLE_WALK,
+ "load/store page table access to control real address", },
+{ 0x00000008,
+ MCE_LOADSTORE,
+ "load/store to control real address", },
+{ 0 } };
+
+static void decode_ierror(const struct mce_ierror_table table[],
+ uint64_t srr1,
+ uint64_t *type,
+ const char **error_str)
+{
+ int i;
+
+ for (i = 0; table[i].srr1_mask; i++) {
+ if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
+ continue;
+
+ *type = table[i].type;
+ *error_str = table[i].error_str;
+ }
+}
+
+static void decode_derror(const struct mce_derror_table table[],
+ uint32_t dsisr,
+ uint64_t *type,
+ const char **error_str)
+{
+ int i;
+
+ for (i = 0; table[i].dsisr_value; i++) {
+ if (!(dsisr & table[i].dsisr_value))
+ continue;
+
+ *type = table[i].type;
+ *error_str = table[i].error_str;
+ }
+}
+
+static void decode_mce_p9(uint64_t srr0, uint64_t srr1,
+ uint32_t dsisr, uint64_t dar,
+ uint64_t *type, const char **error_str,
+ uint64_t *address)
+{
+ /*
+ * On POWER9 DD2.1 and below, it's possible to get a machine check
+ * caused by a paste instruction where only DSISR bit 25 is set. This
+ * will result in the MCE handler seeing an unknown event and the
+ * kernel crashing. An MCE that occurs like this is spurious, so we
+ * don't need to do anything in terms of servicing it. If there is
+ * something that needs to be serviced, the CPU will raise the MCE
+ * again with the correct DSISR so that it can be serviced properly.
+ * So detect this case and mark it as handled.
+ */
+ if (SRR1_MC_LOADSTORE(srr1) && dsisr == 0x02000000) {
+ *type = MCE_NO_ERROR;
+ *error_str = "no error (superfluous machine check)";
+ return;
+ }
+
+ /*
+ * Async machine check due to bad real address from store or foreign
+ * link time out comes with the load/store bit (PPC bit 42) set in
+ * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're
+ * directed to the ierror table so it will find the cause (which
+ * describes it correctly as a store error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ ((srr1 & 0x081c0000) == 0x08140000 ||
+ (srr1 & 0x081c0000) == 0x08180000)) {
+ srr1 &= ~PPC_BIT(42);
+ }
+
+ if (SRR1_MC_LOADSTORE(srr1)) {
+ decode_derror(mce_p9_derror_table, dsisr, type, error_str);
+ if (*type & MCE_INVOLVED_EA)
+ *address = dar;
+ } else {
+ decode_ierror(mce_p9_ierror_table, srr1, type, error_str);
+ if (*type & MCE_INVOLVED_EA)
+ *address = srr0;
+ }
+}
+
+static void decode_mce_p10(uint64_t srr0, uint64_t srr1,
+ uint32_t dsisr, uint64_t dar,
+ uint64_t *type, const char **error_str,
+ uint64_t *address)
+{
+ /*
+ * Async machine check due to bad real address from store or foreign
+ * link time out comes with the load/store bit (PPC bit 42) set in
+ * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're
+ * directed to the ierror table so it will find the cause (which
+ * describes it correctly as a store error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ (srr1 & 0x081c0000) == 0x08140000) {
+ srr1 &= ~PPC_BIT(42);
+ }
+
+ if (SRR1_MC_LOADSTORE(srr1)) {
+ decode_derror(mce_p10_derror_table, dsisr, type, error_str);
+ if (*type & MCE_INVOLVED_EA)
+ *address = dar;
+ } else {
+ decode_ierror(mce_p10_ierror_table, srr1, type, error_str);
+ if (*type & MCE_INVOLVED_EA)
+ *address = srr0;
+ }
+}
+
+void decode_mce(uint64_t srr0, uint64_t srr1,
+ uint32_t dsisr, uint64_t dar,
+ uint64_t *type, const char **error_str,
+ uint64_t *address)
+{
+ *type = MCE_UNKNOWN;
+ *error_str = "unknown error";
+ *address = 0;
+
+ if (proc_gen == proc_gen_p9) {
+ decode_mce_p9(srr0, srr1, dsisr, dar, type, error_str, address);
+ } else if (proc_gen == proc_gen_p10) {
+ decode_mce_p10(srr0, srr1, dsisr, dar, type, error_str, address);
+ } else {
+ *error_str = "unknown error (processor not supported)";
+ }
+}
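A sketch of how an exception handler might consume decode_mce(); the register values are assumed to come from the interrupt frame, only the call contract comes from the code above:

        uint64_t type, addr;
        const char *desc;

        decode_mce(srr0, srr1, dsisr, dar, &type, &desc, &addr);
        if (type & MCE_INVOLVED_EA)
                prlog(PR_ERR, "%s at 0x%016"PRIx64"\n", desc, addr);
        else
                prlog(PR_ERR, "%s\n", desc);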
diff --git a/roms/skiboot/core/mem_region.c b/roms/skiboot/core/mem_region.c
new file mode 100644
index 000000000..36de2d094
--- /dev/null
+++ b/roms/skiboot/core/mem_region.c
@@ -0,0 +1,1555 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Simple memory allocator
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <inttypes.h>
+#include <skiboot.h>
+#include <mem-map.h>
+#include <libfdt_env.h>
+#include <lock.h>
+#include <device.h>
+#include <cpu.h>
+#include <chip.h>
+#include <affinity.h>
+#include <types.h>
+#include <mem_region.h>
+#include <mem_region-malloc.h>
+
+/* Memory poisoning on free (if POISON_MEM_REGION set to 1) */
+#ifdef DEBUG
+#define POISON_MEM_REGION 1
+#else
+#define POISON_MEM_REGION 0
+#endif
+#define POISON_MEM_REGION_WITH 0x99
+#define POISON_MEM_REGION_LIMIT 1*1024*1024*1024
+
+/* Locking: The mem_region_lock protects the regions list from concurrent
+ * updates. Additions to, or removals from, the region list must be done
+ * with this lock held. This is typically done when we're establishing
+ * the memory & reserved regions.
+ *
+ * Each region has a lock (region->free_list_lock) to protect the free list
+ * from concurrent modification. This lock is used when we're allocating
+ * memory out of a specific region.
+ *
+ * If both locks are needed (eg, __local_alloc, where we need to find a region,
+ * then allocate from it), the mem_region_lock must be acquired before (and
+ * released after) the per-region lock.
+ */
+struct lock mem_region_lock = LOCK_UNLOCKED;
+
+static struct list_head regions = LIST_HEAD_INIT(regions);
+static struct list_head early_reserves = LIST_HEAD_INIT(early_reserves);
+
+static bool mem_region_init_done = false;
+static bool mem_regions_finalised = false;
+
+unsigned long top_of_ram = SKIBOOT_BASE + SKIBOOT_SIZE;
+
+static struct mem_region skiboot_os_reserve = {
+ .name = "ibm,os-reserve",
+ .start = 0,
+ .len = SKIBOOT_BASE,
+ .type = REGION_OS,
+};
+
+struct mem_region skiboot_heap = {
+ .name = "ibm,firmware-heap",
+ .start = HEAP_BASE,
+ .len = HEAP_SIZE,
+ .type = REGION_SKIBOOT_HEAP,
+};
+
+static struct mem_region skiboot_code_and_text = {
+ .name = "ibm,firmware-code",
+ .start = SKIBOOT_BASE,
+ .len = HEAP_BASE - SKIBOOT_BASE,
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_after_heap = {
+ .name = "ibm,firmware-data",
+ .start = HEAP_BASE + HEAP_SIZE,
+ .len = SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_cpu_stacks = {
+ .name = "ibm,firmware-stacks",
+ .start = CPU_STACKS_BASE,
+ .len = 0, /* TBA */
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_mambo_kernel = {
+ .name = "ibm,firmware-mambo-kernel",
+ .start = (unsigned long)KERNEL_LOAD_BASE,
+ .len = KERNEL_LOAD_SIZE,
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_mambo_initramfs = {
+ .name = "ibm,firmware-mambo-initramfs",
+ .start = (unsigned long)INITRAMFS_LOAD_BASE,
+ .len = INITRAMFS_LOAD_SIZE,
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+
+struct alloc_hdr {
+ bool free : 1;
+ bool prev_free : 1;
+ bool printed : 1;
+ unsigned long num_longs : BITS_PER_LONG-3; /* Including header. */
+ const char *location;
+};
+
+struct free_hdr {
+ struct alloc_hdr hdr;
+ struct list_node list;
+ /* ... unsigned long tailer; */
+};
+
+#define ALLOC_HDR_LONGS (sizeof(struct alloc_hdr) / sizeof(long))
+#define ALLOC_MIN_LONGS (sizeof(struct free_hdr) / sizeof(long) + 1)
+
+/* Avoid ugly casts. */
+static void *region_start(const struct mem_region *region)
+{
+ return (void *)(unsigned long)region->start;
+}
+
+/* Each free block has a tailer, so we can walk backwards. */
+static unsigned long *tailer(struct free_hdr *f)
+{
+ return (unsigned long *)f + f->hdr.num_longs - 1;
+}
+
+/* This walks forward to the next hdr (or NULL if at the end). */
+static struct alloc_hdr *next_hdr(const struct mem_region *region,
+ const struct alloc_hdr *hdr)
+{
+ void *next;
+
+ next = ((unsigned long *)hdr + hdr->num_longs);
+ if (next >= region_start(region) + region->len)
+ next = NULL;
+ return next;
+}
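This is a classic boundary-tag layout: every block begins with an alloc_hdr sized in longs, and free blocks additionally carry a list node plus a tailer word so make_free() can coalesce backwards. The user pointer handed out by mem_alloc() sits immediately after the header, so a block's size can be recovered as in this sketch (essentially what mem_allocated_size() below does, without subtracting the header); block_bytes() is an invented name:

        static inline size_t block_bytes(const void *user_ptr)
        {
                /* Step back over the header, exactly as mem_free() does */
                const struct alloc_hdr *h = (const struct alloc_hdr *)user_ptr - 1;

                return h->num_longs * sizeof(long);     /* includes the header */
        }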
+
+#if POISON_MEM_REGION == 1
+static void mem_poison(struct free_hdr *f)
+{
+ size_t poison_size = (void*)tailer(f) - (void*)(f+1);
+
+ /* We only poison up to a limit, as otherwise boot is
+ * kinda slow */
+ if (poison_size > POISON_MEM_REGION_LIMIT)
+ poison_size = POISON_MEM_REGION_LIMIT;
+
+ memset(f+1, POISON_MEM_REGION_WITH, poison_size);
+}
+#endif
+
+/* Creates free block covering entire region. */
+static void init_allocatable_region(struct mem_region *region)
+{
+ struct free_hdr *f = region_start(region);
+ assert(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY);
+ f->hdr.num_longs = region->len / sizeof(long);
+ f->hdr.free = true;
+ f->hdr.prev_free = false;
+ *tailer(f) = f->hdr.num_longs;
+ list_head_init(&region->free_list);
+ list_add(&region->free_list, &f->list);
+#if POISON_MEM_REGION == 1
+ mem_poison(f);
+#endif
+}
+
+static void make_free(struct mem_region *region, struct free_hdr *f,
+ const char *location, bool skip_poison)
+{
+ struct alloc_hdr *next;
+
+#if POISON_MEM_REGION == 1
+ if (!skip_poison)
+ mem_poison(f);
+#else
+ (void)skip_poison;
+#endif
+
+ if (f->hdr.prev_free) {
+ struct free_hdr *prev;
+ unsigned long *prev_tailer = (unsigned long *)f - 1;
+
+ assert(*prev_tailer);
+ prev = (void *)((unsigned long *)f - *prev_tailer);
+ assert(prev->hdr.free);
+ assert(!prev->hdr.prev_free);
+
+ /* Expand to cover the one we just freed. */
+ prev->hdr.num_longs += f->hdr.num_longs;
+ f = prev;
+ } else {
+ f->hdr.free = true;
+ f->hdr.location = location;
+ list_add(&region->free_list, &f->list);
+ }
+
+ /* Fix up tailer. */
+ *tailer(f) = f->hdr.num_longs;
+
+ /* If next is free, coalesce it */
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ next->prev_free = true;
+ if (next->free) {
+ struct free_hdr *next_free = (void *)next;
+ list_del_from(&region->free_list, &next_free->list);
+ /* Maximum of one level of recursion */
+ make_free(region, next_free, location, true);
+ }
+ }
+}
+
+/* Can we fit this many longs with this alignment in this free block? */
+static bool fits(struct free_hdr *f, size_t longs, size_t align, size_t *offset)
+{
+ *offset = 0;
+
+ while (f->hdr.num_longs >= *offset + longs) {
+ size_t addr;
+
+ addr = (unsigned long)f
+ + (*offset + ALLOC_HDR_LONGS) * sizeof(long);
+ if ((addr & (align - 1)) == 0)
+ return true;
+
+ /* Don't make tiny chunks! */
+ if (*offset == 0)
+ *offset = ALLOC_MIN_LONGS;
+ else
+ (*offset)++;
+ }
+ return false;
+}
+
+static void discard_excess(struct mem_region *region,
+ struct alloc_hdr *hdr, size_t alloc_longs,
+ const char *location, bool skip_poison)
+{
+ /* Do we have excess? */
+ if (hdr->num_longs > alloc_longs + ALLOC_MIN_LONGS) {
+ struct free_hdr *post;
+
+ /* Set up post block. */
+ post = (void *)hdr + alloc_longs * sizeof(long);
+ post->hdr.num_longs = hdr->num_longs - alloc_longs;
+ post->hdr.prev_free = false;
+
+ /* Trim our block. */
+ hdr->num_longs = alloc_longs;
+
+ /* This coalesces as required. */
+ make_free(region, post, location, skip_poison);
+ }
+}
+
+static const char *hdr_location(const struct alloc_hdr *hdr)
+{
+ /* Corrupt: step carefully! */
+ if (is_rodata(hdr->location))
+ return hdr->location;
+ return "*CORRUPT*";
+}
+
+static void bad_header(const struct mem_region *region,
+ const struct alloc_hdr *hdr,
+ const char *during,
+ const char *location)
+{
+ /* Corrupt: step carefully! */
+ if (is_rodata(hdr->location))
+ prerror("%p (in %s) %s at %s, previously %s\n",
+ hdr-1, region->name, during, location, hdr->location);
+ else
+ prerror("%p (in %s) %s at %s, previously %p\n",
+ hdr-1, region->name, during, location, hdr->location);
+ abort();
+}
+
+static bool region_is_reservable(struct mem_region *region)
+{
+ return region->type != REGION_OS;
+}
+
+static bool region_is_reserved(struct mem_region *region)
+{
+ return region->type != REGION_OS && region->type != REGION_MEMORY;
+}
+
+void mem_dump_allocs(void)
+{
+ struct mem_region *region;
+ struct alloc_hdr *h, *i;
+
+ /* Walk every allocatable region and dump its allocations */
+ prlog(PR_INFO, "Memory regions:\n");
+ list_for_each(&regions, region, list) {
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ continue;
+ prlog(PR_INFO, " 0x%012llx..%012llx : %s\n",
+ (long long)region->start,
+ (long long)(region->start + region->len - 1),
+ region->name);
+ if (region->free_list.n.next == NULL) {
+ prlog(PR_INFO, " no allocs\n");
+ continue;
+ }
+
+ /*
+ * XXX: When dumping the allocation list we coalesce allocations
+ * with the same location and size into a single line. This is
+ * quadratic, but it makes the dump human-readable and the raw
+ * dump sometimes causes the log buffer to wrap.
+ */
+ for (h = region_start(region); h; h = next_hdr(region, h))
+ h->printed = false;
+
+ for (h = region_start(region); h; h = next_hdr(region, h)) {
+ unsigned long bytes;
+ int count = 0;
+
+ if (h->free)
+ continue;
+ if (h->printed)
+ continue;
+
+ for (i = h; i; i = next_hdr(region, i)) {
+ if (i->free)
+ continue;
+ if (i->num_longs != h->num_longs)
+ continue;
+ if (strcmp(i->location, h->location))
+ continue;
+
+ i->printed = true;
+ count++;
+ }
+
+ bytes = h->num_longs * sizeof(long);
+ prlog(PR_NOTICE, " % 8d allocs of 0x%.8lx bytes at %s (total 0x%lx)\n",
+ count, bytes, hdr_location(h), bytes * count);
+ }
+ }
+}
+
+int64_t mem_dump_free(void)
+{
+ struct mem_region *region;
+ struct alloc_hdr *hdr;
+ int64_t total_free;
+ int64_t region_free;
+
+ total_free = 0;
+
+ prlog(PR_INFO, "Free space in HEAP memory regions:\n");
+ list_for_each(&regions, region, list) {
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ continue;
+ region_free = 0;
+
+ if (region->free_list.n.next == NULL) {
+ continue;
+ }
+ for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+ if (!hdr->free)
+ continue;
+
+ region_free += hdr->num_longs * sizeof(long);
+ }
+ prlog(PR_INFO, "Region %s free: %"PRIx64"\n",
+ region->name, region_free);
+ total_free += region_free;
+ }
+
+ prlog(PR_INFO, "Total free: %"PRIu64"\n", total_free);
+
+ return total_free;
+}
+
+static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,
+ const char *location)
+{
+ size_t alloc_longs, offset;
+ struct free_hdr *f;
+ struct alloc_hdr *next;
+
+ /* Align must be power of 2. */
+ assert(!((align - 1) & align));
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ /* Unallocatable region? */
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ return NULL;
+
+ /* First allocation? */
+ if (region->free_list.n.next == NULL)
+ init_allocatable_region(region);
+
+ /* Don't do screwy sizes. */
+ if (size > region->len)
+ return NULL;
+
+ /* Don't do tiny alignments, we deal in long increments. */
+ if (align < sizeof(long))
+ align = sizeof(long);
+
+ /* Convert size to number of longs, too. */
+ alloc_longs = (size + sizeof(long)-1) / sizeof(long) + ALLOC_HDR_LONGS;
+
+ /* Can't be too small for when we free it, either. */
+ if (alloc_longs < ALLOC_MIN_LONGS)
+ alloc_longs = ALLOC_MIN_LONGS;
+
+ /* Walk free list. */
+ list_for_each(&region->free_list, f, list) {
+ /* We may have to skip some to meet alignment. */
+ if (fits(f, alloc_longs, align, &offset))
+ goto found;
+ }
+
+ return NULL;
+
+found:
+ assert(f->hdr.free);
+ assert(!f->hdr.prev_free);
+
+ /* This block is no longer free. */
+ list_del_from(&region->free_list, &f->list);
+ f->hdr.free = false;
+ f->hdr.location = location;
+
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ assert(next->prev_free);
+ next->prev_free = false;
+ }
+
+ if (offset != 0) {
+ struct free_hdr *pre = f;
+
+ f = (void *)f + offset * sizeof(long);
+ assert(f >= pre + 1);
+
+ /* Set up new header. */
+ f->hdr.num_longs = pre->hdr.num_longs - offset;
+ /* f->hdr.prev_free will be set by make_free below. */
+ f->hdr.free = false;
+ f->hdr.location = location;
+
+ /* Fix up old header. */
+ pre->hdr.num_longs = offset;
+ pre->hdr.prev_free = false;
+
+ /* This coalesces as required. */
+ make_free(region, pre, location, true);
+ }
+
+ /* We might be too long; put the rest back. */
+ discard_excess(region, &f->hdr, alloc_longs, location, true);
+
+ /* Clear tailer for debugging */
+ *tailer(f) = 0;
+
+ /* Their pointer is immediately after header. */
+ return &f->hdr + 1;
+}
+
+void *mem_alloc(struct mem_region *region, size_t size, size_t align,
+ const char *location)
+{
+ static bool dumped = false;
+ void *r;
+
+ assert(lock_held_by_me(&region->free_list_lock));
+
+ r = __mem_alloc(region, size, align, location);
+ if (r)
+ return r;
+
+ prerror("mem_alloc(0x%lx, 0x%lx, \"%s\", %s) failed !\n",
+ size, align, location, region->name);
+ if (!dumped) {
+ mem_dump_allocs();
+ dumped = true;
+ }
+
+ return NULL;
+}
+
+void mem_free(struct mem_region *region, void *mem, const char *location)
+{
+ struct alloc_hdr *hdr;
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ assert(lock_held_by_me(&region->free_list_lock));
+
+ /* Freeing NULL is always a noop. */
+ if (!mem)
+ return;
+
+ /* Your memory is in the region, right? */
+ assert(mem >= region_start(region) + sizeof(*hdr));
+ assert(mem < region_start(region) + region->len);
+
+ /* Grab header. */
+ hdr = mem - sizeof(*hdr);
+
+ if (hdr->free)
+ bad_header(region, hdr, "re-freed", location);
+
+ make_free(region, (struct free_hdr *)hdr, location, false);
+}
+
+size_t mem_allocated_size(const void *ptr)
+{
+ const struct alloc_hdr *hdr = ptr - sizeof(*hdr);
+ return hdr->num_longs * sizeof(long) - sizeof(struct alloc_hdr);
+}
+
+bool mem_resize(struct mem_region *region, void *mem, size_t len,
+ const char *location)
+{
+ struct alloc_hdr *hdr, *next;
+ struct free_hdr *f;
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ assert(lock_held_by_me(&region->free_list_lock));
+
+ /* Get header. */
+ hdr = mem - sizeof(*hdr);
+ if (hdr->free)
+ bad_header(region, hdr, "resize", location);
+
+ /* Round up size to multiple of longs. */
+ len = (sizeof(*hdr) + len + sizeof(long) - 1) / sizeof(long);
+
+ /* Can't be too small for when we free it, either. */
+ if (len < ALLOC_MIN_LONGS)
+ len = ALLOC_MIN_LONGS;
+
+ /* Shrinking is simple. */
+ if (len <= hdr->num_longs) {
+ hdr->location = location;
+ discard_excess(region, hdr, len, location, false);
+ return true;
+ }
+
+ /* Check if we can expand. */
+ next = next_hdr(region, hdr);
+ if (!next || !next->free || hdr->num_longs + next->num_longs < len)
+ return false;
+
+ /* OK, it's free and big enough, absorb it. */
+ f = (struct free_hdr *)next;
+ list_del_from(&region->free_list, &f->list);
+ hdr->num_longs += next->num_longs;
+ hdr->location = location;
+
+ /* Update next prev_free */
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ assert(next->prev_free);
+ next->prev_free = false;
+ }
+
+ /* Clear tailer for debugging */
+ *tailer(f) = 0;
+
+ /* Now we might have *too* much. */
+ discard_excess(region, hdr, len, location, true);
+ return true;
+}
+
+bool mem_check(const struct mem_region *region)
+{
+ size_t frees = 0;
+ struct alloc_hdr *hdr, *prev_free = NULL;
+ struct free_hdr *f;
+
+ /* Check it's sanely aligned. */
+ if (region->start % sizeof(long)) {
+ prerror("Region '%s' not sanely aligned (%llx)\n",
+ region->name, (unsigned long long)region->start);
+ return false;
+ }
+ if ((long)region->len % sizeof(long)) {
+ prerror("Region '%s' not sane length (%llu)\n",
+ region->name, (unsigned long long)region->len);
+ return false;
+ }
+
+ /* Not ours to play with, or empty? Don't do anything. */
+ if (!(region->type == REGION_MEMORY ||
+ region->type == REGION_SKIBOOT_HEAP) ||
+ region->free_list.n.next == NULL)
+ return true;
+
+ /* Walk linearly. */
+ for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+ if (hdr->num_longs < ALLOC_MIN_LONGS) {
+ prerror("Region '%s' %s %p (%s) size %zu\n",
+ region->name, hdr->free ? "free" : "alloc",
+ hdr, hdr_location(hdr),
+ hdr->num_longs * sizeof(long));
+ return false;
+ }
+ if ((unsigned long)hdr + hdr->num_longs * sizeof(long) >
+ region->start + region->len) {
+ prerror("Region '%s' %s %p (%s) oversize %zu\n",
+ region->name, hdr->free ? "free" : "alloc",
+ hdr, hdr_location(hdr),
+ hdr->num_longs * sizeof(long));
+ return false;
+ }
+ if (hdr->free) {
+ if (hdr->prev_free || prev_free) {
+ prerror("Region '%s' free %p (%s) has prev_free"
+ " %p (%s) %sset?\n",
+ region->name, hdr, hdr_location(hdr),
+ prev_free,
+ prev_free ? hdr_location(prev_free)
+ : "NULL",
+ hdr->prev_free ? "" : "un");
+ return false;
+ }
+ prev_free = hdr;
+ frees ^= (unsigned long)hdr - region->start;
+ } else {
+ if (hdr->prev_free != (bool)prev_free) {
+ prerror("Region '%s' alloc %p (%s) has"
+ " prev_free %p %sset?\n",
+ region->name, hdr, hdr_location(hdr),
+ prev_free, hdr->prev_free ? "" : "un");
+ return false;
+ }
+ prev_free = NULL;
+ }
+ }
+
+ /* Now walk free list. */
+ list_for_each(&region->free_list, f, list)
+ frees ^= (unsigned long)f - region->start;
+
+ if (frees) {
+ prerror("Region '%s' free list and walk do not match!\n",
+ region->name);
+ return false;
+ }
+ return true;
+}
+
+bool mem_check_all(void)
+{
+ struct mem_region *r;
+
+ list_for_each(&regions, r, list) {
+ if (!mem_check(r))
+ return false;
+ }
+
+ return true;
+}
+
+static struct mem_region *new_region(const char *name,
+ uint64_t start, uint64_t len,
+ struct dt_node *node,
+ enum mem_region_type type)
+{
+ struct mem_region *region;
+
+ region = malloc(sizeof(*region));
+ if (!region)
+ return NULL;
+
+ region->name = name;
+ region->start = start;
+ region->len = len;
+ region->node = node;
+ region->type = type;
+ region->free_list.n.next = NULL;
+ init_lock(&region->free_list_lock);
+
+ return region;
+}
+
+/* We always split regions, so we only have to replace one. */
+static struct mem_region *split_region(struct mem_region *head,
+ uint64_t split_at,
+ enum mem_region_type type)
+{
+ struct mem_region *tail;
+ uint64_t end = head->start + head->len;
+
+ tail = new_region(head->name, split_at, end - split_at,
+ head->node, type);
+ /* Original region becomes head. */
+ if (tail)
+ head->len -= tail->len;
+
+ return tail;
+}
+
+static bool intersects(const struct mem_region *region, uint64_t addr)
+{
+ return addr > region->start &&
+ addr < region->start + region->len;
+}
+
+static bool maybe_split(struct mem_region *r, uint64_t split_at)
+{
+ struct mem_region *tail;
+
+ if (!intersects(r, split_at))
+ return true;
+
+ tail = split_region(r, split_at, r->type);
+ if (!tail)
+ return false;
+
+ /* Tail add is important: we may need to split again! */
+ list_add_after(&regions, &tail->list, &r->list);
+ return true;
+}
+
+static bool overlaps(const struct mem_region *r1, const struct mem_region *r2)
+{
+ return (r1->start + r1->len > r2->start
+ && r1->start < r2->start + r2->len);
+}
+
+static bool contains(const struct mem_region *r1, const struct mem_region *r2)
+{
+ u64 r1_end = r1->start + r1->len;
+ u64 r2_end = r2->start + r2->len;
+
+ return (r1->start <= r2->start && r2_end <= r1_end);
+}
+
+static struct mem_region *get_overlap(const struct mem_region *region)
+{
+ struct mem_region *i;
+
+ list_for_each(&regions, i, list) {
+ if (overlaps(region, i))
+ return i;
+ }
+ return NULL;
+}
+
+static void add_region_to_regions(struct mem_region *region)
+{
+ struct mem_region *r;
+
+ list_for_each(&regions, r, list) {
+ if (r->start < region->start)
+ continue;
+
+ list_add_before(&regions, &region->list, &r->list);
+ return;
+ }
+ list_add_tail(&regions, &region->list);
+}
+
+static bool add_region(struct mem_region *region)
+{
+ struct mem_region *r;
+
+ if (mem_regions_finalised) {
+ prerror("MEM: add_region(%s@0x%"PRIx64") called after finalise!\n",
+ region->name, region->start);
+ return false;
+ }
+
+ /* First split any regions which intersect. */
+ list_for_each(&regions, r, list) {
+ /*
+ * The new region should be fully contained by an existing one.
+ * If it's not then we have a problem where reservations
+ * partially overlap which is probably broken.
+ *
+ * NB: There *might* be situations where this is legitimate,
+ * but the region handling does not currently support this.
+ */
+ if (overlaps(r, region) && !contains(r, region)) {
+ prerror("MEM: Partial overlap detected between regions:\n");
+ prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"] (new)\n",
+ region->name, region->start,
+ region->start + region->len);
+ prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"]\n",
+ r->name, r->start, r->start + r->len);
+ return false;
+ }
+
+ if (!maybe_split(r, region->start) ||
+ !maybe_split(r, region->start + region->len))
+ return false;
+ }
+
+ /* Now we have only whole overlaps, if any. */
+ while ((r = get_overlap(region)) != NULL) {
+ assert(r->start == region->start);
+ assert(r->len == region->len);
+ list_del_from(&regions, &r->list);
+ free(r);
+ }
+
+ /* Finally, add in our own region. */
+ add_region_to_regions(region);
+ return true;
+}
+
+static void mem_reserve(enum mem_region_type type, const char *name,
+ uint64_t start, uint64_t len)
+{
+ struct mem_region *region;
+ bool added = true;
+
+ lock(&mem_region_lock);
+ region = new_region(name, start, len, NULL, type);
+ assert(region);
+
+ if (!mem_region_init_done)
+ list_add(&early_reserves, &region->list);
+ else
+ added = add_region(region);
+
+ assert(added);
+ unlock(&mem_region_lock);
+}
+
+void mem_reserve_fw(const char *name, uint64_t start, uint64_t len)
+{
+ mem_reserve(REGION_FW_RESERVED, name, start, len);
+}
+
+void mem_reserve_hwbuf(const char *name, uint64_t start, uint64_t len)
+{
+ mem_reserve(REGION_RESERVED, name, start, len);
+}
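A caller that needs to keep a hardware buffer out of the OS memory map would use the hwbuf variant, which creates a REGION_RESERVED region and therefore gets a no-map property in the device tree later on. The name and range below are invented for illustration:

        /* Hypothetical 64KB hardware trace buffer */
        mem_reserve_hwbuf("ibm,example-trace-buf", 0x30000000ULL, 0x10000ULL);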
+
+static bool matches_chip_id(const __be32 ids[], size_t num, u32 chip_id)
+{
+ size_t i;
+
+ for (i = 0; i < num; i++)
+ if (be32_to_cpu(ids[i]) == chip_id)
+ return true;
+
+ return false;
+}
+
+void *__local_alloc(unsigned int chip_id, size_t size, size_t align,
+ const char *location)
+{
+ struct mem_region *region;
+ void *p = NULL;
+ bool use_local = true;
+
+ lock(&mem_region_lock);
+
+restart:
+ list_for_each(&regions, region, list) {
+ const struct dt_property *prop;
+ const __be32 *ids;
+
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ continue;
+
+ /* Don't allocate from normal heap. */
+ if (region == &skiboot_heap)
+ continue;
+
+ /* First pass, only match node local regions */
+ if (use_local) {
+ if (!region->node)
+ continue;
+ prop = dt_find_property(region->node, "ibm,chip-id");
+ ids = (const __be32 *)prop->prop;
+ if (!matches_chip_id(ids, prop->len/sizeof(u32),
+ chip_id))
+ continue;
+ }
+
+ /* Second pass, match anything */
+ lock(&region->free_list_lock);
+ p = mem_alloc(region, size, align, location);
+ unlock(&region->free_list_lock);
+ if (p)
+ break;
+ }
+
+ /*
+ * If we can't allocate the memory block from the expected
+ * node, we bail to any one that can accommodate our request.
+ */
+ if (!p && use_local) {
+ use_local = false;
+ goto restart;
+ }
+
+ unlock(&mem_region_lock);
+
+ return p;
+}
+
+struct mem_region *find_mem_region(const char *name)
+{
+ struct mem_region *region;
+
+ list_for_each(&regions, region, list) {
+ if (streq(region->name, name))
+ return region;
+ }
+ return NULL;
+}
+
+bool mem_range_is_reserved(uint64_t start, uint64_t size)
+{
+ uint64_t end = start + size;
+ struct mem_region *region;
+ struct list_head *search;
+
+ /* We may have the range covered by a number of regions, which could
+ * appear in any order. So, we look for a region that covers the
+ * start address, and bump start up to the end of that region.
+ *
+ * We repeat until we've either bumped past the end of the range,
+ * or we didn't find a matching region.
+ *
+ * This has a worst-case of O(n^2), but n is well bounded by the
+ * small number of reservations.
+ */
+
+ if (!mem_region_init_done)
+ search = &early_reserves;
+ else
+ search = &regions;
+
+ for (;;) {
+ bool found = false;
+
+ list_for_each(search, region, list) {
+ if (!region_is_reserved(region))
+ continue;
+
+ /* does this region overlap the start address, and
+ * have a non-zero size? */
+ if (region->start <= start &&
+ region->start + region->len > start &&
+ region->len) {
+ start = region->start + region->len;
+ found = true;
+ }
+ }
+
+ /* 'end' is the first byte outside of the range */
+ if (start >= end)
+ return true;
+
+ if (!found)
+ break;
+ }
+
+ return false;
+}
+
+static void mem_region_parse_reserved_properties(void)
+{
+ const struct dt_property *names, *ranges;
+ struct mem_region *region;
+
+ prlog(PR_DEBUG, "MEM: parsing reserved memory from "
+ "reserved-names/-ranges properties\n");
+
+ names = dt_find_property(dt_root, "reserved-names");
+ ranges = dt_find_property(dt_root, "reserved-ranges");
+ if (names && ranges) {
+ const uint64_t *range;
+ int n, len;
+
+ range = (const void *)ranges->prop;
+
+ for (n = 0; n < names->len; n += len, range += 2) {
+ char *name;
+
+ len = strlen(names->prop + n) + 1;
+ name = strdup(names->prop + n);
+
+ region = new_region(name,
+ dt_get_number(range, 2),
+ dt_get_number(range + 1, 2),
+ NULL, REGION_FW_RESERVED);
+ if (!add_region(region)) {
+ prerror("Couldn't add mem_region %s\n", name);
+ abort();
+ }
+ }
+ } else if (names || ranges) {
+ prerror("Invalid properties: reserved-names=%p "
+ "with reserved-ranges=%p\n",
+ names, ranges);
+ abort();
+ } else {
+ return;
+ }
+}
+
+static bool mem_region_parse_reserved_nodes(const char *path)
+{
+ struct dt_node *parent, *node;
+
+ parent = dt_find_by_path(dt_root, path);
+ if (!parent)
+ return false;
+
+ prlog(PR_INFO, "MEM: parsing reserved memory from node %s\n", path);
+
+ dt_for_each_child(parent, node) {
+ const struct dt_property *reg;
+ struct mem_region *region;
+ int type;
+
+ reg = dt_find_property(node, "reg");
+ if (!reg) {
+ char *nodepath = dt_get_path(node);
+ prerror("node %s has no reg property, ignoring\n",
+ nodepath);
+ free(nodepath);
+ continue;
+ }
+
+ if (dt_has_node_property(node, "no-map", NULL))
+ type = REGION_RESERVED;
+ else
+ type = REGION_FW_RESERVED;
+
+ region = new_region(strdup(node->name),
+ dt_get_number(reg->prop, 2),
+ dt_get_number(reg->prop + sizeof(u64), 2),
+ node, type);
+ if (!add_region(region)) {
+ char *nodepath = dt_get_path(node);
+ prerror("node %s failed to add_region()\n", nodepath);
+ free(nodepath);
+ }
+ }
+
+ return true;
+}
+
+/* Trawl through device tree, create memory regions from nodes. */
+void mem_region_init(void)
+{
+ struct mem_region *region, *next;
+ struct dt_node *i;
+ bool rc;
+
+ /*
+ * Add associativity properties outside of the lock
+ * to avoid recursive locking caused by allocations
+ * done by add_chip_dev_associativity()
+ */
+ dt_for_each_node(dt_root, i) {
+ if (!dt_has_node_property(i, "device_type", "memory") &&
+ !dt_has_node_property(i, "compatible", "pmem-region"))
+ continue;
+
+ /* Add associativity properties */
+ add_chip_dev_associativity(i);
+ }
+
+ /* Add each memory node. */
+ dt_for_each_node(dt_root, i) {
+ uint64_t start, len;
+ char *rname;
+#define NODE_REGION_PREFIX "ibm,firmware-allocs-"
+
+ if (!dt_has_node_property(i, "device_type", "memory"))
+ continue;
+ rname = zalloc(strlen(i->name) + strlen(NODE_REGION_PREFIX) + 1);
+ assert(rname);
+ strcat(rname, NODE_REGION_PREFIX);
+ strcat(rname, i->name);
+ start = dt_get_address(i, 0, &len);
+ lock(&mem_region_lock);
+ region = new_region(rname, start, len, i, REGION_MEMORY);
+ if (!region) {
+ prerror("MEM: Could not add mem region %s!\n", i->name);
+ abort();
+ }
+ add_region_to_regions(region);
+ if ((start + len) > top_of_ram)
+ top_of_ram = start + len;
+ unlock(&mem_region_lock);
+ }
+
+ /*
+ * This is called after we know the maximum PIR of all CPUs,
+ * so we can dynamically set the stack length.
+ */
+ skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;
+
+ lock(&mem_region_lock);
+
+ /* Now carve out our own reserved areas. */
+ if (!add_region(&skiboot_os_reserve) ||
+ !add_region(&skiboot_code_and_text) ||
+ !add_region(&skiboot_heap) ||
+ !add_region(&skiboot_after_heap) ||
+ !add_region(&skiboot_cpu_stacks)) {
+ prerror("Out of memory adding skiboot reserved areas\n");
+ abort();
+ }
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ if (!add_region(&skiboot_mambo_kernel) ||
+ !add_region(&skiboot_mambo_initramfs)) {
+ prerror("Out of memory adding mambo payload\n");
+ abort();
+ }
+ }
+
+ /* Add reserved ranges from HDAT */
+ list_for_each_safe(&early_reserves, region, next, list) {
+ bool added;
+
+ list_del(&region->list);
+ added = add_region(region);
+ assert(added);
+ }
+
+ /* Add reserved ranges from the DT */
+ rc = mem_region_parse_reserved_nodes("/reserved-memory");
+ if (!rc)
+ rc = mem_region_parse_reserved_nodes(
+ "/ibm,hostboot/reserved-memory");
+ if (!rc)
+ mem_region_parse_reserved_properties();
+
+ mem_region_init_done = true;
+ unlock(&mem_region_lock);
+}
+
+static uint64_t allocated_length(const struct mem_region *r)
+{
+ struct free_hdr *f, *last = NULL;
+
+ /* No allocations at all? */
+ if (r->free_list.n.next == NULL)
+ return 0;
+
+ /* Find last free block. */
+ list_for_each(&r->free_list, f, list)
+ if (f > last)
+ last = f;
+
+ /* No free blocks? */
+ if (!last)
+ return r->len;
+
+ /* Last free block isn't at end? */
+ if (next_hdr(r, &last->hdr))
+ return r->len;
+ return (unsigned long)last - r->start;
+}
+
+/* Separate out allocated sections into their own region. */
+void mem_region_release_unused(void)
+{
+ struct mem_region *r;
+
+ lock(&mem_region_lock);
+ assert(!mem_regions_finalised);
+
+ prlog(PR_INFO, "Releasing unused memory:\n");
+ list_for_each(&regions, r, list) {
+ uint64_t used_len;
+
+ /* If it's not allocatable, ignore it. */
+ if (!(r->type == REGION_SKIBOOT_HEAP ||
+ r->type == REGION_MEMORY))
+ continue;
+
+ used_len = allocated_length(r);
+
+ prlog(PR_INFO, " %s: %llu/%llu used\n",
+ r->name, (long long)used_len, (long long)r->len);
+
+ /* We keep the skiboot heap. */
+ if (r == &skiboot_heap)
+ continue;
+
+ /* Nothing used? Whole thing is for Linux. */
+ if (used_len == 0)
+ r->type = REGION_OS;
+ /* Partially used? Split region. */
+ else if (used_len != r->len) {
+ struct mem_region *for_linux;
+ struct free_hdr *last = region_start(r) + used_len;
+
+ /* Remove the final free block. */
+ list_del_from(&r->free_list, &last->list);
+
+ for_linux = split_region(r, r->start + used_len,
+ REGION_OS);
+ if (!for_linux) {
+ prerror("OOM splitting mem node %s for linux\n",
+ r->name);
+ abort();
+ }
+ list_add(&regions, &for_linux->list);
+ }
+ }
+ unlock(&mem_region_lock);
+}
+
+static void mem_clear_range(uint64_t s, uint64_t e)
+{
+ uint64_t res_start, res_end;
+
+ /* Skip exception vectors */
+ if (s < EXCEPTION_VECTORS_END)
+ s = EXCEPTION_VECTORS_END;
+
+ /* Skip kernel preload area */
+ res_start = (uint64_t)KERNEL_LOAD_BASE;
+ res_end = res_start + KERNEL_LOAD_SIZE;
+
+ if (s >= res_start && s < res_end)
+ s = res_end;
+ if (e > res_start && e <= res_end)
+ e = res_start;
+ if (e <= s)
+ return;
+ if (s < res_start && e > res_end) {
+ mem_clear_range(s, res_start);
+ mem_clear_range(res_end, e);
+ return;
+ }
+
+ /* Skip initramfs preload area */
+ res_start = (uint64_t)INITRAMFS_LOAD_BASE;
+ res_end = res_start + INITRAMFS_LOAD_SIZE;
+
+ if (s >= res_start && s < res_end)
+ s = res_end;
+ if (e > res_start && e <= res_end)
+ e = res_start;
+ if (e <= s)
+ return;
+ if (s < res_start && e > res_end) {
+ mem_clear_range(s, res_start);
+ mem_clear_range(res_end, e);
+ return;
+ }
+
+ prlog(PR_DEBUG, "Clearing region %llx-%llx\n",
+ (long long)s, (long long)e);
+ memset((void *)s, 0, e - s);
+}
+
+struct mem_region_clear_job_args {
+ char *job_name;
+ uint64_t s,e;
+};
+
+static void mem_region_clear_job(void *data)
+{
+ struct mem_region_clear_job_args *arg = (struct mem_region_clear_job_args*)data;
+ mem_clear_range(arg->s, arg->e);
+}
+
+#define MEM_REGION_CLEAR_JOB_SIZE (16ULL*(1<<30))
+
+static struct cpu_job **mem_clear_jobs;
+static struct mem_region_clear_job_args *mem_clear_job_args;
+static int mem_clear_njobs = 0;
+
+void start_mem_region_clear_unused(void)
+{
+ struct mem_region *r;
+ uint64_t s,l;
+ uint64_t total = 0;
+ uint32_t chip_id;
+ char *path;
+ int i;
+ struct cpu_job **jobs;
+ struct mem_region_clear_job_args *job_args;
+
+ lock(&mem_region_lock);
+ assert(mem_regions_finalised);
+
+ mem_clear_njobs = 0;
+
+ list_for_each(&regions, r, list) {
+ if (!(r->type == REGION_OS))
+ continue;
+ mem_clear_njobs++;
+ /* One job per 16GB */
+ mem_clear_njobs += r->len / MEM_REGION_CLEAR_JOB_SIZE;
+ }
+
+ jobs = malloc(mem_clear_njobs * sizeof(struct cpu_job*));
+ job_args = malloc(mem_clear_njobs * sizeof(struct mem_region_clear_job_args));
+ mem_clear_jobs = jobs;
+ mem_clear_job_args = job_args;
+
+ prlog(PR_NOTICE, "Clearing unused memory:\n");
+ i = 0;
+ list_for_each(&regions, r, list) {
+ /* If it's not unused, ignore it. */
+ if (!(r->type == REGION_OS))
+ continue;
+
+ assert(r != &skiboot_heap);
+
+ s = r->start;
+ l = r->len;
+ while (l > MEM_REGION_CLEAR_JOB_SIZE) {
+ job_args[i].s = s + l - MEM_REGION_CLEAR_JOB_SIZE;
+ job_args[i].e = s + l;
+ l -= MEM_REGION_CLEAR_JOB_SIZE;
+ job_args[i].job_name = malloc(sizeof(char) * 100);
+ total += MEM_REGION_CLEAR_JOB_SIZE;
+ chip_id = __dt_get_chip_id(r->node);
+ if (chip_id == -1)
+ chip_id = 0;
+ path = dt_get_path(r->node);
+ snprintf(job_args[i].job_name, 100,
+ "clear %s, %s 0x%"PRIx64" len: %"PRIx64" on %d",
+ r->name, path,
+ job_args[i].s,
+ (job_args[i].e - job_args[i].s),
+ chip_id);
+ free(path);
+ jobs[i] = cpu_queue_job_on_node(chip_id,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ if (!jobs[i])
+ jobs[i] = cpu_queue_job(NULL,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ assert(jobs[i]);
+ i++;
+ }
+ job_args[i].s = s;
+ job_args[i].e = s+l;
+ job_args[i].job_name = malloc(sizeof(char) * 100);
+ total += l;
+ chip_id = __dt_get_chip_id(r->node);
+ if (chip_id == -1)
+ chip_id = 0;
+ path = dt_get_path(r->node);
+ snprintf(job_args[i].job_name, 100,
+ "clear %s, %s 0x%"PRIx64" len: 0x%"PRIx64" on %d",
+ r->name, path,
+ job_args[i].s,
+ (job_args[i].e - job_args[i].s),
+ chip_id);
+ free(path);
+ jobs[i] = cpu_queue_job_on_node(chip_id,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ if (!jobs[i])
+ jobs[i] = cpu_queue_job(NULL,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ assert(jobs[i]);
+ i++;
+ }
+ unlock(&mem_region_lock);
+ cpu_process_local_jobs();
+}
+
+void wait_mem_region_clear_unused(void)
+{
+ uint64_t l;
+ uint64_t total = 0;
+ int i;
+
+ for (i = 0; i < mem_clear_njobs; i++) {
+ total += (mem_clear_job_args[i].e - mem_clear_job_args[i].s);
+ }
+
+ l = 0;
+ for (i = 0; i < mem_clear_njobs; i++) {
+ cpu_wait_job(mem_clear_jobs[i], true);
+ l += (mem_clear_job_args[i].e - mem_clear_job_args[i].s);
+ printf("Clearing memory... %"PRIu64"/%"PRIu64"GB done\n",
+ l>>30, total>>30);
+ free(mem_clear_job_args[i].job_name);
+ }
+ free(mem_clear_jobs);
+ free(mem_clear_job_args);
+}
+
+static void mem_region_add_dt_reserved_node(struct dt_node *parent,
+ struct mem_region *region)
+{
+ char *name, *p;
+
+ /* If a reserved region was established before skiboot, it may be
+ * referenced by a device-tree node with extra data. In that case,
+ * copy the node to /reserved-memory/, unless it's already there.
+ *
+ * We update region->node to the new copy here, as the prd code may
+ * update regions' device-tree nodes, and we want those updates to
+ * apply to the nodes in /reserved-memory/.
+ */
+ if (region->type == REGION_FW_RESERVED && region->node) {
+ if (region->node->parent != parent)
+ region->node = dt_copy(region->node, parent);
+ return;
+ }
+
+ name = strdup(region->name);
+ assert(name);
+
+ /* remove any cell addresses in the region name; we have our own cell
+ * addresses here */
+ p = strchr(name, '@');
+ if (p)
+ *p = '\0';
+
+ region->node = dt_new_addr(parent, name, region->start);
+ assert(region->node);
+ dt_add_property_u64s(region->node, "reg", region->start, region->len);
+
+ /*
+ * This memory is used by hardware and may need special handling. Ask
+ * the host kernel not to map it by default.
+ */
+ if (region->type == REGION_RESERVED)
+ dt_add_property(region->node, "no-map", NULL, 0);
+
+ free(name);
+}
+
+void mem_region_add_dt_reserved(void)
+{
+ int names_len, ranges_len, len;
+ const struct dt_property *prop;
+ struct mem_region *region;
+ void *names, *ranges;
+ struct dt_node *node;
+ fdt64_t *range;
+ char *name;
+
+ names_len = 0;
+ ranges_len = 0;
+
+ /* Finalise the region list, so we know that the regions list won't be
+ * altered after this point. The regions' free lists may change after
+ * we drop the lock, but we don't access those. */
+ lock(&mem_region_lock);
+ mem_regions_finalised = true;
+
+ /* establish top-level reservation node */
+ node = dt_find_by_path(dt_root, "reserved-memory");
+ if (!node) {
+ node = dt_new(dt_root, "reserved-memory");
+ dt_add_property_cells(node, "#address-cells", 2);
+ dt_add_property_cells(node, "#size-cells", 2);
+ dt_add_property(node, "ranges", NULL, 0);
+ }
+
+ prlog(PR_INFO, "Reserved regions:\n");
+
+ /* First pass, create /reserved-memory/ nodes for each reservation,
+ * and calculate the length for the /reserved-names and
+ * /reserved-ranges properties */
+ list_for_each(&regions, region, list) {
+ if (!region_is_reservable(region))
+ continue;
+
+ prlog(PR_INFO, " 0x%012llx..%012llx : %s\n",
+ (long long)region->start,
+ (long long)(region->start + region->len - 1),
+ region->name);
+
+ mem_region_add_dt_reserved_node(node, region);
+
+ /* calculate the size of the properties populated later */
+ names_len += strlen(region->node->name) + 1;
+ ranges_len += 2 * sizeof(uint64_t);
+ }
+
+ name = names = malloc(names_len);
+ range = ranges = malloc(ranges_len);
+
+ /* Second pass: populate the old-style reserved-names and
+ * reserved-regions arrays based on the node data */
+ list_for_each(&regions, region, list) {
+ if (!region_is_reservable(region))
+ continue;
+
+ len = strlen(region->node->name) + 1;
+ memcpy(name, region->node->name, len);
+ name += len;
+
+ range[0] = cpu_to_fdt64(region->start);
+ range[1] = cpu_to_fdt64(region->len);
+ range += 2;
+ }
+ unlock(&mem_region_lock);
+
+ prop = dt_find_property(dt_root, "reserved-names");
+ if (prop)
+ dt_del_property(dt_root, (struct dt_property *)prop);
+
+ prop = dt_find_property(dt_root, "reserved-ranges");
+ if (prop)
+ dt_del_property(dt_root, (struct dt_property *)prop);
+
+ dt_add_property(dt_root, "reserved-names", names, names_len);
+ dt_add_property(dt_root, "reserved-ranges", ranges, ranges_len);
+
+ free(names);
+ free(ranges);
+}
+
+struct mem_region *mem_region_next(struct mem_region *region)
+{
+ struct list_node *node;
+
+ assert(lock_held_by_me(&mem_region_lock));
+
+ node = region ? &region->list : &regions.n;
+
+ if (node->next == &regions.n)
+ return NULL;
+
+ return list_entry(node->next, struct mem_region, list);
+}
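
Annotation (not part of the patch): mem_region_clear_unused() above carves each free region into fixed-size chunks and queues one clear job per chunk, with the leftover handled by a final job. Below is a minimal standalone sketch of that chunking under those assumptions; CHUNK_SIZE is a hypothetical stand-in for MEM_REGION_CLEAR_JOB_SIZE and the demo main() is made up.

#include <stdint.h>
#include <stdio.h>

#define CHUNK_SIZE (16ULL << 30)	/* hypothetical stand-in for MEM_REGION_CLEAR_JOB_SIZE */

/* Split [start, start + len) into CHUNK_SIZE pieces, highest offsets first,
 * mirroring the loop that queues one clear job per chunk. */
static void split_range(uint64_t start, uint64_t len)
{
	while (len > CHUNK_SIZE) {
		printf("job: clear 0x%016llx..0x%016llx\n",
		       (unsigned long long)(start + len - CHUNK_SIZE),
		       (unsigned long long)(start + len));
		len -= CHUNK_SIZE;
	}
	/* Final (possibly short) chunk covers whatever is left at the bottom. */
	printf("job: clear 0x%016llx..0x%016llx\n",
	       (unsigned long long)start,
	       (unsigned long long)(start + len));
}

int main(void)
{
	split_range(0x100000000ULL, 40ULL << 30);	/* 40GB -> two full chunks + 8GB tail */
	return 0;
}

Run on a 40GB range, this prints two full 16GB jobs plus an 8GB tail, mirroring the job list built above.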
diff --git a/roms/skiboot/core/nvram-format.c b/roms/skiboot/core/nvram-format.c
new file mode 100644
index 000000000..8aa5abf22
--- /dev/null
+++ b/roms/skiboot/core/nvram-format.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * NVRAM Format as specified in PAPR
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <nvram.h>
+
+struct chrp_nvram_hdr {
+ uint8_t sig;
+ uint8_t cksum;
+ be16 len;
+ char name[12];
+};
+
+static struct chrp_nvram_hdr *skiboot_part_hdr;
+
+#define NVRAM_SIG_FW_PRIV 0x51
+#define NVRAM_SIG_SYSTEM 0x70
+#define NVRAM_SIG_FREE 0x7f
+
+#define NVRAM_NAME_COMMON "common"
+#define NVRAM_NAME_FW_PRIV "ibm,skiboot"
+#define NVRAM_NAME_FREE "wwwwwwwwwwww"
+
+/* 64k should be enough, famous last words... */
+#define NVRAM_SIZE_COMMON 0x10000
+
+/* 4k should be enough, famous last words... */
+#define NVRAM_SIZE_FW_PRIV 0x1000
+
+static uint8_t chrp_nv_cksum(struct chrp_nvram_hdr *hdr)
+{
+ struct chrp_nvram_hdr h_copy = *hdr;
+ uint8_t b_data, i_sum, c_sum;
+ uint8_t *p = (uint8_t *)&h_copy;
+ unsigned int nbytes = sizeof(h_copy);
+
+ h_copy.cksum = 0;
+ for (c_sum = 0; nbytes; nbytes--) {
+ b_data = *(p++);
+ i_sum = c_sum + b_data;
+ if (i_sum < c_sum)
+ i_sum++;
+ c_sum = i_sum;
+ }
+ return c_sum;
+}
+
+int nvram_format(void *nvram_image, uint32_t nvram_size)
+{
+ struct chrp_nvram_hdr *h;
+ unsigned int offset = 0;
+
+ prerror("NVRAM: Re-initializing (size: 0x%08x)\n", nvram_size);
+ memset(nvram_image, 0, nvram_size);
+
+ /* Create private partition */
+ if (nvram_size - offset < NVRAM_SIZE_FW_PRIV)
+ return -1;
+ h = nvram_image + offset;
+ h->sig = NVRAM_SIG_FW_PRIV;
+ h->len = cpu_to_be16(NVRAM_SIZE_FW_PRIV >> 4);
+ strcpy(h->name, NVRAM_NAME_FW_PRIV);
+ h->cksum = chrp_nv_cksum(h);
+ prlog(PR_DEBUG, "NVRAM: Created '%s' partition at 0x%08x"
+ " for size 0x%08x with cksum 0x%02x\n",
+ NVRAM_NAME_FW_PRIV, offset,
+ be16_to_cpu(h->len), h->cksum);
+ offset += NVRAM_SIZE_FW_PRIV;
+
+ /* Create common partition */
+ if (nvram_size - offset < NVRAM_SIZE_COMMON)
+ return -1;
+ h = nvram_image + offset;
+ h->sig = NVRAM_SIG_SYSTEM;
+ h->len = cpu_to_be16(NVRAM_SIZE_COMMON >> 4);
+ strcpy(h->name, NVRAM_NAME_COMMON);
+ h->cksum = chrp_nv_cksum(h);
+ prlog(PR_DEBUG, "NVRAM: Created '%s' partition at 0x%08x"
+ " for size 0x%08x with cksum 0x%02x\n",
+ NVRAM_NAME_COMMON, offset,
+ be16_to_cpu(h->len), h->cksum);
+ offset += NVRAM_SIZE_COMMON;
+
+ /* Create free space partition */
+ if (nvram_size - offset < sizeof(struct chrp_nvram_hdr))
+ return -1;
+ h = nvram_image + offset;
+ h->sig = NVRAM_SIG_FREE;
+ h->len = cpu_to_be16((nvram_size - offset) >> 4);
+ /* We have the full 12 bytes here */
+ memcpy(h->name, NVRAM_NAME_FREE, 12);
+ h->cksum = chrp_nv_cksum(h);
+ prlog(PR_DEBUG, "NVRAM: Created '%s' partition at 0x%08x"
+ " for size 0x%08x with cksum 0x%02x\n",
+ NVRAM_NAME_FREE, offset, be16_to_cpu(h->len), h->cksum);
+ return 0;
+}
+
+/*
+ * Check that the nvram partition layout is sane and that it
+ * contains our required partitions. If not, we re-format the
+ * lot of it
+ */
+int nvram_check(void *nvram_image, const uint32_t nvram_size)
+{
+ unsigned int offset = 0;
+ bool found_common = false;
+
+ skiboot_part_hdr = NULL;
+
+ while (offset + sizeof(struct chrp_nvram_hdr) < nvram_size) {
+ struct chrp_nvram_hdr *h = nvram_image + offset;
+
+ if (chrp_nv_cksum(h) != h->cksum) {
+ prerror("NVRAM: Partition at offset 0x%x"
+ " has bad checksum: 0x%02x vs 0x%02x\n",
+ offset, h->cksum, chrp_nv_cksum(h));
+ goto failed;
+ }
+ if (be16_to_cpu(h->len) < 1) {
+ prerror("NVRAM: Partition at offset 0x%x"
+ " has incorrect 0 length\n", offset);
+ goto failed;
+ }
+
+ if (h->sig == NVRAM_SIG_SYSTEM &&
+ strcmp(h->name, NVRAM_NAME_COMMON) == 0)
+ found_common = true;
+
+ if (h->sig == NVRAM_SIG_FW_PRIV &&
+ strcmp(h->name, NVRAM_NAME_FW_PRIV) == 0)
+ skiboot_part_hdr = h;
+
+ offset += be16_to_cpu(h->len) << 4;
+ if (offset > nvram_size) {
+ prerror("NVRAM: Partition at offset 0x%x"
+ " extends beyond end of nvram !\n", offset);
+ goto failed;
+ }
+ }
+ if (!found_common) {
+ prlog_once(PR_ERR, "NVRAM: Common partition not found !\n");
+ goto failed;
+ }
+
+ if (!skiboot_part_hdr) {
+ prlog_once(PR_ERR, "NVRAM: Skiboot private partition not found !\n");
+ goto failed;
+ } else {
+ /*
+ * The OF NVRAM format requires config strings to be NUL
+ * terminated and unused memory to be set to zero. Well behaved
+ * software should ensure this is done for us, but we should
+ * always check.
+ */
+ const char *last_byte = (const char *) skiboot_part_hdr +
+ be16_to_cpu(skiboot_part_hdr->len) * 16 - 1;
+
+ if (*last_byte != 0) {
+ prerror("NVRAM: Skiboot private partition is not NUL terminated");
+ goto failed;
+ }
+ }
+
+ prlog(PR_INFO, "NVRAM: Layout appears sane\n");
+ assert(skiboot_part_hdr);
+ return 0;
+ failed:
+ return -1;
+}
+
+static const char *find_next_key(const char *start, const char *end)
+{
+ /*
+ * Unused parts of the partition are set to NUL. If we hit two
+ * NULs in a row then we assume that we have hit the end of the
+ * partition.
+ */
+ if (*start == 0)
+ return NULL;
+
+ while (start < end) {
+ if (*start == 0)
+ return start + 1;
+
+ start++;
+ }
+
+ return NULL;
+}
+
+static void nvram_dangerous(const char *key)
+{
+ prlog(PR_ERR, " ___________________________________________________________\n");
+ prlog(PR_ERR, "< Dangerous NVRAM option: %s\n", key);
+ prlog(PR_ERR, " -----------------------------------------------------------\n");
+ prlog(PR_ERR, " \\ \n");
+ prlog(PR_ERR, " \\ WW \n");
+ prlog(PR_ERR, " <^ \\___/| \n");
+ prlog(PR_ERR, " \\ / \n");
+ prlog(PR_ERR, " \\_ _/ \n");
+ prlog(PR_ERR, " }{ \n");
+}
+
+
+/*
+ * nvram_query_safe/dangerous() - Searches skiboot NVRAM partition
+ * for a key=value pair.
+ *
+ * Dangerous means it should only be used for testing as it may
+ * mask issues. Safe is ok for long term use.
+ *
+ * Returns a pointer to a NUL terminated string that contains the value
+ * associated with the given key.
+ */
+static const char *__nvram_query(const char *key, bool dangerous)
+{
+ const char *part_end, *start;
+ int key_len = strlen(key);
+
+ assert(key);
+
+ if (!nvram_has_loaded()) {
+ prlog(PR_DEBUG,
+ "NVRAM: Query for '%s' must wait for NVRAM to load\n",
+ key);
+ if (!nvram_wait_for_load()) {
+ prlog(PR_CRIT, "NVRAM: Failed to load\n");
+ return NULL;
+ }
+ }
+
+ /*
+ * The running OS can modify the NVRAM as it pleases so we need to be
+	 * a little paranoid and check that it's ok before we try to parse it.
+ *
+ * NB: nvram_validate() can update skiboot_part_hdr
+ */
+ if (!nvram_validate())
+ return NULL;
+
+ assert(skiboot_part_hdr);
+
+ part_end = (const char *) skiboot_part_hdr
+ + be16_to_cpu(skiboot_part_hdr->len) * 16 - 1;
+
+ start = (const char *) skiboot_part_hdr
+ + sizeof(*skiboot_part_hdr);
+
+ if (!key_len) {
+ prlog(PR_WARNING, "NVRAM: search key is empty!\n");
+ return NULL;
+ }
+
+ if (key_len > 32)
+ prlog(PR_WARNING, "NVRAM: search key '%s' is longer than 32 chars\n", key);
+
+ while (start) {
+ int remaining = part_end - start;
+
+ prlog(PR_TRACE, "NVRAM: '%s' (%lu)\n",
+ start, strlen(start));
+
+ if (key_len + 1 > remaining)
+ return NULL;
+
+ if (!strncmp(key, start, key_len) && start[key_len] == '=') {
+ const char *value = &start[key_len + 1];
+
+ prlog(PR_DEBUG, "NVRAM: Searched for '%s' found '%s'\n",
+ key, value);
+
+ if (dangerous)
+ nvram_dangerous(start);
+ return value;
+ }
+
+ start = find_next_key(start, part_end);
+ }
+
+ prlog(PR_DEBUG, "NVRAM: '%s' not found\n", key);
+
+ return NULL;
+}
+
+const char *nvram_query_safe(const char *key)
+{
+ return __nvram_query(key, false);
+}
+
+const char *nvram_query_dangerous(const char *key)
+{
+ return __nvram_query(key, true);
+}
+
+/*
+ * nvram_query_eq_safe/dangerous() - Check if the given 'key' exists
+ * and is set to 'value'.
+ *
+ * Dangerous means it should only be used for testing as it may
+ * mask issues. Safe is ok for long term use.
+ *
+ * Note: It's an error to check for non-existence of a key
+ * by passing 'value == NULL' as a key's value can never be
+ * NULL in nvram.
+ */
+static bool __nvram_query_eq(const char *key, const char *value, bool dangerous)
+{
+ const char *s = __nvram_query(key, dangerous);
+
+ if (!s)
+ return false;
+
+ assert(value != NULL);
+ return !strcmp(s, value);
+}
+
+bool nvram_query_eq_safe(const char *key, const char *value)
+{
+ return __nvram_query_eq(key, value, false);
+}
+
+bool nvram_query_eq_dangerous(const char *key, const char *value)
+{
+ return __nvram_query_eq(key, value, true);
+}
+
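Annotation (not part of the patch): the partition checksum in chrp_nv_cksum() above is a byte-wise sum over the header with end-around carry, computed with the cksum field treated as zero. Here is a self-contained sketch under that assumption; the struct layout (plain big-endian byte pair instead of be16) and the sample field values are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct hdr {
	uint8_t  sig;
	uint8_t  cksum;
	uint8_t  len_be[2];	/* big-endian length, in 16-byte blocks */
	char     name[12];
};

/* Sum of the header bytes with end-around carry, cksum field zeroed. */
static uint8_t chrp_cksum(const struct hdr *h)
{
	struct hdr copy = *h;
	const uint8_t *p = (const uint8_t *)&copy;
	uint8_t sum = 0;
	unsigned int i;

	copy.cksum = 0;
	for (i = 0; i < sizeof(copy); i++) {
		uint8_t next = sum + p[i];

		if (next < sum)	/* 8-bit overflow: wrap the carry back in */
			next++;
		sum = next;
	}
	return sum;
}

int main(void)
{
	struct hdr h;

	memset(&h, 0, sizeof(h));
	h.sig = 0x70;		/* "system" partition signature */
	h.len_be[0] = 0x10;	/* 0x1000 blocks of 16 bytes, big-endian */
	h.len_be[1] = 0x00;
	strcpy(h.name, "common");
	h.cksum = chrp_cksum(&h);

	printf("cksum = 0x%02x\n", h.cksum);
	return 0;
}

The same routine both generates the checksum when a partition is created in nvram_format() and verifies it in nvram_check().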
diff --git a/roms/skiboot/core/nvram.c b/roms/skiboot/core/nvram.c
new file mode 100644
index 000000000..773d20280
--- /dev/null
+++ b/roms/skiboot/core/nvram.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * NVRAM support
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <fsp.h>
+#include <opal.h>
+#include <lock.h>
+#include <device.h>
+#include <platform.h>
+#include <nvram.h>
+#include <timebase.h>
+
+static void *nvram_image;
+static uint32_t nvram_size;
+
+static bool nvram_ready; /* has the nvram been loaded? */
+static bool nvram_valid; /* is the nvram format ok? */
+
+static int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset)
+{
+ if (!nvram_ready)
+ return OPAL_HARDWARE;
+
+ if (!opal_addr_valid((void *)buffer))
+ return OPAL_PARAMETER;
+
+ if (offset >= nvram_size || (offset + size) > nvram_size)
+ return OPAL_PARAMETER;
+
+ memcpy((void *)buffer, nvram_image + offset, size);
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_READ_NVRAM, opal_read_nvram, 3);
+
+static int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset)
+{
+ if (!nvram_ready)
+ return OPAL_HARDWARE;
+
+ if (!opal_addr_valid((void *)buffer))
+ return OPAL_PARAMETER;
+
+ if (offset >= nvram_size || (offset + size) > nvram_size)
+ return OPAL_PARAMETER;
+ memcpy(nvram_image + offset, (void *)buffer, size);
+ if (platform.nvram_write)
+ platform.nvram_write(offset, nvram_image + offset, size);
+
+ /* The host OS has written to the NVRAM so we can't be sure that it's
+ * well formatted.
+ */
+ nvram_valid = false;
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_WRITE_NVRAM, opal_write_nvram, 3);
+
+bool nvram_validate(void)
+{
+ if (!nvram_valid) {
+ if (!nvram_check(nvram_image, nvram_size))
+ nvram_valid = true;
+ }
+
+ return nvram_valid;
+}
+
+static void nvram_reformat(void)
+{
+ if (nvram_format(nvram_image, nvram_size)) {
+ prerror("NVRAM: Failed to format NVRAM!\n");
+ nvram_valid = false;
+ return;
+ }
+
+ /* Write the whole thing back */
+ if (platform.nvram_write)
+ platform.nvram_write(0, nvram_image, nvram_size);
+
+ nvram_validate();
+}
+
+void nvram_reinit(void)
+{
+ /* It's possible we failed to load nvram at boot. */
+ if (!nvram_ready)
+ nvram_init();
+ else if (!nvram_validate())
+ nvram_reformat();
+}
+
+void nvram_read_complete(bool success)
+{
+ struct dt_node *np;
+
+ /* Read not successful, error out and free the buffer */
+ if (!success) {
+ free(nvram_image);
+ nvram_size = 0;
+ return;
+ }
+
+ if (!nvram_validate())
+ nvram_reformat();
+
+ /* Add nvram node */
+ np = dt_new(opal_node, "nvram");
+ dt_add_property_cells(np, "#bytes", nvram_size);
+ dt_add_property_string(np, "compatible", "ibm,opal-nvram");
+
+ /* Mark ready */
+ nvram_ready = true;
+}
+
+bool nvram_wait_for_load(void)
+{
+ uint64_t started;
+
+ /* Short cut */
+ if (nvram_ready)
+ return true;
+
+ /* Tell the caller it will never happen */
+ if (!platform.nvram_info)
+ return false;
+
+ /*
+ * One of two things has happened here.
+ * 1. nvram_wait_for_load() was called before nvram_init()
+ * 2. The read of NVRAM failed.
+ * Either way, this is quite a bad event.
+ */
+ if (!nvram_image && !nvram_size) {
+ prlog(PR_CRIT, "NVRAM: Possible wait before nvram_init()!\n");
+ return false;
+ }
+
+ started = mftb();
+
+ while (!nvram_ready) {
+ opal_run_pollers();
+ /* If the read fails, tell the caller */
+ if (!nvram_image && !nvram_size)
+ return false;
+ }
+
+ prlog(PR_DEBUG, "NVRAM: Waited %lums for nvram to load\n",
+ tb_to_msecs(mftb() - started));
+
+ return true;
+}
+
+bool nvram_has_loaded(void)
+{
+ return nvram_ready;
+}
+
+void nvram_init(void)
+{
+ int rc;
+
+ if (!platform.nvram_info)
+ return;
+ rc = platform.nvram_info(&nvram_size);
+ if (rc) {
+ prerror("NVRAM: Error %d retrieving nvram info\n", rc);
+ return;
+ }
+ prlog(PR_INFO, "NVRAM: Size is %d KB\n", nvram_size >> 10);
+ if (nvram_size > 0x100000) {
+ prlog(PR_WARNING, "NVRAM: Cropping to 1MB !\n");
+ nvram_size = 0x100000;
+ }
+
+ /*
+ * We allocate the nvram image with 4k alignment to make the
+	 * FSP backend's job easier
+ */
+ nvram_image = memalign(0x1000, nvram_size);
+ if (!nvram_image) {
+ prerror("NVRAM: Failed to allocate nvram image\n");
+ nvram_size = 0;
+ return;
+ }
+
+ /* Read it in */
+ rc = platform.nvram_start_read(nvram_image, 0, nvram_size);
+ if (rc) {
+ prerror("NVRAM: Failed to read NVRAM from FSP !\n");
+ nvram_size = 0;
+ free(nvram_image);
+ return;
+ }
+
+ /*
+ * We'll get called back later (or recursively from
+ * nvram_start_read) in nvram_read_complete()
+ */
+}
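
Annotation (not part of the patch): __nvram_query() in nvram-format.c walks the skiboot partition as a packed sequence of NUL-terminated "key=value" strings, treating a leading NUL (i.e. two NULs in a row) as the end of used space. Below is a standalone sketch of that scan; next_key(), lookup_key() and the sample buffer are hypothetical names used only for illustration.

#include <stdio.h>
#include <string.h>

/* Return a pointer to the string after the current NUL-terminated one,
 * or NULL if we hit the double-NUL end marker or run off the buffer. */
static const char *next_key(const char *start, const char *end)
{
	if (*start == '\0')
		return NULL;
	while (start < end) {
		if (*start == '\0')
			return start + 1;
		start++;
	}
	return NULL;
}

/* Scan a packed "key=value\0key=value\0\0" buffer for the given key. */
static const char *lookup_key(const char *buf, const char *end, const char *key)
{
	size_t key_len = strlen(key);
	const char *p = buf;

	while (p) {
		if ((size_t)(end - p) > key_len &&
		    !strncmp(p, key, key_len) && p[key_len] == '=')
			return p + key_len + 1;
		p = next_key(p, end);
	}
	return NULL;
}

int main(void)
{
	/* Two entries followed by the terminating empty string. */
	static const char part[] = "foo=bar\0answer=42\0";
	const char *end = part + sizeof(part);
	const char *val = lookup_key(part, end, "answer");

	printf("answer -> %s\n", val ? val : "(not found)");
	return 0;
}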
diff --git a/roms/skiboot/core/opal-dump.c b/roms/skiboot/core/opal-dump.c
new file mode 100644
index 000000000..4f54a3ef1
--- /dev/null
+++ b/roms/skiboot/core/opal-dump.c
@@ -0,0 +1,582 @@
+/* Copyright 2019 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define pr_fmt(fmt) "DUMP: " fmt
+
+#include <chip.h>
+#include <cpu.h>
+#include <device.h>
+#include <mem-map.h>
+#include <mem_region.h>
+#include <mem_region-malloc.h>
+#include <opal.h>
+#include <opal-dump.h>
+#include <opal-internal.h>
+#include <sbe-p9.h>
+#include <skiboot.h>
+
+#include <ccan/endian/endian.h>
+
+#include "hdata/spira.h"
+
+/* XXX Ideally we should use HDAT provided data (proc_dump_area->thread_size).
+ * But we are not getting this data during boot. Hence let's reserve fixed
+ * memory for architected registers data collection.
+ */
+#define ARCH_REGS_DATA_SIZE_PER_CHIP (512 * 1024)
+
+/* Actual address of MDST and MDDT table */
+#define MDST_TABLE_BASE (SKIBOOT_BASE + MDST_TABLE_OFF)
+#define MDDT_TABLE_BASE (SKIBOOT_BASE + MDDT_TABLE_OFF)
+#define PROC_DUMP_AREA_BASE (SKIBOOT_BASE + PROC_DUMP_AREA_OFF)
+
+static struct spira_ntuple *ntuple_mdst;
+static struct spira_ntuple *ntuple_mddt;
+static struct spira_ntuple *ntuple_mdrt;
+
+static struct mpipl_metadata *mpipl_metadata;
+
+/* Dump metadata area */
+static struct opal_mpipl_fadump *opal_mpipl_data;
+static struct opal_mpipl_fadump *opal_mpipl_cpu_data;
+
+/*
+ * Number of tags passed by OPAL to kernel after MPIPL boot.
+ * Currently it supports the following tags:
+ * - CPU register data area
+ * - OPAL metadata area address
+ * - Kernel passed tag during MPIPL registration
+ * - Post MPIPL boot memory size
+ */
+#define MAX_OPAL_MPIPL_TAGS 0x04
+static u64 opal_mpipl_tags[MAX_OPAL_MPIPL_TAGS];
+static int opal_mpipl_max_tags = MAX_OPAL_MPIPL_TAGS;
+
+static u64 opal_dump_addr, opal_dump_size;
+
+static bool mpipl_enabled;
+
+static int opal_mpipl_add_entry(u8 region, u64 src, u64 dest, u64 size)
+{
+ int i;
+ int mdst_cnt = be16_to_cpu(ntuple_mdst->act_cnt);
+ int mddt_cnt = be16_to_cpu(ntuple_mddt->act_cnt);
+ struct mdst_table *mdst;
+ struct mddt_table *mddt;
+
+ if (mdst_cnt >= MDST_TABLE_SIZE / sizeof(struct mdst_table)) {
+ prlog(PR_DEBUG, "MDST table is full\n");
+ return OPAL_RESOURCE;
+ }
+
+ if (mddt_cnt >= MDDT_TABLE_SIZE / sizeof(struct mddt_table)) {
+ prlog(PR_DEBUG, "MDDT table is full\n");
+ return OPAL_RESOURCE;
+ }
+
+ /* Use relocated memory address */
+ mdst = (void *)(MDST_TABLE_BASE);
+ mddt = (void *)(MDDT_TABLE_BASE);
+
+ /* Check for duplicate entry */
+ for (i = 0; i < mdst_cnt; i++) {
+ if (be64_to_cpu(mdst->addr) == (src | HRMOR_BIT)) {
+ prlog(PR_DEBUG,
+ "Duplicate source address : 0x%llx", src);
+ return OPAL_PARAMETER;
+ }
+ mdst++;
+ }
+ for (i = 0; i < mddt_cnt; i++) {
+ if (be64_to_cpu(mddt->addr) == (dest | HRMOR_BIT)) {
+ prlog(PR_DEBUG,
+ "Duplicate destination address : 0x%llx", dest);
+ return OPAL_PARAMETER;
+ }
+ mddt++;
+ }
+
+ /* Add OPAL source address to MDST entry */
+ mdst->addr = cpu_to_be64(src | HRMOR_BIT);
+ mdst->data_region = region;
+ mdst->size = cpu_to_be32(size);
+ ntuple_mdst->act_cnt = cpu_to_be16(mdst_cnt + 1);
+
+ /* Add OPAL destination address to MDDT entry */
+ mddt->addr = cpu_to_be64(dest | HRMOR_BIT);
+ mddt->data_region = region;
+ mddt->size = cpu_to_be32(size);
+ ntuple_mddt->act_cnt = cpu_to_be16(mddt_cnt + 1);
+
+ prlog(PR_TRACE, "Added new entry. src : 0x%llx, dest : 0x%llx,"
+ " size : 0x%llx\n", src, dest, size);
+ return OPAL_SUCCESS;
+}
+
+/* Remove entry from source (MDST) table */
+static int opal_mpipl_remove_entry_mdst(bool remove_all, u8 region, u64 src)
+{
+ bool found = false;
+ int i, j;
+ int mdst_cnt = be16_to_cpu(ntuple_mdst->act_cnt);
+ struct mdst_table *tmp_mdst;
+ struct mdst_table *mdst = (void *)(MDST_TABLE_BASE);
+
+ for (i = 0; i < mdst_cnt;) {
+ if (mdst->data_region != region) {
+ mdst++;
+ i++;
+ continue;
+ }
+
+ if (remove_all != true &&
+ be64_to_cpu(mdst->addr) != (src | HRMOR_BIT)) {
+ mdst++;
+ i++;
+ continue;
+ }
+
+ tmp_mdst = mdst;
+ memset(tmp_mdst, 0, sizeof(struct mdst_table));
+
+ for (j = i; j < mdst_cnt - 1; j++) {
+ memcpy((void *)tmp_mdst,
+ (void *)(tmp_mdst + 1), sizeof(struct mdst_table));
+ tmp_mdst++;
+ memset(tmp_mdst, 0, sizeof(struct mdst_table));
+ }
+
+ mdst_cnt--;
+
+ if (remove_all == false) {
+ found = true;
+ break;
+ }
+ } /* end - for loop */
+
+ ntuple_mdst->act_cnt = cpu_to_be16((u16)mdst_cnt);
+
+ if (remove_all == false && found == false) {
+ prlog(PR_DEBUG,
+ "Source address [0x%llx] not found in MDST table\n", src);
+ return OPAL_PARAMETER;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+/* Remove entry from destination (MDDT) table */
+static int opal_mpipl_remove_entry_mddt(bool remove_all, u8 region, u64 dest)
+{
+ bool found = false;
+ int i, j;
+ int mddt_cnt = be16_to_cpu(ntuple_mddt->act_cnt);
+ struct mddt_table *tmp_mddt;
+ struct mddt_table *mddt = (void *)(MDDT_TABLE_BASE);
+
+ for (i = 0; i < mddt_cnt;) {
+ if (mddt->data_region != region) {
+ mddt++;
+ i++;
+ continue;
+ }
+
+ if (remove_all != true &&
+ be64_to_cpu(mddt->addr) != (dest | HRMOR_BIT)) {
+ mddt++;
+ i++;
+ continue;
+ }
+
+ tmp_mddt = mddt;
+ memset(tmp_mddt, 0, sizeof(struct mddt_table));
+
+ for (j = i; j < mddt_cnt - 1; j++) {
+ memcpy((void *)tmp_mddt,
+ (void *)(tmp_mddt + 1), sizeof(struct mddt_table));
+ tmp_mddt++;
+ memset(tmp_mddt, 0, sizeof(struct mddt_table));
+ }
+
+ mddt_cnt--;
+
+ if (remove_all == false) {
+ found = true;
+ break;
+ }
+ } /* end - for loop */
+
+ ntuple_mddt->act_cnt = cpu_to_be16((u16)mddt_cnt);
+
+ if (remove_all == false && found == false) {
+ prlog(PR_DEBUG,
+ "Dest address [0x%llx] not found in MDDT table\n", dest);
+ return OPAL_PARAMETER;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+/* Register for OPAL dump. */
+static void opal_mpipl_register(void)
+{
+ u64 arch_regs_dest, arch_regs_size;
+ struct proc_dump_area *proc_dump = (void *)(PROC_DUMP_AREA_BASE);
+
+ /* Add OPAL reservation detail to MDST/MDDT table */
+ opal_mpipl_add_entry(DUMP_REGION_OPAL_MEMORY,
+ SKIBOOT_BASE, opal_dump_addr, opal_dump_size);
+
+ /* Thread size check */
+ if (proc_dump->thread_size != 0) {
+ prlog(PR_INFO, "Thread register entry size is available, "
+ "but not supported.\n");
+ }
+
+ /* Reserve memory used to capture architected register state */
+ arch_regs_dest = opal_dump_addr + opal_dump_size;
+ arch_regs_size = nr_chips() * ARCH_REGS_DATA_SIZE_PER_CHIP;
+ proc_dump->alloc_addr = cpu_to_be64(arch_regs_dest | HRMOR_BIT);
+ proc_dump->alloc_size = cpu_to_be32(arch_regs_size);
+ prlog(PR_NOTICE, "Architected register dest addr : 0x%llx, "
+ "size : 0x%llx\n", arch_regs_dest, arch_regs_size);
+}
+
+static int payload_mpipl_register(u64 src, u64 dest, u64 size)
+{
+ if (!opal_addr_valid((void *)src)) {
+ prlog(PR_DEBUG, "Invalid source address [0x%llx]\n", src);
+ return OPAL_PARAMETER;
+ }
+
+ if (!opal_addr_valid((void *)dest)) {
+ prlog(PR_DEBUG, "Invalid dest address [0x%llx]\n", dest);
+ return OPAL_PARAMETER;
+ }
+
+ if (size <= 0) {
+ prlog(PR_DEBUG, "Invalid size [0x%llx]\n", size);
+ return OPAL_PARAMETER;
+ }
+
+ return opal_mpipl_add_entry(DUMP_REGION_KERNEL, src, dest, size);
+}
+
+static int payload_mpipl_unregister(u64 src, u64 dest)
+{
+ int rc;
+
+ /* Remove src from MDST table */
+ rc = opal_mpipl_remove_entry_mdst(false, DUMP_REGION_KERNEL, src);
+ if (rc)
+ return rc;
+
+ /* Remove dest from MDDT table */
+ rc = opal_mpipl_remove_entry_mddt(false, DUMP_REGION_KERNEL, dest);
+ return rc;
+}
+
+static int payload_mpipl_unregister_all(void)
+{
+ opal_mpipl_remove_entry_mdst(true, DUMP_REGION_KERNEL, 0);
+ opal_mpipl_remove_entry_mddt(true, DUMP_REGION_KERNEL, 0);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t opal_mpipl_update(enum opal_mpipl_ops ops,
+ u64 src, u64 dest, u64 size)
+{
+ int rc;
+
+ switch (ops) {
+ case OPAL_MPIPL_ADD_RANGE:
+ rc = payload_mpipl_register(src, dest, size);
+ if (!rc)
+ prlog(PR_NOTICE, "Payload registered for MPIPL\n");
+ break;
+ case OPAL_MPIPL_REMOVE_RANGE:
+ rc = payload_mpipl_unregister(src, dest);
+ if (!rc) {
+			prlog(PR_NOTICE, "Payload removed entry from MPIPL. "
+ "[src : 0x%llx, dest : 0x%llx]\n", src, dest);
+ }
+ break;
+ case OPAL_MPIPL_REMOVE_ALL:
+ rc = payload_mpipl_unregister_all();
+ if (!rc)
+ prlog(PR_NOTICE, "Payload unregistered for MPIPL\n");
+ break;
+ case OPAL_MPIPL_FREE_PRESERVED_MEMORY:
+ /* Clear tags */
+ memset(&opal_mpipl_tags, 0, (sizeof(u64) * MAX_OPAL_MPIPL_TAGS));
+ opal_mpipl_max_tags = 0;
+ /* Release memory */
+ free(opal_mpipl_data);
+ opal_mpipl_data = NULL;
+ free(opal_mpipl_cpu_data);
+ opal_mpipl_cpu_data = NULL;
+ /* Clear MDRT table */
+ memset((void *)MDRT_TABLE_BASE, 0, MDRT_TABLE_SIZE);
+ /* Set MDRT count to max allocated count */
+ ntuple_mdrt->act_cnt = cpu_to_be16(MDRT_TABLE_SIZE / sizeof(struct mdrt_table));
+ rc = OPAL_SUCCESS;
+ prlog(PR_NOTICE, "Payload Invalidated MPIPL\n");
+ break;
+ default:
+ prlog(PR_DEBUG, "Unsupported MPIPL update operation : 0x%x\n", ops);
+ rc = OPAL_PARAMETER;
+ break;
+ }
+
+ return rc;
+}
+
+static int64_t opal_mpipl_register_tag(enum opal_mpipl_tags tag,
+ uint64_t tag_val)
+{
+ int rc = OPAL_SUCCESS;
+
+ switch (tag) {
+ case OPAL_MPIPL_TAG_BOOT_MEM:
+ if (tag_val <= 0 || tag_val > top_of_ram) {
+ prlog(PR_DEBUG, "Payload sent invalid boot mem size"
+ " : 0x%llx\n", tag_val);
+ rc = OPAL_PARAMETER;
+ } else {
+ mpipl_metadata->boot_mem_size = tag_val;
+ prlog(PR_NOTICE, "Boot mem size : 0x%llx\n", tag_val);
+ }
+ break;
+ case OPAL_MPIPL_TAG_KERNEL:
+ mpipl_metadata->kernel_tag = tag_val;
+ prlog(PR_NOTICE, "Payload sent metadata tag : 0x%llx\n", tag_val);
+ break;
+ default:
+ prlog(PR_DEBUG, "Payload sent unsupported tag : 0x%x\n", tag);
+ rc = OPAL_PARAMETER;
+ break;
+ }
+ return rc;
+}
+
+static uint64_t opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *tag_val)
+{
+ if (!opal_addr_valid(tag_val)) {
+ prlog(PR_DEBUG, "Invalid tag address\n");
+ return OPAL_PARAMETER;
+ }
+
+ if (tag >= opal_mpipl_max_tags)
+ return OPAL_PARAMETER;
+
+ *tag_val = cpu_to_be64(opal_mpipl_tags[tag]);
+ return OPAL_SUCCESS;
+}
+
+static inline void post_mpipl_get_preserved_tags(void)
+{
+ if (mpipl_metadata->kernel_tag)
+ opal_mpipl_tags[OPAL_MPIPL_TAG_KERNEL] = mpipl_metadata->kernel_tag;
+ if (mpipl_metadata->boot_mem_size)
+ opal_mpipl_tags[OPAL_MPIPL_TAG_BOOT_MEM] = mpipl_metadata->boot_mem_size;
+}
+
+static void post_mpipl_arch_regs_data(void)
+{
+ struct proc_dump_area *proc_dump = (void *)(PROC_DUMP_AREA_BASE);
+
+ if (proc_dump->dest_addr == 0) {
+ prlog(PR_DEBUG, "Invalid CPU registers destination address\n");
+ return;
+ }
+
+ if (proc_dump->act_size == 0) {
+ prlog(PR_DEBUG, "Invalid CPU registers destination size\n");
+ return;
+ }
+
+ opal_mpipl_cpu_data = zalloc(sizeof(struct opal_mpipl_fadump) +
+ sizeof(struct opal_mpipl_region));
+ if (!opal_mpipl_cpu_data) {
+ prlog(PR_ERR, "Failed to allocate memory\n");
+ return;
+ }
+
+ /* Fill CPU register details */
+ opal_mpipl_cpu_data->version = OPAL_MPIPL_VERSION;
+ opal_mpipl_cpu_data->cpu_data_version = cpu_to_be32((u32)proc_dump->version);
+ opal_mpipl_cpu_data->cpu_data_size = proc_dump->thread_size;
+ opal_mpipl_cpu_data->region_cnt = cpu_to_be32(1);
+
+ opal_mpipl_cpu_data->region[0].src = proc_dump->dest_addr & ~(cpu_to_be64(HRMOR_BIT));
+ opal_mpipl_cpu_data->region[0].dest = proc_dump->dest_addr & ~(cpu_to_be64(HRMOR_BIT));
+ opal_mpipl_cpu_data->region[0].size = cpu_to_be64(be32_to_cpu(proc_dump->act_size));
+
+ /* Update tag */
+ opal_mpipl_tags[OPAL_MPIPL_TAG_CPU] = (u64)opal_mpipl_cpu_data;
+}
+
+static void post_mpipl_get_opal_data(void)
+{
+ struct mdrt_table *mdrt = (void *)(MDRT_TABLE_BASE);
+ int i, j = 0, count = 0;
+ int mdrt_cnt = be16_to_cpu(ntuple_mdrt->act_cnt);
+ struct opal_mpipl_region *region;
+
+ /* Count OPAL dump regions */
+ for (i = 0; i < mdrt_cnt; i++) {
+ if (mdrt->data_region == DUMP_REGION_OPAL_MEMORY)
+ count++;
+ mdrt++;
+ }
+
+ if (count == 0) {
+ prlog(PR_INFO, "OPAL dump is not available\n");
+ return;
+ }
+
+ opal_mpipl_data = zalloc(sizeof(struct opal_mpipl_fadump) +
+ count * sizeof(struct opal_mpipl_region));
+ if (!opal_mpipl_data) {
+ prlog(PR_ERR, "Failed to allocate memory\n");
+ return;
+ }
+
+ /* Fill OPAL dump details */
+ opal_mpipl_data->version = OPAL_MPIPL_VERSION;
+ opal_mpipl_data->crashing_pir = cpu_to_be32(mpipl_metadata->crashing_pir);
+ opal_mpipl_data->region_cnt = cpu_to_be32(count);
+ region = opal_mpipl_data->region;
+
+ mdrt = (void *)(MDRT_TABLE_BASE);
+ for (i = 0; i < mdrt_cnt; i++) {
+ if (mdrt->data_region != DUMP_REGION_OPAL_MEMORY) {
+ mdrt++;
+ continue;
+ }
+
+ region[j].src = mdrt->src_addr & ~(cpu_to_be64(HRMOR_BIT));
+ region[j].dest = mdrt->dest_addr & ~(cpu_to_be64(HRMOR_BIT));
+ region[j].size = cpu_to_be64(be32_to_cpu(mdrt->size));
+
+ prlog(PR_NOTICE, "OPAL reserved region %d - src : 0x%llx, "
+ "dest : 0x%llx, size : 0x%llx\n", j,
+ be64_to_cpu(region[j].src), be64_to_cpu(region[j].dest),
+ be64_to_cpu(region[j].size));
+
+ mdrt++;
+ j++;
+ if (j == count)
+ break;
+ }
+
+ opal_mpipl_tags[OPAL_MPIPL_TAG_OPAL] = (u64)opal_mpipl_data;
+}
+
+void opal_mpipl_save_crashing_pir(void)
+{
+ if (!is_mpipl_enabled())
+ return;
+
+ mpipl_metadata->crashing_pir = this_cpu()->pir;
+ prlog(PR_NOTICE, "Crashing PIR = 0x%x\n", this_cpu()->pir);
+}
+
+void opal_mpipl_reserve_mem(void)
+{
+ struct dt_node *opal_node, *dump_node;
+ u64 arch_regs_dest, arch_regs_size;
+
+ opal_node = dt_find_by_path(dt_root, "ibm,opal");
+ if (!opal_node)
+ return;
+
+ dump_node = dt_find_by_path(opal_node, "dump");
+ if (!dump_node)
+ return;
+
+	/* Calculate and reserve OPAL dump destination memory */
+ opal_dump_size = SKIBOOT_SIZE + (cpu_max_pir + 1) * STACK_SIZE;
+ opal_dump_addr = SKIBOOT_BASE + opal_dump_size;
+ mem_reserve_fw("ibm,firmware-dump",
+ opal_dump_addr, opal_dump_size);
+
+ /* Reserve memory to capture CPU register data */
+ arch_regs_dest = opal_dump_addr + opal_dump_size;
+ arch_regs_size = nr_chips() * ARCH_REGS_DATA_SIZE_PER_CHIP;
+ mem_reserve_fw("ibm,firmware-arch-registers",
+ arch_regs_dest, arch_regs_size);
+}
+
+bool is_mpipl_enabled(void)
+{
+ return mpipl_enabled;
+}
+
+void opal_mpipl_init(void)
+{
+ void *mdst_base = (void *)MDST_TABLE_BASE;
+ void *mddt_base = (void *)MDDT_TABLE_BASE;
+ struct dt_node *dump_node;
+
+ dump_node = dt_find_by_path(opal_node, "dump");
+ if (!dump_node)
+ return;
+
+ /* Get MDST and MDDT ntuple from SPIRAH */
+ ntuple_mdst = &(spirah.ntuples.mdump_src);
+ ntuple_mddt = &(spirah.ntuples.mdump_dst);
+ ntuple_mdrt = &(spirah.ntuples.mdump_res);
+
+ /* Get metadata area pointer */
+ mpipl_metadata = (void *)(DUMP_METADATA_AREA_BASE);
+
+ if (dt_find_property(dump_node, "mpipl-boot")) {
+ disable_fast_reboot("MPIPL Boot");
+
+ post_mpipl_get_preserved_tags();
+ post_mpipl_get_opal_data();
+ post_mpipl_arch_regs_data();
+ }
+
+ /* Clear OPAL metadata area */
+ if (sizeof(struct mpipl_metadata) > DUMP_METADATA_AREA_SIZE) {
+ prlog(PR_ERR, "INSUFFICIENT OPAL METADATA AREA\n");
+		prlog(PR_ERR, "INCREASE OPAL METADATA AREA SIZE\n");
+ assert(false);
+ }
+ memset(mpipl_metadata, 0, sizeof(struct mpipl_metadata));
+
+ /* Clear MDST and MDDT table */
+ memset(mdst_base, 0, MDST_TABLE_SIZE);
+ ntuple_mdst->act_cnt = 0;
+ memset(mddt_base, 0, MDDT_TABLE_SIZE);
+ ntuple_mddt->act_cnt = 0;
+
+ opal_mpipl_register();
+
+ /* Send OPAL relocated base address to SBE */
+ p9_sbe_send_relocated_base(SKIBOOT_BASE);
+
+ /* OPAL API for MPIPL update */
+ opal_register(OPAL_MPIPL_UPDATE, opal_mpipl_update, 4);
+ opal_register(OPAL_MPIPL_REGISTER_TAG, opal_mpipl_register_tag, 2);
+ opal_register(OPAL_MPIPL_QUERY_TAG, opal_mpipl_query_tag, 2);
+
+ /* Enable MPIPL */
+ mpipl_enabled = true;
+}
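
Annotation (not part of the patch): opal_mpipl_remove_entry_mdst()/_mddt() above delete entries by shifting the tail of the table down so that active entries stay contiguous and the count can simply be decremented. Below is a minimal sketch of that remove-and-compact pattern; the entry layout and demo values are made up, and unlike the firmware code this version removes every matching entry.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct entry {
	uint8_t  region;
	uint64_t addr;
};

/* Remove every entry matching (region, addr) and shift the rest down.
 * Returns the new count. */
static int remove_entry(struct entry *tbl, int count, uint8_t region, uint64_t addr)
{
	int i = 0;

	while (i < count) {
		if (tbl[i].region != region || tbl[i].addr != addr) {
			i++;
			continue;
		}
		/* Shift the remaining entries down over the hole. */
		memmove(&tbl[i], &tbl[i + 1], (count - i - 1) * sizeof(*tbl));
		count--;
		memset(&tbl[count], 0, sizeof(*tbl));
	}
	return count;
}

int main(void)
{
	struct entry tbl[] = {
		{ 1, 0x1000 }, { 2, 0x2000 }, { 1, 0x3000 },
	};
	int n = remove_entry(tbl, 3, 2, 0x2000);
	int i;

	for (i = 0; i < n; i++)
		printf("entry %d: region %u addr 0x%llx\n",
		       i, (unsigned)tbl[i].region, (unsigned long long)tbl[i].addr);
	return 0;
}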
diff --git a/roms/skiboot/core/opal-msg.c b/roms/skiboot/core/opal-msg.c
new file mode 100644
index 000000000..65a2476b2
--- /dev/null
+++ b/roms/skiboot/core/opal-msg.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * OPAL Message queue between host and skiboot
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "opalmsg: " fmt
+#include <skiboot.h>
+#include <opal-msg.h>
+#include <opal-api.h>
+#include <lock.h>
+
+#define OPAL_MAX_MSGS (OPAL_MSG_TYPE_MAX + OPAL_MAX_ASYNC_COMP - 1)
+
+struct opal_msg_entry {
+ struct list_node link;
+ void (*consumed)(void *data, int status);
+ bool extended;
+ void *data;
+ struct opal_msg msg;
+};
+
+static LIST_HEAD(msg_free_list);
+static LIST_HEAD(msg_pending_list);
+
+static struct lock opal_msg_lock = LOCK_UNLOCKED;
+
+int _opal_queue_msg(enum opal_msg_type msg_type, void *data,
+ void (*consumed)(void *data, int status),
+ size_t params_size, const void *params)
+{
+ struct opal_msg_entry *entry;
+ uint64_t entry_size;
+
+ if ((params_size + OPAL_MSG_HDR_SIZE) > OPAL_MSG_SIZE) {
+ prlog(PR_DEBUG, "param_size (0x%x) > opal_msg param size (0x%x)\n",
+ (u32)params_size, (u32)(OPAL_MSG_SIZE - OPAL_MSG_HDR_SIZE));
+ return OPAL_PARAMETER;
+ }
+
+ lock(&opal_msg_lock);
+
+ if (params_size > OPAL_MSG_FIXED_PARAMS_SIZE) {
+ entry_size = sizeof(struct opal_msg_entry) + params_size;
+ entry_size -= OPAL_MSG_FIXED_PARAMS_SIZE;
+ entry = zalloc(entry_size);
+ if (entry)
+ entry->extended = true;
+ } else {
+ entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+ if (!entry) {
+ prerror("No available node in the free list, allocating\n");
+ entry = zalloc(sizeof(struct opal_msg_entry));
+ }
+ }
+ if (!entry) {
+ prerror("Allocation failed\n");
+ unlock(&opal_msg_lock);
+ return OPAL_RESOURCE;
+ }
+
+ entry->consumed = consumed;
+ entry->data = data;
+ entry->msg.msg_type = cpu_to_be32(msg_type);
+ entry->msg.size = cpu_to_be32(params_size);
+ memcpy(entry->msg.params, params, params_size);
+
+ list_add_tail(&msg_pending_list, &entry->link);
+ opal_update_pending_evt(OPAL_EVENT_MSG_PENDING,
+ OPAL_EVENT_MSG_PENDING);
+ unlock(&opal_msg_lock);
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t opal_get_msg(uint64_t *buffer, uint64_t size)
+{
+ struct opal_msg_entry *entry;
+ void (*callback)(void *data, int status);
+ void *data;
+ uint64_t msg_size;
+ int rc = OPAL_SUCCESS;
+
+ if (size < sizeof(struct opal_msg) || !buffer)
+ return OPAL_PARAMETER;
+
+ if (!opal_addr_valid(buffer))
+ return OPAL_PARAMETER;
+
+ lock(&opal_msg_lock);
+
+ entry = list_pop(&msg_pending_list, struct opal_msg_entry, link);
+ if (!entry) {
+ unlock(&opal_msg_lock);
+ return OPAL_RESOURCE;
+ }
+
+ msg_size = OPAL_MSG_HDR_SIZE + be32_to_cpu(entry->msg.size);
+ if (size < msg_size) {
+ /* Send partial data to Linux */
+ prlog(PR_NOTICE, "Sending partial data [msg_type : 0x%x, "
+ "msg_size : 0x%x, buf_size : 0x%x]\n",
+ be32_to_cpu(entry->msg.msg_type),
+ (u32)msg_size, (u32)size);
+
+ entry->msg.size = cpu_to_be32(size - OPAL_MSG_HDR_SIZE);
+ msg_size = size;
+ rc = OPAL_PARTIAL;
+ }
+
+ memcpy((void *)buffer, (void *)&entry->msg, msg_size);
+ callback = entry->consumed;
+ data = entry->data;
+
+ if (entry->extended)
+ free(entry);
+ else
+ list_add(&msg_free_list, &entry->link);
+
+ if (list_empty(&msg_pending_list))
+ opal_update_pending_evt(OPAL_EVENT_MSG_PENDING, 0);
+
+ unlock(&opal_msg_lock);
+
+ if (callback)
+ callback(data, rc);
+
+ return rc;
+}
+opal_call(OPAL_GET_MSG, opal_get_msg, 2);
+
+static int64_t opal_check_completion(uint64_t *buffer, uint64_t size,
+ uint64_t token)
+{
+ struct opal_msg_entry *entry, *next_entry;
+ void (*callback)(void *data, int status) = NULL;
+ int rc = OPAL_BUSY;
+ void *data = NULL;
+
+ if (!opal_addr_valid(buffer))
+ return OPAL_PARAMETER;
+
+ lock(&opal_msg_lock);
+ list_for_each_safe(&msg_pending_list, entry, next_entry, link) {
+ if (be32_to_cpu(entry->msg.msg_type) == OPAL_MSG_ASYNC_COMP &&
+ be64_to_cpu(entry->msg.params[0]) == token) {
+ list_del(&entry->link);
+ callback = entry->consumed;
+ data = entry->data;
+ list_add(&msg_free_list, &entry->link);
+ if (list_empty(&msg_pending_list))
+ opal_update_pending_evt(OPAL_EVENT_MSG_PENDING,
+ 0);
+ rc = OPAL_SUCCESS;
+ break;
+ }
+ }
+
+ if (rc == OPAL_SUCCESS && size >= sizeof(struct opal_msg))
+ memcpy(buffer, &entry->msg, sizeof(entry->msg));
+
+ unlock(&opal_msg_lock);
+
+ if (callback)
+ callback(data, OPAL_SUCCESS);
+
+ return rc;
+
+}
+opal_call(OPAL_CHECK_ASYNC_COMPLETION, opal_check_completion, 3);
+
+void opal_init_msg(void)
+{
+ struct opal_msg_entry *entry;
+ int i;
+
+ for (i = 0; i < OPAL_MAX_MSGS; i++, entry++) {
+ entry = zalloc(sizeof(*entry));
+ if (!entry)
+ goto err;
+ list_add_tail(&msg_free_list, &entry->link);
+ }
+ return;
+
+err:
+ for (; i > 0; i--) {
+ entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+ if (entry)
+ free(entry);
+ }
+}
+
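Annotation (not part of the patch): _opal_queue_msg()/opal_get_msg() above recycle fixed-size message entries through a free list, falling back to allocation only when the pool is exhausted. Below is a single-threaded sketch of that pattern using a plain singly linked list instead of skiboot's ccan lists, with no locking; all names are hypothetical.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct msg {
	struct msg *next;
	char payload[64];
};

static struct msg *free_list;
static struct msg *pending_head, *pending_tail;

static struct msg *pool_get(void)
{
	struct msg *m = free_list;

	if (m)
		free_list = m->next;
	else
		m = calloc(1, sizeof(*m));	/* fall back to allocation */
	return m;
}

static void queue_msg(const char *text)
{
	struct msg *m = pool_get();

	if (!m)
		return;
	snprintf(m->payload, sizeof(m->payload), "%s", text);
	m->next = NULL;
	if (pending_tail)
		pending_tail->next = m;
	else
		pending_head = m;
	pending_tail = m;
}

static void consume_msg(void)
{
	struct msg *m = pending_head;

	if (!m)
		return;
	pending_head = m->next;
	if (!pending_head)
		pending_tail = NULL;
	printf("got: %s\n", m->payload);
	m->next = free_list;	/* recycle instead of free() */
	free_list = m;
}

int main(void)
{
	queue_msg("hello");
	queue_msg("world");
	consume_msg();
	consume_msg();
	return 0;
}

opal_init_msg() corresponds to pre-populating free_list with OPAL_MAX_MSGS entries at boot.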
diff --git a/roms/skiboot/core/opal.c b/roms/skiboot/core/opal.c
new file mode 100644
index 000000000..2898a45ce
--- /dev/null
+++ b/roms/skiboot/core/opal.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Base support for OPAL calls
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <stack.h>
+#include <lock.h>
+#include <fsp.h>
+#include <cpu.h>
+#include <interrupts.h>
+#include <op-panel.h>
+#include <device.h>
+#include <console.h>
+#include <trace.h>
+#include <timebase.h>
+#include <affinity.h>
+#include <opal-msg.h>
+#include <timer.h>
+#include <elf-abi.h>
+#include <errorlog.h>
+#include <occ.h>
+
+/* Pending events to signal via opal_poll_events */
+uint64_t opal_pending_events;
+
+/* OPAL dispatch table defined in head.S */
+extern const uint64_t opal_branch_table[];
+
+/* Number of args expected for each call. */
+static const u8 opal_num_args[OPAL_LAST+1];
+
+/* OPAL anchor node */
+struct dt_node *opal_node;
+
+/* mask of dynamic vs fixed events; opal_allocate_dynamic_event will
+ * only allocate from this range */
+static const uint64_t opal_dynamic_events_mask = 0xffffffff00000000ul;
+static uint64_t opal_dynamic_events;
+
+extern uint32_t attn_trigger;
+extern uint32_t hir_trigger;
+
+
+void opal_table_init(void)
+{
+ struct opal_table_entry *s = __opal_table_start;
+ struct opal_table_entry *e = __opal_table_end;
+
+ prlog(PR_DEBUG, "OPAL table: %p .. %p, branch table: %p\n",
+ s, e, opal_branch_table);
+ while(s < e) {
+ ((uint64_t *)opal_branch_table)[s->token] = function_entry_address(s->func);
+ ((u8 *)opal_num_args)[s->token] = s->nargs;
+ s++;
+ }
+}
+
+/* Called from head.S, thus no prototype */
+long opal_bad_token(uint64_t token);
+
+long opal_bad_token(uint64_t token)
+{
+ /**
+ * @fwts-label OPALBadToken
+ * @fwts-advice OPAL was called with a bad token. On POWER8 and
+ * earlier, Linux kernels had a bug where they wouldn't check
+ * if firmware supported particular OPAL calls before making them.
+ * It is, in fact, harmless for these cases. On systems newer than
+ * POWER8, this should never happen and indicates a kernel bug
+ * where OPAL_CHECK_TOKEN isn't being called where it should be.
+ */
+ prlog(PR_ERR, "OPAL: Called with bad token %lld !\n", token);
+
+ return OPAL_PARAMETER;
+}
+
+#ifdef OPAL_TRACE_ENTRY
+static void opal_trace_entry(struct stack_frame *eframe __unused)
+{
+ union trace t;
+ unsigned nargs, i;
+
+ if (eframe->gpr[0] > OPAL_LAST)
+ nargs = 0;
+ else
+ nargs = opal_num_args[eframe->gpr[0]];
+
+ t.opal.token = cpu_to_be64(eframe->gpr[0]);
+ t.opal.lr = cpu_to_be64(eframe->lr);
+ t.opal.sp = cpu_to_be64(eframe->gpr[1]);
+ for(i=0; i<nargs; i++)
+ t.opal.r3_to_11[i] = cpu_to_be64(eframe->gpr[3+i]);
+
+ trace_add(&t, TRACE_OPAL, offsetof(struct trace_opal, r3_to_11[nargs]));
+}
+#endif
+
+/*
+ * opal_quiesce_state is used as a lock. Don't use an actual lock to avoid
+ * lock busting.
+ */
+static uint32_t opal_quiesce_state; /* 0 or QUIESCE_HOLD/QUIESCE_REJECT */
+static int32_t opal_quiesce_owner; /* PIR */
+static int32_t opal_quiesce_target; /* -1 or PIR */
+
+static int64_t opal_check_token(uint64_t token);
+
+/* Called from head.S, thus no prototype */
+int64_t opal_entry_check(struct stack_frame *eframe);
+
+int64_t opal_entry_check(struct stack_frame *eframe)
+{
+ struct cpu_thread *cpu = this_cpu();
+ uint64_t token = eframe->gpr[0];
+
+ if (cpu->pir != mfspr(SPR_PIR)) {
+ printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n",
+ mfspr(SPR_PIR), cpu, cpu->pir, token);
+ abort();
+ }
+
+#ifdef OPAL_TRACE_ENTRY
+ opal_trace_entry(eframe);
+#endif
+
+ if (!opal_check_token(token))
+ return opal_bad_token(token);
+
+ if (!opal_quiesce_state && cpu->in_opal_call > 1) {
+ disable_fast_reboot("Kernel re-entered OPAL");
+ switch (token) {
+ case OPAL_CONSOLE_READ:
+ case OPAL_CONSOLE_WRITE:
+ case OPAL_CONSOLE_WRITE_BUFFER_SPACE:
+ case OPAL_CONSOLE_FLUSH:
+ case OPAL_POLL_EVENTS:
+ case OPAL_CHECK_TOKEN:
+ case OPAL_CEC_REBOOT:
+ case OPAL_CEC_REBOOT2:
+ case OPAL_SIGNAL_SYSTEM_RESET:
+ break;
+ default:
+ printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n",
+ mfspr(SPR_PIR), cpu, cpu->pir, token);
+ if (cpu->in_opal_call > 2) {
+ printf("Emergency stack is destroyed, can't continue.\n");
+ abort();
+ }
+ return OPAL_INTERNAL_ERROR;
+ }
+ }
+
+ cpu->entered_opal_call_at = mftb();
+ return OPAL_SUCCESS;
+}
+
+int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe);
+
+int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
+{
+ struct cpu_thread *cpu = this_cpu();
+ uint64_t token = eframe->gpr[0];
+ uint64_t now = mftb();
+ uint64_t call_time = tb_to_msecs(now - cpu->entered_opal_call_at);
+
+ if (!cpu->in_opal_call) {
+ disable_fast_reboot("Un-accounted firmware entry");
+ printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
+ mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
+ cpu->in_opal_call++; /* avoid exit path underflowing */
+ } else {
+ if (cpu->in_opal_call > 2) {
+ printf("Emergency stack is destroyed, can't continue.\n");
+ abort();
+ }
+ if (!list_empty(&cpu->locks_held)) {
+ prlog(PR_ERR, "OPAL exiting with locks held, pir=%04x token=%llu retval=%lld\n",
+ cpu->pir, token, retval);
+ drop_my_locks(true);
+ }
+ }
+
+ if (call_time > 100 && token != OPAL_RESYNC_TIMEBASE) {
+ prlog((call_time < 1000) ? PR_DEBUG : PR_WARNING,
+ "Spent %llu msecs in OPAL call %llu!\n",
+ call_time, token);
+ }
+
+ cpu->current_token = 0;
+
+ return retval;
+}
+
+int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
+{
+ struct cpu_thread *cpu = this_cpu();
+ struct cpu_thread *target = NULL;
+ struct cpu_thread *c;
+ uint64_t end;
+ bool stuck = false;
+
+ if (cpu_target >= 0) {
+ target = find_cpu_by_server(cpu_target);
+ if (!target)
+ return OPAL_PARAMETER;
+ } else if (cpu_target != -1) {
+ return OPAL_PARAMETER;
+ }
+
+ if (quiesce_type == QUIESCE_HOLD || quiesce_type == QUIESCE_REJECT) {
+ if (cmpxchg32(&opal_quiesce_state, 0, quiesce_type) != 0) {
+ if (opal_quiesce_owner != cpu->pir) {
+ /*
+ * Nested is allowed for now just for
+ * internal uses, so an error is returned
+ * for OS callers, but no error message
+ * printed if we are nested.
+ */
+ printf("opal_quiesce already quiescing\n");
+ }
+ return OPAL_BUSY;
+ }
+ opal_quiesce_owner = cpu->pir;
+ opal_quiesce_target = cpu_target;
+ }
+
+ if (opal_quiesce_owner != cpu->pir) {
+ printf("opal_quiesce CPU does not own quiesce state (must call QUIESCE_HOLD or QUIESCE_REJECT)\n");
+ return OPAL_BUSY;
+ }
+
+ /* Okay now we own the quiesce state */
+
+ if (quiesce_type == QUIESCE_RESUME ||
+ quiesce_type == QUIESCE_RESUME_FAST_REBOOT) {
+ bust_locks = false;
+ sync(); /* release barrier vs opal entry */
+ if (target) {
+ target->quiesce_opal_call = 0;
+ } else {
+ for_each_cpu(c) {
+ if (quiesce_type == QUIESCE_RESUME_FAST_REBOOT)
+ c->in_opal_call = 0;
+
+ if (c == cpu) {
+ assert(!c->quiesce_opal_call);
+ continue;
+ }
+ c->quiesce_opal_call = 0;
+ }
+ }
+ sync();
+ opal_quiesce_state = 0;
+ return OPAL_SUCCESS;
+ }
+
+ if (quiesce_type == QUIESCE_LOCK_BREAK) {
+ if (opal_quiesce_target != -1) {
+ printf("opal_quiesce has not quiesced all CPUs (must target -1)\n");
+ return OPAL_BUSY;
+ }
+ bust_locks = true;
+ return OPAL_SUCCESS;
+ }
+
+ if (target) {
+ target->quiesce_opal_call = quiesce_type;
+ } else {
+ for_each_cpu(c) {
+ if (c == cpu)
+ continue;
+ c->quiesce_opal_call = quiesce_type;
+ }
+ }
+
+ sync(); /* Order stores to quiesce_opal_call vs loads of in_opal_call */
+
+ end = mftb() + msecs_to_tb(1000);
+
+ smt_lowest();
+ if (target) {
+ while (target->in_opal_call) {
+ if (tb_compare(mftb(), end) == TB_AAFTERB) {
+ printf("OPAL quiesce CPU:%04x stuck in OPAL\n", target->pir);
+ stuck = true;
+ break;
+ }
+ barrier();
+ }
+ } else {
+ for_each_cpu(c) {
+ if (c == cpu)
+ continue;
+ while (c->in_opal_call) {
+ if (tb_compare(mftb(), end) == TB_AAFTERB) {
+ printf("OPAL quiesce CPU:%04x stuck in OPAL\n", c->pir);
+ stuck = true;
+ break;
+ }
+ barrier();
+ }
+ }
+ }
+ smt_medium();
+ sync(); /* acquire barrier vs opal entry */
+
+ if (stuck) {
+ printf("OPAL quiesce could not kick all CPUs out of OPAL\n");
+ return OPAL_PARTIAL;
+ }
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_QUIESCE, opal_quiesce, 2);
+
+void __opal_register(uint64_t token, void *func, unsigned int nargs)
+{
+ assert(token <= OPAL_LAST);
+
+ ((uint64_t *)opal_branch_table)[token] = function_entry_address(func);
+ ((u8 *)opal_num_args)[token] = nargs;
+}
+
+/*
+ * add_opal_firmware_exports_node: adds properties to the device-tree which
+ * the OS will then change into sysfs nodes.
+ * The properties must be placed under /ibm,opal/firmware/exports.
+ * The new sysfs nodes are created under /opal/exports.
+ * To be correctly exported the properties must contain:
+ * name
+ * base memory location (u64)
+ * size (u64)
+ */
+static void add_opal_firmware_exports_node(struct dt_node *node)
+{
+ struct dt_node *exports = dt_new(node, "exports");
+ uint64_t sym_start = (uint64_t)__sym_map_start;
+ uint64_t sym_size = (uint64_t)__sym_map_end - sym_start;
+
+ /*
+ * These property names will be used by Linux as the user-visible file
+ * name, so make them meaningful if possible. We use _ as the separator
+ * here to remain consistent with existing file names in /sys/opal.
+ */
+ dt_add_property_u64s(exports, "symbol_map", sym_start, sym_size);
+ dt_add_property_u64s(exports, "hdat_map", SPIRA_HEAP_BASE,
+ SPIRA_HEAP_SIZE);
+#ifdef SKIBOOT_GCOV
+ dt_add_property_u64s(exports, "gcov", SKIBOOT_BASE,
+ HEAP_BASE - SKIBOOT_BASE);
+#endif
+}
+
+static void add_opal_firmware_node(void)
+{
+ struct dt_node *firmware = dt_new(opal_node, "firmware");
+ uint64_t sym_start = (uint64_t)__sym_map_start;
+ uint64_t sym_size = (uint64_t)__sym_map_end - sym_start;
+
+ dt_add_property_string(firmware, "compatible", "ibm,opal-firmware");
+ dt_add_property_string(firmware, "name", "firmware");
+ dt_add_property_string(firmware, "version", version);
+ /*
+ * As previous OS versions use symbol-map located at
+ * /ibm,opal/firmware we will keep a copy of symbol-map here
+ * for backwards compatibility
+ */
+ dt_add_property_u64s(firmware, "symbol-map", sym_start, sym_size);
+
+ add_opal_firmware_exports_node(firmware);
+}
+
+void add_opal_node(void)
+{
+ uint64_t base, entry, size;
+ extern uint32_t opal_entry;
+ extern uint32_t boot_entry;
+ struct dt_node *opal_event;
+
+ /* XXX TODO: Reorg this. We should create the base OPAL
+ * node early on, and have the various sub modules populate
+ * their own entries (console etc...)
+ *
+ * The logic of which console backend to use should be
+ * extracted
+ */
+
+ entry = (uint64_t)&opal_entry;
+ base = SKIBOOT_BASE;
+ size = (CPU_STACKS_BASE +
+ (uint64_t)(cpu_max_pir + 1) * STACK_SIZE) - SKIBOOT_BASE;
+
+ opal_node = dt_new_check(dt_root, "ibm,opal");
+ dt_add_property_cells(opal_node, "#address-cells", 0);
+ dt_add_property_cells(opal_node, "#size-cells", 0);
+
+ if (proc_gen < proc_gen_p9)
+ dt_add_property_strings(opal_node, "compatible", "ibm,opal-v2",
+ "ibm,opal-v3");
+ else
+ dt_add_property_strings(opal_node, "compatible", "ibm,opal-v3");
+
+ dt_add_property_cells(opal_node, "opal-msg-async-num", OPAL_MAX_ASYNC_COMP);
+ dt_add_property_cells(opal_node, "opal-msg-size", OPAL_MSG_SIZE);
+ dt_add_property_u64(opal_node, "opal-base-address", base);
+ dt_add_property_u64(opal_node, "opal-entry-address", entry);
+ dt_add_property_u64(opal_node, "opal-boot-address", (uint64_t)&boot_entry);
+ dt_add_property_u64(opal_node, "opal-runtime-size", size);
+
+ /* Add irqchip interrupt controller */
+ opal_event = dt_new(opal_node, "event");
+ dt_add_property_strings(opal_event, "compatible", "ibm,opal-event");
+ dt_add_property_cells(opal_event, "#interrupt-cells", 0x1);
+ dt_add_property(opal_event, "interrupt-controller", NULL, 0);
+
+ add_opal_firmware_node();
+ add_associativity_ref_point();
+ memcons_add_properties();
+}
+
+static struct lock evt_lock = LOCK_UNLOCKED;
+
+void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values)
+{
+ uint64_t new_evts;
+
+ lock(&evt_lock);
+ new_evts = (opal_pending_events & ~evt_mask) | evt_values;
+ if (opal_pending_events != new_evts) {
+ uint64_t tok;
+
+#ifdef OPAL_TRACE_EVT_CHG
+ printf("OPAL: Evt change: 0x%016llx -> 0x%016llx\n",
+ opal_pending_events, new_evts);
+#endif
+ /*
+ * If an event gets *set* while we are in a different call chain
+ * than opal_handle_interrupt() or opal_handle_hmi(), then we
+ * artificially generate an interrupt (OCC interrupt specifically)
+		 * to ensure that Linux properly broadcasts the event change internally
+ */
+ if ((new_evts & ~opal_pending_events) != 0) {
+ tok = this_cpu()->current_token;
+ if (tok != OPAL_HANDLE_INTERRUPT && tok != OPAL_HANDLE_HMI)
+ occ_send_dummy_interrupt();
+ }
+ opal_pending_events = new_evts;
+ }
+ unlock(&evt_lock);
+}
+
+uint64_t opal_dynamic_event_alloc(void)
+{
+ uint64_t new_event;
+ int n;
+
+ lock(&evt_lock);
+
+ /* Create the event mask. This set-bit will be within the event mask
+ * iff there are free events, or out of the mask if there are no free
+ * events. If opal_dynamic_events is all ones (ie, all events are
+ * dynamic, and allocated), then ilog2 will return -1, and we'll have a
+ * zero mask.
+ */
+ n = ilog2(~opal_dynamic_events);
+ new_event = 1ull << n;
+
+ /* Ensure we're still within the allocatable dynamic events range */
+ if (new_event & opal_dynamic_events_mask)
+ opal_dynamic_events |= new_event;
+ else
+ new_event = 0;
+
+ unlock(&evt_lock);
+ return new_event;
+}
+
+void opal_dynamic_event_free(uint64_t event)
+{
+ lock(&evt_lock);
+ opal_dynamic_events &= ~event;
+ unlock(&evt_lock);
+}
+
+static uint64_t opal_test_func(uint64_t arg)
+{
+ printf("OPAL: Test function called with arg 0x%llx\n", arg);
+
+ return 0xfeedf00d;
+}
+opal_call(OPAL_TEST, opal_test_func, 1);
+
+struct opal_poll_entry {
+ struct list_node link;
+ void (*poller)(void *data);
+ void *data;
+};
+
+static struct list_head opal_pollers = LIST_HEAD_INIT(opal_pollers);
+static struct lock opal_poll_lock = LOCK_UNLOCKED;
+
+void opal_add_poller(void (*poller)(void *data), void *data)
+{
+ struct opal_poll_entry *ent;
+
+ ent = zalloc(sizeof(struct opal_poll_entry));
+ assert(ent);
+ ent->poller = poller;
+ ent->data = data;
+ lock(&opal_poll_lock);
+ list_add_tail(&opal_pollers, &ent->link);
+ unlock(&opal_poll_lock);
+}
+
+void opal_del_poller(void (*poller)(void *data))
+{
+ struct opal_poll_entry *ent;
+
+ /* XXX This is currently unused. To solve various "interesting"
+ * locking issues, the pollers are run locklessly, so if we were
+ * to free them, we would have to be careful, using something
+ * akin to RCU to synchronize with other OPAL entries. For now
+ * if anybody uses it, print a warning and leak the entry, don't
+ * free it.
+ */
+ /**
+ * @fwts-label UnsupportedOPALdelpoller
+ * @fwts-advice Currently removing a poller is DANGEROUS and
+ * MUST NOT be done in production firmware.
+ */
+ prlog(PR_ALERT, "WARNING: Unsupported opal_del_poller."
+ " Interesting locking issues, don't call this.\n");
+
+ lock(&opal_poll_lock);
+ list_for_each(&opal_pollers, ent, link) {
+ if (ent->poller == poller) {
+ list_del(&ent->link);
+ /* free(ent); */
+ break;
+ }
+ }
+ unlock(&opal_poll_lock);
+}
+
+void opal_run_pollers(void)
+{
+ static int pollers_with_lock_warnings = 0;
+ static int poller_recursion = 0;
+ struct opal_poll_entry *poll_ent;
+ bool was_in_poller;
+
+ /* Don't re-enter on this CPU, unless it was an OPAL re-entry */
+ if (this_cpu()->in_opal_call == 1 && this_cpu()->in_poller) {
+
+ /**
+ * @fwts-label OPALPollerRecursion
+ * @fwts-advice Recursion detected in opal_run_pollers(). This
+ * indicates a bug in OPAL where a poller ended up running
+ * pollers, which doesn't lead anywhere good.
+ */
+ poller_recursion++;
+ if (poller_recursion <= 16) {
+ disable_fast_reboot("Poller recursion detected.");
+ prlog(PR_ERR, "OPAL: Poller recursion detected.\n");
+ backtrace();
+
+ }
+
+ if (poller_recursion == 16)
+ prlog(PR_ERR, "OPAL: Squashing future poller recursion warnings (>16).\n");
+
+ return;
+ }
+ was_in_poller = this_cpu()->in_poller;
+ this_cpu()->in_poller = true;
+
+ if (!list_empty(&this_cpu()->locks_held) && pollers_with_lock_warnings < 64) {
+ /**
+ * @fwts-label OPALPollerWithLock
+ * @fwts-advice opal_run_pollers() was called with a lock
+ * held, which could lead to deadlock if not excessively
+ * lucky/careful.
+ */
+ prlog(PR_ERR, "Running pollers with lock held !\n");
+ dump_locks_list();
+ backtrace();
+ pollers_with_lock_warnings++;
+ if (pollers_with_lock_warnings == 64) {
+ /**
+ * @fwts-label OPALPollerWithLock64
+ * @fwts-advice Your firmware is buggy, see the 64
+ * messages complaining about opal_run_pollers with
+ * lock held.
+ */
+ prlog(PR_ERR, "opal_run_pollers with lock run 64 "
+ "times, disabling warning.\n");
+ }
+ }
+
+ /* We run the timers first */
+ check_timers(false);
+
+	/* The pollers are run locklessly, see comment in opal_del_poller */
+ list_for_each(&opal_pollers, poll_ent, link)
+ poll_ent->poller(poll_ent->data);
+
+ /* Disable poller flag */
+ this_cpu()->in_poller = was_in_poller;
+
+ /* On debug builds, print max stack usage */
+ check_stacks();
+}
+
+static int64_t opal_poll_events(__be64 *outstanding_event_mask)
+{
+
+ if (!opal_addr_valid(outstanding_event_mask))
+ return OPAL_PARAMETER;
+
+ /* Check if we need to trigger an attn for test use */
+ if (attn_trigger == 0xdeadbeef) {
+ prlog(PR_EMERG, "Triggering attn\n");
+ assert(false);
+ }
+
+ opal_run_pollers();
+
+ if (outstanding_event_mask)
+ *outstanding_event_mask = cpu_to_be64(opal_pending_events);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_POLL_EVENTS, opal_poll_events, 1);
+
+static int64_t opal_check_token(uint64_t token)
+{
+ if (token > OPAL_LAST)
+ return OPAL_TOKEN_ABSENT;
+
+ if (opal_branch_table[token])
+ return OPAL_TOKEN_PRESENT;
+
+ return OPAL_TOKEN_ABSENT;
+}
+opal_call(OPAL_CHECK_TOKEN, opal_check_token, 1);
+
+struct opal_sync_entry {
+ struct list_node link;
+ bool (*notify)(void *data);
+ void *data;
+};
+
+static struct list_head opal_syncers = LIST_HEAD_INIT(opal_syncers);
+
+void opal_add_host_sync_notifier(bool (*notify)(void *data), void *data)
+{
+ struct opal_sync_entry *ent;
+
+ ent = zalloc(sizeof(struct opal_sync_entry));
+ assert(ent);
+ ent->notify = notify;
+ ent->data = data;
+ list_add_tail(&opal_syncers, &ent->link);
+}
+
+/*
+ * Remove a host sync notifier for given callback and data
+ */
+void opal_del_host_sync_notifier(bool (*notify)(void *data), void *data)
+{
+ struct opal_sync_entry *ent;
+
+ list_for_each(&opal_syncers, ent, link) {
+ if (ent->notify == notify && ent->data == data) {
+ list_del(&ent->link);
+ free(ent);
+ return;
+ }
+ }
+}
+
+/*
+ * OPAL call to handle host kexec'ing scenario
+ */
+static int64_t opal_sync_host_reboot(void)
+{
+ struct opal_sync_entry *ent, *nxt;
+ int ret = OPAL_SUCCESS;
+
+ list_for_each_safe(&opal_syncers, ent, nxt, link)
+ if (! ent->notify(ent->data))
+ ret = OPAL_BUSY_EVENT;
+
+ return ret;
+}
+opal_call(OPAL_SYNC_HOST_REBOOT, opal_sync_host_reboot, 0);
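
Annotation (not part of the patch): opal_dynamic_event_alloc() above picks the highest clear bit of the allocation word (ilog2 of the complement) and accepts it only if it lands inside the dynamic range. Below is a small sketch of that bit allocation; ilog2_u64() is open-coded here with a GCC builtin since skiboot's ilog2() is not available outside the tree.

#include <stdint.h>
#include <stdio.h>

/* Dynamic events live in the upper 32 bits, as in the code above. */
static const uint64_t dyn_mask = 0xffffffff00000000ull;
static uint64_t dyn_events;

/* Highest set bit index, or -1 if x is zero (mirrors skiboot's ilog2()). */
static int ilog2_u64(uint64_t x)
{
	if (!x)
		return -1;
	return 63 - __builtin_clzll(x);
}

static uint64_t event_alloc(void)
{
	int n = ilog2_u64(~dyn_events);
	uint64_t bit = (n < 0) ? 0 : 1ull << n;

	/* Only hand out bits from the dynamic range. */
	if (bit & dyn_mask) {
		dyn_events |= bit;
		return bit;
	}
	return 0;
}

int main(void)
{
	printf("first  = 0x%016llx\n", (unsigned long long)event_alloc());
	printf("second = 0x%016llx\n", (unsigned long long)event_alloc());
	return 0;
}

Run twice, this hands out 0x8000000000000000 and then 0x4000000000000000, working down from the top of the dynamic mask.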
diff --git a/roms/skiboot/core/pci-dt-slot.c b/roms/skiboot/core/pci-dt-slot.c
new file mode 100644
index 000000000..2441bf940
--- /dev/null
+++ b/roms/skiboot/core/pci-dt-slot.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * PCI slots in the device tree.
+ *
+ * Copyright 2017-2018 IBM Corp.
+ */
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <skiboot.h>
+#include <device.h>
+
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-slot.h>
+#include <ccan/list/list.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "DT-SLOT: " fmt
+
+struct dt_node *dt_slots;
+
+static struct dt_node *map_phb_to_slot(struct phb *phb)
+{
+ uint32_t chip_id = dt_get_chip_id(phb->dt_node);
+ uint32_t phb_idx = dt_prop_get_u32_def(phb->dt_node,
+ "ibm,phb-index", 0);
+ struct dt_node *slot_node;
+
+ if (!dt_slots)
+ dt_slots = dt_find_by_path(dt_root, "/ibm,pcie-slots");
+
+ if (!dt_slots)
+ return NULL;
+
+ dt_for_each_child(dt_slots, slot_node) {
+ u32 reg[2];
+
+ if (!dt_node_is_compatible(slot_node, "ibm,pcie-root-port"))
+ continue;
+
+ reg[0] = dt_prop_get_cell(slot_node, "reg", 0);
+ reg[1] = dt_prop_get_cell(slot_node, "reg", 1);
+
+ if (reg[0] == chip_id && reg[1] == phb_idx)
+ return slot_node;
+ }
+
+ return NULL;
+}
+
+static struct dt_node *find_devfn(struct dt_node *bus, uint32_t bdfn)
+{
+ uint32_t port_dev_id = PCI_DEV(bdfn);
+ struct dt_node *child;
+
+ dt_for_each_child(bus, child)
+ if (dt_prop_get_u32_def(child, "reg", ~0u) == port_dev_id)
+ return child;
+
+ return NULL;
+}
+
+/* Looks for a device under this slot. */
+static struct dt_node *find_dev_under_slot(struct dt_node *slot,
+ struct pci_device *pd)
+{
+ struct dt_node *child, *wildcard = NULL;
+
+ /* find the device in the parent bus node */
+ dt_for_each_child(slot, child) {
+ u32 vdid;
+
+ /* "pluggable" and "builtin" without unit addrs are wildcards */
+ if (!dt_has_node_property(child, "reg", NULL)) {
+ if (wildcard)
+ prerror("Duplicate wildcard entry! Already have %s, found %s",
+ wildcard->name, child->name);
+
+ wildcard = child;
+ continue;
+ }
+
+ /* NB: the pci_device vdid is did,vid rather than vid,did */
+ vdid = dt_prop_get_cell(child, "reg", 1) << 16 |
+ dt_prop_get_cell(child, "reg", 0);
+
+ if (vdid == pd->vdid)
+ return child;
+ }
+
+ if (!wildcard)
+ PCIDBG(pd->phb, pd->bdfn,
+ "Unable to find a slot for device %.4x:%.4x\n",
+ (pd->vdid & 0xffff0000) >> 16, pd->vdid & 0xffff);
+
+ return wildcard;
+}
+
+/*
+ * If the `pd` is a bridge this returns a node with a compatible of
+ * ibm,pcie-port to indicate it's a "slot node".
+ */
+static struct dt_node *find_node_for_dev(struct phb *phb,
+ struct pci_device *pd)
+{
+ struct dt_node *sw_slot, *sw_up;
+
+ assert(pd);
+
+ if (pd->slot && pd->slot->data)
+ return pd->slot->data;
+
+ /*
+ * Example DT:
+ * /root-complex@8,5/switch-up@10b5,8725/down-port@4
+ */
+ switch (pd->dev_type) {
+ case PCIE_TYPE_ROOT_PORT: // find the root-complex@<chip>,<phb> node
+ return map_phb_to_slot(phb);
+
+ case PCIE_TYPE_SWITCH_DNPORT: // grab the down-port@<devfn>
+ /*
+ * Walk up the topology to find the slot that the switch
+ * upstream port is connected to. In the example
+ * this would be the root-complex@8,5 node.
+ */
+ sw_slot = find_node_for_dev(phb, pd->parent->parent);
+ if (!sw_slot)
+ return NULL;
+
+ /* find the per-device node for this switch */
+ sw_up = find_dev_under_slot(sw_slot, pd->parent);
+ if (!sw_up)
+ return NULL;
+
+ /* find this down port */
+ return find_devfn(sw_up, pd->bdfn);
+
+ default:
+ PCIDBG(phb, pd->bdfn,
+ "Trying to find a slot for non-pcie bridge type %d\n",
+ pd->dev_type);
+ assert(0);
+ }
+
+ return NULL;
+}
+
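+/*
+ * For illustration, the device-tree shape these walkers expect looks
+ * roughly like the sketch below. The values are made up; only the
+ * structure and the "reg"/"compatible" usage match the code above, and
+ * real platform entries carry additional properties.
+ *
+ *     ibm,pcie-slots {
+ *             root-complex@8,5 {
+ *                     compatible = "ibm,pcie-root-port";
+ *                     reg = <0x8 0x5>;        // chip-id, ibm,phb-index
+ *                     switch-up@10b5,8725 {
+ *                             reg = <0x10b5 0x8725>;  // vendor-id, device-id
+ *                             down-port@4 {
+ *                                     reg = <4>;      // PCI device number
+ *                             };
+ *                     };
+ *             };
+ *     };
+ *
+ * map_phb_to_slot() matches the root-complex node, find_dev_under_slot()
+ * matches the switch-up node by vendor/device ID (or falls back to a
+ * wildcard child without "reg"), and find_devfn() matches the down-port
+ * by device number.
+ */
+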
+struct dt_node *map_pci_dev_to_slot(struct phb *phb, struct pci_device *pd)
+{
+ struct dt_node *n;
+ char *path;
+
+ assert(pd);
+
+ /*
+ * Having a slot only makes sense for root and switch downstream ports.
+ * We don't care about PCI-X.
+ */
+ if (pd->dev_type != PCIE_TYPE_SWITCH_DNPORT &&
+ pd->dev_type != PCIE_TYPE_ROOT_PORT)
+ return NULL;
+
+ PCIDBG(phb, pd->bdfn, "Finding slot\n");
+
+ n = find_node_for_dev(phb, pd);
+ if (!n) {
+ PCIDBG(phb, pd->bdfn, "No slot found!\n");
+ } else {
+ path = dt_get_path(n);
+ PCIDBG(phb, pd->bdfn, "Slot found %s\n", path);
+ free(path);
+ }
+
+ return n;
+}
+
+int __print_slot(struct phb *phb, struct pci_device *pd, void *userdata);
+int __print_slot(struct phb *phb, struct pci_device *pd,
+ void __unused *userdata)
+{
+ struct dt_node *node;
+ struct dt_node *pnode;
+ char *c = NULL;
+ u32 phandle = 0;
+
+ if (!pd)
+ return 0;
+
+ node = map_pci_dev_to_slot(phb, pd);
+
+ /* at this point all node associations should be done */
+ if (pd->dn && dt_has_node_property(pd->dn, "ibm,pcie-slot", NULL)) {
+ phandle = dt_prop_get_u32(pd->dn, "ibm,pcie-slot");
+ pnode = dt_find_by_phandle(dt_root, phandle);
+
+ assert(node == pnode);
+ }
+
+ if (node)
+ c = dt_get_path(node);
+
+ PCIDBG(phb, pd->bdfn, "Mapped to slot %s (%x)\n",
+ c ? c : "<null>", phandle);
+
+ free(c);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/pci-opal.c b/roms/skiboot/core/pci-opal.c
new file mode 100644
index 000000000..aa375c6aa
--- /dev/null
+++ b/roms/skiboot/core/pci-opal.c
@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * PCIe OPAL Calls
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <opal-api.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-slot.h>
+#include <opal-msg.h>
+#include <timebase.h>
+#include <timer.h>
+
+#define OPAL_PCICFG_ACCESS_READ(op, cb, type) \
+static int64_t opal_pci_config_##op(uint64_t phb_id, \
+ uint64_t bus_dev_func, \
+ uint64_t offset, type data) \
+{ \
+ struct phb *phb = pci_get_phb(phb_id); \
+ int64_t rc; \
+ \
+ if (!opal_addr_valid((void *)data)) \
+ return OPAL_PARAMETER; \
+ \
+ if (!phb) \
+ return OPAL_PARAMETER; \
+ phb_lock(phb); \
+ rc = phb->ops->cfg_##cb(phb, bus_dev_func, offset, data); \
+ phb_unlock(phb); \
+ \
+ return rc; \
+}
+
+#define OPAL_PCICFG_ACCESS_WRITE(op, cb, type) \
+static int64_t opal_pci_config_##op(uint64_t phb_id, \
+ uint64_t bus_dev_func, \
+ uint64_t offset, type data) \
+{ \
+ struct phb *phb = pci_get_phb(phb_id); \
+ int64_t rc; \
+ \
+ if (!phb) \
+ return OPAL_PARAMETER; \
+ phb_lock(phb); \
+ rc = phb->ops->cfg_##cb(phb, bus_dev_func, offset, data); \
+ phb_unlock(phb); \
+ \
+ return rc; \
+}
+
+OPAL_PCICFG_ACCESS_READ(read_byte, read8, uint8_t *)
+OPAL_PCICFG_ACCESS_READ(read_half_word, read16, uint16_t *)
+OPAL_PCICFG_ACCESS_READ(read_word, read32, uint32_t *)
+OPAL_PCICFG_ACCESS_WRITE(write_byte, write8, uint8_t)
+OPAL_PCICFG_ACCESS_WRITE(write_half_word, write16, uint16_t)
+OPAL_PCICFG_ACCESS_WRITE(write_word, write32, uint32_t)
+
+static int64_t opal_pci_config_read_half_word_be(uint64_t phb_id,
+ uint64_t bus_dev_func,
+ uint64_t offset,
+ __be16 *__data)
+{
+ uint16_t data;
+ int64_t rc;
+
+ rc = opal_pci_config_read_half_word(phb_id, bus_dev_func, offset, &data);
+ *__data = cpu_to_be16(data);
+
+ return rc;
+}
+
+static int64_t opal_pci_config_read_word_be(uint64_t phb_id,
+ uint64_t bus_dev_func,
+ uint64_t offset,
+ __be32 *__data)
+{
+ uint32_t data;
+ int64_t rc;
+
+ rc = opal_pci_config_read_word(phb_id, bus_dev_func, offset, &data);
+ *__data = cpu_to_be32(data);
+
+ return rc;
+}
+
+
+opal_call(OPAL_PCI_CONFIG_READ_BYTE, opal_pci_config_read_byte, 4);
+opal_call(OPAL_PCI_CONFIG_READ_HALF_WORD, opal_pci_config_read_half_word_be, 4);
+opal_call(OPAL_PCI_CONFIG_READ_WORD, opal_pci_config_read_word_be, 4);
+opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, opal_pci_config_write_byte, 4);
+opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, opal_pci_config_write_half_word, 4);
+opal_call(OPAL_PCI_CONFIG_WRITE_WORD, opal_pci_config_write_word, 4);
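+
+/*
+ * For reference, OPAL_PCICFG_ACCESS_READ(read_byte, read8, uint8_t *)
+ * above expands to roughly the following; the write variants only differ
+ * in passing the data by value and skipping the opal_addr_valid() check:
+ *
+ *     static int64_t opal_pci_config_read_byte(uint64_t phb_id,
+ *                                              uint64_t bus_dev_func,
+ *                                              uint64_t offset, uint8_t *data)
+ *     {
+ *             struct phb *phb = pci_get_phb(phb_id);
+ *             int64_t rc;
+ *
+ *             if (!opal_addr_valid((void *)data))
+ *                     return OPAL_PARAMETER;
+ *             if (!phb)
+ *                     return OPAL_PARAMETER;
+ *             phb_lock(phb);
+ *             rc = phb->ops->cfg_read8(phb, bus_dev_func, offset, data);
+ *             phb_unlock(phb);
+ *
+ *             return rc;
+ *     }
+ *
+ * The _be wrappers above are registered for the 16- and 32-bit reads so
+ * the value handed back through the caller's pointer is in big-endian
+ * byte order.
+ */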
+
+static struct lock opal_eeh_evt_lock = LOCK_UNLOCKED;
+static uint64_t opal_eeh_evt = 0;
+
+void opal_pci_eeh_set_evt(uint64_t phb_id)
+{
+ lock(&opal_eeh_evt_lock);
+ opal_eeh_evt |= 1ULL << phb_id;
+ opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
+ unlock(&opal_eeh_evt_lock);
+}
+
+void opal_pci_eeh_clear_evt(uint64_t phb_id)
+{
+ lock(&opal_eeh_evt_lock);
+ opal_eeh_evt &= ~(1ULL << phb_id);
+ if (!opal_eeh_evt)
+ opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0);
+ unlock(&opal_eeh_evt_lock);
+}
+
+static int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
+ uint8_t *freeze_state,
+ __be16 *__pci_error_type,
+ __be64 *__phb_status)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ uint16_t pci_error_type;
+ int64_t rc;
+
+ if (!opal_addr_valid(freeze_state) || !opal_addr_valid(__pci_error_type)
+ || !opal_addr_valid(__phb_status))
+ return OPAL_PARAMETER;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->eeh_freeze_status)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+
+ if (__phb_status)
+ prlog(PR_ERR, "PHB#%04llx: %s: deprecated PHB status\n",
+ phb_id, __func__);
+
+ rc = phb->ops->eeh_freeze_status(phb, pe_number, freeze_state,
+ &pci_error_type, NULL);
+ *__pci_error_type = cpu_to_be16(pci_error_type);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_EEH_FREEZE_STATUS, opal_pci_eeh_freeze_status, 5);
+
+static int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
+ uint64_t eeh_action_token)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->eeh_freeze_clear)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->eeh_freeze_clear(phb, pe_number, eeh_action_token);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, opal_pci_eeh_freeze_clear, 3);
+
+static int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number,
+ uint64_t eeh_action_token)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->eeh_freeze_set)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->eeh_freeze_set(phb, pe_number, eeh_action_token);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_EEH_FREEZE_SET, opal_pci_eeh_freeze_set, 3);
+
+static int64_t opal_pci_err_inject(uint64_t phb_id, uint64_t pe_number,
+ uint32_t type, uint32_t func,
+ uint64_t addr, uint64_t mask)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops || !phb->ops->err_inject)
+ return OPAL_UNSUPPORTED;
+
+ if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
+ type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
+ return OPAL_PARAMETER;
+
+ phb_lock(phb);
+ rc = phb->ops->err_inject(phb, pe_number, type, func, addr, mask);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_ERR_INJECT, opal_pci_err_inject, 6);
+
+static int64_t opal_pci_phb_mmio_enable(uint64_t phb_id, uint16_t window_type,
+ uint16_t window_num, uint16_t enable)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->phb_mmio_enable)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->phb_mmio_enable(phb, window_type, window_num, enable);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_PHB_MMIO_ENABLE, opal_pci_phb_mmio_enable, 4);
+
+static int64_t opal_pci_set_phb_mem_window(uint64_t phb_id,
+ uint16_t window_type,
+ uint16_t window_num,
+ uint64_t addr,
+ uint64_t pci_addr,
+ uint64_t size)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_phb_mem_window)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->set_phb_mem_window(phb, window_type, window_num,
+ addr, pci_addr, size);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PHB_MEM_WINDOW, opal_pci_set_phb_mem_window, 6);
+
+static int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint64_t pe_number,
+ uint16_t window_type,
+ uint16_t window_num,
+ uint16_t segment_num)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->map_pe_mmio_window)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->map_pe_mmio_window(phb, pe_number, window_type,
+ window_num, segment_num);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, opal_pci_map_pe_mmio_window, 5);
+
+static int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number,
+ uint64_t bus_dev_func, uint8_t bus_compare,
+ uint8_t dev_compare, uint8_t func_compare,
+ uint8_t pe_action)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_pe)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->set_pe(phb, pe_number, bus_dev_func, bus_compare,
+ dev_compare, func_compare, pe_action);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PE, opal_pci_set_pe, 7);
+
+static int64_t opal_pci_set_peltv(uint64_t phb_id, uint32_t parent_pe,
+ uint32_t child_pe, uint8_t state)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_peltv)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->set_peltv(phb, parent_pe, child_pe, state);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PELTV, opal_pci_set_peltv, 4);
+
+static int64_t opal_pci_set_mve(uint64_t phb_id, uint32_t mve_number,
+ uint64_t pe_number)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_mve)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->set_mve(phb, mve_number, pe_number);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_MVE, opal_pci_set_mve, 3);
+
+static int64_t opal_pci_set_mve_enable(uint64_t phb_id, uint32_t mve_number,
+ uint32_t state)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_mve_enable)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->set_mve_enable(phb, mve_number, state);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_MVE_ENABLE, opal_pci_set_mve_enable, 3);
+
+static int64_t opal_pci_msi_eoi(uint64_t phb_id,
+ uint32_t hwirq)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->pci_msi_eoi)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->pci_msi_eoi(phb, hwirq);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MSI_EOI, opal_pci_msi_eoi, 2);
+
+static int64_t opal_pci_tce_kill(uint64_t phb_id,
+ uint32_t kill_type,
+ uint64_t pe_number, uint32_t tce_size,
+ uint64_t dma_addr, uint32_t npages)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->tce_kill)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->tce_kill(phb, kill_type, pe_number, tce_size,
+ dma_addr, npages);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_TCE_KILL, opal_pci_tce_kill, 6);
+
+static int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint64_t pe_number,
+ uint32_t xive_num)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_xive_pe)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->set_xive_pe(phb, pe_number, xive_num);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_XIVE_PE, opal_pci_set_xive_pe, 3);
+
+static int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number,
+ uint32_t xive_num, uint8_t msi_range,
+ __be32 *__msi_address, __be32 *__message_data)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ uint32_t msi_address;
+ uint32_t message_data;
+ int64_t rc;
+
+ if (!opal_addr_valid(__msi_address) || !opal_addr_valid(__message_data))
+ return OPAL_PARAMETER;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_msi_32)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->get_msi_32(phb, mve_number, xive_num, msi_range,
+ &msi_address, &message_data);
+ phb_unlock(phb);
+
+ *__msi_address = cpu_to_be32(msi_address);
+ *__message_data = cpu_to_be32(message_data);
+
+ return rc;
+}
+opal_call(OPAL_GET_MSI_32, opal_get_msi_32, 6);
+
+static int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number,
+ uint32_t xive_num, uint8_t msi_range,
+ __be64 *__msi_address, __be32 *__message_data)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ uint64_t msi_address;
+ uint32_t message_data;
+ int64_t rc;
+
+ if (!opal_addr_valid(__msi_address) || !opal_addr_valid(__message_data))
+ return OPAL_PARAMETER;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_msi_64)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->get_msi_64(phb, mve_number, xive_num, msi_range,
+ &msi_address, &message_data);
+ phb_unlock(phb);
+
+ *__msi_address = cpu_to_be64(msi_address);
+ *__message_data = cpu_to_be32(message_data);
+
+ return rc;
+}
+opal_call(OPAL_GET_MSI_64, opal_get_msi_64, 6);
+
+static int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint64_t pe_number,
+ uint16_t window_id,
+ uint16_t tce_levels,
+ uint64_t tce_table_addr,
+ uint64_t tce_table_size,
+ uint64_t tce_page_size)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->map_pe_dma_window)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->map_pe_dma_window(phb, pe_number, window_id,
+ tce_levels, tce_table_addr,
+ tce_table_size, tce_page_size);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW, opal_pci_map_pe_dma_window, 7);
+
+static int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id,
+ uint64_t pe_number,
+ uint16_t window_id,
+ uint64_t pci_start_addr,
+ uint64_t pci_mem_size)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->map_pe_dma_window_real)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->map_pe_dma_window_real(phb, pe_number, window_id,
+ pci_start_addr, pci_mem_size);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, opal_pci_map_pe_dma_window_real, 5);
+
+static int64_t opal_phb_set_option(uint64_t phb_id, uint64_t opt,
+ uint64_t setting)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+
+ if (!phb->ops->set_option)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = phb->ops->set_option(phb, opt, setting);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PHB_SET_OPTION, opal_phb_set_option, 3);
+
+static int64_t opal_phb_get_option(uint64_t phb_id, uint64_t opt,
+ __be64 *setting)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb || !setting)
+ return OPAL_PARAMETER;
+
+ if (!phb->ops->get_option)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = phb->ops->get_option(phb, opt, setting);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PHB_GET_OPTION, opal_phb_get_option, 3);
+
+static int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope,
+ uint8_t assert_state)
+{
+ struct pci_slot *slot = pci_slot_find(id);
+ struct phb *phb = slot ? slot->phb : NULL;
+ int64_t rc = OPAL_SUCCESS;
+
+ if (!slot || !phb)
+ return OPAL_PARAMETER;
+ if (assert_state != OPAL_ASSERT_RESET &&
+ assert_state != OPAL_DEASSERT_RESET)
+ return OPAL_PARAMETER;
+
+ phb_lock(phb);
+
+ switch(reset_scope) {
+ case OPAL_RESET_PHB_COMPLETE:
+ /* Complete reset is applicable to PHB slot only */
+ if (!slot->ops.creset || slot->pd) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+
+ rc = slot->ops.creset(slot);
+ if (rc < 0)
+ prlog(PR_ERR, "SLOT-%016llx: Error %lld on complete reset\n",
+ slot->id, rc);
+ break;
+ case OPAL_RESET_PCI_FUNDAMENTAL:
+ if (!slot->ops.freset) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+ /* Nothing to do at deassert time */
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+
+ rc = slot->ops.freset(slot);
+ if (rc < 0)
+ prlog(PR_ERR, "SLOT-%016llx: Error %lld on fundamental reset\n",
+ slot->id, rc);
+ break;
+ case OPAL_RESET_PCI_HOT:
+ if (!slot->ops.hreset) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+ /* Nothing to do at deassert time */
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+
+ rc = slot->ops.hreset(slot);
+ if (rc < 0)
+ prlog(PR_ERR, "SLOT-%016llx: Error %lld on hot reset\n",
+ slot->id, rc);
+ break;
+ case OPAL_RESET_PCI_IODA_TABLE:
+ /* It's allowed on PHB slot only */
+ if (slot->pd || !phb->ops || !phb->ops->ioda_reset) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+
+ rc = phb->ops->ioda_reset(phb, true);
+ break;
+ case OPAL_RESET_PHB_ERROR:
+ /* It's allowed on PHB slot only */
+ if (slot->pd || !phb->ops || !phb->ops->papr_errinjct_reset) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+
+ rc = phb->ops->papr_errinjct_reset(phb);
+ break;
+ default:
+ rc = OPAL_UNSUPPORTED;
+ }
+ phb_unlock(phb);
+
+ return (rc > 0) ? tb_to_msecs(rc) : rc;
+}
+opal_call(OPAL_PCI_RESET, opal_pci_reset, 3);
+
+static int64_t opal_pci_reinit(uint64_t phb_id,
+ uint64_t reinit_scope,
+ uint64_t data)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops || !phb->ops->pci_reinit)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = phb->ops->pci_reinit(phb, reinit_scope, data);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_REINIT, opal_pci_reinit, 3);
+
+static int64_t opal_pci_poll(uint64_t id)
+{
+ struct pci_slot *slot = pci_slot_find(id);
+ struct phb *phb = slot ? slot->phb : NULL;
+ int64_t rc;
+
+ if (!slot || !phb)
+ return OPAL_PARAMETER;
+ if (!slot->ops.run_sm)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = slot->ops.run_sm(slot);
+ phb_unlock(phb);
+
+ /* Return milliseconds for caller to sleep: round up */
+ if (rc > 0) {
+ rc = tb_to_msecs(rc);
+ if (rc == 0)
+ rc = 1;
+ }
+
+ return rc;
+}
+opal_call(OPAL_PCI_POLL, opal_pci_poll, 1);
+
+static int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data)
+{
+ struct pci_slot *slot = pci_slot_find(id);
+ struct phb *phb = slot ? slot->phb : NULL;
+ uint8_t *presence = (uint8_t *)data;
+ int64_t rc;
+
+ if (!opal_addr_valid(presence))
+ return OPAL_PARAMETER;
+
+ if (!slot || !phb)
+ return OPAL_PARAMETER;
+ if (!slot->ops.get_presence_state)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = slot->ops.get_presence_state(slot, presence);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_GET_PRESENCE_STATE, opal_pci_get_presence_state, 2);
+
+static int64_t opal_pci_get_power_state(uint64_t id, uint64_t data)
+{
+ struct pci_slot *slot = pci_slot_find(id);
+ struct phb *phb = slot ? slot->phb : NULL;
+ uint8_t *power_state = (uint8_t *)data;
+ int64_t rc;
+
+ if (!opal_addr_valid(power_state))
+ return OPAL_PARAMETER;
+
+ if (!slot || !phb)
+ return OPAL_PARAMETER;
+ if (!slot->ops.get_power_state)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = slot->ops.get_power_state(slot, power_state);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_GET_POWER_STATE, opal_pci_get_power_state, 2);
+
+static u32 get_slot_phandle(struct pci_slot *slot)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+
+ if (pd)
+ return pd->dn->phandle;
+ else
+ return phb->dt_node->phandle;
+}
+
+static void rescan_slot_devices(struct pci_slot *slot)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+
+ /*
+ * prepare_link_change() is called (if needed) by the state
+ * machine during the slot reset or link polling
+ */
+ if (phb->phb_type != phb_type_npu_v2_opencapi) {
+ pci_scan_bus(phb, pd->secondary_bus,
+ pd->subordinate_bus, &pd->children, pd, true);
+ pci_add_device_nodes(phb, &pd->children, pd->dn,
+ &phb->lstate, 0);
+ } else {
+ pci_scan_bus(phb, 0, 0xff, &phb->devices, NULL, true);
+ pci_add_device_nodes(phb, &phb->devices,
+ phb->dt_node, &phb->lstate, 0);
+ phb->ops->phb_final_fixup(phb);
+ }
+}
+
+static void remove_slot_devices(struct pci_slot *slot)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+
+ if (phb->phb_type != phb_type_npu_v2_opencapi)
+ pci_remove_bus(phb, &pd->children);
+ else
+ pci_remove_bus(phb, &phb->devices);
+}
+
+static void link_up_timer(struct timer *t, void *data,
+ uint64_t now __unused)
+{
+ struct pci_slot *slot = data;
+ struct phb *phb = slot->phb;
+ uint8_t link;
+ int64_t rc = 0;
+
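+ /* Don't block in timer context if the PHB is busy; retry shortly */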
+ if (!phb_try_lock(phb)) {
+ schedule_timer(&slot->timer, msecs_to_tb(10));
+ return;
+ }
+
+ rc = slot->ops.run_sm(slot);
+ if (rc < 0)
+ goto out;
+ if (rc > 0) {
+ schedule_timer(t, rc);
+ phb_unlock(phb);
+ return;
+ }
+
+ if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
+ link = 0;
+ if (!link) {
+ rc = OPAL_HARDWARE;
+ goto out;
+ }
+
+ rescan_slot_devices(slot);
+out:
+ opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+ cpu_to_be64(slot->async_token),
+ cpu_to_be64(get_slot_phandle(slot)),
+ cpu_to_be64(slot->power_state),
+ rc <= 0 ? cpu_to_be64(rc) : cpu_to_be64(OPAL_BUSY));
+ phb_unlock(phb);
+}
+
+static bool training_needed(struct pci_slot *slot)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+
+ /* only for opencapi slots for now */
+ if (!pd && phb->phb_type == phb_type_npu_v2_opencapi)
+ return true;
+ return false;
+}
+
+static void wait_for_link_up_and_rescan(struct pci_slot *slot)
+{
+ int64_t rc = 1;
+
+ /*
+ * Links for PHB slots need to be retrained by triggering a
+ * fundamental reset. Other slots also need to be tested for
+ * readiness
+ */
+ if (training_needed(slot)) {
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ rc = slot->ops.freset(slot);
+ if (rc < 0) {
+ opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+ cpu_to_be64(slot->async_token),
+ cpu_to_be64(get_slot_phandle(slot)),
+ cpu_to_be64(slot->power_state),
+ cpu_to_be64(rc));
+ return;
+ }
+ } else {
+ pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL);
+ rc = msecs_to_tb(20);
+ }
+ init_timer(&slot->timer, link_up_timer, slot);
+ schedule_timer(&slot->timer, rc);
+}
+
+static void set_power_timer(struct timer *t __unused, void *data,
+ uint64_t now __unused)
+{
+ struct pci_slot *slot = data;
+ struct phb *phb = slot->phb;
+
+ if (!phb_try_lock(phb)) {
+ schedule_timer(&slot->timer, msecs_to_tb(10));
+ return;
+ }
+
+ switch (slot->state) {
+ case PCI_SLOT_STATE_SPOWER_START:
+ if (slot->retries-- == 0) {
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+ cpu_to_be64(slot->async_token),
+ cpu_to_be64(get_slot_phandle(slot)),
+ cpu_to_be64(slot->power_state),
+ cpu_to_be64(OPAL_BUSY));
+ } else {
+ schedule_timer(&slot->timer, msecs_to_tb(10));
+ }
+
+ break;
+ case PCI_SLOT_STATE_SPOWER_DONE:
+ if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
+ remove_slot_devices(slot);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
+ cpu_to_be64(slot->async_token),
+ cpu_to_be64(get_slot_phandle(slot)),
+ cpu_to_be64(OPAL_PCI_SLOT_POWER_OFF),
+ cpu_to_be64(OPAL_SUCCESS));
+ break;
+ }
+
+ /* Power on */
+ wait_for_link_up_and_rescan(slot);
+ break;
+ default:
+ prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
+ slot->id, slot->state);
+ }
+ phb_unlock(phb);
+}
+
+static int64_t opal_pci_set_power_state(uint64_t async_token,
+ uint64_t id,
+ uint64_t data)
+{
+ struct pci_slot *slot = pci_slot_find(id);
+ struct phb *phb = slot ? slot->phb : NULL;
+ struct pci_device *pd = slot ? slot->pd : NULL;
+ uint8_t *state = (uint8_t *)data;
+ int64_t rc;
+
+ if (!slot || !phb)
+ return OPAL_PARAMETER;
+
+ if (!opal_addr_valid(state))
+ return OPAL_PARAMETER;
+
+ phb_lock(phb);
+ switch (*state) {
+ case OPAL_PCI_SLOT_POWER_OFF:
+ if (!slot->ops.prepare_link_change ||
+ !slot->ops.set_power_state) {
+ phb_unlock(phb);
+ return OPAL_UNSUPPORTED;
+ }
+
+ slot->async_token = async_token;
+ slot->ops.prepare_link_change(slot, false);
+ rc = slot->ops.set_power_state(slot, PCI_SLOT_POWER_OFF);
+ break;
+ case OPAL_PCI_SLOT_POWER_ON:
+ if (!slot->ops.set_power_state ||
+ !slot->ops.get_link_state) {
+ phb_unlock(phb);
+ return OPAL_UNSUPPORTED;
+ }
+
+ slot->async_token = async_token;
+ rc = slot->ops.set_power_state(slot, PCI_SLOT_POWER_ON);
+ break;
+ case OPAL_PCI_SLOT_OFFLINE:
+ if (!pd) {
+ phb_unlock(phb);
+ return OPAL_PARAMETER;
+ }
+
+ pci_remove_bus(phb, &pd->children);
+ phb_unlock(phb);
+ return OPAL_SUCCESS;
+ case OPAL_PCI_SLOT_ONLINE:
+ if (!pd) {
+ phb_unlock(phb);
+ return OPAL_PARAMETER;
+ }
+ pci_scan_bus(phb, pd->secondary_bus, pd->subordinate_bus,
+ &pd->children, pd, true);
+ pci_add_device_nodes(phb, &pd->children, pd->dn,
+ &phb->lstate, 0);
+ phb_unlock(phb);
+ return OPAL_SUCCESS;
+ default:
+ rc = OPAL_PARAMETER;
+ }
+
+ /*
+ * OPAL_ASYNC_COMPLETION is returned when delay is needed to change
+ * the power state in the backend. When it can be finished without
+ * delay, OPAL_SUCCESS is returned. The PCI topology needs to be
+ * updated in both cases.
+ */
+ if (rc == OPAL_ASYNC_COMPLETION) {
+ slot->retries = 500;
+ init_timer(&slot->timer, set_power_timer, slot);
+ schedule_timer(&slot->timer, msecs_to_tb(10));
+ } else if (rc == OPAL_SUCCESS) {
+ if (*state == OPAL_PCI_SLOT_POWER_OFF) {
+ remove_slot_devices(slot);
+ } else {
+ wait_for_link_up_and_rescan(slot);
+ rc = OPAL_ASYNC_COMPLETION;
+ }
+ }
+
+ phb_unlock(phb);
+ return rc;
+}
+opal_call(OPAL_PCI_SET_POWER_STATE, opal_pci_set_power_state, 3);
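+
+/*
+ * For reference: when the request completes asynchronously, the
+ * OPAL_MSG_ASYNC_COMP message queued by the timer handlers above carries,
+ * in order, the async token passed in here, the phandle of the slot (or
+ * PHB) node, the resulting power state and the final return code.
+ */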
+
+static int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id,
+ void *diag_buffer,
+ uint64_t diag_buffer_len)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!opal_addr_valid(diag_buffer))
+ return OPAL_PARAMETER;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_diag_data2)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+ rc = phb->ops->get_diag_data2(phb, diag_buffer, diag_buffer_len);
+ phb_unlock(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_GET_PHB_DIAG_DATA2, opal_pci_get_phb_diag_data2, 3);
+
+static int64_t opal_pci_next_error(uint64_t phb_id, __be64 *__first_frozen_pe,
+ __be16 *__pci_error_type, __be16 *__severity)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ uint64_t first_frozen_pe;
+ uint16_t pci_error_type;
+ uint16_t severity;
+ int64_t rc;
+
+ if (!opal_addr_valid(__first_frozen_pe) ||
+ !opal_addr_valid(__pci_error_type) || !opal_addr_valid(__severity))
+ return OPAL_PARAMETER;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->next_error)
+ return OPAL_UNSUPPORTED;
+ phb_lock(phb);
+
+ opal_pci_eeh_clear_evt(phb_id);
+ rc = phb->ops->next_error(phb, &first_frozen_pe, &pci_error_type,
+ &severity);
+ phb_unlock(phb);
+
+ *__first_frozen_pe = cpu_to_be64(first_frozen_pe);
+ *__pci_error_type = cpu_to_be16(pci_error_type);
+ *__severity = cpu_to_be16(severity);
+
+ return rc;
+}
+opal_call(OPAL_PCI_NEXT_ERROR, opal_pci_next_error, 4);
+
+static int64_t opal_pci_set_phb_capi_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_capi_mode)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = phb->ops->set_capi_mode(phb, mode, pe_number);
+ phb_unlock(phb);
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PHB_CAPI_MODE, opal_pci_set_phb_capi_mode, 3);
+
+static int64_t opal_pci_set_p2p(uint64_t phbid_init, uint64_t phbid_target,
+ uint64_t desc, uint16_t pe_number)
+{
+ struct phb *phb_init = pci_get_phb(phbid_init);
+ struct phb *phb_target = pci_get_phb(phbid_target);
+
+ if (!phb_init || !phb_target)
+ return OPAL_PARAMETER;
+ /*
+ * Having the 2 devices under the same PHB may require tuning
+ * the configuration of intermediate switch(es), more easily
+ * done from linux. And it shouldn't require a PHB config
+ * change.
+ * Return an error for the time being.
+ */
+ if (phb_init == phb_target)
+ return OPAL_UNSUPPORTED;
+ if (!phb_init->ops->set_p2p || !phb_target->ops->set_p2p)
+ return OPAL_UNSUPPORTED;
+ /*
+ * Loads would be supported on p9 if the 2 devices are under
+ * the same PHB, but we ruled it out above.
+ */
+ if (desc & OPAL_PCI_P2P_LOAD)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb_init);
+ phb_init->ops->set_p2p(phb_init, OPAL_PCI_P2P_INITIATOR, desc,
+ pe_number);
+ phb_unlock(phb_init);
+
+ phb_lock(phb_target);
+ phb_target->ops->set_p2p(phb_target, OPAL_PCI_P2P_TARGET, desc,
+ pe_number);
+ phb_unlock(phb_target);
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_PCI_SET_P2P, opal_pci_set_p2p, 4);
+
+static int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, __be64 *__addr)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ uint64_t addr;
+
+ if (!opal_addr_valid(__addr))
+ return OPAL_PARAMETER;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_tunnel_bar)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ phb->ops->get_tunnel_bar(phb, &addr);
+ phb_unlock(phb);
+
+ *__addr = cpu_to_be64(addr);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_PCI_GET_PBCQ_TUNNEL_BAR, opal_pci_get_pbcq_tunnel_bar, 2);
+
+static int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_tunnel_bar)
+ return OPAL_UNSUPPORTED;
+
+ phb_lock(phb);
+ rc = phb->ops->set_tunnel_bar(phb, addr);
+ phb_unlock(phb);
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PBCQ_TUNNEL_BAR, opal_pci_set_pbcq_tunnel_bar, 2);
diff --git a/roms/skiboot/core/pci-quirk.c b/roms/skiboot/core/pci-quirk.c
new file mode 100644
index 000000000..5c8b091ea
--- /dev/null
+++ b/roms/skiboot/core/pci-quirk.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Deal with PCI device quirks
+ *
+ * Copyright 2017-2018 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "PCI-QUIRK: " fmt
+
+#include <skiboot.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-quirk.h>
+#include <platform.h>
+#include <ast.h>
+
+static int64_t cfg_block_filter(void *dev __unused,
+ struct pci_cfg_reg_filter *pcrf __unused,
+ uint32_t offset __unused, uint32_t len,
+ uint32_t *data, bool write)
+{
+ if (write)
+ return OPAL_SUCCESS;
+
+ switch (len) {
+ case 4:
+ *data = 0x0;
+ return OPAL_SUCCESS;
+ case 2:
+ *((uint16_t *)data) = 0x0;
+ return OPAL_SUCCESS;
+ case 1:
+ *((uint8_t *)data) = 0x0;
+ return OPAL_SUCCESS;
+ }
+
+ return OPAL_PARAMETER; /* should never happen */
+}
+
+/* blocks config accesses to registers in the range: [start, end] */
+#define BLOCK_CFG_RANGE(pd, start, end) \
+ pci_add_cfg_reg_filter(pd, start, end - start + 1, \
+ PCI_REG_FLAG_WRITE | PCI_REG_FLAG_READ, \
+ cfg_block_filter);
+
+static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd)
+{
+ uint8_t data;
+ bool frozen;
+ int offset;
+ int start;
+
+ pci_check_clear_freeze(phb);
+
+ /*
+ * Reading from 0xff should trigger a UR on the affected switches.
+ * If we don't get a freeze then we don't need the workaround
+ */
+ pci_cfg_read8(phb, pd->bdfn, 0xff, &data);
+ frozen = pci_check_clear_freeze(phb);
+ if (!frozen)
+ return;
+
+ for (start = -1, offset = 0; offset < 4096; offset++) {
+ pci_cfg_read8(phb, pd->bdfn, offset, &data);
+ frozen = pci_check_clear_freeze(phb);
+
+ if (start < 0 && frozen) { /* new UR range */
+ start = offset;
+ } else if (start >= 0 && !frozen) { /* end of range */
+ BLOCK_CFG_RANGE(pd, start, offset - 1);
+ PCINOTICE(phb, pd->bdfn, "Applied UR workaround to [%03x..%03x]\n", start, offset - 1);
+
+ start = -1;
+ }
+ }
+
+ /* range lasted until the end of config space */
+ if (start >= 0) {
+ BLOCK_CFG_RANGE(pd, start, 0xfff);
+ PCINOTICE(phb, pd->bdfn, "Applied UR workaround to [%03x..fff]\n", start);
+ }
+}
+
+static void quirk_astbmc_vga(struct phb *phb __unused,
+ struct pci_device *pd)
+{
+ struct dt_node *np = pd->dn;
+ uint32_t revision, mcr_configuration, mcr_scu_mpll, mcr_scu_strap;
+
+ if (ast_sio_is_enabled()) {
+ revision = ast_ahb_readl(SCU_REVISION_ID);
+ mcr_configuration = ast_ahb_readl(MCR_CONFIGURATION);
+ mcr_scu_mpll = ast_ahb_readl(MCR_SCU_MPLL);
+ mcr_scu_strap = ast_ahb_readl(MCR_SCU_STRAP);
+ } else {
+ /* Previously we would warn, now SIO is disabled by design */
+ prlog(PR_INFO, "Assumed platform default parameters for %s\n",
+ __func__);
+ revision = bmc_platform->hw->scu_revision_id;
+ mcr_configuration = bmc_platform->hw->mcr_configuration;
+ mcr_scu_mpll = bmc_platform->hw->mcr_scu_mpll;
+ mcr_scu_strap = bmc_platform->hw->mcr_scu_strap;
+ }
+
+ dt_add_property_cells(np, "aspeed,scu-revision-id", revision);
+ dt_add_property_cells(np, "aspeed,mcr-configuration", mcr_configuration);
+ dt_add_property_cells(np, "aspeed,mcr-scu-mpll", mcr_scu_mpll);
+ dt_add_property_cells(np, "aspeed,mcr-scu-strap", mcr_scu_strap);
+}
+
+/* Quirks are: {vendor ID, device ID (or PCI_ANY_ID), fixup function} */
+static const struct pci_quirk quirk_table[] = {
+ /* ASPEED 2400 VGA device */
+ { 0x1a03, 0x2000, &quirk_astbmc_vga },
+ { 0x11f8, 0x4052, &quirk_microsemi_gen4_sw },
+ { 0, 0, NULL }
+};
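+
+/*
+ * Adding a quirk is a matter of appending an entry above the NULL
+ * terminator. A hypothetical sketch (the vendor ID and fixup below are
+ * made up):
+ *
+ *     static void quirk_example(struct phb *phb, struct pci_device *pd)
+ *     {
+ *             PCINOTICE(phb, pd->bdfn, "Applying example quirk\n");
+ *     }
+ *
+ *     { 0xabcd, PCI_ANY_ID, &quirk_example }, // any device of vendor 0xabcd
+ */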
+
+static void __pci_handle_quirk(struct phb *phb, struct pci_device *pd,
+ const struct pci_quirk *quirks)
+{
+ while (quirks->vendor_id) {
+ if (quirks->vendor_id == PCI_VENDOR_ID(pd->vdid) &&
+ (quirks->device_id == PCI_ANY_ID ||
+ quirks->device_id == PCI_DEVICE_ID(pd->vdid)))
+ quirks->fixup(phb, pd);
+ quirks++;
+ }
+}
+
+void pci_handle_quirk(struct phb *phb, struct pci_device *pd)
+{
+ __pci_handle_quirk(phb, pd, quirk_table);
+}
diff --git a/roms/skiboot/core/pci-slot.c b/roms/skiboot/core/pci-slot.c
new file mode 100644
index 000000000..71d3d329c
--- /dev/null
+++ b/roms/skiboot/core/pci-slot.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * PCI Slots
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <opal-msg.h>
+#include <pci-cfg.h>
+#include <pci.h>
+#include <pci-slot.h>
+
+/* Debugging options */
+#define PCI_SLOT_PREFIX "PCI-SLOT-%016llx "
+#define PCI_SLOT_DBG(s, fmt, a...) \
+ prlog(PR_DEBUG, PCI_SLOT_PREFIX fmt, (s)->id, ##a)
+
+static void pci_slot_prepare_link_change(struct pci_slot *slot, bool up)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t aercap, mask;
+
+ /*
+ * Mask the link down and receiver error before the link becomes
+ * down. Otherwise, unmask the errors when the link is up.
+ */
+ if (pci_has_cap(pd, PCIECAP_ID_AER, true)) {
+ aercap = pci_cap(pd, PCIECAP_ID_AER, true);
+
+ /* Mask link surprise down event. The event is always
+ * masked when the associated PCI slot supports PCI
+ * surprise hotplug. We needn't toggle it when the link
+ * bounces because of a reset; just keep it always masked.
+ */
+ if (!pd->slot || !pd->slot->surprise_pluggable) {
+ pci_cfg_read32(phb, pd->bdfn,
+ aercap + PCIECAP_AER_UE_MASK, &mask);
+ if (up)
+ mask &= ~PCIECAP_AER_UE_MASK_SURPRISE_DOWN;
+ else
+ mask |= PCIECAP_AER_UE_MASK_SURPRISE_DOWN;
+ pci_cfg_write32(phb, pd->bdfn,
+ aercap + PCIECAP_AER_UE_MASK, mask);
+ }
+
+ /* Receiver error */
+ pci_cfg_read32(phb, pd->bdfn, aercap + PCIECAP_AER_CE_MASK,
+ &mask);
+ if (up)
+ mask &= ~PCIECAP_AER_CE_RECVR_ERR;
+ else
+ mask |= PCIECAP_AER_CE_RECVR_ERR;
+ pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_CE_MASK,
+ mask);
+ }
+
+ /*
+ * We're coming back from reset. We need to restore bus ranges
+ * and reinitialize the affected bridges and devices.
+ */
+ if (up) {
+ pci_restore_bridge_buses(phb, pd);
+ if (phb->ops->device_init)
+ pci_walk_dev(phb, pd, phb->ops->device_init, NULL);
+ }
+}
+
+static int64_t pci_slot_run_sm(struct pci_slot *slot)
+{
+ uint64_t now = mftb();
+ int64_t ret;
+
+ /* Return remaining timeout if we're still waiting */
+ if (slot->delay_tgt_tb &&
+ tb_compare(now, slot->delay_tgt_tb) == TB_ABEFOREB)
+ return slot->delay_tgt_tb - now;
+
+ slot->delay_tgt_tb = 0;
+ switch (slot->state & PCI_SLOT_STATE_MASK) {
+ case PCI_SLOT_STATE_LINK:
+ ret = slot->ops.poll_link(slot);
+ break;
+ case PCI_SLOT_STATE_HRESET:
+ ret = slot->ops.hreset(slot);
+ break;
+ case PCI_SLOT_STATE_FRESET:
+ ret = slot->ops.freset(slot);
+ break;
+ case PCI_SLOT_STATE_CRESET:
+ ret = slot->ops.creset(slot);
+ break;
+ default:
+ prlog(PR_ERR, PCI_SLOT_PREFIX
+ "Invalid state %08x\n", slot->id, slot->state);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ ret = OPAL_HARDWARE;
+ }
+
+ /* Notify about the pci slot state machine completion */
+ if (ret <= 0 && slot->ops.completed_sm_run)
+ slot->ops.completed_sm_run(slot, ret);
+
+ return ret;
+}
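+
+/*
+ * Return value contract for run_sm(): a positive value means "call again
+ * after this many timebase ticks", zero means the state machine has
+ * completed, and a negative value is an OPAL error code. For example,
+ * opal_pci_poll() in core/pci-opal.c converts the tick count into
+ * milliseconds for the host:
+ *
+ *     rc = slot->ops.run_sm(slot);
+ *     if (rc > 0) {
+ *             rc = tb_to_msecs(rc);   // round up to at least 1ms
+ *             if (rc == 0)
+ *                     rc = 1;
+ *     }
+ */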
+
+void pci_slot_add_dt_properties(struct pci_slot *slot,
+ struct dt_node *np)
+{
+ /* Bail without device node */
+ if (!np)
+ return;
+
+ dt_add_property_cells(np, "ibm,reset-by-firmware", 1);
+ dt_add_property_cells(np, "ibm,slot-pluggable", slot->pluggable);
+ dt_add_property_cells(np, "ibm,slot-surprise-pluggable",
+ slot->surprise_pluggable);
+ if (pci_slot_has_flags(slot, PCI_SLOT_FLAG_BROKEN_PDC))
+ dt_add_property_cells(np, "ibm,slot-broken-pdc", 1);
+
+ dt_add_property_cells(np, "ibm,slot-power-ctl", slot->power_ctl);
+ dt_add_property_cells(np, "ibm,slot-power-led-ctlled",
+ slot->power_led_ctl);
+ dt_add_property_cells(np, "ibm,slot-attn-led", slot->attn_led_ctl);
+ dt_add_property_cells(np, "ibm,slot-connector-type",
+ slot->connector_type);
+ dt_add_property_cells(np, "ibm,slot-card-desc", slot->card_desc);
+ dt_add_property_cells(np, "ibm,slot-card-mech", slot->card_mech);
+ dt_add_property_cells(np, "ibm,slot-wired-lanes", slot->wired_lanes);
+ dt_add_property_cells(np, "ibm,power-limit", slot->power_limit);
+
+ if (slot->ops.add_properties)
+ slot->ops.add_properties(slot, np);
+}
+
+struct pci_slot *pci_slot_alloc(struct phb *phb,
+ struct pci_device *pd)
+{
+ struct pci_slot *slot = NULL;
+
+ /*
+ * The function can be used to allocate either a PHB slot or a normal
+ * one. In both cases, @phb must be valid.
+ */
+ if (!phb)
+ return NULL;
+
+ /*
+ * When @pd is NULL, we're going to create a PHB slot. Otherwise,
+ * a normal slot will be created. Check if the specified slot
+ * already exists or not.
+ */
+ slot = pd ? pd->slot : phb->slot;
+ if (slot) {
+ prlog(PR_ERR, PCI_SLOT_PREFIX "Already exists\n", slot->id);
+ return slot;
+ }
+
+ /* Allocate memory chunk */
+ slot = zalloc(sizeof(struct pci_slot));
+ if (!slot) {
+ prlog(PR_ERR, "%s: Out of memory\n", __func__);
+ return NULL;
+ }
+
+ /*
+ * The polling function shouldn't be overridden by individual
+ * platforms
+ */
+ slot->phb = phb;
+ slot->pd = pd;
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ slot->power_state = PCI_SLOT_POWER_ON;
+ slot->ops.run_sm = pci_slot_run_sm;
+ slot->ops.prepare_link_change = pci_slot_prepare_link_change;
+ slot->peer_slot = NULL;
+ if (!pd) {
+ slot->id = PCI_PHB_SLOT_ID(phb);
+ phb->slot = slot;
+ } else {
+ slot->id = PCI_SLOT_ID(phb, pd->bdfn);
+ pd->slot = slot;
+ }
+
+ return slot;
+}
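+
+/*
+ * Illustrative sketch of the usual caller pattern: a PHB backend
+ * allocates the slot and then fills in whichever reset/power hooks it
+ * implements (my_freset and friends below are hypothetical):
+ *
+ *     struct pci_slot *slot = pci_slot_alloc(phb, pd);
+ *
+ *     if (slot) {
+ *             slot->ops.freset = my_freset;
+ *             slot->ops.hreset = my_hreset;
+ *             slot->ops.get_link_state = my_get_link_state;
+ *     }
+ */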
+
+struct pci_slot *pci_slot_find(uint64_t id)
+{
+ struct phb *phb;
+ struct pci_device *pd;
+ struct pci_slot *slot;
+ uint64_t index;
+ uint16_t bdfn;
+
+ index = PCI_SLOT_PHB_INDEX(id);
+ phb = pci_get_phb(index);
+
+ /* PHB slot */
+ if (!(id & PCI_SLOT_ID_PREFIX)) {
+ slot = phb ? phb->slot : NULL;
+ return slot;
+ }
+
+ /* Normal PCI slot */
+ bdfn = PCI_SLOT_BDFN(id);
+ pd = phb ? pci_find_dev(phb, bdfn) : NULL;
+ slot = pd ? pd->slot : NULL;
+ return slot;
+}
+
+void pci_slot_add_loc(struct pci_slot *slot,
+ struct dt_node *np, const char *label)
+{
+ char tmp[8], loc_code[LOC_CODE_SIZE];
+ struct pci_device *pd = slot->pd;
+ struct phb *phb = slot->phb;
+
+ if (!np)
+ return;
+
+ /* didn't get a real slot label? generate one! */
+ if (!label) {
+ snprintf(tmp, sizeof(tmp), "S%04x%02x", phb->opal_id,
+ pd->secondary_bus);
+ label = tmp;
+ }
+
+ /* Make a <PHB_LOC_CODE>-<LABEL> pair if we have a PHB loc code */
+ if (phb->base_loc_code) {
+ snprintf(loc_code, sizeof(loc_code), "%s-%s",
+ phb->base_loc_code, label);
+ } else {
+ strncpy(loc_code, label, sizeof(loc_code) - 1);
+ loc_code[LOC_CODE_SIZE - 1] = '\0';
+ }
+
+ dt_add_property_string(np, "ibm,slot-label", label);
+ dt_add_property_string(np, "ibm,slot-location-code", loc_code);
+}
diff --git a/roms/skiboot/core/pci-virt.c b/roms/skiboot/core/pci-virt.c
new file mode 100644
index 000000000..e0cb9949c
--- /dev/null
+++ b/roms/skiboot/core/pci-virt.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Support virtual PCI devices
+ *
+ * Copyright 2013-2016 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <pci.h>
+#include <pci-virt.h>
+
+void pci_virt_cfg_read_raw(struct pci_virt_device *pvd,
+ uint32_t space, uint32_t offset,
+ uint32_t size, uint32_t *data)
+{
+ uint32_t i;
+
+ if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space])
+ return;
+
+ for (*data = 0, i = 0; i < size; i++)
+ *data |= ((uint32_t)(pvd->config[space][offset + i]) << (i * 8));
+}
+
+void pci_virt_cfg_write_raw(struct pci_virt_device *pvd,
+ uint32_t space, uint32_t offset,
+ uint32_t size, uint32_t data)
+{
+ int i;
+
+ if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space])
+ return;
+
+ for (i = 0; i < size; i++) {
+ pvd->config[space][offset + i] = data;
+ data = (data >> 8);
+ }
+}
+
+static struct pci_cfg_reg_filter *pci_virt_find_filter(
+ struct pci_virt_device *pvd,
+ uint32_t start, uint32_t len)
+{
+ struct pci_cfg_reg_filter *pcrf;
+
+ if (!pvd || !len || start >= pvd->cfg_size)
+ return NULL;
+
+ /* Return the filter if there is an overlapping region. We don't
+ * require strict matching for more flexibility. It also
+ * means the associated handler should validate the register
+ * offset and length.
+ */
+ list_for_each(&pvd->pcrf, pcrf, link) {
+ if (start < (pcrf->start + pcrf->len) &&
+ (start + len) > pcrf->start)
+ return pcrf;
+ }
+
+ return NULL;
+}
+
+struct pci_cfg_reg_filter *pci_virt_add_filter(struct pci_virt_device *pvd,
+ uint32_t start,
+ uint32_t len,
+ uint32_t flags,
+ pci_cfg_reg_func func,
+ void *data)
+{
+ struct pci_cfg_reg_filter *pcrf;
+
+ if (!pvd || !len || (start + len) >= pvd->cfg_size)
+ return NULL;
+ if (!(flags & PCI_REG_FLAG_MASK))
+ return NULL;
+
+ pcrf = pci_virt_find_filter(pvd, start, len);
+ if (pcrf) {
+ prlog(PR_ERR, "%s: Filter [%x, %x] overlapped with [%x, %x]\n",
+ __func__, start, len, pcrf->start, pcrf->len);
+ return NULL;
+ }
+
+ pcrf = zalloc(sizeof(*pcrf));
+ if (!pcrf) {
+ prlog(PR_ERR, "%s: Out of memory!\n", __func__);
+ return NULL;
+ }
+
+ pcrf->start = start;
+ pcrf->len = len;
+ pcrf->flags = flags;
+ pcrf->func = func;
+ pcrf->data = data;
+ list_add_tail(&pvd->pcrf, &pcrf->link);
+
+ return pcrf;
+}
+
+struct pci_virt_device *pci_virt_find_device(struct phb *phb,
+ uint32_t bdfn)
+{
+ struct pci_virt_device *pvd;
+
+ list_for_each(&phb->virt_devices, pvd, node) {
+ if (pvd->bdfn == bdfn)
+ return pvd;
+ }
+
+ return NULL;
+}
+
+static inline bool pci_virt_cfg_valid(struct pci_virt_device *pvd,
+ uint32_t offset, uint32_t size)
+{
+ if ((offset + size) > pvd->cfg_size)
+ return false;
+
+ if (!size || (size > 4))
+ return false;
+
+ if ((size & (size - 1)) || (offset & (size - 1)))
+ return false;
+
+ return true;
+}
+
+int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn,
+ uint32_t offset, uint32_t size,
+ uint32_t *data)
+{
+ struct pci_virt_device *pvd;
+ struct pci_cfg_reg_filter *pcrf;
+ int64_t ret = OPAL_SUCCESS;
+
+ *data = 0xffffffff;
+
+ /* Search for PCI virtual device */
+ pvd = pci_virt_find_device(phb, bdfn);
+ if (!pvd)
+ return OPAL_PARAMETER;
+
+ /* Check if config address is valid or not */
+ if (!pci_virt_cfg_valid(pvd, offset, size))
+ return OPAL_PARAMETER;
+
+ /* The value is fetched from the normal config space when the
+ * trap handler returns OPAL_PARTIAL. Otherwise, the trap handler
+ * should provide the return value.
+ */
+ pcrf = pci_virt_find_filter(pvd, offset, size);
+ if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_READ))
+ goto out;
+
+ ret = pcrf->func(pvd, pcrf, offset, size, data, false);
+ if (ret != OPAL_PARTIAL)
+ return ret;
+out:
+ pci_virt_cfg_read_raw(pvd, PCI_VIRT_CFG_NORMAL, offset, size, data);
+ return OPAL_SUCCESS;
+}
+
+int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn,
+ uint32_t offset, uint32_t size,
+ uint32_t data)
+{
+ struct pci_virt_device *pvd;
+ struct pci_cfg_reg_filter *pcrf;
+ uint32_t val, v, r, c, i;
+ int64_t ret = OPAL_SUCCESS;
+
+ /* Search for PCI virtual device */
+ pvd = pci_virt_find_device(phb, bdfn);
+ if (!pvd)
+ return OPAL_PARAMETER;
+
+ /* Check if config address is valid or not */
+ if (!pci_virt_cfg_valid(pvd, offset, size))
+ return OPAL_PARAMETER;
+
+ /* The value is written to the config space if the trap handler
+ * returns OPAL_PARTIAL. Otherwise, the value to be written is
+ * dropped.
+ */
+ pcrf = pci_virt_find_filter(pvd, offset, size);
+ if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_WRITE))
+ goto out;
+
+ ret = pcrf->func(pvd, pcrf, offset, size, &data, true);
+ if (ret != OPAL_PARTIAL)
+ return ret;
+out:
+ val = data;
+ for (i = 0; i < size; i++) {
+ PCI_VIRT_CFG_NORMAL_RD(pvd, offset + i, 1, &v);
+ PCI_VIRT_CFG_RDONLY_RD(pvd, offset + i, 1, &r);
+ PCI_VIRT_CFG_W1CLR_RD(pvd, offset + i, 1, &c);
+
+ /* Drop read-only bits */
+ val &= ~(r << (i * 8));
+ val |= (r & v) << (i * 8);
+
+ /* Drop W1C bits */
+ val &= ~(val & ((c & v) << (i * 8)));
+ }
+
+ PCI_VIRT_CFG_NORMAL_WR(pvd, offset, size, val);
+ return OPAL_SUCCESS;
+}
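+
+/*
+ * Illustrative sketch of how a backend hooks a register of a virtual
+ * device (the 0x40 offset and the handler below are made up):
+ *
+ *     static int64_t my_reg_filter(void *dev, struct pci_cfg_reg_filter *pcrf,
+ *                                  uint32_t offset, uint32_t len,
+ *                                  uint32_t *data, bool write)
+ *     {
+ *             if (write)
+ *                     return OPAL_SUCCESS;    // swallow the write
+ *             return OPAL_PARTIAL;            // read falls back to config space
+ *     }
+ *
+ *     pci_virt_add_filter(pvd, 0x40, 4,
+ *                         PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
+ *                         my_reg_filter, NULL);
+ */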
+
+struct pci_virt_device *pci_virt_add_device(struct phb *phb, uint32_t bdfn,
+ uint32_t cfg_size, void *data)
+{
+ struct pci_virt_device *pvd;
+ uint8_t *cfg;
+ uint32_t i;
+
+ /* The standard config header size is 64 bytes */
+ if (!phb || (bdfn & 0xffff0000) || (cfg_size < 64))
+ return NULL;
+
+ /* Check if the bdfn is available */
+ pvd = pci_virt_find_device(phb, bdfn);
+ if (pvd) {
+ prlog(PR_ERR, "%s: bdfn 0x%x was reserved\n",
+ __func__, bdfn);
+ return NULL;
+ }
+
+ /* Populate the PCI virtual device */
+ pvd = zalloc(sizeof(*pvd));
+ if (!pvd) {
+ prlog(PR_ERR, "%s: Cannot alloate PCI virtual device (0x%x)\n",
+ __func__, bdfn);
+ return NULL;
+ }
+
+ cfg = zalloc(cfg_size * PCI_VIRT_CFG_MAX);
+ if (!cfg) {
+ prlog(PR_ERR, "%s: Cannot allocate config space (0x%x)\n",
+ __func__, bdfn);
+ free(pvd);
+ return NULL;
+ }
+
+ for (i = 0; i < PCI_VIRT_CFG_MAX; i++, cfg += cfg_size)
+ pvd->config[i] = cfg;
+
+ pvd->bdfn = bdfn;
+ pvd->cfg_size = cfg_size;
+ pvd->data = data;
+ list_head_init(&pvd->pcrf);
+ list_add_tail(&phb->virt_devices, &pvd->node);
+
+ return pvd;
+}
diff --git a/roms/skiboot/core/pci.c b/roms/skiboot/core/pci.c
new file mode 100644
index 000000000..e195ecbf4
--- /dev/null
+++ b/roms/skiboot/core/pci.c
@@ -0,0 +1,1962 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Base PCI support
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-slot.h>
+#include <pci-quirk.h>
+#include <timebase.h>
+#include <device.h>
+
+#define MAX_PHB_ID 256
+static struct phb *phbs[MAX_PHB_ID];
+int last_phb_id = 0;
+
+/*
+ * Generic PCI utilities
+ */
+
+static int64_t __pci_find_cap(struct phb *phb, uint16_t bdfn,
+ uint8_t want, bool check_cap_indicator)
+{
+ int64_t rc;
+ uint16_t stat, cap;
+ uint8_t pos, next;
+
+ rc = pci_cfg_read16(phb, bdfn, PCI_CFG_STAT, &stat);
+ if (rc)
+ return rc;
+ if (check_cap_indicator && !(stat & PCI_CFG_STAT_CAP))
+ return OPAL_UNSUPPORTED;
+ rc = pci_cfg_read8(phb, bdfn, PCI_CFG_CAP, &pos);
+ if (rc)
+ return rc;
+ pos &= 0xfc;
+ while(pos) {
+ rc = pci_cfg_read16(phb, bdfn, pos, &cap);
+ if (rc)
+ return rc;
+ if ((cap & 0xff) == want)
+ return pos;
+ next = (cap >> 8) & 0xfc;
+ if (next == pos) {
+ PCIERR(phb, bdfn, "pci_find_cap hit a loop !\n");
+ break;
+ }
+ pos = next;
+ }
+ return OPAL_UNSUPPORTED;
+}
+
+/* pci_find_cap - Find a PCI capability in a device config space
+ *
+ * This will return a config space offset (positive) or a negative
+ * error (OPAL error codes).
+ *
+ * OPAL_UNSUPPORTED is returned if the capability doesn't exist
+ */
+int64_t pci_find_cap(struct phb *phb, uint16_t bdfn, uint8_t want)
+{
+ return __pci_find_cap(phb, bdfn, want, true);
+}
+
+/* pci_find_ecap - Find a PCIe extended capability in a device
+ * config space
+ *
+ * This will return a config space offset (positive) or a negative
+ * error (OPAL error code). Additionally, if the "version" argument
+ * is non-NULL, the capability version will be returned there.
+ *
+ * OPAL_UNSUPPORTED is returned if the capability doesn't exist
+ */
+int64_t pci_find_ecap(struct phb *phb, uint16_t bdfn, uint16_t want,
+ uint8_t *version)
+{
+ int64_t rc;
+ uint32_t cap;
+ uint16_t off, prev = 0;
+
+ for (off = 0x100; off && off < 0x1000; off = (cap >> 20) & 0xffc ) {
+ if (off == prev) {
+ PCIERR(phb, bdfn, "pci_find_ecap hit a loop !\n");
+ break;
+ }
+ prev = off;
+ rc = pci_cfg_read32(phb, bdfn, off, &cap);
+ if (rc)
+ return rc;
+
+ /* no ecaps supported */
+ if (cap == 0 || (cap & 0xffff) == 0xffff)
+ return OPAL_UNSUPPORTED;
+
+ if ((cap & 0xffff) == want) {
+ if (version)
+ *version = (cap >> 16) & 0xf;
+ return off;
+ }
+ }
+ return OPAL_UNSUPPORTED;
+}
+
+static void pci_init_pcie_cap(struct phb *phb, struct pci_device *pd)
+{
+ int64_t ecap = 0;
+ uint16_t reg;
+ uint32_t val;
+
+ /* On the upstream port of PLX bridge 8724 (rev ba), PCI_STATUS
+ * register doesn't have the capability indicator even though it
+ * supports various PCI capabilities. So we need to ignore that bit
+ * when looking for PCI capabilities on the upstream port, which is
+ * limited to the one that sits directly under the root port.
+ */
+ if (pd->vdid == 0x872410b5 && pd->parent && !pd->parent->parent) {
+ uint8_t rev;
+
+ pci_cfg_read8(phb, pd->bdfn, PCI_CFG_REV_ID, &rev);
+ if (rev == 0xba)
+ ecap = __pci_find_cap(phb, pd->bdfn,
+ PCI_CFG_CAP_ID_EXP, false);
+ else
+ ecap = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_EXP);
+ } else {
+ ecap = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_EXP);
+ }
+
+ if (ecap <= 0) {
+ pd->dev_type = PCIE_TYPE_LEGACY;
+ return;
+ }
+
+ pci_set_cap(pd, PCI_CFG_CAP_ID_EXP, ecap, NULL, NULL, false);
+
+ /*
+ * XXX We observe a problem on some PLX switches where one
+ * of the downstream ports appears as an upstream port, we
+ * fix that up here, otherwise other code will misbehave
+ */
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_CAPABILITY_REG, &reg);
+ pd->dev_type = GETFIELD(PCICAP_EXP_CAP_TYPE, reg);
+ if (pd->parent && pd->parent->dev_type == PCIE_TYPE_SWITCH_UPPORT &&
+ pd->vdid == 0x874810b5 && pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
+ PCIDBG(phb, pd->bdfn, "Fixing up bad PLX downstream port !\n");
+ pd->dev_type = PCIE_TYPE_SWITCH_DNPORT;
+ }
+
+ /* XXX Handle ARI */
+ if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT ||
+ pd->dev_type == PCIE_TYPE_ROOT_PORT)
+ pd->scan_map = 0x1;
+
+ /* Read MPS capability, whose maximal size is 4096 */
+ pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCAP, &val);
+ pd->mps = (128 << GETFIELD(PCICAP_EXP_DEVCAP_MPSS, val));
+ if (pd->mps > 4096)
+ pd->mps = 4096;
+}
+
+static void pci_init_aer_cap(struct phb *phb, struct pci_device *pd)
+{
+ int64_t pos;
+
+ if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
+ return;
+
+ pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_AER, NULL);
+ if (pos > 0)
+ pci_set_cap(pd, PCIECAP_ID_AER, pos, NULL, NULL, true);
+}
+
+static void pci_init_pm_cap(struct phb *phb, struct pci_device *pd)
+{
+ int64_t pos;
+
+ pos = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_PM);
+ if (pos > 0)
+ pci_set_cap(pd, PCI_CFG_CAP_ID_PM, pos, NULL, NULL, false);
+}
+
+void pci_init_capabilities(struct phb *phb, struct pci_device *pd)
+{
+ pci_init_pcie_cap(phb, pd);
+ pci_init_aer_cap(phb, pd);
+ pci_init_pm_cap(phb, pd);
+}
+
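+/*
+ * Wait for a device to exit Configuration Request Retry Status (CRS):
+ * while the device isn't ready yet, a vendor/device ID read returns the
+ * CRS sentinel 0xffff0001, so poll for up to 40 * 100ms before giving up.
+ */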
+bool pci_wait_crs(struct phb *phb, uint16_t bdfn, uint32_t *out_vdid)
+{
+ uint32_t retries, vdid;
+ int64_t rc;
+ bool had_crs = false;
+
+ for (retries = 0; retries < 40; retries++) {
+ rc = pci_cfg_read32(phb, bdfn, PCI_CFG_VENDOR_ID, &vdid);
+ if (rc)
+ return false;
+ if (vdid == 0xffffffff || vdid == 0x00000000)
+ return false;
+ if (vdid != 0xffff0001)
+ break;
+ had_crs = true;
+ time_wait_ms(100);
+ }
+ if (vdid == 0xffff0001) {
+ PCIERR(phb, bdfn, "CRS timeout !\n");
+ return false;
+ }
+ if (had_crs)
+ PCIDBG(phb, bdfn, "Probe success after %d CRS\n", retries);
+
+ if (out_vdid)
+ *out_vdid = vdid;
+ return true;
+}
+
+static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *parent,
+ uint16_t bdfn)
+{
+ struct pci_device *pd = NULL;
+ uint32_t vdid;
+ int64_t rc;
+ uint8_t htype;
+
+ if (!pci_wait_crs(phb, bdfn, &vdid))
+ return NULL;
+
+ /* Perform a dummy write to the device in order for it to
+ * capture its own bus number, so any subsequent error
+ * messages will be properly tagged
+ */
+ pci_cfg_write32(phb, bdfn, PCI_CFG_VENDOR_ID, vdid);
+
+ pd = zalloc(sizeof(struct pci_device));
+ if (!pd) {
+ PCIERR(phb, bdfn,"Failed to allocate structure pci_device !\n");
+ goto fail;
+ }
+ pd->phb = phb;
+ pd->bdfn = bdfn;
+ pd->vdid = vdid;
+ pci_cfg_read32(phb, bdfn, PCI_CFG_SUBSYS_VENDOR_ID, &pd->sub_vdid);
+ pci_cfg_read32(phb, bdfn, PCI_CFG_REV_ID, &pd->class);
+ pd->class >>= 8;
+
+ pd->parent = parent;
+ list_head_init(&pd->pcrf);
+ list_head_init(&pd->children);
+ rc = pci_cfg_read8(phb, bdfn, PCI_CFG_HDR_TYPE, &htype);
+ if (rc) {
+ PCIERR(phb, bdfn, "Failed to read header type !\n");
+ goto fail;
+ }
+ pd->is_multifunction = !!(htype & 0x80);
+ pd->is_bridge = (htype & 0x7f) != 0;
+ pd->is_vf = false;
+ pd->scan_map = 0xffffffff; /* Default */
+ pd->primary_bus = PCI_BUS_NUM(bdfn);
+
+ pci_init_capabilities(phb, pd);
+
+ /* If it's a bridge, sanitize the bus numbers to avoid forwarding
+ *
+ * This will help when walking down those bridges later on
+ */
+ if (pd->is_bridge) {
+ pci_cfg_write8(phb, bdfn, PCI_CFG_PRIMARY_BUS, pd->primary_bus);
+ pci_cfg_write8(phb, bdfn, PCI_CFG_SECONDARY_BUS, 0);
+ pci_cfg_write8(phb, bdfn, PCI_CFG_SUBORDINATE_BUS, 0);
+ }
+
+ /* XXX Need to do some basic setups, such as MPSS, MRS,
+ * RCB, etc...
+ */
+
+ PCIDBG(phb, bdfn, "Found VID:%04x DEV:%04x TYP:%d MF%s BR%s EX%s\n",
+ vdid & 0xffff, vdid >> 16, pd->dev_type,
+ pd->is_multifunction ? "+" : "-",
+ pd->is_bridge ? "+" : "-",
+ pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) ? "+" : "-");
+
+ /* Try to get PCI slot behind the device */
+ if (platform.pci_get_slot_info)
+ platform.pci_get_slot_info(phb, pd);
+
+ /* Add it to the child device list of the PHB or the parent */
+ if (!parent)
+ list_add_tail(&phb->devices, &pd->link);
+ else
+ list_add_tail(&parent->children, &pd->link);
+
+ /*
+ * Call PHB hook
+ */
+ if (phb->ops->device_init)
+ phb->ops->device_init(phb, pd, NULL);
+
+ return pd;
+ fail:
+ if (pd)
+ free(pd);
+ return NULL;
+}
+
+/* pci_check_clear_freeze - Probing an empty slot will result in an EEH
+ * freeze. Currently we have a single PE mapping
+ * everything (default state of our backend) so
+ * we just check and clear the state of PE#0
+ *
+ * returns true if a freeze was detected
+ *
+ * NOTE: We currently only handle simple PE freeze, not PHB fencing
+ * (or rather our backend does)
+ */
+bool pci_check_clear_freeze(struct phb *phb)
+{
+ uint8_t freeze_state;
+ uint16_t pci_error_type, sev;
+ int64_t pe_number, rc;
+
+ /* Retrieve the reserved PE number */
+ pe_number = OPAL_PARAMETER;
+ if (phb->ops->get_reserved_pe_number)
+ pe_number = phb->ops->get_reserved_pe_number(phb);
+ if (pe_number < 0)
+ return false;
+
+ /* Retrieve the frozen state */
+ rc = phb->ops->eeh_freeze_status(phb, pe_number, &freeze_state,
+ &pci_error_type, &sev);
+ if (rc)
+ return true; /* phb fence? */
+
+ if (freeze_state == OPAL_EEH_STOPPED_NOT_FROZEN)
+ return false;
+ /* We can't handle anything worse than an ER here */
+ if (sev > OPAL_EEH_SEV_NO_ERROR &&
+ sev < OPAL_EEH_SEV_PE_ER) {
+ PCIERR(phb, 0, "Fatal probe in %s error !\n", __func__);
+ return true;
+ }
+
+ phb->ops->eeh_freeze_clear(phb, pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ return true;
+}
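+
+/*
+ * Minimal usage sketch (matching how pci_scan_bus() below drives it):
+ * probing the config space of an empty slot may freeze the reserved
+ * PE, so every probe is followed by a check-and-clear.
+ *
+ *     pd = pci_scan_one(phb, parent, bdfn);
+ *     if (pci_check_clear_freeze(phb))
+ *         PCIDBG(phb, bdfn, "Probe froze PE, cleared it\n");
+ */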
+
+/*
+ * Turn off the slot's power supply if nothing is connected, for two
+ * purposes: power saving, obviously, and putting the slot into its
+ * initial power-off state for hotplug.
+ *
+ * The power should be turned on if the downstream link of the slot
+ * isn't up.
+ */
+static void pci_slot_set_power_state(struct phb *phb,
+ struct pci_device *pd,
+ uint8_t state)
+{
+ struct pci_slot *slot;
+ uint8_t cur_state;
+ int32_t wait = 100;
+ int64_t rc;
+
+ if (!pd || !pd->slot)
+ return;
+
+ slot = pd->slot;
+ if (!slot->pluggable ||
+ !slot->ops.get_power_state ||
+ !slot->ops.set_power_state)
+ return;
+
+ if (state == PCI_SLOT_POWER_OFF) {
+ /* Bail if something is connected */
+ if (!list_empty(&pd->children)) {
+ PCIERR(phb, pd->bdfn, "Attempted to power off slot with attached devices!\n");
+ return;
+ }
+
+ pci_slot_add_flags(slot, PCI_SLOT_FLAG_BOOTUP);
+ rc = slot->ops.get_power_state(slot, &cur_state);
+ if (rc != OPAL_SUCCESS) {
+ PCINOTICE(phb, pd->bdfn, "Error %lld getting slot power state\n", rc);
+ cur_state = PCI_SLOT_POWER_OFF;
+ }
+
+ pci_slot_remove_flags(slot, PCI_SLOT_FLAG_BOOTUP);
+ if (cur_state == PCI_SLOT_POWER_OFF)
+ return;
+ }
+
+ pci_slot_add_flags(slot,
+ (PCI_SLOT_FLAG_BOOTUP | PCI_SLOT_FLAG_ENFORCE));
+ rc = slot->ops.set_power_state(slot, state);
+ if (rc == OPAL_SUCCESS)
+ goto success;
+ if (rc != OPAL_ASYNC_COMPLETION) {
+ PCINOTICE(phb, pd->bdfn, "Error %lld powering %s slot\n",
+ rc, state == PCI_SLOT_POWER_ON ? "on" : "off");
+ goto error;
+ }
+
+ /* Wait until the operation is completed */
+ do {
+ if (slot->state == PCI_SLOT_STATE_SPOWER_DONE)
+ break;
+
+ check_timers(false);
+ time_wait_ms(10);
+ } while (--wait >= 0);
+
+ if (wait < 0) {
+ PCINOTICE(phb, pd->bdfn, "Timeout powering %s slot\n",
+ state == PCI_SLOT_POWER_ON ? "on" : "off");
+ goto error;
+ }
+
+success:
+ PCIDBG(phb, pd->bdfn, "Powering %s hotpluggable slot\n",
+ state == PCI_SLOT_POWER_ON ? "on" : "off");
+error:
+ pci_slot_remove_flags(slot,
+ (PCI_SLOT_FLAG_BOOTUP | PCI_SLOT_FLAG_ENFORCE));
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+}
+
+static bool pci_bridge_power_on(struct phb *phb, struct pci_device *pd)
+{
+ int32_t ecap;
+ uint16_t pcie_cap, slot_sts, slot_ctl, link_ctl;
+ uint32_t slot_cap;
+ int64_t rc;
+
+ /*
+ * If there is a PCI slot associated with the bridge, use
+ * the PCI slot's facility to power it on.
+ */
+ if (pd->slot) {
+ struct pci_slot *slot = pd->slot;
+ uint8_t presence;
+
+ /*
+ * We assume the presence state is OPAL_PCI_SLOT_PRESENT
+ * by default. That way we won't miss anything when the
+ * operation isn't supported or an error is hit while
+ * retrieving it.
+ */
+ if (slot->ops.get_presence_state) {
+ rc = slot->ops.get_presence_state(slot, &presence);
+ if (rc == OPAL_SUCCESS &&
+ presence == OPAL_PCI_SLOT_EMPTY)
+ return false;
+ }
+
+ /* Power it on */
+ pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_ON);
+ return true;
+ }
+
+ if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
+ return true;
+
+ /* Check if slot is supported */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn,
+ ecap + PCICAP_EXP_CAPABILITY_REG, &pcie_cap);
+ if (!(pcie_cap & PCICAP_EXP_CAP_SLOT))
+ return true;
+
+ /* Check presence */
+ pci_cfg_read16(phb, pd->bdfn,
+ ecap + PCICAP_EXP_SLOTSTAT, &slot_sts);
+ if (!(slot_sts & PCICAP_EXP_SLOTSTAT_PDETECTST))
+ return false;
+
+ /* Ensure that power control is supported */
+ pci_cfg_read32(phb, pd->bdfn,
+ ecap + PCICAP_EXP_SLOTCAP, &slot_cap);
+ if (!(slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL))
+ return true;
+
+
+ /* Read the slot control register, check if the slot is off */
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &slot_ctl);
+ PCITRACE(phb, pd->bdfn, " SLOT_CTL=%04x\n", slot_ctl);
+ if (slot_ctl & PCICAP_EXP_SLOTCTL_PWRCTLR) {
+ PCIDBG(phb, pd->bdfn, "Bridge power is off, turning on ...\n");
+ slot_ctl &= ~PCICAP_EXP_SLOTCTL_PWRCTLR;
+ slot_ctl |= SETFIELD(PCICAP_EXP_SLOTCTL_PWRI, 0, PCIE_INDIC_ON);
+ pci_cfg_write16(phb, pd->bdfn,
+ ecap + PCICAP_EXP_SLOTCTL, slot_ctl);
+
+ /* Wait a couple of seconds */
+ time_wait_ms(2000);
+ }
+
+ /* Enable link */
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, &link_ctl);
+ PCITRACE(phb, pd->bdfn, " LINK_CTL=%04x\n", link_ctl);
+ link_ctl &= ~PCICAP_EXP_LCTL_LINK_DIS;
+ pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, link_ctl);
+
+ return true;
+}
+
+static bool pci_bridge_wait_link(struct phb *phb,
+ struct pci_device *pd,
+ bool was_reset)
+{
+ int32_t ecap = 0;
+ uint32_t link_cap = 0, retries = 100;
+ uint16_t link_sts;
+
+ if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) {
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_LCAP, &link_cap);
+ }
+
+ /*
+ * If link state reporting isn't supported, wait 1 second
+ * if the downstream link was ever reset.
+ */
+ if (!(link_cap & PCICAP_EXP_LCAP_DL_ACT_REP)) {
+ if (was_reset)
+ time_wait_ms(1000);
+
+ return true;
+ }
+
+ /*
+ * Link state reporting is supported; wait for the link to
+ * come up, with a timeout.
+ */
+ PCIDBG(phb, pd->bdfn, "waiting for link... \n");
+ while (retries--) {
+ pci_cfg_read16(phb, pd->bdfn,
+ ecap + PCICAP_EXP_LSTAT, &link_sts);
+ if (link_sts & PCICAP_EXP_LSTAT_DLLL_ACT)
+ break;
+
+ time_wait_ms(100);
+ }
+
+ if (!(link_sts & PCICAP_EXP_LSTAT_DLLL_ACT)) {
+ PCIERR(phb, pd->bdfn, "Timeout waiting for downstream link\n");
+ return false;
+ }
+
+ /* Need another 100ms before touching the config space */
+ time_wait_ms(100);
+ PCIDBG(phb, pd->bdfn, "link is up\n");
+
+ return true;
+}
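+
+/*
+ * In short, the two paths above are (a sketch of the behaviour, not new
+ * logic): bridges advertising Data Link Layer Active Reporting in LCAP
+ * are polled on LSTAT's DLLL_ACT bit for up to 100 * 100ms; everything
+ * else just gets a fixed 1s settle time after a reset. Callers use it
+ * like:
+ *
+ *     if (!pci_bridge_wait_link(phb, pd, was_reset))
+ *         return false;    // link never came up, don't scan below it
+ */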
+
+/* pci_enable_bridge - Called before scanning a bridge
+ *
+ * Ensures error flags are clean, disables master abort reporting,
+ * and checks that the subordinate bus isn't held in reset, that the
+ * slot is powered on for PCIe, etc...
+ */
+static bool pci_enable_bridge(struct phb *phb, struct pci_device *pd)
+{
+ uint16_t bctl;
+ bool was_reset = false;
+
+ /* Disable master aborts, clear errors */
+ pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &bctl);
+ bctl &= ~PCI_CFG_BRCTL_MABORT_REPORT;
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl);
+
+
+ /* PCI-E bridge, check the slot state. We don't do that on the
+ * root complex as this is handled separately and not all our
+ * RCs implement the standard register set.
+ */
+ if ((pd->dev_type == PCIE_TYPE_ROOT_PORT && pd->primary_bus > 0) ||
+ pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
+ if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) {
+ int32_t ecap;
+ uint32_t link_cap = 0;
+ uint16_t link_sts = 0;
+
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read32(phb, pd->bdfn,
+ ecap + PCICAP_EXP_LCAP, &link_cap);
+
+ /*
+ * No need to touch the power supply if the PCIe link is
+ * already up. Furthermore, the slot presence bit is lost while
+ * the PCIe link is up on certain PCI topologies. In that
+ * case, we need to ignore the slot presence bit and go ahead
+ * with probing; otherwise, the NVMe adapter won't be probed.
+ *
+ * Example topology: PHB3 root port, PLX switch 8748 (10b5:8748),
+ * PLX switch 9733 (10b5:9733), PMC 8546 switch (11f8:8546),
+ * NVMe adapter (1c58:0023).
+ */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read32(phb, pd->bdfn,
+ ecap + PCICAP_EXP_LCAP, &link_cap);
+ pci_cfg_read16(phb, pd->bdfn,
+ ecap + PCICAP_EXP_LSTAT, &link_sts);
+ if ((link_cap & PCICAP_EXP_LCAP_DL_ACT_REP) &&
+ (link_sts & PCICAP_EXP_LSTAT_DLLL_ACT))
+ return true;
+ }
+
+ /* Power on the downstream slot or link */
+ if (!pci_bridge_power_on(phb, pd))
+ return false;
+ }
+
+ /* Clear secondary reset */
+ if (bctl & PCI_CFG_BRCTL_SECONDARY_RESET) {
+ PCIDBG(phb, pd->bdfn,
+ "Bridge secondary reset is on, clearing it ...\n");
+ bctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl);
+ time_wait_ms(1000);
+ was_reset = true;
+ }
+
+ /* PCI-E bridge, wait for link */
+ if (pd->dev_type == PCIE_TYPE_ROOT_PORT ||
+ pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
+ if (!pci_bridge_wait_link(phb, pd, was_reset))
+ return false;
+ }
+
+ /* Clear error status */
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_STAT, 0xffff);
+ return true;
+}
+
+/* Clear up bridge resources */
+static void pci_cleanup_bridge(struct phb *phb, struct pci_device *pd)
+{
+ uint16_t cmd;
+
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_BASE_U16, 0xffff);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_BASE, 0xf0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_LIMIT_U16, 0);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_LIMIT, 0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_BASE, 0xfff0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_LIMIT, 0);
+ pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE_U32, 0xffffffff);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE, 0xfff0);
+ pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT_U32, 0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT, 0);
+
+ /* Note: This is a bit fishy but since we have closed all the
+ * bridge windows above, it shouldn't be a problem. Basically
+ * we enable Memory, IO and Bus Master on the bridge because
+ * some versions of Linux will fail to do it themselves.
+ */
+ pci_cfg_read16(phb, pd->bdfn, PCI_CFG_CMD, &cmd);
+ cmd |= PCI_CFG_CMD_IO_EN | PCI_CFG_CMD_MEM_EN;
+ cmd |= PCI_CFG_CMD_BUS_MASTER_EN;
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_CMD, cmd);
+}
+
+/* Remove all subordinate PCI devices leading from the indicated
+ * PCI bus. It's used to remove all PCI devices behind one PCI
+ * slot at unplugging time
+ */
+void pci_remove_bus(struct phb *phb, struct list_head *list)
+{
+ struct pci_device *pd, *tmp;
+
+ list_for_each_safe(list, pd, tmp, link) {
+ pci_remove_bus(phb, &pd->children);
+
+ if (phb->ops->device_remove)
+ phb->ops->device_remove(phb, pd);
+
+ /* Release device node and PCI slot */
+ if (pd->dn)
+ dt_free(pd->dn);
+ if (pd->slot)
+ free(pd->slot);
+
+ /* Remove from parent list and release itself */
+ list_del(&pd->link);
+ free(pd);
+ }
+}
+
+static void pci_set_power_limit(struct pci_device *pd)
+{
+ uint32_t offset, val;
+ uint16_t caps;
+
+ offset = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ if (!offset)
+ return; /* legacy dev */
+
+ pci_cfg_read16(pd->phb, pd->bdfn,
+ offset + PCICAP_EXP_CAPABILITY_REG, &caps);
+
+ if (!(caps & PCICAP_EXP_CAP_SLOT))
+ return; /* bridge has no slot capabilities */
+ if (!pd->slot || !pd->slot->power_limit)
+ return;
+
+ pci_cfg_read32(pd->phb, pd->bdfn, offset + PCICAP_EXP_SLOTCAP, &val);
+
+ val = SETFIELD(PCICAP_EXP_SLOTCAP_SPLSC, val, 0); /* 1W scale */
+ val = SETFIELD(PCICAP_EXP_SLOTCAP_SPLVA, val, pd->slot->power_limit);
+
+ pci_cfg_write32(pd->phb, pd->bdfn, offset + PCICAP_EXP_SLOTCAP, val);
+
+ /* update the cached copy in the slot */
+ pd->slot->slot_cap = val;
+
+ PCIDBG(pd->phb, pd->bdfn, "Slot power limit set to %dW\n",
+ pd->slot->power_limit);
+}
+
+/* Perform a recursive scan of the bus at bus_number populating
+ * the list passed as an argument. This also performs the bus
+ * numbering, so it returns the largest bus number that was
+ * assigned.
+ *
+ * Note: Eventually this might want to access some VPD information
+ * in order to know what slots to scan and what not etc..
+ *
+ * XXX NOTE: We might want to enable ARI along the way...
+ *
+ * XXX NOTE: We might also want to setup the PCIe MPS/MRSS properly
+ * here as Linux may or may not do it
+ */
+uint8_t pci_scan_bus(struct phb *phb, uint8_t bus, uint8_t max_bus,
+ struct list_head *list, struct pci_device *parent,
+ bool scan_downstream)
+{
+ struct pci_device *pd = NULL, *rc = NULL;
+ uint8_t dev, fn, next_bus, max_sub;
+ uint32_t scan_map;
+
+ /* Decide what to scan */
+ scan_map = parent ? parent->scan_map : phb->scan_map;
+
+ /* Do scan */
+ for (dev = 0; dev < 32; dev++) {
+ if (!(scan_map & (1ul << dev)))
+ continue;
+
+ /* Scan the device */
+ pd = pci_scan_one(phb, parent, (bus << 8) | (dev << 3));
+ pci_check_clear_freeze(phb);
+ if (!pd)
+ continue;
+
+ /* Record RC when its downstream link is down */
+ if (!scan_downstream && dev == 0 && !rc)
+ rc = pd;
+
+ /* XXX Handle ARI */
+ if (!pd->is_multifunction)
+ continue;
+ for (fn = 1; fn < 8; fn++) {
+ pd = pci_scan_one(phb, parent,
+ ((uint16_t)bus << 8) | (dev << 3) | fn);
+ pci_check_clear_freeze(phb);
+ }
+ }
+
+ /* Reserve all possible buses if the RC's downstream link is down
+  * and PCI hotplug is supported.
+ */
+ if (rc && rc->slot && rc->slot->pluggable) {
+ next_bus = bus + 1;
+ rc->secondary_bus = next_bus;
+ rc->subordinate_bus = max_bus;
+ pci_cfg_write8(phb, rc->bdfn, PCI_CFG_SECONDARY_BUS,
+ rc->secondary_bus);
+ pci_cfg_write8(phb, rc->bdfn, PCI_CFG_SUBORDINATE_BUS,
+ rc->subordinate_bus);
+ }
+
+ /* set the power limit for any downstream slots while we're here */
+ list_for_each(list, pd, link) {
+ if (pd->is_bridge)
+ pci_set_power_limit(pd);
+ }
+
+ /*
+ * We only scan downstream if instructed to do so by the
+ * caller. Typically we avoid the scan when we know the
+ * link is down already, which happens for the top level
+ * root complex, and avoids a long secondary timeout
+ */
+ if (!scan_downstream) {
+ list_for_each(list, pd, link)
+ pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_OFF);
+
+ return bus;
+ }
+
+ next_bus = bus + 1;
+ max_sub = bus;
+
+ /* Scan down bridges */
+ list_for_each(list, pd, link) {
+ bool do_scan;
+
+ if (!pd->is_bridge)
+ continue;
+
+ /* Configure the bridge with the returned values */
+ if (next_bus <= bus) {
+ PCIERR(phb, pd->bdfn, "Out of bus numbers !\n");
+ max_bus = next_bus = 0; /* Failure case */
+ }
+
+ pd->secondary_bus = next_bus;
+ pd->subordinate_bus = max_bus;
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, next_bus);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_bus);
+ if (!next_bus)
+ break;
+
+ PCIDBG(phb, pd->bdfn, "Bus %02x..%02x scanning...\n",
+ next_bus, max_bus);
+
+ /* Clear up bridge resources */
+ pci_cleanup_bridge(phb, pd);
+
+ /* Configure the bridge. This will enable power to the slot
+ * if it's currently disabled, lift reset, etc...
+ *
+ * Return false if we know there's nothing behind the bridge
+ */
+ do_scan = pci_enable_bridge(phb, pd);
+
+ /* Perform recursive scan */
+ if (do_scan) {
+ max_sub = pci_scan_bus(phb, next_bus, max_bus,
+ &pd->children, pd, true);
+ } else {
+ /* Empty bridge. We leave room for hotplug
+ * slots if the downstream port is pluggable.
+ */
+ if (pd->slot && !pd->slot->pluggable)
+ max_sub = next_bus;
+ else {
+ max_sub = next_bus + 4;
+ if (max_sub > max_bus)
+ max_sub = max_bus;
+ }
+ }
+
+ pd->subordinate_bus = max_sub;
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_sub);
+ next_bus = max_sub + 1;
+
+ /* power off the slot if there's nothing below it */
+ if (list_empty(&pd->children))
+ pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_OFF);
+ }
+
+ return max_sub;
+}
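+
+/*
+ * Typical top-level invocation (a sketch; pci_scan_phb() further down
+ * does exactly this): scan root bus 0, allow the full range up to 0xff
+ * for subordinate buses, and only descend if the PHB link is up.
+ *
+ *     pci_scan_bus(phb, 0, 0xff, &phb->devices, NULL, link_up);
+ *
+ * where link_up is the boolean link state retrieved from the PHB slot.
+ */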
+
+static int pci_get_mps(struct phb *phb,
+ struct pci_device *pd, void *userdata)
+{
+ uint32_t *mps = (uint32_t *)userdata;
+
+ /* Only consider PCI devices that have an MPS capability */
+ if (phb && pd && pd->mps && *mps > pd->mps)
+ *mps = pd->mps;
+
+ return 0;
+}
+
+static int pci_configure_mps(struct phb *phb,
+ struct pci_device *pd,
+ void *userdata __unused)
+{
+ uint32_t ecap, aercap, mps;
+ uint16_t val;
+
+ assert(phb);
+ assert(pd);
+
+ /* If the MPS isn't an acceptable one, bail immediately */
+ mps = phb->mps;
+ if (mps < 128 || mps > 4096)
+ return 1;
+
+ /* Retrieve PCIe and AER capability */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ aercap = pci_cap(pd, PCIECAP_ID_AER, true);
+
+ /* A PCIe device always has an MPS capability */
+ if (pd->mps) {
+ mps = ilog2(mps) - 7;
+
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, &val);
+ val = SETFIELD(PCICAP_EXP_DEVCTL_MPS, val, mps);
+ pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, val);
+ }
+
+ /* Changing the MPS on an upstream PCI bridge might set some error
+  * bits in the PCIe and AER capabilities. Clear them to avoid
+  * confusion.
+ */
+ if (aercap) {
+ pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_UE_STATUS,
+ 0xffffffff);
+ pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_CE_STATUS,
+ 0xffffffff);
+ }
+ if (ecap)
+ pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVSTAT, 0xf);
+
+ return 0;
+}
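+
+/*
+ * The two walkers above are meant to be used together (this mirrors
+ * pci_scan_phb() further down): first find the smallest MPS supported
+ * by any device in the domain, then program that value everywhere.
+ *
+ *     uint32_t mps = 0xffffffff;
+ *
+ *     pci_walk_dev(phb, NULL, pci_get_mps, &mps);
+ *     phb->mps = mps;
+ *     pci_walk_dev(phb, NULL, pci_configure_mps, NULL);
+ */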
+
+static void pci_disable_completion_timeout(struct phb *phb, struct pci_device *pd)
+{
+ uint32_t ecap, val;
+ uint16_t pcie_cap;
+
+ /* PCIE capability required */
+ if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
+ return;
+
+ /* Check PCIe capability version */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn,
+ ecap + PCICAP_EXP_CAPABILITY_REG, &pcie_cap);
+ if ((pcie_cap & PCICAP_EXP_CAP_VERSION) <= 1)
+ return;
+
+ /* Check if it has capability to disable completion timeout */
+ pci_cfg_read32(phb, pd->bdfn, ecap + PCIECAP_EXP_DCAP2, &val);
+ if (!(val & PCICAP_EXP_DCAP2_CMPTOUT_DIS))
+ return;
+
+ /* Disable the completion timeout without further checks */
+ pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_DCTL2, &val);
+ val |= PCICAP_EXP_DCTL2_CMPTOUT_DIS;
+ pci_cfg_write32(phb, pd->bdfn, ecap + PCICAP_EXP_DCTL2, val);
+}
+
+void pci_device_init(struct phb *phb, struct pci_device *pd)
+{
+ pci_configure_mps(phb, pd, NULL);
+ pci_disable_completion_timeout(phb, pd);
+}
+
+static void pci_reset_phb(void *data)
+{
+ struct phb *phb = data;
+ struct pci_slot *slot = phb->slot;
+ int64_t rc;
+
+ if (!slot || !slot->ops.run_sm) {
+ PCINOTICE(phb, 0, "Cannot issue reset\n");
+ return;
+ }
+
+ pci_slot_add_flags(slot, PCI_SLOT_FLAG_BOOTUP);
+ rc = slot->ops.run_sm(slot);
+ while (rc > 0) {
+ PCITRACE(phb, 0, "Waiting %ld ms\n", tb_to_msecs(rc));
+ time_wait(rc);
+ rc = slot->ops.run_sm(slot);
+ }
+ pci_slot_remove_flags(slot, PCI_SLOT_FLAG_BOOTUP);
+ if (rc < 0)
+ PCIDBG(phb, 0, "Error %lld resetting\n", rc);
+}
+
+static void pci_scan_phb(void *data)
+{
+ struct phb *phb = data;
+ struct pci_slot *slot = phb->slot;
+ uint8_t link;
+ uint32_t mps = 0xffffffff;
+ int64_t rc;
+
+ if (!slot || !slot->ops.get_link_state) {
+ PCIERR(phb, 0, "Cannot query link status\n");
+ link = 0;
+ } else {
+ rc = slot->ops.get_link_state(slot, &link);
+ if (rc != OPAL_SUCCESS) {
+ PCIERR(phb, 0, "Error %lld querying link status\n",
+ rc);
+ link = 0;
+ }
+ }
+
+ if (!link)
+ PCIDBG(phb, 0, "Link down\n");
+ else
+ PCIDBG(phb, 0, "Link up at x%d width\n", link);
+
+ /* Scan root port and downstream ports if applicable */
+ PCIDBG(phb, 0, "Scanning (upstream%s)...\n",
+ link ? "+downsteam" : " only");
+ pci_scan_bus(phb, 0, 0xff, &phb->devices, NULL, link);
+
+ /* Configure MPS (Max Payload Size) for PCIe domain */
+ pci_walk_dev(phb, NULL, pci_get_mps, &mps);
+ phb->mps = mps;
+ pci_walk_dev(phb, NULL, pci_configure_mps, NULL);
+}
+
+int64_t pci_register_phb(struct phb *phb, int opal_id)
+{
+ /* The user didn't specify an opal_id, allocate one */
+ if (opal_id == OPAL_DYNAMIC_PHB_ID) {
+ /* This is called at init time in non-concurrent way, so no lock needed */
+ for (opal_id = 0; opal_id < ARRAY_SIZE(phbs); opal_id++)
+ if (!phbs[opal_id])
+ break;
+ if (opal_id >= ARRAY_SIZE(phbs)) {
+ prerror("PHB: Failed to find a free ID slot\n");
+ return OPAL_RESOURCE;
+ }
+ } else {
+ if (opal_id >= ARRAY_SIZE(phbs)) {
+ prerror("PHB: ID %x out of range !\n", opal_id);
+ return OPAL_PARAMETER;
+ }
+ /* The user did specify an opal_id, check it's free */
+ if (phbs[opal_id]) {
+ prerror("PHB: Duplicate registration of ID %x\n", opal_id);
+ return OPAL_PARAMETER;
+ }
+ }
+
+ phbs[opal_id] = phb;
+ phb->opal_id = opal_id;
+ if (opal_id > last_phb_id)
+ last_phb_id = opal_id;
+ dt_add_property_cells(phb->dt_node, "ibm,opal-phbid", 0, phb->opal_id);
+ PCIDBG(phb, 0, "PCI: Registered PHB\n");
+
+ init_lock(&phb->lock);
+ list_head_init(&phb->devices);
+
+ phb->filter_map = zalloc(BITMAP_BYTES(0x10000));
+ assert(phb->filter_map);
+
+ return OPAL_SUCCESS;
+}
+
+int64_t pci_unregister_phb(struct phb *phb)
+{
+ /* XXX We want some kind of RCU or RWlock to make things
+ * like that happen while no OPAL callback is in progress,
+ * that way we avoid taking a lock in each of them.
+ *
+ * Right now we don't unregister so we are fine
+ */
+ phbs[phb->opal_id] = NULL;
+
+ return OPAL_SUCCESS;
+}
+
+struct phb *pci_get_phb(uint64_t phb_id)
+{
+ if (phb_id >= ARRAY_SIZE(phbs))
+ return NULL;
+
+ /* XXX See comment in pci_unregister_phb() about locking etc... */
+ return phbs[phb_id];
+}
+
+static const char *pci_class_name(uint32_t class_code)
+{
+ uint8_t class = class_code >> 16;
+ uint8_t sub = (class_code >> 8) & 0xff;
+ uint8_t pif = class_code & 0xff;
+
+ switch(class) {
+ case 0x00:
+ switch(sub) {
+ case 0x00: return "device";
+ case 0x01: return "vga";
+ }
+ break;
+ case 0x01:
+ switch(sub) {
+ case 0x00: return "scsi";
+ case 0x01: return "ide";
+ case 0x02: return "fdc";
+ case 0x03: return "ipi";
+ case 0x04: return "raid";
+ case 0x05: return "ata";
+ case 0x06: return "sata";
+ case 0x07: return "sas";
+ default: return "mass-storage";
+ }
+ case 0x02:
+ switch(sub) {
+ case 0x00: return "ethernet";
+ case 0x01: return "token-ring";
+ case 0x02: return "fddi";
+ case 0x03: return "atm";
+ case 0x04: return "isdn";
+ case 0x05: return "worldfip";
+ case 0x06: return "picmg";
+ default: return "network";
+ }
+ case 0x03:
+ switch(sub) {
+ case 0x00: return "vga";
+ case 0x01: return "xga";
+ case 0x02: return "3d-controller";
+ default: return "display";
+ }
+ case 0x04:
+ switch(sub) {
+ case 0x00: return "video";
+ case 0x01: return "sound";
+ case 0x02: return "telephony";
+ default: return "multimedia-device";
+ }
+ case 0x05:
+ switch(sub) {
+ case 0x00: return "memory";
+ case 0x01: return "flash";
+ default: return "memory-controller";
+ }
+ case 0x06:
+ switch(sub) {
+ case 0x00: return "host";
+ case 0x01: return "isa";
+ case 0x02: return "eisa";
+ case 0x03: return "mca";
+ case 0x04: return "pci";
+ case 0x05: return "pcmcia";
+ case 0x06: return "nubus";
+ case 0x07: return "cardbus";
+ case 0x08: return "raceway";
+ case 0x09: return "semi-transparent-pci";
+ case 0x0a: return "infiniband";
+ default: return "unknown-bridge";
+ }
+ case 0x07:
+ switch(sub) {
+ case 0x00:
+ switch(pif) {
+ case 0x01: return "16450-serial";
+ case 0x02: return "16550-serial";
+ case 0x03: return "16650-serial";
+ case 0x04: return "16750-serial";
+ case 0x05: return "16850-serial";
+ case 0x06: return "16950-serial";
+ default: return "serial";
+ }
+ case 0x01:
+ switch(pif) {
+ case 0x01: return "bi-directional-parallel";
+ case 0x02: return "ecp-1.x-parallel";
+ case 0x03: return "ieee1284-controller";
+ case 0xfe: return "ieee1284-device";
+ default: return "parallel";
+ }
+ case 0x02: return "multiport-serial";
+ case 0x03:
+ switch(pif) {
+ case 0x01: return "16450-modem";
+ case 0x02: return "16550-modem";
+ case 0x03: return "16650-modem";
+ case 0x04: return "16750-modem";
+ default: return "modem";
+ }
+ case 0x04: return "gpib";
+ case 0x05: return "smart-card";
+ default: return "communication-controller";
+ }
+ case 0x08:
+ switch(sub) {
+ case 0x00:
+ switch(pif) {
+ case 0x01: return "isa-pic";
+ case 0x02: return "eisa-pic";
+ case 0x10: return "io-apic";
+ case 0x20: return "iox-apic";
+ default: return "interrupt-controller";
+ }
+ case 0x01:
+ switch(pif) {
+ case 0x01: return "isa-dma";
+ case 0x02: return "eisa-dma";
+ default: return "dma-controller";
+ }
+ case 0x02:
+ switch(pif) {
+ case 0x01: return "isa-system-timer";
+ case 0x02: return "eisa-system-timer";
+ default: return "timer";
+ }
+ case 0x03:
+ switch(pif) {
+ case 0x01: return "isa-rtc";
+ default: return "rtc";
+ }
+ case 0x04: return "hotplug-controller";
+ case 0x05: return "sd-host-controller";
+ default: return "system-peripheral";
+ }
+ case 0x09:
+ switch(sub) {
+ case 0x00: return "keyboard";
+ case 0x01: return "pen";
+ case 0x02: return "mouse";
+ case 0x03: return "scanner";
+ case 0x04: return "gameport";
+ default: return "input-controller";
+ }
+ case 0x0a:
+ switch(sub) {
+ case 0x00: return "clock";
+ default: return "docking-station";
+ }
+ case 0x0b:
+ switch(sub) {
+ case 0x00: return "386";
+ case 0x01: return "486";
+ case 0x02: return "pentium";
+ case 0x10: return "alpha";
+ case 0x20: return "powerpc";
+ case 0x30: return "mips";
+ case 0x40: return "co-processor";
+ default: return "cpu";
+ }
+ case 0x0c:
+ switch(sub) {
+ case 0x00: return "firewire";
+ case 0x01: return "access-bus";
+ case 0x02: return "ssa";
+ case 0x03:
+ switch(pif) {
+ case 0x00: return "usb-uhci";
+ case 0x10: return "usb-ohci";
+ case 0x20: return "usb-ehci";
+ case 0x30: return "usb-xhci";
+ case 0xfe: return "usb-device";
+ default: return "usb";
+ }
+ case 0x04: return "fibre-channel";
+ case 0x05: return "smb";
+ case 0x06: return "infiniband";
+ case 0x07:
+ switch(pif) {
+ case 0x00: return "impi-smic";
+ case 0x01: return "impi-kbrd";
+ case 0x02: return "impi-bltr";
+ default: return "impi";
+ }
+ case 0x08: return "secos";
+ case 0x09: return "canbus";
+ default: return "serial-bus";
+ }
+ case 0x0d:
+ switch(sub) {
+ case 0x00: return "irda";
+ case 0x01: return "consumer-ir";
+ case 0x10: return "rf-controller";
+ case 0x11: return "bluetooth";
+ case 0x12: return "broadband";
+ case 0x20: return "enet-802.11a";
+ case 0x21: return "enet-802.11b";
+ default: return "wireless-controller";
+ }
+ case 0x0e: return "intelligent-controller";
+ case 0x0f:
+ switch(sub) {
+ case 0x01: return "satellite-tv";
+ case 0x02: return "satellite-audio";
+ case 0x03: return "satellite-voice";
+ case 0x04: return "satellite-data";
+ default: return "satellite-device";
+ }
+ case 0x10:
+ switch(sub) {
+ case 0x00: return "network-encryption";
+ case 0x01: return "entertainment-encryption";
+ default: return "encryption";
+ }
+ case 0x11:
+ switch(sub) {
+ case 0x00: return "dpio";
+ case 0x01: return "counter";
+ case 0x10: return "measurement";
+ case 0x20: return "management-card";
+ default: return "data-processing";
+ }
+ }
+ return "device";
+}
+
+void pci_std_swizzle_irq_map(struct dt_node *np,
+ struct pci_device *pd,
+ struct pci_lsi_state *lstate,
+ uint8_t swizzle)
+{
+ __be32 *p, *map;
+ int dev, irq, esize, edevcount;
+ size_t map_size;
+
+ /* Some emulated setups don't use the standard interrupt
+  * representation
+ */
+ if (lstate->int_size == 0)
+ return;
+
+ /* Calculate the size of a map entry:
+ *
+ * 3 cells : PCI Address
+ * 1 cell : PCI IRQ
+ * 1 cell : PIC phandle
+ * n cells : PIC irq (n = lstate->int_size)
+ *
+ * Assumption: PIC address is 0-size
+ */
+ esize = 3 + 1 + 1 + lstate->int_size;
+
+ /* Number of map "device" entries
+ *
+ * A PCI Express root or downstream port needs only one
+ * entry for device 0. Anything else will get a full map
+ * for all possible 32 child device numbers
+ *
+ * If we have been passed a host bridge (pd == NULL) we also
+ * do a simple per-pin map
+ */
+ if (!pd || (pd->dev_type == PCIE_TYPE_ROOT_PORT ||
+ pd->dev_type == PCIE_TYPE_SWITCH_DNPORT)) {
+ edevcount = 1;
+ dt_add_property_cells(np, "interrupt-map-mask", 0, 0, 0, 7);
+ } else {
+ edevcount = 32;
+ dt_add_property_cells(np, "interrupt-map-mask",
+ 0xf800, 0, 0, 7);
+ }
+ map_size = esize * edevcount * 4 * sizeof(u32);
+ map = p = zalloc(map_size);
+ if (!map) {
+ prerror("Failed to allocate interrupt-map-mask !\n");
+ return;
+ }
+
+ for (dev = 0; dev < edevcount; dev++) {
+ for (irq = 0; irq < 4; irq++) {
+ /* Calculate pin */
+ size_t i;
+ uint32_t new_irq = (irq + dev + swizzle) % 4;
+
+ /* PCI address portion */
+ *(p++) = cpu_to_be32(dev << (8 + 3));
+ *(p++) = 0;
+ *(p++) = 0;
+
+ /* PCI interrupt portion */
+ *(p++) = cpu_to_be32(irq + 1);
+
+ /* Parent phandle */
+ *(p++) = cpu_to_be32(lstate->int_parent[new_irq]);
+
+ /* Parent desc */
+ for (i = 0; i < lstate->int_size; i++)
+ *(p++) = cpu_to_be32(lstate->int_val[new_irq][i]);
+ }
+ }
+
+ dt_add_property(np, "interrupt-map", map, map_size);
+ free(map);
+}
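+
+/*
+ * Layout of one interrupt-map entry built above, shown for the common
+ * case of a 1-cell PIC interrupt (lstate->int_size == 1) as an example:
+ *
+ *     < dev<<11  0  0 >  < pin 1..4 >  < PIC phandle >  < PIC irq >
+ *      3 cells PCI addr     1 cell         1 cell         1 cell
+ *
+ * The pin is swizzled as (irq + dev + swizzle) % 4 so INTA..INTD rotate
+ * with the device number, which is the standard PCI swizzle.
+ */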
+
+static void pci_add_loc_code(struct dt_node *np)
+{
+ struct dt_node *p;
+ const char *lcode = NULL;
+
+ for (p = np->parent; p; p = p->parent) {
+ /* prefer slot-label by default */
+ lcode = dt_prop_get_def(p, "ibm,slot-label", NULL);
+ if (lcode)
+ break;
+
+ /* otherwise use the fully qualified location code */
+ lcode = dt_prop_get_def(p, "ibm,slot-location-code", NULL);
+ if (lcode)
+ break;
+ }
+
+ if (!lcode)
+ lcode = dt_prop_get_def(np, "ibm,slot-location-code", NULL);
+
+ if (!lcode) {
+ /* Fall back to finding a ibm,loc-code */
+ for (p = np->parent; p; p = p->parent) {
+ lcode = dt_prop_get_def(p, "ibm,loc-code", NULL);
+ if (lcode)
+ break;
+ }
+ }
+
+ if (!lcode)
+ return;
+
+ dt_add_property_string(np, "ibm,loc-code", lcode);
+}
+
+static void pci_print_summary_line(struct phb *phb, struct pci_device *pd,
+ struct dt_node *np, u32 rev_class,
+ const char *cname)
+{
+ const char *label, *dtype, *s;
+#define MAX_SLOTSTR 80
+ char slotstr[MAX_SLOTSTR + 1] = { 0, };
+
+ /* If it's a slot, it has a slot-label */
+ label = dt_prop_get_def(np, "ibm,slot-label", NULL);
+ if (label) {
+ u32 lanes = dt_prop_get_u32_def(np, "ibm,slot-wired-lanes", 0);
+ static const char *lanestrs[] = {
+ "", " x1", " x2", " x4", " x8", "x16", "x32", "32b", "64b"
+ };
+ const char *lstr = lanes > PCI_SLOT_WIRED_LANES_PCIX_64 ? "" : lanestrs[lanes];
+ snprintf(slotstr, MAX_SLOTSTR, "SLOT=%3s %s", label, lstr);
+ /* XXX Add more slot info */
+ } else {
+ /*
+ * No label; ignore downstream switch legs and root complexes,
+ * those would essentially be non-populated
+ */
+ if (pd->dev_type != PCIE_TYPE_ROOT_PORT &&
+ pd->dev_type != PCIE_TYPE_SWITCH_DNPORT) {
+ /* It's a mere device, get loc code */
+ s = dt_prop_get_def(np, "ibm,loc-code", NULL);
+ if (s)
+ snprintf(slotstr, MAX_SLOTSTR, "LOC_CODE=%s", s);
+ }
+ }
+
+ if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) {
+ static const char *pcie_types[] = {
+ "EP ", "LGCY", "????", "????", "ROOT", "SWUP", "SWDN",
+ "ETOX", "XTOE", "RINT", "EVTC" };
+ if (pd->dev_type >= ARRAY_SIZE(pcie_types))
+ dtype = "????";
+ else
+ dtype = pcie_types[pd->dev_type];
+ } else
+ dtype = pd->is_bridge ? "PCIB" : "PCID";
+
+ if (pd->is_bridge)
+ PCINOTICE(phb, pd->bdfn,
+ "[%s] %04x %04x R:%02x C:%06x B:%02x..%02x %s\n",
+ dtype, PCI_VENDOR_ID(pd->vdid),
+ PCI_DEVICE_ID(pd->vdid),
+ rev_class & 0xff, rev_class >> 8, pd->secondary_bus,
+ pd->subordinate_bus, slotstr);
+ else
+ PCINOTICE(phb, pd->bdfn,
+ "[%s] %04x %04x R:%02x C:%06x (%14s) %s\n",
+ dtype, PCI_VENDOR_ID(pd->vdid),
+ PCI_DEVICE_ID(pd->vdid),
+ rev_class & 0xff, rev_class >> 8, cname, slotstr);
+}
+
+static void __noinline pci_add_one_device_node(struct phb *phb,
+ struct pci_device *pd,
+ struct dt_node *parent_node,
+ struct pci_lsi_state *lstate,
+ uint8_t swizzle)
+{
+ struct dt_node *np;
+ const char *cname;
+#define MAX_NAME 256
+ char name[MAX_NAME];
+ char compat[MAX_NAME];
+ uint32_t rev_class;
+ uint8_t intpin;
+ bool is_pcie;
+
+ pci_cfg_read32(phb, pd->bdfn, PCI_CFG_REV_ID, &rev_class);
+ pci_cfg_read8(phb, pd->bdfn, PCI_CFG_INT_PIN, &intpin);
+ is_pcie = pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+
+ /*
+ * Some IBM PHBs (p7ioc?) have an invalid PCI class code. Linux
+ * prefers to read the class code from the DT rather than
+ * re-reading config space, so we can hack around it here.
+ */
+ if (is_pcie && pd->dev_type == PCIE_TYPE_ROOT_PORT)
+ rev_class = (rev_class & 0xff) | 0x6040000;
+ cname = pci_class_name(rev_class >> 8);
+
+ if (PCI_FUNC(pd->bdfn))
+ snprintf(name, MAX_NAME - 1, "%s@%x,%x",
+ cname, PCI_DEV(pd->bdfn), PCI_FUNC(pd->bdfn));
+ else
+ snprintf(name, MAX_NAME - 1, "%s@%x",
+ cname, PCI_DEV(pd->bdfn));
+ pd->dn = np = dt_new(parent_node, name);
+
+ /*
+ * NB: ibm,pci-config-space-type is the PAPR way of indicating the
+ * device has a 4KB config space. It's got nothing to do with the
+ * standard Type 0/1 config spaces defined by PCI.
+ */
+ if (is_pcie || phb->phb_type == phb_type_npu_v2_opencapi) {
+ snprintf(compat, MAX_NAME, "pciex%x,%x",
+ PCI_VENDOR_ID(pd->vdid), PCI_DEVICE_ID(pd->vdid));
+ dt_add_property_cells(np, "ibm,pci-config-space-type", 1);
+ } else {
+ snprintf(compat, MAX_NAME, "pci%x,%x",
+ PCI_VENDOR_ID(pd->vdid), PCI_DEVICE_ID(pd->vdid));
+ dt_add_property_cells(np, "ibm,pci-config-space-type", 0);
+ }
+ dt_add_property_cells(np, "class-code", rev_class >> 8);
+ dt_add_property_cells(np, "revision-id", rev_class & 0xff);
+ dt_add_property_cells(np, "vendor-id", PCI_VENDOR_ID(pd->vdid));
+ dt_add_property_cells(np, "device-id", PCI_DEVICE_ID(pd->vdid));
+ if (intpin)
+ dt_add_property_cells(np, "interrupts", intpin);
+
+ pci_handle_quirk(phb, pd);
+
+ /* XXX FIXME: Add a few missing ones such as
+ *
+ * - devsel-speed (!express)
+ * - max-latency
+ * - min-grant
+ * - subsystem-id
+ * - subsystem-vendor-id
+ * - ...
+ */
+
+ /* Add slot properties if needed and iff this is a bridge */
+ if (pd->slot)
+ pci_slot_add_dt_properties(pd->slot, np);
+
+ /*
+ * Use the phb base location code for root ports if the platform
+ * doesn't provide one via slot->add_properties() operation.
+ */
+ if (pd->dev_type == PCIE_TYPE_ROOT_PORT && phb->base_loc_code &&
+ !dt_has_node_property(np, "ibm,slot-location-code", NULL))
+ dt_add_property_string(np, "ibm,slot-location-code",
+ phb->base_loc_code);
+
+ /* Make up location code */
+ if (platform.pci_add_loc_code)
+ platform.pci_add_loc_code(np, pd);
+ else
+ pci_add_loc_code(np);
+
+ /* XXX FIXME: We don't look for BARs, we only put the config space
+ * entry in the "reg" property. That's enough for Linux and we might
+ * even want to make this legit in future ePAPR
+ */
+ dt_add_property_cells(np, "reg", pd->bdfn << 8, 0, 0, 0, 0);
+
+ /* Print summary info about the device */
+ pci_print_summary_line(phb, pd, np, rev_class, cname);
+ if (!pd->is_bridge)
+ return;
+
+ dt_add_property_cells(np, "#address-cells", 3);
+ dt_add_property_cells(np, "#size-cells", 2);
+ dt_add_property_cells(np, "#interrupt-cells", 1);
+
+ /* We want "device_type" for bridges */
+ if (is_pcie)
+ dt_add_property_string(np, "device_type", "pciex");
+ else
+ dt_add_property_string(np, "device_type", "pci");
+
+ /* Update the current interrupt swizzling level based on our own
+ * device number
+ */
+ swizzle = (swizzle + PCI_DEV(pd->bdfn)) & 3;
+
+ /* We generate a standard-swizzling interrupt map. This is pretty
+ * big, we *could* try to be smarter for things that aren't hotplug
+ * slots at least and only populate those entries for which there's
+ * an actual children (especially on PCI Express), but for now that
+ * will do
+ */
+ pci_std_swizzle_irq_map(np, pd, lstate, swizzle);
+
+ /* Parts of the OF address translation in the kernel will fail to
+ * correctly translate a PCI address if translating a 1:1 mapping
+ * (ie. an empty ranges property).
+ * Instead add a ranges property that explicitly translates 1:1.
+ */
+ dt_add_property_cells(np, "ranges",
+ /* 64-bit direct mapping. We know the bridges
+ * don't cover the entire address space so
+ * use 0xf00... as a good compromise. */
+ 0x02000000, 0x0, 0x0,
+ 0x02000000, 0x0, 0x0,
+ 0xf0000000, 0x0);
+}
+
+void __noinline pci_add_device_nodes(struct phb *phb,
+ struct list_head *list,
+ struct dt_node *parent_node,
+ struct pci_lsi_state *lstate,
+ uint8_t swizzle)
+{
+ struct pci_device *pd;
+
+ /* Add all child devices */
+ list_for_each(list, pd, link) {
+ pci_add_one_device_node(phb, pd, parent_node,
+ lstate, swizzle);
+ if (list_empty(&pd->children))
+ continue;
+
+ pci_add_device_nodes(phb, &pd->children,
+ pd->dn, lstate, swizzle);
+ }
+}
+
+static void pci_do_jobs(void (*fn)(void *))
+{
+ struct cpu_job **jobs;
+ int i;
+
+ jobs = zalloc(sizeof(struct cpu_job *) * ARRAY_SIZE(phbs));
+ assert(jobs);
+ for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+ if (!phbs[i]) {
+ jobs[i] = NULL;
+ continue;
+ }
+
+ jobs[i] = __cpu_queue_job(NULL, phbs[i]->dt_node->name,
+ fn, phbs[i], false);
+ assert(jobs[i]);
+
+ }
+
+ /* If no secondary CPUs, do everything sync */
+ cpu_process_local_jobs();
+
+ /* Wait until all tasks are done */
+ for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+ if (!jobs[i])
+ continue;
+
+ cpu_wait_job(jobs[i], true);
+ }
+ free(jobs);
+}
+
+static void __pci_init_slots(void)
+{
+ unsigned int i;
+
+ /* Some PHBs may need a short debounce delay for the presence detect
+  * signal after HW initialization.
+ */
+ for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+ if (phbs[i]) {
+ time_wait_ms(20);
+ break;
+ }
+ }
+
+ if (platform.pre_pci_fixup)
+ platform.pre_pci_fixup();
+
+ prlog(PR_NOTICE, "PCI: Resetting PHBs and training links...\n");
+ pci_do_jobs(pci_reset_phb);
+
+ prlog(PR_NOTICE, "PCI: Probing slots...\n");
+ pci_do_jobs(pci_scan_phb);
+
+ if (platform.pci_probe_complete)
+ platform.pci_probe_complete();
+
+ prlog(PR_NOTICE, "PCI Summary:\n");
+
+ for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+ if (!phbs[i])
+ continue;
+
+ pci_add_device_nodes(phbs[i], &phbs[i]->devices,
+ phbs[i]->dt_node, &phbs[i]->lstate, 0);
+ }
+
+ /* PHB final fixup */
+ for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+ if (!phbs[i] || !phbs[i]->ops || !phbs[i]->ops->phb_final_fixup)
+ continue;
+
+ phbs[i]->ops->phb_final_fixup(phbs[i]);
+ }
+}
+
+static void __pci_reset(struct list_head *list)
+{
+ struct pci_device *pd;
+ struct pci_cfg_reg_filter *pcrf;
+ int i;
+
+ while ((pd = list_pop(list, struct pci_device, link)) != NULL) {
+ __pci_reset(&pd->children);
+ dt_free(pd->dn);
+ free(pd->slot);
+ while ((pcrf = list_pop(&pd->pcrf, struct pci_cfg_reg_filter, link)) != NULL) {
+ free(pcrf);
+ }
+ for (i = 0; i < 64; i++)
+ if (pd->cap[i].free_func)
+ pd->cap[i].free_func(pd->cap[i].data);
+ free(pd);
+ }
+}
+
+int64_t pci_reset(void)
+{
+ unsigned int i;
+
+ prlog(PR_NOTICE, "PCI: Clearing all devices...\n");
+
+ for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+ struct phb *phb = phbs[i];
+ if (!phb)
+ continue;
+ __pci_reset(&phb->devices);
+
+ pci_slot_set_state(phb->slot, PCI_SLOT_STATE_CRESET_START);
+ }
+
+ /* Do init and discovery of PCI slots in parallel */
+ __pci_init_slots();
+
+ return 0;
+}
+
+void pci_init_slots(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(phbs); i++) {
+ struct phb *phb = phbs[i];
+ if (!phb)
+ continue;
+ pci_slot_set_state(phb->slot, PCI_SLOT_STATE_FRESET_POWER_OFF);
+ }
+ __pci_init_slots();
+}
+
+/*
+ * Complete iteration on current level before switching to
+ * child level, which is the proper order for restoring
+ * PCI bus range on bridges.
+ */
+static struct pci_device *__pci_walk_dev(struct phb *phb,
+ struct list_head *l,
+ int (*cb)(struct phb *,
+ struct pci_device *,
+ void *),
+ void *userdata)
+{
+ struct pci_device *pd, *child;
+
+ if (list_empty(l))
+ return NULL;
+
+ list_for_each(l, pd, link) {
+ if (cb && cb(phb, pd, userdata))
+ return pd;
+ }
+
+ list_for_each(l, pd, link) {
+ child = __pci_walk_dev(phb, &pd->children, cb, userdata);
+ if (child)
+ return child;
+ }
+
+ return NULL;
+}
+
+struct pci_device *pci_walk_dev(struct phb *phb,
+ struct pci_device *pd,
+ int (*cb)(struct phb *,
+ struct pci_device *,
+ void *),
+ void *userdata)
+{
+ if (pd)
+ return __pci_walk_dev(phb, &pd->children, cb, userdata);
+
+ return __pci_walk_dev(phb, &phb->devices, cb, userdata);
+}
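+
+/*
+ * Callback-based traversal sketch: the walk visits a whole level before
+ * descending, and a callback returning non-zero stops the walk with
+ * that device returned. A (hypothetical) device counter could look
+ * like:
+ *
+ *     static int count_cb(struct phb *phb __unused,
+ *                         struct pci_device *pd __unused, void *data)
+ *     {
+ *         (*(unsigned int *)data)++;
+ *         return 0;    // keep walking
+ *     }
+ *
+ *     unsigned int count = 0;
+ *     pci_walk_dev(phb, NULL, count_cb, &count);
+ */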
+
+static int __pci_find_dev(struct phb *phb,
+ struct pci_device *pd, void *userdata)
+{
+ uint16_t bdfn = *((uint16_t *)userdata);
+
+ if (!phb || !pd)
+ return 0;
+
+ if (pd->bdfn == bdfn)
+ return 1;
+
+ return 0;
+}
+
+struct pci_device *pci_find_dev(struct phb *phb, uint16_t bdfn)
+{
+ return pci_walk_dev(phb, NULL, __pci_find_dev, &bdfn);
+}
+
+static int __pci_restore_bridge_buses(struct phb *phb,
+ struct pci_device *pd,
+ void *data __unused)
+{
+ uint32_t vdid;
+
+ /* If the device is behind a switch, wait for the switch */
+ if (!pd->is_vf && !(pd->bdfn & 7) && pd->parent != NULL &&
+ pd->parent->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
+ if (!pci_bridge_wait_link(phb, pd->parent, true)) {
+ PCIERR(phb, pd->bdfn, "Timeout waiting for switch\n");
+ return -1;
+ }
+ }
+
+ /* Wait for config space to stop returning CRS */
+ if (!pci_wait_crs(phb, pd->bdfn, &vdid))
+ return -1;
+
+ /* Make all devices below a bridge "re-capture" the bdfn */
+ pci_cfg_write32(phb, pd->bdfn, PCI_CFG_VENDOR_ID, vdid);
+
+ if (!pd->is_bridge)
+ return 0;
+
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_PRIMARY_BUS,
+ pd->primary_bus);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS,
+ pd->secondary_bus);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS,
+ pd->subordinate_bus);
+ return 0;
+}
+
+void pci_restore_bridge_buses(struct phb *phb, struct pci_device *pd)
+{
+ pci_walk_dev(phb, pd, __pci_restore_bridge_buses, NULL);
+}
+
+void pci_restore_slot_bus_configs(struct pci_slot *slot)
+{
+ /*
+ * We might lose the bus numbers during the reset operation
+ * and we need to restore them. Otherwise, some adapters (e.g.
+ * IPR) can't be probed properly by the kernel. We don't need
+ * to restore bus numbers for every kind of reset, however,
+ * it's not harmful to always restore the bus numbers, which
+ * simplifies the logic.
+ */
+ pci_restore_bridge_buses(slot->phb, slot->pd);
+ if (slot->phb->ops->device_init)
+ pci_walk_dev(slot->phb, slot->pd,
+ slot->phb->ops->device_init, NULL);
+}
+
+struct pci_cfg_reg_filter *pci_find_cfg_reg_filter(struct pci_device *pd,
+ uint32_t start, uint32_t len)
+{
+ struct pci_cfg_reg_filter *pcrf;
+
+ /* Check on the cached range, which contains holes */
+ if ((start + len) <= pd->pcrf_start ||
+ pd->pcrf_end <= start)
+ return NULL;
+
+ list_for_each(&pd->pcrf, pcrf, link) {
+ if (start >= pcrf->start &&
+ (start + len) <= (pcrf->start + pcrf->len))
+ return pcrf;
+ }
+
+ return NULL;
+}
+
+static bool pci_device_has_cfg_reg_filters(struct phb *phb, uint16_t bdfn)
+{
+ return bitmap_tst_bit(*phb->filter_map, bdfn);
+}
+
+int64_t pci_handle_cfg_filters(struct phb *phb, uint32_t bdfn,
+ uint32_t offset, uint32_t len,
+ uint32_t *data, bool write)
+{
+ struct pci_device *pd;
+ struct pci_cfg_reg_filter *pcrf;
+ uint32_t flags;
+
+ if (!pci_device_has_cfg_reg_filters(phb, bdfn))
+ return OPAL_PARTIAL;
+ pd = pci_find_dev(phb, bdfn);
+ pcrf = pd ? pci_find_cfg_reg_filter(pd, offset, len) : NULL;
+ if (!pcrf || !pcrf->func)
+ return OPAL_PARTIAL;
+
+ flags = write ? PCI_REG_FLAG_WRITE : PCI_REG_FLAG_READ;
+ if ((pcrf->flags & flags) != flags)
+ return OPAL_PARTIAL;
+
+ return pcrf->func(pd, pcrf, offset, len, data, write);
+}
+
+struct pci_cfg_reg_filter *pci_add_cfg_reg_filter(struct pci_device *pd,
+ uint32_t start, uint32_t len,
+ uint32_t flags,
+ pci_cfg_reg_func func)
+{
+ struct pci_cfg_reg_filter *pcrf;
+
+ pcrf = pci_find_cfg_reg_filter(pd, start, len);
+ if (pcrf)
+ return pcrf;
+
+ pcrf = zalloc(sizeof(*pcrf) + ((len + 0x4) & ~0x3));
+ if (!pcrf)
+ return NULL;
+
+ /* Don't validate the flags so that the private flags
+ * can be supported for debugging purposes.
+ */
+ pcrf->flags = flags;
+ pcrf->start = start;
+ pcrf->len = len;
+ pcrf->func = func;
+ pcrf->data = (uint8_t *)(pcrf + 1);
+
+ if (start < pd->pcrf_start)
+ pd->pcrf_start = start;
+ if (pd->pcrf_end < (start + len))
+ pd->pcrf_end = start + len;
+ list_add_tail(&pd->pcrf, &pcrf->link);
+ bitmap_set_bit(*pd->phb->filter_map, pd->bdfn);
+
+ return pcrf;
+}
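+
+/*
+ * Registration sketch (my_cfg_handler is a hypothetical function with
+ * the pci_cfg_reg_func signature used above): trap 4 bytes of config
+ * space at a given offset for both reads and writes.
+ *
+ *     pcrf = pci_add_cfg_reg_filter(pd, offset, 4,
+ *                                   PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
+ *                                   my_cfg_handler);
+ *
+ * Config accesses then go through pci_handle_cfg_filters(), which calls
+ * my_cfg_handler(pd, pcrf, offset, len, data, write) when the filter
+ * matches and returns OPAL_PARTIAL when it doesn't, so the caller knows
+ * to fall back to real config space.
+ */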
diff --git a/roms/skiboot/core/pcie-slot.c b/roms/skiboot/core/pcie-slot.c
new file mode 100644
index 000000000..03326e58f
--- /dev/null
+++ b/roms/skiboot/core/pcie-slot.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * PCIe Slots
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <opal-msg.h>
+#include <pci-cfg.h>
+#include <pci.h>
+#include <pci-slot.h>
+
+/* Debugging options */
+#define PCIE_SLOT_PREFIX "PCIE-SLOT-%016llx "
+#define PCIE_SLOT_DBG(s, fmt, a...) \
+ prlog(PR_DEBUG, PCIE_SLOT_PREFIX fmt, (s)->id, ##a)
+
+static int64_t pcie_slot_get_presence_state(struct pci_slot *slot, uint8_t *val)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t ecap;
+ uint16_t state;
+
+ /* The presence is always on if it's a switch upstream port */
+ if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
+ *val = OPAL_PCI_SLOT_PRESENT;
+ return OPAL_SUCCESS;
+ }
+
+ /*
+ * Presence is always reported if a switch downstream port
+ * doesn't implement the slot capability, per the PCIe spec.
+ */
+ if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT &&
+ !(slot->pcie_cap & PCICAP_EXP_CAP_SLOT)) {
+ *val = OPAL_PCI_SLOT_PRESENT;
+ return OPAL_SUCCESS;
+ }
+
+ /* Retrieve presence status */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTSTAT, &state);
+ if (state & PCICAP_EXP_SLOTSTAT_PDETECTST)
+ *val = OPAL_PCI_SLOT_PRESENT;
+ else
+ *val = OPAL_PCI_SLOT_EMPTY;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_get_link_state(struct pci_slot *slot,
+ uint8_t *val)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t ecap;
+ int16_t state;
+
+ /*
+ * The link behind a switch upstream port is always on
+ * since it doesn't have a valid link indicator.
+ */
+ if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
+ *val = 1;
+ return OPAL_SUCCESS;
+ }
+
+ /* Retrieve link width */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LSTAT, &state);
+ if (state & PCICAP_EXP_LSTAT_DLLL_ACT)
+ *val = ((state & PCICAP_EXP_LSTAT_WIDTH) >> 4);
+ else
+ *val = 0;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_get_power_state(struct pci_slot *slot __unused,
+ uint8_t *val)
+{
+ /* Return the cached power state, which matches the PCI slot
+  * hotplug state (added/removed). Otherwise, the OS would see
+  * mismatched states and the adapter behind the slot couldn't
+  * be probed successfully on a hot-add request; for instance,
+  * the OS could see power-off while the slot is actually on
+  * in hardware.
+ */
+ *val = slot->power_state;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_get_attention_state(struct pci_slot *slot,
+ uint8_t *val)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t ecap;
+ uint16_t state;
+
+ /* Attention is off if the capability is missing */
+ if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI)) {
+ *val = 0;
+ return OPAL_SUCCESS;
+ }
+
+ /* Retrieve attention state */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &state);
+ state = (state & PCICAP_EXP_SLOTCTL_ATTNI) >> 6;
+ switch (state) {
+ case PCIE_INDIC_ON:
+ *val = PCI_SLOT_ATTN_LED_ON;
+ break;
+ case PCIE_INDIC_BLINK:
+ *val = PCI_SLOT_ATTN_LED_BLINK;
+ break;
+ case PCIE_INDIC_OFF:
+ default:
+ *val = PCI_SLOT_ATTN_LED_OFF;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_get_latch_state(struct pci_slot *slot,
+ uint8_t *val)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t ecap;
+ uint16_t state;
+
+ /* Latch is off if MRL sensor doesn't exist */
+ if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_MRLSENS)) {
+ *val = 0;
+ return OPAL_SUCCESS;
+ }
+
+ /* Retrieve MRL sensor state */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTSTAT, &state);
+ if (state & PCICAP_EXP_SLOTSTAT_MRLSENSST)
+ *val = 1;
+ else
+ *val = 0;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t pcie_slot_set_attention_state(struct pci_slot *slot,
+ uint8_t val)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t ecap;
+ uint16_t state;
+
+ /* Drop the request if functionality doesn't exist */
+ if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI))
+ return OPAL_SUCCESS;
+
+ /* Update with the requested state */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &state);
+ state &= ~PCICAP_EXP_SLOTCTL_ATTNI;
+ switch (val) {
+ case PCI_SLOT_ATTN_LED_ON:
+ state |= (PCIE_INDIC_ON << 6);
+ break;
+ case PCI_SLOT_ATTN_LED_BLINK:
+ state |= (PCIE_INDIC_BLINK << 6);
+ break;
+ case PCI_SLOT_ATTN_LED_OFF:
+ state |= (PCIE_INDIC_OFF << 6);
+ break;
+ default:
+ prlog(PR_ERR, PCIE_SLOT_PREFIX
+ "Invalid attention state (0x%x)\n", slot->id, val);
+ return OPAL_PARAMETER;
+ }
+
+ pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, state);
+ return OPAL_SUCCESS;
+}
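+
+/*
+ * Note on the << 6 shifts above (and the << 8 used for power below):
+ * the attention indicator control field sits in bits 7:6 of SLOTCTL
+ * and the power indicator control field in bits 9:8, so for example:
+ *
+ *     state |= (PCIE_INDIC_ON << 6);    // attention LED on
+ *     state |= (PCIE_INDIC_ON << 8);    // power LED on
+ */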
+
+static int64_t pcie_slot_set_power_state_ext(struct pci_slot *slot, uint8_t val,
+ bool surprise_check)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t ecap;
+ uint16_t state;
+
+ if (slot->power_state == val)
+ return OPAL_SUCCESS;
+
+ /* Update the power state and return immediately if the power
+ * control functionality isn't supported on the PCI slot.
+ */
+ if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL)) {
+ slot->power_state = val;
+ return OPAL_SUCCESS;
+ }
+
+ /*
+ * Surprise hotpluggable slots need to be handled with care since
+ * many systems do not implement the presence detect side-band
+ * signal. Instead, they rely on in-band presence to report the
+ * existence of a hotplugged card.
+ *
+ * This is problematic because:
+ * a) When PERST is asserted in-band presence doesn't work, and
+ * b) Switches assert PERST as a part of the "slot power down" sequence
+ *
+ * To work around the problem we leave the slot physically powered on
+ * and exit early here. This way when a new card is inserted, the switch
+ * will raise an interrupt due to the PresDet status changing.
+ */
+ if (surprise_check && slot->surprise_pluggable) {
+ slot->power_state = val;
+ if (val == PCI_SLOT_POWER_OFF)
+ return OPAL_SUCCESS;
+
+ /*
+ * Some systems have the slot power disabled by default
+ * so we always perform the power-on step. This is not
+ * *strictly* required, but it's probably a good idea.
+ */
+ }
+
+ pci_slot_set_state(slot, PCI_SLOT_STATE_SPOWER_START);
+ slot->power_state = val;
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &state);
+ state &= ~(PCICAP_EXP_SLOTCTL_PWRCTLR | PCICAP_EXP_SLOTCTL_PWRI);
+ switch (val) {
+ case PCI_SLOT_POWER_OFF:
+ state |= (PCICAP_EXP_SLOTCTL_PWRCTLR | (PCIE_INDIC_OFF << 8));
+ break;
+ case PCI_SLOT_POWER_ON:
+ state |= (PCIE_INDIC_ON << 8);
+ break;
+ default:
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ prlog(PR_ERR, PCIE_SLOT_PREFIX
+ "Invalid power state (0x%x)\n", slot->id, val);
+ return OPAL_PARAMETER;
+ }
+
+ pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, state);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_SPOWER_DONE);
+
+ return OPAL_ASYNC_COMPLETION;
+}
+
+static int64_t pcie_slot_set_power_state(struct pci_slot *slot, uint8_t val)
+{
+ return pcie_slot_set_power_state_ext(slot, val, true);
+}
+
+static int64_t pcie_slot_sm_poll_link(struct pci_slot *slot)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint32_t ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ uint16_t val;
+ uint8_t presence = 0;
+
+ switch (slot->state) {
+ case PCI_SLOT_STATE_LINK_START_POLL:
+ PCIE_SLOT_DBG(slot, "LINK: Start polling\n");
+
+ /* The link stays down forever if no device is attached */
+ if (slot->ops.get_presence_state)
+ slot->ops.get_presence_state(slot, &presence);
+ if (!presence) {
+ PCIE_SLOT_DBG(slot, "LINK: No adapter, end polling\n");
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ return OPAL_SUCCESS;
+ }
+
+ /* Enable the link without check */
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, &val);
+ val &= ~PCICAP_EXP_LCTL_LINK_DIS;
+ pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, val);
+
+ /*
+ * If link state change reporting isn't supported, we expect
+ * the link to be up and stabilized after one second.
+ */
+ if (!(slot->link_cap & PCICAP_EXP_LCAP_DL_ACT_REP)) {
+ pci_slot_set_state(slot,
+ PCI_SLOT_STATE_LINK_DELAY_FINALIZED);
+ return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
+ }
+
+ /*
+ * Poll the link state if link state change reporting is
+ * supported on the link.
+ */
+ pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_POLLING);
+ slot->retries = 250;
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(20));
+ case PCI_SLOT_STATE_LINK_DELAY_FINALIZED:
+ PCIE_SLOT_DBG(slot, "LINK: No link report, end polling\n");
+ if (slot->ops.prepare_link_change)
+ slot->ops.prepare_link_change(slot, true);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ return OPAL_SUCCESS;
+ case PCI_SLOT_STATE_LINK_POLLING:
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LSTAT, &val);
+ if (val & PCICAP_EXP_LSTAT_DLLL_ACT) {
+ PCIE_SLOT_DBG(slot, "LINK: Link is up, end polling\n");
+ if (slot->ops.prepare_link_change)
+ slot->ops.prepare_link_change(slot, true);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ return OPAL_SUCCESS;
+ }
+
+ /* Check link state again until timeout */
+ if (slot->retries-- == 0) {
+ prlog(PR_ERR, PCIE_SLOT_PREFIX
+ "LINK: Timeout waiting for up (%04x)\n",
+ slot->id, val);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ return OPAL_SUCCESS;
+ }
+
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(20));
+ default:
+ prlog(PR_ERR, PCIE_SLOT_PREFIX
+ "Link: Unexpected slot state %08x\n",
+ slot->id, slot->state);
+ }
+
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ return OPAL_HARDWARE;
+}
+
+static void pcie_slot_reset(struct pci_slot *slot, bool assert)
+{
+ struct phb *phb = slot->phb;
+ struct pci_device *pd = slot->pd;
+ uint16_t ctl;
+
+ pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &ctl);
+ if (assert)
+ ctl |= PCI_CFG_BRCTL_SECONDARY_RESET;
+ else
+ ctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, ctl);
+}
+
+static int64_t pcie_slot_sm_hreset(struct pci_slot *slot)
+{
+ switch (slot->state) {
+ case PCI_SLOT_STATE_NORMAL:
+ PCIE_SLOT_DBG(slot, "HRESET: Starts\n");
+ if (slot->ops.prepare_link_change) {
+ PCIE_SLOT_DBG(slot, "HRESET: Prepare for link down\n");
+ slot->ops.prepare_link_change(slot, false);
+ }
+ /* fall through */
+ case PCI_SLOT_STATE_HRESET_START:
+ PCIE_SLOT_DBG(slot, "HRESET: Assert\n");
+ pcie_slot_reset(slot, true);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_HRESET_HOLD);
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
+ case PCI_SLOT_STATE_HRESET_HOLD:
+ PCIE_SLOT_DBG(slot, "HRESET: Deassert\n");
+ pcie_slot_reset(slot, false);
+ pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL);
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(1800));
+ default:
+ PCIE_SLOT_DBG(slot, "HRESET: Unexpected slot state %08x\n",
+ slot->state);
+ }
+
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ return OPAL_HARDWARE;
+}
+
+/*
+ * Usually, individual platforms need to override the power
+ * management methods for fundamental reset, but the hot
+ * reset method is commonly shared.
+ */
+static int64_t pcie_slot_sm_freset(struct pci_slot *slot)
+{
+ uint8_t power_state = PCI_SLOT_POWER_ON;
+
+ switch (slot->state) {
+ case PCI_SLOT_STATE_NORMAL:
+ PCIE_SLOT_DBG(slot, "FRESET: Starts\n");
+ if (slot->ops.prepare_link_change)
+ slot->ops.prepare_link_change(slot, false);
+
+ /* Retrieve power state */
+ if (slot->ops.get_power_state) {
+ PCIE_SLOT_DBG(slot, "FRESET: Retrieve power state\n");
+ slot->ops.get_power_state(slot, &power_state);
+ }
+
+ /* In power on state, power it off */
+ if (power_state == PCI_SLOT_POWER_ON) {
+ PCIE_SLOT_DBG(slot, "FRESET: Power is on, turn off\n");
+ pcie_slot_set_power_state_ext(slot,
+ PCI_SLOT_POWER_OFF, false);
+ pci_slot_set_state(slot,
+ PCI_SLOT_STATE_FRESET_POWER_OFF);
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(50));
+ }
+		/* Power is already off, no state change needed */
+ /* fallthrough */
+ case PCI_SLOT_STATE_FRESET_POWER_OFF:
+ PCIE_SLOT_DBG(slot, "FRESET: Power is off, turn on\n");
+ pcie_slot_set_power_state_ext(slot, PCI_SLOT_POWER_ON, false);
+
+ pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL);
+ return pci_slot_set_sm_timeout(slot, msecs_to_tb(50));
+ default:
+ prlog(PR_ERR, PCIE_SLOT_PREFIX
+ "FRESET: Unexpected slot state %08x\n",
+ slot->id, slot->state);
+ }
+
+ pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
+ return OPAL_HARDWARE;
+}
+
+struct pci_slot *pcie_slot_create(struct phb *phb, struct pci_device *pd)
+{
+ struct pci_slot *slot;
+ uint32_t ecap;
+ uint16_t slot_ctl;
+
+ /* Allocate PCI slot */
+ slot = pci_slot_alloc(phb, pd);
+ if (!slot)
+ return NULL;
+
+ /* Cache the link and slot capabilities */
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_CAPABILITY_REG,
+ &slot->pcie_cap);
+ pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_LCAP,
+ &slot->link_cap);
+
+ /* Leave PCI slot capability blank if PCI slot isn't supported */
+ if (slot->pcie_cap & PCICAP_EXP_CAP_SLOT)
+ pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCAP,
+ &slot->slot_cap);
+ else
+ slot->slot_cap = 0;
+
+ if (slot->slot_cap & PCICAP_EXP_SLOTCAP_HPLUG_CAP)
+ slot->pluggable = 1;
+
+ /* Assume the slot is powered on by default */
+ slot->power_state = PCI_SLOT_POWER_ON;
+ if (slot->slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL) {
+ slot->power_ctl = 1;
+
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL,
+ &slot_ctl);
+ if (slot_ctl & PCICAP_EXP_SLOTCTL_PWRCTLR)
+ slot->power_state = PCI_SLOT_POWER_OFF;
+ }
+
+ if (slot->slot_cap & PCICAP_EXP_SLOTCAP_PWRI)
+ slot->power_led_ctl = PCI_SLOT_PWR_LED_CTL_KERNEL;
+ if (slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI)
+ slot->attn_led_ctl = PCI_SLOT_ATTN_LED_CTL_KERNEL;
+ slot->wired_lanes = ((slot->link_cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4);
+
+ /* The surprise hotplug capability is claimed when it's supported
+ * in the slot's capability bits or link state change reporting is
+ * supported in PCIe link capability. It means the surprise hotplug
+ * relies on presence or link state change events. In order for the
+ * link state change event to be properly raised during surprise hot
+ * add/remove, the power supply to the slot should be always on.
+ *
+	 * For PCI slots that don't claim surprise hotplug capability
+	 * explicitly, the PDC (Presence Detection Change) event isn't
+	 * reliable, so we mark it as broken on them.
+ */
+ if (slot->pcie_cap & PCICAP_EXP_CAP_SLOT) {
+ if (slot->slot_cap & PCICAP_EXP_SLOTCAP_HPLUG_SURP) {
+ slot->surprise_pluggable = 1;
+ } else if (slot->link_cap & PCICAP_EXP_LCAP_DL_ACT_REP) {
+ slot->surprise_pluggable = 1;
+
+ pci_slot_add_flags(slot, PCI_SLOT_FLAG_BROKEN_PDC);
+ }
+ }
+
+ /* Standard slot operations */
+ slot->ops.get_presence_state = pcie_slot_get_presence_state;
+ slot->ops.get_link_state = pcie_slot_get_link_state;
+ slot->ops.get_power_state = pcie_slot_get_power_state;
+ slot->ops.get_attention_state = pcie_slot_get_attention_state;
+ slot->ops.get_latch_state = pcie_slot_get_latch_state;
+ slot->ops.set_power_state = pcie_slot_set_power_state;
+ slot->ops.set_attention_state = pcie_slot_set_attention_state;
+
+ /*
+	 * State machine (SM) based reset operations. The same poll
+	 * function is shared by all cases.
+ */
+ slot->ops.poll_link = pcie_slot_sm_poll_link;
+ slot->ops.hreset = pcie_slot_sm_hreset;
+ slot->ops.freset = pcie_slot_sm_freset;
+
+ slot->wired_lanes = PCI_SLOT_WIRED_LANES_UNKNOWN;
+ slot->connector_type = PCI_SLOT_CONNECTOR_PCIE_NS;
+ slot->card_desc = PCI_SLOT_DESC_NON_STANDARD;
+ slot->card_mech = PCI_SLOT_MECH_NONE;
+ slot->power_led_ctl = PCI_SLOT_PWR_LED_CTL_NONE;
+ slot->attn_led_ctl = PCI_SLOT_ATTN_LED_CTL_NONE;
+
+ return slot;
+}
+
+/* FIXME: this is kind of insane */
+struct pci_slot *pcie_slot_create_dynamic(struct phb *phb,
+ struct pci_device *pd)
+{
+ uint32_t ecap, val;
+ struct pci_slot *slot;
+
+ if (!phb || !pd || pd->slot)
+ return NULL;
+
+	/* Try to create a slot whose details aren't provided by the platform */
+ if (pd->dev_type != PCIE_TYPE_SWITCH_DNPORT)
+ return NULL;
+
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCAP, &val);
+ if (!(val & PCICAP_EXP_SLOTCAP_HPLUG_CAP))
+ return NULL;
+
+ slot = pcie_slot_create(phb, pd);
+
+	/* On Supermicro's "p8dnu" platform, we create dynamic PCI slots
+	 * for all downstream ports of the PEX9733 that is connected to
+	 * the PHB direct slot. The power supply to the PCI slot is lost
+	 * after the PCI adapter is removed from it. The power supply
+	 * can't be turned on while the slot is empty, and it isn't
+	 * turned on automatically when a PCI adapter is inserted into
+	 * the slot at a later point. We set a flag on the slot here to
+	 * turn on the power supply in the (surprise or managed) hot-add
+	 * path.
+	 *
+	 * We have the same issue with the PEX8718 on the "p8dnu" platform.
+ if (dt_node_is_compatible(dt_root, "supermicro,p8dnu") && slot &&
+ slot->pd && (slot->pd->vdid == 0x973310b5 ||
+ slot->pd->vdid == 0x871810b5))
+ pci_slot_add_flags(slot, PCI_SLOT_FLAG_FORCE_POWERON);
+
+ return slot;
+}
diff --git a/roms/skiboot/core/pel.c b/roms/skiboot/core/pel.c
new file mode 100644
index 000000000..ec13e5590
--- /dev/null
+++ b/roms/skiboot/core/pel.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Platform Error Log (PEL) generation
+ *
+ * Copyright 2014-2016 IBM Corp
+ */
+
+#include <string.h>
+#include <errorlog.h>
+#include <device.h>
+#include <fsp.h>
+#include <pel.h>
+#include <rtc.h>
+
+/* Create MTMS section for sapphire log */
+static void create_mtms_section(struct errorlog *elog_data,
+ char *pel_buffer, int *pel_offset)
+{
+ const struct dt_property *p;
+
+ struct opal_mtms_section *mtms = (struct opal_mtms_section *)
+ (pel_buffer + *pel_offset);
+
+ mtms->v6header.id = cpu_to_be16(ELOG_SID_MACHINE_TYPE);
+ mtms->v6header.length = cpu_to_be16(MTMS_SECTION_SIZE);
+ mtms->v6header.version = OPAL_EXT_HRD_VER;
+ mtms->v6header.subtype = 0;
+ mtms->v6header.component_id = cpu_to_be16(elog_data->component_id);
+
+ memset(mtms->model, 0x00, sizeof(mtms->model));
+ memcpy(mtms->model, dt_prop_get(dt_root, "model"), OPAL_SYS_MODEL_LEN);
+
+ memset(mtms->serial_no, 0x00, sizeof(mtms->serial_no));
+ p = dt_find_property(dt_root, "system-id");
+ if (p)
+ memcpy(mtms->serial_no, p->prop, OPAL_SYS_SERIAL_LEN);
+ else
+ memset(mtms->serial_no, 0, OPAL_SYS_SERIAL_LEN);
+
+ *pel_offset += MTMS_SECTION_SIZE;
+}
+
+/* Create extended header section */
+static void create_extended_header_section(struct errorlog *elog_data,
+ char *pel_buffer, int *pel_offset)
+{
+ const char *opalmodel = NULL;
+ const struct dt_property *p;
+ uint64_t extd_time;
+ uint32_t extd_date;
+
+ struct opal_extended_header_section *extdhdr =
+ (struct opal_extended_header_section *)
+ (pel_buffer + *pel_offset);
+
+ extdhdr->v6header.id = cpu_to_be16(ELOG_SID_EXTENDED_HEADER);
+ extdhdr->v6header.length = cpu_to_be16(EXTENDED_HEADER_SECTION_SIZE);
+ extdhdr->v6header.version = OPAL_EXT_HRD_VER;
+ extdhdr->v6header.subtype = 0;
+ extdhdr->v6header.component_id = cpu_to_be16(elog_data->component_id);
+
+ memset(extdhdr->model, 0x00, sizeof(extdhdr->model));
+ opalmodel = dt_prop_get(dt_root, "model");
+ memcpy(extdhdr->model, opalmodel, OPAL_SYS_MODEL_LEN);
+
+ memset(extdhdr->serial_no, 0x00, sizeof(extdhdr->serial_no));
+ p = dt_find_property(dt_root, "system-id");
+ if (p)
+ memcpy(extdhdr->serial_no, p->prop, OPAL_SYS_SERIAL_LEN);
+ else
+ memset(extdhdr->serial_no, 0, OPAL_SYS_SERIAL_LEN);
+
+ memset(extdhdr->opal_release_version, 0x00,
+ sizeof(extdhdr->opal_release_version));
+ memset(extdhdr->opal_subsys_version, 0x00,
+ sizeof(extdhdr->opal_subsys_version));
+
+ rtc_cache_get_datetime(&extd_date, &extd_time);
+ extdhdr->extended_header_date = cpu_to_be32(extd_date);
+ extdhdr->extended_header_time = cpu_to_be32(extd_time >> 32);
+ extdhdr->opal_symid_len = 0;
+
+ *pel_offset += EXTENDED_HEADER_SECTION_SIZE;
+}
+
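+/*
+ * The SRC string is plain ASCII hex: characters 0-1 hold the SRC type,
+ * characters 2-3 the failing subsystem and characters 4-7 the reason code.
+ */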
+/* Set SRC type */
+static void settype(struct opal_src_section *src, uint8_t src_type)
+{
+ char type[4];
+ snprintf(type, sizeof(type), "%02X", src_type);
+ memcpy(src->srcstring, type, 2);
+}
+
+/* Set SRC subsystem type */
+static void setsubsys(struct opal_src_section *src, uint8_t src_subsys)
+{
+ char subsys[4];
+ snprintf(subsys, sizeof(subsys), "%02X", src_subsys);
+ memcpy(src->srcstring+2, subsys, 2);
+}
+
+/* Set reason code of SRC */
+static void setrefcode(struct opal_src_section *src, uint16_t src_refcode)
+{
+ char refcode[8];
+ snprintf(refcode, sizeof(refcode), "%04X", src_refcode);
+ memcpy(src->srcstring+4, refcode, 4);
+}
+
+/* Create SRC section of OPAL log */
+static void create_src_section(struct errorlog *elog_data,
+ char *pel_buffer, int *pel_offset)
+{
+ struct opal_src_section *src = (struct opal_src_section *)
+ (pel_buffer + *pel_offset);
+
+ src->v6header.id = cpu_to_be16(ELOG_SID_PRIMARY_SRC);
+ src->v6header.length = cpu_to_be16(SRC_SECTION_SIZE);
+ src->v6header.version = OPAL_ELOG_VERSION;
+ src->v6header.subtype = OPAL_ELOG_SST;
+ src->v6header.component_id = cpu_to_be16(elog_data->component_id);
+
+ src->version = OPAL_SRC_SEC_VER;
+ src->flags = 0;
+ src->wordcount = OPAL_SRC_MAX_WORD_COUNT;
+ src->srclength = cpu_to_be16(SRC_LENGTH);
+ settype(src, OPAL_SRC_TYPE_ERROR);
+ setsubsys(src, OPAL_FAILING_SUBSYSTEM);
+ setrefcode(src, elog_data->reason_code);
+	memset(src->hexwords, 0, (8 * 4));
+ src->hexwords[0] = cpu_to_be32(OPAL_SRC_FORMAT);
+ src->hexwords[4] = cpu_to_be32(elog_data->additional_info[0]);
+ src->hexwords[5] = cpu_to_be32(elog_data->additional_info[1]);
+ src->hexwords[6] = cpu_to_be32(elog_data->additional_info[2]);
+ src->hexwords[7] = cpu_to_be32(elog_data->additional_info[3]);
+ *pel_offset += SRC_SECTION_SIZE;
+}
+
+/* Create user header section */
+static void create_user_header_section(struct errorlog *elog_data,
+ char *pel_buffer, int *pel_offset)
+{
+ struct opal_user_header_section *usrhdr =
+ (struct opal_user_header_section *)
+ (pel_buffer + *pel_offset);
+
+ usrhdr->v6header.id = cpu_to_be16(ELOG_SID_USER_HEADER);
+ usrhdr->v6header.length = cpu_to_be16(USER_HEADER_SECTION_SIZE);
+ usrhdr->v6header.version = OPAL_ELOG_VERSION;
+ usrhdr->v6header.subtype = OPAL_ELOG_SST;
+ usrhdr->v6header.component_id = cpu_to_be16(elog_data->component_id);
+
+ usrhdr->subsystem_id = elog_data->subsystem_id;
+ usrhdr->event_scope = 0;
+ usrhdr->event_severity = elog_data->event_severity;
+ usrhdr->event_type = elog_data->event_subtype;
+
+ if (elog_data->elog_origin == ORG_SAPPHIRE)
+ usrhdr->action_flags = cpu_to_be16(ERRL_ACTION_REPORT);
+ else
+ usrhdr->action_flags = cpu_to_be16(ERRL_ACTION_NONE);
+
+ *pel_offset += USER_HEADER_SECTION_SIZE;
+}
+
+/* Create private header section */
+static void create_private_header_section(struct errorlog *elog_data,
+ char *pel_buffer, int *pel_offset)
+{
+ uint64_t ctime;
+ uint32_t cdate;
+ struct opal_private_header_section *privhdr =
+ (struct opal_private_header_section *)
+ pel_buffer;
+
+ privhdr->v6header.id = cpu_to_be16(ELOG_SID_PRIVATE_HEADER);
+ privhdr->v6header.length = cpu_to_be16(PRIVATE_HEADER_SECTION_SIZE);
+ privhdr->v6header.version = OPAL_ELOG_VERSION;
+ privhdr->v6header.subtype = OPAL_ELOG_SST;
+ privhdr->v6header.component_id = cpu_to_be16(elog_data->component_id);
+ privhdr->plid = cpu_to_be32(elog_data->plid);
+
+ rtc_cache_get_datetime(&cdate, &ctime);
+ privhdr->create_date = cpu_to_be32(cdate);
+ privhdr->create_time = cpu_to_be32(ctime >> 32);
+ privhdr->section_count = 5;
+
+ privhdr->creator_subid_hi = 0x00;
+ privhdr->creator_subid_lo = 0x00;
+
+ if (elog_data->elog_origin == ORG_SAPPHIRE)
+ privhdr->creator_id = OPAL_CID_SAPPHIRE;
+ else
+ privhdr->creator_id = OPAL_CID_POWERNV;
+
+	privhdr->log_entry_id = cpu_to_be32(elog_data->plid); /* entry id is updated by FSP */
+
+ *pel_offset += PRIVATE_HEADER_SECTION_SIZE;
+}
+
+static void create_user_defined_section(struct errorlog *elog_data,
+ char *pel_buffer, int *pel_offset)
+{
+ char *dump = (char *)pel_buffer + *pel_offset;
+ char *opal_buf = (char *)elog_data->user_data_dump;
+ struct opal_user_section *usrhdr;
+ struct elog_user_data_section *opal_usr_data;
+ struct opal_private_header_section *privhdr =
+ (struct opal_private_header_section *)pel_buffer;
+ int i;
+
+ for (i = 0; i < elog_data->user_section_count; i++) {
+
+ usrhdr = (struct opal_user_section *)dump;
+ opal_usr_data = (struct elog_user_data_section *)opal_buf;
+
+ usrhdr->v6header.id = cpu_to_be16(ELOG_SID_USER_DEFINED);
+ usrhdr->v6header.length = cpu_to_be16(
+ sizeof(struct opal_v6_header) +
+ be16_to_cpu(opal_usr_data->size));
+ usrhdr->v6header.version = OPAL_ELOG_VERSION;
+ usrhdr->v6header.subtype = OPAL_ELOG_SST;
+ usrhdr->v6header.component_id = cpu_to_be16(elog_data->component_id);
+
+ memcpy(usrhdr->dump, opal_buf, be16_to_cpu(opal_usr_data->size));
+ *pel_offset += be16_to_cpu(usrhdr->v6header.length);
+ dump += be16_to_cpu(usrhdr->v6header.length);
+ opal_buf += be16_to_cpu(opal_usr_data->size);
+ privhdr->section_count++;
+ }
+}
+
+static size_t pel_user_section_size(struct errorlog *elog_data)
+{
+ int i;
+ size_t total = 0;
+ char *opal_buf = (char *)elog_data->user_data_dump;
+ struct elog_user_data_section *opal_usr_data;
+
+ for (i = 0; i < elog_data->user_section_count; i++) {
+ u16 s;
+
+ opal_usr_data = (struct elog_user_data_section *)opal_buf;
+ s = be16_to_cpu(opal_usr_data->size);
+ total += sizeof(struct opal_v6_header) + s;
+ opal_buf += s;
+ }
+
+ return total;
+}
+
+size_t pel_size(struct errorlog *elog_data)
+{
+ return PEL_MIN_SIZE + pel_user_section_size(elog_data);
+}
+
+/* Converts an OPAL errorlog into a PEL formatted log */
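+/* Returns the number of bytes written, or 0 if the buffer is too small. */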
+int create_pel_log(struct errorlog *elog_data, char *pel_buffer,
+ size_t pel_buffer_size)
+{
+ int pel_offset = 0;
+
+ if (pel_buffer_size < pel_size(elog_data)) {
+ prerror("PEL buffer too small to create record\n");
+ return 0;
+ }
+
+ memset(pel_buffer, 0, pel_buffer_size);
+
+ create_private_header_section(elog_data, pel_buffer, &pel_offset);
+ create_user_header_section(elog_data, pel_buffer, &pel_offset);
+ create_src_section(elog_data, pel_buffer, &pel_offset);
+ create_extended_header_section(elog_data, pel_buffer, &pel_offset);
+ create_mtms_section(elog_data, pel_buffer, &pel_offset);
+ if (elog_data->user_section_count)
+ create_user_defined_section(elog_data, pel_buffer, &pel_offset);
+
+ return pel_offset;
+}
diff --git a/roms/skiboot/core/platform.c b/roms/skiboot/core/platform.c
new file mode 100644
index 000000000..320fdea03
--- /dev/null
+++ b/roms/skiboot/core/platform.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * OPAL Platform abstraction
+ *
+ * Some OPAL calls forward into the struct platform that's probed
+ * during boot, when the platform provides the corresponding hook.
+ * This file also carries the generic platform probing, init and
+ * configuration glue.
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <stdlib.h>
+#include <skiboot.h>
+#include <opal.h>
+#include <console.h>
+#include <timebase.h>
+#include <cpu.h>
+#include <chip.h>
+#include <xscom.h>
+#include <errorlog.h>
+#include <bt.h>
+#include <nvram.h>
+#include <npu2.h>
+#include <platforms/astbmc/astbmc.h>
+
+bool manufacturing_mode = false;
+struct platform platform;
+
+DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
+ OPAL_CEC_HARDWARE, OPAL_ERROR_PANIC,
+ OPAL_ABNORMAL_POWER_OFF);
+
+/*
+ * Various wrappers for platform functions
+ */
+static int64_t opal_cec_power_down(uint64_t request)
+{
+ prlog(PR_NOTICE, "OPAL: Shutdown request type 0x%llx...\n", request);
+
+ opal_quiesce(QUIESCE_HOLD, -1);
+
+ console_complete_flush();
+
+ if (platform.cec_power_down)
+ return platform.cec_power_down(request);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_CEC_POWER_DOWN, opal_cec_power_down, 1);
+
+static int64_t full_reboot(void)
+{
+ prlog(PR_NOTICE, "OPAL: Reboot request...\n");
+
+ console_complete_flush();
+
+ if (platform.cec_reboot)
+ return platform.cec_reboot();
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t opal_cec_reboot(void)
+{
+ opal_quiesce(QUIESCE_HOLD, -1);
+
+ /*
+ * Fast-reset was enabled by default for a long time in an attempt to
+ * make it more stable by exercising it more frequently. This resulted
+ * in a fair amount of pain due to mis-behaving hardware and confusion
+ * about what a "reset" is supposed to do exactly. Additionally,
+ * secure variables require a full reboot to work at all.
+ *
+ * Due to all that fast-reset should only be used if it's explicitly
+ * enabled. It started life as a debug hack and should remain one.
+ */
+ if (nvram_query_eq_safe("fast-reset", "1"))
+ fast_reboot();
+
+ return full_reboot();
+}
+opal_call(OPAL_CEC_REBOOT, opal_cec_reboot, 0);
+
+static int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag)
+{
+ struct errorlog *buf;
+
+ opal_quiesce(QUIESCE_HOLD, -1);
+
+ switch (reboot_type) {
+ case OPAL_REBOOT_NORMAL:
+ return opal_cec_reboot();
+ case OPAL_REBOOT_PLATFORM_ERROR:
+ prlog(PR_EMERG,
+ "OPAL: Reboot requested due to Platform error.\n");
+ buf = opal_elog_create(&e_info(OPAL_RC_ABNORMAL_REBOOT), 0);
+ if (buf) {
+ log_append_msg(buf,
+ "OPAL: Reboot requested due to Platform error.");
+ if (diag) {
+ /* Add user section "DESC" */
+ log_add_section(buf, OPAL_ELOG_SEC_DESC);
+ log_append_data(buf, diag, strlen(diag));
+ }
+ log_commit(buf);
+ } else {
+ prerror("OPAL: failed to log an error\n");
+ }
+ disable_fast_reboot("Reboot due to Platform Error");
+ console_complete_flush();
+ return xscom_trigger_xstop();
+ case OPAL_REBOOT_FULL_IPL:
+		prlog(PR_NOTICE, "Reboot: Full reboot requested\n");
+ return full_reboot();
+ case OPAL_REBOOT_MPIPL:
+ prlog(PR_NOTICE, "Reboot: OS reported error. Performing MPIPL\n");
+ console_complete_flush();
+ if (platform.terminate)
+ platform.terminate("OS reported error. Performing MPIPL\n");
+ else
+ full_reboot();
+ for (;;);
+ break;
+ case OPAL_REBOOT_FAST:
+ prlog(PR_NOTICE, "Reboot: Fast reboot requested by OS\n");
+ fast_reboot();
+ prlog(PR_NOTICE, "Reboot: Fast reboot failed\n");
+ return OPAL_UNSUPPORTED;
+ default:
+ prlog(PR_NOTICE, "OPAL: Unsupported reboot request %d\n", reboot_type);
+		return OPAL_UNSUPPORTED;
+ }
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_CEC_REBOOT2, opal_cec_reboot2, 2);
+
+static bool generic_platform_probe(void)
+{
+ if (dt_find_by_path(dt_root, "bmc")) {
+ /* We appear to have a BMC... so let's cross our fingers
+ * and see if we can do anything!
+ */
+ prlog(PR_ERR, "GENERIC BMC PLATFORM: **GUESSING** that there's "
+ "*maybe* a BMC we can talk to.\n");
+ prlog(PR_ERR, "THIS IS ****UNSUPPORTED****, BRINGUP USE ONLY.\n");
+ astbmc_early_init();
+ } else {
+ uart_init();
+ }
+
+ return true;
+}
+
+static void generic_platform_init(void)
+{
+ if (uart_enabled())
+ set_opal_console(&uart_opal_con);
+
+ if (dt_find_by_path(dt_root, "bmc")) {
+ prlog(PR_ERR, "BMC-GUESSWORK: Here be dragons with a taste for human flesh\n");
+ astbmc_init();
+ } else {
+ /* Otherwise we go down the ultra-minimal path */
+
+ /* Enable a BT interface if we find one too */
+ bt_init();
+ }
+
+ /* Fake a real time clock */
+ fake_rtc_init();
+}
+
+static int64_t generic_cec_power_down(uint64_t request __unused)
+{
+ return OPAL_UNSUPPORTED;
+}
+
+static int generic_resource_loaded(enum resource_id id, uint32_t subid)
+{
+ if (dt_find_by_path(dt_root, "bmc"))
+ return flash_resource_loaded(id, subid);
+
+ return OPAL_EMPTY;
+}
+
+static int generic_start_preload_resource(enum resource_id id, uint32_t subid,
+ void *buf, size_t *len)
+{
+ if (dt_find_by_path(dt_root, "bmc"))
+ return flash_start_preload_resource(id, subid, buf, len);
+
+ return OPAL_EMPTY;
+}
+
+/* These values will work for a ZZ booted using BML */
+static const struct platform_ocapi generic_ocapi = {
+ .i2c_engine = 1,
+ .i2c_port = 4,
+ .i2c_reset_addr = 0x20,
+ .i2c_reset_brick2 = (1 << 1),
+ .i2c_reset_brick3 = (1 << 6),
+ .i2c_reset_brick4 = 0, /* unused */
+ .i2c_reset_brick5 = 0, /* unused */
+ .i2c_presence_addr = 0x20,
+ .i2c_presence_brick2 = (1 << 2), /* bottom connector */
+ .i2c_presence_brick3 = (1 << 7), /* top connector */
+ .i2c_presence_brick4 = 0, /* unused */
+ .i2c_presence_brick5 = 0, /* unused */
+ .odl_phy_swap = true,
+};
+
+static struct bmc_platform generic_bmc = {
+ .name = "generic",
+};
+
+static struct platform generic_platform = {
+ .name = "generic",
+ .bmc = &generic_bmc,
+ .probe = generic_platform_probe,
+ .init = generic_platform_init,
+ .nvram_info = fake_nvram_info,
+ .nvram_start_read = fake_nvram_start_read,
+ .nvram_write = fake_nvram_write,
+ .cec_power_down = generic_cec_power_down,
+ .start_preload_resource = generic_start_preload_resource,
+ .resource_loaded = generic_resource_loaded,
+ .ocapi = &generic_ocapi,
+ .npu2_device_detect = npu2_i2c_presence_detect, /* Assumes ZZ */
+};
+
+const struct bmc_platform *bmc_platform = &generic_bmc;
+
+void set_bmc_platform(const struct bmc_platform *bmc)
+{
+ if (bmc)
+ prlog(PR_NOTICE, "PLAT: Detected BMC platform %s\n", bmc->name);
+ else
+ bmc = &generic_bmc;
+
+ bmc_platform = bmc;
+}
+
+void probe_platform(void)
+{
+ struct platform *platforms = &__platforms_start;
+ unsigned int i;
+
+ /* Detect Manufacturing mode */
+ if (dt_find_property(dt_root, "ibm,manufacturing-mode")) {
+ /**
+ * @fwts-label ManufacturingMode
+ * @fwts-advice You are running in manufacturing mode.
+ * This mode should only be enabled in a factory during
+ * manufacturing.
+ */
+ prlog(PR_NOTICE, "PLAT: Manufacturing mode ON\n");
+ manufacturing_mode = true;
+ }
+
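+	/* Walk the built-in platform table; the first probe() that claims the machine wins */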
+ for (i = 0; &platforms[i] < &__platforms_end; i++) {
+ if (platforms[i].probe && platforms[i].probe()) {
+ platform = platforms[i];
+ break;
+ }
+ }
+ if (!platform.name) {
+ platform = generic_platform;
+ if (platform.probe)
+ platform.probe();
+ }
+
+ prlog(PR_NOTICE, "PLAT: Detected %s platform\n", platform.name);
+
+ set_bmc_platform(platform.bmc);
+}
+
+int start_preload_resource(enum resource_id id, uint32_t subid,
+ void *buf, size_t *len)
+{
+ if (!platform.start_preload_resource)
+ return OPAL_UNSUPPORTED;
+
+ return platform.start_preload_resource(id, subid, buf, len);
+}
+
+int resource_loaded(enum resource_id id, uint32_t idx)
+{
+ if (!platform.resource_loaded)
+ return OPAL_SUCCESS;
+
+ return platform.resource_loaded(id, idx);
+}
+
+int wait_for_resource_loaded(enum resource_id id, uint32_t idx)
+{
+ int r = resource_loaded(id, idx);
+ int waited = 0;
+
+	while (r == OPAL_BUSY) {
+ opal_run_pollers();
+ r = resource_loaded(id, idx);
+ if (r != OPAL_BUSY)
+ break;
+ time_wait_ms_nopoll(5);
+		waited += 5;
+ }
+
+ prlog(PR_TRACE, "PLATFORM: wait_for_resource_loaded %x/%x %u ms\n",
+ id, idx, waited);
+ return r;
+}
+
+void op_display(enum op_severity sev, enum op_module mod, uint16_t code)
+{
+ if (platform.op_display)
+ platform.op_display(sev, mod, code);
+}
diff --git a/roms/skiboot/core/pool.c b/roms/skiboot/core/pool.c
new file mode 100644
index 000000000..a0283199a
--- /dev/null
+++ b/roms/skiboot/core/pool.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * This file provides some functions to manage a pool of pre-allocated
+ * objects. It also provides a method to reserve a pre-defined number
+ * of objects for higher priority requests. Allocations obey the
+ * following rules:
+ *
+ * 1. An allocation will succeed at any priority if there are more than
+ *    the reserved number of objects free.
+ * 2. Only high priority allocations will succeed when there are fewer
+ *    than the reserved number of objects free.
+ * 3. When an allocation is freed it is always added back to the high
+ *    priority pool if there are fewer than the reserved number of
+ *    objects available.
+ *
+ * Copyright 2013-2014 IBM Corp.
+ */
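+
+/*
+ * Illustrative usage (a sketch, not part of the original file); "struct foo"
+ * is a hypothetical object type:
+ *
+ *	struct pool foo_pool;
+ *
+ *	pool_init(&foo_pool, sizeof(struct foo), 16, 4);
+ *	struct foo *f = pool_get(&foo_pool, POOL_NORMAL);
+ *	if (f)
+ *		pool_free_object(&foo_pool, f);
+ */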
+
+#include <pool.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ccan/list/list.h>
+
+void *pool_get(struct pool *pool, enum pool_priority priority)
+{
+ void *obj;
+
+ if (!pool->free_count ||
+ ((pool->free_count <= pool->reserved) && priority == POOL_NORMAL))
+ return NULL;
+
+ pool->free_count--;
+ obj = (void *) list_pop_(&pool->free_list, 0);
+ assert(obj);
+ memset(obj, 0, pool->obj_size);
+ return obj;
+}
+
+void pool_free_object(struct pool *pool, void *obj)
+{
+ pool->free_count++;
+ list_add_tail(&pool->free_list,
+ (struct list_node *) (obj));
+}
+
+int pool_init(struct pool *pool, size_t obj_size, int count, int reserved)
+{
+ int i;
+
+ if (obj_size < sizeof(struct list_node))
+ obj_size = sizeof(struct list_node);
+
+ assert(count >= reserved);
+ pool->buf = malloc(obj_size*count);
+ if (!pool->buf)
+ return -1;
+
+ pool->obj_size = obj_size;
+ pool->free_count = count;
+ pool->reserved = reserved;
+ list_head_init(&pool->free_list);
+
+	for (i = 0; i < count; i++)
+ list_add_tail(&pool->free_list,
+ (struct list_node *) (pool->buf + obj_size*i));
+
+ return 0;
+}
diff --git a/roms/skiboot/core/powercap.c b/roms/skiboot/core/powercap.c
new file mode 100644
index 000000000..6ae58eb86
--- /dev/null
+++ b/roms/skiboot/core/powercap.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * OPAL calls to get/set power caps
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#include <powercap.h>
+
+static int opal_get_powercap(u32 handle, int token __unused, __be32 *__pcap)
+{
+ if (!__pcap || !opal_addr_valid(__pcap))
+ return OPAL_PARAMETER;
+
+ if (powercap_get_class(handle) == POWERCAP_CLASS_OCC) {
+ u32 pcap;
+ int rc;
+
+ rc = occ_get_powercap(handle, &pcap);
+ *__pcap = cpu_to_be32(pcap);
+ return rc;
+ }
+
+ return OPAL_UNSUPPORTED;
+};
+
+opal_call(OPAL_GET_POWERCAP, opal_get_powercap, 3);
+
+static int opal_set_powercap(u32 handle, int token, u32 pcap)
+{
+ if (powercap_get_class(handle) == POWERCAP_CLASS_OCC)
+ return occ_set_powercap(handle, token, pcap);
+
+ return OPAL_UNSUPPORTED;
+};
+
+opal_call(OPAL_SET_POWERCAP, opal_set_powercap, 3);
diff --git a/roms/skiboot/core/psr.c b/roms/skiboot/core/psr.c
new file mode 100644
index 000000000..75ccc6617
--- /dev/null
+++ b/roms/skiboot/core/psr.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * OPAL calls to get/set Power Shift Ratio (PSR)
+ *
+ * i.e. when something has to be throttled, what gets throttled?
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#include <psr.h>
+
+static int opal_get_power_shift_ratio(u32 handle, int token __unused,
+ __be32 *__ratio)
+{
+ if (!__ratio || !opal_addr_valid(__ratio))
+ return OPAL_PARAMETER;
+
+ if (psr_get_class(handle) == PSR_CLASS_OCC) {
+ u32 ratio;
+ int rc;
+
+ rc = occ_get_psr(handle, &ratio);
+ *__ratio = cpu_to_be32(ratio);
+ return rc;
+ }
+
+ return OPAL_UNSUPPORTED;
+};
+
+opal_call(OPAL_GET_POWER_SHIFT_RATIO, opal_get_power_shift_ratio, 3);
+
+static int opal_set_power_shift_ratio(u32 handle, int token,
+ u32 ratio)
+{
+ if (psr_get_class(handle) == PSR_CLASS_OCC)
+ return occ_set_psr(handle, token, ratio);
+
+ return OPAL_UNSUPPORTED;
+};
+
+opal_call(OPAL_SET_POWER_SHIFT_RATIO, opal_set_power_shift_ratio, 3);
diff --git a/roms/skiboot/core/relocate.c b/roms/skiboot/core/relocate.c
new file mode 100644
index 000000000..6295927e2
--- /dev/null
+++ b/roms/skiboot/core/relocate.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Relocate ourselves
+ *
+ * WARNING: This code is used to self-relocate, it cannot have any
+ * global reference nor TOC reference. It's also called before BSS
+ * is cleared.
+ *
+ * Copyright 2013-2015 IBM Corp.
+ */
+
+#include <stdbool.h>
+#include <elf.h>
+
+/* Called from head.S, thus no header. */
+int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela);
+
+/* Note: This code relies on the assumptions that our link address
+ * is 0 and that we are already running at the target address.
+ */
+int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela)
+{
+ uint64_t dt_rela = 0;
+ uint64_t dt_relacount = 0;
+ unsigned int i;
+
+ /* Look for relocation table */
+ for (; dyn->d_tag != DT_NULL; dyn++) {
+ if (dyn->d_tag == DT_RELA)
+ dt_rela = dyn->d_val;
+ else if (dyn->d_tag == DT_RELACOUNT)
+ dt_relacount = dyn->d_val;
+ }
+
+ /* If we miss either rela or relacount, bail */
+ if (!dt_rela || !dt_relacount)
+ return -1;
+
+ /* Check if the offset is consistent */
+ if ((offset + dt_rela) != (uint64_t)rela)
+ return -2;
+
+ /* Perform relocations */
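+	/* Each R_PPC64_RELATIVE entry means: *(offset + r_offset) = offset + r_addend */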
+ for (i = 0; i < dt_relacount; i++, rela++) {
+ uint64_t *t;
+
+ if (ELF64_R_TYPE(rela->r_info) != R_PPC64_RELATIVE)
+ return -3;
+ t = (uint64_t *)(rela->r_offset + offset);
+ *t = rela->r_addend + offset;
+ }
+
+ return 0;
+}
diff --git a/roms/skiboot/core/rtc.c b/roms/skiboot/core/rtc.c
new file mode 100644
index 000000000..3c0dda71e
--- /dev/null
+++ b/roms/skiboot/core/rtc.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Real Time Clock (RTC) Cache
+ *
+ * Copyright 2013-2014 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <lock.h>
+#include <rtc.h>
+#include <timebase.h>
+
+static struct lock rtc_tod_lock = LOCK_UNLOCKED;
+
+static struct {
+ struct tm tm;
+ unsigned long tb;
+ bool valid;
+} rtc_tod_cache;
+
+void rtc_cache_update(struct tm *tm)
+{
+ lock(&rtc_tod_lock);
+ rtc_tod_cache.tb = mftb();
+ rtc_tod_cache.tm = *tm;
+ rtc_tod_cache.valid = true;
+ unlock(&rtc_tod_lock);
+}
+
+int rtc_cache_get(struct tm *tm)
+{
+ unsigned long cache_age_sec;
+
+ lock(&rtc_tod_lock);
+
+ if (!rtc_tod_cache.valid) {
+ unlock(&rtc_tod_lock);
+ return -1;
+ }
+
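+	/* Age the cached time by the seconds elapsed since it was snapshotted */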
+ cache_age_sec = tb_to_msecs(mftb() - rtc_tod_cache.tb) / 1000;
+ *tm = rtc_tod_cache.tm;
+ unlock(&rtc_tod_lock);
+
+ tm->tm_sec += cache_age_sec;
+ mktime(tm);
+
+ return 0;
+}
+
+int rtc_cache_get_datetime(uint32_t *year_month_day,
+ uint64_t *hour_minute_second_millisecond)
+{
+ struct tm tm;
+
+ if (rtc_cache_get(&tm) < 0)
+ return -1;
+
+ tm_to_datetime(&tm, year_month_day, hour_minute_second_millisecond);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/sensor.c b/roms/skiboot/core/sensor.c
new file mode 100644
index 000000000..303d867e2
--- /dev/null
+++ b/roms/skiboot/core/sensor.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * OPAL Sensor APIs
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <sensor.h>
+#include <skiboot.h>
+#include <device.h>
+#include <opal.h>
+#include <dts.h>
+#include <lock.h>
+#include <occ.h>
+
+struct dt_node *sensor_node;
+
+static struct lock async_read_list_lock = LOCK_UNLOCKED;
+static LIST_HEAD(async_read_list);
+
+struct sensor_async_read {
+ struct list_node link;
+ __be64 *val;
+ __be32 *opal_data;
+ int token;
+};
+
+static int add_to_async_read_list(int token, __be32 *opal_data, __be64 *val)
+{
+ struct sensor_async_read *req;
+
+ req = zalloc(sizeof(*req));
+ if (!req)
+ return OPAL_NO_MEM;
+
+ req->token = token;
+ req->val = val;
+ req->opal_data = opal_data;
+
+ lock(&async_read_list_lock);
+ list_add_tail(&async_read_list, &req->link);
+ unlock(&async_read_list_lock);
+
+ return OPAL_ASYNC_COMPLETION;
+}
+
+void check_sensor_read(int token)
+{
+ struct sensor_async_read *req = NULL;
+
+ lock(&async_read_list_lock);
+ if (list_empty(&async_read_list))
+ goto out;
+
+ list_for_each(&async_read_list, req, link) {
+ if (req->token == token)
+ break;
+ }
+ if (!req)
+ goto out;
+
+ *req->opal_data = cpu_to_be32(be64_to_cpu(*req->val));
+ free(req->val);
+ list_del(&req->link);
+ free(req);
+out:
+ unlock(&async_read_list_lock);
+}
+
+static s64 opal_sensor_read_64(u32 sensor_hndl, int token, __be64 *data)
+{
+ s64 rc;
+
+ switch (sensor_get_family(sensor_hndl)) {
+ case SENSOR_DTS:
+ rc = dts_sensor_read(sensor_hndl, token, data);
+ return rc;
+
+ case SENSOR_OCC:
+ rc = occ_sensor_read(sensor_hndl, data);
+ return rc;
+
+ default:
+ break;
+ }
+
+ if (platform.sensor_read) {
+ rc = platform.sensor_read(sensor_hndl, token, data);
+ return rc;
+ }
+
+ return OPAL_UNSUPPORTED;
+}
+
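+/*
+ * Legacy 32-bit read: wraps the 64-bit read above. Synchronous results
+ * are folded into the caller's 32-bit buffer here; asynchronous ones are
+ * parked on async_read_list and completed from check_sensor_read().
+ */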
+static int64_t opal_sensor_read(uint32_t sensor_hndl, int token,
+ __be32 *data)
+{
+ __be64 *val;
+ s64 rc;
+
+ val = zalloc(sizeof(*val));
+ if (!val)
+ return OPAL_NO_MEM;
+
+ rc = opal_sensor_read_64(sensor_hndl, token, val);
+ if (rc == OPAL_SUCCESS) {
+ *data = cpu_to_be32(be64_to_cpu(*val));
+ free(val);
+ } else if (rc == OPAL_ASYNC_COMPLETION) {
+ rc = add_to_async_read_list(token, data, val);
+ }
+
+ return rc;
+}
+
+static int opal_sensor_group_clear(u32 group_hndl, int token)
+{
+ switch (sensor_get_family(group_hndl)) {
+ case SENSOR_OCC:
+ return occ_sensor_group_clear(group_hndl, token);
+ default:
+ break;
+ }
+
+ return OPAL_UNSUPPORTED;
+}
+
+static int opal_sensor_group_enable(u32 group_hndl, int token, bool enable)
+{
+ switch (sensor_get_family(group_hndl)) {
+ case SENSOR_OCC:
+ return occ_sensor_group_enable(group_hndl, token, enable);
+ default:
+ break;
+ }
+
+ return OPAL_UNSUPPORTED;
+}
+
+void sensor_init(void)
+{
+ sensor_node = dt_new(opal_node, "sensors");
+
+ dt_add_property_string(sensor_node, "compatible", "ibm,opal-sensor");
+ dt_add_property_cells(sensor_node, "#address-cells", 1);
+ dt_add_property_cells(sensor_node, "#size-cells", 0);
+
+ /* Register OPAL interface */
+ opal_register(OPAL_SENSOR_READ, opal_sensor_read, 3);
+ opal_register(OPAL_SENSOR_GROUP_CLEAR, opal_sensor_group_clear, 2);
+ opal_register(OPAL_SENSOR_READ_U64, opal_sensor_read_64, 3);
+ opal_register(OPAL_SENSOR_GROUP_ENABLE, opal_sensor_group_enable, 3);
+}
diff --git a/roms/skiboot/core/stack.c b/roms/skiboot/core/stack.c
new file mode 100644
index 000000000..3edf98411
--- /dev/null
+++ b/roms/skiboot/core/stack.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Create/Print backtraces, check stack usage etc.
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <processor.h>
+#include <cpu.h>
+#include <stack.h>
+#include <mem_region.h>
+#include <unistd.h>
+#include <lock.h>
+
+#define STACK_BUF_ENTRIES 60
+static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
+
+/* Dumps backtrace to buffer */
+static void __nomcount __backtrace_create(struct bt_entry *entries,
+ unsigned int max_ents,
+ struct bt_metadata *metadata,
+ struct stack_frame *eframe)
+{
+ unsigned long *fp = (unsigned long *)eframe;
+ unsigned long top_adj = top_of_ram;
+
+ /* Assume one stack for early backtraces */
+ if (top_of_ram == SKIBOOT_BASE + SKIBOOT_SIZE)
+ top_adj = top_of_ram + STACK_SIZE;
+
+ metadata->ents = 0;
+ while (max_ents) {
+ fp = (unsigned long *)fp[0];
+ if (!fp || (unsigned long)fp > top_adj)
+ break;
+ eframe = (struct stack_frame *)fp;
+ if (eframe->magic == STACK_INT_MAGIC) {
+ entries->exception_type = eframe->type;
+ entries->exception_pc = eframe->pc;
+ } else {
+ entries->exception_type = 0;
+ }
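+		/* Frame layout (ppc64 ABI): fp[0] is the back chain, fp[2] the LR save area */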
+ entries->sp = (unsigned long)fp;
+ entries->pc = fp[2];
+ entries++;
+ metadata->ents++;
+ max_ents--;
+ }
+
+ metadata->r1_caller = eframe->gpr[1];
+
+ if (fp)
+ metadata->token = eframe->gpr[0];
+ else
+ metadata->token = -1UL;
+
+ metadata->pir = mfspr(SPR_PIR);
+}
+
+void __nomcount backtrace_create(struct bt_entry *entries,
+ unsigned int max_ents,
+ struct bt_metadata *metadata)
+{
+ unsigned long *fp = __builtin_frame_address(0);
+ struct stack_frame *eframe = (struct stack_frame *)fp;
+
+ __backtrace_create(entries, max_ents, metadata, eframe);
+}
+
+void backtrace_print(struct bt_entry *entries, struct bt_metadata *metadata,
+ char *out_buf, unsigned int *len, bool symbols)
+{
+ static char bt_text_buf[4096];
+ int i, l = 0, max;
+ char *buf = out_buf;
+ unsigned long bottom, top, normal_top, tbot, ttop;
+ char mark;
+
+ if (!out_buf) {
+ buf = bt_text_buf;
+ max = sizeof(bt_text_buf) - 16;
+ } else
+ max = *len - 1;
+
+ bottom = cpu_stack_bottom(metadata->pir);
+ normal_top = cpu_stack_top(metadata->pir);
+ top = cpu_emergency_stack_top(metadata->pir);
+ tbot = SKIBOOT_BASE;
+ ttop = (unsigned long)&_etext;
+
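+	/* Markers: '!' SP outside the stack, 'E' on the emergency stack, '*' PC outside skiboot text */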
+ l += snprintf(buf, max, "CPU %04lx Backtrace:\n", metadata->pir);
+ for (i = 0; i < metadata->ents && l < max; i++) {
+ if (entries->sp < bottom || entries->sp > top)
+ mark = '!';
+ else if (entries->sp > normal_top)
+ mark = 'E';
+ else if (entries->pc < tbot || entries->pc > ttop)
+ mark = '*';
+ else
+ mark = ' ';
+ l += snprintf(buf + l, max - l,
+ " S: %016lx R: %016lx %c ",
+ entries->sp, entries->pc, mark);
+ if (symbols)
+ l += snprintf_symbol(buf + l, max - l, entries->pc);
+ l += snprintf(buf + l, max - l, "\n");
+ if (entries->exception_type) {
+ l += snprintf(buf + l, max - l,
+ " --- Interrupt 0x%lx at %016lx ---\n",
+ entries->exception_type, entries->exception_pc);
+ }
+ entries++;
+ }
+ if (metadata->token <= OPAL_LAST)
+ l += snprintf(buf + l, max - l,
+ " --- OPAL call token: 0x%lx caller R1: 0x%016lx ---\n",
+ metadata->token, metadata->r1_caller);
+ else if (metadata->token == -1UL)
+ l += snprintf(buf + l, max - l, " --- OPAL boot ---\n");
+ if (!out_buf)
+ write(stdout->fd, bt_text_buf, l);
+ buf[l++] = 0;
+ if (len)
+ *len = l;
+}
+
+/*
+ * To ensure that we always get backtrace output we bypass the usual console
+ * locking paths. The downside is that when multiple threads need to print
+ * a backtrace they can garble each other's output. To prevent this we
+ * use a separate lock to serialise printing of the dumps.
+ */
+static struct lock bt_lock = LOCK_UNLOCKED;
+
+void backtrace(void)
+{
+ struct bt_metadata metadata;
+
+ lock(&bt_lock);
+
+ backtrace_create(bt_buf, STACK_BUF_ENTRIES, &metadata);
+ backtrace_print(bt_buf, &metadata, NULL, NULL, true);
+
+ unlock(&bt_lock);
+}
+
+void backtrace_r1(uint64_t r1)
+{
+ struct bt_metadata metadata;
+
+ lock(&bt_lock);
+
+ __backtrace_create(bt_buf, STACK_BUF_ENTRIES, &metadata, (struct stack_frame *)r1);
+ backtrace_print(bt_buf, &metadata, NULL, NULL, true);
+
+ unlock(&bt_lock);
+}
+
+void __nomcount __stack_chk_fail(void);
+void __nomcount __stack_chk_fail(void)
+{
+ static bool failed_once;
+
+ if (failed_once)
+ return;
+ failed_once = true;
+ prlog(PR_EMERG, "Stack corruption detected !\n");
+ abort();
+}
+
+#ifdef STACK_CHECK_ENABLED
+
+static int64_t lowest_stack_mark = LONG_MAX;
+static struct lock stack_check_lock = LOCK_UNLOCKED;
+
+void __nomcount __mcount_stack_check(uint64_t sp, uint64_t lr);
+void __nomcount __mcount_stack_check(uint64_t sp, uint64_t lr)
+{
+ struct cpu_thread *c = this_cpu();
+ uint64_t base = (uint64_t)c;
+ uint64_t bot = base + sizeof(struct cpu_thread);
+ int64_t mark = sp - bot;
+ uint64_t top = base + NORMAL_STACK_SIZE;
+
+ /*
+ * Don't check the emergency stack just yet.
+ */
+ if (c->in_opal_call > 1)
+ return;
+
+ /*
+ * Don't re-enter on this CPU or don't enter at all if somebody
+ * has spotted an overflow
+ */
+ if (c->in_mcount)
+ return;
+ c->in_mcount = true;
+
+ /* Capture lowest stack for this thread */
+ if (mark < c->stack_bot_mark) {
+ lock(&stack_check_lock);
+ c->stack_bot_mark = mark;
+ c->stack_bot_pc = lr;
+ c->stack_bot_tok = c->current_token;
+ backtrace_create(c->stack_bot_bt, CPU_BACKTRACE_SIZE,
+ &c->stack_bot_bt_metadata);
+ unlock(&stack_check_lock);
+
+ if (mark < STACK_WARNING_GAP) {
+ prlog(PR_EMERG, "CPU %04x Stack usage danger !"
+ " pc=%08llx sp=%08llx (gap=%lld) token=%lld\n",
+ c->pir, lr, sp, mark, c->current_token);
+ }
+ }
+
+ /* Stack is within bounds? */
+ if (sp >= (bot + STACK_SAFETY_GAP) && sp < top) {
+ c->in_mcount = false;
+ return;
+ }
+
+ prlog(PR_EMERG, "CPU %04x Stack overflow detected !"
+ " pc=%08llx sp=%08llx (gap=%lld) token=%lld\n",
+ c->pir, lr, sp, mark, c->current_token);
+ abort();
+}
+
+void check_stacks(void)
+{
+ struct cpu_thread *c, *lowest = NULL;
+
+ /* We should never call that from mcount */
+ assert(!this_cpu()->in_mcount);
+
+ /* Mark ourselves "in_mcount" to avoid deadlock on stack
+ * check lock
+ */
+ this_cpu()->in_mcount = true;
+
+ for_each_cpu(c) {
+ if (!c->stack_bot_mark ||
+ c->stack_bot_mark >= lowest_stack_mark)
+ continue;
+ lock(&stack_check_lock);
+ if (c->stack_bot_mark < lowest_stack_mark) {
+ lowest = c;
+ lowest_stack_mark = c->stack_bot_mark;
+ }
+ unlock(&stack_check_lock);
+ }
+ if (lowest) {
+ lock(&bt_lock);
+ prlog(PR_NOTICE, "CPU %04x lowest stack mark %lld bytes left"
+ " pc=%08llx token=%lld\n",
+ lowest->pir, lowest->stack_bot_mark, lowest->stack_bot_pc,
+ lowest->stack_bot_tok);
+ backtrace_print(lowest->stack_bot_bt,
+ &lowest->stack_bot_bt_metadata,
+ NULL, NULL, true);
+ unlock(&bt_lock);
+ }
+
+ this_cpu()->in_mcount = false;
+}
+#endif /* STACK_CHECK_ENABLED */
diff --git a/roms/skiboot/core/test/Makefile.check b/roms/skiboot/core/test/Makefile.check
new file mode 100644
index 000000000..7c347bea2
--- /dev/null
+++ b/roms/skiboot/core/test/Makefile.check
@@ -0,0 +1,101 @@
+# -*-Makefile-*-
+CORE_TEST := \
+ core/test/run-bitmap \
+ core/test/run-cpufeatures \
+ core/test/run-device \
+ core/test/run-flash-subpartition \
+ core/test/run-flash-firmware-versions \
+ core/test/run-mem_region \
+ core/test/run-malloc \
+ core/test/run-malloc-speed \
+ core/test/run-mem_region_init \
+ core/test/run-mem_region_next \
+ core/test/run-mem_region_release_unused \
+ core/test/run-mem_region_release_unused_noalloc \
+ core/test/run-mem_region_reservations \
+ core/test/run-mem_range_is_reserved \
+ core/test/run-nvram-format \
+ core/test/run-trace core/test/run-msg \
+ core/test/run-pel \
+ core/test/run-pool \
+ core/test/run-time-utils \
+ core/test/run-timebase \
+ core/test/run-timer \
+ core/test/run-buddy \
+ core/test/run-pci-quirk
+
+HOSTCFLAGS+=-I . -I include -Wno-error=attributes
+
+CORE_TEST_NOSTUB := core/test/run-console-log
+CORE_TEST_NOSTUB += core/test/run-console-log-buf-overrun
+CORE_TEST_NOSTUB += core/test/run-console-log-pr_fmt
+CORE_TEST_NOSTUB += core/test/run-api-test
+
+LCOV_EXCLUDE += $(CORE_TEST:%=%.c) core/test/stubs.c
+LCOV_EXCLUDE += $(CORE_TEST_NOSTUB:%=%.c) /usr/include/*
+
+.PHONY : core-check
+core-check: $(CORE_TEST:%=%-check) $(CORE_TEST_NOSTUB:%=%-check)
+
+.PHONY : core-coverage
+core-coverage: $(CORE_TEST:%=%-gcov-run)
+core-coverage: $(CORE_TEST_NOSTUB:%=%-gcov-run)
+
+check: core-check
+coverage: core-coverage
+
+$(CORE_TEST:%=%-gcov-run) : %-run: %
+ $(call QTEST, TEST-COVERAGE ,$< , $<)
+
+$(CORE_TEST_NOSTUB:%=%-gcov-run) : %-run: %
+ $(call QTEST, TEST-COVERAGE ,$< , $<)
+
+$(CORE_TEST:%=%-check) : %-check: %
+ $(call QTEST, RUN-TEST ,$(VALGRIND) $<, $<)
+
+$(CORE_TEST_NOSTUB:%=%-check) : %-check: %
+ $(call QTEST, RUN-TEST ,$(VALGRIND) $<, $<)
+
+core/test/stubs.o: core/test/stubs.c
+ $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) -g -c -o $@ $<, $<)
+
+$(CORE_TEST) : core/test/stubs.o
+
+$(CORE_TEST) : % : %.c
+ $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) -O0 -g -I include -I . -I libfdt -o $@ $< core/test/stubs.o, $<)
+
+$(CORE_TEST_NOSTUB) : % : %.c
+ $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) -O0 -g -I include -I . -I libfdt -o $@ $< , $<)
+
+$(CORE_TEST:%=%-gcov): %-gcov : %.c %
+ $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) $(HOSTGCOVCFLAGS) -I include -I . -I libfdt -lgcov -o $@ $< core/test/stubs.o, $<)
+
+$(CORE_TEST_NOSTUB:%=%-gcov) : %-gcov : %.c %
+ $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) $(HOSTGCOVCFLAGS) -I include -I . -I libfdt -lgcov -o $@ $< , $<)
+
+core/test/run-flash-firmware-versions-gcov-run: core/test/run-flash-firmware-versions-inputs-gcov-run
+
+core/test/run-flash-firmware-versions-inputs-gcov-run: core/test/run-flash-firmware-versions-gcov
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-0 > /dev/null, $< version-0)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-1 > /dev/null, $< version-1)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-2 > /dev/null, $< version-2)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-10 > /dev/null, $< version-10)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-11 > /dev/null, $< version-11)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-16 > /dev/null, $< version-16)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-26 > /dev/null, $< version-26)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-27 > /dev/null, $< version-27)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-29 > /dev/null, $< version-29)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-trunc > /dev/null, $< version-trunc)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-long > /dev/null, $< version-long)
+ $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-nodash > /dev/null, $< version-nodash)
+
+
+-include $(wildcard core/test/*.d)
+
+clean: core-test-clean
+
+core-test-clean:
+ $(RM) -f core/test/*.[od] $(CORE_TEST) $(CORE_TEST:%=%-gcov)
+ $(RM) -f $(CORE_TEST_NOSTUB) $(CORE_TEST_NOSTUB:%=%-gcov)
+ $(RM) -f *.gcda *.gcno skiboot.info
+ $(RM) -rf coverage-report
diff --git a/roms/skiboot/core/test/dummy-cpu.h b/roms/skiboot/core/test/dummy-cpu.h
new file mode 100644
index 000000000..64fb71bce
--- /dev/null
+++ b/roms/skiboot/core/test/dummy-cpu.h
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2018 IBM Corp.
+ *
+ * A dummy cpu.h for tests.
+ * We don't want to include the real skiboot cpu.h, it's PPC-specific
+ */
+
+#ifndef __CPU_H
+#define __CPU_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+static unsigned int cpu_max_pir = 1;
+struct cpu_thread {
+ unsigned int chip_id;
+};
+struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
+ const char *name,
+ void (*func)(void *data), void *data,
+ bool no_return);
+static inline struct cpu_job *cpu_queue_job(struct cpu_thread *cpu,
+ const char *name,
+ void (*func)(void *data),
+ void *data)
+{
+ return __cpu_queue_job(cpu, name, func, data, false);
+}
+void cpu_wait_job(struct cpu_job *job, bool free_it);
+void cpu_process_local_jobs(void);
+struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id,
+ const char *name,
+ void (*func)(void *data), void *data);
+#endif /* __CPU_H */
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-0 b/roms/skiboot/core/test/firmware-versions-input/version-0
new file mode 100644
index 000000000..2ab241af5
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-0
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-1 b/roms/skiboot/core/test/firmware-versions-input/version-1
new file mode 100644
index 000000000..746327a8b
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-1
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-10 b/roms/skiboot/core/test/firmware-versions-input/version-10
new file mode 100644
index 000000000..013af6089
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-10
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-11 b/roms/skiboot/core/test/firmware-versions-input/version-11
new file mode 100644
index 000000000..55e835321
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-11
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-16 b/roms/skiboot/core/test/firmware-versions-input/version-16
new file mode 100644
index 000000000..8906af4e9
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-16
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-2 b/roms/skiboot/core/test/firmware-versions-input/version-2
new file mode 100644
index 000000000..f012ffd23
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-2
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-26 b/roms/skiboot/core/test/firmware-versions-input/version-26
new file mode 100644
index 000000000..adfd5bbcf
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-26
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-27 b/roms/skiboot/core/test/firmware-versions-input/version-27
new file mode 100644
index 000000000..d7ade9863
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-27
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-29 b/roms/skiboot/core/test/firmware-versions-input/version-29
new file mode 100644
index 000000000..b1476a3a5
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-29
Binary files differ
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-long b/roms/skiboot/core/test/firmware-versions-input/version-long
new file mode 100644
index 000000000..f814fa6f4
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-long
@@ -0,0 +1,2 @@
+open-power-whatever-v2.0-10-g1cec21d-dirty
+ Well, I wonder what a short essay here will mean for parsing everything. I hope it is all okay, but we want to get greater than 80 chars.
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-nodash b/roms/skiboot/core/test/firmware-versions-input/version-nodash
new file mode 100644
index 000000000..139aa9350
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-nodash
@@ -0,0 +1,2 @@
+no_dashes_in_version
+ this_is_wrong
diff --git a/roms/skiboot/core/test/firmware-versions-input/version-trunc b/roms/skiboot/core/test/firmware-versions-input/version-trunc
new file mode 100644
index 000000000..c9c92a01f
--- /dev/null
+++ b/roms/skiboot/core/test/firmware-versions-input/version-trunc
@@ -0,0 +1,2 @@
+open-power-SUPERMICRO-P8DTU-V2.00.GA2-20161028
+ op
diff --git a/roms/skiboot/core/test/run-api-test.c b/roms/skiboot/core/test/run-api-test.c
new file mode 100644
index 000000000..35e8135d4
--- /dev/null
+++ b/roms/skiboot/core/test/run-api-test.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2014-2016 IBM Corp.
+ *
+ * For now it just validates that addresses passed are sane and tests the
+ * wrapper that validates addresses.
+ *
+ * Copyright 2016 IBM Corp.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <compiler.h>
+#include <opal-internal.h>
+
+#define __TEST__
+unsigned long top_of_ram; /* Fake it here */
+int main(void)
+{
+ unsigned long addr = 0xd000000000000000;
+
+ top_of_ram = 16ULL * 1024 * 1024 * 1024; /* 16 GB */
+ assert(opal_addr_valid((void *)addr) == false);
+
+ addr = 0xc000000000000000;
+ assert(opal_addr_valid((void *)addr) == true);
+
+ addr = 0x0;
+ assert(opal_addr_valid((void *)addr) == true);
+
+ addr = ~0;
+ assert(opal_addr_valid((void *)addr) == false);
+
+ addr = top_of_ram + 1;
+ assert(opal_addr_valid((void *)addr) == false);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-bitmap.c b/roms/skiboot/core/test/run-bitmap.c
new file mode 100644
index 000000000..e474915b8
--- /dev/null
+++ b/roms/skiboot/core/test/run-bitmap.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2017 IBM Corp.
+ */
+
+#include "../bitmap.c"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+int main(void)
+{
+ bitmap_t *map = malloc(sizeof(bitmap_elem_t));
+ int i;
+ memset(map, 0, sizeof(bitmap_elem_t));
+
+ assert(BITMAP_ELEMS(16) == (BITMAP_ELEMS(8)));
+ assert(BITMAP_ELEMS(128) == (BITMAP_ELEMS(64)*2));
+
+ assert(BITMAP_BYTES(64) == 8);
+ assert(BITMAP_BYTES(128) == 16);
+
+ assert(BITMAP_BIT(1) == 0x1);
+ assert(BITMAP_BIT(2) == 0x2);
+ assert(BITMAP_BIT(3) == 0x3);
+ assert(BITMAP_BIT(8) == 0x8);
+
+ assert(BITMAP_MASK(0) == 0x1);
+ assert(BITMAP_MASK(1) == 0x2);
+ assert(BITMAP_MASK(8) == 0x100);
+ assert(BITMAP_MASK(9) == 0x200);
+
+ assert(BITMAP_ELEM(1) == 0);
+ assert(BITMAP_ELEM(128) == BITMAP_ELEMS(128));
+
+ bitmap_set_bit(*map, 0);
+ assert(*(unsigned long*)map == 0x1);
+ assert(bitmap_tst_bit(*map, 0) == true);
+ bitmap_clr_bit(*map, 0);
+ assert(*(unsigned long*)map == 0x00);
+
+ bitmap_set_bit(*map, 8);
+ assert(*(unsigned long*)map == 0x100);
+ assert(bitmap_tst_bit(*map, 0) == false);
+ assert(bitmap_tst_bit(*map, 1) == false);
+ assert(bitmap_tst_bit(*map, 2) == false);
+ assert(bitmap_tst_bit(*map, 3) == false);
+ assert(bitmap_tst_bit(*map, 4) == false);
+ assert(bitmap_tst_bit(*map, 5) == false);
+ assert(bitmap_tst_bit(*map, 6) == false);
+ assert(bitmap_tst_bit(*map, 7) == false);
+ assert(bitmap_tst_bit(*map, 8) == true);
+ assert(bitmap_tst_bit(*map, 9) == false);
+ assert(bitmap_tst_bit(*map, 10) == false);
+ assert(bitmap_tst_bit(*map, 11) == false);
+ assert(bitmap_tst_bit(*map, 12) == false);
+ assert(bitmap_tst_bit(*map, 13) == false);
+ assert(bitmap_tst_bit(*map, 14) == false);
+ assert(bitmap_tst_bit(*map, 15) == false);
+ assert(bitmap_find_one_bit(*map, 0, 16) == 8);
+ bitmap_clr_bit(*map, 8);
+ assert(bitmap_find_one_bit(*map, 0, 16) == -1);
+ assert(*(unsigned long*)map == 0x00);
+ assert(bitmap_tst_bit(*map, 8) == false);
+
+ bitmap_for_each_zero(*map, 7, i) {
+ bitmap_set_bit(*map, i);
+ }
+
+ for (i = 0; i < 7; i++)
+ assert(bitmap_tst_bit(*map, i) == true);
+
+ assert(bitmap_tst_bit(*map, 8) == false);
+
+
+ free(map);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-buddy.c b/roms/skiboot/core/test/run-buddy.c
new file mode 100644
index 000000000..8ae26cb6c
--- /dev/null
+++ b/roms/skiboot/core/test/run-buddy.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2016-2017 IBM Corp.
+ */
+
+#include <buddy.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+static void *zalloc(size_t size)
+{
+ return calloc(size, 1);
+}
+
+#include "../buddy.c"
+#include "../bitmap.c"
+
+#define BUDDY_ORDER 8
+
+int main(void)
+{
+ struct buddy *b;
+ int i, a[10];
+
+ b = buddy_create(BUDDY_ORDER);
+ assert(b);
+
+ buddy_reserve(b, 127, 0);
+ buddy_reserve(b, 0, 4);
+ assert(buddy_reserve(b, 0, 4) == false);
+
+ a[0] = buddy_alloc(b, 0);
+ assert(a[0] >= 0);
+ a[1] = buddy_alloc(b, 0);
+ assert(a[1] >= 0);
+ a[2] = buddy_alloc(b, 3);
+ assert(a[2] >= 0);
+ a[3] = buddy_alloc(b, 4);
+ assert(a[3] >= 0);
+ a[4] = buddy_alloc(b, 5);
+ assert(a[4] >= 0);
+ a[5] = buddy_alloc(b, 4);
+ assert(a[5] >= 0);
+ a[6] = buddy_alloc(b, 3);
+ assert(a[6] >= 0);
+ a[7] = buddy_alloc(b, 2);
+ assert(a[7] >= 0);
+ a[8] = buddy_alloc(b, 1);
+ assert(a[8] >= 0);
+ a[9] = buddy_alloc(b, 8);
+ assert(a[9] < 0);
+
+ buddy_free(b, a[0], 0);
+ buddy_free(b, a[8], 1);
+ buddy_free(b, a[1], 0);
+ buddy_free(b, a[7], 2);
+ buddy_free(b, a[2], 3);
+ buddy_free(b, a[6], 3);
+ buddy_free(b, a[3], 4);
+ buddy_free(b, a[5], 4);
+ buddy_free(b, a[4], 5);
+
+ buddy_free(b, 127, 0);
+ buddy_free(b, 0, 4);
+
+ for (i = 2; i < buddy_map_size(b); i++)
+ assert(bitmap_tst_bit(b->map, i));
+ assert(!bitmap_tst_bit(b->map, 1));
+
+ buddy_destroy(b);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-console-log-buf-overrun.c b/roms/skiboot/core/test/run-console-log-buf-overrun.c
new file mode 100644
index 000000000..83774c4c9
--- /dev/null
+++ b/roms/skiboot/core/test/run-console-log-buf-overrun.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2015-2016 IBM Corp.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <compiler.h>
+
+unsigned long tb_hz = 512000000;
+
+#define __TEST__
+
+#define CHECK_BUF_ASSERT(buf, str) \
+ assert(memcmp(buf, str, strlen(str)) == 0)
+
+#define CHECK_ASSERT(str) \
+ CHECK_BUF_ASSERT(console_buffer, str)
+
+int huge_tb;
+
+static inline unsigned long mftb(void)
+{
+ /*
+ * return huge value for TB that overrun tmp[16] buffer defined
+ * in print_itoa().
+ */
+ if (huge_tb)
+ return 1223372515963611388;
+ else
+ return 42;
+}
+
+#include "../../libc/include/stdio.h"
+#include "../console-log.c"
+#include "../../libc/stdio/snprintf.c"
+#include "../../libc/stdio/vsnprintf.c"
+
+char console_buffer[4096];
+struct debug_descriptor debug_descriptor;
+
+bool flushed_to_drivers;
+
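+/*
+ * Stub console_write(): capture the formatted output in console_buffer so
+ * the CHECK_ASSERT macros can compare against it.
+ */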
+ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count)
+{
+ flushed_to_drivers = flush_to_drivers;
+ memcpy(console_buffer, buf, count);
+ return count;
+}
+
+int main(void)
+{
+ unsigned long value = 0xffffffffffffffff;
+ char *ptr = console_buffer;
+
+ debug_descriptor.console_log_levels = 0x75;
+
+ /* Test for huge TB value. */
+ huge_tb = 1;
+
+ prlog(PR_EMERG, "Hello World");
+ CHECK_ASSERT("[2389399445.123611388,0] Hello World");
+
+ memset(console_buffer, 0, sizeof(console_buffer));
+
+ /* Test for normal TB with huge unsigned long value */
+ huge_tb = 0;
+
+ prlog(PR_EMERG, "Hello World %lu", value);
+ CHECK_ASSERT("[ 0.000000042,0] Hello World 18446744073709551615");
+
+ printf("Hello World %lu", value);
+ CHECK_ASSERT("[ 0.000000042,5] Hello World 18446744073709551615");
+
+ /*
+ * Test a string of size > 320.
+ *
+ * core/console-log.c:vprlog() formats the message into a buffer[320].
+ * Printing more than 320 bytes exercises that limit; stack corruption
+ * would show up here as a segmentation fault.
+ */
+ prlog(PR_EMERG, "%330s", "Hello World");
+
+ memset(console_buffer, 0, sizeof(console_buffer));
+
+ /*
+ * Test boundary condition.
+ *
+ * Print a string that formats to exactly 320 bytes. The output should be
+ * truncated, with console_buffer[319] == '\0'.
+ */
+ memset(console_buffer, 0, sizeof(console_buffer));
+
+ prlog(PR_EMERG, "%300s", "Hello World");
+ assert(console_buffer[319] == 0);
+
+ /* compare truncated string */
+ ptr += 320 - strlen("Hello World");
+ CHECK_BUF_ASSERT(ptr, "Hello Worl");
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-console-log-pr_fmt.c b/roms/skiboot/core/test/run-console-log-pr_fmt.c
new file mode 100644
index 000000000..457de03fb
--- /dev/null
+++ b/roms/skiboot/core/test/run-console-log-pr_fmt.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2015-2016 IBM Corp.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#define __TEST__
+
+unsigned long tb_hz = 512000000;
+
+static inline unsigned long mftb(void)
+{
+ return 42;
+}
+
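+/*
+ * pr_fmt() is defined before including console-log.c, so every message
+ * logged below is expected to carry the "PREFIX: " prefix.
+ */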
+#define pr_fmt(f) "PREFIX: " f
+#include "../../libc/include/stdio.h"
+#include "../console-log.c"
+#include "../../libc/stdio/snprintf.c"
+#include "../../libc/stdio/vsnprintf.c"
+
+struct debug_descriptor debug_descriptor;
+
+bool flushed_to_drivers;
+char console_buffer[4096];
+
+ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count)
+{
+ flushed_to_drivers = flush_to_drivers;
+ memcpy(console_buffer, buf, count);
+ return count;
+}
+
+int main(void)
+{
+ debug_descriptor.console_log_levels = 0x75;
+
+ prlog(PR_EMERG, "Hello World");
+ assert(strcmp(console_buffer, "[ 0.000000042,0] PREFIX: Hello World") == 0);
+ assert(flushed_to_drivers==true);
+
+ memset(console_buffer, 0, sizeof(console_buffer));
+
+ // Below log level
+ prlog(PR_TRACE, "Hello World");
+ assert(console_buffer[0] == 0);
+
+ // Should not be flushed to console
+ prlog(PR_DEBUG, "Hello World");
+ assert(strcmp(console_buffer, "[ 0.000000042,7] PREFIX: Hello World") == 0);
+ assert(flushed_to_drivers==false);
+
+ printf("Hello World");
+ assert(strcmp(console_buffer, "[ 0.000000042,5] PREFIX: Hello World") == 0);
+ assert(flushed_to_drivers==true);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-console-log.c b/roms/skiboot/core/test/run-console-log.c
new file mode 100644
index 000000000..bec281b6e
--- /dev/null
+++ b/roms/skiboot/core/test/run-console-log.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2014-2016 IBM Corp.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#define __TEST__
+
+#define _printf printf
+
+unsigned long tb_hz = 512000000;
+
+static inline unsigned long mftb(void)
+{
+ return 42;
+}
+
+int _printf(const char* fmt, ...);
+
+#include "../console-log.c"
+
+struct debug_descriptor debug_descriptor;
+
+bool flushed_to_drivers;
+char console_buffer[4096];
+
+ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count)
+{
+ flushed_to_drivers = flush_to_drivers;
+ memcpy(console_buffer, buf, count);
+ return count;
+}
+
+int main(void)
+{
+ debug_descriptor.console_log_levels = 0x75;
+
+ prlog(PR_EMERG, "Hello World");
+ assert(strcmp(console_buffer, "[ 0.000000042,0] Hello World") == 0);
+ assert(flushed_to_drivers==true);
+
+ memset(console_buffer, 0, sizeof(console_buffer));
+
+ // Below log level
+ prlog(PR_TRACE, "Hello World");
+ assert(console_buffer[0] == 0);
+
+ // Should not be flushed to console
+ prlog(PR_DEBUG, "Hello World");
+ assert(strcmp(console_buffer, "[ 0.000000042,7] Hello World") == 0);
+ assert(flushed_to_drivers==false);
+
+ printf("Hello World");
+ assert(strcmp(console_buffer, "[ 0.000000042,5] Hello World") == 0);
+ assert(flushed_to_drivers==true);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-cpufeatures.c b/roms/skiboot/core/test/run-cpufeatures.c
new file mode 100644
index 000000000..bb89b2573
--- /dev/null
+++ b/roms/skiboot/core/test/run-cpufeatures.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Override this for testing. */
+#define is_rodata(p) fake_is_rodata(p)
+
+char __rodata_start[16];
+#define __rodata_end (__rodata_start + sizeof(__rodata_start))
+
+static inline bool fake_is_rodata(const void *p)
+{
+ return ((char *)p >= __rodata_start && (char *)p < __rodata_end);
+}
+
+#define zalloc(bytes) calloc((bytes), 1)
+
+#include "../device.c"
+#include <assert.h>
+#include "../../test/dt_common.c"
+
+#define __TEST__
+
+static inline unsigned long mfspr(unsigned int spr);
+
+#include <ccan/str/str.c>
+
+#include "../cpufeatures.c"
+
+static unsigned long fake_pvr = PVR_TYPE_P8;
+
+static inline unsigned long mfspr(unsigned int spr)
+{
+ assert(spr == SPR_PVR);
+ return fake_pvr;
+}
+
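+/*
+ * Each block in main() sets fake_pvr to a (PVR type << 16 | DD level) value
+ * and checks which ibm,powerpc-cpu-features nodes get created for that chip.
+ */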
+int main(void)
+{
+ struct dt_node *dt_root;
+
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, true);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P8E << 16) | 0x100; // P8E DD1.0
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P8E << 16) | 0x200; // P8E DD2.0
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P8 << 16) | 0x100; // P8 DD1.0
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P8 << 16) | 0x200; // P8 DD2.0
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P8NVL << 16) | 0x100; // P8NVL DD1.0
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P9 << 16) | 0x200; // P9 DD2.0
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix"));
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P9 << 16) | 0x201; // P9 DD2.1
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix"));
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P9 << 16) | 0x202; // P9 DD2.2
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix"));
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") != 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") != 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P9 << 16) | 0x203; // P9 DD2.3
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix"));
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") != 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ fake_pvr = (PVR_TYPE_P9P << 16) | 0x100; // P9P DD1.0
+ dt_root = dt_new_root("");
+ dt_add_cpufeatures(dt_root);
+ dump_dt(dt_root, 0, false);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix"));
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") != 0);
+ assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0);
+ dt_free(dt_root);
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/roms/skiboot/core/test/run-device.c b/roms/skiboot/core/test/run-device.c
new file mode 100644
index 000000000..4a12382bb
--- /dev/null
+++ b/roms/skiboot/core/test/run-device.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2012-2018 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <stdlib.h>
+
+/* Override this for testing. */
+#define is_rodata(p) fake_is_rodata(p)
+
+char __rodata_start[16];
+#define __rodata_end (__rodata_start + sizeof(__rodata_start))
+
+static inline bool fake_is_rodata(const void *p)
+{
+ return ((char *)p >= __rodata_start && (char *)p < __rodata_end);
+}
+
+#define zalloc(bytes) calloc((bytes), 1)
+
+#include "../device.c"
+#include <assert.h>
+#include "../../test/dt_common.c"
+const char *prop_to_fix[] = {"something", NULL};
+const char **props_to_fix(struct dt_node *node);
+
+static void check_path(const struct dt_node *node, const char * expected_path)
+{
+ char * path;
+ path = dt_get_path(node);
+ if (strcmp(path, expected_path) != 0) {
+ printf("check_path: expected %s, got %s\n", expected_path, path);
+ }
+ assert(strcmp(path, expected_path) == 0);
+ free(path);
+}
+
+/* constructs a random, nodes-only device tree */
+static void build_tree(int max_depth, int min_depth, struct dt_node *parent)
+{
+ char name[64];
+ int i;
+
+ for (i = 0; i < max_depth; i++) {
+ struct dt_node *new;
+
+ snprintf(name, sizeof name, "prefix@%.8x", rand());
+
+ new = dt_new(parent, name);
+
+ if (max_depth > min_depth)
+ build_tree(max_depth - 1, min_depth, new);
+ }
+}
+
+static bool is_sorted(const struct dt_node *root)
+{
+ struct dt_node *end = list_tail(&root->children, struct dt_node, list);
+ struct dt_node *node;
+
+ dt_for_each_child(root, node) {
+ struct dt_node *next =
+ list_entry(node->list.next, struct dt_node, list);
+
+ /* current node must be "less than" the next node */
+ if (node != end && dt_cmp_subnodes(node, next) != -1) {
+ printf("nodes '%s' and '%s' out of order\n",
+ node->name, next->name);
+
+ return false;
+ }
+
+ if (!is_sorted(node))
+ return false;
+ }
+
+ return true;
+}
+
+/* handler for the phandle fixup test */
+const char **props_to_fix(struct dt_node *node)
+{
+ const struct dt_property *prop;
+
+ prop = dt_find_property(node, "something");
+ if (prop)
+ return prop_to_fix;
+
+ return NULL;
+}
+
+int main(void)
+{
+ struct dt_node *root, *other_root, *c1, *c2, *c2_c, *gc1, *gc2, *gc3, *ggc1, *ggc2;
+ struct dt_node *addrs, *addr1, *addr2;
+ struct dt_node *i, *subtree, *ev1, *ut1, *ut2;
+ const struct dt_property *p;
+ struct dt_property *p2;
+ unsigned int n;
+ char *s;
+ size_t sz;
+ u32 phandle, ev1_ph, new_prop_ph;
+
+ root = dt_new_root("");
+ assert(!list_top(&root->properties, struct dt_property, list));
+ check_path(root, "/");
+
+ c1 = dt_new_check(root, "c1");
+ assert(!list_top(&c1->properties, struct dt_property, list));
+ check_path(c1, "/c1");
+ assert(dt_find_by_name(root, "c1") == c1);
+ assert(dt_find_by_path(root, "/c1") == c1);
+ assert(dt_new(root, "c1") == NULL);
+
+ c2 = dt_new(root, "c2");
+ c2_c = dt_new_check(root, "c2");
+ assert(c2 == c2_c);
+ assert(!list_top(&c2->properties, struct dt_property, list));
+ check_path(c2, "/c2");
+ assert(dt_find_by_name(root, "c2") == c2);
+ assert(dt_find_by_path(root, "/c2") == c2);
+
+ gc1 = dt_new(c1, "gc1");
+ assert(!list_top(&gc1->properties, struct dt_property, list));
+ check_path(gc1, "/c1/gc1");
+ assert(dt_find_by_name(root, "gc1") == gc1);
+ assert(dt_find_by_path(root, "/c1/gc1") == gc1);
+
+ gc2 = dt_new(c1, "gc2");
+ assert(!list_top(&gc2->properties, struct dt_property, list));
+ check_path(gc2, "/c1/gc2");
+ assert(dt_find_by_name(root, "gc2") == gc2);
+ assert(dt_find_by_path(root, "/c1/gc2") == gc2);
+
+ gc3 = dt_new(c1, "gc3");
+ assert(!list_top(&gc3->properties, struct dt_property, list));
+ check_path(gc3, "/c1/gc3");
+ assert(dt_find_by_name(root, "gc3") == gc3);
+ assert(dt_find_by_path(root, "/c1/gc3") == gc3);
+
+ ggc1 = dt_new(gc1, "ggc1");
+ assert(!list_top(&ggc1->properties, struct dt_property, list));
+ check_path(ggc1, "/c1/gc1/ggc1");
+ assert(dt_find_by_name(root, "ggc1") == ggc1);
+ assert(dt_find_by_path(root, "/c1/gc1/ggc1") == ggc1);
+
+ addrs = dt_new(root, "addrs");
+ assert(!list_top(&addrs->properties, struct dt_property, list));
+ check_path(addrs, "/addrs");
+ assert(dt_find_by_name(root, "addrs") == addrs);
+ assert(dt_find_by_path(root, "/addrs") == addrs);
+
+ addr1 = dt_new_addr(addrs, "addr", 0x1337);
+ assert(!list_top(&addr1->properties, struct dt_property, list));
+ check_path(addr1, "/addrs/addr@1337");
+ assert(dt_find_by_name(root, "addr@1337") == addr1);
+ assert(dt_find_by_name_addr(root, "addr", 0x1337) == addr1);
+ assert(dt_find_by_path(root, "/addrs/addr@1337") == addr1);
+ assert(dt_new_addr(addrs, "addr", 0x1337) == NULL);
+
+ addr2 = dt_new_2addr(addrs, "2addr", 0xdead, 0xbeef);
+ assert(!list_top(&addr2->properties, struct dt_property, list));
+ check_path(addr2, "/addrs/2addr@dead,beef");
+ assert(dt_find_by_name(root, "2addr@dead,beef") == addr2);
+ assert(dt_find_by_path(root, "/addrs/2addr@dead,beef") == addr2);
+ assert(dt_new_2addr(addrs, "2addr", 0xdead, 0xbeef) == NULL);
+
+ /* Test walking the tree, checking and setting values */
+ for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) {
+ assert(!list_top(&i->properties, struct dt_property, list));
+ dt_add_property_cells(i, "visited", 1);
+ }
+ assert(n == 9);
+
+ for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) {
+ p = list_top(&i->properties, struct dt_property, list);
+ assert(strcmp(p->name, "visited") == 0);
+ assert(p->len == sizeof(u32));
+ assert(fdt32_to_cpu(*(u32 *)p->prop) == 1);
+ }
+ assert(n == 9);
+
+ /* Test cells */
+ dt_add_property_cells(c1, "some-property", 1, 2, 3);
+ p = dt_find_property(c1, "some-property");
+ assert(p);
+ assert(strcmp(p->name, "some-property") == 0);
+ assert(p->len == sizeof(u32) * 3);
+ assert(fdt32_to_cpu(*(u32 *)p->prop) == 1);
+ assert(dt_prop_get_cell(c1, "some-property", 0) == 1);
+ assert(fdt32_to_cpu(*((u32 *)p->prop + 1)) == 2);
+ assert(dt_prop_get_cell(c1, "some-property", 1) == 2);
+ assert(fdt32_to_cpu(*((u32 *)p->prop + 2)) == 3);
+ assert(dt_prop_get_cell_def(c1, "some-property", 2, 42) == 3);
+
+ assert(dt_prop_get_cell_def(c1, "not-a-property", 2, 42) == 42);
+
+ /* Test u64s */
+ dt_add_property_u64s(c2, "some-property", (2LL << 33), (3LL << 33), (4LL << 33));
+ p = dt_find_property(c2, "some-property");
+ assert(p);
+ assert(p->len == sizeof(u64) * 3);
+ assert(fdt64_to_cpu(*(u64 *)p->prop) == (2LL << 33));
+ assert(fdt64_to_cpu(*((u64 *)p->prop + 1)) == (3LL << 33));
+ assert(fdt64_to_cpu(*((u64 *)p->prop + 2)) == (4LL << 33));
+
+ /* Test u32/u64 get defaults */
+ assert(dt_prop_get_u32_def(c1, "u32", 42) == 42);
+ dt_add_property_cells(c1, "u32", 1337);
+ assert(dt_prop_get_u32_def(c1, "u32", 42) == 1337);
+ assert(dt_prop_get_u32(c1, "u32") == 1337);
+
+ assert(dt_prop_get_u64_def(c1, "u64", (42LL << 42)) == (42LL << 42));
+ dt_add_property_u64s(c1, "u64", (1337LL << 42));
+ assert(dt_prop_get_u64_def(c1, "u64", (42LL << 42)) == (1337LL << 42));
+ assert(dt_prop_get_u64(c1, "u64") == (1337LL << 42));
+
+ /* Test freeing a single node */
+ assert(!list_empty(&gc1->children));
+ dt_free(ggc1);
+ assert(list_empty(&gc1->children));
+
+ /* Test rodata logic. */
+ assert(!is_rodata("hello"));
+ assert(is_rodata(__rodata_start));
+ strcpy(__rodata_start, "name");
+ ggc1 = dt_new(root, __rodata_start);
+ assert(ggc1->name == __rodata_start);
+
+ /* Test string node. */
+ dt_add_property_string(ggc1, "somestring", "someval");
+ assert(dt_has_node_property(ggc1, "somestring", "someval"));
+ assert(!dt_has_node_property(ggc1, "somestrin", "someval"));
+ assert(!dt_has_node_property(ggc1, "somestring", "someva"));
+ assert(!dt_has_node_property(ggc1, "somestring", "somevale"));
+
+ /* Test nstr, which allows for non-null-terminated inputs */
+ dt_add_property_nstr(ggc1, "nstring", "somevalue_long", 7);
+ assert(dt_has_node_property(ggc1, "nstring", "someval"));
+ assert(!dt_has_node_property(ggc1, "nstring", "someva"));
+ assert(!dt_has_node_property(ggc1, "nstring", "somevalue_long"));
+
+ /* Test multiple strings */
+ dt_add_property_strings(ggc1, "somestrings",
+ "These", "are", "strings!");
+ p = dt_find_property(ggc1, "somestrings");
+ assert(p);
+ assert(p->len == sizeof(char) * (6 + 4 + 9));
+ s = (char *)p->prop;
+ assert(strcmp(s, "These") == 0);
+ assert(strlen(s) == 5);
+ s += 6;
+ assert(strcmp(s, "are") == 0);
+ assert(strlen(s) == 3);
+ s += 4;
+ assert(strcmp(s, "strings!") == 0);
+ assert(strlen(s) == 8);
+ s += 9;
+ assert(s == (char *)p->prop + p->len);
+ assert(dt_prop_find_string(p, "These"));
+ /* dt_prop_find_string is case insensitive */
+ assert(dt_prop_find_string(p, "ARE"));
+ assert(!dt_prop_find_string(p, "integers!"));
+ /* And always returns false for NULL properties */
+ assert(!dt_prop_find_string(NULL, "anything!"));
+
+ /* Test more get/get_def varieties */
+ assert(dt_prop_get_def(c1, "does-not-exist", NULL) == NULL);
+ sz = 0xbad;
+ assert(dt_prop_get_def_size(c1, "does-not-exist", NULL, &sz) == NULL);
+ assert(sz == 0);
+ dt_add_property_string(c1, "another-property", "xyzzy");
+ assert(dt_prop_get_def(c1, "another-property", NULL) != NULL);
+ assert(strcmp(dt_prop_get(c1, "another-property"), "xyzzy") == 0);
+ n = 0xbad;
+ assert(dt_prop_get_def_size(c1, "another-property", NULL, &sz) != NULL);
+ assert(sz == strlen("xyzzy") + 1);
+
+ /* Test resizing property. */
+ p = p2 = __dt_find_property(c1, "some-property");
+ assert(p);
+ n = p2->len;
+ while (p2 == p) {
+ n *= 2;
+ dt_resize_property(&p2, n);
+ }
+
+ assert(dt_find_property(c1, "some-property") == p2);
+ list_check(&c1->properties, "properties after resizing");
+
+ dt_del_property(c1, p2);
+ list_check(&c1->properties, "properties after delete");
+
+ /* No leaks for valgrind! */
+ dt_free(root);
+
+ /* Test compatible and chip id. */
+ root = dt_new_root("");
+
+ c1 = dt_new(root, "chip1");
+ dt_add_property_cells(c1, "ibm,chip-id", 0xcafe);
+ assert(dt_get_chip_id(c1) == 0xcafe);
+ dt_add_property_strings(c1, "compatible",
+ "specific-fake-chip",
+ "generic-fake-chip");
+ assert(dt_node_is_compatible(c1, "specific-fake-chip"));
+ assert(dt_node_is_compatible(c1, "generic-fake-chip"));
+
+ c2 = dt_new(root, "chip2");
+ dt_add_property_cells(c2, "ibm,chip-id", 0xbeef);
+ assert(dt_get_chip_id(c2) == 0xbeef);
+ dt_add_property_strings(c2, "compatible",
+ "specific-fake-bus",
+ "generic-fake-bus");
+
+ gc1 = dt_new(c1, "coprocessor1");
+ dt_add_property_strings(gc1, "compatible",
+ "specific-fake-coprocessor");
+ gc2 = dt_new(gc1, "coprocessor2");
+ dt_add_property_strings(gc2, "compatible",
+ "specific-fake-coprocessor");
+ gc3 = dt_new(c1, "coprocessor3");
+ dt_add_property_strings(gc3, "compatible",
+ "specific-fake-coprocessor");
+
+
+ assert(dt_find_compatible_node(root, NULL, "generic-fake-bus") == c2);
+ assert(dt_find_compatible_node(root, c2, "generic-fake-bus") == NULL);
+
+ /* we can find all compatible nodes */
+ assert(dt_find_compatible_node(c1, NULL, "specific-fake-coprocessor") == gc1);
+ assert(dt_find_compatible_node(c1, gc1, "specific-fake-coprocessor") == gc2);
+ assert(dt_find_compatible_node(c1, gc2, "specific-fake-coprocessor") == gc3);
+ assert(dt_find_compatible_node(c1, gc3, "specific-fake-coprocessor") == NULL);
+ assert(dt_find_compatible_node(root, NULL, "specific-fake-coprocessor") == gc1);
+ assert(dt_find_compatible_node(root, gc1, "specific-fake-coprocessor") == gc2);
+ assert(dt_find_compatible_node(root, gc2, "specific-fake-coprocessor") == gc3);
+ assert(dt_find_compatible_node(root, gc3, "specific-fake-coprocessor") == NULL);
+
+ /* we can find the coprocessor once on the cpu */
+ assert(dt_find_compatible_node_on_chip(root,
+ NULL,
+ "specific-fake-coprocessor",
+ 0xcafe) == gc1);
+ assert(dt_find_compatible_node_on_chip(root,
+ gc1,
+ "specific-fake-coprocessor",
+ 0xcafe) == gc2);
+ assert(dt_find_compatible_node_on_chip(root,
+ gc2,
+ "specific-fake-coprocessor",
+ 0xcafe) == gc3);
+ assert(dt_find_compatible_node_on_chip(root,
+ gc3,
+ "specific-fake-coprocessor",
+ 0xcafe) == NULL);
+
+ /* we can't find the coprocessor on the bus */
+ assert(dt_find_compatible_node_on_chip(root,
+ NULL,
+ "specific-fake-coprocessor",
+ 0xbeef) == NULL);
+
+ /* Test phandles. We override the automatically generated one. */
+ phandle = 0xf00;
+ dt_add_property(gc3, "phandle", (const void *)&phandle, 4);
+ assert(last_phandle == 0xf00);
+ assert(dt_find_by_phandle(root, 0xf00) == gc3);
+ assert(dt_find_by_phandle(root, 0xf0f) == NULL);
+
+ dt_free(root);
+
+ /* basic sorting */
+ root = dt_new_root("rewt");
+ dt_new(root, "a@1");
+ dt_new(root, "a@2");
+ dt_new(root, "a@3");
+ dt_new(root, "a@4");
+ dt_new(root, "b@4");
+ dt_new(root, "c@4");
+
+ assert(is_sorted(root));
+
+ /* Now test dt_attach_root */
+ other_root = dt_new_root("other_root");
+ dt_new(other_root, "d@1");
+
+ assert(dt_attach_root(root, other_root));
+ other_root = dt_new_root("other_root");
+ assert(!dt_attach_root(root, other_root));
+ dt_free(root);
+
+ /* Test child node sorting */
+ root = dt_new_root("test root");
+ build_tree(5, 3, root);
+
+ if (!is_sorted(root)) {
+ dump_dt(root, 1, false);
+ }
+ assert(is_sorted(root));
+
+ dt_free(root);
+
+ /* check dt_translate_address */
+
+ /* NB: the root bus has two address cells */
+ root = dt_new_root("");
+
+ c1 = dt_new_addr(root, "some-32bit-bus", 0x80000000);
+ dt_add_property_cells(c1, "#address-cells", 1);
+ dt_add_property_cells(c1, "#size-cells", 1);
+ dt_add_property_cells(c1, "ranges", 0x0, 0x8, 0x0, 0x1000);
+
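+ /*
+ * The ranges entry maps child address 0x0 to parent address 0x800000000,
+ * so the child reg 0x500 should translate to 0x800000500.
+ */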
+ gc1 = dt_new_addr(c1, "test", 0x0500);
+ dt_add_property_cells(gc1, "reg", 0x0500, 0x10);
+
+ assert(dt_translate_address(gc1, 0, NULL) == 0x800000500ul);
+
+ /* try three level translation */
+
+ gc2 = dt_new_addr(c1, "another-32bit-bus", 0x40000000);
+ dt_add_property_cells(gc2, "#address-cells", 1);
+ dt_add_property_cells(gc2, "#size-cells", 1);
+ dt_add_property_cells(gc2, "ranges", 0x0, 0x600, 0x100,
+ 0x100, 0x800, 0x100);
+
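+ /*
+ * Two ranges entries: child 0x0 maps to 0x600 and child 0x100 maps to
+ * 0x800 on the parent bus, each 0x100 long.
+ */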
+ ggc1 = dt_new_addr(gc2, "test", 0x50);
+ dt_add_property_cells(ggc1, "reg", 0x50, 0x10);
+ assert(dt_translate_address(ggc1, 0, NULL) == 0x800000650ul);
+
+ /* test multiple ranges work */
+ ggc2 = dt_new_addr(gc2, "test", 0x150);
+ dt_add_property_cells(ggc2, "reg", 0x150, 0x10);
+ assert(dt_translate_address(ggc2, 0, NULL) == 0x800000850ul);
+
+ /* try 64bit -> 64bit */
+
+ c2 = dt_new_addr(root, "some-64bit-bus", 0xe00000000);
+ dt_add_property_cells(c2, "#address-cells", 2);
+ dt_add_property_cells(c2, "#size-cells", 2);
+ dt_add_property_cells(c2, "ranges", 0x0, 0x0, 0xe, 0x0, 0x2, 0x0);
+
+ gc2 = dt_new_addr(c2, "test", 0x100000000ul);
+ dt_add_property_u64s(gc2, "reg", 0x100000000ul, 0x10ul);
+ assert(dt_translate_address(gc2, 0, NULL) == 0xf00000000ul);
+
+ dt_free(root);
+
+ /* phandle fixup test */
+ subtree = dt_new_root("subtree");
+ ev1 = dt_new(subtree, "ev@1");
+ ev1_ph = ev1->phandle;
+ dt_new(ev1, "a@1");
+ dt_new(ev1, "a@2");
+ dt_new(ev1, "a@3");
+ ut1 = dt_new(subtree, "ut@1");
+ dt_add_property(ut1, "something", (const void *)&ev1->phandle, 4);
+ ut2 = dt_new(subtree, "ut@2");
+ dt_add_property(ut2, "something", (const void *)&ev1->phandle, 4);
+
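+ /*
+ * After the fixup, ev@1 should get a fresh phandle and neither
+ * "something" property should still hold the old value.
+ */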
+ dt_adjust_subtree_phandle(subtree, props_to_fix);
+ assert(!(ev1->phandle == ev1_ph));
+ new_prop_ph = dt_prop_get_u32(ut1, "something");
+ assert(!(new_prop_ph == ev1_ph));
+ new_prop_ph = dt_prop_get_u32(ut2, "something");
+ assert(!(new_prop_ph == ev1_ph));
+ dt_free(subtree);
+ return 0;
+}
+
diff --git a/roms/skiboot/core/test/run-flash-firmware-versions.c b/roms/skiboot/core/test/run-flash-firmware-versions.c
new file mode 100644
index 000000000..9f96f5c19
--- /dev/null
+++ b/roms/skiboot/core/test/run-flash-firmware-versions.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2018-2019 IBM Corp.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <malloc.h>
+#include <stdint.h>
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+
+
+#include <interrupts.h>
+#include <bitutils.h>
+
+#include <compiler.h>
+
+/*
+ * Skiboot malloc stubs
+ *
+ * The actual prototypes for these are defined in mem_region-malloc.h,
+ * but that file also #defines malloc and friends, so we don't pull it in
+ * directly.
+ */
+
+#define DEFAULT_ALIGN __alignof__(long)
+
+void *__memalign(size_t blocksize, size_t bytes, const char *location __unused);
+void *__memalign(size_t blocksize, size_t bytes, const char *location __unused)
+{
+ return memalign(blocksize, bytes);
+}
+
+void *__malloc(size_t bytes, const char *location);
+void *__malloc(size_t bytes, const char *location)
+{
+ return __memalign(DEFAULT_ALIGN, bytes, location);
+}
+
+void __free(void *p, const char *location __unused);
+void __free(void *p, const char *location __unused)
+{
+ free(p);
+}
+
+void *__realloc(void *ptr, size_t size, const char *location __unused);
+void *__realloc(void *ptr, size_t size, const char *location __unused)
+{
+ return realloc(ptr, size);
+}
+
+void *__zalloc(size_t bytes, const char *location);
+void *__zalloc(size_t bytes, const char *location)
+{
+ void *p = __malloc(bytes, location);
+
+ if (p)
+ memset(p, 0, bytes);
+ return p;
+}
+
+#include <mem_region-malloc.h>
+
+#include <opal-api.h>
+
+#include "../../libfdt/fdt.c"
+#include "../../libfdt/fdt_ro.c"
+#include "../../libfdt/fdt_sw.c"
+#include "../../libfdt/fdt_strerror.c"
+
+#include "../../core/device.c"
+
+#include "../../libstb/container-utils.h"
+#include "../../libstb/container.h"
+#include "../../libstb/container.c"
+
+#include "../flash-firmware-versions.c"
+#include <assert.h>
+
+char __rodata_start[1], __rodata_end[1];
+
+const char version[]="Hello world!";
+
+enum proc_gen proc_gen = proc_gen_p8;
+
+static char *loaded_version_buf;
+static size_t loaded_version_buf_size;
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
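+/*
+ * Stubbed resource loading: hand back the version image mmap'd in main()
+ * (when a file is given on the command line) instead of reading flash.
+ */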
+int start_preload_resource(enum resource_id id, uint32_t subid,
+ void *buf, size_t *len)
+{
+ (void)id;
+ (void)subid;
+ (void)buf;
+ if (loaded_version_buf) {
+ *len = min(*len, loaded_version_buf_size);
+ memcpy(buf, loaded_version_buf, *len);
+ } else {
+ *len = 0;
+ }
+
+ return 0;
+}
+
+int wait_for_resource_loaded(enum resource_id id, uint32_t idx)
+{
+ (void)id;
+ (void)idx;
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ struct stat ver_st;
+ int r;
+
+ dt_root = dt_new_root("");
+
+ if (argc > 1) {
+ fd = open(argv[1], O_RDONLY);
+
+ assert(fd > 0);
+ r = fstat(fd, &ver_st);
+ assert(r == 0);
+
+ loaded_version_buf = mmap(NULL, ver_st.st_size,
+ PROT_READ, MAP_PRIVATE, fd, 0);
+ assert(loaded_version_buf != (char*)-1);
+ loaded_version_buf_size = ver_st.st_size;
+ }
+
+ flash_fw_version_preload();
+
+ proc_gen = proc_gen_p9;
+ flash_fw_version_preload();
+ flash_dt_add_fw_version();
+
+ return 0;
+}
+
diff --git a/roms/skiboot/core/test/run-flash-subpartition.c b/roms/skiboot/core/test/run-flash-subpartition.c
new file mode 100644
index 000000000..5b6df87f2
--- /dev/null
+++ b/roms/skiboot/core/test/run-flash-subpartition.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2016 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <opal-api.h>
+#include <stdlib.h>
+
+#include "../flash-subpartition.c"
+#include <assert.h>
+
+/* This is a straight dump of the CAPP ucode partition header */
+char capp[4096] = {0x43, 0x41, 0x50, 0x50, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x01, 0x00, 0xea, 0x00, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x8e, 0x50, 0x00, 0x02, 0x00, 0xea,
+ 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8e, 0x50,
+ 0x00, 0x02, 0x00, 0xef, 0x00, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x8e, 0x50, 0x00, 0x02, 0x01, 0xef,
+ 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8e, 0x50,
+ 0x00, 0x01, 0x00, 0xd3, 0x00, 0x00, 0x10, 0x00,
+ 0x00, 0x00, 0x8e, 0x50, 0x00, 0x00, 0x00, 0x00 };
+
+int main(void)
+{
+ int rc;
+ uint32_t part_actual;
+ uint32_t offset;
+ uint32_t size;
+ uint32_t subids[] = { 0x100ea, 0x200ea, 0x200ef, 0x201ef, 0x100d3 };
+
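+ /*
+ * Each sub-partition ID from the header above should resolve to
+ * offset 0x1000 and size 0x8e50.
+ */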
+ for (int i = 0; i < sizeof(subids)/sizeof(uint32_t); i++) {
+ offset = 0;
+ rc = flash_subpart_info(capp, sizeof(capp), 0x24000,
+ &part_actual, subids[i],
+ &offset, &size);
+ printf("\nsubid %x\n", subids[i]);
+ printf("part_actual %u\n", part_actual);
+ printf("offset %u\n", offset);
+ printf("size %u\n", size);
+ assert (rc == 0);
+ assert (size == 36432);
+ assert (offset == 4096);
+ assert (part_actual == 40960);
+ }
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-malloc-speed.c b/roms/skiboot/core/test/run-malloc-speed.c
new file mode 100644
index 000000000..39a24f9cb
--- /dev/null
+++ b/roms/skiboot/core/test/run-malloc-speed.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+
+/* Use these before we undefine them below. */
+static inline void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static inline void real_free(void *p)
+{
+ return free(p);
+}
+
+#include <skiboot.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../malloc.c"
+#include "../mem_region.c"
+#include "../device.c"
+
+#undef malloc
+#undef free
+#undef realloc
+
+#include <assert.h>
+#include <stdio.h>
+
+char __rodata_start[1], __rodata_end[1];
+struct dt_node *dt_root;
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+#define TEST_HEAP_ORDER 27
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+#define NUM_ALLOCS 4096
+
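+/*
+ * Carve the test heap into NUM_ALLOCS equal-sized allocations and check
+ * that every pointer lands inside the heap region.
+ */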
+int main(void)
+{
+ uint64_t i, len;
+ void **p = real_malloc(sizeof(void*)*NUM_ALLOCS);
+
+ assert(p);
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)real_malloc(skiboot_heap.len);
+
+ len = skiboot_heap.len / NUM_ALLOCS - sizeof(struct alloc_hdr);
+ for (i = 0; i < NUM_ALLOCS; i++) {
+ p[i] = __malloc(len, __location__);
+ assert(p[i] > region_start(&skiboot_heap));
+ assert(p[i] + len <= region_start(&skiboot_heap)
+ + skiboot_heap.len);
+ }
+ assert(mem_check(&skiboot_heap));
+ assert(skiboot_heap.free_list_lock.lock_val == 0);
+ free(region_start(&skiboot_heap));
+ real_free(p);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-malloc.c b/roms/skiboot/core/test/run-malloc.c
new file mode 100644
index 000000000..10cc64e86
--- /dev/null
+++ b/roms/skiboot/core/test/run-malloc.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+
+/* Use these before we undefine them below. */
+static inline void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static inline void real_free(void *p)
+{
+ return free(p);
+}
+
+#undef malloc
+#undef free
+#undef realloc
+
+#include <skiboot.h>
+
+#define is_rodata(p) true
+
+#include "../mem_region.c"
+#include "../malloc.c"
+#include "../device.c"
+
+#include "mem_region-malloc.h"
+
+#define TEST_HEAP_ORDER 16
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+struct dt_node *dt_root;
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
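+/*
+ * The heap is back to empty when its first header spans the whole region
+ * as a single block.
+ */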
+static bool heap_empty(void)
+{
+ const struct alloc_hdr *h = region_start(&skiboot_heap);
+ return h->num_longs == skiboot_heap.len / sizeof(long);
+}
+
+int main(void)
+{
+ char *test_heap = real_malloc(TEST_HEAP_SIZE);
+ char *p, *p2, *p3, *p4;
+ char *pr;
+ size_t i;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)test_heap;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+
+ /* Allocations of various sizes. */
+ for (i = 0; i < TEST_HEAP_ORDER; i++) {
+ p = malloc(1ULL << i);
+ assert(p);
+ assert(p > (char *)test_heap);
+ assert(p + (1ULL << i) <= (char *)test_heap + TEST_HEAP_SIZE);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ free(p);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ assert(heap_empty());
+ }
+
+ /* Realloc as malloc. */
+ skiboot_heap.free_list_lock.lock_val = 0;
+ p = realloc(NULL, 100);
+ assert(p);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+
+ /* Realloc as free. */
+ p = realloc(p, 0);
+ assert(!p);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ assert(heap_empty());
+
+ /* Realloc longer. */
+ p = realloc(NULL, 100);
+ assert(p);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ p2 = realloc(p, 200);
+ assert(p2 == p);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ free(p2);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ assert(heap_empty());
+
+ /* Realloc shorter. */
+ skiboot_heap.free_list_lock.lock_val = 0;
+ p = realloc(NULL, 100);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ assert(p);
+ p2 = realloc(p, 1);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ assert(p2 == p);
+ free(p2);
+ assert(!skiboot_heap.free_list_lock.lock_val);
+ assert(heap_empty());
+
+ /* zalloc failure */
+ p2 = zalloc(TEST_HEAP_SIZE * 2);
+ assert(p2 == NULL);
+
+ /* Realloc with move. */
+ p2 = malloc(TEST_HEAP_SIZE - 64 - sizeof(struct alloc_hdr)*2);
+ memset(p2, 'a', TEST_HEAP_SIZE - 64 - sizeof(struct alloc_hdr)*2);
+ assert(p2);
+ p = malloc(64);
+ memset(p, 'b', 64);
+ p[63] = 'c';
+ assert(p);
+ free(p2);
+
+ p2 = realloc(p, 128);
+ assert(p2 != p);
+ assert(p2[63] == 'c');
+ free(p2);
+ assert(heap_empty());
+ assert(!skiboot_heap.free_list_lock.lock_val);
+
+ /* Realloc with failure to allocate new size */
+ p2 = malloc(TEST_HEAP_SIZE - sizeof(struct alloc_hdr)*2);
+ assert(p2);
+ memset(p2, 'a', TEST_HEAP_SIZE - sizeof(struct alloc_hdr)*2);
+ p = p2;
+ p2 = realloc(p, TEST_HEAP_SIZE*2);
+ assert(p2==NULL);
+ memset(p, 'b', TEST_HEAP_SIZE - sizeof(struct alloc_hdr)*2);
+ free(p);
+
+ /* Reproduce bug BZ109128/SW257364 */
+ p = malloc(100);
+ p2 = malloc(100);
+ p3 = malloc(100);
+ p4 = malloc(100);
+ free(p2);
+ pr = realloc(p, 216);
+ assert(pr);
+ free(p3);
+ free(pr);
+ free(p4);
+ assert(heap_empty());
+ assert(!skiboot_heap.free_list_lock.lock_val);
+
+ real_free(test_heap);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-mem_range_is_reserved.c b/roms/skiboot/core/test/run-mem_range_is_reserved.c
new file mode 100644
index 000000000..9891dbd9a
--- /dev/null
+++ b/roms/skiboot/core/test/run-mem_range_is_reserved.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2015-2019 IBM Corp.
+ */
+
+#include <config.h>
+
+/* The lock backtrace structures consume too much room on the skiboot heap */
+#undef DEBUG_LOCKS_BACKTRACE
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+
+static void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static void real_free(void *p)
+{
+ return free(p);
+}
+
+#undef malloc
+#undef free
+#undef realloc
+
+#include <skiboot.h>
+#include <mem_region-malloc.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+#include "../malloc.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+#include "../../libc/string/strdup.c"
+
+#include "../device.c"
+#include <assert.h>
+#include <stdio.h>
+
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val--;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+#define TEST_HEAP_ORDER 16
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char *name;
+
+ name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0]));
+ assert(name);
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+ sprintf(name, "memory@%llx", (long long)start);
+
+ mem = dt_new(dt_root, name);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+ free(name);
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+struct test_region {
+ uint64_t start;
+ uint64_t end;
+};
+
+static struct test {
+ struct test_region regions[3];
+ bool reserved;
+} tests[] = {
+ /* empty region set */
+ { { { 0 } }, false },
+
+ /* single exact match */
+ { { { 0x1000, 0x2000 }, }, true },
+
+ /* overlap downwards */
+ { { { 0x0fff, 0x2000 }, }, true },
+
+ /* overlap upwards */
+ { { { 0x1000, 0x2001 }, }, true },
+
+ /* missing first byte */
+ { { { 0x1001, 0x2000 }, }, false },
+
+ /* missing last byte */
+ { { { 0x1000, 0x1fff }, }, false },
+
+ /* two regions, full coverage, split before start of range */
+ { { { 0x0500, 0x1000 }, { 0x1000, 0x2500 } }, true },
+
+ /* two regions, full coverage, split after start of range */
+ { { { 0x0500, 0x1001 }, { 0x1001, 0x2500 } }, true },
+
+ /* two regions, full coverage, split at middle of range */
+ { { { 0x0500, 0x1500 }, { 0x1500, 0x2500 } }, true },
+
+ /* two regions, full coverage, split before end of range */
+ { { { 0x0500, 0x1fff }, { 0x1fff, 0x2500 } }, true },
+
+ /* two regions, full coverage, split after end of range */
+ { { { 0x0500, 0x2000 }, { 0x2000, 0x2500 } }, true },
+
+ /* two regions, missing byte in middle of range */
+ { { { 0x0500, 0x14ff }, { 0x1500, 0x2500 } }, false },
+
+ /* two regions, missing byte after start of range */
+ { { { 0x0500, 0x1000 }, { 0x1001, 0x2500 } }, false },
+
+ /* two regions, missing byte before end of range */
+ { { { 0x0500, 0x1fff }, { 0x2000, 0x2500 } }, false },
+};
+
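+/*
+ * Apply one test case's reservations and check whether the 0x1000-0x2000
+ * range is reported as fully reserved.
+ */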
+static void run_test(struct test *test)
+{
+ struct test_region *r;
+ bool reserved;
+
+ list_head_init(&regions);
+
+ mem_region_init();
+
+ /* create our reservations */
+ for (r = test->regions; r->start; r++)
+ mem_reserve_fw("r", r->start, r->end - r->start);
+
+ reserved = mem_range_is_reserved(0x1000, 0x1000);
+
+ if (reserved != test->reserved) {
+ struct mem_region *r;
+ fprintf(stderr, "test failed; got %s, expected %s\n",
+ reserved ? "reserved" : "unreserved",
+ test->reserved ? "reserved" : "unreserved");
+
+ fprintf(stderr, "reserved regions:\n");
+
+ list_for_each(&regions, r, list) {
+ fprintf(stderr, "\t: %08"PRIx64"[%08"PRIx64"] %s\n",
+ r->start, r->len, r->name);
+ }
+ exit(EXIT_FAILURE);
+ }
+}
+
+
+int main(void)
+{
+ unsigned int i;
+ void *buf;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (long)real_malloc(TEST_HEAP_SIZE);
+ skiboot_heap.len = TEST_HEAP_SIZE;
+
+ /* shift the OS reserve area out of the way of our playground */
+ skiboot_os_reserve.start = 0x100000;
+ skiboot_os_reserve.len = 0x1000;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ buf = real_malloc(1024*1024);
+ add_mem_node((unsigned long)buf, 1024*1024);
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++)
+ run_test(&tests[i]);
+
+ dt_free(dt_root);
+ real_free(buf);
+ real_free((void *)(long)skiboot_heap.start);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-mem_region.c b/roms/skiboot/core/test/run-mem_region.c
new file mode 100644
index 000000000..50da8033c
--- /dev/null
+++ b/roms/skiboot/core/test/run-mem_region.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <config.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/* The lock backtrace structures consume too much room on the skiboot heap */
+#undef DEBUG_LOCKS_BACKTRACE
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Use these before we override definitions below. */
+static void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static inline void real_free(void *p)
+{
+ return free(p);
+}
+
+#undef malloc
+#undef free
+#undef realloc
+
+#include <skiboot.h>
+
+#define is_rodata(p) true
+
+#include "../mem_region.c"
+#include "../malloc.c"
+#include "../device.c"
+
+#include <assert.h>
+#include <stdio.h>
+
+struct dt_node *dt_root;
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val--;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+#define TEST_HEAP_ORDER 16
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static bool heap_empty(void)
+{
+ const struct alloc_hdr *h = region_start(&skiboot_heap);
+ return h->num_longs == skiboot_heap.len / sizeof(long);
+}
+
+int main(void)
+{
+ char *test_heap;
+ void *p, *ptrs[100];
+ size_t i;
+ struct mem_region *r;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ test_heap = real_malloc(TEST_HEAP_SIZE);
+ skiboot_heap.start = (unsigned long)test_heap;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+
+ lock(&skiboot_heap.free_list_lock);
+
+ /* Allocations of various sizes. */
+ for (i = 0; i < TEST_HEAP_ORDER; i++) {
+ p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here");
+ assert(p);
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "here"));
+ assert(p > (void *)test_heap);
+ assert(p + (1ULL << i) <= (void *)test_heap + TEST_HEAP_SIZE);
+ assert(mem_allocated_size(p) >= 1ULL << i);
+ mem_free(&skiboot_heap, p, "freed");
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "freed"));
+ }
+ p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here");
+ assert(!p);
+ mem_free(&skiboot_heap, p, "freed");
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+
+ /* Allocations of various alignments: use small alloc first. */
+ ptrs[0] = mem_alloc(&skiboot_heap, 1, 1, "small");
+ for (i = 0; ; i++) {
+ p = mem_alloc(&skiboot_heap, 1, 1ULL << i, "here");
+ assert(mem_check(&skiboot_heap));
+ /* We will eventually fail... */
+ if (!p) {
+ assert(i >= TEST_HEAP_ORDER);
+ break;
+ }
+ assert(p);
+ assert((long)p % (1ULL << i) == 0);
+ assert(p > (void *)test_heap);
+ assert(p + 1 <= (void *)test_heap + TEST_HEAP_SIZE);
+ mem_free(&skiboot_heap, p, "freed");
+ assert(mem_check(&skiboot_heap));
+ }
+ mem_free(&skiboot_heap, ptrs[0], "small freed");
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+
+ /* Many little allocations, freed in reverse order. */
+ for (i = 0; i < 100; i++) {
+ ptrs[i] = mem_alloc(&skiboot_heap, sizeof(long), 1, "here");
+ assert(ptrs[i]);
+ assert(ptrs[i] > (void *)test_heap);
+ assert(ptrs[i] + sizeof(long)
+ <= (void *)test_heap + TEST_HEAP_SIZE);
+ assert(mem_check(&skiboot_heap));
+ }
+ mem_dump_free();
+ for (i = 0; i < 100; i++)
+ mem_free(&skiboot_heap, ptrs[100 - 1 - i], "freed");
+
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+
+ /* Check the prev_free gets updated properly. */
+ ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0]");
+ ptrs[1] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[1]");
+ assert(ptrs[1] > ptrs[0]);
+ mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free");
+ assert(mem_check(&skiboot_heap));
+ ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0] again");
+ assert(mem_check(&skiboot_heap));
+ mem_free(&skiboot_heap, ptrs[1], "ptrs[1] free");
+ mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free");
+ assert(mem_check(&skiboot_heap));
+ assert(heap_empty());
+
+#if 0
+ printf("Heap map:\n");
+ for (i = 0; i < TEST_HEAP_SIZE / sizeof(long); i++) {
+ printf("%u", test_bit(skiboot_heap.bitmap, i));
+ if (i % 64 == 63)
+ printf("\n");
+ else if (i % 8 == 7)
+ printf(" ");
+ }
+#endif
+
+ /* Simple enlargement, then free */
+ p = mem_alloc(&skiboot_heap, 1, 1, "one byte");
+ assert(p);
+ assert(mem_resize(&skiboot_heap, p, 100, "hundred bytes"));
+ assert(mem_allocated_size(p) >= 100);
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "hundred bytes"));
+ mem_free(&skiboot_heap, p, "freed");
+
+ /* Simple shrink, then free */
+ p = mem_alloc(&skiboot_heap, 100, 1, "100 bytes");
+ assert(p);
+ assert(mem_resize(&skiboot_heap, p, 1, "1 byte"));
+ assert(mem_allocated_size(p) < 100);
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "1 byte"));
+ mem_free(&skiboot_heap, p, "freed");
+
+ /* Lots of resizing (enlarge). */
+ p = mem_alloc(&skiboot_heap, 1, 1, "one byte");
+ assert(p);
+ for (i = 1; i <= TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i++) {
+ assert(mem_resize(&skiboot_heap, p, i, "enlarge"));
+ assert(mem_allocated_size(p) >= i);
+ assert(mem_check(&skiboot_heap));
+ }
+
+ /* Can't make it larger though. */
+ assert(!mem_resize(&skiboot_heap, p, i, "enlarge"));
+
+ for (i = TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i > 0; i--) {
+ assert(mem_resize(&skiboot_heap, p, i, "shrink"));
+ assert(mem_check(&skiboot_heap));
+ }
+
+ mem_free(&skiboot_heap, p, "freed");
+ assert(mem_check(&skiboot_heap));
+
+ unlock(&skiboot_heap.free_list_lock);
+
+ /* lock the regions list */
+ lock(&mem_region_lock);
+ /* Test splitting of a region. */
+ r = new_region("base", (unsigned long)test_heap,
+ TEST_HEAP_SIZE, NULL, REGION_SKIBOOT_HEAP);
+ assert(add_region(r));
+ r = new_region("splitter", (unsigned long)test_heap + TEST_HEAP_SIZE/4,
+ TEST_HEAP_SIZE/2, NULL, REGION_RESERVED);
+ assert(add_region(r));
+ /* Now we should have *three* regions. */
+ i = 0;
+ list_for_each(&regions, r, list) {
+ if (region_start(r) == test_heap) {
+ assert(r->len == TEST_HEAP_SIZE/4);
+ assert(strcmp(r->name, "base") == 0);
+ assert(r->type == REGION_SKIBOOT_HEAP);
+ } else if (region_start(r) == test_heap + TEST_HEAP_SIZE / 4) {
+ assert(r->len == TEST_HEAP_SIZE/2);
+ assert(strcmp(r->name, "splitter") == 0);
+ assert(r->type == REGION_RESERVED);
+ assert(!r->free_list.n.next);
+ } else if (region_start(r) == test_heap + TEST_HEAP_SIZE/4*3) {
+ assert(r->len == TEST_HEAP_SIZE/4);
+ assert(strcmp(r->name, "base") == 0);
+ assert(r->type == REGION_SKIBOOT_HEAP);
+ } else
+ abort();
+ assert(mem_check(r));
+ i++;
+ }
+ mem_dump_free();
+ assert(i == 3);
+ while ((r = list_pop(&regions, struct mem_region, list)) != NULL) {
+ lock(&skiboot_heap.free_list_lock);
+ mem_free(&skiboot_heap, r, __location__);
+ unlock(&skiboot_heap.free_list_lock);
+ }
+ unlock(&mem_region_lock);
+ assert(skiboot_heap.free_list_lock.lock_val == 0);
+ real_free(test_heap);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-mem_region_init.c b/roms/skiboot/core/test/run-mem_region_init.c
new file mode 100644
index 000000000..e96282de8
--- /dev/null
+++ b/roms/skiboot/core/test/run-mem_region_init.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+
+/* Use these before we undefine them below. */
+static inline void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static inline void real_free(void *p)
+{
+ return free(p);
+}
+
+#include "../malloc.c"
+
+#include <skiboot.h>
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+
+static inline char *skiboot_strdup(const char *str)
+{
+ char *ret = __malloc(strlen(str) + 1, "");
+ return memcpy(ret, str, strlen(str) + 1);
+}
+#undef strdup
+#define strdup skiboot_strdup
+
+#include "../device.c"
+
+#include <skiboot.h>
+
+#include <assert.h>
+#include <stdio.h>
+
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+/* We actually need a lot of room for the bitmaps! */
+#define TEST_HEAP_ORDER 27
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char *name = (char *)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0]));
+
+ assert(name);
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+ sprintf(name, "memory@%llx", (unsigned long long)start);
+
+ mem = dt_new(dt_root, name);
+ assert(mem);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+ free(name);
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+int main(void)
+{
+ uint64_t end;
+ int builtins;
+ struct mem_region *r;
+ char *heap = real_malloc(TEST_HEAP_SIZE);
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)heap;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+ skiboot_os_reserve.len = 16384;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ /* Make sure we overlap the heap, at least. */
+ add_mem_node(0, (uint64_t)(heap + 0x100000000ULL));
+ add_mem_node((uint64_t)heap+0x100000000ULL , 0x100000000ULL);
+ end = (uint64_t)(heap+ 0x100000000ULL + 0x100000000ULL);
+
+ /* Now convert. */
+ mem_region_init();
+ mem_dump_allocs();
+ assert(mem_check(&skiboot_heap));
+
+ builtins = 0;
+ list_for_each(&regions, r, list) {
+ /* Regions must not overlap. */
+ struct mem_region *r2, *pre = NULL, *post = NULL;
+ list_for_each(&regions, r2, list) {
+ if (r == r2)
+ continue;
+ assert(!overlaps(r, r2));
+ }
+
+ /* But should have exact neighbours. */
+ list_for_each(&regions, r2, list) {
+ if (r == r2)
+ continue;
+ if (r2->start == r->start + r->len)
+ post = r2;
+ if (r2->start + r2->len == r->start)
+ pre = r2;
+ }
+ assert(r->start == 0 || pre);
+ assert(r->start + r->len == end || post);
+
+ if (r == &skiboot_code_and_text ||
+ r == &skiboot_heap ||
+ r == &skiboot_after_heap ||
+ r == &skiboot_cpu_stacks ||
+ r == &skiboot_os_reserve)
+ builtins++;
+ else
+ assert(r->type == REGION_MEMORY);
+ assert(mem_check(r));
+ }
+ assert(builtins == 5);
+
+ dt_free(dt_root);
+
+ while ((r = list_pop(&regions, struct mem_region, list)) != NULL) {
+ if (r != &skiboot_code_and_text &&
+ r != &skiboot_heap &&
+ r != &skiboot_after_heap &&
+ r != &skiboot_os_reserve &&
+ r != &skiboot_cpu_stacks) {
+ free(r);
+ }
+ assert(mem_check(&skiboot_heap));
+ }
+ assert(skiboot_heap.free_list_lock.lock_val == 0);
+ real_free(heap);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-mem_region_next.c b/roms/skiboot/core/test/run-mem_region_next.c
new file mode 100644
index 000000000..4f2f73c55
--- /dev/null
+++ b/roms/skiboot/core/test/run-mem_region_next.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2015-2018 IBM Corp.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Use these before we override definitions below. */
+static void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static void real_free(void *p)
+{
+ return free(p);
+}
+
+#undef malloc
+#undef free
+
+#include <skiboot.h>
+
+#define is_rodata(p) true
+
+#include "../mem_region.c"
+#include "../malloc.c"
+#include "../device.c"
+
+#include <assert.h>
+#include <stdio.h>
+
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val--;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+
+#define TEST_HEAP_ORDER 16
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+int main(void)
+{
+ struct mem_region *r;
+ char *test_heap;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ test_heap = real_malloc(TEST_HEAP_SIZE);
+ skiboot_heap.start = (unsigned long)test_heap;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+
+ lock(&mem_region_lock);
+
+ /* empty regions */
+ r = mem_region_next(NULL);
+ assert(!r);
+
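+ /*
+ * Add two reserved regions; mem_region_next() should walk them in
+ * order and then return NULL.
+ */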
+ r = new_region("test.1", 0x1000, 0x1000, NULL, REGION_RESERVED);
+ assert(add_region(r));
+ r = new_region("test.2", 0x2000, 0x1000, NULL, REGION_RESERVED);
+ assert(add_region(r));
+ mem_regions_finalised = true;
+
+ r = mem_region_next(NULL);
+ assert(r);
+ assert(r->start == 0x1000);
+ assert(r->len == 0x1000);
+ assert(r->type == REGION_RESERVED);
+
+ r = mem_region_next(r);
+ assert(r);
+ assert(r->start == 0x2000);
+ assert(r->len == 0x1000);
+ assert(r->type == REGION_RESERVED);
+
+ r = mem_region_next(r);
+ assert(!r);
+
+ unlock(&mem_region_lock);
+ real_free(test_heap);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-mem_region_release_unused.c b/roms/skiboot/core/test/run-mem_region_release_unused.c
new file mode 100644
index 000000000..463f54283
--- /dev/null
+++ b/roms/skiboot/core/test/run-mem_region_release_unused.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+
+static void *__malloc(size_t size, const char *location __attribute__((unused)))
+{
+ return malloc(size);
+}
+
+static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused)))
+{
+ return realloc(ptr, size);
+}
+
+static void *__zalloc(size_t size, const char *location __attribute__((unused)))
+{
+ return calloc(size, 1);
+}
+
+static inline void __free(void *p, const char *location __attribute__((unused)))
+{
+ return free(p);
+}
+
+#include <skiboot.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+
+#include "../device.c"
+#include <assert.h>
+#include <stdio.h>
+
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ l->lock_val--;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+#define TEST_HEAP_ORDER 16
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char *name;
+
+ name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0]));
+ assert(name);
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+ sprintf(name, "memory@%llx", (long long)start);
+
+ mem = dt_new(dt_root, name);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+ free(name);
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+int main(void)
+{
+ uint64_t i;
+ struct mem_region *r, *other = NULL;
+ void *other_mem;
+ const char *last;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)malloc(TEST_HEAP_SIZE);
+ skiboot_heap.len = TEST_HEAP_SIZE;
+ skiboot_os_reserve.len = 0;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ other_mem = malloc(1024*1024);
+ add_mem_node((unsigned long)other_mem, 1024*1024);
+
+ /* Now convert. */
+ mem_region_init();
+
+ /* Find our node to allocate from */
+ list_for_each(&regions, r, list) {
+ if (region_start(r) == other_mem)
+ other = r;
+ }
+ /* This could happen if skiboot addresses clashed with our alloc. */
+ assert(other);
+ assert(mem_check(other));
+
+ /* Allocate 1k from other region. */
+ lock(&other->free_list_lock);
+ mem_alloc(other, 1024, 1, "1k");
+ unlock(&other->free_list_lock);
+
+ mem_region_release_unused();
+
+ assert(mem_check(&skiboot_heap));
+
+ /* Now we expect it to be split. */
+ i = 0;
+ list_for_each(&regions, r, list) {
+ assert(mem_check(r));
+ i++;
+ if (r == &skiboot_os_reserve)
+ continue;
+ if (r == &skiboot_code_and_text)
+ continue;
+ if (r == &skiboot_heap)
+ continue;
+ if (r == &skiboot_after_heap)
+ continue;
+ if (r == &skiboot_cpu_stacks)
+ continue;
+ if (r == other) {
+ assert(r->type == REGION_MEMORY);
+ assert(r->len < 1024 * 1024);
+ } else {
+ assert(r->type == REGION_OS);
+ assert(r->start == other->start + other->len);
+ assert(r->start + r->len == other->start + 1024*1024);
+ }
+ }
+ assert(i == 7);
+
+ last = NULL;
+ list_for_each(&regions, r, list) {
+ if (last != r->name &&
+ strncmp(r->name, NODE_REGION_PREFIX,
+ strlen(NODE_REGION_PREFIX)) == 0) {
+ /* It's safe to cast away const as this is
+ * only going to happen in test code */
+ free((void*)r->name);
+ break;
+ }
+ last = r->name;
+ }
+
+ dt_free(dt_root);
+ free((void *)(long)skiboot_heap.start);
+ free(other_mem);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-mem_region_release_unused_noalloc.c b/roms/skiboot/core/test/run-mem_region_release_unused_noalloc.c
new file mode 100644
index 000000000..d7adc5a9a
--- /dev/null
+++ b/roms/skiboot/core/test/run-mem_region_release_unused_noalloc.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+
+static void *__malloc(size_t size, const char *location __attribute__((unused)))
+{
+ return malloc(size);
+}
+
+static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused)))
+{
+ return realloc(ptr, size);
+}
+
+static void *__zalloc(size_t size, const char *location __attribute__((unused)))
+{
+ return calloc(size, 1);
+}
+
+static inline void __free(void *p, const char *location __attribute__((unused)))
+{
+ return free(p);
+}
+
+#include <skiboot.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+
+#include "../device.c"
+#include <assert.h>
+#include <stdio.h>
+
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ l->lock_val--;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+#define TEST_HEAP_ORDER 16
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char *name;
+
+ name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0]));
+ assert(name);
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+ sprintf(name, "memory@%llx", (long long)start);
+
+ mem = dt_new(dt_root, name);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+ free(name);
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+int main(void)
+{
+ uint64_t i;
+ struct mem_region *r;
+ const char *last;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = 0;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+ skiboot_os_reserve.start = 0;
+ skiboot_os_reserve.len = 0;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ add_mem_node(0, 0x100000000ULL);
+ add_mem_node(0x100000000ULL, 0x100000000ULL);
+
+ mem_region_init();
+
+ mem_region_release_unused();
+
+ assert(mem_check(&skiboot_heap));
+
+ /* Now we expect it to be split. */
+ i = 0;
+ list_for_each(&regions, r, list) {
+ assert(mem_check(r));
+ i++;
+ if (r == &skiboot_os_reserve)
+ continue;
+ if (r == &skiboot_code_and_text)
+ continue;
+ if (r == &skiboot_heap)
+ continue;
+ if (r == &skiboot_after_heap)
+ continue;
+ if (r == &skiboot_cpu_stacks)
+ continue;
+
+ /* the memory nodes should all be available to the OS now */
+ assert(r->type == REGION_OS);
+ }
+ assert(i == 9);
+
+ last = NULL;
+ list_for_each(&regions, r, list) {
+ if (last != r->name &&
+ strncmp(r->name, NODE_REGION_PREFIX,
+ strlen(NODE_REGION_PREFIX)) == 0) {
+ /* It's safe to cast away the const as
+ * this never happens at runtime,
+ * only in test and only for valgrind
+ */
+ free((void*)r->name);
+ last = r->name;
+ }
+ }
+
+ dt_free(dt_root);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-mem_region_reservations.c b/roms/skiboot/core/test/run-mem_region_reservations.c
new file mode 100644
index 000000000..c24652f41
--- /dev/null
+++ b/roms/skiboot/core/test/run-mem_region_reservations.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+#include "dummy-cpu.h"
+
+#include <stdlib.h>
+
+static void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static void real_free(void *p)
+{
+ return free(p);
+}
+
+#undef malloc
+#undef free
+#undef realloc
+
+#include <skiboot.h>
+#include <mem_region-malloc.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+#include "../malloc.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+#include "../../libc/string/strdup.c"
+
+#include "../device.c"
+#include <assert.h>
+#include <stdio.h>
+
+enum proc_chip_quirks proc_chip_quirks;
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val--;
+}
+
+bool lock_held_by_me(struct lock *l)
+{
+ return l->lock_val;
+}
+
+#define TEST_HEAP_ORDER 16
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char *name;
+
+ name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0]));
+ assert(name);
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+ sprintf(name, "memory@%llx", (long long)start);
+
+ mem = dt_new(dt_root, name);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+ free(name);
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+static struct {
+ const char *name;
+ uint64_t addr;
+ bool found;
+} test_regions[] = {
+ { "test.1", 0x1000, false },
+ { "test.2", 0x2000, false },
+ { "test.3", 0x4000, false },
+};
+
+static void check_property_reservations(void)
+{
+ const struct dt_property *names, *ranges;
+ unsigned int i, l;
+ const char *name;
+ uint64_t *rangep;
+ const char *at;
+
+ /* check dt properties */
+ names = dt_find_property(dt_root, "reserved-names");
+ ranges = dt_find_property(dt_root, "reserved-ranges");
+
+ assert(names && ranges);
+
+ /* walk through names & ranges properties, ensuring that the test
+ * regions are all present */
+ for (name = names->prop, rangep = (uint64_t *)ranges->prop;
+ name < names->prop + names->len;
+ name += l, rangep += 2) {
+ uint64_t addr;
+
+ addr = dt_get_number(rangep, 2);
+ l = strlen(name) + 1;
+
+ for (i = 0; i < ARRAY_SIZE(test_regions); i++) {
+ at = strchr(name, '@');
+ if (strncmp(test_regions[i].name, name,
+ at ? at-name: strlen(name)))
+ continue;
+ assert(test_regions[i].addr == addr);
+ assert(!test_regions[i].found);
+ test_regions[i].found = true;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_regions); i++) {
+ assert(test_regions[i].found);
+ test_regions[i].found = false;
+ }
+}
+
+static void check_node_reservations(void)
+{
+ struct dt_node *parent, *node;
+ unsigned int i;
+
+ parent = dt_find_by_name(dt_root, "reserved-memory");
+ assert(parent);
+
+ assert(dt_prop_get_cell(parent, "#address-cells", 0) == 2);
+ assert(dt_prop_get_cell(parent, "#size-cells", 0) == 2);
+ dt_require_property(parent, "ranges", 0);
+
+ dt_for_each_child(parent, node) {
+ uint64_t addr, size;
+
+ addr = dt_get_address(node, 0, &size);
+
+ for (i = 0; i < ARRAY_SIZE(test_regions); i++) {
+ if (strncmp(test_regions[i].name, node->name,
+ strlen(test_regions[i].name)))
+ continue;
+
+ assert(!test_regions[i].found);
+ assert(test_regions[i].addr == addr);
+ assert(size == 0x1000);
+ test_regions[i].found = true;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_regions); i++) {
+ assert(test_regions[i].found);
+ test_regions[i].found = false;
+ }
+}
+
+int main(void)
+{
+ struct mem_region *r;
+ unsigned int i;
+ void *buf;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (long)real_malloc(TEST_HEAP_SIZE);
+ skiboot_heap.len = TEST_HEAP_SIZE;
+ skiboot_os_reserve.len = skiboot_heap.start;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ buf = real_malloc(1024*1024);
+ add_mem_node((unsigned long)buf, 1024*1024);
+
+ /* add pre-init reservations */
+ for (i = 0; i < ARRAY_SIZE(test_regions); i++)
+ mem_reserve_fw(test_regions[i].name,
+ test_regions[i].addr, 0x1000);
+
+ /* Now convert. */
+ mem_region_init();
+
+ /* add a post-init reservation */
+ mem_reserve_fw("test.4", 0x5000, 0x1000);
+
+ /* release unused */
+ mem_region_release_unused();
+
+ /* and create reservations */
+ mem_region_add_dt_reserved();
+
+ /* ensure we can't create further reservations */
+ r = new_region("test.5", 0x5000, 0x1000, NULL, REGION_RESERVED);
+ assert(!add_region(r));
+
+ /* check old property-style reservations */
+ check_property_reservations();
+
+ /* and new node-style reservations */
+ check_node_reservations();
+
+ dt_free(dt_root);
+ real_free(buf);
+ real_free((void *)(long)skiboot_heap.start);
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-msg.c b/roms/skiboot/core/test/run-msg.c
new file mode 100644
index 000000000..3659a12d7
--- /dev/null
+++ b/roms/skiboot/core/test/run-msg.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+
+static bool zalloc_should_fail = false;
+static int zalloc_should_fail_after = 0;
+
+/* Fake top_of_ram -- needed for APIs */
+unsigned long top_of_ram = 0xffffffffffffffffULL;
+
+static void *zalloc(size_t size)
+{
+ if (zalloc_should_fail && zalloc_should_fail_after == 0) {
+ errno = ENOMEM;
+ return NULL;
+ }
+ if (zalloc_should_fail_after > 0)
+ zalloc_should_fail_after--;
+
+ return calloc(size, 1);
+}
+
+#include "../opal-msg.c"
+#include <skiboot.h>
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values)
+{
+ (void)evt_mask;
+ (void)evt_values;
+}
+
+static long magic = 8097883813087437089UL;
+static void callback(void *data, int status)
+{
+ assert((status == OPAL_SUCCESS || status == OPAL_PARTIAL));
+ assert(*(uint64_t *)data == magic);
+}
+
+static size_t list_count(struct list_head *list)
+{
+ size_t count = 0;
+ struct opal_msg_entry *dummy;
+
+ list_for_each(list, dummy, link)
+ count++;
+ return count;
+}
+
+int main(void)
+{
+ struct opal_msg_entry* entry;
+ int free_size = OPAL_MAX_MSGS;
+ int nfree = free_size;
+ int npending = 0;
+ int r;
+ static struct opal_msg m;
+ uint64_t *m_ptr = (uint64_t *)&m;
+
+ zalloc_should_fail = true;
+ zalloc_should_fail_after = 3;
+ opal_init_msg();
+
+ zalloc_should_fail = false;
+ opal_init_msg();
+
+ assert(list_count(&msg_pending_list) == npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* Callback. */
+ r = opal_queue_msg(0, &magic, callback, (u64)0, (u64)1, (u64)2);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ assert(m.params[0] == 0);
+ assert(m.params[1] == 1);
+ assert(m.params[2] == 2);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == ++nfree);
+
+ /* No params. */
+ r = opal_queue_msg(0, NULL, NULL);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == ++nfree);
+
+ /* > 8 params (> ARRAY_SIZE(entry->msg.params)) */
+ r = opal_queue_msg(0, NULL, NULL, 0, 1, 2, 3, 4, 5, 6, 7, 0xBADDA7A);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == OPAL_PARTIAL);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* Return OPAL_PARTIAL to callback */
+ r = opal_queue_msg(0, &magic, callback, 0, 1, 2, 3, 4, 5, 6, 7, 0xBADDA7A);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == OPAL_PARTIAL);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* return OPAL_PARAMETER */
+ r = _opal_queue_msg(0, NULL, NULL, OPAL_MSG_SIZE, m_ptr);
+ assert(r == OPAL_PARAMETER);
+
+ assert(m.params[0] == 0);
+ assert(m.params[1] == 1);
+ assert(m.params[2] == 2);
+ assert(m.params[3] == 3);
+ assert(m.params[4] == 4);
+ assert(m.params[5] == 5);
+ assert(m.params[6] == 6);
+ assert(m.params[7] == 7);
+
+ /* 8 params (== ARRAY_SIZE(entry->msg.params)) */
+ r = opal_queue_msg(0, NULL, NULL, 0, 10, 20, 30, 40, 50, 60, 70);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == ++nfree);
+
+ assert(m.params[0] == 0);
+ assert(m.params[1] == 10);
+ assert(m.params[2] == 20);
+ assert(m.params[3] == 30);
+ assert(m.params[4] == 40);
+ assert(m.params[5] == 50);
+ assert(m.params[6] == 60);
+ assert(m.params[7] == 70);
+
+ /* Full list (no free nodes in pending). */
+ while (nfree > 0) {
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == 0);
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+ }
+ assert(list_count(&msg_free_list) == 0);
+ assert(nfree == 0);
+ assert(npending == OPAL_MAX_MSGS);
+
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1);
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* Make zalloc fail to test error handling. */
+ zalloc_should_fail = true;
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == OPAL_RESOURCE);
+
+ assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1);
+ assert(list_count(&msg_pending_list) == npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* Empty list (no nodes). */
+ while(!list_empty(&msg_pending_list)) {
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+ npending--;
+ nfree++;
+ }
+ assert(list_count(&msg_pending_list) == npending);
+ assert(list_count(&msg_free_list) == nfree);
+ assert(npending == 0);
+ assert(nfree == OPAL_MAX_MSGS+1);
+
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ /* Request invalid size. */
+ r = opal_get_msg(m_ptr, sizeof(m) - 1);
+ assert(r == OPAL_PARAMETER);
+
+ /* Pass null buffer. */
+ r = opal_get_msg(NULL, sizeof(m));
+ assert(r == OPAL_PARAMETER);
+
+ /* Get msg when none are pending. */
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == OPAL_RESOURCE);
+
+#define test_queue_num(type, val) \
+ r = opal_queue_msg(0, NULL, NULL, \
+ (type)val, (type)val, (type)val, (type)val, \
+ (type)val, (type)val, (type)val, (type)val); \
+ assert(r == 0); \
+ opal_get_msg(m_ptr, sizeof(m)); \
+ assert(r == OPAL_SUCCESS); \
+ assert(m.params[0] == (type)val); \
+ assert(m.params[1] == (type)val); \
+ assert(m.params[2] == (type)val); \
+ assert(m.params[3] == (type)val); \
+ assert(m.params[4] == (type)val); \
+ assert(m.params[5] == (type)val); \
+ assert(m.params[6] == (type)val); \
+ assert(m.params[7] == (type)val)
+
+ /* Test types of various widths */
+ test_queue_num(u64, -1);
+ test_queue_num(s64, -1);
+ test_queue_num(u32, -1);
+ test_queue_num(s32, -1);
+ test_queue_num(u16, -1);
+ test_queue_num(s16, -1);
+ test_queue_num(u8, -1);
+ test_queue_num(s8, -1);
+
+ /* Clean up the list to keep valgrind happy. */
+ while(!list_empty(&msg_free_list)) {
+ entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+ assert(entry);
+ free(entry);
+ }
+
+ while(!list_empty(&msg_pending_list)) {
+ entry = list_pop(&msg_pending_list, struct opal_msg_entry, link);
+ assert(entry);
+ free(entry);
+ }
+
+ return 0;
+}
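
The test above leans on opal_msg carrying at most eight 64-bit parameters: anything beyond that is dropped and both the reader and the completion callback see OPAL_PARTIAL. Below is a minimal, self-contained sketch of that truncation behaviour; the struct and function names are made up for illustration and are not skiboot's API.

	/* Illustrative sketch only: a hypothetical fixed-slot message,
	 * mimicking the "at most 8 params, else partial" behaviour the
	 * test above exercises. Names are invented, not skiboot's. */
	#include <assert.h>
	#include <stdarg.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SKETCH_MAX_PARAMS 8

	struct sketch_msg {
		uint64_t params[SKETCH_MAX_PARAMS];
	};

	/* Returns 0 if everything fit, 1 ("partial") if values were dropped. */
	static int sketch_queue(struct sketch_msg *m, int nparams, ...)
	{
		va_list ap;
		int i, rc = 0;

		va_start(ap, nparams);
		for (i = 0; i < nparams; i++) {
			uint64_t v = va_arg(ap, uint64_t);
			if (i < SKETCH_MAX_PARAMS)
				m->params[i] = v;
			else
				rc = 1;	/* analogous to OPAL_PARTIAL */
		}
		va_end(ap);
		return rc;
	}

	int main(void)
	{
		struct sketch_msg m = { { 0 } };

		/* Nine values: the ninth is dropped, caller sees "partial". */
		assert(sketch_queue(&m, 9, (uint64_t)0, (uint64_t)1, (uint64_t)2,
				    (uint64_t)3, (uint64_t)4, (uint64_t)5,
				    (uint64_t)6, (uint64_t)7,
				    (uint64_t)0xBADDA7A) == 1);
		assert(m.params[7] == 7);
		printf("partial delivery as expected\n");
		return 0;
	}
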
diff --git a/roms/skiboot/core/test/run-nvram-format.c b/roms/skiboot/core/test/run-nvram-format.c
new file mode 100644
index 000000000..ba286bea3
--- /dev/null
+++ b/roms/skiboot/core/test/run-nvram-format.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <stdlib.h>
+
+#include "../nvram-format.c"
+
+bool nvram_wait_for_load(void)
+{
+ return true;
+}
+
+bool nvram_validate(void)
+{
+ return true;
+}
+
+bool nvram_has_loaded(void)
+{
+ return true;
+}
+
+static char *nvram_reset(void *nvram_image, int size)
+{
+ struct chrp_nvram_hdr *h = nvram_image;
+
+ /* entire partition used by one key */
+ assert(nvram_format(nvram_image, size) == 0);
+ memset((char *) h + sizeof(*h), 0, NVRAM_SIZE_FW_PRIV - sizeof(*h));
+ assert(nvram_check(nvram_image, size) == 0);
+
+ return (char *) h + sizeof(*h);
+}
+
+int main(void)
+{
+ char *nvram_image;
+ size_t sz;
+ struct chrp_nvram_hdr *h;
+ char *data;
+ const char *result;
+
+ /* 1024 bytes is too small for our NVRAM */
+ nvram_image = malloc(1024);
+ assert(nvram_format(nvram_image, 1024)!=0);
+ free(nvram_image);
+
+ /* 4096 bytes is too small for our NVRAM */
+ nvram_image = malloc(4096);
+ assert(nvram_format(nvram_image, 4096)!=0);
+ free(nvram_image);
+
+ /* 64k is too small for our NVRAM */
+ nvram_image = malloc(0x10000);
+ assert(nvram_format(nvram_image, 0x10000)!=0);
+ free(nvram_image);
+
+ /* 68k is too small for our NVRAM */
+ nvram_image = malloc(68*1024);
+ assert(nvram_format(nvram_image, 68*1024)!=0);
+ free(nvram_image);
+
+ /* 68k+16 bytes (nvram header) should generate empty free space */
+ sz = NVRAM_SIZE_COMMON + NVRAM_SIZE_FW_PRIV
+ + sizeof(struct chrp_nvram_hdr);
+ nvram_image = malloc(sz);
+ assert(nvram_format(nvram_image, sz)==0);
+ assert(nvram_check(nvram_image, sz)==0);
+ assert(nvram_image[sz-14]==0);
+ assert(nvram_image[sz-13]==1);
+ h = (struct chrp_nvram_hdr*)(&nvram_image[NVRAM_SIZE_COMMON + NVRAM_SIZE_FW_PRIV]);
+ assert(memcmp(h->name, "wwwwwwwwwwww", 12)==0);
+ free(nvram_image);
+
+ /* 128k NVRAM check */
+ nvram_image = malloc(128*1024);
+ assert(nvram_format(nvram_image, 128*1024)==0);
+ assert(nvram_check(nvram_image,128*1024)==0);
+
+ /* Now, we corrupt it */
+ nvram_image[0] = 0;
+ assert(nvram_check(nvram_image,128*1024) != 0);
+
+ /* Does our NUL checking work? */
+ assert(nvram_format(nvram_image, 128 * 1024) == 0);
+ h = (struct chrp_nvram_hdr *) nvram_image;
+ memset((char *) h + sizeof(*h), 0xFF, be16_to_cpu(h->len) * 16 - sizeof(*h));
+ assert(nvram_check(nvram_image, 128 * 1024) != 0);
+
+ assert(nvram_format(nvram_image, 128*1024)==0);
+ /* corrupt the length of the partition */
+ nvram_image[2] = 0;
+ nvram_image[3] = 0;
+ assert(nvram_check(nvram_image,128*1024) != 0);
+
+ assert(nvram_format(nvram_image, 128*1024)==0);
+ /* corrupt the length of the partition */
+ nvram_image[2] = 0;
+ nvram_image[3] = 0;
+ /* but reset checksum! */
+ h = (struct chrp_nvram_hdr*)nvram_image;
+ h->cksum = chrp_nv_cksum(h);
+ assert(nvram_check(nvram_image,128*1024) != 0);
+
+ assert(nvram_format(nvram_image, 128*1024)==0);
+ /* make the length insanely beyond end of nvram */
+ nvram_image[2] = 42;
+ nvram_image[3] = 32;
+ /* but reset checksum! */
+ h = (struct chrp_nvram_hdr*)nvram_image;
+ h->cksum = chrp_nv_cksum(h);
+ assert(nvram_check(nvram_image,128*1024) != 0);
+
+ assert(nvram_format(nvram_image, 128*1024)==0);
+ /* remove skiboot partition */
+ nvram_image[12] = '\0';
+ /* but reset checksum! */
+ h = (struct chrp_nvram_hdr*)nvram_image;
+ h->cksum = chrp_nv_cksum(h);
+ assert(nvram_check(nvram_image,128*1024) != 0);
+
+ assert(nvram_format(nvram_image, 128*1024)==0);
+ /* remove common partition */
+ nvram_image[NVRAM_SIZE_FW_PRIV+5] = '\0';
+ /* but reset checksum! */
+ h = (struct chrp_nvram_hdr*)(&nvram_image[NVRAM_SIZE_FW_PRIV]);
+ h->cksum = chrp_nv_cksum(h);
+ assert(nvram_check(nvram_image,128*1024) != 0);
+
+ /* test nvram_query() */
+
+ /* does an empty partition break us? */
+ data = nvram_reset(nvram_image, 128*1024);
+ assert(nvram_query_safe("test") == NULL);
+
+ /* does a zero length key break us? */
+ data = nvram_reset(nvram_image, 128*1024);
+ data[0] = '=';
+ assert(nvram_query_safe("test") == NULL);
+
+ /* does a missing = break us? */
+ data = nvram_reset(nvram_image, 128*1024);
+ data[0] = 'a';
+ assert(nvram_query_safe("test") == NULL);
+
+ /* does an empty value break us? */
+ data = nvram_reset(nvram_image, 128*1024);
+ data[0] = 'a';
+ data[1] = '=';
+ result = nvram_query_safe("a");
+ assert(result);
+ assert(strlen(result) == 0);
+
+ /* do we trip over malformed keys? */
+ data = nvram_reset(nvram_image, 128*1024);
+#define TEST_1 "a\0a=\0test=test\0"
+ memcpy(data, TEST_1, sizeof(TEST_1));
+ result = nvram_query_safe("test");
+ assert(result);
+ assert(strcmp(result, "test") == 0);
+
+ free(nvram_image);
+
+ return 0;
+}
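
The corruption cases above poke fixed byte offsets of the CHRP partition header: offset 0 for the signature, bytes 2-3 for the big-endian length counted in 16-byte blocks, and the 12-byte name field starting at offset 4, with the checksum recomputed after each change. The sketch below records the layout those offsets imply; the field names are my assumptions drawn from the test, not copied from skiboot's header.

	/* Sketch of the 16-byte CHRP NVRAM partition header as implied by the
	 * test above; field names are assumptions, not skiboot's definitions. */
	#include <stdint.h>

	struct sketch_chrp_hdr {
		uint8_t  sig;       /* offset 0: nvram_image[0] = 0 corrupts this   */
		uint8_t  cksum;     /* offset 1: recomputed after each corruption   */
		uint16_t len_be;    /* offsets 2-3: big-endian length, 16B blocks   */
		char     name[12];  /* offsets 4-15: partition name                 */
	};

	/* Hence "be16_to_cpu(h->len) * 16" above yields the partition size in
	 * bytes, and zeroing nvram_image[12] truncates the name mid-string. */
	_Static_assert(sizeof(struct sketch_chrp_hdr) == 16, "header is 16 bytes");
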
diff --git a/roms/skiboot/core/test/run-pci-quirk.c b/roms/skiboot/core/test/run-pci-quirk.c
new file mode 100644
index 000000000..fd4d95c10
--- /dev/null
+++ b/roms/skiboot/core/test/run-pci-quirk.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2018 IBM Corp
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <compiler.h>
+#include <stdbool.h>
+
+/* Stubs for quirk_astbmc_vga() */
+
+struct dt_property;
+struct dt_node;
+
+static struct bmc_platform fake_bmc;
+const struct bmc_platform *bmc_platform = &fake_bmc;
+
+static int ast_sio_is_enabled(void)
+{
+ return 0;
+}
+
+static uint32_t ast_ahb_readl(uint32_t reg)
+{
+ return reg;
+}
+
+static struct dt_property *__dt_add_property_cells(
+ struct dt_node *node __unused, const char *name __unused,
+ int count __unused, ...)
+{
+ return (void *)0;
+}
+
+struct pci_device;
+struct pci_cfg_reg_filter;
+typedef int64_t (*pci_cfg_reg_func)(void *dev,
+ struct pci_cfg_reg_filter *pcrf,
+ uint32_t offset, uint32_t len,
+ uint32_t *data, bool write);
+
+
+static struct pci_cfg_reg_filter *pci_add_cfg_reg_filter(
+ struct pci_device *pd __unused,
+ uint32_t start __unused,
+ uint32_t len __unused,
+ uint32_t flags __unused,
+ pci_cfg_reg_func func __unused)
+{
+ return NULL;
+}
+
+#include "../pci-quirk.c"
+
+struct pci_device test_pd;
+int test_fixup_ran;
+
+static void test_fixup(struct phb *phb __unused, struct pci_device *pd __unused)
+{
+ assert(PCI_VENDOR_ID(pd->vdid) == 0x1a03);
+ assert(PCI_DEVICE_ID(pd->vdid) == 0x2000);
+ test_fixup_ran = 1;
+}
+
+/* Quirks are: {vendor ID, device ID (or PCI_ANY_ID), fixup function} */
+static const struct pci_quirk test_quirk_table[] = {
+ /* ASPEED 2400 VGA device */
+ { 0x1a03, 0x2000, &test_fixup },
+ { 0, 0, NULL }
+};
+
+#define PCI_COMPOSE_VDID(vendor, device) (((device) << 16) | (vendor))
+
+int main(void)
+{
+ /* Unrecognised vendor and device ID */
+ test_pd.vdid = PCI_COMPOSE_VDID(0xabcd, 0xef01);
+ __pci_handle_quirk(NULL, &test_pd, test_quirk_table);
+ assert(test_fixup_ran == 0);
+
+ /* Unrecognised vendor ID, matching device ID */
+ test_pd.vdid = PCI_COMPOSE_VDID(0xabcd, 0x2000);
+ __pci_handle_quirk(NULL, &test_pd, test_quirk_table);
+ assert(test_fixup_ran == 0);
+
+ /* Matching vendor ID, unrecognised device ID */
+ test_pd.vdid = PCI_COMPOSE_VDID(0x1a03, 0xef01);
+ __pci_handle_quirk(NULL, &test_pd, test_quirk_table);
+ assert(test_fixup_ran == 0);
+
+ /* Matching vendor and device ID */
+ test_pd.vdid = PCI_COMPOSE_VDID(0x1a03, 0x2000);
+ __pci_handle_quirk(NULL, &test_pd, test_quirk_table);
+ assert(test_fixup_ran == 1);
+
+ return 0;
+}
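
__pci_handle_quirk() (pulled in above from ../pci-quirk.c) walks the quirk table and runs a fixup only when the vendor ID matches and the device ID either matches or is the PCI_ANY_ID wildcard, which is exactly what the four cases above probe. Here is a stand-alone re-implementation of that matching rule for illustration; the names and the ANY_ID value are hypothetical, not skiboot's.

	/* Illustrative re-implementation of the vendor/device matching the
	 * test exercises; struct and function names are hypothetical. */
	#include <assert.h>
	#include <stdint.h>

	#define ANY_ID 0xffff

	struct sketch_quirk {
		uint16_t vendor;
		uint16_t device;	/* or ANY_ID as a wildcard */
		void (*fixup)(void);
	};

	static int ran;
	static void fixup_ast_vga(void) { ran = 1; }

	static const struct sketch_quirk table[] = {
		{ 0x1a03, 0x2000, fixup_ast_vga },
		{ 0, 0, 0 },
	};

	/* vdid layout follows the test: (device << 16) | vendor */
	static void handle_quirk(uint32_t vdid, const struct sketch_quirk *q)
	{
		uint16_t vendor = vdid & 0xffff;
		uint16_t device = vdid >> 16;

		for (; q->fixup; q++)
			if (q->vendor == vendor &&
			    (q->device == device || q->device == ANY_ID))
				q->fixup();
	}

	int main(void)
	{
		handle_quirk(0xef011a03u, table);	/* device mismatch: no fixup */
		assert(ran == 0);
		handle_quirk(0x20001a03u, table);	/* full match: fixup runs */
		assert(ran == 1);
		return 0;
	}
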
diff --git a/roms/skiboot/core/test/run-pel.c b/roms/skiboot/core/test/run-pel.c
new file mode 100644
index 000000000..812c8996c
--- /dev/null
+++ b/roms/skiboot/core/test/run-pel.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Test for our PEL record generation. Currently this doesn't actually
+ * test that the records we generate are correct, but it at least lets
+ * us run valgrind over the generation routines to check for buffer
+ * overflows, etc.
+ *
+ * Copyright 2013-2016 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <pel.h>
+#include <errorlog.h>
+#include <device.h>
+
+#define TEST_ERROR 0x1234
+#define TEST_SUBSYS 0x5678
+
+DEFINE_LOG_ENTRY(TEST_ERROR, OPAL_PLATFORM_ERR_EVT, TEST_SUBSYS,
+ OPAL_PLATFORM_FIRMWARE, OPAL_INFO,
+ OPAL_NA);
+
+/* Override this for testing. */
+#define is_rodata(p) fake_is_rodata(p)
+
+char __rodata_start[16];
+#define __rodata_end (__rodata_start + sizeof(__rodata_start))
+
+static inline bool fake_is_rodata(const void *p)
+{
+ return ((char *)p >= __rodata_start && (char *)p < __rodata_end);
+}
+
+#define zalloc(bytes) calloc((bytes), 1)
+
+#include "../device.c"
+#include "../pel.c"
+
+struct dt_node *dt_root = NULL;
+char dt_prop[] = "DUMMY DT PROP";
+
+int rtc_cache_get_datetime(uint32_t *year_month_day,
+ uint64_t *hour_minute_second_millisecond)
+{
+ *year_month_day = 0;
+ *hour_minute_second_millisecond = 0;
+
+ return 0;
+}
+
+int main(void)
+{
+ char *pel_buf;
+ size_t size;
+ struct errorlog *elog;
+ struct opal_err_info *opal_err_info = &err_TEST_ERROR;
+ char *buffer;
+ struct elog_user_data_section *tmp;
+
+ dt_root = dt_new_root("");
+ dt_add_property_string(dt_root, "model", "run-pel-unittest");
+
+ elog = malloc(sizeof(struct errorlog));
+ pel_buf = malloc(PEL_MIN_SIZE + 4);
+ assert(elog);
+ assert(pel_buf);
+
+ memset(elog, 0, sizeof(struct errorlog));
+
+ elog->error_event_type = opal_err_info->err_type;
+ elog->component_id = opal_err_info->cmp_id;
+ elog->subsystem_id = opal_err_info->subsystem;
+ elog->event_severity = opal_err_info->sev;
+ elog->event_subtype = opal_err_info->event_subtype;
+ elog->reason_code = opal_err_info->reason_code;
+ elog->elog_origin = ORG_SAPPHIRE;
+
+ size = pel_size(elog);
+
+ printf("Test buffer too small: ");
+ assert(0 == create_pel_log(elog, NULL, size - 1));
+
+ assert(size <= PEL_MIN_SIZE + 4);
+ assert(size == create_pel_log(elog, pel_buf, size));
+
+ memset(elog, 0, sizeof(struct errorlog));
+
+ elog->error_event_type = opal_err_info->err_type;
+ elog->component_id = opal_err_info->cmp_id;
+ elog->subsystem_id = opal_err_info->subsystem;
+ elog->event_severity = opal_err_info->sev;
+ elog->event_subtype = opal_err_info->event_subtype;
+ elog->reason_code = opal_err_info->reason_code;
+ elog->elog_origin = ORG_SAPPHIRE;
+
+ size = pel_size(elog);
+ pel_buf = realloc(pel_buf, size);
+ assert(pel_buf);
+
+ buffer = elog->user_data_dump + elog->user_section_size;
+ tmp = (struct elog_user_data_section *)buffer;
+ tmp->tag = OPAL_ELOG_SEC_DESC; /* ASCII of DESC */
+ tmp->size = size + sizeof(struct elog_user_data_section) - 1;
+ strcpy(tmp->data_dump, "Hello World!");
+ elog->user_section_size += tmp->size;
+ elog->user_section_count++;
+
+ size = pel_size(elog);
+ pel_buf = realloc(pel_buf, size);
+ assert(pel_buf);
+
+ assert(size == create_pel_log(elog, pel_buf, size));
+
+ free(pel_buf);
+ free(elog);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-pool.c b/roms/skiboot/core/test/run-pool.c
new file mode 100644
index 000000000..e1c3843ff
--- /dev/null
+++ b/roms/skiboot/core/test/run-pool.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2014 IBM Corp
+ */
+
+#include <pool.h>
+
+#include "../pool.c"
+
+#define POOL_OBJ_COUNT 10
+#define POOL_RESERVED_COUNT 2
+#define POOL_NORMAL_COUNT (POOL_OBJ_COUNT - POOL_RESERVED_COUNT)
+
+struct test_object
+{
+ int a;
+ int b;
+ int c;
+};
+
+int main(void)
+{
+ int i, count = 0;
+ struct pool pool;
+ struct test_object *a[POOL_OBJ_COUNT];
+
+ assert(!pool_init(&pool, sizeof(struct test_object), POOL_OBJ_COUNT,
+ POOL_RESERVED_COUNT));
+
+ a[0] = pool_get(&pool, POOL_NORMAL);
+ assert(a[0]);
+ pool_free_object(&pool, a[0]);
+
+ for(i = 0; i < POOL_NORMAL_COUNT; i++)
+ {
+ a[i] = pool_get(&pool, POOL_NORMAL);
+ if (a[i])
+ count++;
+ }
+ assert(count == POOL_NORMAL_COUNT);
+
+ /* Normal pool should be exhausted */
+ assert(!pool_get(&pool, POOL_NORMAL));
+
+ /* Reserved pool should still be available */
+ a[POOL_NORMAL_COUNT] = pool_get(&pool, POOL_HIGH);
+ assert(a[POOL_NORMAL_COUNT]);
+ a[POOL_NORMAL_COUNT + 1] = pool_get(&pool, POOL_HIGH);
+ assert(a[POOL_NORMAL_COUNT + 1]);
+
+ pool_free_object(&pool, a[3]);
+
+ /* Should be a free object to get now */
+ a[3] = pool_get(&pool, POOL_HIGH);
+ assert(a[3]);
+
+ /* This exits depending on whether all tests passed */
+ return 0;
+}
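
pool_init() above carves POOL_OBJ_COUNT fixed-size objects into a normal tier and a reserved tier: POOL_NORMAL requests fail once the first POOL_OBJ_COUNT - POOL_RESERVED_COUNT objects are gone, while POOL_HIGH requests may still dip into the reserve. The toy re-implementation below shows only that policy; it is not skiboot's pool code and the names are invented.

	/* Toy two-tier pool illustrating the reserve policy the test checks;
	 * not skiboot's implementation. */
	#include <assert.h>
	#include <stdbool.h>

	#define TOTAL    10
	#define RESERVED 2

	struct toy_pool {
		int free;	/* objects currently unallocated */
	};

	/* "high" callers may consume the reserve, "normal" callers may not. */
	static bool toy_get(struct toy_pool *p, bool high)
	{
		if (p->free <= (high ? 0 : RESERVED))
			return false;
		p->free--;
		return true;
	}

	static void toy_put(struct toy_pool *p) { p->free++; }

	int main(void)
	{
		struct toy_pool p = { .free = TOTAL };
		int i;

		for (i = 0; i < TOTAL - RESERVED; i++)
			assert(toy_get(&p, false));
		assert(!toy_get(&p, false));	/* normal tier exhausted */
		assert(toy_get(&p, true));	/* reserve still available */
		toy_put(&p);
		assert(toy_get(&p, true));
		return 0;
	}
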
diff --git a/roms/skiboot/core/test/run-time-utils.c b/roms/skiboot/core/test/run-time-utils.c
new file mode 100644
index 000000000..04723dd61
--- /dev/null
+++ b/roms/skiboot/core/test/run-time-utils.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2015-2017 IBM Corp.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#define __TEST__
+
+#include "../time-utils.c"
+
+int main(void)
+{
+ struct tm *t = malloc(sizeof(struct tm));
+ uint32_t *ymd = malloc(sizeof(uint32_t));
+ uint64_t *hms = malloc(sizeof(uint64_t));
+
+ t->tm_year = 1982;
+ t->tm_mon = 0;
+ t->tm_mday = 29;
+ t->tm_hour = 7;
+ t->tm_min = 42;
+ t->tm_sec = 24;
+
+ tm_to_datetime(t, ymd, hms);
+
+ assert(*ymd == 0x19820129);
+ assert(*hms == 0x742240000000000ULL);
+
+ memset(t, 0, sizeof(struct tm));
+
+ *ymd = 0x19760412;
+
+ datetime_to_tm(*ymd, *hms, t);
+ assert(t->tm_year == 1976);
+ assert(t->tm_mon == 03);
+ assert(t->tm_mday == 12);
+ assert(t->tm_hour == 7);
+ assert(t->tm_min == 42);
+ assert(t->tm_sec == 24);
+
+ free(t);
+ free(ymd);
+ free(hms);
+ return 0;
+}
+
diff --git a/roms/skiboot/core/test/run-timebase.c b/roms/skiboot/core/test/run-timebase.c
new file mode 100644
index 000000000..a613609a0
--- /dev/null
+++ b/roms/skiboot/core/test/run-timebase.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2015-2016 IBM Corp.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+
+#define __TEST__
+#include <timebase.h>
+
+unsigned long tb_hz = 512000000;
+
+int main(void)
+{
+ /* This is a fairly solid assumption: the math we're doing
+ * is based on tb_hz being exactly 512MHz.
+ * If we do start doing the math with a different tb_hz, you
+ * probably want to go and audit every bit of code that touches
+ * the timebase to count/delay things.
+ */
+ assert(tb_hz == 512000000);
+ assert(secs_to_tb(1) == tb_hz);
+ assert(secs_to_tb(2) == 1024000000);
+ assert(secs_to_tb(10) == 5120000000);
+ assert(tb_to_secs(512000000) == 1);
+ assert(tb_to_secs(5120000000) == 10);
+ assert(tb_to_secs(1024000000) == 2);
+
+ assert(msecs_to_tb(1) == 512000);
+ assert(msecs_to_tb(100) == 51200000);
+ assert(msecs_to_tb(5) == 2560000);
+ assert(tb_to_msecs(512000) == 1);
+
+ assert(usecs_to_tb(5) == 2560);
+ assert(tb_to_usecs(2560) == 5);
+ assert(usecs_to_tb(5)*1000 == msecs_to_tb(5));
+ assert(tb_to_usecs(512000) == 1000);
+
+ assert(tb_compare(msecs_to_tb(5), usecs_to_tb(5)) == TB_AAFTERB);
+ assert(tb_compare(msecs_to_tb(5), usecs_to_tb(50000)) == TB_ABEFOREB);
+ assert(tb_compare(msecs_to_tb(5), usecs_to_tb(5)*1000) == TB_AEQUALB);
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-timer.c b/roms/skiboot/core/test/run-timer.c
new file mode 100644
index 000000000..8f8b20ed3
--- /dev/null
+++ b/roms/skiboot/core/test/run-timer.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2014-2018 IBM Corp
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define __TEST__
+#include <timer.h>
+#include <skiboot.h>
+
+#define mftb() (stamp)
+#define sync()
+#define smt_lowest()
+#define smt_medium()
+
+enum proc_gen proc_gen = proc_gen_unknown;
+
+static uint64_t stamp, last;
+struct lock;
+static inline void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ (void)l;
+}
+static inline void unlock(struct lock *l) { (void)l; }
+
+unsigned long tb_hz = 512000000;
+
+#include "../timer.c"
+
+#define NUM_TIMERS 100
+
+static struct timer timers[NUM_TIMERS];
+static unsigned int rand_shift, count;
+
+static void init_rand(void)
+{
+ unsigned long max = RAND_MAX;
+
+ /* Get something reasonably small */
+ while(max > 0x10000) {
+ rand_shift++;
+ max >>= 1;
+ }
+}
+
+static void expiry(struct timer *t, void *data, uint64_t now)
+{
+ (void)data;
+ (void)now;
+ assert(t->target >= last);
+ count--;
+}
+
+void p8_sbe_update_timer_expiry(uint64_t new_target)
+{
+ (void)new_target;
+ /* FIXME: do interesting SLW timer sim */
+}
+
+void p9_sbe_update_timer_expiry(uint64_t new_target)
+{
+ (void)new_target;
+}
+
+int main(void)
+{
+ unsigned int i;
+
+ init_rand();
+ for (i = 0; i < NUM_TIMERS; i++) {
+ init_timer(&timers[i], expiry, NULL);
+ schedule_timer(&timers[i], random() >> rand_shift);
+ }
+ count = NUM_TIMERS;
+ while(count) {
+ check_timers(false);
+ stamp++;
+ }
+ return 0;
+}
diff --git a/roms/skiboot/core/test/run-trace.c b/roms/skiboot/core/test/run-trace.c
new file mode 100644
index 000000000..88b090358
--- /dev/null
+++ b/roms/skiboot/core/test/run-trace.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <skiboot-valgrind.h>
+
+/* Don't include these: PPC-specific */
+#define __CPU_H
+#define __TIME_H
+#define __PROCESSOR_H
+
+#if defined(__i386__) || defined(__x86_64__)
+/* This is more than a lwsync, but it'll work */
+static void full_barrier(void)
+{
+ asm volatile("mfence" : : : "memory");
+}
+#define lwsync full_barrier
+#elif defined(__powerpc__) || defined(__powerpc64__)
+static inline void lwsync(void)
+{
+ asm volatile("lwsync" : : : "memory");
+}
+#else
+#error "Define lwsync for this arch"
+#endif
+
+#define zalloc(size) calloc((size), 1)
+
+struct cpu_thread {
+ uint32_t pir;
+ uint32_t chip_id;
+ struct trace_info *trace;
+ uint32_t server_no;
+ bool is_secondary;
+ struct cpu_thread *primary;
+};
+static struct cpu_thread *this_cpu(void);
+
+#define CPUS 4
+
+static struct cpu_thread fake_cpus[CPUS];
+
+static inline struct cpu_thread *next_cpu(struct cpu_thread *cpu)
+{
+ if (cpu == NULL)
+ return &fake_cpus[0];
+ cpu++;
+ if (cpu == &fake_cpus[CPUS])
+ return NULL;
+ return cpu;
+}
+
+#define first_cpu() next_cpu(NULL)
+
+#define for_each_cpu(cpu) \
+ for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
+
+static unsigned long timestamp;
+static unsigned long mftb(void)
+{
+ return timestamp;
+}
+
+static void *local_alloc(unsigned int chip_id,
+ size_t size, size_t align)
+{
+ void *p;
+
+ (void)chip_id;
+ if (posix_memalign(&p, align, size))
+ p = NULL;
+ return p;
+}
+
+struct dt_node;
+extern struct dt_node *opal_node;
+
+#include "../trace.c"
+
+#include "../external/trace/trace.c"
+static struct trace_reader trace_readers[CPUS];
+struct trace_reader *my_trace_reader;
+#include "../device.c"
+
+char __rodata_start[1], __rodata_end[1];
+struct dt_node *opal_node;
+struct debug_descriptor debug_descriptor = {
+ .trace_mask = -1
+};
+
+const char *nvram_query_safe(const char *key __unused)
+{
+ return NULL;
+}
+
+void lock_caller(struct lock *l, const char *caller)
+{
+ (void)caller;
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+struct cpu_thread *my_fake_cpu;
+static struct cpu_thread *this_cpu(void)
+{
+ return my_fake_cpu;
+}
+
+#include <sys/mman.h>
+#define PER_CHILD_TRACES ((RUNNING_ON_VALGRIND) ? (1024*16) : (1024*1024))
+
+static void write_trace_entries(int id)
+{
+ void exit(int);
+ unsigned int i;
+ union trace trace;
+
+ timestamp = id;
+ for (i = 0; i < PER_CHILD_TRACES; i++) {
+ timestamp = i * CPUS + id;
+ assert(sizeof(trace.hdr) % 8 == 0);
+ /* First child never repeats, second repeats once, etc. */
+ trace_add(&trace, 3 + ((i / (id + 1)) % 0x40),
+ sizeof(trace.hdr));
+ }
+
+ /* Final entry has special type, so parent knows it's over. */
+ trace_add(&trace, 0x70, sizeof(trace.hdr));
+ exit(0);
+}
+
+static bool all_done(const bool done[])
+{
+ unsigned int i;
+
+ for (i = 0; i < CPUS; i++)
+ if (!done[i])
+ return false;
+ return true;
+}
+
+static void test_parallel(void)
+{
+ void *p;
+ unsigned int cpu;
+ unsigned int i, counts[CPUS] = { 0 }, overflows[CPUS] = { 0 };
+ unsigned int repeats[CPUS] = { 0 }, num_overflows[CPUS] = { 0 };
+ bool done[CPUS] = { false };
+ size_t len = sizeof(struct trace_info) + TBUF_SZ + sizeof(union trace);
+ int last = 0;
+
+ /* Use a shared mmap to test actual parallel buffers. */
+ i = (CPUS*len + getpagesize()-1)&~(getpagesize()-1);
+ p = mmap(NULL, i, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+
+ for (i = 0; i < CPUS; i++) {
+ fake_cpus[i].trace = p + i * len;
+ fake_cpus[i].trace->tb.buf_size = cpu_to_be64(TBUF_SZ);
+ fake_cpus[i].trace->tb.max_size = cpu_to_be32(sizeof(union trace));
+ fake_cpus[i].is_secondary = false;
+ memset(&trace_readers[i], 0, sizeof(struct trace_reader));
+ trace_readers[i].tb = &fake_cpus[i].trace->tb;
+ }
+
+ for (i = 0; i < CPUS; i++) {
+ if (!fork()) {
+ /* Child. */
+ my_fake_cpu = &fake_cpus[i];
+ write_trace_entries(i);
+ }
+ }
+
+ while (!all_done(done)) {
+ union trace t;
+
+ for (i = 0; i < CPUS; i++) {
+ if (trace_get(&t, &trace_readers[(i+last) % CPUS]))
+ break;
+ }
+
+ if (i == CPUS) {
+ sched_yield();
+ continue;
+ }
+ i = (i + last) % CPUS;
+ last = i;
+
+ if (t.hdr.type == TRACE_OVERFLOW) {
+ /* Conveniently, each record is 16 bytes here. */
+ assert(be64_to_cpu(t.overflow.bytes_missed) % 16 == 0);
+ overflows[i] += be64_to_cpu(t.overflow.bytes_missed) / 16;
+ num_overflows[i]++;
+ continue;
+ }
+
+ assert(be16_to_cpu(t.hdr.cpu) < CPUS);
+ assert(!done[be16_to_cpu(t.hdr.cpu)]);
+ assert(be64_to_cpu(t.hdr.timestamp) % CPUS == be16_to_cpu(t.hdr.cpu));
+ if (t.hdr.type == TRACE_REPEAT) {
+ assert(t.hdr.len_div_8 * 8 == sizeof(t.repeat));
+ assert(be16_to_cpu(t.repeat.num) != 0);
+ assert(be16_to_cpu(t.repeat.num) <= be16_to_cpu(t.hdr.cpu));
+ repeats[be16_to_cpu(t.hdr.cpu)] += be16_to_cpu(t.repeat.num);
+ } else if (t.hdr.type == 0x70) {
+ cpu = be16_to_cpu(t.hdr.cpu);
+ assert(cpu < CPUS);
+ done[cpu] = true;
+ } else {
+ cpu = be16_to_cpu(t.hdr.cpu);
+ assert(cpu < CPUS);
+ counts[cpu]++;
+ }
+ }
+
+ /* Gather children. */
+ for (i = 0; i < CPUS; i++) {
+ int status;
+ wait(&status);
+ }
+
+ for (i = 0; i < CPUS; i++) {
+ printf("Child %i: %u produced, %u overflows, %llu total\n", i,
+ counts[i], overflows[i],
+ (long long)be64_to_cpu(fake_cpus[i].trace->tb.end));
+ assert(counts[i] + repeats[i] <= PER_CHILD_TRACES);
+ }
+ /* Child 0 never repeats. */
+ assert(repeats[0] == 0);
+ assert(counts[0] + overflows[0] == PER_CHILD_TRACES);
+
+ /*
+ * FIXME: Other children have some fuzz, since overflows may
+ * include repeat records we have already read. And odd-numbered
+ * overflows may include more repeat records than normal
+ * records (they alternate).
+ */
+}
+
+int main(void)
+{
+ union trace minimal;
+ union trace large;
+ union trace trace;
+ unsigned int i, j;
+
+ opal_node = dt_new_root("opal");
+ dt_new(dt_new(opal_node, "firmware"), "exports");
+ for (i = 0; i < CPUS; i++) {
+ fake_cpus[i].server_no = i;
+ fake_cpus[i].pir = i;
+ fake_cpus[i].is_secondary = (i & 0x1);
+ fake_cpus[i].primary = &fake_cpus[i & ~0x1];
+ }
+ my_fake_cpu = &fake_cpus[0];
+ my_trace_reader = &trace_readers[0];
+ init_trace_buffers();
+
+ for (i = 0; i < CPUS; i++) {
+ trace_readers[i].tb = &fake_cpus[i].trace->tb;
+ assert(trace_empty(&trace_readers[i]));
+ assert(!trace_get(&trace, &trace_readers[i]));
+ }
+
+ assert(sizeof(trace.hdr) % 8 == 0);
+ timestamp = 1;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(be64_to_cpu(trace.hdr.timestamp) == timestamp);
+
+ /* Make it wrap once. */
+ for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8) + 1; i++) {
+ timestamp = i;
+ trace_add(&minimal, 99 + (i%2), sizeof(trace.hdr));
+ }
+
+ assert(trace_get(&trace, my_trace_reader));
+ /* First one must be overflow marker. */
+ assert(trace.hdr.type == TRACE_OVERFLOW);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.overflow));
+ assert(be64_to_cpu(trace.overflow.bytes_missed) == minimal.hdr.len_div_8 * 8);
+
+ for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8); i++) {
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(be64_to_cpu(trace.hdr.timestamp) == i+1);
+ assert(trace.hdr.type == 99 + ((i+1)%2));
+ }
+ assert(!trace_get(&trace, my_trace_reader));
+
+ /* Now put in some weird-length ones, to test overlap.
+ * Last power of 2, minus 8. */
+ for (j = 0; (1 << j) < sizeof(large); j++);
+ for (i = 0; i < TBUF_SZ; i++) {
+ timestamp = i;
+ trace_add(&large, 100 + (i%2), (1 << (j-1)));
+ }
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.type == TRACE_OVERFLOW);
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.len_div_8 == large.hdr.len_div_8);
+ i = be64_to_cpu(trace.hdr.timestamp);
+ while (trace_get(&trace, my_trace_reader))
+ assert(be64_to_cpu(trace.hdr.timestamp) == ++i);
+
+ /* Test repeats. */
+ for (i = 0; i < 65538; i++) {
+ timestamp = i;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ }
+ timestamp = i;
+ trace_add(&minimal, 101, sizeof(trace.hdr));
+ timestamp = i+1;
+ trace_add(&minimal, 101, sizeof(trace.hdr));
+
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.timestamp == 0);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 100);
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(be16_to_cpu(trace.repeat.num) == 65535);
+ assert(be64_to_cpu(trace.repeat.timestamp) == 65535);
+ assert(trace_get(&trace, my_trace_reader));
+ assert(be64_to_cpu(trace.hdr.timestamp) == 65536);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 100);
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(be16_to_cpu(trace.repeat.num) == 1);
+ assert(be64_to_cpu(trace.repeat.timestamp) == 65537);
+
+ assert(trace_get(&trace, my_trace_reader));
+ assert(be64_to_cpu(trace.hdr.timestamp) == 65538);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 101);
+ assert(trace_get(&trace, my_trace_reader));
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(be16_to_cpu(trace.repeat.num) == 1);
+ assert(be64_to_cpu(trace.repeat.timestamp) == 65539);
+
+ /* Now, test adding repeat while we're reading... */
+ timestamp = 0;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ assert(trace_get(&trace, my_trace_reader));
+ assert(be64_to_cpu(trace.hdr.timestamp) == 0);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 100);
+
+ for (i = 1; i < TBUF_SZ; i++) {
+ timestamp = i;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ assert(trace_get(&trace, my_trace_reader));
+ if (i % 65536 == 0) {
+ assert(trace.hdr.type == 100);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ } else {
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(be16_to_cpu(trace.repeat.num) == 1);
+ }
+ assert(be64_to_cpu(trace.repeat.timestamp) == i);
+ assert(!trace_get(&trace, my_trace_reader));
+ }
+
+ for (i = 0; i < CPUS; i++)
+ if (!fake_cpus[i].is_secondary)
+ free(fake_cpus[i].trace);
+
+ test_parallel();
+
+ return 0;
+}
diff --git a/roms/skiboot/core/test/stubs.c b/roms/skiboot/core/test/stubs.c
new file mode 100644
index 000000000..0e97af249
--- /dev/null
+++ b/roms/skiboot/core/test/stubs.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Copyright 2013-2019 IBM Corp
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdint.h>
+
+#include <compiler.h>
+#include "../../ccan/list/list.c"
+
+void _prlog(int log_level __attribute__((unused)), const char* fmt, ...) __attribute__((format (printf, 2, 3)));
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+#define prlog(l, f, ...) do { _prlog(l, pr_fmt(f), ##__VA_ARGS__); } while(0)
+
+void _prlog(int log_level __attribute__((unused)), const char* fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+}
+
+/* Add any stub functions required for linking here. */
+static void stub_function(void)
+{
+ abort();
+}
+
+struct cpu_thread;
+
+struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
+ const char *name,
+ void (*func)(void *data), void *data,
+ bool no_return);
+
+void cpu_wait_job(struct cpu_job *job, bool free_it);
+void cpu_process_local_jobs(void);
+struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id,
+ const char *name,
+ void (*func)(void *data), void *data);
+
+struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id,
+ const char *name,
+ void (*func)(void *data), void *data)
+{
+ (void)chip_id;
+ return __cpu_queue_job(NULL, name, func, data, false);
+}
+
+struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
+ const char *name,
+ void (*func)(void *data), void *data,
+ bool no_return)
+{
+ (void)cpu;
+ (void)name;
+ (func)(data);
+ (void)no_return;
+ return NULL;
+}
+
+void cpu_wait_job(struct cpu_job *job, bool free_it)
+{
+ (void)job;
+ (void)free_it;
+ return;
+}
+
+void cpu_process_local_jobs(void)
+{
+}
+
+#define STUB(fnname) \
+ void fnname(void) __attribute__((weak, alias ("stub_function")))
+
+STUB(fdt_begin_node);
+STUB(fdt_property);
+STUB(fdt_end_node);
+STUB(fdt_create_with_flags);
+STUB(fdt_add_reservemap_entry);
+STUB(fdt_finish_reservemap);
+STUB(fdt_strerror);
+STUB(fdt_check_header);
+STUB(fdt_check_node_offset_);
+STUB(fdt_next_tag);
+STUB(fdt_string);
+STUB(fdt_get_name);
+STUB(dt_first);
+STUB(dt_next);
+STUB(dt_has_node_property);
+STUB(dt_get_address);
+STUB(add_chip_dev_associativity);
+STUB(pci_check_clear_freeze);
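
The STUB() macro above relies on GCC's weak alias attribute: each listed symbol becomes a weak alias of stub_function(), which is just enough to satisfy the linker for code paths the unit tests never exercise, and aborts loudly if one is ever called. A stripped-down example of the same trick follows; the stubbed names are made up, and it assumes GCC or Clang on an ELF target.

	/* Minimal demonstration of the weak-alias stubbing trick used above;
	 * the stubbed function names here are invented. */
	#include <stdio.h>
	#include <stdlib.h>

	static void stub_function(void)
	{
		fprintf(stderr, "unexpected call into a stubbed function\n");
		abort();
	}

	#define STUB(fnname) \
		void fnname(void) __attribute__((weak, alias("stub_function")))

	/* The binary links even though nothing real provides these symbols. */
	STUB(fake_fdt_begin_node);
	STUB(fake_fdt_end_node);

	int main(void)
	{
		printf("linked fine; calling a stub would abort\n");
		return 0;
	}
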
diff --git a/roms/skiboot/core/time-utils.c b/roms/skiboot/core/time-utils.c
new file mode 100644
index 000000000..e948654d3
--- /dev/null
+++ b/roms/skiboot/core/time-utils.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Converts an OPAL formatted datetime into a struct tm. We ignore microseconds
+ * as Linux doesn't use them anyway.
+ *
+ * | year | month | mday |
+ * +------------------------------------+
+ * | hour | minute | secs | reserved |
+ * +------------------------------------+
+ * | microseconds |
+ *
+ * Copyright 2013-2014 IBM Corp.
+ */
+
+#include <time-utils.h>
+
+void datetime_to_tm(uint32_t y_m_d, uint64_t h_m_s_m, struct tm *tm)
+{
+ uint32_t x;
+
+ tm->tm_year = bcd_byte(y_m_d, 3) * 100 + bcd_byte(y_m_d, 2);
+ tm->tm_mon = bcd_byte(y_m_d, 1) - 1;
+ tm->tm_mday = bcd_byte(y_m_d, 0);
+
+ x = h_m_s_m >> 32;
+ tm->tm_hour = bcd_byte(x, 3);
+ tm->tm_min = bcd_byte(x, 2);
+ tm->tm_sec = bcd_byte(x, 1);
+}
+
+/*
+ * The OPAL API is defined as returning a u64 of a similar
+ * format to the FSP message; the 32-bit date field is
+ * in the format:
+ *
+ * | year | month | mday |
+ *
+ * ... and the 64-bit time field is in the format
+ *
+ * | hour | minutes | secs | millisec |
+ * | -------------------------------------
+ * | millisec | reserved |
+ *
+ * We simply ignore the microseconds/milliseconds for now
+ * as I don't quite understand why the OPAL API defines that
+ * it needs 6 digits for the milliseconds :-) I suspect the
+ * doc got that wrong and it's supposed to be micro but
+ * let's ignore it.
+ *
+ * Note that Linux doesn't use nor set the ms field anyway.
+ */
+void tm_to_datetime(struct tm *tm, uint32_t *y_m_d, uint64_t *h_m_s_m)
+{
+ uint64_t h_m_s;
+ *y_m_d = int_to_bcd4(tm->tm_year) << 16 |
+ int_to_bcd2(tm->tm_mon + 1) << 8 |
+ int_to_bcd2(tm->tm_mday);
+
+ h_m_s = int_to_bcd2(tm->tm_hour) << 24 |
+ int_to_bcd2(tm->tm_min) << 16 |
+ int_to_bcd2(tm->tm_sec) << 8;
+
+ *h_m_s_m = h_m_s << 32;
+}
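
tm_to_datetime() above packs each date/time field as BCD, which is why run-time-utils.c expects 1982-01-29 07:42:24 to come back as 0x19820129 / 0x0742240000000000. The stand-alone sketch below reproduces that packing; bcd2() and bcd4() are toy stand-ins for skiboot's int_to_bcd2()/int_to_bcd4(), not the real helpers.

	/* Stand-alone sketch of the BCD packing done by tm_to_datetime();
	 * bcd2()/bcd4() are toy stand-ins for skiboot's helpers. */
	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t bcd2(uint32_t v)	/* two decimal digits -> one BCD byte */
	{
		return ((v / 10) << 4) | (v % 10);
	}

	static uint32_t bcd4(uint32_t v)	/* four decimal digits -> two BCD bytes */
	{
		return (bcd2(v / 100) << 8) | bcd2(v % 100);
	}

	int main(void)
	{
		/* 1982-01-29 07:42:24, the values used in run-time-utils.c */
		uint32_t ymd = bcd4(1982) << 16 | bcd2(1) << 8 | bcd2(29);
		uint64_t hms = (uint64_t)(bcd2(7) << 24 | bcd2(42) << 16 |
					  bcd2(24) << 8) << 32;

		assert(ymd == 0x19820129);
		assert(hms == 0x0742240000000000ULL);
		printf("ymd=0x%08x hms=0x%016llx\n", ymd, (unsigned long long)hms);
		return 0;
	}
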
diff --git a/roms/skiboot/core/timebase.c b/roms/skiboot/core/timebase.c
new file mode 100644
index 000000000..451e3710e
--- /dev/null
+++ b/roms/skiboot/core/timebase.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Wait for things, by waiting for timebase to tick over
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <timebase.h>
+#include <opal.h>
+#include <cpu.h>
+#include <chip.h>
+#include <debug_descriptor.h>
+
+unsigned long tb_hz = 512000000;
+
+static void time_wait_poll(unsigned long duration)
+{
+ unsigned long now = mftb();
+ unsigned long end = now + duration;
+ unsigned long period = msecs_to_tb(5);
+
+ if (this_cpu()->tb_invalid) {
+ /*
+ * Run pollers to allow some backends to process response.
+ *
+ * In TOD failure case where TOD is unrecoverable, running
+ * pollers allows ipmi backend to deal with ipmi response
+ * from bmc and helps ipmi_queue_msg_sync() to get un-stuck.
+ * Thus it avoids linux kernel to hang during panic due to
+ * TOD failure.
+ */
+ opal_run_pollers();
+ cpu_relax();
+ return;
+ }
+
+ while (tb_compare(now, end) != TB_AAFTERB) {
+
+ unsigned long remaining = end - now;
+
+ /* Call pollers periodically but not continually to avoid
+ * bouncing cachelines due to lock contention. */
+ if (remaining >= period) {
+ opal_run_pollers();
+ time_wait_nopoll(period);
+ } else
+ time_wait_nopoll(remaining);
+
+ now = mftb();
+ }
+}
+
+void time_wait(unsigned long duration)
+{
+ struct cpu_thread *c = this_cpu();
+
+ if (!list_empty(&this_cpu()->locks_held)) {
+ time_wait_nopoll(duration);
+ return;
+ }
+
+ if (c != boot_cpu && opal_booting())
+ time_wait_nopoll(duration);
+ else
+ time_wait_poll(duration);
+}
+
+void time_wait_nopoll(unsigned long duration)
+{
+ if (this_cpu()->tb_invalid) {
+ cpu_relax();
+ return;
+ }
+
+ cpu_idle_delay(duration);
+}
+
+void time_wait_ms(unsigned long ms)
+{
+ time_wait(msecs_to_tb(ms));
+}
+
+void time_wait_ms_nopoll(unsigned long ms)
+{
+ time_wait_nopoll(msecs_to_tb(ms));
+}
+
+void time_wait_us(unsigned long us)
+{
+ time_wait(usecs_to_tb(us));
+}
+
+void time_wait_us_nopoll(unsigned long us)
+{
+ time_wait_nopoll(usecs_to_tb(us));
+}
+
+unsigned long timespec_to_tb(const struct timespec *ts)
+{
+ unsigned long ns;
+
+ /* First convert to ns */
+ ns = ts->tv_sec * 1000000000ul;
+ ns += ts->tv_nsec;
+
+ /*
+ * This is a very rough approximation; it works provided
+ * we never try to pass overly long delays here and the TB
+ * frequency isn't significantly lower than 512MHz.
+ *
+ * We could improve the precision by shifting less bits
+ * at the expense of capacity or do 128 bit math which
+ * I'm not eager to do :-)
+ */
+ if (chip_quirk(QUIRK_SLOW_SIM))
+ return (ns * (tb_hz >> 16)) / (1000000000ul >> 16);
+ else
+ return (ns * (tb_hz >> 24)) / (1000000000ul >> 24);
+}
+
+int nanosleep(const struct timespec *req, struct timespec *rem)
+{
+ time_wait(timespec_to_tb(req));
+
+ if (rem) {
+ rem->tv_sec = 0;
+ rem->tv_nsec = 0;
+ }
+ return 0;
+}
+
+int nanosleep_nopoll(const struct timespec *req, struct timespec *rem)
+{
+ time_wait_nopoll(timespec_to_tb(req));
+
+ if (rem) {
+ rem->tv_sec = 0;
+ rem->tv_nsec = 0;
+ }
+ return 0;
+}
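
timespec_to_tb() above trades a little precision for 64-bit-only math by pre-shifting both tb_hz and 10^9 right by 24 bits (16 in the slow-sim case). For example, a 5 ms delay at tb_hz = 512 MHz is exactly 2,560,000 ticks, while the shifted form gives 5,000,000 * (512000000 >> 24) / (1000000000 >> 24) = 5,000,000 * 30 / 59, roughly 2,542,372 ticks, i.e. under 1% short. The small check below verifies that arithmetic; it is a quick numeric sanity check, not part of skiboot.

	/* Quick numeric check of the >>24 approximation used by timespec_to_tb(). */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint64_t tb_hz = 512000000ull;
		const uint64_t ns = 5ull * 1000 * 1000;	/* a 5 ms delay */

		/* Direct form; would need >64-bit headroom for very long delays,
		 * which is why the firmware pre-shifts instead. */
		uint64_t exact  = ns * tb_hz / 1000000000ull;
		uint64_t approx = ns * (tb_hz >> 24) / (1000000000ull >> 24);

		printf("exact=%llu approx=%llu (%.2f%% low)\n",
		       (unsigned long long)exact, (unsigned long long)approx,
		       100.0 * (double)(exact - approx) / (double)exact);
		return 0;
	}
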
diff --git a/roms/skiboot/core/timer.c b/roms/skiboot/core/timer.c
new file mode 100644
index 000000000..652ffba30
--- /dev/null
+++ b/roms/skiboot/core/timer.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * run something, but later.
+ *
+ * Timers are run when the SBE timer interrupt triggers (based on us setting
+ * it) or when the regular heartbeat call from the OS occurs and there's a
+ * timer that's expired.
+ *
+ * Copyright 2014-2019 IBM Corp.
+ */
+
+#include <timer.h>
+#include <timebase.h>
+#include <lock.h>
+#include <fsp.h>
+#include <device.h>
+#include <opal.h>
+#include <sbe-p8.h>
+#include <sbe-p9.h>
+
+#ifdef __TEST__
+#define this_cpu() ((void *)-1)
+#define cpu_relax()
+#else
+#include <cpu.h>
+#endif
+
+/* Heartbeat requested from Linux */
+#define HEARTBEAT_DEFAULT_MS 200
+
+static struct lock timer_lock = LOCK_UNLOCKED;
+static LIST_HEAD(timer_list);
+static LIST_HEAD(timer_poll_list);
+static bool timer_in_poll;
+static uint64_t timer_poll_gen;
+
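+/*
+ * Arm the SBE one-shot timer to fire at 'target' (a timebase value),
+ * dispatching to the P8 or P9 SBE code depending on the processor
+ * generation.
+ */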
+static inline void update_timer_expiry(uint64_t target)
+{
+ if (proc_gen < proc_gen_p9)
+ p8_sbe_update_timer_expiry(target);
+ else
+ p9_sbe_update_timer_expiry(target);
+}
+
+void init_timer(struct timer *t, timer_func_t expiry, void *data)
+{
+ t->link.next = t->link.prev = NULL;
+ t->target = 0;
+ t->expiry = expiry;
+ t->user_data = data;
+ t->running = NULL;
+}
+
+static void __remove_timer(struct timer *t)
+{
+ list_del(&t->link);
+ t->link.next = t->link.prev = NULL;
+}
+
+static void __sync_timer(struct timer *t)
+{
+ sync();
+
+ /* Guard against re-entrancy */
+ assert(t->running != this_cpu());
+
+ while (t->running) {
+ unlock(&timer_lock);
+ smt_lowest();
+ while (t->running)
+ barrier();
+ smt_medium();
+ /* Should we call the pollers here ? */
+ lock(&timer_lock);
+ }
+}
+
+void sync_timer(struct timer *t)
+{
+ lock(&timer_lock);
+ __sync_timer(t);
+ unlock(&timer_lock);
+}
+
+void cancel_timer(struct timer *t)
+{
+ lock(&timer_lock);
+ __sync_timer(t);
+ if (t->link.next)
+ __remove_timer(t);
+ unlock(&timer_lock);
+}
+
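+/*
+ * Like cancel_timer(), but does not wait for a concurrently running
+ * expiry callback to finish; the callback may still be executing when
+ * this returns.
+ */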
+void cancel_timer_async(struct timer *t)
+{
+ lock(&timer_lock);
+ if (t->link.next)
+ __remove_timer(t);
+ unlock(&timer_lock);
+}
+
+static void __schedule_timer_at(struct timer *t, uint64_t when)
+{
+ struct timer *lt;
+
+ /* If the timer is already scheduled, take it out */
+ if (t->link.next)
+ __remove_timer(t);
+
+ /* Update target */
+ t->target = when;
+
+ if (when == TIMER_POLL) {
+ /* It's a poller, add it to the poller list */
+ t->gen = timer_poll_gen;
+ list_add_tail(&timer_poll_list, &t->link);
+ } else {
+ /* It's a real timer, add it in the right spot in the
+ * ordered timer list
+ */
+ list_for_each(&timer_list, lt, link) {
+ if (when >= lt->target)
+ continue;
+ list_add_before(&timer_list, &t->link, &lt->link);
+ goto bail;
+ }
+ list_add_tail(&timer_list, &t->link);
+ }
+ bail:
+ /* Pick up the next timer and update the SBE HW timer */
+ lt = list_top(&timer_list, struct timer, link);
+ if (lt) {
+ update_timer_expiry(lt->target);
+ }
+}
+
+void schedule_timer_at(struct timer *t, uint64_t when)
+{
+ lock(&timer_lock);
+ __schedule_timer_at(t, when);
+ unlock(&timer_lock);
+}
+
+uint64_t schedule_timer(struct timer *t, uint64_t how_long)
+{
+ uint64_t now = mftb();
+
+ if (how_long == TIMER_POLL)
+ schedule_timer_at(t, TIMER_POLL);
+ else
+ schedule_timer_at(t, now + how_long);
+
+ return now;
+}
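+
+/*
+ * Illustrative usage (the names below are made up): after
+ * init_timer(&t, my_expiry, my_data), calling
+ * schedule_timer(&t, msecs_to_tb(10)) arranges for
+ * my_expiry(&t, my_data, now) to be run from check_timers() roughly
+ * 10ms later, while schedule_timer(&t, TIMER_POLL) queues it to run
+ * on the next poll pass (poll timers typically re-schedule themselves
+ * from their expiry callback).
+ */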
+
+static void __check_poll_timers(uint64_t now)
+{
+ struct timer *t;
+
+ /* Don't call this from multiple CPUs at once */
+ if (timer_in_poll)
+ return;
+ timer_in_poll = true;
+
+ /*
+ * Poll timers might re-enqueue themselves and don't have an
+ * expiry so we can't do like normal timers and just run until
+ * we hit a wall. Instead, each timer has a generation count,
+ * which we set to the current global gen count when we schedule
+ * it and update when we run it. It will only be considered if
+ * the generation count is different from the current one. We
+ * don't compare generations as larger or smaller because, at
+ * boot, this can be called quite quickly and I want to be safe
+ * vs. wraps.
+ */
+ timer_poll_gen++;
+ for (;;) {
+ t = list_top(&timer_poll_list, struct timer, link);
+
+ /* Top timer has a different generation than the current one?
+ * It must be older; we are done.
+ */
+ if (!t || t->gen == timer_poll_gen)
+ break;
+
+ /* Top of list still running; we have to delay handling it.
+ * Reprogram the timer hardware with a small delay, arbitrarily
+ * chosen as 1us.
+ */
+ if (t->running) {
+ update_timer_expiry(now + usecs_to_tb(1));
+ break;
+ }
+
+ /* All right, first remove it and mark it running */
+ __remove_timer(t);
+ t->running = this_cpu();
+
+ /* Now we can unlock and call its expiry */
+ unlock(&timer_lock);
+ t->expiry(t, t->user_data, now);
+
+ /* Re-lock and mark not running */
+ lock(&timer_lock);
+ t->running = NULL;
+ }
+ timer_in_poll = false;
+}
+
+static void __check_timers(uint64_t now)
+{
+ struct timer *t;
+
+ for (;;) {
+ t = list_top(&timer_list, struct timer, link);
+
+ /* Top of list not expired? Then we are done. */
+ if (!t || t->target > now)
+ break;
+
+ /* Top of list still running; we have to delay handling
+ * it. For now just skip until the next poll; once we have
+ * SLW interrupts we'll probably want to trip another one
+ * ASAP.
+ */
+ if (t->running)
+ break;
+
+ /* All right, first remove it and mark it running */
+ __remove_timer(t);
+ t->running = this_cpu();
+
+ /* Now we can unlock and call its expiry */
+ unlock(&timer_lock);
+ t->expiry(t, t->user_data, now);
+
+ /* Re-lock and mark not running */
+ lock(&timer_lock);
+ t->running = NULL;
+
+ /* Update time stamp */
+ now = mftb();
+ }
+}
+
+void check_timers(bool from_interrupt)
+{
+ uint64_t now = mftb();
+
+ /* This is the polling variant; the timer interrupt path, when
+ * present, calls this with from_interrupt set, which skips the
+ * poll timers and only runs the expired normal timers.
+ */
+
+ /* Lockless "peek", a bit racy but shouldn't be a problem as
+ * we are only looking at whether the list is empty
+ */
+ if (list_empty_nocheck(&timer_poll_list) &&
+ list_empty_nocheck(&timer_list))
+ return;
+
+ /* Take lock and try again */
+ lock(&timer_lock);
+ if (!from_interrupt)
+ __check_poll_timers(now);
+ __check_timers(now);
+ unlock(&timer_lock);
+}
+
+#ifndef __TEST__
+
+void late_init_timers(void)
+{
+ int heartbeat = HEARTBEAT_DEFAULT_MS;
+
+ /* Add a property requesting the OS to call opal_poll_event() at
+ * a specified interval in order for us to run our background
+ * low-priority pollers.
+ *
+ * If a platform quirk exists, use that, else use the default.
+ *
+ * If we have an SBE timer facility, we ask for a 10x longer
+ * interval; we could possibly get rid of the heartbeat entirely.
+ *
+ * The value is in milliseconds; we don't want this to ever be
+ * faster than that.
+ */
+ if (platform.heartbeat_time) {
+ heartbeat = platform.heartbeat_time();
+ } else if (p9_sbe_timer_ok()) {
+ heartbeat = HEARTBEAT_DEFAULT_MS * 10;
+ } else if (p8_sbe_timer_ok()) {
+ heartbeat = HEARTBEAT_DEFAULT_MS * 10;
+ }
+
+ dt_add_property_cells(opal_node, "ibm,heartbeat-ms", heartbeat);
+}
+#endif
diff --git a/roms/skiboot/core/trace.c b/roms/skiboot/core/trace.c
new file mode 100644
index 000000000..561bd79e0
--- /dev/null
+++ b/roms/skiboot/core/trace.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Trace various things into in-memory buffers
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <trace.h>
+#include <timebase.h>
+#include <lock.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <cpu.h>
+#include <device.h>
+#include <libfdt.h>
+#include <processor.h>
+#include <skiboot.h>
+#include <opal-api.h>
+#include <debug_descriptor.h>
+#include <nvram.h>
+
+#define DEBUG_TRACES
+
+#define MAX_SIZE sizeof(union trace)
+
+/* Smaller trace buffer for early booting */
+#define BOOT_TBUF_SZ 65536
+static struct {
+ struct trace_info trace_info;
+ char buf[BOOT_TBUF_SZ + MAX_SIZE];
+} boot_tracebuf __section(".data.boot_trace");
+
+void init_boot_tracebuf(struct cpu_thread *boot_cpu)
+{
+ init_lock(&boot_tracebuf.trace_info.lock);
+ boot_tracebuf.trace_info.tb.buf_size = cpu_to_be64(BOOT_TBUF_SZ);
+ boot_tracebuf.trace_info.tb.max_size = cpu_to_be32(MAX_SIZE);
+
+ boot_cpu->trace = &boot_tracebuf.trace_info;
+}
+
+static size_t tracebuf_extra(void)
+{
+ /* We make room for the largest possible record */
+ return TBUF_SZ + MAX_SIZE;
+}
+
+/* To avoid bloating the buffer, consecutive identical entries are
+ * collapsed into dedicated TRACE_REPEAT entries. tb->last points to the
+ * last (non-repeat) entry. */
+static bool handle_repeat(struct tracebuf *tb, const union trace *trace)
+{
+ struct trace_hdr *prev;
+ struct trace_repeat *rpt;
+ u32 len;
+
+ prev = (void *)tb->buf + be64_to_cpu(tb->last) % be64_to_cpu(tb->buf_size);
+
+ if (prev->type != trace->hdr.type
+ || prev->len_div_8 != trace->hdr.len_div_8
+ || prev->cpu != trace->hdr.cpu)
+ return false;
+
+ len = prev->len_div_8 << 3;
+ if (memcmp(prev + 1, &trace->hdr + 1, len - sizeof(*prev)) != 0)
+ return false;
+
+ /* If they've consumed prev entry, don't repeat. */
+ if (be64_to_cpu(tb->last) < be64_to_cpu(tb->start))
+ return false;
+
+ /* OK, it's a duplicate. Do we already have a repeat entry? */
+ if (be64_to_cpu(tb->last) + len != be64_to_cpu(tb->end)) {
+ u64 pos = be64_to_cpu(tb->last) + len;
+ /* FIXME: Reader is not protected from seeing this! */
+ rpt = (void *)tb->buf + pos % be64_to_cpu(tb->buf_size);
+ assert(pos + rpt->len_div_8*8 == be64_to_cpu(tb->end));
+ assert(rpt->type == TRACE_REPEAT);
+
+ /* If this repeat entry is full, don't repeat. */
+ if (be16_to_cpu(rpt->num) == 0xFFFF)
+ return false;
+
+ rpt->num = cpu_to_be16(be16_to_cpu(rpt->num) + 1);
+ rpt->timestamp = trace->hdr.timestamp;
+ return true;
+ }
+
+ /*
+ * Generate a repeat entry: it's the smallest possible entry, so
+ * the room already made for the full-size entry is enough for it.
+ */
+ assert(trace->hdr.len_div_8 * 8 >= sizeof(*rpt));
+
+ rpt = (void *)tb->buf + be64_to_cpu(tb->end) % be64_to_cpu(tb->buf_size);
+ rpt->timestamp = trace->hdr.timestamp;
+ rpt->type = TRACE_REPEAT;
+ rpt->len_div_8 = sizeof(*rpt) >> 3;
+ rpt->cpu = trace->hdr.cpu;
+ rpt->prev_len = cpu_to_be16(trace->hdr.len_div_8 << 3);
+ rpt->num = cpu_to_be16(1);
+ lwsync(); /* write barrier: complete repeat record before exposing */
+ tb->end = cpu_to_be64(be64_to_cpu(tb->end) + sizeof(*rpt));
+ return true;
+}
+
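+/*
+ * The trace buffer is a byte ring indexed with free-running offsets:
+ * 'start' is the oldest entry still present, 'end' is where the next
+ * entry will be written, and 'last' is the offset of the most recent
+ * non-repeat entry (used by handle_repeat() above). Offsets are only
+ * reduced modulo buf_size when dereferenced.
+ */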
+void trace_add(union trace *trace, u8 type, u16 len)
+{
+ struct trace_info *ti = this_cpu()->trace;
+ unsigned int tsz;
+
+ trace->hdr.type = type;
+ trace->hdr.len_div_8 = (len + 7) >> 3;
+
+ tsz = trace->hdr.len_div_8 << 3;
+
+#ifdef DEBUG_TRACES
+ assert(tsz >= sizeof(trace->hdr));
+ assert(tsz <= sizeof(*trace));
+ assert(trace->hdr.type != TRACE_REPEAT);
+ assert(trace->hdr.type != TRACE_OVERFLOW);
+#endif
+ /* Skip traces not enabled in the debug descriptor */
+ if (trace->hdr.type < (8 * sizeof(debug_descriptor.trace_mask)) &&
+ !((1ul << trace->hdr.type) & be64_to_cpu(debug_descriptor.trace_mask)))
+ return;
+
+ trace->hdr.timestamp = cpu_to_be64(mftb());
+ trace->hdr.cpu = cpu_to_be16(this_cpu()->server_no);
+
+ lock(&ti->lock);
+
+ /* Throw away old entries before we overwrite them. */
+ while ((be64_to_cpu(ti->tb.start) + be64_to_cpu(ti->tb.buf_size))
+ < (be64_to_cpu(ti->tb.end) + tsz)) {
+ struct trace_hdr *hdr;
+
+ hdr = (void *)ti->tb.buf +
+ be64_to_cpu(ti->tb.start) % be64_to_cpu(ti->tb.buf_size);
+ ti->tb.start = cpu_to_be64(be64_to_cpu(ti->tb.start) +
+ (hdr->len_div_8 << 3));
+ }
+
+ /* Must update ->start before we overwrite the old entries with new data. */
+ lwsync(); /* write barrier */
+
+ /* Check for duplicates... */
+ if (!handle_repeat(&ti->tb, trace)) {
+ /* This may go off end, and that's why ti->tb.buf is oversize */
+ memcpy(ti->tb.buf + be64_to_cpu(ti->tb.end) % be64_to_cpu(ti->tb.buf_size),
+ trace, tsz);
+ ti->tb.last = ti->tb.end;
+ lwsync(); /* write barrier: write entry before exposing */
+ ti->tb.end = cpu_to_be64(be64_to_cpu(ti->tb.end) + tsz);
+ }
+ unlock(&ti->lock);
+}
+
+void trace_add_dt_props(void)
+{
+ uint64_t boot_buf_phys = (uint64_t) &boot_tracebuf.trace_info;
+ struct dt_node *exports, *traces;
+ unsigned int i;
+ fdt64_t *prop;
+ u64 tmask;
+ char tname[256];
+
+ exports = dt_find_by_path(opal_node, "firmware/exports");
+ if (!exports)
+ return;
+
+ /*
+ * nvram hack to put all the trace buffer exports in the exports
+ * node. This is useful if the kernel doesn't also export subnodes.
+ */
+ if (nvram_query_safe("flat-trace-buf"))
+ traces = exports;
+ else
+ traces = dt_new(exports, "traces");
+
+ prop = malloc(sizeof(u64) * 2 * be32_to_cpu(debug_descriptor.num_traces));
+
+ for (i = 0; i < be32_to_cpu(debug_descriptor.num_traces); i++) {
+ uint64_t addr = be64_to_cpu(debug_descriptor.trace_phys[i]);
+ uint64_t size = be32_to_cpu(debug_descriptor.trace_size[i]);
+ uint32_t pir = be16_to_cpu(debug_descriptor.trace_pir[i]);
+
+ prop[i * 2] = cpu_to_fdt64(addr);
+ prop[i * 2 + 1] = cpu_to_fdt64(size);
+
+ if (addr == boot_buf_phys)
+ snprintf(tname, sizeof(tname), "boot-%x", pir);
+ else
+ snprintf(tname, sizeof(tname), "trace-%x", pir);
+
+ dt_add_property_u64s(traces, tname, addr, size);
+ }
+
+ dt_add_property(opal_node, "ibm,opal-traces",
+ prop, sizeof(u64) * 2 * i);
+ free(prop);
+
+ tmask = (uint64_t)&debug_descriptor.trace_mask;
+ dt_add_property_u64(opal_node, "ibm,opal-trace-mask", tmask);
+}
+
+static void trace_add_desc(struct trace_info *t, uint64_t size, uint16_t pir)
+{
+ unsigned int i = be32_to_cpu(debug_descriptor.num_traces);
+
+ if (i >= DEBUG_DESC_MAX_TRACES) {
+ prerror("TRACE: Debug descriptor trace list full !\n");
+ return;
+ }
+
+ debug_descriptor.num_traces = cpu_to_be32(i + 1);
+ debug_descriptor.trace_phys[i] = cpu_to_be64((uint64_t)t);
+ debug_descriptor.trace_tce[i] = 0; /* populated later */
+ debug_descriptor.trace_size[i] = cpu_to_be32(size);
+ debug_descriptor.trace_pir[i] = cpu_to_be16(pir);
+}
+
+/* Allocate trace buffers once we know memory topology */
+void init_trace_buffers(void)
+{
+ struct cpu_thread *t;
+ struct trace_info *any = &boot_tracebuf.trace_info;
+ uint64_t size;
+
+ /* Register the boot trace buffer in the debug descriptor */
+ trace_add_desc(any, sizeof(boot_tracebuf), this_cpu()->pir);
+
+ /* Allocate a trace buffer for each primary cpu. */
+ for_each_cpu(t) {
+ if (t->is_secondary)
+ continue;
+
+ /* Use a 64K alignment for TCE mapping */
+ size = ALIGN_UP(sizeof(*t->trace) + tracebuf_extra(), 0x10000);
+ t->trace = local_alloc(t->chip_id, size, 0x10000);
+ if (t->trace) {
+ any = t->trace;
+ memset(t->trace, 0, size);
+ init_lock(&t->trace->lock);
+ t->trace->tb.max_size = cpu_to_be32(MAX_SIZE);
+ t->trace->tb.buf_size = cpu_to_be64(TBUF_SZ);
+ trace_add_desc(any, sizeof(t->trace->tb) +
+ tracebuf_extra(), t->pir);
+ } else
+ prerror("TRACE: cpu 0x%x allocation failed\n", t->pir);
+ }
+
+ /* In case any allocations failed, share trace buffers. */
+ for_each_cpu(t) {
+ if (!t->is_secondary && !t->trace)
+ t->trace = any;
+ }
+
+ /* And copy those to the secondaries. */
+ for_each_cpu(t) {
+ if (!t->is_secondary)
+ continue;
+ t->trace = t->primary->trace;
+ }
+}
diff --git a/roms/skiboot/core/utils.c b/roms/skiboot/core/utils.c
new file mode 100644
index 000000000..0d2f5e894
--- /dev/null
+++ b/roms/skiboot/core/utils.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Misc utility functions
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <lock.h>
+#include <fsp.h>
+#include <platform.h>
+#include <processor.h>
+#include <cpu.h>
+#include <stack.h>
+
+void __noreturn assert_fail(const char *msg, const char *file,
+ unsigned int line, const char *function)
+{
+ static bool in_abort = false;
+
+ (void)function;
+ if (in_abort)
+ for (;;) ;
+ in_abort = true;
+
+ /**
+ * @fwts-label FailedAssert2
+ * @fwts-advice OPAL hit an assert(). During normal usage (even
+ * testing) we should never hit an assert. There are other code
+ * paths for controlled shutdown/panic in the event of catastrophic
+ * errors.
+ */
+ prlog(PR_EMERG, "assert failed at %s:%u: %s\n", file, line, msg);
+ backtrace();
+
+ if (platform.terminate)
+ platform.terminate(msg);
+
+ for (;;) ;
+}
+
+char __attrconst tohex(uint8_t nibble)
+{
+ static const char __tohex[] = {'0','1','2','3','4','5','6','7','8','9',
+ 'A','B','C','D','E','F'};
+ if (nibble > 0xf)
+ return '?';
+ return __tohex[nibble];
+}
+
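+/*
+ * Look up 'addr' in the symbol map, which the code expects to contain
+ * newline-terminated entries of the form "<hex address> <type> <name>"
+ * (the "+ 3" below skips the space, type character and space).
+ * Returns the start address of the covering symbol and points
+ * *sym/*sym_end at its name, or 0 if no symbol covers the address.
+ */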
+static unsigned long get_symbol(unsigned long addr, char **sym, char **sym_end)
+{
+ unsigned long prev = 0, next;
+ char *psym = NULL, *p = __sym_map_start;
+
+ *sym = *sym_end = NULL;
+ while(p < __sym_map_end) {
+ next = strtoul(p, &p, 16) | SKIBOOT_BASE;
+ if (next > addr && prev <= addr) {
+ p = psym + 3;
+ if (p >= __sym_map_end)
+ return 0;
+ *sym = p;
+ while(p < __sym_map_end && *p != 10)
+ p++;
+ *sym_end = p;
+ return prev;
+ }
+ prev = next;
+ psym = p;
+ while(p < __sym_map_end && *p != 10)
+ p++;
+ p++;
+ }
+ return 0;
+}
+
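+/*
+ * Format "<symbol>+0x<offset>" for 'addr' into 'buf'. Returns the
+ * number of characters written, or 0 if no symbol covers the address.
+ */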
+size_t snprintf_symbol(char *buf, size_t len, uint64_t addr)
+{
+ unsigned long saddr;
+ char *sym, *sym_end;
+ size_t l;
+
+ saddr = get_symbol(addr, &sym, &sym_end);
+ if (!saddr)
+ return 0;
+
+ if (len > sym_end - sym)
+ l = sym_end - sym;
+ else
+ l = len - 1;
+ memcpy(buf, sym, l);
+
+ /*
+ * This snprintf will insert the terminating NUL even if the
+ * symbol has used up the entire buffer less 1.
+ */
+ l += snprintf(buf + l, len - l, "+0x%llx", addr - saddr);
+
+ return l;
+}
diff --git a/roms/skiboot/core/vpd.c b/roms/skiboot/core/vpd.c
new file mode 100644
index 000000000..20fe09597
--- /dev/null
+++ b/roms/skiboot/core/vpd.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Parse Vital Product Data (VPD)
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#include <skiboot.h>
+#include <vpd.h>
+#include <string.h>
+#include <device.h>
+
+#define CHECK_SPACE(_p, _n, _e) (((_e) - (_p)) >= (_n))
+
+/* Low level keyword search in a record. Can be used when we
+ * need to find the next keyword of a given type, for example
+ * when there are multiple MF/SM keyword pairs.
+ */
+const void *vpd_find_keyword(const void *rec, size_t rec_sz,
+ const char *kw, uint8_t *kw_size)
+{
+ const uint8_t *p = rec, *end = rec + rec_sz;
+
+ while (CHECK_SPACE(p, 3, end)) {
+ uint8_t k1 = *(p++);
+ uint8_t k2 = *(p++);
+ uint8_t sz = *(p++);
+
+ if (k1 == kw[0] && k2 == kw[1]) {
+ if (kw_size)
+ *kw_size = sz;
+ return p;
+ }
+ p += sz;
+ }
+ return NULL;
+}
+
+/* vpd_valid - does some basic sanity checks to ensure a VPD blob is
+ * actually a VPD blob
+ */
+bool vpd_valid(const void *vvpd, size_t vpd_size)
+{
+ const uint8_t *vpd = vvpd;
+ int size, i = 0;
+
+ /* find the record start byte */
+ while (i < vpd_size)
+ if (vpd[i++] == 0x84)
+ break;
+
+ if (i >= vpd_size)
+ return false;
+
+ /* next two bytes are the record length, little endian */
+ size = 2;
+ size += vpd[i];
+ size += vpd[i + 1] << 8;
+
+ i += size; /* skip to the end marker */
+
+ if (i >= vpd_size || vpd[i] != 0x78)
+ return false;
+
+ return true;
+}
+
+/* Locate a record in a VPD blob
+ *
+ * Note: This works with VPD LIDs. It will scan until it finds
+ * the first 0x84, so it will skip all those 0's that the VPD
+ * LIDs seem to contain
+ */
+const void *vpd_find_record(const void *vpd, size_t vpd_size,
+ const char *record, size_t *sz)
+{
+ const uint8_t *p = vpd, *end = vpd + vpd_size;
+ bool first_start = true;
+ size_t rec_sz;
+ uint8_t namesz = 0;
+ const char *rec_name;
+
+ if (!vpd)
+ return NULL;
+
+ while (CHECK_SPACE(p, 4, end)) {
+ /* Get header byte */
+ if (*(p++) != 0x84) {
+ /* Skip initial crap in VPD LIDs */
+ if (first_start)
+ continue;
+ break;
+ }
+ first_start = false;
+ rec_sz = *(p++);
+ rec_sz |= *(p++) << 8;
+ if (!CHECK_SPACE(p, rec_sz, end)) {
+ prerror("VPD: Malformed or truncated VPD,"
+ " record size doesn't fit\n");
+ return NULL;
+ }
+
+ /* Find record name */
+ rec_name = vpd_find_keyword(p, rec_sz, "RT", &namesz);
+ if (rec_name && strncmp(record, rec_name, namesz) == 0) {
+ if (sz)
+ *sz = rec_sz;
+ return p;
+ }
+
+ p += rec_sz;
+ if (*(p++) != 0x78) {
+ prerror("VPD: Malformed or truncated VPD,"
+ " missing final 0x78 in record %.4s\n",
+ rec_name ? rec_name : "????");
+ return NULL;
+ }
+ }
+ return NULL;
+}
+
+/* Locate a keyword in a record in a VPD blob
+ *
+ * Note: This works with VPD LIDs. It will scan until it finds
+ * the first 0x84, so it will skip all those 0's that the VPD
+ * LIDs seem to contain
+ */
+const void *vpd_find(const void *vpd, size_t vpd_size,
+ const char *record, const char *keyword,
+ uint8_t *sz)
+{
+ size_t rec_sz;
+ const uint8_t *p;
+
+ p = vpd_find_record(vpd, vpd_size, record, &rec_sz);
+ if (p)
+ p = vpd_find_keyword(p, rec_sz, keyword, sz);
+ return p;
+}
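+
+/*
+ * Illustrative (hypothetical) usage, looking up a part-number keyword
+ * in a "VINI" record of a VPD LID buffer:
+ *
+ *   uint8_t sz;
+ *   const char *pn = vpd_find(lid, lid_size, "VINI", "PN", &sz);
+ *
+ * 'lid'/'lid_size' and the record/keyword names above are example
+ * inputs, not something this file defines.
+ */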